1 package net.sourceforge.blogentis.trackback;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 import java.io.BufferedOutputStream;
26 import java.io.ByteArrayInputStream;
27 import java.io.ByteArrayOutputStream;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.io.OutputStreamWriter;
31 import java.io.PrintWriter;
32 import java.io.StringWriter;
33 import java.io.UnsupportedEncodingException;
34 import java.net.HttpURLConnection;
35 import java.net.MalformedURLException;
36 import java.net.URL;
37 import java.net.URLConnection;
38 import java.net.URLEncoder;
39 import java.util.ArrayList;
40 import java.util.Collections;
41 import java.util.HashSet;
42 import java.util.Iterator;
43 import java.util.List;
44 import java.util.Set;
45 import java.util.Vector;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48
49 import javax.xml.transform.TransformerException;
50
51 import org.apache.commons.logging.Log;
52 import org.apache.commons.logging.LogFactory;
53 import org.apache.torque.TorqueException;
54 import org.apache.turbine.services.velocity.TurbineVelocity;
55 import org.apache.turbine.util.RunData;
56 import org.apache.velocity.context.Context;
57 import org.apache.xmlrpc.XmlRpcClient;
58 import org.apache.xmlrpc.XmlRpcException;
59 import org.apache.xpath.XPathAPI;
60 import org.w3c.dom.Document;
61 import org.w3c.dom.Node;
62 import org.w3c.dom.traversal.NodeIterator;
63 import org.w3c.tidy.Tidy;
64
65 import net.sourceforge.blogentis.om.Blog;
66 import net.sourceforge.blogentis.om.Post;
67 import net.sourceforge.blogentis.om.PostPeer;
68 import net.sourceforge.blogentis.plugins.IPlugin;
69 import net.sourceforge.blogentis.plugins.base.AbstractPostEditExtension;
70 import net.sourceforge.blogentis.turbine.BlogRunData;
71 import net.sourceforge.blogentis.utils.AbsoluteLinkURL;
72 import net.sourceforge.blogentis.utils.JTidyService;
73 import net.sourceforge.blogentis.utils.LinkFactoryService;
74 import net.sourceforge.blogentis.utils.MappedConfiguration;
75 import net.sourceforge.blogentis.utils.StringUtils;
76 import net.sourceforge.blogentis.utils.tools.FragmentTool;
77
78 /***
79 * @author abas
80 */
81 public class TrackbackPostEditExtension
82 extends AbstractPostEditExtension {
83 public static final Log log = LogFactory
84 .getLog(TrackbackPostEditExtension.class);
85
86 public static final Pattern rdfPattern = Pattern
87 .compile("<rdf:RDF.*?</rdf:RDF>", Pattern.DOTALL);
88
89 public static final Pattern tbPattern = Pattern
90 .compile("trackback:ping=\"([^\"]+)\"");
91
92 public static final Pattern aboutPattern = Pattern
93 .compile("about=\"([^\"]+)\"");
94
95 public static final Pattern identPattern = Pattern
96 .compile("dc:identifier=\"([^\"]+)\"");
97
98 public static final String T_SENT = "trackback.sent";
99 public static final String T_URIS_SENT = "trackback.uris.sent";
100 public static final String T_URIS_NOT_SENT = "trackback.uris.notsent";
101 public static final String T_WEBLOGS_COM = "trackback.weblogsComSent";
102 public static final String T_BLOG_EXTRA_LINKS = "trackback.extraURLs";
103 public static final String T_BLOG_SEND = "trackback.sendTrackbacks";
104 public static final String T_BLOG_RECEIVE = "trackback.receiveTrackbacks";
105 public static final String T_BLOG_IGNORE_FROM_SELF = "trackback.ignoreBlogTrackbacks";
106 public static final String T_BLOG_IGNORE_FROM_SERVER = "trackback.ignoreServerTrackbacks";
107 public static final String T_BLOG_WEBLOGS_COM = "trackback.pingWeblogsCom";
108
109 public TrackbackPostEditExtension(IPlugin plugin, Blog blog) {
110 super(plugin, blog);
111 }
112
113 public String getName() {
114 return "Trackback support";
115 }
116
117 public String buildOptionsHTML(BlogRunData data, Post post) {
118 if (post == null || post.isNew() || post.isPublished())
119 return "";
120 if (this.blog.getConfiguration().getBoolean(T_BLOG_SEND, true) == false)
121 return "";
122 Context context = TurbineVelocity.getContext(data);
123 FragmentTool f = (FragmentTool)context.get("fragmentTool");
124 try {
125 return f.getFragment("TrackbackPostEdit").invoke(context, data,
126 post).build(data);
127 } catch (Exception e) {
128 return e.toString();
129 }
130 }
131
132 public void postPublicationStatusChanged(BlogRunData data, Post post,
133 int oldState) {
134 if (post.getPostType() != PostPeer.PUBLISHED_TYPE)
135 return;
136 MappedConfiguration conf = this.blog.getConfiguration();
137 MappedConfiguration postConf = post.getProperties();
138
139 if (conf.getBoolean(T_BLOG_SEND, true)
140 && data.getParameters().getString("sendTrackbacks", null) != null) {
141 processPost(post);
142 postConf.setProperty(T_SENT, Boolean.TRUE);
143 }
144 if (conf.getBoolean(T_BLOG_WEBLOGS_COM, true)
145 && data.getParameters().getString("sendWeblogsPing", null) != null) {
146 sendWeblogPing(data, post);
147 postConf.setProperty(T_WEBLOGS_COM, Boolean.TRUE);
148 }
149 }
150
151 private Document makePostDocument(Post p) {
152 Tidy t = JTidyService.getTidy();
153 StringWriter sw = new StringWriter();
154 t.setErrout(new PrintWriter(sw));
155
156 String contents = "<title>" + p.getTitle() + "</title>"
157 + p.getShortDescription() + p.getFullText();
158
159 try {
160 return t.parseDOM(new ByteArrayInputStream(contents
161 .getBytes("utf-8")), null);
162 } catch (UnsupportedEncodingException e) {
163 log.error(e);
164 return null;
165 }
166 }
167
168 private List getLinks(Document doc) {
169 NodeIterator i;
170 try {
171 i = XPathAPI.selectNodeIterator(doc, "//a/@href");
172 } catch (TransformerException e) {
173 log.error(e);
174 return null;
175 }
176 ArrayList l = new ArrayList();
177 Node n = null;
178 while ((n = i.nextNode()) != null) {
179 l.add(n.getNodeValue());
180 }
181 return l;
182 }
183
184 private void processPost(Post p) {
185 Document doc = makePostDocument(p);
186 List links = getLinks(doc);
187 Set alreadySent = new HashSet(p.getProperties()
188 .getList(T_URIS_SENT, Collections.EMPTY_LIST));
189 Set notSent = new HashSet(p.getProperties()
190 .getList(T_URIS_NOT_SENT, Collections.EMPTY_LIST));
191
192 for(Iterator i = links.iterator(); i.hasNext();) {
193 String link = (String)i.next();
194 if (!link.startsWith("https://") && !link.startsWith("http://")) {
195 log.debug("Skipping " + link);
196 continue;
197 }
198 log.debug("Looking at " + link + " for trackback URLs");
199 try {
200 if (alreadySent.contains(link) || notSent.contains(link))
201 continue;
202 String content = fetchURL(link);
203 String trackBackLink = getTrackbackFromHTML(content, link);
204 if (trackBackLink != null) {
205 if (sendTrackBackTo(trackBackLink, p))
206 alreadySent.add(link);
207 }
208 } finally {
209 if (!alreadySent.contains(link)) {
210 notSent.add(link);
211 }
212 }
213 }
214
215 links = this.blog.getConfiguration().getList(T_BLOG_EXTRA_LINKS,
216 new ArrayList());
217 for(Iterator i = links.iterator(); i.hasNext();) {
218 String link = (String)i.next();
219 if (link.length() < 5)
220 continue;
221 if (alreadySent.contains(link) || notSent.contains(link))
222 continue;
223 if (sendTrackBackTo(link, p))
224 alreadySent.add(link);
225 }
226 p.getProperties().setList(T_URIS_SENT, new ArrayList(alreadySent));
227 p.getProperties().setList(T_URIS_NOT_SENT, new ArrayList(notSent));
228 }
229
230 private byte[] fetchURLConnection(URLConnection con)
231 throws IOException {
232 InputStream is = con.getInputStream();
233 ByteArrayOutputStream bos = new ByteArrayOutputStream();
234 int len;
235 byte[] byffer = new byte[1024];
236 do {
237 len = is.read(byffer, 0, 1024);
238 if (len > 0)
239 bos.write(byffer, 0, len);
240 } while (len > 0);
241 return bos.toByteArray();
242 }
243
244 private String fetchURL(String link) {
245 URL url = null;
246 try {
247 url = new URL(link);
248 } catch (MalformedURLException e) {
249 log.debug("Incorrect URL " + link, e);
250 return null;
251 }
252 try {
253 URLConnection connection = url.openConnection();
254 byte[] content = fetchURLConnection(connection);
255
256 return new String(content, "iso8859-1");
257 } catch (IOException e1) {
258 log.debug("Could not fetch " + link, e1);
259 return null;
260 }
261 }
262
263 private String getTrackbackFromHTML(String s, String origUrl) {
264 Matcher m = rdfPattern.matcher(s);
265 while (m.find()) {
266 String rdf = m.group();
267 Matcher tb = identPattern.matcher(rdf);
268 if (!tb.find())
269 continue;
270 if (!origUrl.equals(tb.group(1)))
271 continue;
272 tb = tbPattern.matcher(rdf);
273 if (tb.find())
274 return tb.group(1);
275 tb = aboutPattern.matcher(rdf);
276 if (tb.find())
277 return tb.group(1);
278 }
279 return null;
280 }
281
282 private boolean sendTrackBackTo(String url, Post p) {
283 log.debug("Sending trackback to " + url);
284 try {
285 URL l = new URL(url);
286 HttpURLConnection con = (HttpURLConnection)l.openConnection();
287 con.setRequestMethod("POST");
288 con.setRequestProperty("Content-Type",
289 "application/x-www-form-urlencoded");
290 con.setDoOutput(true);
291 OutputStreamWriter bw = new OutputStreamWriter(
292 new BufferedOutputStream(con.getOutputStream()), "utf-8");
293 bw.write("title=");
294 bw.write(URLEncoder.encode(p.getTitle(), "utf-8"));
295 bw.write("&url=");
296 bw.write(new AbsoluteLinkURL().permaLink(p).toString());
297 bw.write("&excerpt=");
298 bw.write(URLEncoder.encode(StringUtils.removeTags(p
299 .getShortDescription()), "utf-8"));
300 bw.write("&blog_name=");
301 bw.write(URLEncoder.encode(p.getBlog().getTitle(), "utf-8"));
302 bw.flush();
303 bw.close();
304
305 fetchURLConnection(con);
306 } catch (MalformedURLException e) {
307 log.debug("URL specified in RDF as trackback was invalid.", e);
308 return false;
309 } catch (IOException e) {
310 log.debug("Error sending trackback.", e);
311 return false;
312 } catch (TorqueException e) {
313 log.error("Torque threw an error.", e);
314 return false;
315 }
316 return true;
317 }
318
319 private void sendWeblogPing(RunData data, Post p) {
320 try {
321 XmlRpcClient xrc = new XmlRpcClient(
322 "http://rpc.weblogs.com:80/RPC2");
323 Vector params = new Vector(2);
324 Blog b = p.getBlog();
325 params.add(b.getTitle());
326 params.add(LinkFactoryService.getLink().permaLink(b).toString());
327 xrc.execute("weblogUpdates.ping", params);
328 p.getProperties().setProperty(T_WEBLOGS_COM, "true");
329 } catch (MalformedURLException e) {
330 log.error(e);
331 } catch (TorqueException e) {
332 log.error(e);
333 } catch (XmlRpcException e) {
334 log.error(e);
335 } catch (IOException e) {
336 log.error(e);
337 }
338 }
339 }