View Javadoc

1   package net.sourceforge.blogentis.trackback;
2   
3   //-----------------------------------------------------------------------
4   //Blogentis - a blog publishing platform.
5   //Copyright (C) 2004 Tassos Bassoukos <abassouk@gmail.com>
6   //
7   //This library is free software; you can redistribute it and/or
8   //modify it under the terms of the GNU Lesser General Public
9   //License as published by the Free Software Foundation; either
10  //version 2.1 of the License, or (at your option) any later version.
11  //
12  //This library is distributed in the hope that it will be useful,
13  //but WITHOUT ANY WARRANTY; without even the implied warranty of
14  //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  //Lesser General Public License for more details.
16  //
17  //You should have received a copy of the GNU Lesser General Public
18  //License along with this library; if not, write to the Free Software
19  //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20  //-----------------------------------------------------------------------
21  //
22  //$Id: TrackbackPostEditExtension.java,v 1.2 2004/10/28 10:45:51 tassos Exp $
23  //
24  
25  import java.io.BufferedOutputStream;
26  import java.io.ByteArrayInputStream;
27  import java.io.ByteArrayOutputStream;
28  import java.io.IOException;
29  import java.io.InputStream;
30  import java.io.OutputStreamWriter;
31  import java.io.PrintWriter;
32  import java.io.StringWriter;
33  import java.io.UnsupportedEncodingException;
34  import java.net.HttpURLConnection;
35  import java.net.MalformedURLException;
36  import java.net.URL;
37  import java.net.URLConnection;
38  import java.net.URLEncoder;
39  import java.util.ArrayList;
40  import java.util.Collections;
41  import java.util.HashSet;
42  import java.util.Iterator;
43  import java.util.List;
44  import java.util.Set;
45  import java.util.Vector;
46  import java.util.regex.Matcher;
47  import java.util.regex.Pattern;
48  
49  import javax.xml.transform.TransformerException;
50  
51  import org.apache.commons.logging.Log;
52  import org.apache.commons.logging.LogFactory;
53  import org.apache.torque.TorqueException;
54  import org.apache.turbine.services.velocity.TurbineVelocity;
55  import org.apache.turbine.util.RunData;
56  import org.apache.velocity.context.Context;
57  import org.apache.xmlrpc.XmlRpcClient;
58  import org.apache.xmlrpc.XmlRpcException;
59  import org.apache.xpath.XPathAPI;
60  import org.w3c.dom.Document;
61  import org.w3c.dom.Node;
62  import org.w3c.dom.traversal.NodeIterator;
63  import org.w3c.tidy.Tidy;
64  
65  import net.sourceforge.blogentis.om.Blog;
66  import net.sourceforge.blogentis.om.Post;
67  import net.sourceforge.blogentis.om.PostPeer;
68  import net.sourceforge.blogentis.plugins.IPlugin;
69  import net.sourceforge.blogentis.plugins.base.AbstractPostEditExtension;
70  import net.sourceforge.blogentis.turbine.BlogRunData;
71  import net.sourceforge.blogentis.utils.AbsoluteLinkURL;
72  import net.sourceforge.blogentis.utils.JTidyService;
73  import net.sourceforge.blogentis.utils.LinkFactoryService;
74  import net.sourceforge.blogentis.utils.MappedConfiguration;
75  import net.sourceforge.blogentis.utils.StringUtils;
76  import net.sourceforge.blogentis.utils.tools.FragmentTool;
77  
78  /***
79   * @author abas
80   */
81  public class TrackbackPostEditExtension
82          extends AbstractPostEditExtension {
83      public static final Log log = LogFactory
84          .getLog(TrackbackPostEditExtension.class);
85  
86      public static final Pattern rdfPattern = Pattern
87          .compile("<rdf:RDF.*?</rdf:RDF>", Pattern.DOTALL);
88  
89      public static final Pattern tbPattern = Pattern
90          .compile("trackback:ping=\"([^\"]+)\"");
91  
92      public static final Pattern aboutPattern = Pattern
93          .compile("about=\"([^\"]+)\"");
94  
95      public static final Pattern identPattern = Pattern
96          .compile("dc:identifier=\"([^\"]+)\"");
97  
98      public static final String T_SENT = "trackback.sent";
99      public static final String T_URIS_SENT = "trackback.uris.sent";
100     public static final String T_URIS_NOT_SENT = "trackback.uris.notsent";
101     public static final String T_WEBLOGS_COM = "trackback.weblogsComSent";
102     public static final String T_BLOG_EXTRA_LINKS = "trackback.extraURLs";
103     public static final String T_BLOG_SEND = "trackback.sendTrackbacks";
104     public static final String T_BLOG_RECEIVE = "trackback.receiveTrackbacks";
105     public static final String T_BLOG_IGNORE_FROM_SELF = "trackback.ignoreBlogTrackbacks";
106     public static final String T_BLOG_IGNORE_FROM_SERVER = "trackback.ignoreServerTrackbacks";
107     public static final String T_BLOG_WEBLOGS_COM = "trackback.pingWeblogsCom";
108 
109     public TrackbackPostEditExtension(IPlugin plugin, Blog blog) {
110         super(plugin, blog);
111     }
112 
113     public String getName() {
114         return "Trackback support";
115     }
116 
117     public String buildOptionsHTML(BlogRunData data, Post post) {
118         if (post == null || post.isNew() || post.isPublished())
119             return "";
120         if (this.blog.getConfiguration().getBoolean(T_BLOG_SEND, true) == false)
121             return "";
122         Context context = TurbineVelocity.getContext(data);
123         FragmentTool f = (FragmentTool)context.get("fragmentTool");
124         try {
125             return f.getFragment("TrackbackPostEdit").invoke(context, data,
126                                                              post).build(data);
127         } catch (Exception e) {
128             return e.toString();
129         }
130     }
131 
132     public void postPublicationStatusChanged(BlogRunData data, Post post,
133                                              int oldState) {
134         if (post.getPostType() != PostPeer.PUBLISHED_TYPE)
135             return;
136         MappedConfiguration conf = this.blog.getConfiguration();
137         MappedConfiguration postConf = post.getProperties();
138 
139         if (conf.getBoolean(T_BLOG_SEND, true)
140             && data.getParameters().getString("sendTrackbacks", null) != null) {
141             processPost(post);
142             postConf.setProperty(T_SENT, Boolean.TRUE);
143         }
144         if (conf.getBoolean(T_BLOG_WEBLOGS_COM, true)
145             && data.getParameters().getString("sendWeblogsPing", null) != null) {
146             sendWeblogPing(data, post);
147             postConf.setProperty(T_WEBLOGS_COM, Boolean.TRUE);
148         }
149     }
150 
151     private Document makePostDocument(Post p) {
152         Tidy t = JTidyService.getTidy();
153         StringWriter sw = new StringWriter();
154         t.setErrout(new PrintWriter(sw));
155 
156         String contents = "<title>" + p.getTitle() + "</title>"
157                           + p.getShortDescription() + p.getFullText();
158 
159         try {
160             return t.parseDOM(new ByteArrayInputStream(contents
161                 .getBytes("utf-8")), null);
162         } catch (UnsupportedEncodingException e) {
163             log.error(e);
164             return null;
165         }
166     }
167 
168     private List getLinks(Document doc) {
169         NodeIterator i;
170         try {
171             i = XPathAPI.selectNodeIterator(doc, "//a/@href");
172         } catch (TransformerException e) {
173             log.error(e);
174             return null;
175         }
176         ArrayList l = new ArrayList();
177         Node n = null;
178         while ((n = i.nextNode()) != null) {
179             l.add(n.getNodeValue());
180         }
181         return l;
182     }
183 
184     private void processPost(Post p) {
185         Document doc = makePostDocument(p);
186         List links = getLinks(doc);
187         Set alreadySent = new HashSet(p.getProperties()
188             .getList(T_URIS_SENT, Collections.EMPTY_LIST));
189         Set notSent = new HashSet(p.getProperties()
190             .getList(T_URIS_NOT_SENT, Collections.EMPTY_LIST));
191 
192         for(Iterator i = links.iterator(); i.hasNext();) {
193             String link = (String)i.next();
194             if (!link.startsWith("https://") && !link.startsWith("http://")) {
195                 log.debug("Skipping " + link);
196                 continue;
197             }
198             log.debug("Looking at " + link + " for trackback URLs");
199             try {
200                 if (alreadySent.contains(link) || notSent.contains(link))
201                     continue;
202                 String content = fetchURL(link);
203                 String trackBackLink = getTrackbackFromHTML(content, link);
204                 if (trackBackLink != null) {
205                     if (sendTrackBackTo(trackBackLink, p))
206                         alreadySent.add(link);
207                 }
208             } finally {
209                 if (!alreadySent.contains(link)) {
210                     notSent.add(link);
211                 }
212             }
213         }
214 
215         links = this.blog.getConfiguration().getList(T_BLOG_EXTRA_LINKS,
216                                                      new ArrayList());
217         for(Iterator i = links.iterator(); i.hasNext();) {
218             String link = (String)i.next();
219             if (link.length() < 5)
220                 continue;
221             if (alreadySent.contains(link) || notSent.contains(link))
222                 continue;
223             if (sendTrackBackTo(link, p))
224                 alreadySent.add(link);
225         }
226         p.getProperties().setList(T_URIS_SENT, new ArrayList(alreadySent));
227         p.getProperties().setList(T_URIS_NOT_SENT, new ArrayList(notSent));
228     }
229 
230     private byte[] fetchURLConnection(URLConnection con)
231             throws IOException {
232         InputStream is = con.getInputStream();
233         ByteArrayOutputStream bos = new ByteArrayOutputStream();
234         int len;
235         byte[] byffer = new byte[1024];
236         do {
237             len = is.read(byffer, 0, 1024);
238             if (len > 0)
239                 bos.write(byffer, 0, len);
240         } while (len > 0);
241         return bos.toByteArray();
242     }
243 
244     private String fetchURL(String link) {
245         URL url = null;
246         try {
247             url = new URL(link);
248         } catch (MalformedURLException e) {
249             log.debug("Incorrect URL " + link, e);
250             return null;
251         }
252         try {
253             URLConnection connection = url.openConnection();
254             byte[] content = fetchURLConnection(connection);
255             // HACK, but we only need ASCII characters.
256             return new String(content, "iso8859-1");
257         } catch (IOException e1) {
258             log.debug("Could not fetch " + link, e1);
259             return null;
260         }
261     }
262 
263     private String getTrackbackFromHTML(String s, String origUrl) {
264         Matcher m = rdfPattern.matcher(s);
265         while (m.find()) {
266             String rdf = m.group();
267             Matcher tb = identPattern.matcher(rdf);
268             if (!tb.find())
269                 continue;
270             if (!origUrl.equals(tb.group(1)))
271                 continue;
272             tb = tbPattern.matcher(rdf);
273             if (tb.find())
274                 return tb.group(1);
275             tb = aboutPattern.matcher(rdf);
276             if (tb.find())
277                 return tb.group(1);
278         }
279         return null;
280     }
281 
282     private boolean sendTrackBackTo(String url, Post p) {
283         log.debug("Sending trackback to " + url);
284         try {
285             URL l = new URL(url);
286             HttpURLConnection con = (HttpURLConnection)l.openConnection();
287             con.setRequestMethod("POST");
288             con.setRequestProperty("Content-Type",
289                                    "application/x-www-form-urlencoded");
290             con.setDoOutput(true);
291             OutputStreamWriter bw = new OutputStreamWriter(
292                 new BufferedOutputStream(con.getOutputStream()), "utf-8");
293             bw.write("title=");
294             bw.write(URLEncoder.encode(p.getTitle(), "utf-8"));
295             bw.write("&url=");
296             bw.write(new AbsoluteLinkURL().permaLink(p).toString());
297             bw.write("&excerpt=");
298             bw.write(URLEncoder.encode(StringUtils.removeTags(p
299                 .getShortDescription()), "utf-8"));
300             bw.write("&blog_name=");
301             bw.write(URLEncoder.encode(p.getBlog().getTitle(), "utf-8"));
302             bw.flush();
303             bw.close();
304             // TODO: we currently ignore all results.
305             fetchURLConnection(con);
306         } catch (MalformedURLException e) {
307             log.debug("URL specified in RDF as trackback was invalid.", e);
308             return false;
309         } catch (IOException e) {
310             log.debug("Error sending trackback.", e);
311             return false;
312         } catch (TorqueException e) {
313             log.error("Torque threw an error.", e);
314             return false;
315         }
316         return true;
317     }
318 
319     private void sendWeblogPing(RunData data, Post p) {
320         try {
321             XmlRpcClient xrc = new XmlRpcClient(
322                 "http://rpc.weblogs.com:80/RPC2");
323             Vector params = new Vector(2);
324             Blog b = p.getBlog();
325             params.add(b.getTitle());
326             params.add(LinkFactoryService.getLink().permaLink(b).toString());
327             xrc.execute("weblogUpdates.ping", params);
328             p.getProperties().setProperty(T_WEBLOGS_COM, "true");
329         } catch (MalformedURLException e) {
330             log.error(e);
331         } catch (TorqueException e) {
332             log.error(e);
333         } catch (XmlRpcException e) {
334             log.error(e);
335         } catch (IOException e) {
336             log.error(e);
337         }
338     }
339 }