View Javadoc

1   /**
2    * This file Copyright (c) 2008-2013 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.importhandler;
35  
36  import static java.lang.String.*;
37  import static org.apache.commons.lang.StringUtils.isEmpty;
38  
39  import info.magnolia.cms.core.Content;
40  import info.magnolia.jcr.util.NodeTypes;
41  import info.magnolia.jcr.util.NodeUtil;
42  import info.magnolia.jcr.util.NodeVisitor;
43  import info.magnolia.jcr.util.PropertyUtil;
44  import info.magnolia.jcr.util.VersionUtil;
45  import info.magnolia.module.data.importer.ImportException;
46  import info.magnolia.module.data.importer.ImportHandler;
47  import info.magnolia.module.data.importer.ImportTarget;
48  import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
49  import info.magnolia.module.rssaggregator.util.Assert;
50  
51  import java.util.ArrayList;
52  import java.util.Collection;
53  import java.util.Collections;
54  import java.util.Date;
55  import java.util.HashSet;
56  import java.util.List;
57  import java.util.Set;
58  
59  import javax.inject.Inject;
60  import javax.jcr.Node;
61  import javax.jcr.NodeIterator;
62  import javax.jcr.RepositoryException;
63  
64  import org.apache.commons.lang.StringUtils;
65  import org.apache.jackrabbit.commons.predicate.Predicate;
66  import org.slf4j.Logger;
67  import org.slf4j.LoggerFactory;
68  
69  import com.sun.syndication.feed.synd.SyndCategory;
70  import com.sun.syndication.feed.synd.SyndContent;
71  import com.sun.syndication.feed.synd.SyndEntry;
72  import com.sun.syndication.feed.synd.SyndFeed;
73  
74  /**
75   * ImportHandler capable of importing RSS and Atom feeds over http for aggregate feeds defined in RSS Aggregator module.
76   * <p/>
77   * Allows optional configuration with a {@link RSSFeedFetcher} implementation of choice, by means of configuring the node:
78   *
79   * <pre>
80   * /data/config/importers/rssaggregator
81   * </pre>
82   *
83   * If no custom implementation is configured, will fall back to using the default {@link SimpleRSSFeedFetcher}.
84   *
85   * @author had
86   * @author Rob van der Linden Vooren
87   */
88  public class RSSFeedImportHandler extends ImportHandler {
89  
90      private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
91  
92      private RSSFeedFetcher feedFetcher;
93      private AggregateFeedContentMapper aggregateFeedMapper;
94      private FilterPredicateContentMapper filterPredicateMapper;
95  
96      /**
97       * Creates handler with default mappers {@link AggregateFeedContentMapper} and {@link FilterPredicateContentMapper}.
98       */
99      @Inject
100     public RSSFeedImportHandler(RSSFeedFetcher feedFetcher) {
101         setAggregateFeedContentMapper(new AggregateFeedContentMapper());
102         setFilterPredicateContentMapper(new FilterPredicateContentMapper());
103         this.feedFetcher = feedFetcher;
104     }
105 
106 
107     @Override
108     protected synchronized void checkPreConditions() throws ImportException {
109         super.checkPreConditions();
110         log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
111     }
112 
113     @SuppressWarnings("unchecked")
114     @Override
115     public Set doImport(final ImportTarget target, Content root, final Set newContentUUIDs) throws ImportException {
116         try {
117             NodeUtil.visit(root.getJCRNode(), new NodeVisitor() {
118                 @Override
119                 public void visit(Node node) throws RepositoryException {
120                     Set<AggregateFeed> aggregateFeeds = loadAggregates(node);
121                     if (!aggregateFeeds.isEmpty()) {
122                         log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(), countChannels(aggregateFeeds));
123                         Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
124                         Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, node);
125                         newContentUUIDs.addAll(newAggregateContentUUIDs);
126                         node.getSession().save();
127                         log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
128                     }
129                 }
130             }, new IsRootOrFolder());
131         } catch (RepositoryException e) {
132             String message = format("Failed to execute import for target '%s', parent node '%s'", target, root);
133             throw new ImportException(message, e);
134         }
135         return newContentUUIDs;
136     }
137 
138     // Helper methods
139 
140     private int countChannels(Set<AggregateFeed> aggregateFeeds) {
141         int channelCount = 0;
142         for (AggregateFeed aggregateFeed : aggregateFeeds) {
143             channelCount += aggregateFeed.getChannels().size();
144         }
145         return channelCount;
146     }
147 
148     /**
149      * Load the {@link AggregateFeed aggregate feed} definitions and their {@link FeedChannel feed channels} from the
150      * Content Repository.
151      *
152      * @param parentNode the parent content node that holds the aggregate feed nodes
153      * @return the aggregate feeds
154      * @throws RepositoryException when an exception occurs accessing the Content Repository
155      */
156     public Set<AggregateFeed> loadAggregates(Node parentNode) throws RepositoryException {
157 
158         List<Node> nodeIterator = NodeUtil.asList(NodeUtil.getNodes(parentNode, RSSAggregatorNodeTypes.RSSAggregator.NAME));
159         Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
160         for (Node aggregateNode : nodeIterator) {
161             AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
162             aggregateFeeds.add(aggregateFeed);
163         }
164         return aggregateFeeds;
165     }
166 
167     /**
168      * Save the {@link FeedChannel#feed feed entry} content contained in the {@link FeedChannel feed channels} of the
169      * given <code>aggregateFeeds</code> as children of the given <code>parentNode</code>. If an {@link AggregateFeed} has
170      * {@link AggregateFilter} defined, feed entries must pass the filter before they will be actually saved in the
171      * Content Repository.
172      *
173      * @param parentNode the parent content node of the aggregate feeds content to save
174      * @param aggregateFeeds the aggregate feeds to save
175      * @return a set of UUIDs of the newly created aggregate content nodes
176      * @throws RepositoryException when an exception occurs accessing the Content Repository
177      */
178     protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Node parentNode) throws RepositoryException {
179         Set<String> newAggregateContentUUIDs = new HashSet<String>();
180         for (AggregateFeed aggregateFeed : aggregateFeeds) {
181             Node aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
182             Node dataNode = getOrCreateNode(aggregateNode, "data", NodeTypes.Content.NAME);
183             newAggregateContentUUIDs.add(aggregateNode.getUUID());
184             AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
185             for (FeedChannel channel : aggregateFeed.getChannels()) {
186                 if (channel.hasFeed()) {
187                     saveFeedChannel(channel, aggregateFilter, dataNode);
188                 }
189             }
190         }
191         return newAggregateContentUUIDs;
192     }
193 
194     /**
195      * Load a single aggregate content node from the given <code>parentNode</code> with the given
196      * <code>aggregateName</code>. If no such aggregate could be found, <code>null</code> is returned.
197      *
198      * @param parentNode the parentNode to load the node from
199      * @param aggregateNodeName the name of the aggregate content node to load
200      * @return the aggregate content node, or <code>null</code> if no such node was found
201      * @throws IllegalStateException when multiple aggregate content nodes with the same name are found
202      */
203 
204     protected Node loadSingleAggregateNode(Node parentNode, String aggregateNodeName) throws RepositoryException {
205         // ////////////////
206         // SHOULD BE MOVE IN NodeUtil
207         // ///////////////
208         NodeIterator nodeIterator = parentNode.getNodes(aggregateNodeName);
209         Collection<Node> aggregateNodes = new ArrayList<Node>();
210         while (nodeIterator.hasNext()) {
211             Node currentNode = nodeIterator.nextNode();
212             if (NodeUtil.isNodeType(currentNode, RSSAggregatorNodeTypes.RSSAggregator.NAME)) {
213                 aggregateNodes.add(currentNode);
214             }
215         }
216         // ////////////////
217         // END
218         // ///////////////
219         int size = aggregateNodes.size();
220         if (size > 1) {
221             throw new IllegalStateException(format(
222                     "Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
223                     parentNode, aggregateNodeName, RSSAggregatorNodeTypes.RSSAggregator.NAME, size));
224         }
225         if (aggregateNodes.isEmpty()) {
226             return null;
227         }
228         return aggregateNodes.iterator().next();
229     }
230 
231     /**
232      * Behaves exactly like {@link NodeUtil#createPath(Node contentNode, String name, String itemType, boolean save)}. This method exists for
233      * testability.
234      *
235      * @param contentNode the contentNode to (create if non-existent and then) get
236      * @param name the name of the node
237      * @param itemType the type of the content node
238      * @return the created content node
239      * @throws RepositoryException when an exception occurs accessing the Content Repository
240      */
241     protected Node getOrCreateNode(Node contentNode, String name, String itemType) throws RepositoryException {
242         return NodeUtil.createPath(contentNode, name, itemType, true);
243     }
244 
245     /**
246      * Load the {@link AggregateFilter} for the {@link AggregateFeed} which is represented by the given
247      * <code>aggregateNode</code>. Only
248      *
249      * @param aggregateNode the content node representing the AggregateFeed to load the AggregateFilter for
250      * @return the aggregate filter
251      * @throws RepositoryException when an exception occurs accessing the Content Repository
252      */
253     public AggregateFilter loadAggregateFilter(Node aggregateNode) throws RepositoryException {
254         Node filtersNode = aggregateNode.hasNode("filters") ? aggregateNode.getNode("filters") : null;
255         if (filtersNode == null) {
256             return new AggregateFilter(Collections.<FilterPredicate>emptySet());
257         }
258         Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
259         List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
260 
261         for (Node n : filterNodes) {
262             FilterPredicate filterPredicate = filterPredicateMapper.map(n);
263             if (filterPredicate == null) {
264                 continue;
265             }
266             filters.add(filterPredicate);
267         }
268         return new AggregateFilter(filters);
269     }
270 
271     /**
272      * Save the {@link SyndFeed#getEntries() entries} contained {@link FeedChannel#feed in} the given
273      * {@link FeedChannel} that pass the given {@link AggregateFilter} in the provided <code>dataNode</code>.
274      *
275      * @param dataNode the content node to store the feed content under
276      * @param feedChannel the feed channel to save
277      * @param aggregateFilter the aggregate filter to apply to entries in the feed channel
278      * @throws RepositoryException when an exception occurs accessing the Content Repository
279      */
280     @SuppressWarnings("unchecked")
281     protected Node saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Node dataNode) throws RepositoryException {
282         Node channelNode = recreateFeedChannelNode(feedChannel, dataNode);
283         List<SyndEntry> entries = feedChannel.getFeed().getEntries();
284         int size = entries.size();
285         for (int i = 0; i < size; i++) {
286             SyndEntry entry = entries.get(i);
287             String entryName = format("entry-%s", i);
288             if (aggregateFilter.include(entry)) {
289                 createFeedChannelEntryNode(entry, entryName, channelNode);
290             }
291         }
292         return channelNode;
293     }
294 
295     /**
296      * Recreate the feed channel content node the given feed channel in the Content Repository.
297      *
298      * @param dataNode the node to store the feed channel under
299      * @param feedChannel the feed channel to recreate
300      * @return the created feed channel content node
301      * @throws RepositoryException when an exception occurs accessing the Content Repository
302      */
303     protected Node recreateFeedChannelNode(FeedChannel feedChannel, Node dataNode) throws RepositoryException {
304         String channelName = feedChannel.getName();
305         if (dataNode.hasNode(channelName)) {
306             String absPath = dataNode.getNode(channelName).getPath();
307             dataNode.getSession().removeItem(absPath);
308         }
309         Node channelNode = NodeUtil.createPath(dataNode, channelName, NodeTypes.Content.NAME, true);
310 
311         SyndFeed feed = feedChannel.getFeed();
312         channelNode.setProperty("description", feed.getDescription()); // 'My Blog'
313         channelNode.setProperty("link", feed.getLink()); // 'http://domain.com'
314         channelNode.setProperty("rss", feedChannel.getUrl()); // 'http://domain.com/channel.rss'
315         channelNode.setProperty("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
316         channelNode.setProperty("type", feed.getFeedType()); // 'rss_2.0'
317         return channelNode;
318     }
319 
320     /**
321      * Create a feed channel entry node under the given <code>channelNode</code> with the given <code>nodeName</code>
322      * for the given <code>entry</code>.
323      *
324      * @param entry the feed channel entry to save
325      * @param nodeName the name of the feed channel entry node to create
326      * @param channelNode the feed channel content node to create the feed channel entry under
327      * @throws RepositoryException when an exception occurs accessing the Content Repository
328      */
329     protected Node createFeedChannelEntryNode(SyndEntry entry, String nodeName, Node channelNode) throws RepositoryException {
330         Node entryNode = NodeUtil.createPath(channelNode, nodeName, NodeTypes.Content.NAME, true);
331         entryNode.setProperty("author", entry.getAuthor() == null ? "" : entry.getAuthor());
332         entryNode.setProperty("channelTitle", PropertyUtil.getString(channelNode, "title"));
333         final SyndContent description = entry.getDescription();
334 
335         String descriptionString;
336         if (description != null && StringUtils.isNotBlank(description.getValue())) {
337             descriptionString = description.getValue();
338         } else {
339             descriptionString = getEntryContent(entry);
340         }
341 
342         entryNode.setProperty("description", descriptionString);
343         entryNode.setProperty("content", getEntryContent(entry));
344         entryNode.setProperty("link", entry.getLink());
345         Date publishedDate = entry.getPublishedDate();
346         if (publishedDate == null) {
347             publishedDate = new Date();
348         }
349         entryNode.setProperty("pubDate", publishedDate.getTime());
350         entryNode.setProperty("title", entry.getTitle());
351 
352         createCategoriesNode(entry, entryNode);
353         return entryNode;
354     }
355 
356     /**
357      * Retrieves the main content of an entry.
358      *
359      * @param entry Feed entry holding all data.
360      * @return Entry content as String or empty String if no content is available.
361      */
362     protected String getEntryContent(SyndEntry entry) {
363         String entryContent = "";
364 
365         if (entry != null && entry.getContents().size() > 0) {
366             @SuppressWarnings("unchecked")
367             final List<SyndContent> contents = entry.getContents();
368             for (SyndContent content : contents) {
369                 if (StringUtils.equalsIgnoreCase("html", content.getType()) && StringUtils.isNotBlank(content.getType())) {
370                     entryContent = content.getValue();
371                     break;
372                 }
373             }
374         }
375         return entryContent;
376     }
377 
378     @SuppressWarnings("unchecked")
379     protected Node createCategoriesNode(SyndEntry entry, Node entryNode) throws RepositoryException {
380         Node categoriesNode = NodeUtil.createPath(entryNode, "categories", NodeTypes.Content.NAME, true);
381         List<SyndCategory> categories = entry.getCategories();
382         for (int i = 0; i < categories.size(); i++) {
383             SyndCategory category = categories.get(i);
384             String categoryIndex = valueOf(i);
385             String categoryName = category.getName();
386             categoriesNode.setProperty(categoryIndex, categoryName);
387         }
388         return categoriesNode;
389     }
390 
391     // Getters & setters
392 
393     /**
394      * for testing.
395      */
396     protected AggregateFeedContentMapper setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
397         Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
398         this.aggregateFeedMapper = aggregateFeedMapper;
399         return this.aggregateFeedMapper;
400     }
401 
402     /**
403      * for testing.
404      */
405     public RSSFeedFetcher setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
406         Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
407         this.feedFetcher = rssFeedFetcher;
408         return this.feedFetcher;
409     }
410 
411 
412     /**
413      * for testing.
414      */
415     protected FilterPredicateContentMapper setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
416         Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
417         this.filterPredicateMapper = filterPredicateMapper;
418         return this.filterPredicateMapper;
419     }
420 
421     private static class IsRootOrFolder implements Predicate {
422 
423         public IsRootOrFolder() {
424         }
425 
426         @Override
427         public boolean evaluate(Object object) {
428             if (object instanceof Node) {
429                 Node node = (Node)object;
430                 try {
431                     return NodeUtil.isNodeType(node, NodeTypes.Folder.NAME) || "/".equals(node.getPath());
432                 } catch (RepositoryException e) {
433                     log.warn("Failed to check predicate on node: " + NodeUtil.getPathIfPossible(node));
434                 }
435             }
436             return false;
437         }
438     }
439 }