View Javadoc

1   /**
2    * This file Copyright (c) 2008-2011 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.importhandler;
35  
36  import static java.lang.String.format;
37  import static java.lang.String.valueOf;
38  import static org.apache.commons.lang.StringUtils.isEmpty;
39  import info.magnolia.cms.core.Content;
40  import info.magnolia.jcr.util.NodeUtil;
41  import info.magnolia.jcr.util.PropertyUtil;
42  import info.magnolia.jcr.util.VersionUtil;
43  import info.magnolia.module.data.DataConsts;
44  import info.magnolia.module.data.importer.ImportException;
45  import info.magnolia.module.data.importer.ImportHandler;
46  import info.magnolia.module.data.importer.ImportTarget;
47  import info.magnolia.module.rssaggregator.util.Assert;
48  
49  import java.util.ArrayList;
50  import java.util.Collection;
51  import java.util.Collections;
52  import java.util.Date;
53  import java.util.HashSet;
54  import java.util.List;
55  import java.util.Set;
56  
57  import javax.inject.Inject;
58  import javax.jcr.Node;
59  import javax.jcr.NodeIterator;
60  import javax.jcr.RepositoryException;
61  
62  import org.slf4j.Logger;
63  import org.slf4j.LoggerFactory;
64  
65  import com.sun.syndication.feed.synd.SyndCategory;
66  import com.sun.syndication.feed.synd.SyndContent;
67  import com.sun.syndication.feed.synd.SyndEntry;
68  import com.sun.syndication.feed.synd.SyndFeed;
69  
70  /**
71   * ImportHandler capable of importing RSS and Atom feeds over http for aggregate feeds defined in RSS Aggregator module.
72   * <p/>
73   * Allows optional configuration with a {@link RSSFeedFetcher} implementation of choice, by means of configuring the
74   * content node:
75   *
76   * <pre>
77   * /data/rssaggregator/
78   * </pre>
79   *
80   * If no custom implementation is configured, will fall back to using the default {@link SimpleRSSFeedFetcher}.
81   *
82   * @author had
83   * @author Rob van der Linden Vooren
84   */
85  public class RSSFeedImportHandler extends ImportHandler {
86  
87      private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
88  
89      private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
90  
91      private RSSFeedFetcher feedFetcher;
92      private AggregateFeedContentMapper aggregateFeedMapper;
93      private FilterPredicateContentMapper filterPredicateMapper;
94  
95      /**
96       * Creates handler with default mappers {@link AggregateFeedContentMapper} and {@link FilterPredicateContentMapper}.
97       */
98      @Inject
99      public RSSFeedImportHandler(RSSFeedFetcher feedFetcher) {
100         setAggregateFeedContentMapper(new AggregateFeedContentMapper());
101         setFilterPredicateContentMapper(new FilterPredicateContentMapper());
102         this.feedFetcher = feedFetcher;
103     }
104 
105     /** {@inheritDoc} */
106     @Override
107     protected synchronized void checkPreConditions() throws ImportException {
108         super.checkPreConditions();
109         log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
110     }
111 
112     @SuppressWarnings("unchecked")
113     @Override
114     public Set doImport(final ImportTarget target, final Content parentNode, final Set newContentUUIDs) throws ImportException {
115         try {
116             Set<AggregateFeed> aggregateFeeds = loadAggregates(parentNode.getJCRNode());
117             if (!aggregateFeeds.isEmpty()) {
118                 log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(),
119                         countChannels(aggregateFeeds));
120                 Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
121                 Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, parentNode.getJCRNode());
122                 newContentUUIDs.addAll(newAggregateContentUUIDs);
123                 parentNode.getJCRNode().getSession().save();
124                 log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
125             }
126             return newContentUUIDs;
127         } catch (Exception e) {
128             String message = format("Failed to execute import for target '%s', parent node '%s'", target, parentNode);
129             throw new ImportException(message, e);
130         }
131     }
132 
133     // Helper methods
134 
135     private int countChannels(Set<AggregateFeed> aggregateFeeds) {
136         int channelCount = 0;
137         for (AggregateFeed aggregateFeed : aggregateFeeds) {
138             channelCount += aggregateFeed.getChannels().size();
139         }
140         return channelCount;
141     }
142 
143     /**
144      * Load the {@link AggregateFeed aggregate feed} definitions and their {@link FeedChannel feed channels} from the
145      * Content Repository.
146      *
147      * @param parentNode
148      *            the parent content node that holds the aggregate feed nodes
149      * @return the aggregate feeds
150      * @throws RepositoryException
151      *             when an exception occurs accessing the Content Repository
152      */
153     @SuppressWarnings("unchecked")
154     public Set<AggregateFeed> loadAggregates(Node parentNode) throws RepositoryException {
155 
156         List<Node> nodeIterator = NodeUtil.asList(NodeUtil.getNodes(parentNode,CONTENTTYPE_RSSAGGREGATOR));
157         Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
158         for (Node aggregateNode : nodeIterator) {
159             AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
160             aggregateFeeds.add(aggregateFeed);
161         }
162         return aggregateFeeds;
163     }
164 
165     /**
166      * Save the {@link FeedChannel#feed feed entry} content contained in the {@link FeedChannel feed channels} of the
167      * given <code>aggregateFeeds</code> as childs of the given <code>parentNode</code>. If an {@link AggregateFeed} has
168      * {@link AggregateFilter} defined, feed entries must pass the filter before they will be actually saved in the
169      * Content Repository.
170      *
171      * @param parentNode
172      *            the parent content node of the aggregate feeds content to save
173      * @param aggregateFeeds
174      *            the aggregate feeds to save
175      * @return a set of UUIDs of the newly created aggregate content nodes
176      * @throws RepositoryException
177      *             when an exception occurs accessing the Content Repository
178      */
179     protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Node parentNode) throws RepositoryException {
180         Set<String> newAggregateContentUUIDs = new HashSet<String>();
181         for (AggregateFeed aggregateFeed : aggregateFeeds) {
182             Node aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
183             Node dataNode = getOrCreateNode(aggregateNode, "data", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE);
184             newAggregateContentUUIDs.add(aggregateNode.getUUID());
185             AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
186             for (FeedChannel channel : aggregateFeed.getChannels()) {
187                 if (channel.hasFeed()) {
188                     saveFeedChannel(channel, aggregateFilter, dataNode);
189                 }
190             }
191         }
192         return newAggregateContentUUIDs;
193     }
194 
195     /**
196      * Load a single aggregate content node from the given <code>parentNode</code> with the given
197      * <code>aggregateName</code>. If no such aggregate could be found, <code>null</code> is returned.
198      *
199      * @param parentNode
200      *            the parentNode to load the node from
201      * @param aggregateNodeName
202      *            the name of the aggregate content node to load
203      * @return the aggregate content node, or <code>null</code> if no such node was found
204      * @throws RepositoryException
205      * @throws IllegalStateException
206      *             when multiple aggregate content nodes with the same name are found
207      */
208 
209     protected Node loadSingleAggregateNode(Node parentNode, String aggregateNodeName) throws RepositoryException {
210         // ////////////////
211         // SHOULD BE MOVE IN NodeUtil
212         // ///////////////
213         NodeIterator nodeIterator = parentNode.getNodes(aggregateNodeName);
214         Collection<Node> aggregateNodes = new ArrayList<Node>();
215         while (nodeIterator.hasNext()) {
216             Node currentNode = nodeIterator.nextNode();
217             if (NodeUtil.isNodeType(currentNode, CONTENTTYPE_RSSAGGREGATOR)) {
218                 aggregateNodes.add(currentNode);
219             }
220         }
221         // ////////////////
222         // END
223         // ///////////////
224         int size = aggregateNodes.size();
225         if (size > 1) {
226             throw new IllegalStateException(format(
227                     "Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
228                     parentNode, aggregateNodeName, CONTENTTYPE_RSSAGGREGATOR, size));
229         }
230         if (aggregateNodes.isEmpty()) {
231             return null;
232         }
233         return aggregateNodes.iterator().next();
234     }
235 
236     /**
237      * Behaves exactly like {@link ContentUtil#getOrCreateContent(Content, String, ItemType)}. This method exists for
238      * testability.
239      *
240      * @param contentNode
241      *            the contentNode to (create if non-existant and then) get
242      * @param name
243      *            the name of the node
244      * @param itemType
245      *            the type of the content node
246      * @return the created content node
247      * @throws RepositoryException
248      *             when an exception occurs accessing the Content Repository
249      */
250     protected Node getOrCreateNode(Node contentNode, String name, String itemType) throws RepositoryException {
251         return NodeUtil.createPath(contentNode, name, itemType ,true);
252     }
253 
254     /**
255      * Load the {@link AggregateFilter} for the {@link AggregateFeed} which is represented by the given
256      * <code>aggregateNode</code>. Only
257      *
258      * @param aggregateNode
259      *            the content node representing the AggregateFeed to load the AggregateFilter for
260      * @return the aggregate filter
261      * @throws RepositoryException
262      *             when an exception occurs accessing the Content Repository
263      */
264     public AggregateFilter loadAggregateFilter(Node aggregateNode) throws RepositoryException {
265         Node filtersNode = aggregateNode.hasNode("filters") ? aggregateNode.getNode("filters") : null;
266         if (filtersNode == null) {
267             return new AggregateFilter(Collections.<FilterPredicate> emptySet());
268         }
269         Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
270         List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode,VersionUtil.getNodeTypeName(filtersNode)));
271 
272         for (Node n:filterNodes) {
273             FilterPredicate filterPredicate = filterPredicateMapper.map(n);
274             if (filterPredicate == null) {
275                 continue;
276             }
277             filters.add(filterPredicate);
278         }
279         return new AggregateFilter(filters);
280     }
281 
282     /**
283      * Save the {@link SyndFeed#getEntries() entries} contained {@link FeedChannel#feed in} the given
284      * {@link FeedChannel} that pass the given {@link AggregateFilter} in the provided <code>dataNode</code>.
285      *
286      * @param dataNode
287      *            the content node to store the feed content under
288      * @param feedChannel
289      *            the feed channel to save
290      * @param aggregateFilter
291      *            the aggregate filter to apply to entries in the feed channel
292      * @throws RepositoryException
293      *             when an exception occurs accessing the Content Repository
294      */
295     @SuppressWarnings("unchecked")
296     protected Node saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Node dataNode) throws RepositoryException {
297         Node channelNode = recreateFeedChannelNode(feedChannel, dataNode);
298         List<SyndEntry> entries = feedChannel.getFeed().getEntries();
299         int size = entries.size();
300         for (int i = 0; i < size; i++) {
301             SyndEntry entry = entries.get(i);
302             String entryName = format("entry-%s", i);
303             if (aggregateFilter.include(entry)) {
304                 createFeedChannelEntryNode(entry, entryName, channelNode);
305             }
306         }
307         return channelNode;
308     }
309 
310     /**
311      * Recreate the feed channel content node the given feed channel in the Content Repository.
312      *
313      * @param dataNode
314      *            the node to store the feed channel under
315      * @param feedChannel
316      *            the feed channel to recreate
317      * @return the created feed channel content node
318      * @throws RepositoryException
319      *             when an exception occurs accessing the Content Repository
320      */
321     protected Node recreateFeedChannelNode(FeedChannel feedChannel, Node dataNode) throws RepositoryException {
322         String channelName = feedChannel.getName();
323         if (dataNode.hasNode(channelName)) {
324             String absPath = dataNode.getNode(channelName).getPath();
325             dataNode.getSession().removeItem(absPath);
326         }
327         Node channelNode = NodeUtil.createPath(dataNode, channelName, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE,true);
328 
329         SyndFeed feed = feedChannel.getFeed();
330         channelNode.setProperty("description", feed.getDescription()); // 'My Blog'
331         channelNode.setProperty("link", feed.getLink()); // 'http://domain.com'
332         channelNode.setProperty("rss", feedChannel.getUrl()); // 'http://domain.com/channel.rss'
333         channelNode.setProperty("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
334         channelNode.setProperty("type", feed.getFeedType()); // 'rss_2.0'
335         return channelNode;
336     }
337 
338     /**
339      * Create a feed channel entry node under the given <code>channelNode</code> with the given <code>nodeName</code>
340      * for the given <code>entry</code>.
341      *
342      * @param entry
343      *            the feed channel entry to save
344      * @param nodeName
345      *            the name of the feed channel entry node to create
346      * @param channelNode
347      *            the feed channel content node to create the feed channel entry under
348      * @throws RepositoryException
349      *             when an exception occurs accessing the Content Repository
350      */
351     protected Node createFeedChannelEntryNode(SyndEntry entry, String nodeName, Node channelNode) throws RepositoryException {
352         Node entryNode = NodeUtil.createPath(channelNode, nodeName, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE,true);
353         entryNode.setProperty("author", entry.getAuthor() == null ? "" : entry.getAuthor());
354         entryNode.setProperty("channelTitle", PropertyUtil.getString(channelNode,"title"));
355         final SyndContent description = entry.getDescription();
356         String descriptionString = null;
357         if (description != null) {
358             descriptionString = description.getValue();
359         } else {
360             @SuppressWarnings("unchecked")
361             final List<SyndContent> contents = entry.getContents();
362             for (int i = 0; i < contents.size(); i++) {
363                 SyndContent content = contents.get(i);
364                 if ("html".equals(content.getType())) {
365                     descriptionString = content.getValue();
366                     break;
367                 }
368             }
369         }
370         if (descriptionString == null) {
371             descriptionString = "";
372         }
373         entryNode.setProperty("description", descriptionString);
374         entryNode.setProperty("link", entry.getLink());
375         Date publishedDate = entry.getPublishedDate();
376         if (publishedDate == null) {
377             publishedDate = new Date();
378         }
379         entryNode.setProperty("pubDate", publishedDate.getTime());
380         entryNode.setProperty("title", entry.getTitle());
381 
382         createCategoriesNode(entry, entryNode);
383         return entryNode;
384     }
385 
386     @SuppressWarnings("unchecked")
387     protected Node createCategoriesNode(SyndEntry entry, Node entryNode) throws RepositoryException {
388         Node categoriesNode = NodeUtil.createPath(entryNode, "categories", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE,true);
389         List<SyndCategory> categories = entry.getCategories();
390         for (int i = 0; i < categories.size(); i++) {
391             SyndCategory category = categories.get(i);
392             String categoryIndex = valueOf(i);
393             String categoryName = category.getName();
394             categoriesNode.setProperty(categoryIndex, categoryName);
395         }
396         return categoriesNode;
397     }
398 
399     // Getters & setters
400 
401     /** for testing. */
402     protected AggregateFeedContentMapper setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
403         Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
404         this.aggregateFeedMapper = aggregateFeedMapper;
405         return this.aggregateFeedMapper;
406     }
407 
408     /** for testing. */
409     public RSSFeedFetcher setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
410         Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
411         this.feedFetcher = rssFeedFetcher;
412         return this.feedFetcher;
413     }
414 
415 
416     /** for testing. */
417     protected FilterPredicateContentMapper setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
418         Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
419         this.filterPredicateMapper = filterPredicateMapper;
420         return this.filterPredicateMapper;
421     }
422 
423     // /** for testing. */
424     // protected void setLogger(Logger logger) {
425     // Assert.notNull(logger, "'logger' must not be null");
426     // this.log = logger;
427     // }
428 }