View Javadoc

1   /**
2    * This file Copyright (c) 2008-2011 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.importhandler;
35  
36  import com.sun.syndication.feed.synd.SyndCategory;
37  import com.sun.syndication.feed.synd.SyndContent;
38  import com.sun.syndication.feed.synd.SyndEntry;
39  import com.sun.syndication.feed.synd.SyndFeed;
40  import info.magnolia.cms.core.Content;
41  import info.magnolia.cms.core.ItemType;
42  import info.magnolia.cms.util.ContentUtil;
43  import info.magnolia.cms.util.FactoryUtil;
44  import info.magnolia.module.data.DataConsts;
45  import info.magnolia.module.data.importer.ImportException;
46  import info.magnolia.module.data.importer.ImportHandler;
47  import info.magnolia.module.data.importer.ImportTarget;
48  import info.magnolia.module.rssaggregator.util.Assert;
49  import static org.apache.commons.lang.StringUtils.*;
50  import org.slf4j.Logger;
51  import org.slf4j.LoggerFactory;
52  
53  import javax.jcr.RepositoryException;
54  import java.util.Collection;
55  import java.util.Collections;
56  import java.util.Date;
57  import java.util.HashSet;
58  import java.util.List;
59  import java.util.Set;
60  
61  import static java.lang.String.*;
62  
63  /**
64   * ImportHandler capable of importing RSS and Atom feeds over http for aggregate feeds defined in RSS Aggregator module.
65   * <p/> Allows optional configuration with a {@link RSSFeedFetcher} implementation of choice, by means of configuring
66   * the content node:
67   * <pre>
68   * /data/rssaggregator/
69   * </pre>
70   * If no custom implementation is configured, will fall back to using the default {@link SimpleRSSFeedFetcher}.
71   *
72   * @author had
73   * @author Rob van der Linden Vooren
74   */
75  public class RSSFeedImportHandler extends ImportHandler {
76  
77      private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
78  
79      private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
80  
81      private RSSFeedFetcher feedFetcher;
82      private AggregateFeedContentMapper aggregateFeedMapper;
83      private FilterPredicateContentMapper filterPredicateMapper;
84  
85  
86      /**
87       * Creates handler with default mappers {@link AggregateFeedContentMapper} and {@link FilterPredicateContentMapper}.
88       */
89      public RSSFeedImportHandler() {
90          setAggregateFeedContentMapper(new AggregateFeedContentMapper());
91          setFilterPredicateContentMapper(new FilterPredicateContentMapper());
92      }
93  
94      /** {@inheritDoc} */
95      @Override
96      protected synchronized void checkPreConditions() throws ImportException {
97          super.checkPreConditions();
98          if (feedFetcher == null) {
99              // let factory for the default mapping of interface implementation
100             feedFetcher = (RSSFeedFetcher) FactoryUtil.newInstance(RSSFeedFetcher.class);
101         }
102         log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
103     }
104 
105     @SuppressWarnings("unchecked")
106     public Set doImport(final ImportTarget target, final Content parentNode, final Set newContentUUIDs) throws ImportException {
107         try {
108             Set<AggregateFeed> aggregateFeeds = loadAggregates(parentNode);
109             if (!aggregateFeeds.isEmpty()) {
110                 log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(),
111                         countChannels(aggregateFeeds));
112                 Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
113                 Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, parentNode);
114                 newContentUUIDs.addAll(newAggregateContentUUIDs);
115                 parentNode.save();
116                 log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
117             }
118             return newContentUUIDs;
119         } catch (Exception e) {
120             String message = format("Failed to execute import for target '%s', parent node '%s'", target, parentNode);
121             throw new ImportException(message, e);
122         }
123     }
124 
125     //    Helper methods
126 
127     private int countChannels(Set<AggregateFeed> aggregateFeeds) {
128         int channelCount = 0;
129         for (AggregateFeed aggregateFeed : aggregateFeeds) {
130             channelCount += aggregateFeed.getChannels().size();
131         }
132         return channelCount;
133     }
134 
135     /**
136      * Load the {@link AggregateFeed aggregate feed} definitions and their {@link FeedChannel feed channels} from the
137      * Content Repository.
138      *
139      * @param parentNode the parent content node that holds the aggregate feed nodes
140      * @return the aggregate feeds
141      * @throws RepositoryException when an exception occurs accessing the Content Repository
142      */
143     @SuppressWarnings("unchecked")
144     public Set<AggregateFeed> loadAggregates(Content parentNode) throws RepositoryException {
145         Collection<Content> aggregateNodes = parentNode.getChildren(CONTENTTYPE_RSSAGGREGATOR);
146         Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
147         for (Content aggregateNode : aggregateNodes) {
148             AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
149             aggregateFeeds.add(aggregateFeed);
150         }
151         return aggregateFeeds;
152     }
153 
154     /**
155      * Save the {@link FeedChannel#feed feed entry} content contained in the {@link FeedChannel feed channels} of the
156      * given <code>aggregateFeeds</code> as childs of the given <code>parentNode</code>. If an {@link AggregateFeed} has
157      * {@link AggregateFilter} defined, feed entries must pass the filter before they will be actually saved in the
158      * Content Repository.
159      *
160      * @param parentNode the parent content node of the aggregate feeds content to save
161      * @param aggregateFeeds the aggregate feeds to save
162      * @return a set of UUIDs of the newly created aggregate content nodes
163      * @throws RepositoryException when an exception occurs accessing the Content Repository
164      */
165     protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Content parentNode) throws RepositoryException {
166         Set<String> newAggregateContentUUIDs = new HashSet<String>();
167         for (AggregateFeed aggregateFeed : aggregateFeeds) {
168             Content aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
169             Content dataNode = getOrCreateContent(aggregateNode, "data", new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
170             newAggregateContentUUIDs.add(aggregateNode.getUUID());
171             AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
172             for (FeedChannel channel : aggregateFeed.getChannels()) {
173                 if (channel.hasFeed()) {
174                     saveFeedChannel(channel, aggregateFilter, dataNode);
175                 }
176             }
177         }
178         return newAggregateContentUUIDs;
179     }
180 
181     /**
182      * Load a single aggregate content node from the given <code>parentNode</code> with the given
183      * <code>aggregateName</code>. If no such aggregate could be found, <code>null</code> is returned.
184      *
185      * @param parentNode the parentNode to load the node from
186      * @param aggregateNodeName the name of the aggregate content node to load
187      * @return the aggregate content node, or <code>null</code> if no such node was found
188      * @throws IllegalStateException when multiple aggregate content nodes with the same name are found
189      */
190     @SuppressWarnings("unchecked")
191     protected Content loadSingleAggregateNode(Content parentNode, String aggregateNodeName) {
192         Collection<Content> aggregateNodes = parentNode.getChildren(CONTENTTYPE_RSSAGGREGATOR, aggregateNodeName);
193         int size = aggregateNodes.size();
194         if (size > 1) {
195             throw new IllegalStateException(format("Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
196                     parentNode, aggregateNodeName, CONTENTTYPE_RSSAGGREGATOR, size));
197         }
198         if (aggregateNodes.isEmpty()) {
199             return null;
200         }
201         return aggregateNodes.iterator().next();
202     }
203 
204     /**
205      * Behaves exactly like {@link ContentUtil#getOrCreateContent(Content, String, ItemType)}. This method exists for
206      * testability.
207      *
208      * @param contentNode the contentNode to (create if non-existant and then) get
209      * @param name the name of the node
210      * @param itemType the type of the content node
211      * @return the created content node
212      * @throws RepositoryException when an exception occurs accessing the Content Repository
213      */
214     protected Content getOrCreateContent(Content contentNode, String name, ItemType itemType) throws RepositoryException {
215         return ContentUtil.getOrCreateContent(contentNode, name, itemType);
216     }
217 
218     /**
219      * Load the {@link AggregateFilter} for the {@link AggregateFeed} which is represented by the given
220      * <code>aggregateNode</code>. Only
221      *
222      * @param aggregateNode the content node representing the AggregateFeed to load the AggregateFilter for
223      * @return the aggregate filter
224      * @throws RepositoryException when an exception occurs accessing the Content Repository
225      */
226     @SuppressWarnings("unchecked")
227     public AggregateFilter loadAggregateFilter(Content aggregateNode) throws RepositoryException {
228         Content filtersNode = aggregateNode.hasContent("filters") ? aggregateNode.getContent("filters") : null;
229         if (filtersNode == null) {
230             return new AggregateFilter(Collections.<FilterPredicate>emptySet());
231         }
232         Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
233         Collection<Content> filterNodes = filtersNode.getChildren();
234         for (Content filterNode : filterNodes) {
235             FilterPredicate filterPredicate = filterPredicateMapper.map(filterNode);
236             if (filterPredicate == null) {
237                 continue;
238             }
239             filters.add(filterPredicate);
240         }
241         return new AggregateFilter(filters);
242     }
243 
244     /**
245      * Save the {@link SyndFeed#getEntries() entries} contained {@link FeedChannel#feed in} the given {@link
246      * FeedChannel} that pass the given {@link AggregateFilter} in the provided <code>dataNode</code>.
247      *
248      * @param dataNode the content node to store the feed content under
249      * @param feedChannel the feed channel to save
250      * @param aggregateFilter the aggregate filter to apply to entries in the feed channel
251      * @throws RepositoryException when an exception occurs accessing the Content Repository
252      */
253     @SuppressWarnings("unchecked")
254     protected void saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Content dataNode) throws RepositoryException {
255         Content channelNode = recreateFeedChannelNode(feedChannel, dataNode);
256         List<SyndEntry> entries = feedChannel.getFeed().getEntries();
257         int size = entries.size();
258         for (int i = 0; i < size; i++) {
259             SyndEntry entry = entries.get(i);
260             String entryName = format("entry-%s", i);
261             if (aggregateFilter.include(entry)) {
262                 createFeedChannelEntryNode(entry, entryName, channelNode);
263             }
264         }
265     }
266 
267     /**
268      * Recreate the feed channel content node the given feed channel in the Content Repository.
269      *
270      * @param dataNode the node to store the feed channel under
271      * @param feedChannel the feed channel to recreate
272      * @return the created feed channel content node
273      * @throws RepositoryException when an exception occurs accessing the Content Repository
274      */
275     protected Content recreateFeedChannelNode(FeedChannel feedChannel, Content dataNode) throws RepositoryException {
276         String channelName = feedChannel.getName();
277         if (dataNode.hasContent(channelName)) {
278             dataNode.delete(channelName);
279         }
280         Content channelNode = dataNode.createContent(channelName, new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
281         SyndFeed feed = feedChannel.getFeed();
282         channelNode.createNodeData("description", feed.getDescription()); // 'My Blog'
283         channelNode.createNodeData("link", feed.getLink()); // 'http://domain.com'
284         channelNode.createNodeData("rss", feedChannel.getUrl()); // 'http://domain.com/channel.rss'
285         channelNode.createNodeData("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
286         channelNode.createNodeData("type", feed.getFeedType()); // 'rss_2.0'
287         return channelNode;
288     }
289 
290     /**
291      * Create a feed channel entry node under the given <code>channelNode</code> with the given <code>nodeName</code> for
292      * the given <code>entry</code>.
293      *
294      * @param entry the feed channel entry to save
295      * @param nodeName the name of the feed channel entry node to create
296      * @param channelNode the feed channel content node to create the feed channel entry under
297      * @throws RepositoryException when an exception occurs accessing the Content Repository
298      */
299     protected void createFeedChannelEntryNode(SyndEntry entry, String nodeName, Content channelNode) throws RepositoryException {
300         Content entryNode = channelNode.createContent(nodeName, new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
301         entryNode.createNodeData("author", entry.getAuthor() == null ? "" : entry.getAuthor());
302         entryNode.createNodeData("channelTitle", channelNode.getNodeData("title").getString());
303         final SyndContent description = entry.getDescription();
304         String descriptionString = null;
305         if (description != null) {
306             descriptionString = description.getValue();
307         } else {
308             @SuppressWarnings("unchecked")
309             final List<SyndContent> contents = entry.getContents();
310             for (int i = 0; i < contents.size(); i++) {
311                 SyndContent content = contents.get(i);
312                 if ("html".equals(content.getType())) {
313                     descriptionString = content.getValue();
314                     break;
315                 }
316             }
317         }
318         if (descriptionString == null) {
319             descriptionString = "";
320         }
321         entryNode.createNodeData("description", descriptionString);
322         entryNode.createNodeData("link", entry.getLink());
323         Date publishedDate = entry.getPublishedDate();
324         if (publishedDate == null) {
325             publishedDate = new Date();
326         }
327         entryNode.createNodeData("pubDate", publishedDate.getTime());
328         entryNode.createNodeData("title", entry.getTitle());
329 
330         createCategoriesNode(entry, entryNode);
331     }
332 
333     @SuppressWarnings("unchecked")
334     protected void createCategoriesNode(SyndEntry entry, Content entryNode) throws RepositoryException {
335         Content categoriesNode = entryNode.createContent("categories", new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
336         List<SyndCategory> categories = entry.getCategories();
337         for (int i = 0; i < categories.size(); i++) {
338             SyndCategory category = categories.get(i);
339             String categoryIndex = valueOf(i);
340             String categoryName = category.getName();
341             categoriesNode.createNodeData(categoryIndex, categoryName);
342         }
343     }
344 
345     //  Getters & setters
346 
347     public void setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
348         Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
349         this.feedFetcher = rssFeedFetcher;
350     }
351 
352     /** for testing. */
353     protected void setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
354         Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
355         this.aggregateFeedMapper = aggregateFeedMapper;
356     }
357 
358     /** for testing. */
359     protected void setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
360         Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
361         this.filterPredicateMapper = filterPredicateMapper;
362     }
363 
364 //    /** for testing. */
365 //    protected void setLogger(Logger logger) {
366 //        Assert.notNull(logger, "'logger' must not be null");
367 //        this.log = logger;
368 //    }
369 }