View Javadoc
1   /**
2    * This file Copyright (c) 2014-2017 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.command;
35  
36  import static java.lang.String.*;
37  import static org.apache.commons.lang3.StringUtils.isEmpty;
38  
39  import info.magnolia.commands.MgnlCommand;
40  import info.magnolia.context.Context;
41  import info.magnolia.context.MgnlContext;
42  import info.magnolia.jcr.util.NodeTypes;
43  import info.magnolia.jcr.util.NodeUtil;
44  import info.magnolia.jcr.util.PropertyUtil;
45  import info.magnolia.module.ModuleRegistry;
46  import info.magnolia.module.rssaggregator.RSSAggregatorConstants;
47  import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
48  import info.magnolia.module.rssaggregator.RSSJob;
49  import info.magnolia.module.rssaggregator.importhandler.AggregateFeed;
50  import info.magnolia.module.rssaggregator.importhandler.AggregateFeedContentMapper;
51  import info.magnolia.module.rssaggregator.importhandler.AggregateFilter;
52  import info.magnolia.module.rssaggregator.importhandler.FeedChannel;
53  import info.magnolia.module.rssaggregator.importhandler.FilterPredicate;
54  import info.magnolia.module.rssaggregator.importhandler.FilterPredicateContentMapper;
55  import info.magnolia.module.scheduler.SchedulerModule;
56  import info.magnolia.objectfactory.Components;
57  
58  import java.util.ArrayList;
59  import java.util.Collection;
60  import java.util.Collections;
61  import java.util.Date;
62  import java.util.HashSet;
63  import java.util.LinkedHashSet;
64  import java.util.List;
65  import java.util.Set;
66  
67  import javax.jcr.Node;
68  import javax.jcr.NodeIterator;
69  import javax.jcr.RepositoryException;
70  
71  import org.apache.commons.lang3.StringUtils;
72  import org.quartz.SchedulerException;
73  
74  import com.rometools.rome.feed.synd.SyndCategory;
75  import com.rometools.rome.feed.synd.SyndContent;
76  import com.rometools.rome.feed.synd.SyndEntry;
77  import com.rometools.rome.feed.synd.SyndFeed;
78  
79  /**
80   * Starts an update on specific RSS aggregator.
81   */
82  public class LaunchSingleRSSCommand extends MgnlCommand {
83  
84      private FilterPredicateContentMapper filterPredicateMapper;
85  
86      private RSSJob job;
87  
88      public LaunchSingleRSSCommand() {
89      }
90  
91      public LaunchSingleRSSCommand(RSSJob job) {
92          this.job = job;
93      }
94  
95      private void init() {
96          setFilterPredicateContentMapper(new FilterPredicateContentMapper());
97      }
98  
99      @Override
100     public boolean execute(Context context) throws Exception {
101         init();
102         Set<String> newContentUUIDs = new LinkedHashSet<String>();
103         Node rss = getRSSNodeByFeedName(job.getName());
104         if (rss != null) {
105             Set<AggregateFeed> fetchedAggregateFeeds = job.getFetcher().fetchAggregateFeeds(getFeeds(rss));
106             Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, rss);
107             newContentUUIDs.addAll(newAggregateContentUUIDs);
108             rss.getSession().save();
109         }
110         return false;
111     }
112 
113     private Node getRSSNodeByFeedName(String name) throws RepositoryException {
114         try {
115             return MgnlContext.getJCRSession(RSSAggregatorConstants.WORKSPACE).getNodeByIdentifier(name);
116         } catch (RepositoryException e) {
117             try {
118                 log.debug("RSS with name " + name + "doesn't exist anymore");
119                 SchedulerModule scheduler = (SchedulerModule) Components.getComponent(ModuleRegistry.class).getModuleInstance("scheduler");
120                 scheduler.stopJob(name);
121                 scheduler.removeJob(name);
122             } catch (SchedulerException ex) {
123                 // If the node does not exist we try to remove it from scheduler to prevent future launches
124             }
125         }
126         return null;
127     }
128 
129     public Set<AggregateFeed> getFeeds(Node node) throws RepositoryException {
130         AggregateFeedContentMapper aggregateFeedMapper = new AggregateFeedContentMapper();
131         Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
132         AggregateFeed aggregateFeed = aggregateFeedMapper.map(node);
133         aggregateFeeds.add(aggregateFeed);
134         return aggregateFeeds;
135     }
136 
137     protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Node parentNode) throws RepositoryException {
138         Set<String> newAggregateContentUUIDs = new HashSet<String>();
139         for (AggregateFeed aggregateFeed : aggregateFeeds) {
140             Node aggregateNode = parentNode;
141             Node dataNode = NodeUtil.createPath(aggregateNode, "data", NodeTypes.Content.NAME);
142             newAggregateContentUUIDs.add(aggregateNode.getIdentifier());
143             AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
144             for (FeedChannel channel : aggregateFeed.getChannels()) {
145                 if (channel.hasFeed()) {
146                     saveFeedChannel(channel, aggregateFilter, dataNode);
147                 }
148             }
149         }
150         return newAggregateContentUUIDs;
151     }
152 
153     protected Node loadSingleAggregateNode(Node parentNode, String aggregateNodeName) throws RepositoryException {
154         NodeIterator nodeIterator = parentNode.getNodes(aggregateNodeName);
155         Collection<Node> aggregateNodes = new ArrayList<Node>();
156         while (nodeIterator.hasNext()) {
157             Node currentNode = nodeIterator.nextNode();
158             if (NodeUtil.isNodeType(currentNode, RSSAggregatorNodeTypes.RSSAggregator.NAME)) {
159                 aggregateNodes.add(currentNode);
160             }
161         }
162         int size = aggregateNodes.size();
163         if (size > 1) {
164             throw new IllegalStateException(format(
165                     "Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
166                     parentNode, aggregateNodeName, RSSAggregatorNodeTypes.RSSAggregator.NAME, size));
167         }
168         if (aggregateNodes.isEmpty()) {
169             return null;
170         }
171         return aggregateNodes.iterator().next();
172     }
173 
174     public AggregateFilter loadAggregateFilter(Node aggregateNode) throws RepositoryException {
175         Node filtersNode = aggregateNode.hasNode("filters") ? aggregateNode.getNode("filters") : null;
176         if (filtersNode == null) {
177             return new AggregateFilter(Collections.<FilterPredicate>emptySet());
178         }
179         /*
180          * order matters here. The elements in the set must be in the same order as they are in JCR boolean b = true; b |= true; b &= false; System.out.println(b); -> false
181          *
182          * b = true; b &= true; b |= false; System.out.println(b); -> true
183          *
184          * See http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.22.2
185          */
186         Set<FilterPredicate> filters = new LinkedHashSet<FilterPredicate>();
187 
188         List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, NodeTypes.ContentNode.NAME));
189 
190         for (Node n : filterNodes) {
191             FilterPredicate filterPredicate = filterPredicateMapper.map(n);
192             if (filterPredicate == null) {
193                 continue;
194             }
195             filters.add(filterPredicate);
196         }
197         return new AggregateFilter(filters);
198     }
199 
200     protected Node saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Node dataNode) throws RepositoryException {
201         Node channelNode = recreateFeedChannelNode(feedChannel, dataNode);
202         List<SyndEntry> entries = feedChannel.getFeed().getEntries();
203         int size = entries.size();
204         for (int i = 0; i < size; i++) {
205             SyndEntry entry = entries.get(i);
206             String entryName = format("entry-%s", i);
207             if (aggregateFilter.include(entry)) {
208                 createFeedChannelEntryNode(entry, entryName, channelNode);
209             }
210         }
211         return channelNode;
212     }
213 
214     protected Node recreateFeedChannelNode(FeedChannel feedChannel, Node dataNode) throws RepositoryException {
215         String channelName = feedChannel.getName();
216         if (dataNode.hasNode(channelName)) {
217             String absPath = dataNode.getNode(channelName).getPath();
218             dataNode.getSession().removeItem(absPath);
219         }
220         Node channelNode = NodeUtil.createPath(dataNode, channelName, NodeTypes.Content.NAME);
221 
222         SyndFeed feed = feedChannel.getFeed();
223         channelNode.setProperty("description", feed.getDescription()); // 'My Blog'
224         channelNode.setProperty("link", feed.getLink()); // 'http://domain.com'
225         channelNode.setProperty("rss", feedChannel.getUrl()); // 'http://domain.com/channel.rss'
226         channelNode.setProperty("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
227         channelNode.setProperty("type", feed.getFeedType()); // 'rss_2.0'
228         channelNode.getSession().save();
229         return channelNode;
230     }
231 
232     protected Node createFeedChannelEntryNode(SyndEntry entry, String nodeName, Node channelNode) throws RepositoryException {
233         Node entryNode = NodeUtil.createPath(channelNode, nodeName, NodeTypes.Content.NAME);
234         entryNode.setProperty("author", entry.getAuthor() == null ? "" : entry.getAuthor());
235         entryNode.setProperty("channelTitle", PropertyUtil.getString(channelNode, "title"));
236         final SyndContent description = entry.getDescription();
237 
238         String descriptionString;
239         if (description != null && StringUtils.isNotBlank(description.getValue())) {
240             descriptionString = description.getValue();
241         } else {
242             descriptionString = getEntryContent(entry);
243         }
244 
245         entryNode.setProperty("description", descriptionString);
246         entryNode.setProperty("content", getEntryContent(entry));
247         entryNode.setProperty("link", entry.getLink());
248         Date publishedDate = entry.getPublishedDate();
249         if (publishedDate == null) {
250             publishedDate = new Date();
251         }
252         entryNode.setProperty("pubDate", publishedDate.getTime());
253         entryNode.setProperty("title", entry.getTitle());
254 
255         createCategoriesNode(entry, entryNode);
256         return entryNode;
257     }
258 
259     protected String getEntryContent(SyndEntry entry) {
260         String entryContent = "";
261 
262         if (entry != null && entry.getContents().size() > 0) {
263             final List<SyndContent> contents = entry.getContents();
264             for (SyndContent content : contents) {
265                 if (StringUtils.equalsIgnoreCase("html", content.getType()) && StringUtils.isNotBlank(content.getType())) {
266                     entryContent = content.getValue();
267                     break;
268                 }
269             }
270         }
271         return entryContent;
272     }
273 
274     protected Node createCategoriesNode(SyndEntry entry, Node entryNode) throws RepositoryException {
275         Node categoriesNode = NodeUtil.createPath(entryNode, "categories", NodeTypes.Content.NAME);
276         List<SyndCategory> categories = entry.getCategories();
277         for (int i = 0; i < categories.size(); i++) {
278             SyndCategory category = categories.get(i);
279             String categoryIndex = valueOf(i);
280             String categoryName = category.getName();
281             categoriesNode.setProperty(categoryIndex, categoryName);
282         }
283         return categoriesNode;
284     }
285 
286     protected FilterPredicateContentMapper setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
287         if (filterPredicateMapper == null) {
288             throw new IllegalArgumentException("'filterPredicateContentMapper' must not be null");
289         }
290         this.filterPredicateMapper = filterPredicateMapper;
291         return this.filterPredicateMapper;
292     }
293 
294     public void setJob(RSSJob job) {
295         this.job = job;
296     }
297 
298     public RSSJob getJob() {
299         return this.job;
300     }
301 }