1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.importhandler;
35
36 import static java.lang.String.*;
37 import static org.apache.commons.lang.StringUtils.isEmpty;
38
39 import info.magnolia.cms.core.Content;
40 import info.magnolia.jcr.util.NodeTypes;
41 import info.magnolia.jcr.util.NodeUtil;
42 import info.magnolia.jcr.util.NodeVisitor;
43 import info.magnolia.jcr.util.PropertyUtil;
44 import info.magnolia.jcr.util.VersionUtil;
45 import info.magnolia.module.data.importer.ImportException;
46 import info.magnolia.module.data.importer.ImportHandler;
47 import info.magnolia.module.data.importer.ImportTarget;
48 import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
49 import info.magnolia.module.rssaggregator.util.Assert;
50
51 import java.util.ArrayList;
52 import java.util.Collection;
53 import java.util.Collections;
54 import java.util.Date;
55 import java.util.HashSet;
56 import java.util.List;
57 import java.util.Set;
58
59 import javax.inject.Inject;
60 import javax.jcr.Node;
61 import javax.jcr.NodeIterator;
62 import javax.jcr.RepositoryException;
63
64 import org.apache.commons.lang.StringUtils;
65 import org.apache.jackrabbit.commons.predicate.Predicate;
66 import org.slf4j.Logger;
67 import org.slf4j.LoggerFactory;
68
69 import com.sun.syndication.feed.synd.SyndCategory;
70 import com.sun.syndication.feed.synd.SyndContent;
71 import com.sun.syndication.feed.synd.SyndEntry;
72 import com.sun.syndication.feed.synd.SyndFeed;
73
74
75
76
77
78
79
80
81
82
83
84
85 public class RSSFeedImportHandler extends ImportHandler {
86
87 private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
88
89 private RSSFeedFetcher feedFetcher;
90 private AggregateFeedContentMapper aggregateFeedMapper;
91 private FilterPredicateContentMapper filterPredicateMapper;
92
93
94
95
96 @Inject
97 public RSSFeedImportHandler(RSSFeedFetcher feedFetcher) {
98 setAggregateFeedContentMapper(new AggregateFeedContentMapper());
99 setFilterPredicateContentMapper(new FilterPredicateContentMapper());
100 this.feedFetcher = feedFetcher;
101 }
102
103
104 @Override
105 protected synchronized void checkPreConditions() throws ImportException {
106 super.checkPreConditions();
107 log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
108 }
109
110 @SuppressWarnings("unchecked")
111 @Override
112 public Set doImport(final ImportTarget target, Content root, final Set newContentUUIDs) throws ImportException {
113 try {
114 NodeUtil.visit(root.getJCRNode(), new NodeVisitor() {
115 @Override
116 public void visit(Node node) throws RepositoryException {
117 Set<AggregateFeed> aggregateFeeds = loadAggregates(node);
118 if (!aggregateFeeds.isEmpty()) {
119 log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(), countChannels(aggregateFeeds));
120 Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
121 Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, node);
122 newContentUUIDs.addAll(newAggregateContentUUIDs);
123 node.getSession().save();
124 log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
125 }
126 }
127 }, new IsRootOrFolder());
128 } catch (RepositoryException e) {
129 String message = format("Failed to execute import for target '%s', parent node '%s'", target, root);
130 throw new ImportException(message, e);
131 }
132 return newContentUUIDs;
133 }
134
135
136
137 private int countChannels(Set<AggregateFeed> aggregateFeeds) {
138 int channelCount = 0;
139 for (AggregateFeed aggregateFeed : aggregateFeeds) {
140 channelCount += aggregateFeed.getChannels().size();
141 }
142 return channelCount;
143 }
144
145
146
147
148
149
150
151
152
153 public Set<AggregateFeed> loadAggregates(Node parentNode) throws RepositoryException {
154
155 List<Node> nodeIterator = NodeUtil.asList(NodeUtil.getNodes(parentNode, RSSAggregatorNodeTypes.RSSAggregator.NAME));
156 Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
157 for (Node aggregateNode : nodeIterator) {
158 AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
159 aggregateFeeds.add(aggregateFeed);
160 }
161 return aggregateFeeds;
162 }
163
164
165
166
167
168
169
170
171
172
173
174
175 protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Node parentNode) throws RepositoryException {
176 Set<String> newAggregateContentUUIDs = new HashSet<String>();
177 for (AggregateFeed aggregateFeed : aggregateFeeds) {
178 Node aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
179 Node dataNode = getOrCreateNode(aggregateNode, "data", NodeTypes.Content.NAME);
180 newAggregateContentUUIDs.add(aggregateNode.getUUID());
181 AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
182 for (FeedChannel channel : aggregateFeed.getChannels()) {
183 if (channel.hasFeed()) {
184 saveFeedChannel(channel, aggregateFilter, dataNode);
185 }
186 }
187 }
188 return newAggregateContentUUIDs;
189 }
190
191
192
193
194
195
196
197
198
199
200
201 protected Node loadSingleAggregateNode(Node parentNode, String aggregateNodeName) throws RepositoryException {
202
203
204
205 NodeIterator nodeIterator = parentNode.getNodes(aggregateNodeName);
206 Collection<Node> aggregateNodes = new ArrayList<Node>();
207 while (nodeIterator.hasNext()) {
208 Node currentNode = nodeIterator.nextNode();
209 if (NodeUtil.isNodeType(currentNode, RSSAggregatorNodeTypes.RSSAggregator.NAME)) {
210 aggregateNodes.add(currentNode);
211 }
212 }
213
214
215
216 int size = aggregateNodes.size();
217 if (size > 1) {
218 throw new IllegalStateException(format(
219 "Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
220 parentNode, aggregateNodeName, RSSAggregatorNodeTypes.RSSAggregator.NAME, size));
221 }
222 if (aggregateNodes.isEmpty()) {
223 return null;
224 }
225 return aggregateNodes.iterator().next();
226 }
227
228
229
230
231
232
233
234
235
236
237
238 protected Node getOrCreateNode(Node contentNode, String name, String itemType) throws RepositoryException {
239 return NodeUtil.createPath(contentNode, name, itemType, true);
240 }
241
242
243
244
245
246
247
248
249
250 public AggregateFilter loadAggregateFilter(Node aggregateNode) throws RepositoryException {
251 Node filtersNode = aggregateNode.hasNode("filters") ? aggregateNode.getNode("filters") : null;
252 if (filtersNode == null) {
253 return new AggregateFilter(Collections.<FilterPredicate>emptySet());
254 }
255 Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
256 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
257
258 for (Node n : filterNodes) {
259 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
260 if (filterPredicate == null) {
261 continue;
262 }
263 filters.add(filterPredicate);
264 }
265 return new AggregateFilter(filters);
266 }
267
268
269
270
271
272
273
274
275
276
277 @SuppressWarnings("unchecked")
278 protected Node saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Node dataNode) throws RepositoryException {
279 Node channelNode = recreateFeedChannelNode(feedChannel, dataNode);
280 List<SyndEntry> entries = feedChannel.getFeed().getEntries();
281 int size = entries.size();
282 for (int i = 0; i < size; i++) {
283 SyndEntry entry = entries.get(i);
284 String entryName = format("entry-%s", i);
285 if (aggregateFilter.include(entry)) {
286 createFeedChannelEntryNode(entry, entryName, channelNode);
287 }
288 }
289 return channelNode;
290 }
291
292
293
294
295
296
297
298
299
300 protected Node recreateFeedChannelNode(FeedChannel feedChannel, Node dataNode) throws RepositoryException {
301 String channelName = feedChannel.getName();
302 if (dataNode.hasNode(channelName)) {
303 String absPath = dataNode.getNode(channelName).getPath();
304 dataNode.getSession().removeItem(absPath);
305 }
306 Node channelNode = NodeUtil.createPath(dataNode, channelName, NodeTypes.Content.NAME, true);
307
308 SyndFeed feed = feedChannel.getFeed();
309 channelNode.setProperty("description", feed.getDescription());
310 channelNode.setProperty("link", feed.getLink());
311 channelNode.setProperty("rss", feedChannel.getUrl());
312 channelNode.setProperty("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
313 channelNode.setProperty("type", feed.getFeedType());
314 return channelNode;
315 }
316
317
318
319
320
321
322
323
324
325
326 protected Node createFeedChannelEntryNode(SyndEntry entry, String nodeName, Node channelNode) throws RepositoryException {
327 Node entryNode = NodeUtil.createPath(channelNode, nodeName, NodeTypes.Content.NAME, true);
328 entryNode.setProperty("author", entry.getAuthor() == null ? "" : entry.getAuthor());
329 entryNode.setProperty("channelTitle", PropertyUtil.getString(channelNode, "title"));
330 final SyndContent description = entry.getDescription();
331
332 String descriptionString;
333 if (description != null && StringUtils.isNotBlank(description.getValue())) {
334 descriptionString = description.getValue();
335 } else {
336 descriptionString = getEntryContent(entry);
337 }
338
339 entryNode.setProperty("description", descriptionString);
340 entryNode.setProperty("content", getEntryContent(entry));
341 entryNode.setProperty("link", entry.getLink());
342 Date publishedDate = entry.getPublishedDate();
343 if (publishedDate == null) {
344 publishedDate = new Date();
345 }
346 entryNode.setProperty("pubDate", publishedDate.getTime());
347 entryNode.setProperty("title", entry.getTitle());
348
349 createCategoriesNode(entry, entryNode);
350 return entryNode;
351 }
352
353
354
355
356
357
358
359 protected String getEntryContent(SyndEntry entry) {
360 String entryContent = "";
361
362 if (entry != null && entry.getContents().size() > 0) {
363 @SuppressWarnings("unchecked")
364 final List<SyndContent> contents = entry.getContents();
365 for (SyndContent content : contents) {
366 if (StringUtils.equalsIgnoreCase("html", content.getType()) && StringUtils.isNotBlank(content.getType())) {
367 entryContent = content.getValue();
368 break;
369 }
370 }
371 }
372 return entryContent;
373 }
374
375 @SuppressWarnings("unchecked")
376 protected Node createCategoriesNode(SyndEntry entry, Node entryNode) throws RepositoryException {
377 Node categoriesNode = NodeUtil.createPath(entryNode, "categories", NodeTypes.Content.NAME, true);
378 List<SyndCategory> categories = entry.getCategories();
379 for (int i = 0; i < categories.size(); i++) {
380 SyndCategory category = categories.get(i);
381 String categoryIndex = valueOf(i);
382 String categoryName = category.getName();
383 categoriesNode.setProperty(categoryIndex, categoryName);
384 }
385 return categoriesNode;
386 }
387
388
389
390
391
392
393 protected AggregateFeedContentMapper setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
394 Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
395 this.aggregateFeedMapper = aggregateFeedMapper;
396 return this.aggregateFeedMapper;
397 }
398
399
400
401
402 public RSSFeedFetcher setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
403 Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
404 this.feedFetcher = rssFeedFetcher;
405 return this.feedFetcher;
406 }
407
408
409
410
411
412 protected FilterPredicateContentMapper setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
413 Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
414 this.filterPredicateMapper = filterPredicateMapper;
415 return this.filterPredicateMapper;
416 }
417
418 private static class IsRootOrFolder implements Predicate {
419
420 public IsRootOrFolder() {
421 }
422
423 @Override
424 public boolean evaluate(Object object) {
425 if (object instanceof Node) {
426 Node node = (Node) object;
427 try {
428 return NodeUtil.isNodeType(node, NodeTypes.Folder.NAME) || "/".equals(node.getPath());
429 } catch (RepositoryException e) {
430 log.warn("Failed to check predicate on node: " + NodeUtil.getPathIfPossible(node));
431 }
432 }
433 return false;
434 }
435 }
436 }