1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.importhandler;
35
36 import static java.lang.String.format;
37 import static java.lang.String.valueOf;
38 import static org.apache.commons.lang.StringUtils.isEmpty;
39
40 import info.magnolia.cms.core.Content;
41 import info.magnolia.jcr.util.NodeUtil;
42 import info.magnolia.jcr.util.PropertyUtil;
43 import info.magnolia.jcr.util.VersionUtil;
44 import info.magnolia.module.data.DataConsts;
45 import info.magnolia.module.data.importer.ImportException;
46 import info.magnolia.module.data.importer.ImportHandler;
47 import info.magnolia.module.data.importer.ImportTarget;
48 import info.magnolia.module.rssaggregator.util.Assert;
49
50 import java.util.ArrayList;
51 import java.util.Collection;
52 import java.util.Collections;
53 import java.util.Date;
54 import java.util.HashSet;
55 import java.util.List;
56 import java.util.Set;
57
58 import javax.inject.Inject;
59 import javax.jcr.Node;
60 import javax.jcr.NodeIterator;
61 import javax.jcr.RepositoryException;
62
63 import org.apache.commons.lang.StringUtils;
64 import org.slf4j.Logger;
65 import org.slf4j.LoggerFactory;
66
67 import com.sun.syndication.feed.synd.SyndCategory;
68 import com.sun.syndication.feed.synd.SyndContent;
69 import com.sun.syndication.feed.synd.SyndEntry;
70 import com.sun.syndication.feed.synd.SyndFeed;
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87 public class RSSFeedImportHandler extends ImportHandler {
88
89 private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
90
91 private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
92
93 private RSSFeedFetcher feedFetcher;
94 private AggregateFeedContentMapper aggregateFeedMapper;
95 private FilterPredicateContentMapper filterPredicateMapper;
96
97
98
99
100 @Inject
101 public RSSFeedImportHandler(RSSFeedFetcher feedFetcher) {
102 setAggregateFeedContentMapper(new AggregateFeedContentMapper());
103 setFilterPredicateContentMapper(new FilterPredicateContentMapper());
104 this.feedFetcher = feedFetcher;
105 }
106
107
108
109
110 @Override
111 protected synchronized void checkPreConditions() throws ImportException {
112 super.checkPreConditions();
113 log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
114 }
115
116 @SuppressWarnings("unchecked")
117 @Override
118 public Set doImport(final ImportTarget target, final Content parentNode, final Set newContentUUIDs) throws ImportException {
119 try {
120 Set<AggregateFeed> aggregateFeeds = loadAggregates(parentNode.getJCRNode());
121 if (!aggregateFeeds.isEmpty()) {
122 log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(),
123 countChannels(aggregateFeeds));
124 Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
125 Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, parentNode.getJCRNode());
126 newContentUUIDs.addAll(newAggregateContentUUIDs);
127 parentNode.getJCRNode().getSession().save();
128 log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
129 }
130 return newContentUUIDs;
131 } catch (Exception e) {
132 String message = format("Failed to execute import for target '%s', parent node '%s'", target, parentNode);
133 throw new ImportException(message, e);
134 }
135 }
136
137
138
139 private int countChannels(Set<AggregateFeed> aggregateFeeds) {
140 int channelCount = 0;
141 for (AggregateFeed aggregateFeed : aggregateFeeds) {
142 channelCount += aggregateFeed.getChannels().size();
143 }
144 return channelCount;
145 }
146
147
148
149
150
151
152
153
154
155 @SuppressWarnings("unchecked")
156 public Set<AggregateFeed> loadAggregates(Node parentNode) throws RepositoryException {
157
158 List<Node> nodeIterator = NodeUtil.asList(NodeUtil.getNodes(parentNode, CONTENTTYPE_RSSAGGREGATOR));
159 Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
160 for (Node aggregateNode : nodeIterator) {
161 AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
162 aggregateFeeds.add(aggregateFeed);
163 }
164 return aggregateFeeds;
165 }
166
167
168
169
170
171
172
173
174
175
176
177
178 protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Node parentNode) throws RepositoryException {
179 Set<String> newAggregateContentUUIDs = new HashSet<String>();
180 for (AggregateFeed aggregateFeed : aggregateFeeds) {
181 Node aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
182 Node dataNode = getOrCreateNode(aggregateNode, "data", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE);
183 newAggregateContentUUIDs.add(aggregateNode.getUUID());
184 AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
185 for (FeedChannel channel : aggregateFeed.getChannels()) {
186 if (channel.hasFeed()) {
187 saveFeedChannel(channel, aggregateFilter, dataNode);
188 }
189 }
190 }
191 return newAggregateContentUUIDs;
192 }
193
194
195
196
197
198
199
200
201
202
203
204 protected Node loadSingleAggregateNode(Node parentNode, String aggregateNodeName) throws RepositoryException {
205
206
207
208 NodeIterator nodeIterator = parentNode.getNodes(aggregateNodeName);
209 Collection<Node> aggregateNodes = new ArrayList<Node>();
210 while (nodeIterator.hasNext()) {
211 Node currentNode = nodeIterator.nextNode();
212 if (NodeUtil.isNodeType(currentNode, CONTENTTYPE_RSSAGGREGATOR)) {
213 aggregateNodes.add(currentNode);
214 }
215 }
216
217
218
219 int size = aggregateNodes.size();
220 if (size > 1) {
221 throw new IllegalStateException(format(
222 "Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
223 parentNode, aggregateNodeName, CONTENTTYPE_RSSAGGREGATOR, size));
224 }
225 if (aggregateNodes.isEmpty()) {
226 return null;
227 }
228 return aggregateNodes.iterator().next();
229 }
230
231
232
233
234
235
236
237
238
239
240
241 protected Node getOrCreateNode(Node contentNode, String name, String itemType) throws RepositoryException {
242 return NodeUtil.createPath(contentNode, name, itemType, true);
243 }
244
245
246
247
248
249
250
251
252
253 public AggregateFilter loadAggregateFilter(Node aggregateNode) throws RepositoryException {
254 Node filtersNode = aggregateNode.hasNode("filters") ? aggregateNode.getNode("filters") : null;
255 if (filtersNode == null) {
256 return new AggregateFilter(Collections.<FilterPredicate>emptySet());
257 }
258 Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
259 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
260
261 for (Node n : filterNodes) {
262 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
263 if (filterPredicate == null) {
264 continue;
265 }
266 filters.add(filterPredicate);
267 }
268 return new AggregateFilter(filters);
269 }
270
271
272
273
274
275
276
277
278
279
280 @SuppressWarnings("unchecked")
281 protected Node saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Node dataNode) throws RepositoryException {
282 Node channelNode = recreateFeedChannelNode(feedChannel, dataNode);
283 List<SyndEntry> entries = feedChannel.getFeed().getEntries();
284 int size = entries.size();
285 for (int i = 0; i < size; i++) {
286 SyndEntry entry = entries.get(i);
287 String entryName = format("entry-%s", i);
288 if (aggregateFilter.include(entry)) {
289 createFeedChannelEntryNode(entry, entryName, channelNode);
290 }
291 }
292 return channelNode;
293 }
294
295
296
297
298
299
300
301
302
303 protected Node recreateFeedChannelNode(FeedChannel feedChannel, Node dataNode) throws RepositoryException {
304 String channelName = feedChannel.getName();
305 if (dataNode.hasNode(channelName)) {
306 String absPath = dataNode.getNode(channelName).getPath();
307 dataNode.getSession().removeItem(absPath);
308 }
309 Node channelNode = NodeUtil.createPath(dataNode, channelName, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
310
311 SyndFeed feed = feedChannel.getFeed();
312 channelNode.setProperty("description", feed.getDescription());
313 channelNode.setProperty("link", feed.getLink());
314 channelNode.setProperty("rss", feedChannel.getUrl());
315 channelNode.setProperty("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
316 channelNode.setProperty("type", feed.getFeedType());
317 return channelNode;
318 }
319
320
321
322
323
324
325
326
327
328
329 protected Node createFeedChannelEntryNode(SyndEntry entry, String nodeName, Node channelNode) throws RepositoryException {
330 Node entryNode = NodeUtil.createPath(channelNode, nodeName, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
331 entryNode.setProperty("author", entry.getAuthor() == null ? "" : entry.getAuthor());
332 entryNode.setProperty("channelTitle", PropertyUtil.getString(channelNode, "title"));
333 final SyndContent description = entry.getDescription();
334
335 String descriptionString;
336 if (description != null && StringUtils.isNotBlank(description.getValue())) {
337 descriptionString = description.getValue();
338 } else {
339 descriptionString = getEntryContent(entry);
340 }
341
342 entryNode.setProperty("description", descriptionString);
343 entryNode.setProperty("content", getEntryContent(entry));
344 entryNode.setProperty("link", entry.getLink());
345 Date publishedDate = entry.getPublishedDate();
346 if (publishedDate == null) {
347 publishedDate = new Date();
348 }
349 entryNode.setProperty("pubDate", publishedDate.getTime());
350 entryNode.setProperty("title", entry.getTitle());
351
352 createCategoriesNode(entry, entryNode);
353 return entryNode;
354 }
355
356
357
358
359
360
361
362 protected String getEntryContent(SyndEntry entry) {
363 String entryContent = "";
364
365 if (entry != null && entry.getContents().size() > 0) {
366 @SuppressWarnings("unchecked")
367 final List<SyndContent> contents = entry.getContents();
368 for (SyndContent content : contents) {
369 if (StringUtils.equalsIgnoreCase("html", content.getType()) && StringUtils.isNotBlank(content.getType())) {
370 entryContent = content.getValue();
371 break;
372 }
373 }
374 }
375 return entryContent;
376 }
377
378 @SuppressWarnings("unchecked")
379 protected Node createCategoriesNode(SyndEntry entry, Node entryNode) throws RepositoryException {
380 Node categoriesNode = NodeUtil.createPath(entryNode, "categories", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
381 List<SyndCategory> categories = entry.getCategories();
382 for (int i = 0; i < categories.size(); i++) {
383 SyndCategory category = categories.get(i);
384 String categoryIndex = valueOf(i);
385 String categoryName = category.getName();
386 categoriesNode.setProperty(categoryIndex, categoryName);
387 }
388 return categoriesNode;
389 }
390
391
392
393
394
395
396 protected AggregateFeedContentMapper setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
397 Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
398 this.aggregateFeedMapper = aggregateFeedMapper;
399 return this.aggregateFeedMapper;
400 }
401
402
403
404
405 public RSSFeedFetcher setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
406 Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
407 this.feedFetcher = rssFeedFetcher;
408 return this.feedFetcher;
409 }
410
411
412
413
414
415 protected FilterPredicateContentMapper setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
416 Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
417 this.filterPredicateMapper = filterPredicateMapper;
418 return this.filterPredicateMapper;
419 }
420
421
422
423
424
425
426 }