1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.importhandler;
35
36 import static java.lang.String.format;
37 import static java.lang.String.valueOf;
38 import static org.apache.commons.lang.StringUtils.isEmpty;
39 import info.magnolia.cms.core.Content;
40 import info.magnolia.jcr.util.NodeUtil;
41 import info.magnolia.jcr.util.PropertyUtil;
42 import info.magnolia.jcr.util.VersionUtil;
43 import info.magnolia.module.data.DataConsts;
44 import info.magnolia.module.data.importer.ImportException;
45 import info.magnolia.module.data.importer.ImportHandler;
46 import info.magnolia.module.data.importer.ImportTarget;
47 import info.magnolia.module.rssaggregator.util.Assert;
48
49 import java.util.ArrayList;
50 import java.util.Collection;
51 import java.util.Collections;
52 import java.util.Date;
53 import java.util.HashSet;
54 import java.util.List;
55 import java.util.Set;
56
57 import javax.inject.Inject;
58 import javax.jcr.Node;
59 import javax.jcr.NodeIterator;
60 import javax.jcr.RepositoryException;
61
62 import org.slf4j.Logger;
63 import org.slf4j.LoggerFactory;
64
65 import com.sun.syndication.feed.synd.SyndCategory;
66 import com.sun.syndication.feed.synd.SyndContent;
67 import com.sun.syndication.feed.synd.SyndEntry;
68 import com.sun.syndication.feed.synd.SyndFeed;
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85 public class RSSFeedImportHandler extends ImportHandler {
86
87 private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
88
89 private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
90
91 private RSSFeedFetcher feedFetcher;
92 private AggregateFeedContentMapper aggregateFeedMapper;
93 private FilterPredicateContentMapper filterPredicateMapper;
94
95
96
97
98 @Inject
99 public RSSFeedImportHandler(RSSFeedFetcher feedFetcher) {
100 setAggregateFeedContentMapper(new AggregateFeedContentMapper());
101 setFilterPredicateContentMapper(new FilterPredicateContentMapper());
102 this.feedFetcher = feedFetcher;
103 }
104
105
106 @Override
107 protected synchronized void checkPreConditions() throws ImportException {
108 super.checkPreConditions();
109 log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
110 }
111
112 @SuppressWarnings("unchecked")
113 @Override
114 public Set doImport(final ImportTarget target, final Content parentNode, final Set newContentUUIDs) throws ImportException {
115 try {
116 Set<AggregateFeed> aggregateFeeds = loadAggregates(parentNode.getJCRNode());
117 if (!aggregateFeeds.isEmpty()) {
118 log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(),
119 countChannels(aggregateFeeds));
120 Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
121 Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, parentNode.getJCRNode());
122 newContentUUIDs.addAll(newAggregateContentUUIDs);
123 parentNode.getJCRNode().getSession().save();
124 log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
125 }
126 return newContentUUIDs;
127 } catch (Exception e) {
128 String message = format("Failed to execute import for target '%s', parent node '%s'", target, parentNode);
129 throw new ImportException(message, e);
130 }
131 }
132
133
134
135 private int countChannels(Set<AggregateFeed> aggregateFeeds) {
136 int channelCount = 0;
137 for (AggregateFeed aggregateFeed : aggregateFeeds) {
138 channelCount += aggregateFeed.getChannels().size();
139 }
140 return channelCount;
141 }
142
143
144
145
146
147
148
149
150
151
152
153 @SuppressWarnings("unchecked")
154 public Set<AggregateFeed> loadAggregates(Node parentNode) throws RepositoryException {
155
156 List<Node> nodeIterator = NodeUtil.asList(NodeUtil.getNodes(parentNode,CONTENTTYPE_RSSAGGREGATOR));
157 Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
158 for (Node aggregateNode : nodeIterator) {
159 AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
160 aggregateFeeds.add(aggregateFeed);
161 }
162 return aggregateFeeds;
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179 protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Node parentNode) throws RepositoryException {
180 Set<String> newAggregateContentUUIDs = new HashSet<String>();
181 for (AggregateFeed aggregateFeed : aggregateFeeds) {
182 Node aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
183 Node dataNode = getOrCreateNode(aggregateNode, "data", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE);
184 newAggregateContentUUIDs.add(aggregateNode.getUUID());
185 AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
186 for (FeedChannel channel : aggregateFeed.getChannels()) {
187 if (channel.hasFeed()) {
188 saveFeedChannel(channel, aggregateFilter, dataNode);
189 }
190 }
191 }
192 return newAggregateContentUUIDs;
193 }
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 protected Node loadSingleAggregateNode(Node parentNode, String aggregateNodeName) throws RepositoryException {
210
211
212
213 NodeIterator nodeIterator = parentNode.getNodes(aggregateNodeName);
214 Collection<Node> aggregateNodes = new ArrayList<Node>();
215 while (nodeIterator.hasNext()) {
216 Node currentNode = nodeIterator.nextNode();
217 if (NodeUtil.isNodeType(currentNode, CONTENTTYPE_RSSAGGREGATOR)) {
218 aggregateNodes.add(currentNode);
219 }
220 }
221
222
223
224 int size = aggregateNodes.size();
225 if (size > 1) {
226 throw new IllegalStateException(format(
227 "Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
228 parentNode, aggregateNodeName, CONTENTTYPE_RSSAGGREGATOR, size));
229 }
230 if (aggregateNodes.isEmpty()) {
231 return null;
232 }
233 return aggregateNodes.iterator().next();
234 }
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250 protected Node getOrCreateNode(Node contentNode, String name, String itemType) throws RepositoryException {
251 return NodeUtil.createPath(contentNode, name, itemType ,true);
252 }
253
254
255
256
257
258
259
260
261
262
263
264 public AggregateFilter loadAggregateFilter(Node aggregateNode) throws RepositoryException {
265 Node filtersNode = aggregateNode.hasNode("filters") ? aggregateNode.getNode("filters") : null;
266 if (filtersNode == null) {
267 return new AggregateFilter(Collections.<FilterPredicate> emptySet());
268 }
269 Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
270 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode,VersionUtil.getNodeTypeName(filtersNode)));
271
272 for (Node n:filterNodes) {
273 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
274 if (filterPredicate == null) {
275 continue;
276 }
277 filters.add(filterPredicate);
278 }
279 return new AggregateFilter(filters);
280 }
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295 @SuppressWarnings("unchecked")
296 protected Node saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Node dataNode) throws RepositoryException {
297 Node channelNode = recreateFeedChannelNode(feedChannel, dataNode);
298 List<SyndEntry> entries = feedChannel.getFeed().getEntries();
299 int size = entries.size();
300 for (int i = 0; i < size; i++) {
301 SyndEntry entry = entries.get(i);
302 String entryName = format("entry-%s", i);
303 if (aggregateFilter.include(entry)) {
304 createFeedChannelEntryNode(entry, entryName, channelNode);
305 }
306 }
307 return channelNode;
308 }
309
310
311
312
313
314
315
316
317
318
319
320
321 protected Node recreateFeedChannelNode(FeedChannel feedChannel, Node dataNode) throws RepositoryException {
322 String channelName = feedChannel.getName();
323 if (dataNode.hasNode(channelName)) {
324 String absPath = dataNode.getNode(channelName).getPath();
325 dataNode.getSession().removeItem(absPath);
326 }
327 Node channelNode = NodeUtil.createPath(dataNode, channelName, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE,true);
328
329 SyndFeed feed = feedChannel.getFeed();
330 channelNode.setProperty("description", feed.getDescription());
331 channelNode.setProperty("link", feed.getLink());
332 channelNode.setProperty("rss", feedChannel.getUrl());
333 channelNode.setProperty("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
334 channelNode.setProperty("type", feed.getFeedType());
335 return channelNode;
336 }
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351 protected Node createFeedChannelEntryNode(SyndEntry entry, String nodeName, Node channelNode) throws RepositoryException {
352 Node entryNode = NodeUtil.createPath(channelNode, nodeName, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE,true);
353 entryNode.setProperty("author", entry.getAuthor() == null ? "" : entry.getAuthor());
354 entryNode.setProperty("channelTitle", PropertyUtil.getString(channelNode,"title"));
355 final SyndContent description = entry.getDescription();
356 String descriptionString = null;
357 if (description != null) {
358 descriptionString = description.getValue();
359 } else {
360 @SuppressWarnings("unchecked")
361 final List<SyndContent> contents = entry.getContents();
362 for (int i = 0; i < contents.size(); i++) {
363 SyndContent content = contents.get(i);
364 if ("html".equals(content.getType())) {
365 descriptionString = content.getValue();
366 break;
367 }
368 }
369 }
370 if (descriptionString == null) {
371 descriptionString = "";
372 }
373 entryNode.setProperty("description", descriptionString);
374 entryNode.setProperty("link", entry.getLink());
375 Date publishedDate = entry.getPublishedDate();
376 if (publishedDate == null) {
377 publishedDate = new Date();
378 }
379 entryNode.setProperty("pubDate", publishedDate.getTime());
380 entryNode.setProperty("title", entry.getTitle());
381
382 createCategoriesNode(entry, entryNode);
383 return entryNode;
384 }
385
386 @SuppressWarnings("unchecked")
387 protected Node createCategoriesNode(SyndEntry entry, Node entryNode) throws RepositoryException {
388 Node categoriesNode = NodeUtil.createPath(entryNode, "categories", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE,true);
389 List<SyndCategory> categories = entry.getCategories();
390 for (int i = 0; i < categories.size(); i++) {
391 SyndCategory category = categories.get(i);
392 String categoryIndex = valueOf(i);
393 String categoryName = category.getName();
394 categoriesNode.setProperty(categoryIndex, categoryName);
395 }
396 return categoriesNode;
397 }
398
399
400
401
402 protected AggregateFeedContentMapper setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
403 Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
404 this.aggregateFeedMapper = aggregateFeedMapper;
405 return this.aggregateFeedMapper;
406 }
407
408
409 public RSSFeedFetcher setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
410 Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
411 this.feedFetcher = rssFeedFetcher;
412 return this.feedFetcher;
413 }
414
415
416
417 protected FilterPredicateContentMapper setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
418 Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
419 this.filterPredicateMapper = filterPredicateMapper;
420 return this.filterPredicateMapper;
421 }
422
423
424
425
426
427
428 }