1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.importhandler;
35
36 import com.sun.syndication.feed.synd.SyndCategory;
37 import com.sun.syndication.feed.synd.SyndContent;
38 import com.sun.syndication.feed.synd.SyndEntry;
39 import com.sun.syndication.feed.synd.SyndFeed;
40 import info.magnolia.cms.core.Content;
41 import info.magnolia.cms.core.ItemType;
42 import info.magnolia.cms.util.ContentUtil;
43 import info.magnolia.cms.util.FactoryUtil;
44 import info.magnolia.module.data.DataConsts;
45 import info.magnolia.module.data.importer.ImportException;
46 import info.magnolia.module.data.importer.ImportHandler;
47 import info.magnolia.module.data.importer.ImportTarget;
48 import info.magnolia.module.rssaggregator.util.Assert;
49 import static org.apache.commons.lang.StringUtils.*;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
52
53 import javax.jcr.RepositoryException;
54 import java.util.Collection;
55 import java.util.Collections;
56 import java.util.Date;
57 import java.util.HashSet;
58 import java.util.List;
59 import java.util.Set;
60
61 import static java.lang.String.*;
62
63
64
65
66
67
68
69
70
71
72
73
74
75 public class RSSFeedImportHandler extends ImportHandler {
76
77 private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
78
79 private static final Logger log = LoggerFactory.getLogger(RSSFeedImportHandler.class);
80
81 private RSSFeedFetcher feedFetcher;
82 private AggregateFeedContentMapper aggregateFeedMapper;
83 private FilterPredicateContentMapper filterPredicateMapper;
84
85
86
87
88
89 public RSSFeedImportHandler() {
90 setAggregateFeedContentMapper(new AggregateFeedContentMapper());
91 setFilterPredicateContentMapper(new FilterPredicateContentMapper());
92 }
93
94
95 @Override
96 protected synchronized void checkPreConditions() throws ImportException {
97 super.checkPreConditions();
98 if (feedFetcher == null) {
99
100 feedFetcher = (RSSFeedFetcher) FactoryUtil.newInstance(RSSFeedFetcher.class);
101 }
102 log.debug("Using feed fetcher '{}'", feedFetcher.getClass().getName());
103 }
104
105 @SuppressWarnings("unchecked")
106 public Set doImport(final ImportTarget target, final Content parentNode, final Set newContentUUIDs) throws ImportException {
107 try {
108 Set<AggregateFeed> aggregateFeeds = loadAggregates(parentNode);
109 if (!aggregateFeeds.isEmpty()) {
110 log.info("Fetching {} aggregate feeds ({} channels)", aggregateFeeds.size(),
111 countChannels(aggregateFeeds));
112 Set<AggregateFeed> fetchedAggregateFeeds = feedFetcher.fetchAggregateFeeds(aggregateFeeds);
113 Set<String> newAggregateContentUUIDs = saveAggregates(fetchedAggregateFeeds, parentNode);
114 newContentUUIDs.addAll(newAggregateContentUUIDs);
115 parentNode.save();
116 log.info("{} completed retrieving of RSS feeds", feedFetcher.getClass().getName());
117 }
118 return newContentUUIDs;
119 } catch (Exception e) {
120 String message = format("Failed to execute import for target '%s', parent node '%s'", target, parentNode);
121 throw new ImportException(message, e);
122 }
123 }
124
125
126
127 private int countChannels(Set<AggregateFeed> aggregateFeeds) {
128 int channelCount = 0;
129 for (AggregateFeed aggregateFeed : aggregateFeeds) {
130 channelCount += aggregateFeed.getChannels().size();
131 }
132 return channelCount;
133 }
134
135
136
137
138
139
140
141
142
143 @SuppressWarnings("unchecked")
144 public Set<AggregateFeed> loadAggregates(Content parentNode) throws RepositoryException {
145 Collection<Content> aggregateNodes = parentNode.getChildren(CONTENTTYPE_RSSAGGREGATOR);
146 Set<AggregateFeed> aggregateFeeds = new HashSet<AggregateFeed>();
147 for (Content aggregateNode : aggregateNodes) {
148 AggregateFeed aggregateFeed = aggregateFeedMapper.map(aggregateNode);
149 aggregateFeeds.add(aggregateFeed);
150 }
151 return aggregateFeeds;
152 }
153
154
155
156
157
158
159
160
161
162
163
164
165 protected Set<String> saveAggregates(Set<AggregateFeed> aggregateFeeds, Content parentNode) throws RepositoryException {
166 Set<String> newAggregateContentUUIDs = new HashSet<String>();
167 for (AggregateFeed aggregateFeed : aggregateFeeds) {
168 Content aggregateNode = loadSingleAggregateNode(parentNode, aggregateFeed.getName());
169 Content dataNode = getOrCreateContent(aggregateNode, "data", new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
170 newAggregateContentUUIDs.add(aggregateNode.getUUID());
171 AggregateFilter aggregateFilter = loadAggregateFilter(aggregateNode);
172 for (FeedChannel channel : aggregateFeed.getChannels()) {
173 if (channel.hasFeed()) {
174 saveFeedChannel(channel, aggregateFilter, dataNode);
175 }
176 }
177 }
178 return newAggregateContentUUIDs;
179 }
180
181
182
183
184
185
186
187
188
189
190 @SuppressWarnings("unchecked")
191 protected Content loadSingleAggregateNode(Content parentNode, String aggregateNodeName) {
192 Collection<Content> aggregateNodes = parentNode.getChildren(CONTENTTYPE_RSSAGGREGATOR, aggregateNodeName);
193 int size = aggregateNodes.size();
194 if (size > 1) {
195 throw new IllegalStateException(format("Expected content node '%s' to have at most 1 child named '%s' of item type '%s', but found %s",
196 parentNode, aggregateNodeName, CONTENTTYPE_RSSAGGREGATOR, size));
197 }
198 if (aggregateNodes.isEmpty()) {
199 return null;
200 }
201 return aggregateNodes.iterator().next();
202 }
203
204
205
206
207
208
209
210
211
212
213
214 protected Content getOrCreateContent(Content contentNode, String name, ItemType itemType) throws RepositoryException {
215 return ContentUtil.getOrCreateContent(contentNode, name, itemType);
216 }
217
218
219
220
221
222
223
224
225
226 @SuppressWarnings("unchecked")
227 public AggregateFilter loadAggregateFilter(Content aggregateNode) throws RepositoryException {
228 Content filtersNode = aggregateNode.hasContent("filters") ? aggregateNode.getContent("filters") : null;
229 if (filtersNode == null) {
230 return new AggregateFilter(Collections.<FilterPredicate>emptySet());
231 }
232 Set<FilterPredicate> filters = new HashSet<FilterPredicate>();
233 Collection<Content> filterNodes = filtersNode.getChildren();
234 for (Content filterNode : filterNodes) {
235 FilterPredicate filterPredicate = filterPredicateMapper.map(filterNode);
236 if (filterPredicate == null) {
237 continue;
238 }
239 filters.add(filterPredicate);
240 }
241 return new AggregateFilter(filters);
242 }
243
244
245
246
247
248
249
250
251
252
253 @SuppressWarnings("unchecked")
254 protected void saveFeedChannel(FeedChannel feedChannel, AggregateFilter aggregateFilter, Content dataNode) throws RepositoryException {
255 Content channelNode = recreateFeedChannelNode(feedChannel, dataNode);
256 List<SyndEntry> entries = feedChannel.getFeed().getEntries();
257 int size = entries.size();
258 for (int i = 0; i < size; i++) {
259 SyndEntry entry = entries.get(i);
260 String entryName = format("entry-%s", i);
261 if (aggregateFilter.include(entry)) {
262 createFeedChannelEntryNode(entry, entryName, channelNode);
263 }
264 }
265 }
266
267
268
269
270
271
272
273
274
275 protected Content recreateFeedChannelNode(FeedChannel feedChannel, Content dataNode) throws RepositoryException {
276 String channelName = feedChannel.getName();
277 if (dataNode.hasContent(channelName)) {
278 dataNode.delete(channelName);
279 }
280 Content channelNode = dataNode.createContent(channelName, new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
281 SyndFeed feed = feedChannel.getFeed();
282 channelNode.createNodeData("description", feed.getDescription());
283 channelNode.createNodeData("link", feed.getLink());
284 channelNode.createNodeData("rss", feedChannel.getUrl());
285 channelNode.createNodeData("title", !isEmpty(feedChannel.getTitle()) ? feedChannel.getTitle() : feed.getTitle());
286 channelNode.createNodeData("type", feed.getFeedType());
287 return channelNode;
288 }
289
290
291
292
293
294
295
296
297
298
299 protected void createFeedChannelEntryNode(SyndEntry entry, String nodeName, Content channelNode) throws RepositoryException {
300 Content entryNode = channelNode.createContent(nodeName, new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
301 entryNode.createNodeData("author", entry.getAuthor() == null ? "" : entry.getAuthor());
302 entryNode.createNodeData("channelTitle", channelNode.getNodeData("title").getString());
303 final SyndContent description = entry.getDescription();
304 String descriptionString = null;
305 if (description != null) {
306 descriptionString = description.getValue();
307 } else {
308 @SuppressWarnings("unchecked")
309 final List<SyndContent> contents = entry.getContents();
310 for (int i = 0; i < contents.size(); i++) {
311 SyndContent content = contents.get(i);
312 if ("html".equals(content.getType())) {
313 descriptionString = content.getValue();
314 break;
315 }
316 }
317 }
318 if (descriptionString == null) {
319 descriptionString = "";
320 }
321 entryNode.createNodeData("description", descriptionString);
322 entryNode.createNodeData("link", entry.getLink());
323 Date publishedDate = entry.getPublishedDate();
324 if (publishedDate == null) {
325 publishedDate = new Date();
326 }
327 entryNode.createNodeData("pubDate", publishedDate.getTime());
328 entryNode.createNodeData("title", entry.getTitle());
329
330 createCategoriesNode(entry, entryNode);
331 }
332
333 @SuppressWarnings("unchecked")
334 protected void createCategoriesNode(SyndEntry entry, Content entryNode) throws RepositoryException {
335 Content categoriesNode = entryNode.createContent("categories", new ItemType(DataConsts.MODULE_DATA_CONTENT_NODE_TYPE));
336 List<SyndCategory> categories = entry.getCategories();
337 for (int i = 0; i < categories.size(); i++) {
338 SyndCategory category = categories.get(i);
339 String categoryIndex = valueOf(i);
340 String categoryName = category.getName();
341 categoriesNode.createNodeData(categoryIndex, categoryName);
342 }
343 }
344
345
346
347 public void setFeedFetcher(RSSFeedFetcher rssFeedFetcher) {
348 Assert.notNull(rssFeedFetcher, "'rssFeedFetcher' must not be null");
349 this.feedFetcher = rssFeedFetcher;
350 }
351
352
353 protected void setAggregateFeedContentMapper(AggregateFeedContentMapper aggregateFeedMapper) {
354 Assert.notNull(aggregateFeedMapper, "'aggregateFeedContentMapper' must not be null");
355 this.aggregateFeedMapper = aggregateFeedMapper;
356 }
357
358
359 protected void setFilterPredicateContentMapper(FilterPredicateContentMapper filterPredicateMapper) {
360 Assert.notNull(filterPredicateMapper, "'filterPredicateContentMapper' must not be null");
361 this.filterPredicateMapper = filterPredicateMapper;
362 }
363
364
365
366
367
368
369 }