1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.generator;
35
import info.magnolia.commands.MgnlCommand;
import info.magnolia.context.Context;
import info.magnolia.context.MgnlContext;
import info.magnolia.jcr.util.NodeUtil;
import info.magnolia.jcr.util.PropertyUtil;
import info.magnolia.jcr.util.VersionUtil;
import info.magnolia.module.data.DataConsts;
import info.magnolia.module.rssaggregator.importhandler.FilterPredicate;
import info.magnolia.module.rssaggregator.importhandler.FilterPredicateContentMapper;
import info.magnolia.module.rssaggregator.importhandler.PlanetFilter;
import info.magnolia.module.rssaggregator.util.PlanetUtil;

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;
import javax.jcr.Session;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.sun.syndication.feed.synd.SyndContentImpl;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndEntryImpl;
66
67
68
69
70
71
72 public class PlanetDataGenerator extends MgnlCommand {
73
74 private static final Logger log = LoggerFactory.getLogger(PlanetDataGenerator.class);
75
76 private static final String PLANET_DATANODE_NAME = "planetData";
77 private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
78 private static final String POSTS_PREFIX = "posts-";
79 private static final String POSTS_FIRST = POSTS_PREFIX + "00000";
80 private static final String POST_ENTRY_PREFIX = "entry-";
81 private static final int MAX_NODE_ENTRIES = 999;
82
83 private Session session;
84
85 @Override
86 public boolean execute(Context context) throws Exception {
87 log.info("Starting command for Planet post archive.");
88
89
90 session = MgnlContext.getSystemContext().getJCRSession("data");
91
92 generatePlanetFeedData();
93
94 log.info("Finished generating Planet archive.");
95
96 return true;
97 }
98
99
100
101
102 void generatePlanetFeedData() {
103 try {
104
105 NodeIterator feeds = session.getNode("/rssaggregator").getNodes();
106
107 while (feeds.hasNext()) {
108 Node feedNode = feeds.nextNode();
109 if (NodeUtil.isNodeType(feedNode, CONTENTTYPE_RSSAGGREGATOR)) {
110
111 if (PlanetUtil.isPlanetNode(feedNode)) {
112 log.info("Storing data for planet feed " + feedNode.getName());
113
114 Node planetData = NodeUtil.createPath(feedNode, PLANET_DATANODE_NAME, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
115
116 if (feedNode.hasNode("data")) {
117
118 NodeIterator channels = feedNode.getNode("data").getNodes();
119
120 Set<FilterPredicate> planetFilters = loadPlanetFilters(feedNode);
121
122 while (channels.hasNext()) {
123 Node channel = channels.nextNode();
124 processChannelEntries(planetData, channel, planetFilters);
125 }
126 }
127 } else {
128 log.info("Items of feed " + feedNode.getName() + " will not be archived because the feed is not marked as Planet feed.");
129 }
130 }
131 }
132 } catch (RepositoryException e) {
133 log.error("Problem while copying feed data for planet: " + e.getMessage());
134 }
135 }
136
137
138
139
140
141
142
143
144 void processChannelEntries(Node planetData, Node channel, Set<FilterPredicate> planetFilters) {
145 try {
146 String postsParentNode = getPostsParent(planetData, channel.getNodes().getSize());
147
148 Node target = NodeUtil.createPath(planetData, postsParentNode, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
149 long entryCount = target.getNodes().getSize();
150 NodeIterator entries = channel.getNodes();
151 while (entries.hasNext()) {
152 Node entry = entries.nextNode();
153 entryCount += 1;
154 createPlanetEntry(target, entry, entryCount, planetFilters);
155 }
156 } catch (RepositoryException e) {
157 log.error("Problem while processing channel entries: " + e.getMessage());
158 }
159 }
160
161
162
163
164
165
166
167
168
169 void createPlanetEntry(Node targetPath, Node srcEntry, long currEntry, Set<FilterPredicate> planetFilters) {
170 try {
171 String author = PlanetUtil.formatName(srcEntry, "author");
172 String channelTitle = PlanetUtil.formatName(srcEntry, "channelTitle");
173 String title = PropertyUtil.getString(srcEntry, "title", "");
174 String description;
175 if (StringUtils.isNotBlank(PropertyUtil.getString(srcEntry, "content", ""))) {
176 description = PropertyUtil.getString(srcEntry, "content", "");
177 } else {
178 description = PropertyUtil.getString(srcEntry, "description", "");
179 }
180
181 String link = PropertyUtil.getString(srcEntry, "link", "");
182 Long pubDate = srcEntry.hasProperty("pubDate") ? srcEntry.getProperty("pubDate").getLong() : null;
183
184
185 if ((StringUtils.isBlank(author) || StringUtils.isBlank(channelTitle)) || StringUtils.isBlank(title)
186 || StringUtils.isBlank(description) || StringUtils.isBlank(link) || pubDate == null) {
187 log.error("Found entry with missing mandatory attributes. The post will not be included in the planet archive.");
188 } else {
189
190 String checksum1 = getPostChecksum(author + channelTitle + title + description + link + String.valueOf(pubDate));
191 String checksum2 = getPostChecksum(author + description);
192
193
194 Node targetSibling = NodeUtil.getSiblingBefore(targetPath);
195
196
197 if (!postExists(targetPath, checksum1, checksum2) && !postExists(targetSibling, checksum1, checksum2)) {
198
199
200 if (includePost(planetFilters, srcEntry)) {
201 Node channelNode = srcEntry.getParent();
202
203 Node trgEntry = NodeUtil.createPath(targetPath, POST_ENTRY_PREFIX + currEntry, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
204 PropertyUtil.setProperty(trgEntry, "checksum1", checksum1);
205 PropertyUtil.setProperty(trgEntry, "checksum2", checksum2);
206 PropertyUtil.setProperty(trgEntry, "author", author);
207 PropertyUtil.setProperty(trgEntry, "channelTitle", channelTitle);
208 PropertyUtil.setProperty(trgEntry, "title", title);
209 PropertyUtil.setProperty(trgEntry, "description", description);
210 PropertyUtil.setProperty(trgEntry, "link", link);
211 PropertyUtil.setProperty(trgEntry, "pubDate", pubDate);
212 PropertyUtil.setProperty(trgEntry, "authorLink", PropertyUtil.getString(channelNode, "link", ""));
213 PropertyUtil.setProperty(trgEntry, "rssLink", PropertyUtil.getString(channelNode, "rss", ""));
214 PropertyUtil.setProperty(trgEntry, "hidden", false);
215
216 session.save();
217 } else {
218 log.info("Post was not included because filter setting didn't match: " + StringUtils.abbreviate(title, 60));
219 }
220 } else {
221 log.info("Found already existing post: " + StringUtils.abbreviate(title, 60));
222 }
223 }
224 } catch (RepositoryException e) {
225 log.error("Problem while creating planet entry: " + e.getMessage());
226 }
227 }
228
229
230
231
232
233
234
235
236
237 boolean postExists(Node targetPath, String check1, String check2) {
238 boolean found = false;
239 if (targetPath != null) {
240 try {
241 NodeIterator targetEntries = targetPath.getNodes();
242 while (targetEntries.hasNext()) {
243 Node existing = targetEntries.nextNode();
244
245 String extCheck1 = "";
246 if (existing.hasProperty("checksum1")) {
247 extCheck1 = existing.getProperty("checksum1").getString();
248 }
249 String extCheck2 = "";
250 if (existing.hasProperty("checksum2")) {
251 extCheck2 = existing.getProperty("checksum2").getString();
252 }
253
254 if (StringUtils.equals(extCheck1, check1) || StringUtils.equals(extCheck2, check2)) {
255 found = true;
256 break;
257 }
258 }
259 } catch (RepositoryException e) {
260 log.error("Problem while searching for post: " + e.getMessage());
261 }
262 }
263 return found;
264 }
265
266 boolean includePost(Set<FilterPredicate> planetFilters, Node srcNode) {
267 if (planetFilters != null && planetFilters.size() > 0) {
268
269 try {
270 PlanetFilter planetFilter = new PlanetFilter(planetFilters);
271
272 SyndEntry entry = new SyndEntryImpl();
273 String author = PlanetUtil.formatName(srcNode, "author");
274 if (StringUtils.isNotBlank(author)) {
275 entry.setAuthor(author);
276 } else {
277 entry.setAuthor(PlanetUtil.formatName(srcNode, "channelTitle"));
278 }
279 entry.setTitle(PropertyUtil.getString(srcNode, "title"));
280 SyndContentImpl description = new SyndContentImpl();
281 description.setValue(PropertyUtil.getString(srcNode, "description", ""));
282 entry.setDescription(description);
283
284 return planetFilter.include(entry);
285 } catch (RepositoryException e) {
286 log.error("Problem while filtering planet feed content: " + e.getMessage());
287 }
288 }
289
290 return true;
291 }
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307 String getPostsParent(Node pdNode, long entryCount) {
308 String postsFolder = POSTS_FIRST;
309 int archCount;
310 try {
311
312 archCount = pdNode.getNodes().getSize() > 0 ? (int) (pdNode.getNodes().getSize() - 1) : 0;
313 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount), 5, "0");
314
315
316 Node postsNode = NodeUtil.createPath(pdNode, postsFolder, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
317 long existingEntries = postsNode.getNodes().getSize();
318
319
320 if ((existingEntries + entryCount) > MAX_NODE_ENTRIES) {
321 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount + 1), 5, "0");
322 }
323 } catch (RepositoryException e) {
324 log.error("Problem while getting number of highest posts node: " + e.getMessage());
325 }
326 return postsFolder;
327 }
328
329
330
331
332
333
334
335 String getPostChecksum(String postContent) {
336 String checksum = null;
337 MessageDigest md;
338
339 try {
340 md = MessageDigest.getInstance("MD5");
341 md.reset();
342 md.update(postContent.getBytes(), 0, postContent.length());
343 checksum = new BigInteger(1, md.digest()).toString(16);
344 } catch (Exception e) {
345 log.error("Problem while creating checksum for post: " + e.getMessage());
346 }
347 return checksum;
348 }
349
350 Set<FilterPredicate> loadPlanetFilters(Node feedNode) {
351 Set<FilterPredicate> planetFilters = new HashSet<FilterPredicate>();
352
353 try {
354 Node filtersNode = feedNode.hasNode("filters") ? feedNode.getNode("filters") : null;
355
356 if (filtersNode != null) {
357 FilterPredicateContentMapper filterPredicateMapper = new FilterPredicateContentMapper();
358 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
359
360 for (Node n : filterNodes) {
361 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
362 if (filterPredicate == null) {
363 continue;
364 }
365 planetFilters.add(filterPredicate);
366 }
367 }
368 } catch (RepositoryException e) {
369 log.error("Problem while retrieving planet feed node filters: " + e.getMessage());
370 }
371
372 return planetFilters;
373 }
374
375 }