1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.generator;
35
36 import info.magnolia.commands.MgnlCommand;
37 import info.magnolia.context.Context;
38 import info.magnolia.context.MgnlContext;
39 import info.magnolia.jcr.util.NodeTypes;
40 import info.magnolia.jcr.util.NodeUtil;
41 import info.magnolia.jcr.util.PropertyUtil;
42 import info.magnolia.jcr.util.VersionUtil;
43 import info.magnolia.module.rssaggregator.RSSAggregatorConstants;
44 import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
45 import info.magnolia.module.rssaggregator.importhandler.FilterPredicate;
46 import info.magnolia.module.rssaggregator.importhandler.FilterPredicateContentMapper;
47 import info.magnolia.module.rssaggregator.importhandler.PlanetFilter;
48 import info.magnolia.module.rssaggregator.util.PlanetUtil;
49
50 import java.math.BigInteger;
51 import java.security.MessageDigest;
52 import java.util.HashSet;
53 import java.util.List;
54 import java.util.Set;
55
56 import javax.jcr.Node;
57 import javax.jcr.NodeIterator;
58 import javax.jcr.RepositoryException;
59 import javax.jcr.Session;
60
61 import org.apache.commons.lang.StringUtils;
62 import org.slf4j.Logger;
63 import org.slf4j.LoggerFactory;
64
65 import com.sun.syndication.feed.synd.SyndContentImpl;
66 import com.sun.syndication.feed.synd.SyndEntry;
67 import com.sun.syndication.feed.synd.SyndEntryImpl;
68
69
70
71
72
73
74 public class PlanetDataGenerator extends MgnlCommand {
75
76 private static final Logger log = LoggerFactory.getLogger(PlanetDataGenerator.class);
77
78 private static final String PLANET_DATANODE_NAME = "planetData";
79 private static final String POSTS_PREFIX = "posts-";
80 private static final String POSTS_FIRST = POSTS_PREFIX + "00000";
81 private static final String POST_ENTRY_PREFIX = "entry-";
82 private static final int MAX_NODE_ENTRIES = 999;
83
84 private Session session;
85
86 @Override
87 public boolean execute(Context context) throws Exception {
88 log.info("Starting command for Planet post archive.");
89 session = MgnlContext.getJCRSession(RSSAggregatorConstants.WORKSPACE);
90 generatePlanetFeedData(session.getRootNode());
91 log.info("Finished generating Planet archive.");
92 return true;
93 }
94
95
96
97
98 void generatePlanetFeedData(Node root) {
99 try {
100 NodeIterator feeds = root.getNodes();
101 while (feeds.hasNext()) {
102 Node feedOrFolderNode = feeds.nextNode();
103 if (NodeUtil.isNodeType(feedOrFolderNode, RSSAggregatorNodeTypes.RSSAggregator.NAME)) {
104 doGeneratePlanetDataForRSSAggregator(feedOrFolderNode);
105 } else if (NodeUtil.isNodeType(feedOrFolderNode, NodeTypes.Folder.NAME)) {
106 generatePlanetFeedData(feedOrFolderNode);
107 }
108 }
109 } catch (RepositoryException e) {
110 log.error("Problem while copying feed data for planet: " + e.getMessage());
111 }
112 }
113
114 private void doGeneratePlanetDataForRSSAggregator(Node feedNode) throws RepositoryException {
115
116 if (PlanetUtil.isPlanetNode(feedNode)) {
117 log.info("Storing data for planet feed " + feedNode.getName());
118
119 Node planetData = NodeUtil.createPath(feedNode, PLANET_DATANODE_NAME, NodeTypes.Content.NAME, true);
120
121 if (feedNode.hasNode("data")) {
122
123 NodeIterator channels = feedNode.getNode("data").getNodes();
124
125 Set<FilterPredicate> planetFilters = loadPlanetFilters(feedNode);
126
127 while (channels.hasNext()) {
128 Node channel = channels.nextNode();
129 processChannelEntries(planetData, channel, planetFilters);
130 }
131 }
132 } else {
133 log.info("Items of feed " + feedNode.getName() + " will not be archived because the feed is not marked as Planet feed.");
134 }
135 }
136
137
138
139
140
141
142
143
144 void processChannelEntries(Node planetData, Node channel, Set<FilterPredicate> planetFilters) {
145 try {
146 String postsParentNode = getPostsParent(planetData, channel.getNodes().getSize());
147
148 Node target = NodeUtil.createPath(planetData, postsParentNode, NodeTypes.Content.NAME, true);
149 long entryCount = target.getNodes().getSize();
150 NodeIterator entries = channel.getNodes();
151 while (entries.hasNext()) {
152 Node entry = entries.nextNode();
153 entryCount += 1;
154 createPlanetEntry(target, entry, entryCount, planetFilters);
155 }
156 } catch (RepositoryException e) {
157 log.error("Problem while processing channel entries: " + e.getMessage());
158 }
159 }
160
161
162
163
164
165
166
167
168
169 void createPlanetEntry(Node targetPath, Node srcEntry, long currEntry, Set<FilterPredicate> planetFilters) {
170 try {
171 String author = PlanetUtil.formatName(srcEntry, "author");
172 String channelTitle = PlanetUtil.formatName(srcEntry, "channelTitle");
173 String title = PropertyUtil.getString(srcEntry, "title", "");
174 String description;
175 if (StringUtils.isNotBlank(PropertyUtil.getString(srcEntry, "content", ""))) {
176 description = PropertyUtil.getString(srcEntry, "content", "");
177 } else {
178 description = PropertyUtil.getString(srcEntry, "description", "");
179 }
180
181 String link = PropertyUtil.getString(srcEntry, "link", "");
182 Long pubDate = srcEntry.hasProperty("pubDate") ? srcEntry.getProperty("pubDate").getLong() : null;
183
184
185 if ((StringUtils.isBlank(author) || StringUtils.isBlank(channelTitle)) || StringUtils.isBlank(title)
186 || StringUtils.isBlank(description) || StringUtils.isBlank(link) || pubDate == null) {
187 log.error("Found entry with missing mandatory attributes. The post will not be included in the planet archive.");
188 } else {
189
190 String checksum1 = getPostChecksum(author + channelTitle + title + description + link + String.valueOf(pubDate));
191 String checksum2 = getPostChecksum(author + description);
192
193
194 Node targetSibling = NodeUtil.getSiblingBefore(targetPath);
195
196
197 if (!postExists(targetPath, checksum1, checksum2) && !postExists(targetSibling, checksum1, checksum2)) {
198
199
200 if (includePost(planetFilters, srcEntry)) {
201 Node channelNode = srcEntry.getParent();
202 while (targetPath.hasNode(POST_ENTRY_PREFIX + currEntry)) {
203 currEntry++;
204 }
205 Node trgEntry = NodeUtil.createPath(targetPath, POST_ENTRY_PREFIX + currEntry, NodeTypes.Content.NAME, true);
206 PropertyUtil.setProperty(trgEntry, "checksum1", checksum1);
207 PropertyUtil.setProperty(trgEntry, "checksum2", checksum2);
208 PropertyUtil.setProperty(trgEntry, "author", author);
209 PropertyUtil.setProperty(trgEntry, "channelTitle", channelTitle);
210 PropertyUtil.setProperty(trgEntry, "title", title);
211 PropertyUtil.setProperty(trgEntry, "description", description);
212 PropertyUtil.setProperty(trgEntry, "link", link);
213 PropertyUtil.setProperty(trgEntry, "pubDate", pubDate);
214 PropertyUtil.setProperty(trgEntry, "authorLink", PropertyUtil.getString(channelNode, "link", ""));
215 PropertyUtil.setProperty(trgEntry, "rssLink", PropertyUtil.getString(channelNode, "rss", ""));
216 PropertyUtil.setProperty(trgEntry, "hidden", false);
217
218 session.save();
219 } else {
220 log.info("Post was not included because filter setting didn't match: " + StringUtils.abbreviate(title, 60));
221 }
222 } else {
223 log.info("Found already existing post: " + StringUtils.abbreviate(title, 60));
224 }
225 }
226 } catch (RepositoryException e) {
227 log.error("Problem while creating planet entry: " + e.getMessage());
228 }
229 }
230
231
232
233
234
235
236
237
238
239 boolean postExists(Node targetPath, String check1, String check2) {
240 boolean found = false;
241 if (targetPath != null) {
242 try {
243 NodeIterator targetEntries = targetPath.getNodes();
244 while (targetEntries.hasNext()) {
245 Node existing = targetEntries.nextNode();
246
247 String extCheck1 = "";
248 if (existing.hasProperty("checksum1")) {
249 extCheck1 = existing.getProperty("checksum1").getString();
250 }
251 String extCheck2 = "";
252 if (existing.hasProperty("checksum2")) {
253 extCheck2 = existing.getProperty("checksum2").getString();
254 }
255
256 if (StringUtils.equals(extCheck1, check1) || StringUtils.equals(extCheck2, check2)) {
257 found = true;
258 break;
259 }
260 }
261 } catch (RepositoryException e) {
262 log.error("Problem while searching for post: " + e.getMessage());
263 }
264 }
265 return found;
266 }
267
268 boolean includePost(Set<FilterPredicate> planetFilters, Node srcNode) {
269 if (planetFilters != null && planetFilters.size() > 0) {
270
271 try {
272 PlanetFilter planetFilter = new PlanetFilter(planetFilters);
273
274 SyndEntry entry = new SyndEntryImpl();
275 String author = PlanetUtil.formatName(srcNode, "author");
276 if (StringUtils.isNotBlank(author)) {
277 entry.setAuthor(author);
278 } else {
279 entry.setAuthor(PlanetUtil.formatName(srcNode, "channelTitle"));
280 }
281 entry.setTitle(PropertyUtil.getString(srcNode, "title"));
282 SyndContentImpl description = new SyndContentImpl();
283 description.setValue(PropertyUtil.getString(srcNode, "description", ""));
284 entry.setDescription(description);
285
286 return planetFilter.include(entry);
287 } catch (RepositoryException e) {
288 log.error("Problem while filtering planet feed content: " + e.getMessage());
289 }
290 }
291
292 return true;
293 }
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309 String getPostsParent(Node pdNode, long entryCount) {
310 String postsFolder = POSTS_FIRST;
311 int archCount;
312 try {
313
314 archCount = pdNode.getNodes().getSize() > 0 ? (int) (pdNode.getNodes().getSize() - 1) : 0;
315 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount), 5, "0");
316
317
318 Node postsNode = NodeUtil.createPath(pdNode, postsFolder, NodeTypes.Content.NAME, true);
319 long existingEntries = postsNode.getNodes().getSize();
320
321
322 if ((existingEntries + entryCount) > MAX_NODE_ENTRIES) {
323 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount + 1), 5, "0");
324 }
325 } catch (RepositoryException e) {
326 log.error("Problem while getting number of highest posts node: " + e.getMessage());
327 }
328 return postsFolder;
329 }
330
331
332
333
334
335
336
337 String getPostChecksum(String postContent) {
338 String checksum = null;
339 MessageDigest md;
340
341 try {
342 md = MessageDigest.getInstance("MD5");
343 md.reset();
344 md.update(postContent.getBytes(), 0, postContent.length());
345 checksum = new BigInteger(1, md.digest()).toString(16);
346 } catch (Exception e) {
347 log.error("Problem while creating checksum for post: " + e.getMessage());
348 }
349 return checksum;
350 }
351
352 Set<FilterPredicate> loadPlanetFilters(Node feedNode) {
353 Set<FilterPredicate> planetFilters = new HashSet<FilterPredicate>();
354
355 try {
356 Node filtersNode = feedNode.hasNode("filters") ? feedNode.getNode("filters") : null;
357
358 if (filtersNode != null) {
359 FilterPredicateContentMapper filterPredicateMapper = new FilterPredicateContentMapper();
360 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
361
362 for (Node n : filterNodes) {
363 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
364 if (filterPredicate == null) {
365 continue;
366 }
367 planetFilters.add(filterPredicate);
368 }
369 }
370 } catch (RepositoryException e) {
371 log.error("Problem while retrieving planet feed node filters: " + e.getMessage());
372 }
373
374 return planetFilters;
375 }
376
377 }