1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.generator;
35
36 import info.magnolia.cms.util.QueryUtil;
37 import info.magnolia.commands.MgnlCommand;
38 import info.magnolia.context.Context;
39 import info.magnolia.context.MgnlContext;
40 import info.magnolia.jcr.util.NodeTypes;
41 import info.magnolia.jcr.util.NodeUtil;
42 import info.magnolia.jcr.util.PropertyUtil;
43 import info.magnolia.jcr.util.VersionUtil;
44 import info.magnolia.module.rssaggregator.RSSAggregatorConstants;
45 import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
46 import info.magnolia.module.rssaggregator.importhandler.FilterPredicate;
47 import info.magnolia.module.rssaggregator.importhandler.FilterPredicateContentMapper;
48 import info.magnolia.module.rssaggregator.importhandler.PlanetFilter;
49 import info.magnolia.module.rssaggregator.util.PlanetUtil;
50
51 import java.math.BigInteger;
52 import java.security.MessageDigest;
53 import java.util.HashSet;
54 import java.util.List;
55 import java.util.Set;
56
57 import javax.jcr.Node;
58 import javax.jcr.NodeIterator;
59 import javax.jcr.RepositoryException;
60 import javax.jcr.Session;
61
62 import org.apache.commons.lang3.StringUtils;
63 import org.slf4j.Logger;
64 import org.slf4j.LoggerFactory;
65
66 import com.sun.syndication.feed.synd.SyndContentImpl;
67 import com.sun.syndication.feed.synd.SyndEntry;
68 import com.sun.syndication.feed.synd.SyndEntryImpl;
69
70
71
72
73 public class PlanetDataGenerator extends MgnlCommand {
74
75 private static final Logger log = LoggerFactory.getLogger(PlanetDataGenerator.class);
76
77 private static final String PLANET_DATANODE_NAME = "planetData";
78 private static final String POSTS_PREFIX = "posts-";
79 private static final String POSTS_FIRST = POSTS_PREFIX + "00000";
80 private static final String POST_ENTRY_PREFIX = "entry-";
81 private static final int MAX_NODE_ENTRIES = 999;
82
83 private Session session;
84
85 @Override
86 public boolean execute(Context context) throws Exception {
87 log.info("Starting command for Planet post archive.");
88 session = MgnlContext.getJCRSession(RSSAggregatorConstants.WORKSPACE);
89 generatePlanetFeedData(session.getRootNode());
90 log.info("Finished generating Planet archive.");
91 return true;
92 }
93
94
95
96
97 void generatePlanetFeedData(Node root) {
98 try {
99 NodeIterator feeds = root.getNodes();
100 while (feeds.hasNext()) {
101 Node feedOrFolderNode = feeds.nextNode();
102 if (NodeUtil.isNodeType(feedOrFolderNode, RSSAggregatorNodeTypes.RSSAggregator.NAME)) {
103 doGeneratePlanetDataForRSSAggregator(feedOrFolderNode);
104 } else if (NodeUtil.isNodeType(feedOrFolderNode, NodeTypes.Folder.NAME)) {
105 generatePlanetFeedData(feedOrFolderNode);
106 }
107 }
108 } catch (RepositoryException e) {
109 log.error("Problem while copying feed data for planet: " + e.getMessage());
110 }
111 }
112
113 private void doGeneratePlanetDataForRSSAggregator(Node feedNode) throws RepositoryException {
114
115 if (PlanetUtil.isPlanetNode(feedNode)) {
116 log.info("Storing data for planet feed " + feedNode.getName());
117
118 Node planetData = NodeUtil.createPath(feedNode, PLANET_DATANODE_NAME, NodeTypes.Content.NAME, true);
119
120 if (feedNode.hasNode("data")) {
121
122 NodeIterator channels = feedNode.getNode("data").getNodes();
123
124 Set<FilterPredicate> planetFilters = loadPlanetFilters(feedNode);
125
126 while (channels.hasNext()) {
127 Node channel = channels.nextNode();
128 processChannelEntries(planetData, channel, planetFilters);
129 }
130 }
131 } else {
132 log.info("Items of feed " + feedNode.getName() + " will not be archived because the feed is not marked as Planet feed.");
133 }
134 }
135
136
137
138
139
140
141
142
143 void processChannelEntries(Node planetData, Node channel, Set<FilterPredicate> planetFilters) {
144 try {
145 String postsParentNode = getPostsParent(planetData, NodeUtil.asList(NodeUtil.getNodes(channel)).size());
146
147 Node target = NodeUtil.createPath(planetData, postsParentNode, NodeTypes.Content.NAME, true);
148 long entryCount = target.getNodes().getSize();
149 NodeIterator entries = channel.getNodes();
150 while (entries.hasNext()) {
151 Node entry = entries.nextNode();
152 entryCount += 1;
153 createPlanetEntry(target, entry, entryCount, planetFilters);
154 }
155 } catch (RepositoryException e) {
156 log.error("Problem while processing channel entries: " + e.getMessage());
157 }
158 }
159
160
161
162
163
164
165
166
167
168 void createPlanetEntry(Node targetPath, Node srcEntry, long currEntry, Set<FilterPredicate> planetFilters) {
169 try {
170 String author = PlanetUtil.formatName(srcEntry, "author");
171 String channelTitle = PlanetUtil.formatName(srcEntry, "channelTitle");
172 String title = PropertyUtil.getString(srcEntry, "title", "");
173 String description;
174 if (StringUtils.isNotBlank(PropertyUtil.getString(srcEntry, "content", ""))) {
175 description = PropertyUtil.getString(srcEntry, "content", "");
176 } else {
177 description = PropertyUtil.getString(srcEntry, "description", "");
178 }
179
180 String link = PropertyUtil.getString(srcEntry, "link", "");
181 Long pubDate = srcEntry.hasProperty("pubDate") ? srcEntry.getProperty("pubDate").getLong() : null;
182
183
184 if (StringUtils.isNotBlank(author) && StringUtils.isNotBlank(channelTitle) && StringUtils.isNotBlank(title)
185 && StringUtils.isNotBlank(description) && StringUtils.isNotBlank(link) && pubDate != null) {
186
187 String checksum1 = getPostChecksum(author + channelTitle + title + description + link + String.valueOf(pubDate));
188 String checksum2 = getPostChecksum(description);
189
190
191 if (!postExists(targetPath, link, checksum1, checksum2)) {
192 if (includePost(planetFilters, srcEntry)) {
193 Node channelNode = srcEntry.getParent();
194 while (targetPath.hasNode(POST_ENTRY_PREFIX + currEntry)) {
195 currEntry++;
196 }
197 Node trgEntry = NodeUtil.createPath(targetPath, POST_ENTRY_PREFIX + currEntry, NodeTypes.Content.NAME, true);
198 PropertyUtil.setProperty(trgEntry, "checksum1", checksum1);
199 PropertyUtil.setProperty(trgEntry, "checksum2", checksum2);
200 PropertyUtil.setProperty(trgEntry, "author", author);
201 PropertyUtil.setProperty(trgEntry, "channelTitle", channelTitle);
202 PropertyUtil.setProperty(trgEntry, "title", title);
203 PropertyUtil.setProperty(trgEntry, "description", description);
204 PropertyUtil.setProperty(trgEntry, "link", link);
205 PropertyUtil.setProperty(trgEntry, "pubDate", pubDate);
206 PropertyUtil.setProperty(trgEntry, "authorLink", PropertyUtil.getString(channelNode, "link", ""));
207 PropertyUtil.setProperty(trgEntry, "rssLink", PropertyUtil.getString(channelNode, "rss", ""));
208 PropertyUtil.setProperty(trgEntry, "hidden", false);
209 log.info("Added new blog post: " + StringUtils.abbreviate(title, 60));
210
211 session.save();
212 }
213 }
214 }
215 } catch (RepositoryException e) {
216 log.error("Problem while creating planet entry: " + e.getMessage());
217 }
218 }
219
220
221
222
223
224
225
226
227
228
229 boolean postExists(Node targetPath, String link, String check1, String check2) {
230 boolean found = false;
231 if (targetPath != null) {
232 try {
233 Node planetDataNode = targetPath.getParent();
234 if (planetDataNode != null) {
235 String sql = "select * from [mgnl:content] as t where ISDESCENDANTNODE([" + planetDataNode.getPath() + "]) and (t.link='" + link + "'" +
236 " or t.checksum1='" + check1 + "' or t.checksum2='" + check2 + "')";
237
238 NodeIterator posts = QueryUtil.search(RSSAggregatorConstants.WORKSPACE, sql);
239 if (posts.hasNext() && posts.nextNode() != null) {
240 found = true;
241 }
242 }
243 } catch (RepositoryException e) {
244 log.error("Problem while searching for post: " + e.getMessage());
245 }
246 }
247 return found;
248 }
249
250 boolean includePost(Set<FilterPredicate> planetFilters, Node srcNode) {
251 if (planetFilters != null && planetFilters.size() > 0) {
252
253 try {
254 PlanetFilter planetFilter = new PlanetFilter(planetFilters);
255
256 SyndEntry entry = new SyndEntryImpl();
257 String author = PlanetUtil.formatName(srcNode, "author");
258 if (StringUtils.isNotBlank(author)) {
259 entry.setAuthor(author);
260 } else {
261 entry.setAuthor(PlanetUtil.formatName(srcNode, "channelTitle"));
262 }
263 entry.setTitle(PropertyUtil.getString(srcNode, "title"));
264 SyndContentImpl description = new SyndContentImpl();
265 description.setValue(PropertyUtil.getString(srcNode, "description", ""));
266 entry.setDescription(description);
267
268 return planetFilter.include(entry);
269 } catch (RepositoryException e) {
270 log.error("Problem while filtering planet feed content: " + e.getMessage());
271 }
272 }
273
274 return true;
275 }
276
277
278
279
280
281
282
283
284
285
286
287
288
289 String getPostsParent(Node pdNode, long entryCount) {
290 String postsFolder = POSTS_FIRST;
291 int archCount;
292 try {
293
294 archCount = NodeUtil.asList(NodeUtil.getNodes(pdNode)).size() > 0 ? (int) (NodeUtil.asList(NodeUtil.getNodes(pdNode)).size() - 1) : 0;
295 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount), 5, "0");
296
297
298 Node postsNode = NodeUtil.createPath(pdNode, postsFolder, NodeTypes.Content.NAME, true);
299 long existingEntries = postsNode.getNodes().getSize();
300
301
302 if (existingEntries + entryCount > MAX_NODE_ENTRIES) {
303 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount + 1), 5, "0");
304 }
305 } catch (RepositoryException e) {
306 log.error("Problem while getting number of highest posts node: " + e.getMessage());
307 }
308 return postsFolder;
309 }
310
311
312
313
314
315
316
317 String getPostChecksum(String postContent) {
318 String checksum = null;
319 MessageDigest md;
320
321 try {
322 md = MessageDigest.getInstance("MD5");
323 md.reset();
324 md.update(postContent.getBytes(), 0, postContent.length());
325 checksum = new BigInteger(1, md.digest()).toString(16);
326 } catch (Exception e) {
327 log.error("Problem while creating checksum for post: " + e.getMessage());
328 }
329 return checksum;
330 }
331
332 Set<FilterPredicate> loadPlanetFilters(Node feedNode) {
333 Set<FilterPredicate> planetFilters = new HashSet<FilterPredicate>();
334
335 try {
336 Node filtersNode = feedNode.hasNode("filters") ? feedNode.getNode("filters") : null;
337
338 if (filtersNode != null) {
339 FilterPredicateContentMapper filterPredicateMapper = new FilterPredicateContentMapper();
340 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
341
342 for (Node n : filterNodes) {
343 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
344 if (filterPredicate == null) {
345 continue;
346 }
347 planetFilters.add(filterPredicate);
348 }
349 }
350 } catch (RepositoryException e) {
351 log.error("Problem while retrieving planet feed node filters: " + e.getMessage());
352 }
353
354 return planetFilters;
355 }
356
357 }