1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.generator;
35
import info.magnolia.cms.util.QueryUtil;
import info.magnolia.commands.MgnlCommand;
import info.magnolia.context.Context;
import info.magnolia.context.MgnlContext;
import info.magnolia.jcr.util.NodeTypes;
import info.magnolia.jcr.util.NodeUtil;
import info.magnolia.jcr.util.PropertyUtil;
import info.magnolia.jcr.util.VersionUtil;
import info.magnolia.module.rssaggregator.RSSAggregatorConstants;
import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
import info.magnolia.module.rssaggregator.importhandler.FilterPredicate;
import info.magnolia.module.rssaggregator.importhandler.FilterPredicateContentMapper;
import info.magnolia.module.rssaggregator.importhandler.PlanetFilter;
import info.magnolia.module.rssaggregator.util.PlanetUtil;

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.RepositoryException;
import javax.jcr.Session;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.sun.syndication.feed.synd.SyndContentImpl;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndEntryImpl;
69
70
71
72
73
74
75 public class PlanetDataGenerator extends MgnlCommand {
76
77 private static final Logger log = LoggerFactory.getLogger(PlanetDataGenerator.class);
78
79 private static final String PLANET_DATANODE_NAME = "planetData";
80 private static final String POSTS_PREFIX = "posts-";
81 private static final String POSTS_FIRST = POSTS_PREFIX + "00000";
82 private static final String POST_ENTRY_PREFIX = "entry-";
83 private static final int MAX_NODE_ENTRIES = 999;
84
85 private Session session;
86
87 @Override
88 public boolean execute(Context context) throws Exception {
89 log.info("Starting command for Planet post archive.");
90 session = MgnlContext.getJCRSession(RSSAggregatorConstants.WORKSPACE);
91 generatePlanetFeedData(session.getRootNode());
92 log.info("Finished generating Planet archive.");
93 return true;
94 }
95
96
97
98
99 void generatePlanetFeedData(Node root) {
100 try {
101 NodeIterator feeds = root.getNodes();
102 while (feeds.hasNext()) {
103 Node feedOrFolderNode = feeds.nextNode();
104 if (NodeUtil.isNodeType(feedOrFolderNode, RSSAggregatorNodeTypes.RSSAggregator.NAME)) {
105 doGeneratePlanetDataForRSSAggregator(feedOrFolderNode);
106 } else if (NodeUtil.isNodeType(feedOrFolderNode, NodeTypes.Folder.NAME)) {
107 generatePlanetFeedData(feedOrFolderNode);
108 }
109 }
110 } catch (RepositoryException e) {
111 log.error("Problem while copying feed data for planet: " + e.getMessage());
112 }
113 }
114
115 private void doGeneratePlanetDataForRSSAggregator(Node feedNode) throws RepositoryException {
116
117 if (PlanetUtil.isPlanetNode(feedNode)) {
118 log.info("Storing data for planet feed " + feedNode.getName());
119
120 Node planetData = NodeUtil.createPath(feedNode, PLANET_DATANODE_NAME, NodeTypes.Content.NAME, true);
121
122 if (feedNode.hasNode("data")) {
123
124 NodeIterator channels = feedNode.getNode("data").getNodes();
125
126 Set<FilterPredicate> planetFilters = loadPlanetFilters(feedNode);
127
128 while (channels.hasNext()) {
129 Node channel = channels.nextNode();
130 processChannelEntries(planetData, channel, planetFilters);
131 }
132 }
133 } else {
134 log.info("Items of feed " + feedNode.getName() + " will not be archived because the feed is not marked as Planet feed.");
135 }
136 }
137
138
139
140
141
142
143
144
145 void processChannelEntries(Node planetData, Node channel, Set<FilterPredicate> planetFilters) {
146 try {
147 String postsParentNode = getPostsParent(planetData, NodeUtil.asList(NodeUtil.getNodes(channel)).size());
148
149 Node target = NodeUtil.createPath(planetData, postsParentNode, NodeTypes.Content.NAME, true);
150 long entryCount = target.getNodes().getSize();
151 NodeIterator entries = channel.getNodes();
152 while (entries.hasNext()) {
153 Node entry = entries.nextNode();
154 entryCount += 1;
155 createPlanetEntry(target, entry, entryCount, planetFilters);
156 }
157 } catch (RepositoryException e) {
158 log.error("Problem while processing channel entries: " + e.getMessage());
159 }
160 }
161
162
163
164
165
166
167
168
169
170 void createPlanetEntry(Node targetPath, Node srcEntry, long currEntry, Set<FilterPredicate> planetFilters) {
171 try {
172 String author = PlanetUtil.formatName(srcEntry, "author");
173 String channelTitle = PlanetUtil.formatName(srcEntry, "channelTitle");
174 String title = PropertyUtil.getString(srcEntry, "title", "");
175 String description;
176 if (StringUtils.isNotBlank(PropertyUtil.getString(srcEntry, "content", ""))) {
177 description = PropertyUtil.getString(srcEntry, "content", "");
178 } else {
179 description = PropertyUtil.getString(srcEntry, "description", "");
180 }
181
182 String link = PropertyUtil.getString(srcEntry, "link", "");
183 Long pubDate = srcEntry.hasProperty("pubDate") ? srcEntry.getProperty("pubDate").getLong() : null;
184
185
186 if (StringUtils.isNotBlank(author) && StringUtils.isNotBlank(channelTitle) && StringUtils.isNotBlank(title)
187 && StringUtils.isNotBlank(description) && StringUtils.isNotBlank(link) && pubDate != null) {
188
189 String checksum1 = getPostChecksum(author + channelTitle + title + description + link + String.valueOf(pubDate));
190 String checksum2 = getPostChecksum(description);
191
192
193 if (!postExists(targetPath, link, checksum1, checksum2)) {
194 if (includePost(planetFilters, srcEntry)) {
195 Node channelNode = srcEntry.getParent();
196 while (targetPath.hasNode(POST_ENTRY_PREFIX + currEntry)) {
197 currEntry++;
198 }
199 Node trgEntry = NodeUtil.createPath(targetPath, POST_ENTRY_PREFIX + currEntry, NodeTypes.Content.NAME, true);
200 PropertyUtil.setProperty(trgEntry, "checksum1", checksum1);
201 PropertyUtil.setProperty(trgEntry, "checksum2", checksum2);
202 PropertyUtil.setProperty(trgEntry, "author", author);
203 PropertyUtil.setProperty(trgEntry, "channelTitle", channelTitle);
204 PropertyUtil.setProperty(trgEntry, "title", title);
205 PropertyUtil.setProperty(trgEntry, "description", description);
206 PropertyUtil.setProperty(trgEntry, "link", link);
207 PropertyUtil.setProperty(trgEntry, "pubDate", pubDate);
208 PropertyUtil.setProperty(trgEntry, "authorLink", PropertyUtil.getString(channelNode, "link", ""));
209 PropertyUtil.setProperty(trgEntry, "rssLink", PropertyUtil.getString(channelNode, "rss", ""));
210 PropertyUtil.setProperty(trgEntry, "hidden", false);
211 log.info("Added new blog post: " + StringUtils.abbreviate(title, 60));
212
213 session.save();
214 }
215 }
216 }
217 } catch (RepositoryException e) {
218 log.error("Problem while creating planet entry: " + e.getMessage());
219 }
220 }
221
222
223
224
225
226
227
228
229
230
231 boolean postExists(Node targetPath, String link, String check1, String check2) {
232 boolean found = false;
233 if (targetPath != null) {
234 try {
235 Node planetDataNode = targetPath.getParent();
236 if (planetDataNode != null) {
237 String sql = "select * from [mgnl:content] as t where ISDESCENDANTNODE([" + planetDataNode.getPath() + "]) and (t.link='" + link + "'" +
238 " or t.checksum1='" + check1 + "' or t.checksum2='" + check2 + "')";
239
240 NodeIterator posts = QueryUtil.search(RSSAggregatorConstants.WORKSPACE, sql);
241 if (posts.hasNext() && posts.nextNode() != null) {
242 found = true;
243 }
244 }
245 } catch (RepositoryException e) {
246 log.error("Problem while searching for post: " + e.getMessage());
247 }
248 }
249 return found;
250 }
251
252 boolean includePost(Set<FilterPredicate> planetFilters, Node srcNode) {
253 if (planetFilters != null && planetFilters.size() > 0) {
254
255 try {
256 PlanetFilter planetFilter = new PlanetFilter(planetFilters);
257
258 SyndEntry entry = new SyndEntryImpl();
259 String author = PlanetUtil.formatName(srcNode, "author");
260 if (StringUtils.isNotBlank(author)) {
261 entry.setAuthor(author);
262 } else {
263 entry.setAuthor(PlanetUtil.formatName(srcNode, "channelTitle"));
264 }
265 entry.setTitle(PropertyUtil.getString(srcNode, "title"));
266 SyndContentImpl description = new SyndContentImpl();
267 description.setValue(PropertyUtil.getString(srcNode, "description", ""));
268 entry.setDescription(description);
269
270 return planetFilter.include(entry);
271 } catch (RepositoryException e) {
272 log.error("Problem while filtering planet feed content: " + e.getMessage());
273 }
274 }
275
276 return true;
277 }
278
279
280
281
282
283
284
285
286
287
288
289
290
291 String getPostsParent(Node pdNode, long entryCount) {
292 String postsFolder = POSTS_FIRST;
293 int archCount;
294 try {
295
296 archCount = NodeUtil.asList(NodeUtil.getNodes(pdNode)).size() > 0 ? (int) (NodeUtil.asList(NodeUtil.getNodes(pdNode)).size() - 1) : 0;
297 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount), 5, "0");
298
299
300 Node postsNode = NodeUtil.createPath(pdNode, postsFolder, NodeTypes.Content.NAME, true);
301 long existingEntries = postsNode.getNodes().getSize();
302
303
304 if (existingEntries + entryCount > MAX_NODE_ENTRIES) {
305 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount + 1), 5, "0");
306 }
307 } catch (RepositoryException e) {
308 log.error("Problem while getting number of highest posts node: " + e.getMessage());
309 }
310 return postsFolder;
311 }
312
313
314
315
316
317
318
319 String getPostChecksum(String postContent) {
320 String checksum = null;
321 MessageDigest md;
322
323 try {
324 md = MessageDigest.getInstance("MD5");
325 md.reset();
326 md.update(postContent.getBytes(), 0, postContent.length());
327 checksum = new BigInteger(1, md.digest()).toString(16);
328 } catch (Exception e) {
329 log.error("Problem while creating checksum for post: " + e.getMessage());
330 }
331 return checksum;
332 }
333
334 Set<FilterPredicate> loadPlanetFilters(Node feedNode) {
335 Set<FilterPredicate> planetFilters = new HashSet<FilterPredicate>();
336
337 try {
338 Node filtersNode = feedNode.hasNode("filters") ? feedNode.getNode("filters") : null;
339
340 if (filtersNode != null) {
341 FilterPredicateContentMapper filterPredicateMapper = new FilterPredicateContentMapper();
342 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
343
344 for (Node n : filterNodes) {
345 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
346 if (filterPredicate == null) {
347 continue;
348 }
349 planetFilters.add(filterPredicate);
350 }
351 }
352 } catch (RepositoryException e) {
353 log.error("Problem while retrieving planet feed node filters: " + e.getMessage());
354 }
355
356 return planetFilters;
357 }
358
359 }