1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.module.rssaggregator.generator;
35
36 import info.magnolia.cms.util.QueryUtil;
37 import info.magnolia.commands.MgnlCommand;
38 import info.magnolia.context.Context;
39 import info.magnolia.context.MgnlContext;
40 import info.magnolia.jcr.util.NodeUtil;
41 import info.magnolia.jcr.util.PropertyUtil;
42 import info.magnolia.jcr.util.VersionUtil;
43 import info.magnolia.module.data.DataConsts;
44 import info.magnolia.module.rssaggregator.importhandler.FilterPredicate;
45 import info.magnolia.module.rssaggregator.importhandler.FilterPredicateContentMapper;
46 import info.magnolia.module.rssaggregator.importhandler.PlanetFilter;
47 import info.magnolia.module.rssaggregator.util.PlanetUtil;
48
49 import java.math.BigInteger;
50 import java.security.MessageDigest;
51 import java.util.HashSet;
52 import java.util.Iterator;
53 import java.util.List;
54 import java.util.Set;
55
56 import javax.jcr.Node;
57 import javax.jcr.NodeIterator;
58 import javax.jcr.RepositoryException;
59 import javax.jcr.Session;
60
61 import org.apache.commons.lang.StringUtils;
62 import org.slf4j.Logger;
63 import org.slf4j.LoggerFactory;
64
65 import com.sun.syndication.feed.synd.SyndContentImpl;
66 import com.sun.syndication.feed.synd.SyndEntry;
67 import com.sun.syndication.feed.synd.SyndEntryImpl;
68
69
70
71
72
73
74 public class PlanetDataGenerator extends MgnlCommand {
75
76 private static final Logger log = LoggerFactory.getLogger(PlanetDataGenerator.class);
77
78 private static final String PLANET_DATANODE_NAME = "planetData";
79 private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
80 private static final String POSTS_PREFIX = "posts-";
81 private static final String POSTS_FIRST = POSTS_PREFIX + "00000";
82 private static final String POST_ENTRY_PREFIX = "entry-";
83 private static final int MAX_NODE_ENTRIES = 999;
84
85 private Session session;
86
87 @Override
88 public boolean execute(Context context) throws Exception {
89 log.info("Starting command for Planet post archive.");
90
91
92 session = MgnlContext.getSystemContext().getJCRSession("data");
93
94 generatePlanetFeedData();
95
96 log.info("Finished generating Planet archive.");
97
98 return true;
99 }
100
101
102
103
104 void generatePlanetFeedData() {
105 try {
106 Iterator<Node> feeds = NodeUtil.getNodes(session.getNode("/rssaggregator")).iterator();
107
108 while (feeds.hasNext()) {
109 Node feedNode = feeds.next();
110 if (NodeUtil.isNodeType(feedNode, CONTENTTYPE_RSSAGGREGATOR)) {
111
112 if (PlanetUtil.isPlanetNode(feedNode)) {
113 log.info("Storing data for planet feed " + feedNode.getName());
114
115 Node planetData = NodeUtil.createPath(feedNode, PLANET_DATANODE_NAME, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
116
117 if (feedNode.hasNode("data")) {
118
119 Iterator<Node> channels = NodeUtil.getNodes(feedNode.getNode("data")).iterator();
120
121 Set<FilterPredicate> planetFilters = loadPlanetFilters(feedNode);
122
123 while (channels.hasNext()) {
124 Node channel = channels.next();
125 processChannelEntries(planetData, channel, planetFilters);
126 }
127 }
128 }
129 }
130 }
131 } catch (RepositoryException e) {
132 log.error("Problem while copying feed data for planet: " + e.getMessage());
133 }
134 }
135
136
137
138
139
140
141
142
143 void processChannelEntries(Node planetData, Node channel, Set<FilterPredicate> planetFilters) {
144 try {
145 String postsParentNode = getPostsParent(planetData, NodeUtil.asList(NodeUtil.getNodes(channel)).size());
146
147 Node target = NodeUtil.createPath(planetData, postsParentNode, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
148 long entryCount = NodeUtil.asList(NodeUtil.getNodes(target)).size();
149 Iterator<Node> entries = NodeUtil.getNodes(channel).iterator();
150 while (entries.hasNext()) {
151 Node entry = entries.next();
152 entryCount += 1;
153 createPlanetEntry(target, entry, entryCount, planetFilters);
154 }
155 } catch (RepositoryException e) {
156 log.error("Problem while processing channel entries: " + e.getMessage());
157 }
158 }
159
160
161
162
163
164
165
166
167
168 void createPlanetEntry(Node targetPath, Node srcEntry, long currEntry, Set<FilterPredicate> planetFilters) {
169 try {
170 String author = PlanetUtil.formatName(srcEntry, "author");
171 String channelTitle = PlanetUtil.formatName(srcEntry, "channelTitle");
172 String title = PropertyUtil.getString(srcEntry, "title", "");
173 String description;
174 if (StringUtils.isNotBlank(PropertyUtil.getString(srcEntry, "content", ""))) {
175 description = PropertyUtil.getString(srcEntry, "content", "");
176 } else {
177 description = PropertyUtil.getString(srcEntry, "description", "");
178 }
179
180 String link = PropertyUtil.getString(srcEntry, "link", "");
181 Long pubDate = srcEntry.hasProperty("pubDate") ? srcEntry.getProperty("pubDate").getLong() : null;
182
183
184 if (StringUtils.isNotBlank(author) && StringUtils.isNotBlank(channelTitle) && StringUtils.isNotBlank(title)
185 && StringUtils.isNotBlank(description) && StringUtils.isNotBlank(link) && pubDate != null) {
186
187 String checksum1 = getPostChecksum(author + channelTitle + title + description + link + String.valueOf(pubDate));
188 String checksum2 = getPostChecksum(description);
189
190
191 if (!postExists(targetPath, link, checksum1, checksum2)) {
192 if (includePost(planetFilters, srcEntry)) {
193 Node channelNode = srcEntry.getParent();
194 while (targetPath.hasNode(POST_ENTRY_PREFIX + currEntry)) {
195 currEntry++;
196 }
197 Node trgEntry = NodeUtil.createPath(targetPath, POST_ENTRY_PREFIX + currEntry, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
198 PropertyUtil.setProperty(trgEntry, "checksum1", checksum1);
199 PropertyUtil.setProperty(trgEntry, "checksum2", checksum2);
200 PropertyUtil.setProperty(trgEntry, "author", author);
201 PropertyUtil.setProperty(trgEntry, "channelTitle", channelTitle);
202 PropertyUtil.setProperty(trgEntry, "title", title);
203 PropertyUtil.setProperty(trgEntry, "description", description);
204 PropertyUtil.setProperty(trgEntry, "link", link);
205 PropertyUtil.setProperty(trgEntry, "pubDate", pubDate);
206 PropertyUtil.setProperty(trgEntry, "authorLink", PropertyUtil.getString(channelNode, "link", ""));
207 PropertyUtil.setProperty(trgEntry, "rssLink", PropertyUtil.getString(channelNode, "rss", ""));
208 PropertyUtil.setProperty(trgEntry, "hidden", false);
209 log.info("Added new blog post: " + StringUtils.abbreviate(title, 60));
210
211 session.save();
212 }
213 }
214 }
215 } catch (RepositoryException e) {
216 log.error("Problem while creating planet entry: " + e.getMessage());
217 }
218 }
219
220
221
222
223
224
225
226
227
228
229 boolean postExists(Node targetPath, String link, String check1, String check2) {
230 boolean found = false;
231 if (targetPath != null) {
232 try {
233 Node planetDataNode = targetPath.getParent();
234 if (planetDataNode != null) {
235 String sql = "select * from [dataItemNode] as t where ISDESCENDANTNODE([" + planetDataNode.getPath() + "]) and (t.link='" + link + "'" +
236 " or t.checksum1='" + check1 + "' or t.checksum2='" + check2 + "')";
237
238 NodeIterator posts = QueryUtil.search("data", sql);
239 if (posts.hasNext() && posts.nextNode() != null) {
240 found = true;
241 }
242 }
243 } catch (RepositoryException e) {
244 log.error("Problem while searching for post: " + e.getMessage());
245 }
246 }
247 return found;
248 }
249
250 boolean includePost(Set<FilterPredicate> planetFilters, Node srcNode) {
251 if (planetFilters != null && planetFilters.size() > 0) {
252
253 try {
254 PlanetFilter planetFilter = new PlanetFilter(planetFilters);
255
256 SyndEntry entry = new SyndEntryImpl();
257 String author = PlanetUtil.formatName(srcNode, "author");
258 if (StringUtils.isNotBlank(author)) {
259 entry.setAuthor(author);
260 } else {
261 entry.setAuthor(PlanetUtil.formatName(srcNode, "channelTitle"));
262 }
263 entry.setTitle(PropertyUtil.getString(srcNode, "title"));
264 SyndContentImpl description = new SyndContentImpl();
265 description.setValue(PropertyUtil.getString(srcNode, "description", ""));
266 entry.setDescription(description);
267
268 return planetFilter.include(entry);
269 } catch (RepositoryException e) {
270 log.error("Problem while filtering planet feed content: " + e.getMessage());
271 }
272 }
273
274 return true;
275 }
276
277
278
279
280
281
282
283
284
285
286
287
288
289 String getPostsParent(Node pdNode, long entryCount) {
290 String postsFolder = POSTS_FIRST;
291 int archCount;
292 try {
293
294 archCount = NodeUtil.asList(NodeUtil.getNodes(pdNode)).size() > 0 ? (int) (NodeUtil.asList(NodeUtil.getNodes(pdNode)).size() - 1) : 0;
295 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount), 5, "0");
296
297
298 Node postsNode = NodeUtil.createPath(pdNode, postsFolder, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
299 long existingEntries = NodeUtil.asList(NodeUtil.getNodes(postsNode)).size();
300
301
302 if (existingEntries + entryCount > MAX_NODE_ENTRIES) {
303 postsFolder = POSTS_PREFIX + StringUtils.leftPad(String.valueOf(archCount + 1), 5, "0");
304 }
305 } catch (RepositoryException e) {
306 log.error("Problem while getting number of highest posts node: " + e.getMessage());
307 }
308 return postsFolder;
309 }
310
311
312
313
314
315
316
317 String getPostChecksum(String postContent) {
318 String checksum = null;
319 MessageDigest md;
320
321 try {
322 md = MessageDigest.getInstance("MD5");
323 md.reset();
324 md.update(postContent.getBytes(), 0, postContent.length());
325 checksum = new BigInteger(1, md.digest()).toString(16);
326 } catch (Exception e) {
327 log.error("Problem while creating checksum for post: " + e.getMessage());
328 }
329 return checksum;
330 }
331
332 Set<FilterPredicate> loadPlanetFilters(Node feedNode) {
333 Set<FilterPredicate> planetFilters = new HashSet<FilterPredicate>();
334
335 try {
336 Node filtersNode = feedNode.hasNode("filters") ? feedNode.getNode("filters") : null;
337
338 if (filtersNode != null) {
339 FilterPredicateContentMapper filterPredicateMapper = new FilterPredicateContentMapper();
340 List<Node> filterNodes = NodeUtil.asList(NodeUtil.getNodes(filtersNode, VersionUtil.getNodeTypeName(filtersNode)));
341
342 for (Node n : filterNodes) {
343 FilterPredicate filterPredicate = filterPredicateMapper.map(n);
344 if (filterPredicate == null) {
345 continue;
346 }
347 planetFilters.add(filterPredicate);
348 }
349 }
350 } catch (RepositoryException e) {
351 log.error("Problem while retrieving planet feed node filters: " + e.getMessage());
352 }
353
354 return planetFilters;
355 }
356
357 }