View Javadoc

1   /**
2    * This file Copyright (c) 2013 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.generator;
35  
36  import info.magnolia.cms.util.QueryUtil;
37  import info.magnolia.commands.MgnlCommand;
38  import info.magnolia.context.Context;
39  import info.magnolia.context.MgnlContext;
40  import info.magnolia.jcr.util.NodeTypes;
41  import info.magnolia.jcr.util.NodeUtil;
42  import info.magnolia.jcr.util.PropertyUtil;
43  import info.magnolia.module.ModuleRegistry;
44  import info.magnolia.module.rssaggregator.RSSAggregator;
45  import info.magnolia.module.rssaggregator.RSSAggregatorConstants;
46  import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
47  import info.magnolia.module.rssaggregator.util.PlanetUtil;
48  import info.magnolia.objectfactory.Components;
49  
50  import java.util.ArrayList;
51  import java.util.Calendar;
52  import java.util.Date;
53  import java.util.List;
54  
55  import javax.jcr.Node;
56  import javax.jcr.NodeIterator;
57  import javax.jcr.RepositoryException;
58  import javax.jcr.Session;
59  
60  import org.apache.commons.collections.MapIterator;
61  import org.apache.commons.collections.keyvalue.MultiKey;
62  import org.apache.commons.collections.map.MultiKeyMap;
63  import org.slf4j.Logger;
64  import org.slf4j.LoggerFactory;
65  
66  /**
67   * Collects and generates statistics for imported RSS feed data.
68   *
69   * @author lfischer
70   */
71  public class CollectStatisticsCommand extends MgnlCommand {
72  
73      private static final Logger log = LoggerFactory.getLogger(CollectStatisticsCommand.class);
74  
75      private static final String FEED_DATA_NAME = "planetData";
76      private static final String STATISTICS_NODE = "statistics";
77      private static final String STATS_AUTHORS_NODE = "authors";
78  
79      private Session session;
80      private Date statisticsStartDate;
81      private int authorCount;
82  
83      @Override
84      public boolean execute(Context context) throws Exception {
85          log.info("Starting command for creating Planet data statistics.");
86          // needed for execution with scheduler in system context
87          session = MgnlContext.getJCRSession(RSSAggregatorConstants.WORKSPACE);
88  
89          traverseFeedEntries(session.getRootNode());
90  
91          log.info("Finished generating Planet data statistics.");
92  
93          return true;
94      }
95  
96      /**
97       * Get relevant date for statistics from Magnolia config workspace.
98       *
99       * @return Date relevant for inclusion of blog entries in statistics.
100      */
101     protected Date getStatisticsStartDate() {
102         RSSAggregator module = (RSSAggregator) Components.getComponent(ModuleRegistry.class).getModuleInstance("rssaggregator");
103         int lastMonthsIncluded = module.getMonthsIncluded();
104 
105         Calendar calendar = Calendar.getInstance();
106         calendar.setTime(new Date());
107         calendar.add(Calendar.MONTH, -lastMonthsIncluded);
108 
109         return calendar.getTime();
110     }
111 
112     private void traverseFeedEntries(Node rssParent) throws RepositoryException {
113         List<Node> feeds = NodeUtil.asList(NodeUtil.getNodes(rssParent, NodeTypes.Folder.NAME));
114         feeds.add(rssParent);
115         for (Node rootOrFolder : feeds) {
116             this.authorCount = 0;
117             doTraverseFeedEntries(rootOrFolder);
118         }
119     }
120     
121     /**
122      * Collect blog entry statistics information from a planet archive.
123      */
124     private void doTraverseFeedEntries(Node rssParent) throws RepositoryException {
125         Date startDate = getStatisticsStartDate();
126         log.info("Statistics will be build on content with date " + startDate + " or later.");
127 
128         if (rssParent != null) {
129             NodeIterator feeds = rssParent.getNodes();
130             while (feeds.hasNext()) {
131                 Node feedNode = feeds.nextNode();
132                 if (NodeUtil.isNodeType(feedNode, RSSAggregatorNodeTypes.RSSAggregator.NAME) && PlanetUtil.isPlanetNode(feedNode)) {
133                     log.info("Creating statistics for planet feed: " + feedNode.getName());
134 
135                     MultiKeyMap entryMap = new MultiKeyMap();
136 
137                     String sql2 = "select * from [mgnl:content] as t where ISDESCENDANTNODE([" + feedNode.getPath() + "/"
138                             + FEED_DATA_NAME + "]) and t.rssLink IS NOT NULL AND t.author IS NOT NULL ORDER BY t.author, t.rssLink";
139 
140                     NodeIterator entries = QueryUtil.search(RSSAggregatorConstants.WORKSPACE, sql2);
141                     while (entries.hasNext()) {
142                         Node entry = entries.nextNode();
143                         createMapEntry(entryMap, entry);
144                     }
145                     createStatisticsNodes(entryMap, feedNode);
146                 }
147             }
148         } else {
149             log.info("Could not find parent node for data/rssaggregator feed entries.");
150         }
151     }
152 
153     /**
154      * Add blog entries from the planet archive to a multi key map.
155      *
156      * @param mkm Map with blog entries stored with a key consisting of author name and rss feed link.
157      * @param blogEntry Single blog entry retrieved from planet archive.
158      */
159     @SuppressWarnings("unchecked")
160     protected void createMapEntry(MultiKeyMap mkm, Node blogEntry) {
161         List<Node> blogEntries = new ArrayList<Node>();
162 
163         try {
164             String author = PlanetUtil.formatName(blogEntry, "author");
165             String rssLink = PropertyUtil.getString(blogEntry, "rssLink", "");
166 
167             if (mkm.containsKey(author, rssLink)) {
168                 blogEntries = (List<Node>) mkm.get(author, rssLink);
169             }
170 
171             if (blogEntry.hasProperty("pubDate")) {
172                 blogEntries.add(blogEntry);
173                 mkm.put(author, rssLink, blogEntries);
174             }
175         } catch (Exception e) {
176             log.error("Could not add blog entry to statistics list: " + e.getMessage());
177         }
178     }
179 
180     /**
181      * Store collected statistics in JCR tree.
182      *
183      * @param statEntries Map with author and feed key containing blog entries.
184      * @param topNode Top node for the planet statistics feed.
185      */
186     @SuppressWarnings("unchecked")
187     protected void createStatisticsNodes(MultiKeyMap statEntries, Node topNode) {
188         Date startDate = getStatisticsStartDate();
189         try {
190             // first, remove the statistics node for cleanup
191             if (topNode.hasNode(STATISTICS_NODE)) {
192                 String absPath = topNode.getNode(STATISTICS_NODE).getPath();
193                 topNode.getSession().removeItem(absPath);
194             }
195             // get / create nodes for statistics
196             Node authors = NodeUtil.createPath(topNode, STATISTICS_NODE + "/" + STATS_AUTHORS_NODE,
197                                                   NodeTypes.Content.NAME, false);
198 
199             int authorCount = 0;
200             MapIterator mit = statEntries.mapIterator();
201             while (mit.hasNext()) {
202                 mit.next();
203 
204                 MultiKey mk = (MultiKey) mit.getKey();
205                 String author = (String) mk.getKey(0);
206                 String rssLink = (String) mk.getKey(1);
207 
208                 Node trgNode = PlanetUtil.findAuthorNode(authors, author, rssLink, authorCount);
209                 PropertyUtil.setProperty(trgNode, "author", author);
210                 PropertyUtil.setProperty(trgNode, "feedLink", rssLink);
211 
212                 Node countedPosts = NodeUtil.createPath(trgNode, "countedPosts", NodeTypes.Content.NAME, false);
213 
214                 List<Node> blogEntries = (List<Node>) mit.getValue();
215 
216                 long postCount = 0;
217                 for (Node entry : blogEntries) {
218                     PropertyUtil.setProperty(trgNode, "blogLink", PropertyUtil.getString(entry, "authorLink", ""));
219                     // check if blog entry is relevant for statistics
220                     Date pubDate = new Date(entry.getProperty("pubDate").getLong());
221                     if (pubDate.after(startDate)) {
222                         NodeUtil.createPath(countedPosts, entry.getIdentifier(), NodeTypes.Content.NAME, false);
223                         postCount++;
224                     }
225                 }
226 
227                 PropertyUtil.setProperty(trgNode, "postCount", postCount);
228                 authorCount++;
229 
230                 session.save();
231             }
232         } catch (Exception e) {
233             log.error("Problem while creating nodes for statistics: " + e.getMessage());
234         }
235     }
236 
237 }