View Javadoc

1   /**
2    * This file Copyright (c) 2013-2013 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.generator;
35  
36  import info.magnolia.cms.util.QueryUtil;
37  import info.magnolia.commands.MgnlCommand;
38  import info.magnolia.context.Context;
39  import info.magnolia.context.MgnlContext;
40  import info.magnolia.jcr.util.NodeUtil;
41  import info.magnolia.jcr.util.PropertyUtil;
42  import info.magnolia.module.data.DataConsts;
43  import info.magnolia.module.rssaggregator.RSSAggregator;
44  import info.magnolia.module.rssaggregator.util.PlanetUtil;
45  
46  import java.util.ArrayList;
47  import java.util.Calendar;
48  import java.util.Date;
49  import java.util.List;
50  
51  import javax.jcr.Node;
52  import javax.jcr.NodeIterator;
53  import javax.jcr.RepositoryException;
54  import javax.jcr.Session;
55  
56  import org.apache.commons.collections.MapIterator;
57  import org.apache.commons.collections.keyvalue.MultiKey;
58  import org.apache.commons.collections.map.MultiKeyMap;
59  import org.slf4j.Logger;
60  import org.slf4j.LoggerFactory;
61  
62  /**
63   * Collects and generates statistics for imported RSS feed data.
64   *
65   * @author lfischer
66   */
67  public class CollectStatisticsCommand extends MgnlCommand {
68  
69      private static final Logger log = LoggerFactory.getLogger(CollectStatisticsCommand.class);
70  
71      private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
72      private static final String FEED_DATA_NAME = "planetData";
73      private static final String STATISTICS_NODE = "statistics";
74      private static final String STATS_AUTHORS_NODE = "authors";
75      private Session session;
76  
77      @Override
78      public boolean execute(Context context) throws Exception {
79          log.info("Starting command for creating Planet data statistics.");
80          // needed for execution with scheduler in system context
81          session = MgnlContext.getSystemContext().getJCRSession("data");
82  
83          traverseFeedEntries();
84  
85          log.info("Finished generating Planet data statistics.");
86  
87          return true;
88      }
89  
90      /**
91       * Get relevant date for statistics from Magnolia config workspace.
92       *
93       * @return Date relevant for inclusion of blog entries in statistics.
94       */
95      protected Date getStatisticsStartDate() {
96          RSSAggregator module = RSSAggregator.getInstance();
97          int lastMonthsIncluded = module.getMonthsIncluded();
98  
99          Calendar calendar = Calendar.getInstance();
100         calendar.setTime(new Date());
101         calendar.add(Calendar.MONTH, (lastMonthsIncluded) * -1);
102 
103         return calendar.getTime();
104     }
105 
106     /**
107      * Collect blog entry statistics information from a planet archive.
108      */
109     private void traverseFeedEntries() throws RepositoryException {
110         Node rssParent = session.getNode("/rssaggregator");
111         Date startDate = getStatisticsStartDate();
112         log.info("Statistics will be build on content with date " + startDate + " or later.");
113 
114         if (rssParent != null) {
115             NodeIterator feeds = rssParent.getNodes();
116             while (feeds.hasNext()) {
117                 Node feedNode = feeds.nextNode();
118                 if (NodeUtil.isNodeType(feedNode, CONTENTTYPE_RSSAGGREGATOR) && PlanetUtil.isPlanetNode(feedNode)) {
119                     log.info("Creating statistics for planet feed: " + feedNode.getName());
120 
121                     MultiKeyMap entryMap = new MultiKeyMap();
122 
123                     String sql2 = "select * from [dataItemNode] as t where ISDESCENDANTNODE([" + feedNode.getPath() + "/"
124                             + FEED_DATA_NAME + "]) and t.rssLink IS NOT NULL AND t.author IS NOT NULL ORDER BY t.author, t.rssLink";
125 
126                     NodeIterator entries = QueryUtil.search("data", sql2);
127                     while (entries.hasNext()) {
128                         Node entry = entries.nextNode();
129                         createMapEntry(entryMap, entry);
130                     }
131                     createStatisticsNodes(entryMap, feedNode);
132                 }
133             }
134         } else {
135             log.info("Could not find parent node for data/rssaggregator feed entries.");
136         }
137     }
138 
139     /**
140      * Add blog entries from the planet archive to a multi key map.
141      *
142      * @param mkm Map with blog entries stored with a key consisting of author name and rss feed link.
143      * @param blogEntry Single blog entry retrieved from planet archive.
144      */
145     @SuppressWarnings("unchecked")
146     protected void createMapEntry(MultiKeyMap mkm, Node blogEntry) {
147         List<Node> blogEntries = new ArrayList<Node>();
148 
149         try {
150             String author = PlanetUtil.formatName(blogEntry, "author");
151             String rssLink = PropertyUtil.getString(blogEntry, "rssLink", "");
152 
153             if (mkm.containsKey(author, rssLink)) {
154                 blogEntries = (ArrayList<Node>) mkm.get(author, rssLink);
155             }
156 
157             if (blogEntry.hasProperty("pubDate")) {
158                 blogEntries.add(blogEntry);
159                 mkm.put(author, rssLink, blogEntries);
160             }
161         } catch (Exception e) {
162             log.error("Could not add blog entry to statistics list: " + e.getMessage());
163         }
164     }
165 
166     /**
167      * Store collected statistics in JCR tree.
168      *
169      * @param statEntries Map with author and feed key containing blog entries.
170      * @param topNode Top node for the planet statistics feed.
171      */
172     @SuppressWarnings("unchecked")
173     protected void createStatisticsNodes(MultiKeyMap statEntries, Node topNode) {
174         Date startDate = getStatisticsStartDate();
175         try {
176             // first, remove the statistics node for cleanup
177             if (topNode.hasNode(STATISTICS_NODE)) {
178                 String absPath = topNode.getNode(STATISTICS_NODE).getPath();
179                 topNode.getSession().removeItem(absPath);
180             }
181             // get / create nodes for statistics
182             Node authors = NodeUtil.createPath(topNode, STATISTICS_NODE + "/" + STATS_AUTHORS_NODE,
183                     DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, false);
184 
185             int authorCount = 0;
186             MapIterator mit = statEntries.mapIterator();
187             while (mit.hasNext()) {
188                 mit.next();
189 
190                 MultiKey mk = (MultiKey) mit.getKey();
191                 String author = (String) mk.getKey(0);
192                 String rssLink = (String) mk.getKey(1);
193 
194                 Node trgNode = PlanetUtil.findAuthorNode(authors, author, rssLink, authorCount);
195                 PropertyUtil.setProperty(trgNode, "author", author);
196                 PropertyUtil.setProperty(trgNode, "feedLink", rssLink);
197 
198                 Node countedPosts = NodeUtil.createPath(trgNode, "countedPosts", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, false);
199 
200                 List<Node> blogEntries = (ArrayList<Node>) mit.getValue();
201 
202                 long postCount = 0;
203                 for (Node entry : blogEntries) {
204                     PropertyUtil.setProperty(trgNode, "blogLink", PropertyUtil.getString(entry, "authorLink", ""));
205                     // check if blog entry is relevant for statistics
206                     Date pubDate = new Date(entry.getProperty("pubDate").getLong());
207                     if (pubDate.after(startDate)) {
208                         NodeUtil.createPath(countedPosts, entry.getIdentifier(), DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, false);
209                         postCount++;
210                     }
211                 }
212 
213                 PropertyUtil.setProperty(trgNode, "postCount", postCount);
214                 authorCount++;
215 
216                 session.save();
217             }
218         } catch (Exception e) {
219             log.error("Problem while creating nodes for statistics: " + e.getMessage());
220         }
221     }
222 
223 }