View Javadoc
1   /**
2    * This file Copyright (c) 2013-2016 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.generator;
35  
36  import info.magnolia.cms.util.QueryUtil;
37  import info.magnolia.commands.MgnlCommand;
38  import info.magnolia.context.Context;
39  import info.magnolia.context.MgnlContext;
40  import info.magnolia.jcr.util.NodeTypes;
41  import info.magnolia.jcr.util.NodeUtil;
42  import info.magnolia.jcr.util.PropertyUtil;
43  import info.magnolia.module.ModuleRegistry;
44  import info.magnolia.module.rssaggregator.RSSAggregator;
45  import info.magnolia.module.rssaggregator.RSSAggregatorConstants;
46  import info.magnolia.module.rssaggregator.RSSAggregatorNodeTypes;
47  import info.magnolia.module.rssaggregator.util.PlanetUtil;
48  import info.magnolia.objectfactory.Components;
49  
50  import java.util.ArrayList;
51  import java.util.Calendar;
52  import java.util.Date;
53  import java.util.List;
54  
55  import javax.jcr.Node;
56  import javax.jcr.NodeIterator;
57  import javax.jcr.RepositoryException;
58  import javax.jcr.Session;
59  
60  import org.apache.commons.collections4.MapIterator;
61  import org.apache.commons.collections4.keyvalue.MultiKey;
62  import org.apache.commons.collections4.map.MultiKeyMap;
63  import org.slf4j.Logger;
64  import org.slf4j.LoggerFactory;
65  
66  /**
67   * Collects and generates statistics for imported RSS feed data.
68   */
69  public class CollectStatisticsCommand extends MgnlCommand {
70  
71      private static final Logger log = LoggerFactory.getLogger(CollectStatisticsCommand.class);
72  
73      private static final String FEED_DATA_NAME = "planetData";
74      private static final String STATISTICS_NODE = "statistics";
75      private static final String STATS_AUTHORS_NODE = "authors";
76  
77      private Session session;
78      private Date statisticsStartDate;
79      private int authorCount;
80  
81      @Override
82      public boolean execute(Context context) throws Exception {
83          log.info("Starting command for creating Planet data statistics.");
84          // needed for execution with scheduler in system context
85          session = MgnlContext.getJCRSession(RSSAggregatorConstants.WORKSPACE);
86  
87          traverseFeedEntries(session.getRootNode());
88  
89          log.info("Finished generating Planet data statistics.");
90  
91          return true;
92      }
93  
94      /**
95       * Get relevant date for statistics from Magnolia config workspace.
96       *
97       * @return Date relevant for inclusion of blog entries in statistics.
98       */
99      protected Date getStatisticsStartDate() {
100         RSSAggregator module = (RSSAggregator) Components.getComponent(ModuleRegistry.class).getModuleInstance("rssaggregator");
101         int lastMonthsIncluded = module.getMonthsIncluded();
102 
103         Calendar calendar = Calendar.getInstance();
104         calendar.setTime(new Date());
105         calendar.add(Calendar.MONTH, -lastMonthsIncluded);
106 
107         return calendar.getTime();
108     }
109 
110     private void traverseFeedEntries(Node rssParent) throws RepositoryException {
111         List<Node> feeds = NodeUtil.asList(NodeUtil.getNodes(rssParent, NodeTypes.Folder.NAME));
112         feeds.add(rssParent);
113         for (Node rootOrFolder : feeds) {
114             this.authorCount = 0;
115             doTraverseFeedEntries(rootOrFolder);
116         }
117     }
118 
119     /**
120      * Collect blog entry statistics information from a planet archive.
121      */
122     private void doTraverseFeedEntries(Node rssParent) throws RepositoryException {
123         Date startDate = getStatisticsStartDate();
124         log.info("Statistics will be build on content with date " + startDate + " or later.");
125 
126         if (rssParent != null) {
127             NodeIterator feeds = rssParent.getNodes();
128             while (feeds.hasNext()) {
129                 Node feedNode = feeds.nextNode();
130                 if (NodeUtil.isNodeType(feedNode, RSSAggregatorNodeTypes.RSSAggregator.NAME) && PlanetUtil.isPlanetNode(feedNode)) {
131                     log.info("Creating statistics for planet feed: " + feedNode.getName());
132 
133                     MultiKeyMap entryMap = new MultiKeyMap();
134 
135                     String sql2 = "select * from [mgnl:content] as t where ISDESCENDANTNODE([" + feedNode.getPath() + "/"
136                             + FEED_DATA_NAME + "]) and t.rssLink IS NOT NULL AND t.author IS NOT NULL ORDER BY t.author, t.rssLink";
137 
138                     NodeIterator entries = QueryUtil.search(RSSAggregatorConstants.WORKSPACE, sql2);
139                     while (entries.hasNext()) {
140                         Node entry = entries.nextNode();
141                         createMapEntry(entryMap, entry);
142                     }
143                     createStatisticsNodes(entryMap, feedNode);
144                 }
145             }
146         } else {
147             log.info("Could not find parent node for data/rssaggregator feed entries.");
148         }
149     }
150 
151     /**
152      * Add blog entries from the planet archive to a multi key map.
153      *
154      * @param mkm Map with blog entries stored with a key consisting of author name and rss feed link.
155      * @param blogEntry Single blog entry retrieved from planet archive.
156      */
157     @SuppressWarnings("unchecked")
158     protected void createMapEntry(MultiKeyMap mkm, Node blogEntry) {
159         List<Node> blogEntries = new ArrayList<Node>();
160 
161         try {
162             String author = PlanetUtil.formatName(blogEntry, "author");
163             String rssLink = PropertyUtil.getString(blogEntry, "rssLink", "");
164 
165             if (mkm.containsKey(author, rssLink)) {
166                 blogEntries = (List<Node>) mkm.get(author, rssLink);
167             }
168 
169             if (blogEntry.hasProperty("pubDate")) {
170                 blogEntries.add(blogEntry);
171                 mkm.put(author, rssLink, blogEntries);
172             }
173         } catch (Exception e) {
174             log.error("Could not add blog entry to statistics list: " + e.getMessage());
175         }
176     }
177 
178     /**
179      * Store collected statistics in JCR tree.
180      *
181      * @param statEntries Map with author and feed key containing blog entries.
182      * @param topNode Top node for the planet statistics feed.
183      */
184     @SuppressWarnings("unchecked")
185     protected void createStatisticsNodes(MultiKeyMap statEntries, Node topNode) {
186         Date startDate = getStatisticsStartDate();
187         try {
188             // first, remove the statistics node for cleanup
189             if (topNode.hasNode(STATISTICS_NODE)) {
190                 String absPath = topNode.getNode(STATISTICS_NODE).getPath();
191                 topNode.getSession().removeItem(absPath);
192             }
193             // get / create nodes for statistics
194             Node authors = NodeUtil.createPath(topNode, STATISTICS_NODE + "/" + STATS_AUTHORS_NODE,
195                     NodeTypes.Content.NAME, false);
196 
197             int authorCount = 0;
198             MapIterator mit = statEntries.mapIterator();
199             while (mit.hasNext()) {
200                 mit.next();
201 
202                 MultiKey mk = (MultiKey) mit.getKey();
203                 String author = (String) mk.getKey(0);
204                 String rssLink = (String) mk.getKey(1);
205 
206                 Node trgNode = PlanetUtil.findAuthorNode(authors, author, rssLink, authorCount);
207                 PropertyUtil.setProperty(trgNode, "author", author);
208                 PropertyUtil.setProperty(trgNode, "feedLink", rssLink);
209 
210                 Node countedPosts = NodeUtil.createPath(trgNode, "countedPosts", NodeTypes.Content.NAME, false);
211 
212                 List<Node> blogEntries = (List<Node>) mit.getValue();
213 
214                 long postCount = 0;
215                 for (Node entry : blogEntries) {
216                     PropertyUtil.setProperty(trgNode, "blogLink", PropertyUtil.getString(entry, "authorLink", ""));
217                     // check if blog entry is relevant for statistics
218                     Date pubDate = new Date(entry.getProperty("pubDate").getLong());
219                     if (pubDate.after(startDate)) {
220                         NodeUtil.createPath(countedPosts, entry.getIdentifier(), NodeTypes.Content.NAME, false);
221                         postCount++;
222                     }
223                 }
224 
225                 PropertyUtil.setProperty(trgNode, "postCount", postCount);
226                 authorCount++;
227 
228                 session.save();
229             }
230         } catch (Exception e) {
231             log.error("Problem while creating nodes for statistics: " + e.getMessage());
232         }
233     }
234 
235 }