View Javadoc

1   /**
2    * This file Copyright (c) 2003-2013 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.generator;
35  
36  import info.magnolia.commands.MgnlCommand;
37  import info.magnolia.context.Context;
38  import info.magnolia.context.MgnlContext;
39  import info.magnolia.jcr.util.NodeUtil;
40  import info.magnolia.jcr.util.PropertyUtil;
41  import info.magnolia.module.data.DataConsts;
42  import info.magnolia.module.rssaggregator.RSSAggregator;
43  import info.magnolia.module.rssaggregator.util.PlanetUtil;
44  
45  import java.util.Calendar;
46  import java.util.Date;
47  
48  import javax.jcr.Node;
49  import javax.jcr.NodeIterator;
50  import javax.jcr.RepositoryException;
51  import javax.jcr.Session;
52  
53  import org.apache.commons.lang.StringUtils;
54  import org.slf4j.Logger;
55  import org.slf4j.LoggerFactory;
56  
57  /**
58   * Collects and generates statistics for imported RSS feed data.
59   *
60   * @author lfischer
61   */
62  public class CollectStatisticsCommand extends MgnlCommand {
63  
64      private static final Logger log = LoggerFactory.getLogger(CollectStatisticsCommand.class);
65  
66      private static final String CONTENTTYPE_RSSAGGREGATOR = "RssAggregator";
67      private static final String FEED_DATA_NAME = "planetData";
68      private static final String STATISTICS_NODE = "statistics";
69      private static final String STATS_AUTHORS_NODE = "authors";
70      private static final String STATS_CHANNEL_NODE = "channels";
71  
72      private Session session;
73  
74      @Override
75      public boolean execute(Context context) throws Exception {
76          log.info("Starting command for creating Planet data statistics.");
77  
78          //session = context.getJCRSession("data");
79          session = MgnlContext.getSystemContext().getJCRSession("data");
80  
81          traverseFeedEntries();
82  
83          log.info("Finished generating Planet data statistics.");
84  
85          return true;
86      }
87  
88      protected Date getStatisticsStartDate() {
89          RSSAggregator module = RSSAggregator.getInstance();
90          int lastMonthsIncluded = module.getMonthsIncluded();
91  
92          Calendar calendar = Calendar.getInstance();
93          calendar.setTime(new Date());
94          calendar.add(Calendar.MONTH, (lastMonthsIncluded) * -1);
95  
96          return calendar.getTime();
97      }
98  
99      private void traverseFeedEntries() throws RepositoryException {
100         //Node rssParent = SessionUtil.getNode("data", "/rssaggregator");
101         Node rssParent = session.getNode("/rssaggregator");
102 
103         Date startDate = getStatisticsStartDate();
104         log.info("Statistics will be build on content with date " + startDate + " or later.");
105 
106         if (rssParent != null) {
107             NodeIterator feeds = rssParent.getNodes();
108             while (feeds.hasNext()) {
109                 int authorCount = 0;
110                 Node feedNode = feeds.nextNode();
111                 if (NodeUtil.isNodeType(feedNode, CONTENTTYPE_RSSAGGREGATOR)) {
112 
113                     if (PlanetUtil.isPlanetNode(feedNode)) {
114                         log.info("Creating statistics for planet feed: " + feedNode.getName());
115 
116                         // remove the statistics node for cleanup
117                         if (feedNode.hasNode(STATISTICS_NODE)) {
118                             String absPath = feedNode.getNode(STATISTICS_NODE).getPath();
119                             feedNode.getSession().removeItem(absPath);
120                         }
121 
122                         // get / create nodes for statistics
123                         Node authors = NodeUtil.createPath(feedNode, STATISTICS_NODE + "/" + STATS_AUTHORS_NODE, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
124                         Node channels = NodeUtil.createPath(feedNode, STATISTICS_NODE + "/" + STATS_CHANNEL_NODE, DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
125 
126                         NodeIterator postArchives = feedNode.getNode(FEED_DATA_NAME).getNodes();
127 
128                         while (postArchives.hasNext()) {
129                             Node archive = postArchives.nextNode();
130 
131                             // feed data
132                             NodeIterator entries = archive.getNodes();
133                             while (entries.hasNext()) {
134                                 Node entry = entries.nextNode();
135 
136                                 String author = PlanetUtil.formatName(entry, "author");
137                                 String channel = PlanetUtil.formatName(entry, "channelTitle");
138 
139                                 try {
140                                     authorCount++;
141                                     String feedLink = PropertyUtil.getString(entry, "rssLink", "");
142                                     // if the author is blank, use the channel title and collect statistics for a team blog
143                                     Node trgNode;
144                                     if (StringUtils.isNotBlank(author)) {
145                                         //trgNode = PlanetUtil.findAuthorNode(authors, author, authorCount);
146                                         trgNode = PlanetUtil.findAuthorNode(authors, author, feedLink, authorCount);
147                                         PropertyUtil.setProperty(trgNode, "author", author);
148                                     } else {
149                                         //trgNode = PlanetUtil.findAuthorNode(authors, author, authorCount);
150                                         trgNode = PlanetUtil.findAuthorNode(authors, author, feedLink, authorCount);
151                                         PropertyUtil.setProperty(trgNode, "author", channel);
152                                     }
153 
154                                     Node countedPosts = NodeUtil.createPath(trgNode, "countedPosts", DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
155 
156                                     // only count the post for statistics if it's in the time range
157                                     if (entry.hasProperty("pubDate")) {
158                                         try {
159                                             Date pubDate = new Date(entry.getProperty("pubDate").getLong());
160                                             // entry must have happened after the defined start date
161                                             if (pubDate.after(startDate)) {
162                                                 if (!countedPosts.hasNode(entry.getIdentifier())) {
163                                                     NodeUtil.createPath(countedPosts, entry.getIdentifier(), DataConsts.MODULE_DATA_CONTENT_NODE_TYPE, true);
164                                                 }
165                                                 long postCount = countedPosts.getNodes().getSize();
166                                                 PropertyUtil.setProperty(trgNode, "postCount", postCount);
167                                             }
168                                         } catch (Exception e) {
169                                             log.error("Problem while adding post counter: " + e.getMessage());
170                                         }
171                                     }
172                                     PropertyUtil.setProperty(trgNode, "blogLink", PropertyUtil.getString(entry, "authorLink", ""));
173                                     // PropertyUtil.setProperty(trgNode, "feedLink", PropertyUtil.getString(entry, "rssLink", ""));
174                                     PropertyUtil.setProperty(trgNode, "feedLink", feedLink);
175 
176                                     session.save();
177                                 } catch (RepositoryException re) {
178                                     log.error("Exception while parsing entries: " + re.getMessage());
179                                 }
180                             }
181                         }
182                     } else {
183                         log.info("Statistics for feed " + feedNode.getName() + " will not be created because the feed is not marked as Planet feed.");
184                     }
185                 }
186             }
187         } else {
188             log.info("Could not find parent node for data/rssaggregator feed entries.");
189         }
190     }
191 }