View Javadoc
1   /**
2    * This file Copyright (c) 2012-2016 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.importhandler;
35  
36  import java.io.BufferedInputStream;
37  import java.io.IOException;
38  import java.io.InputStream;
39  import java.net.HttpURLConnection;
40  import java.net.URL;
41  import java.net.URLConnection;
42  import java.util.zip.GZIPInputStream;
43  
44  import com.rometools.fetcher.FetcherEvent;
45  import com.rometools.fetcher.FetcherException;
46  import com.rometools.fetcher.impl.FeedFetcherCache;
47  import com.rometools.fetcher.impl.HttpURLFeedFetcher;
48  import com.rometools.fetcher.impl.SyndFeedInfo;
49  import com.rometools.rome.feed.synd.SyndFeed;
50  import com.rometools.rome.io.FeedException;
51  import com.rometools.rome.io.SyndFeedInput;
52  import com.rometools.rome.io.XmlReader;
53  
54  /**
55   * Parts of the code below orignates from com.sun.syndication.fetcher.impl.HttpURLFeedFetcher. See annotated part of the code for details.
56   *
57   * @deprecated once https://rometools.jira.com/browse/FETCHER-2 is solved and released, this class will be removed.
58   */
59  public class MgnlHttpURLFeedFetcher extends HttpURLFeedFetcher {
60  
61      /**
62       * 600 seconds default.
63       */
64      private int readTimeout = 600000;
65  
66      /**
67       * 10 seconds default.
68       */
69      private int connectTimeout = 10000;
70  
71      public MgnlHttpURLFeedFetcher(FeedFetcherCache feedInfoCache) {
72          super(feedInfoCache);
73      }
74  
75      /*
76       * All the code below, except where noted, is copied from com.sun.syndication.fetcher.impl.HttpURLFeedFetcher 
77       * Once https://rometools.jira.com/browse/FETCHER-2 is solved, all the code will be retired.
78       * 
79       * Original parts of the code are distributed and retain following license:
80       * 
81       * Copyright 2004 Sun Microsystems, Inc.
82       *
83       * Licensed under the Apache License, Version 2.0 (the "License");
84       * you may not use this file except in compliance with the License.
85       * You may obtain a copy of the License at
86       *
87       *     http://www.apache.org/licenses/LICENSE-2.0
88       *
89       * Unless required by applicable law or agreed to in writing, software
90       * distributed under the License is distributed on an "AS IS" BASIS,
91       * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
92       * See the License for the specific language governing permissions and
93       * limitations under the License.
94       */
95      @Override
96      public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
97          if (feedUrl == null) {
98              throw new IllegalArgumentException("null is not a valid URL");
99          }
100 
101         URLConnection connection = feedUrl.openConnection();
102         // added by Magnolia
103         connection.setConnectTimeout(connectTimeout);
104         connection.setReadTimeout(readTimeout);
105         // end of code changes made by Magnolia
106         if (!(connection instanceof HttpURLConnection)) {
107             throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url");
108         }
109         HttpURLConnection httpConnection = (HttpURLConnection) connection;
110 
111         FeedFetcherCache cache = getFeedInfoCache();
112         if (cache != null) {
113             SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
114             setRequestHeaders(connection, syndFeedInfo, "");
115             httpConnection.connect();
116             try {
117                 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
118 
119                 if (syndFeedInfo == null) {
120                     // this is a feed that hasn't been retrieved
121                     syndFeedInfo = new SyndFeedInfo();
122                     retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
123                 } else {
124                     // check the response code
125                     int responseCode = httpConnection.getResponseCode();
126                     if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
127                         // the response code is not 304 NOT MODIFIED
128                         // This is either because the feed server
129                         // does not support condition gets
130                         // or because the feed hasn't changed
131                         retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
132                     } else {
133                         // the feed does not need retrieving
134                         fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
135                     }
136                 }
137 
138                 return syndFeedInfo.getSyndFeed();
139             } finally {
140                 httpConnection.disconnect();
141             }
142         } else {
143             fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
144             InputStream inputStream = null;
145             setRequestHeaders(connection, null, "");
146             httpConnection.connect();
147             try {
148                 inputStream = httpConnection.getInputStream();
149                 return getSyndFeedFromStream(inputStream, connection);
150             } catch (java.io.IOException e) {
151                 handleErrorCodes(((HttpURLConnection) connection).getResponseCode());
152             } finally {
153                 if (inputStream != null) {
154                     inputStream.close();
155                 }
156                 httpConnection.disconnect();
157             }
158             // we will never actually get to this line
159             return null;
160         }
161     }
162 
163     private SyndFeed readSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
164         BufferedInputStream is;
165         if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
166             // handle gzip encoded content
167             is = new BufferedInputStream(new GZIPInputStream(inputStream));
168         } else {
169             is = new BufferedInputStream(inputStream);
170         }
171 
172         XmlReader reader = null;
173         if (connection.getHeaderField("Content-Type") != null) {
174             reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true);
175         } else {
176             reader = new XmlReader(is, true);
177         }
178 
179         SyndFeedInput syndFeedInput = new SyndFeedInput();
180         syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());
181 
182         return syndFeedInput.build(reader);
183 
184     }
185 
186     private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
187         SyndFeed feed = readSyndFeedFromStream(inputStream, connection);
188         fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed);
189         return feed;
190     }
191 
192 }