View Javadoc

1   /**
2    * This file Copyright (c) 2012 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.module.rssaggregator.importhandler;
35  
36  import java.io.BufferedInputStream;
37  import java.io.IOException;
38  import java.io.InputStream;
39  import java.net.HttpURLConnection;
40  import java.net.URL;
41  import java.net.URLConnection;
42  import java.util.zip.GZIPInputStream;
43  
44  import com.sun.syndication.feed.synd.SyndFeed;
45  import com.sun.syndication.fetcher.FetcherEvent;
46  import com.sun.syndication.fetcher.FetcherException;
47  import com.sun.syndication.fetcher.impl.FeedFetcherCache;
48  import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
49  import com.sun.syndication.fetcher.impl.SyndFeedInfo;
50  import com.sun.syndication.io.FeedException;
51  import com.sun.syndication.io.SyndFeedInput;
52  import com.sun.syndication.io.XmlReader;
53  
54  /**
55   * Parts of the code below orignates from com.sun.syndication.fetcher.impl.HttpURLFeedFetcher. See annotated part of the code for details.
56   * 
57   * @deprecated once https://rometools.jira.com/browse/FETCHER-2 is solved and released, this class will be removed.
58   */
59  public class MgnlHttpURLFeedFetcher extends HttpURLFeedFetcher{
60  
61      /**
62       * 600 seconds default.
63       */
64      private int readTimeout = 600000;
65  
66      /**
67       * 10 seconds default.
68       */
69      private int connectTimeout = 10000;
70  
71      public MgnlHttpURLFeedFetcher(FeedFetcherCache feedInfoCache) {
72          super(feedInfoCache);
73      }
74  
75      /*
76       * All the code below, except where noted, is copied from com.sun.syndication.fetcher.impl.HttpURLFeedFetcher 
77       * Once https://rometools.jira.com/browse/FETCHER-2 is solved, all the code will be retired.
78       * 
79       * Original parts of the code are distributed and retain following license:
80       * 
81       * Copyright 2004 Sun Microsystems, Inc.
82       *
83       * Licensed under the Apache License, Version 2.0 (the "License");
84       * you may not use this file except in compliance with the License.
85       * You may obtain a copy of the License at
86       *
87       *     http://www.apache.org/licenses/LICENSE-2.0
88       *
89       * Unless required by applicable law or agreed to in writing, software
90       * distributed under the License is distributed on an "AS IS" BASIS,
91       * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
92       * See the License for the specific language governing permissions and
93       * limitations under the License.
94       */
95      @Override
96      public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException {
97          if (feedUrl == null) {
98              throw new IllegalArgumentException("null is not a valid URL");
99          }
100         
101         URLConnection connection = feedUrl.openConnection();
102         // added by Magnolia
103         connection.setConnectTimeout(connectTimeout);
104         connection.setReadTimeout(readTimeout);
105         // end of code changes made by Magnolia
106         if (!(connection instanceof HttpURLConnection)) {           
107             throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url");
108         }
109         HttpURLConnection httpConnection = (HttpURLConnection)connection;       
110         // httpConnection.setInstanceFollowRedirects(true); // this is true by default, but can be changed on a claswide basis      
111         
112         FeedFetcherCache cache = getFeedInfoCache();
113         if (cache != null) {
114             SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl);
115             setRequestHeaders(connection, syndFeedInfo);
116             httpConnection.connect();
117             try {
118                 fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
119                                 
120                 if (syndFeedInfo == null) {
121                     // this is a feed that hasn't been retrieved
122                     syndFeedInfo = new SyndFeedInfo();
123                     retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
124                 } else {
125                     // check the response code
126                     int responseCode = httpConnection.getResponseCode();
127                     if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) {
128                         // the response code is not 304 NOT MODIFIED
129                         // This is either because the feed server
130                         // does not support condition gets
131                         // or because the feed hasn't changed
132                         retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection);
133                     } else {
134                         // the feed does not need retrieving
135                         fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection);
136                     }
137                 }
138     
139                 return syndFeedInfo.getSyndFeed();
140             } finally {
141                 httpConnection.disconnect();
142             }
143         }
144         fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection);
145         InputStream inputStream = null;
146         setRequestHeaders(connection, null);
147         httpConnection.connect();
148         try {
149             inputStream = httpConnection.getInputStream();                      
150             return getSyndFeedFromStream(inputStream, connection);
151         } catch (java.io.IOException e) {
152             handleErrorCodes(((HttpURLConnection)connection).getResponseCode());
153         } finally {
154             if (inputStream != null) {
155                 inputStream.close();
156             }
157             httpConnection.disconnect();
158         }
159         // we will never actually get to this line
160         return null;
161     }
162 
163     private SyndFeed readSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
164         BufferedInputStream is;
165         if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) {
166             // handle gzip encoded content
167             is = new BufferedInputStream(new GZIPInputStream(inputStream));
168         } else {
169             is = new BufferedInputStream(inputStream);
170         }
171 
172         //InputStreamReader reader = new InputStreamReader(is, ResponseHandler.getCharacterEncoding(connection));
173 
174         //SyndFeedInput input = new SyndFeedInput();
175 
176         XmlReader reader = null;        
177         if (connection.getHeaderField("Content-Type") != null) {
178             reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true);
179         } else {
180             reader = new XmlReader(is, true);
181         }
182         
183         SyndFeedInput syndFeedInput = new SyndFeedInput();
184         syndFeedInput.setPreserveWireFeed(isPreserveWireFeed());
185         
186         return syndFeedInput.build(reader);
187         
188     }
189 
190     private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException {
191         SyndFeed feed = readSyndFeedFromStream(inputStream, connection);
192         fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed);
193         return feed;
194     }
195 
196 }