diff --git a/app/server/.gitignore b/app/server/.gitignore index 09331c6..eebc7cd 100644 --- a/app/server/.gitignore +++ b/app/server/.gitignore @@ -110,4 +110,5 @@ fabric.properties # Android studio 3.1+ serialized cache file .idea/caches/build_file_checksums.ser -lucene/ \ No newline at end of file +lucene/ +feed_cache/ \ No newline at end of file diff --git a/app/server/huntly-server/pom.xml b/app/server/huntly-server/pom.xml index f7d860a..556f442 100644 --- a/app/server/huntly-server/pom.xml +++ b/app/server/huntly-server/pom.xml @@ -146,6 +146,11 @@ cn.shenyanchao.ik-analyzer ik-analyzer + + com.squareup.okhttp3 + okhttp + 4.12.0 + org.springframework.boot diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java index e56ced8..31704db 100644 --- a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java +++ b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java @@ -8,55 +8,76 @@ import com.rometools.rome.io.FeedException; import com.rometools.rome.io.SyndFeedInput; import lombok.experimental.UtilityClass; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import java.io.IOException; import java.io.StringReader; -import java.net.URI; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; import java.nio.charset.Charset; -import java.time.Duration; /** * Utility methods related to feed handling - * code from commafeed project */ @UtilityClass public class FeedUtils { - public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) { - HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl)) + public static SyndFeed parseFeedUrl(String feedUrl, OkHttpClient client) { + Request request = new Request.Builder() + .url(feedUrl) .build(); - HttpResponse response = null; - try { - response = client.send(request, HttpResponse.BodyHandlers.ofByteArray()); + try(Response response = client.newCall(request).execute()) { + byte[] xmlBytes = null; + if (response.body() == null) { + throw new ConnectorFetchException("xml response null for url: " + feedUrl); + } + + xmlBytes = response.body().bytes(); + Charset encoding = FeedUtils.guessEncoding(xmlBytes); + String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding)); + if (xmlString == null) { + throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl); + } + return new SyndFeedInput().build(new StringReader(xmlString)); } catch (IOException e) { throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - var xmlBytes = response.body(); - Charset encoding = FeedUtils.guessEncoding(xmlBytes); - String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding)); - if (xmlString == null) { - throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl); - } - - try { - SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString)); - return feed; } catch (FeedException e) { throw new RuntimeException(e); } } - public static SyndFeed parseFeedUrl(String feedUrl) { - var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60)) - .followRedirects(HttpClient.Redirect.ALWAYS).build(); - return parseFeedUrl(feedUrl, client); - } +// public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) { +// HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl)) +// .build(); +// HttpResponse response = null; +// try { +// response = client.send(request, HttpResponse.BodyHandlers.ofByteArray()); +// } catch (IOException e) { +// throw new RuntimeException(e); +// } catch (InterruptedException e) { +// throw new RuntimeException(e); +// } +// var xmlBytes = response.body(); +// Charset encoding = FeedUtils.guessEncoding(xmlBytes); +// String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding)); +// if (xmlString == null) { +// throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl); +// } +// +// try { +// SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString)); +// return feed; +// } catch (FeedException e) { +// throw new RuntimeException(e); +// } +// } + +// public static SyndFeed parseFeedUrl(String feedUrl) { +// var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60)) +// .followRedirects(HttpClient.Redirect.ALWAYS).build(); +// return parseFeedUrl(feedUrl, client); +// } public static Charset guessEncoding(byte[] bytes) { String extracted = extractDeclaredEncoding(bytes); diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java index df71870..03ca261 100644 --- a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java +++ b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java @@ -5,12 +5,14 @@ import com.huntly.server.connector.ConnectorProperties; import com.huntly.server.connector.InfoConnector; import com.huntly.server.domain.exceptions.ConnectorFetchException; +import com.huntly.server.util.HttpUtils; import com.huntly.server.util.SiteUtils; import com.rometools.rome.feed.synd.SyndCategory; import com.rometools.rome.feed.synd.SyndContent; import com.rometools.rome.feed.synd.SyndEntry; import com.rometools.rome.feed.synd.SyndFeed; import lombok.extern.slf4j.Slf4j; +import okhttp3.OkHttpClient; import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; @@ -28,18 +30,16 @@ public class RSSConnector extends InfoConnector { private final ConnectorProperties connectorProperties; + private final OkHttpClient okClient; + private final HttpClient client; public RSSConnector(ConnectorProperties connectorProperties) { this.connectorProperties = connectorProperties; + this.okClient = HttpUtils.buildFeedOkHttpClient(connectorProperties.getProxySetting()); this.client = buildHttpClient(connectorProperties); } - public RSSConnector(ConnectorProperties connectorProperties, HttpClient httpClient) { - this.connectorProperties = connectorProperties; - this.client = httpClient; - } - @Override public List fetchAllPages() { return fetchNewestPages(); @@ -52,7 +52,7 @@ public List fetchNewestPages() { } try { - SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), client); + SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), okClient); var entries = feed.getEntries(); List pages = new ArrayList<>(); for (var entry : entries) { diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java b/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java index 108c8a8..45e8c8d 100644 --- a/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java +++ b/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java @@ -9,6 +9,10 @@ public class AppConstants { public static final String DEFAULT_LUCENE_DIR = "lucene"; + public static final String HTTP_FEED_CACHE_DIR = "feed_cache"; + + public static final Long HTTP_FEED_CACHE_MAXSIZE = 50L * 1024L * 1024L; // 50 MB + public static final Integer DEFAULT_FETCH_INTERVAL_SECONDS = 600; public static final Integer DEFAULT_COLD_DATA_KEEP_DAYS = 60; diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java b/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java index 79adb90..0f0ac7b 100644 --- a/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java +++ b/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java @@ -147,7 +147,13 @@ private void fetchPages(Connector connector) { } } - var savedPage = capturePageService.save(page); + Page savedPage = null; + //Avoid frequent updates of RSS articles. + if (isRssFetch && existPage != null && Objects.equals(existPage.getTitle(), page.getTitle()) && Objects.equals(existPage.getConnectedAt(), page.getConnectedAt())) { + savedPage = existPage; + } else { + savedPage = capturePageService.save(page); + } if (isRssFetch && isExecuteFetch) { pageArticleContentService.saveContent(savedPage.getId(), rawContent, ArticleContentCategory.RAW_CONTENT); diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java b/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java index 6dd8c1c..8976eea 100644 --- a/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java +++ b/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java @@ -71,8 +71,10 @@ public Connector followFeed(String subscribeUrl) { public PreviewFeedsInfo previewFeeds(String subscribeUrl) { PreviewFeedsInfo feedsInfo = new PreviewFeedsInfo(); feedsInfo.setFeedUrl(subscribeUrl); - var httpClient = HttpUtils.buildHttpClient(globalSettingService.getProxySetting()); - SyndFeed syndFeed = FeedUtils.parseFeedUrl(subscribeUrl, httpClient); + var proxySetting = globalSettingService.getProxySetting(); + var httpClient = HttpUtils.buildHttpClient(proxySetting); + var feedClient = HttpUtils.buildFeedOkHttpClient(proxySetting); + SyndFeed syndFeed = FeedUtils.parseFeedUrl(subscribeUrl, feedClient); if (syndFeed != null) { feedsInfo.setSiteLink(syndFeed.getLink()); feedsInfo.setTitle(syndFeed.getTitle()); diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java b/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java index e9616d8..c42f1d0 100644 --- a/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java +++ b/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java @@ -1,14 +1,20 @@ package com.huntly.server.util; +import com.huntly.server.domain.constant.AppConstants; import com.huntly.server.domain.model.ProxySetting; import lombok.experimental.UtilityClass; +import okhttp3.Cache; +import okhttp3.ConnectionSpec; +import okhttp3.OkHttpClient; import org.apache.commons.lang3.StringUtils; import javax.net.ssl.KeyManager; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; +import java.io.File; import java.net.InetSocketAddress; +import java.net.Proxy; import java.net.ProxySelector; import java.net.http.HttpClient; import java.security.KeyManagementException; @@ -16,12 +22,38 @@ import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.time.Duration; +import java.util.Arrays; /** * @author lcomplete */ @UtilityClass public class HttpUtils { + public static OkHttpClient buildFeedOkHttpClient(ProxySetting proxySetting, Integer timeoutSeconds) { + var builder = new OkHttpClient.Builder() + .cache(new Cache( + new File(AppConstants.HTTP_FEED_CACHE_DIR), AppConstants.HTTP_FEED_CACHE_MAXSIZE + )) + .connectionSpecs(Arrays.asList(ConnectionSpec.MODERN_TLS, ConnectionSpec.COMPATIBLE_TLS, ConnectionSpec.CLEARTEXT)) + .followRedirects(true); + if (proxySetting != null && StringUtils.isNotBlank(proxySetting.getHost())) { + builder = builder.proxy( + new Proxy( + Proxy.Type.HTTP, + new InetSocketAddress(proxySetting.getHost(), proxySetting.getPort()) + ) + ); + } + if (timeoutSeconds != null) { + builder = builder.callTimeout(Duration.ofSeconds(timeoutSeconds)); + } + return builder.build(); + } + + public static OkHttpClient buildFeedOkHttpClient(ProxySetting proxySetting) { + return buildFeedOkHttpClient(proxySetting, 30); + } + public static HttpClient buildHttpClient(ProxySetting proxySetting, Integer timeoutSeconds) { // Configure SSLContext with a TrustManager that accepts any certificate SSLContext sslContext = null; @@ -31,7 +63,7 @@ public static HttpClient buildHttpClient(ProxySetting proxySetting, Integer time } catch (NoSuchAlgorithmException | KeyManagementException e) { throw new RuntimeException(e); } - + var clientBuilder = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(timeoutSeconds)) .sslContext(sslContext) .followRedirects(HttpClient.Redirect.ALWAYS);