From 88a8ab0fe6d96202f371ad8704e57e0fc22bef16 Mon Sep 17 00:00:00 2001 From: Henry F Date: Sun, 14 Jul 2024 09:26:43 -0500 Subject: [PATCH 1/3] #52: A quick fix to address this, I have a concert to go to. --- .../Scrapers/Strategy/ScraperStrategy.cs | 4 +++- Benny-Scraper/Properties/launchSettings.json | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs b/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs index b1b4e88..71f16b7 100644 --- a/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs +++ b/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs @@ -255,6 +255,7 @@ public abstract class ScraperStrategy private SemaphoreSlim _semaphoreSlim; // limit the number of concurrent requests, prevent posssible rate limiting private static readonly List _userAgents = new List { + "Other", // found at https://stackoverflow.com/questions/62402504/c-sharp-httpclient-postasync-403-forbidden-with-ssl "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36", @@ -325,7 +326,8 @@ public SiteConfiguration GetSiteConfiguration() return await retryPolicy.ExecuteAsync(async context => { var requestMessage = new HttpRequestMessage(HttpMethod.Get, uri); - var userAgent = _userAgents[++_userAgentIndex % _userAgents.Count]; + //var userAgent = _userAgents[++_userAgentIndex % _userAgents.Count]; + var userAgent = _userAgents[0]; requestMessage.Headers.Add("User-Agent", userAgent); requestMessage.Options.Set(new HttpRequestOptionsKey("RequestTimeout"), TimeSpan.FromSeconds(10)); Logger.Debug($"Sending request to {uri}"); diff --git a/Benny-Scraper/Properties/launchSettings.json 
b/Benny-Scraper/Properties/launchSettings.json index dd1e951..4eb5dd0 100644 --- a/Benny-Scraper/Properties/launchSettings.json +++ b/Benny-Scraper/Properties/launchSettings.json @@ -6,6 +6,7 @@ }, "Benny-Scraper": { "commandName": "Project", + "commandLineArgs": "-U", "nativeDebugging": true } } From 349600b9e623911f6f0f29c60b6e19dc1f505a00 Mon Sep 17 00:00:00 2001 From: Henry F Date: Mon, 15 Jul 2024 23:05:23 -0500 Subject: [PATCH 2/3] #52: needed to remove the launchsettings.json --- Benny-Scraper/Properties/launchSettings.json | 1 - 1 file changed, 1 deletion(-) diff --git a/Benny-Scraper/Properties/launchSettings.json b/Benny-Scraper/Properties/launchSettings.json index 4eb5dd0..dd1e951 100644 --- a/Benny-Scraper/Properties/launchSettings.json +++ b/Benny-Scraper/Properties/launchSettings.json @@ -6,7 +6,6 @@ }, "Benny-Scraper": { "commandName": "Project", - "commandLineArgs": "-U", "nativeDebugging": true } } From 2787a8a7e8738e16155cfa7d5a5d6ccca8080c14 Mon Sep 17 00:00:00 2001 From: Henry F Date: Tue, 16 Jul 2024 23:03:55 -0500 Subject: [PATCH 3/3] #52: fix specific to lightnovelworld. 
--- .../Scrapers/Strategy/ScraperStrategy.cs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs b/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs index 71f16b7..905678c 100644 --- a/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs +++ b/Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs @@ -148,7 +148,7 @@ protected static void FetchContentByAttribute(Attr attr, NovelDataBuffer novelDa Console.ResetColor(); throw; } - + case Attr.ChapterUrls: var chapterLinkNodes = htmlDocument.DocumentNode.SelectNodes(scraperData.SiteConfig?.Selectors.ChapterLinks); @@ -300,7 +300,7 @@ public SiteConfiguration GetSiteConfiguration() return await LoadHtmlAsync(uri); } - protected static async Task<(HtmlDocument document, Uri updatedUri)> LoadHtmlAsync(Uri uri) + protected async Task<(HtmlDocument document, Uri updatedUri)> LoadHtmlAsync(Uri uri) { ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls; @@ -326,8 +326,11 @@ public SiteConfiguration GetSiteConfiguration() return await retryPolicy.ExecuteAsync(async context => { var requestMessage = new HttpRequestMessage(HttpMethod.Get, uri); - //var userAgent = _userAgents[++_userAgentIndex % _userAgents.Count]; - var userAgent = _userAgents[0]; + string userAgent = _userAgents[++_userAgentIndex % _userAgents.Count]; + + if (_scraperData.BaseUri == new Uri("https://www.lightnovelworld.com/")) + userAgent = _userAgents[0]; + requestMessage.Headers.Add("User-Agent", userAgent); requestMessage.Options.Set(new HttpRequestOptionsKey("RequestTimeout"), TimeSpan.FromSeconds(10)); Logger.Debug($"Sending request to {uri}");