Skip to content

Commit

Permalink
Merge pull request #53 from martial-god/Task/52-LightNovelworld-Not-Getting-Chapters
Browse files Browse the repository at this point in the history

Task/52 light novelworld not getting chapters
  • Loading branch information
feahnthor authored Jul 17, 2024
2 parents bfb7622 + 2787a8a commit fe7835d
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions Benny-Scraper.BusinessLogic/Scrapers/Strategy/ScraperStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ protected static void FetchContentByAttribute(Attr attr, NovelDataBuffer novelDa
Console.ResetColor();
throw;
}


case Attr.ChapterUrls:
var chapterLinkNodes = htmlDocument.DocumentNode.SelectNodes(scraperData.SiteConfig?.Selectors.ChapterLinks);
Expand Down Expand Up @@ -255,6 +255,7 @@ public abstract class ScraperStrategy
private SemaphoreSlim _semaphoreSlim; // limit the number of concurrent requests, prevent possible rate limiting
private static readonly List<string> _userAgents = new List<string>
{
"Other", // found at https://stackoverflow.com/questions/62402504/c-sharp-httpclient-postasync-403-forbidden-with-ssl
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36",
Expand Down Expand Up @@ -299,7 +300,7 @@ public SiteConfiguration GetSiteConfiguration()
return await LoadHtmlAsync(uri);
}

protected static async Task<(HtmlDocument document, Uri updatedUri)> LoadHtmlAsync(Uri uri)
protected async Task<(HtmlDocument document, Uri updatedUri)> LoadHtmlAsync(Uri uri)
{
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12 | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls;

Expand All @@ -325,7 +326,11 @@ public SiteConfiguration GetSiteConfiguration()
return await retryPolicy.ExecuteAsync(async context =>
{
var requestMessage = new HttpRequestMessage(HttpMethod.Get, uri);
var userAgent = _userAgents[++_userAgentIndex % _userAgents.Count];
string userAgent = _userAgents[++_userAgentIndex % _userAgents.Count];

if (_scraperData.BaseUri == new Uri("https://www.lightnovelworld.com/"))
userAgent = _userAgents[0];

requestMessage.Headers.Add("User-Agent", userAgent);
requestMessage.Options.Set(new HttpRequestOptionsKey<TimeSpan>("RequestTimeout"), TimeSpan.FromSeconds(10));
Logger.Debug($"Sending request to {uri}");
Expand Down

0 comments on commit fe7835d

Please sign in to comment.