Skip to content

Commit

Permalink
Throttle requests (#65)
Browse files Browse the repository at this point in the history
* Fix styling

* Throttle requests

* Fix styling

---------

Co-authored-by: Baspa <Baspa@users.noreply.github.com>
  • Loading branch information
Baspa and Baspa authored Aug 16, 2024
1 parent 1f72d97 commit efc6447
Show file tree
Hide file tree
Showing 27 changed files with 191 additions and 148 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Easily configure which routes to scan, exclude or include specific checks or eve
- [Scanning routes](#scanning-routes)
- [Scanning a single route](#scanning-a-single-route)
- [Scanning routes in an SPA application](#scanning-routes-in-an-spa-application)
- [Throttling](#throttling)
- [Scan model urls](#scan-model-urls)
- [Saving scans into the database](#saving-scans-into-the-database)
- [Listening to events](#listening-to-events)
Expand Down Expand Up @@ -186,6 +187,17 @@ php artisan seo:scan-url https://vormkracht10.nl --javascript

> Note: This command will use Puppeteer to render the page. Make sure that you have Puppeteer installed on your system. You can install Puppeteer by running the following command: `npm install puppeteer`. **At this moment it's only available when scanning single routes.**
### Throttling

If you want to throttle the requests, set the `throttle.enabled` option to `true` in the config file. You can also limit the number of requests per minute by setting the `throttle.requests_per_minute` option in the config file.

```php
'throttle' => [
'enabled' => false,
'requests_per_minute' => 10,
],
```

### Scan model urls

When you have an application where you have a lot of pages which are related to a model, you can save the SEO score to the model. This way you can check the SEO score of a specific page and show it in your application.
Expand Down
5 changes: 5 additions & 0 deletions config/seo.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@
'vapor-ui/*',
],

'throttle' => [
'enabled' => false,
'requests_per_minute' => null,
],

/*
|--------------------------------------------------------------------------
| Domains (DNS resolving)
Expand Down
46 changes: 36 additions & 10 deletions src/Commands/SeoScan.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,26 +94,52 @@ public function handle(): int
/**
 * Runs the SEO scan for every route returned by getRoutes(), optionally
 * throttling how many scans are started per minute.
 *
 * Throttling is driven by the `seo.throttle.enabled` and
 * `seo.throttle.requests_per_minute` config values. Once the request counter
 * reaches the limit, the loop sleeps for whatever is left of the current
 * 60-second window, then resets the counter and the window start time.
 *
 * NOTE(review): this span is rendered from a diff, so lines from before and
 * after the change appear interleaved (e.g. the two `$routes->each(` openers).
 */
private function calculateScoreForRoutes(): void
{
$routes = self::getRoutes();
$throttleEnabled = config('seo.throttle.enabled');
// NOTE(review): falls back to the string 'N/A' when no limit is configured;
// the same value is later compared with >= against the integer request
// counter, not only printed. Confirm this comparison behaves as intended.
$maxRequests = config('seo.throttle.requests_per_minute') ?? 'N/A';
$requestCount = 0;
$startTime = time();

if ($throttleEnabled) {
$this->line('<fg=yellow>Throttling enabled. Maximum requests per minute: '.$maxRequests.'</>');
// Pauses five seconds after printing the notice (presumably so it can be read; confirm).
sleep(5);
}

$routes->each(function ($path, $name) {
// The counter and window start are captured by reference so they persist across iterations.
$routes->each(function ($path, $name) use ($throttleEnabled, $maxRequests, &$requestCount, &$startTime) {
$this->progress->start();

$seo = Seo::check(url: route($name), progress: $this->progress, useJavascript: config('seo.javascript'));

$this->failed += count($seo->getFailedChecks());
$this->success += count($seo->getSuccessfulChecks());
$this->routeCount++;
if ($throttleEnabled) {

if (config('seo.database.save')) {
$this->saveScoreToDatabase(seo: $seo, url: route($name));
// Limit reached: wait out the rest of the 60-second window, then start a new one.
if ($requestCount >= $maxRequests) {
$elapsedTime = time() - $startTime;
if ($elapsedTime < 60) {
sleep(60 - $elapsedTime);
}
$requestCount = 0;
$startTime = time();
}
$requestCount++;
}

$this->performSeoCheck($name);
$this->progress->finish();

$this->logResultToConsole($seo, route($name));
});
}

/**
 * Runs the SEO check for a single named route and records the outcome.
 *
 * The route URL is resolved once and reused for the check, the optional
 * database save and the console log, so all three refer to the same URL.
 * The failed/successful check counts are added to the running totals and
 * the scanned-route counter is incremented.
 *
 * @param  mixed  $name  Route name to scan, passed straight to route().
 */
private function performSeoCheck($name): void
{
    // Resolve the URL a single time instead of once per use.
    $url = route($name);

    $seo = Seo::check(url: $url, progress: $this->progress, useJavascript: config('seo.javascript'));

    $this->failed += count($seo->getFailedChecks());
    $this->success += count($seo->getSuccessfulChecks());
    $this->routeCount++;

    // Persisting the score is opt-in through the `seo.database.save` config flag.
    if (config('seo.database.save')) {
        $this->saveScoreToDatabase(seo: $seo, url: $url);
    }

    $this->logResultToConsole($seo, $url);
}

private static function getRoutes(): Collection
{
$routes = collect(app('router')->getRoutes()->getRoutesByName())
Expand Down
12 changes: 6 additions & 6 deletions tests/Checks/Configuration/NoFollowCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Configuration\NoFollowCheck;

it('can perform the nofollow check with robots tag', function () {
$check = new NoFollowCheck();
$crawler = new Crawler();
$check = new NoFollowCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('', 200, ['X-Robots-Tag' => 'nofollow']),
Expand All @@ -18,8 +18,8 @@
});

it('can perform the nofollow check with robots metatag', function () {
$check = new NoFollowCheck();
$crawler = new Crawler();
$check = new NoFollowCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="robots" content="nofollow"></head></html>', 200),
Expand All @@ -31,8 +31,8 @@
});

it('can perform the nofollow check with googlebot metatag', function () {
$check = new NoFollowCheck();
$crawler = new Crawler();
$check = new NoFollowCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="googlebot" content="nofollow"></head></html>', 200),
Expand Down
12 changes: 6 additions & 6 deletions tests/Checks/Configuration/NoIndexCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Configuration\NoIndexCheck;

it('can perform the noindex check with robots tag', function () {
$check = new NoIndexCheck();
$crawler = new Crawler();
$check = new NoIndexCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('', 200, ['X-Robots-Tag' => 'noindex']),
Expand All @@ -18,8 +18,8 @@
});

it('can perform the noindex check with robots metatag', function () {
$check = new NoIndexCheck();
$crawler = new Crawler();
$check = new NoIndexCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="robots" content="noindex"></head></html>', 200),
Expand All @@ -31,8 +31,8 @@
});

it('can perform the noindex check with googlebot metatag', function () {
$check = new NoIndexCheck();
$crawler = new Crawler();
$check = new NoIndexCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="googlebot" content="noindex"></head></html>', 200),
Expand Down
4 changes: 2 additions & 2 deletions tests/Checks/Configuration/RobotsCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
use Vormkracht10\Seo\Checks\Configuration\RobotsCheck;

// Fakes the robots.txt response for the site and asserts that RobotsCheck passes.
// NOTE(review): rendered from a diff, so the old and new forms of the changed lines both appear.
it('can perform the robots check', function () {
$check = new RobotsCheck();
$check = new RobotsCheck;

Http::fake([
'vormkracht10.nl/robots.txt' => Http::response('User-agent: Googlebot
Disallow: /admin', 200),
]);

$this->assertTrue($check->check(Http::get('vormkracht10.nl'), new Crawler()));
$this->assertTrue($check->check(Http::get('vormkracht10.nl'), new Crawler));
});
12 changes: 6 additions & 6 deletions tests/Checks/Content/AltTagCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Content\AltTagCheck;

it('can perform the alt tag check with alt', function () {
$check = new AltTagCheck();
$crawler = new Crawler();
$check = new AltTagCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/images/logo.png" width="5" height="5" alt="Vormkracht10 logo"></body></html>', 200),
Expand All @@ -18,8 +18,8 @@
});

it('can perform the alt tag check without alt', function () {
$check = new AltTagCheck();
$crawler = new Crawler();
$check = new AltTagCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/images/logo.png" width="5" height="5"></body></html>', 200),
Expand All @@ -31,8 +31,8 @@
});

it('can perform the alt tag check with empty alt', function () {
$check = new AltTagCheck();
$crawler = new Crawler();
$check = new AltTagCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/images/logo.png" width="5" height="5" alt=""></body></html>', 200),
Expand Down
12 changes: 6 additions & 6 deletions tests/Checks/Content/BrokenImageCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Content\BrokenImageCheck;

it('can perform the broken image check on broken images', function () {
$check = new BrokenImageCheck();
$crawler = new Crawler();
$check = new BrokenImageCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl/404"></body></html>', 200),
Expand All @@ -18,8 +18,8 @@
});

it('can perform the broken image check on working images', function () {
$check = new BrokenImageCheck();
$crawler = new Crawler();
$check = new BrokenImageCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><img src="https://vormkracht10.nl"></body></html>', 200),
Expand All @@ -31,8 +31,8 @@
});

it('can perform the broken image check on content where no images are used', function () {
$check = new BrokenImageCheck();
$crawler = new Crawler();
$check = new BrokenImageCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body></body></html>', 200),
Expand Down
32 changes: 16 additions & 16 deletions tests/Checks/Content/BrokenLinkCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Content\BrokenLinkCheck;

it('can perform the broken link check on broken links', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl/404">Vormkracht10</a></body></html>', 200),
Expand All @@ -18,8 +18,8 @@
});

it('can perform the broken link check on working links', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl">Vormkracht10</a></body></html>', 200),
Expand All @@ -31,8 +31,8 @@
});

it('can perform the broken link check on content where no links are used', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body></body></html>', 200),
Expand All @@ -44,8 +44,8 @@
});

it('can run the broken link check on a relative url', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="/404">Vormkracht10</a></body></html>', 200),
Expand All @@ -59,8 +59,8 @@
it('can bypass DNS layers using DNS resolving', function () {
$this->markTestSkipped('This test is skipped because we cannot fake DNS resolving.');

$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl">Vormkracht10</a></body></html>', 200),
Expand All @@ -76,8 +76,8 @@
});

it('cannot bypass DNS layers using a fake IP when DNS resolving', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

config(['seo.resolve' => [
'vormkracht10.nl' => '8.8.8.8',
Expand All @@ -93,8 +93,8 @@
});

it('can check if link is broken by checking on configured status codes', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

config(['seo.broken_link_check.status_codes' => ['403']]);

Expand All @@ -108,8 +108,8 @@
});

it('can exclude certain paths from the broken link check', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();
$check = new BrokenLinkCheck;
$crawler = new Crawler;

config(['seo.broken_link_check.exclude_links' => ['https://vormkracht10.nl/excluded']]);

Expand Down
8 changes: 4 additions & 4 deletions tests/Checks/Content/ContentLengthCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Content\ContentLengthCheck;

it('can perform the content length check on content with a length of 2100 characters', function () {
$check = new ContentLengthCheck();
$crawler = new Crawler();
$check = new ContentLengthCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response(
Expand All @@ -26,8 +26,8 @@
});

it('can perform the content length check on content with less characters', function () {
$check = new ContentLengthCheck();
$crawler = new Crawler();
$check = new ContentLengthCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response(
Expand Down
12 changes: 6 additions & 6 deletions tests/Checks/Content/KeywordInFirstParagraphCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
use Vormkracht10\Seo\Checks\Content\KeywordInFirstParagraphCheck;

it('can perform the keyword in first paragraph check on a page with the keyword in the first paragraph', function () {
$check = new KeywordInFirstParagraphCheck();
$crawler = new Crawler();
$check = new KeywordInFirstParagraphCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="keywords" content="vormkracht10, seo, laravel, package"></head><body><p>vormkracht10 is a great company that specializes in SEO and Laravel packages.</p></body></html>', 200),
Expand All @@ -18,8 +18,8 @@
});

it('can perform the keyword in first paragraph check on a page without the keyword in the first paragraph', function () {
$check = new KeywordInFirstParagraphCheck();
$crawler = new Crawler();
$check = new KeywordInFirstParagraphCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head><meta name="keywords" content="seo, laravel, package"></head><body><p>Lorem ipsum dolor sit amet.</p></body></html>', 200),
Expand All @@ -31,8 +31,8 @@
});

it('can perform the keyword in first paragraph check on a page without keywords', function () {
$check = new KeywordInFirstParagraphCheck();
$crawler = new Crawler();
$check = new KeywordInFirstParagraphCheck;
$crawler = new Crawler;

Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body></body></html>', 200),
Expand Down
Loading

0 comments on commit efc6447

Please sign in to comment.