Skip to content

Commit

Permalink
Added Blazer browser (PalmOS).
Browse files Browse the repository at this point in the history
Added `PalmOS` and updated Windows captures to ignore PalmOS.
Added a new `ai` category, to mark crawlers that are specifically for AI.
Added specific misspellings that indicate a web scraper instead of the human it is trying to spoof.
Added and updated tests.
  • Loading branch information
hexydec committed Apr 8, 2024
1 parent 55ce30b commit 607587c
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 16 deletions.
6 changes: 6 additions & 0 deletions src/mappings/browsers.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ public static function get() : array {
'browser' => $value,
'browserversion' => $tokens[$i + 1]
]),
'Blazer' => new props('start', fn (string $value) : array => [
'type' => 'human',
'browser' => 'Blazer',
'browserversion' => \mb_substr($value, 7),
'engine' => 'Proprietary'
]),
'Edg/' => new props('start', $fn['browserslash']),
'EdgA/' => new props('start', $fn['browserslash']),
'Edge/' => new props('start', $fn['browserslash']),
Expand Down
4 changes: 2 additions & 2 deletions src/mappings/categories.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ public static function get() : array {
'type' => 'human',
'category' => 'mobile'
]),
'Moblie' => new props('exact', [ // some samsung devices mispelt it
'Phone' => new props('exact', [
'type' => 'human',
'category' => 'mobile'
]),
'Phone' => new props('exact', [
'PDA' => new props('exact', [
'type' => 'human',
'category' => 'mobile'
]),
Expand Down
24 changes: 23 additions & 1 deletion src/mappings/crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public static function get() : array {
'search' => fn (string $value) : array => self::getApp($value, ['category' => 'search']),
'ads' => fn (string $value) : array => self::getApp($value, ['category' => 'ads']),
'validator' => fn (string $value) : array => self::getApp($value, ['category' => 'validator']),
'ai' => fn (string $value) : array => self::getApp($value, ['category' => 'ai']),
'feed' => fn (string $value) : array => self::getApp($value, \array_merge(
\str_contains($value, 'WhatsApp/') ? [
'app' => 'WhatsApp'
Expand Down Expand Up @@ -159,8 +160,17 @@ public static function get() : array {
'map' => fn (string $value) : ?array => self::getApp($value)
];
return [
// "Mozlila/" is a misspelling of "Mozilla/": tokens starting with it are flagged as a scraper
'Mozlila/' => new props('start', [
	'type' => 'robot',
	'category' => 'scraper' // key was previously misspelt 'categpry', so the category was never set
]),
'Moblie' => new props('exact', [ // misspelling of "Mobile"; some Samsung devices misspelt it
'type' => 'robot',
'category' => 'scraper'
]),
'Yahoo! Slurp' => new props('exact', $fn['search']),
'facebookexternalhit/' => new props('start', $fn['feed']),
'facebookcatalog/' => new props('start', $fn['feed']),
'Google-Site-Verification/' => new props('start', $fn['validator']),
'Google-InspectionTool/' => new props('start', $fn['validator']),
'Google-Read-Aloud' => new props('exact', $fn['feed']),
Expand All @@ -184,6 +194,7 @@ public static function get() : array {
}),
'okhttp' => new props('start', $fn['scraper']),
'python' => new props('start', $fn['scraper']),
'grpc-python/' => new props('start', $fn['scraper']),
'jsdom/' => new props('start', $fn['scraper']),
'Nessus' => new props('start', $fn['monitor']),
'monitoring360bot' => new props('start', $fn['monitor']),
Expand Down Expand Up @@ -230,6 +241,7 @@ public static function get() : array {
'PRTGCloudBot/' => new props('start', $fn['monitor']),
'Site24x7' => new props('exact', $fn['monitor']),
'StatusCake' => new props('exact', $fn['monitor']),
'AWS Network Health' => new props('start', $fn['monitor']),
'adbeat.com' => new props('start', fn (string $value) : array => [
'type' => 'robot',
'category' => 'ads',
Expand Down Expand Up @@ -271,6 +283,8 @@ public static function get() : array {
'appversion' => \mb_substr($value, 37)
]),
'Pro-Sitemaps/' => new props('start', $fn['crawler']),
'omgili/' => new props('start', $fn['crawler']),
'CCBot/' => new props('start', $fn['crawler']),
'Chrome Privacy Preserving Prefetch Proxy' => new props('exact', $fn['feed']),
'ViberUrlDownloader' => new props('exact', $fn['feed']),
'Google-Lens' => new props('exact', $fn['feed']),
Expand All @@ -281,7 +295,7 @@ public static function get() : array {
'W3C-checklink/' => new props('start', $fn['validator']),
'CSSCheck/' => new props('start', $fn['validator']),
'Let\'s Encrypt validation server' => new props('exact', $fn['validator']),
'SEO-Macroscope/' => new props('exact', $fn['validator']),
'SEO-Macroscope/' => new props('start', $fn['validator']),
'Electronic Frontier Foundation\'s Do Not Track Verifier' => new props('exact', $fn['validator']),
'Expanse' => new props('start', $fn['crawler']),
'eCairn-Grabber/' => new props('start', $fn['scraper']),
Expand Down Expand Up @@ -313,6 +327,14 @@ public static function get() : array {
'http-client/' => new props('any', $fn['scraper']),
'HttpClient/' => new props('any', $fn['scraper']),
'PowerShell/' => new props('start', $fn['scraper']),
'GPTBot/' => new props('start', $fn['ai']),
'Diffbot/' => new props('start', $fn['ai']),
'Amazonbot/' => new props('start', $fn['ai']),
'Applebot/' => new props('start', $fn['ai']),
'PerplexityBot/' => new props('start', $fn['ai']),
'YouBot/' => new props('start', $fn['ai']),
'Google-Extended' => new props('start', $fn['ai']),
'ChatGPT-User/' => new props('start', $fn['feed']),
'Validator' => new props('any', $fn['validator']),
'feed' => new props('any', $fn['feed']),
'spider' => new props('any', $fn['crawler']),
Expand Down
52 changes: 42 additions & 10 deletions src/mappings/platforms.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,24 @@ public static function get() : array {
return [

// platforms
'PalmSource' => new props('start', fn (string $value) : array => [
'type' => 'human',
'category' => 'mobile',
'platform' => 'PalmOS',
'platformversion' => \mb_substr($value, 11) ?: null,
'kernel' => 'AMX 68000',
'architecture' => 'arm',
'bits' => 32,
]),
'PalmOS' => new props('start', fn (string $value) : array => [
'type' => 'human',
'category' => 'mobile',
'platform' => 'PalmOS',
'platformversion' => \mb_substr($value, 7) ?: null,
'kernel' => 'AMX 68000',
'architecture' => 'arm',
'bits' => 32,
]),
'Windows NT ' => new props('any', $fn['platformwindows']),
'Windows Phone' => new props('start', function (string $value) : array {
$version = \mb_substr($value, 14);
Expand All @@ -88,15 +106,22 @@ public static function get() : array {
'kernel' => \intval($version) >= 8 ? 'Windows NT' : 'Windows CE'
];
}),
'Win98' => new props('start', [
'type' => 'human',
'category' => 'desktop',
'architecture' => 'x86',
'bits' => 32,
'kernel' => 'MS-DOS',
'platform' => 'Windows',
'platformversion' => '98'
]),
'Win98' => new props('start', function (string $value, int $i, array $tokens) : array {
foreach ($tokens AS $item) {
if (\str_starts_with($item, 'PalmSource')) {
return [];
}
}
return [
'type' => 'human',
'category' => 'desktop',
'architecture' => 'x86',
'bits' => 32,
'kernel' => 'MS-DOS',
'platform' => 'Windows',
'platformversion' => '98'
];
}),
'Win32' => new props('exact', [
'type' => 'human',
'category' => 'desktop',
Expand All @@ -119,7 +144,14 @@ public static function get() : array {
'platform' => 'Windows',
'platformversion' => \mb_substr($value, 5)
]),
'Windows' => new props('any', $fn['platformwindows']),
'Windows' => new props('any', function (string $value, int $i, array $tokens) use ($fn) : array {
foreach ($tokens AS $item) {
if (\str_starts_with($item, 'PalmSource')) {
return [];
}
}
return $fn['platformwindows']($value, $i, $tokens);
}),
'Mac OS X' => new props('any', function (string $value) : array {
$version = \str_replace('_', '.', \mb_substr($value, \mb_stripos($value, 'Mac OS X') + 9));
$register = $version && \intval(\explode('.', $version)[1] ?? 0) >= 6 ? 64 : null;
Expand Down
17 changes: 16 additions & 1 deletion tests/browsersTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ public function testSafari() : void {
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)' => [
'string' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)',
'type' => 'robot',
'category' => 'search',
'category' => 'ai',
'app' => 'AppleBot',
'appname' => 'Applebot',
'appversion' => '0.1',
Expand Down Expand Up @@ -1863,6 +1863,21 @@ public function testOther() : void {
'engineversion' => '122.0.0.0',
'browser' => 'Huawei Browser',
'browserversion' => '14.0.0.322'
],
'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; PalmSource/hspr-H102; Blazer/4.0) 16;320x320' => [
'string' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; PalmSource/hspr-H102; Blazer/4.0) 16;320x320',
'type' => 'human',
'category' => 'mobile',
'architecture' => 'arm',
'bits' => 32,
'kernel' => 'AMX 68000',
'platform' => 'PalmOS',
'platformversion' => 'hspr-H102',
'engine' => 'Proprietary',
'browser' => 'Blazer',
'browserversion' => '4.0',
'width' => '320',
'height' => '320'
]
];
foreach ($strings AS $ua => $item) {
Expand Down
109 changes: 107 additions & 2 deletions tests/crawlersTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ public function testSearch() : void {
'appname' => 'Applebot',
'appversion' => '0.1',
'type' => 'robot',
'category' => 'search',
'category' => 'ai',
'architecture' => 'x86',
'bits' => 64,
'processor' => 'Intel',
Expand All @@ -116,7 +116,7 @@ public function testSearch() : void {
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)' => [
'string' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)',
'type' => 'robot',
'category' => 'search',
'category' => 'ai',
'app' => 'AppleBot',
'appname' => 'Applebot',
'appversion' => '0.1',
Expand Down Expand Up @@ -998,4 +998,109 @@ public function testMonitors() : void {
$this->assertEquals($item, \array_filter((array) agentzero::parse($ua), fn(mixed $item) : mixed => $item !== null), $ua);
}
}

/**
 * Checks that user agents containing AI crawler tokens parse to type `robot`
 * and category `ai`, along with the expected app, platform and browser properties.
 *
 * Each key in `$strings` is the user agent to parse, and each value is the exact
 * set of non-null properties the parsed result is expected to contain.
 */
public function testAi() : void {
	$strings = [
		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)' => [
			'string' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
			'type' => 'robot',
			'category' => 'ai',
			'vendor' => 'Apple',
			'device' => 'Macintosh',
			'processor' => 'Intel',
			'architecture' => 'x86',
			'bits' => 64,
			'kernel' => 'Linux',
			'platform' => 'Mac OS X',
			'platformversion' => '10.10.1',
			'engine' => 'WebKit',
			'engineversion' => '600.2.5',
			'browser' => 'Safari',
			'browserversion' => '8.0.2',
			'app' => 'Amazonbot',
			'appname' => 'Amazonbot',
			'appversion' => '0.1',
			'url' => 'https://developer.amazon.com/support/amazonbot'
		],
		'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)' => [
			'string' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)',
			'type' => 'robot',
			'category' => 'ai',
			'kernel' => 'Windows NT',
			'platform' => 'Windows',
			'platformversion' => 'XP',
			'engine' => 'Gecko',
			'engineversion' => '20090729',
			'browser' => 'Firefox',
			'browserversion' => '3.5.2',
			'language' => 'en-US',
			'app' => 'Diffbot',
			'appname' => 'Diffbot',
			'appversion' => '0.1',
			'url' => 'http://www.diffbot.com'
		],
		'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15 (Applebot/0.1)' => [
			'string' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15 (Applebot/0.1)',
			'type' => 'robot',
			'category' => 'ai',
			'vendor' => 'Apple',
			'device' => 'Macintosh',
			'processor' => 'Intel',
			'architecture' => 'x86',
			'bits' => 64, // integer, matching the other fixtures in this test (was the string '64')
			'kernel' => 'Linux',
			'platform' => 'Mac OS X',
			'platformversion' => '10.14.5',
			'engine' => 'WebKit',
			'engineversion' => '605.1.15',
			'browser' => 'Safari',
			'browserversion' => '12.1.1',
			'app' => 'AppleBot',
			'appname' => 'Applebot',
			'appversion' => '0.1'
		],
		'Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)' => [
			'string' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)',
			'type' => 'robot',
			'category' => 'ai',
			'vendor' => 'Apple',
			'device' => 'iPhone',
			'model' => '12B410',
			'architecture' => 'arm',
			'bits' => 64,
			'kernel' => 'Linux',
			'platform' => 'iOS',
			'platformversion' => '8.1',
			'engine' => 'WebKit',
			'engineversion' => '600.1.4',
			'browser' => 'Safari',
			'browserversion' => '8.0',
			'app' => 'AppleBot',
			'appname' => 'Applebot',
			'appversion' => '0.1',
			'url' => 'http://www.apple.com/go/applebot'
		],
		'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)' => [
			'string' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)',
			'type' => 'robot',
			'category' => 'ai',
			'app' => 'PerplexityBot',
			'appname' => 'PerplexityBot',
			'appversion' => '1.0',
			'url' => 'https://perplexity.ai/perplexitybot'
		],
		'Mozilla/5.0 (compatible; YouBot/1.0; +https://about.you.com/youbot/)' => [
			'string' => 'Mozilla/5.0 (compatible; YouBot/1.0; +https://about.you.com/youbot/)',
			'type' => 'robot',
			'category' => 'ai',
			'app' => 'YouBot',
			'appname' => 'YouBot',
			'appversion' => '1.0',
			'url' => 'https://about.you.com/youbot/'
		]
	];
	foreach ($strings AS $ua => $item) {
		// drop null properties from the parsed result so only populated fields are compared
		$this->assertEquals($item, \array_filter((array) agentzero::parse($ua), fn(mixed $item) : mixed => $item !== null), $ua);
	}
}
}
28 changes: 28 additions & 0 deletions tests/platformsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 +1450,34 @@ public function testOther() : void {
'browserversion' => '3.0.0',
'engine' => 'Blink',
'engineversion' => '83.0.4103.122'
],
'Mozilla/4.0 (PDA; PalmOS/sony/model prmr/Revision:1.1.54 (en)) NetFront/3.0' => [
'string' => 'Mozilla/4.0 (PDA; PalmOS/sony/model prmr/Revision:1.1.54 (en)) NetFront/3.0',
'type' => 'human',
'category' => 'mobile',
'architecture' => 'arm',
'bits' => 32,
'kernel' => 'AMX 68000',
'platform' => 'PalmOS',
'platformversion' => 'sony/model',
'browser' => 'NetFront',
'browserversion' => '3.0',
'language' => 'en'
],
'Palm680/RC1 Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; PalmSource/Palm-D053; Blazer/4.5) 16;320x320' => [
'string' => 'Palm680/RC1 Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; PalmSource/Palm-D053; Blazer/4.5) 16;320x320',
'type' => 'human',
'category' => 'mobile',
'architecture' => 'arm',
'bits' => 32,
'kernel' => 'AMX 68000',
'platform' => 'PalmOS',
'platformversion' => 'Palm-D053',
'engine' => 'Proprietary',
'browser' => 'Blazer',
'browserversion' => '4.5',
'width' => '320',
'height' => '320'
]
];
foreach ($strings AS $ua => $item) {
Expand Down

0 comments on commit 607587c

Please sign in to comment.