-
Notifications
You must be signed in to change notification settings - Fork 4
/
LinkDiscoverer.php
41 lines (36 loc) · 1.31 KB
/
LinkDiscoverer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
<?php
namespace LastCall\Crawler\Handler\Discovery;
use LastCall\Crawler\CrawlerEvents;
use LastCall\Crawler\Event\CrawlerHtmlResponseEvent;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
/**
 * Discovers link URLs in an HTML response.
 *
 * Listens for both the successful and the failed HTML response events and
 * hands the href of every anchor element in the document on for processing.
 */
class LinkDiscoverer extends AbstractDiscoverer implements EventSubscriberInterface
{
    /**
     * {@inheritdoc}
     */
    public static function getSubscribedEvents()
    {
        // Both outcomes are routed to the same handler.
        return [
            CrawlerEvents::SUCCESS_HTML => 'onHtmlResponse',
            CrawlerEvents::FAILURE_HTML => 'onHtmlResponse',
        ];
    }

    /**
     * Discover link URLs from anchor tags.
     *
     * Selects every <a> element that carries an href attribute, collects the
     * raw attribute values, drops duplicates and passes the result to
     * processUris() with the context string 'link'. processUris() is not
     * defined in this class (presumably inherited from AbstractDiscoverer).
     *
     * @param \LastCall\Crawler\Event\CrawlerHtmlResponseEvent            $event      Event giving access to the parsed document.
     * @param string                                                      $eventName  Name of the dispatched event (not used here).
     * @param \Symfony\Component\EventDispatcher\EventDispatcherInterface $dispatcher Dispatcher forwarded to processUris().
     */
    public function onHtmlResponse(CrawlerHtmlResponseEvent $event, $eventName, EventDispatcherInterface $dispatcher)
    {
        $crawler = $event->getDomCrawler();
        $nodes = $crawler->filterXPath('descendant-or-self::a[@href]');

        // The attribute list is given as an array: recent symfony/dom-crawler
        // releases no longer accept a bare string, while older releases cast
        // the argument to an array anyway. With a single attribute the result
        // is still a flat list of values.
        $hrefs = $nodes->extract(['href']);

        // array_unique() keeps the first occurrence of each value and
        // preserves the original keys.
        $urls = array_unique($hrefs);

        $this->processUris($event, $dispatcher, $urls, 'link');
    }
}