Skip to content

Commit

Permalink
Merge pull request #5 from grgk/Parser-optimization
Browse files Browse the repository at this point in the history
added option of injecting a custom parser,
  • Loading branch information
grgk authored Oct 23, 2019
2 parents d2660bc + a3083dc commit a52ec60
Show file tree
Hide file tree
Showing 15 changed files with 356 additions and 203 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"psr/simple-cache": "^1.0"
},
"require-dev": {
"phpunit/phpunit": "^7"
"phpunit/phpunit": "^7",
"ext-json": "*"
},
"autoload": {
"psr-4": {
Expand Down
29 changes: 29 additions & 0 deletions examples/analyze_file_with_custom_parser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php
/**
* Page analysis example
*
* Runs an SEO analysis of a remote page (loaded by URL) using a custom parser.
*/

require_once(__DIR__ . '/../vendor/autoload.php');

use SeoAnalyzer\Analyzer;
use SeoAnalyzer\Factor;
use SeoAnalyzer\HttpClient\Exception\HttpException;
use SeoAnalyzer\Page;
use SeoAnalyzer\Parser\ExampleCustomParser;

// Initialised up front so the final output step never reads an undefined
// variable when one of the catch branches below handles a failure.
$results = null;

try {
    $page = new Page('https://www.msn.com/pl-pl');
    // Swap in the custom parser before the analyzer works with the page.
    $page->setParser(new ExampleCustomParser());
    $analyzer = new Analyzer($page);
    // Restrict the analysis to the image "alt" attribute factor.
    $analyzer->metrics = $page->setMetrics([Factor::ALTS]);
    $results = $analyzer->analyze();
} catch (HttpException $e) {
    echo "Error loading page: " . $e->getMessage();
} catch (ReflectionException $e) {
    echo "Error loading metric file: " . $e->getMessage();
}

// Only dump results when the analysis actually produced some.
if ($results !== null) {
    print_r($results);
}
5 changes: 4 additions & 1 deletion src/Metric/MetricFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

namespace SeoAnalyzer\Metric;

use ReflectionException;

class MetricFactory
{
/**
* @param string $key
* @param null $inputData
* @return mixed
* @throws ReflectionException
*/
public static function get(string $key, $inputData = null)
{
Expand All @@ -24,6 +27,6 @@ public static function get(string $key, $inputData = null)
}
return $metric;
}
throw new \ReflectionException('Metric class ' . $class .' not exists');
throw new ReflectionException('Metric class ' . $class .' not exists');
}
}
26 changes: 0 additions & 26 deletions src/Metric/Page/SizeMetric.php

This file was deleted.

58 changes: 47 additions & 11 deletions src/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
use SeoAnalyzer\HttpClient\Exception\HttpException;
use SeoAnalyzer\Metric\MetricFactory;
use ReflectionException;
use SeoAnalyzer\Parser\Parser;
use SeoAnalyzer\Parser\ParserInterface;

class Page
{
Expand Down Expand Up @@ -52,19 +54,33 @@ class Page
*/
public $client;

/**
* @var ParserInterface
*/
public $parser;

/**
* Page constructor.
*
* @param string|null $url
* @param string|null $locale
* @param ClientInterface|null $client
* @param ParserInterface|null $parser
*/
public function __construct(string $url = null, string $locale = null, ClientInterface $client = null)
{
public function __construct(
string $url = null,
string $locale = null,
ClientInterface $client = null,
ParserInterface $parser = null
) {
$this->client = $client;
if (empty($client)) {
$this->client = new Client();
}
$this->parser = $parser;
if (empty($parser)) {
$this->parser = new Parser();
}
if (!empty($url)) {
$this->url = $this->setUpUrl($url);
$this->getContent();
Expand All @@ -74,6 +90,26 @@ public function __construct(string $url = null, string $locale = null, ClientInt
}
}

/**
* Sets custom Http Client.
*
* Replaces whatever is currently stored in the public $client property; the
* constructor always assigns one (a default Client when none is passed in).
* NOTE(review): when a URL is given to the constructor it calls getContent()
* right away, presumably through the client assigned at that point, so a
* client set here would only affect later requests. Confirm against getContent().
*
* @param ClientInterface $client Client instance to store on this page.
*/
public function setClient(ClientInterface $client): void
{
$this->client = $client;
}

/**
* Sets custom Html Parser.
*
* Replaces whatever is currently stored in the public $parser property; the
* constructor always assigns one (a default Parser when none is passed in).
* parse() hands the page content to this parser and reads the meta, headers,
* title, text and alts factors from it.
*
* @param ParserInterface $parser Parser instance to store on this page.
*/
public function setParser(ParserInterface $parser): void
{
$this->parser = $parser;
}

/**
* Verifies URL and sets up some basic metrics.
*
Expand Down Expand Up @@ -180,13 +216,16 @@ protected function getHttpClientOptions()
*/
public function parse()
{
$parser = new Parser($this->content);
if (empty($this->content)) {
$this->getContent();
}
$this->parser->setContent($this->content);
$this->setFactors([
Factor::META_META => $parser->getMeta(),
Factor::HEADERS => $parser->getHeaders(),
Factor::META_TITLE => $parser->getTitle(),
Factor::TEXT => $parser->getText(),
Factor::ALTS => $parser->getAlts()
Factor::META_META => $this->parser->getMeta(),
Factor::HEADERS => $this->parser->getHeaders(),
Factor::META_TITLE => $this->parser->getTitle(),
Factor::TEXT => $this->parser->getText(),
Factor::ALTS => $this->parser->getAlts()
]);
}

Expand Down Expand Up @@ -216,9 +255,6 @@ public function setMetrics(array $config)

private function initializeFactors()
{
if (empty($this->content)) {
$this->getContent();
}
if (empty($this->dom)) {
$this->parse();
}
Expand Down
125 changes: 0 additions & 125 deletions src/Parser.php

This file was deleted.

63 changes: 63 additions & 0 deletions src/Parser/AbstractParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

namespace SeoAnalyzer\Parser;

use DOMDocument;
use DOMElement;
use DOMNodeList;

/**
 * Base class for HTML parsers backed by PHP's DOM extension.
 *
 * Keeps a DOMDocument for the HTML being parsed and offers helpers that
 * subclasses can use to look up and strip elements by tag name.
 */
abstract class AbstractParser implements ParserInterface
{
    /**
     * @var DOMDocument Dom representation of HTML document
     */
    protected $dom;

    /**
     * @param string|null $html Html document to parse. When null or empty,
     *                          the parser starts with an empty document.
     */
    public function __construct(?string $html = null)
    {
        $this->dom = new DOMDocument();
        if ($html !== null && $html !== '') {
            $this->setContent($html);
        }
    }

    /**
     * @inheritDoc
     */
    public function setContent($html): void
    {
        // DOMDocument::loadHTML() rejects empty input (warning on PHP 7,
        // ValueError on PHP 8). Start from a fresh, empty document instead so
        // that markup from a previously parsed page is not left behind.
        if ($html === null || $html === '') {
            $this->dom = new DOMDocument();
            return;
        }

        $internalErrors = libxml_use_internal_errors(true);
        try {
            $this->dom->loadHTML($html, LIBXML_NOWARNING);
        } finally {
            // If internal error handling was off before this call, the errors
            // buffered here are ours alone and nobody will read them: drop them.
            // If the caller had already enabled it, leave the buffer untouched.
            if (!$internalErrors) {
                libxml_clear_errors();
            }
            // Always restore the caller's setting, even when loadHTML() throws.
            libxml_use_internal_errors($internalErrors);
        }
    }

    /**
     * Removes specified tags with their content from DOM.
     *
     * @param string $tag Name of the tag to remove.
     */
    protected function removeTags(string $tag)
    {
        // Snapshot the matches first: the list returned by
        // getElementsByTagName() is live, so removing nodes while iterating
        // it directly would skip elements.
        $nodes = iterator_to_array($this->getDomElements($tag), false);
        foreach ($nodes as $node) {
            if ($node->parentNode !== null) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    /**
     * Returns DOM elements by tag name.
     *
     * @param string $name Tag name to look up in the current document.
     * @return DOMNodeList|DOMElement[]
     */
    protected function getDomElements(string $name): DOMNodeList
    {
        return $this->dom->getElementsByTagName($name);
    }
}
Loading

0 comments on commit a52ec60

Please sign in to comment.