Skip to content

Commit

Permalink
[ENH] Introducing new ZugferdDocumentPdfReaderExt (Extended PDF Reader)
Browse files Browse the repository at this point in the history
  • Loading branch information
HorstOeko committed Dec 25, 2024
1 parent 82a62b8 commit d3498b0
Show file tree
Hide file tree
Showing 4 changed files with 505 additions and 0 deletions.
1 change: 1 addition & 0 deletions build/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
<file>../tests/testcases/PdfReaderExtended2Test.php</file>
<file>../tests/testcases/PdfReaderXRechnungTest.php</file>
<file>../tests/testcases/PdfReaderMultipleAttachmentsTest.php</file>
<file>../tests/testcases/PdfReaderExtGeneralTest.php</file>
</testsuite>
<testsuite name="PDFBuilder">
<file>../tests/testcases/PdfBuilderEn16931Test.php</file>
Expand Down
5 changes: 5 additions & 0 deletions make/genmethoddocs.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use horstoeko\zugferd\ZugferdDocumentPdfBuilder;
use horstoeko\zugferd\ZugferdDocumentPdfMerger;
use horstoeko\zugferd\ZugferdDocumentPdfReader;
use horstoeko\zugferd\ZugferdDocumentPdfReaderExt;
use horstoeko\zugferd\ZugferdDocumentReader;
use horstoeko\zugferd\ZugferdDocumentValidator;
use horstoeko\zugferd\ZugferdKositValidator;
Expand Down Expand Up @@ -672,6 +673,9 @@ private function fixPhpType(string $string): string
if (stripos($string, '[]') !== false) {
$string = 'array';
}
if (stripos($string, 'array<') === 0) {
$string = 'array';
}
if ($string == '$this') {
$string = 'static';
}
Expand Down Expand Up @@ -719,6 +723,7 @@ public static function generate(array $classes, array $ignoreInheritance = [])
ZugferdDocumentBuilder::class => dirname(__FILE__) . '/Class-ZugferdDocumentBuilder.md',
ZugferdDocumentReader::class => dirname(__FILE__) . '/Class-ZugferdDocumentReader.md',
ZugferdDocumentPdfReader::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfReader.md',
ZugferdDocumentPdfReaderExt::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfReaderExt.md',
ZugferdDocumentPdfBuilder::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfBuilder.md',
ZugferdDocumentPdfMerger::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfMerger.md',
ZugferdDocumentValidator::class => dirname(__FILE__) . '/Class-ZugferdDocumentValidator.md',
Expand Down
320 changes: 320 additions & 0 deletions src/ZugferdDocumentPdfReaderExt.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
<?php

/**
* This file is a part of horstoeko/zugferd.
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace horstoeko\zugferd;

use Exception;
use horstoeko\zugferd\exception\ZugferdFileNotFoundException;
use horstoeko\zugferd\exception\ZugferdFileNotReadableException;
use horstoeko\zugferd\exception\ZugferdNoPdfAttachmentFoundException;
use horstoeko\zugferd\exception\ZugferdUnknownProfileException;
use horstoeko\zugferd\exception\ZugferdUnknownProfileParameterException;
use horstoeko\zugferd\exception\ZugferdUnknownXmlContentException;
use JMS\Serializer\Exception\RuntimeException;
use Smalot\PdfParser\Parser as PdfParser;

/**
* Class representing the extended document reader for incoming PDF/A-Documents with
* attached XML data in BASIC-, EN16931- and EXTENDED profile. The Extended PDF reader
* also reads additional attached documents from the PDF
*
* @category Zugferd
* @package Zugferd
* @author D. Erling <horstoeko@erling.com.de>
* @license https://opensource.org/licenses/MIT MIT
* @link https://github.com/horstoeko/zugferd
*/
class ZugferdDocumentPdfReaderExt
{
    /**
     * List of filenames which are possible for an attached XML-Invoice-Document in PDF
     */
    public const ATTACHMENT_FILENAMES = [
        'ZUGFeRD-invoice.xml', // 1.0
        'zugferd-invoice.xml', // 2.0
        'factur-x.xml', // 2.1
        'xrechnung.xml',
    ];

    /**
     * Identifier for a XML-Invoice-Document
     */
    private const ATTACHMENT_TYPE_XMLINVOICE = 0;

    /**
     * Identifier for an additional document
     */
    private const ATTACHMENT_TYPE_ADDITIONAL = 1;

    /**
     * Key of the type element in the internal attachment list
     */
    public const ATTACHMENT_KEY_TYPE = 'type';

    /**
     * Key of the content element in the internal attachment list
     */
    public const ATTACHMENT_KEY_CONTENT = 'content';

    /**
     * Key of the filename element in the internal attachment list
     */
    public const ATTACHMENT_KEY_FILENAME = 'filename';

    /**
     * Key of the mimetype element in the internal attachment list
     */
    public const ATTACHMENT_KEY_MIMETYPE = 'mimetype';

    /**
     * Array containing all the attached files found in PDF
     *
     * @var array<int, array{type: int, content: string, filename: string, mimetype: string}>
     */
    private $attachmentContentList = [];

    /**
     * (Hidden) Constructor. Instances are created by the static factory
     * methods fromFile() and fromContent() only. The constructor is final
     * so that "new static()" in fromContent() is safe for subclasses.
     */
    final protected function __construct()
    {
        $this->attachmentContentList = [];
    }

    /**
     * Load a PDF file
     *
     * @param  string $pdfFilename Contains a full-qualified filename which must exist and must be readable
     * @return ZugferdDocumentPdfReaderExt
     * @throws ZugferdFileNotFoundException    If the file does not exist
     * @throws ZugferdFileNotReadableException If the file exists but cannot be read
     * @throws Exception
     */
    public static function fromFile(string $pdfFilename): ZugferdDocumentPdfReaderExt
    {
        if (!file_exists($pdfFilename)) {
            throw new ZugferdFileNotFoundException($pdfFilename);
        }

        // Check readability up front: file_get_contents() would emit a PHP
        // warning on an unreadable file before returning false.
        if (!is_readable($pdfFilename)) {
            throw new ZugferdFileNotReadableException($pdfFilename);
        }

        $pdfContent = file_get_contents($pdfFilename);

        if ($pdfContent === false) {
            throw new ZugferdFileNotReadableException($pdfFilename);
        }

        return static::fromContent($pdfContent);
    }

    /**
     * Load a PDF content string
     *
     * @param  string $pdfContent Contains the raw data of a PDF
     * @return ZugferdDocumentPdfReaderExt
     * @throws Exception
     */
    public static function fromContent(string $pdfContent): ZugferdDocumentPdfReaderExt
    {
        // Late static binding, consistent with the static:: calls used by the
        // other factories, so that subclasses receive an instance of themselves.
        return (new static())->collectAttachmentsFromPdfContent($pdfContent);
    }

    /**
     * Load a PDF file and return a ZugferdDocumentReader-Instance
     *
     * @param  string $pdfFilename Contains a full-qualified filename which must exist and must be readable
     * @return ZugferdDocumentReader
     * @throws Exception
     * @throws RuntimeException
     * @throws ZugferdFileNotFoundException
     * @throws ZugferdFileNotReadableException
     * @throws ZugferdNoPdfAttachmentFoundException
     * @throws ZugferdUnknownProfileException
     * @throws ZugferdUnknownProfileParameterException
     * @throws ZugferdUnknownXmlContentException
     * @see    \horstoeko\zugferd\ZugferdDocumentPdfReader::readAndGuessFromFile() For a similar purpose in another context.
     */
    public static function readAndGuessFromFile(string $pdfFilename): ZugferdDocumentReader
    {
        return static::fromFile($pdfFilename)->resolveInvoiceDocumentReader();
    }

    /**
     * Load a PDF content and return a ZugferdDocumentReader-Instance
     *
     * @param  string $pdfContent Contains the raw data of a PDF
     * @return ZugferdDocumentReader
     * @throws Exception
     * @throws RuntimeException
     * @throws ZugferdNoPdfAttachmentFoundException
     * @throws ZugferdUnknownXmlContentException
     * @throws ZugferdUnknownProfileException
     * @throws ZugferdUnknownProfileParameterException
     * @see    \horstoeko\zugferd\ZugferdDocumentPdfReader::readAndGuessFromContent() For a similar purpose in another context.
     */
    public static function readAndGuessFromContent(string $pdfContent): ZugferdDocumentReader
    {
        return static::fromContent($pdfContent)->resolveInvoiceDocumentReader();
    }

    /**
     * Returns the invoice document XML content from a PDF file
     * similar to ZugferdDocumentPdfReader::getXmlFromFile
     *
     * @param  string $pdfFilename Contains a full-qualified filename which must exist and must be readable
     * @return string
     * @throws ZugferdFileNotFoundException
     * @throws ZugferdFileNotReadableException
     * @throws Exception
     * @throws ZugferdNoPdfAttachmentFoundException
     * @see    \horstoeko\zugferd\ZugferdDocumentPdfReader::getXmlFromFile() For a similar purpose in another context.
     */
    public static function getInvoiceDocumentContentFromFile(string $pdfFilename): string
    {
        return static::fromFile($pdfFilename)->resolveInvoiceDocumentContent();
    }

    /**
     * Returns the invoice document XML content from a PDF content string
     *
     * @param  string $pdfContent Contains the raw data of a PDF
     * @return string
     * @throws Exception
     * @throws ZugferdNoPdfAttachmentFoundException
     * @see    \horstoeko\zugferd\ZugferdDocumentPdfReader::getXmlFromContent() For a similar purpose in another context.
     */
    public static function getInvoiceDocumentContentFromContent(string $pdfContent): string
    {
        return static::fromContent($pdfContent)->resolveInvoiceDocumentContent();
    }

    /**
     * Returns all additional documents (except the invoice document) from a PDF file
     *
     * @param  string $pdfFilename Contains a full-qualified filename which must exist and must be readable
     * @return array<int, array{type: int, content: string, filename: string, mimetype: string}>
     * @throws ZugferdFileNotFoundException
     * @throws ZugferdFileNotReadableException
     * @throws Exception
     */
    public static function getAdditionalDocumentContentsFromFile(string $pdfFilename): array
    {
        return static::fromFile($pdfFilename)->resolveAdditionalDocumentContents();
    }

    /**
     * Returns all additional documents (except the invoice document) from a PDF content string
     *
     * @param  string $pdfContent Contains the raw data of a PDF
     * @return array<int, array{type: int, content: string, filename: string, mimetype: string}>
     * @throws Exception
     */
    public static function getAdditionalDocumentContentsFromContent(string $pdfContent): array
    {
        return static::fromContent($pdfContent)->resolveAdditionalDocumentContents();
    }

    /**
     * Returns an instance of ZugferdDocumentReader by a valid invoice attachment
     *
     * @return ZugferdDocumentReader
     * @throws ZugferdNoPdfAttachmentFoundException
     * @throws ZugferdUnknownXmlContentException
     * @throws ZugferdUnknownProfileException
     * @throws ZugferdUnknownProfileParameterException
     * @throws RuntimeException
     */
    public function resolveInvoiceDocumentReader(): ZugferdDocumentReader
    {
        return ZugferdDocumentReader::readAndGuessFromContent($this->resolveInvoiceDocumentContent());
    }

    /**
     * Returns the content of the first attachment that was classified as
     * invoice document. If no such attachment was collected an exception
     * is raised.
     *
     * @return string
     * @throws ZugferdNoPdfAttachmentFoundException
     */
    public function resolveInvoiceDocumentContent(): string
    {
        foreach ($this->attachmentContentList as $attachment) {
            if ($attachment[self::ATTACHMENT_KEY_TYPE] === self::ATTACHMENT_TYPE_XMLINVOICE) {
                return $attachment[self::ATTACHMENT_KEY_CONTENT];
            }
        }

        throw new ZugferdNoPdfAttachmentFoundException();
    }

    /**
     * Returns a list of all additional attached documents except the invoice document.
     * The returned list is re-indexed starting at 0.
     *
     * @return array<int, array{type: int, content: string, filename: string, mimetype: string}>
     */
    public function resolveAdditionalDocumentContents(): array
    {
        $additionalDocuments = array_filter(
            $this->attachmentContentList,
            static function ($attachment) {
                return $attachment[self::ATTACHMENT_KEY_TYPE] === self::ATTACHMENT_TYPE_ADDITIONAL;
            }
        );

        // array_filter() preserves keys, callers expect a plain list
        return array_values($additionalDocuments);
    }

    /**
     * Collects all embedded files from the given PDF content into the internal
     * attachment list. Any previously collected attachments are discarded.
     *
     * Only "Filespec" objects which have a filename entry (F) and an
     * embedded-file dictionary (EF) that itself has an F entry are considered.
     * An attachment whose filename is listed in ATTACHMENT_FILENAMES is
     * classified as invoice document, every other one as additional document.
     *
     * @param  string $pdfContent Contains the raw data of a PDF
     * @return ZugferdDocumentPdfReaderExt
     * @throws Exception
     */
    protected function collectAttachmentsFromPdfContent(string $pdfContent): ZugferdDocumentPdfReaderExt
    {
        $this->attachmentContentList = [];

        $pdfParsed = (new PdfParser())->parseContent($pdfContent);

        foreach ($pdfParsed->getObjectsByType('Filespec') as $fileSpec) {
            if (!$fileSpec->has('F') || !$fileSpec->has('EF')) {
                continue;
            }

            $embeddedFiles = $fileSpec->get('EF');

            if (!$embeddedFiles->has('F')) {
                continue;
            }

            $embeddedFile = $embeddedFiles->get('F');
            $filename = $fileSpec->get('F')->getContent();

            // Strict comparison: a non-string filename must never be
            // loosely matched against the known invoice filenames.
            $isInvoiceDocument = in_array($filename, self::ATTACHMENT_FILENAMES, true);

            $this->attachmentContentList[] = [
                self::ATTACHMENT_KEY_TYPE => $isInvoiceDocument
                    ? self::ATTACHMENT_TYPE_XMLINVOICE
                    : self::ATTACHMENT_TYPE_ADDITIONAL,
                self::ATTACHMENT_KEY_CONTENT => $embeddedFile->getContent(),
                self::ATTACHMENT_KEY_FILENAME => $filename,
                // The mimetype is optional, fall back to an empty string
                self::ATTACHMENT_KEY_MIMETYPE => $embeddedFile->has('Subtype')
                    ? (string) $embeddedFile->get('Subtype')->getContent()
                    : '',
            ];
        }

        return $this;
    }
}
Loading

0 comments on commit d3498b0

Please sign in to comment.