From d3498b082aadb5fa9b7dd241bbdb9c0a98e653bb Mon Sep 17 00:00:00 2001 From: HorstOeko Date: Wed, 25 Dec 2024 13:55:35 +0100 Subject: [PATCH] [ENH] Introducing new ZugferdDocumentPdfReaderExt (Extended PDF Reader) --- build/phpunit.xml | 1 + make/genmethoddocs.php | 5 + src/ZugferdDocumentPdfReaderExt.php | 320 ++++++++++++++++++++ tests/testcases/PdfReaderExtGeneralTest.php | 179 +++++++++++ 4 files changed, 505 insertions(+) create mode 100644 src/ZugferdDocumentPdfReaderExt.php create mode 100644 tests/testcases/PdfReaderExtGeneralTest.php diff --git a/build/phpunit.xml b/build/phpunit.xml index cba02c8a..926eb8e7 100644 --- a/build/phpunit.xml +++ b/build/phpunit.xml @@ -37,6 +37,7 @@ ../tests/testcases/PdfReaderExtended2Test.php ../tests/testcases/PdfReaderXRechnungTest.php ../tests/testcases/PdfReaderMultipleAttachmentsTest.php + ../tests/testcases/PdfReaderExtGeneralTest.php ../tests/testcases/PdfBuilderEn16931Test.php diff --git a/make/genmethoddocs.php b/make/genmethoddocs.php index 99a34f8d..cbe338d0 100644 --- a/make/genmethoddocs.php +++ b/make/genmethoddocs.php @@ -12,6 +12,7 @@ use horstoeko\zugferd\ZugferdDocumentPdfBuilder; use horstoeko\zugferd\ZugferdDocumentPdfMerger; use horstoeko\zugferd\ZugferdDocumentPdfReader; +use horstoeko\zugferd\ZugferdDocumentPdfReaderExt; use horstoeko\zugferd\ZugferdDocumentReader; use horstoeko\zugferd\ZugferdDocumentValidator; use horstoeko\zugferd\ZugferdKositValidator; @@ -672,6 +673,9 @@ private function fixPhpType(string $string): string if (stripos($string, '[]') !== false) { $string = 'array'; } + if (stripos($string, 'array<') === 0) { + $string = 'array'; + } if ($string == '$this') { $string = 'static'; } @@ -719,6 +723,7 @@ public static function generate(array $classes, array $ignoreInheritance = []) ZugferdDocumentBuilder::class => dirname(__FILE__) . '/Class-ZugferdDocumentBuilder.md', ZugferdDocumentReader::class => dirname(__FILE__) . 
'/Class-ZugferdDocumentReader.md', ZugferdDocumentPdfReader::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfReader.md', + ZugferdDocumentPdfReaderExt::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfReaderExt.md', ZugferdDocumentPdfBuilder::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfBuilder.md', ZugferdDocumentPdfMerger::class => dirname(__FILE__) . '/Class-ZugferdDocumentPdfMerger.md', ZugferdDocumentValidator::class => dirname(__FILE__) . '/Class-ZugferdDocumentValidator.md', diff --git a/src/ZugferdDocumentPdfReaderExt.php b/src/ZugferdDocumentPdfReaderExt.php new file mode 100644 index 00000000..5efbefa7 --- /dev/null +++ b/src/ZugferdDocumentPdfReaderExt.php @@ -0,0 +1,320 @@ + + * @license https://opensource.org/licenses/MIT MIT + * @link https://github.com/horstoeko/zugferd + */ +class ZugferdDocumentPdfReaderExt +{ + /** + * List of filenames which are possible for an attached XML-Invoice-Document in PDF + */ + public const ATTACHMENT_FILENAMES = [ + 'ZUGFeRD-invoice.xml'/*1.0*/, + 'zugferd-invoice.xml'/*2.0*/, + 'factur-x.xml'/*2.1*/, + 'xrechnung.xml' + ]; + + /** + * Identifier for an XML-Invoice-Document + */ + private const ATTACHMENT_TYPE_XMLINVOICE = 0; + + /** + * Identifier for an additional document + */ + private const ATTACHMENT_TYPE_ADDITIONAL = 1; + + /** + * Key of the type element in the internal attachment list + */ + public const ATTACHMENT_KEY_TYPE = 'type'; + + /** + * Key of the content element in the internal attachment list + */ + public const ATTACHMENT_KEY_CONTENT = 'content'; + + /** + * Key of the filename element in the internal attachment list + */ + public const ATTACHMENT_KEY_FILENAME = 'filename'; + + /** + * Key of the mimetype element in the internal attachment list + */ + public const ATTACHMENT_KEY_MIMETYPE = 'mimetype'; + + /** + * Array containing all the attached files found in PDF + * + * @var array + */ + private $attachmentContentList = []; + + /** + * (Hidden) Constructor + */ + final
protected function __construct() + { + $this->attachmentContentList = []; + } + + /** + * Load a PDF file + * + * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable + * @return ZugferdDocumentPdfReaderExt + * @throws ZugferdFileNotFoundException + * @throws ZugferdFileNotReadableException + * @throws Exception + */ + public static function fromFile(string $pdfFilename): ZugferdDocumentPdfReaderExt + { + if (!file_exists($pdfFilename)) { + throw new ZugferdFileNotFoundException($pdfFilename); + } + + $pdfContent = file_get_contents($pdfFilename); + + if ($pdfContent === false) { + throw new ZugferdFileNotReadableException($pdfFilename); + } + + return static::fromContent($pdfContent); + } + + /** + * Load a PDF content string + * + * @param string $pdfContent Contains the raw data of a PDF + * @return ZugferdDocumentPdfReaderExt + * @throws Exception + */ + public static function fromContent(string $pdfContent): ZugferdDocumentPdfReaderExt + { + return (new ZugferdDocumentPdfReaderExt())->collectAttachmentsFromPdfContent($pdfContent); + } + + /** + * Load a PDF file and return a ZugferdDocumentReader-Instance + * + * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable + * @throws Exception + * @throws RuntimeException + * @return ZugferdDocumentReader + * @throws ZugferdFileNotFoundException + * @throws ZugferdFileNotReadableException + * @throws ZugferdNoPdfAttachmentFoundException + * @throws ZugferdUnknownProfileException + * @throws ZugferdUnknownProfileParameterException + * @throws ZugferdUnknownXmlContentException + * @see \horstoeko\zugferd\ZugferdDocumentPdfReader::readAndGuessFromFile() For a similar purpose in another context.
+ */ + public static function readAndGuessFromFile(string $pdfFilename): ZugferdDocumentReader + { + return static::fromFile($pdfFilename)->resolveInvoiceDocumentReader(); + } + + /** + * Load a PDF content string and return a ZugferdDocumentReader-Instance + * + * @param string $pdfContent Contains the raw data of a PDF + * @throws Exception + * @throws RuntimeException + * @return ZugferdDocumentReader + * @throws ZugferdNoPdfAttachmentFoundException + * @throws ZugferdUnknownXmlContentException + * @throws ZugferdUnknownProfileException + * @throws ZugferdUnknownProfileParameterException + * @see \horstoeko\zugferd\ZugferdDocumentPdfReader::readAndGuessFromContent() For a similar purpose in another context. + */ + public static function readAndGuessFromContent(string $pdfContent): ZugferdDocumentReader + { + return static::fromContent($pdfContent)->resolveInvoiceDocumentReader(); + } + + /** + * Returns an invoice document XML content from a PDF file + * similar to ZugferdDocumentPdfReader::getXmlFromFile + * + * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable + * @return string + * @throws ZugferdFileNotFoundException + * @throws ZugferdFileNotReadableException + * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException + * @see \horstoeko\zugferd\ZugferdDocumentPdfReader::getXmlFromFile() For a similar purpose in another context. + */ + public static function getInvoiceDocumentContentFromFile(string $pdfFilename): string + { + return static::fromFile($pdfFilename)->resolveInvoiceDocumentContent(); + } + + /** + * Returns an invoice document XML content from a PDF content string + * + * @param string $pdfContent Contains the raw data of a PDF + * @return string + * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException + * @see \horstoeko\zugferd\ZugferdDocumentPdfReader::getXmlFromContent() For a similar purpose in another context.
+ */ + public static function getInvoiceDocumentContentFromContent(string $pdfContent): string + { + return static::fromContent($pdfContent)->resolveInvoiceDocumentContent(); + } + + /** + * Returns all additional documents (except the invoice document) from a PDF file + * + * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable + * @return array + * @throws ZugferdFileNotFoundException + * @throws ZugferdFileNotReadableException + * @throws Exception + */ + public static function getAdditionalDocumentContentsFromFile(string $pdfFilename): array + { + return static::fromFile($pdfFilename)->resolveAdditionalDocumentContents(); + } + + /** + * Returns all additional documents (except the invoice document) from a PDF content string + * + * @param string $pdfContent Contains the raw data of a PDF + * @return array + * @throws Exception + */ + public static function getAdditionalDocumentContentsFromContent(string $pdfContent): array + { + return static::fromContent($pdfContent)->resolveAdditionalDocumentContents(); + } + + /** + * Returns an instance of ZugferdDocumentReader by a valid invoice attachment + * + * @return ZugferdDocumentReader + * @throws ZugferdNoPdfAttachmentFoundException + * @throws ZugferdUnknownXmlContentException + * @throws ZugferdUnknownProfileException + * @throws ZugferdUnknownProfileParameterException + * @throws RuntimeException + */ + public function resolveInvoiceDocumentReader(): ZugferdDocumentReader + { + return ZugferdDocumentReader::readAndGuessFromContent($this->resolveInvoiceDocumentContent()); + } + + /** + * Returns the content as string if a valid invoice attachment was found, otherwise + * an exception will be raised + * + * @return string + * @throws ZugferdNoPdfAttachmentFoundException + */ + public function resolveInvoiceDocumentContent(): string + { + $invoiceContent = + array_values( + array_filter( + $this->attachmentContentList, + function ($attachmentContentItem) { + return 
$attachmentContentItem[ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE] === ZugferdDocumentPdfReaderExt::ATTACHMENT_TYPE_XMLINVOICE; + } + ) + ); + + if (empty($invoiceContent)) { + throw new ZugferdNoPdfAttachmentFoundException(); + } + + return $invoiceContent[0][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_CONTENT]; + } + + /** + * Returns a list of all additional attached documents except the invoice document + * + * @return array + */ + public function resolveAdditionalDocumentContents(): array + { + return + array_values( + array_filter( + $this->attachmentContentList, + function ($attachmentContentItem) { + return $attachmentContentItem[ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE] === ZugferdDocumentPdfReaderExt::ATTACHMENT_TYPE_ADDITIONAL; + } + ) + ); + } + + /** + * Get a list of all the attachments. + * + * @param string $pdfContent Contains the raw data of a PDF + * @return ZugferdDocumentPdfReaderExt + * @throws Exception + */ + protected function collectAttachmentsFromPdfContent(string $pdfContent): ZugferdDocumentPdfReaderExt + { + $this->attachmentContentList = []; + + $pdfParser = new PdfParser(); + $pdfParsed = $pdfParser->parseContent($pdfContent); + $fileSpecs = $pdfParsed->getObjectsByType('Filespec'); + + $fileSpecs = array_filter( + $fileSpecs, + function ($fileSpec) { + return $fileSpec->has('F') && $fileSpec->has('EF'); + } + ); + + $fileSpecs = array_filter( + $fileSpecs, + function ($fileSpec) { + return $fileSpec->get('EF')->has('F'); + } + ); + + foreach ($fileSpecs as $fileSpec) { + $this->attachmentContentList[] = [ + ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE => in_array($fileSpec->get('F')->getContent(), ZugferdDocumentPdfReaderExt::ATTACHMENT_FILENAMES) ? 
ZugferdDocumentPdfReaderExt::ATTACHMENT_TYPE_XMLINVOICE : ZugferdDocumentPdfReaderExt::ATTACHMENT_TYPE_ADDITIONAL, + ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_CONTENT => $fileSpec->get('EF')->get('F')->getContent(), + ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_FILENAME => $fileSpec->get('F')->getContent(), + ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_MIMETYPE => $fileSpec->get('EF')->get('F')->has('Subtype') ? (string)($fileSpec->get('EF')->get('F')->get('Subtype')->getContent()) : "", + ]; + } + + return $this; + } +} diff --git a/tests/testcases/PdfReaderExtGeneralTest.php b/tests/testcases/PdfReaderExtGeneralTest.php new file mode 100644 index 00000000..2d1f1b43 --- /dev/null +++ b/tests/testcases/PdfReaderExtGeneralTest.php @@ -0,0 +1,179 @@ +expectException(ZugferdFileNotFoundException::class); + + ZugferdDocumentPdfReaderExt::readAndGuessFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + } + + public function testReadFromFileWhichHasNoValidAttachment(): void + { + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + ZugferdDocumentPdfReaderExt::readAndGuessFromFile(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); + } + + public function testReadFromFileWhichExistsAndHasValidAttachment(): void + { + $document = ZugferdDocumentPdfReaderExt::readAndGuessFromFile(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $this->checkDocumentReader($document); + } + + public function testReadFromContentWhichHasNoValidAttachment(): void + { + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + $pdfContent = file_get_contents(dirname(__FILE__) . 
"/../assets/pdf_invalid.pdf"); + + ZugferdDocumentPdfReaderExt::readAndGuessFromContent($pdfContent); + } + + public function testReadFromContentWhichHasValidAttachment(): void + { + $pdfContent = file_get_contents(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $document = ZugferdDocumentPdfReaderExt::readAndGuessFromContent($pdfContent); + + $this->checkDocumentReader($document); + } + + public function testGetXmlFromFileWhichDoesNotExist(): void + { + $this->expectException(ZugferdFileNotFoundException::class); + + ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + } + + public function testGetXmlFromFileWhichHasNoValidAttachment(): void + { + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromFile(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); + } + + public function testGetXmlFromFileWhichExistsAndHasValidAttachment(): void + { + $xmlString = ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromFile(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $this->checkInvoiceDocumentXml($xmlString); + } + + public function testGetXmlFromContentWhichHasNoValidAttachment(): void + { + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + $pdfContent = file_get_contents(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); + + ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromContent($pdfContent); + } + + public function testGetXmlFromContentWhichHasValidAttachment(): void + { + $pdfContent = file_get_contents(dirname(__FILE__) . 
"/../assets/pdf_zf_en16931_1.pdf"); + + $xmlString = ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromContent($pdfContent); + + $this->checkInvoiceDocumentXml($xmlString); + } + + public function testAdditionalAttachments(): void + { + $filename = dirname(__FILE__) . "/../assets/pdf_zf_en16931_2.pdf"; + + $xmlString = ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromFile($filename); + + $this->checkInvoiceDocumentXml($xmlString); + + $additionalDocuments = ZugferdDocumentPdfReaderExt::getAdditionalDocumentContentsFromFile($filename); + + $this->checkAdditionalAttachments($additionalDocuments); + + $pdfContent = file_get_contents($filename); + $additionalDocuments = ZugferdDocumentPdfReaderExt::getAdditionalDocumentContentsFromContent($pdfContent); + + $this->checkAdditionalAttachments($additionalDocuments); + } + + public function testInvoiceDocumentAndAttachmentsNoStatic(): void + { + $pdfReaderExt = ZugferdDocumentPdfReaderExt::fromFile(dirname(__FILE__) . "/../assets/pdf_zf_en16931_2.pdf"); + + $documentReader = $pdfReaderExt->resolveInvoiceDocumentReader(); + + $this->checkDocumentReader($documentReader); + + $xmlString = $pdfReaderExt->resolveInvoiceDocumentContent(); + + $this->checkInvoiceDocumentXml($xmlString); + + $additionalDocuments = $pdfReaderExt->resolveAdditionalDocumentContents(); + + $this->checkAdditionalAttachments($additionalDocuments); + } + + private function checkDocumentReader($documentReader): void + { + $this->assertNotNull($documentReader); + $this->assertInstanceOf(ZugferdDocument::class, $documentReader); + $this->assertInstanceOf(ZugferdDocumentReader::class, $documentReader); + } + + private function checkInvoiceDocumentXml($xmlString): void + { + $this->assertNotNull($xmlString); + $this->assertIsString($xmlString); + $this->assertStringContainsString("assertStringContainsString("assertStringContainsString("", $xmlString); + } + + private function checkAdditionalAttachments($additionalDocuments): void + { + 
$this->assertNotEmpty($additionalDocuments); + $this->assertCount(2, $additionalDocuments); + $this->assertArrayHasKey(0, $additionalDocuments); + $this->assertArrayHasKey(1, $additionalDocuments); + $this->assertArrayNotHasKey(2, $additionalDocuments); + $this->assertArrayNotHasKey(3, $additionalDocuments); + + $this->assertIsArray($additionalDocuments[0]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE, $additionalDocuments[0]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_CONTENT, $additionalDocuments[0]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_FILENAME, $additionalDocuments[0]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_MIMETYPE, $additionalDocuments[0]); + $this->assertEquals(1, $additionalDocuments[0][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE]); + $this->assertEquals('Aufmass.png', $additionalDocuments[0][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_FILENAME]); + $this->assertEquals('image/png', $additionalDocuments[0][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_MIMETYPE]); + + $this->assertIsArray($additionalDocuments[1]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE, $additionalDocuments[1]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_CONTENT, $additionalDocuments[1]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_FILENAME, $additionalDocuments[1]); + $this->assertArrayHasKey(ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_MIMETYPE, $additionalDocuments[1]); + $this->assertEquals(1, $additionalDocuments[1][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_TYPE]); + $this->assertEquals('ElektronRapport.pdf', $additionalDocuments[1][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_FILENAME]); + $this->assertEquals('application/pdf', $additionalDocuments[1][ZugferdDocumentPdfReaderExt::ATTACHMENT_KEY_MIMETYPE]); + } +}