diff --git a/src/ZugferdDocumentPdfReader.php b/src/ZugferdDocumentPdfReader.php index 4e3de1fb..e93942df 100644 --- a/src/ZugferdDocumentPdfReader.php +++ b/src/ZugferdDocumentPdfReader.php @@ -10,9 +10,14 @@ namespace horstoeko\zugferd; use Exception; -use Smalot\PdfParser\Parser as PdfParser; use horstoeko\zugferd\exception\ZugferdFileNotFoundException; use horstoeko\zugferd\exception\ZugferdFileNotReadableException; +use horstoeko\zugferd\exception\ZugferdNoPdfAttachmentFoundException; +use horstoeko\zugferd\exception\ZugferdUnknownProfileException; +use horstoeko\zugferd\exception\ZugferdUnknownProfileParameterException; +use horstoeko\zugferd\exception\ZugferdUnknownXmlContentException; +use JMS\Serializer\Exception\RuntimeException; +use Smalot\PdfParser\Parser as PdfParser; /** * Class representing the document reader for incoming PDF/A-Documents with @@ -40,12 +45,17 @@ class ZugferdDocumentPdfReader * Load a PDF file (ZUGFeRD/Factur-X) * * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable - * @return ZugferdDocumentReader|null + * @throws Exception + * @return ZugferdDocumentReader * @throws ZugferdFileNotFoundException * @throws ZugferdFileNotReadableException - * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException + * @throws ZugferdUnknownXmlContentException + * @throws ZugferdUnknownProfileException + * @throws ZugferdUnknownProfileParameterException + * @throws RuntimeException */ - public static function readAndGuessFromFile(string $pdfFilename): ?ZugferdDocumentReader + public static function readAndGuessFromFile(string $pdfFilename): ZugferdDocumentReader { if (!file_exists($pdfFilename)) { throw new ZugferdFileNotFoundException($pdfFilename); @@ -62,38 +72,34 @@ public static function readAndGuessFromFile(string $pdfFilename): ?ZugferdDocume /** * Tries to load an attachment content from PDF and return a ZugferdDocumentReader - * If any erros occured or no attachments were found null is returned * - * @param string $pdfContent String Containing the binary pdf data - * @return ZugferdDocumentReader|null + * @param string $pdfContent String containing the binary pdf data + * @return ZugferdDocumentReader * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException + * @throws ZugferdUnknownXmlContentException + * @throws ZugferdUnknownProfileException + * @throws ZugferdUnknownProfileParameterException + * @throws RuntimeException */ - public static function readAndGuessFromContent(string $pdfContent): ?ZugferdDocumentReader + public static function readAndGuessFromContent(string $pdfContent): ZugferdDocumentReader { $xmlContent = static::internalExtractXMLFromPdfContent($pdfContent); - if (is_null($xmlContent)) { - return null; - } - - try { - return ZugferdDocumentReader::readAndGuessFromContent($xmlContent); - } catch (\Exception $e) { - return null; - } + return ZugferdDocumentReader::readAndGuessFromContent($xmlContent); } /** * Returns a XML content from a PDF file * - * @param string $pdfFilename - * Contains a full-qualified filename which must exist and must be readable - * @return string|null + * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable + * @return string + * @throws Exception * @throws ZugferdFileNotFoundException * @throws ZugferdFileNotReadableException - * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException */ - public static function getXmlFromFile(string $pdfFilename): ?string + public static function getXmlFromFile(string $pdfFilename): string { if (!file_exists($pdfFilename)) { throw new ZugferdFileNotFoundException($pdfFilename); @@ -112,10 +118,12 @@ public static function getXmlFromFile(string $pdfFilename): ?string * Returns a XML content from a PDF binary stream (string) * * @param string $pdfContent String Containing the binary pdf data - * @return string|null + * @param string $pdfContent + * @return string * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException */ - public static function getXmlFromContent(string $pdfContent): ?string + public static function getXmlFromContent(string $pdfContent): string { return static::internalExtractXMLFromPdfContent($pdfContent); } @@ -125,10 +133,11 @@ public static function getXmlFromContent(string $pdfContent): ?string * See the allowed filenames which are supported * * @param string $pdfContent - * @return null|string + * @return string * @throws Exception + * @throws ZugferdNoPdfAttachmentFoundException */ - protected static function internalExtractXMLFromPdfContent(string $pdfContent): ?string + protected static function internalExtractXMLFromPdfContent(string $pdfContent): string { $pdfParser = new PdfParser(); $pdfParsed = $pdfParser->parseContent($pdfContent); @@ -137,35 +146,29 @@ protected static function internalExtractXMLFromPdfContent(string $pdfContent): $attachmentFound = false; $attachmentIndex = 0; $embeddedFileIndex = 0; - $returnValue = null; - - try { - foreach ($filespecs as $filespec) { - $filespecDetails = $filespec->getDetails(); - if (in_array($filespecDetails['F'], static::ATTACHMENT_FILENAMES)) { - $attachmentFound = true; - break; - } - $attachmentIndex++; + + foreach ($filespecs as $filespec) { + $filespecDetails = $filespec->getDetails(); + if (in_array($filespecDetails['F'], static::ATTACHMENT_FILENAMES)) { + $attachmentFound = true; + break; } + $attachmentIndex++; + } - if (true == $attachmentFound) { - /** - * @var array<\Smalot\PdfParser\PDFObject> - */ - $embeddedFiles = $pdfParsed->getObjectsByType('EmbeddedFile'); - foreach ($embeddedFiles as $embeddedFile) { - if ($attachmentIndex == $embeddedFileIndex) { - $returnValue = $embeddedFile->getContent(); - break; - } - $embeddedFileIndex++; + if (true == $attachmentFound) { + /** + * @var array<\Smalot\PdfParser\PDFObject> + */ + $embeddedFiles = $pdfParsed->getObjectsByType('EmbeddedFile'); + foreach ($embeddedFiles as $embeddedFile) { + if ($attachmentIndex == $embeddedFileIndex) { + return $embeddedFile->getContent(); } + $embeddedFileIndex++; } - } catch (\Exception $e) { - $returnValue = null; } - return $returnValue; + throw new ZugferdNoPdfAttachmentFoundException(); } } diff --git a/src/exception/ZugferdExceptionCodes.php b/src/exception/ZugferdExceptionCodes.php index c9909f83..9624cbf1 100644 --- a/src/exception/ZugferdExceptionCodes.php +++ b/src/exception/ZugferdExceptionCodes.php @@ -29,6 +29,7 @@ class ZugferdExceptionCodes public const UNKNOWNSYNTAX = -1107; public const UNKNOWNMIMETYPE = -1108; public const UNSUPPORTEDMIMETYPE = -1109; + public const NOPDFATTACHMENTFOUND = -1110; public const FILENOTFOUND = -2000; public const FILENOTREADABLE = -2001; } diff --git a/src/exception/ZugferdNoPdfAttachmentFoundException.php b/src/exception/ZugferdNoPdfAttachmentFoundException.php new file mode 100644 index 00000000..13ae18b3 --- /dev/null +++ b/src/exception/ZugferdNoPdfAttachmentFoundException.php @@ -0,0 +1,35 @@ + + * @license https://opensource.org/licenses/MIT MIT + * @link https://github.com/horstoeko/zugferd + */ +class ZugferdNoPdfAttachmentFoundException extends ZugferdBaseException +{ + /** + * Constructor + * + * @param Throwable|null $previous + */ + public function __construct(?Throwable $previous = null) + { + parent::__construct("No PDF attachment found", ZugferdExceptionCodes::NOPDFATTACHMENTFOUND, $previous); + } +} diff --git a/tests/testcases/PdfReaderGeneralTest.php b/tests/testcases/PdfReaderGeneralTest.php index 92560cb2..51bf3647 100644 --- a/tests/testcases/PdfReaderGeneralTest.php +++ b/tests/testcases/PdfReaderGeneralTest.php @@ -2,47 +2,112 @@ namespace horstoeko\zugferd\tests\testcases; +use horstoeko\zugferd\exception\ZugferdExceptionCodes; use horstoeko\zugferd\exception\ZugferdFileNotFoundException; +use horstoeko\zugferd\exception\ZugferdNoPdfAttachmentFoundException; use horstoeko\zugferd\tests\TestCase; use horstoeko\zugferd\ZugferdDocument; use horstoeko\zugferd\ZugferdDocumentPdfReader; class PdfReaderGeneralTest extends TestCase { - public function testCanReadPdf(): void + /* ZugferdPdfReader::readAndGuessFromFile */ + + public function testReadFromFileWhichDoesNotExist(): void { - $document = ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); - $this->assertNull($document); + $this->expectException(ZugferdFileNotFoundException::class); + + ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); } - public function testFileNotFound(): void + public function testReadFromFileWhichHasNoValidAttachment(): void { - $this->expectException(ZugferdFileNotFoundException::class); - $document = ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); } - public function testCanReadPdf2(): void + public function testReadFromFileWhichExistsAndHasValidAttachment(): void { - $document = ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); - $this->assertNull($document); + $document = ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $this->assertNotNull($document); + $this->assertInstanceOf(ZugferdDocument::class, $document); } - public function testFileNotFound2(): void + /* ZugferdPdfReader::readAndGuessFromContent */ + + public function testReadFromContentWhichHasNoValidAttachment(): void { - $this->expectException(ZugferdFileNotFoundException::class); - $document = ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + $pdfContent = file_get_contents(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); + + ZugferdDocumentPdfReader::readAndGuessFromContent($pdfContent); } - public function testCanReadPdf3(): void + public function testReadFromContentWhichHasValidAttachment(): void { - $document = ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + $pdfContent = file_get_contents(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $document = ZugferdDocumentPdfReader::readAndGuessFromContent($pdfContent); + $this->assertNotNull($document); $this->assertInstanceOf(ZugferdDocument::class, $document); } - public function testCanReadPdf4(): void + /* ZugferdPdfReader::getXmlFromFile */ + + public function testGetXmlFromFileWhichDoesNotExist(): void + { + $this->expectException(ZugferdFileNotFoundException::class); + + ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + } + + public function testGetXmlFromFileWhichHasNoValidAttachment(): void + { + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); + } + + public function testGetXmlFromFileWhichExistsAndHasValidAttachment(): void { $xmlString = ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $this->assertNotNull($xmlString); + $this->assertIsString($xmlString); + $this->assertStringContainsString("assertStringContainsString("assertStringContainsString("", $xmlString); + } + + /* ZugferdPdfReader::getXmlFromContent */ + + public function testGetXmlFromContentWhichHasNoValidAttachment(): void + { + $this->expectException(ZugferdNoPdfAttachmentFoundException::class); + $this->expectExceptionMessage('No PDF attachment found'); + $this->expectExceptionCode(ZugferdExceptionCodes::NOPDFATTACHMENTFOUND); + + $pdfContent = file_get_contents(dirname(__FILE__) . "/../assets/pdf_invalid.pdf"); + + ZugferdDocumentPdfReader::getXmlFromContent($pdfContent); + } + + public function testGetXmlFromContentWhichHasValidAttachment(): void + { + $pdfContent = file_get_contents(dirname(__FILE__) . "/../assets/pdf_zf_en16931_1.pdf"); + + $xmlString = ZugferdDocumentPdfReader::getXmlFromContent($pdfContent); + $this->assertNotNull($xmlString); $this->assertIsString($xmlString); $this->assertStringContainsString("