From 26511cb62a0d31e181d253a18d5543d970b85813 Mon Sep 17 00:00:00 2001 From: "Alexie (Boyong) Madolid" Date: Fri, 1 Sep 2023 13:16:00 +0800 Subject: [PATCH] [PDF_EXT]: Update pypdf version to resolve conflict --- jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/pdf_ext.py | 8 ++++---- jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/requirements.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/pdf_ext.py b/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/pdf_ext.py index bedf21d1db..c7cdc66226 100644 --- a/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/pdf_ext.py +++ b/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/pdf_ext.py @@ -4,7 +4,7 @@ import uuid from jaseci.jsorc.live_actions import jaseci_action -from PyPDF2 import PdfFileReader +from pypdf import PdfReader from fastapi import HTTPException @@ -49,14 +49,14 @@ def remove_pdf(filename: str): def process_pdf(filename, metadata, data): with open(filename, "rb") as pdf_file: - pdf_reader = PdfFileReader(pdf_file) + pdf_reader = PdfReader(pdf_file) if metadata: data.update({"metadata": {}}) - md = dict(pdf_reader.documentInfo) + md = dict(pdf_reader.metadata) for k, v in md.items(): data["metadata"][re.sub("[^a-zA-Z0-9]+", "", k)] = v data["pages"] = len(pdf_reader.pages) - data["content"] = [page.extractText() for page in pdf_reader.pages] + data["content"] = [page.extract_text() for page in pdf_reader.pages] return data diff --git a/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/requirements.txt b/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/requirements.txt index f6e4c7b3fc..7cafbbf83e 100644 --- a/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/requirements.txt +++ b/jaseci_ai_kit/jac_misc/jac_misc/pdf_ext/requirements.txt @@ -1 +1 @@ -PyPDF2>=1.27.12,<1.28 \ No newline at end of file +pypdf==3.15.4 \ No newline at end of file