Skip to content

Commit

Permalink
read tool and list tool fix
Browse files Browse the repository at this point in the history
  • Loading branch information
rounak610 authored and dungnmaster committed Sep 28, 2023
1 parent 68d32de commit 681982d
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 13 deletions.
20 changes: 11 additions & 9 deletions superagi/helper/s3_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,14 @@ def get_download_url_of_resources(self, db_resources_arr):
return response_obj

def list_files_from_s3(self, file_path):
    """List the object keys stored under ``"resources" + file_path``.

    The bucket name is read from the ``BUCKET_NAME`` config entry and the
    listing is done through ``self.s3.list_objects_v2`` (presumably a boto3
    S3 client -- confirm against the class constructor).

    Args:
        file_path: Path suffix appended to the ``resources`` prefix.

    Returns:
        A list with the ``Key`` of every object returned for the prefix.

    Raises:
        Exception: ``"Error listing files from s3"`` when the request fails
            or the first response carries no ``Contents``; the underlying
            error is attached as ``__cause__``.
    """
    try:
        file_path = "resources" + file_path
        logger.info(f"Listing files from s3 with prefix: {file_path}")
        bucket_name = get_config("BUCKET_NAME")
        response = self.s3.list_objects_v2(Bucket=bucket_name, Prefix=file_path)
        if 'Contents' not in response:
            raise Exception("No contents in S3 response")
        file_list = [obj['Key'] for obj in response['Contents']]

        # A single list_objects_v2 call returns one page only; keep following
        # the continuation token so long listings are not silently truncated.
        while response.get('IsTruncated'):
            response = self.s3.list_objects_v2(
                Bucket=bucket_name,
                Prefix=file_path,
                ContinuationToken=response['NextContinuationToken'],
            )
            file_list.extend(obj['Key'] for obj in response.get('Contents', []))

        return file_list
    except Exception as err:
        # Same exception type and message as before for existing callers, but
        # no longer trapping KeyboardInterrupt/SystemExit, and chained so the
        # root cause stays visible in tracebacks.
        raise Exception("Error listing files from s3") from err
27 changes: 27 additions & 0 deletions superagi/helper/validate_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import csv
import pandas as pd
import chardet
from superagi.lib.logger import logger

def correct_csv_encoding(file_path):
    """Rewrite the CSV at ``file_path`` as UTF-8 when another encoding is detected.

    The encoding is guessed with ``chardet`` from the raw bytes. When it is
    not ``utf-8`` the file is parsed as a ``;``-delimited CSV, loaded into a
    DataFrame and written back in place with UTF-8 encoding.

    Args:
        file_path: Path of the CSV file to check and, if needed, overwrite.

    Returns:
        None. The file is modified in place.
    """
    with open(file_path, 'rb') as f:
        result = chardet.detect(f.read())
    encoding = result['encoding']

    if encoding is None:
        # chardet could not guess (e.g. an empty file). Falling through would
        # decode with the locale default and overwrite the file, so leave it.
        logger.error("Could not detect the file encoding; file left unchanged.")
        return

    # Compare case-insensitively so the check does not depend on how the
    # detector capitalises the encoding name.
    if encoding.lower() == 'utf-8':
        logger.info("File is already in utf-8 encoding.")
        return

    # newline='' is what the csv module expects so that line breaks inside
    # quoted fields are parsed correctly.
    with open(file_path, 'r', encoding=encoding, newline='') as f:
        data = list(csv.reader(f, delimiter=';', quotechar='"'))

    df = pd.DataFrame(data)

    # NOTE(review): to_csv runs with its default header setting, so the
    # DataFrame's integer column labels are written as a first row, and the
    # output uses ',' while the input is read with ';' -- confirm both are
    # intended before changing them.
    df.to_csv(file_path, encoding='utf-8', index=False)
    logger.info("File is converted to utf-8 encoding.")
9 changes: 5 additions & 4 deletions superagi/tools/file/read_file.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import os
from typing import Type, Optional
import ebooklib
Expand All @@ -7,6 +6,7 @@

from pydantic import BaseModel, Field
from ebooklib import epub
from superagi.helper.validate_csv import correct_csv_encoding

from superagi.helper.resource_helper import ResourceHelper
from superagi.helper.s3_helper import S3Helper
Expand All @@ -17,6 +17,7 @@
from superagi.types.storage_types import StorageType
from superagi.config.config import get_config
from unstructured.partition.auto import partition
from superagi.lib.logger import logger

class ReadFileSchema(BaseModel):
"""Input for CopyFileTool."""
Expand Down Expand Up @@ -89,12 +90,12 @@ def _execute(self, file_name: str):

content = "\n".join(content)
else:
if final_path.endswith('.csv'):
correct_csv_encoding(final_path)
elements = partition(final_path)
content = "\n\n".join([str(el) for el in elements])

if temporary_file_path is not None:
os.remove(temporary_file_path)

return content


return content

0 comments on commit 681982d

Please sign in to comment.