Skip to content

Commit

Permalink
Fix output (#14)
Browse files Browse the repository at this point in the history
* fix the error where the output was split by commas

* fix doc

* fix flake8 error

---------

Co-authored-by: parisa-zahedi <p.zahedi@uu.nl>
  • Loading branch information
parisa-zahedi and parisa-zahedi authored Apr 15, 2024
1 parent 122d367 commit a48aac7
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
16 changes: 8 additions & 8 deletions interest/output_generator/text_formater.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,18 @@ def __init__(self, output_unit: str, sentences_per_segment: int,
self.texts: List[str] = []

def format_output(self, texts: Union[None, List[str]]) -> (
Union)[str, List[str], None]:
Union)[str, List[str], List[List[str]], None]:
"""
Formats input texts based on the specified output unit.
Args:
texts (List[str]): List of input texts to be formatted.
Returns:
Union[str, List[List[str]]]: Formatted output text based on the
selected output_unit. For 'full_text', returns a single string.
For 'paragraph' and 'segmented_text', returns a list of segmented
text lists.
Union[str, List[str], List[List[str]]]: Formatted output text
based on the selected output_unit. For 'full_text', returns a
single string. For 'paragraph' and 'segmented_text', returns a
list of segmented text lists.
Raises:
ValueError: If input 'texts' is not a list of strings.
Expand Down Expand Up @@ -91,11 +91,11 @@ def _format_fulltext(self) -> str:
"""
return '\n'.join(self.texts)

def _format_segmented_text(self) -> List[str]:
def _format_segmented_text(self) -> List[List[str]]:
"""Formats texts as segmented text based on sentences_per_segment.
Returns:
List[str]: Flattened list of segmented text strings.
List[List[str]]: List of segments, each a list of sentence strings.
"""
segmented_texts = []
for text in self.texts:
Expand All @@ -104,7 +104,7 @@ def _format_segmented_text(self) -> List[str]:

for i in range(0, len(sentences), self.sentences_per_segment):
segment = sentences[i:i + self.sentences_per_segment]
segmented_texts.extend(segment)
segmented_texts.append(segment)

return segmented_texts

Expand Down
6 changes: 4 additions & 2 deletions scripts/step4_generate_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ def find_articles_in_file(filepath: str, formatter: TextFormatter) -> (
spacy_model=SPACY_MODEL)
for articles_filepath in args.input_dir.rglob(args.glob):
df = find_articles_in_file(articles_filepath, text_formatter)
if df is None:
continue
file_name = get_file_name_without_extension(articles_filepath)
df.to_csv(os.path.join(args.output_dir, 'articles_to_label_'+file_name+'.csv'), index = False)

df.to_csv(os.path.join(args.output_dir, 'articles_to_label_'
+ file_name+'.csv'), index=False)

0 comments on commit a48aac7

Please sign in to comment.