From a48aac7faa54a89b68a9fd9159fae0f56d479549 Mon Sep 17 00:00:00 2001
From: parisa-zahedi <parisa.zahedi@gmail.com>
Date: Mon, 15 Apr 2024 14:35:40 +0200
Subject: [PATCH] Fix output (#14)

* fix the error where the output was split by commas

* fix doc

* fix flake8 error

---------

Co-authored-by: parisa-zahedi <p.zahedi@uu.nl>
---
 interest/output_generator/text_formater.py | 16 ++++++++--------
 scripts/step4_generate_output.py           |  6 ++++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/interest/output_generator/text_formater.py b/interest/output_generator/text_formater.py
index 42ee92b..93bb85b 100644
--- a/interest/output_generator/text_formater.py
+++ b/interest/output_generator/text_formater.py
@@ -39,7 +39,7 @@ def __init__(self, output_unit: str, sentences_per_segment: int,
         self.texts: List[str] = []
 
     def format_output(self, texts: Union[None, List[str]]) -> (
-            Union)[str, List[str], None]:
+            Union)[str, List[str], List[List[str]], None]:
         """
         Formats input texts based on the specified output unit.
 
@@ -47,10 +47,10 @@ def format_output(self, texts: Union[None, List[str]]) -> (
             texts (List[str]): List of input texts to be formatted.
 
         Returns:
-            Union[str, List[List[str]]]: Formatted output text based on the
-            selected output_unit. For 'full_text', returns a single string.
-            For 'paragraph' and 'segmented_text', returns a list of segmented
-             text lists.
+            Union[str, List[str], List[List[str]]]: Formatted output text
+            based on the selected output_unit. For 'full_text', returns a
+            single string. For 'paragraph' and 'segmented_text', returns a
+            list of segmented text lists.
 
         Raises:
             ValueError: If input 'texts' is not a list of strings.
@@ -91,11 +91,11 @@ def _format_fulltext(self) -> str:
         """
         return '\n'.join(self.texts)
 
-    def _format_segmented_text(self) -> List[str]:
+    def _format_segmented_text(self) -> List[List[str]]:
         """Formats texts as segmented text based on sentences_per_segment.
 
         Returns:
-            List[str]: Flattened list of segmented text strings.
+            List[List[str]]: List of segments, each a list of sentences.
         """
         segmented_texts = []
         for text in self.texts:
@@ -104,7 +104,7 @@ def _format_segmented_text(self) -> List[str]:
 
             for i in range(0, len(sentences), self.sentences_per_segment):
                 segment = sentences[i:i + self.sentences_per_segment]
-                segmented_texts.extend(segment)
+                segmented_texts.append(segment)
 
         return segmented_texts
 
diff --git a/scripts/step4_generate_output.py b/scripts/step4_generate_output.py
index ad89f61..b9904c0 100644
--- a/scripts/step4_generate_output.py
+++ b/scripts/step4_generate_output.py
@@ -135,6 +135,8 @@ def find_articles_in_file(filepath: str, formatter: TextFormatter) -> (
                                    spacy_model=SPACY_MODEL)
     for articles_filepath in args.input_dir.rglob(args.glob):
         df = find_articles_in_file(articles_filepath, text_formatter)
+        if df is None:
+            continue
         file_name = get_file_name_without_extension(articles_filepath)
-        df.to_csv(os.path.join(args.output_dir, 'articles_to_label_'+file_name+'.csv'), index = False)
-
+        df.to_csv(os.path.join(args.output_dir, 'articles_to_label_'
+                               + file_name+'.csv'), index=False)