Commit ec2f32f

Merge branch 'kubeedge:main' into main

MooreZheng authored Jan 7, 2025
2 parents f1b152a + aefdbeb

Showing 236 changed files with 16,472 additions and 511 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/main.yaml
@@ -31,5 +31,11 @@ jobs:
python -m pip install ${{github.workspace}}/examples/resources/third_party/*
python -m pip install -r ${{github.workspace}}/requirements.txt
- name: Analysing code of core with pylint
# `--max-positional-arguments=10` is set for Python 3.9 to avoid `R0917: too-many-positional-arguments`.
# See details at https://github.com/kubeedge/ianvs/issues/157
run: |
pylint '${{github.workspace}}/core'
if [ "${{ matrix.python-version }}" = "3.9" ]; then
pylint --max-positional-arguments=10 '${{github.workspace}}/core'
else
pylint '${{github.workspace}}/core'
fi
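For context on the R0917 workaround: recent pylint releases (3.3+) flag functions whose positional-argument count exceeds a limit, five by default. A minimal sketch of the kind of signature affected, assuming that default limit; the function and its parameters are hypothetical, not code from this repository:

```python
# Hypothetical example: six positional parameters exceed pylint's default
# limit of five, raising R0917: too-many-positional-arguments.
# `--max-positional-arguments=10` relaxes the limit instead of refactoring.
def build_test_case(name, dataset, paradigm, metrics, workspace, seed):
    """Assemble a test-case record (illustrative only)."""
    return {"name": name, "dataset": dataset, "paradigm": paradigm,
            "metrics": metrics, "workspace": workspace, "seed": seed}
```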
2 changes: 2 additions & 0 deletions .gitignore
@@ -26,6 +26,8 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
dataset/
initial_model/

# PyInstaller
# Usually these files are written by a python script from a template
21 changes: 21 additions & 0 deletions core/common/constant.py
@@ -22,27 +22,42 @@ class DatasetFormat(Enum):
File format of the input dataset.
Currently supported formats: txt, csv, json, jsonl, jsonforllm.
"""

CSV = "csv"
TXT = "txt"
JSON = "json"
JSONL = "jsonl"
JSONFORLLM = "jsonforllm"


class ParadigmType(Enum):
"""
Algorithm paradigm type.
"""

SINGLE_TASK_LEARNING = "singletasklearning"
INCREMENTAL_LEARNING = "incrementallearning"
MULTIEDGE_INFERENCE = "multiedgeinference"
LIFELONG_LEARNING = "lifelonglearning"
FEDERATED_LEARNING = "federatedlearning"
FEDERATED_CLASS_INCREMENTAL_LEARNING = "federatedclassincrementallearning"
JOINT_INFERENCE = "jointinference"


class ModuleType(Enum):
"""
Algorithm module type.
"""

BASEMODEL = "basemodel"

# JOINT INFERENCE
EDGEMODEL = "edgemodel"
CLOUDMODEL = "cloudmodel"

# Dataset Preprocessor
DATA_PROCESSOR = "dataset_processor"

# HEM
HARD_EXAMPLE_MINING = "hard_example_mining"

@@ -63,20 +78,26 @@ class ModuleType(Enum):
UNSEEN_SAMPLE_RECOGNITION = "unseen_sample_recognition"
UNSEEN_SAMPLE_RE_RECOGNITION = "unseen_sample_re_recognition"

# FL_AGG
AGGREGATION = "aggregation"


class SystemMetricType(Enum):
"""
System metric type of ianvs.
"""

SAMPLES_TRANSFER_RATIO = "samples_transfer_ratio"
FWT = "FWT"
BWT = "BWT"
TASK_AVG_ACC = "task_avg_acc"
MATRIX = "MATRIX"
FORGET_RATE = "forget_rate"


class TestObjectType(Enum):
"""
Test object type of ianvs.
"""

ALGORITHMS = "algorithms"
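A quick sketch of how these value-backed enums are typically consumed; the trimmed class below mirrors two of the members added above for illustration, and the lookup string stands in for a value read from a test config:

```python
from enum import Enum

class ParadigmType(Enum):
    """Trimmed mirror of core/common/constant.py, for illustration only."""
    FEDERATED_LEARNING = "federatedlearning"
    JOINT_INFERENCE = "jointinference"

# A paradigm string from a config file resolves to an enum member by value.
paradigm = ParadigmType("jointinference")
assert paradigm is ParadigmType.JOINT_INFERENCE
print(paradigm.name)   # JOINT_INFERENCE
print(paradigm.value)  # jointinference
```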
6 changes: 5 additions & 1 deletion core/common/utils.py
@@ -36,8 +36,12 @@ def is_local_dir(url):

def get_file_format(url):
"""Get file format of the url."""
return os.path.splitext(url)[-1][1:]
# Check if the url points to metadata.json (the JSONFORLLM dataset format)
if os.path.basename(url) == "metadata.json":
return "jsonforllm"

# Otherwise, derive the format from the file extension
return os.path.splitext(url)[-1][1:]

def parse_kwargs(func, **kwargs):
"""Get valid parameters of the func in kwargs."""
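The branch order above matters: the metadata.json special case must run before the generic extension check, which would otherwise classify the file as plain "json". A self-contained sketch of the patched function, with hypothetical paths:

```python
import os

def get_file_format(url):
    """Get file format of the url (as patched above)."""
    # metadata.json is special-cased to the JSONFORLLM dataset format
    if os.path.basename(url) == "metadata.json":
        return "jsonforllm"
    # everything else is classified by its file extension
    return os.path.splitext(url)[-1][1:]

print(get_file_format("/data/llm_bench/metadata.json"))  # jsonforllm
print(get_file_format("/data/llm_bench/train.jsonl"))    # jsonl
```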
113 changes: 67 additions & 46 deletions core/storymanager/rank/rank.py
@@ -35,15 +35,12 @@ class Rank:

def __init__(self, config):
self.sort_by: list = []
self.visualization: dict = {
"mode": "selected_only",
"method": "print_table"
}
self.visualization: dict = {"mode": "selected_only", "method": "print_table"}
self.selected_dataitem: dict = {
"paradigms": ["all"],
"modules": ["all"],
"hyperparameters": ["all"],
"metrics": ["all"]
"metrics": ["all"],
}
self.save_mode: str = "selected_and_all"

@@ -62,15 +59,21 @@ def _parse_config(self, config):

def _check_fields(self):
if not self.sort_by and not isinstance(self.sort_by, list):
raise ValueError(f"rank's sort_by({self.sort_by}) must be provided and be list type.")
raise ValueError(
f"rank's sort_by({self.sort_by}) must be provided and be list type."
)

if not self.visualization and not isinstance(self.visualization, dict):
raise ValueError(f"rank's visualization({self.visualization}) "
f"must be provided and be dict type.")
raise ValueError(
f"rank's visualization({self.visualization}) "
f"must be provided and be dict type."
)

if not self.selected_dataitem and not isinstance(self.selected_dataitem, dict):
raise ValueError(f"rank's selected_dataitem({self.selected_dataitem}) "
f"must be provided and be dict type.")
raise ValueError(
f"rank's selected_dataitem({self.selected_dataitem}) "
f"must be provided and be dict type."
)

if not self.selected_dataitem.get("paradigms"):
raise ValueError("not found paradigms of selected_dataitem in rank.")
@@ -82,8 +85,10 @@ def _check_fields(self):
raise ValueError("not found metrics of selected_dataitem in rank.")

if not self.save_mode and not isinstance(self.save_mode, list):
raise ValueError(f"rank's save_mode({self.save_mode}) "
f"must be provided and be list type.")
raise ValueError(
f"rank's save_mode({self.save_mode}) "
f"must be provided and be list type."
)

@classmethod
def _get_all_metric_names(cls, test_results) -> list:
@@ -133,50 +138,56 @@ def _sort_all_df(self, all_df, all_metric_names):

if metric_name not in all_metric_names:
continue

sort_metric_list.append(metric_name)
is_ascend_list.append(ele.get(metric_name) == "ascend")

return all_df.sort_values(by=sort_metric_list, ascending=is_ascend_list)

def _get_all(self, test_cases, test_results) -> pd.DataFrame:
all_df = pd.DataFrame(columns=self.all_df_header)

for i, test_case in enumerate(test_cases):
all_df.loc[i] = [np.NAN for i in range(len(self.all_df_header))]
# fill name column of algorithm
algorithm = test_case.algorithm
all_df.loc[i][0] = algorithm.name
# fill metric columns of algorithm
for metric_name in test_results[test_case.id][0]:
all_df.loc[i][metric_name] = test_results[test_case.id][0].get(metric_name)
test_result = test_results[test_case.id][0]

# fill paradigm column of algorithm
all_df.loc[i]["paradigm"] = algorithm.paradigm_type
# add algorithm, paradigm, time, url of algorithm
row_data = {
"algorithm": algorithm.name,
"paradigm": algorithm.paradigm_type,
"time": test_results[test_case.id][1],
"url": test_case.output_dir
}

# fill module columns of algorithm
for module_type, module in algorithm.modules.items():
all_df.loc[i][module_type] = module.name
# add metric of algorithm
row_data.update(test_result)

# fill hyperparameters columns of algorithm modules
hps = self._get_algorithm_hyperparameters(algorithm)
# add module of algorithm
row_data.update({
module_type: module.name
for module_type, module in algorithm.modules.items()
})

# pylint: disable=C0103
for k, v in hps.items():
all_df.loc[i][k] = v
# fill time and output dir of testcase
all_df.loc[i][-2:] = [test_results[test_case.id][1], test_case.output_dir]
# add hyperparameters of algorithm modules
row_data.update(self._get_algorithm_hyperparameters(algorithm))

if utils.is_local_file(self.all_rank_file):
old_df = pd.read_csv(self.all_rank_file, delim_whitespace=True, index_col=0)
all_df = all_df.append(old_df)
# fill data
all_df.loc[i] = row_data

new_df = self._concat_existing_data(all_df)

return self._sort_all_df(all_df, self._get_all_metric_names(test_results))
return self._sort_all_df(new_df, self._get_all_metric_names(test_results))

def _concat_existing_data(self, new_df):
if utils.is_local_file(self.all_rank_file):
old_df = pd.read_csv(self.all_rank_file, index_col=0)
new_df = pd.concat([old_df, new_df])
return new_df

def _save_all(self):
# pylint: disable=E1101
all_df = copy.deepcopy(self.all_df)
all_df.index = pd.np.arange(1, len(all_df) + 1)
all_df.to_csv(self.all_rank_file, index_label="rank", encoding="utf-8", sep=" ")
all_df.index = np.arange(1, len(all_df) + 1)
all_df.to_csv(self.all_rank_file, index_label="rank", encoding="utf-8")

def _get_selected(self, test_cases, test_results) -> pd.DataFrame:
module_types = self.selected_dataitem.get("modules")
@@ -191,7 +202,15 @@ def _get_selected(self, test_cases, test_results) -> pd.DataFrame:
if metric_names == ["all"]:
metric_names = self._get_all_metric_names(test_results)

header = ["algorithm", *metric_names, "paradigm", *module_types, *hps_names, "time", "url"]
header = [
"algorithm",
*metric_names,
"paradigm",
*module_types,
*hps_names,
"time",
"url",
]

all_df = copy.deepcopy(self.all_df)
selected_df = pd.DataFrame(all_df, columns=header)
@@ -205,25 +224,27 @@ def _save_selected(self, test_cases, test_results):
def _save_selected(self, test_cases, test_results):
# pylint: disable=E1101
selected_df = self._get_selected(test_cases, test_results)
selected_df.index = pd.np.arange(1, len(selected_df) + 1)
selected_df.to_csv(self.selected_rank_file, index_label="rank", encoding="utf-8", sep=" ")
selected_df.index = np.arange(1, len(selected_df) + 1)
selected_df.to_csv(self.selected_rank_file, index_label="rank", encoding="utf-8")

def _draw_pictures(self, test_cases, test_results):
# pylint: disable=E1101
for test_case in test_cases:
out_put = test_case.output_dir
test_result = test_results[test_case.id][0]
matrix = test_result.get('Matrix')
#print(out_put)
matrix = test_result.get("Matrix")
for key in matrix.keys():
draw_heatmap_picture(out_put, key, matrix[key])

def _prepare(self, test_cases, test_results, output_dir):
all_metric_names = self._get_all_metric_names(test_results)
all_hps_names = self._get_all_hps_names(test_cases)
all_module_types = self._get_all_module_types(test_cases)
self.all_df_header = ["algorithm", *all_metric_names, "paradigm",
*all_module_types, *all_hps_names, "time", "url"]
self.all_df_header = [
"algorithm", *all_metric_names,
"paradigm", *all_module_types,
*all_hps_names, "time", "url"
]

rank_output_dir = os.path.join(output_dir, "rank")
if not utils.is_local_dir(rank_output_dir):
@@ -246,7 +267,6 @@ def save(self, test_cases, test_results, output_dir):
output_dir: string
"""

self._prepare(test_cases, test_results, output_dir)

if self.save_mode == "selected_and_all":
@@ -276,4 +296,5 @@ def plot(self):
except Exception as err:
raise RuntimeError(
f"process visualization(method={method}) of "
f"rank file({self.selected_rank_file}) failed, error: {err}.") from err
f"rank file({self.selected_rank_file}) failed, error: {err}."
) from err
3 changes: 1 addition & 2 deletions core/storymanager/visualization/visualization.py
@@ -23,7 +23,7 @@
def print_table(rank_file):
""" print rank of the test"""
with open(rank_file, "r", encoding="utf-8") as file:
table = from_csv(file)
table = from_csv(file, delimiter=",")
print(table)

def draw_heatmap_picture(output, title, matrix):
@@ -40,7 +40,6 @@ def draw_heatmap_picture(output, title, matrix):
plt.title(title, fontsize=15)
plt.colorbar(format='%.2f')
output_dir = os.path.join(output, f"output/{title}-heatmap.png")
#print(output_dir)
plt.savefig(output_dir)
plt.show()

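The `delimiter=","` pairs with the rank-file change above: `to_csv` now writes pandas' default comma separator (the old `sep=" "` is gone), so `from_csv` must parse commas to match. A round-trip sketch, assuming a hypothetical all_rank.csv in the working directory:

```python
import pandas as pd
from prettytable import from_csv

df = pd.DataFrame({"algorithm": ["fpn"], "accuracy": [0.85]}, index=[1])
df.to_csv("all_rank.csv", index_label="rank", encoding="utf-8")  # comma-separated

with open("all_rank.csv", "r", encoding="utf-8") as file:
    print(from_csv(file, delimiter=","))  # renders the comma-separated rank file
```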
25 changes: 24 additions & 1 deletion core/testcasecontroller/algorithm/algorithm.py
@@ -24,6 +24,9 @@
IncrementalLearning,
MultiedgeInference,
LifelongLearning,
FederatedLearning,
FederatedClassIncrementalLearning,
JointInference
)
from core.testcasecontroller.generation_assistant import get_full_combinations

@@ -64,12 +67,24 @@ def __init__(self, name, config):
"train_ratio": 0.8,
"splitting_method": "default"
}
self.fl_data_setting: dict = {
"train_ratio": 1.0,
"splitting_method": "default",
"data_partition": "iid",
"non_iid_ratio": 0.6,
"label_data_ratio": 1.0
}

self.initial_model_url: str = ""
self.modules: list = []
self.modules_list = None
self.mode: str = ""
self.quantization_type: str = ""
self.llama_quantize_path: str = ""
self._parse_config(config)
self._load_third_party_packages()

# pylint: disable=R0911
def paradigm(self, workspace: str, **kwargs):
"""
get test process of AI algorithm paradigm.
@@ -91,7 +106,6 @@ def paradigm(self, workspace: str, **kwargs):
# pylint: disable=C0103
for k, v in self.__dict__.items():
config.update({k: v})

if self.paradigm_type == ParadigmType.SINGLE_TASK_LEARNING.value:
return SingleTaskLearning(workspace, **config)

@@ -104,6 +118,15 @@ if self.paradigm_type == ParadigmType.LIFELONG_LEARNING.value:
if self.paradigm_type == ParadigmType.LIFELONG_LEARNING.value:
return LifelongLearning(workspace, **config)

if self.paradigm_type == ParadigmType.FEDERATED_LEARNING.value:
return FederatedLearning(workspace, **config)

if self.paradigm_type == ParadigmType.FEDERATED_CLASS_INCREMENTAL_LEARNING.value:
return FederatedClassIncrementalLearning(workspace, **config)

if self.paradigm_type == ParadigmType.JOINT_INFERENCE.value:
return JointInference(workspace, **config)

return None

def _check_fields(self):
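The three new branches extend the paradigm dispatch, which is why the hunk adds `# pylint: disable=R0911` (too-many-return-statements). An equivalent, flatter alternative is a string-to-class lookup table; the sketch below is self-contained, and its class bodies are hypothetical stand-ins rather than the real paradigm implementations:

```python
class FederatedLearning:
    def __init__(self, workspace, **config):
        self.workspace = workspace

class JointInference:
    def __init__(self, workspace, **config):
        self.workspace = workspace

# One lookup table instead of a chain of if-returns.
PARADIGMS = {
    "federatedlearning": FederatedLearning,
    "jointinference": JointInference,
}

def build_paradigm(paradigm_type, workspace, **config):
    """Return a paradigm runner for paradigm_type, or None if unsupported."""
    cls = PARADIGMS.get(paradigm_type)
    return cls(workspace, **config) if cls else None

print(build_paradigm("jointinference", "./workspace"))
```

A table keeps each added paradigm a one-line change and sidesteps the growing if-chain that prompted the pylint suppression.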