Commit ec2f32f

Merge branch 'kubeedge:main' into main

MooreZheng authored Jan 7, 2025
2 parents f1b152a + aefdbeb

Showing 236 changed files with 16,472 additions and 511 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/main.yaml
@@ -31,5 +31,11 @@ jobs:
python -m pip install ${{github.workspace}}/examples/resources/third_party/*
python -m pip install -r ${{github.workspace}}/requirements.txt
- name: Analysing code of core with pylint
# `--max-positional-arguments=10` is set for Python 3.9 to avoid `R0917: too-many-positional-arguments`.
# See details at https://github.com/kubeedge/ianvs/issues/157
run: |
pylint '${{github.workspace}}/core'
if [ "${{ matrix.python-version }}" = "3.9" ]; then
pylint --max-positional-arguments=10 '${{github.workspace}}/core'
else
pylint '${{github.workspace}}/core'
fi
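For context on the R0917 workaround: recent pylint releases (3.3+) flag functions whose positional-argument count exceeds a limit, five by default. A minimal sketch of the kind of signature affected, assuming that default limit; the function and its parameters are hypothetical, not code from this repository:

```python
# Hypothetical example: six positional parameters exceed pylint's default
# limit of five, raising R0917: too-many-positional-arguments.
# `--max-positional-arguments=10` relaxes the limit instead of refactoring.
def build_test_case(name, dataset, paradigm, metrics, workspace, seed):
    """Assemble a test-case record (illustrative only)."""
    return {"name": name, "dataset": dataset, "paradigm": paradigm,
            "metrics": metrics, "workspace": workspace, "seed": seed}
```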
2 changes: 2 additions & 0 deletions .gitignore
@@ -26,6 +26,8 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
dataset/
initial_model/

# PyInstaller
# Usually these files are written by a python script from a template
21 changes: 21 additions & 0 deletions core/common/constant.py
@@ -22,27 +22,42 @@ class DatasetFormat(Enum):
File format of the input dataset.
Currently supported formats: txt, csv, json, jsonl, jsonforllm.
"""

CSV = "csv"
TXT = "txt"
JSON = "json"
JSONL = "jsonl"
JSONFORLLM = "jsonforllm"


class ParadigmType(Enum):
"""
Algorithm paradigm type.
"""

SINGLE_TASK_LEARNING = "singletasklearning"
INCREMENTAL_LEARNING = "incrementallearning"
MULTIEDGE_INFERENCE = "multiedgeinference"
LIFELONG_LEARNING = "lifelonglearning"
FEDERATED_LEARNING = "federatedlearning"
FEDERATED_CLASS_INCREMENTAL_LEARNING = "federatedclassincrementallearning"
JOINT_INFERENCE = "jointinference"


class ModuleType(Enum):
"""
Algorithm module type.
"""

BASEMODEL = "basemodel"

# JOINT INFERENCE
EDGEMODEL = "edgemodel"
CLOUDMODEL = "cloudmodel"

# Dataset Preprocessor
DATA_PROCESSOR = "dataset_processor"

# HEM
HARD_EXAMPLE_MINING = "hard_example_mining"

@@ -63,20 +78,26 @@ class ModuleType(Enum):
UNSEEN_SAMPLE_RECOGNITION = "unseen_sample_recognition"
UNSEEN_SAMPLE_RE_RECOGNITION = "unseen_sample_re_recognition"

# FL_AGG
AGGREGATION = "aggregation"


class SystemMetricType(Enum):
"""
System metric type of ianvs.
"""

SAMPLES_TRANSFER_RATIO = "samples_transfer_ratio"
FWT = "FWT"
BWT = "BWT"
TASK_AVG_ACC = "task_avg_acc"
MATRIX = "MATRIX"
FORGET_RATE = "forget_rate"


class TestObjectType(Enum):
"""
Test object type of ianvs.
"""

ALGORITHMS = "algorithms"
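A quick sketch of how these value-backed enums are typically consumed; the trimmed class below mirrors two of the members added above for illustration, and the lookup string stands in for a value read from a test config:

```python
from enum import Enum

class ParadigmType(Enum):
    """Trimmed mirror of core/common/constant.py, for illustration only."""
    FEDERATED_LEARNING = "federatedlearning"
    JOINT_INFERENCE = "jointinference"

# A paradigm string from a config file resolves to an enum member by value.
paradigm = ParadigmType("jointinference")
assert paradigm is ParadigmType.JOINT_INFERENCE
print(paradigm.name)   # JOINT_INFERENCE
print(paradigm.value)  # jointinference
```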
6 changes: 5 additions & 1 deletion core/common/utils.py
@@ -36,8 +36,12 @@ def is_local_dir(url):

def get_file_format(url):
"""Get file format of the url."""
return os.path.splitext(url)[-1][1:]
# Check if the url points to metadata.json (the JSONFORLLM dataset format)
if os.path.basename(url) == "metadata.json":
return "jsonforllm"

# Otherwise, derive the format from the file extension
return os.path.splitext(url)[-1][1:]

def parse_kwargs(func, **kwargs):
"""Get valid parameters of the func in kwargs."""
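The branch order above matters: the metadata.json special case must run before the generic extension check, which would otherwise classify the file as plain "json". A self-contained sketch of the patched function, with hypothetical paths:

```python
import os

def get_file_format(url):
    """Get file format of the url (as patched above)."""
    # metadata.json is special-cased to the JSONFORLLM dataset format
    if os.path.basename(url) == "metadata.json":
        return "jsonforllm"
    # everything else is classified by its file extension
    return os.path.splitext(url)[-1][1:]

print(get_file_format("/data/llm_bench/metadata.json"))  # jsonforllm
print(get_file_format("/data/llm_bench/train.jsonl"))    # jsonl
```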
113 changes: 67 additions & 46 deletions core/storymanager/rank/rank.py
@@ -35,15 +35,12 @@ class Rank:

def __init__(self, config):
self.sort_by: list = []
self.visualization: dict = {
"mode": "selected_only",
"method": "print_table"
}
self.visualization: dict = {"mode": "selected_only", "method": "print_table"}
self.selected_dataitem: dict = {
"paradigms": ["all"],
"modules": ["all"],
"hyperparameters": ["all"],
"metrics": ["all"]
"metrics": ["all"],
}
self.save_mode: str = "selected_and_all"

@@ -62,15 +59,21 @@ def _parse_config(self, config):

def _check_fields(self):
if not self.sort_by and not isinstance(self.sort_by, list):
raise ValueError(f"rank's sort_by({self.sort_by}) must be provided and be list type.")
raise ValueError(
f"rank's sort_by({self.sort_by}) must be provided and be list type."
)

if not self.visualization and not isinstance(self.visualization, dict):
raise ValueError(f"rank's visualization({self.visualization}) "
f"must be provided and be dict type.")
raise ValueError(
f"rank's visualization({self.visualization}) "
f"must be provided and be dict type."
)

if not self.selected_dataitem and not isinstance(self.selected_dataitem, dict):
raise ValueError(f"rank's selected_dataitem({self.selected_dataitem}) "
f"must be provided and be dict type.")
raise ValueError(
f"rank's selected_dataitem({self.selected_dataitem}) "
f"must be provided and be dict type."
)

if not self.selected_dataitem.get("paradigms"):
raise ValueError("not found paradigms of selected_dataitem in rank.")
@@ -82,8 +85,10 @@ def _check_fields(self):
raise ValueError("not found metrics of selected_dataitem in rank.")

if not self.save_mode and not isinstance(self.save_mode, list):
raise ValueError(f"rank's save_mode({self.save_mode}) "
f"must be provided and be list type.")
raise ValueError(
f"rank's save_mode({self.save_mode}) "
f"must be provided and be list type."
)

@classmethod
def _get_all_metric_names(cls, test_results) -> list:
@@ -133,50 +138,56 @@ def _sort_all_df(self, all_df, all_metric_names):

if metric_name not in all_metric_names:
continue

sort_metric_list.append(metric_name)
is_ascend_list.append(ele.get(metric_name) == "ascend")

return all_df.sort_values(by=sort_metric_list, ascending=is_ascend_list)

def _get_all(self, test_cases, test_results) -> pd.DataFrame:
all_df = pd.DataFrame(columns=self.all_df_header)

for i, test_case in enumerate(test_cases):
all_df.loc[i] = [np.NAN for i in range(len(self.all_df_header))]
# fill name column of algorithm
algorithm = test_case.algorithm
all_df.loc[i][0] = algorithm.name
# fill metric columns of algorithm
for metric_name in test_results[test_case.id][0]:
all_df.loc[i][metric_name] = test_results[test_case.id][0].get(metric_name)
test_result = test_results[test_case.id][0]

# fill paradigm column of algorithm
all_df.loc[i]["paradigm"] = algorithm.paradigm_type
# add algorithm, paradigm, time, url of algorithm
row_data = {
"algorithm": algorithm.name,
"paradigm": algorithm.paradigm_type,
"time": test_results[test_case.id][1],
"url": test_case.output_dir
}

# fill module columns of algorithm
for module_type, module in algorithm.modules.items():
all_df.loc[i][module_type] = module.name
# add metric of algorithm
row_data.update(test_result)

# fill hyperparameters columns of algorithm modules
hps = self._get_algorithm_hyperparameters(algorithm)
# add module of algorithm
row_data.update({
module_type: module.name
for module_type, module in algorithm.modules.items()
})

# pylint: disable=C0103
for k, v in hps.items():
all_df.loc[i][k] = v
# fill time and output dir of testcase
all_df.loc[i][-2:] = [test_results[test_case.id][1], test_case.output_dir]
# add hyperparameters of algorithm modules
row_data.update(self._get_algorithm_hyperparameters(algorithm))

if utils.is_local_file(self.all_rank_file):
old_df = pd.read_csv(self.all_rank_file, delim_whitespace=True, index_col=0)
all_df = all_df.append(old_df)
# fill data
all_df.loc[i] = row_data

new_df = self._concat_existing_data(all_df)

return self._sort_all_df(all_df, self._get_all_metric_names(test_results))
return self._sort_all_df(new_df, self._get_all_metric_names(test_results))

def _concat_existing_data(self, new_df):
if utils.is_local_file(self.all_rank_file):
old_df = pd.read_csv(self.all_rank_file, index_col=0)
new_df = pd.concat([old_df, new_df])
return new_df

def _save_all(self):
# pylint: disable=E1101
all_df = copy.deepcopy(self.all_df)
all_df.index = pd.np.arange(1, len(all_df) + 1)
all_df.to_csv(self.all_rank_file, index_label="rank", encoding="utf-8", sep=" ")
all_df.index = np.arange(1, len(all_df) + 1)
all_df.to_csv(self.all_rank_file, index_label="rank", encoding="utf-8")

def _get_selected(self, test_cases, test_results) -> pd.DataFrame:
module_types = self.selected_dataitem.get("modules")
@@ -191,7 +202,15 @@ def _get_selected(self, test_cases, test_results) -> pd.DataFrame:
if metric_names == ["all"]:
metric_names = self._get_all_metric_names(test_results)

header = ["algorithm", *metric_names, "paradigm", *module_types, *hps_names, "time", "url"]
header = [
"algorithm",
*metric_names,
"paradigm",
*module_types,
*hps_names,
"time",
"url",
]

all_df = copy.deepcopy(self.all_df)
selected_df = pd.DataFrame(all_df, columns=header)
@@ -205,25 +224,27 @@ def _save_selected(self, test_cases, test_results):
def _save_selected(self, test_cases, test_results):
# pylint: disable=E1101
selected_df = self._get_selected(test_cases, test_results)
selected_df.index = pd.np.arange(1, len(selected_df) + 1)
selected_df.to_csv(self.selected_rank_file, index_label="rank", encoding="utf-8", sep=" ")
selected_df.index = np.arange(1, len(selected_df) + 1)
selected_df.to_csv(self.selected_rank_file, index_label="rank", encoding="utf-8")

def _draw_pictures(self, test_cases, test_results):
# pylint: disable=E1101
for test_case in test_cases:
out_put = test_case.output_dir
test_result = test_results[test_case.id][0]
matrix = test_result.get('Matrix')
#print(out_put)
matrix = test_result.get("Matrix")
for key in matrix.keys():
draw_heatmap_picture(out_put, key, matrix[key])

def _prepare(self, test_cases, test_results, output_dir):
all_metric_names = self._get_all_metric_names(test_results)
all_hps_names = self._get_all_hps_names(test_cases)
all_module_types = self._get_all_module_types(test_cases)
self.all_df_header = ["algorithm", *all_metric_names, "paradigm",
*all_module_types, *all_hps_names, "time", "url"]
self.all_df_header = [
"algorithm", *all_metric_names,
"paradigm", *all_module_types,
*all_hps_names, "time", "url"
]

rank_output_dir = os.path.join(output_dir, "rank")
if not utils.is_local_dir(rank_output_dir):
@@ -246,7 +267,6 @@ def save(self, test_cases, test_results, output_dir):
output_dir: string
"""

self._prepare(test_cases, test_results, output_dir)

if self.save_mode == "selected_and_all":
@@ -276,4 +296,5 @@ def plot(self):
except Exception as err:
raise RuntimeError(
f"process visualization(method={method}) of "
f"rank file({self.selected_rank_file}) failed, error: {err}.") from err
f"rank file({self.selected_rank_file}) failed, error: {err}."
) from err
3 changes: 1 addition & 2 deletions core/storymanager/visualization/visualization.py
@@ -23,7 +23,7 @@
def print_table(rank_file):
""" print rank of the test"""
with open(rank_file, "r", encoding="utf-8") as file:
table = from_csv(file)
table = from_csv(file, delimiter=",")
print(table)

def draw_heatmap_picture(output, title, matrix):
@@ -40,7 +40,6 @@ def draw_heatmap_picture(output, title, matrix):
plt.title(title, fontsize=15)
plt.colorbar(format='%.2f')
output_dir = os.path.join(output, f"output/{title}-heatmap.png")
#print(output_dir)
plt.savefig(output_dir)
plt.show()

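The `delimiter=","` pairs with the rank-file change above: `to_csv` now writes pandas' default comma separator (the old `sep=" "` is gone), so `from_csv` must parse commas to match. A round-trip sketch, assuming a hypothetical all_rank.csv in the working directory:

```python
import pandas as pd
from prettytable import from_csv

df = pd.DataFrame({"algorithm": ["fpn"], "accuracy": [0.85]}, index=[1])
df.to_csv("all_rank.csv", index_label="rank", encoding="utf-8")  # comma-separated

with open("all_rank.csv", "r", encoding="utf-8") as file:
    print(from_csv(file, delimiter=","))  # renders the comma-separated rank file
```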
25 changes: 24 additions & 1 deletion core/testcasecontroller/algorithm/algorithm.py
@@ -24,6 +24,9 @@
IncrementalLearning,
MultiedgeInference,
LifelongLearning,
FederatedLearning,
FederatedClassIncrementalLearning,
JointInference
)
from core.testcasecontroller.generation_assistant import get_full_combinations

@@ -64,12 +67,24 @@ def __init__(self, name, config):
"train_ratio": 0.8,
"splitting_method": "default"
}
self.fl_data_setting: dict = {
"train_ratio": 1.0,
"splitting_method": "default",
"data_partition": "iid",
"non_iid_ratio": 0.6,
"label_data_ratio": 1.0
}

self.initial_model_url: str = ""
self.modules: list = []
self.modules_list = None
self.mode: str = ""
self.quantization_type: str = ""
self.llama_quantize_path: str = ""
self._parse_config(config)
self._load_third_party_packages()

# pylint: disable=R0911
def paradigm(self, workspace: str, **kwargs):
"""
get test process of AI algorithm paradigm.
@@ -91,7 +106,6 @@ def paradigm(self, workspace: str, **kwargs):
# pylint: disable=C0103
for k, v in self.__dict__.items():
config.update({k: v})

if self.paradigm_type == ParadigmType.SINGLE_TASK_LEARNING.value:
return SingleTaskLearning(workspace, **config)

@@ -104,6 +118,15 @@ if self.paradigm_type == ParadigmType.LIFELONG_LEARNING.value:
if self.paradigm_type == ParadigmType.LIFELONG_LEARNING.value:
return LifelongLearning(workspace, **config)

if self.paradigm_type == ParadigmType.FEDERATED_LEARNING.value:
return FederatedLearning(workspace, **config)

if self.paradigm_type == ParadigmType.FEDERATED_CLASS_INCREMENTAL_LEARNING.value:
return FederatedClassIncrementalLearning(workspace, **config)

if self.paradigm_type == ParadigmType.JOINT_INFERENCE.value:
return JointInference(workspace, **config)

return None

def _check_fields(self):
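The three new branches extend the paradigm dispatch, which is why the hunk adds `# pylint: disable=R0911` (too-many-return-statements). An equivalent, flatter alternative is a string-to-class lookup table; the sketch below is self-contained, and its class bodies are hypothetical stand-ins rather than the real paradigm implementations:

```python
class FederatedLearning:
    def __init__(self, workspace, **config):
        self.workspace = workspace

class JointInference:
    def __init__(self, workspace, **config):
        self.workspace = workspace

# One lookup table instead of a chain of if-returns.
PARADIGMS = {
    "federatedlearning": FederatedLearning,
    "jointinference": JointInference,
}

def build_paradigm(paradigm_type, workspace, **config):
    """Return a paradigm runner for paradigm_type, or None if unsupported."""
    cls = PARADIGMS.get(paradigm_type)
    return cls(workspace, **config) if cls else None

print(build_paradigm("jointinference", "./workspace"))
```

A table keeps each added paradigm a one-line change and sidesteps the growing if-chain that prompted the pylint suppression.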