Merge pull request #71 from xiaomoguhzz/unknown-task-process
Showing 52 changed files with 4,917 additions and 0 deletions.
@@ -0,0 +1,69 @@
# Copyright 2022 The KubeEdge Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""main"""

import sys
import argparse

from core.common.log import LOGGER
from core.common import utils
from core.cmd.obj import BenchmarkingJob
from core.__version__ import __version__


def main():
    """Main command-line interface to ianvs."""
    try:
        parser = _generate_parser()
        args = parser.parse_args()
        config_file = args.benchmarking_config_file
        if not utils.is_local_file(config_file):
            raise SystemExit(f"benchmarking config file({config_file}) not found locally")

        config = utils.yaml2dict(args.benchmarking_config_file)
        job = BenchmarkingJob(config[str.lower(BenchmarkingJob.__name__)])
        job.run()

        LOGGER.info("benchmarkingjob completed successfully.")
    except Exception as err:
        raise RuntimeError(f"benchmarkingjob failed, error: {err}.") from err


def _generate_parser():
    parser = argparse.ArgumentParser(description='AI Benchmarking Tool')
    parser.prog = "ianvs"

    parser.add_argument("-f",
                        "--benchmarking_config_file",
                        nargs="?",
                        type=str,
                        help="run a benchmarking job, "
                             "and the benchmarking config file must be a yaml/yml file.")

    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=__version__,
                        help='show program version info and exit.')

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    return parser


if __name__ == '__main__':
    main()
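
For readers who want to drive the same flow without the CLI, the sketch below mirrors what main() does: parse the benchmarking YAML into a dict, key into it with the lowercased class name, and run the job. It is a minimal sketch, not part of this diff, and assumes the ianvs core package is importable and that the example config path added in this PR exists.

# Minimal sketch of the programmatic equivalent of the CLI above.
# Assumes the ianvs `core` package is importable and the example config exists.
from core.common import utils
from core.cmd.obj import BenchmarkingJob

config_file = "examples/yaoba/singletask_learning_boost/benchmarkingjob.yaml"
config = utils.yaml2dict(config_file)             # parse the YAML into a dict
job = BenchmarkingJob(config["benchmarkingjob"])  # top-level key is the lowercased class name
job.run()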
147 changes: 147 additions & 0 deletions
...casecontroller/algorithm/paradigm/singletask_learning/singletask_learning_active_boost.py
@@ -0,0 +1,147 @@
import json
import os
import os.path as osp

from core.common.constant import ParadigmType
from examples.yaoba.singletask_learning_boost.resource.utils.infer_and_error import infer_anno, merge_predict_results, \
    compute_error, gen_txt_according_json, get_new_train_json
from examples.yaoba.singletask_learning_boost.resource.utils.transform_unkonwn import aug_image_bboxes
from .singletask_learning import SingleTaskLearning


class SingleTaskLearningACBoost(SingleTaskLearning):

    def __init__(self, workspace, **kwargs):
        super(SingleTaskLearningACBoost, self).__init__(workspace, **kwargs)

    def run(self):
        job = self.build_paradigm_job(str(ParadigmType.SINGLE_TASK_LEARNING.value))
        known_dataset_json, unknown_dataset_json, img_path = self._prepare_for_calculate_weights()
        base_config_path = osp.join(job.resource_dir, "base_config.py")
        train_script_path = osp.join(job.resource_dir, "train.py")
        ac_boost_training_json, aug_img_folder = self._calculate_weights_for_training(
            base_config=base_config_path,
            known_json_path=known_dataset_json,
            unknown_json_path=unknown_dataset_json,
            img_path=img_path,
            tmp_path=os.path.join(job.work_dir, "tmp_folder"),
            train_script_path=train_script_path
        )
        trained_model = self._ac_boost_train(job, ac_boost_training_json, aug_img_folder)
        inference_result = self._inference(job, trained_model)
        self.system_metric_info['use_raw'] = True
        return inference_result, self.system_metric_info

    def _ac_boost_train(self, job, training_anno, training_img_folder):
        train_output_model_path = job.train((training_img_folder, training_anno))
        trained_model_path = job.save(train_output_model_path)
        return trained_model_path

    def _inference(self, job, trained_model):
        # Load the test set annotations and collect the image paths
        img_prefix = self.dataset.image_folder_url
        ann_file_path = self.dataset.test_url
        ann_file = json.load(open(ann_file_path, mode="r", encoding="utf-8"))
        test_set = []
        for i in ann_file['images']:
            test_set.append(os.path.join(img_prefix, i['file_name']))

        job.load(trained_model)
        infer_res = job.predict(test_set)
        return infer_res

    def _prepare_for_calculate_weights(self):
        known_dataset_json = self.dataset.known_dataset_url
        unknown_dataset_json = self.dataset.unknown_dataset_url
        img_path = self.dataset.image_folder_url
        return known_dataset_json, unknown_dataset_json, img_path

    def _calculate_weights_for_training(self,
                                        base_config,
                                        known_json_path,
                                        unknown_json_path,
                                        img_path,
                                        tmp_path,
                                        train_script_path):
        r"""Generate the instance weights required for unknown-task training. In object detection,
        an instance is a bounding box, i.e., a training weight is generated for each bounding box.

        Args:
            base_config (str): path of the config file for training the known/unknown model
            known_json_path (str): path of the JSON file for training the known model
            unknown_json_path (str): path of the JSON file for training the unknown model
            img_path (str): image path of the training, validation, and test sets
            tmp_path (str): path for saving temporary files, including augmented images, training JSON files, etc.
            train_script_path (str): path of the mmdet training script

        Returns:
            new_training_weight (str): JSON file with instance weights for unknown-task training,
                which contains both the known and unknown training sets.
            aug_img_folder (str): the image path required for training the model with the instance-weight JSON file.
        """
        if not os.path.exists(tmp_path):
            os.mkdir(tmp_path)

        # Define the necessary paths
        aug_img_folder = osp.join(tmp_path, "aug_img_folder")  # directory for saving augmented images
        known_model_folder = osp.join(tmp_path, "known_model")  # directory for saving known-model training results
        unknown_model_folder = osp.join(tmp_path, "unknown_model")  # directory for saving unknown-model training results
        aug_unknown_json = osp.join(tmp_path, 'aug_unknown.json')  # JSON file path of the unknown data after augmentation

        # Augment the unknown data and write the augmented images and annotations to tmp_path
        aug_image_bboxes(
            anno=unknown_json_path,
            augs=[('flip', 1), ('brightness', 0.6), ('flip', -1)],
            image_path=img_path,
            out_path=tmp_path
        )

        # Train the known model
        known_model_training_task = f"python {train_script_path} " \
                                    f"{base_config} --seed 1 --deterministic --cfg-options " \
                                    f"data.train.ann_file={known_json_path} " \
                                    f"data.train.img_prefix={img_path} " \
                                    f"work_dir={known_model_folder}"
        os.system(known_model_training_task)

        # Train the unknown model
        unknown_model_training_task = f"python {train_script_path} " \
                                      f"{base_config} --seed 1 --deterministic --cfg-options " \
                                      f"data.train.ann_file={aug_unknown_json} " \
                                      f"data.train.img_prefix={aug_img_folder} " \
                                      f"work_dir={unknown_model_folder}"
        os.system(unknown_model_training_task)

        # Use the known model trained above to run inference on the unknown data
        infer_anno(
            config_file=base_config,
            checkpoint_file=osp.join(known_model_folder, 'latest.pth'),
            img_path=aug_img_folder,
            anno_path=aug_unknown_json,
            out_path=osp.join(tmp_path, 'unknown_infer_results.json')
        )

        # Use the unknown model trained above to run inference on the known data
        infer_anno(
            config_file=base_config,
            checkpoint_file=osp.join(unknown_model_folder, 'latest.pth'),
            img_path=aug_img_folder,
            anno_path=known_json_path,
            out_path=osp.join(tmp_path, 'known_infer_results.json')
        )

        # Merge the prediction results and compute the per-instance error
        merge_predict_results(
            result1=osp.join(tmp_path, 'unknown_infer_results.json'),
            result2=osp.join(tmp_path, 'known_infer_results.json'),
            out_dir=osp.join(tmp_path, "merge_predict_result.json")
        )
        new_json = compute_error(osp.join(tmp_path, "merge_predict_result.json"))

        # Generate the weights of the overall training samples based on the prediction error
        gen_txt_according_json(known_json_path, osp.join(tmp_path, 'known.txt'))
        gen_txt_according_json(aug_unknown_json, osp.join(tmp_path, 'aug_unknown.txt'))
        get_new_train_json(
            new_json,
            aug_img_folder,
            osp.join(tmp_path, 'known.txt'),
            osp.join(tmp_path, 'aug_unknown.txt'),
            out_dir=osp.join(tmp_path, 'new_training_weight.json'))

        return osp.join(tmp_path, 'new_training_weight.json'), aug_img_folder
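
The two training steps above shell out to the mmdet training script via os.system with an f-string command line. A common, slightly safer variant is subprocess.run with an argument list, which avoids shell-quoting issues and surfaces a non-zero exit code as an exception. The sketch below shows that variant for the known-model step only; it is an assumption-laden illustration, not part of this diff, and it presumes train.py accepts the same arguments as in the command string above.

# Hypothetical variant of the known-model training call using subprocess.run
# instead of os.system; arguments mirror the command string built above.
import subprocess

subprocess.run(
    [
        "python", train_script_path, base_config,
        "--seed", "1", "--deterministic",
        "--cfg-options",
        f"data.train.ann_file={known_json_path}",
        f"data.train.img_prefix={img_path}",
        f"work_dir={known_model_folder}",
    ],
    check=True,  # raise CalledProcessError if training exits with a non-zero status
)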
98 changes: 98 additions & 0 deletions
core/testcasecontroller/algorithm/paradigm/singletask_learning/singletask_learning_tta.py
@@ -0,0 +1,98 @@
import json
import os

import torch
from mmdet.apis import init_detector

from core.common.constant import ParadigmType
from examples.yaoba.singletask_learning_yolox_tta.resource.utils.TTA_strategy import TTA_Strategy
from .singletask_learning import SingleTaskLearning


class SingleTaskLearningTTA(SingleTaskLearning):

    def __init__(self, workspace, **kwargs):
        super(SingleTaskLearningTTA, self).__init__(workspace, **kwargs)

    def run(self):
        # Build an experimental task
        job = self.build_paradigm_job(str(ParadigmType.SINGLE_TASK_LEARNING.value))

        # If initial model weights are provided, use them; otherwise train a new model from scratch
        if self.initial_model != "":
            trained_model = self.initial_model
        else:
            trained_model = self._train(job, None)

        # Search for the optimal test-time augmentation policies
        searched_strategy = self._search_tta_strategy(job, trained_model)

        # Merge the optimal policies with the original default policy
        merged_strategy = self._prepare_infer_strategy(job, searched_strategy)

        # Infer the test set with the searched policies
        inference_result = self._inference_w_tta(job, trained_model, merged_strategy)
        self.system_metric_info['use_raw'] = True
        return inference_result, self.system_metric_info

    def _inference_w_tta(self, job, trained_model, strategy):
        # Load the test set annotations and collect the image paths
        img_prefix = self.dataset.image_folder_url
        ann_file_path = self.dataset.test_url
        ann_file = json.load(open(ann_file_path, mode="r", encoding="utf-8"))
        test_set = []
        for i in ann_file['images']:
            test_set.append(os.path.join(img_prefix, i['file_name']))

        # Perform inference with the data augmentation policy
        job.load(trained_model)
        print(f"Total infer strategy is: {strategy}")
        infer_res = job.tta_predict(test_set, strategy)

        return infer_res

    def _prepare_infer_strategy(self, job, searched_strategy):
        default_img_size = None
        # The default inference policy comes from the model's test pipeline
        for p in job.cfg.data.test.pipeline:
            if p['type'] == 'MultiScaleFlipAug':
                default_img_size = p['img_scale']
        if default_img_size:
            combined_strategy = [[("TTA_Resize", default_img_size), ]]
        else:
            raise ValueError("cannot find img_scale in model cfg")
        combined_strategy.append(searched_strategy[0])

        return combined_strategy

    def _search_tta_strategy(self, job, model_url):
        # Load the validation dataset
        img_prefix = self.dataset.image_folder_url
        ann_file = self.dataset.val_url

        # Create a search agent to search for the best data augmentation strategy
        model_cfg = job.cfg
        model = init_detector(model_cfg, model_url)
        torch.multiprocessing.set_start_method("spawn", force=True)
        search_agent = TTA_Strategy(
            model=model,
            val_image_path=img_prefix,
            val_anno_path=ann_file,
            log_dir=os.path.join(model_cfg.work_dir, "log"),
            worker=6,
            nms_thr=0.5
        )
        # Search for single policies for TTA
        single_strategies = search_agent.search_single_strategy(top_num=3)

        # Search for cascade policies for TTA, which are based on the single policies
        cascade_strategies = search_agent.search_cascade_strategy(
            single_strategies,
            cascade_num=3,
            top_num=5
        )
        return cascade_strategies

    def _train(self, job, initial_model):
        img_prefix = self.dataset.image_folder_url
        ann_file = self.dataset.train_url
        checkpoint_path = job.train((img_prefix, ann_file))
        return checkpoint_path
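
As _prepare_infer_strategy suggests, the strategy passed to job.tta_predict is a list of policy groups, where each group is a list of (policy_name, parameter) tuples: the first group is the default resize taken from the model's test pipeline, and the second is the best cascade returned by the search. The snippet below illustrates the resulting shape only; the searched policy names and values depend on TTA_Strategy and are hypothetical here.

# Illustrative shape of a merged TTA strategy. Only ("TTA_Resize", img_scale)
# is taken from the code above; the second group is a made-up example.
merged_strategy = [
    [("TTA_Resize", (640, 640))],                     # default policy from the test pipeline
    [("TTA_Resize", (800, 800)), ("TTA_Flip", 1.0)],  # hypothetical searched cascade
]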
Empty file.
70 changes: 70 additions & 0 deletions
examples/yaoba/singletask_learning_boost/benchmarkingjob.yaml
@@ -0,0 +1,70 @@
benchmarkingjob:
  # job name of benchmarking; string type;
  name: "benchmarkingjob"
  # the url address of the job workspace that will reserve the output of tests; string type;
  workspace: "examples/yaoba/singletask_learning_boost/workspace"
  # the url address of the test environment configuration file; string type;
  # the file format supports yaml/yml;
  testenv: "examples/yaoba/singletask_learning_boost/testenv/testenv.yaml"

  # the configuration of the test object
  test_object:
    # test type; string type;
    # currently the only supported value is "algorithms"; others will be added in succession.
    type: "algorithms"
    # test algorithm configuration files; list type;
    algorithms:
      # algorithm name; string type;
      - name: "mmlab-model"
        # the url address of the test algorithm configuration file; string type;
        # the file format supports yaml/yml;
        url: "examples/yaoba/singletask_learning_boost/testalgorithms/algorithm.yaml"

  # the configuration of the ranking leaderboard
  rank:
    # rank the leaderboard by the metrics of the test cases' evaluation and their order; list type;
    # the sorting priority is based on the sequence of metrics in the list, from front to back;
    sort_by: [ { "map": "descend" } ]

    # visualization configuration
    visualization:
      # mode of visualization in the leaderboard; string type;
      # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
      # In the leaderboard, we provide the "selected_only" mode for the user to configure what is or is not shown.
      mode: "selected_only"
      # method of visualization for selected dataitems; string type;
      # currently the options are as follows:
      # 1> "print_table": print selected dataitems;
      method: "print_table"

    # selected dataitem configuration
    # The user can add the dataitems of interest in terms of "paradigms", "modules", "hyperparameters" and "metrics",
    # so that the selected columns will be shown.
    selected_dataitem:
      # currently the options are as follows:
      # 1> "all": select all paradigms in the leaderboard;
      # 2> paradigms in the leaderboard, e.g., "singletasklearning"
      paradigms: [ "all" ]
      # currently the options are as follows:
      # 1> "all": select all modules in the leaderboard;
      # 2> modules in the leaderboard, e.g., "basemodel"
      modules: [ "all" ]
      # currently the options are as follows:
      # 1> "all": select all hyperparameters in the leaderboard;
      # 2> hyperparameters in the leaderboard, e.g., "momentum"
      hyperparameters: [ "all" ]
      # currently the options are as follows:
      # 1> "all": select all metrics in the leaderboard;
      # 2> metrics in the leaderboard, e.g., "f1_score"
      metrics: [ "map" ]

    # mode for saving selected and all dataitems in the workspace; string type;
    # currently the options are as follows:
    # 1> "selected_and_all": save selected and all dataitems;
    # 2> "selected_only": save selected dataitems;
    save_mode: "selected_and_all"
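
As a quick sanity check before launching a run, the config above can be loaded and a couple of its fields inspected. This is only an illustrative sketch, assuming PyYAML is installed; inside ianvs, yaml2dict in core.common.utils does essentially the same parsing.

# Sketch: load the benchmarking config above and print the ranking setup.
import yaml

with open("examples/yaoba/singletask_learning_boost/benchmarkingjob.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

job_cfg = cfg["benchmarkingjob"]
print(job_cfg["workspace"])        # where test outputs are written
print(job_cfg["rank"]["sort_by"])  # [{'map': 'descend'}]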