Merge pull request #71 from xiaomoguhzz/unknown-task-process
jaypume authored Jun 21, 2024
2 parents 24f74d0 + 7111198 commit 5c48872
Showing 52 changed files with 4,917 additions and 0 deletions.
69 changes: 69 additions & 0 deletions benchmarking.py
@@ -0,0 +1,69 @@
# Copyright 2022 The KubeEdge Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""main"""

import sys
import argparse

from core.common.log import LOGGER
from core.common import utils
from core.cmd.obj import BenchmarkingJob
from core.__version__ import __version__


def main():
""" main command-line interface to ianvs"""
try:
parser = _generate_parser()
args = parser.parse_args()
config_file = args.benchmarking_config_file
if not utils.is_local_file(config_file):
raise SystemExit(f"not found benchmarking config({config_file}) file in local")

        config = utils.yaml2dict(config_file)
job = BenchmarkingJob(config[str.lower(BenchmarkingJob.__name__)])
job.run()

LOGGER.info("benchmarkingjob runs successfully.")
except Exception as err:
raise RuntimeError(f"benchmarkingjob runs failed, error: {err}.") from err


def _generate_parser():
parser = argparse.ArgumentParser(description='AI Benchmarking Tool')
parser.prog = "ianvs"

parser.add_argument("-f",
"--benchmarking_config_file",
nargs="?",
type=str,
help="run a benchmarking job, "
"and the benchmarking config file must be yaml/yml file.")

parser.add_argument('-v',
'--version',
action='version',
version=__version__,
help='show program version info and exit.')

if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)

return parser


if __name__ == '__main__':
main()
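
For orientation, here is a minimal usage sketch of the CLI defined above. It is not part of the commit; the config path is illustrative, taken from the example added later in this diff.

# Command-line usage (illustrative):
#     ianvs -f examples/yaoba/singletask_learning_boost/benchmarkingjob.yaml
#
# Or programmatically, mirroring main() above:
from core.common import utils
from core.cmd.obj import BenchmarkingJob

config = utils.yaml2dict("examples/yaoba/singletask_learning_boost/benchmarkingjob.yaml")
# The top-level YAML key must equal BenchmarkingJob.__name__.lower(), i.e. "benchmarkingjob".
job = BenchmarkingJob(config["benchmarkingjob"])
job.run()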
@@ -0,0 +1,147 @@
import json
import os
from core.common.constant import ParadigmType
from examples.yaoba.singletask_learning_boost.resource.utils.infer_and_error import infer_anno, merge_predict_results, \
compute_error, gen_txt_according_json, get_new_train_json
from examples.yaoba.singletask_learning_boost.resource.utils.transform_unkonwn import aug_image_bboxes
from .singletask_learning import SingleTaskLearning
import os.path as osp


class SingleTaskLearningACBoost(SingleTaskLearning):

def __init__(self, workspace, **kwargs):
super(SingleTaskLearningACBoost, self).__init__(workspace, **kwargs)

def run(self):
job = self.build_paradigm_job(str(ParadigmType.SINGLE_TASK_LEARNING.value))
known_dataset_json, unknown_dataset_json, img_path = self._prepare_for_calculate_weights()
base_config_path = osp.join(job.resource_dir, "base_config.py")
train_script_path = osp.join(job.resource_dir, "train.py")
ac_boost_training_json, aug_img_folder = self._calculate_weights_for_training(
base_config=base_config_path,
known_json_path=known_dataset_json,
unknown_json_path=unknown_dataset_json,
img_path=img_path,
tmp_path=os.path.join(job.work_dir, "tmp_folder"),
train_script_path=train_script_path
)
trained_model = self._ac_boost_train(job, ac_boost_training_json, aug_img_folder)
inference_result = self._inference(job, trained_model)
self.system_metric_info['use_raw'] = True
return inference_result, self.system_metric_info

def _ac_boost_train(self, job, training_anno, training_img_folder):
train_output_model_path = job.train((training_img_folder, training_anno))
trained_model_path = job.save(train_output_model_path)
return trained_model_path

def _inference(self, job, trained_model):
# Load test set data
img_prefix = self.dataset.image_folder_url
ann_file_path = self.dataset.test_url
        with open(ann_file_path, mode="r", encoding="utf-8") as f:
            ann_file = json.load(f)
test_set = []
for i in ann_file['images']:
test_set.append(os.path.join(img_prefix, i['file_name']))

job.load(trained_model)
infer_res = job.predict(test_set)
return infer_res

def _prepare_for_calculate_weights(self):
known_dataset_json = self.dataset.known_dataset_url
unknown_dataset_json = self.dataset.unknown_dataset_url
img_path = self.dataset.image_folder_url
return known_dataset_json, unknown_dataset_json, img_path

def _calculate_weights_for_training(self,
base_config,
known_json_path,
unknown_json_path,
img_path,
tmp_path,
train_script_path):
r"""Generate instance weights required for unknown task training. In object detection,
an instance means a bounding box, i.e., generating training weights for each bounding box.
Args:
base_config (str): path of config file for training known/unknown model
known_json_path (str): path of JSON file for training known model
unknown_json_path (str): path of JSON file for training unknown model
img_path (str): image path of training, validation, and test set.
tmp_path (str): path to save temporary files, including augmented images, training JSON files, etc.
train_script_path (str): path of mmdet training script
Return:
new_training_weight (str): JSON file with instance weights for unknown task training,
which contains both the known and unknown training sets.
aug_img_folder (str): the image paths required for training the model using the JSON file with instance weights.
"""
if not os.path.exists(tmp_path):
os.mkdir(tmp_path)
# Define necessary path
aug_img_folder = osp.join(tmp_path, "aug_img_folder") # The directory for saving augmented images
known_model_folder = osp.join(tmp_path, "known_model") # The directory for saving known model training results
unknown_model_folder = osp.join(tmp_path, "unknown_model") # The directory for saving unknown model training results
aug_unknown_json = osp.join(tmp_path, 'aug_unknown.json') # The JSON file path of the unknown data after augmentation

# Augmenting the unknown data and returning the paths of the augmented images
aug_image_bboxes(
anno=unknown_json_path,
augs=[('flip', 1), ('brightness', 0.6), ('flip', -1)],
image_path=img_path,
out_path=tmp_path
)

# Train the known model
known_model_training_task = f"python {train_script_path} " \
f"{base_config} --seed 1 --deterministic --cfg-options " \
f"data.train.ann_file={known_json_path} " \
f"data.train.img_prefix={img_path} " \
f"work_dir={known_model_folder}"
os.system(known_model_training_task)

# Train the unknown model
unknown_model_training_task = f"python {train_script_path} " \
f"{base_config} --seed 1 --deterministic --cfg-options " \
f"data.train.ann_file={aug_unknown_json} " \
f"data.train.img_prefix={aug_img_folder} " \
f"work_dir={unknown_model_folder}"
os.system(unknown_model_training_task)

        # Use the known model trained above to run inference on the unknown data
infer_anno(
config_file=base_config,
checkpoint_file=osp.join(known_model_folder, 'latest.pth'),
img_path=aug_img_folder,
anno_path=aug_unknown_json,
out_path=osp.join(tmp_path, 'unknown_infer_results.json')
)

        # Use the unknown model trained above to run inference on the known data
infer_anno(
config_file=base_config,
checkpoint_file=osp.join(unknown_model_folder, 'latest.pth'),
img_path=aug_img_folder,
anno_path=known_json_path,
out_path=osp.join(tmp_path, 'known_infer_results.json')
)

        # Merge the prediction results and compute the per-instance error
merge_predict_results(
result1=osp.join(tmp_path, 'unknown_infer_results.json'),
result2=osp.join(tmp_path, 'known_infer_results.json'),
out_dir=osp.join(tmp_path, "merge_predict_result.json")
)
new_json = compute_error(osp.join(tmp_path, "merge_predict_result.json"))

        # Generate the weights of the overall training samples based on the prediction error.
gen_txt_according_json(known_json_path, osp.join(tmp_path, 'known.txt'))
gen_txt_according_json(aug_unknown_json, osp.join(tmp_path, 'aug_unknown.txt'))
get_new_train_json(
new_json,
aug_img_folder,
osp.join(tmp_path, 'known.txt'),
osp.join(tmp_path, 'aug_unknown.txt'),
out_dir=osp.join(tmp_path, 'new_training_weight.json'))

return osp.join(tmp_path, 'new_training_weight.json'), aug_img_folder
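
To make the weighting step above concrete, here is a small, self-contained sketch of the instance-weighting idea: each bounding-box annotation gets a training weight derived from the cross-model prediction error. The "error" field and the 1 + error rule are hypothetical stand-ins; the actual logic lives in compute_error() and get_new_train_json(), which this diff does not show.

import json

def toy_instance_weights(merged_results_json, out_json):
    """Hypothetical sketch: up-weight boxes that the peer model mispredicts."""
    with open(merged_results_json, encoding="utf-8") as f:
        anno = json.load(f)
    for box in anno.get("annotations", []):
        err = box.get("error", 0.0)   # assumed field name, for illustration only
        box["weight"] = 1.0 + err     # larger prediction error -> larger training weight
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(anno, f)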
@@ -0,0 +1,98 @@
import json
import os
import torch
from mmdet.apis import init_detector
from core.common.constant import ParadigmType
from examples.yaoba.singletask_learning_yolox_tta.resource.utils.TTA_strategy import TTA_Strategy
from .singletask_learning import SingleTaskLearning


class SingleTaskLearningTTA(SingleTaskLearning):

def __init__(self, workspace, **kwargs):
super(SingleTaskLearningTTA, self).__init__(workspace, **kwargs)

def run(self):
# Build an experimental task
job = self.build_paradigm_job(str(ParadigmType.SINGLE_TASK_LEARNING.value))

        # Use the provided initial model weights if any; otherwise train a new model from scratch
if self.initial_model != "":
trained_model = self.initial_model
else:
trained_model = self._train(job, None)

# Search for the optimal test-time augmentation policies
searched_strategy = self._search_tta_strategy(job, trained_model)

        # Merge the searched optimal policies with the original default policy
merged_strategy = self._prepare_infer_strategy(job, searched_strategy)

        # Infer on the test set with the searched policies
inference_result = self._inference_w_tta(job, trained_model, merged_strategy)
        self.system_metric_info['use_raw'] = True
return inference_result, self.system_metric_info

def _inference_w_tta(self, job, trained_model, strategy):
# Load test set data
img_prefix = self.dataset.image_folder_url
ann_file_path = self.dataset.test_url
        with open(ann_file_path, mode="r", encoding="utf-8") as f:
            ann_file = json.load(f)
test_set = []
for i in ann_file['images']:
test_set.append(os.path.join(img_prefix, i['file_name']))

# Perform inference with data augmentation policy.
job.load(trained_model)
print(f"Total infer strategy is :{strategy}")
infer_res = job.tta_predict(test_set, strategy)

return infer_res

def _prepare_infer_strategy(self, job, searched_strategy):
default_img_size = None
# The default inference policy
for p in job.cfg.data.test.pipeline:
if p['type'] == 'MultiScaleFlipAug':
default_img_size = p['img_scale']
if default_img_size:
combined_strategy = [[("TTA_Resize", default_img_size), ]]
else:
raise ValueError("can not find img_scale model cfg")
combined_strategy.append(searched_strategy[0])

return combined_strategy

def _search_tta_strategy(self, job, model_url):
# Load validation dataset
img_prefix = self.dataset.image_folder_url
ann_file = self.dataset.val_url

# Create a search agent to search for the best data augmentation strategy.
model_cfg = job.cfg
model = init_detector(model_cfg, model_url)
torch.multiprocessing.set_start_method("spawn", force=True)
search_agent = TTA_Strategy(
model=model,
val_image_path=img_prefix,
val_anno_path=ann_file,
log_dir=os.path.join(model_cfg.work_dir, "log"),
worker=6,
nms_thr=0.5
)
# Search for single policies for TTA
single_strategies = search_agent.search_single_strategy(top_num=3)

        # Search for cascade policies for TTA, which are based on the single policies
cascade_strategies = search_agent.search_cascade_strategy(
single_strategies,
cascade_num=3,
top_num=5
)
return cascade_strategies

def _train(self, job, initial_model):
img_prefix = self.dataset.image_folder_url
ann_file = self.dataset.train_url
checkpoint_path = job.train((img_prefix, ann_file))
return checkpoint_path
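
For reference, a sketch of the strategy structure that _prepare_infer_strategy assembles and job.tta_predict consumes. The exact policy vocabulary comes from TTA_Strategy, so the searched cascade shown here is hypothetical.

# Illustrative only: the default resize policy always comes first,
# followed by the best cascade found by the search agent.
default_img_size = (640, 640)  # read from the MultiScaleFlipAug entry of the model cfg
combined_strategy = [
    [("TTA_Resize", default_img_size)],   # default inference policy
    [("flip", 1), ("brightness", 0.6)],   # hypothetical searched cascade
]
# infer_res = job.tta_predict(test_set, combined_strategy)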
Empty file added examples/yaoba/__init__.py
Empty file.
70 changes: 70 additions & 0 deletions examples/yaoba/singletask_learning_boost/benchmarkingjob.yaml
@@ -0,0 +1,70 @@
benchmarkingjob:
  # job name of benchmarking; string type;
name: "benchmarkingjob"
  # the url address of the job workspace that will store the output of tests; string type;
workspace: "examples/yaoba/singletask_learning_boost/workspace"
# the url address of test environment configuration file; string type;
# the file format supports yaml/yml;
testenv: "examples/yaoba/singletask_learning_boost/testenv/testenv.yaml"
# the configuration of test object
test_object:
    # test type; string type;
    # currently the only supported value is "algorithms"; others will be added in succession.
type: "algorithms"
# test algorithm configuration files; list type;
algorithms:
# algorithm name; string type;
- name: "mmlab-model"
# the url address of test algorithm configuration file; string type;
# the file format supports yaml/yml;
url: "examples/yaoba/singletask_learning_boost/testalgorithms/algorithm.yaml"

# the configuration of ranking leaderboard
rank:
    # rank the leaderboard by each test case's evaluation metrics and their sort order; list type;
# the sorting priority is based on the sequence of metrics in the list from front to back;
sort_by: [ { "map": "descend" } ]

# visualization configuration
visualization:
# mode of visualization in the leaderboard; string type;
# There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
# In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
mode: "selected_only"
# method of visualization for selected dataitems; string type;
# currently the options of value are as follows:
# 1> "print_table": print selected dataitems;
method: "print_table"

# selected dataitem configuration
    # The user can select the dataitems of interest in terms of "paradigms", "modules", "hyperparameters" and "metrics",
# so that the selected columns will be shown.
selected_dataitem:
# currently the options of value are as follows:
# 1> "all": select all paradigms in the leaderboard;
# 2> paradigms in the leaderboard, e.g., "singletasklearning"
paradigms: [ "all" ]
# currently the options of value are as follows:
# 1> "all": select all modules in the leaderboard;
# 2> modules in the leaderboard, e.g., "basemodel"
modules: [ "all" ]
# currently the options of value are as follows:
# 1> "all": select all hyperparameters in the leaderboard;
# 2> hyperparameters in the leaderboard, e.g., "momentum"
hyperparameters: [ "all" ]
# currently the options of value are as follows:
# 1> "all": select all metrics in the leaderboard;
# 2> metrics in the leaderboard, e.g., "f1_score"
metrics: [ "map" ]

    # mode of saving selected and all dataitems in workspace; string type;
# currently the options of value are as follows:
# 1> "selected_and_all": save selected and all dataitems;
# 2> "selected_only": save selected dataitems;
save_mode: "selected_and_all"





