Skip to content

Commit

Permalink
cleaned up BERT pruner
Browse files: browse the repository at this point in the history
  • Loading branch information
arjunsuresh committed Aug 18, 2023
1 parent d467231 commit 67ea99b
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 37 deletions.
4 changes: 2 additions & 2 deletions cm-mlops/script/get-git-repo/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def preprocess(i):
meta = i['meta']

env_key = get_env_key(env)

if 'CM_GIT_REPO_NAME' not in env:
update_env(env, 'CM_GIT_REPO{}_NAME', env_key, os.path.basename(env['CM_GIT_URL']))

Expand Down Expand Up @@ -44,7 +44,7 @@ def postprocess(i):
git_checkout_path = env['CM_GIT_CHECKOUT_PATH']

env_key = get_env_key(env)

# We remap CM_GIT variables with CM_GIT_REPO prefix so that they don't contaminate the env of the parent script
update_env(env, 'CM_GIT_REPO{}_CHECKOUT_PATH', env_key, env['CM_GIT_CHECKOUT_PATH'])
update_env(env, 'CM_GIT_REPO{}_URL', env_key, env['CM_GIT_URL'])
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/get-ml-model-huggingface-zoo/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
},
"prune":{
"env":{
" CM_MODEL_TASK": "prune"
"CM_MODEL_TASK": "prune"
}
}
}
Expand Down
19 changes: 10 additions & 9 deletions cm-mlops/script/get-ml-model-huggingface-zoo/download_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
model_task = os.environ.get('CM_MODEL_TASK', '')

if model_task == "prune":
print("Downloading model: "+model_stub)
downloaded_model_path = hf_hub_download(repo_id=model_stub,
filename="pytorch_model.bin",
cache_dir=os.getcwd())
downloaded_model_path = hf_hub_download(repo_id=model_stub,
filename="config.json",
cache_dir=os.getcwd())
with open('tmp-run-env.out', 'w') as f:
f.write(f"CM_ML_MODEL_FILE_WITH_PATH={os.path.join(os.getcwd(),'')}")
print("Downloading model: " + model_stub)

for filename in ["pytorch_model.bin", "config.json"]:

downloaded_model_path = hf_hub_download(repo_id=model_stub,
filename=filename,
cache_dir=os.getcwd())

with open('tmp-run-env.out', 'w') as f:
f.write(f"CM_ML_MODEL_FILE_WITH_PATH={os.path.join(os.getcwd(),'')}")

else:
model_filename = os.environ.get('CM_MODEL_ZOO_FILENAME', '')
Expand Down
4 changes: 4 additions & 0 deletions cm-mlops/script/get-ml-model-neuralmagic-zoo/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
"tags": "get,python3",
"names": [ "python3", "python" ]
},
{
"tags": "get,generic-python-lib,_package.protobuf",
"version_max": "3.20.1"
},
{
"tags": "get,generic-python-lib,_sparsezoo"
}
Expand Down
Empty file.
53 changes: 34 additions & 19 deletions cm-mlops/script/prune-bert-models/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@
"automation_uid": "5b4e0237da074764",
"category": "Modular ML/AI applications",
"category_sort": 10000,
"default_env": {
"CM_BERT_PRUNE_TASK":"squad",
"CM_BERT_PRUNE_MODEL_NAME":"bert-large-uncased",
"CM_MODEL_ZOO_STUB":"bert-large-uncased",
"CM_BERT_PRUNE_CONSTRAINT": "0.5"
},
"input_mapping": {
"constraint": "CM_BERT_PRUNE_CONSTRAINT",
"output_dir": "CM_BERT_PRUNE_OUTPUT_DIR"
},
"deps": [
{
"tags": "get,python3"
Expand All @@ -28,46 +38,51 @@
},
{
"tags": "get,generic-python-lib,_transformers"
},
},
{
"tags": "get,generic-python-lib,_scikit-learn"
},
{
"tags": "get,git,repo,_repo.https://github.com/anandhu-eng/retraining-free-pruning"
"tags": "get,git,repo,_repo.https://github.com/cknowledge/retraining-free-pruning",
"env": {
"CM_GIT_ENV_KEY":"BERT_PRUNER_NEURIPS_2022"
}
},
{
"names": [
"get-model"
"get-model"
],
"tags": "get, ml-model, model, zoo, model-zoo, huggingface, _prune"
"tags": "get,ml-model,model,zoo,model-zoo,huggingface,_prune"
}
],
"tags": [
"prune",
"bert-models",
"bert-prune",
"prune-bert-models"
],
"uid": "76182d4896414216",
"variations":{
"path.#":{
"env":{
"CM_UNPRUNED_MODEL_PATH":"#"
}
"env":{
"CM_BERT_PRUNE_CKPT_PATH":"#"
}
},
"task.#":{
"env":{
"CM_PRUNE_TASK":"#"
}
"env":{
"CM_BERT_PRUNE_TASK":"#"
}
},
"model-name.#":{
"adr":{
"get-model":{
"tags":"_model-stub.#"
}
},
"env":{
"CM_PRUNE_MODEL_NAME":"#"
}
"model.#":{
"adr":{
"get-model":{
"tags":"_model-stub.#"
}
},
"env":{
"CM_BERT_PRUNE_MODEL_NAME":"#",
"CM_MODEL_ZOO_STUB":"#"
}
}
}
}
38 changes: 32 additions & 6 deletions cm-mlops/script/prune-bert-models/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,38 @@ def preprocess(i):
os_info = i['os_info']

env = i['env']
env['BERT_PRUNE_REPO_PATH'] = env['CM_GIT_CHECKOUT_PATH']
print("Pruning repo path:"+env['BERT_PRUNE_REPO_PATH'])
env['CM_UNPRUNED_MODEL_PATH']=env['CM_ML_MODEL_FILE_WITH_PATH']+"models--bert-large-uncased/snapshots/80792f8e8216b29f3c846b653a0ff0a37c210431"
out_dir="/home/ubuntu/prune_model/out"
cmd = "python3 "+env['BERT_PRUNE_REPO_PATH']+"/main.py --model_name " + env['CM_PRUNE_MODEL_NAME'] + " --task_name " + env['CM_PRUNE_TASK'] + " --ckpt_dir "+env['CM_UNPRUNED_MODEL_PATH']+" --constraint 0.5 --output_dir "+out_dir
os.system(cmd)

ckpt_path = env.get('CM_BERT_PRUNE_CKPT_PATH','')
if ckpt_path == '':
p = env['CM_ML_MODEL_FILE_WITH_PATH']
x = os.listdir(p)
for y in x:
if y.startswith('models--'):
z = os.path.join(p,y)
if os.path.isdir(z):
z1 = os.path.join(z, 'snapshots')
if os.path.isdir(z1):
z2 = os.listdir(z1)
if len(z2)>0:
ckpt_path=os.path.join(z1, z2[0])

env['CM_BERT_PRUNE_CKPT_PATH'] = ckpt_path

out_dir=env.get('CM_BERT_PRUNE_OUTPUT_DIR','')
if out_dir == '':
out_dir = os.path.join(os.getcwd(), 'pruned-model-output')
env['CM_BERT_PRUNE_OUTPUT_DIR'] = out_dir

print ('')
print ('Local CM cache path to the updated BERT pruner src from NeurIPS 2022: ' + env['CM_GIT_REPO_BERT_PRUNER_NEURIPS_2022_CHECKOUT_PATH'])

print ('')
for k in ["CM_ML_MODEL_FILE_WITH_PATH", "CM_BERT_PRUNE_CKPT_PATH", "CM_BERT_PRUNE_OUTPUT_DIR"]:
print ('ENV["{}"]: {}'.format(k, env[k]))

print ('')
input ('xyz')

return {'return': 0}

def postprocess(i):
Expand Down
19 changes: 19 additions & 0 deletions cm-mlops/script/prune-bert-models/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

# Launches the BERT pruner's main.py with settings taken from CM_* environment
# variables, timing the run and propagating the pruner's exit status.
#
# Environment read by this script:
#   CM_PYTHON_BIN_WITH_PATH                             interpreter used to run main.py
#   CM_GIT_REPO_BERT_PRUNER_NEURIPS_2022_CHECKOUT_PATH  directory that contains main.py
#   CM_BERT_PRUNE_MODEL_NAME                            passed as --model_name
#   CM_BERT_PRUNE_TASK                                  passed as --task_name
#   CM_BERT_PRUNE_CKPT_PATH                             passed as --ckpt_dir
#   CM_BERT_PRUNE_CONSTRAINT                            passed as --constraint
#   CM_BERT_PRUNE_OUTPUT_DIR                            passed as --output_dir

echo "===================================================================="
echo "Start pruning ..."
echo ""

CM_TMP_CURRENT_SCRIPT_PATH=${CM_TMP_CURRENT_SCRIPT_PATH:-$PWD}

# Every expansion is quoted so that paths containing spaces stay a single argument.
time "${CM_PYTHON_BIN_WITH_PATH}" \
  "${CM_GIT_REPO_BERT_PRUNER_NEURIPS_2022_CHECKOUT_PATH}/main.py" \
  --model_name "${CM_BERT_PRUNE_MODEL_NAME}" \
  --task_name "${CM_BERT_PRUNE_TASK}" \
  --ckpt_dir "${CM_BERT_PRUNE_CKPT_PATH}" \
  --constraint "${CM_BERT_PRUNE_CONSTRAINT}" \
  --output_dir "${CM_BERT_PRUNE_OUTPUT_DIR}"

# Capture the pruner's status immediately: any later command (including
# `test`) overwrites $?, which would make the script always exit with 1
# instead of the real failure code.
rc=$?
if [ "${rc}" -ne 0 ]; then
  exit "${rc}"
fi

echo "===================================================================="

0 comments on commit 67ea99b

Please sign in to comment.