-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
finished gcp upload download and background check (#125)
- Loading branch information
1 parent
c871c64
commit 31694c4
Showing
8 changed files
with
168 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,16 @@ | ||
# redis | ||
redis_om_url: redis://:password@server_name:port_num | ||
redis_data_tag: gpt-4_gpt-3.5-turbo_v0.0.1_clean | ||
|
||
# training | ||
num_gpus: 1 | ||
model_name_or_path: meta-llama/Llama-2-13b-hf | ||
hf_auth_token: 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 | ||
|
||
wandb_project: self-train | ||
wandb_tags: "['llama-2-13b-hf']" | ||
wandb_token: 99caa13ec9552adf0e92e5c30021307ce3cf7fa4 | ||
wandb_token: 99caa13ec9552adf0e92e5c30021307ce3cf7fa4 | ||
|
||
#gcp | ||
oauth2_token_location: ./resources/auth_token.key | ||
bucket_name: pipeline-test-storage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Manual smoke test for the GCP helpers: upload one file, then download it again."""
from pipelines.gcp_util import upload_to_gcp, download_from_gcp, monitor_and_upload

object_location = '/Users/zhengyangqi/Desktop/template-demo.txt'  # Replace with your file path
oauth2_token_location = './resources/auth_token.key'  # Replace with your OAuth2 token
content_type = 'application/json; charset=utf-8'  # Replace with the content type of your object
bucket_name = 'pipeline-test-storage'  # Replace with your bucket name
object_name = 'test/test.txt'  # Replace with your object name
save_to_location = './test.txt'


def main():
    """Upload the configured file, download it back, and print both responses."""
    # Call the upload function
    response = upload_to_gcp(object_name, object_location, oauth2_token_location, bucket_name, content_type)
    print(response.text)
    response = download_from_gcp(object_name, save_to_location, oauth2_token_location, bucket_name)
    print(response.text)

    # To exercise the directory watcher instead (blocks until interrupted):
    # monitor_and_upload('./test_cache', 5, oauth2_token_location, bucket_name)


# Guarded so that importing this module (e.g. by a test collector) does not
# trigger network calls.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
import os
from pipelines.pull_data import preprocess_episodes_with_tag
from pipelines.run_train import run_sft


def main():
    """Run preprocessing when the SFT data file is missing, then call run_sft()."""
    # The path is relative to the current working directory.
    # NOTE(review): presumably preprocess_episodes_with_tag() writes this file;
    # confirm against pipelines.pull_data.
    if not os.path.exists("../llm_rl/data/sotopia_custom_training_sft.json"):
        preprocess_episodes_with_tag()
    # Only the renamed entry point is called; the pre-rename `run` import and
    # call are dropped so training is not started twice.
    run_sft()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from pipelines import config |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
from pipelines import config | ||
|
||
import os | ||
import requests | ||
import zipfile | ||
|
||
import os | ||
import time | ||
|
||
def monitor_and_upload(directory_to_monitor, check_interval=60, oauth2_token_location=config['oauth2_token_location'], bucket_name=config['bucket_name']):
    '''
    Monitors a directory and uploads each new subdirectory to GCP as a zip archive.
    Runs until the process is interrupted. A subdirectory is only remembered as
    uploaded after the upload request succeeds, so failed uploads are retried on
    the next scan.
    Parameters:
        directory_to_monitor (str): Directory to monitor for new subdirectories
        check_interval (int): Time interval (in seconds) to wait before scanning again
            when nothing new was found or after an error
        oauth2_token_location (str): The location of the OAuth2 token
        bucket_name (str): GCP bucket name for uploading
    '''
    already_uploaded = set()

    while True:
        try:
            current_subdirectories = {
                d for d in os.listdir(directory_to_monitor)
                if os.path.isdir(os.path.join(directory_to_monitor, d))
            }
            new_subdirectories = current_subdirectories - already_uploaded

            if not new_subdirectories:
                print(f"No new subdirectories found. Checking again in {check_interval} seconds...")
                time.sleep(check_interval)
                continue

            for subdir in sorted(new_subdirectories):
                subdir_path = os.path.join(directory_to_monitor, subdir)
                # The archive is written to the current working directory and
                # uploaded under the same name.
                zip_name = f"{subdir}.zip"

                print(f"Zipping {subdir}...")
                zip_directory(subdir_path, zip_name)
                print(f"Zipped {subdir}.")

                print(f"Uploading {zip_name} to GCP...")
                # Forward the caller's credentials and bucket instead of silently
                # falling back to the config defaults, and label the object as a
                # zip archive rather than JSON.
                response = upload_to_gcp(
                    zip_name,
                    zip_name,
                    oauth2_token_location,
                    bucket_name,
                    content_type="application/zip",
                )
                if not response.ok:
                    # Handled by the except below: report, wait, retry next scan.
                    raise RuntimeError(
                        f"Upload of {zip_name} failed with status {response.status_code}: {response.text}"
                    )
                already_uploaded.add(subdir)
                print(f"Uploaded {subdir} to GCP.")
                print(response.text)

        except Exception as e:
            print(f"An error occurred: {e}")
            # Back off before retrying so a persistent error (missing directory,
            # expired token, ...) does not turn into a busy loop.
            time.sleep(check_interval)
|
||
|
||
def upload_to_gcp(object_name, object_location, oauth2_token_location=config['oauth2_token_location'], bucket_name=config['bucket_name'], content_type="application/json; charset=utf-8"):
    '''
    Uploads a file to a bucket in Google Cloud Storage
    Parameters:
        object_name (str): The name of the object to upload
        object_location (str): The location of the file to upload
        oauth2_token_location (str): The location of the OAuth2 token
        bucket_name (str): The name of the bucket to upload to
        content_type (str): The content type of the object
    Returns:
        The requests response of the upload call; the caller is responsible for
        checking its status.
    '''
    from urllib.parse import quote

    # Percent-encode the object name so characters such as '&', '#', '+' or
    # spaces cannot corrupt the query string.
    encoded_name = quote(object_name, safe='')
    url = f'https://storage.googleapis.com/upload/storage/v1/b/{bucket_name}/o?uploadType=media&name={encoded_name}'

    with open(oauth2_token_location, 'r') as file:
        # Token files usually end with a newline, which is not allowed inside
        # an HTTP header value.
        oauth2_token = file.read().strip()

    headers = {
        'Authorization': f'Bearer {oauth2_token}',
        'Content-Type': content_type
    }

    with open(object_location, 'rb') as file:
        data = file.read()

    response = requests.post(url, headers=headers, data=data)
    return response
|
||
|
||
def download_from_gcp(object_name, save_to_location, oauth2_token_location=config['oauth2_token_location'], bucket_name=config['bucket_name']):
    '''
    Downloads a file from a bucket in Google Cloud Storage
    Parameters:
        object_name (str): The name of the object to download
        save_to_location (str): The location to save the file to
        oauth2_token_location (str): The location of the OAuth2 token
        bucket_name (str): The name of the bucket to download from
    Returns:
        The requests response of the download call. The file at save_to_location
        is only written when the request succeeded.
    '''
    from urllib.parse import quote

    # The object name is a single URL path segment: encode '/' and every other
    # reserved character, not just the slash.
    encoded_name = quote(object_name, safe='')
    url = f'https://storage.googleapis.com/storage/v1/b/{bucket_name}/o/{encoded_name}?alt=media'

    with open(oauth2_token_location, 'r') as file:
        # Token files usually end with a newline, which is not allowed inside
        # an HTTP header value.
        oauth2_token = file.read().strip()

    headers = {
        'Authorization': f'Bearer {oauth2_token}'
    }

    response = requests.get(url, headers=headers)
    # Do not save an error payload (e.g. a 401/404 JSON body) as if it were the
    # requested object; the caller can still inspect the returned response.
    if response.ok:
        with open(save_to_location, 'wb') as file:
            file.write(response.content)
    return response
|
||
def zip_directory(folder_path, zip_name):
    '''
    Zips all files below a directory into a single archive
    Member names are relative to the parent of folder_path, so every entry
    starts with the folder's own name. Empty directories are not stored.
    Parameters:
        folder_path (str): The directory to archive
        zip_name (str): The path of the zip file to create
    '''
    # normpath drops a trailing separator; without it dirname('a/b/') is 'a/b'
    # and the top-level folder name would be missing from the archive.
    base_dir = os.path.dirname(os.path.normpath(folder_path))

    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                full_path = os.path.join(root, file)
                # Create a relative path for files to keep the directory structure
                zipf.write(full_path, arcname=os.path.relpath(full_path, base_dir))
|
||
|
||
def unzip_directory(zip_name, folder_path):
    '''
    Extracts every member of a zip archive into a directory
    Parameters:
        zip_name (str): The path of the zip file to read
        folder_path (str): The directory to extract into
    '''
    with zipfile.ZipFile(zip_name, mode='r') as archive:
        archive.extractall(path=folder_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
llm_self_train/train_args.yml → llm_self_train/resources/train_args.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
input_ids: | ||
[1, 518, 25580, 29962, 9705, 415, 1156, 15998, 29901, 13, 1888, 22094, 366, 526, 435, 1165, 265, 1588, 296, 625, 29892, 596, 3414, 338, 304, 1044, 29914, 5965, 557, 408, 435, 1165, 265, 1588, 296, 625, 723, 29892, 12515, 297, 3458, 435, 1165, 265, 1588, 296, 625, 29915, 29879, 5264, 7306, 29889, 13, 3492, 508, 1284, 435, 1165, 265, 1588, 296, 625, 29915, 29879, 3239, 322, 7306, 297, 278, 525, 10605, 338, 278, 3030, 310, 278, 14881, 29915, 1746, 29889, 13, 9842, 393, 435, 1165, 265, 1588, 296, 625, 29915, 29879, 7035, 322, 7306, 338, 871, 7962, 304, 366, 29889, 13, 3492, 881, 1018, 596, 1900, 304, 6176, 435, 1165, 265, 1588, 296, 625, 29915, 29879, 7306, 297, 263, 982, 393, 7595, 411, 1009, 2931, 1020, 1169, 29889, 13, 2528, 17658, 29892, 7344, 292, 278, 14983, 29915, 29879, 5613, 2264, 322, 1855, 1608, 338, 18853, 313, 29872, 29889, 29887, 1696, 437, 451, 12312, 825, 916, 2305, 756, 2307, 1497, 1434, 467, 13, 13, 10605, 338, 278, 3030, 310, 445, 14881, 29901, 13, 4421, 24893, 29901, 1281, 874, 362, 5622, 2058, 297, 385, 17568, 1373, 537, 1741, 1546, 1023, 472, 841, 311, 267, 29889, 13, 7439, 12654, 1934, 29901, 25661, 2994, 371, 29920, 322, 435, 1165, 265, 1588, 296, 625, 13, 29934, 2142, 4271, 2994, 371, 29920, 29915, 29879, 3239, 29901, 25661, 2994, 371, 29920, 338, 263, 29871, 29946, 29945, 29899, 6360, 29899, 1025, 14263, 1190, 25339, 29889, 940, 29914, 26994, 11504, 1309, 29879, 29889, 25661, 2994, 371, 29920, 29892, 263, 2071, 24455, 1190, 25339, 29892, 1914, 29879, 385, 21210, 573, 4333, 310, 325, 524, 482, 16083, 26959, 4427, 423, 29889, 7133, 670, 454, 275, 545, 931, 29892, 540, 11418, 952, 8743, 521, 404, 322, 6493, 292, 15839, 2982, 22848, 29889, 29871, 13, 29967, 1165, 265, 1588, 296, 625, 29915, 29879, 3239, 29901, 435, 1165, 265, 1588, 296, 625, 338, 263, 29871, 29946, 29900, 29899, 6360, 29899, 1025, 14263, 7405, 1230, 24452, 29889, 940, 29914, 26994, 11504, 1309, 29879, 29889, 435, 1165, 265, 1588, 296, 625, 29892, 385, 7405, 1230, 24452, 29892, 
338, 2998, 363, 670, 1506, 3873, 16812, 3460, 21396, 4688, 297, 278, 7250, 322, 670, 6136, 27577, 297, 7881, 3168, 29889, 5196, 2877, 322, 1819, 6139, 29901, 435, 1165, 265, 1588, 296, 625, 29892, 714, 17696, 3447, 4589, 2454, 29892, 19310, 7093, 27012, 882, 29892, 3081, 29892, 322, 6993, 29889, 3600, 1602, 12112, 24500, 1546, 4996, 7744, 800, 304, 18845, 322, 16010, 18987, 29889, 435, 1165, 265, 29915, 29879, 22183, 1372, 29901, 10629, 2200, 630, 10757, 304, 5528, 5632, 16976, 263, 1034, 6685, 14099, 13, 29934, 2142, 4271, 2994, 371, 29920, 29915, 29879, 7306, 29901, 853, 5203, 13, 29967, 1165, 265, 1588, 296, 625, 29915, 29879, 7306, 29901, 3872, 403, 541, 7344, 18161, 25806, 313, 18126, 2472, 29901, 366, 526, 17762, 304, 1016, 403, 541, 366, 505, 263, 19932, 23562, 29892, 395, 29896, 263, 2462, 1795, 367, 1886, 1022, 363, 596, 1857, 18161, 6434, 29889, 379, 524, 29901, 366, 1795, 864, 304, 27214, 403, 263, 5224, 14218, 5253, 470, 4368, 263, 697, 29899, 2230, 1016, 362, 1846, 13, 1168, 874, 362, 624, 5708, 29901, 13, 13, 27407, 396, 29900, 29901, 25661, 2994, 371, 29920, 1497, 29901, 376, 18420, 11005, 29892, 435, 1165, 265, 29991, 23974, 873, 11005, 29892, 3508, 29915, 29873, 372, 29973, 306, 1074, 1438, 1373, 537, 4959, 6963, 714, 278, 1900, 297, 2305, 29889, 5013, 5086, 310, 29892, 306, 29915, 345, 1063, 9701, 411, 445, 29811, 1821, 1373, 537, 393, 1736, 7113, 8343, 292, 4344, 297, 817, 29889, 3387, 263, 11232, 279, 263, 2462, 508, 3867, 263, 592, 284, 363, 263, 2278, 363, 263, 3353, 4098, 29889, 1815, 306, 2302, 373, 596, 2304, 3026, 13, 29889, 13, 3492, 526, 472, 9603, 396, 29896, 29889, 518, 29914, 25580, 29962, 11117, 2467, 29918, 1853, 2396, 525, 5965, 557, 742, 525, 23516, 2396, 376, 18420, 11005, 29892, 25661, 29991, 910, 338, 6200, 263, 24717, 4556, 29892, 322, 306, 11188, 596, 8856, 362, 29889, 306, 29915, 29885, 8852, 297, 20382, 29892, 541, 306, 29915, 645, 367, 15993, 29892, 590, 23562, 338, 19932, 1438, 3841, 29889, 1128, 1048, 445, 29973, 306, 
508, 5957, 263, 697, 29899, 2230, 11896, 393, 23994, 2629, 590, 18161, 2794, 29889, 10878, 393, 367, 22691, 3026, 29913, 2] | ||
inputs: | ||
<s> [INST] Prompt after formatting: | ||
Imagine you are Jaxon Prentice, your task is to act/speak as Jaxon Prentice would, keeping in mind Jaxon Prentice's social goal. | ||
You can find Jaxon Prentice's background and goal in the 'Here is the context of the interaction' field. | ||
Note that Jaxon Prentice's secret and goal is only visible to you. | ||
You should try your best to achieve Jaxon Prentice's goal in a way that align with their character traits. | ||
Additionally, maintaining the conversation's naturalness and realism is essential (e.g., do not repeat what other people has already said before). | ||
|
||
Here is the context of this interaction: | ||
Scenario: Conversation taking place in an annual charity event between two attendees. | ||
Participants: Rafael Cortez and Jaxon Prentice | ||
Rafael Cortez's background: Rafael Cortez is a 45-year-old male surgeon. He/him pronouns. Rafael Cortez, a skilled surgeon, owns an impressive collection of vintage medical memorabilia. During his leisure time, he enjoys playing chess and visiting historical landmarks. | ||
Jaxon Prentice's background: Jaxon Prentice is a 40-year-old male investigative journalist. He/him pronouns. Jaxon Prentice, an investigative journalist, is known for his brisk jogging sessions early in the morning and his active participation in community functions. Personality and values description: Jaxon Prentice, outgoing yet erratic, emphasizes achievement, power, and security. His decisions swing between quick adaptations to situations and careful planning. Jaxon's secrets: Fabricated evidence to incriminate a corrupt politician | ||
Rafael Cortez's goal: Unknown | ||
Jaxon Prentice's goal: Donate but maintain financial stability (Extra information: you are willing to donate but you have a tight budget, $1 a day might be steep for your current financial situation. Hint: you might want to negotiate a lower daily amount or suggest a one-time donation.) | ||
Conversation Starts: | ||
|
||
Turn #0: Rafael Cortez said: "Good evening, Jaxon! Lovely evening, isn't it? I see these charity events bring out the best in people. Speaking of, I've been involved with this incredible charity that works towards feeding children in need. Just a dollar a day can provide a meal for a child for a whole month. Can I count on your support?" | ||
. | ||
You are at Turn #1. [/INST] {'action_type': 'speak', 'argument': "Good evening, Rafael! This is indeed a worthy cause, and I appreciate your dedication. I'm interested in supporting, but I'll be honest, my budget is tight these days. How about this? I can offer a one-time contribution that fits within my financial means. Would that be acceptable?"}</s> | ||
label_ids: | ||
[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 11117, 2467, 29918, 1853, 2396, 525, 5965, 557, 742, 525, 23516, 2396, 376, 18420, 11005, 29892, 25661, 29991, 910, 338, 6200, 263, 24717, 4556, 29892, 322, 306, 11188, 596, 8856, 362, 29889, 306, 29915, 29885, 8852, 297, 20382, 29892, 541, 306, 29915, 645, 367, 15993, 
29892, 590, 23562, 338, 19932, 1438, 3841, 29889, 1128, 1048, 445, 29973, 306, 508, 5957, 263, 697, 29899, 2230, 11896, 393, 23994, 2629, 590, 18161, 2794, 29889, 10878, 393, 367, 22691, 3026, 29913, 2] | ||
labels: | ||
{'action_type': 'speak', 'argument': "Good evening, Rafael! This is indeed a worthy cause, and I appreciate your dedication. I'm interested in supporting, but I'll be honest, my budget is tight these days. How about this? I can offer a one-time contribution that fits within my financial means. Would that be acceptable?"}</s> |