This repository has been archived by the owner on Sep 24, 2020. It is now read-only.

Merge branch 'master' into release/cli-0.10.0rc
raubitsj committed Sep 3, 2020
2 parents a6487a8 + c3b0633 commit d7d568a
Showing 78 changed files with 2,648 additions and 1,036 deletions.
10 changes: 10 additions & 0 deletions CONTRIBUTING.md
@@ -45,6 +45,16 @@ then run:
./tools/setup_dev_environment.sh
```

## Building protocol buffers

We use protocol buffers to communicate from the user process to the wandb backend process.

If you update any of the *.proto files in wandb/proto, you'll need to run:

```
cd wandb/proto && python wandb_internal_codegen.py
```
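Protocol buffer code generation in Python is typically a thin wrapper around protoc. As a minimal sketch of what generating Python bindings from a .proto with `grpcio-tools` looks like (the file name `wandb_internal.proto` and the exact flags here are assumptions for illustration, not the actual contents of `wandb_internal_codegen.py`):

```python
# Hypothetical sketch of a protoc invocation via grpcio-tools; the real
# wandb_internal_codegen.py may use different paths, flags, or outputs.
from grpc_tools import protoc

ret = protoc.main([
    "protoc",                # argv[0] placeholder expected by protoc.main
    "-I.",                   # look for imported .proto files in the current dir
    "--python_out=.",        # emit the generated *_pb2.py modules here
    "wandb_internal.proto",  # assumed proto file name
])
if ret != 0:
    raise RuntimeError("protoc failed with exit code %d" % ret)
```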

## Code checks

- Reformat: `tox -e format`
6 changes: 6 additions & 0 deletions Makefile
@@ -57,6 +57,12 @@ test:
test-full:
tox

test-short:
tox -e "codemod,black,mypy,flake8,py36"

format:
tox -e format

clean-pyc: ## remove Python file artifacts
find . -name '*.pyc' -exec rm -f {} +
find . -name '*.pyo' -exec rm -f {} +
9 changes: 4 additions & 5 deletions codemod/remove_types.py
@@ -17,11 +17,10 @@ def leave_FunctionDef(self, original_node: cst.FunctionDef,
def leave_AnnAssign(self, original_node: cst.AnnAssign,
updated_node: cst.AnnAssign):
if updated_node.value is None:
# e.g. `some_var: str`

# these are *only* type declarations and have no runtime behavior,
# so they should be removed entirely:
return cst.RemoveFromParent()
# Annotate assignments so they can be commented out by a second pass
return updated_node.with_changes(
target=cst.Name("__COMMENT__" + original_node.target.value))
# return cst.RemoveFromParent()

return cst.Assign(
targets=[cst.AssignTarget(target=updated_node.target)],
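To make the intent of the codemod change above concrete, here is a minimal, self-contained sketch of the renaming idea using libcst (assumed for illustration; the real transformer in codemod/remove_types.py handles more cases and relies on a second pass that comments out the `__COMMENT__`-prefixed lines):

```python
import libcst as cst


class BareAnnotationMarker(cst.CSTTransformer):
    """Illustrative subset of the codemod: mark bare annotations for a later pass."""

    def leave_AnnAssign(self, original_node, updated_node):
        if updated_node.value is None:
            # e.g. `some_var: str` -> `__COMMENT__some_var: str`
            return updated_node.with_changes(
                target=cst.Name("__COMMENT__" + original_node.target.value))
        return updated_node


module = cst.parse_module("some_var: str\nother_var: int = 5\n")
print(module.visit(BareAnnotationMarker()).code)
# __COMMENT__some_var: str
# other_var: int = 5
```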
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -18,5 +18,7 @@ include = '''
| wandb/integration/tensorboard/
| wandb/cli/
| wandb/integration/magic.py
| wandb/integration/keras
| wandb/integration/keras/
| wandb/integration/sagemaker/
| wandb/sync/
'''
1 change: 1 addition & 0 deletions requirements.txt
@@ -14,3 +14,4 @@ configparser>=3.8.1
protobuf>=3.12.0
PyYAML
typing; python_version < '3.5'
enum34; python_version < '3.4'
7 changes: 7 additions & 0 deletions setup.py
@@ -4,6 +4,7 @@

from setuptools import setup


with open('package_readme.md') as readme_file:
readme = readme_file.read()

@@ -77,3 +78,9 @@
'grpc': grpc_requirements,
}
)

# if os.name == "nt" and sys.version_info >= (3, 6):
# legacy_env_var = "PYTHONLEGACYWINDOWSSTDIO"
# if legacy_env_var not in os.environ:
# if os.system("setx " + legacy_env_var + " 1") != 0:
# raise Exception("Error setting environment variable " + legacy_env_var)
24 changes: 19 additions & 5 deletions standalone_tests/artifact_load.py
@@ -56,6 +56,7 @@
parser.add_argument('--num_writers', type=int, required=True)
parser.add_argument('--files_per_version_min', type=int, required=True)
parser.add_argument('--files_per_version_max', type=int, required=True)
parser.add_argument('--non_overlapping_writers', default=True, action='store_true')

# reader args
parser.add_argument('--num_readers', type=int, required=True)
@@ -199,8 +200,8 @@ def proc_bucket_garbage_collector(stop_queue, bucket_gc_period_max):
# TODO: implement bucket gc

def main(argv):
print('Load test starting')
args = parser.parse_args()
print('Load test starting')

project_name = args.project
if project_name is None:
@@ -222,9 +223,11 @@ def main(argv):
# set global entity and project before chdir'ing
from wandb.apis import InternalApi
api = InternalApi()
os.environ['WANDB_ENTITY'] = api.settings('entity')
settings_entity = api.settings('entity')
settings_base_url = api.settings('base_url')
os.environ['WANDB_ENTITY'] = (os.environ.get('LOAD_TEST_ENTITY') or settings_entity)
os.environ['WANDB_PROJECT'] = project_name
os.environ['WANDB_BASE_URL'] = api.settings('base_url')
os.environ['WANDB_BASE_URL'] = (os.environ.get('LOAD_TEST_BASE_URL') or settings_base_url)

    # Change dir to avoid littering code directory
pwd = os.getcwd()
@@ -247,13 +250,17 @@

# writers
for i in range(args.num_writers):
file_names = source_file_names
if args.non_overlapping_writers:
chunk_size = int(len(source_file_names) / args.num_writers)
file_names = source_file_names[i * chunk_size: (i+1) * chunk_size]
p = multiprocessing.Process(
target=proc_version_writer,
args=(
stop_queue,
stats_queue,
project_name,
source_file_names,
file_names,
artifact_name,
args.files_per_version_min,
args.files_per_version_max))
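A quick worked illustration of the non-overlapping split introduced above (the counts are made up): with 10 source files and 2 writers, each writer receives an exclusive slice of 5 files. Note that if the file count is not evenly divisible by the writer count, the trailing remainder files are never assigned to any writer.

```python
# Standalone illustration of the chunking used for --non_overlapping_writers;
# the example values are hypothetical, not load-test defaults.
source_file_names = ["file%02d.dat" % i for i in range(10)]
num_writers = 2

chunk_size = int(len(source_file_names) / num_writers)  # 5
for i in range(num_writers):
    file_names = source_file_names[i * chunk_size:(i + 1) * chunk_size]
    print("writer %d -> %s" % (i, file_names))
# writer 0 -> ['file00.dat', ..., 'file04.dat']
# writer 1 -> ['file05.dat', ..., 'file09.dat']
```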
@@ -299,6 +306,9 @@ def main(argv):
procs.append(p)

# reset environment
os.environ['WANDB_ENTITY'] = settings_entity
os.environ['WANDB_BASE_URL'] = settings_base_url
os.environ
if env_project is None:
del os.environ['WANDB_PROJECT']
else:
@@ -364,6 +374,9 @@

print('Starting verification phase')

os.environ['WANDB_ENTITY'] = (os.environ.get('LOAD_TEST_ENTITY') or settings_entity)
os.environ['WANDB_PROJECT'] = project_name
os.environ['WANDB_BASE_URL'] = (os.environ.get('LOAD_TEST_BASE_URL') or settings_base_url)
data_api = wandb.Api()
    # we need to list artifacts by walking runs; accessing via
# project.artifactType.artifacts only returns committed artifacts
@@ -376,5 +389,6 @@

print('Verification succeeded')


if __name__ == '__main__':
main(sys.argv)
main(sys.argv)
4 changes: 1 addition & 3 deletions tests/conftest.py
@@ -141,15 +141,13 @@ def test_settings(test_dir, mocker):
settings = wandb.Settings(_start_time=time.time(),
base_url="http://localhost",
root_dir=os.getcwd(),
wandb_dir=wandb_dir,
save_code=True,
project="test",
console="off",
host="test",
run_id=wandb.util.generate_id(),
_start_datetime=datetime.datetime.now())
settings.setdefaults()
settings.files_dir = settings._path_convert(settings.files_dir_spec)
yield settings
    # Just in case someone forgets to join in tests
if wandb.run is not None:
@@ -298,7 +296,7 @@ def wandb_init_run(request, runner, mocker, mock_server):
# We want to run setup every time in tests
wandb.wandb_sdk.wandb_setup._WandbSetup._instance = None
mocker.patch('wandb.wandb_sdk.wandb_init.Backend', utils.BackendMock)
run = wandb.init(settings=wandb.Settings(console="off", offline=True, _except_exit=False),
run = wandb.init(settings=wandb.Settings(console="off", mode="offline", _except_exit=False),
**args["wandb_init"])
yield run
wandb.join()
1 change: 0 additions & 1 deletion tests/test_meta.py
@@ -31,7 +31,6 @@ def meta(test_settings, interface):

@pytest.fixture()
def sm(runner, git_repo, record_q, result_q, test_settings, meta, mock_server, mocked_run, interface):
test_settings.root_dir = os.getcwd()
sm = SendManager(settings=test_settings, record_q=record_q, result_q=result_q, interface=interface)
meta._interface.publish_run(mocked_run)
sm.send(record_q.get())
10 changes: 9 additions & 1 deletion tests/test_public_api.py
@@ -86,7 +86,7 @@ def test_run_from_path(mock_server, api):


def test_run_retry(mock_server, api):
mock_server.set_context("fail_times", 2)
mock_server.set_context("fail_graphql_times", 2)
run = api.run("test/test/test")
assert run.summary_metrics == {"acc": 100, "loss": 0}

@@ -220,6 +220,14 @@ def test_artifact_get_path(runner, mock_server, api):
assert res == path


def test_artifact_get_path_download(runner, mock_server, api):
with runner.isolated_filesystem():
art = api.artifact("entity/project/mnist:v0", type="dataset")
path = art.get_path("digits.h5").download(os.getcwd())
assert os.path.exists("./digits.h5")
assert path == os.path.join(os.getcwd(), "digits.h5")


def test_artifact_file(runner, mock_server, api):
with runner.isolated_filesystem():
art = api.artifact("entity/project/mnist:v0", type="dataset")
6 changes: 6 additions & 0 deletions tests/utils/mock_backend.py
@@ -57,6 +57,12 @@ def _hack_set_run(self, run):

def _communicate(self, rec, timeout=5, local=False):
resp = wandb_internal_pb2.Result()
record_type = rec.WhichOneof("record_type")
if record_type == "request":
req = rec.request
req_type = req.WhichOneof("request_type")
if req_type == "poll_exit":
resp.response.poll_exit_response.done = True
return resp

def _proto_to_dict(self, obj_list):
30 changes: 22 additions & 8 deletions tests/utils/mock_server.py
@@ -3,7 +3,7 @@
from flask import Flask, request, g
import os
import sys
from datetime import datetime
from datetime import datetime, timedelta
import json
import yaml
import wandb
@@ -15,7 +15,8 @@

def default_ctx():
return {
"fail_count": 0,
"fail_graphql_count": 0, # used via "fail_graphql_times"
"fail_storage_count": 0, # used via "fail_storage_times"
"page_count": 0,
"page_times": 2,
"files": {},
@@ -44,7 +45,7 @@ def mock_server(mocker):
def run(ctx):
if ctx["resume"]:
now = datetime.now()
created_at = now.replace(day=now.day - 1).isoformat()
created_at = (now - timedelta(days=1)).isoformat()
else:
created_at = datetime.now().isoformat()

@@ -109,6 +110,9 @@ def artifact(ctx, collection_name="mnist"):
"alias": "v%i" % ctx["page_count"],
}
],
"artifactSequence": {
"name": collection_name,
}
}


@@ -229,9 +233,9 @@ def graphql():
ctx = get_ctx()
test_name = request.headers.get("X-WANDB-USERNAME")
app.logger.info("Test request from: %s", test_name)
if "fail_times" in ctx:
if ctx["fail_count"] < ctx["fail_times"]:
ctx["fail_count"] += 1
if "fail_graphql_times" in ctx:
if ctx["fail_graphql_count"] < ctx["fail_graphql_times"]:
ctx["fail_graphql_count"] += 1
return json.dumps({"errors": ["Server down"]}), 500
body = request.get_json()
if body["variables"].get("files"):
@@ -268,7 +272,7 @@ def graphql():
"historyLineCount": 15,
"eventsLineCount": 0,
"historyTail": hist_tail,
"eventsTail": '["{\\"_runtime\\": 70}"}"]',
"eventsTail": '["{\\"_runtime\\": 70}"]',
}
}
}
@@ -442,7 +446,11 @@ def graphql():
if "mutation CreateArtifact(" in body["query"]:
collection_name = body["variables"]["artifactCollectionNames"][0]
return {
"data": {"createArtifact": {"artifact": artifact(ctx, collection_name)}}
"data": {
"createArtifact": {
"artifact": artifact(ctx, collection_name)
}
}
}
if "mutation UseArtifact(" in body["query"]:
return {"data": {"useArtifact": {"artifact": artifact(ctx)}}}
@@ -545,7 +553,13 @@ def graphql():
@app.route("/storage", methods=["PUT", "GET"])
def storage():
ctx = get_ctx()
if "fail_storage_times" in ctx:
if ctx["fail_storage_count"] < ctx["fail_storage_times"]:
ctx["fail_storage_count"] += 1
return json.dumps({"errors": ["Server down"]}), 500
file = request.args.get("file")
ctx["storage"] = ctx.get("storage", [])
ctx["storage"].append(request.args.get("file"))
size = ctx["files"].get(request.args.get("file"))
if request.method == "GET" and size:
return os.urandom(size), 200
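The new storage failure counters mirror the existing GraphQL ones, so tests can inject transient upload failures the same way test_run_retry does for GraphQL. A hypothetical sketch (not part of this commit) of what such a test could look like:

```python
# Hypothetical test sketch: make the first two requests to the mock /storage
# endpoint return HTTP 500, then let subsequent attempts succeed, so that the
# client's retry/backoff path gets exercised.
def test_file_upload_retry(mock_server, wandb_init_run):
    mock_server.set_context("fail_storage_times", 2)
    # ... run code that uploads files; the first two PUTs to /storage fail
    # with a 500 and later attempts succeed against the mock server.
```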
52 changes: 49 additions & 3 deletions tests/wandb_artifacts_test.py
@@ -72,6 +72,33 @@ def bucket(self, bucket):
return mock


def mock_http(artifact, path=False, headers={}):
class Response(object):
def __init__(self, headers):
self.headers = headers

def __enter__(self):
return self

def __exit__(self, exc_type, exc_val, exc_tb):
pass

def raise_for_status(self):
pass

class Session(object):
def __init__(self, name='file1.txt', headers=headers):
self.headers = headers

def get(self, path, *args, **kwargs):
return Response(self.headers)


mock = Session()
handler = artifact._storage_policy._handler._handlers["http"]
handler._session = mock
return mock

def test_add_one_file(runner):
with runner.isolated_filesystem():
with open('file1.txt', 'w') as f:
@@ -279,6 +306,25 @@ def test_add_gs_reference_path(runner, mocker, capsys):
_, err = capsys.readouterr()
assert "Generating checksum" in err

def test_add_http_reference_path(runner):
with runner.isolated_filesystem():
artifact = wandb.Artifact(type='dataset', name='my-arty')
mock_http(artifact, headers={
'ETag': '"abc"',
'Content-Length': "256",
})
artifact.add_reference("http://example.com/file1.txt")

assert artifact.digest == '48237ccc050a88af9dcd869dd5a7e9f4'
manifest = artifact.manifest.to_manifest_json()
assert manifest['contents']['file1.txt'] == {
'digest': 'abc',
'ref': 'http://example.com/file1.txt',
'size': 256,
'extra': {
'etag': '"abc"',
},
}

def test_add_reference_named_local_file(runner):
with runner.isolated_filesystem():
@@ -295,10 +341,10 @@ def test_add_reference_named_local_file(runner):
def test_add_reference_unknown_handler(runner):
with runner.isolated_filesystem():
artifact = wandb.Artifact(type='dataset', name='my-arty')
artifact.add_reference('http://example.com/somefile.txt', name='ref')
artifact.add_reference('ref://example.com/somefile.txt', name='ref')

assert artifact.digest == '5b8876252f3ca922c164de380089c9ae'
assert artifact.digest == '410ade94865e89ebe1f593f4379ac228'

manifest = artifact.manifest.to_manifest_json()
assert manifest['contents']['ref'] == {
'digest': 'http://example.com/somefile.txt', 'ref': 'http://example.com/somefile.txt'}
'digest': 'ref://example.com/somefile.txt', 'ref': 'ref://example.com/somefile.txt'}