Skip to content

Commit

Permalink
Merge pull request #26 from edsu/response-status
Browse files Browse the repository at this point in the history
Model response status
  • Loading branch information
Florents-Tselai committed Nov 2, 2023
2 parents 43866ef + b3e75b9 commit 3acb426
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tests/test_warcdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,15 @@ def test_http_header():
"value": "Wget/1.21.3",
"warc_record_id": "<urn:uuid:6E9096E2-5D54-4CD6-A157-1DE4A7040DEB>",
} in req_headers


def test_http_status():
    """Check that importing a WARC file stores each response's HTTP status.

    This test was previously also called ``test_http_header``. A test with
    that name is already defined earlier in this module, and a second ``def``
    with the same name rebinds it, so pytest would only collect one of the
    two. It is renamed after the column it actually verifies.
    """
    runner = CliRunner()
    runner.invoke(
        warcdb_cli, ["import", db_file, str(pathlib.Path("tests/google.warc"))]
    )
    db = sqlite_utils.Database(db_file)
    # The rows iterator is advanced with ``next`` once per assertion, so the
    # three checks apply to the first three rows it yields, in that order.
    responses = db["response"].rows
    assert next(responses)["http_status"] == 301
    assert next(responses)["http_status"] == 302
    assert next(responses)["http_status"] == 200
2 changes: 2 additions & 0 deletions warcdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ def __iadd__(self, r: ArcWarcRecord):
)

elif r.rec_type == "response":
if r.http_headers:
record_dict["http_status"] = r.http_headers.get_statuscode()
self.db.table("response").insert(
record_dict,
pk="warc_record_id",
Expand Down
5 changes: 5 additions & 0 deletions warcdb/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,8 @@ def m002_headers(db):
FROM response, JSON_EACH(response.http_headers) AS header
""",
)


@migration()
def m003_status(db):
    """Add an ``http_status`` column (declared as ``int``) to ``response``."""
    response_table = db["response"]
    response_table.add_column("http_status", int)

0 comments on commit 3acb426

Please sign in to comment.