Skip to content

Commit

Permalink
Model response status
Browse files Browse the repository at this point in the history
Since the HTTP response status code isn't in the headers dictionary, it
should be modeled separately.

Fixes #24
  • Loading branch information
edsu committed Oct 31, 2023
1 parent 43866ef commit b3e75b9
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tests/test_warcdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,15 @@ def test_http_header():
"value": "Wget/1.21.3",
"warc_record_id": "<urn:uuid:6E9096E2-5D54-4CD6-A157-1DE4A7040DEB>",
} in req_headers


def test_http_status():
    """Check that imported response records carry their HTTP status code.

    Imports ``tests/google.warc`` through the CLI and verifies that the first
    three rows of the ``response`` table have the statuses 301, 302 and 200,
    in that order.
    """
    # Named ``test_http_status`` rather than ``test_http_header``: the module
    # already defines a test with that name, and a second definition would
    # rebind it so that pytest only ever collects one of the two.
    runner = CliRunner()
    runner.invoke(
        warcdb_cli, ["import", db_file, str(pathlib.Path("tests/google.warc"))]
    )
    db = sqlite_utils.Database(db_file)
    # ``rows`` is consumed with ``next`` so each assertion checks the
    # following row of the table.
    responses = db["response"].rows
    assert next(responses)["http_status"] == 301
    assert next(responses)["http_status"] == 302
    assert next(responses)["http_status"] == 200
2 changes: 2 additions & 0 deletions warcdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ def __iadd__(self, r: ArcWarcRecord):
)

elif r.rec_type == "response":
if r.http_headers:
record_dict["http_status"] = r.http_headers.get_statuscode()
self.db.table("response").insert(
record_dict,
pk="warc_record_id",
Expand Down
5 changes: 5 additions & 0 deletions warcdb/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,8 @@ def m002_headers(db):
FROM response, JSON_EACH(response.http_headers) AS header
""",
)


@migration()
def m003_status(db):
    """Add an integer ``http_status`` column to the ``response`` table."""
    response_table = db["response"]
    response_table.add_column("http_status", int)

0 comments on commit b3e75b9

Please sign in to comment.