Skip to content

Commit

Permalink
Configure full-text search, closes #1
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Mar 21, 2020
1 parent 25d8578 commit 9e12f24
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
27 changes: 27 additions & 0 deletions hacker_news_to_sqlite/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def cli():
def user(db_path, username):
"Fetch all content submitted by this user"
db = sqlite_utils.Database(db_path)
ensure_tables(db)
user = requests.get(
"https://hacker-news.firebaseio.com/v0/user/{}.json".format(username)
).json()
Expand Down Expand Up @@ -48,6 +49,7 @@ def user(db_path, username):
db["items"].upsert(
item, column_order=("id", "type", "by", "time"), pk="id", alter=True
)
ensure_fts(db)


@cli.command()
Expand All @@ -60,6 +62,7 @@ def user(db_path, username):
def trees(db_path, item_ids):
"Retrieve all content from the trees of which any item_id is a member"
db = sqlite_utils.Database(db_path)
ensure_tables(db)
to_fetch = set(item_ids)
done_count = 0
while to_fetch:
Expand Down Expand Up @@ -105,3 +108,27 @@ def trees(db_path, item_ids):
db["items"].add_foreign_key("parent", "items", "id")
except sqlite_utils.db.AlterError:
pass # Foreign key already exists
ensure_fts(db)


def ensure_tables(db):
    """Create the ``items`` and ``users`` tables up front when they are missing.

    The tables are created explicitly rather than implicitly by the first
    insert: an implicitly created ``items`` table may lack the ``title``
    column, which breaks the later call to ensure_fts().

    ``db`` is the database object whose ``table_names()`` and ``db[name]``
    accessors are used below.
    """
    # (table name, column -> type mapping); both tables use "id" as primary key.
    wanted = (
        (
            "items",
            {
                "id": int,
                "type": str,
                "by": str,
                "time": int,
                "title": str,
                "text": str,
            },
        ),
        (
            "users",
            {"id": str, "created": int, "karma": int, "about": str},
        ),
    )
    for name, columns in wanted:
        # Leave any table that already exists untouched.
        if name in db.table_names():
            continue
        db[name].create(columns, pk="id")


def ensure_fts(db):
    """Enable full-text search on ``items`` and ``users`` if not yet enabled.

    A table is configured only when it exists and its companion
    ``<table>_fts`` table does not. ``create_triggers=True`` is passed to
    ``enable_fts`` for both tables.
    """
    existing = set(db.table_names())
    # (table name, columns to index), in the order they are configured.
    searchable = (
        ("items", ["title", "text"]),
        ("users", ["id", "about"]),
    )
    for table, columns in searchable:
        if table not in existing:
            continue
        if table + "_fts" in existing:
            # Full-text search was already set up for this table.
            continue
        db[table].enable_fts(columns, create_triggers=True)
3 changes: 2 additions & 1 deletion tests/test_hacker_news_to_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_import_user(tmpdir, requests_mock):
result = CliRunner().invoke(cli.cli, ["user", db_path, "simonw"])
assert not result.exception, result.exception
db = sqlite_utils.Database(db_path)
assert {"users", "items"} == set(db.table_names())
assert {"users", "items", "users_fts", "items_fts",}.issubset(db.table_names())
users = list(db["users"].rows)
items = list(db["items"].rows)
assert [
Expand All @@ -68,5 +68,6 @@ def test_import_user(tmpdir, requests_mock):
"kids": "[22491039, 22490633, 22491277, 22492319, 22490883, 22491996, 22502812, 22491049, 22491052, 22491001, 22490704]",
"parent": 22485489,
"text": "The approach that has worked best for me is...",
"title": None,
}
] == items

0 comments on commit 9e12f24

Please sign in to comment.