Skip to content
This repository has been archived by the owner on Sep 19, 2024. It is now read-only.

Commit

Permalink
qatar demo
Browse files Browse the repository at this point in the history
  • Loading branch information
dudizimber committed Jul 17, 2024
1 parent d3f3430 commit d7cf0bb
Show file tree
Hide file tree
Showing 41 changed files with 51,668 additions and 49 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified falkordb_gemini_kg/.DS_Store
Binary file not shown.
Binary file added falkordb_gemini_kg/demos/.DS_Store
Binary file not shown.
Binary file added falkordb_gemini_kg/demos/qatar_qa/.DS_Store
Binary file not shown.
244 changes: 244 additions & 0 deletions falkordb_gemini_kg/demos/qatar_qa/clean_data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
{
"cells": [
{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "# Categorize records by keyword\n",
  "\n",
  "Reads `data/cleaned_qr_baggage_v7.jsonl`, tags each record with the first category whose keyword occurs in its `anchor` or `positive` text, and writes one `data/<category>.jsonl` file per category plus `data/undefined.jsonl` for records without a category.\n",
  "\n",
  "Matching is plain substring containment on lower-cased text, and categories are tried in the order they are declared, so earlier categories take priority."
 ]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import json\n",
  "\n",
  "\n",
  "def find_category(record, categories):\n",
  "    \"\"\"Return the first (category, keyword) match for a record, or None.\n",
  "\n",
  "    Categories and their keywords are scanned in declaration order; the first\n",
  "    keyword found as a substring of the lower-cased \"anchor\" or \"positive\"\n",
  "    text wins, so the order of `categories` sets the priority.\n",
  "\n",
  "    Raises KeyError if the record lacks an \"anchor\" or \"positive\" field.\n",
  "    \"\"\"\n",
  "    # Lower-case once per record instead of once per keyword.\n",
  "    texts = (record[\"anchor\"].lower(), record[\"positive\"].lower())\n",
  "    for category, keywords in categories.items():\n",
  "        for keyword in keywords:\n",
  "            if any(keyword in text for text in texts):\n",
  "                return category, keyword\n",
  "    return None\n",
  "\n",
  "\n",
  "def write_jsonl(path, records):\n",
  "    \"\"\"Write `records` to `path` as one JSON object per line.\"\"\"\n",
  "    with open(path, \"w\", encoding=\"utf-8\") as f:\n",
  "        for record in records:\n",
  "            f.write(json.dumps(record) + \"\\n\")\n",
  "\n",
  "\n",
  "# Iterate the file line by line (only real newlines separate records) and\n",
  "# skip blank lines so that a stray empty line does not crash json.loads.\n",
  "with open(\"data/cleaned_qr_baggage_v7.jsonl\", encoding=\"utf-8\") as f:\n",
  "    data = [json.loads(line) for line in f if line.strip()]\n",
  "\n",
  "# Keywords per category. Keywords are matched as substrings, so surrounding\n",
  "# spaces (e.g. \" pet \", \" cat \") are significant and restrict where a\n",
  "# keyword can match.\n",
  "categories = {\n",
  "    \"children\": [\n",
  "        \"child\",\n",
  "        \"children\",\n",
  "        \"kid\",\n",
  "        \"kids\",\n",
  "        \"baby\",\n",
  "        \"toddler\",\n",
  "        \"infant\",\n",
  "        \"teen\",\n",
  "        \"teenager\",\n",
  "        \"young\",\n",
  "    ],\n",
  "    \"special_items\": [\n",
  "        \"special\",\n",
  "        \"item\",\n",
  "        \"fragile\",\n",
  "        \"perishable\",\n",
  "        \"valuable\",\n",
  "        \"jewelry\",\n",
  "        \"electronics\",\n",
  "        \"fragileitem\",\n",
  "        \"fragile item\",\n",
  "        \"fragile-item\",\n",
  "        \"wheelchair\",\n",
  "        \" sport\",\n",
  "        \"hiking\",\n",
  "        \"instrument\",\n",
  "        \"equipment\",\n",
  "        \"lighter\",\n",
  "        \"matches\",\n",
  "        \"medical\",\n",
  "        \"device\",\n",
  "        \"batteries\",\n",
  "        \"battery\",\n",
  "        \"chemical\",\n",
  "        \"specimen\",\n",
  "        \"fuel\",\n",
  "        \"gas\",\n",
  "        \"cartridge\",\n",
  "        \"gun\",\n",
  "        \"dangerous\",\n",
  "        \"alcohol\",\n",
  "        \"kayak\",\n",
  "        \"surf\",\n",
  "        \"guitar\",\n",
  "        \"tires\",\n",
  "        \"bicycle\",\n",
  "        \"buoyancy\",\n",
  "        \"diving\",\n",
  "        \"cigarette\",\n",
  "        \"thermometer\",\n",
  "        \"drone\",\n",
  "        \"peds\",\n",
  "        \"powder\",\n",
  "        \"dry ice\",\n",
  "        \"machinery\",\n",
  "        \"cylinder\",\n",
  "        \"aerosol\",\n",
  "        \"firearm\",\n",
  "        \"ammunition\",\n",
  "        \"containers\",\n",
  "        \"bottles\",\n",
  "        \"stroller\",\n",
  "        \"stun\",\n",
  "        \"casket\",\n",
  "        \"remains\",\n",
  "        \"meat\",\n",
  "        \"fishing\",\n",
  "        \"medication\",\n",
  "        \"liquid\"\n",
  "    ],\n",
  "    \"animals\": [\n",
  "        \"animal\",\n",
  "        \" pet \",\n",
  "        \"pets\",\n",
  "        \"dog\",\n",
  "        \"cats\",\n",
  "        \" cat \",\n",
  "        \"bird\",\n",
  "        \"fish\",\n",
  "        \"reptile\",\n",
  "        \"mammal\",\n",
  "        \"falcon\",\n",
  "    ],\n",
  "    \"check_in\": [\n",
  "        \"check-in\",\n",
  "        \"check in\",\n",
  "        \"check -in\",\n",
  "        \"checkin\",\n",
  "        \"boarding\",\n",
  "        \"boardingpass\",\n",
  "    ],\n",
  "    \"frequent_flyers\": [\n",
  "        \"frequent flyer\",\n",
  "        \"frequentflyer\",\n",
  "        \"frequent\",\n",
  "        \"flyer\",\n",
  "        \"miles\",\n",
  "        \"points\",\n",
  "        \"qrewards\",\n",
  "        \"privilege\",\n",
  "        \"gold members\",\n",
  "        \"f1\",\n",
  "        \"lounge\",\n",
  "        \"bonus\",\n",
  "        \"tier\",\n",
  "        \"student\"\n",
  "    ],\n",
  "    \"booking\": [\n",
  "        \"booking\",\n",
  "        \"reservation\",\n",
  "        \"ticket\",\n",
  "        \"flight\",\n",
  "        \"plane\",\n",
  "        \"aircraft\",\n",
  "        \"airplane\",\n",
  "        \"airline\",\n",
  "        \"seat\",\n",
  "        \"dining\",\n",
  "        \"book\",\n",
  "        \"mobility\",\n",
  "        \"travel guard\",\n",
  "        \"refund\",\n",
  "        \"accommodation\",\n",
  "        \"qsuite\",\n",
  "        \"quad\",\n",
  "        \"alerts\",\n",
  "        \"food\",\n",
  "        \"meal\",\n",
  "        \"travelcare\",\n",
  "        \"bid\"\n",
  "    ],\n",
  "    \"baggage\": [\n",
  "        \"bag\",\n",
  "        \"baggage\",\n",
  "        \"luggage\",\n",
  "        \"suitcase\",\n",
  "        \"backpack\",\n",
  "        \"carryon\",\n",
  "        \"carry-on\",\n",
  "        \"checked\",\n",
  "        \"lost\",\n",
  "        \"found\",\n",
  "        \"q-tag\",\n",
  "        \"voucher\",\n",
  "        \"overweight\",\n",
  "        \"carriage\",\n",
  "        \"claim\",\n",
  "        \"package\"\n",
  "    ],\n",
  "    \"portal\": [\n",
  "        \"portal\",\n",
  "        \"website\",\n",
  "        \"account\",\n",
  "        \"otp\",\n",
  "        \"avios\",\n",
  "        \"access\",\n",
  "        \"browser\",\n",
  "        \"app\",\n",
  "        \"assistance\",\n",
  "        \"kiosk\",\n",
  "    ],\n",
  "    \"fares\": [\"fare\", \"price\", \"cost\", \"charges\", \"fee\", \"rate\", \"expensive\", \"charge\"],\n",
  "    \"countries\": [\n",
  "        \"country\",\n",
  "        \"passport\",\n",
  "        \"visa\",\n",
  "        \"countries\",\n",
  "        \"region\",\n",
  "        \"city\",\n",
  "        \"cities\",\n",
  "        \"quarantine\",\n",
  "        \"sydney\",\n",
  "        \"australia\",\n",
  "        \"iran\",\n",
  "        \"africa\",\n",
  "        \"destination\"\n",
  "    ],\n",
  "}\n",
  "\n",
  "for record in data:\n",
  "    # Records that already carry a category are left untouched.\n",
  "    if \"category\" in record:\n",
  "        continue\n",
  "    match = find_category(record, categories)\n",
  "    if match is not None:\n",
  "        record[\"category\"], record[\"keyword\"] = match\n",
  "\n",
  "# One output file per category, plus a catch-all for records without one.\n",
  "for category in categories:\n",
  "    write_jsonl(\n",
  "        f\"data/{category}.jsonl\",\n",
  "        (record for record in data if record.get(\"category\") == category),\n",
  "    )\n",
  "\n",
  "write_jsonl(\n",
  "    \"data/undefined.jsonl\",\n",
  "    (record for record in data if not record.get(\"category\")),\n",
  ")"
 ]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit d7cf0bb

Please sign in to comment.