Skip to content

Commit

Permalink
Updated benchmark to reflect deprecation of Table.dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
root-11 committed Jul 3, 2023
1 parent 8164ff6 commit 3425243
Showing 1 changed file with 44 additions and 12 deletions.
56 changes: 44 additions & 12 deletions benchmarks.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -60,15 +60,14 @@
"Table 1,000,000 rows is 240 Mb on disk\n",
"Table 2,000,000 rows is 480 Mb on disk\n",
"Table 5,000,000 rows is 1,200 Mb on disk\n",
"Table 20,000,000 rows is 4,800 Mb on disk\n",
"Table 50,000,000 rows is 12,000 Mb on disk\n"
"Table 10,000,000 rows is 2,400 Mb on disk\n"
]
}
],
"source": [
"process = psutil.Process(os.getpid())\n",
"\n",
"def make_tables(sizes=[1,2,5,20,50]):\n",
"def make_tables(sizes=[1,2,5,10,20,50]):\n",
" # The last tables are too big for RAM (~24Gb), so I create subtables of 1M rows and append them.\n",
" t = synthetic_order_data(Config.PAGE_SIZE)\n",
" real, flat = t.nbytes()\n",
Expand Down Expand Up @@ -790,14 +789,47 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Table.dtypes()\n",
"### Table.types()\n",
"\n",
"Table.types() is implemented for near constant speed lookup.\n",
"\n",
"Table.dtypes() is implemented for near constant speed lookup."
"Here is an example:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'#': {int: 10000000},\n",
" '1': {int: 10000000},\n",
" '2': {str: 10000000},\n",
" '3': {int: 10000000},\n",
" '4': {int: 10000000},\n",
" '5': {int: 10000000},\n",
" '6': {str: 10000000},\n",
" '7': {str: 10000000},\n",
" '8': {str: 10000000},\n",
" '9': {str: 10000000},\n",
" '10': {float: 10000000},\n",
" '11': {str: 10000000}}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tables[-1].types()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -807,7 +839,7 @@
"\n",
" for table in tables:\n",
" start_time = perf_counter()\n",
" dt = table.dtypes()\n",
" dt = table.types()\n",
" end_time = perf_counter()\n",
" assert isinstance(dt, dict) and len(dt) != 0\n",
" dtypes_results.add_rows( len(table), round(end_time-start_time, 3) )\n",
Expand All @@ -817,19 +849,19 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><table border=1><tr><th>#</th><th>rows</th><th>time (s)</th></tr><tr><th> 0</th><th>1000000</th><th>0.0</th></tr><tr><th> 1</th><th>2000000</th><th>0.0</th></tr><tr><th> 2</th><th>5000000</th><th>0.0</th></tr><tr><th> 3</th><th>20000000</th><th>0.0</th></tr><tr><th> 4</th><th>50000000</th><th>0.0</th></tr></table></div>"
"<div><table border=1><tr><th>#</th><th>rows</th><th>time (s)</th></tr><tr><th> 0</th><th>1000000</th><th>0.0</th></tr><tr><th> 1</th><th>2000000</th><th>0.0</th></tr><tr><th> 2</th><th>5000000</th><th>0.0</th></tr><tr><th> 3</th><th>10000000</th><th>0.0</th></tr></table></div>"
],
"text/plain": [
"Table(2 columns, 5 rows)"
"Table(2 columns, 4 rows)"
]
},
"execution_count": 25,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit 3425243

Please sign in to comment.