diff --git a/benchmarks.ipynb b/benchmarks.ipynb
index ba7f0fa8..57bd1d40 100644
--- a/benchmarks.ipynb
+++ b/benchmarks.ipynb
@@ -50,7 +50,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -60,15 +60,14 @@
"Table 1,000,000 rows is 240 Mb on disk\n",
"Table 2,000,000 rows is 480 Mb on disk\n",
"Table 5,000,000 rows is 1,200 Mb on disk\n",
- "Table 20,000,000 rows is 4,800 Mb on disk\n",
- "Table 50,000,000 rows is 12,000 Mb on disk\n"
+ "Table 10,000,000 rows is 2,400 Mb on disk\n"
]
}
],
"source": [
"process = psutil.Process(os.getpid())\n",
"\n",
- "def make_tables(sizes=[1,2,5,20,50]):\n",
+ "def make_tables(sizes=[1,2,5,10,20,50]):\n",
" # The last tables are too big for RAM (~24Gb), so I create subtables of 1M rows and append them.\n",
" t = synthetic_order_data(Config.PAGE_SIZE)\n",
" real, flat = t.nbytes()\n",
@@ -790,14 +789,47 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Table.dtypes()\n",
+ "### Table.types()\n",
+ "\n",
+ "Table.types() is implemented for near-constant-time lookup, independent of the number of rows.\n",
"\n",
- "Table.dtypes() is implemented for near constant speed lookup."
+ "Here is an example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'#': {int: 10000000},\n",
+ " '1': {int: 10000000},\n",
+ " '2': {str: 10000000},\n",
+ " '3': {int: 10000000},\n",
+ " '4': {int: 10000000},\n",
+ " '5': {int: 10000000},\n",
+ " '6': {str: 10000000},\n",
+ " '7': {str: 10000000},\n",
+ " '8': {str: 10000000},\n",
+ " '9': {str: 10000000},\n",
+ " '10': {float: 10000000},\n",
+ " '11': {str: 10000000}}"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tables[-1].types()"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -807,7 +839,7 @@
"\n",
" for table in tables:\n",
" start_time = perf_counter()\n",
- " dt = table.dtypes()\n",
+ " dt = table.types()\n",
" end_time = perf_counter()\n",
" assert isinstance(dt, dict) and len(dt) != 0\n",
" dtypes_results.add_rows( len(table), round(end_time-start_time, 3) )\n",
@@ -817,19 +849,19 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "
# | rows | time (s) |
---|
0 | 1000000 | 0.0 |
---|
1 | 2000000 | 0.0 |
---|
2 | 5000000 | 0.0 |
---|
3 | 20000000 | 0.0 |
---|
4 | 50000000 | 0.0 |
---|
"
+ "# | rows | time (s) |
---|
0 | 1000000 | 0.0 |
---|
1 | 2000000 | 0.0 |
---|
2 | 5000000 | 0.0 |
---|
3 | 10000000 | 0.0 |
---|
"
],
"text/plain": [
- "Table(2 columns, 5 rows)"
+ "Table(2 columns, 4 rows)"
]
},
- "execution_count": 25,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}