diff --git a/benchmarks.ipynb b/benchmarks.ipynb index ba7f0fa8..57bd1d40 100644 --- a/benchmarks.ipynb +++ b/benchmarks.ipynb @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -60,15 +60,14 @@ "Table 1,000,000 rows is 240 Mb on disk\n", "Table 2,000,000 rows is 480 Mb on disk\n", "Table 5,000,000 rows is 1,200 Mb on disk\n", - "Table 20,000,000 rows is 4,800 Mb on disk\n", - "Table 50,000,000 rows is 12,000 Mb on disk\n" + "Table 10,000,000 rows is 2,400 Mb on disk\n" ] } ], "source": [ "process = psutil.Process(os.getpid())\n", "\n", - "def make_tables(sizes=[1,2,5,20,50]):\n", + "def make_tables(sizes=[1,2,5,10,20,50]):\n", " # The last tables are too big for RAM (~24Gb), so I create subtables of 1M rows and append them.\n", " t = synthetic_order_data(Config.PAGE_SIZE)\n", " real, flat = t.nbytes()\n", @@ -790,14 +789,47 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Table.dtypes()\n", + "### Table.types()\n", + "\n", + "Table.types() is implemented for near-constant-time lookup.\n", "\n", - "Table.dtypes() is implemented for near constant speed lookup."
+ "Here is an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'#': {int: 10000000},\n", + " '1': {int: 10000000},\n", + " '2': {str: 10000000},\n", + " '3': {int: 10000000},\n", + " '4': {int: 10000000},\n", + " '5': {int: 10000000},\n", + " '6': {str: 10000000},\n", + " '7': {str: 10000000},\n", + " '8': {str: 10000000},\n", + " '9': {str: 10000000},\n", + " '10': {float: 10000000},\n", + " '11': {str: 10000000}}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tables[-1].types()" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -807,7 +839,7 @@ "\n", " for table in tables:\n", " start_time = perf_counter()\n", - " dt = table.dtypes()\n", + " dt = table.types()\n", " end_time = perf_counter()\n", " assert isinstance(dt, dict) and len(dt) != 0\n", " dtypes_results.add_rows( len(table), round(end_time-start_time, 3) )\n", @@ -817,19 +849,19 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
#rowstime (s)
010000000.0
120000000.0
250000000.0
3200000000.0
4500000000.0
" + "
#rowstime (s)
010000000.0
120000000.0
250000000.0
3100000000.0
" ], "text/plain": [ - "Table(2 columns, 5 rows)" + "Table(2 columns, 4 rows)" ] }, - "execution_count": 25, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" }