Skip to content

Commit

Permalink
download url
Browse files Browse the repository at this point in the history
  • Loading branch information
luweizheng committed Jan 12, 2024
1 parent 9afe761 commit 6a2e23b
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 189 deletions.
143 changes: 65 additions & 78 deletions ch-pandas/data-preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,7 @@
"hide-cell"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"文件夹已存在,无需操作。\n"
]
}
],
"outputs": [],
"source": [
"import os\n",
"import urllib.request\n",
Expand All @@ -44,19 +36,14 @@
"folder_path = os.path.join(os.getcwd(), \"../data/pwt\")\n",
"download_url = \"https://www.rug.nl/ggdc/docs/pwt70_06032011version.zip\"\n",
"file_name = download_url.split(\"/\")[-1]\n",
"zip_file_path = os.path.join(folder_path, file_name)\n",
"if not os.path.exists(folder_path):\n",
" # 创建文件夹\n",
" os.makedirs(folder_path)\n",
" print(f\"文件夹不存在,已创建。\")\n",
"\n",
" zip_file_path = os.path.join(folder_path, file_name)\n",
"\n",
"if not os.path.exists(zip_file_path):\n",
" urllib.request.urlretrieve(download_url, zip_file_path)\n",
" with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:\n",
" zip_ref.extractall(folder_path)\n",
" print(\"数据已下载并解压缩。\")\n",
"else:\n",
" print(f\"文件夹已存在,无需操作。\")\n",
"\n",
"df = pd.read_csv(os.path.join(folder_path, \"pwt70_w_country_names.csv\"))"
]
Expand Down Expand Up @@ -1765,7 +1752,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4n/v40br47s46ggrjm9bdm64lwh0000gn/T/ipykernel_85795/832431325.py:1: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
"/var/folders/4n/v40br47s46ggrjm9bdm64lwh0000gn/T/ipykernel_18576/832431325.py:1: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
" df.applymap(lambda x : round(x, 2) if type(x)!=str else x)\n"
]
},
Expand Down Expand Up @@ -2433,28 +2420,28 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2218</th>\n",
" <td>China Version 2</td>\n",
" <td>CH2</td>\n",
" <th>2158</th>\n",
" <td>China Version 1</td>\n",
" <td>CHN</td>\n",
" <td>2008</td>\n",
" <td>1317066.000</td>\n",
" <td>6.95</td>\n",
" <td>Yuan Renminbi</td>\n",
" <td>10557315.23</td>\n",
" <td>8015.782984</td>\n",
" <td>8416.480345</td>\n",
" <td>7551.673672</td>\n",
" <td>9.719402e+06</td>\n",
" <td>7379.586069</td>\n",
" <td>7529.397258</td>\n",
" <td>6747.223691</td>\n",
" <td>...</td>\n",
" <td>37.629450</td>\n",
" <td>69.883127</td>\n",
" <td>7777.945871</td>\n",
" <td>11929.04952</td>\n",
" <td>11929.04952</td>\n",
" <td>42.862836</td>\n",
" <td>68.760068</td>\n",
" <td>7132.315550</td>\n",
" <td>10938.84514</td>\n",
" <td>10929.28222</td>\n",
" <td>NaN</td>\n",
" <td>11938.30669</td>\n",
" <td>10937.76354</td>\n",
" <td>NaN</td>\n",
" <td>7233.090071</td>\n",
" <td>8.015783</td>\n",
" <td>6635.526673</td>\n",
" <td>7.379586</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2219</th>\n",
Expand All @@ -2464,7 +2451,7 @@
" <td>1323592.000</td>\n",
" <td>6.83</td>\n",
" <td>Yuan Renminbi</td>\n",
" <td>11342464.26</td>\n",
" <td>1.134246e+07</td>\n",
" <td>8569.456643</td>\n",
" <td>9227.187296</td>\n",
" <td>8531.307128</td>\n",
Expand All @@ -2488,7 +2475,7 @@
" <td>1323592.000</td>\n",
" <td>6.83</td>\n",
" <td>Yuan Renminbi</td>\n",
" <td>10739325.58</td>\n",
" <td>1.073933e+07</td>\n",
" <td>8113.773410</td>\n",
" <td>8296.575690</td>\n",
" <td>7651.109254</td>\n",
Expand Down Expand Up @@ -2565,24 +2552,24 @@
"7743 Palau PLW 1953 7.859 NaN NaN \n",
"7744 Palau PLW 1954 8.073 NaN NaN \n",
"... ... ... ... ... ... ... \n",
"2218 China Version 2 CH2 2008 1317066.000 6.95 Yuan Renminbi \n",
"2158 China Version 1 CHN 2008 1317066.000 6.95 Yuan Renminbi \n",
"2219 China Version 2 CH2 2009 1323592.000 6.83 Yuan Renminbi \n",
"2159 China Version 1 CHN 2009 1323592.000 6.83 Yuan Renminbi \n",
"10020 Tajikistan TJK 1950 NaN NaN NaN \n",
"10021 Tajikistan TJK 1951 NaN NaN NaN \n",
"\n",
" tcgdp cgdp cgdp2 cda2 ... ki \\\n",
"7740 NaN NaN NaN NaN ... NaN \n",
"7741 NaN NaN NaN NaN ... NaN \n",
"7742 NaN NaN NaN NaN ... NaN \n",
"7743 NaN NaN NaN NaN ... NaN \n",
"7744 NaN NaN NaN NaN ... NaN \n",
"... ... ... ... ... ... ... \n",
"2218 10557315.23 8015.782984 8416.480345 7551.673672 ... 37.629450 \n",
"2219 11342464.26 8569.456643 9227.187296 8531.307128 ... 39.044203 \n",
"2159 10739325.58 8113.773410 8296.575690 7651.109254 ... 46.707842 \n",
"10020 NaN NaN NaN NaN ... NaN \n",
"10021 NaN NaN NaN NaN ... NaN \n",
" tcgdp cgdp cgdp2 cda2 ... ki \\\n",
"7740 NaN NaN NaN NaN ... NaN \n",
"7741 NaN NaN NaN NaN ... NaN \n",
"7742 NaN NaN NaN NaN ... NaN \n",
"7743 NaN NaN NaN NaN ... NaN \n",
"7744 NaN NaN NaN NaN ... NaN \n",
"... ... ... ... ... ... ... \n",
"2158 9.719402e+06 7379.586069 7529.397258 6747.223691 ... 42.862836 \n",
"2219 1.134246e+07 8569.456643 9227.187296 8531.307128 ... 39.044203 \n",
"2159 1.073933e+07 8113.773410 8296.575690 7651.109254 ... 46.707842 \n",
"10020 NaN NaN NaN NaN ... NaN \n",
"10021 NaN NaN NaN NaN ... NaN \n",
"\n",
" openk rgdpeqa rgdpwok rgdpl2wok rgdpl2pe \\\n",
"7740 NaN NaN NaN NaN NaN \n",
Expand All @@ -2591,7 +2578,7 @@
"7743 NaN NaN NaN NaN NaN \n",
"7744 NaN NaN NaN NaN NaN \n",
"... ... ... ... ... ... \n",
"2218 69.883127 7777.945871 11929.04952 11929.04952 NaN \n",
"2158 68.760068 7132.315550 10938.84514 10929.28222 NaN \n",
"2219 61.240342 8247.646160 NaN NaN NaN \n",
"2159 58.579480 7782.385295 NaN NaN NaN \n",
"10020 NaN NaN NaN NaN NaN \n",
Expand All @@ -2604,7 +2591,7 @@
"7743 NaN NaN NaN NaN \n",
"7744 NaN NaN NaN NaN \n",
"... ... ... ... ... \n",
"2218 11938.30669 NaN 7233.090071 8.015783 \n",
"2158 10937.76354 NaN 6635.526673 7.379586 \n",
"2219 12658.65025 NaN 7747.331618 8.569457 \n",
"2159 11914.32395 NaN 7314.930606 8.113773 \n",
"10020 NaN NaN NaN NaN \n",
Expand Down Expand Up @@ -2738,30 +2725,6 @@
" <td>8.113773</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158</th>\n",
" <td>China Version 1</td>\n",
" <td>CHN</td>\n",
" <td>2008</td>\n",
" <td>1317066.000</td>\n",
" <td>6.950000</td>\n",
" <td>Yuan Renminbi</td>\n",
" <td>9.719402e+06</td>\n",
" <td>7379.586069</td>\n",
" <td>7529.397258</td>\n",
" <td>6747.223691</td>\n",
" <td>...</td>\n",
" <td>42.862836</td>\n",
" <td>68.760068</td>\n",
" <td>7132.315550</td>\n",
" <td>10938.84514</td>\n",
" <td>10929.28222</td>\n",
" <td>NaN</td>\n",
" <td>10937.76354</td>\n",
" <td>NaN</td>\n",
" <td>6635.526673</td>\n",
" <td>7.379586</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2218</th>\n",
" <td>China Version 2</td>\n",
" <td>CH2</td>\n",
Expand All @@ -2786,6 +2749,30 @@
" <td>8.015783</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2158</th>\n",
" <td>China Version 1</td>\n",
" <td>CHN</td>\n",
" <td>2008</td>\n",
" <td>1317066.000</td>\n",
" <td>6.950000</td>\n",
" <td>Yuan Renminbi</td>\n",
" <td>9.719402e+06</td>\n",
" <td>7379.586069</td>\n",
" <td>7529.397258</td>\n",
" <td>6747.223691</td>\n",
" <td>...</td>\n",
" <td>42.862836</td>\n",
" <td>68.760068</td>\n",
" <td>7132.315550</td>\n",
" <td>10938.84514</td>\n",
" <td>10929.28222</td>\n",
" <td>NaN</td>\n",
" <td>10937.76354</td>\n",
" <td>NaN</td>\n",
" <td>6635.526673</td>\n",
" <td>7.379586</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2217</th>\n",
" <td>China Version 2</td>\n",
" <td>CH2</td>\n",
Expand Down Expand Up @@ -2962,8 +2949,8 @@
" country isocode year POP XRAT Currency_Unit \\\n",
"2219 China Version 2 CH2 2009 1323592.000 6.830000 Yuan Renminbi \n",
"2159 China Version 1 CHN 2009 1323592.000 6.830000 Yuan Renminbi \n",
"2158 China Version 1 CHN 2008 1317066.000 6.950000 Yuan Renminbi \n",
"2218 China Version 2 CH2 2008 1317066.000 6.950000 Yuan Renminbi \n",
"2158 China Version 1 CHN 2008 1317066.000 6.950000 Yuan Renminbi \n",
"2217 China Version 2 CH2 2007 1310584.000 7.607532 Yuan Renminbi \n",
"... ... ... ... ... ... ... \n",
"7742 Palau PLW 1952 7.651 NaN NaN \n",
Expand All @@ -2975,8 +2962,8 @@
" tcgdp cgdp cgdp2 cda2 ... ki \\\n",
"2219 1.134246e+07 8569.456643 9227.187296 8531.307128 ... 39.044203 \n",
"2159 1.073933e+07 8113.773410 8296.575690 7651.109254 ... 46.707842 \n",
"2158 9.719402e+06 7379.586069 7529.397258 6747.223691 ... 42.862836 \n",
"2218 1.055732e+07 8015.782984 8416.480345 7551.673672 ... 37.629450 \n",
"2158 9.719402e+06 7379.586069 7529.397258 6747.223691 ... 42.862836 \n",
"2217 9.407736e+06 7178.278054 7437.401920 6803.244389 ... 37.634047 \n",
"... ... ... ... ... ... ... \n",
"7742 NaN NaN NaN NaN ... NaN \n",
Expand All @@ -2988,8 +2975,8 @@
" openk rgdpeqa rgdpwok rgdpl2wok rgdpl2pe \\\n",
"2219 61.240342 8247.646160 NaN NaN NaN \n",
"2159 58.579480 7782.385295 NaN NaN NaN \n",
"2158 68.760068 7132.315550 10938.84514 10929.28222 NaN \n",
"2218 69.883127 7777.945871 11929.04952 11929.04952 NaN \n",
"2158 68.760068 7132.315550 10938.84514 10929.28222 NaN \n",
"2217 69.245056 7357.713761 11290.43295 11290.43295 NaN \n",
"... ... ... ... ... ... \n",
"7742 NaN NaN NaN NaN NaN \n",
Expand All @@ -3001,8 +2988,8 @@
" rgdpl2te rgdpl2th rgdptt GDP percap \n",
"2219 12658.65025 NaN 7747.331618 8.569457 \n",
"2159 11914.32395 NaN 7314.930606 8.113773 \n",
"2158 10937.76354 NaN 6635.526673 7.379586 \n",
"2218 11938.30669 NaN 7233.090071 8.015783 \n",
"2158 10937.76354 NaN 6635.526673 7.379586 \n",
"2217 11288.33089 NaN 6690.086924 7.178278 \n",
"... ... ... ... ... \n",
"7742 NaN NaN NaN NaN \n",
Expand Down Expand Up @@ -3441,7 +3428,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.7"
}
},
"nbformat": 4,
Expand Down
9 changes: 2 additions & 7 deletions ch-pandas/dataframe-groupby.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,14 @@
"folder_path = os.path.join(os.getcwd(), \"../data/pwt\")\n",
"download_url = \"https://www.rug.nl/ggdc/docs/pwt70_06032011version.zip\"\n",
"file_name = download_url.split(\"/\")[-1]\n",
"zip_file_path = os.path.join(folder_path, file_name)\n",
"if not os.path.exists(folder_path):\n",
" # 创建文件夹\n",
" os.makedirs(folder_path)\n",
" print(f\"文件夹不存在,已创建。\")\n",
"\n",
" zip_file_path = os.path.join(folder_path, file_name)\n",
"\n",
"if not os.path.exists(zip_file_path):\n",
" urllib.request.urlretrieve(download_url, zip_file_path)\n",
" with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:\n",
" zip_ref.extractall(folder_path)\n",
" print(\"数据已下载并解压缩。\")\n",
"else:\n",
" print(f\"文件夹已存在,无需操作。\")\n",
"\n",
"df = pd.read_csv(os.path.join(folder_path, \"pwt70_w_country_names.csv\"))\n",
"df = df.fillna(0)"
Expand Down
Loading

0 comments on commit 6a2e23b

Please sign in to comment.