Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
kelu124 committed Jan 2, 2024
1 parent 65806b8 commit 2c020f5
Show file tree
Hide file tree
Showing 11 changed files with 6,984 additions and 4,096 deletions.
172 changes: 87 additions & 85 deletions 00.getURLs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,18 @@
"data": {
"text/plain": [
"(12,\n",
" ['https://kghosh.substack.com/p/20231029',\n",
" 'https://kghosh.substack.com/p/20230927',\n",
" 'https://kghosh.substack.com/p/20230819',\n",
" 'https://kghosh.substack.com/p/20230827',\n",
" 'https://kghosh.substack.com/p/20231017',\n",
" 'https://kghosh.substack.com/p/20231005',\n",
" ['https://kghosh.substack.com/p/20231111',\n",
" 'https://kghosh.substack.com/p/20231022',\n",
" 'https://kghosh.substack.com/p/20290830',\n",
" 'https://kghosh.substack.com/p/20290911',\n",
" 'https://kghosh.substack.com/p/20230320',\n",
" 'https://kghosh.substack.com/p/20230810',\n",
" 'https://kghosh.substack.com/p/20231010'])"
" 'https://kghosh.substack.com/p/20231230',\n",
" 'https://kghosh.substack.com/p/20231029',\n",
" 'https://kghosh.substack.com/p/20231017',\n",
" 'https://kghosh.substack.com/p/20231105',\n",
" 'https://kghosh.substack.com/p/20231119',\n",
" 'https://kghosh.substack.com/p/20231203',\n",
" 'https://kghosh.substack.com/p/20231126',\n",
" 'https://kghosh.substack.com/p/20221217',\n",
" 'https://kghosh.substack.com/p/some-guesses-about-2024',\n",
" 'https://kghosh.substack.com/p/20231209'])"
]
},
"execution_count": 2,
Expand Down Expand Up @@ -85,18 +85,18 @@
"name": "stdout",
"output_type": "stream",
"text": [
"20231029 saved\n",
"20230927 exists.\n",
"20230819 exists.\n",
"20230827 exists.\n",
"20231111 exists.\n",
"20231022 exists.\n",
"20231230 saved\n",
"20231029 exists.\n",
"20231017 exists.\n",
"20231005 exists.\n",
"20231022 saved\n",
"20290830 exists.\n",
"20290911 exists.\n",
"20230320 exists.\n",
"20230810 exists.\n",
"20231010 exists.\n"
"20231105 exists.\n",
"20231119 exists.\n",
"20231203 exists.\n",
"20231126 exists.\n",
"20221217 saved\n",
"some-guesses-about-2024 saved\n",
"20231209 exists.\n"
]
}
],
Expand Down Expand Up @@ -140,13 +140,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"There are 67 pages saved.\n"
"There are 1272 pages saved.\n"
]
},
{
"data": {
"text/plain": [
"['.cache/20220118', '.cache/20220128', '.cache/20220212']"
"['.cache/001b195bbddec8fb0193b668a70aa883',\n",
" '.cache/002ee663c73c7add6ce2cabe29e4ae02',\n",
" '.cache/00931247998b35b40d513cfa65a11571']"
]
},
"execution_count": 5,
Expand Down Expand Up @@ -187,7 +189,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1626\n"
"1875\n"
]
},
{
Expand Down Expand Up @@ -254,38 +256,38 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1621</th>\n",
" <th>1870</th>\n",
" <td>.cache/fully-homomorphic-encryption</td>\n",
" <td>b'https://web.yammer.com/main/org/mottmac.com/...</td>\n",
" <td>fc70437f7665556e8d27c5a9653763a6</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>f0a23dcd98c2fdf0a3fd523a04e7c255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1622</th>\n",
" <td>.cache/fully-homomorphic-encryption</td>\n",
" <td>b'https://www.kaggle.com/code/concretemlteam/t...</td>\n",
" <td>cde046296faf786f02951f73b9b25417</td>\n",
" <th>1871</th>\n",
" <td>.cache/some-guesses-about-2024</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>b82a7509d34dc6974ca9036c0d80ba7d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1623</th>\n",
" <td>.cache/fully-homomorphic-encryption</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>f0a23dcd98c2fdf0a3fd523a04e7c255</td>\n",
" <th>1872</th>\n",
" <td>.cache/some-guesses-about-2024</td>\n",
" <td>b'https://www2.deloitte.com/us/en/insights/ind...</td>\n",
" <td>c1f03407e92d82ff6f8e3bc04f8996c2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1624</th>\n",
" <th>1873</th>\n",
" <td>.cache/when-the-tide-of-ai-generated-texts</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>d256bfc733bfe7fe3f55726967800605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1625</th>\n",
" <th>1874</th>\n",
" <td>.cache/when-the-tide-of-ai-generated-texts</td>\n",
" <td>b'https://bronasbooks.com/2022/01/17/the-libra...</td>\n",
" <td>228ce5d138c39687cacf2eae975950d7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1600 rows × 3 columns</p>\n",
"<p>1844 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
Expand All @@ -296,11 +298,11 @@
"3 .cache/20220118 \n",
"4 .cache/20220118 \n",
"... ... \n",
"1621 .cache/fully-homomorphic-encryption \n",
"1622 .cache/fully-homomorphic-encryption \n",
"1623 .cache/fully-homomorphic-encryption \n",
"1624 .cache/when-the-tide-of-ai-generated-texts \n",
"1625 .cache/when-the-tide-of-ai-generated-texts \n",
"1870 .cache/fully-homomorphic-encryption \n",
"1871 .cache/some-guesses-about-2024 \n",
"1872 .cache/some-guesses-about-2024 \n",
"1873 .cache/when-the-tide-of-ai-generated-texts \n",
"1874 .cache/when-the-tide-of-ai-generated-texts \n",
"\n",
" url \\\n",
"0 b'https://www.gov.wales/sites/default/files/pu... \n",
Expand All @@ -309,11 +311,11 @@
"3 b'https://www.futuregenerations.wales/about-us... \n",
"4 b'https://www.bbc.com/future/article/20230215-... \n",
"... ... \n",
"1621 b'https://web.yammer.com/main/org/mottmac.com/... \n",
"1622 b'https://www.kaggle.com/code/concretemlteam/t... \n",
"1623 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1624 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1625 b'https://bronasbooks.com/2022/01/17/the-libra... \n",
"1870 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1871 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1872 b'https://www2.deloitte.com/us/en/insights/ind... \n",
"1873 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1874 b'https://bronasbooks.com/2022/01/17/the-libra... \n",
"\n",
" hash \n",
"0 3b92dc627d73b0d586fcff194b697c89 \n",
Expand All @@ -322,13 +324,13 @@
"3 3938700a79bbde32972657f9eb7b1b69 \n",
"4 32228bbf929d81d39a1808dd9b7c4493 \n",
"... ... \n",
"1621 fc70437f7665556e8d27c5a9653763a6 \n",
"1622 cde046296faf786f02951f73b9b25417 \n",
"1623 f0a23dcd98c2fdf0a3fd523a04e7c255 \n",
"1624 d256bfc733bfe7fe3f55726967800605 \n",
"1625 228ce5d138c39687cacf2eae975950d7 \n",
"1870 f0a23dcd98c2fdf0a3fd523a04e7c255 \n",
"1871 b82a7509d34dc6974ca9036c0d80ba7d \n",
"1872 c1f03407e92d82ff6f8e3bc04f8996c2 \n",
"1873 d256bfc733bfe7fe3f55726967800605 \n",
"1874 228ce5d138c39687cacf2eae975950d7 \n",
"\n",
"[1600 rows x 3 columns]"
"[1844 rows x 3 columns]"
]
},
"execution_count": 7,
Expand Down Expand Up @@ -419,38 +421,38 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1621</th>\n",
" <th>1870</th>\n",
" <td>.cache/fully-homomorphic-encryption</td>\n",
" <td>b'https://web.yammer.com/main/org/mottmac.com/...</td>\n",
" <td>fc70437f7665556e8d27c5a9653763a6</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>f0a23dcd98c2fdf0a3fd523a04e7c255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1622</th>\n",
" <td>.cache/fully-homomorphic-encryption</td>\n",
" <td>b'https://www.kaggle.com/code/concretemlteam/t...</td>\n",
" <td>cde046296faf786f02951f73b9b25417</td>\n",
" <th>1871</th>\n",
" <td>.cache/some-guesses-about-2024</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>b82a7509d34dc6974ca9036c0d80ba7d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1623</th>\n",
" <td>.cache/fully-homomorphic-encryption</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>f0a23dcd98c2fdf0a3fd523a04e7c255</td>\n",
" <th>1872</th>\n",
" <td>.cache/some-guesses-about-2024</td>\n",
" <td>b'https://www2.deloitte.com/us/en/insights/ind...</td>\n",
" <td>c1f03407e92d82ff6f8e3bc04f8996c2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1624</th>\n",
" <th>1873</th>\n",
" <td>.cache/when-the-tide-of-ai-generated-texts</td>\n",
" <td>b'https://substackcdn.com/image/fetch/f_auto,q...</td>\n",
" <td>d256bfc733bfe7fe3f55726967800605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1625</th>\n",
" <th>1874</th>\n",
" <td>.cache/when-the-tide-of-ai-generated-texts</td>\n",
" <td>b'https://bronasbooks.com/2022/01/17/the-libra...</td>\n",
" <td>228ce5d138c39687cacf2eae975950d7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1600 rows × 3 columns</p>\n",
"<p>1844 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
Expand All @@ -461,11 +463,11 @@
"3 .cache/20220118 \n",
"4 .cache/20220118 \n",
"... ... \n",
"1621 .cache/fully-homomorphic-encryption \n",
"1622 .cache/fully-homomorphic-encryption \n",
"1623 .cache/fully-homomorphic-encryption \n",
"1624 .cache/when-the-tide-of-ai-generated-texts \n",
"1625 .cache/when-the-tide-of-ai-generated-texts \n",
"1870 .cache/fully-homomorphic-encryption \n",
"1871 .cache/some-guesses-about-2024 \n",
"1872 .cache/some-guesses-about-2024 \n",
"1873 .cache/when-the-tide-of-ai-generated-texts \n",
"1874 .cache/when-the-tide-of-ai-generated-texts \n",
"\n",
" url \\\n",
"0 b'https://www.gov.wales/sites/default/files/pu... \n",
Expand All @@ -474,11 +476,11 @@
"3 b'https://www.futuregenerations.wales/about-us... \n",
"4 b'https://www.bbc.com/future/article/20230215-... \n",
"... ... \n",
"1621 b'https://web.yammer.com/main/org/mottmac.com/... \n",
"1622 b'https://www.kaggle.com/code/concretemlteam/t... \n",
"1623 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1624 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1625 b'https://bronasbooks.com/2022/01/17/the-libra... \n",
"1870 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1871 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1872 b'https://www2.deloitte.com/us/en/insights/ind... \n",
"1873 b'https://substackcdn.com/image/fetch/f_auto,q... \n",
"1874 b'https://bronasbooks.com/2022/01/17/the-libra... \n",
"\n",
" hash \n",
"0 3b92dc627d73b0d586fcff194b697c89 \n",
Expand All @@ -487,13 +489,13 @@
"3 3938700a79bbde32972657f9eb7b1b69 \n",
"4 32228bbf929d81d39a1808dd9b7c4493 \n",
"... ... \n",
"1621 fc70437f7665556e8d27c5a9653763a6 \n",
"1622 cde046296faf786f02951f73b9b25417 \n",
"1623 f0a23dcd98c2fdf0a3fd523a04e7c255 \n",
"1624 d256bfc733bfe7fe3f55726967800605 \n",
"1625 228ce5d138c39687cacf2eae975950d7 \n",
"1870 f0a23dcd98c2fdf0a3fd523a04e7c255 \n",
"1871 b82a7509d34dc6974ca9036c0d80ba7d \n",
"1872 c1f03407e92d82ff6f8e3bc04f8996c2 \n",
"1873 d256bfc733bfe7fe3f55726967800605 \n",
"1874 228ce5d138c39687cacf2eae975950d7 \n",
"\n",
"[1600 rows x 3 columns]"
"[1844 rows x 3 columns]"
]
},
"execution_count": 8,
Expand Down
Loading

0 comments on commit 2c020f5

Please sign in to comment.