-
Notifications
You must be signed in to change notification settings - Fork 4
/
save_to_sqlite.py
162 lines (146 loc) · 6.65 KB
/
save_to_sqlite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# https://www.youtube.com/watch?v=pd-0G0MigUA
# https://docs.python.org/3/library/sqlite3.html
# https://stackoverflow.com/questions/8442147/how-to-delete-or-add-column-in-sqlite/66399224#66399224
# https://www.sqlite.org/datatype3.html
# Other useful commands:
# conn.execute("DELETE FROM google_scholar_organic_results")
# conn.execute("DROP TABLE google_scholar_organic_results")
# conn.execute("ALTER TABLE google_scholar_organic_results DROP COLUMN authors")
# conn.execute("ALTER TABLE google_scholar_organic_results ADD COLUMN snippet text")
import sqlite3
import pandas as pd
from google_scholar_organic_results import organic_results
from google_scholar_cite_results import cite_results
conn = sqlite3.connect("google_scholar_results.db")
# Save to SQLite using Pandas
pd.DataFrame(organic_results()).to_sql(name="google_scholar_organic_results",
con=conn,
if_exists="append",
index=False)
pd.DataFrame(cite_results()).to_sql(name="google_scholar_cite_results",
con=conn,
if_exists="append",
index=False)
# Save to database using SQLite
# conn.execute("""CREATE TABLE google_scholar_organic_results (
# page_number integer,
# position integer,
# result_type text,
# title text,
# link text,
# snippet text,
# result_id text,
# publication_info_summary text,
# cited_by_count integer,
# cited_by_link text,
# cited_by_id text,
# total_versions integer,
# all_versions_link text,
# all_versions_id text,
# file_format text,
# file_title text,
# file_link text)""")
# conn.execute("""CREATE TABLE google_scholar_cite_results (
# organic_results_title text,
# organic_results_link text,
# citation_title text,
# citation_link text)""")
# for item in organic_results():
# conn.execute("""INSERT INTO google_scholar_organic_results
# VALUES (:page_number,
# :position,
# :result_type,
# :title,
# :link,
# :snippet,
# :result_id,
# :publication_info_summary,
# :cited_by_count,
# :cited_by_link,
# :cited_by_id,
# :total_versions,
# :all_versions_link,
# :all_versions_id,
# :file_format,
# :file_title,
# :file_link)""",
# {"page_number": item["page_number"],
# "position": item["position"],
# "result_type": item["type"],
# "title": item["title"],
# "link": item["link"],
# "snippet": item["snippet"],
# "result_id": item["result_id"],
# "publication_info_summary": item["publication_info_summary"],
# "cited_by_count": item["cited_by_count"],
# "cited_by_link": item["cited_by_link"],
# "cited_by_id": item["cited_by_id"],
# "total_versions": item["total_versions"],
# "all_versions_link": item["all_versions_link"],
# "all_versions_id": item["all_versions_id"],
# "file_format": item["file_format"],
# "file_title": item["file_title"],
# "file_link": item["file_link"]})
# OR YOU CAN DO THE SAME THING LIKE SO:
# for item in organic_results():
# conn.execute("""INSERT INTO google_scholar_organic_results
# (page_number,
# position,
# result_type,
# title,
# link,
# result_id,
# publication_info_summary,
# cited_by_count,
# cited_by_link,
# cited_by_id,
# total_versions,
# all_versions_link,
# all_versions_id,
# file_format,
# file_title,
# file_link)
# VALUES (:page_number,
# :position,
# :result_type,
# :title,
# :link,
# :result_id,
# :publication_info_summary,
# :cited_by_count,
# :cited_by_link,
# :cited_by_id,
# :total_versions,
# :all_versions_link,
# :all_versions_id,
# :file_format,
# :file_title,
# :file_link)""",
# {"page_number": item["page_number"],
# "position": item["position"],
# "result_type": item["type"],
# "title": item["title"],
# "link": item["link"],
# "result_id": item["result_id"],
# "publication_info_summary": item["publication_info_summary"],
# "cited_by_count": item["cited_by_count"],
# "cited_by_link": item["cited_by_link"],
# "cited_by_id": item["cited_by_id"],
# "total_versions": item["total_versions"],
# "all_versions_link": item["all_versions_link"],
# "file_format": item["file_format"],
# "file_title": item["file_title"],
# "file_link": item["file_link"]})
# for cite_result in cite_results():
# conn.execute("""INSERT INTO google_scholar_cite_results
# VALUES (:organic_result_title,
# :organic_result_link,
# :citation_title,
# :citation_snippet)""",
# {"organic_result_title": cite_result["organic_result_title"],
# "organic_result_link": cite_result["organic_result_link"],
# "citation_title": cite_result["citation_title"],
# "citation_snippet": cite_result["citation_snippet"]})
conn.commit()
conn.close()
print("Saved to SQL Lite database.")