-
Notifications
You must be signed in to change notification settings - Fork 0
/
bigquery google_trends.sql
175 lines (153 loc) · 2.93 KB
/
bigquery google_trends.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
-- This project is done in bigquery on google_trends
-- To see how old is the data. and when it has been updated?
SELECT
MIN(week) AS min_date,
MAX(week) AS updated_date
FROM
`bigquery-public-data.google_trends.top_terms`;
-- to see which term has been most searched?
SELECT
term,
COUNT(*) AS times_on_top,
SUM(score) AS total_score
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank=1
GROUP BY
term
ORDER BY
times_on_top DESC,
total_score DESC;
-- to see which was top search each month in string form.
SELECT
COUNT(DISTINCT(dma_id)) AS num_of_locations
FROM
`bigquery-public-data.google_trends.top_terms`;
WITH
my_tbl AS (
SELECT
EXTRACT(month
FROM
week) AS month_number,
term
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank = 1 ),
cnt_tbl AS(
SELECT
*,
COUNT(*) AS cnt
FROM
my_tbl
GROUP BY
month_number,
term)
SELECT
month_number,
STRING_AGG(CONCAT(' ',cnt, 'x', term ),',')
FROM
cnt_tbl
GROUP BY
month_number
ORDER BY
month_number;
-- total number of different searchs each month.
SELECT
DISTINCT(EXTRACT(month
FROM
week)) AS month_,
COUNT(DISTINCT(term)) AS number_of_terms
FROM
`bigquery-public-data.google_trends.top_terms`
GROUP BY
month_
ORDER BY
month_;
-- which searches has made in top 10?
SELECT
DISTINCT(term)
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank < 11
ORDER BY
term;
-- which searchs has made in top 25 but not in top 10.
SELECT
DISTINCT(term)
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank >= 11 EXCEPT DISTINCT
SELECT
DISTINCT(term)
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank < 11
ORDER BY
term;
-- is there any term whcih has made in all these weeks?
WITH
weeks_count AS (
SELECT
COUNT(DISTINCT week) AS total_weeks
FROM
`bigquery-public-data.google_trends.top_terms`),
term_count AS (
SELECT
term,
COUNT(DISTINCT week) AS distinct_count
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank = 1
GROUP BY
term
ORDER BY
distinct_count DESC)
SELECT
t.term
FROM
weeks_count w
JOIN
term_count t
ON
w.total_weeks = t.distinct_count;
-- which term was the most on during each week?
WITH
term_count AS (
SELECT
term,
COUNT(DISTINCT week) AS distinct_count
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
rank = 1
GROUP BY
term
ORDER BY
distinct_count DESC),
ranking AS (
SELECT
term,
RANK() OVER(ORDER BY distinct_count DESC) AS rnk
FROM
term_count)
SELECT
term
FROM
ranking
WHERE
rnk =1;
-- I ran this query just to how valid this data is? after seeing the result, I'm sure this is not a real data. Before starting this project, I thought this data was real.
SELECT
DISTINCT(week),
term
FROM
`bigquery-public-data.google_trends.top_terms`
WHERE
term = 'Riyadh XI vs PSG'
AND rank = 1;