-
Notifications
You must be signed in to change notification settings - Fork 0
/
userwise_scrape.R
66 lines (55 loc) · 1.62 KB
/
userwise_scrape.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
library(FlickrAPI)
library(dplyr)
library(readxl)
# Per-user Flickr scrape ----
#
# Reads the unique photo owners listed in "final_full.csv", pages through each
# owner's photos inside a fixed bounding box via FlickrAPI::getPhotoSearch(),
# and writes two CSV files:
#   * "my_full_userwise_scrape.csv" - one row per photo (id, owner, datetaken,
#     ownername, latitude, longitude, url_c)
#   * "not_scraped_users.csv"       - owners for whom the first page came back
#     empty (or whose first request failed)

# Configuration ----

# NOTE(review): an API key is committed in this file. It is kept only as a
# fallback so the script still runs unchanged; prefer supplying the key through
# the FLICKR_API_KEY environment variable and rotating the committed one.
flickr_api_key <- Sys.getenv("FLICKR_API_KEY", unset = "")
if (!nzchar(flickr_api_key)) {
  flickr_api_key <- "3186081d7528ee513480e82498d9366b"
}
setFlickrAPIKey(api_key = flickr_api_key, overwrite = TRUE)

# Bounding box passed to the search as-is.
# NOTE(review): presumably min_lon, min_lat, max_lon, max_lat covering
# Sri Lanka - confirm against the Flickr API documentation.
search_bbox <- c(79.561244, 5.9, 81.9, 9.9)

# Columns kept from every result page, in output order.
photo_cols <- c(
  "id", "owner", "datetaken", "ownername", "latitude", "longitude", "url_c"
)

# Input ----

user_df1 <- read.csv("final_full.csv")
IDs <- unique(user_df1$owner)
length(IDs)

# Helpers ----

# Fetch one result page for one user. Returns whatever getPhotoSearch()
# returns, or NULL (after raising a warning) when the request errors, so that
# a single failed call cannot abort the run and discard the pages collected
# so far.
fetch_photo_page <- function(user_id, page) {
  tryCatch(
    getPhotoSearch(
      api_key = flickr_api_key,
      user_id = user_id,
      bbox = search_bbox,
      extras = c("date_taken", "owner_name", "geo", "url_c"),
      sort = "date-taken-desc",
      per_page = 100,
      page = page
    ),
    error = function(e) {
      warning(
        sprintf(
          "Flickr request failed for user %s (page %s): %s",
          user_id, page, conditionMessage(e)
        ),
        call. = FALSE,
        immediate. = TRUE
      )
      NULL
    }
  )
}

# Scrape ----

# Result pages are collected in a list and bound once at the end; calling
# rbind() on a growing data frame inside the loop re-copies it on every page.
pages <- list()
not_scraped <- c()
k <- 0L # 1-based position of the current user, used in progress output

for (userID in IDs) {
  k <- k + 1L
  i <- 1 # page number, restarted for every user

  repeat {
    sub_df <- fetch_photo_page(userID, i)

    # An empty (or failed) page ends this user's pagination. A failure on a
    # later page keeps the pages already fetched for that user.
    if (NROW(sub_df) == 0) {
      if (i == 1) {
        # Nothing came back on the very first page: record the user.
        print(sprintf("%d user not scraped", k))
        not_scraped <- append(not_scraped, userID)
      }
      break
    }

    # Some pages come back without a url_c column; add it filled with NA so
    # every page has the same columns before binding.
    if (!"url_c" %in% colnames(sub_df)) {
      sub_df[["url_c"]] <- NA_character_
    }

    pages[[length(pages) + 1L]] <- sub_df[, photo_cols, drop = FALSE]
    i <- i + 1
  }

  print(sprintf("%d users done", k))
}

# Output ----

df <- if (length(pages) > 0) do.call(rbind, pages) else data.frame()

write.csv(df, "my_full_userwise_scrape.csv")
write.csv(as.data.frame(not_scraped), "not_scraped_users.csv")