-
Notifications
You must be signed in to change notification settings - Fork 1
/
undp_read_before_merge.R
103 lines (82 loc) · 4 KB
/
undp_read_before_merge.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
## First-time setup: read sheet 1 of the already-translated Yemen workbook and
## store it, together with its record count, in save_Yemen.RData
library(xlsx)
datasave <- read.xlsx2(file = "foi_Yemen_save.xlsx", sheetIndex = 1)
# Record count = number of entries in the FragmentID column
Nsave <- length(datasave$FragmentID)
save(list = c("datasave", "Nsave"), file = "save_Yemen.RData")
# For every country: read its workbook, translate the configured text variables
# to English (reusing the translations cached in save_<country>.RData) and
# append the translated columns to the country's data frame.
#
# Uses from the workspace: Ncountry, Country (each entry provides $country,
# $fname and $variables_to_transalte), translate() and my_api_Yandex.
# Produces: data (one data frame per country) and one save_<country>.RData per
# country holding datasave and Nsave.
#
# Layout of datasave, as used by the indexing below:
#   column 1                              fragment ids
#   columns 2 .. Nvarsave + 1             original text of each variable
#   columns Nvarsave + 2 .. 2*Nvarsave+1  English translation of each variable
data <- array(list(), Ncountry)
for (ncountry in seq_len(Ncountry)) {
  print(paste("Country:", Country[[ncountry]]$country))
  data[[ncountry]] <- read.xlsx2(Country[[ncountry]]$fname, 1)
  N <- length(data[[ncountry]]$FragmentID)

  # Variables to be translated for this country
  vars_to_translate <- Country[[ncountry]]$variables_to_transalte
  Nvarsave <- length(vars_to_translate)

  # Turn the variables to be translated into character: as factors they could
  # not receive values that are not already among their levels
  for (nvar in seq_len(Nvarsave)) {
    ind <- which(names(data[[ncountry]]) == vars_to_translate[nvar])
    print(paste(ind, vars_to_translate[nvar]))
    data[[ncountry]][, ind] <- as.character(data[[ncountry]][, ind])
  }

  fnamesave <- paste0("save_", Country[[ncountry]]$country, ".RData")

  # flag newfile: what to do with the save file
  # newfile = 0: Data not updated. Save file up to date. Do not touch it
  # newfile = 1: Data updated. Translate and save the new records only
  # newfile = 2: Data new or inconsistent. Translate and save all records
  if (!file.exists(fnamesave)) {
    newfile <- 2 # Save file does not exist. Build it
  } else {
    load(fnamesave) # brings datasave and Nsave into the workspace
    # Same factor -> character conversion for the cached columns. Done column
    # by column so that the data frame structure is kept. The fragment id
    # column is converted too, so that ids of new records can be stored in it.
    datasave[, 1] <- as.character(datasave[, 1])
    for (nvar in seq_len(2 * Nvarsave)) {
      datasave[, nvar + 1] <- as.character(datasave[, nvar + 1])
    }
    if (Nsave == N) {
      newfile <- 0 # Cache and data have the same size. Nothing to translate
    } else if (Nsave < N) {
      newfile <- 1 # Cache smaller than data. Translate the extra records
    } else {
      newfile <- 2 # Cache larger than data. Rebuild it
    }
  }

  N2save <- N # Last record to translate and save
  if (newfile == 2) {
    N1save <- 1 # Rebuild: translate from the first record
    # Start from an empty, all-character cache with exactly N rows. Without
    # this the loop would keep working on whatever datasave was left over from
    # a previous country (or on the oversized cache just loaded), mixing
    # foreign rows in and breaking the row count expected further down.
    # NOTE(review): translated columns are named <variable>1 here, following
    # the "suffix 1" naming mentioned below - confirm against existing caches.
    datasave <- as.data.frame(
      matrix(NA_character_, nrow = N, ncol = 2 * Nvarsave + 1),
      stringsAsFactors = FALSE
    )
    names(datasave) <- c(
      names(data[[ncountry]])[1],
      as.character(vars_to_translate),
      sprintf("%s1", as.character(vars_to_translate))
    )
  } else {
    N1save <- Nsave + 1 # First record not yet in the cache
  }

  # Translate records from N1save to N2save
  if (N2save >= N1save) {
    new_rows <- N1save:N2save
    # First store the fragment ids of the new records (kept for future use).
    # Fragments are always the first variable. as.character() stores the ids
    # themselves rather than factor codes.
    datasave[new_rows, 1] <- as.character(data[[ncountry]][new_rows, 1])
    # Then add the texts and their translations
    for (nvar in seq_len(Nvarsave)) {
      ind <- which(names(data[[ncountry]]) == vars_to_translate[nvar])
      indsave <- which(names(datasave) == vars_to_translate[nvar])
      print(paste(nvar, vars_to_translate[nvar], ind, indsave))
      # First the original text
      datasave[new_rows, indsave] <- data[[ncountry]][new_rows, ind]
      # Then the translation, record by record
      for (nrec in new_rows) {
        dum <- translate(my_api_Yandex, data[[ncountry]][nrec, ind], "en")$text
        print(paste(nvar, nrec, dum))
        # An empty result leaves the cached cell untouched
        if (length(dum) > 0) {
          datasave[nrec, (indsave + Nvarsave)] <- dum
        }
      }
    }
  } # End of if (N2save >= N1save)

  # Append all translations from datasave to data[[ncountry]]
  # NOTE(review): the appended column is named names(datasave)[nvar + 1], which
  # is the name of the ORIGINAL variable column in the cache, so data ends up
  # with that name twice. The original note here said the new variables carry
  # a "1" suffix, so names(datasave)[Nvarsave + 1 + nvar] may have been
  # intended - confirm before changing, later code may rely on current names.
  Nvar <- length(data[[ncountry]])
  for (nvar in seq_len(Nvarsave)) { # FragID; Original vars; English vars
    data[[ncountry]][, Nvar + nvar] <- datasave[, Nvarsave + 1 + nvar]
    names(data[[ncountry]])[Nvar + nvar] <- names(datasave)[nvar + 1]
  }

  # Finally write the cache back to the .RData file
  Nsave <- N
  save(datasave, Nsave, file = fnamesave)
  # write.xlsx2(datasave,paste("foi_",Country[[ncountry]]$country,"_save2.xlsx",sep=""))
} # end of for (ncountry in seq_len(Ncountry))