-
Notifications
You must be signed in to change notification settings - Fork 0
/
IJMacroResultToPdf.R
380 lines (309 loc) · 16.1 KB
/
IJMacroResultToPdf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# R-script for ananlysis of SquaredAnt plates
# For plate design SA20191111
# This script is designed for 2 96well plates recorded in one recording station and assumes the orginal output
# from the Supplementary_file_3.zip ImageJ2 macro archive.
# written by Sam, 20191111, last modified on 20210821
##First read in the arguments listed at the command line
data_file <-""
args <- commandArgs(trailingOnly = TRUE)
data_file <- args[1]
#load lib PNG for the logo, install if not there?
#library(png)
#img <- readPNG(args[2])
skiplines <- 241
if (length(args) >2){
skiplines <- as.numeric(args[3])
}
#for non-command line testing purposes
#setwd("/Users/samlinsen/Dropbox/SquaredAnt_company/SquaredAnt_Private/4.company_docs/8.Directions/paper_XJTLU/progress/20210302/")
#data_file <- '20200710_OTC25_50.IJ.txt'
#get all the info from the imageJ output
input <- read.delim(file = data_file, header = F, skip = skiplines, nrows = 7, blank.lines.skip = FALSE)
sample_names <- strsplit(as.character(input[1,]),",")[[1]]
batch <- input[2,]
run_date <- input[3,]
comments <- input[4,]
persons <- input[5,]
timestamp <- date()
platform <- input[6,]
rec_station <- input[7,]
#get directory
directory <- data_file
if (grepl ('c:',directory, ignore.case = T)){
pattern <- sub("c:.+\\\\","",directory, fixed = F, ignore.case = T)
directory <- sub(pattern,"",directory)
} else {
pattern <- sub("^.+/","",directory, perl = T)
directory <- sub(pattern,"",directory)
}
#setwd
setwd(directory)
#strain names. Assumed is a plate layout where the first 2 columns are copied 6 times over one 96 well plate. A1 is a positive control; H2 is a negative (empty) control.
#Then, non-treated samples are in the first 2 columns (1,2), treated in the second (3,4), and again for column (5,6 vs 7,8) and (9,10 vs 11,12)
strains <- c('P1 MG1655','P1 L04','P1 B14','P1 K01','P1 C12','P1 H17','P1 I03','P1 A05','P1 F20','P1 N10','P1 C19','P1 C14','P1 F21','P1 G19','P1 A18','P1 NC','P2 MG1655','P2 L04','P2 B14','P2 K01','P2 C12','P2 H17','P2 I03','P2 A05','P2 F20','P2 N10','P2 C19','P2 C14','P2 F21','P2 G19','P2 A18','P2 NC')
#open file
data <- read.delim(file = data_file, h = T, nrows = skiplines - 1, blank.lines.skip = FALSE)
if (dim(data)[2] == 97 || dim(data)[2] == 193){
data<- data[,(2:193)] #if only one plate, change to 97
}
data <- na.exclude(data)
length_block <- dim(data)[1]
blocks <- seq(0,length_block,by=(length_block/3))
block <- blocks[2]
#open pdf
date_now <- strsplit(timestamp," ")[[1]]
output <- paste(date_now[1], date_now[2], date_now[3],pattern,"pdf", sep = '.')
pdf (file = output, width = 8, height = 8, bg = 'white')
#build frontpage
sample_names_fp<-c("Samples:", "", "Plate 1", as.character(sample_names[1:6]),"", "Plate 2", as.character(sample_names[7:12]))
sample_names_fp<- paste(sample_names_fp, sep = " ", collapes = "\n")
sample_names_fp <- toString(sample_names_fp)
sample_names_fp <- gsub(',','',sample_names_fp)
###plot it
plot (0,0, xlim = c(0,1), ylim = c(0,1), type = 'n', axes = F, xlab ="", ylab = "")
text(0.5,1,"SquaredAnt ABright WATER Sample Report", cex = 1, pos = 1)
text(0.5,0.95,paste("Created on:", timestamp, sep = " "), cex = 0.8, pos = 1)
text(0.5,0.9,paste("by:", persons, "with", rec_station, sep = " "), cex = 0.8, pos = 1)
text(0.2,0.7, sample_names_fp, cex = 0.7, pos = 1, adj = 0)
text(0.8,0.7,paste("Platform ID", platform, "; batch no", batch, sep = " "), cex = 0.7, pos = 1)
text(0.8,0.65,paste("Run on", run_date, sep = " "), cex = 0.7, pos = 1)
text(0.8,0.6, paste(strwrap(comments, width = 50), collapse = "\n"), cex = 0.7, pos = 1, adj = 0)
rasterImage(img, 0.65, 0.2, 0.65+0.3, 0.2+0.087)
#colors
blue <- data[1:blocks[2],]
green <- data[(blocks[2]+1):blocks[3],]
red <- data[(blocks[3]+1):length_block,]
#normalized differene
ndif <- (red - blue) / (red + green + blue)
colnames(ndif) <- NULL
#line colors from high to low
colors <- c('blue', 'red','yellow','green','brown','light blue')
#prepare legend text
legend_text <-c()
for (i in seq(1,96)){
legend_text <- cbind(legend_text, as.character(sample_names[1:6]))
}
for (i in seq(97,192)){
legend_text <- cbind(legend_text, as.character(sample_names[7:12]))
}
#plot time-series curves for each cell strain (=well)
par(mfrow = c(2,2))
#plot every 16th column
column_select <- rep(1:16, each = 2)
strain_title <- column_select
column_add <- rep(c(0,96),8)
column_select <- column_select + column_add
strain_title <- strain_title + rep(c(0,16),8)
strain_title <- strains[strain_title]
strain_title_no <- 0
for (i in column_select){
strain_title_no <- strain_title_no + 1
ylim <- c(min(ndif),max(ndif))
plot (0,0, xlim = c(0,block), ylim = ylim, type = 'n', axes = F, xlab = "measure point (nr)", ylab = 'signal strength, normalized', main = strain_title[strain_title_no], cex.main = 0.8, cex.lab = 0.8)
axis(side = 1, at = seq(1:block),labels = seq(1:block), cex.axis = .7, las = 3)
axis(side = 2, at = seq(ylim[1],ylim[2], by = 0.1 ),labels = round (seq(ylim[1],ylim[2], by = 0.1 ), 2), cex.axis = .7, las = 2)
legend(block, ylim[1], legend_text[,i], col = colors, fill = colors, cex = 0.5 , yjust = 0, xjust = 1)
for (j in seq(1:6)){
lines (ndif[,i + (16*(j-1))], col = colors[j])
}
}
#calculate Euclidian distances
#===
#get the median of the control values
majority<-function(x){
maxval <-10000
positions <-c()
for (i in c(1:(length(x)-1))){
for (j in c((i+1):length(x))){
if (abs(x[i]-x[j]) < maxval){
maxval <- abs(x[i]-x[j])
positions <- c(i,j)
}
}
}
return(mean(x[positions]))
}
# here we are taking the most alike control values and produce a matrix with 16 columns ("majority mean" of column
# 1, 3, 5 and column 2,4,6)
nontreated_p1 <- c()
treated_p1 <- c()
block <- dim(ndif)[1]
for (i in seq(1:16)){
nontreated_p1 <- cbind(nontreated_p1,apply(ndif[c(1:block),seq(i,80+i, by = 32)],1, majority)) #these are noon-treated samples
treated_p1 <- cbind(treated_p1,apply(ndif[c(1:block),seq(i+16,80+i, by = 32)],1, majority)) #these are treated samples
}
#...and repeat for plate 2
nontreated_p2 <- c()
treated_p2 <- c()
block <- dim(ndif)[1]
for (i in seq(1:16)){
nontreated_p2 <- cbind(nontreated_p2,apply(ndif[c(1:block),seq(i+96,80+96+i,by = 32)],1, majority))
treated_p2 <- cbind(treated_p2,apply(ndif[c(1:block),seq(i+96+16,80+96+i, by = 32)],1, majority))
}
#make a table that will be used to subtract and calculate distance. This is an array of 6 times the
#nontreated_p1 and then nontreated_p2 and can be normlized and subtracted from the whole table to give a liniear #distance between ctl and sample
pcsweep <- c() # all the "majority" non-treated values
for (i in seq(1:6)){
pcsweep <-cbind(pcsweep,nontreated_p1)
}
for (i in seq(1:6)){
pcsweep <-cbind(pcsweep,nontreated_p2)
}
ncsweep <- c() #each value represents the "majority" value of the negative control.
ncNontreated_p1<- c()
ncTreated_p1<- c()
for (i in seq(1:16)){
ncNontreated_p1 <-cbind(ncNontreated_p1, nontreated_p1[,16]) #treated
ncTreated_p1 <-cbind(ncTreated_p1, treated_p1[,16]) #non-treated
}
for (i in seq(1:3)){
ncsweep <- cbind(ncsweep, ncNontreated, ncTreated)
}
ncNontreated_p2<- c()
ncTreated_p2<- c()
for (i in seq(1:16)){
ncNontreated_p2 <-cbind(ncNontreated_p2, nontreated_p2[,16]) #treated
ncTreated_p2 <-cbind(ncTreated_p2, treated_p2[,16]) #non-treated
}
for (i in seq(1:3)){
ncsweep <- cbind(ncsweep, ncNontreated_p2, ncTreated_p2)
}
ndif_ed <- as.matrix(ndif)-ncsweep #normalize the data against empty controls. Basically we set the reference values to 0 ( as in the raw data, values may be negative)
#now we are going to find the maximum values of the non-treated samples and set these to 100%
sweep_row_p1 <- as.numeric(nontreated_p1[dim(nontreated_p1)[1],]) - ncNontreated_p1[dim(ncNontreated_p1)[1],] #maximum "majority" values minus negative control at the latest time point plate 1
sweep_row_p2 <- as.numeric(nontreated_p2[dim(nontreated_p2)[1],]) - ncNontreated_p2[dim(ncNontreated_p2)[1],] #maximum "majority" values minus negative control at the latest time point plate 2
sweep_row_p1[16] <- 1 #we set the NC well to 1, as we cannot divide by 0. These values are ignored later
sweep_row_p2[16] <- 1 #we set the NC well to 1, as we cannot divide by 0. These values are ignored later
sweep_row <- c(rep(sweep_row_p1,6), rep(sweep_row_p2,6)) #C1: comment out in order to use the biggest value in this plate as 100%. This reduces the impact of cells with a low signal.
#get the maximum value of the sweep row
#maxsweep_p1 <- max(sweep_row_p1[sweep_row_p1 != 1]) #C2: alternatively this can be commented out instead of C1. Get the maximum difference between a NC and non-treated control
#maxsweep_p2 <- max(sweep_row_p2[sweep_row_p2 != 1]) #C2: alternatively this can be commented out instead of C1. Get the maximum difference between a NC and non-treated control
ndif_ed2 <- ndif_ed
ndif_ed <- sweep(ndif_ed,2,sweep_row,"/") #C1: comment out in order to use the biggest value in this plate as 100%. This reduces the impact of cells with a low signal.
#ndif_ed[,1:96] <- ndif_ed[,1:96] / maxsweep_p1 #C2: alternatively this can be commented out instead of C1
#ndif_ed[,97:192] <- ndif_ed[,97:192] / maxsweep_p2 #C2: alternatively this can be commented out instead of C1
#now we have got a matrix that has been normalized against the NC values and divided by the maximal values of the non-treated samples
#it is time to calculate the distances
#first give the pcsweep the same treatment as ndif_ed (pcsweep is "majority" non-treated values, repeated 6x)
pcsweep <- pcsweep-ncsweep
pcsweep <- sweep(pcsweep,2,sweep_row,"/") #C1: comment out in order to use the biggest value in this plate as 100%. This reduces the impact of cells with a low signal.
#pcsweep[,1:96] <- pcsweep[,1:96] / maxsweep_p1 #C2: alternatively this can be commented out instead of C1
#pcsweep[,97:192] <- pcsweep[,97:192] / maxsweep_p2
#now pcsweep is a table with 6 repeats of the non-treated controls, normalized against the negative controls and divided by the maximum distance
#and ndif_ed is the original table, normalized and divided identically.
#if we substract pcsweep from ndif_ed, we find the differences between treated and non-treated, and we can also find variation witin the data
ndif_ed <- ndif_ed - pcsweep
#then we summarize all and get the Euclidian Distance
ndif_sum <- apply(ndif_ed, 2, sum)
ndif_ed_sq <- ndif_ed^2
ndif_ed_sq_sum <- apply(ndif_ed_sq, 2, sum)
ndif_ed_sq_sum_sqrt <- sqrt(ndif_ed_sq_sum)
ndif_ed_sq_sum_sqrt[ndif_sum>0]<-ndif_ed_sq_sum_sqrt[ndif_sum>0] * -1 #we assign a "negative" distance if the linear distance is > 0: then the signal in "treated" was higher
#last checks to get rid of low intensity
#ndif_ed_sq_sum_sqrt[sweep_row < 0.1] <- 0 # no growth
#here we take the first quartile of the values of each sample among the first measure points and create a matrix 192
#entries
firstMeasurement_p1 <- c()
firstMeasurement_p2 <- c()
for (i in seq(1:16)){
firstMeasurement_p1 <- c(firstMeasurement_p1 ,apply(ndif[1,seq(i,80+i,by = 16)],1,function(x){summary(x)[2]})) # 2 is the first quantile
firstMeasurement_p2 <- c(firstMeasurement_p2 ,apply(ndif[1,seq(i+96,80+96+i,by = 16)],1,function(x){summary(x)[2]}))
}
firstMeasurement_p1Columns <- c()
firstMeasurement_p2Columns <- c()
for (i in seq(1:6)){ #create table with all quartiles
firstMeasurement_p1Columns <-c(firstMeasurement_p1Columns,firstMeasurement_p1 )
firstMeasurement_p2Columns <-c(firstMeasurement_p2Columns,firstMeasurement_p2)
}
firstMeasurementsColumns <-c(firstMeasurement_p1Columns,firstMeasurement_p2Columns)
firstMeasurementsColumns <- as.numeric(firstMeasurementsColumns)
l<- as.numeric(ndif[1,]) < firstMeasurementsColumns - 0.1
ndif_ed_sq_sum_sqrt[l] <- 0
# in case the non-treated is below that level, we need to remove all the Eucliadian distances
for (i in c(seq(1,16))){
if (l[i] == T){
m <- i+ seq(0,80, by = 16)
# ndif_ed_sq_sum_sqrt[m] <- 0
}
}
sd_ed <- sd(abs(ndif_ed_sq_sum_sqrt[ndif_ed_sq_sum_sqrt!=0])) #set some sort of threshold to show higher_then_expected EDs
#(assume positive ed value as this is per definition the case)
thresh <- round (mean(abs(ndif_ed_sq_sum_sqrt[ndif_ed_sq_sum_sqrt!=0]))+2*sd_ed ,digits = 2)
print(thresh)
#plot it
layout(matrix(c(1,1,1,1,1,2), nrow = 1, ncol = 6, byrow = TRUE))
plot (0,0, xlim = c(0,15), ylim = c(0,6), type = 'n', axes = F, xlab ="", ylab = "", main = "Euclidian Distance Measure from Blank")
par(mgp = c(0, -2, 0))
for (i in seq(1,15)){
for (j in seq(0,5)){
plotcex <- ndif_ed_sq_sum_sqrt[i+(16*j)]
if (plotcex > 0){
points (i,6-j,pch = 25, cex = plotcex, bg = 'black' )
}
if (plotcex < 0){
points (i,6-j,pch = 24, cex = plotcex * -1, bg = 'black' )
}
axis(1,at = seq(1,15), labels = strains[1:15], las = 2, cex.axis = .5, lwd = 0, font = 3)
axis(2,at = seq(1,6), labels = rev(sample_names[1:6]), las = 2, cex.axis = .5, lwd = 0)
}
}
#pot legend
plot (0,0, xlim = c(0,4), ylim = c(0,25), type = 'n', axes = F, xlab ="", ylab = "", main = "ED Legend")
cexlegend <- seq(0.5,4,by = 0.5)
cexlegend <- sort(c(cexlegend, thresh))
collegend <- rep('white', 9)
for (i in seq(1,9)){
thiscex <- cexlegend[i]
if (thiscex >= thresh){
points (1, 25-i+1, pch = 25, cex = thiscex, bg = 'red' )
}
if (thiscex < thresh){
points (1, 25-i+1, pch = 25, cex = thiscex , bg = 'black' )
}
text (2.5, 25-i+1 , cexlegend[i])
}
#summarize the data
block <- c(1:16)
ctl_plate1 <- rbind(ndif_ed_sq_sum_sqrt[block], ndif_ed_sq_sum_sqrt[block+32], ndif_ed_sq_sum_sqrt[block+64])
sam_plate1 <- rbind(ndif_ed_sq_sum_sqrt[block+16], ndif_ed_sq_sum_sqrt[block+32+16], ndif_ed_sq_sum_sqrt[block+64+16])
ctl_plate2 <- rbind(ndif_ed_sq_sum_sqrt[block+96], ndif_ed_sq_sum_sqrt[block+128], ndif_ed_sq_sum_sqrt[block+160])
sam_plate2 <- rbind(ndif_ed_sq_sum_sqrt[block+96+16], ndif_ed_sq_sum_sqrt[block+128+16], ndif_ed_sq_sum_sqrt[block+160+16])
ctl_plate1_avg <- apply (ctl_plate1,2,majority)
ctl_plate1_sd <- apply (abs(ctl_plate1),2,sd)
sam_plate1_avg <- apply (sam_plate1,2,majority)
sam_plate1_sd <- apply (abs(sam_plate1),2,sd)
ctl_plate2_avg <- apply (ctl_plate2,2,majority)
ctl_plate2_sd <- apply (abs(ctl_plate2),2,sd)
sam_plate2_avg <- apply (sam_plate2,2,majority)
sam_plate2_sd <- apply (abs(sam_plate2),2,sd)
cat(data_file,file="outfile.txt", sep="\n", append=TRUE)
cat(sam_plate1_avg,file="outfile.txt", sep="\t", append=TRUE)
cat('',file="outfile.txt", sep="\n", append=TRUE)
cat(sam_plate1_sd ,file="outfile.txt", sep="\t", append=TRUE)
cat('',file="outfile.txt", sep="\n", append=TRUE)
avg_sd_data <- c(ctl_plate1_avg,ctl_plate1_sd,sam_plate1_avg,sam_plate1_sd,ctl_plate2_avg,ctl_plate2_sd,sam_plate2_avg,sam_plate2_sd)
#avg_sd_data <- c(ctl_plate1_avg,ctl_plate1_sd,sam_plate1_avg,sam_plate1_sd)
#plot the averages and the SDs
plot (0,0, xlim = c(0,15), ylim = c(0,8), type = 'n', axes = F, xlab ="", ylab = "", main = "Average and SD Euclidian Distance Measure from Blank")
par(mgp = c(0, -2, 0))
for (i in seq(1,15)){
for (j in seq(0,6,by = 2)){
#for (j in seq(0,2,by = 2)){
plotcex <- avg_sd_data[i+(16*j)]
if (plotcex > 0){
points (i,8-j,pch = 25, cex = plotcex, bg = 'black' )
}
if (plotcex < 0){
points (i,8-j,pch = 24, cex = plotcex * -1, bg = 'black' )
}
meanval <- avg_sd_data[i+(16*j)]
text (i,8-j-0.5,labels = round(meanval,2))
sdval <- avg_sd_data[i+(16*(j+1))]
text (i,8-j-1,labels = round(sdval,2))
axis(1,at = seq(1,15), labels = strains[1:15], las = 2, cex.axis = .5, lwd = 0, font = 3)
axis(2,at = seq(2,8, by = 2), labels = rev(sample_names[c(1,2,7,8)]), las = 2, cex.axis = .5, lwd = 0)
axis(2,at = seq(1.5,7.5, by = 2), labels = c("mean", "mean","mean","mean"), las = 2, cex.axis = .5, lwd = 0)
axis(2,at = seq(1,7, by = 2), labels = c("SD", "SD","SD","SD"), las = 2, cex.axis = .5, lwd = 0)
}
}
dev.off()