-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
469 lines (377 loc) · 24.8 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>iobio charts</title>
<style>
  /* Page-level defaults. */
  :root {
    font-family: Arial, sans-serif;
    /* Theme overrides, currently disabled. CSS has no "//" line comments,
       so these must stay inside a block comment to be valid CSS.
    --iobio-data-color: orange;
    --iobio-data-color-secondary: blue;
    */
  }

  body {
    margin: 0;
  }

  /* Routed pages that fill at least the viewport. */
  #home-page, #help-page {
    min-height: 100vh;
    overflow: hidden;
  }

  /* Main dashboard; the 40px top padding matches the 40px fixed header. */
  main {
    padding: 40px 10px 10px 10px;
    box-sizing: border-box;
  }

  .read-depth-container {
    height: 320px;
    margin-bottom: 20px;
  }

  /* Lower area: percent boxes (40%) beside the histogram column (60%). */
  .bottom-container {
    display: flex;
  }

  .percent-row {
    display: flex;
    width: 40%;
    flex-wrap: wrap;
    gap: 20px;
    align-content: flex-start;
  }

  iobio-percent-box {
    width: 220px;
    height: 220px;
  }

  .histogram-column {
    display: flex;
    width: 60%;
    flex-direction: column;
    gap: 20px;
  }

  iobio-histogram {
    height: 220px;
  }

  /* Fixed top bar. */
  #bam2-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    height: 40px;
    width: 100%;
    background-color: #2d8fc1;
    position: fixed;
    top: 0;
    z-index: 1;
    padding: 0 10px;
    box-sizing: border-box;
  }

  #home-button,
  #help-button {
    color: white;
    font-size: 1.5em;
    cursor: pointer;
  }

  /* Invert the header colours while hovering the help link. */
  #help-button:hover {
    background-color: white;
    color: #2d8fc1;
  }

  /* Keep custom elements invisible until their definitions are registered. */
  *:not(:defined) {
    opacity: 0;
  }

  iobio-tab {
    display: flex;
    gap: 10px;
    align-items: center;
    justify-content: center;
  }

  /* Example chart and its explanatory paragraph, side by side. */
  .inline-container {
    display: flex;
    align-items: center;
  }

  /* Smaller percent boxes used for the examples inside the info content. */
  iobio-percent-box.info-dialog-percent-box {
    margin-right: 30px;
    flex-shrink: 0;
    width: 150px;
    height: 150px;
  }

  /* Applies to every span; #help-button keeps its white colour because the
     id selector above is more specific. */
  span {
    color: red;
  }
</style>
</head>
<body>
<!-- Fixed top bar (styled by the #bam2-header rules). Each data-link value is a route path; presumably router.js (initRouter) wires up navigation for these elements — confirm. -->
<div id="bam2-header">
<h1 id="home-button" data-link="/">bam2.iobio</h1>
<span id="help-button" data-link="/help">Get Help</span>
</div>
<iobio-data-broker></iobio-data-broker>
<iobio-help-page class="route" id="help-page"></iobio-help-page>
<iobio-home-page class="route" id="home-page"></iobio-home-page>
<iobio-file-requirements-page class="route" id="file-requirements-page"></iobio-file-requirements-page>
<iobio-license-page class="route" id="license-page"></iobio-license-page>
<main id="main-content">
<iobio-bam-controls></iobio-bam-controls>
<div class='read-depth-container'>
<iobio-coverage-depth
label="Read Coverage">
</iobio-coverage-depth>
</div>
<div class='bottom-container'>
<div class="percent-row">
<iobio-panel>
<iobio-label-info-button label="Mapped Reads">
<div slot="content">
<p>The mapped reads chart shows how many of the reads in the sample were successfully mapped to the reference genome. Genetic variation, in particular structural variants, ensure that every sequenced sample is genetically different to the reference genome it was aligned to. If the sample differs only in a small number of single base pair changes (e.g. SNVs), the read will still likely map to the reference, but, for more significant variation, the read can fail to be placed. Therefore, it is not expected that the mapped reads rate will hit 100%, but it is expected to be high (usually >90%).</p>
<div class="inline-container">
<iobio-percent-box class="info-dialog-percent-box" data="[88866,267]"></iobio-percent-box>
<p>This is an example of a human, whole exome. In this case, 99.7% of the sampled reads map to the reference, corresponding to 88,866 actual reads. It is important to note that when the wheel is blue, only reads that have been assigned to a reference sequence are included. This means that the 0.3% of reads that are unmapped have a mate pair that successfully maps to the reference genome.</p>
</div>
<div class="inline-container">
<iobio-percent-box class="info-dialog-percent-box" data="[1108009142,8864073]" style="--data-color: rgb(9,176,135); --data-color-secondary: rgba(9,176,135,0.5);"></iobio-percent-box>
<p>For the case that both mates from paired end sequencing are unmapped, they appear at the end of the BAM file. Usually, the number of such unmapped reads can be obtained from the index file. When this is possible, the wheel will appear in green, as shown for this whole genome sample.</p>
</div>
<div class="inline-container">
<iobio-percent-box class="info-dialog-percent-box" data="[479405211,141424537]" style="--data-color: rgb(9,176,135); --data-color-secondary: rgba(9,176,135,0.5);"></iobio-percent-box>
<p>If the rate of mapped reads is low (usually below 90%), questions need to be asked about the sample to understand why so many reads are unmapped. The last example only has 77.2% of reads mapping to the reference genome for a whole genome sample. This was caused as the sample was contaminated with a significant amount of bacterial DNA; the DNA sample was obtained from a saliva sample, rather than a blood draw.</p>
</div>
</div>
</iobio-label-info-button>
<iobio-percent-box id="mapped-reads-chart" percent-key="mapped_reads" total-key="total_reads"></iobio-percent-box>
</iobio-panel>
<iobio-panel>
<iobio-label-info-button label="Forward Strands">
<div slot="content">
<p>The forward strand chart shows the fraction of reads that map to the forward DNA strand. The general expectation is that the DNA library preparation step will generate DNA from the forward and reverse strands in equal amounts. After mapping the reads to the reference genome, approximately 50% of the reads will consequently map to the forward strand. If the observed rate is significantly different to 50%, this may be indicative of problems with the library preparation step.</p>
</div>
</iobio-label-info-button>
<iobio-percent-box percent-key="forward_strands" total-key="total_reads"></iobio-percent-box>
</iobio-panel>
<iobio-panel>
<iobio-label-info-button label="Proper Pairs">
<div slot="content">
<p>A fragment consisting of two mates is called a proper pair if both mates map to the reference genome in a manner consistent with expectations. In particular, if the DNA library consists of fragments ~500 base pairs in length, and 100 base pair reads are sequenced from either end, the expectation would be that the two reads map to the reference genome separated by ~300 base pairs. If the sequenced sample contains large structural variants, e.g. a large insertion, reads mapping with a large separation would be a signal for this variant, and the reads would not be proper pairs. Based on the sequencing technology, there is also an expectation on the orientation of each read in the fragment.</p>
<p>When calculating the proper pair rate, pairs where both mates are unmapped are not included in the analysis. As a consequence, the rate of proper pairs is expected to be well over 90%; even if the mapping rate itself is low as a result of bacterial contamination, for example.</p>
</div>
</iobio-label-info-button>
<iobio-percent-box percent-key="proper_pairs" total-key="total_reads"></iobio-percent-box>
</iobio-panel>
<iobio-panel>
<iobio-label-info-button label="Singletons">
<div slot="content">
<p>When working with paired-end sequencing, each DNA fragment is sequenced from both ends, creating two mates for each pair. If one mate in the pair successfully maps to the reference genome, but the other is unmapped, the mapped mate is a singleton. One way in which a singleton could occur would be if the sample has a large insertion compared with the reference genome; one mate can fall in sequence flanking the insertion and will be mapped, but the other falls in the inserted sequence and so cannot map to the reference genome. There are unlikely to be many such structural variants in the sample, or sequencing errors that would cause a read to not be able to map. Consequently, the singleton rate is expected to be very low (&lt;1%).</p>
</div>
</iobio-label-info-button>
<iobio-percent-box percent-key="singletons" total-key="total_reads"></iobio-percent-box>
</iobio-panel>
<iobio-panel>
<iobio-label-info-button label="Both Mates Mapped">
<div slot="content">
<p>When working with paired-end sequencing, each DNA fragment is sequenced from both ends, creating two mates for each pair. This chart shows the fraction of reads in pairs where both of the mates successfully map to the reference genome. When calculating this metric, pairs where both mates are unmapped are not included.</p>
</div>
</iobio-label-info-button>
<iobio-percent-box percent-key="both_mates_mapped" total-key="total_reads"></iobio-percent-box>
</iobio-panel>
<iobio-panel>
<iobio-label-info-button label="Duplicates">
<div slot="content">
<p>PCR duplicates are two (or more) reads that originate from the same DNA fragment. When sequencing data is analysed, it is assumed that each observation (i.e. each read) is independent; an assumption that fails in the presence of duplicate reads. Typically, algorithms look for reads that map to the same genomic coordinate, and whose mates also map to identical genomic coordinates. It is important to note that as the sequencing depth increases, more reads are sampled from the DNA library, and consequently it is increasingly likely that duplicate reads will be sampled. As a result, the true duplicate rate is not independent of the depth, and they should both be considered when looking at the duplicate rate. Additionally, as the sequencing depth increases, it is also increasingly likely that reads will map to the same location and be marked as duplicates, even when they are not. As such, as the sequencing depth approaches and surpasses the read length, the duplicate rate starts to become less indicative of problems.</p>
<div class="inline-container">
<iobio-percent-box class="info-dialog-percent-box" data="[2483,73425]"></iobio-percent-box>
<p>This is an example of the duplicate rate for a ~80X human whole genome. The expectation is that the duplicate rate is low (well below 10%), and consequently, this sample would be considered good.</p>
</div>
<div class="inline-container">
<iobio-percent-box class="info-dialog-percent-box" data="[401,100651]"></iobio-percent-box>
<p>If the median coverage drops to ~50X, the duplicate rate should be even lower.</p>
</div>
<div class="inline-container">
<iobio-percent-box class="info-dialog-percent-box" data="[54960,401167]"></iobio-percent-box>
<p>This is a different sample with ~50X coverage, but now the duplicate rate is much higher. This sample could well have problems at the library prep stage and should potentially be resequenced.</p>
</div>
</div>
</iobio-label-info-button>
<iobio-percent-box percent-key="duplicates" total-key="total_reads"></iobio-percent-box>
</iobio-panel>
</div>
<div class='histogram-column'>
<iobio-panel>
<iobio-label-info-button label="Read Coverage Distribution">
<div slot="content">
<p>This chart shows how read coverage is distributed, and the expected distribution is dependent on the type of sequencing data being visualized.</p>
<p>In a <span>Whole Genome Sequencing</span> experiment, the expectation is that the read coverage follows a Poisson distribution centred about the requested sequencing depth. The following example shows a high quality read coverage distribution for a sample sequenced to ~50X coverage. The distribution shows a nice Poisson distribution, and is centred around ~53X. (Note that the second scale at the bottom of the chart can be used to zoom in on desired parts of the distribution).</p>
<iobio-histogram data-url="https://s3.us-east-1.amazonaws.com/static.iobio.io/prod/bam.iobio.io/bam2.0/data-whole-genome.json"></iobio-histogram>
<p>Alternatively, if the distribution shows multiple peaks, isn't Poisson distributed, or is not centred around the expected coverage, it may be necessary to consider resequencing the sample, or at least, being aware that problems may arise in analysing the data. While the following distribution shows a median coverage around that expected (~80X), a significant portion of the genome is at zero coverage and there are multiple peaks, so this would not be considered a good sample.</p>
<iobio-histogram data-url="https://s3.us-east-1.amazonaws.com/static.iobio.io/prod/bam.iobio.io/bam2.0/data-whole-genome-with-multiple-peaks.json"></iobio-histogram>
<p><span>Whole Exome Sequencing</span> relies on the targeted capture of DNA from the exome, followed by DNA amplification. This leads to large variation in the sequencing depth across exons, and consequently, the read coverage distribution is no longer expected to be Poisson distributed. When sampling across the entire genome, the majority of genomic regions will contain no sequencing reads as they are not exonic regions. This leads to a read coverage distribution overwhelmingly weighted to zero coverage as shown below.</p>
<iobio-histogram data-url="https://s3.us-east-1.amazonaws.com/static.iobio.io/prod/bam.iobio.io/bam2.0/data-exome.json"></iobio-histogram>
<p>To restrict sampling to exonic regions, select the default bed file in the top 'Exonic Regions'. It is also possible to select a custom bed file, if available. After selecting the default bed, the sequenced depth appears to be centred around ~50X, so if this is consistent with the requested depth, this sample would be considered good. The distribution above is then updated as shown below.</p>
<iobio-histogram data-url="https://s3.us-east-1.amazonaws.com/static.iobio.io/prod/bam.iobio.io/bam2.0/data-exome-with-bed.json"></iobio-histogram>
</div>
</iobio-label-info-button>
<iobio-histogram broker-key="coverage_hist"></iobio-histogram>
</iobio-panel>
<iobio-panel>
<iobio-tabs>
<iobio-tab slot="tab">
<iobio-label-info-button label="Fragment Length" icon-position="left" slot="tab-content">
<div slot="content">
<p>For paired end sequencing, DNA fragments are typically size selected to a uniform length and then sequenced from either end. Once the two mates are aligned back to the reference genome, the fragment length can be inferred from how far apart these two mates map. If the sequenced sample has a deletion or insertion relative to the reference, this will result in the two mates mapping closer together, or further apart than expected. Under the assumption that the sequenced sample has a relatively small number of insertions and deletions, we expect to see the fragment length follow a normal distribution.</p>
<p><span>Whole genome sequencing</span>, this is an example of the fragment length distribution for a high coverage (~80X) whole genome. The read lengths in this sample are 150bp, so a fragment cannot be shorter than this value; consequently, we see a sharp cutoff at a fragment length of 150bp.</p>
<iobio-histogram data-url="https://s3.us-east-1.amazonaws.com/static.iobio.io/prod/bam.iobio.io/bam2.0/data-whole-genome-fragment-length.json"></iobio-histogram>
<p><span>Whole exome sequencing</span>, this is the fragment length distribution for a high coverage exome.</p>
<iobio-histogram data-url="https://s3.us-east-1.amazonaws.com/static.iobio.io/prod/bam.iobio.io/bam2.0/data-exome-fragment-length.json"></iobio-histogram>
</div>
</iobio-label-info-button>
</iobio-tab>
<iobio-tab-panel slot="panel">
<iobio-histogram broker-key="frag_hist" ignore-outliers></iobio-histogram>
</iobio-tab-panel>
<iobio-tab slot="tab">
<iobio-label-info-button label="Read Length" slot="tab-content">
<div slot="content">
<p>The read length is usually a very simple distribution. In most cases, the read length is fixed at a uniform length, e.g. 100 base pairs, or 150 base pairs etc. The read length distribution, therefore, tends to be a single spike at this read length. Depending on the sequencing technology used, this may not always be the case.</p>
</div>
</iobio-label-info-button>
</iobio-tab>
<iobio-tab-panel slot="panel">
<iobio-histogram broker-key="length_hist" ignore-outliers></iobio-histogram>
</iobio-tab-panel>
</iobio-tabs>
</iobio-panel>
<iobio-panel>
<iobio-tabs>
<iobio-tab slot="tab">
<iobio-label-info-button label="Mapping Quality" icon-position="left" slot="tab-content">
<div slot="content">
<p>The mapping quality distribution shows the Phred quality scores describing the probability that a read does not map to the location that it has been assigned to (specifically, Q=-10·log10(P), where Q is the Phred score and P is the probability the read is in the wrong location). So the larger the score, the higher the quality of the mapping. Some scores have specific meaning, e.g. a score of 0 means that the read could map equally to multiple places in the reference genome. The majority of reads should be well mapped and so we expect to see this distribution heavily skewed to large values (typically around 60). It is not unusual to see some scores around zero. Reads originating from repetitive elements in the genome will plausibly map to multiple locations.</p>
</div>
</iobio-label-info-button>
</iobio-tab>
<iobio-tab-panel slot="panel">
<iobio-histogram broker-key="mapq_hist"></iobio-histogram>
</iobio-tab-panel>
<iobio-tab slot="tab">
<iobio-label-info-button label="Base Quality" slot="tab-content">
<div slot="content">
<p>Similar to the mapping quality distribution, the base quality distribution shows the Phred quality scores describing the probability that a nucleotide has been incorrectly assigned; e.g. an error in the sequencing. Specifically, Q=-10·log10(P), where Q is the Phred score and P is the probability the nucleotide is wrong. The larger the score, the more confident we are in the base call. Depending on the sequencing technology, we can expect to see different distributions, but we expect to see a distribution skewed towards larger (more confident) scores; typically around 40.</p>
</div>
</iobio-label-info-button>
</iobio-tab>
<iobio-tab-panel slot="panel">
<iobio-histogram broker-key="baseq_hist"></iobio-histogram>
</iobio-tab-panel>
</iobio-tabs>
</iobio-panel>
</div>
</div>
</main>
<!-- Import map: maps the bare specifiers "d3" and "waygate" to CDN URLs. It is declared ahead of the module scripts that follow. -->
<script type="importmap">
{
  "imports": {
    "d3": "https://cdn.jsdelivr.net/npm/d3@7/+esm",
    "waygate": "https://cdn.jsdelivr.net/npm/waygate-js@0.5.0/index.js"
  }
}
</script>
<!-- Chart library first, then one module per routed page. -->
<script type="module" src="./lib/iobio-charts/index.js"></script>
<script type="module" src="./home_page.js"></script>
<script type="module" src="./file_requirement.js"></script>
<script type="module" src="./license_page.js"></script>
<script type="module" src="./help_page.js"></script>
<script type='module'>
import { parseBedFile } from './lib/iobio-charts/coverage/src/BamData.js';
import { initRouter } from './router.js'
import { getDataBroker } from './lib/iobio-charts/common.js';
initRouter()
// --- Resolve data-source URLs and configure the data broker -----------------
// The "source" query parameter selects how the file URLs are obtained:
//   * with    ?source=…&access_token=…&project_id=…&sample_id=…
//       the sample's files are looked up through the Mosaic API;
//   * without it, ?alignment-url=…&index-url=…&bed-url=… are used as given.
const urlParams = new URLSearchParams(window.location.search);
const source = urlParams.get("source");

let apiUrl;
let alignmentUrl;
let indexUrl;
let bedUrl;

if (source) {
  // Mosaic integration
  const backendMap = {
    "https://mosaic.chpc.utah.edu": "https://mosaic.chpc.utah.edu/gru/api/v1",
    "https://mosaic-staging.chpc.utah.edu": "https://mosaic-staging.chpc.utah.edu/gru/api/v1",
  };
  // Own-property lookup, so a source such as "constructor" cannot resolve to
  // a member inherited from Object.prototype. Unknown sources leave apiUrl
  // undefined, exactly as before.
  apiUrl = Object.hasOwn(backendMap, source) ? backendMap[source] : undefined;

  const token = urlParams.get("access_token");
  const projectId = urlParams.get("project_id");
  const sampleId = urlParams.get("sample_id");

  const mosaicRequest = mosaicRequesterForProject(source, projectId, token);

  const files = await mosaicRequest(`/samples/${sampleId}/files`);
  // Fail with a readable message instead of a TypeError further down when the
  // response is not the expected { data: [...] } listing.
  if (!Array.isArray(files?.data)) {
    throw new Error(`Mosaic did not return a file list for sample ${sampleId}`);
  }

  const alignmentFile = files.data.find(f => f.type === 'bam' || f.type === 'cram');
  const indexFile = files.data.find(f => f.type === 'bai' || f.type === 'crai');
  const bedFile = files.data.find(f => f.type === 'bam-bed');

  if (!alignmentFile) {
    throw new Error(`Sample ${sampleId} has no bam/cram alignment file in Mosaic`);
  }
  if (!indexFile) {
    throw new Error(`Sample ${sampleId} has no bai/crai index file in Mosaic`);
  }

  // Request the file URLs in parallel; the BED file is optional.
  const promises = [
    mosaicRequest(`/files/${alignmentFile.id}/url`),
    mosaicRequest(`/files/${indexFile.id}/url`),
  ];
  if (bedFile) {
    promises.push(mosaicRequest(`/files/${bedFile.id}/url`));
  }

  // bedUrlRes is undefined when no BED request was queued.
  const [ alignmentUrlRes, indexUrlRes, bedUrlRes ] = await Promise.all(promises);

  alignmentUrl = alignmentUrlRes.url;
  indexUrl = indexUrlRes.url;
  if (bedUrlRes) {
    bedUrl = bedUrlRes.url;
  }
}
else {
  alignmentUrl = urlParams.get("alignment-url");
  indexUrl = urlParams.get("index-url");
  bedUrl = urlParams.get("bed-url");
}

// Hand the resolved URLs to the broker element; apiUrl and bedUrl are only
// assigned when a value was found.
const broker = document.querySelector('iobio-data-broker');
if (apiUrl) {
  broker.apiUrl = apiUrl;
}
broker.alignmentUrl = alignmentUrl;
broker.indexUrl = indexUrl;
if (bedUrl) {
  broker.bedUrl = bedUrl;
}

// The coverage-depth component receives the backend URL separately.
const backendUrl = determineBackendUrl();
const readDepthComponent = document.querySelector('iobio-coverage-depth');
readDepthComponent.backendUrl = backendUrl;
/**
 * Return the backend URL handed to the coverage-depth component.
 * Currently always the default; kept as a function so the choice can become
 * dynamic later.
 * @returns {string}
 */
function determineBackendUrl() {
  // Determine backend URL dynamically
  const defaultBackendUrl = 'https://backend.iobio.io'; // Default URL
  return defaultBackendUrl;
}
/**
 * Build a request helper bound to one Mosaic project.
 *
 * @param {string} server    Mosaic origin that the API path is appended to.
 * @param {string} projectId Project whose API namespace is addressed.
 * @param {string} token     Bearer token sent in the Authorization header.
 * @returns {(path: string) => Promise<any>} Async function that fetches
 *   `${server}/api/v1/projects/${projectId}${path}` and resolves with the
 *   parsed JSON body. It rejects with an Error when the HTTP status is not
 *   in the 2xx range, so callers never mistake an error payload for data.
 */
function mosaicRequesterForProject(server, projectId, token) {
  async function request(path) {
    const url = `${server}/api/v1/projects/${projectId}${path}`;
    const res = await fetch(url, {
      headers: {
        'Content-Type': 'application/json',
        'Authorization': "Bearer " + token,
      }
    });
    // fetch() only rejects on network failure; HTTP errors must be checked
    // explicitly.
    if (!res.ok) {
      throw new Error(`Mosaic request failed (${res.status} ${res.statusText}): ${path}`);
    }
    return res.json();
  }
  return request;
}
// --- Event wiring -----------------------------------------------------------
// Document-level events from the controls are copied onto the broker.
document.addEventListener('selected-regions-change', ({ detail }) => {
  broker.regions = detail;
});

document.addEventListener('bed-file-selected', ({ detail }) => {
  broker.bedText = detail.bedText;
});

// On removal the event detail itself is assigned as the new bedText.
document.addEventListener('bed-file-removed', ({ detail }) => {
  broker.bedText = detail;
});

const broker_ = getDataBroker(broker);

// When the index file supplies mapped/unmapped counts, show them in the
// mapped-reads chart and recolour it green; otherwise reset the colours.
broker_.addEventListener('mapped-reads-from-index-file', ({ detail }) => {
  const mappedChart = document.querySelector('#mapped-reads-chart');

  // NOTE(review): this is a truthiness test, so a count of 0 takes the
  // "unset" branch — confirm that is intended.
  const hasIndexCounts = Boolean(detail.mappedReads && detail.unmappedReads);

  if (!hasIndexCounts) {
    mappedChart.style.setProperty('--iobio-data-color', 'unset');
    mappedChart.style.setProperty('--iobio-data-color-secondary', 'unset');
    return;
  }

  // Determine the number of mapped reads and unmapped reads from the index file
  const mapped = detail.mappedReads;
  const combined = detail.mappedReads + detail.unmappedReads;
  mappedChart.update([ mapped, combined - mapped ]);

  mappedChart.style.setProperty('--iobio-data-color', 'rgb(9,176,135)');
  mappedChart.style.setProperty('--iobio-data-color-secondary', 'rgba(9,176,135,0.5)');
});
</script>
</body>
</html>