Merge pull request #3 from levitsky/minihackathon-1

Updates to teaching module
hds-sandbox · Nov 12, 2024 · 5b2d52a · 5b2d52a
2 parents f16cf44 + 282858c
commit 5b2d52a
Show file tree

Hide file tree

Showing 4 changed files with 222 additions and 31 deletions.
diff --git a/css/styles.css b/css/styles.css
@@ -76,3 +76,8 @@ body {
 .download-button:hover {
   background-color: #0056b3; /* Darker blue on hover */
 }
+
+table.personal-details tr td:nth-child(1) {
+  vertical-align: middle;
+  font-weight: bold;
+}
diff --git a/fragger.params b/fragger.params
@@ -0,0 +1,145 @@
+database_name = /work/BC_data_test/2024-06-07-decoys-reviewed-contam-UP000005640.fas			# Path to the protein database file in FASTA format.
+num_threads = 3			# Number of CPU threads to use.
+
+precursor_mass_lower = -10			# Lower bound of the precursor mass window.
+precursor_mass_upper = 10			# Upper bound of the precursor mass window.
+precursor_mass_units = 1			# Precursor mass tolerance units (0 for Da, 1 for ppm).
+data_type = 0			# Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA, 3 for DDA+).
+precursor_true_tolerance = 20			# True precursor mass tolerance (window is +/- this value).
+precursor_true_units = 1			# True precursor mass tolerance units (0 for Da, 1 for ppm).
+fragment_mass_tolerance = 20			# Fragment mass tolerance (window is +/- this value).
+fragment_mass_units = 1			# Fragment mass tolerance units (0 for Da, 1 for ppm).
+calibrate_mass = 2			# Perform mass calibration (0 for OFF, 1 for ON, 2 for ON and find optimal parameters, 4 for ON and find the optimal fragment mass tolerance).
+use_all_mods_in_first_search = 0			# Use all variable modifications in first search (0 for No, 1 for Yes).
+decoy_prefix = rev_			# Prefix of the decoy protein entries. Used for parameter optimization only.
+
+deisotope = 1			# Perform deisotoping or not (0=no, 1=yes and assume singleton peaks single charged, 2=yes and assume singleton peaks single or double charged).
+deneutralloss = 1			# Perform deneutrallossing or not (0=no, 1=yes).
+isotope_error = -1/0/1/2/3			# Also search for MS/MS events triggered on specified isotopic peaks.
+mass_offsets = 0.0			# Creates multiple precursor tolerance windows with specified mass offsets.
+mass_offsets_detailed = 			# Optional detailed mass offset list. Overrides mass_offsets if use_detailed_offsets = 1.
+use_detailed_offsets = 0			# Whether to use the regular (0) or detailed (1) mass offset list.
+precursor_mass_mode = selected			# One of isolated/selected/corrected.
+
+remove_precursor_peak = 1			#  Remove precursor peaks from tandem mass spectra. 0 = not remove; 1 = remove the peak with precursor charge; 2 = remove the peaks with all charge states (only for DDA mode).
+remove_precursor_range = -1.500000,1.500000			# m/z range in removing precursor peaks. Only for DDA mode. Unit: Th.
+intensity_transform = 0			# Transform peaks intensities with sqrt root. 0 = not transform; 1 = transform using sqrt root.
+activation_types = all			# Filter to only search scans of provided activation type(s). Allowed: All, HCD, CID, ETD, ECD.
+analyzer_types = all			# Filter to only include scans matching the provided analyzer type(s) in search. Only support the mzML and raw format. Allowed types: all, FTMS, ITMS.
+group_variable = 0			# Specify the variable used to decide the PSM group in the group FDR estimation. 0 = no group FDR; 1 = num_enzyme_termini; 2 = PE from protein header.
+require_precursor = 1			# If required, PSMs with no precursor peaks will be discarded. For DIA data type only. 0 = no, 1 = yes.
+reuse_dia_fragment_peaks = 0			# Allow the same peak matches to multiple peptides. For DIA data type only. 0 = no, 1 = yes.
+
+write_calibrated_mzml = 0			# Write calibrated MS2 scan to a mzML file (0 for No, 1 for Yes).
+write_uncalibrated_mgf = 0			# Write uncalibrated MS2 scan to a MGF file (0 for No, 1 for Yes). Only for .raw and .d formats.
+write_mzbin_all = 0
+mass_diff_to_variable_mod = 0			# Put mass diff as a variable modification. 0 for no; 1 for yes and remove delta mass; 2 for yes and keep delta mass.
+
+localize_delta_mass = 0			# Include fragment ions mass-shifted by unknown modifications (recommended for open and mass offset searches) (0 for OFF, 1 for ON).
+delta_mass_exclude_ranges = (-1.5,3.5)			# Exclude mass range for shifted ions searching.
+fragment_ion_series = b,y			# Ion series used in search, specify any of a,b,c,x,y,z,Y,b-18,y-18 (comma separated).
+ion_series_definitions = 			# User defined ion series. Example: "b* N -17.026548;b0 N -18.010565".
+
+labile_search_mode = off			# type of search (nglycan, labile, or off). Off means non-labile/typical search.
+restrict_deltamass_to = all			# Specify amino acids on which delta masses (mass offsets or search modifications) can occur. Allowed values are single letter codes (e.g. ACD) and '-', must be capitalized. Use 'all' to allow any amino acid.
+diagnostic_intensity_filter = 0			# [nglycan/labile search_mode only]. Minimum relative intensity for SUM of all detected oxonium ions to achieve for spectrum to contain diagnostic fragment evidence. Calculated relative to spectrum base peak. 0 <= value.
+Y_type_masses = 			#  [nglycan/labile search_mode only]. Specify fragments of labile mods that are commonly retained on intact peptides (e.g. Y ions for glycans). Only used if 'Y' is included in fragment_ion_series.
+diagnostic_fragments = 			# [nglycan/labile search_mode only]. Specify diagnostic fragments of labile mods that appear in the low m/z region. Only used if diagnostic_intensity_filter > 0.
+remainder_fragment_masses = -18.01056/79.96633			# [labile search_mode only] List of possible remainder fragment ions to consider. Remainder masses are partial modification masses left on b/y ions after fragmentation.
+
+search_enzyme_name_1 = stricttrypsin			# Name of the first enzyme.
+search_enzyme_cut_1 = KR			# First enzyme's cutting amino acid.
+search_enzyme_nocut_1 = 			# First enzyme's protecting amino acid.
+search_enzyme_sense_1 = C			# First enzyme's cutting terminal.
+allowed_missed_cleavage_1 = 1			# First enzyme's allowed number of missed cleavages per peptide. Maximum value is 5.
+
+search_enzyme_name_2 = null			# Name of the second enzyme.
+search_enzyme_cut_2 = 			# Second enzyme's cutting amino acid.
+search_enzyme_nocut_2 = 			# Second enzyme's protecting amino acid.
+search_enzyme_sense_2 = C			# Second enzyme's cutting terminal.
+allowed_missed_cleavage_2 = 2			# Second enzyme's allowed number of missed cleavages per peptide. Maximum value is 5.
+
+num_enzyme_termini = 2			# 0 for non-enzymatic, 1 for semi-enzymatic, and 2 for fully-enzymatic.
+
+clip_nTerm_M = 1			# Specifies the trimming of a protein N-terminal methionine as a variable modification (0 or 1).
+
+# maximum of 16 mods - amino acid codes, * for any amino acid, [ and ] specifies protein termini, n and c specifies peptide termini
+variable_mod_01 = 15.9949 M 3
+variable_mod_02 = 42.0106 [^ 1
+# variable_mod_03 = 79.96633 STY 3
+# variable_mod_04 = -17.0265 nQnC 1
+# variable_mod_05 = -18.0106 nE 1
+variable_mod_06 = 229.16293 n^ 1
+variable_mod_07 = 229.16293 S 1
+# variable_mod_08 = 0.0 site_08 1
+# variable_mod_09 = 0.0 site_09 1
+# variable_mod_10 = 0.0 site_10 1
+# variable_mod_11 = 0.0 site_11 1
+# variable_mod_12 = 0.0 site_12 1
+# variable_mod_13 = 0.0 site_13 1
+# variable_mod_14 = 0.0 site_14 1
+# variable_mod_15 = 0.0 site_15 1
+# variable_mod_16 = 0.0 site_16 1
+
+allow_multiple_variable_mods_on_residue = 0
+max_variable_mods_per_peptide = 3			# Maximum total number of variable modifications per peptide.
+max_variable_mods_combinations = 5000			# Maximum number of modified forms allowed for each peptide (up to 65534).
+
+output_format = pepXML_pin			# File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin).
+output_report_topN = 1			# Reports top N PSMs per input spectrum.
+output_max_expect = 50			# Suppresses reporting of PSM if top hit has expectation value greater than this threshold.
+report_alternative_proteins = 1			# Report alternative proteins for peptides that are found in multiple proteins (0 for no, 1 for yes).
+
+precursor_charge = 1 4			# Assumed range of potential precursor charge states. Only relevant when override_charge is set to 1.
+override_charge = 0			# Ignores precursor charge and uses charge state specified in precursor_charge range (0 or 1).
+
+digest_min_length = 7			# Minimum length of peptides to be generated during in-silico digestion.
+digest_max_length = 50			# Maximum length of peptides to be generated during in-silico digestion.
+digest_mass_range = 200.0 5000.0			# Mass range of peptides to be generated during in-silico digestion in Daltons.
+max_fragment_charge = 2			# Maximum charge state for theoretical fragments to match (1-4).
+
+track_zero_topN = 0			# Track top N unmodified peptide results separately from main results internally for boosting features.
+zero_bin_accept_expect = 0			# Ranks a zero-bin hit above all non-zero-bin hit if it has expectation less than this value.
+zero_bin_mult_expect = 1			# Multiplies expect value of PSMs in the zero-bin during  results ordering (set to less than 1 for boosting).
+
+check_spectral_files = 1			# Checking spectral files before searching.
+minimum_peaks = 15			# Minimum number of peaks in experimental spectrum for matching.
+use_topN_peaks = 150			# Pre-process experimental spectrum to only use top N peaks.
+min_fragments_modelling = 2			# Minimum number of matched peaks in PSM for inclusion in statistical modeling.
+min_matched_fragments = 4			# Minimum number of matched peaks for PSM to be reported.
+min_sequence_matches = 2			# [nglycan/labile search_mode only] Minimum number of sequence-specific (not Y) ions to record a match.
+minimum_ratio = 0.01			# Filters out all peaks in experimental spectrum less intense than this multiple of the base peak intensity.
+clear_mz_range = 125.5 131.5			# Removes peaks in this m/z range prior to matching.
+
+add_Cterm_peptide = 0.0
+add_Nterm_peptide = 0.0
+add_Cterm_protein = 0.0
+add_Nterm_protein = 0.0
+
+add_G_glycine = 0.0
+add_A_alanine = 0.0
+add_S_serine = 0.0
+add_P_proline = 0.0
+add_V_valine = 0.0
+add_T_threonine = 0.0
+add_C_cysteine = 57.02146
+add_L_leucine = 0.0
+add_I_isoleucine = 0.0
+add_N_asparagine = 0.0
+add_D_aspartic_acid = 0.0
+add_Q_glutamine = 0.0
+add_K_lysine = 229.16293
+add_E_glutamic_acid = 0.0
+add_M_methionine = 0.0
+add_H_histidine = 0.0
+add_F_phenylalanine = 0.0
+add_R_arginine = 0.0
+add_Y_tyrosine = 0.0
+add_W_tryptophan = 0.0
+add_B_user_amino_acid = 0.0
+add_J_user_amino_acid = 0.0
+add_O_user_amino_acid = 0.0
+add_U_user_amino_acid = 0.0
+add_X_user_amino_acid = 0.0
+add_Z_user_amino_acid = 0.0
+
diff --git a/javascripts/answers.js b/javascripts/answers.js
@@ -44,7 +44,14 @@ document.getElementById('downloadBtn').addEventListener('click', function() {
   let answersHTML = `
     <div style="font-family: Arial, sans-serif; padding: 20px;">
       <h1 style="text-align: center; font-size: 24px;">Assignment Submission</h1>
-  `;
+      <h2 style="font-size: 18px;">Submitter</h2>
+      <table>`;
+
+  const table = document.querySelector('table.personal-details');
+  for (var i = 0, row; row = table.rows[i]; i++) {
+     answersHTML += `<tr><td style="font-weight: bold;">${row.cells[0].textContent}</td><td>${row.cells[1].children[0].value}</td></tr>`;
+  }
+  answersHTML += `</table>`;
 
   questions.forEach((question, index) => {
     const questionNumber = index + 1; // Incrementing index for question number

diff --git a/teachingmodule.qmd b/teachingmodule.qmd
@@ -77,14 +77,36 @@ Before delving into the actual analysis of the data in FragPipe, we must initial
 
 To better understand the paper, we have formulated some questions that should help clarify the study design, aim, and overall scope of the work. These questions are listed below:
 
-- …
-- …
-- …
+::: {.question}
+What are the primary research goals of this study?
+:::
+
+::: {.question}
+How is the study design structured to achieve the study objective?
+:::
+
+::: {.question}
+What role does LC-MS/MS play in this study, and why was it chosen as a method?
+:::
+
+::: {.question}
+What novel insights were gained from the proteogenomic approach used in this study?
+:::
 
 
 ### Supplementary Data
 
-Before we proceed and download the data available from the paper, we must first delve into some of the details in Supplementary Data 1. This is a large table containing the quantitative proteome data from the Oslo2 breast cancer cohort, which includes 45 subgroups of cancer tumors and relates to Figures 1-6 in the paper.
+Before we proceed and download the data available from the paper, we must first delve into some of the details in Supplementary Data.
+Your first task is to look through the Supplementary Data and find the annotation relating the tumor types to the isotopic labels used.
+
+::: {.question}
+How did you find the required information? Describe the steps you took. How much time did it take?
+:::
+
+::: {.callout-note collapse="true"}
+#### Hint
+If you have not been able to find the annotations, open Supplementary Data 1, and then go to the tab called "Tumor annotations".
+:::
 
 To better understand the supplementary data, we have prepared guiding questions to aid in interpreting the table. You can find the supplementary data in the paper [here](https://www-nature-com.proxy1-bib.sdu.dk/articles/s41467-019-09018-y#Sec15).
 
@@ -107,7 +129,10 @@ Briefly describe TMT-labeled mass spectrometry proteomics data and explain the e
 #### TMT10-Tags and Tumor IDs
 
 Fill in the table below by entering the corresponding TMT10-tags and Tumor IDs. Once submitted, you'll get instant feedback on whether your input is correct.
+
+::: {.callout-note}
 When you have filled the whole table correctly, download the annotation files, **you will need them later**.
+:::
 
 <form id="tmtForm">
   <table style="width: 100%; border-collapse: collapse;">
@@ -387,32 +412,40 @@ Next, we can launch FragPipe, which is located on the desktop. In this tutorial,
 Now that we have launched FragPipe, we need to configure the settings prior to running the analysis. Therefore, we have provided some guiding questions to help you set up the settings in FragPipe:
 
 ### Getting started with FragPipe
+
+::: {.callout-note}
+Some of the information you will need in this section can be found in **Supplementary Information** to the study. Open the **Supplementary Information**  and go to page 25, **Supplementary Methods**.
+:::
+
 Go to the “Workflow” tab to set up the workflow for the analysis and import the data you have just downloaded.
 
 ::: {.question}
-Which workflow should you select? HINT: How many TMT tags are listed in the table in Supplementary Data 1?
+Which workflow should you select? **Hint**: What labeling method was used in the study?
 :::
 
 
-Click ‘Load workflow’ after you have found and selected the correct workflow to be used.
+::: {.question}
+How does the labeling method affect data processing?
+:::
+
+Click "Load workflow" after you have found and selected the correct workflow to be used.
 
 Next, add your files by clicking on “Add files” and locate them in the designated folder for your raw files that you previously created. Assign each file to a separate *experiment* by clicking "Consecutive".
 
 Go to the "Quant (Isobaric)" tab. Here, you need to provide annotations for TMT channels. Use the five pool annotations that you downloaded from this page.
 You will need to upload them to Ucloud and specify the corresponding annotation file for each experiement in order.
 
-Now you should relocate to the “Database” tab. Here you can either download or browse for an already preexisting database file. In this case, we will simply download the latest database file by clicking the "Download" button in FragPipe.
+Now you should relocate to the “Database” tab. Here you can either download or browse for an already preexisting database file.
+In this case, we will simply download the latest database file by clicking the "Download" button in FragPipe. Add contaminants and decoys.
 
 ::: {.question}
 What is the purpose of the database file used in FragPipe, and why is it important?
 :::
 
-
 ::: {.question}
 Which organism should you choose when downloading the database file?
 :::
 
-
 ::: {.question}
 Describe the relationship between decoys and false discovery rate (FDR) by answering the following questions:
 
@@ -423,28 +456,29 @@ Describe the relationship between decoys and false discovery rate (FDR) by answe
 </ul>
 :::
 
+Next, you can go to the "MSFragger" tab to adjust the parameter settings for the search and matching of the theoretical and experimental peptide spectra.
+The search parameters to be used are listed in **Supplementary Methods**.
 
-Next, you can go to the MSFragger tab to adjust the parameter settings for the search and matching of the theoretical and experimental peptide spectra.
-
-Most of the settings used for MSFragger can be obtained from the paper *NAME OF PAPER*, which is referred to in the Methods and Materials section.
+::: {.question}
+What parameters did you set?
+:::
 
 When all settings have been obtained, MSFragger should look something like this:
 
 ::: {.question}
-What is MSFragger?
+What is MSFragger? What does it do?
 :::
 
 
 ::: {.question}
 How does MSFragger operate?
 :::
 
-
-::: {.question}
-Why is it essential to run MSFragger for this analysis?
+::: {.callout-note}
+You can also skip configuring MSFragger manually and just use this [parameter file](https://github.com/hds-sandbox/proteomics-sandbox/blob/webpage/fragger.params).
+You will need to upload it to UCloud and then load it on the "MSFragger" tab in FragPipe.
 :::
 
-
 Finally, we can navigate to the “Run” tab and run the analysis. For that, we must choose an output directory for the results of the search made by FragPipe. Once you have adjusted that, you are ready to click on “Run”.
 
 This process might take some time, so make sure that you still have enough hours allocated on your job on UCloud—otherwise, it will get terminated. Meanwhile, you can answer these questions:
@@ -460,7 +494,7 @@ Can the output from this analysis be reliably used for downstream applications g
 
 
 ::: {.question}
-What does it signify that the sample tissues have been fractionated as described in *PAPER*?
+What does it signify that the sample tissues have been fractionated as described in **Supplementary Information**?
 :::
 <ul>
     <li>Outline the fractionation process utilized.</li>
@@ -523,21 +557,21 @@ Guide on how to use Jupyter Notebook in the Proteomics Sandbox Application (git
 
 ## Personal Details
 
-**Name:**  
-<textarea id="name" placeholder="Type your name here..." class="input-box"></textarea>
-
-**Email:**  
-<textarea id="email" placeholder="Type your email here..." class="input-box"></textarea>
++---------------+---------------------------------------------------------------------------------------------------+
+|Name           | <textarea id="name" placeholder="Type your name here..." class="input-box"></textarea>            |
++---------------+---------------------------------------------------------------------------------------------------+
+|Email          | <textarea id="email" placeholder="Type your email here..." class="input-box"></textarea>          |
++---------------+---------------------------------------------------------------------------------------------------+
+|Course/Program | <textarea id="course" placeholder="Type your course/program here..." class="input-box"></textarea>|
++---------------+---------------------------------------------------------------------------------------------------+
+|Date           | <textarea id="date" placeholder="Type the date here..." class="input-box"></textarea>             |
++---------------+---------------------------------------------------------------------------------------------------+
 
-**Course/Program:**  
-<textarea id="course" placeholder="Type your course/program here..." class="input-box"></textarea>
-
-**Date:**  
-<textarea id="date" placeholder="Type the date here..." class="input-box"></textarea>
+: Please fill in your information {.personal-details}
 
 <button id="downloadBtn" class="download-button">Download Your Answers as PDF</button>
 
-<script src="javascripts/answers.js"/>
-
 <script src="https://cdnjs.cloudflare.com/ajax/libs/html2pdf.js/0.9.3/html2pdf.bundle.min.js"></script>
 
+<script src="javascripts/answers.js"/>
+