diff --git a/.gitignore b/.gitignore
index 6380226b..33d695a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,6 @@ npm-debug.log
 yarn-error.log
 /.idea
 /.vscode
-/resources/ops/kubernetes/secret.yml
\ No newline at end of file
+/resources/ops/kubernetes/secret.yml
+docs/.vitepress/dist
+docs/.vitepress/cache
\ No newline at end of file
diff --git a/docs/.vitepress/config.mjs b/docs/.vitepress/config.mjs
new file mode 100644
index 00000000..cda4fd53
--- /dev/null
+++ b/docs/.vitepress/config.mjs
@@ -0,0 +1,161 @@
+import { defineConfig } from 'vitepress'
+
+// https://vitepress.dev/reference/site-config
+export default defineConfig({
+  title: "nmrXiv",
+  description: "FAIR, consensus-driven NMR data repository and computational platform",
+  ignoreDeadLinks: true, // NOTE(review): dead links will not fail the build, so broken sidebar targets go unnoticed
+
+  themeConfig: {
+    logo: {
+      light: "/logo.svg",
+      dark: "/logo-dark.svg",
+      alt : "nmrXiv"
+    },
+
+    siteTitle: "",
+
+    // https://vitepress.dev/reference/default-theme-config
+    nav: [
+      { text: 'Home', link: '/introduction/intro.md' },
+      { text: 'Guides', link: '/developer-guides/architecture.md' }
+    ],
+
+    sidebar: [
+      {
+        text: 'Getting Started',
+        items: [
+          { text: 'Overview', link: '/introduction/intro.md' },
+          { text: 'Data',
+            items: [
+              { text: 'File Formats', link: '/introduction/data/formats.md'},
+              { text: 'Ontologies', link: '/introduction/data/ontologies.md'},
+              { text: 'Schemas', link: '/introduction/data/schemas.md'},
+              { text: 'Exemplary Data', link: '/introduction/data/exemplary-data.md'}
+            ]
+          }
+        ],
+      },
+      {
+        text: 'Submission Guides',
+        items: [
+          { text: 'Data - Life cycle', link: '/submission-guides/data-lifecycle.md' },
+          { text: 'Registration', link: '/submission-guides/registration.md' },
+          { text: 'Data Models',
+            items: [
+              { text: 'Project', link: '/submission-guides/data-model/project.md'},
+              { text: 'Sample/Study', link: '/submission-guides/data-model/study.md'},
+              { text: 'Spectra Dataset', link: '/submission-guides/data-model/dataset.md'},
+              { text: 'Team', link: '/submission-guides/data-model/team.md'},
+              { text: 'Sharing', link: '/submission-guides/data-model/sharing.md'}
+            ]
+          },
+          { text: 'Submission Process', // NOTE(review): the items below duplicate the developer 'Configuration' links; they look like placeholders - confirm the intended submission pages
+            items: [
+              { text: 'OAuth', link: '/developer-guides/configurations/oauth.md'},
+              { text: 'Storage', link: '/developer-guides/configurations/storage.md'}
+            ]
+          },
+          { text: 'Spectra', link: '/developer-guides/architecture.md' }, // NOTE(review): points at the architecture page - confirm the intended target
+          { text: 'Licenses', link: '/submission-guides/licenses.md' }
+        ],
+      },
+      {
+        text: 'Developers Guides',
+        items: [
+          { text: 'Architecture', link: '/developer-guides/architecture.md' },
+          { text: 'Installation',
+            items: [
+              { text: 'macOS', link: '/developer-guides/installation/mac.md'},
+              { text: 'Windows', link: '/developer-guides/installation/windows.md'},
+              { text: 'Ubuntu', link: '/developer-guides/installation/ubuntu.md'},
+              { text: 'Centos', link: '/developer-guides/installation/centos.md'},
+              { text: 'Development Workflow', link: '/developer-guides/installation/development-workflow.md'}
+            ]
+          },
+          { text: 'Configuration',
+            items: [
+              { text: 'OAuth', link: '/developer-guides/configurations/oauth.md'},
+              { text: 'Storage', link: '/developer-guides/configurations/storage.md'}
+            ]
+          },
+          { text: 'Deployment',
+            items: [
+              { text: 'CI/CD', link: '/developer-guides/deployment/ci-cd.md'},
+              { text: 'GKE', link: '/developer-guides/deployment/gke.md'},
+              { text: 'Helm', link: '/developer-guides/deployment/helm.md'},
+              { text: 'Production', link: '/developer-guides/deployment/production.md'},
+              { text: 'Environments', link: '/developer-guides/deployment/environment.md'}
+            ]
+          },
+          { text: 'Code Contribution Guidelines', link: '/developer-guides/code-contribution-guidelines.md' },
+          { text: 'API', link: '/developer-guides/api.md' }
+        ],
+      },
+      {
+        text: 'Advanced Guides',
+        items: [
+          { text: 'NMRium', link: '/advanced-guides/nmrium/nmrium.md' },
+          { text: 'NMR Repositories Overview',
+            items: [
+              { text: 'Background', link: '/advanced-guides/nmr-repositories/background.md'},
+              { text: 'Data Sanitisation and Missing Values', link: '/advanced-guides/nmr-repositories/sanitisation.md'},
+              { text: 'Dimensionality', link: '/advanced-guides/nmr-repositories/dimensionality.md'},
+              { text: 'Spectrometer Frequency', link: '/advanced-guides/nmr-repositories/frequency.md'},
+              { text: 'Atomic Nuclei', link: '/advanced-guides/nmr-repositories/nuclei.md'},
+              { text: 'Temperature', link: '/advanced-guides/nmr-repositories/temperature.md'},
+              { text: 'Solvent', link: '/advanced-guides/nmr-repositories/solvent.md'},
+              { text: 'Instruments', link: '/advanced-guides/nmr-repositories/instrument.md'},
+              { text: 'pH', link: '/advanced-guides/nmr-repositories/ph.md'},
+              { text: 'Organism', link: '/advanced-guides/nmr-repositories/organism.md'},
+              { text: 'Organism Part', link: '/advanced-guides/nmr-repositories/part.md'},
+              { text: 'Variant', link: '/advanced-guides/nmr-repositories/variant.md'}
+
+            ]
+          },
+          { text: 'Spectral Viewing and Processing', link: '/submission-guides/spectra.md' },
+          { text: 'Licenses', link: '/submission-guides/licenses.md' }
+        ],
+      },
+      {
+        text: 'Community',
+        items: [
+          { text: 'Training', link: '/community/training.md' },
+          { text: 'NMR MIChI Workshops in NFDI4Chem', link: '/community/workshops.md' },
+          { text: 'Media Kit', link: '/community/media-kit.md' }
+        ],
+      },
+      {
+        text: 'Contribution',
+        items: [
+          { text: 'nmrXiv App', link: '/contribution/nmrxiv.md' },
+          { text: 'nmrXiv Docs', link: '/contribution/nmrxiv-docs.md' },
+          { text: 'Contributors and Steering Committee', link: '/contribution/contributors.md' }
+        ],
+      },
+      {
+        text: 'Miscellaneous',
+        items: [
+          { text: 'Tour', link: '/miscellaneous/tour.md' },
+          { text: 'Shortcuts', link: '/miscellaneous/shortcuts.md' },
+        ],
+      },
+      {
+        text: 'License', link: '/license.md', // root-relative so the link resolves from nested pages too
+      },
+      {
+        text: 'FAQ', link: '/FAQs.md' // root-relative so the link resolves from nested pages too
+      }
+    ],
+
+
+    socialLinks: [
+      { icon: 'github', link: 'https://github.com/NFDI4Chem/nmrxiv' }
+    ],
+
+    footer: {
+      message: 'Source code released under the MIT License | <br /> Data are provided under the Creative Commons Attribution (aka CC-BY 4.0) <br /> Funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under the National Research Data Infrastructure – NFDI4Chem – Projektnummer 441958208.',
+      copyright: `© ${new Date().getFullYear()} nmrXiv, Inc. All rights reserved.`
+    }
+  }
+})
diff --git a/docs/.vitepress/theme/index.ts b/docs/.vitepress/theme/index.ts
new file mode 100644
index 00000000..dde519d4
--- /dev/null
+++ b/docs/.vitepress/theme/index.ts
@@ -0,0 +1,33 @@
+// https://vitepress.dev/guide/custom-theme
+import { h } from 'vue'
+import Theme from 'vitepress/theme'
+import './style.css'
+import { onMounted, watch, nextTick } from 'vue';
+import { useRoute } from 'vitepress';
+import mediumZoom from 'medium-zoom';
+
+export default {
+  extends: Theme,
+  Layout: () => {
+    return h(Theme.Layout, null, {
+      // https://vitepress.dev/guide/extending-default-theme#layout-slots
+    })
+  },
+  enhanceApp({ app, router, siteData }) {
+    // ...
+  },
+  setup() {
+    const route = useRoute();
+    const initZoom = () => {
+      // mediumZoom('[data-zoomable]', { background: 'var(--vp-c-bg)' });
+      mediumZoom('.main img', { background: 'var(--vp-c-bg)' });
+    };
+    onMounted(() => {
+      initZoom();
+    });
+    watch(
+      () => route.path,
+      () => nextTick(() => initZoom()) // re-run zoom setup once the DOM has updated after a route change
+    );
+  },
+}
\ No newline at end of file
diff --git a/docs/.vitepress/theme/style.css b/docs/.vitepress/theme/style.css
new file mode 100644
index 00000000..2544af4d
--- /dev/null
+++ b/docs/.vitepress/theme/style.css
@@ -0,0 +1,176 @@
+/**
+ * Customize default theme styling by overriding CSS variables:
+ * https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css
+ */
+
+/**
+ * Colors
+ *
+ * Each colors have exact same color scale system with 3 levels of solid
+ * colors with different brightness, and 1 soft color.
+ *
+ * - `XXX-1`: The most solid color used mainly for colored text. It must
+ *   satisfy the contrast ratio against when used on top of `XXX-soft`.
+ * + * - `XXX-2`: The color used mainly for hover state of the button. + * + * - `XXX-3`: The color for solid background, such as bg color of the button. + * It must satisfy the contrast ratio with pure white (#ffffff) text on + * top of it. + * + * - `XXX-soft`: The color used for subtle background such as custom container + * or badges. It must satisfy the contrast ratio when putting `XXX-1` colors + * on top of it. + * + * The soft color must use a semi-transparent alpha channel. This is crucial + * because it allows adding multiple "soft" colors on top of each other + * to create an accent, such as when having inline code block inside + * custom containers. + * + * - `default`: The color used purely for subtle indication without any + * special meanings attached to it such as bg color for menu hover state. + * + * - `brand`: Used for primary brand colors, such as link text, button with + * brand theme, etc. + * + * - `tip`: Used to indicate useful information. The default theme uses the + * brand color for this by default. + * + * - `warning`: Used to indicate warning to the users. Used in custom + * container, badges, etc. + * + * - `danger`: Used to show error, or dangerous message to the users. Used + * in custom container, badges, etc. 
+ * -------------------------------------------------------------------------- */ + + :root { + --vp-c-default-1: var(--vp-c-gray-1); + --vp-c-default-2: var(--vp-c-gray-2); + --vp-c-default-3: var(--vp-c-gray-3); + --vp-c-default-soft: var(--vp-c-gray-soft); + + --vp-c-brand-1: var(--vp-c-indigo-1); + --vp-c-brand-2: var(--vp-c-indigo-2); + --vp-c-brand-3: var(--vp-c-indigo-3); + --vp-c-brand-soft: var(--vp-c-indigo-soft); + + --vp-c-tip-1: var(--vp-c-brand-1); + --vp-c-tip-2: var(--vp-c-brand-2); + --vp-c-tip-3: var(--vp-c-brand-3); + --vp-c-tip-soft: var(--vp-c-brand-soft); + + --vp-c-warning-1: var(--vp-c-yellow-1); + --vp-c-warning-2: var(--vp-c-yellow-2); + --vp-c-warning-3: var(--vp-c-yellow-3); + --vp-c-warning-soft: var(--vp-c-yellow-soft); + + --vp-c-danger-1: var(--vp-c-red-1); + --vp-c-danger-2: var(--vp-c-red-2); + --vp-c-danger-3: var(--vp-c-red-3); + --vp-c-danger-soft: var(--vp-c-red-soft); + } + + /** + * Component: Button + * -------------------------------------------------------------------------- */ + + :root { + --vp-button-brand-border: transparent; + --vp-button-brand-text: var(--vp-c-white); + --vp-button-brand-bg: var(--vp-c-brand-3); + --vp-button-brand-hover-border: transparent; + --vp-button-brand-hover-text: var(--vp-c-white); + --vp-button-brand-hover-bg: var(--vp-c-brand-2); + --vp-button-brand-active-border: transparent; + --vp-button-brand-active-text: var(--vp-c-white); + --vp-button-brand-active-bg: var(--vp-c-brand-1); + } + + /** + * Component: Home + * -------------------------------------------------------------------------- */ + + :root { + --vp-home-hero-name-color: transparent; + --vp-home-hero-name-background: -webkit-linear-gradient( + 120deg, + #bd34fe 30%, + #41d1ff + ); + + --vp-home-hero-image-background-image: linear-gradient( + -45deg, + #bd34fe 50%, + #47caff 50% + ); + --vp-home-hero-image-filter: blur(40px); + } + + @media (min-width: 640px) { + :root { + --vp-home-hero-image-filter: blur(56px); + } + } + + 
@media (min-width: 960px) { + :root { + --vp-home-hero-image-filter: blur(72px); + } + } + + /** + * Component: Custom Block + * -------------------------------------------------------------------------- */ + + :root { + --vp-custom-block-tip-border: transparent; + --vp-custom-block-tip-text: var(--vp-c-text-1); + --vp-custom-block-tip-bg: var(--vp-c-brand-soft); + --vp-custom-block-tip-code-bg: var(--vp-c-brand-soft); + } + + /** + * Component: Algolia + * -------------------------------------------------------------------------- */ + + .DocSearch { + --docsearch-primary-color: var(--vp-c-brand-1) !important; + } + + .VPHomeHero .text{ + font-size: 36px !important; + line-height: 42px !important; + } + + .name{ + font-size: 64px !important; + line-height: 84px !important; + } + + .logo{ + height: 36px !important; + } + + html img.only-on-dark { + display: none; + } + + html img.only-on-light { + display: block; + } + + html.dark img.only-on-light { + display: none; + } + + html.dark img.only-on-dark { + display: block; + } + + .medium-zoom-overlay { + z-index: 100; + } + + .medium-zoom-image { + z-index: 100; + } \ No newline at end of file diff --git a/docs/FAQs.md b/docs/FAQs.md new file mode 100644 index 00000000..4e4569b5 --- /dev/null +++ b/docs/FAQs.md @@ -0,0 +1,85 @@ +# FAQs + +### How can I submit my data to **[nmrXiv](https://nmrxiv.org/)**? + +- [Register to nmrXiv](/submission-guides/registration.md). +- Structure your data in folders that mirror how **[nmrXiv](https://nmrxiv.org/)** structures projects, studies, and datasets. This step might not be intuitive, so we recommend [checking its documentation](/submission-guides/submission/folder-structure.md). +- Upload your data, edit it, and provide its metadata via the [submission pipeline](/submission-guides/submission/upload.md). + +### Do I need to register before submitting data to nmrXiv? + +Yes. Registration is a prerequisite to submitting the data. 
Although you can submit data by logging in via Single sign-on with your GitHub or Twitter ID, this logging-in will result in registering you on **[nmrXiv](https://nmrxiv.org/)**. Alternatively, you can register via your email id. More on the registration [here](/submission-guides/registration.md). + +### How should I structure my data in folders before submitting it to **[nmrXiv](https://nmrxiv.org/)**? + +**[nmrXiv](https://nmrxiv.org/)** can structure the submitted data into datasets/studies/projects, but there is still no way to guarantee that the automatically generated structure is what the user was hoping to get. Therefore, it is recommended to have a look at the [folder structuring page](/submission-guides/submission/folder-structure.md). + +### What are supported files format in **[nmrXiv](https://nmrxiv.org/)**? + +**[nmrXiv](https://nmrxiv.org/)** accepts all NMR formats uploaded. However, not all of them are readable at the moment. So far, only NMRium-supported formats can be translated into spectra in **[nmrXiv](https://nmrxiv.org/)**. Those formats are jcamp-dx, jeol, Bruker folders, NMReData, and nmrium. For validation purposes, the uploaded data should have at least one readable format. + +### What happens to my data once submitted? + +### What are public and private objects in **[nmrXiv](https://nmrxiv.org/)**? + +**Public/Published** objects (projects, studies, and datasets) are visible and accessible to everyone (even to the non-registered users of **[nmrXiv](https://nmrxiv.org/)**). You can see all the open projects [here](https://nmrxiv.org/projects) or find them in the Projects tab in the left-hand panel of your dashboard. + +**Private** objects (projects, studies, and datasets) are only visible and accessible to the people with whom they are shared by [single sharing](/submission-guides/data-model/sharing.html#single-sharing) or in a [team](/submission-guides/data-model/sharing.html#teams-sharing). 
+ +:::danger Caution +Once an object is made public, it cannot be edited, versioned, or deleted anymore, nor be made private again. +::: + +### Who can use my public/published resources? + +If you make your resources public (projects, studies, datasets), you are making them open for access to everyone (even to the non-registered user of **[nmrXiv](https://nmrxiv.org/)**), but you can specify rights by choosing [licenses](/submission-guides/licenses.md) for your projects and studies (study license propagate to its datasets). Once your project is made public, you cannot edit, delete or make it private again. + +### How can I edit my public resources? + +You cannot edit a resource (project, study, dataset) once it's made public, but you can always create another version and make changes on top of it. + +### How can I delete my projects, studies or dataset? + +You can only delete private ones. For more details, check deletion of [projects](/submission-guides/data-model/project#delete), [studies](/submission-guides/data-model/study#delete), and [datasets](/submission-guides/data-model/dataset#delete). + +### How can I share my resources? + +You can share your resources (projects, studies, datasets) singly or in bulks within teams. For more details on sharing, please check the [sharing page](/submission-guides/data-model/sharing.md). + +### What are the available roles when sharing a resource? + +You can assign roles to people with whom you share content: + +- **Owner** - Can read and/or update, including deleting the project, study, and dataset. +- **Collaborator** - Can read and/or update the project, study, and dataset. +- **Reviewer** - Can only read the project, study, and dataset. + +### How can I delete or edit my account? + +You can edit your account details by heading to your name at the top right corner and clicking on the `Account` tab from the drop-down. 
To delete your account, please reach out to our [Helpdesk](https://www.nfdi4chem.de/index.php/helpdesk/), or write to us at info.nmrxiv@uni-jena.de. + +### How can I license my resources, and which license to choose? + +You can license your project during or after submission as long as it is private. The license will propagate to the underlying studies, but you can still change the studies' licenses. For more details about licenses and when to use each one, please visit the [licenses page](/submission-guides/licenses.md). + +### Can I use **[nmrXiv](https://nmrxiv.org/)** for my teaching and demo purposes? + +Yes, you can use our [dev site](https://dev.nmrxiv.org) for all kinds of training, demo, and teaching purposes. Please avoid using the official **[nmrXiv](https://nmrxiv.org/)** site for these purposes. However, the [dev site](https://dev.nmrxiv.org) is just a sandbox, and all the data there could be reset anytime. +To learn more about our environment click [here](/developer-guides/deployment/environment). + +### How to report a bug? + +Before reporting a new issue + +- Check the existing [issues](https://github.com/NFDI4Chem/nmrxiv/issues) to avoid duplication. [Here](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests#searching-for-issues-and-pull-requests) are some tips that can help you to narrow down your search. You can also comment on existing issues to provide additional details. +- Check our [documentation](https://docs.nmrxiv.org/) first to see whether it already answers your question. + +If neither of the above resolves your problem + +- Click [here](https://github.com/NFDI4Chem/nmrxiv/issues/new/choose) to report a new one under the appropriate category. + +### How can I reach you? + +Please write to us at info.nmrxiv@uni-jena.de or reach out to our [Helpdesk](https://www.nfdi4chem.de/index.php/helpdesk/). 
+You can also leave us a message via our support bubble which you find in the left down corner on our [application](https://nmrxiv.org/) page. +We will try to get back to your queries as soon as possible. diff --git a/docs/advanced-guides/_category_.json b/docs/advanced-guides/_category_.json new file mode 100644 index 00000000..ec4e3a7d --- /dev/null +++ b/docs/advanced-guides/_category_.json @@ -0,0 +1,7 @@ +{ + "label": "Advanced Guides", + "position": 4, + "link": { + "type": "generated-index" + } +} \ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/background.md b/docs/advanced-guides/nmr-repositories/background.md new file mode 100644 index 00000000..d543f1af --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/background.md @@ -0,0 +1,45 @@ +# Background + +**[nmrXiv](https://nmrxiv.org/)** will host NMR data from multiple sources, starting from pure compounds to provide reference spectra, reaching spectra generated from metabolites found in biological samples as mixtures. To facilitate dealing with this range of heterogeneity in future data and metadata, we have started by performing some quantitative and qualitative analysis on the available NMR repositories, including domain-specific and generic databases, to see how data have been handled so far, and to make use of the experience accumulated with every new database and dataset. + +## Aim of The Analysis and How It Will Influence nmrXiv +We are interested in knowing what metadata was covered, and how often (in how many studies) this metadata was available. That will help decide what metadata to be asked for from users when submitting their datasets. Also, what metadata to be optional/recommended and what to be mandatory. We plan to allow users to submit any metadata they have while recommending or requesting others. + +We would also like to find out what aspects were challenging and how they were tackled. 
Then, we move to the ontologies to see which ones are commonly used and relevant to NMR data to help us shape the process of data reporting in **[nmrXiv](https://nmrxiv.org/)**. We hope to make our data [FAIRer](https://www.go-fair.org/fair-principles/), and more machine-readable by using this knowledge. + +By identifying common issues encountered while reporting data, we can avoid repeating the same mistakes. We can also work on approaches to retrospectively correct the data before importing it into nmrXiv. + +## The Overview Scope + +* Domain-specific databases (WIP) + * Organic Chemistry Databases: [NMRShiftDB](https://nmrshiftdb.nmr.uni-koeln.de/) + * Metabolomics: [MetaboLights](https://www.ebi.ac.uk/metabolights/) and [Metabolomics Workbench](https://www.metabolomicsworkbench.org/). +* Public Datasets: + * [CENAPT](https://dataverse.harvard.edu/dataverse/cenapt) +* Generic Databases and Journals + * [EuropePMC](https://europepmc.org/) (Will be done in the future) + +So far, all covered NMR repositories are biology-oriented ones, but non-biological metadata was also covered. + +### Aspects Investigated + - Meta-Data inconsistencies + - Controlled Vocabulary - Machine readability + - Common parameters reported and missing information + +### Parameters of Interest +The covered metadata includes sample and assay (experiment) metadata. It also considers the ontologies used. Please find the GitHub repository with Python scripts and Jupyter notebooks to extract the metadata and visualize it [here](https://github.com/NFDI4Chem/repo-scripts). + +Here is a list of the covered parameters + - Dimensionality + - Spectrometer frequency + - Atomic nuclei + - Temperature + - Solvent + - Instruments + - pH + - Organism + - Organism part + - Variant + - Dataset size + + We will look into more parameters, such as the tube type, NMR probe, and magnetic field strength. 
\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/dimensionality.md b/docs/advanced-guides/nmr-repositories/dimensionality.md new file mode 100644 index 00000000..85f93361 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/dimensionality.md @@ -0,0 +1,118 @@ +# Dimensionality +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/dimensionality.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies + +Despite the difficulty of finding definitions of NMR Dimensionality in ontologies, the term itself exists ( in [nuclear magnetic resonance CV](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1000117&viewMode=All&siblings=false), and in [Physico-chemical methods and properties](https://terminology.nfdi4chem.de/ts/ontologies/fix/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFIX_0000140&viewMode=All&siblings=false)). However, its subclasses are not rich with either definitions or values, with the values mostly being missing in the case of 1D NMR, or provided ([as in CHMO](https://terminology.nfdi4chem.de/ts/ontologies/chmo/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FCHMO_0000613&viewMode=All&siblings=false)), but not classified as 1D. Still, the available ontologies are sufficient for providing the piece of metadata describing the dimensionality as the subclasses can be added in another field in the repository. + +## Data Sanitisation and Missing Values + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissingComment
MTBLSsemi-dedicatedPulse sequence namefree textnoneThe field is not provided; or the expressions "1D" or "2D" are not found in the field, and also the method mentioned is not available in the mapping between the method and dimensionality made with hard-coding; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null"Due to providing methods names in free text, much hard coding was needed
MWsemi-dedicatedNMR Experiment Typefree textnoneThe field is not provided; or the expressions "1D" and "2D" are not found in the field, and also the method mentioned is not available in the mapping between the method and dimensionality made with hard-coding; or the value is provided as N/A or other similar expressions; or decoding the JSON file that contains the study details has failed due to syntax error thereDue to providing methods names in free text, much hard coding was needed
CENAPTsemi-dedicatedDescriptionmachine-readablenoneDescription field is always available, so missing means only that the expressions "1D" and "2D" are not found thereIt was machine-readable as looking for "1D" and "2D" expressions was enough in all the texts
NMRShiftDBsemi-dedicated[nmr:assignmentMethod, nmr:OBSERVENUCLEUS]free textnoneThe field is not provided; or the expressions "1D" and "2D" are not found in the field, and also the method mentioned is not available in the mapping between the method and dimensionality made with hard-coding. The value can also be given as "Unreported" or other similar expressions.Due to providing methods names in free text, much hard coding was needed
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["1D NOESY with presaturation (tnnoesy)", "2D JRES with homonuclear J-resolved 2D correlation, presaturation during relaxation delay with gradients (jresgpprqf)", "1H_PROTON"]"1d", "2d", "missing"
MW["2D 1H-13C HSQC-TOCSY", "NOESY", "NOESY PR1D"]"1d", "2d", "missing"
CENAPT["NMR data of gossypol in DMSOd6 and CDCl3. The dataset contains 1D 1H 13C as well as 2D COSY, HSQC, HMBC, all acquired at 600MHz (Bruker 600MHz spectrometer with CryoProbe (CP DCH 600S3 C/H-D-05 Z)"]"1d", "2d", "missing"
NMRShiftDB["1D shift positions", "2D INADEQUATE/NMRanalyst", "1H, H,H-COSY, H,H-NOESY, H,C-HMQC, H,C-HMBC, DEPTQ, 1H, 13C, 1H, H,H-COSY"]"1d", "2d", "missing"
+ +## Results +The Dimensionality was possible to obtain in most studies and repositories. It can be either mentioned explicitly as "1D" or "2D", or it can be implied by the technique name (e.g., HSQC). None of the repositories used ontologies to describe the value. + +Most of the studies were only one-dimensional. Here you can see the percentage of studies based on their dimensionalities. If a study has both 1D and 2D spectra, it will appear twice. + +

+ +

The percentages of all studies in the four repositories based on the Dimensionality
+

+ +[CENAPT](https://dataverse.harvard.edu/dataverse/cenapt) was the only exception to provide 2D NMR for almost all the studies as one can see below (NMRShiftDB data were not added due to the huge number of studies there). + +

+ +

The number of studies in three repositories based on NMR dimensionalities
+

+ +However, NMRShiftDB similarly has mostly 1D NMR data. +

+ +

The number of studies in NMRShiftDB based on NMR dimensionalities
+

+ +And here you can find the whole view of the four repositories with a logarithmic scale. + +

+ +

The number of studies in the four repositories based on NMR dimensionalities (with a logarithmic scale)
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/frequency.md b/docs/advanced-guides/nmr-repositories/frequency.md new file mode 100644 index 00000000..e22be4a4 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/frequency.md @@ -0,0 +1,119 @@ +# Spectrometer Frequency +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/frequency.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies + +Frequency-wise, the NMR field seems to be full of terms such as Larmor frequency, instrument frequency, offset frequency, and others. Still, there is rare, if any, mention of them in NMR and Chemistry related ontologies. Being said, repositories still have the option to define this field, where the value of it is simply a float. The main issue here was the absence of an ontology-driven unit of frequency. Although it is somehow safe to assume that the unit is [MHz](https://terminology.nfdi4chem.de/ts/ontologies/uo/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUO_0000325), assumptions aren't ideal. More importantly, providing the unit was often the only possible way to deduct that a certain number corresponds to frequency. + +When talking about "Spectrometer Frequency" here, we mean during the NMR assay, not the instrument maximum frequency, except for Metabolights, as this is the value provided there. + +## Data Sanitisation and Missing Values + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissingComment
MTBLSabsentnonenonenoneSlugified MHz was not found in the instrument nameThere is no field for the frequency. It can be available in the description, but still, it is difficult to extract due to possible confusion with the instrument's maximum frequency value. The pulse sequence name is provided, but it doesn't contain numerical values. Here, we only provide the instrument's maximum frequency value, not what was used in the study
MWdedicatedSpectrometer Frequencymachine-readableintegratedThe field is not provided; or multiple values were provided; or the value is provided as N/A or other similar expressions.
CENAPTsemi-dedicatedNamemachine-readableintegratedThe name is always provided, so "missing" means that a number plus slugified "MHz" were not found there.
NMRShiftDBdedicatedcml:fieldfloatseparate fieldThe value is given as "Unreported" or other similar expressions
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLSn-n+50MHz e.g., 100-150MHz
MW["500 MHz", "700.13 MHz", "600 MHz | 600 MHz | 600 MHz | 600 MHz | 601 MHz | 602 MHz | 603 MHz | 604 MHz NMR Probe | TCI 600 H-C/N-D | TCI 600 H-C/N-D | TCI 600 H-C/N-D | TCI 600 H-C/N-D | TCI 600 H-C/N-D | TCI 600 H-C/N-D | TCI 600 H-C/N-D | TCI 600 H-C/N-D", "600 MHz for 1H", "1H Larmor frequency"]n-n+50MHz e.g., 100-150MHz
CENAPT["Gossypol 600 MHz CDCl3 DMSOd6 NMR data", "Ginsenoside Rb1 400/600 MHz in DMSOd6 NMR data", "Glycyrrhetinic acid/Enoxolone 900_400MHz DMSOd6 NMR data"]n-n+50MHz e.g., 100-150MHz
NMRShiftDB["100 Mhz", "100.03250122070312", "100.0", "100", "Unknown", "Unreported", "Not clear in reference.", "Not mentioned."]n-n+50MHz e.g., 100-150MHz
+ +## Results +The four repositories provided frequency values in one way or another. However, Metabolights only provided the instrument's maximum frequency value. Only NMRShiftDB provided a field for the unit. Including the large number of studies in NMRShiftDB, the most used spectrometer frequency was 100-150MHz. + +

+ +

The percentages of all studies in three repositories (without MTBLS) based on the Spectrometer frequency
+

+ +However, when NMRShiftDB is excluded, the most used spectrometer frequency is 600-650MHz. + +

+ +

The number of studies in MW and CENAPT based on Spectrometer frequency
+

+ +Looking into the instruments in MetaboLights, the most used maximum frequency was also 600-650MHz. + +

+ +

The number of studies in MTBLS based on Maximum spectrometer frequency
+

+ + +And here you can find the whole view of the three repositories with a logarithmic scale. + +

+ +

The number of studies in the three repositories based on Spectrometer frequency (with a logarithmic scale)
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/instrument.md b/docs/advanced-guides/nmr-repositories/instrument.md new file mode 100644 index 00000000..0e2145a9 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/instrument.md @@ -0,0 +1,75 @@ +# Instruments Names +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/instruments-names.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies + +[NMR instrument](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1400059)s are already provided in [nmr CV](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv), along with [instruments parts](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1000463&viewMode=All&siblings=false). + +## Data Sanitisation and Missing Values +Instrument names are found only in MTBLS and MW. + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSdedicatedInstrumentfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null".
MWdedicatedInstrumentfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or decoding the JSON file containing the study details has failed due to syntax error there.
+ + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["Bruker AVANCE 700 MHz spectrometer", "Bruker AVANCE II 700 MHz spectrometer", "Bruker AVANCE III 700 MHz spectrometer", "Bruker AVANCE III HD 700 MHz spectrometer"]["600-mhz-varian-inova-spectrometer", etc.]
MW["Bruker 18.8 Tesla (800 MHz) NMR spectrometer ascend", "Bruker 500MHz spectrometer", "Bruker 600 MHz", "Bruker 600 MHz Avance III HD spectrometer", "Bruker 600-MHz AVANCE III solution NMR spectrometer", "Bruker 600MHZ", "FT NMR", "INOVA"]["600-mhz-varian-inova-spectrometer", etc.]
+ +## Results + +Both repositories have a dedicated field for the instrument name, making it easily obtainable. However, neither used ontology terms. As a result, one can see in the graph of the percentages of studies using the same instruments how small and numerous the sections are due to variations in the names used to indicate the same instrument. One can easily tell that Bruker and Agilent are the most used instruments, but getting exact numbers on how often a specific instrument is used was quite difficult. + +

+ +

A rough estimate of the percentages of all studies in the two repositories based on the NMR instrument
+

+ + +Looking at the distribution of instrument names in the two repositories, one can see that, with one exception, the same value was never used in both, although the same instrument actually was. + +

+ +

The number of studies in MW and MTBLS based on the instrument name.
+

diff --git a/docs/advanced-guides/nmr-repositories/nuclei.md b/docs/advanced-guides/nmr-repositories/nuclei.md new file mode 100644 index 00000000..61296a87 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/nuclei.md @@ -0,0 +1,110 @@ +# Atomic Nuclei +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/nuclei.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +The term [acquisition nucleus](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1400083) is already provided in [nmr CV](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv). However, 2D NMR isotope-related terms are largely missing. The possible values such as "1H" are very well annotated, either simply as atoms ([in CHEBI](https://terminology.nfdi4chem.de/ts/ontologies/chebi/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FCHEBI_33250&viewMode=All&siblings=false)), or encoded in the name of the NMR method ([in CHMO](https://terminology.nfdi4chem.de/ts/ontologies/chmo/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FCHMO_0000613&viewMode=All&siblings=false) despite the lack of consistency with dimensionality there). Currently, available ontologies are adequate to cover the atomic nuclei values in most cases. + +## Data Sanitisation and Missing Values + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSsemi-dedicatedPulse sequence namefree textnoneThe field is not provided; or the regular expressions '\d+[A-CE-Z]\-\d+[A-CE-Z]' or '\d+[A-CE-Z]\-\d+[A-CE-Z]' are not found in the field; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null".
MWsemi-dedicatedNMR Experiment Typemachine-readablenoneThe field is not provided; or an expression of numbers followed directly with letters (but not 1D or 2D) is not found in the field; or the value is provided as N/A or other similar expressions; or decoding the JSON file that contains the study details has failed due to syntax error there.
CENAPTsemi-dedicatedDescriptionfree textnone"1h" and "13c" were not found in the description, and checking for the regular expressions '\d+[a-zA-Z]+' didn't match any isotope.
NMRShiftDBdedicatednmr:OBSERVENUCLEUSmachine-readablenonenone
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["2D 1H-13C HSQC (hsqcetgppr)", "Conventional 1H spectrum (zg30)", "1D NOESY 12C custom filtered with presaturation, phase-sensitive gradients, and 13C decoupling (c12filternoesygpc13cpd)", "13C_CARBON", "M3S COSY", "2D homonuclear correlation via dipolar coupling (noesyesgpph)"]"1h", "1h-13c", "h-h", "missing"
MW["1D 1H", "1D-1H", "1D1H", "2D 1H-13C HSQC-TOCSY", "2D-INADEQUATE", "NOESY PR1D"]"1h", "1h-13c", "h-h", "missing"
CENAPT["NMR data of gossypol in DMSOd6 and CDCl3. The dataset contains 1D 1H 13C as well as 2D COSY, HSQC, HMBC, all acquired at 600MHz (Bruker 600MHz spectrometer with CryoProbe (CP DCH 600S3 C/H-D-05 Z)"]"1h", "1h-13c", "h-h", "missing"
NMRShiftDB["1H", "13C", "H,H-COSY"]"1h", "1h-13c", "h-h", "missing"
+ +## Results +Taking the large number of studies from NMRShiftDB into account or not, the most reported atomic nuclei are "1H" and "13C". But NMRShiftDB adds more 1D nuclei to the scene, such as "19F" and "11B". + +

+ +

The percentages of all studies in the four repositories based on the acquisition nucleus (only nuclei appearing in more than 200 studies can be seen here.)
+

+ +Excluding NMRShiftDB, we see that the other repositories have only 1D and 2D NMR based on proton and carbon. + +

+ +

The number of studies in three repositories based on the acquisition nucleus
+

+ +Here one can see the variation in nuclei reported in NMRShiftDB. +

+ +

The number of studies in NMRShiftDB based on the acquisition nucleus
+

+ +And here you can find the whole view of the four repositories with a logarithmic scale. + +

+ +

The number of studies in the four repositories based on the acquisition nucleus (with a logarithmic scale)
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/organism.md b/docs/advanced-guides/nmr-repositories/organism.md new file mode 100644 index 00000000..510f5c84 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/organism.md @@ -0,0 +1,74 @@ +# Organism +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/organism.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +[NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/taxonomy) provides an excellent source for organisms. + +## Data Sanitisation and Missing Values +Organisms are found only in metabolomics-related repositories, i.e., MTBLS and MW. + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSdedicatedOrganismontology-drivennoneThe field is not provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null"; or the organism is not found in NCBI taxonomy.
MWdedicatedSubject Speciesfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or decoding the JSON file containing the study details has failed due to syntax error there; or the organism was not found in NCBI taxonomy.
+ + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["Homo sapiens", "Blank sample", "Lactobacillus sp. asf360;Parabacteroides sp. asf519", "Sus scrofa domesticus", "NCBITAXON:Thalassiosira pseudonana;NCBITAXON:Ruegeria pomeroyi"]["homo-sapiens", "mus-musculus", etc.]
MW["Homo sapiens", "Sus scrofa", "Sus Scrofa", "C57BL/6J Mouse", "Multi-species non-defined biofilm consortium", "Alexandrium catenella; Alexandrium tamarense"]["homo-sapiens", "mus-musculus", etc.]
+ +## Results +Organism details are available in metabolomics repositories. The users usually use the scientific name that can be obtained from [NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/taxonomy). However, some inconsistencies were encountered such as with writing the scientific name (e.g., Mus Musculus instead of Mus musculus), providing a common name (Goat instead of Capra hircus), or providing the source along with the value (e.g., NCBITAXON:Homo sapiens), or even typos. + +Some values were ambiguous such as "Various", "Extract", "Multi-species non-defined biofilm consortium" or not even species, such as NMR buffer. Additionally, sometimes, more than one species was mentioned. The combination of species is usually standardized by putting ";" or "/" between the names, but still, the relation between the species is not clear (samples from multiple species vs one sample from a species tissue infected by another species). Lastly, the organism provided varies in rank. Mostly the species is provided, but sometimes it is the genus or strain. + +However, even after taking all that was mentioned above into account, it is still clear that the most studied species are humans (Homo sapiens) and mice (Mus musculus). +

+ +

A rough estimate of the percentages of all studies in MTBLS and MW repositories based on the organism
+

+ +Here one can see the number of studies providing the organism and its value. +

+ +

The number of studies in MTBLS and MW based on the organism
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/part.md b/docs/advanced-guides/nmr-repositories/part.md new file mode 100644 index 00000000..e5d3431b --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/part.md @@ -0,0 +1,74 @@ +# Organism Part +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/organism-part.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +[The BRENDA Tissue Ontology - BTO](https://www.ebi.ac.uk/ols/ontologies/bto) and [Experimental Factor Ontology - EFO](https://www.ebi.ac.uk/ols/ontologies/efo) are excellent sources for organisms parts. + +## Data Sanitisation and Missing Values +Organisms parts are found only in metabolomics-related repositories, i.e., MTBLS and MW. + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSdedicatedOrganism partontology-drivennoneThe field is not provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null"; or the organism is not found in NCBI taxonomy.
MWdedicatedSAMPLE_TYPEfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or decoding the JSON file containing the study details has failed due to syntax error there; or the organism was not found in NCBI taxonomy.
+ + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["blood serum", "serum", "A2780cisR cell", "muscle", "feces", "Acetonitrile:H2O (1:3)"]["blood serum", "urine", etc.]
MW["Urine", "urine", "BLOOD", "Serum", "Plasma, Liver"]["blood serum", "urine", etc.]
+ +## Results +Organism part details are available in metabolomics repositories. The use of different sources of ontologies was easy to see when using terms such as "Blood" vs "blood". Additionally, values other than organism parts were sometimes provided, such as "Acetonitrile:H2O (1:3)". + +The most used part was blood serum, followed by urine, blood plasma, liver, and others. + +

+ +

A rough estimate of the percentages of all studies in MTBLS and MW repositories based on the organism part
+

+ +Here one can see the number of studies providing the organism part and its value. +

+ +

The number of studies in MTBLS and MW based on the organism part
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/ph.md b/docs/advanced-guides/nmr-repositories/ph.md new file mode 100644 index 00000000..1e309850 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/ph.md @@ -0,0 +1,46 @@ +# Sample pH +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/ph.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +The [definition of the pH](https://terminology.nfdi4chem.de/ts/search?q=ph) is easily [found in ontologies](https://terminology.nfdi4chem.de/ts/search?q=ph), the definition of [Sample pH](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1000019) too. However, when users provide this value, it is not clear whether it really means the pH of the sample, the solvent, the solvent with the buffer, or another value. Due to the possible confusion, users tend to provide a detailed explanation within the solvent field as a free text, which unfortunately affects the machine-readability of both the solvent and the pH. You can find examples in the [solvent](/advanced-guides/nmr-repositories/solvent.md) page. + +## Data Sanitisation and Missing Values +It was possible to get the pH details only from [MetaboLights](https://www.ebi.ac.uk/metabolights/). + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSdedicatedSample pHfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null"; or the value cannot be converted into a float.
+ +## Results + +It was possible to obtain the pH only from MTBLS, even though most of the studies didn't provide this metadata. Most samples' pHs were leaning towards the neutral 7, while a small percentage was around 4. + +

+ +

A rough estimate of the percentages of all studies in the MTBLS repository based on the sample pH
+

+ +Here one can see the number of studies providing the pH of the sample and its value. +

+ +

The number of studies in MTBLS based on the sample pH
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/sanitisation.md b/docs/advanced-guides/nmr-repositories/sanitisation.md new file mode 100644 index 00000000..a5436614 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/sanitisation.md @@ -0,0 +1,19 @@ +# Data Sanitisation and Missing Values + +Due to the data being reported, in most cases, as free text; in addition to the databases having their own schemas for data modeling (with few exceptions, as [MetaboLights](https://www.ebi.ac.uk/metabolights/) complies to [ISA Model](https://isa-specs.readthedocs.io/en/latest/isamodel.html)), we had to generate “personalized” scripts for each database to handle its specific way of data reporting; with data being exposed via standard API or not; within a dedicated field or not; as an ontology term or a free text; with a unit in the same field, or in another one, or completely absent. That also led to a lot of cleaning through hard coding, and also to considering some data as missing as the unit is not provided, making the data vague even for human reading, while sometimes cleaning even with hard coding wasn’t possible as each value will need its own cleaning (e.g., with the solvents, unless mapped to an ontology which is out of the scope of this analysis). + +We provide a detailed description of the data sanitization we performed based on the repository and the parameter in the dedicated pages of the parameters. +- We describe the **Field Type** from which the parameter can be extracted as either: + - **dedicated** when the field purpose is to provide this parameter. + - **semi-dedicated** when it is not mainly concerned with providing the desired parameter, but still, it can be found there in most cases. + - **absent** means that no such field was found in the database. +- Then, the field name is mentioned. 
+- Afterwards, given that there is a dedicated or semi-dedicated field, we describe the **Values Readability** whether they are: + - **ontology-driven**. + - **machine-readable** when they are free text, but reasonable cleaning makes them machine-readable in most cases. + - **free text** +- We describe whether the **Unit** was provided as a + - **separate field** + - **integrated** with the values (as this option usually results from free text input, it leads to the possibility of not providing the unit in some cases). + - If no unit is needed, we write it in the table as **none**. +- We also provide examples of the **Input** (the values the parameter got in the repository) with the **Output** we generated from them, with clarification of the meaning of the value **missing** as an output. diff --git a/docs/advanced-guides/nmr-repositories/solvent.md b/docs/advanced-guides/nmr-repositories/solvent.md new file mode 100644 index 00000000..d1e90064 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/solvent.md @@ -0,0 +1,98 @@ +# Solvent +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/solvent.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +The concept of [NMR solvent](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1000330) already exists in [nmr CV](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv), unfortunately, without a definition. The same happens with [NMR buffer](https://terminology.nfdi4chem.de/ts/ontologies/nmrcv/terms?iri=http%3A%2F%2FnmrML.org%2FnmrCV%23NMR%3A1000331). 
Despite that, the values for [solvents, buffers,](https://terminology.nfdi4chem.de/ts/ontologies/chebi) and their [concentrations units](https://terminology.nfdi4chem.de/ts/ontologies/uo/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUO_0000051&viewMode=All&siblings=false) can be ideally provided with ontology terms. + +## Data Sanitisation and Missing Values + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSdedicatedSolventfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null".
MWdedicatedNMR Solventfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or decoding the JSON file containing the study details has failed due to syntax error there.
CENAPTsemi-dedicatedDescriptionfree textnone'in ' was not found in the description, or it was not relevant to the solvent (hard-coding).
NMRShiftDBdedicatedcml:solventfree textnoneThe value is 'unreported' or 'unknown'
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["0.01 M phosphate buffered D2O", "0.154 M saline D20", "0.6 ml of 0.1M phosphate buffered D2O (pH=7.0) solution containing 0.5 mM 3-trimethylsilyl-propionate-2, 2, 3, 3, -d4 (TMSP, δ =0.0 ppm) ", "CD3OD", "H20 + D20", "water"]["0-01-m-phosphate-buffered-d2o", etc.]
MW["10%D20", "100 mM phosphate buffer at pH 7.4 (prepared in D2O) containing 0.1 mM 3-(trimethylsilyl)-propionic-2,2,3,3-d4 acid", "90:10 H2O/D2O (99.96% atom D2O; Cambridge Isotope Labs) with 0.2 mM phosphate buffer (pH 7.4) and 0.25 mM 3-(trimethylsilyl)propionic-2,2,3,3d4-acid", "CDCl3", "D2O and MeOD"]["0-01-m-phosphate-buffered-d2o", etc.]
CENAPT["NMR data of gossypol in DMSOd6 and CDCl3. The dataset contains 1D 1H 13C as well as 2D COSY, HSQC, HMBC, all acquired at 600MHz (Bruker 600MHz spectrometer with CryoProbe (CP DCH 600S3 C/H-D-05 Z)"]["0-01-m-phosphate-buffered-d2o", etc.]
NMRShiftDB["acetone", "Acetone-D6 ((CD3)2CO)", "acetonitril", "chloroform", "Chloroform-D1 (CDCl3)", "water", "Water (D2O)"]["0-01-m-phosphate-buffered-d2o", etc.]
+ +## Results +It was possible to obtain the solvent in most studies and repositories, but it was rarely machine-readable and showed a high level of inconsistency. The following graph can give a rough estimate of popular solvents. However, it is far from accurate when it comes to exact numbers, as one solvent can be mentioned tens of times with different expressions. + +

+ +

A rough estimate of the percentages of all studies in the four repositories based on the NMR solvent (only solvents used in more than 50 studies are shown)
+

+ +Here you can find the whole view of the four repositories with a logarithmic scale where you can easily notice how some solvents get repeated with different expressions. + +

+ +

The number of studies in the four repositories based on NMR solvent (with a logarithmic scale)
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmr-repositories/temperature.md b/docs/advanced-guides/nmr-repositories/temperature.md new file mode 100644 index 00000000..20047a0d --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/temperature.md @@ -0,0 +1,97 @@ +# Temperature +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/temperature.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +While searching in NMR-related ontologies, finding terms about the temperature through an NMR assay wasn't possible. When saying "Temperature" here, we mean the temperature at which the NMR spectroscopy was conducted. Unit-wise, one can easily use [ontology-driven units](https://terminology.nfdi4chem.de/ts/ontologies/uo/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FUO_0000005&viewMode=All&siblings=false). + +## Data Sanitisation and Missing Values + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissingComment
MTBLSdedicatedTemperaturefloatseparate fieldThe field is not provided; or the unit was not found in the list of units we provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null".The provided list of units is ['UO:kelvin', 'UO:kelvin:K','Kelvin','kelvin', 'degree Celsius','celsius', 'degree celsius']
MWdedicatedTemperaturemachine-readableintegratedThe field is not provided; or an expression of numbers was not found in the field; or the value is provided as N/A or other similar expressions; or the unit (words with c or d for Celsius and k for Kelvin) was not found; or decoding the JSON file that contains the study details has failed due to syntax error there.
CENAPTnonenonenonenone
NMRShiftDBdedicatedcml:tempfloatseparate fieldnoneThe unit is ontology-driven as it is always Kelvin
+ + + + + + + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["288.1"]"288", "293"
MW["4C", "4 oC", "281 ± 0.5 K", "15 celsius", "23d", "22 degree Celsius", "27", "5mm", "295.9", "36,85"]"288", "293"
NMRShiftDB["243","300.0136", "Unreported", "Unknown"]"288", "293"
+ +## Results +The temperature was easy to obtain in all three repositories that provide it. However, the inconsistency encountered in the values and units made it time-consuming to harmonize the data. In the end, having units that come from (or can be mapped to) ontologies made most of the data clear and machine-readable. Temperatures provided in Celsius were converted to Kelvin, and with visualization, one can see that most of the assays were conducted around room temperature. + +

+ +

The percentages of all studies in the three repositories based on the Temperature
+

+ +Still, some of the values were reported wrongly in Kelvin, as values like 0 and 37 can be seen in the graph showing the number of studies for each temperature (with a logarithmic scale). + +

+ +

The number of studies in the three repositories based on the Temperature (with a logarithmic scale)
+

diff --git a/docs/advanced-guides/nmr-repositories/variant.md b/docs/advanced-guides/nmr-repositories/variant.md new file mode 100644 index 00000000..6f74e3d3 --- /dev/null +++ b/docs/advanced-guides/nmr-repositories/variant.md @@ -0,0 +1,70 @@ +# Variant +[Notebook link](https://github.com/NFDI4Chem/repo-scripts/blob/main/notebooks/variant.ipynb) where you can find all the graphs. + +Data created on 17.10.2022 at 19:32:45 + +Data updated on 17.10.2022 at 19:32:45 + +## Support by Ontologies +[The BRENDA Tissue Ontology - BTO](https://www.ebi.ac.uk/ols/ontologies/bto) and [Experimental Factor Ontology - EFO](https://www.ebi.ac.uk/ols/ontologies/efo) are good sources for variants. + +Variants are found only in metabolomics-related repositories, i.e., MTBLS and MW. + + + + + + + + + + + + + + + + + + + + + + + + + + +
Field TypeField NameValues ReadabilityUnitMissing
MTBLSdedicatedVariantfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or the study "assays" value is "null"; or the organism is not found in NCBI taxonomy.
MWdedicatedGENOTYPE_STRAINfree textnoneThe field is not provided; or the value is provided as N/A or other similar expressions; or decoding the JSON file containing the study details has failed due to syntax error there; or the organism was not found in NCBI taxonomy.
+ + + + + + + + + + + + + + + + + +
Input ExamplesOutput
MTBLS["Mus musculus str. SAMP1/YitFc", "BY4741", "Thoroughbred", "EFO:Thalassiosira pseudonana CCMP1335"]["c57Bl-6", "c3h-hen", etc.]
MW["C57BL/6", "Swiss Webster Mice", "C3H/HeN"]["c57Bl-6", "c3h-hen", etc.]
+ +## Results +Variant details are available in metabolomics repositories. The most used variant was "C57BL/6J". + +

+ +

A rough estimate of the percentages of all studies in MTBLS and MW repositories based on the variant
+

+ +Here one can see the number of studies providing the variant and its value. +

+ +

The number of studies in MTBLS and MW based on the variant
+

\ No newline at end of file diff --git a/docs/advanced-guides/nmrium/nmrium.md b/docs/advanced-guides/nmrium/nmrium.md new file mode 100644 index 00000000..c7fd79c1 --- /dev/null +++ b/docs/advanced-guides/nmrium/nmrium.md @@ -0,0 +1,35 @@ +# NMRium + +[NMRium](https://www.nmrium.org/) is an open-source NMR spectra processing tool. + + +   NMRium + +It provides a bundle of handy features, such as: +* Open various vendors and open file formats (JCAMP-DX file, a zipped Bruker folder, or a JEOL file). +* It accepts 1D (FID and FT) and 2D spectra (FT only). +* Advanced peak picking detection for 1D and 2D NMR spectra. Also, it can generate the NMR string required for publication or patent. +* All the processing and assignment can be stored as a “.nmrium” file. This file contains the original data as well as all the processing that was applied on the spectrum. Assignments of the molecule are also saved in the file. Additionally, export in NMReData is possible as well. + +## Demo + +