diff --git a/dist/build.xml b/dist/build.xml index 5657f40..abb93dd 100644 --- a/dist/build.xml +++ b/dist/build.xml @@ -83,7 +83,6 @@ - @@ -105,7 +104,6 @@ - diff --git a/doc/publications/2010-06 Method/2012-06 BMC/CoverLetter_KGTMethod_BMC.doc b/doc/publications/2010-06 Method/2012-06 BMC/CoverLetter_KGTMethod_BMC.doc deleted file mode 100644 index 2ec5ac3..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/CoverLetter_KGTMethod_BMC.doc and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/Coverletter_KEGGtranslator_method_bmc.pdf b/doc/publications/2010-06 Method/2012-06 BMC/Coverletter_KEGGtranslator_method_bmc.pdf deleted file mode 100644 index fb9c09b..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/Coverletter_KEGGtranslator_method_bmc.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/KEGGtranslator_method_bmc.pdf b/doc/publications/2010-06 Method/2012-06 BMC/KEGGtranslator_method_bmc.pdf deleted file mode 100644 index f0e00fb..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/KEGGtranslator_method_bmc.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/KEGGtranslator_method_bmc.tex b/doc/publications/2010-06 Method/2012-06 BMC/KEGGtranslator_method_bmc.tex deleted file mode 100644 index 7c62864..0000000 --- a/doc/publications/2010-06 Method/2012-06 BMC/KEGGtranslator_method_bmc.tex +++ /dev/null @@ -1,783 +0,0 @@ -%% BioMed_Central_Tex_Template_v1.06 -%% % -% bmc_article.tex ver: 1.06 % -% % - -%%IMPORTANT: do not delete the first line of this template -%%It must be present to enable the BMC Submission system to -%%recognise this template!! - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% LaTeX template for BioMed Central %% -%% journal article submissions %% -%% %% -%% <14 August 2007> %% -%% %% -%% %% -%% Uses: %% -%% cite.sty, url.sty, bmc_article.cls %% -%% ifthen.sty. 
multicol.sty %% -%% %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% For instructions on how to fill out this Tex template %% -%% document please refer to Readme.pdf and the instructions for %% -%% authors page on the biomed central website %% -%% http://www.biomedcentral.com/info/authors/ %% -%% %% -%% Please do not use \input{...} to include other tex files. %% -%% Submit your LaTeX manuscript as one .tex document. %% -%% %% -%% All additional figures and files should be attached %% -%% separately and not embedded in the \TeX\ document itself. %% -%% %% -%% BioMed Central currently use the MikTex distribution of %% -%% TeX for Windows) of TeX and LaTeX. This is available from %% -%% http://www.miktex.org %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\NeedsTeXFormat{LaTeX2e}[1995/12/01] -\documentclass[10pt]{bmc_article} - - - -% Load packages -\usepackage{cite} % Make references as [1-4], not [1,2,3,4] -\usepackage{url} % Formatting web addresses -\usepackage{ifthen} % Conditional -\usepackage{multicol} %Columns -\usepackage[utf8]{inputenc} %unicode support -%\usepackage[applemac]{inputenc} %applemac support if unicode package fails -%\usepackage[latin1]{inputenc} %UNIX support if unicode package fails -\urlstyle{rm} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% If you wish to display your graphics for %% -%% your own use using includegraphic or %% -%% includegraphics, then comment out the %% -%% following two lines of code. %% -%% NB: These line *must* be included when %% -%% submitting to BMC. %% -%% All figure files must be submitted as %% -%% separate graphics through the BMC %% -%% submission process, not included in the %% -%% submitted article. 
%% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\def\includegraphic{} -\def\includegraphics{} - - - -\setlength{\topmargin}{0.0cm} -\setlength{\textheight}{21.5cm} -\setlength{\oddsidemargin}{0cm} -\setlength{\textwidth}{16.5cm} -\setlength{\columnsep}{0.6cm} - -\newboolean{publ} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% You may change the following style settings %% -%% Should you wish to format your article %% -%% in a publication style for printing out and %% -%% sharing with colleagues, but ensure that %% -%% before submitting to BMC that the style is %% -%% returned to the Review style setting. %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%Review style settings -%\newenvironment{bmcformat}{\begin{raggedright}\baselineskip20pt\sloppy\setboolean{publ}{false}}{\end{raggedright}\baselineskip20pt\sloppy} - -%Publication style settings -%\newenvironment{bmcformat}{\fussy\setboolean{publ}{true}}{\fussy} - -%New style setting -\newenvironment{bmcformat}{\baselineskip20pt\sloppy\setboolean{publ}{false}}{\baselineskip20pt\sloppy} - - -%% CUSTOM STUFF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\usepackage{booktabs} % allows \toprule and other table formatting utilities -\usepackage{multirow} -\usepackage[table]{xcolor} % U.a. allows for defining colors in HTML models -\usepackage[pdfborder={0 0 0}]{hyperref} % links, but no colored boxes -\usepackage{amssymb} % Provides the checkmark symbol - -% Figure captions are just used for the label and numbering. Do not show any text within the figure environment. 
-%\usepackage[labelformat=empty]{caption} - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Some macros -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\def\plus{\textsuperscript{+}} -\newcommand{\TODO}[1]{\textcolor{red}{\textbf{#1}}} - -% B -\newcommand{\BiochemicalReaction}{\texttt{Bio\-chemical\-Reaction}} -\newcommand{\BiochemicalReactions}{\texttt{Bio\-chemical\-Reaction}s} - -% C -\newcommand{\Catalysis}{\texttt{Cata\-lysis}} -\newcommand{\ComplexAssembly}{\texttt{Complex\-Assembly}} -\newcommand{\Conversion}{\texttt{Conversion}} -\newcommand{\control}{\texttt{control}} -\newcommand{\Control}{\texttt{Control}} -\newcommand{\Controller}{\texttt{Controller}} -\newcommand{\Controllers}{\texttt{Controller}s} -\newcommand{\Controlled}{\texttt{Controlled}} -\newcommand{\conversion}{\texttt{conversion}} -\newcommand{\Complex}{\texttt{Complex}} -\newcommand{\Complexes}{\texttt{Complex}es} - -% E -\newcommand{\EntityReference}{\texttt{EntityReference}} -\newcommand{\EntityReferences}{\texttt{EntityReference}s} -\newcommand{\Entity}{\texttt{Entity}} - - -% F -\newcommand{\functionTerm}{\texttt{functionTerm}} -\newcommand{\functionTerms}{\texttt{functionTerm}s} - -% I -\newcommand{\Interaction}{\texttt{Inter\-action}} -\newcommand{\InteractionVocabulary}{\texttt{Inter\-action\-Vo\-ca\-bu\-la\-ry}} - -% M -\newcommand{\model}{\texttt{model}} -\newcommand{\Modulation}{\texttt{Modulation}} -\newcommand{\MolecularInteraction}{\texttt{Mo\-le\-cu\-lar\-In\-ter\-ac\-tion}} -\newcommand{\ModifierSpeciesReference}{\texttt{Mo\-di\-fier\-Species\-Reference}} - - -% P -%\newcommand{\PhysicalEntity}{\texttt{Physical\-Entity}} -\newcommand{\physicalInteraction}{\texttt{physical\-Inter\-action}} -\newcommand{\protein}{\texttt{protein}} -\newcommand{\proteins}{\texttt{protein}s} - -% Q -\newcommand{\qualitativeModel}{\texttt{qualitative\-Model}} -%\newcommand{\quantitativeModel}{\texttt{quantitative\-Model}} 
-\newcommand{\qualitativeSpecies}{\texttt{qualitative\-Species}} - -% R -\newcommand{\reaction}{\texttt{re\-ac\-tion}} -\newcommand{\reactions}{\texttt{re\-ac\-tion}s} - -% S -\newcommand{\species}{\texttt{species}} -\newcommand{\smallMolecule}{\texttt{small\-Molecule}} -\newcommand{\SmallMolecules}{\texttt{Small\-Molecule}s} - - -% T -\newcommand{\TemplateReactionRegulation}{\texttt{Template\-Reaction\-Regulation}} -\newcommand{\TemplateReaction}{\texttt{Template\-Reaction}} -\newcommand{\transition}{\texttt{transition}} -\newcommand{\transitions}{\texttt{transition}s} -\newcommand{\Transport}{\texttt{Transport}} -\newcommand{\TransportWithBiochemicalReaction}{\texttt{Transport\-With\-Biochemical\-Reaction}} - -% X -\newcommand{\Xrefs}{\texttt{Xref}s} - -\hyphenation{ -bal-ance -con-ver-ter -trans-la-tor -KEGG-con-ver-ter -KEGG-trans-la-tor -mo-le-cule -be-tween -straight-for-ward -} - -%% END MACROS SECTION - - -% Begin ... -\begin{document} -\begin{bmcformat} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% Enter the title of your article here %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\title{Precise generation of systems biology models from KEGG pathways} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% Enter the authors here %% -%% %% -%% Ensure \and is entered between all but %% -%% the last two authors. 
This will be %% -%% replaced by a comma in the final article %% -%% %% -%% Ensure there are no trailing spaces at %% -%% the ends of the lines %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\author{ -Clemens Wrzodek\correspondingauthor$^1$ -\email{Clemens Wrzodek\correspondingauthor - clemens.wrzodek@uni-tuebingen.de} and -Finja B\"uchel$^1$ \email{Finja B\"uchel - finja.buechel@uni-tuebingen.de} and -Andreas Dr\"ager$^1$ \email{Andreas Dr\"ager - andreas.draeger@uni-tuebingen.de} and -Manuel Ruff$^1$ \email{Manuel Ruff - manuel.ruff@student.uni-tuebingen.de} and -Andreas Zell$^1$ \email{Andreas Zell - andreas.zell@uni-tuebingen.de} -} - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% Enter the authors' addresses here %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\address{% - \iid(1)Center for Bioinformatics Tuebingen (ZBIT), University of Tuebingen, Sand 1, 72076 T\"ubingen, Germany -}% - -\maketitle - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% The Abstract begins here %% -%% %% -%% Please refer to the Instructions for %% -%% authors on http://www.biomedcentral.com %% -%% and include the section headings %% -%% accordingly for your article type. %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\begin{abstract} -\textbf{Background:} The KEGG PATHWAY database provides a plethora of pathways for a diversity of organisms. -All pathway components are directly linked to other KEGG databases, such as KEGG COMPOUND or KEGG REACTION. -% -Therefore, the pathways can be extended with an enormous amount of information and provide a foundation for initial structural modeling approaches. -% -As a drawback, KGML-formatted KEGG pathways are primarily intended for visualization purposes and often omit important details for the sake of a clear arrangement of its entries. -Thus, a direct conversion into systems biology models would produce incomplete and erroneous models. 
- -\textbf{Results:} Here, we present a precise method for processing and converting KEGG pathways into initial metabolic and signaling models encoded in the standardized community pathway formats SBML (Levels 2 and 3) and BioPAX (Levels 2 and 3). %, including the qualitative models, groups and layout extensions, -This method involves correcting invalid or incomplete KGML content, creating complete and valid stoichiometric reactions, translating relations to signaling models and augmenting the pathway content with various information, such as cross-references to Entrez Gene, OMIM, UniProt, ChEBI, and many more. -% -Finally, we compare several existing conversion tools for KEGG pathways and show that the conversion from KEGG to BioPAX does not involve a loss of information, whilst lossless translations to SBML can only be performed using SBML Level~3, including its recently proposed qualitative models and groups extension packages. - -\textbf{Conclusions:} Building correct BioPAX and SBML signaling models from the KEGG database is a unique characteristic of the proposed method. Further, there is no other approach that is able to appropriately construct metabolic models from KEGG pathways, including correct reactions with stoichiometry. The resulting initial models, which contain valid and comprehensive SBML or BioPAX code and a multitude of cross-references, lay the foundation to facilitate further modeling steps. 
- -% Notes for conclusions -%The proposed method is the first Appropriate encoding of signaling models in SBML or BioPAX is -%Signaling, kein anderer stoichiometry, lay foundations (initial) facilitate further steps, annotations -% -\textbf{Keywords:} KEGG, KGML, SBML, BioPAX, modeling, systems biology, qualitative modeling, quantitative modeling, converter, comparison -\end{abstract} - - - -\ifthenelse{\boolean{publ}}{\begin{multicols}{2}}{} - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% The Main Body begins here %% -%% %% -%% Please refer to the instructions for %% -%% authors on: %% -%% http://www.biomedcentral.com/info/authors%% -%% and include the section headings %% -%% accordingly for your article type. %% -%% %% -%% See the Results and Discussion section %% -%% for details on how to create sub-sections%% -%% %% -%% use \cite{...} to cite references %% -%% \cite{koon} and %% -%% \cite{oreg,khar,zvai,xjon,schn,pond} %% -%% \nocite{smith,marg,hunn,advi,koha,mouse}%% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - - -%%%%%%%%%%%%%%%% -%% Background %% -%% -\section*{Background} - -The KEGG PATHWAY database provides a valuable resource for initial modeling approaches of specific biological networks \cite{Kanehisa2000,KEGG}. The database contains pathway maps for a multitude of different organisms and most provided information is cross-linked with other KEGG databases. Since many years, this database has been one of the most important sources for building initial structural models of various pathways \cite{Bauer-Mehren2009,Oberhardt2009}. All pathway information is stored in KGML formatted XML-files, which are barely supported by other applications. In systems biology, two wide-spread formats for modeling and exchanging pathways are the Systems Biology Markup Language (SBML) \cite{Finney2003} and Biological Pathway Exchange (BioPAX) \cite{Demir2010_short}. 
These formats can be used with graphical modeling applications (e.g., CellDesigner \cite{Funahashi2008} or Cytoscape \cite{Cytoscape}), complemented with rate laws (e.g., SBMLsqueezer \cite{SBMLSqueezer}), used for flux balance analysis (e.g., FASIMU \cite{Fasimu}), and many more applications. Therefore, converters exist that perform mostly basic conversions from KGML to those formats \cite{KEGG2SBML,Kuentzer2007,KEGGconverter,KEGG2BioPAX_and_SBML}. The drawback of many of those converters is that even for creating initial models, a basic translation of a KGML document to an SBML or BioPAX document is not sufficient. - -The KGML documents provided by KEGG are mainly designed for graphical representations of pathways and consist of entries (which correspond to nodes in a pathway map), relations (which correspond to edges in a pathway map) and reactions. Relations are mainly contained in signaling maps and encode information such as ``A activates B". Reactions are primarily contained in metabolic pathway maps and consist of substrates, products and information about reversibility of the reaction. Given this information, it seems straightforward to derive an algorithm for viable metabolic models. But a closer look at the actual maps shows that even those reactions are often created for visualization and not for modeling or simulation purposes. Reactions are sometimes bundled, i.e., one reaction instance is built and multiple reaction identifiers, pointing to different reactions, are assigned. There are often missing reactants for reactions, stoichiometric information is omitted and also the list of enzymes, catalyzing a reaction, is not necessarily entirely contained in the KGML document. Similar difficulties arise for the entries in a KGML document. 
-For the sake of a high-quality graphical representation of the pathway, entries or other elements are sometimes duplicated.% in the KGML file -When interpreting the information content of those files, duplications must be taken into account. -% -Furthermore, a KGML document may contain references to entries, which are not physically present in the actual organism and the KGML specification even allows entries to be reactions. -All those exemplary mentioned problems show that simple one-to-one translations of KEGG pathway maps to other formats are not sufficient to build reliable and useful models. - -To overcome all those drawbacks, we deeply investigated the KGML documents, as well as the content of all cross-linked KEGG databases, and developed strategies for building useful initial models in SBML and BioPAX. Besides automatically correcting many of the mentioned issues, the proposed method includes extensive annotation and augmentation of all provided information to ease further model building and usage of those translated pathway maps. This ranges from adding simple database cross-references (e.g., to UniProt or Entrez Gene) over annotation of chemical formulas and molecular weight of small molecules, to an automated atom balance check of all reactions. All those strategies are now implemented in the second release of the KEGGtranslator application \cite{Wrzodek2011} and described in detail in the following sections. - - - - -\section*{Preparation of pathway models from the KEGG database} - -Several subsequent steps are involved in the creation of initial models from KEGG pathways. All of these steps are described in detail in the following sections and depicted as a flowchart in Figure~1. - -\subsection*{The KEGG Markup Language (KGML)} -KEGG uses the KGML format to encode its pathways \cite{KGML}. For each pathway, a generic reference pathway exists that is derived for a plethora of different organisms. 
All nodes in those pathways mainly correspond to proteins, small molecules, other referenced pathways or complexes and are encoded as entries in KGML. These entries have a type attribute that further specifies its nature. Additionally, they may have a graphics attribute that is essential for pathway visualizations. Entries corresponding to groups contain components that reference their contained entries. - -Besides entries, KGML specifies reactions, which contain substrates and products that are essentially references to the corresponding entries. The only additional information that is given for reactions is a type attribute: either `reversible' or `irreversible'. Moreover, KEGG specifies relations, which are primarily important for the visualization of signaling pathways. Relations contain network connections between two entries, such as ``A phosphorylates B", or ``A inhibits B" but they do not provide sufficient information for conversions to biochemical reactions. - - -\subsection*{Preprocessing and correcting issues in the input KGML} - - -Prior to converting the KEGG pathways to other modeling languages, several issues need to be corrected in preprocessing steps. -% -Operations that are not linked to SBML or to BioPAX are performed as first step directly on the input KGML. These include operations that involve adding or removing entries from the KGML document, as well as processing contained reactions. The actual conversion to models is independent of those steps and is performed after the preprocessing. -To generate reliable models, one might want to remove links to other pathway maps from the document. These referenced pathway maps are no physical instances and thus need to be ignored for some model simulation software. However, they might be required for cross-linking pathways. Furthermore, orphans (i.e., entries that are not present in reactions or relations) might be useless for some modeling approaches and therefore may also be removed. 
-% -An important step towards building metabolic models are correct biochemical reactions. The reactions specified in the KGML require significant preprocessing in order to reliably translate these to SBML or BioPAX. -% -% -KEGG files often contain bundled reactions. These must be disassembled into separate KGML reactions. Otherwise, it is not possible to create balanced and correct biochemical reactions if models keep multiple reactions that are bundled into one record. -Since the information provided in the KGML is limited, the KEGG API needs to be queried for further correction steps. From the KEGG API, information about reversibility of the reaction is retrieved, as well as the reaction equation, including all substrates, products, catalysts, and stoichiometric information. The reversibility is directly annotated on the reaction, the stoichiometric information has to be stored in separate classes, which are later translated to the desired output format. The equation is used to check for missing reaction participants. But simply comparing all KEGG identifiers that are present in the KGML with the reaction equation is not adequate. KEGG consists of many separate databases that contain information about compounds, drugs, glycans, etc. Therefore, one compound might have multiple KEGG identifiers, e.g., one in KEGG COMPOUND and another one in KEGG DRUG. The reaction equations specify just one identifier for each participant, which is any of all available identifiers for an object. Therefore, more queries to the KEGG API are necessary in order to fetch all synonyms for all identifiers. Now, it is possible to compare all reactants with the pathway components and check for missing reaction participants and eventually add those to the KGML. A similar method is required to check for missing enzymes (i.e., reaction modifiers) -- we use Enzyme Commission numbers (EC numbers) to check for missing enzymes. 
- -One last important preprocessing step might be performed before converting the pathways to models. -The KEGG database uses information about orthology to provide pathway maps for different organisms. Enzymes catalyzing reactions are annotated using EC numbers, which are independent of actual organisms. In some cases, this leads to annotated enzymes or entries in the KGML, for which no physical instance in the current organism of interest is known. In other words, the entry probably does not exist in the current organism or its existence has not yet been proven. To visualize this information, KEGG changes the background color of those orthologous nodes to white. These nodes should also be removed in order to obtain organism-specific models. - -\subsection*{Atom balance of reactions} - -After the described preprocessing step, the KGML document contains unbundled and complete reactions, for which the equation and stoichiometry have been annotated. Using the KEGG API, the chemical formula of each compound participating in a reaction can be fetched. By using this information together with the stoichiometry, it is possible to count and compare all atoms on the substrate and product side. There are some further properties that need to be considered: A generic `R' is sometimes used on the substrate and product side to indicate any substituent. Variables like $n$ and $n+1$ are used by KEGG to create more generic reactions. During our tests, we detected some simple cases in which an H\plus{} or P\plus{} was missing, but also some other cases in which multiple atoms (e.g., 2\,C, 3\,H and 1\,P) were missing. Automatically correcting those issues is not recommended because the real missing components are unknown. -% -For example, if a P\plus{} is missing on the substrate side, larger compounds could be missing on any side of the reaction. The possibilities of missing components on both sides include ATP~$\longrightarrow$~ADP, NADPH~$\longrightarrow$~NADH, and many others. 
-Therefore, our method appends the result of each atom check as comment on every reaction and researchers might have to manually correct reactions with missing atoms. - - -\subsection*{Conversion and annotation of the KGML document} - - -The completed and corrected KGML document can now be used to generate models. Therefore, conversions to BioPAX, SBML, SBML-qual and several other formats are required. Typically, the model instance has to be initialized and all entries need to be added to the model. Caution needs to be taken in this step, because multiple copies of an entry might be existent in one KGML document. Usually, every graphical copy catalyzes different reactions. But for systems biology models, only one element should be created for all copies, representing a union of all physically identical entries. Furthermore, KGML specifies an entry type called `reaction', which should not be converted to a physical entity in the resulting model. -Depending on the modeling language, either the reactions or the relations or both need to be converted to the chosen format. - - -Besides those conversion steps, additional operations are required in order to facilitate further modeling efforts by researchers. This includes extensive annotations and comments for all elements. Hence, Gene Ontology terms, describing the elements and their function, as well as identifiers for a plethora of other databases for genes, proteins, interactions, structural information, small molecules, etc. are added to the model. In more detail, identifiers are added for Entrez Gene, OMIM, Ensembl, UniProt, ChEBI, DrugBank, Gene Ontology, HGNC, PubChem, 3DMET, NCBI Taxonomy, PDBeChem, GlycomeDB, LipidBank, EC-Numbers (enzyme nomenclature) and various KEGG databases (\uppercase{gene, glycan, reaction, compound, drug, pathway, orthology}). 
-% -Besides those cross-references, other helpful human and machine-readable annotations are added, for example, official gene symbols, synonyms, human-readable descriptions, links to more resources or visualizations, and the chemical formula and molecular weight for small molecules. - -The annotation of the models is an important step, because simulations on real data or simple experimental data visualization tools require unique identifiers to map the experimental data on the pathway structure. If models provide a simple data structure with labels, but no reference identifiers, they are hardly usable in conjunction with experimental data. - - -\subsection*{KEGG to BioPAX} - -Today, Level~3 is the most recent Level of BioPAX. But Level~2 is still common and there are some data structures in Level~3 that are not available in Level~2. Therefore, separate converters for BioPAX Level~2 and for Level~3 are required. First of all, a BioPAX \model{} has to be created and a pathway object, corresponding to the input KGML, needs to be added to the \model. Then, several annotations and cross-references are defined for this pathway. This includes, for instance, the organism, cross-references to other databases, and gene ontology terms to define the pathway's function. The next step involves mapping each KGML element to a corresponding BioPAX element. Figure~2 gives an overview of these mappings. - -Having the initial pathway model, the next step is to create BioPAX elements for each KGML entry. This translation mainly depends on the type of the KGML entry and is listed in detail in Table~1. Entries with the same identifier (graphical copies of the same element) are grouped to one instance and only one BioPAX element is created for those. Depending on the just created BioPAX element, further annotation steps are required. For \Complexes, we need to add all of its components. 
For \SmallMolecules, we add the molecular weight and chemical formula to the corresponding BioPAX fields, which facilitates further modeling steps. For each element, cross-references to other databases and more annotations, as described in the previous section, are added. - -KEGG reactions always correspond to biochemical reactions. Thus, a \BiochemicalReaction{} is the appropriate data structure for those reactions and one instance of this class is created for each reaction. If catalyzing enzymes are annotated, a \Catalysis{} instance is created. This \Catalysis{} has all catalyzing enzymes as \Controllers{} and the \BiochemicalReaction{} as \Controlled{} element. The reaction is annotated with the reaction direction and if it is reversible or not. Further, the stoichiometry of each participant is annotated, as well as the EC numbers of all catalyzing enzymes. Even to the reactions, human readable supporting information is added, like the reaction equation, other pathways in which this reaction also occurs, and a generic description. In addition, the result of the atom balance check is added as further comment, together with comprehensive information which atoms are on the substrate side, which are on the product side and the difference between them. - -Next to biochemical reactions, BioPAX also supports other kinds of relationships between entities. -BioPAX distinguishes between interactions, for which one can specify a source and a target (called \Conversion), and interactions describing a pool of interacting components (called \Interaction). -For instance, to express KEGG relations, which have no associated chemical equation but structural information such as ``A activates B", a \Conversion{} can be used. -In contrast, an \Interaction{} is especially useful for cases, in which information is missing or no direction is available. 
-% -For example, a relation of type binding with two participants just allows for expressing ``A binds B", but no other conclusion can be drawn from such a relation. Therefore, all relations, from which no direction can be inferred, are converted to a \physicalInteraction{} in BioPAX Level~2 and to a \MolecularInteraction{} in BioPAX Level~3 (the Level~2 \physicalInteraction{} has been replaced by \MolecularInteraction{} in Level~3). All relations from which a direction can be inferred are converted to a BioPAX \Conversion{}. -An \InteractionVocabulary{} that specifies the type of interaction as SBO term, GO term and human-readable string is created for each interaction. Table~2 shows in detail how each relation is converted and which SBO and GO terms are being used. - - - -\subsection*{KEGG to SBML} - -Even though it is not the latest release of SBML, Level~2 Version~4 is still used in many applications and, hence, should be supported for the conversion of metabolic models. The most recent SBML Level~3 release introduces extension packages and is required to include qualitative models (qual), groups, and layout information in the document, which are essential for modeling signaling pathways. -% -At first glance, conversion of KGML to SBML seems to be simple. This is also suggested by the mapping scheme, depicted in Figure~3. But many properties in SBML are encoded in other fields than actual class instances, and thus are not directly visible to researchers. KEGG defines entries and an entry type, which specifies if the entry corresponds to a protein, complex, small molecule, referenced pathway map, or some other type. BioPAX provides different classes to distinguish between those types. SBML, similar to KGML, just has a class named \species{} to encode all those entries. The type of the \species{} should be specified by using terms from the Systems Biology Ontology (SBO terms) \cite{SBO}. 
These SBO terms are hierarchically organized and only SBO terms from the `material entity' branch should be used to encode the entities. Table~1 shows which SBO terms are most appropriate to encode the different KGML entries. Furthermore, as in BioPAX translations, it is important to group graphical copies of the same entries to one element and to create only one \species{} element for this entry. -To make the model usable for further applications, extensive annotations and references to other databases are added, using standardized controlled vocabulary (CV) terms and MIRIAM identifiers \cite{Juty2012,Novere2005}. Further, a description, various synonyms, the CAS number, chemical formula, a reference picture (structural formula for compounds, image of the pathway-map for pathways), molecular weight, and mass are added as human-readable annotation, if available. - -Groups are not supported by SBML-core. In order to encode entries of type `group' in SBML Level~3, one can use the groups extension package \cite{SBMLgroups}. To encode groups in SBML prior to Level~3, the only way is to use annotations, for example, by adding a CV term with a \texttt{BQB\_IS\_ENCODED\_BY} or \texttt{BQB\_HAS\_PART} qualifier that specifies the contents of the group. In any case, an SBO term should also be used, which marks this \species{} as a complex of multiple other \species{}. - -KEGG reactions are converted to SBML \reactions{} with correct SBO terms for substrates (SBO:0000015) and products (SBO:0000011). If the reaction is reversible, a generic reactant SBO term (SBO:0000010) should be applied to all reaction participants. In addition, the reversibility is annotated on the \reaction{} itself and the stoichiometry is annotated on all reaction participants. Catalyzing enzymes are included as \ModifierSpeciesReference{} and CV terms, referring to the KEGG reaction identifier as well as all pathways, in which this reaction occurs, are added. 
Human-readable annotations on \reactions{} include the reaction definition, equation, a reference to the reaction equation as HTML-image, and the result of the atom balance check (i.e., if there are missing atoms in the reaction). - -Relations are required to encode signaling pathways but cannot properly be included into core SBML. There is no structure that encodes, e.g., ``A activates B" -- we can only add reactions to SBML. For SBML Level~3, the recently proposed qualitative models (qual) extension package solves this problem \cite{QualSpecification}. This extension is designed for qualitative modeling and allows for -modeling relationships that cannot be described in detail. -%creating transitions, that just specify input, output and a relation between those. -Thus, to encode the KEGG relations, we have to convert the \model{} to a \qualitativeModel{} and create a qualitative \transition{} for each relation. An SBO term, as given in Table~2, is assigned to the \transition{} to specify its type. A GO term, mentioned in the same table, is further added as CV term on the \transition{}. - - -\subsection*{Further KGML characteristics} - -\subsubsection*{KGML entries that are reactions.} -The KGML specification allows entries to have a type called `reaction'. This can be used, for example, to let a relation point to a reaction. Actually, KGML only allows entries to be targets of relations but these constructs can be used to relax the constraints. However, BioPAX naturally allows interactions to point to other interactions as sources or targets. Hence, the document structure is not invalidated if entries with type `reaction' are converted to real reactions in BioPAX and every use of this entry is replaced by using the BioPAX reaction. - -In SBML, these entries are also converted to real reactions. No \species{} is created for entries with type `reaction' in SBML-core. 
For SBML-qual, the specification has similar requirements as KGML: all \transitions{} must have \qualitativeSpecies{} as sources or targets. Therefore, for SBML-qual the translation is similar to the source KGML and a \qualitativeSpecies{} with adequate annotation is created for entries with type `reaction'. - -\subsubsection*{Relations of subtype `compound'.} -Some KGML documents include reactions and exclusively relations of subtype `compound'. These compound-relations are mostly relations between enzymes and compounds. KEGG states that this compound is ``shared with two successive reactions [\dots]" \cite{KGML}. In other words, these relations are copies of reactions that have been created by KEGG for the sake of better graphical representation of the pathway. Thus, if a converter translates both, the reactions and the relations, those compound-relations contain no additional information and should be skipped. - -\subsubsection*{Documents with glycans instead of compounds.} -Sometimes, KGML specifies glycans as reaction participants instead of compounds. Actually, there is nothing wrong with this, except that the KEGG API often returns reaction equations with compound identifiers and some attributes, such as chemical formula or molecular weight, are exclusively available for compounds. This leads to reactions that are erroneously detected as incorrect or to missing chemical formulas. Therefore, if a synonymous compound identifier is available for a KEGG glycan or another KEGG database identifier that contains synonyms in KEGG COMPOUND, it is advisable to fetch and internally work with the compound identifier. Otherwise, it is very likely that duplicates of the same entries but with different identifiers are created in a model and some relationships are not correctly resolved. - - -\subsection*{Implementation and availability} -All described methods are implemented in the second release of KEGGtranslator (since version 2.0). 
-% -The application uses and includes Paxtools, a Java\texttrademark{} library for working with BioPAX that facilitates building and writing the internal BioPAX data structure (\url{http://www.biopax.org/paxtools.php}). -% -To establish the SBML data structure, KEGGtranslator uses the Java\texttrademark{} library JSBML \cite{JSBML} and supports SBML Level~2 Version~4 \cite{SBMLl2} and SBML Level~3 Version~1 \cite{SBMLl3}. - -KEGGtranslator is implemented in Java\texttrademark, provides an interactive, user-friendly and easy-to-use graphical user interface (GUI), and is freely available under the LGPL version 3 license from \url{http://www.cogsys.cs.uni-tuebingen.de/software/KEGGtranslator/}. KGML pathways can be downloaded automatically from within KEGGtranslator. The application can convert KEGG pathways from KGML files to BioPAX Level~2, BioPAX Level~3, SBML (core), SBML (qual), or SBML-core and -qual in one model. If desired, graphical representations can be created in SBGN, SIF, GML, GraphML, JPG and some other formats. Furthermore, many options are provided that control the described (pre-) processing of KEGG conversions and allow for customization of the generated models to meet a great number of different requirements. - - -\section*{Discussion} - -We successfully established a procedure to create initial structural systems biology models from KEGG pathways. These steps aim at complete reconstruction of specific metabolic or signaling networks and hence, go far beyond simple translations. - -But even with all the discussed enhancements and corrections, all models derived from KEGG should only be considered as initial structural models. Many researchers are interested, e.g., in tissue-specific variants of those models. Others want to build kinetic models, constraint-based models, flux-based models, or any other specific model variant. Hence, our goal is to build a solid foundation that can quickly be used for further applications. 
The generation of these models is eased by providing cross-references to many databases, synonyms, descriptions and other information. This helps researchers to further process the generated models to the desired real model. With the help of annotated cross-references, it is quite easy to, e.g., map experimental data on the resulting model and perform simulations, or use the annotated reactions to identify kinetics in databases like SABIO-RK \cite{SabioRK}. - -The models reflect an effort to use all available information about KEGG pathways and consider the specific aspects of SBML or BioPAX to create complete and correct documents. These specific aspects include, for example, usage of SBO terms and MIRIAM URNs for metabolic SBML, as well as using \transitions{} and \qualitativeSpecies{} from the qual package to model signaling networks. For BioPAX, it is important to create correct instances, use cross-references and vocabularies for annotation and fill corresponding fields, e.g., chemical formula or molecular weight of \SmallMolecules{} or the EC numbers of catalyzed \BiochemicalReactions. -% -But besides those properties, there are more aspects of these formats that cannot be satisfied. This is owed to missing information and the aspiration to avoid creating knowledge out of nothing. -% -In SBML, the signaling maps contain \transitions{} that model all relations with information like `phosphorylation' or similar. The qualitative function of \transitions{} is encoded by \functionTerms{}, which define results and conditions in MathML. The information to fill those variables is not available for the KEGG pathways and thus, cannot be given. -% -Further, BioPAX Level~3 provides very interesting constructs to encode several instances of the same protein. For example, one protein might be contained in a pathway in multiple states: inactive (e.g., unphosphorylated), and active (phosphorylated). 
Since Level~3, BioPAX provides \EntityReferences{} that allow for the creation of several entities in different states for a single \Entity{} instance (i.e., protein). Unfortunately, we cannot fully use these structures, because KEGG does not specify whether a protein takes part in a relation in its phosphorylated, raw or any other form. This distinction is simply not available in KEGG databases. -% -Furthermore, a central dogma of BioPAX is to have \Controller{} and \Controlled{} elements to describe various interactions. For example, a \Controller{} could be an enzyme, controlling a reaction, which is then the \Controlled{} object. But if, e.g., KEGG annotates no enzyme on a reaction, or a relation is translated without knowing who controls this relation, no \Controller{} can be specified. -% -% -% -Besides this, KEGG does not provide information about compartmentalization. Some KEGG graphics do contain illustrations of compartments, but this information is hand-drawn in some pathway pictures and not encoded in any XML or referenced database. Hence, the resulting models just contain a default compartment in which all elements reside. - - - - -\subsection*{Comparison to other KEGG converters} - -There are some other approaches to convert KGML to SBML or BioPAX. Most of these approaches perform simple one-to-one conversions and do not augment or correct the content of the document. For visualizing a pathway model, this is not necessarily a problem, because there are almost no required processing steps, apart from the actual format conversion. But for creating initial systems biology models, one should take care of all contained reactions and relations. Some important aspects are, for example, that one reaction really is one complete reaction, that all entities can be mapped computationally onto at least one database, and that the resulting document is valid. We created a list of various criteria to compare different conversion tools.
Table~3 summarizes the result of this comparison. - -Besides the here described method, no referenced converter is able to build signaling networks. All converters focus on metabolic networks only. Before the release of the qualitative models extension for SBML Level~3, it was not possible to appropriately describe signaling networks in SBML. Because all referenced converters focus on SBML Level~1 or Level~2, it is correct that they do not convert signaling models. This is much more plausible than creating pseudo-\reactions{} or similar constructs. The BioPAX converters also focus on KEGG reactions. Generally, relations encoded in KEGG signaling maps seem to be completely ignored, which is incorrect, because BioPAX provides appropriate data structures to encode those relations. - -KEGGconverter \cite{KEGGconverter} is implemented in Java\texttrademark{} and able to translate KGML documents to SBML L2V1. The resulting \species{} (enzymes and small molecules) do not contain any annotations, notes, or SBO terms and are named with a human readable string containing KEGG identifiers in brackets. Thus, to computationally interpret those models and, e.g., map experimental data on them, one would need to reconstruct the KEGG identifier with a regular expression on the name. The conversion is complete (i.e., the complete KGML content is appropriately converted to SBML) and contains no duplicate entries or reactions. But reactions are directly converted as given: No unbundling of grouped reactions or augmenting of missing reactants is performed and the stoichiometry is not set. In our tests, the SBML validator complained that the generated SBML is not valid, because KEGGconverter uses spaces in identifiers which is not allowed in SBML. Besides the KGML conversion, KEGGconverter provides additional functionalities to add kinetics to the resulting models or merge different KGMLs to one model. 
- -KEGG2SBML \cite{KEGG2SBML} is a Perl script for converting KGML documents to valid SBML, supporting all Levels and Versions up to L2V3. This script uses various flat files from KEGG databases as additional resources and is capable of generating appropriate reactions (unbundled, no missing reactants and no duplicates). Unfortunately, the converted document is not complete (some reactions that should be contained in the pathway are missing), stoichiometry is omitted, and \species{} do not have any notes, annotations or SBO terms. All elements are named by their respective human-readable name, which is nice for manual inspections but renders the converted models barely usable for further subsequent modeling steps. JSim \cite{JSim}, a simulation system for quantitative SBML models, provides converted KEGG pathways for download. Those pathways have been created using KEGG2SBML and thus, the same properties apply for those files. - -BN++ \cite{Kuentzer2007} is an application that is not primarily designed for KEGG translations, but offers this functionality as a side-feature. According to its authors, the project is not maintained anymore and they are working on another project that may again support the translation of KGML files. Nevertheless, the available source code offers classes to convert KGML to SBML and BioPAX but we were not able to successfully compile and run their source code. However, BN++ has been used by the KEGG team to generate official BioPAX translations which are still downloadable from the official KEGG FTP and thus, represent a wide-spread used translation from KEGG to BioPAX. These BioPAX Level~2 files are only available for metabolic reference pathways and represent complete translations using appropriate BioPAX classes (e.g., \smallMolecule{} for small molecules and \protein{} for enzymes). All entities are nicely converted with cross-references to corresponding KEGG identifiers and no duplicate entities are created. 
KEGG database identifiers are also used as names for all entities, which makes the resulting models not directly interpretable to humans. Unfortunately, the conversion contains duplicate reactions, missing reactants are not augmented and there is no option to unbundle reactions. The stoichiometry is always set to one, which is not correct for many reactions. Furthermore, the BioPAX fields for formula or molecular weight of small molecules are not used and the validator gives errors for `Cardinality violation' and `RDF Syntax errors'. - -KGML2BioPAX and KGML2SBML are two applications that are part of an ``ongoing effort to develop an ultimate KEGG-based pathway enrichment analysis system" \cite{KEGG2BioPAX_and_SBML}. Unfortunately, both the SBML and BioPAX conversions are not complete (some elements from the source document are missing), contain no revisions of the reactions, and the stoichiometry is erroneously always specified as one. But all elements use KEGG identifiers, which renders the models machine-interpretable and no reactions or entities are contained twice. The SBML Level~2 Version~4 documents are valid, but do not contain notes, annotations or SBO terms. The BioPAX Level~2 translations contain all KEGG entries as \proteins, which is not correct for small molecules or complexes, and contain no further annotations. The validator complains about errors in the RDF syntax and usage of ``unknown (or prohibited) class[es], not defined in the BioPAX specification". - -Besides these converters, there are even more possibilities to create SBML documents from KEGG pathways. A popular application is Cytoscape \cite{Cytoscape}, which provides KGMLReader (freely available at \url{http://code.google.com/p/kgmlreader/}), a plugin to read KGML documents, and BiNoM \cite{Zinovyev2008}, a plugin that can write SBML documents. -%These two plugins are not designed to match and neither is intended for building models from KEGG.
-But the SBML code, that is generated by linking the results of both plugins, is not usable for further modeling steps. KGMLReader concentrates on graphical representations for Cytoscape and the resulting SBML export of BiNoM barely reflects the input file. It is obvious that the resulting SBML is merely a result of the graphical representation. Edges in the graph primarily connect metabolites with enzymes and each edge is encoded as an SBML reaction. This leads to reactions with small molecules as substrates and enzymes as products, which is clearly incorrect. No elements contain annotations and they are named with a consecutive number only. This renders those documents unusable for further modeling or simulation approaches. -Besides Cytoscape, there are many similar tools, e.g., PathVisio \cite{PathVisio}, Subio (\url{http://www.subio.jp}), or VANTED \cite{VANTED} that mainly focus on a graphical representation of the KGML files, most of which do not have SBML or BioPAX writers. Besides the graphical focus and missing writers, comparison to those tools is not reasonable because they are not intended to act as KEGG converters. - -The SuBliMinaL Toolbox \cite{SuBliMinaL} provides a very interesting alternative for metabolic modeling, based on KEGG data. SuBliMinaL does not provide KGML conversion and is thus not directly comparable to other converters. But it provides methods to reconstruct, e.g., whole organism maps from the KEGG database in an appropriate SBML document, which is well-annotated and contains complete and correct reactions. - - - -\subsection*{Conclusion} - -KEGG pathways are a valuable resource for pathway-based modeling approaches. Unfortunately, the KGML-formatted pathways are primarily designed for visualization purposes and not directly usable as metabolic or signaling models. Therefore, many aspects have to be revised and considered when converting the pathways to community standards such as BioPAX or SBML.
This ranges from unbundling, correcting and annotating the stoichiometry of reactions, over using exclusively organism-specific and unique entities, to handling relations. With the help of additional information from multiple other KEGG databases, the resulting models provide correct and highly enriched structures that contain far more information than the original KGML. -% -The proposed method, including the qualitative models extension for SBML, is the first method that is able to generate signaling models in SBML or BioPAX from KEGG pathways. Currently, no other approach is able to generate complete pathway models with correct reactions, including stoichiometry and well-annotated SBML (i.e., including SBO terms or MIRIAM URNs) or valid BioPAX documents. - -% -All proposed methods are implemented in the KEGGtranslator application. The models, generated by KEGGtranslator with the method described here, lay the foundations for further modeling approaches, such as constraint-based models, tissue-specific models, or simply adding kinetics to the models. All conversions obey the special requirements of SBML or BioPAX and include a huge amount of machine- and human-readable annotations. This facilitates the use of those models in other applications that perform further analysis, modeling or simulation steps on those. -% - - - -\bigskip - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section*{Authors' contributions} -CW, FB, AD conceived and implemented the method. CW wrote the manuscript, MR contributed to the implementation of the method and AZ supervised the work. All authors read and approved the final manuscript. - -%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section*{Acknowledgements} - \ifthenelse{\boolean{publ}}{\small}{} -We gratefully acknowledge very fruitful discussions with Nicolas Le Nov\`{e}re, Nicolas Rodriguez, Neil Swainston, Falk Schreiber, Roland Keller, Florian Mittag, Akira Funahashi, and Toshiaki Katayama.
- - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% The Bibliography %% -%% %% -%% Bmc_article.bst will be used to %% -%% create a .BBL file for submission, which includes %% -%% XML structured for BMC. %% -%% After submission of the .TEX file, %% -%% you will be prompted to submit your .BBL file. %% -%% %% -%% %% -%% Note that the displayed Bibliography will not %% -%% necessarily be rendered by Latex exactly as specified %% -%% in the online Instructions for Authors. %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\newpage -{\ifthenelse{\boolean{publ}}{\footnotesize}{\small} - \bibliographystyle{bmc_article} % Style BST file - \bibliography{../2012-04_CMSB2012/KEGGtranslator_v2_methods} } % Bibliography file (usually '*.bib' ) - -%%%%%%%%%%% - -\ifthenelse{\boolean{publ}}{\end{multicols}}{} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% Figures %% -%% %% -%% NB: this is for captions and %% -%% Titles. All graphics must be %% -%% submitted separately and NOT %% -%% included in the Tex document %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% -%% Do not use \listoffigures as most will included as separate files - -\section*{Figures} - \subsection*{Figure 1 - Generation of systems biology models from KEGG pathways.} -The flowchart shows all major steps involved in the creation of initial systems biology models from KEGG pathways. The whole method requires two sources: a KGML-formatted KEGG pathway and access to other KEGG databases, e.g., via the KEGG API. The preprocessing steps, depicted on the top, involve mainly the removal of inappropriate nodes and processing of reactions. -An important step is the removal of duplicate entries. However, some further steps require information about these duplicates (e.g., when using the layout extension package for SBML) and thus, it is not always part of the preprocessing and may be performed at a later stage. 
-Depending on the desired output format, separate processing steps are executed that involve appropriate conversion and annotation of the initial model. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%% BEGIN FLOWCHART FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%\begin{figure}[htbp] -% \caption{\label{fig:conversionScheme}} -% \begin{center} - %\includegraphics[width=.7\textwidth]{Wrzodek_Fig1.png} -% \end{center} -%\end{figure} -%%%%%%%%%%%%%%%%%%%%%%%%%%%% END FLOWCHART FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - \subsection*{Figure 2 - Simplified class structure and mapping from KGML to BioPAX.} -The figure shows the raw mapping of KGML to BioPAX class instances. The type attribute determines how each entry is translated (see Table~1). Reactions that are catalyzed by enzymes are translated to \Catalysis{}, whereas non-catalyzed reactions are translated directly to \BiochemicalReactions. Relations are either translated to \Conversion{} or to \physicalInteraction{} in BioPAX Level~2 and \MolecularInteraction{} in Level~3 (see Table~2). To keep the clarity, the figure does not include the information that in BioPAX Level~2, \control{} and \conversion{} inherit from \physicalInteraction{}. -%Furthermore, a catalysis object contains another object that is controlled by the catalysis, which is for our purposes always a BiochemicalReaction. -Furthermore, a \Catalysis{} consists of two elements: a \Controller{} and a \Controlled{} element. For our purposes, \Controller{} is always an enzyme and \Controlled{} is a \BiochemicalReaction. 
- -%%%%%%%%%%%%%%%%%%%%%%%%%%%% BEGIN 2BioPAX FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%\begin{figure}[htb] -% \caption{\label{fig:KGML2BioPAX}} -% \begin{center} - %\includegraphics[width=1.5\columnwidth]{Wrzodek_Fig2.png} -% \end{center} -%\end{figure} -%%%%%%%%%%%%%%%%%%%%%%%%%%%% END 2BioPAX FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - \subsection*{Figure 3 - Simplified class structure and mapping from KGML to SBML.} -This mapping includes the SBML qualitative models (qual) and groups extension packages. Most properties are encoded as attributes on the actual classes. Tables~1 and~2 give further details about translation of entries and relations. SBML can only handle reactions, therefore SBML-qual is required to properly encode relations. This extension package requires its own model. Subsequently, the SBML-core \model{} and each \species{} have to be duplicated to obtain a \qualitativeModel{} including the translated relations. Furthermore, the groups extension package can be used for a proper encoding of groups in SBML. -%%%%%%%%%%%%%%%%%%%%%%%%%%%% BEGIN 2SBML FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%\begin{figure}[htb] -% \caption{\label{fig:KGML2SBML}} -% \begin{center} - %\includegraphics[width=1.5\columnwidth]{Wrzodek_Fig3.png} -% \end{center} -%\end{figure} -%%%%%%%%%%%%%%%%%%%%%%%%%%%% END 2SBML FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% -%% Tables %% -%% %% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% Use of \listoftables is discouraged. 
-%% -\section*{Tables} - -%\definecolor{tableShade}{HTML}{F1F5FA} -%\definecolor{tableShade2}{HTML}{ECF3FE} -\definecolor{tableShade2}{gray}{0.95} -%\rowcolors{1}{white}{tableShade2} -%\rowcolors{3}{white}{tableShade2} - -%%%%%%%%%%%%%%%%%%%%%%%% ENTRY TABLE %%%%%%%%%%%%%%%%%%%%%%%% -\subsection*{Table 1 - BioPAX instances and SBO terms corresponding to KGML entry types.} -This table depicts the conversion of KGML entries to BioPAX or SBML. The conversion depends on the KGML entry type attribute. For BioPAX, different class instances are initialized. Conversions to SBML always involve the creation of a \species{} with the given SBO term for each KGML entry. The KGML specification states that an entry of type `gene' ``is a gene product (mostly a protein)". Additionally, a `group' ``is a complex of gene products (mostly a protein complex)" \cite{KGML}. For compatibility with previous KGML versions, the deprecated type `genes' corresponds to `group' since KGML v0.6.1. Further, entries of type `reaction' are not listed in the table, but discussed in a separate section. -\par \mbox{} \par \mbox{ - -\makebox[\textwidth]{ -\begin{tabular}{lll} -\toprule -Entry type & BioPAX element & SBO term \\ -\midrule -\rowcolor{tableShade2} -compound & smallMolecule & 247 (simple chemical) \\ -enzyme & protein & 252 (polypeptide chain) \\ -\rowcolor{tableShade2} -gene & protein & 252 (polypeptide chain) \\ -ortholog & protein & 252 (polypeptide chain) \\ -\rowcolor{tableShade2} -group & complex & 253 (non-covalent complex) \\ -map & pathway & 552 (reference annotation) \\ -\bottomrule -\end{tabular} -} - -} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%% RELATION TABLE %%%%%%%%%%%%%%%%%%%%%%%% -\subsection*{Table 2 - BioPAX instances and SBO terms corresponding to KGML relation subtypes.} -This table shows how relations are handled during conversion to BioPAX or SBML. The conversion depends on the subtype of each relation. 
For each subtype, the corresponding BioPAX element, as well as SBO terms and GO terms are given. When converting to BioPAX, both terms are annotated as an instance of \InteractionVocabulary, whereas an SBML \transition{} has a field for the SBO term and both terms are additionally added as controlled vocabulary term on the \transition. -Please note that \physicalInteraction{} in BioPAX Level~2 corresponds to \MolecularInteraction{} in BioPAX Level~3. Furthermore, relations of type `compound' are treated differently, as described in a separate section of this publication. -\par \mbox{} \par \mbox{ - -\makebox[\textwidth]{ -\setlength{\tabcolsep}{3.0pt} -\begin{tabular}{llllll} -\toprule -Relation subtype & BioPAX element & SBO term & SBO name & GO term & GO name\\ -\midrule - -\rowcolor{tableShade2} -activation & conversion & SBO:0000170 & stimulation & \emph{none} & \\ -inhibition & conversion & SBO:0000169 & inhibition & \emph{none} & \\ -\rowcolor{tableShade2} -expression & conversion & SBO:0000170 & stimulation & GO:0010467 & gene expression \\ -repression & conversion & SBO:0000169 & inhibition & \emph{none} &\\ -\rowcolor{tableShade2} -indirect effect & conversion & SBO:0000344 & molecular interaction & \emph{none} & \\ -state change & conversion & SBO:0000168 & control & \emph{none} & \\ - -\rowcolor{tableShade2} - & physicalInteraction/ & & non-covalent & & non-covalent \\ -\rowcolor{tableShade2} -\multirow{-2}{*}{binding/association} & MolecularInteraction & \multirow{-2}{*}{SBO:0000177} & binding & \multirow{-2}{*}{GO:0005488} & binding \\ - - & physicalInteraction/ & & & \\ -\multirow{-2}{*}{dissociation} & MolecularInteraction & \multirow{-2}{*}{SBO:0000180} & \multirow{-2}{*}{dissociation} & \multirow{-2}{*}{\emph{none}} &\\ - -\rowcolor{tableShade2} - & physicalInteraction/ & & & & \\ -\rowcolor{tableShade2} -\multirow{-2}{*}{missing interaction} & MolecularInteraction & \multirow{-2}{*}{SBO:0000396} & \multirow{-2}{*}{uncertain process} & 
\multirow{-2}{*}{\emph{none}} & \\ - -phosphorylation & conversion & SBO:0000216 & phosphorylation & GO:0016310 & phosphorylation \\ -\rowcolor{tableShade2} -dephosphorylation & conversion & SBO:0000330 & dephosphorylation & GO:0016311 & dephosphorylation \\ -glycosylation & conversion & SBO:0000217 & glycosylation & GO:0070085 & glycosylation \\ -\rowcolor{tableShade2} -ubiquitination & conversion & SBO:0000224 & ubiquitination & GO:0016567 & ubiquitination \\ -methylation & conversion & SBO:0000214 & methylation & GO:0032259 & methylation \\ - -\bottomrule -\end{tabular} -} - -} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%% Converter Comparison TABLE %%%%%%%%%%%%%%%%% -\subsection*{Table 3 - Comparison of different available converters for KEGG pathways.} -This table compares various applications that can convert KEGG pathways to BioPAX or SBML models. A checkmark (\checkmark) is given, if the corresponding converter completely fulfills all requirements, a circle ($\circ$) states that the requirements are only met partially or incorrectly and a minus (-) indicates features, which are not contained at all. `n/a' indicates that a criterion is not applicable to a converter. -A model is \emph{Machine interpretable} if entities in the model can directly be mapped to a database. The criterion \emph{Human interpretable} indicates that a model somehow assigns human readable names or gene symbols to entities. \emph{Signaling pathways} are supported if the converters can read and convert KEGG models with relations. A conversion is \emph{complete} if every relevant reaction of a KGML pathway also occurs in any form in the translated document. For visualization purposes, KGML files often contain multiple copies of entries or reactions. These \emph{duplicates} should be removed. The contained reactions are often \emph{bundled} (multiple reactions are summarized as one) or miss some reaction participants. 
\emph{Revision of reactions} refers to the completion of missing reaction participants. The \emph{stoichiometry} is not contained in KGML documents and must be parsed from reaction equations in the KEGG REACTION database. To test the validity of the models, we used the corresponding validators from \href{http://sbml.org/Facilities/Validator/}{SBML.org} and \href{http://www.biopax.org/biopax-validator/}{BioPAX.org}. A model is marked as \emph{valid}, if the validator does not return any errors. For SBML, we further inspect if the models contain \emph{SBO terms}. It is further recommended to include \emph{notes}, such as human readable descriptions, and \emph{annotations} (e.g., cross-references in form of CV terms, MIRIAM URNs, \Xrefs). Only for BioPAX, it is important to use the \emph{appropriate classes} (instances of \smallMolecule{} for small molecules and instances of \protein{} for proteins) and a nice feature to fill the available BioPAX fields for chemical formula or molecular weight of small molecules (\emph{SM annotations}). 
-\par \mbox{} \par \mbox{ - -\makebox[\textwidth]{ - \newcolumntype{C}{>{\centering\arraybackslash}p{2cm}} - \setlength{\tabcolsep}{3.0pt} - \begin{tabular}{lcccCcc} -\toprule - & KEGG2SBML & BN++ & KEGGconverter & KGML2BioPAX KGML2SBML & \multicolumn{2}{c}{KEGGtranslator} \\ -Version & 1.5.0 & 1.1 & n/a & n/a & 1.2 & 2.0 \\ -Release date & 2008-07-28 & 2009-04-22 & 2009-12-18 & 2010-06-03 & 2011-07-04 & 2012-06-04 \\ -Authors & Funahashi \emph{et al.} & K\"untzer \emph{et al.} & Moutselos \emph{et al.} & Lee \emph{et al.} & \multicolumn{2}{c}{Wrzodek \emph{et al.}} \\ -\midrule - -\multicolumn{7}{l}{\textbf{Supported model formats}} \\ - -\rowcolor{tableShade2} -~~SBML & \checkmark & $\circ$ & \checkmark & \checkmark & \checkmark & \checkmark \\ -~~BioPAX & - & \checkmark & - & \checkmark & - & \checkmark \\ - -\multicolumn{7}{l}{\textbf{Generic translation features}} \\ -\rowcolor{tableShade2} -~~Machine interpretable & $\circ$ & \checkmark & $\circ$ & \checkmark & \checkmark & \checkmark \\ -~~Human interpretable & \checkmark & - & \checkmark & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Signaling pathways & - & - & - & - & - & \checkmark \\ -~~Complete & - & \checkmark & \checkmark & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~No duplicate entries & \checkmark & \checkmark & \checkmark & \checkmark & - & \checkmark \\ -~~No duplicate reactions & \checkmark & - & \checkmark & \checkmark & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Unbundle reactions & \checkmark & - & - & - & - & \checkmark \\ -~~Revision of reactions & \checkmark & - & - & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Stoichiometry & - & - & - & - & - & \checkmark \\ - -\multicolumn{7}{l}{\textbf{SBML}} \\ -\rowcolor{tableShade2} -~~Valid & \checkmark & n/a & - & \checkmark & \checkmark & \checkmark \\ -~~Level.Version & 1.1 up to 2.3 & n/a & 2.1 & 2.4 & 2.4 & 2.4, 3.1 \\ -\rowcolor{tableShade2} -~~SBO terms & - & n/a & - & - & \checkmark & 
\checkmark \\ -~~Notes & - & n/a & - & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Annotations & - & n/a & - & - & \checkmark & \checkmark \\ - -\multicolumn{7}{l}{\textbf{BioPAX}} \\ -\rowcolor{tableShade2} -~~Valid & n/a & - & n/a & - & n/a & \checkmark \\ -~~Level & n/a &2& n/a & 2 & n/a & 2, 3 \\ -\rowcolor{tableShade2} -~~Appropriate classes & n/a & \checkmark & n/a & - & n/a & \checkmark \\ -~~Notes & n/a & - & n/a & - & n/a & \checkmark \\ -\rowcolor{tableShade2} -~~Annotations & n/a & \checkmark & n/a & - & n/a & \checkmark \\ -~~SM annotations & n/a & - & n/a & - & n/a & \checkmark \\ - -\bottomrule -\end{tabular} -} - -} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\end{bmcformat} -\end{document} - - - - - - - diff --git a/doc/publications/2010-06 Method/2012-06 BMC/Submitted_article.pdf b/doc/publications/2010-06 Method/2012-06 BMC/Submitted_article.pdf deleted file mode 100644 index 74dae8b..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/Submitted_article.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig1.png b/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig1.png deleted file mode 100644 index 3c96ec9..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig1.png and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig2.png b/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig2.png deleted file mode 100644 index 6b91531..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig2.png and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig3.png b/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig3.png deleted file mode 100644 index a6bb246..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 BMC/Wrzodek_Fig3.png and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 BMC/bmc_article.bst 
b/doc/publications/2010-06 Method/2012-06 BMC/bmc_article.bst deleted file mode 100644 index e68e894..0000000 --- a/doc/publications/2010-06 Method/2012-06 BMC/bmc_article.bst +++ /dev/null @@ -1,1880 +0,0 @@ -%% % - % bmc_article.bst ver: 1.01 % - % % - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %% %% - %% BibTeX BST file for BioMed Central %% - %% a style syntax for latex .bib %% - %% bibliographies %% - %% %% - %% <1 September 2003> %% - %% %% - %% %% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % - % - % The order in the reference list is that by which the - % works were originally cited in the text, or that in - % the database. - % - % - % This file is based on the style 'unsrt.bst' - % ------------------------------------------ - % BibTeX standard bibliography style `unsrt' - % version 0.99a for BibTeX versions 0.99a - % or later, LaTeX version 2.09. - % Copyright (C) 1985, all rights reserved. - % Copying of this file is authorized only if either - % (1) you make absolutely no changes to your copy, including name, or - % (2) if you do make changes, you name it something other than - % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. - % This restriction helps ensure that all standard styles are identical. - % The file btxbst.doc has the documentation for this style. - % --------------------------------------------------------- -%% - - - - -ENTRY - { address - archive - author - booktitle - chapter - edition - editor - eid - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - url - volume - year - } - {} - { label } - - -% Global variables - most initialised and used locally. -STRINGS { longest.label } -STRINGS { cur tag source bibinfo} -STRINGS { p q r s t z name names } -STRINGS { lr lp lt ls} % local copys fxns with these call no other fxns. 
-INTEGERS { number.label longest.label.width } -INTEGERS { multiresult nameptr namesleft numnames global.counter} -INTEGERS { i j k l ll li} -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := - #0 'global.counter := -} - - -% Logical operators on integers -FUNCTION {not} -{ { #0 } { #1 } if$ -} -FUNCTION {and} -{ 'skip$ { pop$ #0 } if$ -} -FUNCTION {or} -{ { pop$ #1 } 'skip$ if$ -} -FUNCTION {field.or.null} -{ duplicate$ empty$ { pop$ "" } 'skip$ if$ -} - -FUNCTION {remove.dots} -{ 'z := - "" - { z empty$ not } - { z #1 #1 substring$ - z #2 global.max$ substring$ 'z := - duplicate$ "." = 'pop$ - { * } - if$ - } - while$ -} - -%% - % myreverse - % - % Takes 1 string - % Returns the myreverse string - % - % not to be confused with REVERSE (opposite of ITERATE) -%% -FUNCTION {myreverse} -{ - 'lt := - "" 'ls := - { lt empty$ not } - { - lt #1 #1 substring$ ls * 'ls := - lt #2 global.max$ substring$ 'lt := - } - while$ - ls -} - -%% - % search - % - % Takes 2 strings (txt, pattern) - % Retruns 1 if found 0 if not -%% -FUNCTION {search} -{ - 'lp := % pattern - 'lt := % text to search - #0 'i := % result - lp text.length$ 'll := % length of the search pattern - { lt empty$ not } - { lt #1 ll substring$ lp = - { #1 'li := - "" 'lt := } % force exit - { lt #2 global.max$ substring$ 'lt := } % pop 1 char - if$ - } - while$ - li -} - - -%% - % general replace - % - % Takes 3 strings (txt, pattern, replace-str) - % replaces all instances of pattern - % Retruns a new string -%% -FUNCTION {replace} -{ - 'lr := % replace string - 'lp := % pattern - 'lt := % text to search - "" 'ls := % result string - lp text.length$ 'll := % length of the search pattern - { lt empty$ not } - { lt #1 ll substring$ lp = - { ls lr * 'ls := - lt ll #1 + global.max$ substring$ 'lt := } - { ls lt #1 #1 substring$ * 'ls := - lt #2 global.max$ substring$ 'lt 
:= } - if$ - } - while$ - ls -} - - -%% - % strip.letters - % - % Takes 1 arg (string) - % if string has letters get rid of them - % - useful for 2nd -> 2 - % Returns string -%% -FUNCTION {strip.letters} -{ - "" 's := - duplicate$ missing$ - 'pop$ - { - 't := - { t "" = not } - { % ascii '0' = 48, '9' = 57 - t #1 #1 substring$ chr.to.int$ 'i := - i #47 > i #58 < and - { s t #1 #1 substring$ * 's := } - 'skip$ - if$ - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - s -} - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - %{ "OS=(" * output.state int.to.str$ * ") " * write$ } - 'write$ - { add.period$ " " * write$ } % after.sentence - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -% raises an error (warning message) if type not present. 
-FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} -FUNCTION {add.blank} -{ " " * before.all 'output.state := -} - -FUNCTION {add.bold.colon} -{ duplicate$ empty$ - 'skip$ - { "\textbf{:}" * add.blank } - if$ -} - -FUNCTION {add.colon} -{ duplicate$ empty$ - 'skip$ - { ":" * add.blank } - if$ -} -FUNCTION {bold} -{ duplicate$ empty$ - { pop$ "" } - { "\textbf{" swap$ * "}" * } - if$ -} -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "\emph{" swap$ * "}" * } - if$ -} -FUNCTION {tie.or.space.prefix} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ -} - -FUNCTION {capitalize} -{ "u" change.case$ "t" change.case$ } - -FUNCTION {space.word} -{ " " swap$ * " " * } - -% if field not entered - push empty string "" -FUNCTION {bibinfo.check} -{ swap$ - duplicate$ missing$ - { pop$ pop$ - "" } - { duplicate$ empty$ - { swap$ pop$ } - { swap$ pop$ } - if$ - } - if$ -} -FUNCTION {bibinfo.warn} -{ swap$ - duplicate$ missing$ - { - swap$ "missing " swap$ * " in " * cite$ * warning$ pop$ - "" - } - { duplicate$ empty$ - { - swap$ "empty " swap$ * " in " * cite$ * warning$ - } - { swap$ - pop$ - } - if$ - } - if$ -} - -FUNCTION {format.thesis.type} -{ type duplicate$ empty$ - 'pop$ - { swap$ pop$ - "t" change.case$ "type" bibinfo.check - } - if$ -} - - - % Here are the language-specific definitions for explicit words. - % Each function has a name bbl.xxx where xxx is the English word. - % The language selected here is ENGLISH -FUNCTION {bbl.and} -{ "and"} - -FUNCTION {bbl.etal} -{ "et~al." 
} - -FUNCTION {bbl.editors} -{ "(Eds)" } -%{ "editors" } - -FUNCTION {bbl.editor} -{ "(Ed)" } -%{ "editor" } - -FUNCTION {bbl.edby} -{ "Edited by" } - -FUNCTION {bbl.edition} -{ "edition" } -%{ "edn." } - -FUNCTION {bbl.volume} -{ "Volume" } -%{ "vol." } - -FUNCTION {bbl.of} -{ "of" } - -FUNCTION {bbl.number} -{ "no." } - -FUNCTION {bbl.nr} -{ "no." } - -FUNCTION {bbl.in} -{ "in" } - -FUNCTION {bbl.pages} -{ "" } - -FUNCTION {bbl.page} -{ "" } - -FUNCTION {bbl.chapter} -{ "chap." } - -FUNCTION {bbl.techrep} -{ "Tech. Rep." } - -FUNCTION {bbl.mthesis} -{ "Master's thesis" } - -FUNCTION {bbl.phdthesis} -{ "PhD thesis" } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"Jun."} - -MACRO {jul} {"Jul."} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sep."} - -MACRO {oct} {"Oct."} - -MACRO {nov} {"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Computing Surveys"} - -MACRO {acta} {"Acta Informatica"} - -MACRO {cacm} {"Communications of the ACM"} - -MACRO {ibmjrd} {"IBM Journal of Research and Development"} - -MACRO {ibmsj} {"IBM Systems Journal"} - -MACRO {ieeese} {"IEEE Transactions on Software Engineering"} - -MACRO {ieeetc} {"IEEE Transactions on Computers"} - -MACRO {ieeetcad} - {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} - -MACRO {ipl} {"Information Processing Letters"} - -MACRO {jacm} {"Journal of the ACM"} - -MACRO {jcss} {"Journal of Computer and System Sciences"} - -MACRO {scp} {"Science of Computer Programming"} - -MACRO {sicomp} {"SIAM Journal on Computing"} - -MACRO {tocs} {"ACM Transactions on Computer Systems"} - -MACRO {tods} {"ACM Transactions on Database Systems"} - -MACRO {tog} {"ACM Transactions on Graphics"} - -MACRO {toms} {"ACM Transactions on Mathematical Software"} - -MACRO {toois} {"ACM Transactions on Office Information Systems"} - -MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} - -MACRO {tcs} {"Theoretical 
Computer Science"} - - -% Takes 2 args (author field) and string "author" -FUNCTION {format.names} -{ 'bibinfo := - duplicate$ empty$ 'skip$ { - 's := - "" 't := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{vv~}{ll}{ f{}}{ jj}" - format.name$ - remove.dots - bibinfo bibinfo.check - 't := - nameptr #1 > - { - namesleft #1 > - { ", " * t * } - { - "," * - s nameptr "{ll}" format.name$ duplicate$ "others" = - { 't := } - { pop$ } - if$ - t "others" = - { - " " * bbl.etal * - } - { " " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ - } if$ -} - - -%"{f{~}~}{vv~}{ll}{ jj}" -FUNCTION {format.names.ed} -{ - 'bibinfo := - duplicate$ empty$ 'skip$ { - 's := - "" 't := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{vv~}{ll}{ f{}}{ jj}" - format.name$ - remove.dots - bibinfo bibinfo.check - 't := - nameptr #1 > - { - namesleft #1 > - { ", " * t * } - { - "," * - s nameptr "{ll}" format.name$ duplicate$ "others" = - { 't := } - { pop$ } - if$ - t "others" = - { - - " " * bbl.etal * - } - { " " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ - } if$ -} -FUNCTION {format.authors} -{ author "author" format.names -} -FUNCTION {get.bbl.editor} -{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ } - -FUNCTION {format.editors} -{ editor "editor" format.names duplicate$ empty$ 'skip$ - { - " " * get.bbl.editor * -%% "," * " " * get.bbl.editor * - } - if$ -} -FUNCTION {format.note} -{ - url empty$ - 'skip$ - { - "\urlprefix\url{[" url * "]}" * output } - if$ - note empty$ - { "" } - { - note "l" change.case$ "in press" = - { after.sentence 'output.state := - " in press" } - { - after.sentence 'output.state := - note #1 #1 substring$ - duplicate$ "{" = - 'skip$ - { output.state mid.sentence = - { "l" } - { "u" } - if$ - change.case$ % 
leave note * As Is * - } - if$ - note #2 global.max$ substring$ * "note" bibinfo.check - "[" swap$ * "]" * - } - if$ - } - if$ -} - -FUNCTION {bold.format.title} -{ title - "title" bibinfo.check - duplicate$ empty$ 'skip$ - { - bold - } - if$ -} - -FUNCTION {emph.format.title} -{ title - "title" bibinfo.check - duplicate$ empty$ 'skip$ - { - emphasize - } - if$ -} - -% Returns "" -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {output.allinfo.comment} -{ } - -FUNCTION {n.dashify} -{ - 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {word.in} -{ bbl.in capitalize - " " * } - -FUNCTION {format.date} -{ - before.all 'output.state := - "" - duplicate$ empty$ - year "year" bibinfo.check duplicate$ empty$ - { swap$ 'skip$ - { "there's a month but no year in " cite$ * warning$ } - if$ - * - } - { swap$ 'skip$ - { - swap$ - " " * swap$ - } - if$ - * - remove.dots - } - if$ - - duplicate$ "" = - 'skip$ - { " " swap$ * } - if$ -} - -% Source = book, conference, journal, manual -% have the source avaliable on the stack -FUNCTION {format.source} -{ 'source := - source "source" bibinfo.check - duplicate$ empty$ 'skip$ - { - emphasize - } - if$ -} - -FUNCTION {format.title} -{ title "title" bibinfo.check - duplicate$ empty$ 'skip$ - { - bold - } - if$ -} - - - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { bbl.volume volume tie.or.space.prefix - "volume" bibinfo.check * * - series "series" bibinfo.check - duplicate$ empty$ 'pop$ - { swap$ bbl.of space.word 
* swap$ - emphasize * } - if$ - emphasize - "volume and number" number either.or.check - } - if$ -} -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { series empty$ - { number "number" bibinfo.check } - { output.state mid.sentence = - { bbl.number } - { bbl.number capitalize } - if$ - number tie.or.space.prefix "number" bibinfo.check * * - bbl.in space.word * - series "series" bibinfo.check * - } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition duplicate$ empty$ 'skip$ - { - output.state mid.sentence = - { "l" } - { "t" } - if$ change.case$ - "edition" bibinfo.check - " " * bbl.edition * - } - if$ -} -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages duplicate$ empty$ 'skip$ - { duplicate$ multi.page.check - { - n.dashify - } - { - } - if$ - "pages" bibinfo.check - ":" swap$ * - } - if$ -} - -FUNCTION {format.journal.pages} -{ pages duplicate$ empty$ 'pop$ - { swap$ duplicate$ empty$ - { pop$ pop$ format.pages } - { - ":" * - swap$ - n.dashify - "pages" bibinfo.check - * - } - if$ - } - if$ -} -FUNCTION {format.journal.eid} -{ eid "eid" bibinfo.check - duplicate$ empty$ 'pop$ - { swap$ duplicate$ empty$ 'skip$ - { - ":" * - } - if$ - swap$ * - } - if$ -} - -% ignore issue (num) if == 1 -FUNCTION {format.vol.num.pages} -{ volume field.or.null - duplicate$ empty$ 'skip$ - { - "volume" bibinfo.check bold - } - if$ - number "number" bibinfo.check duplicate$ empty$ 'skip$ - { - duplicate$ "1" = - {pop$ ""} - { swap$ duplicate$ empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - swap$ - "(" swap$ * ")" * - } - if$ - } - if$ * - eid empty$ - { format.journal.pages } - { format.journal.eid } 
- if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { bbl.chapter } - { type "l" change.case$ - "type" bibinfo.check - } - if$ - chapter tie.or.space.prefix - "chapter" bibinfo.check - * * - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - - - -%% FUNCTION {format.in.ed.booktitle} -%% { booktitle format.source duplicate$ empty$ 'skip$ -%% { -%% editor "editor" format.names.ed duplicate$ empty$ 'pop$ -%% { -%% bbl.edby -%% " " * swap$ * -%% swap$ -%% "," * -%% " " * swap$ -%% * } -%% if$ -%% word.in swap$ * -%% } -%% if$ -%% } - -FUNCTION{format.edited.by} -{ editor "editor" format.names.ed duplicate$ empty$ - 'pop$ - { bbl.edby - " " * swap$ * - swap$ - ". " * - swap$ - * } - if$ -} - -FUNCTION {format.in.booktitle} -{ booktitle format.source duplicate$ empty$ - 'skip$ - { word.in swap$ * } - if$ -} -FUNCTION {format.in.journal} -{ journal format.source duplicate$ empty$ - 'skip$ - { word.in swap$ * } - if$ -} -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - { "all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} -FUNCTION {format.tr.number} -{ number "number" bibinfo.check - type duplicate$ empty$ - { pop$ bbl.techrep } - 'skip$ - if$ - "type" bibinfo.check - swap$ duplicate$ empty$ - { pop$ "t" change.case$ } - { tie.or.space.prefix * * } - if$ -} -FUNCTION {format.article.crossref} -{ - key duplicate$ empty$ - { pop$ - journal duplicate$ empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * warning$ } - { "journal" bibinfo.check emphasize word.in swap$ * } - if$ - } - { word.in swap$ * " " *} - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - "editor" bibinfo.check - editor num.names$ duplicate$ - #2 > - { pop$ - "editor" bibinfo.check - " " * bbl.etal - * - } - { #2 < - 'skip$ - { editor #2 "{ff }{vv 
}{ll}{ jj}" format.name$ "others" = - { - "editor" bibinfo.check - " " * bbl.etal - * - } - { - bbl.and space.word - * editor #2 "{vv~}{ll}" format.name$ - "editor" bibinfo.check - * - } - if$ - } - if$ - } - if$ -} -FUNCTION {format.book.crossref} -{ volume duplicate$ empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - pop$ word.in - } - { bbl.volume - capitalize - swap$ tie.or.space.prefix "volume" bibinfo.check * * bbl.of space.word * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { series emphasize * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.incoll.inproc.crossref} -{ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle format.source duplicate$ empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - } - { word.in swap$ * } - if$ - } - { word.in key * " " *} - if$ - } - { word.in format.crossref.editor * " " *} - if$ - " \cite{" * crossref * "}" * -} - -% Takes 1 arg (publisher or organisation} -% if present appends address to the front. ie New York: Sachai Ltd -% returns the formatted string. 
-FUNCTION {format.org.or.pub} -{ 't := - t empty$ - { address empty$ - 'skip$ - { address "address" bibinfo.check 't := } - if$ } - { address empty$ - 'skip$ - { address "address" bibinfo.check ": " * t * 't :=} - if$ - } - if$ - t -} - -FUNCTION {format.publisher.address} -{ publisher "publisher" bibinfo.warn format.org.or.pub -} - -FUNCTION {format.organization.address} -{ organization "organization" bibinfo.check format.org.or.pub -} - -FUNCTION {article} -{ output.bibitem - author empty$ - { format.editors "author or editor" output.check - add.colon - } - { format.authors output.nonnull - add.colon - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - bold.format.title "title" output.check - new.sentence - crossref missing$ - { - journal "journal" bibinfo.check - format.source "journal" output.check - format.date "year" output.check - format.vol.num.pages output - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - format.note output - fin.entry -} -FUNCTION {wholejournal} -{ output.bibitem - author empty$ - { format.editors "author or editor" output.check - add.colon - } - { format.authors output.nonnull - add.colon - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - bold.format.title "title" output.check - new.sentence - crossref missing$ - { - journal emphasize word.in swap$ * "journal" output.check - format.date "year" output.check - format.vol.num.pages output - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - format.note output - fin.entry -} -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - add.colon - } - { format.authors output.nonnull - add.colon - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - title format.source "source" output.check - crossref missing$ - { format.bvolume output - new.sentence - 
format.number.series output - format.publisher.address output - } - { - new.sentence - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - format.note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - add.colon - } - { format.authors output.nonnull - add.colon - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - title format.source "title" output.check - crossref missing$ - { - format.publisher.address output - format.bvolume output - new.sentence - format.number.series output - } - { - new.sentence - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - add.blank - format.chapter.pages "chapter and/or pages" output.check - format.note output - fin.entry -} - -% In . Volume X. y edition. Edited by . -% Add: Pub; date:pages. -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - add.colon - bold.format.title "title" output.check - new.sentence - crossref missing$ - { format.in.booktitle "booktitle" output.check - %format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.edition output - booktitle empty$ - 'skip$ - {format.edited.by} - if$ - format.publisher.address output - } - { format.incoll.inproc.crossref output.nonnull - } - if$ - format.date "year" output.check - before.all 'output.state := %no space,comma,etc.. -> 1991:23 - format.pages output - format.note output - fin.entry -} - - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - add.colon - bold.format.title "title" output.check - new.sentence - crossref missing$ - % In . Volume X. x nd edition. Edited by . 
- { format.in.booktitle "booktitle" output.check - format.bvolume output - format.edition output - format.number.series output - booktitle missing$ - 'skip$ - { booktitle missing$ - 'skip$ - {format.edited.by} - if$ - } - if$ - publisher missing$ - { format.organization.address output } - { organization "organization" bibinfo.check output - format.publisher.address output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - } - if$ - format.date "year" output.check - before.all 'output.state := %no space,comma,etc.. - format.pages output - format.note output - fin.entry -} - -FUNCTION {conference} {inproceedings} - - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - add.colon - bold.format.title - "title" output.check - new.sentence - bbl.mthesis format.thesis.type format.source output.nonnull - school "school" bibinfo.warn output - address "address" bibinfo.check output - format.date "year" output.check - format.note output - fin.entry -} - -% old manual style syntax -% all optional: (author, title, howpublished, month, year, note) -% maybe missing: (organization, address, edition) -FUNCTION {misc} -{ output.bibitem - author empty$ - { organization missing$ 'skip$ - { organization output } - if$ - } - { format.authors output } - if$ - add.colon - bold.format.title output - new.sentence - howpublished "howpublished" bibinfo.check format.source output - new.sentence - edition missing$ 'skip$ - {edition output} - if$ - author empty$ - 'skip$ - { organization missing$ - 'skip$ - {organization output} - if$ - } - if$ - address missing$ 'skip$ - {address output} - if$ - format.date output - format.note output - fin.entry - empty.misc.check -} - -FUNCTION {webpage} -{ output.bibitem - author empty$ - { organization missing$ 'skip$ - { organization output } - if$ - } - { format.authors output } - if$ - add.colon - bold.format.title " " * output - new.sentence - howpublished "howpublished" bibinfo.check format.source output - 
new.sentence - edition missing$ 'skip$ - {edition output} - if$ - author empty$ - 'skip$ - { organization missing$ - 'skip$ - {organization output} - if$ - } - if$ - address missing$ 'skip$ - {address output} - if$ - format.date output - format.note output - fin.entry - empty.misc.check -} - - -FUNCTION {inpress} -{ output.bibitem - author empty$ - { organization missing$ 'skip$ - { organization output } - if$ - } - { format.authors output } - if$ - add.colon - bold.format.title output - new.sentence - journal emphasize output - "in press" output - fin.entry - empty.misc.check -} - - - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization missing$ 'skip$ - { organization output } - if$ - } - { format.authors output } - if$ - add.colon - title output - emphasize - new.sentence - howpublished "howpublished" bibinfo.check format.source output - new.sentence - edition missing$ 'skip$ - {edition output} - if$ - author empty$ - 'skip$ - { organization missing$ - 'skip$ - {organization output} - if$ - } - if$ - address missing$ 'skip$ - {address output} - if$ - format.date output - format.note output - fin.entry - empty.misc.check -} - -FUNCTION {booklet}{misc} - - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - add.colon - bold.format.title - "title" output.check - new.sentence - bbl.phdthesis format.thesis.type format.source output.nonnull - school "school" bibinfo.warn output - address "address" bibinfo.check output - format.date "year" output.check - format.note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization "organization" bibinfo.check output - } - { format.editors output.nonnull } - if$ - add.colon - title format.source "source" output.check - format.bvolume output - format.number.series output - editor empty$ - { publisher empty$ - 'skip$ - { - format.publisher.address output - } - if$ - } - { publisher empty$ - { - format.organization.address output } - { - 
organization "organization" bibinfo.check output - format.publisher.address output - } - if$ - } - if$ - format.date "year" output.check - format.note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - add.colon - bold.format.title - "title" output.check - new.sentence - format.tr.number output.nonnull - institution "institution" bibinfo.warn output - address "address" bibinfo.check output - format.date "year" output.check - format.note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - add.colon - bold.format.title "title" output.check - format.date output - format.note "note" output.check - fin.entry -} - -FUNCTION {default.type} { misc } - - - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% %% %%%% %%% %%%%%%% %% %%%%%%%%% -%% %%%% %% %%%% % %%%%% % %% %%%%%%%%% -%% %%%%% %%%%% %% %%% %% %% %%%%%%%%% -%% %%%%%% %%%%%% %%% % %%% %% %%%%%%%%% -%% %%%%% %%%%% %%%% %%%% %% %%%%%%%%% -%% %%%% %% %%%% %%%%%%%%%%% %% %%%%%%%%% -%% %% %%%% %%% %%%%%%%%%%% %% %% -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % - % Function for printing out all data in xml format. 
- % { address archive author booktitle chapter edition editor - % howpublished institution journal key month note number - % organization pages publisher school series title - % type url volume year - % } - % -%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%% - % output.xml - % -%% -FUNCTION {output.xml} -{ duplicate$ "" = - 'pop$ - { " " swap$ * write$ newline$ } - if$ -} - -%% - % markup.xml - % - % Takes 2 args text & tag {"the text " "tag"} - % Returns 1 string {"the text <\tag>"} -%% -FUNCTION {markup.xml} -{ - "" 'r := % result - 't := % tag - duplicate$ missing$ - 'pop$ - { 'r := } - if$ - r "" = - 'skip$ - { "<" t * ">" * r * "" * 'r := } - if$ - r -} - -%% Takes 2 args - tag, string -FUNCTION{markup.xml.title} -{ pop$ % tag - duplicate$ missing$ - 'pop$ - { - 's := % string - s "\&" "&" replace 's := - s "p" markup.xml 's := - s "title" markup.xml output.xml - } - if$ -} - - -%% - % markup.xml.pages - % - % Takes 1 arg (pages string) - % seperates into and if - % there is a - seperator. else no lpage. - % - % need to remove others -%% -FUNCTION{markup.xml.pages} -{ - "" 'r := % fpage - "" 's := % lpage - duplicate$ missing$ - 'pop$ - { - 't := - t " " "" replace 't := % remove all spaces. pgs must be - seperated - { t empty$ not } - { - t #1 #1 substring$ "-" = - { - t #2 global.max$ substring$ 's := - "" 't := % break while loop - }{ - r t #1 #1 substring$ * 'r := - t #2 global.max$ substring$ 't := - } - if$ - } - while$ - } - if$ - r "fpage" markup.xml output.xml - s "lpage" markup.xml output.xml -} - - -%% - % markup.xml.names - % - % Takes 2 args - % tag, namefield (eg. 
author "au") - % splits fields into and - % writes info - % returns nothing -%% -FUNCTION {markup.xml.names} -{ - 't := % tag - "" 'r := - "" 's := - duplicate$ empty$ - { pop$ - organization missing$ - { institution "cnm" markup.xml - "au" markup.xml "aug" markup.xml output.xml } - { organization "cnm" markup.xml - "au" markup.xml "aug" markup.xml output.xml } - if$ } - { - " " write$ newline$ - 'names := % names string - names num.names$ 'j := % num of names - #1 'i := % init counter - { i #1 j + < } % while (i <= j) - { - " " write$ - - % Initial first names - % if first name all in capitals - % - assume is initial list of first names. - names i "{ff{ }}" format.name$ - duplicate$ "u" change.case$ = - { names i "{ff{ }}" format.name$ } - { names i "{f{}}" format.name$ } - if$ - - % if last name but no first name use cnm; else snm - duplicate$ "" = - { names i "{ll{ }}" format.name$ % last names space seperated. - "cnm" markup.xml write$ } - { names i "{ll{ }}" format.name$ % last names space seperated. 
- "snm" markup.xml write$ } - if$ - - "fnm" markup.xml write$ - - "" write$ newline$ - i #1 + 'i := - } - while$ - " " write$ newline$ - } - if$ -} - - -%% - % markup.xml.pub - % - % Takes 4 args (school address publisher tag) - % prints concatenation - % returns nothing -%% -FUNCTION {markup.xml.pub} -{ - 't := % Tag - "" 'p := % Publisher - "" 'q := % Address - "" 'r := % School - "" 's := % Answer - duplicate$ missing$ 'pop$ { 'p := } if$ - duplicate$ missing$ 'pop$ { 'q := } if$ - duplicate$ missing$ 'pop$ { 'r := } if$ - "" r = not % school not empty - { r 's := } % return school as publisher - { % else - "" p = "" q = and % address and pub empty - 'skip$ - { - "" q = - { p 's := } % return pub - { "" p = - { q 's := } % return add - { q ": " * p * 's := } % return add : pub - if$ - } - if$ - } - if$ - } - if$ - s t markup.xml output.xml -} - -%% - % xml.phd - % - % Takes nothing - % Returns type (phd/msc) of empty string -%% -FUNCTION {xml.phd} -{ - "phdthesis" type$ = - "mastersthesis" type$ = OR - { "phdthesis" type$ = - { bbl.phdthesis format.thesis.type } - { bbl.mthesis format.thesis.type } - if$ - } - { type } % usually empty - if$ -} - -% markup.xml.edition -% -% edition is a numeric value. ie "2" -% if format 2nd 1st etc.. strip letters. 
-% -FUNCTION {markup.xml.edition} -{ pop$ pop$ %clear stack - edition strip.letters duplicate$ "" = - { pop$ edition } - 'skip$ - if$ - "edition" markup.xml output.xml -} - - -%% [bmc@xmlcomment] - % this is an environment that returns nothing - % so although it will be embedded in the bbl file it - % will not display anything -%% -FUNCTION{begin.bmcxmlcomment}{ - newline$ - "\newcommand{\BMCxmlcomment}[1]{}" write$ newline$ - newline$ - "\BMCxmlcomment{" write$ newline$ - newline$ "" write$ newline$ -} - -FUNCTION{end.bmcxmlcomment}{ - newline$ - "" write$ newline$ - "} % end of \BMCxmlcomment" write$ newline$ -} - -%% - % export.xml - % - % done for each entry referenced in the BibTeX database -%% -FUNCTION {export.xml}{ - newline$ - global.counter #1 + 'global.counter := - "" * - write$ newline$ -% title "\&" "&" replace "title" markup.xml.title - title "title" markup.xml.title - author "aug" markup.xml.names % org. and inst. here - howpublished missing$ - { booktitle missing$ - { journal missing$ - { xml.phd } % Phd/Msc - {journal} - if$ - } {booktitle} - if$ - } { howpublished } - if$ "source" markup.xml output.xml - school - address - publisher "publisher" markup.xml.pub - editor "editor" markup.xml output.xml - edition "edition" markup.xml.edition - series "p" markup.xml - "title" markup.xml - "series" markup.xml output.xml - chapter "p" markup.xml - "title" markup.xml - "section" markup.xml output.xml - % month % ignore - year "pubdate" markup.xml output.xml - - note missing$ - { volume "volume" markup.xml output.xml - number "issue" markup.xml output.xml - pages markup.xml.pages - url "url" markup.xml output.xml } - { note "l" change.case$ "in press" = - { " " write$ newline$ } - { volume "volume" markup.xml output.xml - number "issue" markup.xml output.xml - pages markup.xml.pages - url "url" markup.xml output.xml - note "note" markup.xml output.xml - } - if$ - } - if$ - "" write$ newline$ -} - -% xml fields not used -% suppliment conference xrefbib issn 
issb archive key -% BioMed_Central_Tex_Template_v1.01 - - -%%%%%%%%%%%%%%%% -% % -% Main % -% % -%%%%%%%%%%%%%%%% - -READ - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} -EXECUTE {initialize.longest.label} -ITERATE {longest.label.pass} -FUNCTION {begin.bib} -{ "%% BioMed_Central_Bib_Style_v1.01" write$ newline$ newline$ - preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * "}" * - write$ newline$ - "\providecommand{\url}[1]{[#1]}" - write$ newline$ - "\providecommand{\urlprefix}{}" - write$ newline$ -} - -EXECUTE {begin.bib} -EXECUTE {init.state.consts} -ITERATE {call.type$} -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} -EXECUTE {end.bib} -EXECUTE {begin.bmcxmlcomment} -ITERATE {export.xml} -EXECUTE {end.bmcxmlcomment} -%% End of customized bst file -%% -%% End of file `basic.bst'. diff --git a/doc/publications/2010-06 Method/2012-06 BMC/bmc_article.cls b/doc/publications/2010-06 Method/2012-06 BMC/bmc_article.cls deleted file mode 100644 index 01c5563..0000000 --- a/doc/publications/2010-06 Method/2012-06 BMC/bmc_article.cls +++ /dev/null @@ -1,137 +0,0 @@ -%% % - % bmc_article.cls ver: 1.03 % - % % - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % %% - % A Style Class based on article for %% - % formating towards BioMed Central %% - % guidelines %% - % %% - % <1 September 2003> %% - % %% - % %% - % %% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - % - % See Readme.txt for ToDo list. 
-%% -% - -\NeedsTeXFormat{LaTeX2e}[1995/12/01] -\ProvidesClass{bmc_article}[2003/09/01 v1.01 BioMed Central article class] -\LoadClassWithOptions{article} - -\renewcommand\@biblabel[1]{#1.} -\def\hi{} -\def\pb{\\*[0.42cm]} - - -\AtBeginDocument{} - -\def\@abstractline{\textwidth} - -\newcounter{bmc@correspondingauthor} -\setcounter{bmc@correspondingauthor}{0} %never changes -\newcounter{bmc@emailcounter} \setcounter{bmc@emailcounter}{0} - - -% Corresponding Authors. -% adds an asterisk * as a marker. -\def\correspondingauthor{$^*$} -\def\@corresponding{\footnotesize\correspondingauthor Corresponding author} - - -% Email. -% A variable to store names and emails. -\gdef\bmc@email{Email: } -\newcommand\bmc@curname{} -\newcommand\email[1]{\relax - \g@addto@macro\bmc@email{#1; }} % has trailing ; - -\def\@bmc@email{\relax\footnotesize{\bmc@email}} - -% Address -\def\address#1{ \def\@address{\begin{hi}\footnotesize#1\end{hi}}} -\def\iid(#1){\hi$^#1$} - -% Thanks -\def\thempfootnote{\normalfont\@arabic\c@mpfootnote} -\def\thanks#1{% - \stepcounter{footnote}% - \hbox{\@textsuperscript{\normalfont\thefootnote}}% - \protected@xdef\@thanks{\@thanks% - \protect\footnotetext[\the\c@footnote]{#1}}% - } - -% Maketitle -\let\old@maketitle\maketitle -\renewcommand\maketitle{ - \begin{flushleft}\mbox{ - \global\let\@date\@empty % @date = null. 
- {\sffamily\begin{minipage}{\textwidth}% - \@maketitle - {\raggedright% - {\noindent\@address}\\ \hbox{} - {\noindent\@bmc@email}\\ \hbox{} - {\noindent\@corresponding}% - }%end \raggedright - \end{minipage}% - } - \renewcommand\thefootnote{\old@thefootnote} % - } % \fbox - \end{flushleft} -} - -\def\and{, } - -\def\@maketitle{% - \newpage - \null - \vskip 2em% - {\noindent\LARGE \bfseries\@title \par}% - \vskip 1.5em% - {\large - %\lineskip .5em% - \noindent\@author - \par}% - \vskip 1em% - \par - \vskip 1.5em} - - -% reformats abstract to BMC looks -\let\old@abstract\abstract -\renewenvironment{abstract} - { - \ifhmode\begingroup\parskip0pt\par\noindent\endgroup\fi% - \begin{sffamily} - \vspace{1cm} % use \topsep in list - {\noindent\bfseries\large\abstractname\vspace{-0.5em}} - {\par\vbox{}} - }{ - \ifhmode\begingroup\parskip0pt\par\noindent\endgroup\fi % - \end{sffamily} - } - - - -%% \@startsection{sec-name}{level}{indent}{pre-skip} -%% {post-skip}{style}*[srt title]{title} -\renewcommand\section{\@startsection {section}{1}{\z@}% - {-0.84cm \@plus -1ex \@minus -.2ex} {.2ex \@plus.2ex} %% - {\normalfont\large\bfseries\sffamily}} -\renewcommand\subsection{\@startsection{subsection}{2}{\z@}% - {-0.84cm \@plus -1ex \@minus -.2ex} {.2ex \@plus.2ex} %% - {\normalfont\normalsize\bfseries\sffamily}} -\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% - {-0.84cm \@plus -1ex \@minus -.2ex} {.2ex \@plus.2ex} %% - {\normalfont\normalsize\bfseries\itshape}} -\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}% - {-.2em \@plus -1ex \@minus -.2ex} {-0.5em} %% - {\normalfont\small\bfseries\sffamily}} -\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}% - {-.2em \@plus -1ex \@minus -.2ex} {-0.5em} %% - {\normalfont\normalsize\sffamily}} - -\endinput -%% end of bmc_article.cls diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/2010-06-19_PCOMPBIOL-S-12-01335.pdf b/doc/publications/2010-06 Method/2012-06 
PLoS/2010-06-19_PCOMPBIOL-S-12-01335.pdf deleted file mode 100644 index 81ea5e6..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/2010-06-19_PCOMPBIOL-S-12-01335.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/2010-06-20_PCOMPBIOL-S-12-01335.pdf b/doc/publications/2010-06 Method/2012-06 PLoS/2010-06-20_PCOMPBIOL-S-12-01335.pdf deleted file mode 100644 index 74f801a..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/2010-06-20_PCOMPBIOL-S-12-01335.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/2012-06 PLoS.zip b/doc/publications/2010-06 Method/2012-06 PLoS/2012-06 PLoS.zip deleted file mode 100644 index fb4d1d1..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/2012-06 PLoS.zip and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/CoverLetter_KGTMethod_PLoS.doc b/doc/publications/2010-06 Method/2012-06 PLoS/CoverLetter_KGTMethod_PLoS.doc deleted file mode 100644 index 97fc5f8..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/CoverLetter_KGTMethod_PLoS.doc and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/CoverLetter_KGTMethod_PLoS.pdf b/doc/publications/2010-06 Method/2012-06 PLoS/CoverLetter_KGTMethod_PLoS.pdf deleted file mode 100644 index 8e65f9a..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/CoverLetter_KGTMethod_PLoS.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.bib b/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.bib deleted file mode 100644 index 2bb1f30..0000000 --- a/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.bib +++ /dev/null @@ -1,1583 +0,0 @@ -% This file was created with JabRef 2.7b. 
-% Encoding: Cp1252 - -@ARTICLE{Alexiou2009, - author = {Panagiotis Alexiou and Manolis Maragkakis and Giorgos L Papadopoulos - and Martin Reczko and Artemis G Hatzigeorgiou}, - title = {Lost in translation: an assessment and perspective for computational - {microRNA} target identification.}, - journal = {Bioinformatics}, - year = {2009}, - volume = {25}, - pages = {3049--3055}, - number = {23}, - month = {Dec}, - abstract = {MicroRNAs (miRNAs) are a class of short endogenously expressed RNA - molecules that regulate gene expression by binding directly to the - messenger RNA of protein coding genes. They have been found to confer - a novel layer of genetic regulation in a wide range of biological - processes. Computational miRNA target prediction remains one of the - key means used to decipher the role of miRNAs in development and - disease. Here we introduce the basic idea behind the experimental - identification of miRNA targets and present some of the most widely - used computational miRNA target identification programs. The review - includes an assessment of the prediction quality of these programs - and their combinations. Supplementary information: Supplementary - data are available at Bioinformatics online.}, - doi = {10.1093/bioinformatics/btp565}, - institution = {Institute of Molecular Oncology, Biomedical Sciences Research Center - Alexander Fleming, 166 72 Varkiza, Greece. p.alexiou@fleming.gr}, - keywords = {Animals; Computational Biology, methods; Humans; MicroRNAs, chemistry/metabolism; - Proteins, chemistry/metabolism}, - owner = {wrzodek}, - pii = {btp565}, - pmid = {19789267}, - timestamp = {2012.03.05}, - url = {http://dx.doi.org/10.1093/bioinformatics/btp565} -} - -@ARTICLE{Aydinlik2001, - author = {H. Aydinlik and T. D. Nguyen and O. Moennikes and A. Buchmann and - M. 
Schwarz}, - title = {Selective pressure during tumor promotion by phenobarbital leads - to clonal outgrowth of beta-catenin-mutated mouse liver tumors.}, - journal = {Oncogene}, - year = {2001}, - volume = {20}, - pages = {7812--7816}, - number = {53}, - month = {Nov}, - abstract = {Tumor promoters are non-mutagenic chemicals which increase the probability - of cancer by accelerating the clonal expansion of cells transformed - during tumor initiation. Phenobarbital (PB) is an antiepileptic drug - which promotes hepatocarcinogenesis in rodents when administered - subsequent to an initiating carcinogen like diethylnitrosamine (DEN). - Here we have investigated the prevalence and patterns of mutations - in two genes, Ha-ras and beta-catenin, both known mutational targets - in mouse hepatocarcinogenesis. Liver tumors were generated by a single - administration of DEN to 6 week old mice followed by feeding of PB - (0.05\%) containing or control diet for 39 weeks. Mutations at Ha-ras - codon 61 were screened by allele-specific oligonucleotide hybridization; - beta-catenin mutations were detected by direct sequencing of PCR - products spanning exon 2. In tumors from mice treated with DEN alone, - the prevalence of Ha-ras mutations was approximately 30\% (6/20), - while no beta-catenin mutations (0/13) were detectable in tumors - of this treatment group. By contrast, Ha-ras mutations were undetectable - in tumors from mice treated with DEN/PB (0/32), while approximately - 80\% (37/46) of tumors from this group showed beta-catenin mutations. - These results demonstrate that PB strongly affects the prevalence - of mutations in the two cancer-related genes, presumably by positive - and negative selection for cells harboring the respective mutation.}, - doi = {10.1038/sj.onc.1204982}, - institution = {Institut für Toxikologie, Universität Tübingen, Wilhelmstr. 
56, 72074 - Tübingen, Germany.}, - keywords = {Animals; Base Sequence; Carcinogens, pharmacology; Cell Division, - drug effects; Clone Cells, drug effects/metabolism/pathology; Cytoskeletal - Proteins, analysis/chemistry/genetics; DNA Mutational Analysis; DNA, - Neoplasm, genetics; Diethylnitrosamine, pharmacology; Genes, ras, - genetics; Immunohistochemistry; Liver Neoplasms, Experimental, chemically - induced/genetics/pathology; Mice; Mice, Knockout; Molecular Sequence - Data; Mutagenesis, drug effects/genetics; Phenobarbital, pharmacology; - Polymerase Chain Reaction; Selection, Genetic; Trans-Activators; - beta Catenin}, - owner = {wrzodek}, - pmid = {11753661}, - timestamp = {2012.06.06}, - url = {http://dx.doi.org/10.1038/sj.onc.1204982} -} - -@ARTICLE{Backes2007, - author = {Christina Backes and Andreas Keller and Jan Kuentzer and Benny Kneissl - and Nicole Comtesse and Yasser A Elnakady and Rolf M\"{u}ller and - Eckart Meese and Hans-Peter Lenhof}, - title = {{GeneTrail}--advanced gene set enrichment analysis.}, - journal = {Nucleic Acids Res}, - year = {2007}, - volume = {35}, - pages = {W186--W192}, - number = {Web Server issue}, - month = {Jul}, - abstract = {We present a comprehensive and efficient gene set analysis tool, called - 'GeneTrail' that offers a rich functionality and is easy to use. - Our web-based application facilitates the statistical evaluation - of high-throughput genomic or proteomic data sets with respect to - enrichment of functional categories. GeneTrail covers a wide variety - of biological categories and pathways, among others KEGG, TRANSPATH, - TRANSFAC, and GO. Our web server provides two common statistical - approaches, 'Over-Representation Analysis' (ORA) comparing a reference - set of genes to a test set, and 'Gene Set Enrichment Analysis' (GSEA) - scoring sorted lists of genes. 
Besides other newly developed features, - GeneTrail's statistics module includes a novel dynamic-programming - algorithm that improves the P-value computation of GSEA methods considerably. - GeneTrail is freely accessible at http://genetrail.bioinf.uni-sb.de.}, - doi = {10.1093/nar/gkm323}, - institution = {Center for Bioinformatics, Saarland University, Building E1.1, 66041 - Saarbrücken, Germany. cbackes@bioinf.uni-sb.de}, - keywords = {Animals; Computational Biology, methods; Database Management Systems; - Databases, Genetic; Gene Expression Regulation; Genes, Fungal; Genome; - Genomics; Humans; Internet; Models, Genetic; Models, Statistical; - Programming Languages; Proteomics; Software; User-Computer Interface}, - owner = {wrzodek}, - pii = {gkm323}, - pmid = {17526521}, - timestamp = {2012.06.12}, - url = {http://dx.doi.org/10.1093/nar/gkm323} -} - -@ARTICLE{Bartel2004, - author = {David P Bartel}, - title = {{MicroRNAs}: genomics, biogenesis, mechanism, and function.}, - journal = {Cell}, - year = {2004}, - volume = {116}, - pages = {281--297}, - number = {2}, - month = {Jan}, - abstract = {MicroRNAs (miRNAs) are endogenous approximately 22 nt RNAs that can - play important regulatory roles in animals and plants by targeting - mRNAs for cleavage or translational repression. Although they escaped - notice until relatively recently, miRNAs comprise one of the more - abundant classes of gene regulatory molecules in multicellular organisms - and likely influence the output of many protein-coding genes.}, - institution = {Whitehead Institute for Biomedical Research, 9 Cambridge Center, - Cambridge, MA 02142, USA. 
dbartel@wi.mit.edu}, - keywords = {Animals; Base Sequence; Caenorhabditis elegans; Drosophila; Genome; - Humans; MicroRNAs, physiology; Models, Biological; Molecular Sequence - Data; Phenotype; Plant Proteins, physiology; Protein Biosynthesis; - RNA, Small Interfering, metabolism; Species Specificity; Transcription, - Genetic}, - owner = {wrzodek}, - pii = {S0092867404000455}, - pmid = {14744438}, - timestamp = {2012.03.20} -} - -@ARTICLE{Beaudry2006, - author = {Jean-Bernard Beaudry and Christophe E Pierreux and Graham P Hayhurst - and Nicolas Plumb-Rudewiez and Mary C Weiss and Guy G Rousseau and - Fr\'{e}d\'{e}ric P Lemaigre}, - title = {{Threshold levels of hepatocyte nuclear factor 6 (HNF-6) acting in - synergy with HNF-4 and PGC-1alpha are required for time-specific - gene expression during liver development}.}, - journal = {Mol Cell Biol}, - year = {2006}, - volume = {26}, - pages = {6037--6046}, - number = {16}, - month = {Aug}, - abstract = {During liver development, hepatocytes undergo a maturation process - that leads to the fully differentiated state. This relies at least - in part on the coordinated action of liver-enriched transcription - factors (LETFs), but little is known about the dynamics of this coordination. - In this context we investigate here the role of the LETF hepatocyte - nuclear factor 6 (HNF-6; also called Onecut-1) during hepatocyte - differentiation. We show that HNF-6 knockout mouse fetuses have delayed - expression of glucose-6-phosphatase (g6pc), which catalyzes the final - step of gluconeogenesis and is a late marker of hepatocyte maturation. - Using a combination of in vivo and in vitro gain- and loss-of-function - approaches, we demonstrate that HNF-6 stimulates endogenous g6pc - gene expression directly via a synergistic and interdependent action - with HNF-4 and that it involves coordinate recruitment of the coactivator - PGC-1alpha. 
The expression of HNF-6, HNF-4, and PGC-1alpha rises - steadily during liver development and precedes that of g6pc. We provide - evidence that threshold levels of HNF-6 are required to allow synergism - between HNF-6, HNF-4, and PGC-1alpha to induce time-specific expression - of g6pc. Our observations on the regulation of g6pc by HNF-6 provide - a model whereby synergism, interdependency, and threshold concentrations - of LETFs and coactivators determine time-specific expression of genes - during liver development.}, - doi = {10.1128/MCB.02445-05}, - institution = {Box 7529, 75 Avenue Hippocrate, B-1200 Brussels, Belgium.}, - keywords = {Animals; Base Sequence; Cell Differentiation; Cells, Cultured; Embryo, - Mammalian, embryology; Gene Expression Regulation, Developmental; - Glucose-6-Phosphatase, genetics; Hepatocyte Nuclear Factor 4, genetics/metabolism; - Hepatocyte Nuclear Factor 6, deficiency/genetics/metabolism; Hepatocytes, - cytology/enzymology; Humans; Liver, embryology/metabolism; Mice; - Mice, Knockout; Molecular Sequence Data; NIH 3T3 Cells; Promoter - Regions, Genetic, genetics; Protein Binding; RNA, Messenger, genetics/metabolism; - Time Factors; Trans-Activators, genetics/metabolism}, - owner = {wrzodek}, - pii = {26/16/6037}, - pmid = {16880515}, - timestamp = {2012.06.08}, - url = {http://dx.doi.org/10.1128/MCB.02445-05} -} - -@ARTICLE{TheMammalianEpigenome, - author = {Bradley E Bernstein and Alexander Meissner and Eric S Lander}, - title = {The mammalian epigenome.}, - journal = {Cell}, - year = {2007}, - volume = {128}, - pages = {669--681}, - number = {4}, - month = {Feb}, - abstract = {Chemical modifications to DNA and histone proteins form a complex - regulatory network that modulates chromatin structure and genome - function. The epigenome refers to the complete description of these - potentially heritable changes across the genome. 
The composition - of the epigenome within a given cell is a function of genetic determinants, - lineage, and environment. With the sequencing of the human genome - completed, investigators now seek a comprehensive view of the epigenetic - changes that determine how genetic information is made manifest across - an incredibly varied background of developmental stages, tissue types, - and disease states. Here we review current research efforts, with - an emphasis on large-scale studies, emerging technologies, and challenges - ahead.}, - doi = {10.1016/j.cell.2007.01.033}, - institution = {Molecular Pathology Unit and Center for Cancer Research, Massachusetts - General Hospital, Charlestown, MA 02129, USA. bbernstein@partners.org}, - keywords = {Animals; Chromatin Assembly and Disassembly, genetics; CpG Islands, - genetics; DNA Methylation; Epigenesis, Genetic, genetics; Gene Expression - Regulation, Developmental, genetics; Genomic Instability, genetics; - Histones, genetics/metabolism; Humans; Mammals, genetics}, - owner = {wrzodek}, - pii = {S0092-8674(07)00128-6}, - pmid = {17320505}, - timestamp = {2012.06.06}, - url = {http://dx.doi.org/10.1016/j.cell.2007.01.033} -} - -@ARTICLE{Cooper2006, - author = {Sara J Cooper and Nathan D Trinklein and Elizabeth D Anton and Loan - Nguyen and Richard M Myers}, - title = {Comprehensive analysis of transcriptional promoter structure and - function in 1\% of the human genome.}, - journal = {Genome Res}, - year = {2006}, - volume = {16}, - pages = {1--10}, - number = {1}, - month = {Jan}, - abstract = {Transcriptional promoters comprise one of many classes of eukaryotic - transcriptional regulatory elements. Identification and characterization - of these elements are vital to understanding the complex network - of human gene regulation. 
Using full-length cDNA sequences to identify - transcription start sites (TSS), we predicted more than 900 putative - human transcriptional promoters in the ENCODE regions, representing - a comprehensive sampling of promoters in 1\% of the genome. We identified - 387 fragments that function as promoters in at least one of 16 cell - lines by measuring promoter activity in high-throughput transient - transfection reporter assays. These positive functional results demonstrate - widespread use of alternative promoters. We show a strong correlation - between promoter activity and the corresponding endogenous RNA transcript - levels, providing the first experimental quantitative estimate of - promoter contribution to gene regulation. Finally, we identified - functional regions within a randomly selected subset of 45 promoters - using deletion analyses. These experiments showed that, on average, - the sequence -300 to -50 bp of the TSS positively contributes to - core promoter activity. Interestingly, putative negative elements - were identified -1000 to -500 bp upstream of the TSS for 55\% of - genes tested. These data provide the largest and most comprehensive - view of promoter function in the human genome.}, - doi = {10.1101/gr.4222606}, - institution = {Department of Genetics, Stanford University School of Medicine, Stanford, - California 94305-5120, USA.}, - keywords = {Gene Expression Regulation, physiology; Genome, Human, physiology; - HeLa Cells; Humans; Response Elements, physiology; Transcription, - Genetic, physiology}, - owner = {wrzodek}, - pii = {gr.4222606}, - pmid = {16344566}, - timestamp = {2012.06.08}, - url = {http://dx.doi.org/10.1101/gr.4222606} -} - -@ARTICLE{GardinerGarden1987, - author = {M. Gardiner-Garden and M. 
Frommer}, - title = {{CpG} islands in vertebrate genomes.}, - journal = {J Mol Biol}, - year = {1987}, - volume = {196}, - pages = {261--282}, - number = {2}, - month = {Jul}, - abstract = {Although vertebrate DNA is generally depleted in the dinucleotide - CpG, it has recently been shown that some vertebrate genes contain - CpG islands, regions of DNA with a high G+C content and a high frequency - of CpG dinucleotides relative to the bulk genome. In this study, - a large number of sequences of vertebrate genes were screened for - the presence of CpG islands. Each CpG island was then analysed in - terms of length, nucleotide composition, frequency of CpG dinucleotides, - and location relative to the transcription unit of the associated - gene. CpG islands were associated with the 5' ends of all housekeeping - genes and many tissue-specific genes, and with the 3' ends of some - tissue-specific genes. A few genes contained both 5' and 3' CpG islands, - separated by several thousand base-pairs of CpG-depleted DNA. The - 5' CpG islands extended through 5'-flanking DNA, exons and introns, - whereas most of the 3' CpG islands appeared to be associated with - exons. CpG islands were generally found in the same position relative - to the transcription unit of equivalent genes in different species, - with some notable exceptions. The locations of G/C boxes, composed - of the sequence GGGCGG or its reverse complement CCGCCC, were investigated - relative to the location of CpG islands. G/C boxes were found to - be rare in CpG-depleted DNA and plentiful in CpG islands, where they - occurred in 3' CpG islands, as well as in 5' CpG islands associated - with tissue-specific and housekeeping genes. G/C boxes were located - both upstream and downstream from the transcription start site of - genes with 5' CpG islands. Thus, G/C boxes appeared to be a feature - of CpG islands in general, rather than a feature of the promoter - region of housekeeping genes. 
Two theories for the maintenance of - a high frequency of CpG dinucleotides in CpG islands were tested: - that CpG islands in methylated genomes are maintained, despite a - tendency for 5mCpG to mutate by deamination to TpG+CpA, by the structural - stability of a high G+C content alone, and that CpG islands associated - with exons result from some selective importance of the arginine - codon CGX. Neither of these theories could account for the distribution - of CpG dinucleotides in the sequences analysed. Possible functions - of CpG islands in transcriptional and post-transcriptional regulation - of gene expression were discussed, and were related to theories for - the maintenance of CpG islands as "methylation-free zones" in germline - DNA.}, - institution = {Kanematsu Laboratories, Royal Prince Alfred Hospital, Camperdown - N.S.W., Australia.}, - keywords = {Animals; Base Composition; Base Sequence; Cattle; Cytidine Monophosphate, - analogs /&/ derivatives/genetics; Cytosine Nucleotides, genetics; - Dinucleoside Phosphates; Genes; Guanosine, analogs /&/ derivatives/genetics; - Humans; Mice; Rats; Transcription, Genetic; Vertebrates, genetics}, - owner = {wrzodek}, - pii = {0022-2836(87)90689-9}, - pmid = {3656447}, - timestamp = {2012.06.08} -} - -@ARTICLE{Golub1999, - author = {T. R. Golub and D. K. Slonim and P. Tamayo and C. Huard and M. Gaasenbeek - and J. P. Mesirov and H. Coller and M. L. Loh and J. R. Downing and - M. A. Caligiuri and C. D. Bloomfield and E. S. Lander}, - title = {Molecular classification of cancer: class discovery and class prediction - by gene expression monitoring.}, - journal = {Science}, - year = {1999}, - volume = {286}, - pages = {531--537}, - number = {5439}, - month = {Oct}, - abstract = {Although cancer classification has improved over the past 30 years, - there has been no general approach for identifying new cancer classes - (class discovery) or for assigning tumors to known classes (class - prediction). 
Here, a generic approach to cancer classification based - on gene expression monitoring by DNA microarrays is described and - applied to human acute leukemias as a test case. A class discovery - procedure automatically discovered the distinction between acute - myeloid leukemia (AML) and acute lymphoblastic leukemia (ALL) without - previous knowledge of these classes. An automatically derived class - predictor was able to determine the class of new leukemia cases. - The results demonstrate the feasibility of cancer classification - based solely on gene expression monitoring and suggest a general - strategy for discovering and predicting cancer classes for other - types of cancer, independent of previous biological knowledge.}, - institution = {Whitehead Institute/Massachusetts Institute of Technology Center - for Genome Research, Cambridge, MA 02139, USA. golub@genome.wi.mit.edu}, - keywords = {Acute Disease; Antineoplastic Combined Chemotherapy Protocols, therapeutic - use; Cell Adhesion, genetics; Cell Cycle, genetics; Gene Expression - Profiling; Homeodomain Proteins, genetics; Humans; Leukemia, Myeloid, - classification/drug therapy/genetics; Neoplasm Proteins, genetics; - Neoplasms, classification/genetics; Oligonucleotide Array Sequence - Analysis; Oncogenes; Precursor Cell Lymphoblastic Leukemia-Lymphoma, - classification/drug therapy/genetics; Predictive Value of Tests; - Reproducibility of Results; Treatment Outcome}, - owner = {wrzodek}, - pii = {7911}, - pmid = {10521349}, - timestamp = {2012.03.21} -} - -@ARTICLE{He2004, - author = {Lin He and Gregory J Hannon}, - title = {{MicroRNAs: small RNAs with a big role in gene regulation}.}, - journal = {Nat Rev Genet}, - year = {2004}, - volume = {5}, - pages = {522--531}, - number = {7}, - month = {Jul}, - doi = {10.1038/nrg1379}, - institution = {Cold Spring Harbor Laboratory, Watson School of Biological Sciences, - 1 Bungtown Road, Cold Spring Harbor, New York 11724, USA.}, - keywords = {3' Untranslated 
Regions; Animals; Caenorhabditis elegans; Caenorhabditis - elegans Proteins, genetics; Gene Expression Regulation; Genome; Humans; - MicroRNAs, chemistry; Phylogeny; Protein Structure, Tertiary; RNA - Processing, Post-Transcriptional; RNA, Messenger, metabolism; RNA, - Small Interfering, metabolism; Repressor Proteins, genetics; Tissue - Distribution}, - owner = {wrzodek}, - pii = {nrg1379}, - pmid = {15211354}, - timestamp = {2012.05.29}, - url = {http://dx.doi.org/10.1038/nrg1379} -} - -@ARTICLE{Hoheisel2006, - author = {J\"org D Hoheisel}, - title = {Microarray technology: beyond transcript profiling and genotype analysis.}, - journal = {Nat Rev Genet}, - year = {2006}, - volume = {7}, - pages = {200--210}, - number = {3}, - month = {Mar}, - abstract = {Understanding complex functional mechanisms requires the global and - parallel analysis of different cellular processes. DNA microarrays - have become synonymous with this kind of study and, in many cases, - are the obvious platform to achieve this aim. They have already made - important contributions, most notably to gene-expression studies, - although the true potential of this technology is far greater. Whereas - some assays, such as transcript profiling and genotyping, are becoming - routine, others are still in the early phases of development, and - new areas of application, such as genome-wide epigenetic analysis - and on-chip synthesis, continue to emerge.}, - doi = {10.1038/nrg1809}, - institution = {Division of Functional Genome Analysis, Deutsches Krebsforschungszentrum, - Im Neuenheimer Feld 580, 69120 Heidelberg, Germany. 
J.Hoheisel@dkfz.de}, - keywords = {Animals; Epigenesis, Genetic, physiology; Gene Expression Profiling, - methods/trends; Genome, physiology; Humans; Oligonucleotide Array - Sequence Analysis, methods/trends; Protein Array Analysis, methods/trends}, - owner = {wrzodek}, - pii = {nrg1809}, - pmid = {16485019}, - timestamp = {2012.03.19}, - url = {http://dx.doi.org/10.1038/nrg1809} -} - -@ARTICLE{miRTarBase, - author = {Hsu, S. D. and Lin, F. M. and Wu, W. Y. and Liang, C. and Huang, - W. C. and Chan, W. L. and Tsai, W. T. and Chen, G. Z. and Lee, C. - J. and Chiu, C. M. and Chien, C. H. and Wu, M. C. and Huang, C. Y. - and Tsou, A. P. and Huang, H. D.}, - title = {{miRTarBase: a database curates experimentally validated microRNA-target - interactions}.}, - journal = {Nucleic Acids Res}, - year = {2011}, - volume = {39}, - pages = {D163-9}, - number = {Database issue}, - abstract = {MicroRNAs (miRNAs), i.e. small non-coding RNA molecules ( approximately - 22 nt), can bind to one or more target sites on a gene transcript - to negatively regulate protein expression, subsequently controlling - many cellular mechanisms. A current and curated collection of miRNA-target - interactions (MTIs) with experimental support is essential to thoroughly - elucidating miRNA functions under different conditions and in different - species. As a database, miRTarBase has accumulated more than 3500 - MTIs by manually surveying pertinent literature after data mining - of the text systematically to filter research articles related to - functional studies of miRNAs. Generally, the collected MTIs are validated - experimentally by reporter assays, western blot, or microarray experiments - with overexpression or knockdown of miRNAs. miRTarBase curates 3576 - experimentally verified MTIs between 657 miRNAs and 2297 target genes - among 17 species. miRTarBase contains the largest amount of validated - MTIs by comparing with other similar, previously developed databases. 
- The MTIs collected in the miRTarBase can also provide a large amount - of positive samples to develop computational methods capable of identifying - miRNA-target interactions. miRTarBase is now available on http://miRTarBase.mbc.nctu.edu.tw/, - and is updated frequently by continuously surveying research articles.}, - keywords = {*Databases, Nucleic Acid Gene Expression Regulation MicroRNAs/*metabolism - RNA Interference RNA, Messenger/metabolism Reproducibility of Results - Systems Integration User-Computer Interface} -} - -@ARTICLE{TheEpigenomicsOfCancer, - author = {Peter A Jones and Stephen B Baylin}, - title = {The epigenomics of cancer.}, - journal = {Cell}, - year = {2007}, - volume = {128}, - pages = {683--692}, - number = {4}, - month = {Feb}, - abstract = {Aberrant gene function and altered patterns of gene expression are - key features of cancer. Growing evidence shows that acquired epigenetic - abnormalities participate with genetic alterations to cause this - dysregulation. Here, we review recent advances in understanding how - epigenetic alterations participate in the earliest stages of neoplasia, - including stem/precursor cell contributions, and discuss the growing - implications of these advances for strategies to control cancer.}, - doi = {10.1016/j.cell.2007.01.029}, - institution = {Department of Urology, Biochemistry, and Molecular Biology, USC/Norris - Comprehensive Cancer Center, Keck School of Medicine, University - of Southern California, Los Angeles, CA 90089, USA. 
jones_p@ccnt.usc.edu}, - keywords = {Animals; Cell Differentiation, genetics; Cell Transformation, Neoplastic, - genetics; Epigenesis, Genetic, genetics; Gene Expression Regulation, - Neoplastic, genetics; Gene Silencing, physiology; Gene Therapy, methods/trends; - Humans; Mutation, genetics; Neoplasms, genetics; Stem Cells, metabolism}, - owner = {wrzodek}, - pii = {S0092-8674(07)00127-4}, - pmid = {17320506}, - timestamp = {2012.06.06}, - url = {http://dx.doi.org/10.1016/j.cell.2007.01.029} -} - -@ARTICLE{KEGG, - author = {Minoru Kanehisa and Susumu Goto and Masahiro Hattori and Kiyoko F - Aoki-Kinoshita and Masumi Itoh and Shuichi Kawashima and Toshiaki - Katayama and Michihiro Araki and Mika Hirakawa}, - title = {From genomics to chemical genomics: new developments in {KEGG}.}, - journal = {Nucleic Acids Res}, - year = {2006}, - volume = {34}, - pages = {D354--D357}, - number = {Database issue}, - month = {Jan}, - abstract = {The increasing amount of genomic and molecular information is the - basis for understanding higher-order biological systems, such as - the cell and the organism, and their interactions with the environment, - as well as for medical, industrial and other practical applications. - The KEGG resource (http://www.genome.jp/kegg/) provides a reference - knowledge base for linking genomes to biological systems, categorized - as building blocks in the genomic space (KEGG GENES) and the chemical - space (KEGG LIGAND), and wiring diagrams of interaction networks - and reaction networks (KEGG PATHWAY). A fourth component, KEGG BRITE, - has been formally added to the KEGG suite of databases. This reflects - our attempt to computerize functional interpretations as part of - the pathway reconstruction process based on the hierarchically structured - knowledge about the genomic, chemical and network spaces. 
In accordance - with the new chemical genomics initiatives, the scope of KEGG LIGAND - has been significantly expanded to cover both endogenous and exogenous - molecules. Specifically, RPAIR contains curated chemical structure - transformation patterns extracted from known enzymatic reactions, - which would enable analysis of genome-environment interactions, such - as the prediction of new reactions and new enzyme genes that would - degrade new environmental compounds. Additionally, drug information - is now stored separately and linked to new KEGG DRUG structure maps.}, - doi = {10.1093/nar/gkj102}, - institution = {Bioinformatics Center, Institute for Chemical Research, Kyoto University, - Uji, Kyoto 611-0011, Japan. kanehisa@kuicr.kyoto-u.ac.jp}, - keywords = {Biotransformation; Chemical Phenomena; Chemistry; Databases, Factual; - Databases, Genetic; Environment; Enzymes, chemistry/genetics; Genomics; - Humans; Internet; Ligands; Pharmaceutical Preparations, chemistry/classification; - Signal Transduction; Systems Integration; User-Computer Interface}, - owner = {wrzodek}, - pii = {34/suppl_1/D354}, - pmid = {16381885}, - timestamp = {2012.03.21}, - url = {http://dx.doi.org/10.1093/nar/gkj102} -} - -@ARTICLE{Kauffmann2009, - author = {Audrey Kauffmann and Robert Gentleman and Wolfgang Huber}, - title = {arrayQualityMetrics--a bioconductor package for quality assessment - of microarray data.}, - journal = {Bioinformatics}, - year = {2009}, - volume = {25}, - pages = {415--416}, - number = {3}, - month = {Feb}, - abstract = {SUMMARY: The assessment of data quality is a major concern in microarray - analysis. arrayQualityMetrics is a Bioconductor package that provides - a report with diagnostic plots for one or two colour microarray data. - The quality metrics assess reproducibility, identify apparent outlier - arrays and compute measures of signal-to-noise ratio. 
The tool handles - most current microarray technologies and is amenable to use in automated - analysis pipelines or for automatic report generation, as well as - for use by individuals. The diagnosis of quality remains, in principle, - a context-dependent judgement, but our tool provides powerful, automated, - objective and comprehensive instruments on which to base a decision. - AVAILABILITY: arrayQualityMetrics is a free and open source package, - under LGPL license, available from the Bioconductor project at www.bioconductor.org. - A users guide and examples are provided with the package. Some examples - of HTML reports generated by arrayQualityMetrics can be found at - http://www.microarray-quality.org}, - doi = {10.1093/bioinformatics/btn647}, - institution = {EMBL European Bioinformatics Institute, Wellcome Trust Genome Campus, - Hinxton, Cambridge CB10 1SD, UK. audrey@ebi.ac.uk}, - keywords = {Algorithms; Computational Biology, methods; Data Interpretation, Statistical; - Internet; Oligonucleotide Array Sequence Analysis, methods/standards; - Quality Control; Software}, - language = {eng}, - medline-pst = {ppublish}, - owner = {eichner}, - pii = {btn647}, - pmid = {19106121}, - timestamp = {2012.06.15}, - url = {http://dx.doi.org/10.1093/bioinformatics/btn647} -} - -@ARTICLE{UCSCBrowser, - author = {W. James Kent and Charles W Sugnet and Terrence S Furey and Krishna - M Roskin and Tom H Pringle and Alan M Zahler and David Haussler}, - title = {The human genome browser at {UCSC}.}, - journal = {Genome Res}, - year = {2002}, - volume = {12}, - pages = {996--1006}, - number = {6}, - month = {Jun}, - abstract = {As vertebrate genome sequences near completion and research refocuses - to their analysis, the issue of effective genome annotation display - becomes critical. 
A mature web tool for rapid and reliable display - of any requested portion of the genome at any scale, together with - several dozen aligned annotation tracks, is provided at http://genome.ucsc.edu. - This browser displays assembly contigs and gaps, mRNA and expressed - sequence tag alignments, multiple gene predictions, cross-species - homologies, single nucleotide polymorphisms, sequence-tagged sites, - radiation hybrid data, transposon repeats, and more as a stack of - coregistered tracks. Text and sequence-based searches provide quick - and precise access to any region of specific interest. Secondary - links from individual features lead to sequence details and supplementary - off-site databases. One-half of the annotation tracks are computed - at the University of California, Santa Cruz from publicly available - sequence data; collaborators worldwide provide the rest. Users can - stably add their own custom tracks to the browser for educational - or research purposes. The conceptual and technical framework of the - browser, its underlying MYSQL database, and overall use are described. - The web site currently serves over 50,000 pages per day to over 3000 - different users.}, - doi = {10.1101/gr.229102. Article published online before print in May 2002}, - institution = {Department of Molecular, Cellular, and Developmental Biology, University - of California, Santa Cruz, CA 95064, USA. kent@biology.ucsc.edu}, - keywords = {California; Database Management Systems; Databases, Genetic; Gene - Expression; Genes; Genome, Human; Humans; RNA, Messenger; Sequence - Homology, Nucleic Acid; Software; Universities, trends}, - owner = {wrzodek}, - pmid = {12045153}, - timestamp = {2010.05.06}, - url = {http://dx.doi.org/10.1101/gr.229102} -} - -@ARTICLE{Khraiwesh2010, - author = {Basel Khraiwesh and M. 
Asif Arif and Gotelinde I Seumel and Stephan - Ossowski and Detlef Weigel and Ralf Reski and Wolfgang Frank}, - title = {Transcriptional control of gene expression by microRNAs.}, - journal = {Cell}, - year = {2010}, - volume = {140}, - pages = {111--122}, - number = {1}, - month = {Jan}, - __markedentry = {[eichner:6]}, - abstract = {MicroRNAs (miRNAs) control gene expression in animals and plants. - Like another class of small RNAs, siRNAs, they affect gene expression - posttranscriptionally. While siRNAs in addition act in transcriptional - gene silencing, a role of miRNAs in transcriptional regulation has - been less clear. We show here that in moss Physcomitrella patens - mutants without a DICER-LIKE1b gene, maturation of miRNAs is normal - but cleavage of target RNAs is abolished and levels of these transcripts - are drastically reduced. These mutants accumulate miRNA:target-RNA - duplexes and show hypermethylation of the genes encoding target RNAs, - leading to gene silencing. This pathway occurs also in the wild-type - upon hormone treatment. 
We propose that initiation of epigenetic - silencing by DNA methylation depends on the ratio of the miRNA and - its target RNA.}, - doi = {10.1016/j.cell.2009.12.023}, - institution = {Plant Biotechnology, Faculty of Biology, University of Freiburg, - Schaenzlestrasse 1, 79104 Freiburg, Germany.}, - keywords = {Base Sequence; Bryopsida, genetics/metabolism; DNA Methylation; Gene - Expression Regulation; MicroRNAs, genetics/metabolism; Molecular - Sequence Data; Plant Proteins, metabolism; RNA, Plant, genetics/metabolism; - RNA, Small Interfering; Transcription, Genetic}, - language = {eng}, - medline-pst = {ppublish}, - owner = {eichner}, - pii = {S0092-8674(09)01570-0}, - pmid = {20085706}, - timestamp = {2012.06.15}, - url = {http://dx.doi.org/10.1016/j.cell.2009.12.023} -} - -@ARTICLE{Kim2005, - author = {Tae Hoon Kim and Leah O Barrera and Ming Zheng and Chunxu Qu and - Michael A Singer and Todd A Richmond and Yingnian Wu and Roland D - Green and Bing Ren}, - title = {A high-resolution map of active promoters in the human genome.}, - journal = {Nature}, - year = {2005}, - volume = {436}, - pages = {876--880}, - number = {7052}, - month = {Aug}, - abstract = {In eukaryotic cells, transcription of every protein-coding gene begins - with the assembly of an RNA polymerase II preinitiation complex (PIC) - on the promoter. The promoters, in conjunction with enhancers, silencers - and insulators, define the combinatorial codes that specify gene - expression patterns. Our ability to analyse the control logic encoded - in the human genome is currently limited by a lack of accurate information - regarding the promoters for most genes. Here we describe a genome-wide - map of active promoters in human fibroblast cells, determined by - experimentally locating the sites of PIC binding throughout the human - genome. This map defines 10,567 active promoters corresponding to - 6,763 known genes and at least 1,196 un-annotated transcriptional - units. 
Features of the map suggest extensive use of multiple promoters - by the human genes and widespread clustering of active promoters - in the genome. In addition, examination of the genome-wide expression - profile reveals four general classes of promoters that define the - transcriptome of the cell. These results provide a global view of - the functional relationships among transcriptional machinery, chromatin - structure and gene expression in human cells.}, - doi = {10.1038/nature03877}, - institution = {Ludwig Institute for Cancer Research, UCSD School of Medicine, 9500 - Gilman Drive, La Jolla, California 92093-0653, USA.}, - keywords = {Chromatin, genetics/metabolism; Fibroblasts, metabolism; Gene Expression - Regulation, genetics; Genome, Human; Genomics; Humans; Physical Chromosome - Mapping; Promoter Regions, Genetic, genetics; Sensitivity and Specificity; - Transcription, Genetic, genetics}, - owner = {wrzodek}, - pii = {nature03877}, - pmid = {15988478}, - timestamp = {2012.06.08}, - url = {http://dx.doi.org/10.1038/nature03877} -} - -@ARTICLE{DLK1miRNA, - author = {John M Luk and Julja Burchard and Chunsheng Zhang and Angela M Liu - and Kwong F Wong and Felix H Shek and Nikki P Lee and Sheung Tat - Fan and Ronnie T Poon and Irena Ivanovska and Ulrike Philippar and - Michele A Cleary and Carolyn A Buser and Peter M Shaw and Chuen-Neng - Lee and Daniel G Tenen and Hongyue Dai and Mao Mao}, - title = {{DLK1-DIO3 genomic imprinted microRNA cluster at 14q32.2 defines - a stemlike subtype of hepatocellular carcinoma associated with poor - survival}.}, - journal = {J Biol Chem}, - year = {2011}, - volume = {286}, - pages = {30706--30713}, - number = {35}, - month = {Sep}, - abstract = {Hepatocellular carcinoma (HCC) is a heterogeneous and highly aggressive - malignancy, for which there are no effective cures. 
Identification - of a malignant stemlike subtype of HCC may offer patients with a - dismal prognosis a potential targeted therapy using c-MET and Wnt - pathway inhibitors. MicroRNAs (miRNAs) show promise as diagnostic - and prognostic tools for cancer detection and stratification. Using - a TRE-c-Met-driven transgenic HCC mouse model, we identified a cluster - of 23 miRNAs that is encoded within the Dlk1-Gtl2 imprinted region - on chromosome 12qF1 overexpressed in all of the isolated liver tumors. - Interestingly, this region is conserved among mammalian species and - maps to the human DLK1-DIO3 region on chromosome 14q32.2. We thus - examined the expression of the DLK1-DIO3 miRNA cluster in a cohort - of 97 hepatitis B virus-associated HCC patients and identified a - subgroup (n = 18) of patients showing strong coordinate overexpression - of miRNAs in this cluster but not in other cancer types (breast, - lung, kidney, stomach, and colon) that were tested. Expression levels - of imprinted gene transcripts from neighboring loci in this 14q32.2 - region and from a subset of other imprinted sites were concomitantly - elevated in human HCC. Interestingly, overexpression of the DLK1-DIO3 - miRNA cluster was positively correlated with HCC stem cell markers - (CD133, CD90, EpCAM, Nestin) and associated with a high level of - serum alpha-fetoprotein, a conventional biomarker for liver cancer, - and poor survival rate in HCC patients. In conclusion, our findings - suggest that coordinate up-regulation of the DLK1-DIO3 miRNA cluster - at 14q32.2 may define a novel molecular (stem cell-like) subtype - of HCC associated with poor prognosis.}, - doi = {10.1074/jbc.M111.229831}, - institution = {Cancer Science Institute and Department of Pharmacology, National - University of Singapore, 117597, Singapore. 
jmluk@nus.edu.sg}, - keywords = {Carcinoma, Hepatocellular, genetics/mortality; Chromosomes, Human, - Pair 14, genetics; Cohort Studies; Humans; Intercellular Signaling - Peptides and Proteins, genetics; Iodide Peroxidase, genetics; Liver - Neoplasms, genetics/mortality; Liver, metabolism; Membrane Proteins, - genetics; MicroRNAs, genetics/metabolism; Multigene Family; Prognosis; - Tissue Distribution; Treatment Outcome; Tumor Markers, Biological, - metabolism; Up-Regulation}, - owner = {wrzodek}, - pii = {M111.229831}, - pmid = {21737452}, - timestamp = {2012.03.23}, - url = {http://dx.doi.org/10.1074/jbc.M111.229831} -} - -@ARTICLE{Lopez-Romero2011, - author = {Pedro López-Romero}, - title = {Pre-processing and differential expression analysis of Agilent microRNA - arrays using the AgiMicroRna Bioconductor library.}, - journal = {BMC Genomics}, - year = {2011}, - volume = {12}, - pages = {64}, - abstract = {The main research tool for identifying microRNAs involved in specific - cellular processes is gene expression profiling using microarray - technology. Agilent is one of the major producers of microRNA arrays, - and microarray data are commonly analyzed by using R and the functions - and packages collected in the Bioconductor project. However, an analytical - package that integrates the specific characteristics of microRNA - Agilent arrays has been lacking.This report presents the new bioinformatic - tool AgiMicroRNA for the pre-processing and differential expression - analysis of Agilent microRNA array data. The software is implemented - in the open-source statistical scripting language R and is integrated - in the Bioconductor project (http://www.bioconductor.org) under the - GPL license. For the pre-processing of the microRNA signal, AgiMicroRNA - incorporates the robust multiarray average algorithm, a method that - produces a summary measure of the microRNA expression using a linear - model that takes into account the probe affinity effect. 
To obtain - a normalized microRNA signal useful for the statistical analysis, - AgiMicroRna offers the possibility of employing either the processed - signal estimated by the robust multiarray average algorithm or the - processed signal produced by the Agilent image analysis software. - The AgiMicroRNA package also incorporates different graphical utilities - to assess the quality of the data. AgiMicroRna uses the linear model - features implemented in the limma package to assess the differential - expression between different experimental conditions and provides - links to the miRBase for those microRNAs that have been declared - as significant in the statistical analysis.AgiMicroRna is a rational - collection of Bioconductor functions that have been wrapped into - specific functions in order to ease and systematize the pre-processing - and statistical analysis of Agilent microRNA data. The development - of this package contributes to the Bioconductor project filling the - gap in microRNA array data analysis.}, - doi = {10.1186/1471-2164-12-64}, - institution = {Epidemiology, Atherothrombosis and Imaging Department, Centro Nacional - de Investigaciones Cardiovasculares Carlos III, Melchor Fernández - Almagro 3, E-28029 Madrid, Spain. 
plopez@cnic.es}, - keywords = {Gene Expression Profiling, methods; Humans; MicroRNAs, genetics; Oligonucleotide - Array Sequence Analysis, methods; Software}, - language = {eng}, - medline-pst = {epublish}, - owner = {eichner}, - pii = {1471-2164-12-64}, - pmid = {21269452}, - timestamp = {2012.06.15}, - url = {http://dx.doi.org/10.1186/1471-2164-12-64} -} - -@ARTICLE{MarxStoelting2008, - author = {Philip Marx-Stoelting and Johanna Mahr and Thomas Knorpp and Sandra - Schreiber and Markus F Templin and Thomas Ott and Albrecht Buchmann - and Michael Schwarz}, - title = {Tumor promotion in liver of mice with a conditional {Cx26} knockout.}, - journal = {Toxicol Sci}, - year = {2008}, - volume = {103}, - pages = {260--267}, - number = {2}, - month = {Jun}, - abstract = {Connexin (Cx) 26 and 32 are the major gap junction proteins in liver. - We recently demonstrated that Cx32 is essential for phenobarbital - (PB)-mediated tumor promotion in mouse liver. To investigate whether - Cx26 plays a similar role, an initiation-promotion experiment was - conducted using mice with a liver-specific knockout of Cx26. Control - and Cx26-deficient mice were injected a single dose of N-nitrosodiethylamine - (DEN, 90 microg/g b.wt.) at 6 weeks of age and groups of mice were - subsequently kept on a PB (0.05\%) containing or control diet for - 35 weeks. At the end of the experiment, the carcinogenic response - in the liver was monitored. Mice from PB treatment groups showed - strongly increased liver weights compared with mice treated with - DEN alone, which was mostly due to a much higher tumor burden. The - tumor response in PB-treated mice of both strains was quite similar, - but the number of smaller tumors and of enzyme-altered neoplastic - lesions was somewhat larger in PB-treated Cx26 knockout (Cx26 KO) - compared with wild-type mice, whereas the volume fraction of enzyme-altered - lesions was slightly reduced in PB-treated Cx26-deficient mice. 
There - was no significant difference in tumor prevalence between Cx26 KO - and wild-type mice. Altogether our present data show that elimination - of Cx26 has only minor effects on chemically induced mouse hepatocarcinogenesis, - in striking contrast to the effects seen in Cx32 KO mice.}, - doi = {10.1093/toxsci/kfn043}, - institution = {Institute of Pharmacology and Toxicology, Department of Toxicology, - University of Tübingen, Wilhelmstr. 56, 72074 Tübingen, Germany.}, - keywords = {Animals; Apoptosis, drug effects; Carcinogens, toxicity; Cocarcinogenesis; - Connexins, deficiency/genetics/metabolism; Diethylnitrosamine, toxicity; - Female; Fluorescent Antibody Technique, Indirect; Gene Expression - Regulation, Neoplastic, drug effects; Gene Silencing; Liver Neoplasms, - Experimental, genetics/metabolism/pathology; Liver, drug effects/metabolism/pathology; - Male; Mice; Mice, Inbred C3H; Mice, Knockout; Organ Size, drug effects; - Protein Array Analysis; RNA, Neoplasm, analysis}, - owner = {wrzodek}, - pii = {kfn043}, - pmid = {18308698}, - timestamp = {2012.06.11}, - url = {http://dx.doi.org/10.1093/toxsci/kfn043} -} - -@ARTICLE{Montero2006, - author = {Alberto J Montero and C. Marcela D\'{i}az-Montero and Li Mao and - Emile M Youssef and Marcos Estecio and Lanlan Shen and Jean-Pierre - J Issa}, - title = {Epigenetic inactivation of {EGFR} by {CpG} island hypermethylation - in cancer.}, - journal = {Cancer Biol Ther}, - year = {2006}, - volume = {5}, - pages = {1494--1501}, - number = {11}, - month = {Nov}, - abstract = {The epidermal growth factor receptor (EGFR) is a member of the HER/ERB-B - family of transmembrane receptor kinases. Overexpression of EGFR - confers advantages in cell proliferation, survival, and migration - and correlates with decreased survival in multiple solid tumors. - However, a proportion of these malignancies have little or no expression - of EGFR. 
CpG island hypermethylation and associated transcriptional - silencing are common in solid tumors. The methylation status of the - EGFR CpG island was examined in a series of cell lines and tissues. - Dense EGFR methylation (90\%) was found in the breast cancer cell - line CAMA1, and a moderate degree of methylation (30-50\%) was observed - in the breast cancer cell lines MB435 and MB453. Transcriptional - silencing of EGFR in these cell lines closely correlated with methylation. - By contrast, no methylation of the HER-2/ neu CpG island was detected. - EGFR hypermethylation was also found in a subset of unselected primary - breast (20\%), head and neck squamous cell carcinoma (35\%), and - lung tumors (11\%). Treatment with decitabine resulted in the reexpression - of EGFR in CAMA1 and MB453. Both cell lines are relatively resistant - to killing by the EGFR inhibitor gefitinib. However, after cotreatment - with decitabine and gefitinib, a significant effect on the induction - of apoptosis was observed. 
In conclusion, EGFR is hypermethylated - and silenced in a subset of solid tumor cell lines and primary tumor - specimens, and cotreatment with decitabine and gefitinib has an additive - effect only in EGFR methylated breast cancer cell lines.}, - institution = {Division of Hematology-Oncology, Medical University of South Carolina, - Charleston, South Carolina, USA.}, - keywords = {Antineoplastic Agents, therapeutic use; Azacitidine, analogs /&/ derivatives/therapeutic - use; Breast Neoplasms, drug therapy/genetics; Cell Line; Cell Line, - Tumor; DNA Methylation; DNA Primers; Dinucleoside Phosphates, genetics; - Epigenesis, Genetic; Female; Gene Therapy, methods; Humans; Organ - Specificity; Quinazolines, therapeutic use; Receptor, Epidermal Growth - Factor, genetics; Restriction Mapping; Reverse Transcriptase Polymerase - Chain Reaction}, - owner = {wrzodek}, - pii = {3299}, - pmid = {17369752}, - timestamp = {2012.06.08} -} - -@BOOK{NimblegenSignalMapUserGuide, - title = {{SignalMap User's Guide}}, - year = {2006}, - author = {{NimbleGen Systems Inc.}}, - number = {Version 1.9}, - note = {Available from {NimbleGen} at \href{http://www.nimblegen.com/products/lit/signalmap1.9usersguide.pdf}{www.nimblegen.com/products/lit/signalmap1.9usersguide.pdf}. - Accessed 2012 Mar 22.}, - owner = {wrzodek}, - timestamp = {2012.03.22} -} - -@ARTICLE{TarBase, - author = {Papadopoulos, G. L. and Reczko, M. and Simossis, V. A. and Sethupathy, - P. and Hatzigeorgiou, A. G.}, - title = {The database of experimentally supported targets: a functional update - of {TarBase}.}, - journal = {Nucleic Acids Res}, - year = {2009}, - volume = {37}, - pages = {D155-8}, - number = {Database issue}, - abstract = {TarBase5.0 is a database which houses a manually curated collection - of experimentally supported microRNA (miRNA) targets in several animal - species of central scientific interest, plants and viruses. 
MiRNAs - are small non-coding RNA molecules that exhibit an inhibitory effect - on gene expression, interfering with the stability and translational - efficiency of the targeted mature messenger RNAs. Even though several - computational programs exist to predict miRNA targets, there is a - need for a comprehensive collection and description of miRNA targets - with experimental support. Here we introduce a substantially extended - version of this resource. The current version includes more than - 1300 experimentally supported targets. Each target site is described - by the miRNA that binds it, the gene in which it occurs, the nature - of the experiments that were conducted to test it, the sufficiency - of the site to induce translational repression and/or cleavage, and - the paper from which all these data were extracted. Additionally, - the database is functionally linked to several other relevant and - useful databases such as Ensembl, Hugo, UCSC and SwissProt. The TarBase5.0 - database can be queried or downloaded from http://microrna.gr/tarbase.}, - keywords = {Animals *Databases, Nucleic Acid Gene Expression Regulation MicroRNAs/*metabolism - RNA, Messenger/metabolism} -} - -@ARTICLE{Pelizzola2008, - author = {Mattia Pelizzola and Yasuo Koga and Alexander Eckehart Urban and - Michael Krauthammer and Sherman Weissman and Ruth Halaban and Annette - M Molinaro}, - title = {MEDME: an experimental and analytical methodology for the estimation - of DNA methylation levels based on microarray derived MeDIP-enrichment.}, - journal = {Genome Res}, - year = {2008}, - volume = {18}, - pages = {1652--1659}, - number = {10}, - month = {Oct}, - __markedentry = {[eichner:]}, - abstract = {DNA methylation is an important component of epigenetic modifications - that influences the transcriptional machinery and is aberrant in - many human diseases. Several methods have been developed to map DNA - methylation for either limited regions or genome-wide. 
In particular, - antibodies specific for methylated CpG have been successfully applied - in genome-wide studies. However, despite the relevance of the obtained - results, the interpretation of antibody enrichment is not trivial. - Of greatest importance, the coupling of antibody-enriched methylated - fragments with microarrays generates DNA methylation estimates that - are not linearly related to the true methylation level. Here, we - present an experimental and analytical methodology, MEDME (modeling - experimental data with MeDIP enrichment), to obtain enhanced estimates - that better describe the true values of DNA methylation level throughout - the genome. We propose an experimental scenario for evaluating the - true relationship in a high-throughput setting and a model-based - analysis to predict the absolute and relative DNA methylation levels. - We successfully applied this model to evaluate DNA methylation status - of normal human melanocytes compared to a melanoma cell strain. Despite - the low resolution typical of methods based on immunoprecipitation, - we show that model-derived estimates of DNA methylation provide relatively - high correlation with measured absolute and relative levels, as validated - by bisulfite genomic DNA sequencing. 
Importantly, the model-derived - DNA methylation estimates simplify the interpretation of the results - both at single-loci and at chromosome-wide levels.}, - doi = {10.1101/gr.080721.108}, - institution = {Department of Epidemiology and Public Health, Yale University School - of Medicine, New Haven, Connecticut 06520, USA.}, - keywords = {Algorithms; CpG Islands; DNA Methylation; DNA, Neoplasm, genetics/metabolism; - DNA, genetics/metabolism; Epigenesis, Genetic; Genome, Human; Humans; - Immunoprecipitation; Infant, Newborn; Melanocytes, metabolism; Oligonucleotide - Array Sequence Analysis, methods; Sequence Analysis, DNA, methods}, - language = {eng}, - medline-pst = {ppublish}, - owner = {eichner}, - pii = {gr.080721.108}, - pmid = {18765822}, - timestamp = {2012.06.15}, - url = {http://dx.doi.org/10.1101/gr.080721.108} -} - -@ARTICLE{Pirnia2009, - author = {Farzaneh Pirnia and Michael Pawlak and Gerhard G Thallinger and Berthold - Gierke and Markus F Templin and Andi Kappeler and Daniel C Betticher - and Beat Gloor and Markus M Borner}, - title = {Novel functional profiling approach combining reverse phase protein - microarrays and human {3-D} ex vivo tissue cultures: expression of - apoptosis-related proteins in human colon cancer.}, - journal = {Proteomics}, - year = {2009}, - volume = {9}, - pages = {3535--3548}, - number = {13}, - month = {Jul}, - abstract = {Cancer is caused by a complex pattern of molecular perturbations. - To understand the biology of cancer, it is thus important to look - at the activation state of key proteins and signaling networks. The - limited amount of available sample material from patients and the - complexity of protein expression patterns make the use of traditional - protein analysis methods particularly difficult. In addition, the - only approach that is currently available for performing functional - studies is the use of serial biopsies, which is limited by ethical - constraints and patient acceptance. 
The goal of this work was to - establish a 3-D ex vivo culture technique in combination with reverse-phase - protein microarrays (RPPM) as a novel experimental tool for use in - cancer research. The RPPM platform allows the parallel profiling - of large numbers of protein analytes to determine their relative - abundance and activation level. Cancer tissue and the respective - corresponding normal tissue controls from patients with colorectal - cancer were cultured ex vivo. At various time points, the cultured - samples were processed into lysates and analyzed on RPPM to assess - the expression of carcinoembryonic antigen (CEA) and 24 proteins - involved in the regulation of apoptosis. The methodology displayed - good robustness and low system noise. As a proof of concept, CEA - expression was significantly higher in tumor compared with normal - tissue (p<0.0001). The caspase 9 expression signal was lower in tumor - tissue than in normal tissue (p<0.001). Cleaved Caspase 8 (p=0.014), - Bad (p=0.007), Bim (p=0.007), p73 (p=0.005), PARP (p<0.001), and - cleaved PARP (p=0.007) were differentially expressed in normal liver - and normal colon tissue. We demonstrate here the feasibility of using - RPPM technology with 3-D ex vivo cultured samples. This approach - is useful for investigating complex patterns of protein expression - and modification over time. 
It should allow functional proteomics - in patient samples with various applications such as pharmacodynamic - analyses in drug development.}, - doi = {10.1002/pmic.200800159}, - institution = {Institute of Medical Oncology, University of Bern, Inselspital, Bern, - Switzerland.}, - keywords = {Apoptosis Regulatory Proteins, analysis/metabolism; Carcinoembryonic - Antigen, analysis/metabolism; Cluster Analysis; Colonic Neoplasms, - metabolism; Humans; Protein Array Analysis, instrumentation/methods; - Reproducibility of Results; Signal Transduction; Tissue Culture Techniques, - instrumentation/methods}, - owner = {wrzodek}, - pmid = {19609961}, - timestamp = {2012.03.22}, - url = {http://dx.doi.org/10.1002/pmic.200800159} -} - -@ARTICLE{Razin1991, - author = {A. Razin and H. Cedar}, - title = {{DNA} methylation and gene expression.}, - journal = {Microbiol Rev}, - year = {1991}, - volume = {55}, - pages = {451--458}, - number = {3}, - month = {Sep}, - abstract = {A large body of evidence demonstrates that DNA methylation plays a - role in gene regulation in animal cells. Not only is there a correlation - between gene transcription and undermethylation, but also transfection - experiments clearly show that the presence of methyl moieties inhibits - gene expression in vivo. Furthermore, gene activation can be induced - by treatment of cells with 5-azacytidine, a potent demethylating - agent. Methylation appears to influence gene expression by affecting - the interactions with DNA of both chromatin proteins and specific - transcription factors. Although methylation patterns are very stable - in somatic cells, the early embryo is characterized by large alterations - in DNA modification. New methodologies are now becoming available - for studying methylation at this stage and in the germ line. During - development, tissue-specific genes undergo demethylation in their - tissue of expression. 
In tissue culture cells this process is highly - specific and appears to involve an active mechanism which takes place - in the absence of DNA replication. The X chromosome undergoes inactivation - during development; this is accompanied by de novo methylation, which - appears necessary to stably maintain its silent state. As opposed - to the programmed changes in DNA methylation which occur in vivo, - immortalized tissue culture cells demonstrate alterations in DNA - modification which take place over a long time scale and which appear - to be the result of selective pressures present during the growth - of these cells in culture.}, - institution = {Department of Cellular Biochemistry, Hebrew University Medical School, - Jerusalem, Israel.}, - keywords = {Animals; Cell Line; DNA, metabolism; Gene Expression Regulation; Humans; - Methylation; Transcriptional Activation}, - owner = {wrzodek}, - pmid = {1943996}, - timestamp = {2012.06.08} -} - -@ARTICLE{Rignall2011, - author = {Benjamin Rignall and Albert Braeuning and Albrecht Buchmann and Michael - Schwarz}, - title = {Tumor formation in liver of conditional $\beta$-catenin-deficient - mice exposed to a diethylnitrosamine/phenobarbital tumor promotion - regimen.}, - journal = {Carcinogenesis}, - year = {2011}, - volume = {32}, - pages = {52--57}, - number = {1}, - month = {Jan}, - abstract = {The antiepileptic drug phenobarbital (PB) is a potent tumor promoter - in mouse liver, where it stimulates the selective outgrowth of tumor - populations harboring activating mutations in Ctnnb1, encoding beta-catenin. - A tumor initiation-promotion study was conducted in mice with conditional - hepatocyte-specific knockout (KO) of Ctnnb1 and in Ctnnb1 wild-type - controls. Mice received a single injection of N-nitrosodiethylamine - (DEN) at the age of 6 weeks followed by continuous administration - of PB given in the diet (0.05\%) for 27 weeks. 
Metabolic activation - of DEN in hepatocytes from both Ctnnb1 wild-type and KO mice was - demonstrated. PB strongly enhanced liver tumor formation in Ctnnb1 - wild-type mice, and 90\% of the PB-promoted tumors were Ctnnb1-mutated. - A similar increase in carcinogenic response was seen when using glucose-6-phosphatase - and glutamine synthetase as tumor markers. The prevalence of tumors - in Ctnnb1 KO mice was approximately 7-fold higher than in wild-type - mice, suggesting an enhancing effect of the gene KO on liver tumor - development. However, in strong contrast to wild-type mice, PB did - not promote tumor formation in the Ctnnb1 KO mice. Livers of KO mice, - particularly from the PB treatment group, demonstrated fibrosis and - massive infiltration of immune cells, an effect not seen in wild-type - mice. In summary, our data demonstrate that (i) liver tumor promotion - by PB requires functional beta-catenin signaling and (ii) absence - of beta-catenin enhances carcinogen-induced hepatocarcinogenesis - and induces a pre-cirrhotic phenotype in mouse liver.}, - doi = {10.1093/carcin/bgq226}, - institution = {Department of Toxicology, Institute of Experimental and Clinical - Pharmacology and Toxicology, University of Tübingen, Germany.}, - keywords = {Animals; Carcinogens, toxicity; DNA Mutational Analysis; Diethylnitrosamine, - toxicity; Immunohistochemistry; Liver Neoplasms, chemically induced/genetics/pathology; - Mice; Mice, Knockout; Phenobarbital, toxicity; Polymorphism, Restriction - Fragment Length; beta Catenin, deficiency/genetics}, - owner = {wrzodek}, - pii = {bgq226}, - pmid = {21047994}, - timestamp = {2012.06.14}, - url = {http://dx.doi.org/10.1093/carcin/bgq226} -} - -@ARTICLE{Schumacher2006, - author = {Axel Schumacher and Philipp Kapranov and Zachary Kaminsky and James - Flanagan and Abbas Assadzadeh and Patrick Yau and Carl Virtanen and - Neil Winegarden and Jill Cheng and Thomas Gingeras and Arturas Petronis}, - title = {Microarray-based {DNA} 
methylation profiling: technology and applications.}, - journal = {Nucleic Acids Res}, - year = {2006}, - volume = {34}, - pages = {528--542}, - number = {2}, - abstract = {This work is dedicated to the development of a technology for unbiased, - high-throughput DNA methylation profiling of large genomic regions. - In this method, unmethylated and methylated DNA fractions are enriched - using a series of treatments with methylation sensitive restriction - enzymes, and interrogated on microarrays. We have investigated various - aspects of the technology including its replicability, informativeness, - sensitivity and optimal PCR conditions using microarrays containing - oligonucleotides representing 100 kb of genomic DNA derived from - the chromosome 22 COMT region in addition to 12 192 element CpG island - microarrays. Several new aspects of methylation profiling are provided, - including the parallel identification of confounding effects of DNA - sequence variation, the description of the principles of microarray - design for epigenomic studies and the optimal choice of methylation - sensitive restriction enzymes. We also demonstrate the advantages - of using the unmethylated DNA fraction versus the methylated one, - which substantially improve the chances of detecting DNA methylation - differences. We applied this methodology for fine-mapping of methylation - patterns of chromosomes 21 and 22 in eight individuals using tiling - microarrays consisting of over 340 000 oligonucleotide probe pairs. 
- The principles developed in this work will help to make epigenetic - profiling of the entire human genome a routine procedure.}, - doi = {10.1093/nar/gkj461}, - institution = {The Krembil Family Epigenetics Laboratory, Centre for Addiction and - Mental Health, 250 College Street, Toronto, ON, Canada M5T 1R8.}, - keywords = {Chromosome Mapping; Chromosomes, Human, Pair 21; Chromosomes, Human, - Pair 22; CpG Islands; DNA Methylation; DNA, chemistry/isolation /&/ - purification; Epigenesis, Genetic; Genome, Human; Genomics, methods; - Humans; Oligonucleotide Array Sequence Analysis, methods; Polymerase - Chain Reaction; Polymorphism, Single Nucleotide; Reproducibility - of Results}, - owner = {wrzodek}, - pii = {34/2/528}, - pmid = {16428248}, - timestamp = {2012.03.21}, - url = {http://dx.doi.org/10.1093/nar/gkj461} -} - -@ARTICLE{Simion2010, - author = {Alexandru Simion and Ilaria Laudadio and Pierre-Paul Pr\'{e}vot and - Peggy Raynaud and Fr\'{e}d\'{e}ric P Lemaigre and Patrick Jacquemin}, - title = {{MiR-495 and miR-218 regulate the expression of the Onecut transcription - factors HNF-6 and OC-2}.}, - journal = {Biochem Biophys Res Commun}, - year = {2010}, - volume = {391}, - pages = {293--298}, - number = {1}, - month = {Jan}, - abstract = {MicroRNAs are small, non-coding RNAs that posttranscriptionally regulate - gene expression mainly by binding to the 3'UTR of their target mRNAs. - Recent data revealed that microRNAs have an important role in pancreas - and liver development and physiology. Using cloning and microarray - profiling approaches, we show that a unique repertoire of microRNAs - is expressed at the onset of liver and pancreas organogenesis, and - in pancreas and liver at key stages of cell fate determination. 
Among - the microRNAs that are expressed at these stages, miR-495 and miR-218 - were predicted to, respectively, target the Onecut (OC) transcription - factors Hepatocyte Nuclear Factor-6 (HNF-6/OC-1) and OC-2, two important - regulators of liver and pancreas development. MiR-495 and miR-218 - are dynamically expressed in developing liver and pancreas, and by - transient transfection, we show that they target HNF-6 and OC-2 3'UTRs. - Moreover, when overexpressed in cultured cells, miR-495 and miR-218 - decrease the endogenous levels of HNF-6 and OC-2 mRNA. These results - indicate that the expression of regulators of liver and pancreas - development is modulated by microRNAs. They also suggest a developmental - role for miR-495 and miR-218.}, - doi = {10.1016/j.bbrc.2009.11.052}, - institution = {Université catholique de Louvain, de Duve Institute, 75 Avenue Hippocrate - 7529, B-1200 Brussels, Belgium.}, - keywords = {Animals; Base Sequence; Cell Line; Gene Expression Profiling; Gene - Expression Regulation, Developmental; Hepatocyte Nuclear Factor 6, - genetics; Homeodomain Proteins, biosynthesis/genetics; Humans; Liver, - embryology/metabolism; Mice; MicroRNAs, genetics/metabolism; Molecular - Sequence Data; Onecut Transcription Factors, biosynthesis/genetics; - Pancreas, embryology/metabolism; Protein Biosynthesis, genetics; - Transcription Factors, biosynthesis/genetics}, - owner = {wrzodek}, - pii = {S0006-291X(09)02227-X}, - pmid = {19913497}, - timestamp = {2012.06.08}, - url = {http://dx.doi.org/10.1016/j.bbrc.2009.11.052} -} - -@ARTICLE{Smyth2004, - author = {Gordon K Smyth}, - title = {Linear models and empirical bayes methods for assessing differential - expression in microarray experiments.}, - journal = {Stat Appl Genet Mol Biol}, - year = {2004}, - volume = {3}, - pages = {Article3}, - abstract = {The problem of identifying differentially expressed genes in designed - microarray experiments is considered. 
Lonnstedt and Speed (2002) - derived an expression for the posterior odds of differential expression - in a replicated two-color experiment using a simple hierarchical - parametric model. The purpose of this paper is to develop the hierarchical - model of Lonnstedt and Speed (2002) into a practical approach for - general microarray experiments with arbitrary numbers of treatments - and RNA samples. The model is reset in the context of general linear - models with arbitrary coefficients and contrasts of interest. The - approach applies equally well to both single channel and two color - microarray experiments. Consistent, closed form estimators are derived - for the hyperparameters in the model. The estimators proposed have - robust behavior even for small numbers of arrays and allow for incomplete - data arising from spot filtering or spot quality weights. The posterior - odds statistic is reformulated in terms of a moderated t-statistic - in which posterior residual standard deviations are used in place - of ordinary standard deviations. The empirical Bayes approach is - equivalent to shrinkage of the estimated sample variances towards - a pooled estimate, resulting in far more stable inference when the - number of arrays is small. The use of moderated t-statistics has - the advantage over the posterior odds that the number of hyperparameters - which need to estimated is reduced; in particular, knowledge of the - non-null prior for the fold changes are not required. The moderated - t-statistic is shown to follow a t-distribution with augmented degrees - of freedom. The moderated t inferential approach extends to accommodate - tests of composite null hypotheses through the use of moderated F-statistics. - The performance of the methods is demonstrated in a simulation study. - Results are presented for two publicly available data sets.}, - doi = {10.2202/1544-6115.1027}, - institution = {Walter and Eliza Hall Institute. 
smyth@wehi.edu.au}, - language = {eng}, - medline-pst = {ppublish}, - owner = {eichner}, - pmid = {16646809}, - timestamp = {2012.06.15}, - url = {http://dx.doi.org/10.2202/1544-6115.1027} -} - -@ARTICLE{Stahl2005, - author = {Sabine Stahl and Carina Ittrich and Philip Marx-Stoelting and Christoph - Köhle and Ozge Altug-Teber and Olaf Riess and Michael Bonin and Jürgen - Jobst and Stephan Kaiser and Albrecht Buchmann and Michael Schwarz}, - title = {Genotype-phenotype relationships in hepatocellular tumors from mice - and man.}, - journal = {Hepatology}, - year = {2005}, - volume = {42}, - pages = {353--361}, - number = {2}, - month = {Aug}, - abstract = {Experimentally induced liver tumors in mice harbor activating mutations - in either Catnb (beta-catenin) or Ha-ras, according to the carcinogenic - treatment. We have now investigated by microarray analysis the gene - expression profiles in tumors of the two genotypes. In total, 364 - genes or expressed sequences with aberrant expression relative to - normal liver were identified, but only 30 of these demonstrated unidirectional - changes in both tumor types. Several functional clusters were identified - that involve changes in amino acid utilization and ammonia disposition - in Catnb-mutated tumors as opposed to alterations in lipid and cholesterol - metabolism in Ha-ras-mutated tumors. Moreover, several genes coding - for inhibitory molecules within the Wnt-signaling pathway were upregulated - in Catnb-mutated tumors, suggesting induction of a negative feedback - loop, whereas Ha-ras-mutated tumors showed alterations in the expression - of several genes functional in monomeric G-protein signaling. We - conclude that mouse hepatoma cells adopt different evolutionary strategies - that allow for their selective outgrowth under variable environmental - conditions. Human hepatocellular cancers (HCC) lack RAS mutations - but are frequently mutated in CTNNB1, the human Catnb ortholog. 
The - set of genes aberrantly expressed in Catnb-mutated mouse tumors was - used to screen, by expression profiling, for dysregulation of orthologous - genes within a panel of 25 HCCs, of which 10 were CTNNB1-mutated. - HCCs with activated beta-catenin displayed a gene expression profile - that was similar to Catnb-mutated mouse tumors but distinct from - the other human HCCs. In conclusion, expression fingerprints may - be used for diagnostic purposes and potential new therapeutic intervention - strategies. Supplementary material for this article can be found - on the HEPATOLOGY website (http://www.interscience.wiley.com/jpages/0270-9139/suppmat/index/html).}, - doi = {10.1002/hep.20768}, - institution = {Institut für Pharmakologie und Toxikologie, Abteilung Toxikologie, - Universität Tübingen, Wilhelmstrasse 56, 72074 Tübingen, Germany.}, - keywords = {Animals; Carcinoma, Hepatocellular, genetics; Cytoskeletal Proteins, - genetics; Gene Expression Profiling; Genes, ras; Genotype; Glutamate-Ammonia - Ligase, genetics; Humans; Liver Neoplasms, genetics; Male; Mice; - Mice, Inbred C3H; Mutation; Phenotype; Signal Transduction; Trans-Activators, - genetics; beta Catenin}, - owner = {wrzodek}, - pmid = {15965925}, - timestamp = {2012.06.06}, - url = {http://dx.doi.org/10.1002/hep.20768} -} - -@ARTICLE{Stahl2005a, - author = {Sabine Stahl and Carina Ittrich and Philip Marx-Stoelting and Christoph - Köhle and Thomas Ott and Albrecht Buchmann and Michael Schwarz}, - title = {Effect of the tumor promoter phenobarbital on the pattern of global - gene expression in liver of connexin32-wild-type and connexin32-deficient - mice.}, - journal = {Int J Cancer}, - year = {2005}, - volume = {115}, - pages = {861--869}, - number = {6}, - month = {Jul}, - abstract = {The antiepileptic drug phenobarbital (PB) is used frequently as a - model tumor promoter in rodent liver. 
It is believed to increase - the probability of cancer by accelerating the clonal expansion of - cells transformed during tumor initiation. The molecular mechanism - underlying this process is only partly understood but seems to require - the function of connexin32 (Cx32), one of the 2 gap junction proteins - expressed in hepatocytes. PB mediates transcriptional activation - of various genes in liver but which of these are relevant for tumor - promotion is unknown. We have used oligonucleotide microarrays to - identify genes differentially modulated in expression by PB in liver - of Cx32-wild-type and Cx32-null mice. Mice of both strains were kept - on PB containing (0.05\%) or control diet for 2 weeks. Total liver - RNA was isolated from 3 mice per experimental group and reverse transcribed; - cDNAs were hybridized to oligonucleotide microarrays and a gene-by-gene - linear model was used for statistical analysis of data. Five genes - were identified as induced or repressed in untreated Cx32-null as - compared to untreated Cx32-wild-type mice. PB affected the expression - of 53 genes, of which 13 code for members of Phase-I/II of drug metabolism, - and 12 genes were differentially affected in expression by PB in - Cx32-null as compared to Cx32-wild-type mice. 
Among the differentially - affected genes that could be verified by quantitative RT-PCR or Western - analysis were the insulin like growth factor binding protein-1, retinol - dehydrogenase-6 and the Y-chromosomally located gene Dby, among which - may be a candidate of relevance for PB-mediated tumor promotion.}, - doi = {10.1002/ijc.20815}, - institution = {Institut für Pharmakologie und Toxikologie, Abteilung Toxikologie, - Universität Tübingen, Germany.}, - keywords = {Animals; Carcinogens, pharmacology; Connexins, genetics; Female; Gene - Expression Profiling; Gene Expression, drug effects; Liver, drug - effects; Male; Mice; Mice, Mutant Strains; Oligonucleotide Array - Sequence Analysis; Phenobarbital, pharmacology}, - owner = {wrzodek}, - pmid = {15751032}, - timestamp = {2012.06.06}, - url = {http://dx.doi.org/10.1002/ijc.20815} -} - -@ARTICLE{Subramanian2005, - author = {Aravind Subramanian and Pablo Tamayo and Vamsi K Mootha and Sayan - Mukherjee and Benjamin L Ebert and Michael A Gillette and Amanda - Paulovich and Scott L Pomeroy and Todd R Golub and Eric S Lander - and Jill P Mesirov}, - title = {Gene set enrichment analysis: a knowledge-based approach for interpreting - genome-wide expression profiles.}, - journal = {Proc Natl Acad Sci U S A}, - year = {2005}, - volume = {102}, - pages = {15545--15550}, - number = {43}, - month = {Oct}, - abstract = {Although genomewide RNA expression analysis has become a routine tool - in biomedical research, extracting biological insight from such information - remains a major challenge. Here, we describe a powerful analytical - method called Gene Set Enrichment Analysis (GSEA) for interpreting - gene expression data. The method derives its power by focusing on - gene sets, that is, groups of genes that share common biological - function, chromosomal location, or regulation. We demonstrate how - GSEA yields insights into several cancer-related data sets, including - leukemia and lung cancer. 
Notably, where single-gene analysis finds - little similarity between two independent studies of patient survival - in lung cancer, GSEA reveals many biological pathways in common. - The GSEA method is embodied in a freely available software package, - together with an initial database of 1,325 biologically defined gene - sets.}, - doi = {10.1073/pnas.0506580102}, - institution = {Broad Institute of Massachusetts Institute of Technology and Harvard, - 320 Charles Street, Cambridge, MA 02141, USA.}, - keywords = {Cell Line, Tumor; Female; Gene Expression Profiling, methods; Genes, - p53, physiology; Genome; Humans; Leukemia, Myeloid, Acute, genetics; - Lung Neoplasms, genetics/mortality; Male; Oligonucleotide Array Sequence - Analysis; Precursor Cell Lymphoblastic Leukemia-Lymphoma, genetics}, - owner = {wrzodek}, - pii = {0506580102}, - pmid = {16199517}, - timestamp = {2012.06.12}, - url = {http://dx.doi.org/10.1073/pnas.0506580102} -} - -@ARTICLE{WEBER1955, - author = {G. Weber and A. Cantero}, - title = {Glucose-6-phosphatase activity in normal, pre-cancerous, and neoplastic - tissues.}, - journal = {Cancer Res}, - year = {1955}, - volume = {15}, - pages = {105--108}, - number = {2}, - month = {Feb}, - keywords = {Neoplasms, metabolism; Phosphoric Monoester Hydrolases}, - owner = {wrzodek}, - pmid = {14352196}, - timestamp = {2012.06.08} -} - -@ARTICLE{Wrzodek2011, - author = {Clemens Wrzodek and Andreas Dr\"ager and Andreas Zell}, - title = {{KEGGtranslator: visualizing and converting the KEGG PATHWAY database - to various formats}.}, - journal = {Bioinformatics}, - year = {2011}, - volume = {27}, - pages = {2314--2315}, - number = {16}, - month = {Aug}, - abstract = {The KEGG PATHWAY database provides a widely used service for metabolic - and nonmetabolic pathways. It contains manually drawn pathway maps - with information about the genes, reactions and relations contained - therein. To store these pathways, KEGG uses KGML, a proprietary XML-format. 
- Parsers and translators are needed to process the pathway maps for - usage in other applications and algorithms. We have developed KEGGtranslator, - an easy-to-use stand-alone application that can visualize and convert - KGML formatted XML-files into multiple output formats. Unlike other - translators, KEGGtranslator supports a plethora of output formats, - is able to augment the information in translated documents (e.g. - MIRIAM annotations) beyond the scope of the KGML document, and amends - missing components to fragmentary reactions within the pathway to - allow simulations on those. KEGGtranslator is freely available as - a Java(™) Web Start application and for download at http://www.cogsys.cs.uni-tuebingen.de/software/KEGGtranslator/. - KGML files can be downloaded from within the application. Contact: clemens.wrzodek@uni-tuebingen.de. Supplementary - data are available at Bioinformatics online.}, - doi = {10.1093/bioinformatics/btr377}, - institution = {Center for Bioinformatics Tuebingen, University of Tuebingen, 72076 - Tübingen, Germany. clemens.wrzodek@uni-tuebingen.de}, - keywords = {Algorithms; Computer Graphics; Databases, Factual; Metabolic Networks - and Pathways; Software}, - owner = {wrzodek}, - pii = {btr377}, - pmid = {21700675}, - timestamp = {2012.03.08}, - url = {http://dx.doi.org/10.1093/bioinformatics/btr377} -} - -@ARTICLE{miRecords, - author = {Xiao, F. and Zuo, Z. and Cai, G. and Kang, S. and Gao, X. and Li, - T.}, - title = {{miRecords: an integrated resource for microRNA-target interactions}.}, - journal = {Nucleic Acids Res}, - year = {2009}, - volume = {37}, - pages = {D105-10}, - number = {Database issue}, - abstract = {MicroRNAs (miRNAs) are an important class of small noncoding RNAs - capable of regulating other genes' expression. Much progress has - been made in computational target prediction of miRNAs in recent - years.
More than 10 miRNA target prediction programs have been established, - yet, the prediction of animal miRNA targets remains a challenging - task. We have developed miRecords, an integrated resource for animal - miRNA-target interactions. The Validated Targets component of this - resource hosts a large, high-quality manually curated database of - experimentally validated miRNA-target interactions with systematic - documentation of experimental support for each interaction. The current - release of this database includes 1135 records of validated miRNA-target - interactions between 301 miRNAs and 902 target genes in seven animal - species. The Predicted Targets component of miRecords stores predicted - miRNA targets produced by 11 established miRNA target prediction - programs. miRecords is expected to serve as a useful resource not - only for experimental miRNA researchers, but also for informatics - scientists developing the next-generation miRNA target prediction - programs. The miRecords is available at http://miRecords.umn.edu/miRecords.}, - keywords = {*Databases, Nucleic Acid *Gene Expression Regulation MicroRNAs/*metabolism - RNA, Messenger/chemistry/metabolism Systems Integration} -} - -@ARTICLE{Zhang2007, - author = {Hongtao Zhang and Alan Berezov and Qiang Wang and Geng Zhang and - Jeffrey Drebin and Ramachandran Murali and Mark I Greene}, - title = {{ErbB} receptors: from oncogenes to targeted cancer therapies.}, - journal = {J Clin Invest}, - year = {2007}, - volume = {117}, - pages = {2051--2058}, - number = {8}, - month = {Aug}, - abstract = {Understanding the genetic origin of cancer at the molecular level - has facilitated the development of novel targeted therapies. Aberrant - activation of the ErbB family of receptors is implicated in many - human cancers and is already the target of several anticancer therapeutics. - The use of mAbs specific for the extracellular domain of ErbB receptors - was the first implementation of rational targeted therapy. 
The cytoplasmic - tyrosine kinase domain is also a preferred target for small compounds - that inhibit the kinase activity of these receptors. However, current - therapy has not yet been optimized, allowing for opportunities for - optimization of the next generation of targeted therapy, particularly - with regards to inhibiting heteromeric ErbB family receptor complexes.}, - doi = {10.1172/JCI32278}, - institution = {Department of Pathology and Laboratory Medicine, University of Pennsylvania - School of Medicine, Philadelphia, Pennsylvania 19104-6082, USA.}, - keywords = {Animals; Antibodies, Monoclonal, therapeutic use; Drug Delivery Systems; - Enzyme Activation, drug effects; Enzyme Inhibitors, therapeutic use; - Humans; Neoplasms, drug therapy/enzymology/genetics; Protein Structure, - Tertiary; Receptor, Epidermal Growth Factor, antagonists /&/ inhibitors/genetics/metabolism}, - owner = {wrzodek}, - pmid = {17671639}, - timestamp = {2012.06.08}, - url = {http://dx.doi.org/10.1172/JCI32278} -} - -@comment{jabref-meta: selector_publisher:} - -@comment{jabref-meta: selector_author:} - -@comment{jabref-meta: selector_journal:} - -@comment{jabref-meta: selector_keywords:} - diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.pdf b/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.pdf deleted file mode 100644 index 2e44726..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.pdf and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.tex b/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.tex deleted file mode 100644 index 8a13459..0000000 --- a/doc/publications/2010-06 Method/2012-06 PLoS/KEGGtranslator_method.tex +++ /dev/null @@ -1,724 +0,0 @@ -% Template for PLoS -% Version 1.0 January 2009 -% -% To compile to pdf, run: -% latex plos.template -% bibtex plos.template -% latex plos.template -% latex plos.template -% dvipdf 
plos.template - -\documentclass[10pt]{article} - -% amsmath package, useful for mathematical formulas -\usepackage{amsmath} -% amssymb package, useful for mathematical symbols -\usepackage{amssymb} - -% graphicx package, useful for including eps and pdf graphics -% include graphics with the command \includegraphics -\usepackage{graphicx} - -% cite package, to clean up citations in the main text. Do not remove. -\usepackage{cite} - -\usepackage{color} - -% Use doublespacing - comment out for single spacing -%\usepackage{setspace} -%\doublespacing - - -% Text layout -\topmargin 0.0cm -\oddsidemargin 0.5cm -\evensidemargin 0.5cm -\textwidth 16cm -\textheight 21cm - -% Bold the 'Figure #' in the caption and separate it with a period -% Captions will be left justified -\usepackage[labelfont=bf,labelsep=period,justification=raggedright]{caption} - -% Use the PLoS provided bibtex style -\bibliographystyle{plos2009} - -% Remove brackets from numbering in List of References -\makeatletter -\renewcommand{\@biblabel}[1]{\quad#1.} -\makeatother - - -% Leave date blank -\date{} - -\pagestyle{myheadings} -%% ** EDIT HERE ** - -%\usepackage{graphicx} -\usepackage{booktabs} % allows \toprule and other table formatting utilities -\usepackage{multirow} -\usepackage[table]{xcolor} % U.a. 
allows for defining colors in HTML models -\usepackage[pdfborder={0 0 0}]{hyperref} % links, but no colored boxes - -%% ** EDIT HERE ** -%% PLEASE INCLUDE ALL MACROS BELOW - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Some macros -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\def\plus{\textsuperscript{+}} -\newcommand{\TODO}[1]{\textcolor{red}{\textbf{#1}}} - -% B -\newcommand{\BiochemicalReaction}{\texttt{Bio\-chemical\-Reaction}} -\newcommand{\BiochemicalReactions}{\texttt{Bio\-chemical\-Reaction}s} - -% C -\newcommand{\Catalysis}{\texttt{Cata\-lysis}} -\newcommand{\ComplexAssembly}{\texttt{Complex\-Assembly}} -\newcommand{\Conversion}{\texttt{Conversion}} -\newcommand{\control}{\texttt{control}} -\newcommand{\Control}{\texttt{Control}} -\newcommand{\Controller}{\texttt{Controller}} -\newcommand{\Controllers}{\texttt{Controller}s} -\newcommand{\Controlled}{\texttt{Controlled}} -\newcommand{\conversion}{\texttt{conversion}} -\newcommand{\Complex}{\texttt{Complex}} -\newcommand{\Complexes}{\texttt{Complex}es} - -% E -\newcommand{\EntityReference}{\texttt{EntityReference}} -\newcommand{\EntityReferences}{\texttt{EntityReference}s} -\newcommand{\Entity}{\texttt{Entity}} - - -% F -\newcommand{\functionTerm}{\texttt{functionTerm}} -\newcommand{\functionTerms}{\texttt{functionTerm}s} - -% I -\newcommand{\Interaction}{\texttt{Inter\-action}} -\newcommand{\InteractionVocabulary}{\texttt{Inter\-action\-Vo\-ca\-bu\-la\-ry}} - -% M -\newcommand{\model}{\texttt{model}} -\newcommand{\Modulation}{\texttt{Modulation}} -\newcommand{\MolecularInteraction}{\texttt{Mo\-le\-cu\-lar\-In\-ter\-ac\-tion}} -\newcommand{\ModifierSpeciesReference}{\texttt{Mo\-di\-fier\-Species\-Reference}} - - -% P -%\newcommand{\PhysicalEntity}{\texttt{Physical\-Entity}} -\newcommand{\physicalInteraction}{\texttt{physical\-Inter\-action}} -\newcommand{\protein}{\texttt{protein}} -\newcommand{\proteins}{\texttt{protein}s} - -% Q 
-\newcommand{\qualitativeModel}{\texttt{qualitative\-Model}} -%\newcommand{\quantitativeModel}{\texttt{quantitative\-Model}} -\newcommand{\qualitativeSpecies}{\texttt{qualitative\-Species}} - -% R -\newcommand{\reaction}{\texttt{re\-ac\-tion}} -\newcommand{\reactions}{\texttt{re\-ac\-tion}s} - -% S -\newcommand{\species}{\texttt{species}} -\newcommand{\smallMolecule}{\texttt{small\-Molecule}} -\newcommand{\SmallMolecules}{\texttt{Small\-Molecule}s} - - -% T -\newcommand{\TemplateReactionRegulation}{\texttt{Template\-Reaction\-Regulation}} -\newcommand{\TemplateReaction}{\texttt{Template\-Reaction}} -\newcommand{\transition}{\texttt{transition}} -\newcommand{\transitions}{\texttt{transition}s} -\newcommand{\Transport}{\texttt{Transport}} -\newcommand{\TransportWithBiochemicalReaction}{\texttt{Transport\-With\-Biochemical\-Reaction}} - -% X -\newcommand{\Xrefs}{\texttt{Xref}s} - -\hyphenation{ -bal-ance -con-ver-ter -trans-la-tor -KEGG-con-ver-ter -KEGG-trans-la-tor -mo-le-cule -be-tween -straight-for-ward -} - -%% END MACROS SECTION - -\begin{document} - -% Title must be 150 characters or less -\begin{flushleft} -{\Large -%Potential running title: Generation of systems biology models from KEGG -\textbf{Precise generation of systems biology models from KEGG pathways} -} -% Insert Author names, affiliations and corresponding author email. -\\ -Clemens Wrzodek$^{1,\ast}$, -Finja B\"uchel\,$^{1}$, -Andreas Dr\"ager\,$^{1}$, -Manuel Ruff\,$^{1}$ and -Andreas Zell\,$^{1}$ -\\ -\bf{1} Center for Bioinformatics Tuebingen (ZBIT), University of Tuebingen, Sand 1, 72076 T\"ubingen, Germany -\\ -$\ast$ E-mail: clemens.wrzodek@uni-tuebingen.de -\end{flushleft} - -% Please keep the abstract between 250 and 300 words -\section*{Abstract} -The KEGG PATHWAY database provides a plethora of pathways for a diversity of organisms. -All pathway components are directly linked to other KEGG databases, such as KEGG COMPOUND or KEGG REACTION. 
-% -Therefore, the pathways can be extended with an enormous amount of information and provide a foundation for initial structural modeling approaches. -% -As a drawback, KGML-formatted KEGG pathways are primarily intended for visualization purposes and often omit important details for the sake of a clear arrangement of their entries. -Thus, a direct conversion into systems biology models would produce incomplete and erroneous models. - -Here, we present a precise method for processing and converting KEGG pathways into initial metabolic and signaling models encoded in the standardized community pathway formats SBML (Levels 2 and 3) and BioPAX (Levels 2 and 3). %, including the qualitative models, groups and layout extensions, -This method involves correcting invalid or incomplete KGML content, creating complete and valid stoichiometric reactions, translating relations to signaling models and augmenting the pathway content with various information, such as cross-references to Entrez Gene, OMIM, UniProt, ChEBI, and many more. -\newline\indent -Finally, we compare several existing conversion tools for KEGG pathways and show that the conversion from KEGG to BioPAX does not involve a loss of information, whilst lossless translations to SBML can only be performed using SBML Level~3, including its recently proposed qualitative models and groups extension packages. - -% Please keep the Author Summary between 150 and 200 words -% Use first person. PLoS ONE authors please skip this step. -% Author Summary not valid for PLoS ONE submissions. -\section*{Author Summary} -Qualitative and quantitative modeling of biochemical networks has become a key discipline in systems biology. Building models from scratch is very time-consuming, especially if multiple reaction equations and complex protein networks are involved. Hence, existing databases and models play an important role in the development of systems biology models.
The KEGG PATHWAY database is a popular database that provides reaction equations, pathway models and many cross-references that could be used to generate initial models. There are only a few existing converters that convert the proprietary KGML-formatted KEGG pathways to the community standards SBML or BioPAX. None of those converters supports qualitative modeling in SBML, adds the correct stoichiometry to reaction participants, or makes use of the database cross-references, provided by KEGG. - -Therefore, we have developed a method, which is implemented in KEGGtranslator, that can be used to generate precise initial systems biology models from KEGG pathways. -In particular, we placed special emphasis on ensuring that these models are correct, complete, well-annotated and that they use and provide all information (i.e., chemical formula, cross-references, etc.) we could extract from various KEGG databases. -These models should ease further modeling steps and lay the foundation for many correct and complete qualitative and quantitative models in the future. - -%% Pathways wichtige ausgangsbasis -%% Leider gibt es wenige convert (2BioPAX) und viele machen gravierende fehler (missing components and stoichiometry) -% deshalb geben wir einen sehr guten vor welcher wert auf korrektheut, vollständigkeit und annotation legt un // using all infos -% in Zukunft zu viel besseren modellen führen wird. -% Sind auch die ersten die überhaupt qualitative modeling in SBML unterstützen - -%The described methods and published tool should provide other researchers tools to perform joint analysis of cross-platform datasets. -\section*{Introduction} - -The KEGG PATHWAY database provides a valuable resource for initial modeling approaches of specific biological networks \cite{Kanehisa2000,KEGG}. The database contains pathway maps for a multitude of different organisms and most provided information is cross-linked with other KEGG databases. 
For many years, this database has been one of the most important sources for building initial structural models of various pathways \cite{Bauer-Mehren2009,Oberhardt2009}. All pathway information is stored in KGML formatted XML-files, which are barely supported by other applications. In systems biology, two wide-spread formats for modeling and exchanging pathways are the Systems Biology Markup Language (SBML) \cite{Finney2003} and Biological Pathway Exchange (BioPAX) \cite{Demir2010_short}. These formats can be used with graphical modeling applications (e.g., CellDesigner \cite{Funahashi2008} or Cytoscape \cite{Cytoscape}), complemented with rate laws (e.g., SBMLsqueezer \cite{SBMLSqueezer}), used for flux balance analysis (e.g., FASIMU \cite{Fasimu}), and many more applications. Therefore, converters exist that perform mostly basic conversions from KGML to those formats \cite{KEGG2SBML,Kuentzer2007,KEGGconverter,KEGG2BioPAX_and_SBML}. The drawback of many of those converters is that even for creating initial models, a basic translation of a KGML document to an SBML or BioPAX document is not sufficient. - -The KGML documents provided by KEGG are mainly designed for graphical representations of pathways and consist of entries (which correspond to nodes in a pathway map), relations (which correspond to edges in a pathway map) and reactions. Relations are mainly contained in signaling maps and encode information such as ``A activates B". Reactions are primarily contained in metabolic pathway maps and consist of substrates, products and information about reversibility of the reaction. Given this information, it seems straightforward to derive an algorithm for viable metabolic models. But a closer look at the actual maps shows that even those reactions are often created for visualization and not for modeling or simulation purposes.
Reactions are sometimes bundled, i.e., one reaction instance is built and multiple reaction identifiers pointing to different reactions, are assigned. There are often missing reactants for reactions, stoichiometric information is omitted and also the list of enzymes, catalyzing a reaction, is not necessarily entirely contained in the KGML document. Similar difficulties arise for the entries in a KGML document. -For the sake of a high-quality graphical representation of the pathway, entries or other elements are sometimes duplicated.% in the KGML file -When interpreting the information content of those files, duplications must be taken into account. -% -Furthermore, a KGML document may contain references to entries, which are not physically present in the actual organism and the KGML specification even allows entries to be reactions. -All those exemplary mentioned problems show that simple one-to-one translations of KEGG pathway maps to other formats are not sufficient to build reliable and useful models. - -To overcome all those drawbacks, we deeply investigated the KGML documents, as well as the content of all cross-linked KEGG databases, and developed strategies for building useful initial models in SBML and BioPAX. Besides automatically correcting many of the mentioned issues, the proposed method includes extensive annotation and augmentation of all provided information to ease further model building and usage of those translated pathway maps. This ranges from adding simple database cross-references (e.g., to UniProt or Entrez Gene) over annotation of chemical formulas and molecular weight of small molecules, to an automated atom balance check of all reactions. All those strategies are now implemented in the second release of the KEGGtranslator application \cite{Wrzodek2011} and described in detail in the following sections. 
- - - -\section*{Preparation of pathway models from the KEGG database} - -Several subsequent steps are involved in the creation of initial models from KEGG pathways. All of these steps are described in detail in the following sections and depicted as a flowchart in Figure~\ref{fig:conversionScheme}. - -\subsection*{The KEGG Markup Language (KGML)} -KEGG uses the KGML format to encode its pathways \cite{KGML}. For each pathway, a generic reference pathway exists that is derived for a plethora of different organisms. All nodes in those pathways mainly correspond to proteins, small molecules, other referenced pathways or complexes and are encoded as entries in KGML. These entries have a type attribute that further specifies its nature. Additionally, they may have a graphics attribute that is essential for pathway visualizations. Entries corresponding to groups contain components that reference their contained entries. - -Besides entries, KGML specifies reactions, which contain substrates and products that are essentially references to the corresponding entries. The only additional information that is given for reactions is a type attribute: either `reversible' or `irreversible'. Moreover, KEGG specifies relations, which are primarily important for the visualization of signaling pathways. Relations contain network connections between two entries, such as ``A phosphorylates B", or ``A inhibits B" but they do not provide sufficient information for conversions to biochemical reactions. - - -\subsection*{Preprocessing and correcting issues in the input KGML} - - -Prior to converting the KEGG pathways to other modeling languages, several issues need to be corrected in preprocessing steps. -% -Operations that are not linked to SBML or to BioPAX are performed as first step directly on the input KGML. These include operations that involve adding or removing entries from the KGML document, as well as processing contained reactions. 
The actual conversion to models is independent of those steps and is performed after the preprocessing. -To generate reliable models, one might want to remove links to other pathway maps from the document. These referenced pathway maps are no physical instances and thus need to be ignored for some model simulation software. However, they might be required for cross-linking pathways. Furthermore, orphans (i.e., entries that are not present in reactions or relations) might be useless for some modeling approaches and therefore may also be removed. -% -An important step towards building metabolic models are correct biochemical reactions. The reactions specified in the KGML require significant preprocessing in order to reliably translate these to SBML or BioPAX. -% -% -KEGG files often contain bundled reactions. These must be disassembled into separate KGML reactions. Otherwise, it is not possible to create balanced and correct biochemical reactions if models keep multiple reactions that are bundled into one record. -Since the information provided in the KGML is limited, the KEGG API needs to be queried for further correction steps. From the KEGG API, information about reversibility of the reaction is retrieved, as well as the reaction equation, including all substrates, products, catalysts, and stoichiometric information. The reversibility is directly annotated on the reaction, the stoichiometric information has to be stored in separate classes, which are later translated to the desired output format. The equation is used to check for missing reaction participants. But simply comparing all KEGG identifiers that are present in the KGML with the reaction equation is not adequate. KEGG consists of many separate databases that contain information about compounds, drugs, glycans, etc. Therefore, one compound might have multiple KEGG identifiers, e.g., one in KEGG COMPOUND and another one in KEGG DRUG. 
The reaction equations specify just one identifier for each participant, which is any of all available identifiers for an object. Therefore, more queries to the KEGG API are necessary in order to fetch all synonyms for all identifiers. Now, it is possible to compare all reactants with the pathway components and check for missing reaction participants and eventually add those to the KGML. A similar method is required to check for missing enzymes (i.e., reaction modifiers) -- we use Enzyme Commission numbers (EC numbers) to check for missing enzymes. - -One last important preprocessing step might be performed before converting the pathways to models. -The KEGG database uses information about orthology to provide pathway maps for different organisms. Enzymes, catalyzing reactions are annotated using EC numbers, which are independent of actual organisms. In some cases, this leads to annotated enzymes or entries in the KGML, for which no physical instance in the current organism of interest is known. In other words, the entry does probably not exist in the current organism or its existence has not yet been proven. To visualize this information, KEGG changes the background color of those orthologous nodes to white. These nodes should also be removed in order to obtain organism-specific models. - -\subsection*{Atom balance of reactions} - -After the described preprocessing step, the KGML document contains unbundled and complete reactions, for which the equation and stoichiometry have been annotated. Using the KEGG API, the chemical formula of each compound, participating in a reaction can be fetched. By using this information together with the stoichiometry, it is possible to count and compare all atoms on the substrate and product side. There are some further properties that need to be considered: A generic `R' is sometimes used on the substrate and product side to indicate any substituent. Variables like $n$ and $n+1$ are used by KEGG to create more generic reactions.
During our tests, we detected some simple cases in which an H\plus{} or P\plus{} was missing, but also some other cases in which multiple atoms (e.g., 2\,C, 3\,H and 1\,P) were missing. Automatically correcting those issues is not recommended because the real missing components are unknown. -% -For example if a P\plus{} is missing on the substrate side, larger compounds could be missing on any side of the reaction. The possibilities of missing components on both sides include ATP~$\longrightarrow$~ADP, NADPH~$\longrightarrow$~NADH, and many others. -Therefore, our method appends the result of each atom check as comment on every reaction and researchers might have to manually correct reactions with missing atoms. - - -\subsection*{Conversion and annotation of the KGML document} - - -The completed and corrected KGML document can now be used to generate models. Therefore, conversions to BioPAX, SBML, SBML-qual and several other formats are required. Typically, the model instance has to be initialized and all entries need to be added to the model. Caution needs to be taken in this step, because multiple copies of an entry might be existent in one KGML document. Usually, every graphical copy catalyzes different reactions. But for systems biology models, only one element should be created for all copies, representing a union of all physically identical entries. Furthermore, KGML specifies an entry type called `reaction', which should not be converted to a physical entity in the resulting model. -Depending on the modeling language, either the reactions or the relations or both need to be converted to the chosen format. - - -Besides those conversion steps, additional operations are required in order to facilitate further modeling efforts by researchers. This includes extensive annotations and comments for all elements. 
Hence, Gene Ontology terms, describing the elements and their function, as well as identifiers for a plethora of other databases for genes, proteins, interactions, structural information, small molecules, etc. are added to the model. In more detail, identifiers are added for Entrez Gene, OMIM, Ensembl, UniProt, ChEBI, DrugBank, Gene Ontology, HGNC, PubChem, 3DMET, NCBI Taxonomy, PDBeChem, GlycomeDB, LipidBank, EC-Numbers (enzyme nomenclature) and various KEGG databases (\uppercase{gene, glycan, reaction, compound, drug, pathway, orthology}). -% -Besides those cross-references, other helpful human and machine-readable annotations are added, for example, official gene symbols, synonyms, human-readable descriptions, links to more resources or visualizations, and the chemical formula and molecular weight for small molecules. - -The annotation of the models is an important step, because simulations on real data or simple experimental data visualization tools require unique identifiers to map the experimental data on the pathway structure. If models provide a simple data structure with labels, but no reference identifiers, they are hardly usable in conjunction with experimental data. - - -\subsection*{KEGG to BioPAX} - -Today, Level~3 is the most recent Level of BioPAX. But Level~2 is still common and there are some data structures in Level~3 that are not available in Level~2. Therefore, separate converters for BioPAX Level~2 and for Level~3 are required. First of all, a BioPAX \model{} has to be created and a pathway object, corresponding to the input KGML, needs to be added to the \model. Then, several annotations and cross-references are defined for this pathway. This includes, for instance, the organism, cross-references to other databases, and gene ontology terms to define the pathway's function. The next step involves mapping each KGML element to a corresponding BioPAX element. Figure~\ref{fig:KGML2BioPAX} gives an overview of these mappings.
- -Having the initial pathway model, the next step is to create BioPAX elements for each KGML entry. This translation mainly depends on the type of the KGML entry and is listed in detail in Table~\ref{tab:KGMLentries2X}. Entries with the same identifier (graphical copies of the same element) are grouped to one instance and only one BioPAX element is created for those. Depending on the just created BioPAX element, further annotation steps are required. For \Complexes, we need to add all of its components. For \SmallMolecules, we add the molecular weight and chemical formula to the corresponding BioPAX fields, which facilitates further modeling steps. For each element, cross-references to other databases and more annotations, as described in the previous section, are added. - -KEGG reactions always correspond to biochemical reactions. Thus, a \BiochemicalReaction{} is the appropriate data structure for those reactions and one instance of this class is created for each reaction. If catalyzing enzymes are annotated, a \Catalysis{} instance is created. This \Catalysis{} has all catalyzing enzymes as \Controllers{} and the \BiochemicalReaction{} as \Controlled{} element. The reaction is annotated with the reaction direction and if it is reversible or not. Further, the stoichiometry of each participant is annotated, as well as the EC numbers of all catalyzing enzymes. Even to the reactions, human readable supporting information is added, like the reaction equation, other pathways in which this reaction also occurs, and a generic description. In addition, the result of the atom balance check is added as further comment, together with comprehensive information which atoms are on the substrate side, which are on the product side and the difference between them. - -Next to biochemical reactions, BioPAX also supports other kinds of relationships between entities. 
-BioPAX distinguishes between interactions, for which one can specify a source and a target (called \Conversion), and interactions describing a pool of interacting components (called \Interaction). -For instance, to express KEGG relations, which have no associated chemical equation but structural information such as ``A activates B", a \Conversion{} can be used. -In contrast, an \Interaction{} is especially useful for cases, in which information is missing or no direction is available. -% -For example, a relation of type binding with two participants -- this just allows for expressing ``A binds B", but no other conclusion can be drawn from such a relation. Therefore, all relations, from which no direction can be inferred, are converted to a \physicalInteraction{} in BioPAX Level~2 and to a \MolecularInteraction{} in BioPAX Level~3 (the Level~2 \physicalInteraction{} has been replaced by \MolecularInteraction{} in Level~3). All relations from which a direction can be inferred are converted to a BioPAX \Conversion{}. -An \InteractionVocabulary{} is created for each interaction, that specifies the type of interaction as SBO term, GO term and human-readable string. Table~\ref{tab:KGMLrelations2X} shows in detail how each relation is converted and which SBO and GO terms are being used. - - - -\subsection*{KEGG to SBML} - -Even though it is not the latest release of SBML, Level~2 Version~4 is still used in many applications and hence, should be supported for the conversion of metabolic models. The most recent SBML Level~3 release introduces extension packages and is required to include qualitative models (qual), groups, and layout information in the document, which are essential for modeling signaling pathways. -% -At the first glance, conversion of KGML to SBML seems to be simple. This is also suggested by the mapping scheme, depicted in Figure~\ref{fig:KGML2SBML}. 
But many properties in SBML are encoded in other fields than actual class instances, and thus are not directly visible to researchers. KEGG defines entries and an entry type, which specifies if the entry corresponds to a protein, complex, small molecule, referenced pathway map, or some other type. BioPAX provides different classes to distinguish between those types. SBML, similar to KGML, just has a class named \species{} to encode all those entries. The type of the \species{} should be specified by using terms from the Systems Biology Ontology (SBO terms) \cite{SBO}. These SBO terms are hierarchically organized and only SBO terms from the `material entity' branch should be used to encode the entities. Table~\ref{tab:KGMLentries2X} shows, which SBO terms are most appropriate to encode the different KGML entries. Furthermore, as in BioPAX translations, it is important to group graphical copies of the same entries to one element and to create only one \species{} element for this entry. -To make the model usable for further applications, extensive annotations and references to other databases are added, using standardized controlled vocabulary (CV) terms and MIRIAM identifiers \cite{Juty2012,Novere2005}. Further, a description, various synonyms, the CAS number, chemical formula, a reference picture (structural formula for compounds, image of the pathway-map for pathways), molecular weight, and mass are added as human-readable annotation, if available. - -Groups are not supported by SBML-core. In order to encode entries of type `group' in SBML Level~3, one can use the groups extension package \cite{SBMLgroups}. To encode groups in SBML prior to Level~3, the only way are annotations, for example by adding a CV term with a \texttt{BQB\_IS\_ENCODED\_BY} or \texttt{BQB\_HAS\_PART} qualifier that specifies the contents of the group. In any case, an SBO term should also be used, which marks this \species{} as a complex of multiple other \species{}. 
- -KEGG reactions are converted to SBML \reactions{} with correct SBO terms for substrates (SBO:0000015) and products (SBO:0000011). If the reaction is reversible, a generic reactant SBO term (SBO:0000010) should be applied to all reaction participants. In addition, the reversibility is annotated to the \reaction{} itself and the stoichiometry is annotated on all reaction participants. Catalyzing enzymes are included as \ModifierSpeciesReference{} and CV terms, referring to the KEGG reaction identifier as well as all pathways, in which this reaction occurs, are added. Human-readable annotations on \reactions{} include the reaction definition, equation, a reference to the reaction equation as HTML-image, and the result of the atom balance check (i.e., if there are missing atoms in the reaction). - -Relations are required to encode signaling pathways but cannot properly be included into core SBML. There is no structure that encodes, e.g., ``A activates B" -- we can only add reactions to SBML. For SBML Level~3, the recently proposed qualitative models (qual) extension package solves this problem \cite{QualSpecification}. This extension is designed for qualitative modeling and allows for -modeling relationships that cannot be described in detail. -%creating transitions, that just specify input, output and a relation between those. -Thus, to encode the KEGG relations, we have to convert the \model{} to a \qualitativeModel{} and create a qualitative \transition{} for each relation. An SBO term, as given in Table~\ref{tab:KGMLrelations2X}, is assigned to the \transition{} to specify its type. A GO term, mentioned in the same table, is further added as CV term on the \transition{}. - - -\subsection*{Further KGML characteristics} - -\subsubsection*{KGML entries that are reactions.} -The KGML specification allows entries to have a type called `reaction'. This can be used, for example, to let a relation point to a reaction. 
Actually, KGML only allows entries to be targets of relations but these constructs can be used to relax the constraints. However, BioPAX naturally allows interactions to point to other interactions as sources or targets. Hence, the document structure is not invalidated if entries with type `reaction' are converted to real reactions in BioPAX and every use of this entry is replaced by using the BioPAX reaction. - -In SBML, these entries are also converted to real reactions. No \species{} is created for entries with type `reaction' in SBML-core. For SBML-qual, the specification has similar requirements as KGML: all \transitions{} must have \qualitativeSpecies{} as sources or targets. Therefore, for SBML-qual the translation is similar to the source KGML and a \qualitativeSpecies{} with adequate annotation is created for entries with type `reaction'. - -\subsubsection*{Relations of subtype `compound'.} -Some KGML documents include reactions and exclusively relations of subtype `compound'. These compound-relations are mostly relations between enzymes and compounds. KEGG states that this compound is ``shared with two successive reactions [\dots]" \cite{KGML}. In other words, these relations are copies of reactions that have been created by KEGG for the sake of better graphical representation of the pathway. Thus, if a converter translates both, the reactions and the relations, those compound-relations contain no additional information and should be skipped. - -\subsubsection*{Documents with glycans instead of compounds.} -Sometimes, KGML specifies glycans as reaction participants instead of compounds. Actually, there is nothing wrong with this, except that the KEGG API often returns reaction equations with compound identifiers and some attributes, such as chemical formula or molecular weight, are exclusively available for compounds. This leads to reactions that are erroneously detected as incorrect or to missing chemical formulas. 
Therefore, if a synonymous compound identifier is available for a KEGG glycan or another KEGG database identifier that contains synonyms in KEGG COMPOUND, it is advisable to fetch and internally work with the compound identifier. Otherwise, it is very likely that duplicates of the same entries but with different identifiers are created in a model and some relationships are not correctly resolved. - - -\subsection*{Implementation and availability} -All described methods are implemented in the second release of KEGGtranslator (since version 2.0). -% -The application uses and includes Paxtools, a Java\texttrademark{} library for working with BioPAX that facilitates building and writing the internal BioPAX data structure (\url{http://www.biopax.org/paxtools.php}). -% -To establish the SBML data structure, KEGGtranslator uses the Java\texttrademark{} library JSBML \cite{JSBML} and supports SBML Level~2 Version~4 \cite{SBMLl2} and SBML Level~3 Version~1 \cite{SBMLl3}. - -KEGGtranslator is implemented in Java\texttrademark, provides an interactive, user-friendly and easy-to-use graphical user interface (GUI), and is freely available under the LGPL version 3 license from \url{http://www.cogsys.cs.uni-tuebingen.de/software/KEGGtranslator/}. KGML pathways can be downloaded automatically from within KEGGtranslator. The application can convert KEGG pathways from KGML files to BioPAX Level~2, BioPAX Level~3, SBML (core), SBML (qual), or SBML-core and -qual in one model. If desired, graphical representations can be created in SBGN, SIF, GML, GraphML, JPG and some other formats. Furthermore, many options are provided that control the described (pre-) processing of KEGG conversions and allow for customization of the generated models to meet a great number of different requirements. - - -\section*{Discussion} - -We successfully established a procedure to create initial structural systems biology models from KEGG pathways. 
These steps aim at complete reconstruction of specific metabolic or signaling networks and hence, go far beyond simple translations. - -But even with all the discussed enhancements and corrections, all models derived from KEGG should only be considered as initial structural models. Many researchers are interested, e.g., in tissue-specific variants of those models. Others want to build kinetic models, constraint-based models, flux-based models, or any other specific model variant. Hence, our goal is to build a solid foundation that can quickly be used for further applications. The generation of these models is eased by providing cross-references to many databases, synonyms, descriptions and other information. This helps researchers to further process the generated models to the desired real model. With the help of annotated cross-references, it is quite easy to, e.g., map experimental data on the resulting model and perform simulations, or use the annotated reactions to identify kinetics in databases like SABIO-RK \cite{SabioRK}. - -The models reflect an effort to use all available information about KEGG pathways and consider the specific aspects of SBML or BioPAX to create complete and correct documents. These specific aspects include, for example, usage of SBO terms and MIRIAM URNs for metabolic SBML, as well as using \transitions{} and \qualitativeSpecies{} from the qual package to model signaling networks. For BioPAX, it is important to create correct instances, use cross-references and vocabularies for annotation and fill corresponding fields, e.g., chemical formula or molecular weight of \SmallMolecules{} or the EC numbers of catalyzed \BiochemicalReactions. -% -But besides those properties, there are more aspects of these formats that cannot be satisfied. This is owed to missing information and the aspiration to avoid creating knowledge out of nothing. 
-% -In SBML, the signaling maps contain \transitions{} that model all relations with information like `phosphorylation' or similar. The qualitative function of \transitions{} is encoded by \functionTerms{}, which define results and conditions in MathML. The information to fill those variables is not available for the KEGG pathways and thus, cannot be given. -% -Further, BioPAX Level~3 provides very interesting constructs to encode several instances of the same protein. For example, one protein might be contained in a pathway in multiple states: inactive (e.g., unphosphorylated), and active (phosphorylated). Since Level~3, BioPAX provides \EntityReferences{} that allow for the creation of several entities in different states for a single \Entity{} instance (i.e., protein). Unfortunately, we cannot fully use these structures, because KEGG does not specify whether a protein takes part in a relation with its phosphorylated, raw or any other form. This distinction is simply not available in KEGG databases. -% -Furthermore, a central dogma of BioPAX is to have \Controller{} and \Controlled{} elements to describe various interactions. For example, a \Controller{} could be an enzyme, controlling a reaction, which is then the \Controlled{} object. But if, e.g., KEGG annotates no enzyme on a reaction, or a relation is translated without knowing who controls this relation, no \Controller{} can be specified. -% -% -% -Besides this, KEGG does not provide information about compartmentalization. Some KEGG graphics do contain illustrations of compartments, but this information is hand-drawn in some pathway pictures and not encoded in any XML or referenced database. Hence, the resulting models just contain a default compartment in which all elements reside. - - - - -\subsection*{Comparison to other KEGG converters} - -There are some other approaches to convert KGML to SBML or BioPAX.
Most of these approaches perform simple one-to-one conversions and do not augment or correct the content of the document. For visualizing a pathway model, this is not necessarily a problem, because there are almost no required processing steps, apart from the actual format conversion. But for creating initial systems biology models, one should take care of all contained reactions and relations. Some important aspects are, for example, that one reaction really is one complete reaction, that all entities can be mapped computationally onto at least one database, and that the resulting document is valid. We created a list of various criteria to compare different conversion tools. Table~\ref{tab:AppVergleich} summarizes the result of this comparison. - -Besides the method described here, no referenced converter is able to build signaling networks. All converters focus on metabolic networks only. Before the release of the qualitative models extension for SBML Level~3, it was not possible to appropriately describe signaling networks in SBML. Because all referenced converters focus on SBML Level~1 or Level~2, it is correct that they do not convert signaling models. This is much more plausible than creating pseudo-\reactions{} or similar constructs. The BioPAX converters also focus on KEGG reactions. Generally, relations encoded in KEGG signaling maps seem to be completely ignored, which is incorrect, because BioPAX provides appropriate data structures to encode those relations. - -KEGGconverter \cite{KEGGconverter} is implemented in Java\texttrademark{} and able to translate KGML documents to SBML L2V1. The resulting \species{} (enzymes and small molecules) do not contain any annotations, notes, or SBO terms and are named with a human readable string containing KEGG identifiers in brackets. Thus, to computationally interpret those models and, e.g., map experimental data on them, one would need to reconstruct the KEGG identifier with a regular expression on the name. 
The conversion is complete (i.e., the complete KGML content is appropriately converted to SBML) and contains no duplicate entries or reactions. But reactions are directly converted as given: No unbundling of grouped reactions or augmenting of missing reactants is performed and the stoichiometry is not set. In our tests, the SBML validator complained that the generated SBML is not valid, because KEGGconverter uses spaces in identifiers which is not allowed in SBML. Besides the KGML conversion, KEGGconverter provides additional functionalities to add kinetics to the resulting models or merge different KGMLs to one model. - -KEGG2SBML \cite{KEGG2SBML} is a Perl script for converting KGML documents to valid SBML, supporting all Levels and Versions up to L2V3. This script uses various flat files from KEGG databases as additional resources and is capable of generating appropriate reactions (unbundled, no missing reactants and no duplicates). Unfortunately, the converted document is not complete (some reactions that should be contained in the pathway are missing), stoichiometry is omitted, and \species{} do not have any notes, annotations or SBO terms. All elements are named by their respective human-readable name, which is nice for manual inspections but renders the converted models barely usable for further subsequent modeling steps. JSim \cite{JSim}, a simulation system for quantitative SBML models, provides converted KEGG pathways for download. Those pathways have been created using KEGG2SBML and thus, the same properties apply for those files. - -BN++ \cite{Kuentzer2007} is an application that is not primarily designed for KEGG translations, but offers this functionality as a side-feature. According to its authors, the project is not maintained anymore and they are working on another project that may again support the translation of KGML files. 
Nevertheless, the available source code offers classes to convert KGML to SBML and BioPAX but we were not able to successfully compile and run their source code. However, BN++ has been used by the KEGG team to generate official BioPAX translations which are still downloadable from the official KEGG FTP and thus, represent a wide-spread used translation from KEGG to BioPAX. These BioPAX Level~2 files are only available for metabolic reference pathways and represent complete translations using appropriate BioPAX classes (e.g., \smallMolecule{} for small molecules and \protein{} for enzymes). All entities are nicely converted with cross-references to corresponding KEGG identifiers and no duplicate entities are created. KEGG database identifiers are also used as names for all entities, which makes the resulting models not directly interpretable to humans. Unfortunately, the conversion contains duplicate reactions, missing reactants are not augmented and there is no option to unbundle reactions. The stoichiometry is always set to one, which is not correct for many reactions. Furthermore the BioPAX fields for formula or molecular weight of small molecules are not used and the validator gives errors for `Cardinality violation' and `RDF Syntax errors'. - -KGML2BioPAX and KGML2SBML are two applications that are part of an ``ongoing effort to develop an ultimate KEGG-based pathway enrichment analysis system" \cite{KEGG2BioPAX_and_SBML}. Unfortunately, both the SBML and BioPAX conversions are not complete (some elements from the source document are missing), contain no revisions of the reactions, and the stoichiometry is erroneously always specified as one. But all elements use KEGG identifiers, which renders the models machine-interpretable and no reactions or entities are contained twice. The SBML Level~2 Version~4 documents are valid, but do not contain notes, annotations or SBO terms. 
The BioPAX Level~2 translations contain all KEGG entries as \proteins, which is not correct for small molecules or complexes, and contain no further annotations. The validator complains about errors in the RDF syntax and usage of ``unknown (or prohibited) class[es], not defined in the BioPAX specification". - -Besides these converters, there are even more possibilities to create SBML documents from KEGG pathways. A popular application is Cytoscape \cite{Cytoscape}, which provides KGMLReader (freely available at \url{http://code.google.com/p/kgmlreader/}), a plugin to read KGML documents, and BiNoM \cite{Zinovyev2008}, a plugin that can write SBML documents. -%These two plugins are not designed to match and neither is intended for building models from KEGG. -But the SBML code, that is generated by linking the results of both plugins, is not usable for further modeling steps. KGMLReader concentrates on graphical representations for Cytoscape and the resulting SBML export of BiNoM barely reflects the input file. It is obvious that the resulting SBML is merely a result of the graphical representation. Edges in the graph primarily connect metabolites with enzymes and each edge is encoded as an SBML reaction. This leads to reactions with small molecules as substrates and enzymes as products, which is clearly incorrect. No elements contain annotations and they are named with a consecutive number only. This renders those documents unusable for further modeling or simulation approaches. -Besides Cytoscape, there are many similar tools, e.g., PathVisio \cite{PathVisio}, Subio (\url{http://www.subio.jp}), or VANTED \cite{VANTED} that mainly focus on a graphical representation of the KGML files, most of which do not have SBML or BioPAX writers. Besides the graphical focus and missing writers, comparison to those tools is not reasonable because they are not intended to act as KEGG converters. 
- -The SuBliMinaL Toolbox \cite{SuBliMinaL} provides a very interesting alternative for metabolic modeling, based on KEGG data. SuBliMinaL does not provide KGML conversion and is thus not directly comparable to other converters. But it provides methods to reconstruct, e.g., whole organism maps from the KEGG database in an appropriate SBML document, which is well-annotated and contains complete and correct reactions. - - - -\subsection*{Conclusion} - -KEGG pathways are a valuable resource for pathway-based modeling approaches. Unfortunately, the KGML-formatted pathways are primarily designed for visualization purposes and not directly usable as metabolic or signaling models. Therefore, many aspects have to be revised and considered when converting the pathways to community standards such as BioPAX or SBML. This ranges from unbundling, correcting and annotating the stoichiometry of reactions, over using exclusively organism-specific and unique entities, to handling relations. With the help of additional information from multiple other KEGG databases, the resulting models provide correct and highly enriched structures that contain far more information than the original KGML. -% -The proposed method, including the qualitative models extension for SBML, is the first method that is able to generate signaling models in SBML or BioPAX from KEGG pathways. Currently, no other approach is able to generate complete pathway models with correct reactions, including stoichiometry and well-annotated SBML (i.e., including SBO terms or MIRIAM URNs) or valid BioPAX documents. - -% -All proposed methods are implemented in the KEGGtranslator application. The models, generated by KEGGtranslator with the here described method, lay the foundations for further modeling approaches, such as constraint-based models, tissue-specific models, or simply including kinetics to the models. 
All conversions obey the special requirements of SBML or BioPAX and include a huge amount of machine- and human-readable annotations. This facilitates the use of those models in other applications that perform further analysis, modeling or simulation steps on them. -% - - -% Do NOT remove this, even if you are not including acknowledgments -\section*{Acknowledgments} -We gratefully acknowledge very fruitful discussions with Nicolas Le Nov\`{e}re, Nicolas Rodriguez, Neil Swainston, Falk Schreiber, Roland Keller, Florian Mittag, Akira Funahashi, and Toshiaki Katayama.\newline - - -%\section*{References} -% The bibtex filename -%\bibliography{../2012-04_CMSB2012/KEGGtranslator_v2_methods} -\begin{thebibliography}{10} -\providecommand{\url}[1]{\texttt{#1}} -\providecommand{\urlprefix}{URL } -\expandafter\ifx\csname urlstyle\endcsname\relax - \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else - \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup - \urlstyle{rm}\Url}\fi -\providecommand{\bibAnnoteFile}[1]{% - \IfFileExists{#1}{\begin{quotation}\noindent\textsc{Key:} #1\\ - \textsc{Annotation:}\ \input{#1}\end{quotation}}{}} -\providecommand{\bibAnnote}[2]{% - \begin{quotation}\noindent\textsc{Key:} #1\\ - \textsc{Annotation:}\ #2\end{quotation}} -\providecommand{\eprint}[2][]{\url{#2}} - -\bibitem{Kanehisa2000} -Kanehisa M, Goto S (2000) {KEGG: Kyoto Encyclopedia of Genes and Genomes}. -\newblock Nucleic Acids Res 28: 27--30. -\bibAnnoteFile{Kanehisa2000} - -\bibitem{KEGG} -Kanehisa M, Goto S, Hattori M, Aoki-Kinoshita KF, Itoh M, et~al. (2006) From - genomics to chemical genomics: new developments in {KEGG}. -\newblock Nucleic Acids Res 34: D354--D357. -\bibAnnoteFile{KEGG} - -\bibitem{Bauer-Mehren2009} -Bauer-Mehren A, Furlong LI, Sanz F (2009) Pathway databases and tools for their - exploitation: benefits, current limitations and challenges. -\newblock Mol Syst Biol 5: 290. 
-\bibAnnoteFile{Bauer-Mehren2009} - -\bibitem{Oberhardt2009} -Oberhardt MA, Palsson B{\O}, Papin JA (2009) Applications of genome-scale - metabolic reconstructions. -\newblock Mol Syst Biol 5: 320. -\bibAnnoteFile{Oberhardt2009} - -\bibitem{Finney2003} -Finney A, Hucka M (2003) Systems biology markup language: Level 2 and beyond. -\newblock Biochem Soc Trans 31: 1472--1473. -\bibAnnoteFile{Finney2003} - -\bibitem{Demir2010_short} -Demir E, Cary MP, Paley S, Fukuda K, Lemer C, et~al. (2010) The {BioPAX} - community standard for pathway data sharing. -\newblock Nat Biotechnol 28: 935--942. -\bibAnnoteFile{Demir2010_short} - -\bibitem{Funahashi2008} -Funahashi A, Matsuoka Y, Jouraku A, Morohashi M, Kikuchi N, et~al. (2008) - {CellDesigner 3.5: A Versatile Modeling Tool for Biochemical Networks}. -\newblock Proceedings of the IEEE 96: 1254 -1265. -\bibAnnoteFile{Funahashi2008} - -\bibitem{Cytoscape} -Smoot ME, Ono K, Ruscheinski J, Wang PL, Ideker T (2011) Cytoscape 2.8: new - features for data integration and network visualization. -\newblock Bioinformatics 27: 431--432. -\bibAnnoteFile{Cytoscape} - -\bibitem{SBMLSqueezer} -Dr\"ager A, Hassis N, Supper J, Schr\"oder A, Zell A (2008) {SBMLsqueezer: a - CellDesigner plug-in to generate kinetic rate equations for biochemical - networks}. -\newblock BMC Systems Biology 2: 39. -\bibAnnoteFile{SBMLSqueezer} - -\bibitem{Fasimu} -Hoppe A, Hoffmann S, Gerasch A, Gille C, Holzh\"utter HG (2011) {FASIMU}: - flexible software for flux-balance computation series in large metabolic - networks. -\newblock BMC Bioinformatics 12: 28. -\bibAnnoteFile{Fasimu} - -\bibitem{KEGG2SBML} -Funahashi A, Jouraku A, Kitano H (2004) {Converting KEGG pathway database to - SBML}. -\newblock 8\textsuperscript{th} Annual International Conference on Research in - Computational Molecular Biology (RECOMB) . -\bibAnnoteFile{KEGG2SBML} - -\bibitem{Kuentzer2007} -K\"untzer J, Backes C, Blum T, Gerasch A, Kaufmann M, et~al. 
(2007) {BNDB -- - the Biochemical Network Database}. -\newblock BMC Bioinformatics 8: 367. -\bibAnnoteFile{Kuentzer2007} - -\bibitem{KEGGconverter} -Moutselos K, Kanaris I, Chatziioannou A, Maglogiannis I, Kolisis FN (2009) - {KEGGconverter: a tool for the \emph{in-silico} modelling of metabolic - networks of the KEGG Pathways database}. -\newblock BMC Bioinformatics 10: 324. -\bibAnnoteFile{KEGGconverter} - -\bibitem{KEGG2BioPAX_and_SBML} -Lee KE, Jang MH, Rhie A, Thong CT, Yang S, et~al. (2010) {Java DOM Parsers to - Convert KGML into SBML and BioPAX Common Exchange Formats}. -\newblock Genomics \& Informatics 8: 94-96. -\bibAnnoteFile{KEGG2BioPAX_and_SBML} - -\bibitem{Wrzodek2011} -Wrzodek C, Dr\"{a}ger A, Zell A (2011) {KEGGtranslator: visualizing and - converting the KEGG PATHWAY database to various formats}. -\newblock Bioinformatics 27: 2314--2315. -\bibAnnoteFile{Wrzodek2011} - -\bibitem{KGML} -{KEGG team} (2010) {KEGG Markup Language}. -\newblock Specification available from the {KEGG} homepage at - \url{http://www.kegg.jp/kegg/xml/docs/}. Accessed 2012, April 23. -\bibAnnoteFile{KGML} - -\bibitem{SBO} -Courtot M, Juty N, Kn\"{u}pfer C, Waltemath D, Zhukova A, et~al. (2011) - Controlled vocabularies and semantics in systems biology. -\newblock Mol Syst Biol 7: 543. -\bibAnnoteFile{SBO} - -\bibitem{Juty2012} -Juty N, {Le Nov\`{e}re} N, Laibe C (2012) {Identifiers.org and MIRIAM Registry: - community resources to provide persistent identification.} -\newblock Nucleic Acids Res 40: D580--D586. -\bibAnnoteFile{Juty2012} - -\bibitem{Novere2005} -{Le Nov\`{e}re} N, Finney A, Hucka M, Bhalla US, Campagne F, et~al. (2005) - Minimum information requested in the annotation of biochemical models - {(MIRIAM)}. -\newblock Nat Biotechnol 23: 1509--1515. -\bibAnnoteFile{Novere2005} - -\bibitem{SBMLgroups} -Hucka M (2009). -\newblock Groups proposal. 
-\newblock Specification available from - \url{http://sbml.org/Community/Wiki/SBML_Level_3_Proposals/Groups_Proposal_%282009-10%29}. - Accessed 2012, April 23. -\bibAnnoteFile{SBMLgroups} - -\bibitem{QualSpecification} -Berenguier D, Chaouiya C, Naldi A, Thieffry D, van Iersel MP (2011). -\newblock {Qualitative Models} (qual). -\newblock Specification available at - \url{http://sbml.org/Community/Wiki/SBML\_Level\_3\_Proposals/Qualitative\_Models}. - Accessed 2012, March 22. -\bibAnnoteFile{QualSpecification} - -\bibitem{JSBML} -Dr\"ager A, Rodriguez N, Dumousseau M, D\"orr A, Wrzodek C, et~al. (2011) - {JSBML: a flexible Java library for working with SBML}. -\newblock Bioinformatics 27: 2167--2168. -\bibAnnoteFile{JSBML} - -\bibitem{SBMLl2} -Hucka M, Hoops S, Keating S, {Le Nov\`{e}re} N, Sahle S, et~al. (2008). -\newblock {Systems Biology Markup Language (SBML) Level 2: Structures and - Facilities for Model Definitions}. -\newblock Specification available from Nature Precedings - \url{http://dx.doi.org/10.1038/npre.2008.2715.1}. Accessed 2012, March 22. -\bibAnnoteFile{SBMLl2} - -\bibitem{SBMLl3} -Hucka M, Bergmann FT, Hoops S, Keating S, Sahle S, et~al. (2010). -\newblock {The Systems Biology Markup Language (SBML): Language Specification - for Level 3 Version 1 Core}. -\newblock Specification available from Nature Precedings - \url{http://dx.doi.org/10.1038/npre.2010.4959.1}. Accessed 2012, March 22. -\bibAnnoteFile{SBMLl3} - -\bibitem{SabioRK} -Wittig U, Kania R, Golebiewski M, Rey M, Shi L, et~al. (2012) - {SABIO-RK}--database for biochemical reaction kinetics. -\newblock Nucleic Acids Res 40: D790--D796. -\bibAnnoteFile{SabioRK} - -\bibitem{JSim} -Raymond~GM BE, JB B (2003) {JSIM: Free software package for teaching - phyiological modeling and research}. -\newblock Exper Biol 280: p102. 
-\bibAnnoteFile{JSim} - -\bibitem{Zinovyev2008} -Zinovyev A, Viara E, Calzone L, Barillot E (2008) {BiNoM: a Cytoscape plugin - for manipulating and analyzing biological networks}. -\newblock Bioinformatics 24: 876--877. -\bibAnnoteFile{Zinovyev2008} - -\bibitem{PathVisio} -{van Iersel} MP, Kelder T, Pico AR, Hanspers K, Coort S, et~al. (2008) - Presenting and exploring biological pathways with {PathVisio}. -\newblock BMC Bioinformatics 9: 399. -\bibAnnoteFile{PathVisio} - -\bibitem{VANTED} -Klukas C, Schreiber F (2010) Integration of -omics data and networks for - biomedical research with {VANTED}. -\newblock J Integr Bioinform 7: 112. -\bibAnnoteFile{VANTED} - -\bibitem{SuBliMinaL} -Swainston N, Smallbone K, Mendes P, Kell D, Paton N (2011) The {SuBliMinaL - Toolbox}: automating steps in the reconstruction of metabolic networks. -\newblock J Integr Bioinform 8: 186. -\bibAnnoteFile{SuBliMinaL} - -\end{thebibliography} - - -\newpage -\section*{Figure Legends} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%% BEGIN FLOWCHART FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{figure*}[htbp] - \begin{center} -% \includegraphics[width=.7\textwidth]{Wrzodek_Fig1.tif} - \end{center} -\caption{Generation of systems biology models from KEGG pathways. The flowchart shows all major steps involved in the creation of initial systems biology models from KEGG pathways. The whole method requires two sources: a KGML-formatted KEGG pathway and access to other KEGG databases, e.g., via the KEGG API. The preprocessing steps, depicted on the top, involve mainly the removal of inappropriate nodes and processing of reactions. -An important step is the removal of duplicate entries. However, some further steps require information about these duplicates (e.g., when using the layout extension package for SBML) and thus, it is not always part of the preprocessing and may be performed at a later stage. 
-Depending on the desired output format, separate processing steps are executed that involve appropriate conversion and annotation of the initial model. -} -\label{fig:conversionScheme} -\end{figure*} -%%%%%%%%%%%%%%%%%%%%%%%%%%%% END FLOWCHART FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%% BEGIN 2BioPAX FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{figure*}[htb] - \begin{center} -% \includegraphics[width=1.5\columnwidth]{Wrzodek_Fig2.tif} - \end{center} -\caption{Simplified class structure and mapping from KGML to BioPAX. The figure shows the raw mapping of KGML to BioPAX class instances. The type attribute determines how each entry is translated (see Table~\ref{tab:KGMLentries2X}). Reactions that are catalyzed by enzymes are translated to \Catalysis{}, whereas non-catalyzed reactions are translated directly to \BiochemicalReactions. Relations are either translated to \Conversion{} or to \physicalInteraction{} in BioPAX Level~2 and \MolecularInteraction{} in Level~3 (see Table~\ref{tab:KGMLrelations2X}). To keep the clarity, the figure does not include the information that in BioPAX Level~2, \control{} and \conversion{} inherit from \physicalInteraction{}. -%Furthermore, a catalysis object contains another object that is controlled by the catalysis, which is for our purposes always a BiochemicalReaction. -Furthermore, a \Catalysis{} consists of two elements: a \Controller{} and a \Controlled{} element. For our purposes, \Controller{} is always an enzyme and \Controlled{} is a \BiochemicalReaction. 
-} -\label{fig:KGML2BioPAX} -\end{figure*} -%%%%%%%%%%%%%%%%%%%%%%%%%%%% END 2BioPAX FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%% BEGIN 2SBML FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{figure*}[htb] - \begin{center} -% \includegraphics[width=1.5\columnwidth]{Wrzodek_Fig3.tif} - \end{center} -\caption{Simplified class structure and mapping from KGML to SBML. This mapping includes the SBML qualitative models (qual) and groups extension packages. Most properties are encoded as attributes on the actual classes. Tables~\ref{tab:KGMLentries2X} and~\ref{tab:KGMLrelations2X} give further details about translation of entries and relations. SBML can only handle reactions, therefore SBML-qual is required to properly encode relations. This extension package requires its own model. Subsequently, the SBML-core \model{} and each \species{} have to be duplicated to obtain a \qualitativeModel{} including the translated relations. Furthermore, the groups extension package can be used for a proper encoding of groups in SBML.} -\label{fig:KGML2SBML} -\end{figure*} -%%%%%%%%%%%%%%%%%%%%%%%%%%%% END 2SBML FIGURE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\newpage -\section*{Tables} -%\begin{table}[!ht] -%\caption{ -%\bf{Table title}} -%\begin{tabular}{|c|c|c|} -%table information -%\end{tabular} -%\begin{flushleft}Table caption -%\end{flushleft} -%\label{tab:label} -% \end{table} - - -%\definecolor{tableShade}{HTML}{F1F5FA} -%\definecolor{tableShade2}{HTML}{ECF3FE} -\definecolor{tableShade2}{gray}{0.95} -%\rowcolors{1}{white}{tableShade2} -%\rowcolors{3}{white}{tableShade2} - -%%%%%%%%%%%%%%%%%%%%%%%% ENTRY TABLE %%%%%%%%%%%%%%%%%%%%%%%% -\begin{table}[htbp] -\centering - -\caption{\textbf{BioPAX instances and SBO terms corresponding to KGML entry types.}} -\label{tab:KGMLentries2X} - -{ -\input{Wrzodek_Tbl1} -} -\begin{flushleft} -This table depicts the conversion of KGML entries to BioPAX or SBML. 
The conversion depends on the KGML entry type attribute. For BioPAX, different class instances are initialized. Conversions to SBML always involve the creation of a \species{} with the given SBO term for each KGML entry. The KGML specification states that an entry of type `gene' ``is a gene product (mostly a protein)". Additionally, a `group' ``is a complex of gene products (mostly a protein complex)" \cite{KGML}. For compatibility with previous KGML versions, the deprecated type `genes' corresponds to `group' since KGML v0.6.1. Further, entries of type `reaction' are not listed in the table, but discussed in a separate section. -\end{flushleft} -\end{table} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%% RELATION TABLE %%%%%%%%%%%%%%%%%%%%%%%% -\setlength{\tabcolsep}{8pt} -\begin{table*}[htbp] -\caption{\textbf{BioPAX instances and SBO terms corresponding to KGML relation subtypes.}} -\label{tab:KGMLrelations2X} - -\setlength{\tabcolsep}{6.0pt} -\makebox[\textwidth]{ -\input{Wrzodek_Tbl2} -} - -\begin{flushleft} -This table shows how relations are handled during conversion to BioPAX or SBML. The conversion depends on the subtype of each relation. For each subtype, the corresponding BioPAX element, as well as SBO terms and GO terms are given. When converting to BioPAX, both terms are annotated as an instance of \InteractionVocabulary, whereas an SBML \transition{} has a field for the SBO term and both terms are additionally added as controlled vocabulary term on the \transition. -Please note that \physicalInteraction{} in BioPAX Level~2 corresponds to \MolecularInteraction{} in BioPAX Level~3. Furthermore, relations of type `compound' are treated differently, as described in a separate section of this publication. 
-\end{flushleft} -\end{table*} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - - -%%%%%%%%%%%%%%%%% Converter Comparison TABLE %%%%%%%%%%%%%%%%% -\newcolumntype{C}{>{\centering\arraybackslash}p{2cm}} -\begin{table*}[htbp] -\caption{\textbf{Comparison of different available converters for KEGG pathways.}} -\label{tab:AppVergleich} - -\setlength{\tabcolsep}{6.0pt} -\makebox[\textwidth]{ -\input{Wrzodek_Tbl3} -} -\begin{flushleft} -This table compares various applications that can convert KEGG pathways to BioPAX or SBML models. A checkmark (\checkmark) is given, if the corresponding converter completely fulfills all requirements, a circle ($\circ$) states that the requirements are only met partially or incorrectly and a minus (-) indicates features, which are not contained at all. `n/a' indicates that a criterion is not applicable to a converter. -A model is \emph{Machine interpretable} if entities in the model can directly be mapped to a database. The criterion \emph{Human interpretable} indicates that a model somehow assigns human readable names or gene symbols to entities. \emph{Signaling pathways} are supported if the converters can read and convert KEGG models with relations. A conversion is \emph{complete} if every relevant reaction of a KGML pathway also occurs in any form in the translated document. For visualization purposes, KGML files often contain multiple copies of entries or reactions. These \emph{duplicates} should be removed. The contained reactions are often \emph{bundled} (multiple reactions are summarized as one) or miss some reaction participants. \emph{Revision of reactions} refers to the completion of missing reaction participants. The \emph{stoichiometry} is not contained in KGML documents and must be parsed from reaction equations in the KEGG REACTION database. 
To test the validity of the models, we used the corresponding validators from \href{http://sbml.org/Facilities/Validator/}{SBML.org} and \href{http://www.biopax.org/biopax-validator/}{BioPAX.org}. A model is marked as \emph{valid}, if the validator does not return any errors. For SBML, we further inspect if the models contain \emph{SBO terms}. It is further recommended to include \emph{notes}, such as human readable descriptions, and \emph{annotations} (e.g., cross-references in form of CV terms, MIRIAM URNs, \Xrefs). Only for BioPAX, it is important to use the \emph{appropriate classes} (instances of \smallMolecule{} for small molecules and instances of \protein{} for proteins) and a nice feature to fill the available BioPAX fields for chemical formula or molecular weight of small molecules (\emph{SM annotations}). -\end{flushleft} -\end{table*} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\end{document} - diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig1.tif b/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig1.tif deleted file mode 100644 index 920142c..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig1.tif and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig2.tif b/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig2.tif deleted file mode 100644 index 273313c..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig2.tif and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig3.tif b/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig3.tif deleted file mode 100644 index ee7a6e5..0000000 Binary files a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Fig3.tif and /dev/null differ diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl1.tex b/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl1.tex deleted file mode 100644 index 7c11797..0000000 --- 
a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl1.tex +++ /dev/null @@ -1,17 +0,0 @@ -\begin{tabular}{lll} -\toprule - -Entry type & BioPAX element & SBO term \\ -\midrule -\rowcolor{tableShade2} -compound & smallMolecule & 247 (simple chemical) \\ -enzyme & protein & 252 (polypeptide chain) \\ -\rowcolor{tableShade2} -gene & protein & 252 (polypeptide chain) \\ -ortholog & protein & 252 (polypeptide chain) \\ -\rowcolor{tableShade2} -group & complex & 253 (non-covalent complex) \\ -map & pathway & 552 (reference annotation) \\ - -\bottomrule -\end{tabular} \ No newline at end of file diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl2.tex b/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl2.tex deleted file mode 100644 index f1c60ae..0000000 --- a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl2.tex +++ /dev/null @@ -1,38 +0,0 @@ -\begin{tabular}{llllll} -\toprule -Relation subtype & BioPAX element & SBO term & SBO name & GO term & GO name\\ -\midrule - -\rowcolor{tableShade2} -activation & conversion & SBO:0000170 & stimulation & \emph{none} & \\ -inhibition & conversion & SBO:0000169 & inhibition & \emph{none} & \\ -\rowcolor{tableShade2} -expression & conversion & SBO:0000170 & stimulation & GO:0010467 & gene expression \\ -repression & conversion & SBO:0000169 & inhibition & \emph{none} &\\ -\rowcolor{tableShade2} -indirect effect & conversion & SBO:0000344 & molecular interaction & \emph{none} & \\ -state change & conversion & SBO:0000168 & control & \emph{none} & \\ - -\rowcolor{tableShade2} - & physicalInteraction/ & & non-covalent & & non-covalent \\ -\rowcolor{tableShade2} -\multirow{-2}{*}{binding/association} & MolecularInteraction & \multirow{-2}{*}{SBO:0000177} & binding & \multirow{-2}{*}{GO:0005488} & binding \\ - - & physicalInteraction/ & & & \\ -\multirow{-2}{*}{dissociation} & MolecularInteraction & \multirow{-2}{*}{SBO:0000180} & \multirow{-2}{*}{dissociation} & \multirow{-2}{*}{\emph{none}} &\\ - 
-\rowcolor{tableShade2} - & physicalInteraction/ & & & & \\ -\rowcolor{tableShade2} -\multirow{-2}{*}{missing interaction} & MolecularInteraction & \multirow{-2}{*}{SBO:0000396} & \multirow{-2}{*}{uncertain process} & \multirow{-2}{*}{\emph{none}} & \\ - -phosphorylation & conversion & SBO:0000216 & phosphorylation & GO:0016310 & phosphorylation \\ -\rowcolor{tableShade2} -dephosphorylation & conversion & SBO:0000330 & dephosphorylation & GO:0016311 & dephosphorylation \\ -glycosylation & conversion & SBO:0000217 & glycosylation & GO:0070085 & glycosylation \\ -\rowcolor{tableShade2} -ubiquitination & conversion & SBO:0000224 & ubiquitination & GO:0016567 & ubiquitination \\ -methylation & conversion & SBO:0000214 & methylation & GO:0032259 & methylation \\ - -\bottomrule -\end{tabular} \ No newline at end of file diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl3.tex b/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl3.tex deleted file mode 100644 index 01d3fe5..0000000 --- a/doc/publications/2010-06 Method/2012-06 PLoS/Wrzodek_Tbl3.tex +++ /dev/null @@ -1,53 +0,0 @@ -\begin{tabular}{lcccCcc} -\toprule - & KEGG2SBML & BN++ & KEGGconverter & KGML2BioPAX KGML2SBML & \multicolumn{2}{c}{KEGGtranslator} \\ -Version & 1.5.0 & 1.1 & n/a & n/a & 1.2 & 2.0 \\ -Release date & 2008-07-28 & 2009-04-22 & 2009-12-18 & 2010-06-03 & 2011-07-04 & 2012-06-04 \\ -Authors & Funahashi \emph{et al.} & K\"untzer \emph{et al.} & Moutselos \emph{et al.} & Lee \emph{et al.} & \multicolumn{2}{c}{Wrzodek \emph{et al.}} \\ -\midrule - -\multicolumn{7}{l}{\textbf{Supported model formats}} \\ - -\rowcolor{tableShade2} -~~SBML & \checkmark & $\circ$ & \checkmark & \checkmark & \checkmark & \checkmark \\ -~~BioPAX & - & \checkmark & - & \checkmark & - & \checkmark \\ - -\multicolumn{7}{l}{\textbf{Generic translation features}} \\ -\rowcolor{tableShade2} -~~Machine interpretable & $\circ$ & \checkmark & $\circ$ & \checkmark & \checkmark & \checkmark \\ -~~Human 
interpretable & \checkmark & - & \checkmark & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Signaling pathways & - & - & - & - & - & \checkmark \\ -~~Complete & - & \checkmark & \checkmark & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~No duplicate entries & \checkmark & \checkmark & \checkmark & \checkmark & - & \checkmark \\ -~~No duplicate reactions & \checkmark & - & \checkmark & \checkmark & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Unbundle reactions & \checkmark & - & - & - & - & \checkmark \\ -~~Revision of reactions & \checkmark & - & - & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Stoichiometry & - & - & - & - & - & \checkmark \\ - -\multicolumn{7}{l}{\textbf{SBML}} \\ -\rowcolor{tableShade2} -~~Valid & \checkmark & n/a & - & \checkmark & \checkmark & \checkmark \\ -~~Level.Version & 1.1 up to 2.3 & n/a & 2.1 & 2.4 & 2.4 & 2.4, 3.1 \\ -\rowcolor{tableShade2} -~~SBO terms & - & n/a & - & - & \checkmark & \checkmark \\ -~~Notes & - & n/a & - & - & \checkmark & \checkmark \\ -\rowcolor{tableShade2} -~~Annotations & - & n/a & - & - & \checkmark & \checkmark \\ - -\multicolumn{7}{l}{\textbf{BioPAX}} \\ -\rowcolor{tableShade2} -~~Valid & n/a & - & n/a & - & n/a & \checkmark \\ -~~Level & n/a &2& n/a & 2 & n/a & 2, 3 \\ -\rowcolor{tableShade2} -~~Appropriate classes & n/a & \checkmark & n/a & - & n/a & \checkmark \\ -~~Notes & n/a & - & n/a & - & n/a & \checkmark \\ -\rowcolor{tableShade2} -~~Annotations & n/a & \checkmark & n/a & - & n/a & \checkmark \\ -~~SM annotations & n/a & - & n/a & - & n/a & \checkmark \\ - -\bottomrule -\end{tabular} \ No newline at end of file diff --git a/doc/publications/2010-06 Method/2012-06 PLoS/plos2009.bst b/doc/publications/2010-06 Method/2012-06 PLoS/plos2009.bst deleted file mode 100644 index 122acf5..0000000 --- a/doc/publications/2010-06 Method/2012-06 PLoS/plos2009.bst +++ /dev/null @@ -1,1330 +0,0 @@ -%% -%% This is file `PLoS.bst', -%% generated with the docstrip 
utility. -%% -%% The original source files were: -%% -%% merlin.mbs (with options: `annote,seq-no,nm-rvx,ed-rev,jnrlst,nmlm,x5,m5,dt-beg,yr-par,xmth,yrp-x,jxper,jttl-rm,vnum-x,pp-last,num-xser,jnm-x,btit-rm,bt-rm,pg-bk,add-pub,pre-pub,doi,in-col,pp,xedn,jabr,xand,eprint,url,url-blk,nfss,') -%% ---------------------------------------- -%% *** This works for PLoS (as of October. 2008) *** -%% **** Updated Oct. 2008 by JZR -%% -%% Copyright 1994-2004 Patrick W Daly - % =============================================================== - % IMPORTANT NOTICE: - % This bibliographic style (bst) file has been generated from one or - % more master bibliographic style (mbs) files, listed above. - % - % This generated file can be redistributed and/or modified under the terms - % of the LaTeX Project Public License Distributed from CTAN - % archives in directory macros/latex/base/lppl.txt; either - % version 1 of the License, or any later version. - % =============================================================== - % Name and version information of the main mbs file: - % \ProvidesFile{merlin.mbs}[2004/02/09 4.13 (PWD, AO, DPC)] - % For use with BibTeX version 0.99a or later - %------------------------------------------------------------------- - % This bibliography style file is intended for texts in ENGLISH - % This is a numerical citation style, and as such is standard LaTeX. - % It requires no extra package to interface to the main text. - % The form of the \bibitem entries is - % \bibitem{key}... - % Usage of \cite is as follows: - % \cite{key} ==>> [#] - % \cite[chap. 2]{key} ==>> [#, chap. 2] - % where # is a number determined by the ordering in the reference list. - % The order in the reference list is that by which the works were originally - % cited in the text, or that in the database. 
- %--------------------------------------------------------------------- - -ENTRY - { address - annote - archive - author - booktitle - chapter - doi - edition - editor - eid - eprint - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - url - volume - year - } - {} - { label } -INTEGERS { output.state before.all mid.sentence after.sentence after.block } -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} -STRINGS { s t} -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ - annote missing$ - { "\bibAnnoteFile{" cite$ * "}" * write$ newline$ } - { "\bibAnnote{" cite$ * "}{" * annote * "}" * write$ newline$ - } - if$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} -FUNCTION {add.blank} -{ " " * before.all 'output.state := -} - -FUNCTION {date.block} -{ - add.blank -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} -STRINGS {z} -FUNCTION {remove.dots} -{ 'z := - "" - { z empty$ not } - { z #1 #1 substring$ - z #2 global.max$ substring$ 'z := - duplicate$ "." 
= 'pop$ - { * } - if$ - } - while$ -} -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} -FUNCTION {field.or.null} -{ duplicate$ empty$ - { pop$ "" } - 'skip$ - if$ -} -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "\emph{" swap$ * "}" * } - if$ -} -FUNCTION {tie.or.space.prefix} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ -} - -FUNCTION {capitalize} -{ "u" change.case$ "t" change.case$ } - -FUNCTION {space.word} -{ " " swap$ * " " * } - % Here are the language-specific definitions for explicit words. - % Each function has a name bbl.xxx where xxx is the English word. - % The language selected here is ENGLISH -FUNCTION {bbl.and} -{ "and"} - -FUNCTION {bbl.etal} -{ "et~al." } - -FUNCTION {bbl.editors} -{ "editors" } - -FUNCTION {bbl.editor} -{ "editor" } - -FUNCTION {bbl.edby} -{ "edited by" } - -FUNCTION {bbl.edition} -{ "edition" } - -FUNCTION {bbl.volume} -{ "volume" } - -FUNCTION {bbl.of} -{ "of" } - -FUNCTION {bbl.number} -{ "number" } - -FUNCTION {bbl.nr} -{ "no." } - -FUNCTION {bbl.in} -{ "in" } - -FUNCTION {bbl.pages} -{ "pp." } - -FUNCTION {bbl.page} -{ "p." } - -FUNCTION {bbl.chapter} -{ "chapter" } - -FUNCTION {bbl.techrep} -{ "Technical Report" } - -FUNCTION {bbl.mthesis} -{ "Master's thesis" } - -FUNCTION {bbl.phdthesis} -{ "Ph.D. thesis" } - -MACRO {jan} {"January"} - -MACRO {feb} {"February"} - -MACRO {mar} {"March"} - -MACRO {apr} {"April"} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"August"} - -MACRO {sep} {"September"} - -MACRO {oct} {"October"} - -MACRO {nov} {"November"} - -MACRO {dec} {"December"} - -MACRO {acmcs} {"ACM Comput. 
Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Software Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput. Aid. Des."} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Program."} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Graphic."} - -MACRO {toms} {"ACM Trans. Math. Software"} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Progr. Lang. Syst."} - -MACRO {tcs} {"Theor. Comput. Sci."} - -FUNCTION {bibinfo.check} -{ swap$ - duplicate$ missing$ - { - pop$ pop$ - "" - } - { duplicate$ empty$ - { - swap$ pop$ - } - { swap$ - pop$ - } - if$ - } - if$ -} -FUNCTION {bibinfo.warn} -{ swap$ - duplicate$ missing$ - { - swap$ "missing " swap$ * " in " * cite$ * warning$ pop$ - "" - } - { duplicate$ empty$ - { - swap$ "empty " swap$ * " in " * cite$ * warning$ - } - { swap$ - pop$ - } - if$ - } - if$ -} -FUNCTION {format.eprint} -{ eprint duplicate$ empty$ - 'skip$ - { "\eprint" - archive empty$ - 'skip$ - { "[" * archive * "]" * } - if$ - "{" * swap$ * "}" * - } - if$ -} -FUNCTION {format.url} -{ url empty$ - { "" } - { "\urlprefix\url{" url * "}" * } - if$ -} - -STRINGS { bibinfo} -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 'bibinfo := - duplicate$ empty$ 'skip$ { - 's := - "" 't := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{vv~}{ll}{ f{}}{ jj}" - format.name$ - remove.dots - bibinfo bibinfo.check - 't := - nameptr #1 > - { - nameptr #5 - #1 + = - numnames #5 - > and - { "others" 't := - #1 'namesleft := } - 'skip$ - if$ - namesleft #1 > - { ", " * t * } - { - "," * - s nameptr "{ll}" 
format.name$ duplicate$ "others" = - { 't := } - { pop$ } - if$ - t "others" = - { - " " * bbl.etal * - } - { " " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ - } if$ -} -FUNCTION {format.names.ed} -{ - format.names -} -FUNCTION {format.authors} -{ author "author" format.names -} -FUNCTION {get.bbl.editor} -{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ } - -FUNCTION {format.editors} -{ editor "editor" format.names duplicate$ empty$ 'skip$ - { - "," * - " " * - get.bbl.editor - * - } - if$ -} -FUNCTION {format.book.pages} -{ pages "pages" bibinfo.check - duplicate$ empty$ 'skip$ - { " " * bbl.pages * } - if$ -} -FUNCTION {format.doi} -{ doi "doi" bibinfo.check - duplicate$ empty$ 'skip$ - { - new.block - "\doi{" swap$ * "}" * - } - if$ -} -FUNCTION {format.note} -{ - note empty$ - { "" } - { note #1 #1 substring$ - duplicate$ "{" = - 'skip$ - { output.state mid.sentence = - { "l" } - { "u" } - if$ - change.case$ - } - if$ - note #2 global.max$ substring$ * "note" bibinfo.check - } - if$ -} - -FUNCTION {format.title} -{ title - duplicate$ empty$ 'skip$ - { "t" change.case$ } - if$ - "title" bibinfo.check -} -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {n.dashify} -{ - 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "-" = not - %{ "--" * - { "-" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {word.in} -{ bbl.in capitalize - ":" * - " " * } - -FUNCTION {format.date} -{ - "" - duplicate$ empty$ - year "year" bibinfo.check duplicate$ empty$ - { swap$ 'skip$ - { "there's a month but no year in " cite$ * warning$ } - if$ - * - } - { swap$ 'skip$ - { - swap$ - " " * 
swap$ - } - if$ - * - } - if$ - duplicate$ empty$ - 'skip$ - { - before.all 'output.state := - " (" swap$ * ")" * - } - if$ -} -FUNCTION {format.btitle} -{ title "title" bibinfo.check - duplicate$ empty$ 'skip$ - { - } - if$ -} -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { bbl.volume volume tie.or.space.prefix - "volume" bibinfo.check * * - series "series" bibinfo.check - duplicate$ empty$ 'pop$ - { swap$ bbl.of space.word * swap$ - emphasize * } - if$ - "volume and number" number either.or.check - } - if$ -} -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { series empty$ - { number "number" bibinfo.check } - { output.state mid.sentence = - { bbl.number } - { bbl.number capitalize } - if$ - number tie.or.space.prefix "number" bibinfo.check * * - bbl.in space.word * - series "series" bibinfo.check * - } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition duplicate$ empty$ 'skip$ - { - output.state mid.sentence = - { "l" } - { "t" } - if$ change.case$ - "edition" bibinfo.check - " " * bbl.edition * - } - if$ -} -INTEGERS { multiresult } -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} -FUNCTION {format.pages} -{ pages duplicate$ empty$ 'skip$ - { duplicate$ multi.page.check - { - bbl.pages swap$ - n.dashify - } - { - bbl.page swap$ - } - if$ - tie.or.space.prefix - "pages" bibinfo.check - * * - } - if$ -} -FUNCTION {format.journal.pages} -{ pages duplicate$ empty$ 'pop$ - { swap$ duplicate$ empty$ - { pop$ pop$ format.pages } - { - ": " * - swap$ - n.dashify - "pages" bibinfo.check - * - } - if$ - } - if$ -} -FUNCTION 
{format.journal.eid} -{ eid "eid" bibinfo.check - duplicate$ empty$ 'pop$ - { swap$ duplicate$ empty$ 'skip$ - { - ": " * - } - if$ - swap$ * - } - if$ -} -FUNCTION {format.vol.num.pages} -{ volume field.or.null - duplicate$ empty$ 'skip$ - { - "volume" bibinfo.check - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - { "" } - { type empty$ - { bbl.chapter } - { type "l" change.case$ - "type" bibinfo.check - } - if$ - chapter tie.or.space.prefix - "chapter" bibinfo.check - * * - } - if$ -} - -FUNCTION {format.booktitle} -{ - booktitle "booktitle" bibinfo.check -} -FUNCTION {format.in.ed.booktitle} -{ format.booktitle duplicate$ empty$ 'skip$ - { - editor "editor" format.names.ed duplicate$ empty$ 'pop$ - { - "," * - " " * - get.bbl.editor - ", " * - * swap$ - * } - if$ - word.in swap$ * - } - if$ -} -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - { "all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} -FUNCTION {format.thesis.type} -{ type duplicate$ empty$ - 'pop$ - { swap$ pop$ - "t" change.case$ "type" bibinfo.check - } - if$ -} -FUNCTION {format.tr.number} -{ number "number" bibinfo.check - type duplicate$ empty$ - { pop$ bbl.techrep } - 'skip$ - if$ - "type" bibinfo.check - swap$ duplicate$ empty$ - { pop$ "t" change.case$ } - { tie.or.space.prefix * * } - if$ -} -FUNCTION {format.article.crossref} -{ - key duplicate$ empty$ - { pop$ - journal duplicate$ empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * warning$ } - { "journal" bibinfo.check emphasize word.in swap$ * } - if$ - } - { word.in swap$ * " " *} - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - "editor" bibinfo.check - editor num.names$ duplicate$ - #2 > - { pop$ - "editor" bibinfo.check - " " * bbl.etal - * - } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ 
"others" = - { - "editor" bibinfo.check - " " * bbl.etal - * - } - { - bbl.and space.word - * editor #2 "{vv~}{ll}" format.name$ - "editor" bibinfo.check - * - } - if$ - } - if$ - } - if$ -} -FUNCTION {format.book.crossref} -{ volume duplicate$ empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - pop$ word.in - } - { bbl.volume - capitalize - swap$ tie.or.space.prefix "volume" bibinfo.check * * bbl.of space.word * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { series emphasize * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.incoll.inproc.crossref} -{ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { format.booktitle duplicate$ empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - } - { word.in swap$ * } - if$ - } - { word.in key * " " *} - if$ - } - { word.in format.crossref.editor * " " *} - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.org.or.pub} -{ 't := - "" - address empty$ t empty$ and - 'skip$ - { - address "address" bibinfo.check * - t empty$ - 'skip$ - { address empty$ - 'skip$ - { ": " * } - if$ - t * - } - if$ - } - if$ -} -FUNCTION {format.publisher.address} -{ publisher "publisher" bibinfo.warn format.org.or.pub -} - -FUNCTION {format.organization.address} -{ organization "organization" bibinfo.check format.org.or.pub -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - format.date "year" output.check - date.block - format.title "title" output.check - new.block - crossref missing$ - { - journal - remove.dots - "journal" bibinfo.check - "journal" output.check - add.blank - format.vol.num.pages output - } - { format.article.crossref output.nonnull - } - if$ - eid empty$ 
- { format.journal.pages } - { format.journal.eid } - if$ -% format.doi output -% new.block -% format.url output -% new.block -% format.note output -% format.eprint output - fin.entry -} -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - format.date "year" output.check - date.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - format.publisher.address output - } - { - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.book.pages output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} -FUNCTION {booklet} -{ output.bibitem - format.authors output - format.date output - date.block - format.title "title" output.check - new.block - howpublished "howpublished" bibinfo.check output - address "address" bibinfo.check output - format.book.pages output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - format.date "year" output.check - date.block - format.btitle "title" output.check - crossref missing$ - { - format.publisher.address output - format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - } - { - format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.pages "pages" output.check - format.doi output - 
new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - format.date "year" output.check - date.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.publisher.address output - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - format.edition output - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - format.pages "pages" output.check - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - format.date "year" output.check - date.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - new.sentence - publisher empty$ - { format.organization.address output } - { organization "organization" bibinfo.check output - format.publisher.address output - } - if$ - format.bvolume output - format.number.series output - } - { format.incoll.inproc.crossref output.nonnull - } - if$ - format.pages "pages" output.check - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} -FUNCTION {conference} { inproceedings } -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization "organization" bibinfo.check - duplicate$ empty$ 'pop$ - { output - address "address" bibinfo.check output - } - if$ - } - { format.authors output.nonnull } - if$ - format.date output - date.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { - address new.block.checka - address "address" bibinfo.check output - } - 'skip$ - if$ - } - { - organization address new.block.checkb - organization 
"organization" bibinfo.check output - address "address" bibinfo.check output - } - if$ - format.edition output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - format.date "year" output.check - date.block - format.btitle - "title" output.check - new.block - bbl.mthesis format.thesis.type output.nonnull - school "school" bibinfo.warn output - address "address" bibinfo.check output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {misc} -{ output.bibitem - format.authors output - format.date output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished "howpublished" bibinfo.check output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry - empty.misc.check -} -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - format.date "year" output.check - date.block - format.btitle - "title" output.check - new.block - bbl.phdthesis format.thesis.type output.nonnull - school "school" bibinfo.warn output - address "address" bibinfo.check output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization "organization" bibinfo.check output - } - { format.editors output.nonnull } - if$ - format.date "year" output.check - date.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - editor empty$ - { publisher empty$ - 'skip$ - { - new.sentence - format.publisher.address output - } - if$ - } - { publisher empty$ - { - new.sentence - format.organization.address output } - { - new.sentence - organization 
"organization" bibinfo.check output - format.publisher.address output - } - if$ - } - if$ - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - format.date "year" output.check - date.block - format.title - "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" bibinfo.warn output - address "address" bibinfo.check output - format.doi output - new.block - format.url output - new.block - format.note output - format.eprint output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - format.date output - date.block - format.title "title" output.check - format.doi output - new.block - format.url output - new.block - format.note "note" output.check - format.eprint output - fin.entry -} - -FUNCTION {default.type} { misc } -READ -STRINGS { longest.label } -INTEGERS { number.label longest.label.width } -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} -EXECUTE {initialize.longest.label} -ITERATE {longest.label.pass} -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * "}" * - write$ newline$ - "\providecommand{\url}[1]{\texttt{#1}}" - write$ newline$ - "\providecommand{\urlprefix}{URL }" - write$ newline$ - "\expandafter\ifx\csname urlstyle\endcsname\relax" - write$ newline$ - " \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else" - write$ newline$ - " \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup \urlstyle{rm}\Url}\fi" - write$ newline$ - 
"\providecommand{\bibAnnoteFile}[1]{%" - write$ newline$ - " \IfFileExists{#1}{\begin{quotation}\noindent\textsc{Key:} #1\\" - write$ newline$ - " \textsc{Annotation:}\ \input{#1}\end{quotation}}{}}" - write$ newline$ - "\providecommand{\bibAnnote}[2]{%" - write$ newline$ - " \begin{quotation}\noindent\textsc{Key:} #1\\" - write$ newline$ - " \textsc{Annotation:}\ #2\end{quotation}}" - write$ newline$ - "\providecommand{\eprint}[2][]{\url{#2}}" - write$ newline$ -} -EXECUTE {begin.bib} -EXECUTE {init.state.consts} -ITERATE {call.type$} -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} -EXECUTE {end.bib} -%% End of customized bst file -%% -%% End of file `PLoS.bst'. diff --git a/lib/keggapi.jar b/lib/keggapi.jar deleted file mode 100755 index a87db30..0000000 Binary files a/lib/keggapi.jar and /dev/null differ diff --git a/src/de/zbit/kegg/io/KEGG2BioPAX.java b/src/de/zbit/kegg/io/KEGG2BioPAX.java index 148e89a..4d229ad 100644 --- a/src/de/zbit/kegg/io/KEGG2BioPAX.java +++ b/src/de/zbit/kegg/io/KEGG2BioPAX.java @@ -343,7 +343,7 @@ public BioPAXElement createBioSource(Pathway p) { String taxonID=""; // Get from KEGG API - KeggInfos orgInfos = KeggInfos.get("GN:" + p.getOrg(), manager); // Retrieve all organism information via KeggAdaptor + KeggInfos orgInfos = KeggInfos.get("gn:" + p.getOrg(), manager); // Retrieve all organism information via KeggAdaptor if (orgInfos.queryWasSuccessfull()) { speciesString = orgInfos.getDefinition(); taxonID = orgInfos.getTaxonomy().trim().replaceAll("\\s.*", ""); diff --git a/src/de/zbit/kegg/io/KEGG2SBMLLayoutExtension.java b/src/de/zbit/kegg/io/KEGG2SBMLLayoutExtension.java index 70c0bd1..69aa43c 100644 --- a/src/de/zbit/kegg/io/KEGG2SBMLLayoutExtension.java +++ b/src/de/zbit/kegg/io/KEGG2SBMLLayoutExtension.java @@ -346,14 +346,15 @@ public static void addLayoutExtension(Pathway p, SBMLDocument doc, Model model, /** - * + * Returns the next available (unused) id of a layout, beginning with + * "layout", 
"layout2", "layout3",... * @param layout * @param layoutModel - * @return + * @return unused layout identifier */ private static String createUniqueLayoutId(Layout layout, ExtendedLayoutModel layoutModel) { String idPrefix = "layout"; - String id = "layout"; + String id = idPrefix; ListOf lol = layoutModel.getListOfLayouts(); if (lol == null) { diff --git a/src/de/zbit/kegg/io/KEGG2jSBML.java b/src/de/zbit/kegg/io/KEGG2jSBML.java index 863ec4f..f7a62fd 100644 --- a/src/de/zbit/kegg/io/KEGG2jSBML.java +++ b/src/de/zbit/kegg/io/KEGG2jSBML.java @@ -431,7 +431,7 @@ protected SBMLDocument translateWithoutPreprocessing(Pathway p) { // Retrieve further information via Kegg Adaptor boolean titleAdded = false; if (p.isSetOrg()) { - KeggInfos orgInfos = KeggInfos.get("GN:" + p.getOrg(), manager); // Retrieve all organism information via KeggAdaptor + KeggInfos orgInfos = KeggInfos.get("gn:" + p.getOrg(), manager); // Retrieve all organism information via KeggAdaptor if (orgInfos.queryWasSuccessfull()) { CVTerm mtOrgID = DatabaseIdentifierTools.getCVTerm(IdentifierDatabases.NCBI_Taxonomy, null, orgInfos.getTaxonomy().split("\\s")); if (mtOrgID.getResourceCount() > 0) {