From 727b58c2d491c202bc94f554a95b20aafc4f3d3f Mon Sep 17 00:00:00 2001 From: Clemens Wrzodek Date: Tue, 30 Oct 2012 14:12:50 +0000 Subject: [PATCH] Last commit prior to working on Methods-Paper Revision. - Added an invisible path2models option - Shortened the BioPAX display name - Fixed a bug that potentially causes spaces in BioPAX IDs (which is not allowed) - Fixed the Controlled Vocabulary of BioPAX interactions to include children of the MI:0190 branch - Added MI-terms and getMITerm() methods and classes, e.g., in SBOMapping.java --- src/de/zbit/kegg/Translator.java | 56 ++++++++++++++-- .../zbit/kegg/io/AbstractKEGGtranslator.java | 22 ++++-- src/de/zbit/kegg/io/KEGG2BioPAX.java | 67 +++++++++++++------ src/de/zbit/kegg/io/KEGG2BioPAX_level3.java | 23 +++++-- src/de/zbit/kegg/io/KEGG2jSBML.java | 2 +- src/de/zbit/kegg/io/SBOMapping.java | 57 ++++++++++++++++ 6 files changed, 189 insertions(+), 38 deletions(-) diff --git a/src/de/zbit/kegg/Translator.java b/src/de/zbit/kegg/Translator.java index 1696a0f..65d0618 100644 --- a/src/de/zbit/kegg/Translator.java +++ b/src/de/zbit/kegg/Translator.java @@ -35,6 +35,7 @@ import de.zbit.Launcher; import de.zbit.gui.GUIOptions; import de.zbit.io.FileTools; +import de.zbit.kegg.api.KeggInfos; import de.zbit.kegg.api.cache.KeggFunctionManagement; import de.zbit.kegg.api.cache.KeggInfoManagement; import de.zbit.kegg.ext.KEGGTranslatorPanelOptions; @@ -74,6 +75,13 @@ public class Translator extends Launcher { * like the browser cache). Must be loaded upon start and saved upon exit. */ public final static String cacheFunctionFileName = "keggfc.dat"; + + /** + * Adjusts a few methods in KEGGtranslator to generate an output for + * the path2models project if true. + *

PLEASE ALWAYS KEEP THE DEFAULT, INITIAL VALUE TO FALSE! + */ + public static boolean path2models = false; /** * The {@link Logger} for this class. @@ -333,14 +341,9 @@ public void commandLineMode(AppConf appConf) { // Maybe adjust for path2models SBPreferences prefs = SBPreferences.getPreferencesFor(KEGGtranslatorCommandLineOnlyOptions.class); if (KEGGtranslatorCommandLineOnlyOptions.PATH2MODELS.getValue(props)) { - /* TODO: - * 1) Set the required options - * 2) Uncomment code in [KEGG] Pathway.java for 'getCompoundPreviewPicture()' - * 3) Activate additional KEGG COMPOUND 2 ChEBI mapping in KeggInfos.java ('getChebi()'-method) - */ + adjustForPath2Models(); } - // Make command-line options persistent prefs.restoreDefaults(); // This is just used as an empty prefs-template. try { @@ -359,6 +362,47 @@ public void commandLineMode(AppConf appConf) { } } + /** + * Adjusts the current KEGGtranslator instances to create an output + * for the path2models project. + */ + public static void adjustForPath2Models() { + path2models = true; + KeggInfos.path2models = true; + /* + * 1) Set the required options + * 2) Uncomment code in [KEGG] Pathway.java for 'getCompoundPreviewPicture()' + * 3) Activate additional KEGG COMPOUND 2 ChEBI mapping in KeggInfos.java ('getChebi()'-method) + */ + + KEGGtranslatorOptions.AUTOCOMPLETE_REACTIONS.setDefaultValue(Boolean.TRUE); + KEGGtranslatorOptions.USE_GROUPS_EXTENSION.setDefaultValue(Boolean.FALSE); + KEGGtranslatorOptions.REMOVE_ORPHANS.setDefaultValue(Boolean.FALSE); + KEGGtranslatorOptions.REMOVE_WHITE_GENE_NODES.setDefaultValue(Boolean.TRUE); + KEGGtranslatorOptions.SHOW_FORMULA_FOR_COMPOUNDS.setDefaultValue(Boolean.FALSE); + KEGGtranslatorOptions.REMOVE_PATHWAY_REFERENCES.setDefaultValue(Boolean.TRUE); + KEGGtranslatorOptions.CELLDESIGNER_ANNOTATIONS.setDefaultValue(Boolean.FALSE); + KEGGtranslatorOptions.ADD_LAYOUT_EXTENSION.setDefaultValue(Boolean.TRUE); + KEGGtranslatorOptions.CHECK_ATOM_BALANCE.setDefaultValue(Boolean.FALSE); +
+ SBPreferences prefs = SBPreferences.getPreferencesFor(KEGGtranslatorOptions.class); + prefs.put(KEGGtranslatorOptions.AUTOCOMPLETE_REACTIONS, Boolean.TRUE); + prefs.put(KEGGtranslatorOptions.USE_GROUPS_EXTENSION, Boolean.FALSE); + prefs.put(KEGGtranslatorOptions.REMOVE_ORPHANS, Boolean.FALSE); + prefs.put(KEGGtranslatorOptions.REMOVE_WHITE_GENE_NODES, Boolean.TRUE); + prefs.put(KEGGtranslatorOptions.SHOW_FORMULA_FOR_COMPOUNDS, Boolean.FALSE); + prefs.put(KEGGtranslatorOptions.REMOVE_PATHWAY_REFERENCES, Boolean.TRUE); + prefs.put(KEGGtranslatorOptions.CELLDESIGNER_ANNOTATIONS, Boolean.FALSE); + prefs.put(KEGGtranslatorOptions.ADD_LAYOUT_EXTENSION, Boolean.TRUE); + prefs.put(KEGGtranslatorOptions.CHECK_ATOM_BALANCE, Boolean.FALSE); + + try { + prefs.flush(); + } catch (BackingStoreException e) { + log.log(Level.SEVERE, "Could not adjust KEGGtranslator options for path2models.", e); + } + } + /* * (non-Javadoc) * @see de.zbit.Launcher#getAppName() diff --git a/src/de/zbit/kegg/io/AbstractKEGGtranslator.java b/src/de/zbit/kegg/io/AbstractKEGGtranslator.java index 034cd42..4fafdb8 100644 --- a/src/de/zbit/kegg/io/AbstractKEGGtranslator.java +++ b/src/de/zbit/kegg/io/AbstractKEGGtranslator.java @@ -805,22 +805,36 @@ protected String NameToSId(String name) { * letter ::= �a�..�z�,�A�..�Z� digit ::= �0�..�9� idChar ::= letter | * digit | �_� SId ::= ( letter | �_� ) idChar* */ - String ret = ""; + String ret; if (name == null || name.trim().length() == 0) { ret = incrementSIdSuffix("SId"); SIds.add(ret); } else { name = name.trim(); + StringBuilder ret2 = new StringBuilder(name.length()+4); char c = name.charAt(0); + // Must start with letter or '_'. 
- if (!(isLetter(c) || c == '_')) ret = "SId_"; else ret = Character.toString(c); + if (!(isLetter(c) || c == '_')) { + ret2.append("SId_"); + } else { + ret2.append(c); + } + // May contain letters, digits or '_' for (int i = 1; i < name.length(); i++) { c = name.charAt(i); - if (c==' ') c='_'; // Replace spaces with "_" - if (isLetter(c) || Character.isDigit(c) || c == '_') ret += Character.toString(c); + if (c==' ') { + c='_'; // Replace spaces with "_" + } + + if (isLetter(c) || Character.isDigit(c) || c == '_') { + ret2.append(c); + } // else: skip invalid characters } + // Make unique + ret = ret2.toString(); if (SIds.contains(ret)) ret = incrementSIdSuffix(ret); SIds.add(ret); } diff --git a/src/de/zbit/kegg/io/KEGG2BioPAX.java b/src/de/zbit/kegg/io/KEGG2BioPAX.java index 72a2773..43946f5 100644 --- a/src/de/zbit/kegg/io/KEGG2BioPAX.java +++ b/src/de/zbit/kegg/io/KEGG2BioPAX.java @@ -91,6 +91,7 @@ import de.zbit.util.EscapeChars; import de.zbit.util.Species; import de.zbit.util.Utils; +import de.zbit.util.objectwrapper.ValuePair; /** * Abstract KEGG2BioPAX converter (also called KGML2BioPAX). This converter is @@ -311,7 +312,8 @@ public BioPAXElement createXRef(IdentifierDatabases db, String id, int type) { return xr; } - + + /** * Creates a biosource, corrsponding to the organism/species of * the input pathway p. @@ -901,7 +903,9 @@ public void createPhysicalEntities(Pathway p) { * {@link openControlledVocabulary} for level 2. 
*/ protected BioPAXElement getInteractionVocuabulary(SubType st) { - String rfid = "#relation_subtype_" + st.getName(); + String formattedName = st.getName().trim().replace(' ', '_').replace("/", "_or_"); + + String rfid = "#relation_subtype_" + formattedName; BioPAXElement voc=null; if (level == BioPAXLevel.L3) { voc = (InteractionVocabulary) model.getByID(rfid); @@ -909,50 +913,69 @@ protected BioPAXElement getInteractionVocuabulary(SubType st) { voc = (openControlledVocabulary) model.getByID(rfid); } + // Term is not yet available => create it. if (voc==null) { + // Convert to a term that is a child of 'MI:0190' (Molecular Interaction Ontology) + ValuePair miTerm = SBOMapping.getMITerm(st.getName()); + String termName = miTerm!=null?miTerm.getA():null;//formattedName; if (level == BioPAXLevel.L3) { voc = model.addNew(InteractionVocabulary.class, rfid); pathwayComponentCreated(voc); - ((InteractionVocabulary)voc).addTerm(st.getName()); + if (termName!=null) { + ((InteractionVocabulary)voc).addTerm(termName); + } + ((InteractionVocabulary)voc).addComment(formattedName);// In all cases, add the original KEGG name } else if (level == BioPAXLevel.L2) { voc = model.addNew(openControlledVocabulary.class, rfid); pathwayComponentCreated(voc); - ((openControlledVocabulary)voc).addTERM(st.getName()); + if (termName!=null) { + ((openControlledVocabulary)voc).addTERM(termName); + } + ((openControlledVocabulary)voc).addCOMMENT(formattedName);// In all cases, add the original KEGG name } + + // Add additional XRefs to MI, SBO and GO + if (miTerm!=null && miTerm.getB()!=null && miTerm.getB()>0) { + BioPAXElement xr = createXRef(IdentifierDatabases.MI, Integer.toString(miTerm.getB()), 1); + addOntologyXRef(voc, xr, miTerm.getA()); + } int sbo = SBOMapping.getSBOTerm(st.getName()); if (sbo>0) { BioPAXElement xr = createXRef(IdentifierDatabases.SBO, Integer.toString(sbo), 1); - if (xr!=null) { - if (level == BioPAXLevel.L3) { - ((Xref) xr).addComment(st.getName()); - 
((org.biopax.paxtools.model.level3.XReferrable) voc).addXref((Xref) xr); - } else if (level == BioPAXLevel.L2) { - ((xref) xr).addCOMMENT(st.getName()); - ((org.biopax.paxtools.model.level2.XReferrable) voc).addXREF((xref) xr); - } - } + addOntologyXRef(voc, xr, formattedName); } int go = SBOMapping.getGOTerm(st.getName()); if (go>0) { BioPAXElement xr = createXRef(IdentifierDatabases.GeneOntology, Integer.toString(go), 1); - if (xr!=null) { - if (level == BioPAXLevel.L3) { - ((Xref) xr).addComment(st.getName()); - ((org.biopax.paxtools.model.level3.XReferrable) voc).addXref((Xref) xr); - } else if (level == BioPAXLevel.L2) { - ((xref) xr).addCOMMENT(st.getName()); - ((org.biopax.paxtools.model.level2.XReferrable) voc).addXREF((xref) xr); - } - } + addOntologyXRef(voc, xr, formattedName); } } return voc; } + + + /** + * @param xReferrableBPelement + * @param xRef + * @param formattedName + */ + private void addOntologyXRef(BioPAXElement xReferrableBPelement, + BioPAXElement xRef, String formattedName) { + if (xRef!=null) { + if (level == BioPAXLevel.L3) { + ((Xref) xRef).addComment(formattedName); + ((org.biopax.paxtools.model.level3.XReferrable) xReferrableBPelement).addXref((Xref) xRef); + } else if (level == BioPAXLevel.L2) { + ((xref) xRef).addCOMMENT(formattedName); + ((org.biopax.paxtools.model.level2.XReferrable) xReferrableBPelement).addXREF((xref) xRef); + } + } + } /* (non-Javadoc) * @see de.zbit.kegg.io.KEGGtranslator#isGraphicalOutput() diff --git a/src/de/zbit/kegg/io/KEGG2BioPAX_level3.java b/src/de/zbit/kegg/io/KEGG2BioPAX_level3.java index 474a2ca..9d87419 100644 --- a/src/de/zbit/kegg/io/KEGG2BioPAX_level3.java +++ b/src/de/zbit/kegg/io/KEGG2BioPAX_level3.java @@ -187,7 +187,11 @@ public BioPAXElement addEntry(Entry entry, Pathway p) { if (fullName!=null) { ((Entity)element).setStandardName(fullName); // Graphics name } - ((Entity)element).setDisplayName(name); // Intelligent name + String displayName = name; + if (displayName.length()>20) { + 
displayName = displayName.substring(0, 16)+"..."; + } + ((Entity)element).setDisplayName(displayName); // Intelligent name // --- addDataSources(element); @@ -299,9 +303,14 @@ private void addDataSources(BioPAXElement element) { protected BioPAXElement createPathwayInstance(Pathway p) { pathway = model.addNew(org.biopax.paxtools.model.level3.Pathway.class, p.getName()); pathway.addAvailability(String.format("This file has been generated by %s version %s", System.getProperty("app.name"), System.getProperty("app.version"))); - pathway.addName(formatTextForHTMLnotes(p.getTitle())); - pathway.setDisplayName(formatTextForHTMLnotes(p.getTitle())); - pathway.setStandardName(formatTextForHTMLnotes(p.getTitle())); + String htmlName = formatTextForHTMLnotes(p.getTitle()); + pathway.addName(htmlName); + String displayName = htmlName; + if (displayName.length()>20) { + displayName = displayName.substring(0, 16)+"..."; + } + pathway.setDisplayName(displayName); + pathway.setStandardName(htmlName); // Parse Kegg Pathway information boolean isKEGGPathway = DatabaseIdentifiers.checkID(DatabaseIdentifiers.IdentifierDatabases.KEGG_Pathway, p.getNameForMIRIAM()); @@ -394,7 +403,11 @@ public BioPAXElement addKGMLReaction(Reaction r, Pathway p) { reaction.addName(r.getName()); - reaction.setDisplayName(r.getName()); + String displayName = r.getName(); + if (displayName.length()>20) { + displayName = displayName.substring(0, 16)+"..."; + } + reaction.setDisplayName(displayName); addDataSources(reaction); // Add all reaction components diff --git a/src/de/zbit/kegg/io/KEGG2jSBML.java b/src/de/zbit/kegg/io/KEGG2jSBML.java index fe3b908..d90ff62 100644 --- a/src/de/zbit/kegg/io/KEGG2jSBML.java +++ b/src/de/zbit/kegg/io/KEGG2jSBML.java @@ -915,7 +915,7 @@ public static void addMiriamURNs(Entry entry, SBase spec) { String ko_id_uc_t = ko_id.toUpperCase().trim(); if (ko_id_uc_t.startsWith("CPD:")) { // KEGG and ChEBI provide picture for compounds (e.g., "C00118"). 
- notes.append(Pathway.getCompoundPreviewPicture(ko_id_uc_t, infos)); + notes.append(Pathway.getCompoundPreviewPicture(ko_id_uc_t, infos, Translator.path2models)); } } if (entry.getType().equals(EntryType.map)) { diff --git a/src/de/zbit/kegg/io/SBOMapping.java b/src/de/zbit/kegg/io/SBOMapping.java index 2416cf4..382d4d0 100644 --- a/src/de/zbit/kegg/io/SBOMapping.java +++ b/src/de/zbit/kegg/io/SBOMapping.java @@ -30,6 +30,7 @@ import de.zbit.kegg.parser.pathway.ext.EntryExtended; import de.zbit.kegg.parser.pathway.ext.EntryTypeExtended; import de.zbit.util.StringUtil; +import de.zbit.util.objectwrapper.ValuePair; /** * This static class defines how to map from certain @@ -266,6 +267,62 @@ public static int getGOTerm(String subtype) { return ret; } + /** + * Convert to a MI-term that is a child of 'MI:0190' (Molecular Interaction (PSI-MI)). + * @param subtype + * @return {@link ValuePair} with the term name and integer id. Or NULL if + * no MI term is available that matches the given input {@link SubType}. 
+ */ + public static ValuePair getMITerm(String subtype) { + if (subtype.equals(SubType.ASSOCIATION)) { + return new ValuePair("association", 914); //MI:0914 + } else if (subtype.equals(SubType.PHOSPHORYLATION)) { + return new ValuePair("phosphorylation reaction", 217); + } else if (subtype.equals(SubType.DEPHOSPHORYLATION)) { + return new ValuePair("dephosphorylation reaction", 203); + } else if (subtype.equals(SubType.UBIQUITINATION) || + subtype.equalsIgnoreCase("ubiquination")) { + return new ValuePair("ubiquitination reaction", 220); + } else if (subtype.equals(SubType.GLYCOSYLATION)) { + return new ValuePair("glycosylation reaction", 559); + } else if (subtype.equals(SubType.METHYLATION)) { + return new ValuePair("methylation reaction", 213); + } else if (subtype.equals(SubType.BINDING)) { + return new ValuePair("covalent binding", 195); + } else if (subtype.equals(SubType.BINDING_ASSOCIATION)) { + return new ValuePair("association", 914); + } else if (subtype.equals(SubType.COMPOUND) || + subtype.equals(SubType.HIDDEN_COMPOUND)) { + return new ValuePair("direct interaction", 407); + +// } else if (subtype.equals(SubType.DEPHOSPHORYLATION)) { +// return new ValuePair("dephosphorylation reaction", 203); +// } else if (subtype.equals(SubType.DEPHOSPHORYLATION)) { +// return new ValuePair("dephosphorylation reaction", 203); +// } else if (subtype.equals(SubType.DEPHOSPHORYLATION)) { +// return new ValuePair("dephosphorylation reaction", 203); +// } else if (subtype.equals(SubType.DEPHOSPHORYLATION)) { +// return new ValuePair("dephosphorylation reaction", 203); +// } else if (subtype.equals(SubType.DEPHOSPHORYLATION)) { +// return new ValuePair("dephosphorylation reaction", 203); + } + /* MISSING: + * EXPRESSION => positive genetic interaction / 935 + * ACTIVATION => positive genetic interaction / 935 + * REPRESSION => negative genetic interaction / 933 + * INHIBITION => negative genetic interaction / 933 + * + * DISSOCIATION + * INDIRECT_EFFECT + * 
MISSING_INTERACTION + * STATE_CHANGE + * + */ + + return null; + } + + /** * Formats an SBO term. E.g. "177" to "SBO:0000177".