diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java index 30d311a5e4fa791a963e9fcaa750f573fb2a53f0..6753f34f6ea549f5a78a5bd62f219da6cfd97114 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtils.java @@ -104,6 +104,27 @@ public class BioNetworkUtils { } } + /** + * Remove from a network all duplicated reactions + * + * @param network a {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork} + * @param checkSameGPR if reactions should be considered non-redundant if they share same reactants but have different GPR + */ + public static void removeDuplicatedReactions(@NonNull BioNetwork network, boolean checkSameGPR) { + //1- for each reaction, create an id from equation and, optionally, GPR + //2- put id-reaction pairs in map, each new reaction overrides its duplicates, if any + //3- remove from network all reactions not in map + HashMap<String,BioReaction> indexedReaction = new HashMap<>(); + BioCollection<BioReaction> toRemove = new BioCollection<>(network.getReactionsView()); + for (BioReaction r : network.getReactionsView()){ + String uniqId = BioReactionUtils.getEquation(r,false,true); + if(checkSameGPR) uniqId = uniqId+BioReactionUtils.getGPR(network,r); + indexedReaction.put(uniqId,r); + } + toRemove.removeAll(indexedReaction.values()); + network.removeOnCascade(toRemove); + } + public static void deepCopy(BioNetwork networkIn, BioNetwork networkOut) { deepCopy(networkIn, networkOut, true, false); } diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java index 4dafe8eea12087ed2b3bb05910c21e85f6cd4596..8f9921e72e05f61a5d292cf2768140d7f1f401dc 100644 --- a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtils.java @@ -59,10 +59,11 @@ public class BioReactionUtils { * @param network a {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork} * @param r1 a first {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} * @param r2 a second {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} - * @return true if the substrates and the products have the same id + * @param checkSameGPR if reactions should be considered non-redundant if they share same reactants but have different GPR + * @return true if the two reactions are redundant (same reactants and optionally same GPR) * @throws java.lang.IllegalArgumentException if one of the reaction is not in the network */ - public static Boolean areRedundant(@NonNull BioNetwork network, @NonNull BioReaction r1, @NonNull BioReaction r2) { + public static Boolean areRedundant(@NonNull BioNetwork network, @NonNull BioReaction r1, @NonNull BioReaction r2, boolean checkSameGPR) { if (!network.contains(r1)) { @@ -88,18 +89,33 @@ public class BioReactionUtils { rightR1.containsAll(rightR2) && rightR2.containsAll(rightR1); - if (!r1.isReversible()) { - return flag1; - } else { + if (r1.isReversible()) { Boolean flag2 = rightR1.containsAll(leftR2) && leftR2.containsAll(rightR1) && leftR1.containsAll(rightR2) && rightR2.containsAll(leftR1); - return flag1 || flag2; + flag1 = (flag1 || flag2); + } + + if(flag1 && checkSameGPR){ + return BioReactionUtils.getGPR(network, r1).equals(BioReactionUtils.getGPR(network, r2)); } + return flag1; + } + /** + * Comparison of two reactions + * + * @param network a {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioNetwork} + * @param r1 a first {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} + * @param r2 a second {@link fr.inrae.toulouse.metexplore.met4j_core.biodata.BioReaction} + * @return true if the substrates and the products have the same id + * @throws java.lang.IllegalArgumentException if one of the reaction is not in the network + */ + public static Boolean areRedundant(@NonNull BioNetwork network, @NonNull BioReaction r1, @NonNull BioReaction r2) { + return areRedundant(network, r1, r2, false); } diff --git a/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java new file mode 100644 index 0000000000000000000000000000000000000000..5a813f3c1cd698668a3bce1f0f30b6d8306e0b50 --- /dev/null +++ b/met4j-core/src/main/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMerger.java @@ -0,0 +1,299 @@ +package fr.inrae.toulouse.metexplore.met4j_core.biodata.utils; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; + +import java.util.*; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * A class to create, from a network with multiple compartments, a new network with a single compartment, avoiding duplicated compounds. + */ +public class CompartmentMerger { + + //criterion for identifying same compound over multiple compartments + //default use name as common field + Function<BioMetabolite,String> getUniqIdFunction = BioMetabolite::getName; + //criterion for selecting a unique representative from a group of compound instances + //default takes first id in alphabetical order + Function<List<BioMetabolite>,BioMetabolite> pickFunction = (l -> {l.sort(Comparator.comparing(BioMetabolite::getId));return l.get(0);}); + + //unique compartment + //default is named "NA" + BioCompartment uniqComp = new BioCompartment("1", "NA"); + + // map for each compound toward their unique counterpart (can be themselves) + HashMap<BioMetabolite,BioMetabolite> convert; + // merged bioNetwork + BioNetwork merged; + + // merged network name + String name; + + /** + * Create a new Compartment Merger + */ + public CompartmentMerger(){ + } + + /** + * Fluent builder setting the function that provides the criterion used for identifying same compounds over multiple compartments, + * using a custom function provided as argument. + * Default use "name" as common field for the same compound over multiple compartments + * @param uniqIdFunction the function + * @return a CompartmentMerger instance + */ + public CompartmentMerger setGetUniqIdFunction(Function<BioMetabolite, String> uniqIdFunction) { + this.getUniqIdFunction = uniqIdFunction; + return this; + } + + /** + * Fluent builder setting both functions that provides the criterion used for identifying same compounds over multiple compartments, + * and that creates a unique representative for such compounds, using a common identifier convention. + * This will strip the two last characters from compound identifiers to create shared ids that will be used in final merged network. + * This should be used for SBML that use the naming convention "xxx_y" for compounds, where xxx is the base identifier and y is the compound identifier (single letter). + * @return a CompartmentMerger instance + */ + public CompartmentMerger usePalssonIdentifierConvention() { + this.getUniqIdFunction = c -> c.getId().substring(0,c.getId().length()-2); + this.pickFunction = (l -> { + BioMetabolite oldComp = l.get(0); + return new BioMetabolite(oldComp,oldComp.getId().substring(0,oldComp.getId().length()-2)); + }); + return this; + } + + /** + * Fluent builder setting both functions that provides the criterion used for identifying same compounds over multiple compartments, + * and that creates a unique representative for such compounds, when compound identifiers contains explicit compartment info. + * This use a provided regex to extract a shared base identifier from compound identifiers, and used it in final merged network. + * This should be used for SBML that use a compound identifier convention containing a base identifier and a compartment suffix/prefix, + * such as "xxx_y" (regex "^(\\w+)_\\w$") "xxx[y]" or "xxx-yyy", where xxx is the base identifier and y is the compound identifier. + * @return a CompartmentMerger instance + */ + public CompartmentMerger useBaseIdentifierRegex(String regex) { + this.getUniqIdFunction = (v ->{ + String id = v.getId(); + Matcher m = Pattern.compile(regex).matcher(id); + if(m.matches()) id=m.group(1); + return id;}); + this.pickFunction = (l -> { + BioMetabolite oldComp = l.get(0); + String id = oldComp.getId(); + Matcher m = Pattern.compile(regex).matcher(id); + if(m.matches()) id=m.group(1); + return new BioMetabolite(oldComp,id); + }); + return this; + } + + /** + * Fluent builder setting the function that select or create a unique representative from a group of compound instances + * default return compound from list with first id in alphabetical order. + * A new compound with custom id can be returned, but if an id is generated twice or more, the corresponding groups will be merged into a single one + * @param compoundMergeFunction the function + * @return a CompartmentMerger instance + */ + public CompartmentMerger setCompoundMergeFunction(Function<List<BioMetabolite>, BioMetabolite> compoundMergeFunction) { + this.pickFunction = compoundMergeFunction; + return this; + } + + /** + * Fluent builder setting the compartment where all the compounds will be merged. + * Default is a compartment named "NA". + * @param uniqComp the single compartment + * @return a CompartmentMerger instance + */ + public CompartmentMerger setUniqCompartment(BioCompartment uniqComp) { + this.uniqComp = uniqComp; + return this; + } + + /** + * Fluent builder setting the merged network name. + * Default append "_compartments-merged" to original network name. + * @param name the name + * @return a CompartmentMerger instance + */ + public CompartmentMerger setNewNetworkName(String name) { + this.name = name; + return this; + } + + /** + * Merge compartments by indexing compounds to identify groups of same compounds over different compartments, and select or + * create a unique compound to be added to a new single compartment. + * @param original the original network + * @return a network with merged compartments + */ + public BioNetwork merge(BioNetwork original){ + + //create new network with same metadata and single compartment + buildNetwork(original); + + //group corresponding compounds + Map<String, List<BioMetabolite>> compoundGroups = original.getMetabolitesView().stream().collect(Collectors.groupingBy(getUniqIdFunction)); + + //for each group, create a unique compound + convert = new HashMap<>(); + for(List<BioMetabolite> toContract : compoundGroups.values()){ + BioMetabolite uniq = buildCompound(toContract); //(add newly created compound to new network) + //populate map for each compound toward their unique counterpart (can be themselves) + for(BioMetabolite m : toContract){ + convert.put(m,uniq); + } + } + + //copy Gene, Protein and Enzyme + keepGPR(original); + + //for each reaction, replace reactants by their unique counterpart + for(BioReaction r : original.getReactionsView()){ + // create deep copy, except for reactants + buildReaction(r); + } + + //copy Pathways + for (BioPathway pathway : original.getPathwaysView()) { + + BioPathway newPathway = new BioPathway(pathway); + merged.add(newPathway); + + // Add reactions into pathway + BioCollection<BioReaction> reactions = original.getReactionsFromPathways(pathway); + + for (BioReaction reaction : reactions) { + BioReaction newReaction = merged.getReaction(reaction.getId()); + merged.affectToPathway(newPathway, newReaction); + } + } + + //remove reactions that create loops + removeLoops(); + + //remove redundant reactions? + //TODO + + return merged; + } + + private void buildNetwork(BioNetwork original){ + //create new network with single compartment + merged = new BioNetwork(); + if(name == null){ + merged.setName(original.getName()+"_compartments-merged"); + }else{ + merged.setName(name); + } + merged.addCompartment(uniqComp); + //update metadata + merged.setSynonyms(new ArrayList<>(original.getSynonyms())); + merged.setComment(original.getComment()); + merged.setRefs(new HashMap<>(original.getRefs())); + merged.setAttributes(new HashMap<>(original.getAttributes())); + } + + private BioMetabolite buildCompound(List<BioMetabolite> originalCtoMerge){ + //from compounds to merge, pick one as template for new unique compound + BioMetabolite chosen = pickFunction.apply(originalCtoMerge); + BioMetabolite newMetabolite = new BioMetabolite(chosen); + BioMetabolite old = merged.getMetabolite(newMetabolite.getId()); + if(old!=null){ + System.err.println("WARNING: collision in new compound identifiers. Compounds with different unique ids will be merged under the same new entity "+newMetabolite.getId()+"."); + System.err.println("If it is an expected behaviour that the provided merge function may not produce a different compounds at each call, please review the following merge:"); + System.err.println(originalCtoMerge.stream().map(c -> c.getId()+" : "+ getUniqIdFunction.apply(c)).collect(Collectors.toList())); + System.err.println(convert.entrySet().stream().filter(e -> old.equals(e.getValue())).map(e -> e.getKey().getId()+" : "+ getUniqIdFunction.apply(e.getKey())).collect(Collectors.toList())); + return old; + } + merged.add(newMetabolite); + merged.affectToCompartment(uniqComp,newMetabolite); + return newMetabolite; + } + + private BioReaction buildReaction(BioReaction originalR){ + BioReaction newReaction = new BioReaction(originalR); + newReaction.setSpontaneous(originalR.isSpontaneous()); + newReaction.setReversible(originalR.isReversible()); + newReaction.setEcNumber(originalR.getEcNumber()); + + merged.add(newReaction); + + // Create substrates, swap to unique compound + for (BioReactant reactant : originalR.getLeftReactantsView()) { + BioMetabolite newMetabolite = convert.get(reactant.getMetabolite()); + Double sto = reactant.getQuantity(); + merged.affectLeft(newReaction, sto, uniqComp, newMetabolite); + } + + // Create products, swap to unique compound + for (BioReactant reactant : originalR.getRightReactantsView()) { + BioMetabolite newMetabolite = convert.get(reactant.getMetabolite()); + Double sto = reactant.getQuantity(); + merged.affectRight(newReaction, sto, uniqComp, newMetabolite); + } + + //copy GPR + for (BioEnzyme enzyme : originalR.getEnzymesView()) { + BioEnzyme newEnzyme = merged.getEnzyme(enzyme.getId()); + merged.affectEnzyme(newReaction, newEnzyme); + } + + return newReaction; + } + + // remove reactions that create loops, i.e. transport reactions between compartments + private void removeLoops(){ + BioCollection<BioReaction> toRemove = new BioCollection<>(); + for(BioReaction r : merged.getReactionsView()){ + if(r.getLeftsView().stream().anyMatch(r.getRightsView()::contains)) toRemove.add(r); + } + merged.removeOnCascade(toRemove); + } + + // copy Gene, Protein and Enzyme from original network + private void keepGPR(BioNetwork original){ + // Copy genes + for (BioGene gene : original.getGenesView()) { + BioGene newGene = new BioGene(gene); + merged.add(newGene); + } + + // Copy proteins + for (BioProtein protein : original.getProteinsView()) { + BioProtein newProtein = new BioProtein(protein); + merged.add(newProtein); + if (protein.getGene() != null) { + String geneId = protein.getGene().getId(); + BioGene newGene = merged.getGene(geneId); + merged.affectGeneProduct(newProtein, newGene); + } + } + + // Copy enzymes + for (BioEnzyme enzyme : original.getEnzymesView()) { + BioEnzyme newEnzyme = new BioEnzyme(enzyme); + merged.add(newEnzyme); + + BioCollection<BioEnzymeParticipant> participants = enzyme.getParticipantsView(); + + for (BioEnzymeParticipant participant : participants) { + Double quantity = participant.getQuantity(); + if (participant.getPhysicalEntity().getClass().equals(BioMetabolite.class)) { + BioMetabolite metabolite = (BioMetabolite) participant.getPhysicalEntity(); + // swap to unique compound + merged.affectSubUnit(newEnzyme, quantity, convert.get(metabolite)); + } else if (participant.getPhysicalEntity().getClass().equals(BioProtein.class)) { + BioProtein protein = (BioProtein) participant.getPhysicalEntity(); + BioProtein newProtein = merged.getProtein(protein.getId()); + merged.affectSubUnit(newEnzyme, quantity, newProtein); + } + } + } + } + +} diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java index bc9c6f2eb96edbbd9d683892eee04312939f67a1..3220ce16ed27502fccf4c6524bda772f478b9b8a 100644 --- a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioNetworkUtilsTest.java @@ -94,6 +94,31 @@ public class BioNetworkUtilsTest { } + @Test + public void testRemoveDuplicatedReactions(){ + BioNetwork network = miniNetwork(); + BioReaction r3 = new BioReaction("R3"); + network.add(r3); + network.affectLeft(r3, 2.0, c1, m1); + network.affectRight(r3, 1.0, c1, m2); + + BioNetworkUtils.removeDuplicatedReactions(network,false); + assertFalse("Duplicated reaction not removed",network.containsReaction("R3")); + + r3 = new BioReaction("R3"); + network.add(r3); + network.affectLeft(r3, 2.0, c1, m1); + network.affectRight(r3, 1.0, c1, m2); + + BioNetworkUtils.removeDuplicatedReactions(network,true); + assertTrue("Non-duplicated reaction (considering GPR) removed",network.containsReaction("R3")); + + network.affectEnzyme(r3, enzyme1); + + BioNetworkUtils.removeDuplicatedReactions(network,true); + assertFalse("Duplicated reaction (considering GPR) not removed",network.containsReaction("R3")); + } + @Test public void removeNotConnectedMetabolites() { diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java index 92bd22a509b3a5038749e2cba3f9d2a9e7e63c53..bd031882d33795135291d81ff4b05fe9effb05fb 100644 --- a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/BioReactionUtilsTest.java @@ -121,6 +121,41 @@ public class BioReactionUtilsTest { } + /** + * Test method for + * {@link BioReactionUtils#areRedundant(BioNetwork, BioReaction, BioReaction)}. + */ + @Test + public void testAreRedundantCheckGPR() { + + BioReaction r2 = new BioReaction("r2"); + r2.setReversible(false); + network.add(r2); + + network.affectLeft(r2, 1.0, c, m1); + network.affectRight(r2, 2.0, c, m2); + network.affectRight(r2, 1.5, c, m3); + + network.affectGeneProduct(p1, g1); + network.affectGeneProduct(p2, g1); + network.affectGeneProduct(p3, g1); + + network.affectSubUnit(e1, 1.0, p1); + network.affectSubUnit(e1, 1.0, p2); + network.affectSubUnit(e2, 1.0, p3); + + network.affectEnzyme(r1, e1); + network.affectEnzyme(r1, e2); + + assertTrue("r1 and r2 must be identified as redundant", BioReactionUtils.areRedundant(network, r1, r2, false)); + assertFalse("r1 and r2 must be identified as not redundant, considering GPR", BioReactionUtils.areRedundant(network, r1, r2, true)); + + network.affectEnzyme(r2, e1); + network.affectEnzyme(r2, e2); + assertTrue("r1 and r2 must be identified as redundant, considering GPR", BioReactionUtils.areRedundant(network, r1, r2, true)); + + } + @Test public void testAreRedundantReversible() { diff --git a/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..6f31b2088498064527e27212e818d1cc07d77936 --- /dev/null +++ b/met4j-core/src/test/java/fr/inrae/toulouse/metexplore/met4j_core/biodata/utils/CompartmentMergerTest.java @@ -0,0 +1,153 @@ +package fr.inrae.toulouse.metexplore.met4j_core.biodata.utils; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class CompartmentMergerTest { + + + BioNetwork network; + BioReaction r1,r2,r1X,r3,rt1,rt2; + BioMetabolite a,b,c,d,aX,bX,cX,eX; + BioCompartment comp0,compX,compMerge; + BioProtein p1; + BioGene g1; + BioEnzyme e1; + + @Before + public void init() { + network = new BioNetwork(); + r1 = new BioReaction("r1"); + r2 = new BioReaction("r2"); + r1X = new BioReaction("r1X"); + r3 = new BioReaction("r3"); + rt1 = new BioReaction("rt1"); + rt2 = new BioReaction("rt2"); + network.add(r1,r2,r1X,r3,rt1,rt2); + + a = new BioMetabolite("a_0", "a"); + b = new BioMetabolite("b_0", "b"); + c = new BioMetabolite("c_0", "c"); + d = new BioMetabolite("d_0", "d"); + aX = new BioMetabolite("a_X", "a"); + bX = new BioMetabolite("b_X", "b"); + cX = new BioMetabolite("c_X", "c"); + eX = new BioMetabolite("e_X", "e"); + network.add(a,b,c,d,aX,bX,cX,eX); + comp0 = new BioCompartment("0"); + compX = new BioCompartment("X"); + compMerge = new BioCompartment("merge"); + network.add(comp0,compX); + network.affectToCompartment(comp0, a, b, c, d); + network.affectToCompartment(compX, aX, bX, cX, eX); + + network.affectLeft(r1, 2.0, comp0, a); + network.affectRight(r1, 1.0, comp0, b); + network.affectRight(r1, 1.0, comp0, c); + r1.setReversible(false); + + network.affectLeft(r1X, 2.0, compX, aX); + network.affectRight(r1X, 1.0, compX, bX); + network.affectRight(r1X, 1.0, compX, cX); + r1X.setReversible(false); + + network.affectLeft(r2, 1.0, comp0, c); + network.affectRight(r2, 1.0, comp0, d); + r2.setReversible(false); + + network.affectLeft(r3, 1.0, compX, cX); + network.affectRight(r3, 1.0, compX, eX); + r3.setReversible(false); + + network.affectLeft(rt1, 1.0, comp0, a); + network.affectRight(rt1, 1.0, compX, aX); + rt1.setReversible(true); + + network.affectLeft(rt2, 1.0, comp0, c); + network.affectRight(rt2, 1.0, compX, cX); + rt2.setReversible(true); + + + e1 = new BioEnzyme("e1"); + network.add(e1); + p1 = new BioProtein("p1"); + network.add(p1); + g1 = new BioGene("g1", "G1"); + network.add(g1); + network.affectGeneProduct(p1, g1); + network.affectSubUnit(e1, 1.0, p1); + network.affectEnzyme(r3, e1); + + } + + @Test + public void testMerge() { + a.setName("a"); + CompartmentMerger merger = new CompartmentMerger() + .setNewNetworkName("myNewName") + .setUniqCompartment(compMerge) + .setGetUniqIdFunction(BioMetabolite::getName); + + BioNetwork newNetwork = merger.merge(network); + assertEquals("Error while setting new name","myNewName",newNetwork.getName()); + assertTrue("Error while creating new compartment",newNetwork.containsCompartment("merge")); + assertEquals("Error while merging compartment, wrong number of final compartments",1,newNetwork.getCompartmentsView().size()); + assertEquals("Error while merging compartment, wrong number of final metabolites",5,newNetwork.getMetabolitesView().size()); + assertEquals("Error while merging compartment, wrong number of final reactions",4,newNetwork.getReactionsView().size()); + assertFalse("Error while merging compartment, gene lost",newNetwork.getGenesView().isEmpty()); + assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); + assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); + } + + @Test + public void testMergeII() { + a.setName("notA");//break default merging strategy + CompartmentMerger merger = new CompartmentMerger() + .setNewNetworkName("myNewName") + .setUniqCompartment(compMerge) + .usePalssonIdentifierConvention(); + + BioNetwork newNetwork = merger.merge(network); + assertEquals("Error while setting new name","myNewName",newNetwork.getName()); + assertTrue("Error while creating new compartment",newNetwork.containsCompartment("merge")); + assertEquals("Error while merging compartment, wrong number of final compartments",1,newNetwork.getCompartmentsView().size()); + assertEquals("Error while merging compartment, wrong number of final metabolites",5,newNetwork.getMetabolitesView().size()); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("a")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("b")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("c")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("d")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("e")); + assertEquals("Error while merging compartment, wrong number of final reactions",4,newNetwork.getReactionsView().size()); + assertFalse("Error while merging compartment, gene lost",newNetwork.getGenesView().isEmpty()); + assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); + assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); + } + + @Test + public void testMergeIII() { + a.setName("notA");//break default merging strategy + CompartmentMerger merger = new CompartmentMerger() + .setNewNetworkName("myNewName") + .setUniqCompartment(compMerge) + .useBaseIdentifierRegex("^(\\w+)_\\w$"); + + BioNetwork newNetwork = merger.merge(network); + assertEquals("Error while setting new name","myNewName",newNetwork.getName()); + assertTrue("Error while creating new compartment",newNetwork.containsCompartment("merge")); + assertEquals("Error while merging compartment, wrong number of final compartments",1,newNetwork.getCompartmentsView().size()); + assertEquals("Error while merging compartment, wrong number of final metabolites",5,newNetwork.getMetabolitesView().size()); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("a")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("b")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("c")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("d")); + assertTrue("Error while merging compartment, wrong merged metabolite",newNetwork.containsMetabolite("e")); + assertEquals("Error while merging compartment, wrong number of final reactions",4,newNetwork.getReactionsView().size()); + assertFalse("Error while merging compartment, gene lost",newNetwork.getGenesView().isEmpty()); + assertFalse("Error while merging compartment, enzyme lost",newNetwork.getEnzymesView().isEmpty()); + assertFalse("Error while merging compartment, protein lost",newNetwork.getProteinsView().isEmpty()); + } + +} diff --git a/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/SBMLwizard.java b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/SBMLwizard.java new file mode 100644 index 0000000000000000000000000000000000000000..a44ce464c459668d16a004acd540fa8c4b9b8323 --- /dev/null +++ b/met4j-toolbox/src/main/java/fr/inrae/toulouse/metexplore/met4j_toolbox/convert/SBMLwizard.java @@ -0,0 +1,246 @@ +package fr.inrae.toulouse.metexplore.met4j_toolbox.convert; + +import fr.inrae.toulouse.metexplore.met4j_core.biodata.*; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.collection.BioCollection; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.BioNetworkUtils; +import fr.inrae.toulouse.metexplore.met4j_core.biodata.utils.CompartmentMerger; +import fr.inrae.toulouse.metexplore.met4j_io.annotations.reaction.ReactionAttributes; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.JsbmlReader; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.Met4jSbmlReaderException; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.FBCParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.GroupPathwayParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.NotesParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.reader.plugin.PackageParser; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.JsbmlWriter; +import fr.inrae.toulouse.metexplore.met4j_io.jsbml.writer.Met4jSbmlWriterException; +import fr.inrae.toulouse.metexplore.met4j_mapping.Mapper; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.AbstractMet4jApplication; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumFormats; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.EnumParameterTypes; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.Format; +import fr.inrae.toulouse.metexplore.met4j_toolbox.generic.annotations.ParameterType; +import org.kohsuke.args4j.Option; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; + +public class SBMLwizard extends AbstractMet4jApplication { + + @Format(name = EnumFormats.Sbml) + @ParameterType(name = EnumParameterTypes.InputFile) + @Option(name = "-s", usage = "input SBML file", required = true) + public String inputPath = null; + + @ParameterType(name = EnumParameterTypes.InputFile) + @Format(name = EnumFormats.Txt) + @Option(name = "-rc", usage = "remove compounds from input identifier file", required = false) + public String inputSide = null; + + @Option(name = "-ric", aliases = {"--noIsolated"}, usage = "remove isolated compounds (not involved in any reaction)") + public boolean removeIsolated; + + @ParameterType(name = EnumParameterTypes.InputFile) + @Format(name = EnumFormats.Txt) + @Option(name = "-rr", usage = "remove reaction from input identifier file", required = false) + public String inputReactions = null; + + @ParameterType(name = EnumParameterTypes.OutputFile) + @Format(name = EnumFormats.Sbml) + @Option(name = "-o", usage = "output SBML file", required = true) + public String outputPath = null; + + @Option(name = "-r0", aliases = {"--noFlux"}, usage = "remove reactions with lower and upper flux bounds both set to 0.0") + public boolean removeNoFlux; + + enum strategy {no, by_name, by_id} + + @Option(name = "-mc", aliases = {"--mergecomp"}, usage = "merge compartments. " + + "Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form \"xxx_y\" with xxx as base identifier and y as compartment label).") + public strategy mergingStrat = strategy.no; + + @Option(name = "-rdr", aliases = {"--noDuplicated"}, usage = "remove duplicated reactions (same reactants, same GPR)") + public boolean removeDuplicated; + + + @Option(name = "-rEX", aliases = {"--removeExchange"}, usage = "remove exchange reactions and species from given exchange compartment identifier", required = false) + public String exchangeCompToRemove; + + public static void main(String[] args) throws Met4jSbmlWriterException, IOException { + + SBMLwizard app = new SBMLwizard(); + + app.parseArguments(args); + + app.run(); + + } + + + public void run() throws Met4jSbmlWriterException, IOException { + System.out.print("Reading SBML..."); + JsbmlReader reader = new JsbmlReader(this.inputPath); + ArrayList<PackageParser> pkgs = new ArrayList<>(Arrays.asList( + new NotesParser(false), new FBCParser(), new GroupPathwayParser())); + + BioNetwork network = null; + + try { + network = reader.read(pkgs); + } catch (Met4jSbmlReaderException e) { + System.err.println("Error while reading the SBML file"); + System.err.println(e.getMessage()); + System.exit(1); + } + System.out.println(" Done."); + + //print info + System.out.println("\n\n\tcompartments:\t"+network.getCompartmentsView().size()); + System.out.println("\tmetabolites:\t"+network.getMetabolitesView().size()); + System.out.println("\treactions:\t"+network.getReactionsView().size()); + System.out.println("\tenzymes:\t"+network.getEnzymesView().size()); + System.out.println("\tgenes:\t"+network.getGenesView().size()); + System.out.println("\tprotein:\t"+network.getProteinsView().size()); + System.out.println("\tpathway:\t"+network.getPathwaysView().size()+"\n\n"); + + //side compound removal [optional] + if (inputSide != null) { + BioCollection<BioMetabolite> sideCpds = new BioCollection<>(); + System.err.println("removing side compounds..."); + Mapper<BioMetabolite> cmapper = new Mapper<>(network, BioNetwork::getMetabolitesView).skipIfNotFound(); + + try { + sideCpds = cmapper.map(inputSide); + } catch (IOException e) { + System.err.println("Error while reading the side compound file"); + System.err.println(e.getMessage()); + System.exit(1); + } + if (cmapper.getNumberOfSkippedEntries() > 0) + System.err.println(cmapper.getNumberOfSkippedEntries() + " side compounds not found in network."); + + for(BioMetabolite sc : sideCpds){ + network.removeOnCascade(sc); + } + System.err.println(sideCpds.size() + " side compounds removed from network."); + } + + //irrelevant reaction removal [optional] + if (inputReactions != null) { + BioCollection<BioReaction> sideRxns = new BioCollection<>(); + System.err.println("removing side reaction..."); + Mapper<BioReaction> rmapper = new Mapper<>(network, BioNetwork::getReactionsView).skipIfNotFound(); + + try { + sideRxns = rmapper.map(inputReactions); + } catch (IOException e) { + System.err.println("Error while reading the irrelevant reactions file"); + System.err.println(e.getMessage()); + System.exit(1); + } + if (rmapper.getNumberOfSkippedEntries() > 0) + System.err.println(rmapper.getNumberOfSkippedEntries() + " reactions not found in network."); + + for(BioReaction r : sideRxns){ + network.removeOnCascade(r); + } + System.err.println(sideRxns.size() + " irrelevant reactions removed from network."); + } + + //removal of reactions that cannot hold flux in any condition + if(removeNoFlux){ + System.err.println("removing reaction with closed flux bound..."); + BioCollection<BioReaction> toRemove = new BioCollection<>(); + for(BioReaction r : network.getReactionsView()){ + if(ReactionAttributes.getLowerBound(r).value==0.0 && + ReactionAttributes.getUpperBound(r).value==0.0){ + toRemove.add(r); + } + } + + network.removeOnCascade(toRemove); + System.err.println(toRemove.size() + " \"closed\" reactions removed from network."); + } + + //exchange reaction removal + if(exchangeCompToRemove!=null){ + System.err.println("removing external compartment..."); + BioCompartment exchange = network.getCompartment(exchangeCompToRemove); + if(exchange==null){ + System.err.println("Exchange compartment not found, please check provided identifier"); + }else{ + int n = 0; + for (BioEntity e : exchange.getComponentsView()){ + network.removeOnCascade(e); + n++; + } + System.err.println(n + " external species removed from network."); + } + } + + + //remove compounds not in any reactions + if(removeIsolated){ + System.err.println("removing isolated compounds..."); + int n = network.getMetabolitesView().size(); + BioNetworkUtils.removeNotConnectedMetabolites(network); + System.err.println((n-network.getMetabolitesView().size())+" isolated compounds removed from network."); + } + + //merge compartment + BioNetwork newNetwork; + if (mergingStrat == strategy.by_id) { + System.err.print("Merging compartments..."); + CompartmentMerger merger = new CompartmentMerger() + .usePalssonIdentifierConvention(); + newNetwork = merger.merge(network); + System.err.println(" Done."); + }else if (mergingStrat != strategy.by_name) { + System.err.print("Merging compartments..."); + CompartmentMerger merger = new CompartmentMerger() + .setGetUniqIdFunction(BioMetabolite::getName); + newNetwork = merger.merge(network); + System.err.println(" Done."); + }else{ + newNetwork = network; + } + + //remove duplicated reactions + if(removeDuplicated){ + System.err.println("removing duplicated reactions..."); + int n = network.getReactionsView().size(); + BioNetworkUtils.removeDuplicatedReactions(newNetwork,true); + System.err.println((n-network.getMetabolitesView().size())+" duplicated reactions removed from network."); + } + + //print info + System.out.println("\n\n\tcompartments:\t"+newNetwork.getCompartmentsView().size()); + System.out.println("\tmetabolites:\t"+newNetwork.getMetabolitesView().size()); + System.out.println("\treactions:\t"+newNetwork.getReactionsView().size()); + System.out.println("\tenzymes:\t"+newNetwork.getEnzymesView().size()); + System.out.println("\tgenes:\t"+newNetwork.getGenesView().size()); + System.out.println("\tprotein:\t"+newNetwork.getProteinsView().size()); + System.out.println("\tpathway:\t"+newNetwork.getPathwaysView().size()+"\n\n"); + + //export network + System.out.print("Exporting..."); + new JsbmlWriter(outputPath,newNetwork).write(); + System.out.println(" Done."); + return; + } + + @Override + public String getLabel() { + return this.getClass().getSimpleName(); + } + + @Override + public String getLongDescription() { + return "General SBML model processing including compound removal (such as side compounds or isolated compounds), reaction removal (ex. blocked or exchange reaction), and compartments merging"; + } + + @Override + public String getShortDescription() { + return "General SBML model processing"; + } +}