Exemplo n.º 1
0
 /**
  * Checks {@code Normalizer.autoName} on a {@link Provenance} whose URI is the identifiers.org
  * "pid.pathway" namespace: afterwards the placeholder standard name "foo" must be gone and the
  * name set must contain both "PID" and "NCI_Nature curated".
  */
 @Test
 public final void testAutoName() {
   Model bioPaxModel = BioPAXLevel.L3.getDefaultFactory().createModel();
   Provenance provenance =
       bioPaxModel.addNew(Provenance.class, "http://identifiers.org/pid.pathway/");

   // Seed a placeholder standard name that auto-naming is expected to overwrite.
   provenance.setStandardName("foo");

   Normalizer.autoName(provenance);

   // A standard name must still be present...
   assertNotNull(provenance.getStandardName());
   // ...the well-known names for this data source must have been added...
   assertTrue(provenance.getName().contains("PID"));
   assertTrue(provenance.getName().contains("NCI_Nature curated"));
   // ...and the placeholder must have been replaced.
   assertFalse("foo".equals(provenance.getStandardName()));
 }
Exemplo n.º 2
0
  /**
   * End-to-end check of {@code Normalizer.normalize(Model)} on a hand-built BioPAX L3 model.
   *
   * <p>The model contains protein references with UniProt / RefSeq / PubMed xrefs (including
   * deliberate duplicates), a BioSource with a taxonomy xref, two Provenance objects attached to a
   * pathway and its sub-pathway, several UniProt "isoform vs. version" corner cases, and a few
   * dangling unification xrefs. After normalization the test asserts which URIs exist, which
   * objects were merged, and that data sources were left alone.
   *
   * <p>Note: a UniProt accession version (e.g. P68250.1 .. P68250.94) is not the same thing as an
   * isoform ID; several fixtures below exist to make sure the normalizer does not confuse them.
   */
  @Test
  public final void testNormalize() throws UnsupportedEncodingException {
    Model model = BioPAXLevel.L3.getDefaultFactory().createModel();

    // --- ProteinReference1: UniProt unification xref + versioned RefSeq relationship xref ---
    UnificationXref xref1 = model.addNew(UnificationXref.class, "Xref1");
    xref1.setDb("uniprotkb");
    xref1.setId("P68250");
    ProteinReference proteinRef1 = model.addNew(ProteinReference.class, "ProteinReference1");
    proteinRef1.setDisplayName("ProteinReference1");
    proteinRef1.addXref(xref1);

    RelationshipXref xref2 = model.addNew(RelationshipXref.class, "Xref2");
    xref2.setDb("refseq");
    xref2.setId("NP_001734");
    // Per the original author's note: the version matters when xrefs are compared, so this xref
    // is not expected to be removed by the normalizer.
    xref2.setIdVersion("1");
    proteinRef1.addXref(xref2);

    // --- Shared UniProt xref used by ProteinReference2 and ProteinReference3 ---
    // Q0VCL1 is a secondary accession of P68250. The original notes say the Normalizer does not
    // complain about that (it is left to the Validator and, later, the Merger).
    UnificationXref sharedUniprotXref = model.addNew(UnificationXref.class, "Xref3");
    sharedUniprotXref.setDb("uniprotkb"); // expected to become 'uniprot knowledgebase'
    sharedUniprotXref.setId("Q0VCL1");

    ProteinReference proteinRef2 = model.addNew(ProteinReference.class, "ProteinReference2");
    proteinRef2.setDisplayName("ProteinReference2");
    proteinRef2.addXref(sharedUniprotXref);
    RelationshipXref xref4 = model.addNew(RelationshipXref.class, "Xref4");
    xref4.setDb("refseq");
    xref4.setId("NP_001734");
    proteinRef2.addXref(xref4);

    // ProteinReference3 duplicates ProteinReference2 (same UniProt unification xref) and is
    // expected to be merged away by the normalizer.
    ProteinReference proteinRef3 = model.addNew(ProteinReference.class, "ProteinReference3");
    proteinRef3.setDisplayName("ProteinReference3");
    proteinRef3.addXref(sharedUniprotXref);
    RelationshipXref xref5 = model.addNew(RelationshipXref.class, "Xref5");
    xref5.setDb("refseq");
    xref5.setId("NP_001734");
    proteinRef3.addXref(xref5);

    // Two xrefs of different types sharing the same db:id; the normalizer is not expected to
    // merge xrefs of different types.
    PublicationXref xref6 = model.addNew(PublicationXref.class, "Xref6");
    xref6.setDb("pubmed");
    xref6.setId("2549346");
    proteinRef3.addXref(xref6);
    RelationshipXref xref7 = model.addNew(RelationshipXref.class, "Xref7");
    xref7.setDb("pubmed");
    xref7.setId("2549346"); // same id as Xref6
    proteinRef3.addXref(xref7);

    // --- BioSource with a taxonomy unification xref ---
    UnificationXref taxonomyXref = model.addNew(UnificationXref.class, "Xref8");
    taxonomyXref.setDb("taxonomy");
    taxonomyXref.setId("10090");
    BioSource mouse = model.addNew(BioSource.class, "BioSource_Mouse_Tissue");
    mouse.addXref(taxonomyXref);

    // --- Provenance objects (asserted below to both survive normalization) ---
    Provenance pidProvenance = model.addNew(Provenance.class, "pid");
    pidProvenance.addName("nci_nature");
    pidProvenance.setStandardName("foo");
    Provenance gatewayProvenance =
        model.addNew(Provenance.class, "http://identifiers.org/signaling-gateway/");

    // --- A pathway with one sub-pathway, each with its own data source ---
    Pathway parentPathway = model.addNew(Pathway.class, "pathway");
    parentPathway.addDataSource(pidProvenance);
    parentPathway.setStandardName("Pathway");
    Pathway childPathway = model.addNew(Pathway.class, "sub_pathway");
    childPathway.setStandardName("Sub-Pathway");
    childPathway.addDataSource(gatewayProvenance);
    parentPathway.addPathwayComponent(childPathway);

    // --- UniProt isoform handling ---
    // db "UniProt" with an isoform-style id: expected to be normalized to "uniprot isoform".
    UnificationXref xref9 = model.addNew(UnificationXref.class, "Xref9");
    xref9.setDb("UniProt");
    xref9.setId("P68250-2");
    ProteinReference proteinRef4 = model.addNew(ProteinReference.class, "ProteinReference4");
    proteinRef4.setDisplayName("ProteinReference1isoformA");
    proteinRef4.addXref(xref9);

    // db "UniProtKb" with a plain accession plus idVersion "2": NOT expected to be turned into an
    // isoform xref, because a version is not an isoform number unless the db name is
    // 'uniprot isoform'. The original notes add that the version may be lost once this xref is
    // merged with the other P68250 xrefs.
    UnificationXref xref10 = model.addNew(UnificationXref.class, "Xref10");
    xref10.setDb("UniProtKb");
    xref10.setId("P68250");
    xref10.setIdVersion("2");
    ProteinReference proteinRef5 = model.addNew(ProteinReference.class, "ProteinReference5");
    proteinRef5.setDisplayName("ProteinReference1isoformB");
    proteinRef5.addXref(xref10);

    // The next three xrefs/PRs are all expected to end up as uniprot.isoform:P68250-1 and be
    // merged into one.
    UnificationXref xref11 = model.addNew(UnificationXref.class, "Xref11");
    xref11.setDb("UniProtKb"); // expected to be replaced with "uniprot isoform"
    xref11.setId("P68250-1");
    ProteinReference proteinRef6 = model.addNew(ProteinReference.class, "ProteinReference6");
    proteinRef6.addXref(xref11);

    // Special case: with the standard isoform db name, id + idVersion are expected to be folded
    // into the id "P68250-1" and the idVersion cleared.
    UnificationXref xref12 = model.addNew(UnificationXref.class, "Xref12");
    xref12.setDb("UniProt Isoform");
    xref12.setId("P68250");
    xref12.setIdVersion("1");
    ProteinReference proteinRef7 = model.addNew(ProteinReference.class, "ProteinReference7");
    proteinRef7.addXref(xref12);

    UnificationXref xref13 = model.addNew(UnificationXref.class, "Xref13");
    xref13.setDb("UniProt Isoform");
    xref13.setId("P68250-1");
    ProteinReference proteinRef8 = model.addNew(ProteinReference.class, "ProteinReference8");
    proteinRef8.addXref(xref13);

    // --- Dangling unification xrefs guarding against a bug that wrongly set
    //     db='uniprot isoform' ---
    UnificationXref danglingPlain = model.addNew(UnificationXref.class, "UniprotUX1");
    danglingPlain.setDb("uniprot");
    danglingPlain.setId("W0C7J9");
    UnificationXref danglingDotVersion = model.addNew(UnificationXref.class, "UniprotUX2");
    danglingDotVersion.setDb("uniprot");
    danglingDotVersion.setId("W0C7J9.1"); // must NOT become 'uniprot isoform'
    UnificationXref danglingWithIdVersion = model.addNew(UnificationXref.class, "UniprotUX3");
    danglingWithIdVersion.setDb("uniprot");
    danglingWithIdVersion.setId("W0C7J9"); // must NOT become 'uniprot isoform'
    danglingWithIdVersion.setIdVersion("1");
    UnificationXref danglingIsoform1 = model.addNew(UnificationXref.class, "UniprotUX4");
    danglingIsoform1.setDb("uniprot"); // expected to become 'uniprot isoform'
    danglingIsoform1.setId("W0C7J9-1");
    UnificationXref danglingIsoform3 = model.addNew(UnificationXref.class, "UniprotUX5");
    danglingIsoform3.setDb("uniprot"); // expected to become 'uniprot isoform'
    danglingIsoform3.setId("P68250-3");

    // --- Run the normalizer ---
    new Normalizer().normalize(model);

    // Guard against the bug where db='uniprot' turned into 'uniprot isoform' because the id
    // matched both patterns.
    assertTrue(model.containsID("UnificationXref_uniprot_knowledgebase_W0C7J9"));
    assertFalse(model.containsID("UnificationXref_uniprot_isoform_W0C7J9"));
    assertTrue(model.containsID("UnificationXref_uniprot_knowledgebase_W0C7J9_1"));
    assertTrue(model.containsID("UnificationXref_uniprot_isoform_W0C7J9-1"));
    assertTrue(model.containsID("UnificationXref_uniprot_isoform_P68250-3"));
    // NOTE(review): this repeats the first assertion of this group verbatim; kept as in the
    // original - confirm whether a different URI was intended.
    assertTrue(model.containsID("UnificationXref_uniprot_knowledgebase_W0C7J9"));

    // The normalized UniProt unification xref for P68250 must exist.
    String uniprotXrefUri =
        Normalizer.uri(model.getXmlBase(), "uniprot", "P68250", UnificationXref.class);
    BioPAXElement uniprotXref = model.getByID(uniprotXrefUri);
    assertTrue(uniprotXref instanceof UnificationXref);

    // The protein reference for Q0VCL1 must live under its identifiers.org URI.
    BioPAXElement q0vcl1Reference = model.getByID("http://identifiers.org/uniprot/Q0VCL1");
    assertTrue(q0vcl1Reference instanceof ProteinReference);
    // Relationship xrefs keep their original URI (they are not normalized unless, by mistake,
    // they already have an identifiers.org URI).
    assertTrue(model.containsID("Xref7"));

    // BioSource: the original URI is gone and the normalized one is present.
    assertFalse(model.containsID("BioSource_Mouse_Tissue"));
    String bioSourceUri = Normalizer.uri(model.getXmlBase(), "taxonomy", "10090", BioSource.class);
    BioPAXElement normalizedBioSource = model.getByID(bioSourceUri);
    assertTrue(normalizedBioSource instanceof BioSource);
    String taxonomyXrefUri =
        Normalizer.uri(model.getXmlBase(), "taxonomy", "10090", UnificationXref.class);
    BioPAXElement normalizedTaxonomyXref = model.getByID(taxonomyXrefUri);
    assertTrue(normalizedTaxonomyXref instanceof UnificationXref);

    // Eight ProteinReferences were created above; after duplicates are merged, four are expected
    // to remain.
    assertEquals(4, model.getObjects(ProteinReference.class).size());

    // Provenance is no longer normalized (Miriam alone is not enough for that), so both remain.
    assertEquals(2, model.getObjects(Provenance.class).size());

    // dataSource is not inferred/inherited from the parent pathway anymore (property inference was
    // removed), so each pathway still has exactly the one data source it was given.
    Pathway childAfter = (Pathway) model.getByID("sub_pathway");
    assertEquals(1, childAfter.getDataSource().size());
    Pathway parentAfter = (Pathway) model.getByID("pathway");
    assertEquals(1, parentAfter.getDataSource().size());

    // The UniProt isoform xref for P68250-2 must exist under its expected URI and be used by
    // exactly one object.
    String isoformXrefUri =
        Normalizer.uri(model.getXmlBase(), "uniprot isoform", "P68250-2", UnificationXref.class);
    BioPAXElement isoformXref = model.getByID(isoformXrefUri);
    assertNotNull(isoformXref);
    assertEquals(1, ((Xref) isoformXref).getXrefOf().size());
  }