private String getSubstrateAccession( Matcher patternSubstrateAccession, SubstrateDatabaseEntry subtratedatabase) { String accession = null; if (patternSubstrateAccession.find()) { accession = patternSubstrateAccession.group(1); accession = accession.trim(); subtratedatabase.setS_UniprotID(accession); // System.out.println(accession); } else { accession = "n.d."; subtratedatabase.setS_UniprotID(accession); // System.out.println(accession); } return accession; }
private String getSubstrateNameSymbolId( Matcher patternSubstrateName, SubstrateDatabaseEntry substratedatabase, CsDatabaseEntry csdatabase, String entry, String substrateTaxon) throws IOException { String commentS = null; Matcher patternSubstrateAccession = getPatternmatcher( "UniProt\\s+Accession:</th>[^<]+<td><a\\s+href\\s+=\\s+\"[^\"]+\"\\s+target=\"[^\"]+\">" + "([^<]+)", entry); String accession = getSubstrateAccession(patternSubstrateAccession, substratedatabase); Matcher patternSubstrateSymbol = getPatternmatcher( "Substrate[^<]+</th>[^<]+<td>[^<]+<table>[^<]+<tr>[^<]+<th\\s+class=\"th3\">Definition:</th>[^<]+<td><b>" + "[^<]+" + "</b></td>[^<]+<tr>[^<]+<th\\s+class=\"th3\">Symbol:</th>" + "([^<]+<td><b>)?([^<]+)", entry); String symbol = getSubstrateSymbol(patternSubstrateSymbol, substratedatabase); if (patternSubstrateName.find()) { String Substratename = "to check"; String Substratesymbol = "to check"; String Substrateaccession = "to check"; substratedatabase.setS_NL_Name(Substratename); substratedatabase.setS_Name(Substratename); substratedatabase.setS_Symbol(Substratesymbol); substratedatabase.setS_UniprotID(Substrateaccession); Substratename = patternSubstrateName.group(1); Substratename = Substratename.trim(); Substratename = Substratename.replaceAll(",", ""); Substratename = Substratename.replaceAll(";", ""); substratedatabase.setS_NL_Name(Substratename); commentS = "Check Substrate Symbol and Accession; add to Substrate Librairy"; BufferedReader bReader = null; if (substrateTaxon.contains("H**o")) { bReader = createBufferedreader( "/Users/julieklein/Dropbox/ProteasiX/LIBRAIRIES/SubstrateHSALibrairy.txt"); } else if (substrateTaxon.contains("Mus")) { bReader = createBufferedreader( "/Users/julieklein/Dropbox/ProteasiX/LIBRAIRIES/SubstrateMMULibrairy.txt"); } else if (substrateTaxon.contains("Rattus")) { bReader = createBufferedreader( "/Users/julieklein/Dropbox/ProteasiX/LIBRAIRIES/SubstrateRNOLibrairy.txt"); } String line; while ((line = bReader.readLine()) != null) { String splitarray[] = line.split("\t"); String naturallanguage = splitarray[1]; naturallanguage = naturallanguage.replaceAll("\"", ""); naturallanguage = naturallanguage.replaceAll(",", ""); naturallanguage = naturallanguage.replaceAll(";", ""); if (naturallanguage.equalsIgnoreCase(Substratename)) { Substratesymbol = splitarray[0]; Substratesymbol = Substratesymbol.replaceAll("sept-0", "SEPT"); Substrateaccession = splitarray[2]; if (Substrateaccession.contains("n.d.")) { substratedatabase.setS_Name("n.d."); substratedatabase.setS_UniprotID(Substrateaccession); substratedatabase.setS_Symbol(Substratesymbol); } else { String UniprotURL = "http://www.uniprot.org/uniprot/" + Substrateaccession + ".xml"; NodeList entries = getEntries("/uniprot/entry", parseUniprot(UniprotURL)); for (int i = 0; i < entries.getLength(); i++) { getUniSubstratepproteinname(entries, i, substratedatabase); String genename = getUniSubstrategenename(entries, i, substratedatabase); } // System.out.println(Substrateaccession); substratedatabase.setS_UniprotID(Substrateaccession); commentS = "-"; System.out.println(commentS); } } csdatabase.setSubstrate(substratedatabase); } } else if (!symbol.contains("n.d.")) { String Substratename = "to check"; String Substratesymbol = "to check"; String Substrateaccession = "to check"; substratedatabase.setS_NL_Name(Substratename); substratedatabase.setS_Name(Substratename); substratedatabase.setS_Symbol(Substratesymbol); substratedatabase.setS_UniprotID(Substrateaccession); BufferedReader bReader = null; if (substrateTaxon.contains("H**o")) { bReader = createBufferedreader( "/Users/julieklein/Dropbox/ProteasiX/LIBRAIRIES/SubstrateHSALibrairy.txt"); } else if (substrateTaxon.contains("Mus")) { bReader = createBufferedreader( "/Users/julieklein/Dropbox/ProteasiX/LIBRAIRIES/SubstrateMMULibrairy.txt"); } else if (substrateTaxon.contains("Rattus")) { bReader = createBufferedreader( "/Users/julieklein/Dropbox/ProteasiX/LIBRAIRIES/SubstrateRNOLibrairy.txt"); } String line; while ((line = bReader.readLine()) != null) { String splitarray[] = line.split("\t"); String librairisymbol = splitarray[0]; librairisymbol = librairisymbol.replaceAll("\"", ""); librairisymbol = librairisymbol.replaceAll("sept-0", "SEPT"); if (librairisymbol.equals(symbol)) { Substrateaccession = splitarray[2]; if (Substrateaccession.contains("n.d.")) { substratedatabase.setS_Name("n.d."); substratedatabase.setS_UniprotID(Substrateaccession); substratedatabase.setS_Symbol(symbol); } else { String UniprotURL = "http://www.uniprot.org/uniprot/" + Substrateaccession + ".xml"; NodeList entries = getEntries("/uniprot/entry", parseUniprot(UniprotURL)); for (int i = 0; i < entries.getLength(); i++) { getUniSubstratepproteinname(entries, i, substratedatabase); String genename = getUniSubstrategenename(entries, i, substratedatabase); } // System.out.println(Substrateaccession); substratedatabase.setS_UniprotID(Substrateaccession); commentS = "-"; System.out.println(commentS); } } csdatabase.setSubstrate(substratedatabase); } } else { String Substratename = "n.d."; String Substratesymbol = "n.d."; String Substrateaccession = "n.d"; substratedatabase.setS_NL_Name(Substratename); substratedatabase.setS_Name(Substratename); substratedatabase.setS_Symbol(Substratesymbol); substratedatabase.setS_UniprotID(Substrateaccession); System.out.println(Substratename); System.out.println(Substratesymbol); System.out.println(Substrateaccession); commentS = "-"; System.out.println(commentS); csdatabase.setSubstrate(substratedatabase); } return commentS; }