/** * Attempts to find an EC number for every domain in {@code census}, skipping those which are * already listed in this ECFinder. Prints to {@code output} periodically. */ public void buildFromCensus(CensusResultList census, File output) { ScopDatabase scop = ScopFactory.getSCOP(); CensusSignificance sig = CensusSignificanceFactory.forCeSymmOrd(); int i = 0; for (CensusResult result : census.getEntries()) { try { String scopId = result.getId(); if (this.ecsByAsymmDomain.containsKey(scopId) || this.ecsBySymmDomain.containsKey(scopId) || ecsByUnknownDomain.contains(scopId)) { continue; } ScopDomain domain = scop.getDomainByScopID(scopId); if (domain == null) { logger.error(result.getId() + " is null"); continue; } // got a result; what's its EC? // we need to find the correct polymers corresponding to the domain // note that this still isn't perfect, since we don't know what part of the polymer actually // does the function List<RCSBPolymer> polymers = new ArrayList<RCSBPolymer>(); Set<String> chains = domain.getChains(); RCSBDescription desc = RCSBDescriptionFactory.get(domain.getPdbId()); for (RCSBPolymer polymer : desc.getPolymers()) { for (Character chain : polymer.getChains()) { if (chains.contains(String.valueOf(chain))) { polymers.add(polymer); break; } } } // get the EC numbers // use a set because we don't want > 1 just because we have duplicates NavigableSet<String> ecs = new TreeSet<String>(); for (RCSBPolymer polymer : polymers) { String ec = polymer.getEnzClass(); if (ec != null) ecs.add(ec); } if (ecs.size() == 1) { String ec = ecs.first(); if (sig.isSignificant(result)) { ecsBySymmDomain.put(scopId, ec); } else { ecsByAsymmDomain.put(scopId, ec); } } else if (ecs.size() > 1) { logger.info( "Found different EC numbers for " + domain.getScopId()); // technically, this doesn't mean anything's wrong } else { // logger.debug("Didn't find EC for " + scopId); ecsByUnknownDomain.add(scopId); } if (i > 0 && i % 100 == 0) { print(output); logger.debug("Working 
on #" + i); } } catch (RuntimeException e) { e.printStackTrace(); logger.error(e); } finally { i++; } } }
/**
 * Reads a census result list from the XML in {@code file} and passes it to
 * {@code buildFromCensus} together with {@code output}.
 *
 * @param file the XML file handed to {@code CensusResultList.fromXML}
 * @param output the file forwarded to {@code buildFromCensus}
 * @throws IOException declared by this method; presumably raised when {@code file} cannot be
 *         read — confirm against {@code CensusResultList.fromXML}
 */
public void rebuild(File file, File output) throws IOException {
    CensusResultList census = CensusResultList.fromXML(file);
    buildFromCensus(census, output);
}