/**
 * Fetches the biological assembly identified by a PDB ID and an assembly index.
 *
 * <p>The lookup is delegated to {@code StructureIO.getBiologicalAssembly}, passing this object
 * along. An index greater than 0 selects the corresponding biological assembly; index 0 selects
 * the asymmetric unit. Not every entry provides the same number of assemblies: many have none
 * (e.g. NMR structures), many have exactly one ({@code bioAssemblyId = 1}), and a few have
 * several.
 *
 * <p>If the delegate yields {@code null} and {@code bioAssemblyFallback} is {@code true}, the
 * lookup is repeated with index 0 and that result is returned instead.
 *
 * @param pdbId the PDB ID
 * @param bioAssemblyId the 1-based index of the biological assembly (0 gets the asymmetric unit)
 * @param bioAssemblyFallback if true, retry with index 0 when the requested assembly comes back
 *     as {@code null}
 * @return the loaded structure; may be {@code null} when the lookup returns {@code null} and no
 *     fallback is requested (or the fallback lookup itself returns {@code null})
 * @throws IOException propagated from the underlying lookup
 * @throws StructureException if {@code bioAssemblyId} is negative, or propagated from the
 *     underlying lookup
 * @author Peter Rose
 * @since 3.2
 */
public Structure getBiologicalAssembly(
    String pdbId, int bioAssemblyId, boolean bioAssemblyFallback)
    throws StructureException, IOException {
  // Reject invalid indices before touching any I/O.
  if (bioAssemblyId < 0) {
    String message =
        "bioAssemblyID must be nonnegative: " + pdbId + " bioAssemblyId " + bioAssemblyId;
    throw new StructureException(message);
  }

  Structure requested = StructureIO.getBiologicalAssembly(pdbId, bioAssemblyId, this);

  // Either we got what was asked for, or the caller does not want a fallback.
  if (requested != null || !bioAssemblyFallback) {
    return requested;
  }

  // Fallback: index 0 stands for the asymmetric unit.
  return StructureIO.getBiologicalAssembly(pdbId, 0, this);
}
public static void toSequenceFile(String fileName, Collection<String> pdbIds, boolean verbose) throws IOException { int failure = 0; int success = 0; int chains = 0; try (SequenceFile.Writer writer = SequenceFile.createWriter( new Configuration(), SequenceFile.Writer.file(new Path(fileName)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(IntArrayWritable.class), SequenceFile.Writer.compression( SequenceFile.CompressionType.BLOCK, new BZip2Codec())); ) { for (String pdbId : pdbIds) { if (verbose) { System.out.println(pdbId); } Structure s = null; try { s = StructureIO.getStructure(pdbId); success++; } catch (Exception e) { // some files can't be read. Let's just skip those! e.printStackTrace(); failure++; continue; } if (s == null) { System.err.println("structure null: " + pdbId); continue; } if (s.getChains().size() == 0) { continue; } chains += append(writer, pdbId, s); } IOUtils.closeStream(writer); } if (verbose) { System.out.println("Total structures: " + pdbIds.size()); System.out.println("Success: " + success); System.out.println("Failure: " + failure); System.out.println("Chains: " + chains); } }
/**
 * Command-line entry point: writes all PDB IDs returned by {@code getAll()} to a time-stamped
 * sequence file and prints the elapsed time.
 *
 * <p>The output file name is {@code <args[0]>_<yyyyMMdd_HHmmss>.seq}.
 *
 * @param args {@code args[0]} (required) is the output file name prefix; {@code args[1]}
 *     (optional) is the directory passed to the atom cache via {@code setPath}. When omitted,
 *     the original hard-coded default location is used.
 * @throws IOException if writing the sequence file fails
 * @throws IllegalArgumentException if no output prefix is supplied
 */
public static void main(String[] args) throws IOException {
  // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
  if (args == null || args.length < 1) {
    throw new IllegalArgumentException(
        "Missing argument. Usage: <output-file-prefix> [cache-directory]");
  }

  // NOTE(review): this default is a developer-specific absolute path; pass args[1] to override.
  String cachePath = "/Users/Chris/Documents/RCSB/Data/Protein_chains/cache/";
  if (args.length > 1) {
    cachePath = args[1];
  }

  String timeStamp =
      new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());
  String uri = args[0] + "_" + timeStamp + ".seq";

  Set<String> pdbIds = getAll();

  StructureIO.setAtomCache(cache);
  cache.setPath(cachePath);

  long start = System.nanoTime();
  toSequenceFile(uri, pdbIds, true);
  long end = System.nanoTime();

  System.out.println("Time: " + (end - start) / 1E9 + " sec.");
}