public static void main(String[] args) { Point3d[] pts = SecondaryStructTools.obtain(NAME); if (smooth) pts = CS.getSmoothedPoints(pts); SecondaryStruct s = new SecondaryStruct(pts, smooth); File f = new File("data/smooth_" + NAME + ".pdb"); Chain c = new ChainImpl(); c.setChainID(NAME.split("\\.")[1]); try (PrintWriter pw = new PrintWriter(f)) { for (int i = 0; i < pts.length; i++) { Atom a = new AtomImpl(); a = new AtomImpl(); a.setName(CA_NAME); a.setAltLoc(' '); Group g = new AminoAcidImpl(); g.setPDBName(GROUP_NAME); g.addAtom(a); g.setResidueNumber(chainID, i + 1, null); c.addGroup(g); a.setX(pts[i].x); a.setY(pts[i].y); a.setZ(pts[i].z); pw.print(a.toPDB()); } } catch (FileNotFoundException e) { e.printStackTrace(); } // SecondaryStructureSequenceFeature sf = s.getSequenceFeature(); System.out.println("Start"); System.out.println(NAME); System.out.println(s.getAlphaLength()); try (Scanner scan = new Scanner(System.in)) { String in; while (!(in = scan.next()).equals("X")) { if (in.equals("g")) { int st = scan.nextInt(); System.out.println( SecondaryStructTools.distsToString(s.getRange(st - 1, scan.nextInt()), st)); } else if (in.equals("a")) s.printHelices(); else if (in.equals("b")) s.printStrands(); else if (in.equals("l")) System.out.println(s.length()); else if (in.equals("c")) s.printPoints(); // else if (in.equals("sf")) // for (int i = 0; i < s.length(); i++) // System.out.println((i + 1) + ":\t" + sf.toString(i)); else if (in.equals("test")) s.printVectors(s.getAlpha().getFeatures()); else if (in.equals("test1")) System.out.println("=(0,0,0)\t=" + s.normP + "*50\t=" + s.normX + "*50"); else if (in.equals("test2")) SecondaryStruct.printProjection(s.getAlphaNormProjection((byte) 0b00000000)); } } // sc.close(); }
/**
 * Builds {@code len} placeholder CA atoms spaced at 1A intervals along the x axis: atom
 * {@code i} sits at {@code (i, 0, 0)}.
 *
 * <p>Each atom is placed in its own GLY amino-acid group, numbered with its index, and all
 * groups are added to a single freshly created chain.
 *
 * @param len number of atoms to create
 * @return the created atoms, in index order
 */
private Atom[] makeDummyCA(int len) {
  Chain dummyChain = new ChainImpl();
  Atom[] atoms = new Atom[len];

  for (int idx = 0; idx < atoms.length; idx++) {
    Atom ca = new AtomImpl();
    ca.setName("CA");
    ca.setCoords(new double[] {idx, 0, 0});

    Group residue = new AminoAcidImpl();
    residue.setPDBName("GLY");
    residue.setResidueNumber(ResidueNumber.fromString(String.valueOf(idx)));
    residue.addAtom(ca);
    dummyChain.addGroup(residue);

    atoms[idx] = ca;
  }

  return atoms;
}
/** * Returns a {@link Structure} corresponding to the CATH identifier supplied in {@code * structureName}, using the specified {@link CathDatabase}. */ public Structure getStructureForCathDomain(StructureName structureName, CathDatabase cathInstall) throws IOException, StructureException { CathDomain cathDomain = cathInstall.getDomainByCathId(structureName.getIdentifier()); Structure s = getStructureForPdbId(cathDomain.getIdentifier()); Structure n = cathDomain.reduce(s); // add the ligands of the chain... Chain newChain = n.getChainByPDB(structureName.getChainId()); Chain origChain = s.getChainByPDB(structureName.getChainId()); List<Group> ligands = origChain.getAtomLigands(); for (Group g : ligands) { if (!newChain.getAtomGroups().contains(g)) { newChain.addGroup(g); } } return n; }
/** * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. * * @param domain a SCOP domain * @param scopDatabase A {@link ScopDatabase} to use * @param strictLigandHandling If set to false, hetero-atoms are included if and only if they * belong to a chain to which the SCOP domain belongs; if set to true, hetero-atoms are * included if and only if they are strictly within the definition (residue numbers) of the * SCOP domain * @return a Structure object * @throws IOException * @throws StructureException */ public Structure getStructureForDomain( ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling) throws IOException, StructureException { String pdbId = domain.getPdbId(); Structure fullStructure = getStructureForPdbId(pdbId); Structure structure = domain.reduce(fullStructure); // TODO It would be better to move all of this into the reduce method, // but that would require ligand handling properties in StructureIdentifiers // because ligands sometimes occur after TER records in PDB files, we may need to add some // ligands back in // specifically, we add a ligand if and only if it occurs within the domain AtomPositionMap map = null; List<ResidueRangeAndLength> rrs = null; if (strictLigandHandling) { map = new AtomPositionMap( StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER); rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map); } for (Chain chain : fullStructure.getChains()) { if (!structure.hasChain(chain.getChainID())) { continue; // we can't do anything with a chain our domain } // doesn't contain Chain newChain = structure.getChainByPDB(chain.getChainID()); List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups()); for (Group group : ligands) { boolean shouldContain = true; if (strictLigandHandling) { shouldContain = false; // whether the ligand occurs within the domain for (ResidueRange rr : rrs) { if (rr.contains(group.getResidueNumber(), 
map)) { shouldContain = true; } } } boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate // ligands if (shouldContain && !alreadyContains) { newChain.addGroup(group); } } } // build a more meaningful description for the new structure StringBuilder header = new StringBuilder(); header.append(domain.getClassificationId()); if (scopDatabase != null) { int sf = domain.getSuperfamilyId(); ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf); if (description != null) { header.append(" | "); header.append(description.getDescription()); } } structure.getPDBHeader().setDescription(header.toString()); return structure; }
private static int append(SequenceFile.Writer writer, String pdbId, Structure s) throws IOException { int chainCount = 0; for (Chain c : s.getChains()) { List<Group> groups = c.getSeqResGroups(GroupType.AMINOACID); System.out.println("seq len: " + c.getSeqResSequence().length()); System.out.println("seqresgroups.size(): " + c.getSeqResGroups(GroupType.AMINOACID).size()); List<Group> nAcids = c.getSeqResGroups(GroupType.NUCLEOTIDE); boolean aminoAcid = true; if (nAcids.size() > groups.size()) { groups = nAcids; aminoAcid = false; } int dna = 0; int rna = 0; int peptide = 0; int dPeptide = 0; int unknownResidues = 0; Point3d[] coords = new Point3d[groups.size() * 3]; Integer[] sequence = new Integer[groups.size()]; int gaps = 0; for (int i = 0; i < groups.size(); i++) { Group g = groups.get(i); char code = g.getChemComp().getOne_letter_code().charAt(0); if (code == 'X') { unknownResidues++; } sequence[i] = (int) code; PolymerType p = g.getChemComp().getPolymerType(); if (p.equals(PolymerType.peptide)) { peptide++; } else if (p.equals(PolymerType.dpeptide)) { dPeptide++; } else if (p.equals(PolymerType.dna)) { dna++; } else if (p.equals(PolymerType.rna)) { rna++; } Atom atom = null; Atom atomN = null; Atom atomC = null; if (aminoAcid) { atom = ((AminoAcidImpl) g).getCA(); atomN = ((AminoAcidImpl) g).getN(); atomC = ((AminoAcidImpl) g).getC(); } else { atom = ((NucleotideImpl) g).getP(); } if (atom == null || atomN == null || atomC == null) { gaps++; } else { coords[i * 3] = new Point3d(atom.getCoords()); coords[i * 3 + 1] = new Point3d(atomN.getCoords()); coords[i * 3 + 2] = new Point3d(atomC.getCoords()); } } // ignore chains with less than 10 residues (with coordinates) // System.out.println("size: " + (groups.size()-gaps)); if (groups.size() - gaps < 10) { continue; } if (unknownResidues > (groups.size() - gaps) / 2) { System.err.println("Polymer with many unknown residues ignored: " + pdbId + c.getChainID()); continue; } // ignore any mixed polymer types if 
(dPeptide > 0) { System.err.println("d-peptide ignored: " + pdbId + c.getChainID()); continue; } if (dna > 0 && rna > 0) { System.err.println("DNA/RNA hybrid ignored: " + pdbId + c.getChainID()); continue; } // determine polymer type SimplePolymerType polymerType = null; if (peptide > 0) { polymerType = SimplePolymerType.PROTEIN; // System.out.println("PROTEIN: " + dna); } else if (dna > 0) { polymerType = SimplePolymerType.DNA; // System.out.println("DNA: " + dna); } else if (rna > 0) { polymerType = SimplePolymerType.RNA; // System.out.println("RNA: " + rna); } else { continue; } chainCount++; Text key1 = new Text(pdbId + "." + c.getChainID()); ArrayWritable value1 = new IntArrayWritable(); value1.set( SimplePolymerChainCodecHL.encodePolymerChain( polymerType.ordinal(), coords, sequence, gaps)); writer.append(key1, value1); } return chainCount; }
/** * Identify a set of modifications in a a list of chains. * * @param chains query {@link Chain}s. * @param potentialModifications query {@link ProteinModification}s. */ public void identify( final List<Chain> chains, final Set<ProteinModification> potentialModifications) { if (chains == null) { throw new IllegalArgumentException("Null structure."); } if (potentialModifications == null) { throw new IllegalArgumentException("Null potentialModifications."); } reset(); if (potentialModifications.isEmpty()) { return; } Map<String, Chain> mapChainIdChain = new HashMap<String, Chain>(chains.size()); residues = new ArrayList<Group>(); List<Group> ligands = new ArrayList<Group>(); Map<Component, Set<Group>> mapCompGroups = new HashMap<Component, Set<Group>>(); for (Chain chain : chains) { mapChainIdChain.put(chain.getChainID(), chain); List<Group> ress = StructureUtil.getAminoAcids(chain); // List<Group> ligs = chain.getAtomLigands(); List<Group> ligs = StructureTools.filterLigands(chain.getAtomGroups()); residues.addAll(ress); residues.removeAll(ligs); ligands.addAll(ligs); addModificationGroups(potentialModifications, ress, ligs, mapCompGroups); } if (residues.isEmpty()) { String pdbId = "?"; if (chains.size() > 0) { Structure struc = chains.get(0).getParent(); if (struc != null) pdbId = struc.getPDBCode(); } logger.warn( "No amino acids found for {}. Either you did not parse the PDB file with alignSEQRES records, or this record does not contain any amino acids.", pdbId); } List<ModifiedCompound> modComps = new ArrayList<ModifiedCompound>(); for (ProteinModification mod : potentialModifications) { ModificationCondition condition = mod.getCondition(); List<Component> components = condition.getComponents(); if (!mapCompGroups.keySet().containsAll(components)) { // not all components exist for this mod. 
continue; } int sizeComps = components.size(); if (sizeComps == 1) { processCrosslink1(mapCompGroups, modComps, mod, components); } else { processMultiCrosslink(mapCompGroups, modComps, mod, condition); } } if (recordAdditionalAttachments) { // identify additional groups that are not directly attached to amino acids. for (ModifiedCompound mc : modComps) { identifyAdditionalAttachments(mc, ligands, mapChainIdChain); } } mergeModComps(modComps); identifiedModifiedCompounds.addAll(modComps); // record unidentifiable linkage if (recordUnidentifiableModifiedCompounds) { recordUnidentifiableAtomLinkages(modComps, ligands); recordUnidentifiableModifiedResidues(modComps); } }