public static void main(String[] args) { Point3d[] pts = SecondaryStructTools.obtain(NAME); if (smooth) pts = CS.getSmoothedPoints(pts); SecondaryStruct s = new SecondaryStruct(pts, smooth); File f = new File("data/smooth_" + NAME + ".pdb"); Chain c = new ChainImpl(); c.setChainID(NAME.split("\\.")[1]); try (PrintWriter pw = new PrintWriter(f)) { for (int i = 0; i < pts.length; i++) { Atom a = new AtomImpl(); a = new AtomImpl(); a.setName(CA_NAME); a.setAltLoc(' '); Group g = new AminoAcidImpl(); g.setPDBName(GROUP_NAME); g.addAtom(a); g.setResidueNumber(chainID, i + 1, null); c.addGroup(g); a.setX(pts[i].x); a.setY(pts[i].y); a.setZ(pts[i].z); pw.print(a.toPDB()); } } catch (FileNotFoundException e) { e.printStackTrace(); } // SecondaryStructureSequenceFeature sf = s.getSequenceFeature(); System.out.println("Start"); System.out.println(NAME); System.out.println(s.getAlphaLength()); try (Scanner scan = new Scanner(System.in)) { String in; while (!(in = scan.next()).equals("X")) { if (in.equals("g")) { int st = scan.nextInt(); System.out.println( SecondaryStructTools.distsToString(s.getRange(st - 1, scan.nextInt()), st)); } else if (in.equals("a")) s.printHelices(); else if (in.equals("b")) s.printStrands(); else if (in.equals("l")) System.out.println(s.length()); else if (in.equals("c")) s.printPoints(); // else if (in.equals("sf")) // for (int i = 0; i < s.length(); i++) // System.out.println((i + 1) + ":\t" + sf.toString(i)); else if (in.equals("test")) s.printVectors(s.getAlpha().getFeatures()); else if (in.equals("test1")) System.out.println("=(0,0,0)\t=" + s.normP + "*50\t=" + s.normX + "*50"); else if (in.equals("test2")) SecondaryStruct.printProjection(s.getAlphaNormProjection((byte) 0b00000000)); } } // sc.close(); }
/**
 * Builds placeholder CA atoms laid out along the x axis.
 *
 * <p>Atom {@code i} is named {@code "CA"}, placed at {@code (i, 0, 0)} (i.e. 1A spacing between
 * consecutive atoms) and attached to its own {@code GLY} amino-acid group whose residue number
 * is parsed from {@code i}. All groups are added to a single throw-away chain.
 *
 * @param len number of atoms to create
 * @return the created atoms, in order of increasing x
 */
private Atom[] makeDummyCA(int len) {
  Chain chain = new ChainImpl();
  Atom[] atoms = new Atom[len];
  for (int idx = 0; idx < len; idx++) {
    Atom ca = new AtomImpl();
    atoms[idx] = ca;
    ca.setName("CA");
    ca.setCoords(new double[] {idx, 0, 0});

    Group residue = new AminoAcidImpl();
    residue.setPDBName("GLY");
    residue.setResidueNumber(ResidueNumber.fromString(String.valueOf(idx)));
    residue.addAtom(ca);
    chain.addGroup(residue);
  }
  return atoms;
}
private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) { Set<StructureGroup> identifiedComps = new HashSet<StructureGroup>(); for (ModifiedCompound mc : modComps) { identifiedComps.addAll(mc.getGroups(true)); } // TODO: use the ModifiedAminoAcid after Andreas add that. for (Group group : residues) { if (group.getType().equals(GroupType.HETATM)) { StructureGroup strucGroup = StructureUtil.getStructureGroup(group, true); if (!identifiedComps.contains(strucGroup)) { unidentifiableModifiedResidues.add(strucGroup); } } } }
/** * Record unidentifiable atom linkages in a chain. Only linkages between two residues or one * residue and one ligand will be recorded. */ private void recordUnidentifiableAtomLinkages( List<ModifiedCompound> modComps, List<Group> ligands) { // first put identified linkages in a map for fast query Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>(); for (ModifiedCompound mc : modComps) { identifiedLinkages.addAll(mc.getAtomLinkages()); } // record // cross link int nRes = residues.size(); for (int i = 0; i < nRes - 1; i++) { Group group1 = residues.get(i); for (int j = i + 1; j < nRes; j++) { Group group2 = residues.get(j); List<Atom[]> linkages = StructureUtil.findAtomLinkages(group1, group2, true, bondLengthTolerance); for (Atom[] atoms : linkages) { StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], true); unidentifiableAtomLinkages.add(link); } } } // attachment int nLig = ligands.size(); for (int i = 0; i < nRes; i++) { Group group1 = residues.get(i); for (int j = 0; j < nLig; j++) { Group group2 = ligands.get(j); if (group1.equals(group2)) { // overlap between residues and ligands continue; } List<Atom[]> linkages = StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance); for (Atom[] atoms : linkages) { StructureAtomLinkage link = StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], false); unidentifiableAtomLinkages.add(link); } } } }
/** * Returns the representation of a {@link ScopDomain} as a BioJava {@link Structure} object. * * @param domain a SCOP domain * @param scopDatabase A {@link ScopDatabase} to use * @param strictLigandHandling If set to false, hetero-atoms are included if and only if they * belong to a chain to which the SCOP domain belongs; if set to true, hetero-atoms are * included if and only if they are strictly within the definition (residue numbers) of the * SCOP domain * @return a Structure object * @throws IOException * @throws StructureException */ public Structure getStructureForDomain( ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling) throws IOException, StructureException { String pdbId = domain.getPdbId(); Structure fullStructure = getStructureForPdbId(pdbId); Structure structure = domain.reduce(fullStructure); // TODO It would be better to move all of this into the reduce method, // but that would require ligand handling properties in StructureIdentifiers // because ligands sometimes occur after TER records in PDB files, we may need to add some // ligands back in // specifically, we add a ligand if and only if it occurs within the domain AtomPositionMap map = null; List<ResidueRangeAndLength> rrs = null; if (strictLigandHandling) { map = new AtomPositionMap( StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER); rrs = ResidueRangeAndLength.parseMultiple(domain.getRanges(), map); } for (Chain chain : fullStructure.getChains()) { if (!structure.hasChain(chain.getChainID())) { continue; // we can't do anything with a chain our domain } // doesn't contain Chain newChain = structure.getChainByPDB(chain.getChainID()); List<Group> ligands = StructureTools.filterLigands(chain.getAtomGroups()); for (Group group : ligands) { boolean shouldContain = true; if (strictLigandHandling) { shouldContain = false; // whether the ligand occurs within the domain for (ResidueRange rr : rrs) { if (rr.contains(group.getResidueNumber(), 
map)) { shouldContain = true; } } } boolean alreadyContains = newChain.getAtomGroups().contains(group); // we don't want to add duplicate // ligands if (shouldContain && !alreadyContains) { newChain.addGroup(group); } } } // build a more meaningful description for the new structure StringBuilder header = new StringBuilder(); header.append(domain.getClassificationId()); if (scopDatabase != null) { int sf = domain.getSuperfamilyId(); ScopDescription description = scopDatabase.getScopDescriptionBySunid(sf); if (description != null) { header.append(" | "); header.append(description.getDescription()); } } structure.getPDBHeader().setDescription(header.toString()); return structure; }
private static int append(SequenceFile.Writer writer, String pdbId, Structure s) throws IOException { int chainCount = 0; for (Chain c : s.getChains()) { List<Group> groups = c.getSeqResGroups(GroupType.AMINOACID); System.out.println("seq len: " + c.getSeqResSequence().length()); System.out.println("seqresgroups.size(): " + c.getSeqResGroups(GroupType.AMINOACID).size()); List<Group> nAcids = c.getSeqResGroups(GroupType.NUCLEOTIDE); boolean aminoAcid = true; if (nAcids.size() > groups.size()) { groups = nAcids; aminoAcid = false; } int dna = 0; int rna = 0; int peptide = 0; int dPeptide = 0; int unknownResidues = 0; Point3d[] coords = new Point3d[groups.size() * 3]; Integer[] sequence = new Integer[groups.size()]; int gaps = 0; for (int i = 0; i < groups.size(); i++) { Group g = groups.get(i); char code = g.getChemComp().getOne_letter_code().charAt(0); if (code == 'X') { unknownResidues++; } sequence[i] = (int) code; PolymerType p = g.getChemComp().getPolymerType(); if (p.equals(PolymerType.peptide)) { peptide++; } else if (p.equals(PolymerType.dpeptide)) { dPeptide++; } else if (p.equals(PolymerType.dna)) { dna++; } else if (p.equals(PolymerType.rna)) { rna++; } Atom atom = null; Atom atomN = null; Atom atomC = null; if (aminoAcid) { atom = ((AminoAcidImpl) g).getCA(); atomN = ((AminoAcidImpl) g).getN(); atomC = ((AminoAcidImpl) g).getC(); } else { atom = ((NucleotideImpl) g).getP(); } if (atom == null || atomN == null || atomC == null) { gaps++; } else { coords[i * 3] = new Point3d(atom.getCoords()); coords[i * 3 + 1] = new Point3d(atomN.getCoords()); coords[i * 3 + 2] = new Point3d(atomC.getCoords()); } } // ignore chains with less than 10 residues (with coordinates) // System.out.println("size: " + (groups.size()-gaps)); if (groups.size() - gaps < 10) { continue; } if (unknownResidues > (groups.size() - gaps) / 2) { System.err.println("Polymer with many unknown residues ignored: " + pdbId + c.getChainID()); continue; } // ignore any mixed polymer types if 
(dPeptide > 0) { System.err.println("d-peptide ignored: " + pdbId + c.getChainID()); continue; } if (dna > 0 && rna > 0) { System.err.println("DNA/RNA hybrid ignored: " + pdbId + c.getChainID()); continue; } // determine polymer type SimplePolymerType polymerType = null; if (peptide > 0) { polymerType = SimplePolymerType.PROTEIN; // System.out.println("PROTEIN: " + dna); } else if (dna > 0) { polymerType = SimplePolymerType.DNA; // System.out.println("DNA: " + dna); } else if (rna > 0) { polymerType = SimplePolymerType.RNA; // System.out.println("RNA: " + rna); } else { continue; } chainCount++; Text key1 = new Text(pdbId + "." + c.getChainID()); ArrayWritable value1 = new IntArrayWritable(); value1.set( SimplePolymerChainCodecHL.encodePolymerChain( polymerType.ordinal(), coords, sequence, gaps)); writer.append(key1, value1); } return chainCount; }
/** Get matched atoms for all linkages. */ private List<List<Atom[]>> getMatchedAtomsOfLinkages( ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) { List<ModificationLinkage> linkages = condition.getLinkages(); int nLink = linkages.size(); List<List<Atom[]>> matchedAtomsOfLinkages = new ArrayList<List<Atom[]>>(nLink); for (int iLink = 0; iLink < nLink; iLink++) { ModificationLinkage linkage = linkages.get(iLink); Component comp1 = linkage.getComponent1(); Component comp2 = linkage.getComponent2(); // boolean isAA1 = comp1.; // boolean isAA2 = comp2.getType()==true; Set<Group> groups1 = mapCompGroups.get(comp1); Set<Group> groups2 = mapCompGroups.get(comp2); List<Atom[]> list = new ArrayList<Atom[]>(); List<String> potentialNamesOfAtomOnGroup1 = linkage.getPDBNameOfPotentialAtomsOnComponent1(); for (String name : potentialNamesOfAtomOnGroup1) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup1 = null; // search all atoms break; } } List<String> potentialNamesOfAtomOnGroup2 = linkage.getPDBNameOfPotentialAtomsOnComponent2(); for (String name : potentialNamesOfAtomOnGroup2) { if (name.equals("*")) { // wildcard potentialNamesOfAtomOnGroup2 = null; // search all atoms break; } } for (Group g1 : groups1) { for (Group g2 : groups2) { if (g1.equals(g2)) { continue; } // only for wildcard match of two residues boolean ignoreNCLinkage = potentialNamesOfAtomOnGroup1 == null && potentialNamesOfAtomOnGroup2 == null && residues.contains(g1) && residues.contains(g2); Atom[] atoms = StructureUtil.findNearestAtomLinkage( g1, g2, potentialNamesOfAtomOnGroup1, potentialNamesOfAtomOnGroup2, ignoreNCLinkage, bondLengthTolerance); if (atoms != null) { list.add(atoms); } } } if (list.isEmpty()) { // broken linkage break; } matchedAtomsOfLinkages.add(list); } return matchedAtomsOfLinkages; }
/**
 * Maps every component used by the given modifications to the groups of the chain that can
 * play that component's role, storing the result in {@code saveTo}.
 *
 * <p>Ligands and residues are matched by their trimmed PDB name (or by the {@code "*"}
 * wildcard component). N-terminal components are matched against the first residue, and
 * against following residues for as long as the residue just examined is also a ligand;
 * C-terminal components are handled the same way from the end of the list. If
 * {@code residues} is empty only the ligands are processed.
 *
 * @param modifications a set of {@link ProteinModification}s.
 * @param residues residues of the chain, in order
 * @param ligands ligands of the chain
 * @param saveTo output map from component to the corresponding groups in the chain; existing
 *     entries are extended
 * @throws IllegalArgumentException if {@code modifications}, {@code residues} or
 *     {@code ligands} is {@code null}
 */
private void addModificationGroups(
    final Set<ProteinModification> modifications,
    final List<Group> residues,
    final List<Group> ligands,
    final Map<Component, Set<Group>> saveTo) {
  if (residues == null || ligands == null || modifications == null) {
    throw new IllegalArgumentException("Null argument(s).");
  }

  // Index: single-id component -> every (possibly multi-id) component that lists that id.
  Map<Component, Set<Component>> mapSingleMultiComps = new HashMap<Component, Set<Component>>();
  for (ProteinModification mod : modifications) {
    ModificationCondition condition = mod.getCondition();
    for (Component comp : condition.getComponents()) {
      for (String pdbccId : comp.getPdbccIds()) {
        Component single =
            Component.of(Collections.singleton(pdbccId), comp.isNTerminal(), comp.isCTerminal());
        Set<Component> mult = mapSingleMultiComps.get(single);
        if (mult == null) {
          mult = new HashSet<Component>();
          mapSingleMultiComps.put(single, mult);
        }
        mult.add(comp);
      }
    }
  }

  Set<Component> anyWildCard = mapSingleMultiComps.get(Component.of("*"));

  // ligands
  for (Group group : ligands) {
    String pdbccId = group.getPDBName().trim();
    Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
    addGroupToComponents(group, anyWildCard, comps, saveTo);
  }

  // residues
  if (residues.isEmpty()) {
    return;
  }

  // for all residues
  for (Group group : residues) {
    String pdbccId = group.getPDBName().trim();
    Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
    addGroupToComponents(group, anyWildCard, comps, saveTo);
  }

  // for N-terminal: all ligands on the N terminal and the first residue
  Set<Component> nTermWildCard = mapSingleMultiComps.get(Component.of("*", true, false));
  int nRes = residues.size();
  int iRes = 0;
  Group res;
  do {
    res = residues.get(iRes++);
    // Trimmed for consistency with the ligand/residue lookups above.
    Set<Component> comps =
        mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), true, false));
    addGroupToComponents(res, nTermWildCard, comps, saveTo);
  } while (iRes < nRes && ligands.contains(res));

  // for C-terminal: all ligands on the C terminal and the last residue
  Set<Component> cTermWildCard = mapSingleMultiComps.get(Component.of("*", false, true));
  iRes = residues.size() - 1;
  do {
    res = residues.get(iRes--);
    Set<Component> comps =
        mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), false, true));
    addGroupToComponents(res, cTermWildCard, comps, saveTo);
  } while (iRes >= 0 && ligands.contains(res));
}

/**
 * Adds {@code group} to the group set of every component in the union of the two component
 * sets, creating the (insertion-ordered) set in {@code saveTo} on first use.
 */
private void addGroupToComponents(
    Group group,
    Set<Component> wildCardComps,
    Set<Component> namedComps,
    Map<Component, Set<Group>> saveTo) {
  for (Component comp : unionComponentSet(wildCardComps, namedComps)) {
    Set<Group> gs = saveTo.get(comp);
    if (gs == null) {
      gs = new LinkedHashSet<Group>();
      saveTo.put(comp, gs);
    }
    gs.add(group);
  }
}