예제 #1
0
 /**
  * Writes the points obtained for {@code NAME} (smoothed first when {@code smooth} is set) as CA
  * atoms to {@code data/smooth_<NAME>.pdb}, then runs an interactive console on standard input for
  * inspecting the resulting {@link SecondaryStruct}.
  *
  * <p>Console tokens (whitespace separated):
  *
  * <ul>
  *   <li>{@code g <from> <to>} - print the distances for the range starting at {@code from}
  *   <li>{@code a} - print helices
  *   <li>{@code b} - print strands
  *   <li>{@code l} - print the length
  *   <li>{@code c} - print the points
  *   <li>{@code test}, {@code test1}, {@code test2} - debugging dumps
  *   <li>{@code X} - quit
  * </ul>
  *
  * Unknown tokens are ignored. The session also ends when standard input is exhausted.
  *
  * @param args ignored
  */
 public static void main(String[] args) {
   Point3d[] pts = SecondaryStructTools.obtain(NAME);
   if (smooth) {
     pts = CS.getSmoothedPoints(pts);
   }
   SecondaryStruct s = new SecondaryStruct(pts, smooth);
   File f = new File("data/smooth_" + NAME + ".pdb");
   Chain c = new ChainImpl();
   // the chain id is the second dot-separated component of NAME
   c.setChainID(NAME.split("\\.")[1]);
   try (PrintWriter pw = new PrintWriter(f)) {
     for (int i = 0; i < pts.length; i++) {
       // one CA atom per point, wrapped in its own residue (numbered from 1)
       Atom a = new AtomImpl();
       a.setName(CA_NAME);
       a.setAltLoc(' ');
       Group g = new AminoAcidImpl();
       g.setPDBName(GROUP_NAME);
       g.addAtom(a);
       g.setResidueNumber(chainID, i + 1, null);
       c.addGroup(g);
       a.setX(pts[i].x);
       a.setY(pts[i].y);
       a.setZ(pts[i].z);
       pw.print(a.toPDB());
     }
   } catch (FileNotFoundException e) {
     // best effort: report the failure and still start the interactive session
     e.printStackTrace();
   }
   System.out.println("Start");
   System.out.println(NAME);
   System.out.println(s.getAlphaLength());
   try (Scanner scan = new Scanner(System.in)) {
     // hasNext() guard: leave the loop quietly when stdin ends without an explicit "X"
     while (scan.hasNext()) {
       String in = scan.next();
       if (in.equals("X")) {
         break;
       }
       if (in.equals("g")) {
         int st = scan.nextInt();
         System.out.println(
             SecondaryStructTools.distsToString(s.getRange(st - 1, scan.nextInt()), st));
       } else if (in.equals("a")) {
         s.printHelices();
       } else if (in.equals("b")) {
         s.printStrands();
       } else if (in.equals("l")) {
         System.out.println(s.length());
       } else if (in.equals("c")) {
         s.printPoints();
       } else if (in.equals("test")) {
         s.printVectors(s.getAlpha().getFeatures());
       } else if (in.equals("test1")) {
         System.out.println("=(0,0,0)\t=" + s.normP + "*50\t=" + s.normX + "*50");
       } else if (in.equals("test2")) {
         SecondaryStruct.printProjection(s.getAlphaNormProjection((byte) 0b00000000));
       }
     }
   }
 }
 /**
  * Builds {@code len} placeholder CA atoms spaced 1A apart along the x axis: the atom at index
  * {@code i} is placed at {@code (i, 0, 0)}.
  *
  * <p>Every atom is put into its own group with PDB name {@code GLY}, numbered by its index, and
  * all groups are added to one freshly created chain.
  *
  * @param len number of atoms to create
  * @return the created atoms, in index order
  */
 private Atom[] makeDummyCA(int len) {
   Chain dummyChain = new ChainImpl();
   Atom[] atoms = new Atom[len];
   int idx = 0;
   while (idx < len) {
     Atom ca = new AtomImpl();
     ca.setName("CA");
     ca.setCoords(new double[] {idx, 0, 0});
     atoms[idx] = ca;

     Group residue = new AminoAcidImpl();
     residue.setPDBName("GLY");
     residue.setResidueNumber(ResidueNumber.fromString(Integer.toString(idx)));
     residue.addAtom(ca);
     dummyChain.addGroup(residue);
     idx++;
   }
   return atoms;
 }
  /**
   * Adds to {@code unidentifiableModifiedResidues} every group of {@code residues} whose type is
   * {@code HETATM} and which does not belong to any of the given modified compounds.
   *
   * @param modComps the modified compounds that have been identified
   */
  private void recordUnidentifiableModifiedResidues(List<ModifiedCompound> modComps) {
    // gather the groups that are already explained by an identified compound
    Set<StructureGroup> known = new HashSet<StructureGroup>();
    for (ModifiedCompound compound : modComps) {
      known.addAll(compound.getGroups(true));
    }

    // TODO: use the ModifiedAminoAcid once it is available.
    for (Group candidate : residues) {
      if (!candidate.getType().equals(GroupType.HETATM)) {
        continue;
      }
      StructureGroup key = StructureUtil.getStructureGroup(candidate, true);
      if (known.contains(key)) {
        continue;
      }
      unidentifiableModifiedResidues.add(key);
    }
  }
  /**
   * Record unidentifiable atom linkages in a chain. Only linkages between two residues or one
   * residue and one ligand will be recorded. A linkage that is already part of one of the given
   * modified compounds is considered identified and is not recorded.
   *
   * @param modComps modified compounds whose atom linkages count as identified
   * @param ligands ligand groups that are tested against every residue
   */
  private void recordUnidentifiableAtomLinkages(
      List<ModifiedCompound> modComps, List<Group> ligands) {

    // first put identified linkages in a set for fast query
    Set<StructureAtomLinkage> identifiedLinkages = new HashSet<StructureAtomLinkage>();
    for (ModifiedCompound mc : modComps) {
      identifiedLinkages.addAll(mc.getAtomLinkages());
    }

    // cross link: every unordered pair of residues
    int nRes = residues.size();
    for (int i = 0; i < nRes - 1; i++) {
      Group group1 = residues.get(i);
      for (int j = i + 1; j < nRes; j++) {
        Group group2 = residues.get(j);
        List<Atom[]> linkages =
            StructureUtil.findAtomLinkages(group1, group2, true, bondLengthTolerance);
        for (Atom[] atoms : linkages) {
          StructureAtomLinkage link =
              StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], true);
          // skip linkages that an identified compound already accounts for
          if (!identifiedLinkages.contains(link)) {
            unidentifiableAtomLinkages.add(link);
          }
        }
      }
    }

    // attachment: every residue against every ligand
    int nLig = ligands.size();
    for (int i = 0; i < nRes; i++) {
      Group group1 = residues.get(i);
      for (int j = 0; j < nLig; j++) {
        Group group2 = ligands.get(j);
        if (group1.equals(group2)) { // overlap between residues and ligands
          continue;
        }
        List<Atom[]> linkages =
            StructureUtil.findAtomLinkages(group1, group2, false, bondLengthTolerance);
        for (Atom[] atoms : linkages) {
          StructureAtomLinkage link =
              StructureUtil.getStructureAtomLinkage(atoms[0], true, atoms[1], false);
          // skip linkages that an identified compound already accounts for
          if (!identifiedLinkages.contains(link)) {
            unidentifiableAtomLinkages.add(link);
          }
        }
      }
    }
  }
예제 #5
0
  /**
   * Builds a BioJava {@link Structure} for a {@link ScopDomain}.
   *
   * <p>The full structure for the domain's PDB id is loaded and reduced to the domain. Ligands of
   * each chain that is also present in the reduced structure are then added back where they are
   * missing, and the header description is set to the domain's classification id, followed by the
   * superfamily description when one can be looked up.
   *
   * @param domain a SCOP domain
   * @param scopDatabase a {@link ScopDatabase} used to look up the superfamily description; when
   *     {@code null} the description is left out of the header
   * @param strictLigandHandling if {@code false}, hetero-atoms are included if and only if they
   *     belong to a chain to which the SCOP domain belongs; if {@code true}, hetero-atoms are
   *     included if and only if they are strictly within the definition (residue numbers) of the
   *     SCOP domain
   * @return the reduced structure with ligands and header description filled in
   * @throws IOException propagated from the calls made while loading and processing the structure
   * @throws StructureException propagated from the calls made while loading and processing the
   *     structure
   */
  public Structure getStructureForDomain(
      ScopDomain domain, ScopDatabase scopDatabase, boolean strictLigandHandling)
      throws IOException, StructureException {

    Structure fullStructure = getStructureForPdbId(domain.getPdbId());
    Structure structure = domain.reduce(fullStructure);

    // TODO It would be better to move all of this into the reduce method,
    // but that would require ligand handling properties in StructureIdentifiers

    // Ligands sometimes occur after TER records in PDB files, so some of them may have to be
    // added back in. In strict mode a ligand is added only if it lies within the domain ranges,
    // which requires the position map and the parsed ranges below.
    AtomPositionMap positions = null;
    List<ResidueRangeAndLength> domainRanges = null;
    if (strictLigandHandling) {
      positions =
          new AtomPositionMap(
              StructureTools.getAllAtomArray(fullStructure), AtomPositionMap.ANYTHING_MATCHER);
      domainRanges = ResidueRangeAndLength.parseMultiple(domain.getRanges(), positions);
    }

    for (Chain fullChain : fullStructure.getChains()) {
      String chainId = fullChain.getChainID();
      // nothing can be done with a chain that the domain does not contain
      if (structure.hasChain(chainId)) {
        Chain reducedChain = structure.getChainByPDB(chainId);
        for (Group ligand : StructureTools.filterLigands(fullChain.getAtomGroups())) {
          // non-strict mode accepts every ligand of the chain
          boolean withinDomain = !strictLigandHandling;
          if (strictLigandHandling) {
            for (ResidueRange range : domainRanges) {
              // non-short-circuit on purpose: every range is queried
              withinDomain |= range.contains(ligand.getResidueNumber(), positions);
            }
          }
          // duplicates must not be added
          boolean duplicate = reducedChain.getAtomGroups().contains(ligand);
          if (withinDomain && !duplicate) {
            reducedChain.addGroup(ligand);
          }
        }
      }
    }

    // build a more meaningful description for the new structure
    StringBuilder header = new StringBuilder();
    header.append(domain.getClassificationId());
    if (scopDatabase != null) {
      int superfamilySunid = domain.getSuperfamilyId();
      ScopDescription superfamily = scopDatabase.getScopDescriptionBySunid(superfamilySunid);
      if (superfamily != null) {
        header.append(" | ").append(superfamily.getDescription());
      }
    }
    structure.getPDBHeader().setDescription(header.toString());

    return structure;
  }
  /**
   * Encodes the polymer chains of a structure and appends them to {@code writer}, one record per
   * chain, keyed by {@code <pdbId>.<chainId>}.
   *
   * <p>For each chain the longer of the amino-acid and nucleotide SEQRES group lists is used. A
   * chain is skipped when fewer than 10 residues have coordinates, when more than half of those
   * are unknown ({@code 'X'}), when it contains any d-peptide residue, when it mixes DNA and RNA,
   * or when it has no peptide, DNA or RNA residue at all.
   *
   * @param writer destination of the encoded chains
   * @param pdbId PDB id used in the record key and in diagnostics
   * @param s structure whose chains are encoded
   * @return the number of chains appended
   * @throws IOException declared for the append to {@code writer}
   */
  private static int append(SequenceFile.Writer writer, String pdbId, Structure s)
      throws IOException {

    int written = 0;

    for (Chain c : s.getChains()) {
      List<Group> residuesAa = c.getSeqResGroups(GroupType.AMINOACID);
      System.out.println("seq len: " + c.getSeqResSequence().length());
      System.out.println("seqresgroups.size(): " + c.getSeqResGroups(GroupType.AMINOACID).size());
      List<Group> residuesNa = c.getSeqResGroups(GroupType.NUCLEOTIDE);

      // the longer SEQRES view decides how the chain is read; ties go to amino acids
      boolean aminoAcid = residuesNa.size() <= residuesAa.size();
      List<Group> groups = aminoAcid ? residuesAa : residuesNa;
      int count = groups.size();

      int peptide = 0;
      int dPeptide = 0;
      int dna = 0;
      int rna = 0;
      int unknown = 0;
      int gaps = 0;

      // three coordinate slots per residue: main atom, N, C
      Point3d[] coords = new Point3d[count * 3];
      Integer[] sequence = new Integer[count];

      for (int i = 0; i < count; i++) {
        Group g = groups.get(i);

        char code = g.getChemComp().getOne_letter_code().charAt(0);
        if (code == 'X') {
          unknown++;
        }
        sequence[i] = (int) code;

        PolymerType p = g.getChemComp().getPolymerType();
        if (p.equals(PolymerType.peptide)) {
          peptide++;
        } else if (p.equals(PolymerType.dpeptide)) {
          dPeptide++;
        } else if (p.equals(PolymerType.dna)) {
          dna++;
        } else if (p.equals(PolymerType.rna)) {
          rna++;
        }

        Atom mainAtom;
        Atom nAtom = null;
        Atom cAtom = null;
        if (aminoAcid) {
          AminoAcidImpl aa = (AminoAcidImpl) g;
          mainAtom = aa.getCA();
          nAtom = aa.getN();
          cAtom = aa.getC();
        } else {
          mainAtom = ((NucleotideImpl) g).getP();
        }

        // NOTE(review): in the nucleotide branch nAtom and cAtom are never assigned, so every
        // nucleotide residue is counted as a gap here - confirm whether that is intended.
        if (mainAtom == null || nAtom == null || cAtom == null) {
          gaps++;
          continue;
        }
        int base = i * 3;
        coords[base] = new Point3d(mainAtom.getCoords());
        coords[base + 1] = new Point3d(nAtom.getCoords());
        coords[base + 2] = new Point3d(cAtom.getCoords());
      }

      // ignore chains with less than 10 residues (with coordinates)
      int withCoords = count - gaps;
      if (withCoords < 10) {
        continue;
      }

      if (unknown > withCoords / 2) {
        System.err.println("Polymer with many unknown residues ignored: " + pdbId + c.getChainID());
        continue;
      }
      // ignore any mixed polymer types
      if (dPeptide > 0) {
        System.err.println("d-peptide ignored: " + pdbId + c.getChainID());
        continue;
      }
      if (dna > 0 && rna > 0) {
        System.err.println("DNA/RNA hybrid ignored: " + pdbId + c.getChainID());
        continue;
      }

      // determine polymer type; peptide takes precedence over DNA, DNA over RNA
      SimplePolymerType polymerType;
      if (peptide > 0) {
        polymerType = SimplePolymerType.PROTEIN;
      } else if (dna > 0) {
        polymerType = SimplePolymerType.DNA;
      } else if (rna > 0) {
        polymerType = SimplePolymerType.RNA;
      } else {
        continue;
      }

      written++;

      Text key = new Text(pdbId + "." + c.getChainID());
      ArrayWritable value = new IntArrayWritable();
      value.set(
          SimplePolymerChainCodecHL.encodePolymerChain(
              polymerType.ordinal(), coords, sequence, gaps));
      writer.append(key, value);
    }
    return written;
  }
  /**
   * Finds, for each linkage of the condition in order, the atom pairs that match it.
   *
   * <p>Processing stops at the first linkage without any matching pair, so a result shorter than
   * {@code condition.getLinkages()} means that a linkage could not be matched.
   *
   * @param condition the modification condition whose linkages are matched
   * @param mapCompGroups candidate groups for each component
   * @return one list of matched atom pairs per linkage, up to the first unmatched linkage
   */
  private List<List<Atom[]>> getMatchedAtomsOfLinkages(
      ModificationCondition condition, Map<Component, Set<Group>> mapCompGroups) {
    List<ModificationLinkage> linkages = condition.getLinkages();
    List<List<Atom[]>> result = new ArrayList<List<Atom[]>>(linkages.size());

    for (ModificationLinkage linkage : linkages) {
      Set<Group> candidates1 = mapCompGroups.get(linkage.getComponent1());
      Set<Group> candidates2 = mapCompGroups.get(linkage.getComponent2());

      List<Atom[]> pairs = new ArrayList<Atom[]>();

      // a "*" entry is a wildcard: null tells the search to consider all atoms
      List<String> names1 = linkage.getPDBNameOfPotentialAtomsOnComponent1();
      boolean anyAtom1 = false;
      for (String candidate : names1) {
        if (candidate.equals("*")) {
          anyAtom1 = true;
          break;
        }
      }
      if (anyAtom1) {
        names1 = null;
      }

      List<String> names2 = linkage.getPDBNameOfPotentialAtomsOnComponent2();
      boolean anyAtom2 = false;
      for (String candidate : names2) {
        if (candidate.equals("*")) {
          anyAtom2 = true;
          break;
        }
      }
      if (anyAtom2) {
        names2 = null;
      }

      boolean bothWildcard = names1 == null && names2 == null;

      for (Group g1 : candidates1) {
        for (Group g2 : candidates2) {
          if (!g1.equals(g2)) {
            // only for wildcard match of two residues
            boolean ignoreNCLinkage =
                bothWildcard && residues.contains(g1) && residues.contains(g2);

            Atom[] nearest =
                StructureUtil.findNearestAtomLinkage(
                    g1, g2, names1, names2, ignoreNCLinkage, bondLengthTolerance);
            if (nearest != null) {
              pairs.add(nearest);
            }
          }
        }
      }

      if (pairs.isEmpty()) {
        // broken linkage: give up on the remaining linkages
        return result;
      }
      result.add(pairs);
    }

    return result;
  }
  /**
   * Maps every component used by the given modifications to the groups of the chain that can
   * play that component, and stores the result in {@code saveTo}.
   *
   * <p>Ligands and residues are matched by their trimmed PDB name, or by the {@code "*"}
   * wildcard component. In addition, the leading entries of {@code residues} are matched against
   * N-terminal components and the trailing entries against C-terminal components; each terminal
   * scan moves inward for as long as the entry just handled is also contained in {@code ligands}.
   *
   * @param modifications a set of {@link ProteinModification}s.
   * @param residues residues of the chain; the first entries are treated as the N-terminal end
   *     and the last entries as the C-terminal end
   * @param ligands ligand groups
   * @param saveTo receives, for each component, the matching groups in insertion order
   * @throws IllegalArgumentException if {@code modifications}, {@code residues} or {@code ligands}
   *     is {@code null}
   */
  private void addModificationGroups(
      final Set<ProteinModification> modifications,
      final List<Group> residues,
      final List<Group> ligands,
      final Map<Component, Set<Group>> saveTo) {
    if (residues == null || ligands == null || modifications == null) {
      throw new IllegalArgumentException("Null argument(s).");
    }

    // index: single-id component -> all (possibly multi-id) components that list that id
    Map<Component, Set<Component>> mapSingleMultiComps = new HashMap<Component, Set<Component>>();
    for (ProteinModification mod : modifications) {
      ModificationCondition condition = mod.getCondition();
      for (Component comp : condition.getComponents()) {
        for (String pdbccId : comp.getPdbccIds()) {
          Component single =
              Component.of(Collections.singleton(pdbccId), comp.isNTerminal(), comp.isCTerminal());
          Set<Component> mult = mapSingleMultiComps.get(single);
          if (mult == null) {
            mult = new HashSet<Component>();
            mapSingleMultiComps.put(single, mult);
          }
          mult.add(comp);
        }
      }
    }

    // the index is not modified below, so the wildcard lookups are done once
    Set<Component> wildCard = mapSingleMultiComps.get(Component.of("*"));

    // ligands
    for (Group group : ligands) {
      String pdbccId = group.getPDBName().trim();
      Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
      saveGroupForComponents(group, wildCard, comps, saveTo);
    }

    // residues
    if (residues.isEmpty()) {
      return;
    }

    for (Group group : residues) {
      String pdbccId = group.getPDBName().trim();
      Set<Component> comps = mapSingleMultiComps.get(Component.of(pdbccId));
      saveGroupForComponents(group, wildCard, comps, saveTo);
    }

    // for N-terminal: all ligands on the N terminal and the first residue
    Set<Component> nTermWildCard = mapSingleMultiComps.get(Component.of("*", true, false));
    int nRes = residues.size();
    int iRes = 0;
    Group res;
    do {
      res = residues.get(iRes++);
      // trimmed like the general lookups above so padded PDB names still match
      Set<Component> comps =
          mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), true, false));
      saveGroupForComponents(res, nTermWildCard, comps, saveTo);
    } while (iRes < nRes && ligands.contains(res));

    // for C-terminal: all ligands on the C terminal and the last residue
    Set<Component> cTermWildCard = mapSingleMultiComps.get(Component.of("*", false, true));
    iRes = residues.size() - 1;
    do {
      res = residues.get(iRes--);
      Set<Component> comps =
          mapSingleMultiComps.get(Component.of(res.getPDBName().trim(), false, true));
      saveGroupForComponents(res, cTermWildCard, comps, saveTo);
    } while (iRes >= 0 && ligands.contains(res));
  }

  /**
   * Adds {@code group} to the group set of every component in the union of {@code wildCard} and
   * {@code comps}, creating the set in {@code saveTo} on first use.
   */
  private void saveGroupForComponents(
      final Group group,
      final Set<Component> wildCard,
      final Set<Component> comps,
      final Map<Component, Set<Group>> saveTo) {
    for (Component comp : unionComponentSet(wildCard, comps)) {
      Set<Group> gs = saveTo.get(comp);
      if (gs == null) {
        gs = new LinkedHashSet<Group>();
        saveTo.put(comp, gs);
      }
      gs.add(group);
    }
  }