private IRenderingElement generate(IAtomContainer molecule, RendererModel model, int atomNum) throws CDKException { // tag the atom and bond ids String molId = molecule.getProperty(MarkedElement.ID_KEY); if (molId != null) { int atomId = 0, bondid = 0; for (IAtom atom : molecule.atoms()) setIfMissing(atom, MarkedElement.ID_KEY, molId + "atm" + ++atomId); for (IBond bond : molecule.bonds()) setIfMissing(bond, MarkedElement.ID_KEY, molId + "bnd" + ++bondid); } if (annotateAtomNum) { for (IAtom atom : molecule.atoms()) { if (atom.getProperty(StandardGenerator.ANNOTATION_LABEL) != null) throw new UnsupportedOperationException("Multiple annotation labels are not supported."); atom.setProperty(StandardGenerator.ANNOTATION_LABEL, Integer.toString(atomNum++)); } } else if (annotateAtomVal) { for (IAtom atom : molecule.atoms()) { if (atom.getProperty(StandardGenerator.ANNOTATION_LABEL) != null) throw new UnsupportedOperationException("Multiple annotation labels are not supported."); atom.setProperty( StandardGenerator.ANNOTATION_LABEL, atom.getProperty(CDKConstants.COMMENT)); } } else if (annotateAtomMap) { for (IAtom atom : molecule.atoms()) { if (atom.getProperty(StandardGenerator.ANNOTATION_LABEL) != null) throw new UnsupportedOperationException("Multiple annotation labels are not supported."); int mapidx = accessAtomMap(atom); if (mapidx > 0) { atom.setProperty(StandardGenerator.ANNOTATION_LABEL, Integer.toString(mapidx)); } } } ElementGroup grp = new ElementGroup(); for (IGenerator<IAtomContainer> gen : gens) grp.add(gen.generate(molecule, model)); // cleanup if (annotateAtomNum || annotateAtomMap) { for (IAtom atom : molecule.atoms()) { atom.removeProperty(StandardGenerator.ANNOTATION_LABEL); } } return grp; }
/** * Prepare the target molecule for analysis. * * <p>We perform ring perception and aromaticity detection and set up the appropriate properties. * Right now, this function is called each time we need to do a query and this is inefficient. * * @throws CDKException if there is a problem in ring perception or aromaticity detection, which * is usually related to a timeout in the ring finding code. */ private void initializeMolecule() throws CDKException { // Code copied from // org.openscience.cdk.qsar.descriptors.atomic.AtomValenceDescriptor; Map<String, Integer> valencesTable = new HashMap<String, Integer>(); valencesTable.put("H", 1); valencesTable.put("Li", 1); valencesTable.put("Be", 2); valencesTable.put("B", 3); valencesTable.put("C", 4); valencesTable.put("N", 5); valencesTable.put("O", 6); valencesTable.put("F", 7); valencesTable.put("Na", 1); valencesTable.put("Mg", 2); valencesTable.put("Al", 3); valencesTable.put("Si", 4); valencesTable.put("P", 5); valencesTable.put("S", 6); valencesTable.put("Cl", 7); valencesTable.put("K", 1); valencesTable.put("Ca", 2); valencesTable.put("Ga", 3); valencesTable.put("Ge", 4); valencesTable.put("As", 5); valencesTable.put("Se", 6); valencesTable.put("Br", 7); valencesTable.put("Rb", 1); valencesTable.put("Sr", 2); valencesTable.put("In", 3); valencesTable.put("Sn", 4); valencesTable.put("Sb", 5); valencesTable.put("Te", 6); valencesTable.put("I", 7); valencesTable.put("Cs", 1); valencesTable.put("Ba", 2); valencesTable.put("Tl", 3); valencesTable.put("Pb", 4); valencesTable.put("Bi", 5); valencesTable.put("Po", 6); valencesTable.put("At", 7); valencesTable.put("Fr", 1); valencesTable.put("Ra", 2); valencesTable.put("Cu", 2); valencesTable.put("Mn", 2); valencesTable.put("Co", 2); // do all ring perception AllRingsFinder arf = new AllRingsFinder(); IRingSet allRings; try { allRings = arf.findAllRings(atomContainer); } catch (CDKException e) { logger.debug(e.toString()); throw new CDKException(e.toString(), e); } // sets 
SSSR information SSSRFinder finder = new SSSRFinder(atomContainer); IRingSet sssr = finder.findEssentialRings(); for (IAtom atom : atomContainer.atoms()) { // add a property to each ring atom that will be an array of // Integers, indicating what size ring the given atom belongs to // Add SSSR ring counts if (allRings.contains(atom)) { // it's in a ring atom.setFlag(CDKConstants.ISINRING, true); // lets find which ring sets it is a part of List<Integer> ringsizes = new ArrayList<Integer>(); IRingSet currentRings = allRings.getRings(atom); int min = 0; for (int i = 0; i < currentRings.getAtomContainerCount(); i++) { int size = currentRings.getAtomContainer(i).getAtomCount(); if (min > size) min = size; ringsizes.add(size); } atom.setProperty(CDKConstants.RING_SIZES, ringsizes); atom.setProperty(CDKConstants.SMALLEST_RINGS, sssr.getRings(atom)); } else { atom.setFlag(CDKConstants.ISINRING, false); } // determine how many rings bonds each atom is a part of int hCount; if (atom.getImplicitHydrogenCount() == CDKConstants.UNSET) hCount = 0; else hCount = atom.getImplicitHydrogenCount(); List<IAtom> connectedAtoms = atomContainer.getConnectedAtomsList(atom); int total = hCount + connectedAtoms.size(); for (IAtom connectedAtom : connectedAtoms) { if (connectedAtom.getSymbol().equals("H")) { hCount++; } } atom.setProperty(CDKConstants.TOTAL_CONNECTIONS, total); atom.setProperty(CDKConstants.TOTAL_H_COUNT, hCount); if (valencesTable.get(atom.getSymbol()) != null) { int formalCharge = atom.getFormalCharge() == CDKConstants.UNSET ? 
0 : atom.getFormalCharge(); atom.setValency(valencesTable.get(atom.getSymbol()) - formalCharge); } } for (IBond bond : atomContainer.bonds()) { if (allRings.getRings(bond).getAtomContainerCount() > 0) { bond.setFlag(CDKConstants.ISINRING, true); } } for (IAtom atom : atomContainer.atoms()) { List<IAtom> connectedAtoms = atomContainer.getConnectedAtomsList(atom); int counter = 0; IAtom any; for (IAtom connectedAtom : connectedAtoms) { any = connectedAtom; if (any.getFlag(CDKConstants.ISINRING)) { counter++; } } atom.setProperty(CDKConstants.RING_CONNECTIONS, counter); } // check for atomaticity try { AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(atomContainer); CDKHueckelAromaticityDetector.detectAromaticity(atomContainer); } catch (CDKException e) { logger.debug(e.toString()); throw new CDKException(e.toString(), e); } }
/**
 * Gives every atom of the given molecule a fresh, empty path list.
 *
 * <p>The list is stored on each atom under the {@code PATH} property, replacing whatever value
 * was stored there before.
 *
 * @param molecule The given molecule
 */
private void initPath(IAtomContainer molecule) {
  for (IAtom current : molecule.atoms()) {
    current.setProperty(PATH, new ArrayList<IAtom>());
  }
}
/** * This routine is called 'getRing() in Figueras original article finds the smallest ring of which * rootNode is part of. * * @param rootNode The Atom to be searched for the smallest ring it is part of * @param molecule The molecule that contains the rootNode * @return The smallest Ring rootnode is part of */ private IRing getRing(IAtom rootNode, IAtomContainer molecule) { IAtom node, neighbor, mAtom; List neighbors, mAtoms; /** OKatoms is Figueras nomenclature, giving the number of atoms in the structure */ int OKatoms = molecule.getAtomCount(); /** queue for Breadth First Search of this graph */ Queue queue = new Queue(); /* Initialize a path Vector for each node */ // Vector pfad1,pfad2; List<List<IAtom>> path = new ArrayList<List<IAtom>>(OKatoms); List<IAtom> intersection = new ArrayList<IAtom>(); List<IAtom> ring = new ArrayList<IAtom>(); for (int f = 0; f < OKatoms; f++) { path.set(f, new ArrayList<IAtom>()); ((List<IAtom>) molecule.getAtom(f).getProperty(PATH)).clear(); } // Initialize the queue with nodes attached to rootNode neighbors = molecule.getConnectedAtomsList(rootNode); for (int f = 0; f < neighbors.size(); f++) { // if the degree of the f-st neighbor of rootNode is greater // than zero (i.e., it has not yet been deleted from the list) neighbor = (IAtom) neighbors.get(f); // push the f-st node onto our FIFO queue // after assigning rootNode as its source queue.push(neighbor); ((List<IAtom>) neighbor.getProperty(PATH)).add(rootNode); ((List<IAtom>) neighbor.getProperty(PATH)).add(neighbor); } while (queue.size() > 0) { node = (IAtom) queue.pop(); mAtoms = molecule.getConnectedAtomsList(node); for (int f = 0; f < mAtoms.size(); f++) { mAtom = (IAtom) mAtoms.get(f); if (mAtom != ((List) node.getProperty(PATH)) .get(((List<IAtom>) node.getProperty(PATH)).size() - 2)) { if (((List) mAtom.getProperty(PATH)).size() > 0) { intersection = getIntersection((List) node.getProperty(PATH), (List) mAtom.getProperty(PATH)); if (intersection.size() == 1) { // we 
have found a valid ring closure // now let's prepare the path to // return in tempAtomSet logger.debug("path1 ", ((List) node.getProperty(PATH))); logger.debug("path2 ", ((List) mAtom.getProperty(PATH))); logger.debug("rootNode ", rootNode); logger.debug("ring ", ring); ring = getUnion((List) node.getProperty(PATH), (List) mAtom.getProperty(PATH)); return prepareRing(ring, molecule); } } else { // if path[mNumber] is null // update the path[mNumber] // pfad2 = (Vector)node.getProperty(PATH); mAtom.setProperty(PATH, new ArrayList<IAtom>((List<IAtom>) node.getProperty(PATH))); ((List<IAtom>) mAtom.getProperty(PATH)).add(mAtom); // pfad1 = (Vector)mAtom.getProperty(PATH); // now push the node m onto the queue queue.push(mAtom); } } } } return null; }
/**
 * Recursive function to produce valid configurations for {@link #getAllConfigurations()}.
 *
 * <p>Each recursion level handles one R-group number. While {@code level} is below {@code
 * rGroupNumbers.size()}, the method iterates over the occurrences for that level, the
 * distributions found for each occurrence and the substitute mappings for each distribution,
 * recording the current choice in {@code occurIndexes}, {@code distributions} and {@code
 * substitutes} before recursing. Once {@code level} equals {@code rGroupNumbers.size()}, the
 * root structure is cloned, the chosen substitutes are attached to the clone, leftover R# bonds
 * and atoms are removed and the clone is appended to {@code result}.
 *
 * @param rGroupNumbers the R-group numbers, one per recursion level
 * @param occurrences for each level, the candidate occurrence counts
 * @param occurIndexes for each level, the index of the occurrence currently chosen (written
 *     here)
 * @param distributions for each level, the distribution currently chosen (written here)
 * @param substitutes for each level, the substitutes currently mapped to the R# atom positions;
 *     a {@code null} entry means the position is not substituted (written here)
 * @param level the current recursion level, starting at 0
 * @param result receives one atom container per valid configuration
 * @throws CDKException if cloning the root structure or a substitute fails
 */
private void findConfigurationsRecursively(
    List<Integer> rGroupNumbers,
    List<List<Integer>> occurrences,
    List<Integer> occurIndexes,
    List<Integer[]> distributions,
    List<List<RGroup>> substitutes,
    int level,
    List<IAtomContainer> result)
    throws CDKException {

  if (level == rGroupNumbers.size()) {

    if (!checkIfThenConditionsMet(rGroupNumbers, distributions)) return;

    // Clone the root to get a scaffold to plug the substitutes into.
    IAtomContainer root = this.getRootStructure();
    IAtomContainer rootClone;
    try {
      rootClone = (IAtomContainer) root.clone();
    } catch (CloneNotSupportedException e) {
      // Abort with CDK exception, keeping the original failure as the cause
      throw new CDKException("clone() failed; could not perform R-group substitution.", e);
    }

    for (int rgpIdx = 0; rgpIdx < rGroupNumbers.size(); rgpIdx++) {

      int rNum = rGroupNumbers.get(rgpIdx);
      int pos = 0;

      List<RGroup> mapped = substitutes.get(rgpIdx);
      for (RGroup substitute : mapped) {
        IAtom rAtom = this.getRgroupQueryAtoms(rNum).get(pos);
        if (substitute != null) {

          IAtomContainer rgrpClone;
          try {
            rgrpClone = (IAtomContainer) (substitute.getGroup().clone());
          } catch (CloneNotSupportedException e) {
            throw new CDKException("clone() failed; could not perform R-group substitution.", e);
          }

          // root cloned, substitute cloned. These now need to be attached to each other..
          rootClone.add(rgrpClone);

          Map<Integer, IBond> rAttachmentPoints = this.getRootAttachmentPoints().get(rAtom);
          if (rAttachmentPoints != null) {
            // Loop over attachment points of the R# atom (map keys are 1-based)
            for (int apo = 0; apo < rAttachmentPoints.size(); apo++) {
              IBond bond = rAttachmentPoints.get(apo + 1);
              // Check how R# is attached to bond
              int whichAtomInBond = 0;
              if (bond.getAtom(1).equals(rAtom)) whichAtomInBond = 1;
              IAtom subsAt;
              if (apo == 0) subsAt = substitute.getFirstAttachmentPoint();
              else subsAt = substitute.getSecondAttachmentPoint();

              // Do substitution with the clones
              IBond cloneBond = rootClone.getBond(getBondPosition(bond, root));
              if (subsAt != null) {
                IAtom subsCloneAtom =
                    rgrpClone.getAtom(getAtomPosition(subsAt, substitute.getGroup()));
                cloneBond.setAtom(subsCloneAtom, whichAtomInBond);
              }
            }
          }

          // Optional: shift substitutes 2D for easier visual checking
          if (rAtom.getPoint2d() != null
              && substitute.getFirstAttachmentPoint() != null
              && substitute.getFirstAttachmentPoint().getPoint2d() != null) {
            Point2d pointR = rAtom.getPoint2d();
            Point2d pointC = substitute.getFirstAttachmentPoint().getPoint2d();
            double xDiff = pointC.x - pointR.x;
            double yDiff = pointC.y - pointR.y;
            for (IAtom subAt : rgrpClone.atoms()) {
              if (subAt.getPoint2d() != null) {
                subAt.getPoint2d().x -= xDiff;
                subAt.getPoint2d().y -= yDiff;
              }
            }
          }
        } else {
          // Distribution flag is 0, this means the R# group will not be substituted.
          // Any atom connected to this group should be given the defined RestH value.
          IAtom discarded = rootClone.getAtom(getAtomPosition(rAtom, root));
          for (IBond r0Bond : rootClone.bonds()) {
            if (r0Bond.contains(discarded)) {
              for (IAtom atInBond : r0Bond.atoms()) {
                atInBond.setProperty(
                    CDKConstants.REST_H, this.getRGroupDefinitions().get(rNum).isRestH());
              }
            }
          }
        }

        pos++;
      }
    }

    // Remove R# remnants from the clone, bonds and atoms that may linger.
    // One element is removed per scan (the iteration is restarted after each removal); the
    // flag is reset before every scan so an empty container ends the loop.
    boolean removedBond;
    do {
      removedBond = false;
      for (IBond cloneBond : rootClone.bonds()) {
        boolean removeBond =
            (cloneBond.getAtom(0) instanceof IPseudoAtom
                    && isValidRgroupQueryLabel(((IPseudoAtom) cloneBond.getAtom(0)).getLabel()))
                || (cloneBond.getAtom(1) instanceof IPseudoAtom
                    && isValidRgroupQueryLabel(((IPseudoAtom) cloneBond.getAtom(1)).getLabel()));
        if (removeBond) {
          rootClone.removeBond(cloneBond);
          removedBond = true;
          break;
        }
      }
    } while (removedBond);

    boolean removedAtom;
    do {
      removedAtom = false;
      for (IAtom cloneAt : rootClone.atoms()) {
        if (cloneAt instanceof IPseudoAtom
            && isValidRgroupQueryLabel(((IPseudoAtom) cloneAt).getLabel())) {
          rootClone.removeAtom(cloneAt);
          removedAtom = true;
          break;
        }
      }
    } while (removedAtom);

    // Add to result list
    result.add(rootClone);

  } else {
    for (int idx = 0; idx < occurrences.get(level).size(); idx++) {
      occurIndexes.set(level, idx);

      // With an occurrence picked 0..n for this level's R-group, now find
      // all possible distributions (positional alternatives).
      int occurrence = occurrences.get(level).get(idx);
      int positions = this.getRgroupQueryAtoms(rGroupNumbers.get(level)).size();
      Integer[] candidate = new Integer[positions];
      for (int j = 0; j < candidate.length; j++) {
        candidate[j] = 0;
      }
      List<Integer[]> rgrpDistributions = new ArrayList<Integer[]>();
      findDistributions(occurrence, candidate, rgrpDistributions, 0);

      for (Integer[] distribution : rgrpDistributions) {
        distributions.set(level, distribution);

        RGroup[] mapping = new RGroup[distribution.length];
        List<List<RGroup>> mappedSubstitutes = new ArrayList<List<RGroup>>();
        mapSubstitutes(
            this.getRGroupDefinitions().get(rGroupNumbers.get(level)),
            0,
            distribution,
            mapping,
            mappedSubstitutes);

        for (List<RGroup> mappings : mappedSubstitutes) {
          substitutes.set(level, mappings);
          findConfigurationsRecursively(
              rGroupNumbers,
              occurrences,
              occurIndexes,
              distributions,
              substitutes,
              level + 1,
              result);
        }
      }
    }
  }
}
/**
 * Procedure required by the CDOInterface. This function is only supposed to be called by the JCFL
 * library.
 *
 * <p>Stores one property value on the object currently being built. The three arguments are
 * logged first; if any of them is {@code null} the call is logged and ignored. Recognised object
 * types are "Molecule", "PseudoAtom", "Atom" and "Bond"; unrecognised object or property types
 * are silently skipped.
 *
 * @param objectType the kind of object the property belongs to
 * @param propertyType the name of the property
 * @param propertyValue the value as text; numeric properties are parsed from it
 */
public void setObjectProperty(String objectType, String propertyType, String propertyValue) {
  logger.debug("objectType: " + objectType);
  logger.debug("propType: " + propertyType);
  logger.debug("property: " + propertyValue);

  if (objectType == null) {
    logger.error("Cannot add property for null object");
    return;
  }
  if (propertyType == null) {
    logger.error("Cannot add property for null property type");
    return;
  }
  if (propertyValue == null) {
    logger.warn("Will not add null property");
    return;
  }

  if ("Molecule".equals(objectType)) {
    applyMoleculeProperty(propertyType, propertyValue);
  } else if ("PseudoAtom".equals(objectType)) {
    applyPseudoAtomProperty(propertyType, propertyValue);
  } else if ("Atom".equals(objectType)) {
    applyAtomProperty(propertyType, propertyValue);
  } else if ("Bond".equals(objectType)) {
    applyBondProperty(propertyType, propertyValue);
  }

  logger.debug("Object property set...");
}

/** Sets the id or the InChI property on the current molecule. */
private void applyMoleculeProperty(String name, String value) {
  if ("id".equals(name)) {
    currentMolecule.setID(value);
  } else if ("inchi".equals(name)) {
    currentMolecule.setProperty("iupac.nist.chemical.identifier", value);
  }
}

/** Sets the label of the current atom, converting it to a pseudo atom first if needed. */
private void applyPseudoAtomProperty(String name, String value) {
  if (!"label".equals(name)) {
    return;
  }
  if (!(currentAtom instanceof IPseudoAtom)) {
    currentAtom = builder.newPseudoAtom(currentAtom);
  }
  ((IPseudoAtom) currentAtom).setLabel(value);
}

/** Sets one property (symbol, coordinate, charge, counts, ids) on the current atom. */
private void applyAtomProperty(String name, String value) {
  if ("type".equals(name)) {
    // an "R" symbol turns the current atom into a pseudo atom
    if ("R".equals(value) && !(currentAtom instanceof IPseudoAtom)) {
      currentAtom = builder.newPseudoAtom(currentAtom);
    }
    currentAtom.setSymbol(value);
  } else if ("x2".equals(name)) {
    Point2d point = point2dOrNew();
    point.x = Double.parseDouble(value);
    currentAtom.setPoint2d(point);
  } else if ("y2".equals(name)) {
    Point2d point = point2dOrNew();
    point.y = Double.parseDouble(value);
    currentAtom.setPoint2d(point);
  } else if ("x3".equals(name)) {
    Point3d point = point3dOrNew();
    point.x = Double.parseDouble(value);
    currentAtom.setPoint3d(point);
  } else if ("y3".equals(name)) {
    Point3d point = point3dOrNew();
    point.y = Double.parseDouble(value);
    currentAtom.setPoint3d(point);
  } else if ("z3".equals(name)) {
    Point3d point = point3dOrNew();
    point.z = Double.parseDouble(value);
    currentAtom.setPoint3d(point);
  } else if ("xFract".equals(name)) {
    Point3d point = fractionalPoint3dOrNew();
    point.x = Double.parseDouble(value);
    currentAtom.setFractionalPoint3d(point);
  } else if ("yFract".equals(name)) {
    Point3d point = fractionalPoint3dOrNew();
    point.y = Double.parseDouble(value);
    currentAtom.setFractionalPoint3d(point);
  } else if ("zFract".equals(name)) {
    Point3d point = fractionalPoint3dOrNew();
    point.z = Double.parseDouble(value);
    currentAtom.setFractionalPoint3d(point);
  } else if ("formalCharge".equals(name)) {
    currentAtom.setFormalCharge(Integer.parseInt(value));
  } else if ("charge".equals(name) || "partialCharge".equals(name)) {
    currentAtom.setCharge(Double.parseDouble(value));
  } else if ("hydrogenCount".equals(name)) {
    currentAtom.setHydrogenCount(Integer.parseInt(value));
  } else if ("dictRef".equals(name)) {
    currentAtom.setProperty("org.openscience.cdk.dict", value);
  } else if ("atomicNumber".equals(name)) {
    currentAtom.setAtomicNumber(Integer.parseInt(value));
  } else if ("massNumber".equals(name)) {
    // the value is parsed as a double and truncated to an int
    currentAtom.setMassNumber((int) Double.parseDouble(value));
  } else if ("id".equals(name)) {
    logger.debug("id: ", value);
    currentAtom.setID(value);
    atomEnumeration.put(value, numberOfAtoms);
  }
}

/** Returns the current atom's 2D point, or a new point if it has none. */
private Point2d point2dOrNew() {
  Point2d existing = currentAtom.getPoint2d();
  return existing != null ? existing : new Point2d();
}

/** Returns the current atom's 3D point, or a new point if it has none. */
private Point3d point3dOrNew() {
  Point3d existing = currentAtom.getPoint3d();
  return existing != null ? existing : new Point3d();
}

/** Returns the current atom's fractional 3D point, or a new point if it has none. */
private Point3d fractionalPoint3dOrNew() {
  Point3d existing = currentAtom.getFractionalPoint3d();
  return existing != null ? existing : new Point3d();
}

/** Records one property (end atoms, id, order, stereo) of the bond being built. */
private void applyBondProperty(String name, String value) {
  if ("atom1".equals(name)) {
    bond_a1 = Integer.parseInt(value);
  } else if ("atom2".equals(name)) {
    bond_a2 = Integer.parseInt(value);
  } else if ("id".equals(name)) {
    logger.debug("id: " + value);
    bond_id = value;
  } else if ("order".equals(name)) {
    try {
      double numericOrder = Double.parseDouble(value);
      if (numericOrder == 2.0) {
        bond_order = IBond.Order.DOUBLE;
      } else if (numericOrder == 3.0) {
        bond_order = IBond.Order.TRIPLE;
      } else if (numericOrder == 4.0) {
        bond_order = IBond.Order.QUADRUPLE;
      } else {
        // 1.0 and every unrecognised value map to a single bond
        bond_order = IBond.Order.SINGLE;
      }
    } catch (Exception e) {
      logger.error("Cannot convert to double: " + value);
      bond_order = IBond.Order.SINGLE;
    }
  } else if ("stereo".equals(name)) {
    if ("H".equals(value)) {
      bond_stereo = CDKConstants.STEREO_BOND_DOWN;
    } else if ("W".equals(value)) {
      bond_stereo = CDKConstants.STEREO_BOND_UP;
    }
  }
}
/** * Read an IAtomContainer from a file in MDL sd format * * @return The Molecule that was read from the MDL file. */ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException { logger.debug("Reading new molecule"); IAtomContainer outputContainer = null; int linecount = 0; int atoms = 0; int bonds = 0; int atom1 = 0; int atom2 = 0; int order = 0; IBond.Stereo stereo = (IBond.Stereo) CDKConstants.UNSET; int RGroupCounter = 1; int Rnumber = 0; String[] rGroup = null; double x = 0.0; double y = 0.0; double z = 0.0; double totalX = 0.0; double totalY = 0.0; double totalZ = 0.0; String title = null; String remark = null; // int[][] conMat = new int[0][0]; // String help; IAtom atom; String line = ""; // A map to keep track of R# atoms so that RGP line can be parsed Map<Integer, IPseudoAtom> rAtoms = new HashMap<Integer, IPseudoAtom>(); try { IsotopeFactory isotopeFactory = Isotopes.getInstance(); logger.info("Reading header"); line = input.readLine(); linecount++; if (line == null) { return null; } logger.debug("Line " + linecount + ": " + line); if (line.startsWith("$$$$")) { logger.debug("File is empty, returning empty molecule"); return molecule; } if (line.length() > 0) { title = line; } line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); if (line.length() > 0) { remark = line; } logger.info("Reading rest of file"); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); // if the line is empty we hav a problem - either a malformed // molecule entry or just extra new lines at the end of the file if (line.length() == 0) { // read till the next $$$$ or EOF while (true) { line = input.readLine(); linecount++; if (line == null) { return null; } if (line.startsWith("$$$$")) { return molecule; // an empty molecule } } } // check the CT block version if (line.contains("V3000") || 
line.contains("v3000")) { handleError("This file must be read with the MDLV3000Reader."); } else if (!line.contains("V2000") && !line.contains("v2000")) { handleError("This file must be read with the MDLReader."); } atoms = Integer.parseInt(line.substring(0, 3).trim()); List<IAtom> atomList = new ArrayList<IAtom>(); logger.debug("Atomcount: " + atoms); bonds = Integer.parseInt(line.substring(3, 6).trim()); logger.debug("Bondcount: " + bonds); List<IBond> bondList = new ArrayList<IBond>(); // used for applying the MDL valence model int[] explicitValence = new int[atoms]; // read ATOM block logger.info("Reading atom block"); atomsByLinePosition = new ArrayList<IAtom>(); atomsByLinePosition.add(null); // 0 is not a valid position int atomBlockLineNumber = 0; for (int f = 0; f < atoms; f++) { line = input.readLine(); linecount++; atomBlockLineNumber++; Matcher trailingSpaceMatcher = TRAILING_SPACE.matcher(line); if (trailingSpaceMatcher.find()) { handleError( "Trailing space found", linecount, trailingSpaceMatcher.start(), trailingSpaceMatcher.end()); line = trailingSpaceMatcher.replaceAll(""); } x = Double.parseDouble(line.substring(0, 10).trim()); y = Double.parseDouble(line.substring(10, 20).trim()); z = Double.parseDouble(line.substring(20, 30).trim()); // *all* values should be zero, not just the sum totalX += Math.abs(x); totalY += Math.abs(y); totalZ += Math.abs(z); logger.debug("Coordinates: " + x + "; " + y + "; " + z); String element = line.substring(31, Math.min(line.length(), 34)).trim(); if (line.length() < 34) { handleError( "Element atom type does not follow V2000 format type should of length three" + " and padded with space if required", linecount, 31, 34); } logger.debug("Atom type: ", element); if (isotopeFactory.isElement(element)) { atom = isotopeFactory.configure(molecule.getBuilder().newInstance(IAtom.class, element)); } else if ("A".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if 
("Q".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("*".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("LP".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("L".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if (element.equals("R") || (element.length() > 0 && element.charAt(0) == 'R')) { logger.debug("Atom ", element, " is not an regular element. Creating a PseudoAtom."); // check if the element is R rGroup = element.split("^R"); atom = null; if (rGroup.length > 1) { try { Rnumber = Integer.valueOf(rGroup[(rGroup.length - 1)]); RGroupCounter = Rnumber; element = "R" + Rnumber; atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } catch (Exception ex) { // This happens for atoms labeled "R#". // The Rnumber may be set later on, using RGP line atom = molecule.getBuilder().newInstance(IPseudoAtom.class, "R"); rAtoms.put(atomBlockLineNumber, (IPseudoAtom) atom); } } else { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } } else { handleError( "Invalid element type. 
Must be an existing " + "element, or one in: A, Q, L, LP, *.", linecount, 32, 35); atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); atom.setSymbol(element); } // store as 3D for now, convert to 2D (if totalZ == 0.0) later atom.setPoint3d(new Point3d(x, y, z)); // parse further fields if (line.length() >= 36) { String massDiffString = line.substring(34, 36).trim(); logger.debug("Mass difference: ", massDiffString); if (!(atom instanceof IPseudoAtom)) { try { int massDiff = Integer.parseInt(massDiffString); if (massDiff != 0) { IIsotope major = Isotopes.getInstance().getMajorIsotope(element); atom.setMassNumber(major.getMassNumber() + massDiff); } } catch (Exception exception) { handleError("Could not parse mass difference field.", linecount, 35, 37, exception); } } else { logger.error("Cannot set mass difference for a non-element!"); } } else { handleError("Mass difference is missing", linecount, 34, 36); } // set the stereo partiy Integer parity = line.length() > 41 ? 
Character.digit(line.charAt(41), 10) : 0; atom.setStereoParity(parity); if (line.length() >= 51) { String valenceString = removeNonDigits(line.substring(48, 51)); logger.debug("Valence: ", valenceString); if (!(atom instanceof IPseudoAtom)) { try { int valence = Integer.parseInt(valenceString); if (valence != 0) { // 15 is defined as 0 in mol files if (valence == 15) atom.setValency(0); else atom.setValency(valence); } } catch (Exception exception) { handleError( "Could not parse valence information field", linecount, 49, 52, exception); } } else { logger.error("Cannot set valence information for a non-element!"); } } if (line.length() >= 39) { String chargeCodeString = line.substring(36, 39).trim(); logger.debug("Atom charge code: ", chargeCodeString); int chargeCode = Integer.parseInt(chargeCodeString); if (chargeCode == 0) { // uncharged species } else if (chargeCode == 1) { atom.setFormalCharge(+3); } else if (chargeCode == 2) { atom.setFormalCharge(+2); } else if (chargeCode == 3) { atom.setFormalCharge(+1); } else if (chargeCode == 4) { } else if (chargeCode == 5) { atom.setFormalCharge(-1); } else if (chargeCode == 6) { atom.setFormalCharge(-2); } else if (chargeCode == 7) { atom.setFormalCharge(-3); } } else { handleError("Atom charge is missing", linecount, 36, 39); } try { // read the mmm field as position 61-63 String reactionAtomIDString = line.substring(60, 63).trim(); logger.debug("Parsing mapping id: ", reactionAtomIDString); try { int reactionAtomID = Integer.parseInt(reactionAtomIDString); if (reactionAtomID != 0) { atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, reactionAtomID); } } catch (Exception exception) { logger.error("Mapping number ", reactionAtomIDString, " is not an integer."); logger.debug(exception); } } catch (Exception exception) { // older mol files don't have all these fields... logger.warn("A few fields are missing. Older MDL MOL file?"); } // shk3: This reads shifts from after the molecule. 
I don't think this is an official // format, but I saw it frequently 80=>78 for alk if (line.length() >= 78) { double shift = Double.parseDouble(line.substring(69, 80).trim()); atom.setProperty("first shift", shift); } if (line.length() >= 87) { double shift = Double.parseDouble(line.substring(79, 87).trim()); atom.setProperty("second shift", shift); } atomList.add(atom); atomsByLinePosition.add(atom); } // convert to 2D, if totalZ == 0 if (totalX == 0.0 && totalY == 0.0 && totalZ == 0.0) { logger.info("All coordinates are 0.0"); if (atomList.size() == 1) { atomList.get(0).setPoint2d(new Point2d(x, y)); } else { for (IAtom atomToUpdate : atomList) { atomToUpdate.setPoint3d(null); } } } else if (totalZ == 0.0 && !forceReadAs3DCoords.isSet()) { logger.info("Total 3D Z is 0.0, interpreting it as a 2D structure"); for (IAtom atomToUpdate : atomList) { Point3d p3d = atomToUpdate.getPoint3d(); if (p3d != null) { atomToUpdate.setPoint2d(new Point2d(p3d.x, p3d.y)); atomToUpdate.setPoint3d(null); } } } // read BOND block logger.info("Reading bond block"); int queryBondCount = 0; for (int f = 0; f < bonds; f++) { line = input.readLine(); linecount++; atom1 = Integer.parseInt(line.substring(0, 3).trim()); atom2 = Integer.parseInt(line.substring(3, 6).trim()); order = Integer.parseInt(line.substring(6, 9).trim()); if (line.length() >= 12) { int mdlStereo = line.length() > 12 ? 
Integer.parseInt(line.substring(9, 12).trim()) : Integer.parseInt(line.substring(9).trim()); if (mdlStereo == 1) { // MDL up bond stereo = IBond.Stereo.UP; } else if (mdlStereo == 6) { // MDL down bond stereo = IBond.Stereo.DOWN; } else if (mdlStereo == 0) { if (order == 2) { // double bond stereo defined by coordinates stereo = IBond.Stereo.E_Z_BY_COORDINATES; } else { // bond has no stereochemistry stereo = IBond.Stereo.NONE; } } else if (mdlStereo == 3 && order == 2) { // unknown E/Z stereochemistry stereo = IBond.Stereo.E_OR_Z; } else if (mdlStereo == 4) { // MDL bond undefined stereo = IBond.Stereo.UP_OR_DOWN; } } else { handleError("Missing expected stereo field at line: ", linecount, 10, 12); } if (logger.isDebugEnabled()) { logger.debug("Bond: " + atom1 + " - " + atom2 + "; order " + order); } // interpret CTfile's special bond orders IAtom a1 = atomList.get(atom1 - 1); IAtom a2 = atomList.get(atom2 - 1); IBond newBond = null; if (order >= 1 && order <= 3) { IBond.Order cdkOrder = IBond.Order.SINGLE; if (order == 2) cdkOrder = IBond.Order.DOUBLE; if (order == 3) cdkOrder = IBond.Order.TRIPLE; if (stereo != null) { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder, stereo); } else { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder); } } else if (order == 4) { // aromatic bond if (stereo != null) { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET, stereo); } else { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET); } // mark both atoms and the bond as aromatic and raise the SINGLE_OR_DOUBLE-flag newBond.setFlag(CDKConstants.SINGLE_OR_DOUBLE, true); newBond.setFlag(CDKConstants.ISAROMATIC, true); a1.setFlag(CDKConstants.ISAROMATIC, true); a2.setFlag(CDKConstants.ISAROMATIC, true); } else { queryBondCount++; newBond = new CTFileQueryBond(molecule.getBuilder()); IAtom[] bondAtoms = {a1, a2}; newBond.setAtoms(bondAtoms); newBond.setOrder(null); 
CTFileQueryBond.Type queryBondType = null; switch (order) { case 5: queryBondType = CTFileQueryBond.Type.SINGLE_OR_DOUBLE; break; case 6: queryBondType = CTFileQueryBond.Type.SINGLE_OR_AROMATIC; break; case 7: queryBondType = CTFileQueryBond.Type.DOUBLE_OR_AROMATIC; break; case 8: queryBondType = CTFileQueryBond.Type.ANY; break; } ((CTFileQueryBond) newBond).setType(queryBondType); newBond.setStereo(stereo); } bondList.add((newBond)); // add the bond order to the explicit valence for each atom if (newBond.getOrder() != null && newBond.getOrder() != IBond.Order.UNSET) { explicitValence[atom1 - 1] += newBond.getOrder().numeric(); explicitValence[atom2 - 1] += newBond.getOrder().numeric(); } else { explicitValence[atom1 - 1] = Integer.MIN_VALUE; explicitValence[atom2 - 1] = Integer.MIN_VALUE; } } if (queryBondCount == 0) outputContainer = molecule; else { outputContainer = new QueryAtomContainer(molecule.getBuilder()); } outputContainer.setProperty(CDKConstants.TITLE, title); outputContainer.setProperty(CDKConstants.REMARK, remark); for (IAtom at : atomList) { outputContainer.addAtom(at); } for (IBond bnd : bondList) { outputContainer.addBond(bnd); } // read PROPERTY block logger.info("Reading property block"); while (true) { line = input.readLine(); linecount++; if (line == null) { handleError("The expected property block is missing!", linecount, 0, 0); } if (line.startsWith("M END")) break; boolean lineRead = false; if (line.startsWith("M CHG")) { // FIXME: if this is encountered for the first time, all // atom charges should be set to zero first! 
int infoCount = Integer.parseInt(line.substring(6, 9).trim()); StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { String token = st.nextToken(); int atomNumber = Integer.parseInt(token.trim()); token = st.nextToken(); int charge = Integer.parseInt(token.trim()); outputContainer.getAtom(atomNumber - 1).setFormalCharge(charge); } } else if (line.matches("A\\s{1,4}\\d+")) { // Reads the pseudo atom property from the mol file // The atom number of the to replaced atom int aliasAtomNumber = Integer.parseInt(line.replaceFirst("A\\s{1,4}", "")) - RGroupCounter; line = input.readLine(); linecount++; String[] aliasArray = line.split("\\\\"); // name of the alias atom like R1 or R2 etc. String alias = ""; for (int i = 0; i < aliasArray.length; i++) { alias += aliasArray[i]; } IAtom aliasAtom = outputContainer.getAtom(aliasAtomNumber); // skip if already a pseudoatom if (aliasAtom instanceof IPseudoAtom) { ((IPseudoAtom) aliasAtom).setLabel(alias); continue; } IAtom newPseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, alias); if (aliasAtom.getPoint2d() != null) { newPseudoAtom.setPoint2d(aliasAtom.getPoint2d()); } if (aliasAtom.getPoint3d() != null) { newPseudoAtom.setPoint3d(aliasAtom.getPoint3d()); } outputContainer.addAtom(newPseudoAtom); List<IBond> bondsOfAliasAtom = outputContainer.getConnectedBondsList(aliasAtom); for (int i = 0; i < bondsOfAliasAtom.size(); i++) { IBond bondOfAliasAtom = bondsOfAliasAtom.get(i); IAtom connectedToAliasAtom = bondOfAliasAtom.getConnectedAtom(aliasAtom); IBond newBond = bondOfAliasAtom.getBuilder().newInstance(IBond.class); newBond.setAtoms(new IAtom[] {connectedToAliasAtom, newPseudoAtom}); newBond.setOrder(bondOfAliasAtom.getOrder()); outputContainer.addBond(newBond); outputContainer.removeBond(aliasAtom, connectedToAliasAtom); } outputContainer.removeAtom(aliasAtom); RGroupCounter++; } else if (line.startsWith("M ISO")) { try { String countString = line.substring(6, 
10).trim(); int infoCount = Integer.parseInt(countString); StringTokenizer st = new StringTokenizer(line.substring(10)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); int absMass = Integer.parseInt(st.nextToken().trim()); if (absMass != 0) { IAtom isotope = outputContainer.getAtom(atomNumber - 1); isotope.setMassNumber(absMass); } } } catch (NumberFormatException exception) { String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError( "NumberFormatException in isotope information.", linecount, 7, 11, exception); } } else if (line.startsWith("M RAD")) { try { String countString = line.substring(6, 9).trim(); int infoCount = Integer.parseInt(countString); StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); int spinMultiplicity = Integer.parseInt(st.nextToken().trim()); MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE; if (spinMultiplicity > 0) { IAtom radical = outputContainer.getAtom(atomNumber - 1); switch (spinMultiplicity) { case 1: spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET; break; case 2: spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET; break; case 3: spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET; break; default: logger.debug("Invalid spin multiplicity found: " + spinMultiplicity); break; } for (int j = 0; j < spin.getSingleElectrons(); j++) { outputContainer.addSingleElectron( molecule.getBuilder().newInstance(ISingleElectron.class, radical)); } } } } catch (NumberFormatException exception) { String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError( "NumberFormatException in radical information", linecount, 7, 10, exception); } } else if (line.startsWith("G ")) { try { 
String atomNumberString = line.substring(3, 6).trim(); int atomNumber = Integer.parseInt(atomNumberString); // String whatIsThisString = line.substring(6,9).trim(); String atomName = input.readLine(); // convert Atom into a PseudoAtom IAtom prevAtom = outputContainer.getAtom(atomNumber - 1); IPseudoAtom pseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, atomName); if (prevAtom.getPoint2d() != null) { pseudoAtom.setPoint2d(prevAtom.getPoint2d()); } if (prevAtom.getPoint3d() != null) { pseudoAtom.setPoint3d(prevAtom.getPoint3d()); } AtomContainerManipulator.replaceAtomByAtom(molecule, prevAtom, pseudoAtom); } catch (NumberFormatException exception) { String error = "Error (" + exception.toString() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError("NumberFormatException in group information", linecount, 4, 7, exception); } } else if (line.startsWith("M RGP")) { StringTokenizer st = new StringTokenizer(line); // Ignore first 3 tokens (overhead). st.nextToken(); st.nextToken(); st.nextToken(); // Process the R group numbers as defined in RGP line. while (st.hasMoreTokens()) { Integer position = new Integer(st.nextToken()); Rnumber = new Integer(st.nextToken()); IPseudoAtom pseudoAtom = rAtoms.get(position); if (pseudoAtom != null) { pseudoAtom.setLabel("R" + Rnumber); } } } if (line.startsWith("V ")) { Integer atomNumber = new Integer(line.substring(3, 6).trim()); IAtom atomWithComment = outputContainer.getAtom(atomNumber - 1); atomWithComment.setProperty(CDKConstants.COMMENT, line.substring(7)); } if (!lineRead) { logger.warn("Skipping line in property block: ", line); } } if (interpretHydrogenIsotopes.isSet()) { fixHydrogenIsotopes(molecule, isotopeFactory); } // note: apply the valence model last so that all fixes (i.e. 
hydrogen // isotopes) are in place for (int i = 0; i < atoms; i++) { applyMDLValenceModel(outputContainer.getAtom(i), explicitValence[i]); } } catch (CDKException exception) { String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage(); logger.error(error); logger.debug(exception); throw exception; } catch (Exception exception) { exception.printStackTrace(); String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage(); logger.error(error); logger.debug(exception); handleError("Error while parsing line: " + line, linecount, 0, 0, exception); } return outputContainer; }