/**
 * Verifies {@code MolecularFormulaSetManipulator.contains(IMolecularFormulaSet, IMolecularFormula)}:
 * a formula built from the same symbols and counts as a member of the set is reported as
 * contained, whereas a formula whose hydrogen isotope carries an explicit exact mass is not.
 */
@Test
public void testContains_IMolecularFormulaSet_IMolecularFormula() {
    // First member of the set: C4 H12 N O4.
    IMolecularFormula memberA = new MolecularFormula();
    memberA.addIsotope(builder.newIsotope("C"), 4);
    memberA.addIsotope(builder.newIsotope("H"), 12);
    memberA.addIsotope(builder.newIsotope("N"), 1);
    memberA.addIsotope(builder.newIsotope("O"), 4);

    // Second member of the set: C9 H5 O7.
    IMolecularFormula memberB = new MolecularFormula();
    memberB.addIsotope(builder.newIsotope("C"), 9);
    memberB.addIsotope(builder.newIsotope("H"), 5);
    memberB.addIsotope(builder.newIsotope("O"), 7);

    IMolecularFormulaSet formulaSet = new MolecularFormulaSet();
    formulaSet.addMolecularFormula(memberA);
    formulaSet.addMolecularFormula(memberB);

    // Same composition as the first member, built from fresh isotope instances.
    IMolecularFormula sameAsMemberA = new MolecularFormula();
    sameAsMemberA.addIsotope(builder.newIsotope("C"), 4);
    sameAsMemberA.addIsotope(builder.newIsotope("H"), 12);
    sameAsMemberA.addIsotope(builder.newIsotope("N"), 1);
    sameAsMemberA.addIsotope(builder.newIsotope("O"), 4);

    // Same symbols and counts again, but the hydrogen has an exact mass set explicitly.
    IMolecularFormula withHeavyHydrogen = new MolecularFormula();
    withHeavyHydrogen.addIsotope(builder.newIsotope("C"), 4);
    IIsotope heavyHydrogen = builder.newIsotope("H");
    heavyHydrogen.setExactMass(2.0032342);
    withHeavyHydrogen.addIsotope(heavyHydrogen, 12);
    withHeavyHydrogen.addIsotope(builder.newIsotope("N"), 1);
    withHeavyHydrogen.addIsotope(builder.newIsotope("O"), 4);

    Assert.assertTrue(MolecularFormulaSetManipulator.contains(formulaSet, sameAsMemberA));
    Assert.assertFalse(MolecularFormulaSetManipulator.contains(formulaSet, withHeavyHydrogen));
}
/**
 * Collects every isotope of the formula whose symbol equals the symbol of the given element.
 * Only the symbol is compared; no other isotope property is looked at.
 *
 * @param formula The molecular formula to search
 * @param element The element whose isotopes are wanted
 * @return The matching isotopes in the iteration order of the formula; empty if there are none
 */
public static List<IIsotope> getIsotopes(IMolecularFormula formula, IElement element) {
    List<IIsotope> matches = new ArrayList<IIsotope>();
    for (IIsotope candidate : formula.isotopes()) {
        boolean sameSymbol = candidate.getSymbol().equals(element.getSymbol());
        if (sameSymbol) {
            matches.add(candidate);
        }
    }
    return matches;
}
/**
 * Tells whether the formula holds at least one isotope with the same symbol as the given element.
 *
 * @param formula The molecular formula to search
 * @param element The element to look for
 * @return {@code true} as soon as an isotope with the element's symbol is found, otherwise
 *     {@code false}
 */
public static boolean containsElement(IMolecularFormula formula, IElement element) {
    for (IIsotope candidate : formula.isotopes()) {
        String wanted = element.getSymbol();
        if (!wanted.equals(candidate.getSymbol())) {
            continue;
        }
        return true;
    }
    return false;
}
/**
 * Counts the atoms of one element in the formula by adding up the counts of all isotopes that
 * share the element's symbol. Only the symbol is compared.
 *
 * @param formula The molecular formula to inspect
 * @param element The element to count
 * @return The total number of atoms of that element, or 0 if the formula has none
 */
public static int getElementCount(IMolecularFormula formula, IElement element) {
    int total = 0;
    for (IIsotope candidate : formula.isotopes()) {
        if (!candidate.getSymbol().equals(element.getSymbol())) {
            continue;
        }
        total = total + formula.getIsotopeCount(candidate);
    }
    return total;
}
/**
 * Computes the combined natural abundance of a formula: the product, over all isotopes, of the
 * isotope's natural abundance raised to the power of its count, divided by
 * {@code 100^atomCount} (the atom count coming from {@code getAtomCount(formula)}).
 *
 * @param formula The molecular formula to evaluate
 * @return The combined abundance, or 0.0 as soon as an isotope without a natural abundance is
 *     encountered
 */
public static double getTotalNaturalAbundance(IMolecularFormula formula) {
    double product = 1.0;
    for (IIsotope isotope : formula.isotopes()) {
        Double natural = isotope.getNaturalAbundance();
        if (natural == null) {
            return 0.0;
        }
        product *= Math.pow(natural, formula.getIsotopeCount(isotope));
    }
    double percentScale = Math.pow(100, getAtomCount(formula));
    return product / percentScale;
}
/** Checks that {@code IsotopeDiff.difference} yields a result for isotopes with different symbols. */
@Test
public void testDifference() {
    IIsotope hydrogen = mock(IIsotope.class);
    IIsotope carbon = mock(IIsotope.class);
    when(hydrogen.getSymbol()).thenReturn("H");
    when(carbon.getSymbol()).thenReturn("C");

    Assert.assertNotNull(IsotopeDiff.difference(hydrogen, carbon));
}
/**
 * Lists the distinct elements of a formula. For every symbol, the first isotope met while
 * iterating the formula is the object placed in the result; later isotopes with the same symbol
 * are skipped.
 *
 * @param formula The molecular formula to inspect
 * @return One entry per distinct symbol, in order of first appearance
 */
public static List<IElement> elements(IMolecularFormula formula) {
    List<String> seenSymbols = new ArrayList<String>();
    List<IElement> distinct = new ArrayList<IElement>();
    for (IIsotope isotope : formula.isotopes()) {
        String symbol = isotope.getSymbol();
        if (seenSymbols.contains(symbol)) {
            continue;
        }
        distinct.add(isotope);
        seenSymbols.add(symbol);
    }
    return distinct;
}
/**
 * Checks that {@code IsotopeDiff.diff} produces a non-empty description that names the diff
 * type and the two differing symbols.
 */
@Test
public void testDiff() {
    IIsotope element1 = mock(IIsotope.class);
    IIsotope element2 = mock(IIsotope.class);
    when(element1.getSymbol()).thenReturn("H");
    when(element2.getSymbol()).thenReturn("C");

    String result = IsotopeDiff.diff(element1, element2);

    Assert.assertNotNull(result);
    // Compare the length by value; assertNotSame would compare the identity of two autoboxed
    // Integer objects instead of the numbers themselves.
    Assert.assertTrue(result.length() > 0);
    assertContains(result, "IsotopeDiff");
    assertContains(result, "H/C");
}
/**
 * Sums, over all isotopes of the formula, the mass number of the <em>major</em> isotope of the
 * isotope's element multiplied by the isotope's count. The mass number stored on the formula's
 * own isotope is not consulted.
 *
 * <p>Isotopes are skipped (contribute nothing) when no major isotope is known for their symbol
 * or when that major isotope has no mass number. If the isotope data cannot be loaded, the
 * failure is printed and the affected isotopes are skipped as well, so the method never throws
 * for that reason.
 *
 * @param formula The molecular formula to evaluate
 * @return The summed nominal mass; 0.0 if nothing could be resolved
 */
public static double getTotalMassNumber(IMolecularFormula formula) {
    double mass = 0.0;
    for (IIsotope isotope : formula.isotopes()) {
        try {
            IIsotope major = Isotopes.getInstance().getMajorIsotope(isotope.getSymbol());
            if (major == null) {
                continue;
            }
            // getMassNumber() returns a nullable Integer; guard it so unboxing cannot throw.
            Integer massNumber = major.getMassNumber();
            if (massNumber != null) {
                mass += massNumber * formula.getIsotopeCount(isotope);
            }
        } catch (IOException e) {
            // Deliberately best effort: report the problem and keep going.
            e.printStackTrace();
        }
    }
    return mass;
}
/**
 * Sums the exact mass of the major isotope of every element in the formula, weighted by the
 * count of the corresponding isotope. Isotopes for whose symbol no major isotope is known are
 * skipped.
 *
 * @param formula The molecular formula to evaluate
 * @return The summed exact major-isotope masses of all atoms in the formula
 * @throws RuntimeException if the isotope factory cannot be instantiated; the underlying
 *     {@link IOException} is attached as the cause
 */
public static double getMajorIsotopeMass(IMolecularFormula formula) {
    double mass = 0.0;
    IsotopeFactory factory;
    try {
        factory = Isotopes.getInstance();
    } catch (IOException e) {
        // Keep the original failure as the cause so the root problem stays diagnosable.
        throw new RuntimeException("Could not instantiate the IsotopeFactory.", e);
    }
    for (IIsotope isotope : formula.isotopes()) {
        IIsotope major = factory.getMajorIsotope(isotope.getSymbol());
        if (major != null) {
            mass += major.getExactMass() * formula.getIsotopeCount(isotope);
        }
    }
    return mass;
}
/** * Returns the string representation of the molecule formula with numbers wrapped in * <sub></sub> tags and the isotope of each Element in <sup></sup> tags * and the total showCharge of IMolecularFormula in <sup></sup> tags. Useful for * displaying formulae in Swing components or on the web. * * @param formula The IMolecularFormula object * @param orderElements The order of Elements * @param showCharge True, If it has to show the showCharge * @param showIsotopes True, If it has to show the Isotope mass * @return A HTML representation of the molecular formula * @see #getHTML(IMolecularFormula) */ public static String getHTML( IMolecularFormula formula, String[] orderElements, boolean showCharge, boolean showIsotopes) { StringBuilder sb = new StringBuilder(); for (String orderElement : orderElements) { IElement element = formula.getBuilder().newInstance(IElement.class, orderElement); if (containsElement(formula, element)) { if (!showIsotopes) { sb.append(element.getSymbol()); int n = getElementCount(formula, element); if (n > 1) { sb.append("<sub>").append(n).append("</sub>"); } } else { for (IIsotope isotope : getIsotopes(formula, element)) { Integer massNumber = isotope.getMassNumber(); if (massNumber != null) sb.append("<sup>").append(massNumber).append("</sup>"); sb.append(isotope.getSymbol()); int n = formula.getIsotopeCount(isotope); if (n > 1) { sb.append("<sub>").append(n).append("</sub>"); } } } } } if (showCharge) { Integer charge = formula.getCharge(); if (charge == CDKConstants.UNSET || charge == 0) { return sb.toString(); } else { sb.append("<sup>"); if (charge > 1 || charge < -1) sb.append(Math.abs(charge)); if (charge > 0) sb.append('+'); else sb.append(MINUS); // note, not a hyphen! sb.append("</sup>"); } } return sb.toString(); }
/** * Returns the string representation of the molecule formula. * * @param formula The IMolecularFormula Object * @param orderElements The order of Elements * @param setOne True, when must be set the value 1 for elements with one atom * @return A String containing the molecular formula * @see #getHTML(IMolecularFormula) * @see #generateOrderEle() * @see #generateOrderEle_Hill_NoCarbons() * @see #generateOrderEle_Hill_WithCarbons() */ public static String getString( IMolecularFormula formula, String[] orderElements, boolean setOne) { StringBuffer stringMF = new StringBuffer(); List<IIsotope> isotopesList = putInOrder(orderElements, formula); // collect elements in a map - since different isotopes of the // same element will get repeated in the formula List<String> elemSet = new ArrayList<String>(); for (IIsotope isotope : isotopesList) { String symbol = isotope.getSymbol(); if (!elemSet.contains(symbol)) elemSet.add(symbol); } for (String elem : elemSet) { int count = 0; for (IIsotope isotope : formula.isotopes()) { if (isotope.getSymbol().equals(elem)) count += formula.getIsotopeCount(isotope); } stringMF.append(elem); if (!(count == 1 && !setOne)) stringMF.append(count); } return stringMF.toString(); }
private IAtomContainer makeAtomContainerFromFormula() { IAtomContainer atomContainer = this.builder.newAtomContainer(); ArrayList<IAtom> atoms = new ArrayList<IAtom>(); for (IIsotope isotope : formula.isotopes()) { for (int i = 0; i < formula.getIsotopeCount(isotope); i++) { atoms.add(this.builder.newAtom(isotope)); System.out.println("added " + isotope.getSymbol()); } } // sort by symbol lexicographic order Collections.sort( atoms, new Comparator<IAtom>() { public int compare(IAtom o1, IAtom o2) { return o1.getSymbol().compareTo(o2.getSymbol()); } }); atomContainer.setAtoms(atoms.toArray(new IAtom[] {})); return atomContainer; }
/** @deprecated Use {@link #getString(org.openscience.cdk.interfaces.IMolecularFormula)} */ @Deprecated public static String getHillString(IMolecularFormula formula) { StringBuffer hillString = new StringBuffer(); Map<String, Integer> hillMap = new TreeMap<String, Integer>(); for (IIsotope isotope : formula.isotopes()) { String symbol = isotope.getSymbol(); if (hillMap.containsKey(symbol)) hillMap.put(symbol, hillMap.get(symbol) + formula.getIsotopeCount(isotope)); else hillMap.put(symbol, formula.getIsotopeCount(isotope)); } // if we have a C append it and also add in the H // and then remove these elements int count; if (hillMap.containsKey("C")) { hillString.append('C'); count = hillMap.get("C"); if (count > 1) hillString.append(count); hillMap.remove("C"); if (hillMap.containsKey("H")) { hillString.append('H'); count = hillMap.get("H"); if (count > 1) hillString.append(count); hillMap.remove("H"); } } // now take all the rest in alphabetical order for (String key : hillMap.keySet()) { hillString.append(key); count = hillMap.get(key); if (count > 1) hillString.append(count); } return hillString.toString(); }
/** * Adjust the protonation of a molecular formula. This utility method adjusts the hydrogen isotope * count and charge at the same time. * * <pre> * IMolecularFormula mf = MolecularFormulaManipulator.getMolecularFormula("[C6H5O]-", bldr); * MolecularFormulaManipulator.adjustProtonation(mf, +1); // now "C6H6O" * MolecularFormulaManipulator.adjustProtonation(mf, -1); // now "C6H5O-" * </pre> * * The return value indicates whether the protonation could be adjusted: * * <pre> * IMolecularFormula mf = MolecularFormulaManipulator.getMolecularFormula("[Cl]-", bldr); * MolecularFormulaManipulator.adjustProtonation(mf, +0); // false still "[Cl]-" * MolecularFormulaManipulator.adjustProtonation(mf, +1); // true now "HCl" * MolecularFormulaManipulator.adjustProtonation(mf, -1); // true now "[Cl]-" (again) * MolecularFormulaManipulator.adjustProtonation(mf, -1); // false still "[Cl]-" (no H to remove!) * </pre> * * The method tries to select an existing hydrogen isotope to augment. If no hydrogen isotopes are * found a new major isotope (<sup>1</sup>H) is created. * * @param mf molecular formula * @param hcnt the number of hydrogens to add/remove, (>0 protonate:, <0: deprotonate) * @return the protonation was be adjusted */ public static boolean adjustProtonation(IMolecularFormula mf, int hcnt) { if (mf == null) throw new NullPointerException("No formula provided"); if (hcnt == 0) return false; // no protons to add final IChemObjectBuilder bldr = mf.getBuilder(); final int chg = mf.getCharge() != null ? 
mf.getCharge() : 0; IIsotope proton = null; int pcount = 0; for (IIsotope iso : mf.isotopes()) { if ("H".equals(iso.getSymbol())) { final int count = mf.getIsotopeCount(iso); if (count < hcnt) continue; // acceptable if (proton == null && (iso.getMassNumber() == null || iso.getMassNumber() == 1)) { proton = iso; pcount = count; } // better else if (proton != null && iso.getMassNumber() != null && iso.getMassNumber() == 1 && proton.getMassNumber() == null) { proton = iso; pcount = count; } } } if (proton == null && hcnt < 0) { return false; } else if (proton == null && hcnt > 0) { proton = bldr.newInstance(IIsotope.class, "H"); proton.setMassNumber(1); } mf.removeIsotope(proton); if (pcount + hcnt > 0) mf.addIsotope(proton, pcount + hcnt); mf.setCharge(chg + hcnt); return true; }
/**
 * Get the summed exact mass of all isotopes from a MolecularFormula. For an isotope without an
 * exact mass, the exact mass of the major isotope of its element is used instead; if no major
 * isotope is known either, that isotope contributes nothing. When the formula has a charge, the
 * sum is passed through {@code correctMass} together with that charge.
 *
 * @param formula The IMolecularFormula to calculate
 * @return The summed exact mass of all atoms in this MolecularFormula
 * @throws RuntimeException if the isotope factory is needed but cannot be instantiated; the
 *     underlying {@link IOException} is attached as the cause
 */
public static double getTotalExactMass(IMolecularFormula formula) {
    // Primitive accumulator: avoids boxing and unboxing a Double on every addition.
    double mass = 0.0;

    for (IIsotope isotope : formula.isotopes()) {
        if (isotope.getExactMass() == CDKConstants.UNSET) {
            try {
                // The factory is only requested when a fallback is really needed.
                IIsotope majorIsotope = Isotopes.getInstance().getMajorIsotope(isotope.getSymbol());
                if (majorIsotope != null) {
                    mass += majorIsotope.getExactMass() * formula.getIsotopeCount(isotope);
                }
            } catch (IOException e) {
                // Keep the original failure as the cause so the root problem stays diagnosable.
                throw new RuntimeException("Could not instantiate the IsotopeFactory.", e);
            }
        } else {
            mass += isotope.getExactMass() * formula.getIsotopeCount(isotope);
        }
    }

    if (formula.getCharge() != null) {
        mass = correctMass(mass, formula.getCharge());
    }
    return mass;
}
/**
 * Reads a single molecule record, laid out as an MDL V2000 connection table, line by line from
 * the {@code input} reader of this object.
 *
 * <p>Processing order, as implemented below:
 *
 * <ol>
 *   <li><b>Header.</b> The first line is kept as the title, the second is only logged and the
 *       third is kept as the remark (each only when non-empty). A missing first line returns
 *       {@code null}; a first line starting with {@code $$$$} returns {@code molecule} untouched.
 *   <li><b>Counts line.</b> An empty counts line makes the method skip forward until a line
 *       starting with {@code $$$$} (returns {@code molecule}) or the end of input (returns
 *       {@code null}). A line mentioning V3000, or mentioning neither V3000 nor V2000, is reported
 *       through {@code handleError}. The atom count is parsed from columns 0-3 and the bond count
 *       from columns 3-6.
 *   <li><b>Atom block.</b> Per atom line: trailing spaces are reported and stripped; x/y/z are
 *       parsed from columns 0-10, 10-20 and 20-30; the symbol from columns 31-34. Known elements
 *       become configured atoms; {@code A}, {@code Q}, {@code *}, {@code LP}, {@code L} and
 *       symbols starting with {@code R} become pseudo atoms; anything else is reported and also
 *       turned into a pseudo atom. Further fields read when the line is long enough: mass
 *       difference (34-36, added to the major isotope's mass number), charge code (36-39; codes
 *       1, 2, 3 map to +3, +2, +1 and 5, 6, 7 to -1, -2, -3, other codes set nothing), stereo
 *       parity (character 41), valence (48-51, where 15 stands for 0), atom-atom mapping (60-63)
 *       and the two optional "shift" properties.
 *   <li><b>Coordinates.</b> If all absolute coordinate sums are zero, a single atom gets a 2D
 *       point and several atoms get their 3D point cleared. Otherwise, if only the Z sum is zero
 *       and {@code forceReadAs3DCoords} is not set, each 3D point is converted to a 2D point.
 *   <li><b>Bond block.</b> Orders 1-3 give ordinary bonds; order 4 gives a bond with order
 *       {@code UNSET} flagged aromatic and single-or-double (both atoms are flagged aromatic
 *       too); every other order gives a {@code CTFileQueryBond}. If at least one query bond was
 *       read, the result is a new {@code QueryAtomContainer} instead of {@code molecule}. The
 *       bond orders are summed per atom into {@code explicitValence}; atoms touching a bond
 *       without a concrete order get {@code Integer.MIN_VALUE} there.
 *   <li><b>Property block.</b> Lines are read until one starts with {@code M END}. Handled are
 *       {@code M CHG}, alias lines ({@code A} followed by an atom number, with the alias text on
 *       the following line), {@code M ISO}, {@code M RAD}, {@code G} group lines,
 *       {@code M RGP} and {@code V} comment lines.
 *   <li><b>Post-processing.</b> {@code fixHydrogenIsotopes} runs when
 *       {@code interpretHydrogenIsotopes} is set; afterwards {@code applyMDLValenceModel} is
 *       applied to the first {@code atoms} atoms of the result with their summed explicit
 *       valence.
 * </ol>
 *
 * <p>Error handling: a {@code CDKException} raised inside is logged and rethrown. Any other
 * exception is printed, logged and handed to {@code handleError}; if that call returns, the
 * method returns whatever {@code outputContainer} holds at that point, which is {@code null}
 * when the failure happened before the bond block was finished.
 *
 * <p>NOTE(review), all visible in the code below:
 *
 * <ul>
 *   <li>{@code lineRead} is initialised to {@code false} and never assigned again, so the
 *       "Skipping line in property block" warning is logged for every property line, handled
 *       or not.
 *   <li>When the input ends inside the property block, {@code handleError} is called and the
 *       code then dereferences the null {@code line}; this is only safe if {@code handleError}
 *       always throws - confirm.
 *   <li>The "first shift" property is guarded by {@code line.length() >= 78} but reads
 *       {@code substring(69, 80)}.
 *   <li>The {@code G} branch consumes one extra line without incrementing {@code linecount} and
 *       replaces the atom in {@code molecule}, not in {@code outputContainer}; the same holds
 *       for {@code fixHydrogenIsotopes}. The two containers differ when query bonds are present.
 * </ul>
 *
 * @param molecule the container to fill; also the source of the builder for every new object
 * @return the container holding the parsed atoms and bonds ({@code molecule} itself, or a new
 *     {@code QueryAtomContainer} when query bonds were read); {@code molecule} unmodified for an
 *     empty record; {@code null} when the input is exhausted
 * @throws CDKException rethrown when raised while parsing (presumably also by
 *     {@code handleError} depending on the reader mode - TODO confirm)
 */
private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException { logger.debug("Reading new molecule"); IAtomContainer outputContainer = null; int linecount = 0; int atoms = 0; int bonds = 0; int atom1 = 0; int atom2 = 0; int order = 0; IBond.Stereo stereo = (IBond.Stereo) CDKConstants.UNSET; int RGroupCounter = 1; int Rnumber = 0; String[] rGroup = null; double x = 0.0; double y = 0.0; double z = 0.0; double totalX = 0.0; double totalY = 0.0; double totalZ = 0.0; String title = null; String remark = null; // int[][] conMat = new int[0][0]; // String help; IAtom atom; String line = ""; // A map to keep track of R# atoms so that RGP line can be parsed Map<Integer, IPseudoAtom> rAtoms = new HashMap<Integer, IPseudoAtom>(); try { IsotopeFactory isotopeFactory = Isotopes.getInstance(); logger.info("Reading header"); line = input.readLine(); linecount++; if (line == null) { return null; } logger.debug("Line " + linecount + ": " + line); if (line.startsWith("$$$$")) { logger.debug("File is empty, returning empty molecule"); return molecule; } if (line.length() > 0) { title = line; } line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); if (line.length() > 0) { remark = line; } logger.info("Reading rest of file"); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); // if the line is empty we hav a problem - either a malformed // molecule entry or just extra new lines at the end of the file if (line.length() == 0) { // read till the next $$$$ or EOF while (true) { line = input.readLine(); linecount++; if (line == null) { return null; } if (line.startsWith("$$$$")) { return molecule; // an empty molecule } } } // check the CT block version if (line.contains("V3000") || 
line.contains("v3000")) { handleError("This file must be read with the MDLV3000Reader."); } else if (!line.contains("V2000") && !line.contains("v2000")) { handleError("This file must be read with the MDLReader."); } atoms = Integer.parseInt(line.substring(0, 3).trim()); List<IAtom> atomList = new ArrayList<IAtom>(); logger.debug("Atomcount: " + atoms); bonds = Integer.parseInt(line.substring(3, 6).trim()); logger.debug("Bondcount: " + bonds); List<IBond> bondList = new ArrayList<IBond>(); // used for applying the MDL valence model int[] explicitValence = new int[atoms]; // read ATOM block logger.info("Reading atom block"); atomsByLinePosition = new ArrayList<IAtom>(); atomsByLinePosition.add(null); // 0 is not a valid position int atomBlockLineNumber = 0; for (int f = 0; f < atoms; f++) { line = input.readLine(); linecount++; atomBlockLineNumber++; Matcher trailingSpaceMatcher = TRAILING_SPACE.matcher(line); if (trailingSpaceMatcher.find()) { handleError( "Trailing space found", linecount, trailingSpaceMatcher.start(), trailingSpaceMatcher.end()); line = trailingSpaceMatcher.replaceAll(""); } x = Double.parseDouble(line.substring(0, 10).trim()); y = Double.parseDouble(line.substring(10, 20).trim()); z = Double.parseDouble(line.substring(20, 30).trim()); // *all* values should be zero, not just the sum totalX += Math.abs(x); totalY += Math.abs(y); totalZ += Math.abs(z); logger.debug("Coordinates: " + x + "; " + y + "; " + z); String element = line.substring(31, Math.min(line.length(), 34)).trim(); if (line.length() < 34) { handleError( "Element atom type does not follow V2000 format type should of length three" + " and padded with space if required", linecount, 31, 34); } logger.debug("Atom type: ", element); if (isotopeFactory.isElement(element)) { atom = isotopeFactory.configure(molecule.getBuilder().newInstance(IAtom.class, element)); } else if ("A".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if 
("Q".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("*".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("LP".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("L".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if (element.equals("R") || (element.length() > 0 && element.charAt(0) == 'R')) { logger.debug("Atom ", element, " is not an regular element. Creating a PseudoAtom."); // check if the element is R rGroup = element.split("^R"); atom = null; if (rGroup.length > 1) { try { Rnumber = Integer.valueOf(rGroup[(rGroup.length - 1)]); RGroupCounter = Rnumber; element = "R" + Rnumber; atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } catch (Exception ex) { // This happens for atoms labeled "R#". // The Rnumber may be set later on, using RGP line atom = molecule.getBuilder().newInstance(IPseudoAtom.class, "R"); rAtoms.put(atomBlockLineNumber, (IPseudoAtom) atom); } } else { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } } else { handleError( "Invalid element type. 
Must be an existing " + "element, or one in: A, Q, L, LP, *.", linecount, 32, 35); atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); atom.setSymbol(element); } // store as 3D for now, convert to 2D (if totalZ == 0.0) later atom.setPoint3d(new Point3d(x, y, z)); // parse further fields if (line.length() >= 36) { String massDiffString = line.substring(34, 36).trim(); logger.debug("Mass difference: ", massDiffString); if (!(atom instanceof IPseudoAtom)) { try { int massDiff = Integer.parseInt(massDiffString); if (massDiff != 0) { IIsotope major = Isotopes.getInstance().getMajorIsotope(element); atom.setMassNumber(major.getMassNumber() + massDiff); } } catch (Exception exception) { handleError("Could not parse mass difference field.", linecount, 35, 37, exception); } } else { logger.error("Cannot set mass difference for a non-element!"); } } else { handleError("Mass difference is missing", linecount, 34, 36); } // set the stereo partiy Integer parity = line.length() > 41 ? 
Character.digit(line.charAt(41), 10) : 0; atom.setStereoParity(parity); if (line.length() >= 51) { String valenceString = removeNonDigits(line.substring(48, 51)); logger.debug("Valence: ", valenceString); if (!(atom instanceof IPseudoAtom)) { try { int valence = Integer.parseInt(valenceString); if (valence != 0) { // 15 is defined as 0 in mol files if (valence == 15) atom.setValency(0); else atom.setValency(valence); } } catch (Exception exception) { handleError( "Could not parse valence information field", linecount, 49, 52, exception); } } else { logger.error("Cannot set valence information for a non-element!"); } } if (line.length() >= 39) { String chargeCodeString = line.substring(36, 39).trim(); logger.debug("Atom charge code: ", chargeCodeString); int chargeCode = Integer.parseInt(chargeCodeString); if (chargeCode == 0) { // uncharged species } else if (chargeCode == 1) { atom.setFormalCharge(+3); } else if (chargeCode == 2) { atom.setFormalCharge(+2); } else if (chargeCode == 3) { atom.setFormalCharge(+1); } else if (chargeCode == 4) { } else if (chargeCode == 5) { atom.setFormalCharge(-1); } else if (chargeCode == 6) { atom.setFormalCharge(-2); } else if (chargeCode == 7) { atom.setFormalCharge(-3); } } else { handleError("Atom charge is missing", linecount, 36, 39); } try { // read the mmm field as position 61-63 String reactionAtomIDString = line.substring(60, 63).trim(); logger.debug("Parsing mapping id: ", reactionAtomIDString); try { int reactionAtomID = Integer.parseInt(reactionAtomIDString); if (reactionAtomID != 0) { atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, reactionAtomID); } } catch (Exception exception) { logger.error("Mapping number ", reactionAtomIDString, " is not an integer."); logger.debug(exception); } } catch (Exception exception) { // older mol files don't have all these fields... logger.warn("A few fields are missing. Older MDL MOL file?"); } // shk3: This reads shifts from after the molecule. 
I don't think this is an official // format, but I saw it frequently 80=>78 for alk if (line.length() >= 78) { double shift = Double.parseDouble(line.substring(69, 80).trim()); atom.setProperty("first shift", shift); } if (line.length() >= 87) { double shift = Double.parseDouble(line.substring(79, 87).trim()); atom.setProperty("second shift", shift); } atomList.add(atom); atomsByLinePosition.add(atom); } // convert to 2D, if totalZ == 0 if (totalX == 0.0 && totalY == 0.0 && totalZ == 0.0) { logger.info("All coordinates are 0.0"); if (atomList.size() == 1) { atomList.get(0).setPoint2d(new Point2d(x, y)); } else { for (IAtom atomToUpdate : atomList) { atomToUpdate.setPoint3d(null); } } } else if (totalZ == 0.0 && !forceReadAs3DCoords.isSet()) { logger.info("Total 3D Z is 0.0, interpreting it as a 2D structure"); for (IAtom atomToUpdate : atomList) { Point3d p3d = atomToUpdate.getPoint3d(); if (p3d != null) { atomToUpdate.setPoint2d(new Point2d(p3d.x, p3d.y)); atomToUpdate.setPoint3d(null); } } } // read BOND block logger.info("Reading bond block"); int queryBondCount = 0; for (int f = 0; f < bonds; f++) { line = input.readLine(); linecount++; atom1 = Integer.parseInt(line.substring(0, 3).trim()); atom2 = Integer.parseInt(line.substring(3, 6).trim()); order = Integer.parseInt(line.substring(6, 9).trim()); if (line.length() >= 12) { int mdlStereo = line.length() > 12 ? 
Integer.parseInt(line.substring(9, 12).trim()) : Integer.parseInt(line.substring(9).trim()); if (mdlStereo == 1) { // MDL up bond stereo = IBond.Stereo.UP; } else if (mdlStereo == 6) { // MDL down bond stereo = IBond.Stereo.DOWN; } else if (mdlStereo == 0) { if (order == 2) { // double bond stereo defined by coordinates stereo = IBond.Stereo.E_Z_BY_COORDINATES; } else { // bond has no stereochemistry stereo = IBond.Stereo.NONE; } } else if (mdlStereo == 3 && order == 2) { // unknown E/Z stereochemistry stereo = IBond.Stereo.E_OR_Z; } else if (mdlStereo == 4) { // MDL bond undefined stereo = IBond.Stereo.UP_OR_DOWN; } } else { handleError("Missing expected stereo field at line: ", linecount, 10, 12); } if (logger.isDebugEnabled()) { logger.debug("Bond: " + atom1 + " - " + atom2 + "; order " + order); } // interpret CTfile's special bond orders IAtom a1 = atomList.get(atom1 - 1); IAtom a2 = atomList.get(atom2 - 1); IBond newBond = null; if (order >= 1 && order <= 3) { IBond.Order cdkOrder = IBond.Order.SINGLE; if (order == 2) cdkOrder = IBond.Order.DOUBLE; if (order == 3) cdkOrder = IBond.Order.TRIPLE; if (stereo != null) { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder, stereo); } else { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder); } } else if (order == 4) { // aromatic bond if (stereo != null) { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET, stereo); } else { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET); } // mark both atoms and the bond as aromatic and raise the SINGLE_OR_DOUBLE-flag newBond.setFlag(CDKConstants.SINGLE_OR_DOUBLE, true); newBond.setFlag(CDKConstants.ISAROMATIC, true); a1.setFlag(CDKConstants.ISAROMATIC, true); a2.setFlag(CDKConstants.ISAROMATIC, true); } else { queryBondCount++; newBond = new CTFileQueryBond(molecule.getBuilder()); IAtom[] bondAtoms = {a1, a2}; newBond.setAtoms(bondAtoms); newBond.setOrder(null); 
CTFileQueryBond.Type queryBondType = null; switch (order) { case 5: queryBondType = CTFileQueryBond.Type.SINGLE_OR_DOUBLE; break; case 6: queryBondType = CTFileQueryBond.Type.SINGLE_OR_AROMATIC; break; case 7: queryBondType = CTFileQueryBond.Type.DOUBLE_OR_AROMATIC; break; case 8: queryBondType = CTFileQueryBond.Type.ANY; break; } ((CTFileQueryBond) newBond).setType(queryBondType); newBond.setStereo(stereo); } bondList.add((newBond)); // add the bond order to the explicit valence for each atom if (newBond.getOrder() != null && newBond.getOrder() != IBond.Order.UNSET) { explicitValence[atom1 - 1] += newBond.getOrder().numeric(); explicitValence[atom2 - 1] += newBond.getOrder().numeric(); } else { explicitValence[atom1 - 1] = Integer.MIN_VALUE; explicitValence[atom2 - 1] = Integer.MIN_VALUE; } } if (queryBondCount == 0) outputContainer = molecule; else { outputContainer = new QueryAtomContainer(molecule.getBuilder()); } outputContainer.setProperty(CDKConstants.TITLE, title); outputContainer.setProperty(CDKConstants.REMARK, remark); for (IAtom at : atomList) { outputContainer.addAtom(at); } for (IBond bnd : bondList) { outputContainer.addBond(bnd); } // read PROPERTY block logger.info("Reading property block"); while (true) { line = input.readLine(); linecount++; if (line == null) { handleError("The expected property block is missing!", linecount, 0, 0); } if (line.startsWith("M END")) break; boolean lineRead = false; if (line.startsWith("M CHG")) { // FIXME: if this is encountered for the first time, all // atom charges should be set to zero first! 
int infoCount = Integer.parseInt(line.substring(6, 9).trim()); StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { String token = st.nextToken(); int atomNumber = Integer.parseInt(token.trim()); token = st.nextToken(); int charge = Integer.parseInt(token.trim()); outputContainer.getAtom(atomNumber - 1).setFormalCharge(charge); } } else if (line.matches("A\\s{1,4}\\d+")) { // Reads the pseudo atom property from the mol file // The atom number of the to replaced atom int aliasAtomNumber = Integer.parseInt(line.replaceFirst("A\\s{1,4}", "")) - RGroupCounter; line = input.readLine(); linecount++; String[] aliasArray = line.split("\\\\"); // name of the alias atom like R1 or R2 etc. String alias = ""; for (int i = 0; i < aliasArray.length; i++) { alias += aliasArray[i]; } IAtom aliasAtom = outputContainer.getAtom(aliasAtomNumber); // skip if already a pseudoatom if (aliasAtom instanceof IPseudoAtom) { ((IPseudoAtom) aliasAtom).setLabel(alias); continue; } IAtom newPseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, alias); if (aliasAtom.getPoint2d() != null) { newPseudoAtom.setPoint2d(aliasAtom.getPoint2d()); } if (aliasAtom.getPoint3d() != null) { newPseudoAtom.setPoint3d(aliasAtom.getPoint3d()); } outputContainer.addAtom(newPseudoAtom); List<IBond> bondsOfAliasAtom = outputContainer.getConnectedBondsList(aliasAtom); for (int i = 0; i < bondsOfAliasAtom.size(); i++) { IBond bondOfAliasAtom = bondsOfAliasAtom.get(i); IAtom connectedToAliasAtom = bondOfAliasAtom.getConnectedAtom(aliasAtom); IBond newBond = bondOfAliasAtom.getBuilder().newInstance(IBond.class); newBond.setAtoms(new IAtom[] {connectedToAliasAtom, newPseudoAtom}); newBond.setOrder(bondOfAliasAtom.getOrder()); outputContainer.addBond(newBond); outputContainer.removeBond(aliasAtom, connectedToAliasAtom); } outputContainer.removeAtom(aliasAtom); RGroupCounter++; } else if (line.startsWith("M ISO")) { try { String countString = line.substring(6, 
10).trim(); int infoCount = Integer.parseInt(countString); StringTokenizer st = new StringTokenizer(line.substring(10)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); int absMass = Integer.parseInt(st.nextToken().trim()); if (absMass != 0) { IAtom isotope = outputContainer.getAtom(atomNumber - 1); isotope.setMassNumber(absMass); } } } catch (NumberFormatException exception) { String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError( "NumberFormatException in isotope information.", linecount, 7, 11, exception); } } else if (line.startsWith("M RAD")) { try { String countString = line.substring(6, 9).trim(); int infoCount = Integer.parseInt(countString); StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); int spinMultiplicity = Integer.parseInt(st.nextToken().trim()); MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE; if (spinMultiplicity > 0) { IAtom radical = outputContainer.getAtom(atomNumber - 1); switch (spinMultiplicity) { case 1: spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET; break; case 2: spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET; break; case 3: spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET; break; default: logger.debug("Invalid spin multiplicity found: " + spinMultiplicity); break; } for (int j = 0; j < spin.getSingleElectrons(); j++) { outputContainer.addSingleElectron( molecule.getBuilder().newInstance(ISingleElectron.class, radical)); } } } } catch (NumberFormatException exception) { String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError( "NumberFormatException in radical information", linecount, 7, 10, exception); } } else if (line.startsWith("G ")) { try { 
String atomNumberString = line.substring(3, 6).trim(); int atomNumber = Integer.parseInt(atomNumberString); // String whatIsThisString = line.substring(6,9).trim(); String atomName = input.readLine(); // convert Atom into a PseudoAtom IAtom prevAtom = outputContainer.getAtom(atomNumber - 1); IPseudoAtom pseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, atomName); if (prevAtom.getPoint2d() != null) { pseudoAtom.setPoint2d(prevAtom.getPoint2d()); } if (prevAtom.getPoint3d() != null) { pseudoAtom.setPoint3d(prevAtom.getPoint3d()); } AtomContainerManipulator.replaceAtomByAtom(molecule, prevAtom, pseudoAtom); } catch (NumberFormatException exception) { String error = "Error (" + exception.toString() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError("NumberFormatException in group information", linecount, 4, 7, exception); } } else if (line.startsWith("M RGP")) { StringTokenizer st = new StringTokenizer(line); // Ignore first 3 tokens (overhead). st.nextToken(); st.nextToken(); st.nextToken(); // Process the R group numbers as defined in RGP line. while (st.hasMoreTokens()) { Integer position = new Integer(st.nextToken()); Rnumber = new Integer(st.nextToken()); IPseudoAtom pseudoAtom = rAtoms.get(position); if (pseudoAtom != null) { pseudoAtom.setLabel("R" + Rnumber); } } } if (line.startsWith("V ")) { Integer atomNumber = new Integer(line.substring(3, 6).trim()); IAtom atomWithComment = outputContainer.getAtom(atomNumber - 1); atomWithComment.setProperty(CDKConstants.COMMENT, line.substring(7)); } if (!lineRead) { logger.warn("Skipping line in property block: ", line); } } if (interpretHydrogenIsotopes.isSet()) { fixHydrogenIsotopes(molecule, isotopeFactory); } // note: apply the valence model last so that all fixes (i.e. 
hydrogen // isotopes) are in place for (int i = 0; i < atoms; i++) { applyMDLValenceModel(outputContainer.getAtom(i), explicitValence[i]); } } catch (CDKException exception) { String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage(); logger.error(error); logger.debug(exception); throw exception; } catch (Exception exception) { exception.printStackTrace(); String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage(); logger.error(error); logger.debug(exception); handleError("Error while parsing line: " + line, linecount, 0, 0, exception); } return outputContainer; }