@Test public void testMDMoleculeCustomization() { StringWriter writer = new StringWriter(); CMLWriter cmlWriter = new CMLWriter(writer); cmlWriter.registerCustomizer(new MDMoleculeCustomizer()); try { IAtomContainer molecule = makeMDBenzene(); cmlWriter.write(molecule); } catch (Exception exception) { logger.error("Error while creating an CML2 file: ", exception.getMessage()); logger.debug(exception); Assert.fail(exception.getMessage()); } String cmlContent = writer.toString(); logger.debug("****************************** testMDMoleculeCustomization()"); logger.debug(cmlContent); logger.debug("******************************"); // System.out.println("****************************** testMDMoleculeCustomization()"); // System.out.println(cmlContent); // System.out.println("******************************"); Assert.assertTrue(cmlContent.indexOf("xmlns:md") != -1); Assert.assertTrue(cmlContent.indexOf("md:residue\"") != -1); Assert.assertTrue(cmlContent.indexOf("md:resNumber\"") != -1); Assert.assertTrue(cmlContent.indexOf("md:chargeGroup\"") != -1); Assert.assertTrue(cmlContent.indexOf("md:cgNumber\"") != -1); Assert.assertTrue(cmlContent.indexOf("md:switchingAtom\"") != -1); }
/**
 * Applies the requested filters to the current mappings, in the fixed order energy, fragment,
 * stereo. Nothing happens when there are no mappings. A {@code CDKException} raised by the
 * energy or stereo step is logged and not propagated.
 *
 * @param stereoFilter   whether to sort by stereo and bond match and store the stereo scores
 * @param fragmentFilter whether to sort by fragments and store the fragment sizes
 * @param energyFilter   whether to sort by energies and store the bond energies
 */
@Override
public synchronized void setChemFilters(
        boolean stereoFilter, boolean fragmentFilter, boolean energyFilter) {
    if (getMappingCount() <= 0) {
        return;
    }

    if (energyFilter) {
        try {
            sortResultsByEnergies();
            this.bondEnergiesList = getSortedEnergy();
        } catch (CDKException ex) {
            Logger.error(Level.SEVERE, null, ex);
        }
    }

    if (fragmentFilter) {
        sortResultsByFragments();
        this.fragmentSizeList = getSortedFragment();
    }

    if (stereoFilter) {
        try {
            sortResultsByStereoAndBondMatch();
            this.stereoScoreList = getStereoMatches();
        } catch (CDKException ex) {
            Logger.error(Level.SEVERE, null, ex);
        }
    }
}
/** * Returns the ring that is formed by the atoms in the given vector. * * @param vec The vector that contains the atoms of the ring * @param mol The molecule this ring is a substructure of * @return The ring formed by the given atoms */ private IRing prepareRing(List vec, IAtomContainer mol) { // add the atoms in vec to the new ring int atomCount = vec.size(); IRing ring = mol.getBuilder().newInstance(IRing.class, atomCount); IAtom[] atoms = new IAtom[atomCount]; vec.toArray(atoms); ring.setAtoms(atoms); // add the bonds in mol to the new ring try { IBond b; for (int i = 0; i < atomCount - 1; i++) { b = mol.getBond(atoms[i], atoms[i + 1]); if (b != null) { ring.addBond(b); } else { logger.error("This should not happen."); } } b = mol.getBond(atoms[0], atoms[atomCount - 1]); if (b != null) { ring.addBond(b); } else { logger.error("This should not happen either."); } } catch (Exception exc) { logger.debug(exc); } logger.debug("found Ring ", ring); return ring; }
/**
 * Parses the CML input with the configured SAX parser. Parsed content is delivered to an
 * {@code EventCMLHandler} created for this reader.
 *
 * <p>Validation is switched off when the parser allows it; failing to do so is only logged as a
 * warning.
 *
 * @throws CDKException if reading fails, the document is not well-formed, or another SAX error
 *     occurs; the original exception is attached as the cause
 */
public void process() throws CDKException {
    logger.debug("Started parsing from input...");

    try {
        parser.setFeature("http://xml.org/sax/features/validation", false);
        logger.info("Deactivated validation");
    } catch (SAXException e) {
        logger.warn("Cannot deactivate validation.");
    }

    parser.setContentHandler(new EventCMLHandler(this, builder));
    parser.setEntityResolver(new CMLResolver());
    parser.setErrorHandler(new CMLErrorHandler());

    try {
        logger.debug("Parsing from Reader");
        parser.parse(new InputSource(input));
    } catch (IOException ioFailure) {
        String message = "Error while reading file: " + ioFailure.getMessage();
        logger.error(message);
        logger.debug(ioFailure);
        throw new CDKException(message, ioFailure);
    } catch (SAXParseException parseFailure) {
        // must be handled before the more general SAXException
        String message = "Found well-formedness error in line " + parseFailure.getLineNumber();
        logger.error(message);
        logger.debug(parseFailure);
        throw new CDKException(message, parseFailure);
    } catch (SAXException saxFailure) {
        String message = "Error while parsing XML: " + saxFailure.getMessage();
        logger.error(message);
        logger.debug(saxFailure);
        throw new CDKException(message, saxFailure);
    }
}
/**
 * Runs the given mapping algorithm on a reaction and returns the resulting delta.
 *
 * <p>Reactants and products are indexed by their position in the reaction, a game-theory
 * matrix is created, and the game-theory instance produced by {@code make} supplies both the
 * molecule mapping (stored in {@code reactionBlastMolMapping}) and the delta.
 *
 * @param reaction the reaction to map
 * @param theory   the mapping algorithm to use
 * @return the delta reported by the game theory, or {@code -1} if any exception occurred (the
 *     exception is logged)
 */
private synchronized double calRelation(IReaction reaction, IMappingAlgorithm theory) {
    try {
        Map<Integer, IAtomContainer> reactantsByIndex =
                synchronizedSortedMap(new TreeMap<Integer, IAtomContainer>());
        for (int idx = 0; idx < reaction.getReactantCount(); idx++) {
            reactantsByIndex.put(idx, reaction.getReactants().getAtomContainer(idx));
        }

        Map<Integer, IAtomContainer> productsByIndex =
                synchronizedSortedMap(new TreeMap<Integer, IAtomContainer>());
        for (int idx = 0; idx < reaction.getProductCount(); idx++) {
            productsByIndex.put(idx, reaction.getProducts().getAtomContainer(idx));
        }

        GameTheoryMatrix matrix = new GameTheoryMatrix(theory, reaction, removeHydrogen);
        IGameTheory game =
                make(theory, reaction, removeHydrogen, reactantsByIndex, productsByIndex, matrix);

        this.reactionBlastMolMapping = game.getReactionMolMapping();
        matrix.Clear();
        return game.getDelta();
    } catch (Exception e) {
        logger.error(e);
        return -1;
    }
}
/**
 * Clones this object via {@code super.clone()}.
 *
 * <p>Any exception raised by the superclass is logged and swallowed.
 *
 * @return the clone, or {@code null} if cloning failed
 */
public Object clone() throws CloneNotSupportedException {
    try {
        return super.clone();
    } catch (Exception exception) {
        logger.error("Could not clone DebugAtom: " + exception.getMessage(), exception);
        logger.debug(exception);
        return null;
    }
}
/**
 * Reports whether the underlying input is ready to be read.
 *
 * @return the value of {@code input.ready()}
 * @throws CDKException if querying the input fails; the original exception is the cause
 */
private boolean isReady() throws CDKException {
    final boolean ready;
    try {
        ready = input.ready();
    } catch (Exception cause) {
        String message = "Unexpected error while reading file: " + cause.getMessage();
        logger.error(message);
        logger.debug(cause);
        throw new CDKException(message, cause);
    }
    return ready;
}
/** Returns true if another IMolecule can be read. */ public boolean hasNext() { if (!nextAvailableIsKnown) { hasNext = false; // now try to parse the next Molecule try { if ((currentLine = input.readLine()) != null) { currentFormat = (IChemFormat) MDLFormat.getInstance(); StringBuffer buffer = new StringBuffer(); while (currentLine != null && !currentLine.equals("M END")) { // still in a molecule buffer.append(currentLine); buffer.append(System.getProperty("line.separator")); currentLine = input.readLine(); // do MDL molfile version checking if (currentLine.contains("V2000") || currentLine.contains("v2000")) { currentFormat = (IChemFormat) MDLV2000Format.getInstance(); } else if (currentLine.contains("V3000") || currentLine.contains("v3000")) { currentFormat = (IChemFormat) MDLV3000Format.getInstance(); } } buffer.append(currentLine); buffer.append(System.getProperty("line.separator")); logger.debug("MDL file part read: ", buffer); ISimpleChemObjectReader reader = factory.createReader(currentFormat); reader.setReader(new StringReader(buffer.toString())); if (currentFormat instanceof MDLV2000Format) { reader.addChemObjectIOListener(this); ((MDLV2000Reader) reader).customizeJob(); } nextMolecule = (IMolecule) reader.read(builder.newInstance(IMolecule.class)); // note that a molecule may have 0 atoms, but still // be useful (by having SD tags for example), so just // check for null'ness rather than atom count hasNext = nextMolecule != null; // now read the data part currentLine = input.readLine(); readDataBlockInto(nextMolecule); } else { hasNext = false; } } catch (Exception exception) { logger.error("Error while reading next molecule: " + exception.getMessage()); logger.debug(exception); hasNext = false; } if (!hasNext) nextMolecule = null; nextAvailableIsKnown = true; } return hasNext; }
private void init() { boolean success = false; // If JAXP is prefered (comes with Sun JVM 1.4.0 and higher) if (!success) { try { javax.xml.parsers.SAXParserFactory spf = javax.xml.parsers.SAXParserFactory.newInstance(); spf.setNamespaceAware(true); javax.xml.parsers.SAXParser saxParser = spf.newSAXParser(); parser = saxParser.getXMLReader(); logger.info("Using JAXP/SAX XML parser."); success = true; } catch (ParserConfigurationException | SAXException e) { logger.warn("Could not instantiate JAXP/SAX XML reader: ", e.getMessage()); logger.debug(e); } } // Aelfred is first alternative. if (!success) { try { parser = (XMLReader) this.getClass() .getClassLoader() .loadClass("gnu.xml.aelfred2.XmlReader") .newInstance(); logger.info("Using Aelfred2 XML parser."); success = true; } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { logger.warn("Could not instantiate Aelfred2 XML reader!"); logger.debug(e); } } // Xerces is second alternative if (!success) { try { parser = (XMLReader) this.getClass() .getClassLoader() .loadClass("org.apache.xerces.parsers.SAXParser") .newInstance(); logger.info("Using Xerces XML parser."); success = true; } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { logger.warn("Could not instantiate Xerces XML reader!"); logger.debug(e); } } if (!success) { logger.error("Could not instantiate any XML parser!"); } }
/**
 * Reads the next line from the input and logs it at debug level.
 *
 * @return the value returned by {@code input.readLine()}
 * @throws CDKException if reading fails; the original exception is the cause
 */
private String readLine() throws CDKException {
    try {
        String nextLine = input.readLine();
        logger.debug("read line: " + nextLine);
        return nextLine;
    } catch (Exception cause) {
        String message = "Unexpected error while reading file: " + cause.getMessage();
        logger.error(message);
        logger.debug(cause);
        throw new CDKException(message, cause);
    }
}
/**
 * Parses a dictionary from the given reader.
 *
 * <p>An XML reader is created, validation is switched off when possible, and the document is
 * parsed with a {@code DictionaryHandler}. Failures are logged, never thrown.
 *
 * @param reader source of the dictionary XML
 * @return the dictionary built by the handler, or {@code null} if no parser could be created or
 *     parsing failed with an {@code IOException} or {@code SAXException}
 */
public static Dictionary unmarshal(Reader reader) {
    ILoggingTool logger = LoggingToolFactory.createLoggingTool(Dictionary.class);
    DictionaryHandler handler = new DictionaryHandler();

    XMLReader parser = null;
    try {
        parser = XMLReaderFactory.createXMLReader();
        logger.debug("Using " + parser);
    } catch (Exception e) {
        logger.error("Could not instantiate any JAXP parser!");
        logger.debug(e);
    }

    if (parser == null) {
        logger.debug("parser object was null!");
        return null;
    }

    try {
        parser.setFeature("http://xml.org/sax/features/validation", false);
        logger.debug("Deactivated validation");
    } catch (SAXException e) {
        logger.warn("Cannot deactivate validation.");
        logger.debug(e);
    }
    parser.setContentHandler(handler);

    try {
        parser.parse(new InputSource(reader));
        return handler.getDictionary();
    } catch (IOException e) {
        logger.error("IOException: " + e.toString());
        logger.debug(e);
    } catch (SAXException saxe) {
        logger.error("SAXException: " + saxe.getClass().getName());
        logger.debug(saxe);
    }
    return null;
}
/** * The method returns partial charges assigned to an heavy atom through MMFF94 method. It is * needed to call the addExplicitHydrogensToSatisfyValency method from the class * tools.HydrogenAdder. * * @param atom The IAtom for which the DescriptorValue is requested * @param org AtomContainer * @return partial charge of parameter atom */ @Override public DescriptorValue calculate(IAtom atom, IAtomContainer org) { if (atom.getProperty(CHARGE_CACHE) == null) { IAtomContainer copy; try { copy = org.clone(); } catch (CloneNotSupportedException e) { return new DescriptorValue( getSpecification(), getParameterNames(), getParameters(), new DoubleResult(Double.NaN), NAMES); } for (IAtom a : org.atoms()) { if (a.getImplicitHydrogenCount() == null || a.getImplicitHydrogenCount() != 0) { logger.error("Hydrogens must be explict for MMFF charge calculation"); return new DescriptorValue( getSpecification(), getParameterNames(), getParameters(), new DoubleResult(Double.NaN), NAMES); } } if (!mmff.assignAtomTypes(copy)) logger.warn("One or more atoms could not be assigned an MMFF atom type"); mmff.partialCharges(copy); mmff.clearProps(copy); // cache charges for (int i = 0; i < org.getAtomCount(); i++) { org.getAtom(i).setProperty(CHARGE_CACHE, copy.getAtom(i).getCharge()); } } return new DescriptorValue( getSpecification(), getParameterNames(), getParameters(), new DoubleResult(atom.getProperty(CHARGE_CACHE, Double.class)), NAMES); }
/**
 * Generates a shortest path based BitSet fingerprint for the given AtomContainer.
 *
 * <p>The container is cloned, aromaticity is applied to the clone, and unique paths are added to
 * the bit set either for the whole clone or, when it is not connected, for each of its
 * partitioned components.
 *
 * @param ac The AtomContainer for which a fingerprint is generated
 * @return A fingerprint wrapping the {@link BitSet} that was filled
 * @throws CDKException if the container cannot be cloned, or if there is an error in
 *     aromaticity perception or other CDK functions
 */
@Override
public IBitFingerprint getBitFingerprint(IAtomContainer ac) throws CDKException {
    final IAtomContainer atomContainer;
    try {
        atomContainer = (IAtomContainer) ac.clone();
    } catch (CloneNotSupportedException ex) {
        // Without a clone there is nothing to work on; continuing would only hit a
        // NullPointerException further down, so report the failure to the caller.
        logger.error("Failed to clone the molecule:", ex);
        throw new CDKException("Failed to clone the molecule: " + ex.getMessage(), ex);
    }

    Aromaticity.cdkLegacy().apply(atomContainer);
    BitSet bitSet = new BitSet(fingerprintLength);
    if (!ConnectivityChecker.isConnected(atomContainer)) {
        IAtomContainerSet partitionedMolecules =
                ConnectivityChecker.partitionIntoMolecules(atomContainer);
        for (IAtomContainer container : partitionedMolecules.atomContainers()) {
            addUniquePath(container, bitSet);
        }
    } else {
        addUniquePath(atomContainer, bitSet);
    }
    return new BitSetFingerprint(bitSet);
}
/**
 * Builds and shows the window content: a text area with the product name and version at the
 * top, the logo in the centre, and a text area with project information at the bottom.
 *
 * <p>If the logo cannot be loaded the failure is logged and an empty label is shown instead.
 */
public void doInit() {
    String version = JCPPropertyHandler.getInstance(true).getVersion();
    String headline =
            "JChemPaint " + version + "\n"
                    + GT._("An open-source editor for 2D chemical structures.");
    String footer =
            GT._("An OpenScience project.") + "\n"
                    + GT._("See 'http://jchempaint.github.com' for more information.");

    getContentPane().setLayout(new BorderLayout());
    getContentPane().setBackground(Color.white);

    // start with an empty label; it is replaced when the logo can be loaded
    JLabel logoLabel = new JLabel();
    try {
        JCPPropertyHandler properties = JCPPropertyHandler.getInstance(true);
        URL logoUrl = properties.getResource("jcplogo" + JCPAction.imageSuffix);
        logoLabel = new JLabel(new ImageIcon(logoUrl));
    } catch (Exception exception) {
        logger.error("Cannot add JCP logo: " + exception.getMessage());
        logger.debug(exception);
    }
    logoLabel.setBackground(Color.white);

    Border whiteFrame = BorderFactory.createLineBorder(Color.white, 5);

    JTextArea headlineArea = new JTextArea(headline);
    headlineArea.setBorder(whiteFrame);
    headlineArea.setEditable(false);

    JTextArea footerArea = new JTextArea(footer);
    footerArea.setEditable(false);
    footerArea.setBorder(whiteFrame);

    getContentPane().add(logoLabel, BorderLayout.CENTER);
    getContentPane().add(headlineArea, BorderLayout.NORTH);
    getContentPane().add(footerArea, BorderLayout.SOUTH);

    pack();
    setVisible(true);
}
/**
 * Reads a ChemFile from input in MDL SDF format.
 *
 * <p>The first molecule is read with {@code readAtomContainer} and placed in a first model,
 * which is added to the sequence even when no molecule was returned. After that the input is
 * consumed line by line:
 *
 * <ul>
 *   <li>a {@code $$$$} line triggers reading the next molecule, which (if not null) gets a model
 *       of its own;
 *   <li>any other line, while a current molecule exists, is treated as a data header. The text
 *       between {@code <} and the following {@code >} is taken as field name, and the following
 *       lines are collected as its value until a blank line, a {@code $$$$} line or the end of
 *       the input. The value is stored as a property on the current molecule.
 * </ul>
 *
 * <p>Finally the input is closed and the sequence is added to {@code chemFile}.
 *
 * @param chemFile the chem file to fill; its builder creates all new objects
 * @return the same {@code chemFile}, with one chem sequence added
 * @throws CDKException if parsing fails or the input cannot be closed
 */
private IChemFile readChemFile(IChemFile chemFile) throws CDKException {
    IChemSequence chemSequence = chemFile.getBuilder().newInstance(IChemSequence.class);
    IChemModel chemModel = chemFile.getBuilder().newInstance(IChemModel.class);
    IAtomContainerSet setOfMolecules = chemFile.getBuilder().newInstance(IAtomContainerSet.class);
    IAtomContainer m = readAtomContainer(chemFile.getBuilder().newInstance(IAtomContainer.class));
    // NOTE(review): m is declared as IAtomContainer, so the instanceof test and the cast
    // add nothing beyond the null check.
    if (m != null && m instanceof IAtomContainer) {
        setOfMolecules.addAtomContainer((IAtomContainer) m);
    }
    // the first model is added unconditionally, even with an empty molecule set
    chemModel.setMoleculeSet(setOfMolecules);
    chemSequence.addChemModel(chemModel);
    // fresh set and model for the next molecule
    setOfMolecules = chemFile.getBuilder().newInstance(IAtomContainerSet.class);
    chemModel = chemFile.getBuilder().newInstance(IChemModel.class);
    String str;
    try {
        String line;
        while ((line = input.readLine()) != null) {
            logger.debug("line: ", line);
            // apparently, this is a SDF file, continue with
            // reading mol files
            str = new String(line);
            if (str.equals("$$$$")) {
                // record separator: the next molecule follows
                m = readAtomContainer(chemFile.getBuilder().newInstance(IAtomContainer.class));
                if (m != null && m instanceof IAtomContainer) {
                    setOfMolecules.addAtomContainer((IAtomContainer) m);
                    chemModel.setMoleculeSet(setOfMolecules);
                    chemSequence.addChemModel(chemModel);
                    setOfMolecules = chemFile.getBuilder().newInstance(IAtomContainerSet.class);
                    chemModel = chemFile.getBuilder().newInstance(IChemModel.class);
                }
            } else {
                // here the stuff between 'M END' and '$$$$'
                if (m != null) {
                    // ok, the first lines should start with '>'
                    String fieldName = null;
                    if (str.startsWith("> ")) {
                        // ok, should extract the field name
                        // NOTE(review): the result of this substring call is discarded.
                        str.substring(2);
                        // the field name is the text between '<' and the first '>' after it;
                        // index2 is relative to index
                        int index = str.indexOf("<");
                        if (index != -1) {
                            int index2 = str.substring(index).indexOf(">");
                            if (index2 != -1) {
                                fieldName = str.substring(index + 1, index + index2);
                            }
                        }
                    }
                    // NOTE(review): line was just checked non-null by the loop condition and
                    // has not been reassigned, so this branch cannot be taken here.
                    if (line == null) {
                        throw new CDKException("Expecting data line here, but found null!");
                    }
                    StringBuilder data = new StringBuilder();
                    int dataLineCount = 0;
                    boolean lineIsContinued = false;
                    // collect the value lines that follow the data header
                    while ((line = input.readLine()) != null) {
                        if (line.equals(" ") && dataLineCount == 0) {
                            // apparently a file can have a field whose value is a single space.
                            // we check for it *before* trimming it. ideally we should check for
                            // any length of whitespace.
                            // In addition some SD files have the blank line after the value line
                            // contain a space, rather than being a true blank line. So we only
                            // store a blank value line if it's the first line after the key line
                            data.append(line);
                            lineIsContinued = false;
                            dataLineCount++;
                            // NOTE(review): dataLineCount is 1 at this point, so the separator
                            // below is never appended in this branch.
                            if (!lineIsContinued && dataLineCount > 1)
                                data.append(System.getProperty("line.separator"));
                            continue;
                        }
                        line = line.trim();
                        // a blank line ends the value
                        if (line.length() == 0) break;
                        if (line.equals("$$$$")) {
                            logger.error("Expecting data line here, but found end of molecule: ", line);
                            break;
                        }
                        logger.debug("data line: ", line);
                        lineIsContinued = false; // reset property
                        dataLineCount++;
                        // preserve newlines, unless the line is exactly 80 chars;
                        // in that case it is assumed to continue on the next line.
                        // See MDL documentation.
                        // NOTE(review): lineIsContinued was reset two statements above, so the
                        // separator is in fact appended before every line after the first,
                        // regardless of the 80-character check below -- confirm the intent.
                        if (!lineIsContinued && dataLineCount > 1)
                            data.append(System.getProperty("line.separator"));
                        // add the data line
                        data.append(line);
                        // check if the line will be continued on the next line
                        if (line.length() == 80) lineIsContinued = true;
                    }
                    // lines without a recognizable field name are read but not stored
                    if (fieldName != null) {
                        logger.info("fieldName, data: ", fieldName, ", ", data);
                        m.setProperty(fieldName, data.toString());
                    }
                }
            }
        }
    } catch (CDKException cdkexc) {
        // pass CDK exceptions through unchanged instead of re-wrapping them below
        throw cdkexc;
    } catch (Exception exception) {
        String error = "Error while parsing SDF";
        logger.error(error);
        logger.debug(exception);
        throw new CDKException(error, exception);
    }
    // the input is closed only when parsing completed without an exception
    try {
        input.close();
    } catch (Exception exc) {
        String error = "Error while closing file: " + exc.getMessage();
        logger.error(error);
        throw new CDKException(error, exc);
    }
    chemFile.addChemSequence(chemSequence);
    return chemFile;
}
private IReaction readReaction(IChemObjectBuilder builder) throws CDKException { IReaction reaction = builder.newInstance(IReaction.class); readLine(); // first line should be $RXN readLine(); // second line readLine(); // third line readLine(); // fourth line int reactantCount = 0; int productCount = 0; boolean foundCOUNTS = false; while (isReady() && !foundCOUNTS) { String command = readCommand(); if (command.startsWith("COUNTS")) { StringTokenizer tokenizer = new StringTokenizer(command); try { tokenizer.nextToken(); reactantCount = Integer.valueOf(tokenizer.nextToken()).intValue(); logger.info("Expecting " + reactantCount + " reactants in file"); productCount = Integer.valueOf(tokenizer.nextToken()).intValue(); logger.info("Expecting " + productCount + " products in file"); } catch (Exception exception) { logger.debug(exception); throw new CDKException("Error while counts line of RXN file", exception); } foundCOUNTS = true; } else { logger.warn("Waiting for COUNTS line, but found: " + command); } } // now read the reactants for (int i = 1; i <= reactantCount; i++) { StringBuffer molFile = new StringBuffer(); String announceMDLFileLine = readCommand(); if (!announceMDLFileLine.equals("BEGIN REACTANT")) { String error = "Excepted start of reactant, but found: " + announceMDLFileLine; logger.error(error); throw new CDKException(error); } String molFileLine = ""; while (!molFileLine.endsWith("END REACTANT")) { molFileLine = readLine(); molFile.append(molFileLine); molFile.append(System.getProperty("line.separator")); } ; try { // read MDL molfile content MDLV3000Reader reader = new MDLV3000Reader(new StringReader(molFile.toString()), super.mode); IAtomContainer reactant = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class)); // add reactant reaction.addReactant(reactant); } catch (Exception exception) { String error = "Error while reading reactant: " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new 
CDKException(error, exception); } } // now read the products for (int i = 1; i <= productCount; i++) { StringBuffer molFile = new StringBuffer(); String announceMDLFileLine = readCommand(); if (!announceMDLFileLine.equals("BEGIN PRODUCT")) { String error = "Excepted start of product, but found: " + announceMDLFileLine; logger.error(error); throw new CDKException(error); } String molFileLine = ""; while (!molFileLine.endsWith("END PRODUCT")) { molFileLine = readLine(); molFile.append(molFileLine); molFile.append(System.getProperty("line.separator")); } ; try { // read MDL molfile content MDLV3000Reader reader = new MDLV3000Reader(new StringReader(molFile.toString())); IAtomContainer product = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class)); // add product reaction.addProduct(product); } catch (Exception exception) { String error = "Error while reading product: " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } return reaction; }
/** * Choose any possible quadruple of the set of atoms in ac and establish all of the possible * bonding schemes according to Faulon's equations. */ public static List sample(IAtomContainer ac) { logger.debug("RandomGenerator->mutate() Start"); List structures = new ArrayList(); int nrOfAtoms = ac.getAtomCount(); double a11 = 0, a12 = 0, a22 = 0, a21 = 0; double b11 = 0, lowerborder = 0, upperborder = 0; double b12 = 0; double b21 = 0; double b22 = 0; double[] cmax = new double[4]; double[] cmin = new double[4]; IAtomContainer newAc = null; IAtom ax1 = null, ax2 = null, ay1 = null, ay2 = null; IBond b1 = null, b2 = null, b3 = null, b4 = null; // int[] choices = new int[3]; /* We need at least two non-zero bonds in order to be successful */ int nonZeroBondsCounter = 0; for (int x1 = 0; x1 < nrOfAtoms; x1++) { for (int x2 = x1 + 1; x2 < nrOfAtoms; x2++) { for (int y1 = x2 + 1; y1 < nrOfAtoms; y1++) { for (int y2 = y1 + 1; y2 < nrOfAtoms; y2++) { nonZeroBondsCounter = 0; ax1 = ac.getAtom(x1); ay1 = ac.getAtom(y1); ax2 = ac.getAtom(x2); ay2 = ac.getAtom(y2); /* Get four bonds for these four atoms */ b1 = ac.getBond(ax1, ay1); if (b1 != null) { a11 = BondManipulator.destroyBondOrder(b1.getOrder()); nonZeroBondsCounter++; } else { a11 = 0; } b2 = ac.getBond(ax1, ay2); if (b2 != null) { a12 = BondManipulator.destroyBondOrder(b2.getOrder()); nonZeroBondsCounter++; } else { a12 = 0; } b3 = ac.getBond(ax2, ay1); if (b3 != null) { a21 = BondManipulator.destroyBondOrder(b3.getOrder()); nonZeroBondsCounter++; } else { a21 = 0; } b4 = ac.getBond(ax2, ay2); if (b4 != null) { a22 = BondManipulator.destroyBondOrder(b4.getOrder()); nonZeroBondsCounter++; } else { a22 = 0; } if (nonZeroBondsCounter > 1) { /* Compute the range for b11 (see Faulons formulae for details) */ cmax[0] = 0; cmax[1] = a11 - a22; cmax[2] = a11 + a12 - 3; cmax[3] = a11 + a21 - 3; cmin[0] = 3; cmin[1] = a11 + a12; cmin[2] = a11 + a21; cmin[3] = a11 - a22 + 3; lowerborder = MathTools.max(cmax); upperborder = 
MathTools.min(cmin); for (b11 = lowerborder; b11 <= upperborder; b11++) { if (b11 != a11) { b12 = a11 + a12 - b11; b21 = a11 + a21 - b11; b22 = a22 - a11 + b11; logger.debug("Trying atom combination : " + x1 + ":" + x2 + ":" + y1 + ":" + y2); try { newAc = (IAtomContainer) ac.clone(); change(newAc, x1, y1, x2, y2, b11, b12, b21, b22); if (ConnectivityChecker.isConnected(newAc)) { structures.add(newAc); } else { logger.debug("not connected"); } } catch (CloneNotSupportedException e) { logger.error("Cloning exception: " + e.getMessage()); logger.debug(e); } } } } } } } } return structures; }
/** * Read an IAtomContainer from a file in MDL sd format * * @return The Molecule that was read from the MDL file. */ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException { logger.debug("Reading new molecule"); IAtomContainer outputContainer = null; int linecount = 0; int atoms = 0; int bonds = 0; int atom1 = 0; int atom2 = 0; int order = 0; IBond.Stereo stereo = (IBond.Stereo) CDKConstants.UNSET; int RGroupCounter = 1; int Rnumber = 0; String[] rGroup = null; double x = 0.0; double y = 0.0; double z = 0.0; double totalX = 0.0; double totalY = 0.0; double totalZ = 0.0; String title = null; String remark = null; // int[][] conMat = new int[0][0]; // String help; IAtom atom; String line = ""; // A map to keep track of R# atoms so that RGP line can be parsed Map<Integer, IPseudoAtom> rAtoms = new HashMap<Integer, IPseudoAtom>(); try { IsotopeFactory isotopeFactory = Isotopes.getInstance(); logger.info("Reading header"); line = input.readLine(); linecount++; if (line == null) { return null; } logger.debug("Line " + linecount + ": " + line); if (line.startsWith("$$$$")) { logger.debug("File is empty, returning empty molecule"); return molecule; } if (line.length() > 0) { title = line; } line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); if (line.length() > 0) { remark = line; } logger.info("Reading rest of file"); line = input.readLine(); linecount++; logger.debug("Line " + linecount + ": " + line); // if the line is empty we hav a problem - either a malformed // molecule entry or just extra new lines at the end of the file if (line.length() == 0) { // read till the next $$$$ or EOF while (true) { line = input.readLine(); linecount++; if (line == null) { return null; } if (line.startsWith("$$$$")) { return molecule; // an empty molecule } } } // check the CT block version if (line.contains("V3000") || 
line.contains("v3000")) { handleError("This file must be read with the MDLV3000Reader."); } else if (!line.contains("V2000") && !line.contains("v2000")) { handleError("This file must be read with the MDLReader."); } atoms = Integer.parseInt(line.substring(0, 3).trim()); List<IAtom> atomList = new ArrayList<IAtom>(); logger.debug("Atomcount: " + atoms); bonds = Integer.parseInt(line.substring(3, 6).trim()); logger.debug("Bondcount: " + bonds); List<IBond> bondList = new ArrayList<IBond>(); // used for applying the MDL valence model int[] explicitValence = new int[atoms]; // read ATOM block logger.info("Reading atom block"); atomsByLinePosition = new ArrayList<IAtom>(); atomsByLinePosition.add(null); // 0 is not a valid position int atomBlockLineNumber = 0; for (int f = 0; f < atoms; f++) { line = input.readLine(); linecount++; atomBlockLineNumber++; Matcher trailingSpaceMatcher = TRAILING_SPACE.matcher(line); if (trailingSpaceMatcher.find()) { handleError( "Trailing space found", linecount, trailingSpaceMatcher.start(), trailingSpaceMatcher.end()); line = trailingSpaceMatcher.replaceAll(""); } x = Double.parseDouble(line.substring(0, 10).trim()); y = Double.parseDouble(line.substring(10, 20).trim()); z = Double.parseDouble(line.substring(20, 30).trim()); // *all* values should be zero, not just the sum totalX += Math.abs(x); totalY += Math.abs(y); totalZ += Math.abs(z); logger.debug("Coordinates: " + x + "; " + y + "; " + z); String element = line.substring(31, Math.min(line.length(), 34)).trim(); if (line.length() < 34) { handleError( "Element atom type does not follow V2000 format type should of length three" + " and padded with space if required", linecount, 31, 34); } logger.debug("Atom type: ", element); if (isotopeFactory.isElement(element)) { atom = isotopeFactory.configure(molecule.getBuilder().newInstance(IAtom.class, element)); } else if ("A".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if 
("Q".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("*".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("LP".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("L".equals(element)) { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } else if (element.equals("R") || (element.length() > 0 && element.charAt(0) == 'R')) { logger.debug("Atom ", element, " is not an regular element. Creating a PseudoAtom."); // check if the element is R rGroup = element.split("^R"); atom = null; if (rGroup.length > 1) { try { Rnumber = Integer.valueOf(rGroup[(rGroup.length - 1)]); RGroupCounter = Rnumber; element = "R" + Rnumber; atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } catch (Exception ex) { // This happens for atoms labeled "R#". // The Rnumber may be set later on, using RGP line atom = molecule.getBuilder().newInstance(IPseudoAtom.class, "R"); rAtoms.put(atomBlockLineNumber, (IPseudoAtom) atom); } } else { atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); } } else { handleError( "Invalid element type. 
Must be an existing " + "element, or one in: A, Q, L, LP, *.", linecount, 32, 35); atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element); atom.setSymbol(element); } // store as 3D for now, convert to 2D (if totalZ == 0.0) later atom.setPoint3d(new Point3d(x, y, z)); // parse further fields if (line.length() >= 36) { String massDiffString = line.substring(34, 36).trim(); logger.debug("Mass difference: ", massDiffString); if (!(atom instanceof IPseudoAtom)) { try { int massDiff = Integer.parseInt(massDiffString); if (massDiff != 0) { IIsotope major = Isotopes.getInstance().getMajorIsotope(element); atom.setMassNumber(major.getMassNumber() + massDiff); } } catch (Exception exception) { handleError("Could not parse mass difference field.", linecount, 35, 37, exception); } } else { logger.error("Cannot set mass difference for a non-element!"); } } else { handleError("Mass difference is missing", linecount, 34, 36); } // set the stereo partiy Integer parity = line.length() > 41 ? 
Character.digit(line.charAt(41), 10) : 0; atom.setStereoParity(parity); if (line.length() >= 51) { String valenceString = removeNonDigits(line.substring(48, 51)); logger.debug("Valence: ", valenceString); if (!(atom instanceof IPseudoAtom)) { try { int valence = Integer.parseInt(valenceString); if (valence != 0) { // 15 is defined as 0 in mol files if (valence == 15) atom.setValency(0); else atom.setValency(valence); } } catch (Exception exception) { handleError( "Could not parse valence information field", linecount, 49, 52, exception); } } else { logger.error("Cannot set valence information for a non-element!"); } } if (line.length() >= 39) { String chargeCodeString = line.substring(36, 39).trim(); logger.debug("Atom charge code: ", chargeCodeString); int chargeCode = Integer.parseInt(chargeCodeString); if (chargeCode == 0) { // uncharged species } else if (chargeCode == 1) { atom.setFormalCharge(+3); } else if (chargeCode == 2) { atom.setFormalCharge(+2); } else if (chargeCode == 3) { atom.setFormalCharge(+1); } else if (chargeCode == 4) { } else if (chargeCode == 5) { atom.setFormalCharge(-1); } else if (chargeCode == 6) { atom.setFormalCharge(-2); } else if (chargeCode == 7) { atom.setFormalCharge(-3); } } else { handleError("Atom charge is missing", linecount, 36, 39); } try { // read the mmm field as position 61-63 String reactionAtomIDString = line.substring(60, 63).trim(); logger.debug("Parsing mapping id: ", reactionAtomIDString); try { int reactionAtomID = Integer.parseInt(reactionAtomIDString); if (reactionAtomID != 0) { atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, reactionAtomID); } } catch (Exception exception) { logger.error("Mapping number ", reactionAtomIDString, " is not an integer."); logger.debug(exception); } } catch (Exception exception) { // older mol files don't have all these fields... logger.warn("A few fields are missing. Older MDL MOL file?"); } // shk3: This reads shifts from after the molecule. 
I don't think this is an official // format, but I saw it frequently 80=>78 for alk if (line.length() >= 78) { double shift = Double.parseDouble(line.substring(69, 80).trim()); atom.setProperty("first shift", shift); } if (line.length() >= 87) { double shift = Double.parseDouble(line.substring(79, 87).trim()); atom.setProperty("second shift", shift); } atomList.add(atom); atomsByLinePosition.add(atom); } // convert to 2D, if totalZ == 0 if (totalX == 0.0 && totalY == 0.0 && totalZ == 0.0) { logger.info("All coordinates are 0.0"); if (atomList.size() == 1) { atomList.get(0).setPoint2d(new Point2d(x, y)); } else { for (IAtom atomToUpdate : atomList) { atomToUpdate.setPoint3d(null); } } } else if (totalZ == 0.0 && !forceReadAs3DCoords.isSet()) { logger.info("Total 3D Z is 0.0, interpreting it as a 2D structure"); for (IAtom atomToUpdate : atomList) { Point3d p3d = atomToUpdate.getPoint3d(); if (p3d != null) { atomToUpdate.setPoint2d(new Point2d(p3d.x, p3d.y)); atomToUpdate.setPoint3d(null); } } } // read BOND block logger.info("Reading bond block"); int queryBondCount = 0; for (int f = 0; f < bonds; f++) { line = input.readLine(); linecount++; atom1 = Integer.parseInt(line.substring(0, 3).trim()); atom2 = Integer.parseInt(line.substring(3, 6).trim()); order = Integer.parseInt(line.substring(6, 9).trim()); if (line.length() >= 12) { int mdlStereo = line.length() > 12 ? 
Integer.parseInt(line.substring(9, 12).trim()) : Integer.parseInt(line.substring(9).trim()); if (mdlStereo == 1) { // MDL up bond stereo = IBond.Stereo.UP; } else if (mdlStereo == 6) { // MDL down bond stereo = IBond.Stereo.DOWN; } else if (mdlStereo == 0) { if (order == 2) { // double bond stereo defined by coordinates stereo = IBond.Stereo.E_Z_BY_COORDINATES; } else { // bond has no stereochemistry stereo = IBond.Stereo.NONE; } } else if (mdlStereo == 3 && order == 2) { // unknown E/Z stereochemistry stereo = IBond.Stereo.E_OR_Z; } else if (mdlStereo == 4) { // MDL bond undefined stereo = IBond.Stereo.UP_OR_DOWN; } } else { handleError("Missing expected stereo field at line: ", linecount, 10, 12); } if (logger.isDebugEnabled()) { logger.debug("Bond: " + atom1 + " - " + atom2 + "; order " + order); } // interpret CTfile's special bond orders IAtom a1 = atomList.get(atom1 - 1); IAtom a2 = atomList.get(atom2 - 1); IBond newBond = null; if (order >= 1 && order <= 3) { IBond.Order cdkOrder = IBond.Order.SINGLE; if (order == 2) cdkOrder = IBond.Order.DOUBLE; if (order == 3) cdkOrder = IBond.Order.TRIPLE; if (stereo != null) { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder, stereo); } else { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder); } } else if (order == 4) { // aromatic bond if (stereo != null) { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET, stereo); } else { newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET); } // mark both atoms and the bond as aromatic and raise the SINGLE_OR_DOUBLE-flag newBond.setFlag(CDKConstants.SINGLE_OR_DOUBLE, true); newBond.setFlag(CDKConstants.ISAROMATIC, true); a1.setFlag(CDKConstants.ISAROMATIC, true); a2.setFlag(CDKConstants.ISAROMATIC, true); } else { queryBondCount++; newBond = new CTFileQueryBond(molecule.getBuilder()); IAtom[] bondAtoms = {a1, a2}; newBond.setAtoms(bondAtoms); newBond.setOrder(null); 
CTFileQueryBond.Type queryBondType = null; switch (order) { case 5: queryBondType = CTFileQueryBond.Type.SINGLE_OR_DOUBLE; break; case 6: queryBondType = CTFileQueryBond.Type.SINGLE_OR_AROMATIC; break; case 7: queryBondType = CTFileQueryBond.Type.DOUBLE_OR_AROMATIC; break; case 8: queryBondType = CTFileQueryBond.Type.ANY; break; } ((CTFileQueryBond) newBond).setType(queryBondType); newBond.setStereo(stereo); } bondList.add((newBond)); // add the bond order to the explicit valence for each atom if (newBond.getOrder() != null && newBond.getOrder() != IBond.Order.UNSET) { explicitValence[atom1 - 1] += newBond.getOrder().numeric(); explicitValence[atom2 - 1] += newBond.getOrder().numeric(); } else { explicitValence[atom1 - 1] = Integer.MIN_VALUE; explicitValence[atom2 - 1] = Integer.MIN_VALUE; } } if (queryBondCount == 0) outputContainer = molecule; else { outputContainer = new QueryAtomContainer(molecule.getBuilder()); } outputContainer.setProperty(CDKConstants.TITLE, title); outputContainer.setProperty(CDKConstants.REMARK, remark); for (IAtom at : atomList) { outputContainer.addAtom(at); } for (IBond bnd : bondList) { outputContainer.addBond(bnd); } // read PROPERTY block logger.info("Reading property block"); while (true) { line = input.readLine(); linecount++; if (line == null) { handleError("The expected property block is missing!", linecount, 0, 0); } if (line.startsWith("M END")) break; boolean lineRead = false; if (line.startsWith("M CHG")) { // FIXME: if this is encountered for the first time, all // atom charges should be set to zero first! 
int infoCount = Integer.parseInt(line.substring(6, 9).trim()); StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { String token = st.nextToken(); int atomNumber = Integer.parseInt(token.trim()); token = st.nextToken(); int charge = Integer.parseInt(token.trim()); outputContainer.getAtom(atomNumber - 1).setFormalCharge(charge); } } else if (line.matches("A\\s{1,4}\\d+")) { // Reads the pseudo atom property from the mol file // The atom number of the to replaced atom int aliasAtomNumber = Integer.parseInt(line.replaceFirst("A\\s{1,4}", "")) - RGroupCounter; line = input.readLine(); linecount++; String[] aliasArray = line.split("\\\\"); // name of the alias atom like R1 or R2 etc. String alias = ""; for (int i = 0; i < aliasArray.length; i++) { alias += aliasArray[i]; } IAtom aliasAtom = outputContainer.getAtom(aliasAtomNumber); // skip if already a pseudoatom if (aliasAtom instanceof IPseudoAtom) { ((IPseudoAtom) aliasAtom).setLabel(alias); continue; } IAtom newPseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, alias); if (aliasAtom.getPoint2d() != null) { newPseudoAtom.setPoint2d(aliasAtom.getPoint2d()); } if (aliasAtom.getPoint3d() != null) { newPseudoAtom.setPoint3d(aliasAtom.getPoint3d()); } outputContainer.addAtom(newPseudoAtom); List<IBond> bondsOfAliasAtom = outputContainer.getConnectedBondsList(aliasAtom); for (int i = 0; i < bondsOfAliasAtom.size(); i++) { IBond bondOfAliasAtom = bondsOfAliasAtom.get(i); IAtom connectedToAliasAtom = bondOfAliasAtom.getConnectedAtom(aliasAtom); IBond newBond = bondOfAliasAtom.getBuilder().newInstance(IBond.class); newBond.setAtoms(new IAtom[] {connectedToAliasAtom, newPseudoAtom}); newBond.setOrder(bondOfAliasAtom.getOrder()); outputContainer.addBond(newBond); outputContainer.removeBond(aliasAtom, connectedToAliasAtom); } outputContainer.removeAtom(aliasAtom); RGroupCounter++; } else if (line.startsWith("M ISO")) { try { String countString = line.substring(6, 
10).trim(); int infoCount = Integer.parseInt(countString); StringTokenizer st = new StringTokenizer(line.substring(10)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); int absMass = Integer.parseInt(st.nextToken().trim()); if (absMass != 0) { IAtom isotope = outputContainer.getAtom(atomNumber - 1); isotope.setMassNumber(absMass); } } } catch (NumberFormatException exception) { String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError( "NumberFormatException in isotope information.", linecount, 7, 11, exception); } } else if (line.startsWith("M RAD")) { try { String countString = line.substring(6, 9).trim(); int infoCount = Integer.parseInt(countString); StringTokenizer st = new StringTokenizer(line.substring(9)); for (int i = 1; i <= infoCount; i++) { int atomNumber = Integer.parseInt(st.nextToken().trim()); int spinMultiplicity = Integer.parseInt(st.nextToken().trim()); MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE; if (spinMultiplicity > 0) { IAtom radical = outputContainer.getAtom(atomNumber - 1); switch (spinMultiplicity) { case 1: spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET; break; case 2: spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET; break; case 3: spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET; break; default: logger.debug("Invalid spin multiplicity found: " + spinMultiplicity); break; } for (int j = 0; j < spin.getSingleElectrons(); j++) { outputContainer.addSingleElectron( molecule.getBuilder().newInstance(ISingleElectron.class, radical)); } } } } catch (NumberFormatException exception) { String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError( "NumberFormatException in radical information", linecount, 7, 10, exception); } } else if (line.startsWith("G ")) { try { 
String atomNumberString = line.substring(3, 6).trim(); int atomNumber = Integer.parseInt(atomNumberString); // String whatIsThisString = line.substring(6,9).trim(); String atomName = input.readLine(); // convert Atom into a PseudoAtom IAtom prevAtom = outputContainer.getAtom(atomNumber - 1); IPseudoAtom pseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, atomName); if (prevAtom.getPoint2d() != null) { pseudoAtom.setPoint2d(prevAtom.getPoint2d()); } if (prevAtom.getPoint3d() != null) { pseudoAtom.setPoint3d(prevAtom.getPoint3d()); } AtomContainerManipulator.replaceAtomByAtom(molecule, prevAtom, pseudoAtom); } catch (NumberFormatException exception) { String error = "Error (" + exception.toString() + ") while parsing line " + linecount + ": " + line + " in property block."; logger.error(error); handleError("NumberFormatException in group information", linecount, 4, 7, exception); } } else if (line.startsWith("M RGP")) { StringTokenizer st = new StringTokenizer(line); // Ignore first 3 tokens (overhead). st.nextToken(); st.nextToken(); st.nextToken(); // Process the R group numbers as defined in RGP line. while (st.hasMoreTokens()) { Integer position = new Integer(st.nextToken()); Rnumber = new Integer(st.nextToken()); IPseudoAtom pseudoAtom = rAtoms.get(position); if (pseudoAtom != null) { pseudoAtom.setLabel("R" + Rnumber); } } } if (line.startsWith("V ")) { Integer atomNumber = new Integer(line.substring(3, 6).trim()); IAtom atomWithComment = outputContainer.getAtom(atomNumber - 1); atomWithComment.setProperty(CDKConstants.COMMENT, line.substring(7)); } if (!lineRead) { logger.warn("Skipping line in property block: ", line); } } if (interpretHydrogenIsotopes.isSet()) { fixHydrogenIsotopes(molecule, isotopeFactory); } // note: apply the valence model last so that all fixes (i.e. 
hydrogen // isotopes) are in place for (int i = 0; i < atoms; i++) { applyMDLValenceModel(outputContainer.getAtom(i), explicitValence[i]); } } catch (CDKException exception) { String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage(); logger.error(error); logger.debug(exception); throw exception; } catch (Exception exception) { exception.printStackTrace(); String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage(); logger.error(error); logger.debug(exception); handleError("Error while parsing line: " + line, linecount, 0, 0, exception); } return outputContainer; }