Пример #1
0
  /**
   * Generates chiral SMILES for a molecule built around a central carbon that carries an
   * amino group, a fluorine, a methyl group and a carboxyl group, first with the fluorine bond
   * wedged UP and the methyl bond DOWN, then with those two stereo flags exchanged.
   */
  @Test
  public void testAlanin() throws Exception {
    HydrogenPlacer placer = new HydrogenPlacer();
    IAtomContainer molecule = new AtomContainer();
    SmilesGenerator generator = new SmilesGenerator();

    // Atoms; the trailing number is the atom index used by addBond below.
    molecule.addAtom(new Atom("N", new Point2d(1, 0))); // 0
    molecule.addAtom(new Atom("C", new Point2d(1, 2))); // 1 (stereo centre)
    molecule.addAtom(new Atom("F", new Point2d(1, 2))); // 2
    molecule.addAtom(new Atom("C", new Point2d(0, 0))); // 3
    molecule.addAtom(new Atom("C", new Point2d(1, 4))); // 4
    molecule.addAtom(new Atom("O", new Point2d(1, 5))); // 5
    molecule.addAtom(new Atom("O", new Point2d(1, 6))); // 6

    // Bonds; the trailing number is the bond index used by getBond below.
    molecule.addBond(0, 1, IBond.Order.SINGLE); // 0
    molecule.addBond(1, 2, IBond.Order.SINGLE, IBond.Stereo.UP); // 1
    molecule.addBond(1, 3, IBond.Order.SINGLE, IBond.Stereo.DOWN); // 2
    molecule.addBond(1, 4, IBond.Order.SINGLE); // 3
    molecule.addBond(4, 5, IBond.Order.SINGLE); // 4
    molecule.addBond(4, 6, IBond.Order.DOUBLE); // 5

    addExplicitHydrogens(molecule);
    placer.placeHydrogens2D(molecule, 1.0);
    IsotopeFactory isotopes = IsotopeFactory.getInstance(molecule.getBuilder());
    isotopes.configureAtoms(molecule);

    if (standAlone) {
      display(molecule);
    }

    // First configuration: bond 1 UP, bond 2 DOWN.
    String smiles = generator.createSMILES(molecule, true, new boolean[molecule.getBondCount()]);
    if (standAlone) {
      System.err.println("SMILES 1: " + smiles);
    }
    Assert.assertNotNull(smiles);
    Assert.assertEquals("[H]OC(=O)[C@](F)(N([H])[H])C([H])([H])[H]", smiles);

    // Second configuration: exchange the two stereo flags and regenerate.
    molecule.getBond(1).setStereo(IBond.Stereo.DOWN);
    molecule.getBond(2).setStereo(IBond.Stereo.UP);
    smiles = generator.createSMILES(molecule, true, new boolean[molecule.getBondCount()]);
    if (standAlone) {
      System.err.println("SMILES 1: " + smiles);
    }
    Assert.assertNotNull(smiles);
    Assert.assertEquals("[H]OC(=O)[C@](F)(C([H])([H])[H])N([H])[H]", smiles);
  }
Пример #2
0
 /**
  * Get the summed natural mass of all elements from an MolecularFormula.
  *
  * <p>For every isotope in the formula, the natural mass that the isotope factory reports for
  * the isotope's element is multiplied by the number of times the isotope occurs in the
  * formula; the products are added up.
  *
  * @param formula The IMolecularFormula to calculate
  * @return The summed natural mass of all atoms in this MolecularFormula
  * @throws RuntimeException if the isotope factory cannot be instantiated; the underlying
  *     IOException is attached as the cause
  */
 public static double getNaturalExactMass(IMolecularFormula formula) {
   double mass = 0.0;
   IsotopeFactory factory;
   try {
     factory = Isotopes.getInstance();
   } catch (IOException e) {
     // keep the original failure as the cause so the reason is not lost
     throw new RuntimeException("Could not instantiate the IsotopeFactory.", e);
   }
   for (IIsotope isotope : formula.isotopes()) {
     // the factory is queried per element, so build an element view of the isotope
     IElement isotopesElement = formula.getBuilder().newInstance(IElement.class, isotope);
     mass += factory.getNaturalMass(isotopesElement) * formula.getIsotopeCount(isotope);
   }
   return mass;
 }
Пример #3
0
 /**
  * Get the summed major isotopic mass of all elements from an MolecularFormula.
  *
  * <p>Each isotope in the formula is looked up by its element symbol; the exact mass of that
  * element's major isotope is multiplied by the isotope's count in the formula. Isotopes for
  * which the factory returns no major isotope contribute nothing to the sum.
  *
  * @param formula The IMolecularFormula to calculate
  * @return The summed exact major isotope masses of all atoms in this MolecularFormula
  * @throws RuntimeException if the isotope factory cannot be instantiated; the underlying
  *     IOException is attached as the cause
  */
 public static double getMajorIsotopeMass(IMolecularFormula formula) {
   double mass = 0.0;
   IsotopeFactory factory;
   try {
     factory = Isotopes.getInstance();
   } catch (IOException e) {
     // keep the original failure as the cause so the reason is not lost
     throw new RuntimeException("Could not instantiate the IsotopeFactory.", e);
   }
   for (IIsotope isotope : formula.isotopes()) {
     IIsotope major = factory.getMajorIsotope(isotope.getSymbol());
     if (major != null) {
       mass += major.getExactMass() * formula.getIsotopeCount(isotope);
     }
   }
   return mass;
 }
Пример #4
0
 /**
  * Replaces every pseudo atom labelled "D" or "T" in the molecule by a hydrogen atom that is
  * configured as the hydrogen isotope with mass number 2 or 3, respectively. Pseudo atoms
  * with any other label, and all regular atoms, are left untouched.
  *
  * @param molecule the container whose atoms are inspected and replaced in place
  * @param isotopeFactory the factory used to look up and apply the hydrogen isotopes
  */
 private void fixHydrogenIsotopes(IAtomContainer molecule, IsotopeFactory isotopeFactory) {
   for (IAtom candidate : AtomContainerManipulator.getAtomArray(molecule)) {
     if (!(candidate instanceof IPseudoAtom)) {
       continue;
     }
     String label = ((IPseudoAtom) candidate).getLabel();
     int massNumber;
     if ("D".equals(label)) {
       massNumber = 2;
     } else if ("T".equals(label)) {
       massNumber = 3;
     } else {
       continue;
     }
     // copy the pseudo atom into a plain atom, then turn that copy into the hydrogen isotope
     IAtom hydrogen = molecule.getBuilder().newInstance(IAtom.class, candidate);
     hydrogen.setSymbol("H");
     hydrogen.setAtomicNumber(1);
     isotopeFactory.configure(hydrogen, isotopeFactory.getIsotope("H", massNumber));
     AtomContainerManipulator.replaceAtomByAtom(molecule, candidate, hydrogen);
   }
 }
Пример #5
0
  /**
   * Loads one or more files into IAtomContainer objects.
   *
   * <p>This method does not need knowledge of the format since it is autodetected. Files whose
   * format is not detected are read as SMILES when their name ends with ".smi"; any other
   * undetected file is reported as an error. Note that if aromaticity detection or atom typing
   * is specified and fails for a specific molecule, that molecule will be set to <i>null</i>
   * in the returned array and is skipped by all later processing steps.
   *
   * @param filenames An array of String's containing the filenames of the structures we want to
   *     load
   * @param doAromaticity If true, then aromaticity perception is performed
   * @param doTyping If true, atom typing and configuration is performed. This will use the internal
   *     CDK atom typing scheme
   * @param doIsotopes If true, the atoms of every non-null molecule are configured by the
   *     IsotopeFactory
   * @return An array of IAtomContainer's, possibly containing null entries (see above)
   * @throws CDKException if there is an error when reading a file; the original exception is
   *     attached as the cause
   * @throws IOException declared by the isotope factory lookup that is used when doIsotopes is
   *     true
   */
  public static IAtomContainer[] loadMolecules(
      String[] filenames, boolean doAromaticity, boolean doTyping, boolean doIsotopes)
      throws CDKException, IOException {
    Vector<IAtomContainer> v = new Vector<IAtomContainer>();
    IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();
    try {
      for (String filename : filenames) {
        File input = new File(filename);
        ReaderFactory readerFactory = new ReaderFactory();

        ISimpleChemObjectReader reader = null;
        FileReader probe = new FileReader(input);
        try {
          reader = readerFactory.createReader(probe);
        } finally {
          // if no reader was created, nothing else will ever close this stream
          if (reader == null) {
            probe.close();
          }
        }

        if (reader == null && filename.endsWith(".smi")) { // see if it's a SMI file
          reader = new SMILESReader(new FileReader(input));
        }
        if (reader == null) {
          // previously this surfaced as a bare NullPointerException
          throw new CDKException("Could not determine the format of file: " + filename);
        }

        IChemFile content;
        try {
          content = (IChemFile) reader.read(builder.newInstance(IChemFile.class));
        } finally {
          reader.close();
        }
        if (content == null) {
          continue;
        }

        // a single file may contain multiple molecules
        List<IAtomContainer> c = ChemFileManipulator.getAllAtomContainers(content);
        v.addAll(c);
      }
    } catch (Exception e) {
      // keep the message format callers already see, but attach the cause
      throw new CDKException(e.toString(), e);
    }

    // convert the vector to a simple array
    IAtomContainer[] retValues = v.toArray(new IAtomContainer[v.size()]);

    // before returning, see if we need to perceive aromaticity and atom typing
    if (doAromaticity) {
      for (int i = 0; i < retValues.length; i++) {
        try {
          CDKHueckelAromaticityDetector.detectAromaticity(retValues[i]);
        } catch (CDKException e) {
          retValues[i] = null;
        }
      }
    }

    if (doTyping) {
      for (int i = 0; i < retValues.length; i++) {
        if (retValues[i] == null) {
          continue; // already discarded by the aromaticity step
        }
        try {
          AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(retValues[i]);
        } catch (CDKException e) {
          retValues[i] = null;
        }
      }
    }

    if (doIsotopes) {
      IsotopeFactory ifac = IsotopeFactory.getInstance(DefaultChemObjectBuilder.getInstance());
      for (IAtomContainer retValue : retValues) {
        if (retValue != null) { // entries nulled by an earlier step cannot be configured
          ifac.configureAtoms(retValue);
        }
      }
    }

    return retValues;
  }
Пример #6
0
  /**
   * Calculates the three moments of inertia (MI), three ratios between them, and the radius
   * of gyration (R_gyr).
   *
   * <p>The molecule should have hydrogens and must have 3D coordinates. The result holds seven
   * values, added in this order: eigenvalues {@code eval[2]}, {@code eval[1]}, {@code eval[0]}
   * of the inertia tensor, then (after swapping {@code eval[0]} and {@code eval[2]}) the ratios
   * {@code eval[0]/eval[1]}, {@code eval[0]/eval[2]}, {@code eval[1]/eval[2]}, and finally the
   * radius of gyration. A ratio whose denominator has an absolute value of at most 1e-3 is
   * reported as 1000.
   *
   * @param container Parameter is the atom container.
   * @return A DescriptorValue wrapping a DoubleArrayResult with the 7 elements in the order
   *     described above, or a dummy descriptor value carrying the exception if the container
   *     has no 3D coordinates or cloning/isotope configuration fails
   */
  @TestMethod("testCalculate_IAtomContainer")
  public DescriptorValue calculate(IAtomContainer container) {
    if (!GeometryTools.has3DCoordinates(container))
      return getDummyDescriptorValue(new CDKException("Molecule must have 3D coordinates"));

    // work on a clone so that configuring the atoms does not modify the caller's container
    IAtomContainer clone;
    IsotopeFactory factory;
    try {
      clone = (IAtomContainer) container.clone();
      factory = IsotopeFactory.getInstance(container.getBuilder());
      factory.configureAtoms(clone);
    } catch (Exception e) {
      logger.debug(e);
      return getDummyDescriptorValue(e);
    }

    DoubleArrayResult retval = new DoubleArrayResult(7);

    // NOTE(review): ccf looks like a unit/conversion correction factor — origin not visible here
    double ccf = 1.000138;
    // threshold below which the last eigenvalue is treated as zero for the radius of gyration
    double eps = 1e-5;

    // 3x3 inertia tensor, accumulated relative to the centre of mass
    double[][] imat = new double[3][3];
    Point3d centerOfMass = GeometryTools.get3DCentreOfMass(clone);

    double xdif;
    double ydif;
    double zdif;
    double xsq;
    double ysq;
    double zsq;
    for (int i = 0; i < clone.getAtomCount(); i++) {
      IAtom currentAtom = clone.getAtom(i);

      // the atom is weighted by the exact mass of its element's major isotope
      double mass = factory.getMajorIsotope(currentAtom.getSymbol()).getExactMass();

      xdif = currentAtom.getPoint3d().x - centerOfMass.x;
      ydif = currentAtom.getPoint3d().y - centerOfMass.y;
      zdif = currentAtom.getPoint3d().z - centerOfMass.z;
      xsq = xdif * xdif;
      ysq = ydif * ydif;
      zsq = zdif * zdif;

      // diagonal terms
      imat[0][0] += mass * (ysq + zsq);
      imat[1][1] += mass * (xsq + zsq);
      imat[2][2] += mass * (xsq + ysq);

      // off-diagonal terms; the upper triangle is mirrored from the lower one
      imat[1][0] += -1 * mass * ydif * xdif;
      imat[0][1] = imat[1][0];

      imat[2][0] += -1 * mass * xdif * zdif;
      imat[0][2] = imat[2][0];

      imat[2][1] += -1 * mass * ydif * zdif;
      imat[1][2] = imat[2][1];
    }

    // diagonalize the MI tensor
    Matrix tmp = new Matrix(imat);
    EigenvalueDecomposition eigenDecomp = tmp.eig();
    double[] eval = eigenDecomp.getRealEigenvalues();

    // results 1-3: the eigenvalues in reverse index order
    // NOTE(review): presumably the decomposition returns them ascending, making this
    // largest-first — confirm against the matrix library
    retval.add(eval[2]);
    retval.add(eval[1]);
    retval.add(eval[0]);

    // swap first and last so that eval[] is in the same order as the values just stored
    double etmp = eval[0];
    eval[0] = eval[2];
    eval[2] = etmp;

    // results 4-6: ratios, with 1000 as a stand-in when the denominator is (nearly) zero
    if (Math.abs(eval[1]) > 1e-3) retval.add(eval[0] / eval[1]);
    else retval.add(1000);

    if (Math.abs(eval[2]) > 1e-3) {
      retval.add(eval[0] / eval[2]);
      retval.add(eval[1] / eval[2]);
    } else {
      retval.add(1000);
      retval.add(1000);
    }

    // finally get the radius of gyration
    double pri;
    IMolecularFormula formula = MolecularFormulaManipulator.getMolecularFormula(clone);
    // use the cube root of the eigenvalue product unless eval[2] is effectively zero, in
    // which case fall back to a value derived from eval[0] and the total exact mass
    if (Math.abs(eval[2]) > eps) pri = Math.pow(eval[0] * eval[1] * eval[2], 1.0 / 3.0);
    else pri = Math.sqrt(eval[0] * ccf / MolecularFormulaManipulator.getTotalExactMass(formula));
    // result 7
    retval.add(
        Math.sqrt(
            Math.PI * 2 * pri * ccf / MolecularFormulaManipulator.getTotalExactMass(formula)));

    return new DescriptorValue(
        getSpecification(), getParameterNames(), getParameters(), retval, getDescriptorNames());
  }
Пример #7
0
  /**
   * Checks the double bond configuration written by the SMILES generator for a
   * S-C(F)=C(F)-S skeleton: first without explicit hydrogens, for both arrangements of the
   * substituents on the second carbon, then again after explicit hydrogens have been added.
   *
   * <p>Failures while configuring isotopes or adding hydrogens are no longer swallowed; they
   * propagate and fail the test, since the later assertions would otherwise run against a
   * molecule that was only partially set up.
   */
  @Test
  public void testDoubleBondConfiguration() throws Exception {
    HydrogenPlacer hydrogenPlacer = new HydrogenPlacer();
    IAtomContainer mol1 = new AtomContainer();
    SmilesGenerator sg = new SmilesGenerator();
    // atoms; trailing number is the atom index
    mol1.addAtom(new Atom("S", new Point2d(0, 0))); // 0
    mol1.addAtom(new Atom("C", new Point2d(1, 1))); // 1
    mol1.addAtom(new Atom("F", new Point2d(2, 0))); // 2
    mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 3
    mol1.addAtom(new Atom("F", new Point2d(2, 3))); // 4
    mol1.addAtom(new Atom("S", new Point2d(0, 3))); // 5

    // bonds; trailing number is the bond index
    mol1.addBond(0, 1, IBond.Order.SINGLE); // 0
    mol1.addBond(1, 2, IBond.Order.SINGLE); // 1
    mol1.addBond(1, 3, IBond.Order.DOUBLE); // 2
    mol1.addBond(3, 4, IBond.Order.SINGLE); // 3
    mol1.addBond(3, 5, IBond.Order.SINGLE); // 4

    IsotopeFactory ifac = IsotopeFactory.getInstance(mol1.getBuilder());
    ifac.configureAtoms(mol1);

    String smiles1 = null;
    if (standAlone) {
      display(mol1);
    }
    // flag bond 2 (the double bond) as the one whose configuration should be written
    boolean[] bool = new boolean[mol1.getBondCount()];
    bool[2] = true;
    try {
      smiles1 = sg.createSMILES(mol1, true, bool);
    } catch (Exception exc) {
      System.out.println(exc);
      if (!standAlone) {
        Assert.fail();
      }
    }
    if (standAlone) {
      System.err.println("SMILES 1: " + smiles1);
    }
    Assert.assertNotNull(smiles1);
    Assert.assertEquals("F/C(=C/(F)S)S", smiles1);

    // exchange the positions of the two substituents on atom 3 to flip the configuration
    mol1.getAtom(4).setPoint2d(new Point2d(0, 3));
    mol1.getAtom(5).setPoint2d(new Point2d(2, 3));
    try {
      smiles1 = sg.createSMILES(mol1, true, bool);
    } catch (Exception exc) {
      System.out.println(exc);
      if (!standAlone) {
        Assert.fail();
      }
    }
    if (standAlone) {
      System.err.println("SMILES 1: " + smiles1);
    }
    Assert.assertNotNull(smiles1);
    Assert.assertEquals("F/C(=C\\(F)S)S", smiles1);

    // repeat both checks with explicit hydrogens
    addExplicitHydrogens(mol1);
    hydrogenPlacer.placeHydrogens2D(mol1, 1.0);

    // the bond count changed, so the flag array has to be rebuilt
    bool = new boolean[mol1.getBondCount()];
    bool[2] = true;
    try {
      smiles1 = sg.createSMILES(mol1, true, bool);
    } catch (Exception exc) {
      System.out.println(exc);
      if (!standAlone) {
        Assert.fail();
      }
    }
    Assert.assertEquals("[H]S/C(F)=C/(F)S[H]", smiles1);

    // exchange the two substituent positions again
    mol1.getAtom(5).setPoint2d(new Point2d(0, 3));
    mol1.getAtom(4).setPoint2d(new Point2d(2, 3));
    try {
      smiles1 = sg.createSMILES(mol1, true, bool);
    } catch (Exception exc) {
      System.out.println(exc);
      if (!standAlone) {
        Assert.fail();
      }
    }
    Assert.assertEquals("[H]S/C(F)=C\\(F)S[H]", smiles1);
  }
Пример #8
0
 /**
  * Generates the chiral SMILES of a decalin whose two bridgehead hydrogens are both drawn
  * with DOWN bonds, then flips one of those bonds to UP and checks that the generated SMILES
  * changes.
  *
  * <p>Setup failures (adding hydrogens, configuring isotopes) propagate and fail the test
  * instead of being swallowed. The final check compares string content; the previous
  * reference comparison passed for any two distinct String objects, even with equal text.
  */
 @Test
 public void testCisTransDecalin() throws Exception {
   HydrogenPlacer hydrogenPlacer = new HydrogenPlacer();
   IAtomContainer mol1 = new AtomContainer();
   SmilesGenerator sg = new SmilesGenerator();
   // atoms; trailing number is the atom index
   mol1.addAtom(new Atom("H", new Point2d(1, 0))); // 0
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 1 (bridgehead)
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 2
   mol1.addAtom(new Atom("C", new Point2d(0, 0))); // 3
   mol1.addAtom(new Atom("C", new Point2d(1, 4))); // 4
   mol1.addAtom(new Atom("C", new Point2d(1, 5))); // 5
   mol1.addAtom(new Atom("C", new Point2d(1, 6))); // 6 (bridgehead)
   mol1.addAtom(new Atom("H", new Point2d(1, 0))); // 7
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 8
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 9
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 10
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 11
   // bonds; trailing number is the bond index
   mol1.addBond(0, 1, IBond.Order.SINGLE, IBond.Stereo.DOWN); // 0
   mol1.addBond(1, 2, IBond.Order.SINGLE); // 1
   mol1.addBond(2, 3, IBond.Order.SINGLE); // 2
   mol1.addBond(3, 4, IBond.Order.SINGLE); // 3
   mol1.addBond(4, 5, IBond.Order.SINGLE); // 4
   mol1.addBond(5, 6, IBond.Order.SINGLE); // 5
   mol1.addBond(6, 7, IBond.Order.SINGLE, IBond.Stereo.DOWN); // 6
   mol1.addBond(6, 8, IBond.Order.SINGLE); // 7
   mol1.addBond(8, 9, IBond.Order.SINGLE); // 8
   mol1.addBond(9, 10, IBond.Order.SINGLE); // 9
   mol1.addBond(10, 11, IBond.Order.SINGLE); // 10
   mol1.addBond(11, 1, IBond.Order.SINGLE); // 11
   mol1.addBond(1, 6, IBond.Order.SINGLE); // 12 (ring fusion)

   addExplicitHydrogens(mol1);
   hydrogenPlacer.placeHydrogens2D(mol1, 1.0);
   IsotopeFactory ifac = IsotopeFactory.getInstance(mol1.getBuilder());
   ifac.configureAtoms(mol1);

   String smiles1 = null;
   if (standAlone) {
     display(mol1);
   }
   try {
     smiles1 = sg.createSMILES(mol1, true, new boolean[mol1.getBondCount()]);
   } catch (Exception exc) {
     System.out.println(exc);
     if (!standAlone) {
       Assert.fail();
     }
   }
   if (standAlone) {
     System.err.println("SMILES 1: " + smiles1);
   }
   Assert.assertNotNull(smiles1);
   Assert.assertEquals(
       "[H]C1([H])(C([H])([H])C([H])([H])C\\2([H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])C\\2([H])(C1([H])([H]))))",
       smiles1);

   // flip the stereo flag of the second bridgehead hydrogen bond (bond 6)
   mol1.getBond(6).setStereo(IBond.Stereo.UP);
   String smiles3 = null;
   try {
     smiles3 = sg.createSMILES(mol1, true, new boolean[mol1.getBondCount()]);
   } catch (Exception exc) {
     System.out.println(exc);
     if (!standAlone) {
       Assert.fail();
     }
   }
   Assert.assertNotNull(smiles3);
   // compare content, not identity: the two isomers must yield different SMILES text
   Assert.assertFalse(
       "Flipping a bridgehead stereo bond should change the SMILES", smiles1.equals(smiles3));
 }
Пример #9
0
 /**
  * Generates the chiral SMILES of a six-membered carbon ring with hydroxyl groups on two
  * neighbouring ring carbons, each of which also carries an explicitly drawn hydrogen with
  * the opposite stereo flag; then removes the hydrogens and checks the plain SMILES.
  *
  * <p>Setup failures (adding hydrogens, configuring isotopes) propagate and fail the test
  * instead of being swallowed, so the assertions never run against a partially prepared
  * molecule.
  */
 @Test
 public void testCisResorcinol() throws Exception {
   HydrogenPlacer hydrogenPlacer = new HydrogenPlacer();
   IAtomContainer mol1 = new AtomContainer();
   SmilesGenerator sg = new SmilesGenerator();
   // atoms; trailing number is the atom index
   mol1.addAtom(new Atom("O", new Point2d(3, 1))); // 0
   mol1.addAtom(new Atom("H", new Point2d(2, 0))); // 1
   mol1.addAtom(new Atom("C", new Point2d(2, 1))); // 2 (stereo centre)
   mol1.addAtom(new Atom("C", new Point2d(1, 1))); // 3
   mol1.addAtom(new Atom("C", new Point2d(1, 4))); // 4
   mol1.addAtom(new Atom("C", new Point2d(1, 5))); // 5
   mol1.addAtom(new Atom("C", new Point2d(1, 2))); // 6
   mol1.addAtom(new Atom("C", new Point2d(2, 2))); // 7 (stereo centre)
   mol1.addAtom(new Atom("O", new Point2d(3, 2))); // 8
   mol1.addAtom(new Atom("H", new Point2d(2, 3))); // 9
   // bonds; trailing number is the bond index
   mol1.addBond(0, 2, IBond.Order.SINGLE, IBond.Stereo.DOWN); // 0
   mol1.addBond(1, 2, IBond.Order.SINGLE, IBond.Stereo.UP); // 1
   mol1.addBond(2, 3, IBond.Order.SINGLE); // 2
   mol1.addBond(3, 4, IBond.Order.SINGLE); // 3
   mol1.addBond(4, 5, IBond.Order.SINGLE); // 4
   mol1.addBond(5, 6, IBond.Order.SINGLE); // 5
   mol1.addBond(6, 7, IBond.Order.SINGLE); // 6
   mol1.addBond(7, 8, IBond.Order.SINGLE, IBond.Stereo.UP); // 7
   mol1.addBond(7, 9, IBond.Order.SINGLE, IBond.Stereo.DOWN); // 8
   mol1.addBond(7, 2, IBond.Order.SINGLE); // 9 (ring closure)

   addExplicitHydrogens(mol1);
   hydrogenPlacer.placeHydrogens2D(mol1, 1.0);
   IsotopeFactory ifac = IsotopeFactory.getInstance(mol1.getBuilder());
   ifac.configureAtoms(mol1);

   String smiles1 = null;
   if (standAlone) {
     display(mol1);
   }
   try {
     smiles1 = sg.createSMILES(mol1, true, new boolean[mol1.getBondCount()]);
   } catch (Exception exc) {
     System.out.println(exc);
     if (!standAlone) {
       Assert.fail();
     }
   }
   if (standAlone) {
     System.err.println("SMILES 1: " + smiles1);
   }
   Assert.assertNotNull(smiles1);
   Assert.assertEquals(
       "[H]O[C@]1(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[C@]1(O[H])([H]))([H])", smiles1);

   // second check: hydrogen-suppressed molecule, non-chiral SMILES
   mol1 = AtomContainerManipulator.removeHydrogens(mol1);
   try {
     smiles1 = sg.createSMILES(mol1);
   } catch (Exception exc) {
     System.out.println(exc);
     if (!standAlone) {
       Assert.fail();
     }
   }
   if (standAlone) {
     System.err.println("SMILES 1: " + smiles1);
   }
   Assert.assertNotNull(smiles1);
   Assert.assertEquals("OC1CCCCC1(O)", smiles1);
 }
Пример #10
0
  /**
   * Read an IAtomContainer from a file in MDL sd format
   *
   * @return The Molecule that was read from the MDL file.
   */
  private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException {
    logger.debug("Reading new molecule");
    IAtomContainer outputContainer = null;
    int linecount = 0;
    int atoms = 0;
    int bonds = 0;
    int atom1 = 0;
    int atom2 = 0;
    int order = 0;
    IBond.Stereo stereo = (IBond.Stereo) CDKConstants.UNSET;
    int RGroupCounter = 1;
    int Rnumber = 0;
    String[] rGroup = null;
    double x = 0.0;
    double y = 0.0;
    double z = 0.0;
    double totalX = 0.0;
    double totalY = 0.0;
    double totalZ = 0.0;
    String title = null;
    String remark = null;
    // int[][] conMat = new int[0][0];
    // String help;
    IAtom atom;
    String line = "";
    // A map to keep track of R# atoms so that RGP line can be parsed
    Map<Integer, IPseudoAtom> rAtoms = new HashMap<Integer, IPseudoAtom>();

    try {
      IsotopeFactory isotopeFactory = Isotopes.getInstance();

      logger.info("Reading header");
      line = input.readLine();
      linecount++;
      if (line == null) {
        return null;
      }
      logger.debug("Line " + linecount + ": " + line);

      if (line.startsWith("$$$$")) {
        logger.debug("File is empty, returning empty molecule");
        return molecule;
      }
      if (line.length() > 0) {
        title = line;
      }
      line = input.readLine();
      linecount++;
      logger.debug("Line " + linecount + ": " + line);
      line = input.readLine();
      linecount++;
      logger.debug("Line " + linecount + ": " + line);
      if (line.length() > 0) {
        remark = line;
      }

      logger.info("Reading rest of file");
      line = input.readLine();
      linecount++;
      logger.debug("Line " + linecount + ": " + line);

      // if the line is empty we hav a problem - either a malformed
      // molecule entry or just extra new lines at the end of the file
      if (line.length() == 0) {
        // read till the next $$$$ or EOF
        while (true) {
          line = input.readLine();
          linecount++;
          if (line == null) {
            return null;
          }
          if (line.startsWith("$$$$")) {
            return molecule; // an empty molecule
          }
        }
      }

      // check the CT block version
      if (line.contains("V3000") || line.contains("v3000")) {
        handleError("This file must be read with the MDLV3000Reader.");
      } else if (!line.contains("V2000") && !line.contains("v2000")) {
        handleError("This file must be read with the MDLReader.");
      }

      atoms = Integer.parseInt(line.substring(0, 3).trim());
      List<IAtom> atomList = new ArrayList<IAtom>();

      logger.debug("Atomcount: " + atoms);
      bonds = Integer.parseInt(line.substring(3, 6).trim());
      logger.debug("Bondcount: " + bonds);
      List<IBond> bondList = new ArrayList<IBond>();

      // used for applying the MDL valence model
      int[] explicitValence = new int[atoms];

      // read ATOM block
      logger.info("Reading atom block");
      atomsByLinePosition = new ArrayList<IAtom>();
      atomsByLinePosition.add(null); // 0 is not a valid position
      int atomBlockLineNumber = 0;
      for (int f = 0; f < atoms; f++) {
        line = input.readLine();
        linecount++;
        atomBlockLineNumber++;
        Matcher trailingSpaceMatcher = TRAILING_SPACE.matcher(line);
        if (trailingSpaceMatcher.find()) {
          handleError(
              "Trailing space found",
              linecount,
              trailingSpaceMatcher.start(),
              trailingSpaceMatcher.end());
          line = trailingSpaceMatcher.replaceAll("");
        }
        x = Double.parseDouble(line.substring(0, 10).trim());
        y = Double.parseDouble(line.substring(10, 20).trim());
        z = Double.parseDouble(line.substring(20, 30).trim());
        // *all* values should be zero, not just the sum
        totalX += Math.abs(x);
        totalY += Math.abs(y);
        totalZ += Math.abs(z);
        logger.debug("Coordinates: " + x + "; " + y + "; " + z);
        String element = line.substring(31, Math.min(line.length(), 34)).trim();
        if (line.length() < 34) {
          handleError(
              "Element atom type does not follow V2000 format type should of length three"
                  + " and padded with space if required",
              linecount,
              31,
              34);
        }

        logger.debug("Atom type: ", element);
        if (isotopeFactory.isElement(element)) {
          atom = isotopeFactory.configure(molecule.getBuilder().newInstance(IAtom.class, element));
        } else if ("A".equals(element)) {
          atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
        } else if ("Q".equals(element)) {
          atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
        } else if ("*".equals(element)) {
          atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
        } else if ("LP".equals(element)) {
          atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
        } else if ("L".equals(element)) {
          atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
        } else if (element.equals("R") || (element.length() > 0 && element.charAt(0) == 'R')) {
          logger.debug("Atom ", element, " is not an regular element. Creating a PseudoAtom.");
          // check if the element is R
          rGroup = element.split("^R");
          atom = null;
          if (rGroup.length > 1) {
            try {
              Rnumber = Integer.valueOf(rGroup[(rGroup.length - 1)]);
              RGroupCounter = Rnumber;
              element = "R" + Rnumber;
              atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);

            } catch (Exception ex) {
              // This happens for atoms labeled "R#".
              // The Rnumber may be set later on, using RGP line
              atom = molecule.getBuilder().newInstance(IPseudoAtom.class, "R");
              rAtoms.put(atomBlockLineNumber, (IPseudoAtom) atom);
            }
          } else {
            atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
          }
        } else {
          handleError(
              "Invalid element type. Must be an existing " + "element, or one in: A, Q, L, LP, *.",
              linecount,
              32,
              35);
          atom = molecule.getBuilder().newInstance(IPseudoAtom.class, element);
          atom.setSymbol(element);
        }

        // store as 3D for now, convert to 2D (if totalZ == 0.0) later
        atom.setPoint3d(new Point3d(x, y, z));

        // parse further fields
        if (line.length() >= 36) {
          String massDiffString = line.substring(34, 36).trim();
          logger.debug("Mass difference: ", massDiffString);
          if (!(atom instanceof IPseudoAtom)) {
            try {
              int massDiff = Integer.parseInt(massDiffString);
              if (massDiff != 0) {
                IIsotope major = Isotopes.getInstance().getMajorIsotope(element);
                atom.setMassNumber(major.getMassNumber() + massDiff);
              }
            } catch (Exception exception) {
              handleError("Could not parse mass difference field.", linecount, 35, 37, exception);
            }
          } else {
            logger.error("Cannot set mass difference for a non-element!");
          }
        } else {
          handleError("Mass difference is missing", linecount, 34, 36);
        }

        // set the stereo partiy
        Integer parity = line.length() > 41 ? Character.digit(line.charAt(41), 10) : 0;
        atom.setStereoParity(parity);

        if (line.length() >= 51) {
          String valenceString = removeNonDigits(line.substring(48, 51));
          logger.debug("Valence: ", valenceString);
          if (!(atom instanceof IPseudoAtom)) {
            try {
              int valence = Integer.parseInt(valenceString);
              if (valence != 0) {
                // 15 is defined as 0 in mol files
                if (valence == 15) atom.setValency(0);
                else atom.setValency(valence);
              }
            } catch (Exception exception) {
              handleError(
                  "Could not parse valence information field", linecount, 49, 52, exception);
            }
          } else {
            logger.error("Cannot set valence information for a non-element!");
          }
        }

        if (line.length() >= 39) {
          String chargeCodeString = line.substring(36, 39).trim();
          logger.debug("Atom charge code: ", chargeCodeString);
          int chargeCode = Integer.parseInt(chargeCodeString);
          if (chargeCode == 0) {
            // uncharged species
          } else if (chargeCode == 1) {
            atom.setFormalCharge(+3);
          } else if (chargeCode == 2) {
            atom.setFormalCharge(+2);
          } else if (chargeCode == 3) {
            atom.setFormalCharge(+1);
          } else if (chargeCode == 4) {
          } else if (chargeCode == 5) {
            atom.setFormalCharge(-1);
          } else if (chargeCode == 6) {
            atom.setFormalCharge(-2);
          } else if (chargeCode == 7) {
            atom.setFormalCharge(-3);
          }
        } else {
          handleError("Atom charge is missing", linecount, 36, 39);
        }

        try {
          // read the mmm field as position 61-63
          String reactionAtomIDString = line.substring(60, 63).trim();
          logger.debug("Parsing mapping id: ", reactionAtomIDString);
          try {
            int reactionAtomID = Integer.parseInt(reactionAtomIDString);
            if (reactionAtomID != 0) {
              atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, reactionAtomID);
            }
          } catch (Exception exception) {
            logger.error("Mapping number ", reactionAtomIDString, " is not an integer.");
            logger.debug(exception);
          }
        } catch (Exception exception) {
          // older mol files don't have all these fields...
          logger.warn("A few fields are missing. Older MDL MOL file?");
        }

        // shk3: This reads shifts from after the molecule. I don't think this is an official
        // format, but I saw it frequently 80=>78 for alk
        if (line.length() >= 78) {
          double shift = Double.parseDouble(line.substring(69, 80).trim());
          atom.setProperty("first shift", shift);
        }
        if (line.length() >= 87) {
          double shift = Double.parseDouble(line.substring(79, 87).trim());
          atom.setProperty("second shift", shift);
        }
        atomList.add(atom);
        atomsByLinePosition.add(atom);
      }

      // convert to 2D, if totalZ == 0
      if (totalX == 0.0 && totalY == 0.0 && totalZ == 0.0) {
        logger.info("All coordinates are 0.0");
        if (atomList.size() == 1) {
          atomList.get(0).setPoint2d(new Point2d(x, y));
        } else {
          for (IAtom atomToUpdate : atomList) {
            atomToUpdate.setPoint3d(null);
          }
        }
      } else if (totalZ == 0.0 && !forceReadAs3DCoords.isSet()) {
        logger.info("Total 3D Z is 0.0, interpreting it as a 2D structure");
        for (IAtom atomToUpdate : atomList) {
          Point3d p3d = atomToUpdate.getPoint3d();
          if (p3d != null) {
            atomToUpdate.setPoint2d(new Point2d(p3d.x, p3d.y));
            atomToUpdate.setPoint3d(null);
          }
        }
      }

      // read BOND block
      logger.info("Reading bond block");
      int queryBondCount = 0;
      for (int f = 0; f < bonds; f++) {
        line = input.readLine();
        linecount++;
        atom1 = Integer.parseInt(line.substring(0, 3).trim());
        atom2 = Integer.parseInt(line.substring(3, 6).trim());
        order = Integer.parseInt(line.substring(6, 9).trim());
        if (line.length() >= 12) {
          int mdlStereo =
              line.length() > 12
                  ? Integer.parseInt(line.substring(9, 12).trim())
                  : Integer.parseInt(line.substring(9).trim());
          if (mdlStereo == 1) {
            // MDL up bond
            stereo = IBond.Stereo.UP;
          } else if (mdlStereo == 6) {
            // MDL down bond
            stereo = IBond.Stereo.DOWN;
          } else if (mdlStereo == 0) {
            if (order == 2) {
              // double bond stereo defined by coordinates
              stereo = IBond.Stereo.E_Z_BY_COORDINATES;
            } else {
              // bond has no stereochemistry
              stereo = IBond.Stereo.NONE;
            }
          } else if (mdlStereo == 3 && order == 2) {
            // unknown E/Z stereochemistry
            stereo = IBond.Stereo.E_OR_Z;
          } else if (mdlStereo == 4) {
            // MDL bond undefined
            stereo = IBond.Stereo.UP_OR_DOWN;
          }
        } else {
          handleError("Missing expected stereo field at line: ", linecount, 10, 12);
        }
        if (logger.isDebugEnabled()) {
          logger.debug("Bond: " + atom1 + " - " + atom2 + "; order " + order);
        }
        // interpret CTfile's special bond orders
        IAtom a1 = atomList.get(atom1 - 1);
        IAtom a2 = atomList.get(atom2 - 1);
        IBond newBond = null;
        if (order >= 1 && order <= 3) {
          IBond.Order cdkOrder = IBond.Order.SINGLE;
          if (order == 2) cdkOrder = IBond.Order.DOUBLE;
          if (order == 3) cdkOrder = IBond.Order.TRIPLE;
          if (stereo != null) {
            newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder, stereo);
          } else {
            newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, cdkOrder);
          }
        } else if (order == 4) {
          // aromatic bond
          if (stereo != null) {
            newBond =
                molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET, stereo);
          } else {
            newBond = molecule.getBuilder().newInstance(IBond.class, a1, a2, IBond.Order.UNSET);
          }
          // mark both atoms and the bond as aromatic and raise the SINGLE_OR_DOUBLE-flag
          newBond.setFlag(CDKConstants.SINGLE_OR_DOUBLE, true);
          newBond.setFlag(CDKConstants.ISAROMATIC, true);
          a1.setFlag(CDKConstants.ISAROMATIC, true);
          a2.setFlag(CDKConstants.ISAROMATIC, true);
        } else {
          queryBondCount++;
          newBond = new CTFileQueryBond(molecule.getBuilder());
          IAtom[] bondAtoms = {a1, a2};
          newBond.setAtoms(bondAtoms);
          newBond.setOrder(null);
          CTFileQueryBond.Type queryBondType = null;
          switch (order) {
            case 5:
              queryBondType = CTFileQueryBond.Type.SINGLE_OR_DOUBLE;
              break;
            case 6:
              queryBondType = CTFileQueryBond.Type.SINGLE_OR_AROMATIC;
              break;
            case 7:
              queryBondType = CTFileQueryBond.Type.DOUBLE_OR_AROMATIC;
              break;
            case 8:
              queryBondType = CTFileQueryBond.Type.ANY;
              break;
          }
          ((CTFileQueryBond) newBond).setType(queryBondType);
          newBond.setStereo(stereo);
        }
        bondList.add((newBond));

        // add the bond order to the explicit valence for each atom
        if (newBond.getOrder() != null && newBond.getOrder() != IBond.Order.UNSET) {
          explicitValence[atom1 - 1] += newBond.getOrder().numeric();
          explicitValence[atom2 - 1] += newBond.getOrder().numeric();
        } else {
          explicitValence[atom1 - 1] = Integer.MIN_VALUE;
          explicitValence[atom2 - 1] = Integer.MIN_VALUE;
        }
      }

      if (queryBondCount == 0) outputContainer = molecule;
      else {
        outputContainer = new QueryAtomContainer(molecule.getBuilder());
      }

      outputContainer.setProperty(CDKConstants.TITLE, title);
      outputContainer.setProperty(CDKConstants.REMARK, remark);
      for (IAtom at : atomList) {
        outputContainer.addAtom(at);
      }
      for (IBond bnd : bondList) {
        outputContainer.addBond(bnd);
      }

      // read PROPERTY block
      logger.info("Reading property block");
      while (true) {
        line = input.readLine();
        linecount++;
        if (line == null) {
          handleError("The expected property block is missing!", linecount, 0, 0);
        }
        if (line.startsWith("M  END")) break;

        boolean lineRead = false;
        if (line.startsWith("M  CHG")) {
          // FIXME: if this is encountered for the first time, all
          // atom charges should be set to zero first!
          int infoCount = Integer.parseInt(line.substring(6, 9).trim());
          StringTokenizer st = new StringTokenizer(line.substring(9));
          for (int i = 1; i <= infoCount; i++) {
            String token = st.nextToken();
            int atomNumber = Integer.parseInt(token.trim());
            token = st.nextToken();
            int charge = Integer.parseInt(token.trim());
            outputContainer.getAtom(atomNumber - 1).setFormalCharge(charge);
          }
        } else if (line.matches("A\\s{1,4}\\d+")) {
          // Reads the pseudo atom property from the mol file

          // The atom number of the atom to be replaced
          int aliasAtomNumber =
              Integer.parseInt(line.replaceFirst("A\\s{1,4}", "")) - RGroupCounter;
          line = input.readLine();
          linecount++;
          String[] aliasArray = line.split("\\\\");
          // name of the alias atom like R1 or R2 etc.
          String alias = "";
          for (int i = 0; i < aliasArray.length; i++) {
            alias += aliasArray[i];
          }
          IAtom aliasAtom = outputContainer.getAtom(aliasAtomNumber);

          // if the atom is already a pseudo atom, only update its label and skip the replacement
          if (aliasAtom instanceof IPseudoAtom) {
            ((IPseudoAtom) aliasAtom).setLabel(alias);
            continue;
          }

          IAtom newPseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, alias);
          if (aliasAtom.getPoint2d() != null) {
            newPseudoAtom.setPoint2d(aliasAtom.getPoint2d());
          }
          if (aliasAtom.getPoint3d() != null) {
            newPseudoAtom.setPoint3d(aliasAtom.getPoint3d());
          }
          outputContainer.addAtom(newPseudoAtom);
          List<IBond> bondsOfAliasAtom = outputContainer.getConnectedBondsList(aliasAtom);

          for (int i = 0; i < bondsOfAliasAtom.size(); i++) {
            IBond bondOfAliasAtom = bondsOfAliasAtom.get(i);
            IAtom connectedToAliasAtom = bondOfAliasAtom.getConnectedAtom(aliasAtom);
            IBond newBond = bondOfAliasAtom.getBuilder().newInstance(IBond.class);
            newBond.setAtoms(new IAtom[] {connectedToAliasAtom, newPseudoAtom});
            newBond.setOrder(bondOfAliasAtom.getOrder());
            outputContainer.addBond(newBond);
            outputContainer.removeBond(aliasAtom, connectedToAliasAtom);
          }
          outputContainer.removeAtom(aliasAtom);
          RGroupCounter++;

        } else if (line.startsWith("M  ISO")) {
          try {
            String countString = line.substring(6, 10).trim();
            int infoCount = Integer.parseInt(countString);
            StringTokenizer st = new StringTokenizer(line.substring(10));
            for (int i = 1; i <= infoCount; i++) {
              int atomNumber = Integer.parseInt(st.nextToken().trim());
              int absMass = Integer.parseInt(st.nextToken().trim());
              if (absMass != 0) {
                IAtom isotope = outputContainer.getAtom(atomNumber - 1);
                isotope.setMassNumber(absMass);
              }
            }
          } catch (NumberFormatException exception) {
            String error =
                "Error ("
                    + exception.getMessage()
                    + ") while parsing line "
                    + linecount
                    + ": "
                    + line
                    + " in property block.";
            logger.error(error);
            handleError(
                "NumberFormatException in isotope information.", linecount, 7, 11, exception);
          }
        } else if (line.startsWith("M  RAD")) {
          try {
            String countString = line.substring(6, 9).trim();
            int infoCount = Integer.parseInt(countString);
            StringTokenizer st = new StringTokenizer(line.substring(9));
            for (int i = 1; i <= infoCount; i++) {
              int atomNumber = Integer.parseInt(st.nextToken().trim());
              int spinMultiplicity = Integer.parseInt(st.nextToken().trim());
              MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE;
              if (spinMultiplicity > 0) {
                IAtom radical = outputContainer.getAtom(atomNumber - 1);
                switch (spinMultiplicity) {
                  case 1:
                    spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET;
                    break;
                  case 2:
                    spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET;
                    break;
                  case 3:
                    spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET;
                    break;
                  default:
                    logger.debug("Invalid spin multiplicity found: " + spinMultiplicity);
                    break;
                }
                for (int j = 0; j < spin.getSingleElectrons(); j++) {
                  outputContainer.addSingleElectron(
                      molecule.getBuilder().newInstance(ISingleElectron.class, radical));
                }
              }
            }
          } catch (NumberFormatException exception) {
            String error =
                "Error ("
                    + exception.getMessage()
                    + ") while parsing line "
                    + linecount
                    + ": "
                    + line
                    + " in property block.";
            logger.error(error);
            handleError(
                "NumberFormatException in radical information", linecount, 7, 10, exception);
          }
        } else if (line.startsWith("G  ")) {
          try {
            String atomNumberString = line.substring(3, 6).trim();
            int atomNumber = Integer.parseInt(atomNumberString);
            // String whatIsThisString = line.substring(6,9).trim();

            String atomName = input.readLine();

            // convert Atom into a PseudoAtom
            IAtom prevAtom = outputContainer.getAtom(atomNumber - 1);
            IPseudoAtom pseudoAtom = molecule.getBuilder().newInstance(IPseudoAtom.class, atomName);
            if (prevAtom.getPoint2d() != null) {
              pseudoAtom.setPoint2d(prevAtom.getPoint2d());
            }
            if (prevAtom.getPoint3d() != null) {
              pseudoAtom.setPoint3d(prevAtom.getPoint3d());
            }
            AtomContainerManipulator.replaceAtomByAtom(molecule, prevAtom, pseudoAtom);
          } catch (NumberFormatException exception) {
            String error =
                "Error ("
                    + exception.toString()
                    + ") while parsing line "
                    + linecount
                    + ": "
                    + line
                    + " in property block.";
            logger.error(error);
            handleError("NumberFormatException in group information", linecount, 4, 7, exception);
          }
        } else if (line.startsWith("M  RGP")) {
          StringTokenizer st = new StringTokenizer(line);
          // Ignore first 3 tokens (overhead).
          st.nextToken();
          st.nextToken();
          st.nextToken();
          // Process the R group numbers as defined in RGP line.
          while (st.hasMoreTokens()) {
            Integer position = new Integer(st.nextToken());
            Rnumber = new Integer(st.nextToken());
            IPseudoAtom pseudoAtom = rAtoms.get(position);
            if (pseudoAtom != null) {
              pseudoAtom.setLabel("R" + Rnumber);
            }
          }
        }
        if (line.startsWith("V  ")) {
          Integer atomNumber = new Integer(line.substring(3, 6).trim());
          IAtom atomWithComment = outputContainer.getAtom(atomNumber - 1);
          atomWithComment.setProperty(CDKConstants.COMMENT, line.substring(7));
        }

        if (!lineRead) {
          logger.warn("Skipping line in property block: ", line);
        }
      }

      if (interpretHydrogenIsotopes.isSet()) {
        fixHydrogenIsotopes(molecule, isotopeFactory);
      }

      // note: apply the valence model last so that all fixes (i.e. hydrogen
      // isotopes) are in place
      for (int i = 0; i < atoms; i++) {
        applyMDLValenceModel(outputContainer.getAtom(i), explicitValence[i]);
      }

    } catch (CDKException exception) {
      String error =
          "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage();
      logger.error(error);
      logger.debug(exception);
      throw exception;
    } catch (Exception exception) {
      exception.printStackTrace();
      String error =
          "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage();
      logger.error(error);
      logger.debug(exception);
      handleError("Error while parsing line: " + line, linecount, 0, 0, exception);
    }
    return outputContainer;
  }