コード例 #1
0
ファイル: UniStsParser.java プロジェクト: NCIP/geneconnect
 /**
  * Classifies the UniSTS input file from its name.
  *
  * <p>A name containing the text "alias" is treated as the UniSTS alias file; every other name is
  * treated as the base UniSTS file. The outcome is logged at INFO level.
  *
  * @param fileName name of the file to classify
  * @return {@code true} when the name does not contain "alias" (base UniSTS file), {@code false}
  *     when it does (UniSTS alias file)
  */
 private boolean findFormat(String fileName) {
   final boolean aliasFile = fileName.lastIndexOf("alias") != -1;
   Logger.log(aliasFile ? " alias = true" : " alias = false ", Logger.INFO);
   return !aliasFile;
 }
コード例 #2
0
ファイル: JDOMParser.java プロジェクト: NCIP/geneconnect
 /**
  * Tells a history file apart from a normal file by looking at its name.
  *
  * <p>The name is searched for the {@code Constants.HISTORYFILE} marker and the outcome is logged
  * at DEBUG level.
  *
  * @param fileName name of the file to classify
  * @return {@code true} when the name contains the history marker, {@code false} otherwise
  */
 private boolean findFormat(String fileName) {
   final boolean historyFile = fileName.lastIndexOf(Constants.HISTORYFILE) >= 0;
   Logger.log(historyFile ? " history = true" : " history = false ", Logger.DEBUG);
   return historyFile;
 }
コード例 #3
0
ファイル: UniStsParser.java プロジェクト: NCIP/geneconnect
 /**
  * Initializes the file writers for the UniSTS tables populated by the current input file.
  *
  * <p>Each writer is registered in {@code m_fileWriterHashTable} under its table name and writes
  * to a file named {@code <tableName>.<m_fileToParse>}.
  *
  * @param noAlias {@code true} when the base UniSTS file is being parsed (writers for the base and
  *     accession tables are created); {@code false} for the alias file (only the alias table
  *     writer is created)
  * @throws FatalException if any of the file writers cannot be opened
  */
 private void createFileWriters(boolean noAlias) throws FatalException {
   try {
     if (noAlias) {
       // Base UniSTS file: the base table and the accession table are both populated.
       m_fileWriterHashTable.put(
           Variables.uniStsBaseTableName,
           new FileWriter(Variables.uniStsBaseTableName + "." + m_fileToParse));
       m_fileWriterHashTable.put(
           Variables.uniStsAccessionTableName,
           new FileWriter(Variables.uniStsAccessionTableName + "." + m_fileToParse));
     } else {
       // Alias file: only the alias table is populated.
       m_fileWriterHashTable.put(
           Variables.uniStsAliasTableName,
           new FileWriter(Variables.uniStsAliasTableName + "." + m_fileToParse));
     }
   } catch (IOException ioEx) {
     String message =
         "Uniable to initialize file writers (UniSTS parser): " + ioEx.getLocalizedMessage();
     Logger.log(message, Logger.INFO);
     // Without its writers the parser has nowhere to put records, so report the failure to the
     // caller as documented instead of silently continuing.
     throw new FatalException(message);
   }
 }
コード例 #4
0
ファイル: UniStsParser.java プロジェクト: NCIP/geneconnect
 /**
  * Splits the current line ({@code m_line}) on {@code m_fieldDelimiter} and copies the relevant
  * tokens into the record objects.
  *
  * <p>Base UniSTS file ({@code noAlias == true}): the line is used only when it has at least
  * {@code Constants.uniStsTableCols} tokens. Token 1 goes to field 0 of both the base and the
  * accession record, token 5 to field 1 of the base record, token 7 to field 2 of the accession
  * record, and token 8 is looked up in {@code Variables.hmOrganismLocalId}; the looked-up value
  * goes to field 2 of the base record and field 1 of the accession record. Nothing is written
  * here for this file type.
  *
  * <p>Alias file ({@code noAlias == false}): the line is used only when it has at least {@code
  * Constants.uniStsAliasTableCols} tokens. Token 1 is the id and token 2 the alias list, which is
  * split on {@code semiColonSeperator}; one alias record is written per alias. Insert failures
  * are logged at DEBUG level and do not stop processing.
  *
  * @param noAlias {@code true} for the base UniSTS file, {@code false} for the alias file
  * @throws FatalException declared for the calls made while writing alias records
  */
 private void parseLine(boolean noAlias) throws FatalException {
   m_tokenizer = new StringTokenizer(m_line, m_fieldDelimiter);

   if (noAlias) {
     // Base UniSTS file: lines with too few columns are left untouched.
     if (m_tokenizer.countTokens() < Constants.uniStsTableCols) {
       return;
     }
     int column = 0;
     while (m_tokenizer.hasMoreTokens()) {
       String value = m_tokenizer.nextToken().trim();
       column++;
       switch (column) {
         case 1:
           uniStsRecord.fields[0].append(value);
           uniStsAccNoRecord.fields[0].append(value);
           break;
         case 5:
           uniStsRecord.fields[1].append(value);
           break;
         case 7:
           uniStsAccNoRecord.fields[2].append(value);
           break;
         case 8:
           {
             // NOTE(review): when the organism has no entry in the map the lookup presumably
             // yields null and the text "null" would be appended - confirm this is intended.
             String localTaxid = (String) Variables.hmOrganismLocalId.get(value);
             uniStsRecord.fields[2].append(localTaxid);
             uniStsAccNoRecord.fields[1].append(localTaxid);
             break;
           }
         default:
           break;
       }
     }
     return;
   }

   // Alias file: lines with too few columns are left untouched.
   if (m_tokenizer.countTokens() < Constants.uniStsAliasTableCols) {
     return;
   }
   int column = 0;
   while (m_tokenizer.hasMoreTokens()) {
     String value = m_tokenizer.nextToken().trim();
     column++;
     if (column == 1) {
       uniStsAliasRecord.fields[0].append(value);
       continue;
     }
     if (column != 2) {
       continue;
     }

     StringTokenizer aliases = new StringTokenizer(value, semiColonSeperator);
     if (aliases.countTokens() == 0) {
       // Nothing to split: store the token as it is.
       uniStsAliasRecord.fields[1].append(value);
       try {
         writeRecordToDb(Variables.uniStsAliasTableName, uniStsAliasRecord);
       } catch (InsertException ie) {
         Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG);
       }
     } else {
       // Remember the id, then emit one record per alias that repeats the same id.
       String stsId = uniStsAliasRecord.fields[0].toString();
       uniStsAliasRecord.resetAllFields();
       while (aliases.hasMoreElements()) {
         uniStsAliasRecord.fields[0].append(stsId);
         uniStsAliasRecord.fields[1].append(aliases.nextElement());
         try {
           writeRecordToDb(Variables.uniStsAliasTableName, uniStsAliasRecord);
         } catch (InsertException ie) {
           Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG);
         }
         uniStsAliasRecord.resetAllFields();
       }
     }
     resetRecords(false);
   }
 }
コード例 #5
0
ファイル: UniStsParser.java プロジェクト: NCIP/geneconnect
  /**
   * Parses a downloaded UniSTS data source file.
   *
   * <p>Only the first file name in {@code file} is used. The name decides whether the base UniSTS
   * file or the alias file is being handled; tables, file writers and record objects are then set
   * up accordingly, metadata is written, and every line that does not start with "#" is parsed.
   * For the base file one base record is written per line, plus one accession record for each
   * ";"-separated accession number. Insert failures are logged at DEBUG level and skipped.
   *
   * @param file holder of the file list; the first entry names the file to parse
   * @throws FatalException if reading the next record fails with an I/O error, or if one of the
   *     called setup or parsing steps raises it
   */
  public void parse(FileInfo file) throws FatalException {
    // This parser handles a single file, so only the first name in the list matters.
    String fileName = (String) file.getFiles().firstElement();

    System.out.println("UNISTS parsing started");
    Logger.log(" UniSts::parsing started " + fileName, Logger.INFO);

    // true  -> base UniSTS file (base and accession tables are populated)
    // false -> alias file, recognised by "alias" in its name (alias table is populated)
    boolean noAlias = findFormat(fileName);

    // Register the metadata of every table used for this file type.
    initializeTables(noAlias);
    Logger.log("initilalise tables over", Logger.INFO);

    // One file writer per populated table; writeRecordToDb is later called with the table name.
    createFileWriters(noAlias);
    Logger.log("Create file writers over", Logger.INFO);

    // Record objects that hold the field values of the line currently being parsed.
    createRecords(noAlias);
    Logger.log("create records over", Logger.INFO);

    // The table metadata has to be in the data files before any record is written.
    writeMETADATA();
    Logger.log("Write metadata over", Logger.INFO);

    try {
      while ((m_line = getNextRecord()) != null) {
        if (m_line.startsWith("#")) {
          // Lines starting with '#' are skipped.
          continue;
        }
        parseLine(noAlias);
        if (noAlias) {
          try {
            writeRecordToDb(Variables.uniStsBaseTableName, uniStsRecord);

            // Field 2 holds all accession numbers of the line; write one record for each.
            String allAccessions = uniStsAccNoRecord.fields[2].toString();
            StringTokenizer accessions = new StringTokenizer(allAccessions, ";");
            while (accessions.hasMoreTokens()) {
              String accession = accessions.nextToken();
              uniStsAccNoRecord.fields[2].setLength(0);
              uniStsAccNoRecord.fields[2].append(accession);
              writeRecordToDb(Variables.uniStsAccessionTableName, uniStsAccNoRecord);
            }
          } catch (InsertException ie) {
            Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG);
          }
        }
        // The line is done: clear the records before the next one.
        resetRecords(noAlias);
      }
    } catch (IOException ioex) {
      // An I/O error aborts parsing; the remaining records are not processed.
      Logger.log("Exception in UniSTS parser :" + ioex.getMessage(), Logger.DEBUG);
      throw new FatalException(ioex.getMessage());
    } finally {
      Logger.log(" UNISTS::parsing over. ", Logger.INFO);
    }

    // Remember the revision history value obtained for the parsed file.
    Variables.UniSTSRevisionHistory = getFileRevisionHistory(fileName);
  }
コード例 #6
0
ファイル: JDOMParser.java プロジェクト: NCIP/geneconnect
  /**
   * Parses a single input file, either as a gene-history file or as an XML file.
   *
   * <p>Only the first file name in {@code file} is used. When {@code findFormat} reports a history
   * file, every record is tokenized and handed to {@code EntrezParser.parseGeneHistory}.
   * Otherwise the file is scanned with an {@code ElementScanner}; for the Entrez Gene database
   * type a file whose name does not end in ".xml" is first converted with {@code ConvertToXml},
   * and a listener is registered for the {@code Constants.ENTREZNODE} element.
   *
   * <p>{@code NullPointerException}, {@code OutOfMemoryError}, {@code JDOMException}, {@code
   * SAXException} and {@code IOException} are not rethrown: each one increments {@code
   * Variables.errorCount} and is logged at FATAL level.
   *
   * @param file information about the file to parse; the first entry of its file list is used
   * @exception FatalException declared by the signature; the failures listed above are handled
   *     inside the method instead
   */
  public void parse(FileInfo file) throws FatalException {
    // This parser handles a single file, so only the first name in the list matters.
    String fileName = (String) file.getFiles().firstElement();

    Logger.log("Starting parsing for " + m_dbType + ": " + fileName, Logger.INFO);
    try {
      long startParsingTime = System.currentTimeMillis();
      boolean history = findFormat(fileName);

      if (history) {
        // Gene-history file: plain records, one tokenized line at a time.
        Logger.log("Parsing started for GENE_HISTORY table", Logger.INFO);
        m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
        EntrezParser historyParser = (EntrezParser) m_parserDataObject;
        while ((m_line = getNextRecord()) != null) {
          StringTokenizer recordTokens = new StringTokenizer(m_line);
          historyParser.parseGeneHistory(history, recordTokens);
        }
        m_parserDataObject.closeFileWriters();
      } else {
        // Regular file: scan it as XML without validation.
        ElementScanner xmlScanner = new ElementScanner();
        xmlScanner.setValidation(false);

        // The listener that is registered depends on the database type.
        if (m_dbType.equalsIgnoreCase(Constants.ENTREZGENE)) {
          if (!fileName.endsWith(".xml")) {
            // NOTE(review): ConvertToXml returns null when the conversion fails; that null is
            // passed on unchanged below - confirm this is the intended failure path.
            fileName = ConvertToXml(fileName);
          }
          String convertedFileName = fileName;
          Logger.log("Converted .asn file to XML " + convertedFileName, Logger.WARNING);
          m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
          // Initialise the system_termtree and system_termdata tables.
          startSystemTreeDataFiles();
          ElementListener xmlListener =
              new JDOMXMLListener(m_dbType, convertedFileName, ((EntrezParser) m_parserDataObject));
          // Listen for the 'Entrezgene' node of the XML file.
          xmlScanner.addElementListener(xmlListener, Constants.ENTREZNODE);
        }
        Logger.log(xmlScanner.toString(), Logger.INFO);
        Logger.log("input to xmlscanner " + fileName, Logger.INFO);

        // The scanner notifies the registered listener for every matching node it finds.
        xmlScanner.parse(new InputSource(fileName));

        // Close all open file writer streams, then remove the downloaded file.
        m_parserDataObject.closeFileWriters();
        deleteDownloadedFile(file);
      }

      Logger.log("Parsing over for " + m_dbType + ": " + fileName, Logger.INFO);
      long parsingTime = System.currentTimeMillis() - startParsingTime;
      Logger.log("Parsing Time for: " + m_dbType + ": " + parsingTime, Logger.INFO);
    } catch (NullPointerException nexcp) {
      Variables.errorCount++;
      Logger.log("NullPointerException has occured: " + nexcp.getMessage(), Logger.FATAL);
    } catch (OutOfMemoryError outexcp) {
      Variables.errorCount++;
      Logger.log("OutOfMemoryError has occured: " + outexcp.getMessage(), Logger.FATAL);
    } catch (JDOMException jexcp) {
      Variables.errorCount++;
      Logger.log("JDOMException has occured: " + jexcp.getMessage(), Logger.FATAL);
    } catch (SAXException saxexcp) {
      Variables.errorCount++;
      Logger.log("SAXException has occured: " + saxexcp.getMessage(), Logger.FATAL);
    } catch (IOException ioexcp) {
      Variables.errorCount++;
      Logger.log("IOException has occured: " + ioexcp.getMessage(), Logger.FATAL);
    }
  }
コード例 #7
0
ファイル: JDOMParser.java プロジェクト: NCIP/geneconnect
  /**
   * Converts a gzipped .asn file to XML by running two external programs.
   *
   * <p>The input reader is closed, the file is unpacked in place with {@code gzip -d}, and the
   * unpacked file is converted with the {@code gene2xml} executable located in {@code
   * Variables.currentDir}. The output of {@code gene2xml} is echoed to standard out and the
   * unpacked file is deleted after the conversion.
   *
   * <p>Both programs are started with an argument array, so file names that contain spaces are
   * passed through as single arguments.
   *
   * @param fileName name of the gzipped file; must contain a ".gz" extension
   * @return path of the XML file ({@code fileName + ".xml"}), or {@code null} when the conversion
   *     failed or the thread was interrupted
   * @throws FileNotFoundException declared by the signature for callers
   * @throws IOException if {@code fileName} has no ".gz" extension or the input reader cannot be
   *     closed; I/O failures of the external programs are logged, not thrown
   */
  private String ConvertToXml(String fileName) throws FileNotFoundException, IOException {
    int index = fileName.lastIndexOf(".gz");
    if (index < 0) {
      // substring(0, -1) would fail with an unchecked exception that callers do not handle.
      throw new IOException(
          "Cannot convert " + fileName + " to XML: file name has no .gz extension");
    }
    String outFileName = fileName + ".xml";
    String unzippedFileName = fileName.substring(0, index);
    Logger.log("input file name is " + fileName, Logger.INFO);
    Logger.log("unzipped file name is " + unzippedFileName, Logger.INFO);
    m_inputFileReader.close();
    Runtime run = Runtime.getRuntime();

    try {
      // gzip replaces the input by a file of the same name without the .gz extension.
      Logger.log("gzip -d " + fileName, Logger.DEBUG);
      Process unzip = run.exec(new String[] {"gzip", "-d", fileName});
      // Close the streams of the process; left open they block the creation of streams for the
      // next process.
      unzip.getInputStream().close();
      unzip.getOutputStream().close();
      int unzipExit = unzip.waitFor();
      Logger.log("process exit val " + unzipExit, Logger.INFO);
      if (unzipExit == 0) {
        Logger.log("successfully unzipped file " + fileName, Logger.INFO);
      } else {
        Logger.log(
            "gzip exited with status " + unzipExit + " for file " + fileName, Logger.WARNING);
      }
    } catch (InterruptedException ie) {
      // Keep the interrupt visible to the caller and stop instead of starting the conversion.
      Thread.currentThread().interrupt();
      Logger.log("Exception : Unzipping file " + fileName + " interrupted", Logger.FATAL);
      Logger.log(ie.getMessage(), Logger.FATAL);
      return null;
    } catch (IOException io) {
      // Best effort as before: the conversion below is still attempted.
      Logger.log("IOException : unzipping file " + fileName + " failed", Logger.FATAL);
      Logger.log(io.getMessage(), Logger.FATAL);
    }

    try {
      String executable =
          Variables.currentDir + System.getProperty("file.separator") + "gene2xml";
      String[] arguments =
          new String[] {executable, "-b", "-i", unzippedFileName, "-o", outFileName};
      Logger.log("converting asn file " + fileName + " to xml file " + outFileName, Logger.INFO);
      System.out.println(executable + " -b -i " + unzippedFileName + " -o " + outFileName);

      Process gene2xml = run.exec(arguments);

      // Drain the standard output of the process so that it cannot block on a full pipe.
      // NOTE(review): the error stream of the process is not read - confirm gene2xml never
      // writes enough to it to stall.
      InputStream stdout = gene2xml.getInputStream();
      BufferedReader br = new BufferedReader(new InputStreamReader(stdout));
      try {
        String errCheck = br.readLine();
        System.out.println(errCheck);
        String line = null;
        while ((line = br.readLine()) != null) {
          System.out.println("inside print output line");
          System.out.println(line);
        }
      } finally {
        // Closing the reader also closes the process stream, even when reading failed.
        br.close();
      }

      gene2xml.waitFor();
      Logger.log("completed asn to xml file convertion", Logger.INFO);
      Logger.log("process exit val " + gene2xml.exitValue(), Logger.INFO);
      deleteDownloadedFile(unzippedFileName);
    } catch (IOException io) {
      Logger.log(
          "IOException : Gene to Xml convertion for Entrezegene file " + fileName + " failed",
          Logger.FATAL);
      Logger.log(io.getMessage(), Logger.FATAL);
      outFileName = null;
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
      Logger.log(
          "Exception : Gene to Xml convertion for Entrezegene file " + fileName + " interrupted",
          Logger.FATAL);
      Logger.log(ie.getMessage(), Logger.FATAL);
      outFileName = null;
    }
    Logger.log("Converted file name " + outFileName, Logger.INFO);
    return outFileName;
  }