/**
   * Reads documents from an input table, converts them, and stores the results in an output table.
   *
   * <p>Options read from {@code ap}:
   *
   * <ul>
   *   <li>{@code manual} - optional list of files; for each one the first document produced by an
   *       {@code Elsevier} reader is printed to standard output.
   *   <li>{@code inDB} / {@code outDB} - database names, both defaulting to {@code "articles"}.
   *   <li>{@code inTable} / {@code outTable} - source and destination tables.
   *   <li>{@code threads} - passed to the {@code IteratorBasedMaster}, default 1.
   *   <li>{@code report} - progress is logged every {@code report} documents; any value that is
   *       not positive (default -1) disables progress logging.
   * </ul>
   *
   * <p>Any exception is printed to standard error and terminates the JVM with status -1.
   *
   * @param ap parsed command-line arguments
   * @param logger logger used for the database connections and for progress messages
   */
  public void run(ArgParser ap, Logger logger) {
    String[] files = ap.gets("manual");
    if (files != null) {
      for (String f : files) {
        Document d = new Elsevier("test", f, null).next();
        if (d != null) {
          System.out.println(d.toString());
        }
        System.out.println();
      }
    }

    String inTable = ap.get("inTable");
    String outTable = ap.get("outTable");

    // Declared outside the try block so that the finally block can release them.
    Connection inputConn = null;
    Connection outputConn = null;

    try {
      inputConn = SQL.connectMySQL(ap, logger, ap.get("inDB", "articles"));

      // Forward-only + read-only + fetch size Integer.MIN_VALUE is how MySQL Connector/J is asked
      // to stream rows one at a time instead of buffering the whole result set in memory.
      Statement stmt =
          inputConn.createStatement(
              java.sql.ResultSet.TYPE_FORWARD_ONLY, java.sql.ResultSet.CONCUR_READ_ONLY);
      stmt.setFetchSize(Integer.MIN_VALUE);
      ResultSet rs = stmt.executeQuery("SELECT doi, xml from " + inTable);

      // NOTE(review): the meaning of the third constructor argument (1000) is not visible here.
      EProblemIterator epi = new EProblemIterator(rs);
      IteratorBasedMaster<Document> master =
          new IteratorBasedMaster<Document>(epi, ap.getInt("threads", 1), 1000);
      master.startThread();

      outputConn = SQL.connectMySQL(ap, logger, ap.get("outDB", "articles"));
      PreparedStatement pstmt = Document.prepareInsertStatements(outputConn, outTable);

      int report = ap.getInt("report", -1);
      int c = 0;
      int c_null = 0;

      for (Document d : master) {
        if (d != null) {
          d.saveToDB(pstmt);
        } else {
          c_null++;
        }

        // report > 0 (rather than != -1) so that a report interval of 0 cannot cause a
        // division by zero in the modulo below.
        if (report > 0 && ++c % report == 0) {
          logger.info("%t: Processed " + c + " documents (" + c_null + " errors)\n");
        }
      }

    } catch (Exception e) {
      System.err.println(e);
      e.printStackTrace();
      System.exit(-1);
    } finally {
      // Closing a connection also releases the statements and result sets created from it.
      for (Connection conn : new Connection[] {inputConn, outputConn}) {
        if (conn != null) {
          try {
            conn.close();
          } catch (Exception closeError) {
            System.err.println(closeError);
          }
        }
      }
    }
  }
  /**
   * Recreates {@code table} and writes one row per entry of {@code data} into it.
   *
   * <p>The table is dropped if it exists and created again with the fixed columns {@code hash},
   * {@code count} and {@code sum_confidence} plus one indexed {@code VARCHAR(255)} column for each
   * name in {@code columns}. Each map key is stored as {@code hash}; the remaining values come
   * from the associated {@code UniqueEvent}.
   *
   * <p>Any exception is printed to standard error and terminates the JVM with status -1. The
   * connection itself is left open for the caller.
   *
   * @param data events keyed by hash; must not be null
   * @param conn open connection to the target database
   * @param table name of the table to (re)create and fill
   */
  private static void store(
      Map<Integer, UniqueEvent> data, java.sql.Connection conn, String table) {

    // Fail before touching the database: otherwise the existing table would be dropped and
    // recreated empty before the missing data was noticed.
    if (data == null) {
      System.err.println("store: no data given, leaving table `" + table + "` untouched");
      System.exit(-1);
    }

    Statement stmt = null;
    PreparedStatement pstmt = null;

    try {
      stmt = conn.createStatement();
      stmt.execute("DROP TABLE IF EXISTS `" + table + "`;");

      StringBuilder create = new StringBuilder();
      create
          .append("CREATE TABLE `")
          .append(table)
          .append("` (")
          .append("`hash` INTEGER NOT NULL,")
          .append("`count` INTEGER UNSIGNED NOT NULL,")
          .append("`sum_confidence` DOUBLE UNSIGNED NOT NULL,");
      for (String k : columns) {
        create.append("`").append(k).append("` VARCHAR(255), KEY `").append(k).append("` (`");
        create.append(k).append("`),");
      }
      create.append("PRIMARY KEY (`hash`)) ENGINE = MyISAM;");
      stmt.execute(create.toString());

      // Identifiers are backtick-quoted exactly as in the CREATE statement above, so both
      // statements always refer to the same table and columns.
      StringBuilder insert = new StringBuilder();
      insert.append("INSERT INTO `").append(table).append("` (`hash`,`count`,`sum_confidence`");
      for (String k : columns) {
        insert.append(",`").append(k).append("`");
      }
      insert.append(") VALUES (?,?,?");
      insert.append(Misc.replicateString(",?", columns.length));
      insert.append(")");
      pstmt = conn.prepareStatement(insert.toString());

      for (Map.Entry<Integer, UniqueEvent> entry : data.entrySet()) {
        UniqueEvent event = entry.getValue();

        SQL.set(pstmt, 1, entry.getKey());
        SQL.set(pstmt, 2, event.getCountDocs());
        SQL.set(pstmt, 3, event.getSumConfidences());

        // Parameters 4.. receive the event's data values in iteration order.
        int c = 4;
        for (String v : event.getData()) {
          SQL.set(pstmt, c++, v);
        }

        pstmt.execute();
      }
    } catch (Exception e) {
      System.err.println(e);
      e.printStackTrace();
      System.exit(-1);
    } finally {
      for (Statement s : new Statement[] {pstmt, stmt}) {
        if (s != null) {
          try {
            s.close();
          } catch (Exception closeError) {
            System.err.println(closeError);
          }
        }
      }
    }
  }
  /**
   * Command-line entry point. Connects to the {@code "farzin"} database and runs the steps
   * selected by the given options, in this order:
   *
   * <ul>
   *   <li>{@code collapse} - build the event data with {@code collapseData}.
   *   <li>{@code load} - read the event data with {@code load}; replaces collapsed data if both
   *       options are given.
   *   <li>{@code store} - write the event data to the table named by the option value.
   *   <li>{@code contradictions} - compute contradictions, then optionally run
   *       {@code countTypes}, {@code sentences} and {@code dumpContradictions} on them.
   *   <li>{@code contrasts} - currently has no effect.
   * </ul>
   *
   * <p>The options {@code where} and {@code limit} are passed through to the collapse and load
   * steps. {@code store} and {@code contradictions} need data from {@code collapse} or
   * {@code load}; without it the program reports the problem and exits with status -1.
   *
   * @param args command-line arguments
   */
  public static void main(String[] args) {
    ArgParser ap = ArgParser.getParser(args);
    Logger logger = Loggers.getDefaultLogger(ap);

    java.sql.Connection conn = SQL.connectMySQL(ap, logger, "farzin");

    try {
      String where = ap.get("where");
      String limit = ap.get("limit");

      Map<Integer, UniqueEvent> data = null;

      if (ap.containsKey("collapse")) {
        System.out.println("Collapsing universe data... ");
        data = collapseData(conn, where, limit);
        System.out.println("Done. Read data for " + data.size() + " hashes.");
      }

      if (ap.containsKey("load")) {
        System.out.println("Loading... ");
        data = load(conn, ap.get("load"), where, limit);
        System.out.println("Done. Read data for " + data.size() + " hashes.");
      }

      // Both steps below dereference the data; stop with a clear message instead of a
      // NullPointerException when neither 'collapse' nor 'load' produced any.
      boolean needsData = ap.containsKey("store") || ap.containsKey("contradictions");
      if (needsData && data == null) {
        System.err.println(
            "No data available: 'store' and 'contradictions' require 'collapse' or 'load'.");
        System.exit(-1);
      }

      if (ap.containsKey("store")) {
        System.out.println("Storing... ");
        store(data, conn, ap.get("store"));
        System.out.println("Done. Stored data for " + data.size() + " hashes.");
      }

      if (ap.containsKey("contradictions")) {
        System.out.println("Calculating contradictions...");
        List<Pair<Integer>> contradictions = contradictions(data);
        System.out.println("Done. Detected " + contradictions.size() + " contradictions.");

        if (ap.containsKey("countTypes")) {
          System.out.println("Counting...");
          countTypes(contradictions, data);
        }

        if (ap.containsKey("sentences")) {
          System.out.println("Printing sentences...");
          printContradictorySentences(conn, ap.getInt("sentences"), contradictions, data);
        }

        if (ap.containsKey("dumpContradictions")) {
          System.out.println("Dumping...");
          dumpContradictions(contradictions, data, ap.getFile("dumpContradictions"));
        }
      }

      if (ap.containsKey("contrasts")) {
        // Disabled: the option is accepted but does nothing at the moment.
        //			contrasts(data, conn, ap.getInt("sentences",-1));
      }
    } finally {
      if (conn != null) {
        try {
          conn.close();
        } catch (Exception closeError) {
          System.err.println(closeError);
        }
      }
    }
  }
 /**
  * Runs the prepared query for one hash and prints every returned row as an HTML list item.
  *
  * <p>The hash is bound as a string to parameter 1. For each row, column 2 is printed first,
  * followed by column 1 with line breaks replaced by spaces.
  *
  * <p>Any exception is printed to standard error and terminates the JVM with status -1.
  *
  * @param pstmt prepared query taking the hash as its first parameter
  * @param hash hash whose rows should be printed
  */
 private static void printSentences(PreparedStatement pstmt, int hash) {
   SQL.set(pstmt, 1, "" + hash);

   ResultSet rs = null;
   try {
     rs = pstmt.executeQuery();
     while (rs.next()) {
       System.out.print(
           "<li>" + rs.getString(2) + ": " + rs.getString(1).replace('\n', ' ') + "<br>");
     }
   } catch (Exception e) {
     System.err.println(e);
     e.printStackTrace();
     // Non-zero status: exiting with 0 would report a failed run as successful.
     System.exit(-1);
   } finally {
     // The statement is reused by the caller, so only the result set is released here.
     if (rs != null) {
       try {
         rs.close();
       } catch (Exception closeError) {
         System.err.println(closeError);
       }
     }
   }
 }
Exemplo n.º 5
0
  /**
   * Binds this document's fields to the given insert statement and executes it.
   *
   * <p>Parameters are bound in this order: 1 xml, 2 external id, 3 external id source, 4 title,
   * 5 abstract, 6 body, 7 raw content, 8 raw type, 9 type, 10 authors joined with {@code "|"},
   * 11 year, 12 journal ISSN, 13 volume, 14 issue, 15 pages. Source, raw type and type are stored
   * as lower-case text. Missing values are bound as null.
   *
   * <p>Generated keys are not read back. Any exception is printed to standard error and
   * terminates the JVM with status -1.
   *
   * @param pstmt insert statement with (at least) the 15 parameters described above
   */
  public void saveToDB(PreparedStatement pstmt) {
    try {
      SQL.set(pstmt, 1, xml);
      SQL.set(pstmt, 2, externalID != null ? externalID.getID() : null);
      SQL.set(pstmt, 3, externalID != null ? lowerCaseOrNull(externalID.getSource()) : null);

      SQL.set(pstmt, 4, title);
      SQL.set(pstmt, 5, abs);
      SQL.set(pstmt, 6, body);
      SQL.set(pstmt, 7, rawContent);
      SQL.set(pstmt, 8, lowerCaseOrNull(raw_type));

      SQL.set(pstmt, 9, lowerCaseOrNull(type));

      if (authors != null) {
        SQL.set(pstmt, 10, Misc.implode(authors, "|"));
      } else {
        SQL.set(pstmt, 10, (String) null);
      }

      SQL.set(pstmt, 11, year);
      SQL.set(pstmt, 12, journal != null ? journal.getISSN() : null);
      SQL.set(pstmt, 13, volume);
      SQL.set(pstmt, 14, issue);
      SQL.set(pstmt, 15, pages);

      pstmt.execute();

    } catch (Exception e) {
      System.err.println(e);
      e.printStackTrace();
      System.exit(-1);
    }
  }

  /**
   * Returns the lower-cased string form of {@code value}, or null when {@code value} is null.
   *
   * <p>Uses the root locale so the stored text does not depend on the JVM's default locale (for
   * example, under a Turkish locale "I" would otherwise become a dotless "ı").
   */
  private static String lowerCaseOrNull(Object value) {
    return value != null ? value.toString().toLowerCase(java.util.Locale.ROOT) : null;
  }