/**
 * Computes the entropy term for the records selected by {@code specifiedAttributes}.
 *
 * <p>The result is {@code -nlog2(p / total) - nlog2(n / total)}, where {@code p} and {@code n}
 * are the positive and negative counts for {@code specifiedAttributes} and {@code total} is
 * {@code records.count()}.
 *
 * <p>NOTE(review): {@code total} is the unfiltered {@code records.count()}, while the counts in
 * the numerators are filtered by {@code specifiedAttributes} - confirm this is intended.
 *
 * @param specifiedAttributes attribute/decision pairs passed through to the positive and
 *     negative counts
 * @return the entropy value, or 0 when there are no records at all
 */
private double entropy(Map<String, String> specifiedAttributes) {
  double totalExamples = records.count();
  // With no records both ratios below would be 0/0 (NaN); report zero entropy instead,
  // matching the guard used by the three-argument overload.
  if (totalExamples == 0) {
    return 0;
  }

  double positiveExamples = records.countPositive(specifiedAttributes);
  double negativeExamples = records.countNegative(specifiedAttributes);

  return -nlog2(positiveExamples / totalExamples) - nlog2(negativeExamples / totalExamples);
}
  /**
   * Returns a batch of at most {@code maxNumberOfRecords} records, refilling the buffer through
   * {@code rebuffer()} whenever the current iterator runs dry.
   *
   * <p>If, right after {@code ensureBuffered()}, the iterator is empty and the buffer reports end
   * of shard, an empty batch constructed with {@code true} is returned. In every other case the
   * records gathered so far (possibly none, possibly fewer than requested) are returned in a
   * batch constructed with {@code false}.
   *
   * @param maxNumberOfRecords upper bound on the number of records collected
   * @return the collected records
   */
  @Override
  public Records getNext(int maxNumberOfRecords) {
    ensureBuffered();

    boolean nothingLeft = !it.hasNext() && buffer.isEndOfShard();
    if (nothingLeft) {
      return new Records(ImmutableList.<Record>of(), true);
    }

    ImmutableList.Builder<Record> batch = new ImmutableList.Builder<>();
    int collected = 0;

    while (collected < maxNumberOfRecords) {
      if (!it.hasNext()) {
        if (buffer.isEndOfShard()) {
          // No more records, end of shard.
          break;
        }
        rebuffer();
        if (!it.hasNext()) {
          // The refill produced nothing; stop with what we have.
          break;
        }
        continue;
      }
      batch.add(it.next());
      collected++;
    }

    return new Records(batch.build(), false);
  }
  // Example #3
  // 0
  /**
   * Reads {@code test.xml}, converts every node matched by {@code //RECORD} into a
   * {@code RecordBean}, and prints the node count followed by the JSON form of the resulting
   * {@code Records} object.
   *
   * <p>NOTE(review): the author is filled from {@code FRatingNum} and the image from
   * {@code FDesc} - confirm that this mapping is intended.
   *
   * @param args unused
   */
  public static void main(String[] args) {

    try {
      Records records = new Records();
      List<RecordBean> beans = new ArrayList<RecordBean>();

      Document document = new SAXReader().read("test.xml");
      List<Element> recordNodes = document.getRootElement().selectNodes("//RECORD");
      System.out.println(recordNodes.size());

      for (Element node : recordNodes) {
        RecordBean bean = new RecordBean();
        bean.setAuthor(node.elementText("FRatingNum"));
        bean.setImage(node.elementText("FDesc"));
        bean.setTitle(node.elementText("FTitle"));
        bean.setItemId(node.elementText("FItemId"));
        beans.add(bean);
      }

      records.setRecords(beans);
      System.out.println(JSONObject.fromObject(records).toString());
    } catch (DocumentException e) {
      e.printStackTrace();
    }
  }
 /**
  * Builds a {@code Records} collection by advancing {@code cursor} until {@code moveToNext()}
  * returns false, adding one record built by {@code buildRecord} per position.
  *
  * <p>The cursor is not closed here.
  *
  * @param cursor the cursor to read from
  * @return the records collected from the cursor, in cursor order
  */
 private Records recordsFromCursor(Cursor cursor) {
   Records collected = new Records();
   for (boolean hasRow = cursor.moveToNext(); hasRow; hasRow = cursor.moveToNext()) {
     collected.add(buildRecord(cursor));
   }
   return collected;
 }
 /**
  * Computes the entropy term for the records counted under the given attribute/decision pair
  * and {@code specifiedAttributes}.
  *
  * @param attribute the attribute being evaluated
  * @param decision the decision (value) of {@code attribute} being evaluated
  * @param specifiedAttributes attribute/decision pairs passed through to the counts
  * @return 0 when the positive, negative or total count is zero; otherwise
  *     {@code -nlog2(positive / total) - nlog2(negative / total)}
  */
 private double entropy(
     String attribute, String decision, Map<String, String> specifiedAttributes) {
   double total = records.count(attribute, decision, specifiedAttributes);
   double positive = records.countPositive(attribute, decision, specifiedAttributes);
   double negative = records.countNegative(attribute, decision, specifiedAttributes);

   // A zero count on any side means there is nothing to weigh; this also avoids dividing by zero.
   boolean degenerate = positive == 0 || negative == 0 || total == 0;
   if (degenerate) {
     return 0;
   }

   double positiveShare = positive / total;
   double negativeShare = negative / total;
   return -nlog2(positiveShare) - nlog2(negativeShare);
 }
  /**
   * Computes the information gain of splitting on {@code attribute} among the records selected
   * by {@code specifiedAttributes}.
   *
   * <p>Starts from {@code entropy(specifiedAttributes)} and subtracts, for every decision of
   * {@code attribute}, that decision's entropy weighted by
   * {@code countDecisions(attribute, decision) / count(specifiedAttributes)}. When
   * {@code count(specifiedAttributes)} is zero the starting entropy is returned unchanged.
   *
   * <p>NOTE(review): the per-decision count is not filtered by {@code specifiedAttributes} while
   * the denominator is - confirm this is intended.
   *
   * @param attribute the candidate attribute to split on
   * @param specifiedAttributes attribute/decision pairs already chosen
   * @return the information gain for {@code attribute}
   */
  private double informationGain(String attribute, Map<String, String> specifiedAttributes) {
    double gain = entropy(specifiedAttributes);
    double examplesCount = records.count(specifiedAttributes);
    if (examplesCount == 0) {
      return gain;
    }

    Set<String> attributeDecisions = records.extractDecisions().get(attribute);
    for (String decision : attributeDecisions) {
      double entropyPart = entropy(attribute, decision, specifiedAttributes);
      double weight = records.countDecisions(attribute, decision) / examplesCount;
      gain -= weight * entropyPart;
    }

    return gain;
  }
  /**
   * Picks the attribute to place at the node reached via {@code chosenAttributes}.
   *
   * <p>{@code chosenAttributes} is the decision path from the root attribute down to the node
   * under consideration. {@code usedAttributes} holds every attribute already incorporated into
   * the tree before this call, including attributes that do not lie on the path to this node.
   *
   * <p>A leaf is returned when the selected records contain no positive examples (classifier
   * false), no negative examples (classifier true), or when no remaining attribute yields an
   * information gain above zero. Otherwise the remaining attribute with the highest information
   * gain is returned as a non-leaf attribute; on ties the first one encountered wins.
   *
   * <p>Results are undefined if records.count() == 0.
   *
   * @param chosenAttributes attribute/decision pairs on the path to this node
   * @param usedAttributes attributes that may no longer be chosen
   * @return a leaf attribute carrying a classifier, or a non-leaf attribute naming the best split
   */
  public Attribute nextAttribute(Map<String, String> chosenAttributes, Set<String> usedAttributes) {
    // Pure subsets need no further splitting: all-negative -> false leaf, all-positive -> true leaf.
    if (records.countPositive(chosenAttributes) == 0) {
      return new Attribute(false);
    }
    if (records.countNegative(chosenAttributes) == 0) {
      return new Attribute(true);
    }

    logger.info(
        "Choosing attribute out of {} remaining attributes.",
        remainingAttributes(usedAttributes).size());
    logger.info("Already chosen attributes/decisions are {}.", chosenAttributes);

    // Score every remaining attribute by the information gain it gives on the records
    // selected by chosenAttributes, keeping the strictly best one.
    String winner = "";
    double winnerGain = 0.0;
    for (String candidate : remainingAttributes(usedAttributes)) {
      double gain = informationGain(candidate, chosenAttributes);
      logger.info("Evaluating attribute {}, information gain is {}", candidate, gain);
      if (gain > winnerGain) {
        winnerGain = gain;
        winner = candidate;
      }
    }

    if (winnerGain != 0.0) {
      logger.info("Creating new non-leaf attribute {}.", winner);
      return new Attribute(winner);
    }

    // No attribute gives any information gain: fall back to a leaf that is true
    // if there are any positive examples.
    boolean classifier = records.countPositive(chosenAttributes) > 0;
    logger.warn("Creating new leaf attribute with classifier {}.", classifier);
    return new Attribute(classifier);
  }
 /**
  * Replaces the buffer with the next batch fetched from {@code getter} (at most
  * {@code maxBufferSize} records requested) and points {@code it} at the start of that batch.
  *
  * <p>Post: buffer != null &amp;&amp; it != null
  */
 private void rebuffer() {
   this.buffer = getter.getNext(maxBufferSize);
   this.it = this.buffer.getRecords().iterator();
 }
 /**
  * Deletes from {@code TABLE_NAME} every row whose {@code id} appears in
  * {@code records.getIdsAsString()}.
  *
  * <p>NOTE(review): the id list is concatenated into the SQL text as-is; this presumes
  * {@code getIdsAsString()} yields a comma-separated list of trusted ids - confirm.
  *
  * @param records the records whose rows should be removed
  */
 @Override
 public void delete(Records records) {
   String idList = records.getIdsAsString();
   db.execSQL("DELETE FROM " + TABLE_NAME + " WHERE id IN (" + idList + ")");
 }
  /**
   * Manual smoke test: writes some data, reads parts of it back, and measures how long
   * {@code size()} takes on the table.
   *
   * <p>The table file is recreated from scratch: an existing file at the given path is deleted
   * first. The table is closed even when one of the operations fails.
   *
   * @param args {@code args[0]} is the path of the table file
   */
  public static void main(final String[] args) {
    if (args.length < 1) {
      System.err.println("missing argument: path of the records file");
      return;
    }

    // open a file, add some entries and exit
    final File f = new File(args[0]);
    if (f.exists()) FileUtils.deletedelete(f);
    try {
      final Records t = new Records(f, 8);
      try {
        final byte[] b = new byte[8];
        t.add("01234567".getBytes(), 0);
        t.add("ABCDEFGH".getBytes(), 0);
        t.add("abcdefgh".getBytes(), 0);
        t.add("--------".getBytes(), 0);
        t.add("********".getBytes(), 0);
        for (int i = 0; i < 1000; i++) t.add("++++++++".getBytes(), 0);
        t.add("=======0".getBytes(), 0);
        t.add("=======1".getBytes(), 0);
        t.add("=======2".getBytes(), 0);
        t.cleanLast(b, 0);
        System.out.println(UTF8.String(b));
        t.cleanLast(b, 0);
        System.out.println(UTF8.String(b));
        t.get(1, b, 0);
        System.out.println(UTF8.String(b));
        t.put(1, "AbCdEfGh".getBytes(), 0);
        t.get(1, b, 0);
        System.out.println(UTF8.String(b));
        t.get(3, b, 0);
        System.out.println(UTF8.String(b));
        t.get(4, b, 0);
        System.out.println(UTF8.String(b));
        System.out.println("size = " + t.size());
        t.cleanLast();

        // Time size() with the nanosecond clock so the printed per-call average really is
        // in the unit the message names.
        final int iterations = 100000;
        final long start = System.nanoTime();
        long c = 0;
        for (int i = 0; i < iterations; i++) {
          c = t.size();
        }
        System.out.println(
            "size() needs " + ((System.nanoTime() - start) / iterations) + " nanoseconds");
        System.out.println("size = " + c);
      } finally {
        // Release the table even if one of the operations above threw.
        t.close();
      }
    } catch (final IOException e) {
      ConcurrentLog.logException(e);
    }
  }
 /**
  * Returns the attributes reported by {@code records.extractAttributes()} minus those in
  * {@code usedAttributes}.
  *
  * <p>NOTE(review): the set returned by {@code extractAttributes()} is modified in place; this
  * presumes it is a fresh, mutable set on every call - confirm.
  *
  * @param usedAttributes attributes to exclude
  * @return the attributes still available for selection
  */
 private Set<String> remainingAttributes(Set<String> usedAttributes) {
   Set<String> candidates = records.extractAttributes();
   candidates.removeAll(usedAttributes);
   return candidates;
 }