/**
 * Computes the two-class entropy of the records selected by {@code specifiedAttributes}.
 *
 * <p>Both class proportions are taken relative to {@code records.count(specifiedAttributes)},
 * i.e. the size of the same subset the positive and negative counts are drawn from, so the two
 * proportions sum to one. The guard mirrors the three-argument overload: an empty subset or a
 * subset containing a single class has entropy 0, and it also prevents a 0/0 division.
 *
 * @param specifiedAttributes the attribute selections that restrict which records are counted
 * @return the entropy of the selected subset, or 0 when the subset is empty or single-class
 */
private double entropy(Map<String, String> specifiedAttributes) {
  double totalExamples = records.count(specifiedAttributes);
  double positiveExamples = records.countPositive(specifiedAttributes);
  double negativeExamples = records.countNegative(specifiedAttributes);

  if (positiveExamples == 0 || negativeExamples == 0 || totalExamples == 0) {
    return 0;
  }

  return -nlog2(positiveExamples / totalExamples) - nlog2(negativeExamples / totalExamples);
}
@Override public Records getNext(int maxNumberOfRecords) { ensureBuffered(); if (!it.hasNext() && buffer.isEndOfShard()) { return new Records(ImmutableList.<Record>of(), true); } ImmutableList.Builder<Record> recs = new ImmutableList.Builder<>(); int recsSize = 0; while (recsSize < maxNumberOfRecords) { if (it.hasNext()) { recs.add(it.next()); recsSize++; } else if (!it.hasNext() && !buffer.isEndOfShard()) { rebuffer(); // No more data in shard. if (!it.hasNext()) { break; } } else { // No more records, end of shard. break; } } return new Records(recs.build(), false); }
/**
 * Reads {@code test.xml}, maps every {@code RECORD} element to a {@code RecordBean}, and prints
 * the element count followed by the JSON rendering of the resulting {@code Records}.
 *
 * @param args unused
 */
public static void main(String[] args) {
  try {
    Records records = new Records();
    List<RecordBean> beans = new ArrayList<RecordBean>();

    Document document = new SAXReader().read("test.xml");
    List<Element> recordNodes = document.getRootElement().selectNodes("//RECORD");
    int nodeCount = recordNodes.size();
    System.out.println(nodeCount);

    for (int i = 0; i < nodeCount; i++) {
      Element node = recordNodes.get(i);
      RecordBean bean = new RecordBean();
      bean.setAuthor(node.elementText("FRatingNum"));
      bean.setImage(node.elementText("FDesc"));
      bean.setTitle(node.elementText("FTitle"));
      bean.setItemId(node.elementText("FItemId"));
      beans.add(bean);
    }

    records.setRecords(beans);
    System.out.println(JSONObject.fromObject(records).toString());
  } catch (DocumentException e) {
    e.printStackTrace();
  }
}
/**
 * Builds a {@code Records} collection with one entry per row the cursor advances to.
 *
 * <p>The cursor is only advanced here; it is not closed by this method.
 *
 * @param cursor the cursor to read rows from
 * @return the records built from the rows visited
 */
private Records recordsFromCursor(Cursor cursor) {
  final Records result = new Records();
  for (boolean hasRow = cursor.moveToNext(); hasRow; hasRow = cursor.moveToNext()) {
    result.add(buildRecord(cursor));
  }
  return result;
}
private double entropy( String attribute, String decision, Map<String, String> specifiedAttributes) { double totalExamples = records.count(attribute, decision, specifiedAttributes); double positiveExamples = records.countPositive(attribute, decision, specifiedAttributes); double negativeExamples = records.countNegative(attribute, decision, specifiedAttributes); // logger.info("positiveExamples is --> {}.", positiveExamples); // logger.info("negativeExamples is --> {}.", negativeExamples); // logger.info("totalExamples is --> {}.", totalExamples); if (positiveExamples == 0 || negativeExamples == 0 || totalExamples == 0) return 0; return -nlog2(positiveExamples / totalExamples) - nlog2(negativeExamples / totalExamples); }
private double informationGain(String attribute, Map<String, String> specifiedAttributes) { double sum = entropy(specifiedAttributes); double examplesCount = records.count(specifiedAttributes); if (examplesCount == 0) return sum; Map<String, Set<String>> decisions = records.extractDecisions(); for (String decision : decisions.get(attribute)) { double entropyPart = entropy(attribute, decision, specifiedAttributes); // logger.info("entropyPart is --> {}.", entropyPart); double decisionCount = records.countDecisions(attribute, decision); sum += -(decisionCount / examplesCount) * entropyPart; } return sum; }
/** * Returns the next attribute to be chosen. * * <p>chosenAttributes represents the decision path from the root attribute to the node under * consideration. usedAttributes is the set of all attributes that have been incorporated into the * tree prior to this call to nextAttribute(), even if the attributes were not used in the path to * the node under consideration. * * <p>Results are undefined if records.count() == 0. */ public Attribute nextAttribute(Map<String, String> chosenAttributes, Set<String> usedAttributes) { double currentGain = 0.0, bestGain = 0.0; String bestAttribute = ""; /* * If there are no positive records for the already chosen attributes, * then return a false classifier leaf. If no negative records, * then return a true classifier leaf. */ if (records.countPositive(chosenAttributes) == 0) return new Attribute(false); else if (records.countNegative(chosenAttributes) == 0) return new Attribute(true); logger.info( "Choosing attribute out of {} remaining attributes.", remainingAttributes(usedAttributes).size()); logger.info("Already chosen attributes/decisions are {}.", chosenAttributes); for (String attribute : remainingAttributes(usedAttributes)) { // for each remaining attribute, determine the information gain of using it // to choose among the records selected by the chosenAttributes // if none give any information gain, return a leaf attribute, // otherwise return the found attribute as a non-leaf attribute currentGain = informationGain(attribute, chosenAttributes); logger.info("Evaluating attribute {}, information gain is {}", attribute, currentGain); if (currentGain > bestGain) { bestAttribute = attribute; bestGain = currentGain; } } // If no attribute gives information gain, generate leaf attribute. // Leaf is true if there are any true classifiers. // If there is at least one negative example, then the information gain // would be greater than 0. 
if (bestGain == 0.0) { boolean classifier = records.countPositive(chosenAttributes) > 0; logger.warn("Creating new leaf attribute with classifier {}.", classifier); return new Attribute(classifier); } else { logger.info("Creating new non-leaf attribute {}.", bestAttribute); return new Attribute(bestAttribute); } }
// Post : buffer != null && it != null private void rebuffer() { buffer = getter.getNext(maxBufferSize); it = buffer.getRecords().iterator(); }
/**
 * Deletes from {@code TABLE_NAME} every row whose {@code id} appears in the given records.
 *
 * <p>NOTE(review): the id list is spliced into the SQL text as returned by
 * {@code getIdsAsString()}; presumably it contains only numeric ids - confirm, since the
 * statement is not parameterised.
 *
 * @param records the records whose ids select the rows to delete
 */
@Override
public void delete(Records records) {
  StringBuilder sql = new StringBuilder("DELETE FROM ");
  sql.append(TABLE_NAME)
      .append(" WHERE id IN (")
      .append(records.getIdsAsString())
      .append(")");
  db.execSQL(sql.toString());
}
/** * main - writes some data and checks the tables size (with time measureing) * * @param args */ public static void main(final String[] args) { // open a file, add one entry and exit final File f = new File(args[0]); if (f.exists()) FileUtils.deletedelete(f); try { final Records t = new Records(f, 8); final byte[] b = new byte[8]; t.add("01234567".getBytes(), 0); t.add("ABCDEFGH".getBytes(), 0); t.add("abcdefgh".getBytes(), 0); t.add("--------".getBytes(), 0); t.add("********".getBytes(), 0); for (int i = 0; i < 1000; i++) t.add("++++++++".getBytes(), 0); t.add("=======0".getBytes(), 0); t.add("=======1".getBytes(), 0); t.add("=======2".getBytes(), 0); t.cleanLast(b, 0); System.out.println(UTF8.String(b)); t.cleanLast(b, 0); // t.clean(2, b, 0); System.out.println(UTF8.String(b)); t.get(1, b, 0); System.out.println(UTF8.String(b)); t.put(1, "AbCdEfGh".getBytes(), 0); t.get(1, b, 0); System.out.println(UTF8.String(b)); t.get(3, b, 0); System.out.println(UTF8.String(b)); t.get(4, b, 0); System.out.println(UTF8.String(b)); System.out.println("size = " + t.size()); // t.clean(t.size() - 2); t.cleanLast(); final long start = System.currentTimeMillis(); long c = 0; for (int i = 0; i < 100000; i++) { c = t.size(); } System.out.println( "size() needs " + ((System.currentTimeMillis() - start) / 100) + " nanoseconds"); System.out.println("size = " + c); t.close(); } catch (final IOException e) { ConcurrentLog.logException(e); } }
/**
 * Returns the attributes extracted from the records that are not in {@code usedAttributes}.
 *
 * <p>NOTE(review): the set returned by {@code records.extractAttributes()} is modified in place;
 * presumably it is a fresh copy on every call - confirm against its implementation.
 *
 * @param usedAttributes the attributes to exclude
 * @return the extracted attributes minus {@code usedAttributes}
 */
private Set<String> remainingAttributes(Set<String> usedAttributes) {
  final Set<String> candidates = records.extractAttributes();
  candidates.removeAll(usedAttributes);
  return candidates;
}