/** * Divide the given record into sub-records. * * @return the sub-records or null if the record cannot be divided by this strategy. */ public List<Record> divide(Record record) { List<Record> result = null; // // algorithm: // - find repeating text with repeating paths using the view. // - group the most (equally) frequent paths into records. // - distribute same paths across records. // final PathHistogram pathHistogram = new PathHistogram(record); final PathHistogram.FieldGroupCollector fieldGroupCollector = new PathHistogram.FieldGroupCollector(); for (Iterator<PathHistogram.PathGroup> pgIter = pathHistogram.iterator(); pgIter.hasNext(); ) { final PathHistogram.PathGroup pathGroup = pgIter.next(); if (!accept(pathGroup)) continue; final boolean complete = fieldGroupCollector.addAll(pathGroup); if (accept(fieldGroupCollector)) { result = fieldGroupCollector.getRecords(false /*includeIncomplete*/); if (accept(result)) break; } else { // zero out for another try fieldGroupCollector.clear(); } } return result; }