/**
 * Populates every term in {@code terms} with the term info produced by a
 * {@link TermInfoCollector} built over the given reader and field names.
 *
 * <p>For each term, the collector is queried with the term's string form and the
 * result is stored back on the term via {@code setTermInfo}. Progress is logged
 * once at the start (with the total number of terms) and then after every 500
 * processed terms.
 *
 * @param leafReader         index reader handed to the {@code TermInfoCollector}
 * @param terms              terms to enrich; each element is mutated in place
 * @param ngramInfoFieldname field name passed through to the collector
 * @param idFieldname        field name passed through to the collector
 * @throws IOException declared by this method; presumably raised by the collector
 *                     while reading the index (confirm against TermInfoCollector)
 */
public void collectTermInfo(LeafReader leafReader,
                            List<JATETerm> terms,
                            String ngramInfoFieldname,
                            String idFieldname) throws IOException {
    Logger log = Logger.getLogger(this.getClass().getName());
    TermInfoCollector collector =
            new TermInfoCollector(leafReader, ngramInfoFieldname, idFieldname);

    int total = terms.size();
    log.info("Gathering term information (e.g., provenance and offsets). This may take a while. Total="
            + total);

    int processed = 0;
    for (JATETerm term : terms) {
        String termString = term.getString();
        term.setTermInfo(collector.collect(termString));

        // Emit a progress line after every 500th term.
        processed++;
        if (processed % 500 == 0) {
            log.info("done " + processed);
        }
    }
}
/**
 * Selects a subset of {@code terms} using an optional minimum-score threshold and an
 * optional top-N cut-off.
 *
 * <p>Step 1 (score filter): if {@code t} parses as a double, terms are taken from the
 * front of {@code terms} for as long as their score is {@code >= t}; scanning stops at
 * the first term below the threshold (this presumably relies on {@code terms} being
 * sorted by descending score — confirm against callers). An unparsable {@code t} is
 * ignored.
 *
 * <p>Step 2 (top-N cut-off): if {@code n} is {@code null} and step 1 selected at least
 * one term, that selection is returned as is. Otherwise, if step 1 selected nothing,
 * all of {@code terms} become the candidates. The candidates are then truncated to
 * their first {@code k} elements, where {@code k} is:
 * <ul>
 *   <li>{@code n} itself when it parses as an integer;</li>
 *   <li>{@code (int) (n * terms.size())} when it parses as a double (a fraction of the
 *       full input list, not of the filtered candidates);</li>
 *   <li>{@code (int) (DEFAULT_THRESHOLD_N * terms.size())} when {@code n} is
 *       {@code null} or not a number.</li>
 * </ul>
 * A negative {@code k} yields an empty result.
 *
 * @param terms candidate terms; not modified
 * @param t     minimum score as a decimal string, or {@code null} for no score filter
 * @param n     absolute count (integer string) or fraction (decimal string) of terms
 *              to keep, or {@code null}
 * @return a new mutable list holding the selected terms in their original order
 */
protected static List<JATETerm> applyThresholds(List<JATETerm> terms, String t, String n) {
    List<JATETerm> selected = new ArrayList<>();

    if (t != null) {
        try {
            double threshold = Double.parseDouble(t);
            for (JATETerm jt : terms) {
                if (jt.getScore() >= threshold) {
                    selected.add(jt);
                } else {
                    // Stop at the first term below the threshold.
                    break;
                }
            }
        } catch (NumberFormatException ignored) {
            // Deliberate best-effort: an unparsable threshold means "no score filter".
        }
    }

    if (n == null && !selected.isEmpty()) {
        return selected;
    }
    if (selected.isEmpty()) {
        // Nothing passed (or no usable threshold): fall back to all terms.
        selected.addAll(terms);
    }

    int limit;
    try {
        // Integer.parseInt also reports a null string as NumberFormatException.
        limit = Integer.parseInt(n);
    } catch (NumberFormatException notAnInteger) {
        double fraction = DEFAULT_THRESHOLD_N;
        // Double.parseDouble(null) throws NullPointerException, so guard null explicitly
        // and let it use the default fraction like any other unparsable value.
        if (n != null) {
            try {
                fraction = Double.parseDouble(n);
            } catch (NumberFormatException ignored) {
                // Not a number at all: keep the default fraction.
            }
        }
        limit = (int) (fraction * terms.size());
    }

    // Drop everything after the first 'limit' elements in one step.
    int keep = Math.max(0, Math.min(limit, selected.size()));
    selected.subList(keep, selected.size()).clear();
    return selected;
}