/**
 * Builds the union of the string field names reported by each reader.
 *
 * The result is a {@link TreeSet}, so a field name that appears in several
 * readers is kept once and iteration follows the natural String ordering.
 *
 * @param readers readers whose {@code getStringFields()} results are merged
 * @return a new set holding every string field name of every reader
 */
private static Set<String> mergeStringFields(FlamdexReader[] readers) {
    final Set<String> merged = new TreeSet<String>();
    for (int idx = 0; idx < readers.length; ++idx) {
        merged.addAll(readers[idx].getStringFields());
    }
    return merged;
}
/**
 * Copies every int field and every string field of {@code fdx} into {@code w},
 * term by term and document by document, and then closes {@code w}.
 *
 * For each term the matching doc ids are pulled from a single shared
 * {@link DocIdStream} in chunks of {@code DOC_ID_BUFFER_SIZE} and forwarded to
 * the field writer in the order the stream returns them.
 *
 * The doc id stream and the term iterators are released in {@code finally}
 * blocks, so they do not leak when a read or write fails part-way through.
 * The field writers and {@code w} itself are closed only when the copy
 * succeeds.
 * NOTE(review): the writers are deliberately left alone on failure, on the
 * assumption that closing them could finalize partially written output;
 * confirm against the writer implementations.
 *
 * @param fdx source index to read from
 * @param w   destination writer; closed by this method on success
 * @throws IOException if reading from {@code fdx} or writing to {@code w} fails
 */
public static void writeFlamdex(final FlamdexReader fdx, final FlamdexWriter w) throws IOException {
    final DocIdStream dis = fdx.getDocIdStream();
    try {
        final int[] docIdBuf = new int[DOC_ID_BUFFER_SIZE];

        for (final String intField : fdx.getIntFields()) {
            final IntFieldWriter ifw = w.getIntFieldWriter(intField);
            final IntTermIterator iter = fdx.getIntTermIterator(intField);
            try {
                while (iter.next()) {
                    ifw.nextTerm(iter.term());
                    dis.reset(iter);
                    int n;
                    do {
                        n = dis.fillDocIdBuffer(docIdBuf);
                        for (int i = 0; i < n; ++i) {
                            ifw.nextDoc(docIdBuf[i]);
                        }
                        // A partially filled buffer is treated as the end of this term's docs.
                    } while (n >= docIdBuf.length);
                }
            } finally {
                iter.close();
            }
            ifw.close();
        }

        for (final String stringField : fdx.getStringFields()) {
            final StringFieldWriter sfw = w.getStringFieldWriter(stringField);
            final StringTermIterator iter = fdx.getStringTermIterator(stringField);
            try {
                while (iter.next()) {
                    sfw.nextTerm(iter.term());
                    dis.reset(iter);
                    int n;
                    do {
                        n = dis.fillDocIdBuffer(docIdBuf);
                        for (int i = 0; i < n; ++i) {
                            sfw.nextDoc(docIdBuf[i]);
                        }
                        // A partially filled buffer is treated as the end of this term's docs.
                    } while (n >= docIdBuf.length);
                }
            } finally {
                iter.close();
            }
            sfw.close();
        }
    } finally {
        dis.close();
    }
    w.close();
}
public static void addField( String indexDir, String newFieldName, FlamdexReader docReader, final String[] values) throws IOException { final int[] indices = new int[docReader.getNumDocs()]; for (int i = 0; i < indices.length; i++) { indices[i] = i; } log.debug("sorting"); Quicksortables.sort( new Quicksortable() { @Override public void swap(int i, int j) { Quicksortables.swap(indices, i, j); } @Override public int compare(int i, int j) { // Sorting logic: Primarily by value (String), secondarily by document ID (indices[i]) final String left = values[indices[i]]; final String right = values[indices[j]]; if (left.compareTo(right) < 0) { return -1; } else if (left.compareTo(right) > 0) { return 1; } else { // left == right if (indices[i] < indices[j]) { return -1; } else if (indices[i] > indices[j]) { return 1; } else { return 0; // Both value & doc ID match } } } }, values.length); log.debug("writing field " + newFieldName); final SimpleFlamdexWriter w = new SimpleFlamdexWriter(indexDir, docReader.getNumDocs(), false); final StringFieldWriter sfw = w.getStringFieldWriter(newFieldName, true); final IntArrayList docList = new IntArrayList(); docList.add(indices[0]); for (int i = 1; i < indices.length; ++i) { final String prev = values[indices[i - 1]]; final String cur = values[indices[i]]; if (cur.compareTo(prev) != 0) { sfw.nextTerm(prev); for (int j = 0; j < docList.size(); ++j) { sfw.nextDoc(docList.getInt(j)); } docList.clear(); } docList.add(indices[i]); } if (docList.size() > 0) { sfw.nextTerm(values[indices[indices.length - 1]]); for (int j = 0; j < docList.size(); ++j) { sfw.nextDoc(docList.getInt(j)); } } sfw.close(); w.close(); }
/**
 * Adds an int field named {@code fieldName} to the index in {@code dir},
 * where document {@code d} receives the term {@code cache[d]}.
 *
 * The permutation of doc ids is kept in a memory-mapped temporary file inside
 * {@code dir} rather than on the heap. It is ordered by (value, doc id) and
 * then streamed to the field writer, starting a new term whenever the value
 * changes. The mapping is closed and the temporary file deleted in
 * {@code finally} blocks, whether or not the write succeeds.
 *
 * NOTE(review): the writer is created with {@code r.getNumDocs()} while all
 * iteration is bounded by {@code cache.length}; the two are presumably
 * expected to be equal. Confirm with callers.
 *
 * @param dir       index directory; also hosts the temporary file
 * @param fieldName name of the int field to write
 * @param r         reader consulted only for its document count
 * @param cache     term of each document, indexed by doc id
 * @throws IOException if mapping the temporary file or writing the field fails
 */
public static void addField(String dir, String fieldName, FlamdexReader r, final long[] cache) throws IOException {
    final File tempFile = new File(dir, "temp-" + fieldName + "-" + UUID.randomUUID() + ".intarray.bin");
    try {
        // Four bytes per int slot. The size is computed in long arithmetic because
        // 4 * cache.length overflows int once the array exceeds Integer.MAX_VALUE / 4.
        final MMapBuffer buffer = new MMapBuffer(
                tempFile,
                0,
                4L * cache.length,
                FileChannel.MapMode.READ_WRITE,
                ByteOrder.nativeOrder());
        try {
            final IntArray indices = buffer.memory().intArray(0, cache.length);
            for (int i = 0; i < cache.length; ++i) {
                indices.set(i, i);
            }

            log.debug("sorting");
            Quicksortables.sort(
                    new Quicksortable() {
                        @Override
                        public void swap(int i, int j) {
                            final int t = indices.get(i);
                            indices.set(i, indices.get(j));
                            indices.set(j, t);
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Primary key: the document's value; secondary key: the doc id.
                            final int leftDoc = indices.get(i);
                            final int rightDoc = indices.get(j);
                            final long leftValue = cache[leftDoc];
                            final long rightValue = cache[rightDoc];
                            if (leftValue != rightValue) {
                                return leftValue < rightValue ? -1 : 1;
                            }
                            return leftDoc < rightDoc ? -1 : leftDoc > rightDoc ? 1 : 0;
                        }
                    },
                    cache.length);

            log.debug("writing field " + fieldName);
            final SimpleFlamdexWriter w = new SimpleFlamdexWriter(dir, r.getNumDocs(), false);
            final IntFieldWriter ifw = w.getIntFieldWriter(fieldName, true);

            // Equal values are adjacent after the sort, so a term boundary is a
            // change of value between neighbours.
            long prev = 0;
            boolean prevInitialized = false;
            for (int i = 0; i < cache.length; ++i) {
                final int doc = indices.get(i);
                final long cur = cache[doc];
                if (!prevInitialized || cur != prev) {
                    ifw.nextTerm(cur);
                    prev = cur;
                    prevInitialized = true;
                }
                ifw.nextDoc(doc);
            }

            ifw.close();
            w.close();
        } finally {
            try {
                buffer.close();
            } catch (IOException e) {
                // Best effort: a failed unmap must not hide the outcome of the write.
                log.error("error closing MMapBuffer", e);
            }
        }
    } finally {
        if (!tempFile.delete()) {
            log.warn("unable to delete temp file " + tempFile);
        }
    }
}