public boolean append(BlockCursor cursor)
{
    // The extra byte reserved here is for the null flag written before every value.
    int writableBytes = sliceOutput.writableBytes() - SIZE_OF_BYTE;

    boolean isNull = cursor.isNull();
    if (type == Type.FIXED_INT_64) {
        if (writableBytes < SIZE_OF_LONG) {
            return false;
        }
        positionOffsets.add(sliceOutput.size());
        sliceOutput.writeByte(isNull ? 1 : 0);
        sliceOutput.appendLong(isNull ? 0 : cursor.getLong());
    }
    else if (type == Type.DOUBLE) {
        if (writableBytes < SIZE_OF_DOUBLE) {
            return false;
        }
        positionOffsets.add(sliceOutput.size());
        sliceOutput.writeByte(isNull ? 1 : 0);
        sliceOutput.appendDouble(isNull ? 0 : cursor.getDouble());
    }
    else if (type == Type.BOOLEAN) {
        if (writableBytes < SIZE_OF_BYTE) {
            return false;
        }
        positionOffsets.add(sliceOutput.size());
        sliceOutput.writeByte(isNull ? 1 : 0);
        sliceOutput.writeByte(!isNull && cursor.getBoolean() ? 1 : 0);
    }
    else if (type == Type.VARIABLE_BINARY) {
        int sliceLength = isNull ? 0 : getVariableBinaryLength(cursor.getRawSlice(), cursor.getRawOffset());
        if (writableBytes < SIZE_OF_INT + sliceLength) {
            return false;
        }
        int startingOffset = sliceOutput.size();
        positionOffsets.add(startingOffset);
        sliceOutput.writeByte(isNull ? 1 : 0);
        // The stored entry length covers the null byte, the length int, and the raw value bytes.
        sliceOutput.appendInt(sliceLength + SIZE_OF_BYTE + SIZE_OF_INT);
        if (!isNull) {
            sliceOutput.writeBytes(cursor.getRawSlice(), cursor.getRawOffset() + SIZE_OF_BYTE + SIZE_OF_INT, sliceLength);
        }
    }
    else {
        throw new IllegalArgumentException("Unsupported type " + type);
    }
    return true;
}
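// Usage sketch (hypothetical): append() returns false when the block is full, so a
// caller typically drains a cursor in a loop, flushing and starting a fresh builder
// when capacity runs out. The BlockBuilder type name, advanceNextPosition(), build(),
// and the flush/newBlockBuilder helpers are assumptions for illustration, not
// confirmed API of this codebase.
static void copyAll(BlockCursor cursor, BlockBuilder builder)
{
    while (cursor.advanceNextPosition()) {
        if (!builder.append(cursor)) {
            flush(builder.build());       // assumed: emit the finished block downstream
            builder = newBlockBuilder();  // assumed: allocate an empty builder
            builder.append(cursor);       // retry the position that did not fit
        }
    }
}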
public static void addField(String indexDir, String newFieldName, FlamdexReader docReader, final String[] values)
        throws IOException
{
    // Sort document ids by value (and by doc id within equal values) so terms and
    // their postings can be written out in order. values.length is expected to
    // equal docReader.getNumDocs().
    final int[] indices = new int[docReader.getNumDocs()];
    for (int i = 0; i < indices.length; i++) {
        indices[i] = i;
    }

    log.debug("sorting");
    Quicksortables.sort(new Quicksortable() {
        @Override
        public void swap(int i, int j) {
            Quicksortables.swap(indices, i, j);
        }

        @Override
        public int compare(int i, int j) {
            // Primary key: the string value; secondary key: the document id.
            final int cmp = values[indices[i]].compareTo(values[indices[j]]);
            if (cmp != 0) {
                return cmp < 0 ? -1 : 1;
            }
            if (indices[i] < indices[j]) {
                return -1;
            }
            if (indices[i] > indices[j]) {
                return 1;
            }
            return 0; // both value and doc id match
        }
    }, values.length);

    log.debug("writing field " + newFieldName);
    final SimpleFlamdexWriter w = new SimpleFlamdexWriter(indexDir, docReader.getNumDocs(), false);
    final StringFieldWriter sfw = w.getStringFieldWriter(newFieldName, true);
    final IntArrayList docList = new IntArrayList();
    docList.add(indices[0]);
    for (int i = 1; i < indices.length; ++i) {
        final String prev = values[indices[i - 1]];
        final String cur = values[indices[i]];
        if (cur.compareTo(prev) != 0) {
            // Value changed: flush the postings accumulated for the previous term.
            sfw.nextTerm(prev);
            for (int j = 0; j < docList.size(); ++j) {
                sfw.nextDoc(docList.getInt(j));
            }
            docList.clear();
        }
        docList.add(indices[i]);
    }
    // Flush the final term's postings.
    if (docList.size() > 0) {
        sfw.nextTerm(values[indices[indices.length - 1]]);
        for (int j = 0; j < docList.size(); ++j) {
            sfw.nextDoc(docList.getInt(j));
        }
    }
    sfw.close();
    w.close();
}
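// Usage sketch (hypothetical): build a per-document value array and write it into
// the index as a new inverted string field. SimpleFlamdexReader.open() and the
// computeValueFor() helper are assumptions used only for illustration; the real
// reader factory and value derivation are application-specific.
static void addCountryField(String indexDir) throws IOException
{
    FlamdexReader reader = SimpleFlamdexReader.open(indexDir); // assumed factory method
    String[] values = new String[reader.getNumDocs()];
    for (int doc = 0; doc < values.length; doc++) {
        values[doc] = computeValueFor(doc); // hypothetical per-document value
    }
    addField(indexDir, "country", reader, values);
    reader.close();
}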
/**
 * Loads multi-value facet data. This method uses a work area to prepare loading.
 *
 * @param fieldName the field to load facet data for
 * @param reader the index reader
 * @param listFactory factory for the term list; if null, a TermStringList is used
 * @param workArea scratch space reused while building the nested array
 * @throws IOException if the index cannot be read
 */
public void load(String fieldName, IndexReader reader, TermListFactory<T> listFactory, WorkArea workArea)
    throws IOException {
  long t0 = System.currentTimeMillis();
  int maxdoc = reader.maxDoc();
  BufferedLoader loader = getBufferedLoader(maxdoc, workArea);

  TermEnum tenum = null;
  TermDocs tdoc = null;
  TermValueList<T> list = (listFactory == null
      ? (TermValueList<T>) new TermStringList()
      : listFactory.createTermList());
  IntArrayList minIDList = new IntArrayList();
  IntArrayList maxIDList = new IntArrayList();
  IntArrayList freqList = new IntArrayList();
  OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
  int negativeValueCount = getNegativeValueCount(reader, fieldName.intern());
  int t = 0; // current term number

  // Term 0 is reserved as the sentinel for documents with no value.
  list.add(null);
  minIDList.add(-1);
  maxIDList.add(-1);
  freqList.add(0);
  t++;

  _overflow = false;
  try {
    tdoc = reader.termDocs();
    tenum = reader.terms(new Term(fieldName, ""));
    if (tenum != null) {
      do {
        Term term = tenum.term();
        if (term == null || !fieldName.equals(term.field())) break;

        String val = term.text();
        if (val != null) {
          list.add(val);
          tdoc.seek(tenum);
          // freqList.add(tenum.docFreq()); // removed because the df doesn't take
          // into account the number of deleted docs
          int df = 0;
          int minID = -1;
          int maxID = -1;
          int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
          if (tdoc.next()) {
            df++;
            int docid = tdoc.doc();
            if (!loader.add(docid, valId)) logOverflow(fieldName);
            minID = docid;
            bitset.fastSet(docid);
            while (tdoc.next()) {
              df++;
              docid = tdoc.doc();
              if (!loader.add(docid, valId)) logOverflow(fieldName);
              bitset.fastSet(docid);
            }
            maxID = docid;
          }
          freqList.add(df);
          minIDList.add(minID);
          maxIDList.add(maxID);
        }
        t++;
      } while (tenum.next());
    }
  } finally {
    try {
      if (tdoc != null) {
        tdoc.close();
      }
    } finally {
      if (tenum != null) {
        tenum.close();
      }
    }
  }

  list.seal();

  try {
    _nestedArray.load(maxdoc + 1, loader);
  } catch (IOException e) {
    throw e;
  } catch (Exception e) {
    throw new RuntimeException("failed to load due to " + e.toString(), e);
  }

  this.valArray = list;
  this.freqs = freqList.toIntArray();
  this.minIDs = minIDList.toIntArray();
  this.maxIDs = maxIDList.toIntArray();

  // Compute min/max/freq for the sentinel term (documents with no value).
  int doc = 0;
  while (doc <= maxdoc && !_nestedArray.contains(doc, 0, true)) {
    ++doc;
  }
  if (doc <= maxdoc) {
    this.minIDs[0] = doc;
    doc = maxdoc;
    while (doc > 0 && !_nestedArray.contains(doc, 0, true)) {
      --doc;
    }
    if (doc > 0) {
      this.maxIDs[0] = doc;
    }
  }
  this.freqs[0] = maxdoc + 1 - (int) bitset.cardinality();
}
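// Usage sketch (hypothetical): load the facet data for one field, e.g. at service
// startup. Passing a null TermListFactory falls back to TermStringList, as the
// method documents. The MultiValueFacetDataCache class name and the no-arg
// WorkArea() constructor are assumptions based on Bobo-Browse-style APIs.
static void warmFacet(IndexReader indexReader) throws IOException
{
    MultiValueFacetDataCache<String> cache = new MultiValueFacetDataCache<String>(); // assumed class
    cache.load("tags", indexReader, null, new WorkArea()); // null factory -> TermStringList
}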