Ejemplo n.º 1
0
  /**
   * Verifies that two readers expose equivalent content once their documents are paired up through
   * the term each document carries in {@code idField}.
   *
   * <p>The check runs in two passes:
   *
   * <ol>
   *   <li>Walk every term of {@code idField} in {@code r1}, look the same term up in {@code r2},
   *       assert that each side has at most one live document for it, record the
   *       {@code r2 docID -> r1 docID} mapping, and compare the stored fields and term vectors of
   *       the paired documents.
   *   <li>Walk the postings of every field of both readers in lock-step, translating {@code r2}
   *       doc ids through the mapping, and assert that fields, terms, doc ids and frequencies
   *       agree.
   * </ol>
   *
   * <p>On a stored-field or term-vector mismatch, diagnostic output is printed to stdout before the
   * original failure is rethrown.
   *
   * @param r1 first reader
   * @param r2 second reader, compared against {@code r1}
   * @param idField name of the field whose terms identify documents
   * @throws Throwable any assertion failure or exception raised while reading either index
   */
  public void verifyEquals(DirectoryReader r1, DirectoryReader r2, String idField)
      throws Throwable {
    if (VERBOSE) {
      System.out.println("\nr1 docs:");
      printDocs(r1);
      System.out.println("\nr2 docs:");
      printDocs(r2);
    }
    if (r1.numDocs() != r2.numDocs()) {
      // fail() instead of a bare `assert false`, so the mismatch is reported even when the JVM
      // runs with assertions disabled.
      fail("r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs());
    }
    // Treated as "has deletes" unless both readers have the same maxDoc and r1 has no deleted docs.
    boolean hasDeletes = !(r1.maxDoc() == r2.maxDoc() && r1.numDocs() == r1.maxDoc());

    int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping

    // create mapping from id2 space to id1 space based on idField
    final Fields f1 = MultiFields.getFields(r1);
    if (f1 == null) {
      // make sure r2 is empty
      assertNull(MultiFields.getFields(r2));
      return;
    }
    final Terms terms1 = f1.terms(idField);
    if (terms1 == null) {
      assertTrue(
          MultiFields.getFields(r2) == null || MultiFields.getFields(r2).terms(idField) == null);
      return;
    }
    final TermsEnum termsEnum = terms1.iterator(null);

    final Bits liveDocs1 = MultiFields.getLiveDocs(r1);
    final Bits liveDocs2 = MultiFields.getLiveDocs(r2);

    Fields fields = MultiFields.getFields(r2);
    if (fields == null) {
      // make sure r1 is in fact empty (eg has only all
      // deleted docs):
      DocsEnum docs = null;
      while (termsEnum.next() != null) {
        docs = TestUtil.docs(random(), termsEnum, liveDocs1, docs, DocsEnum.FLAG_NONE);
        while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          fail("r1 is not empty but r2 is");
        }
      }
      return;
    }
    Terms terms2 = fields.terms(idField);
    // Report a missing id field in r2 as a test failure instead of a NullPointerException.
    assertNotNull("r2 has no terms for field " + idField + " but r1 does", terms2);
    TermsEnum termsEnum2 = terms2.iterator(null);

    DocsEnum termDocs1 = null;
    DocsEnum termDocs2 = null;

    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }

      termDocs1 = TestUtil.docs(random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE);
      if (termsEnum2.seekExact(term)) {
        termDocs2 = TestUtil.docs(random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE);
      } else {
        termDocs2 = null;
      }

      if (termDocs1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        // This doc is deleted and wasn't replaced
        assertTrue(termDocs2 == null || termDocs2.nextDoc() == DocIdSetIterator.NO_MORE_DOCS);
        continue;
      }

      // Exactly one live document per id term is expected on the r1 side...
      int id1 = termDocs1.docID();
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocs1.nextDoc());

      // ...and on the r2 side. termDocs2 is null when r2 does not contain the term at all.
      assertNotNull("id term " + term + " has a live doc in r1 but is missing from r2", termDocs2);
      assertTrue(termDocs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
      int id2 = termDocs2.docID();
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocs2.nextDoc());

      r2r1[id2] = id1;

      // verify stored fields are equivalent
      try {
        verifyEquals(r1.document(id1), r2.document(id2));
      } catch (Throwable t) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
        System.out.println("  d1=" + r1.document(id1));
        System.out.println("  d2=" + r2.document(id2));
        throw t;
      }

      try {
        // verify term vectors are equivalent
        verifyEquals(r1.getTermVectors(id1), r2.getTermVectors(id2));
      } catch (Throwable e) {
        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
        Fields tv1 = r1.getTermVectors(id1);
        System.out.println("  d1=" + tv1);
        if (tv1 != null) {
          dumpTermVectors(tv1);
        }

        Fields tv2 = r2.getTermVectors(id2);
        System.out.println("  d2=" + tv2);
        if (tv2 != null) {
          dumpTermVectors(tv2);
        }

        throw e;
      }
    }

    // Verify postings
    final Fields fields1 = MultiFields.getFields(r1);
    final Iterator<String> fields1Enum = fields1.iterator();
    final Fields fields2 = MultiFields.getFields(r2);
    final Iterator<String> fields2Enum = fields2.iterator();

    String field1 = null, field2 = null;
    TermsEnum termsEnum1 = null;
    termsEnum2 = null;
    DocsEnum docs1 = null, docs2 = null;

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.numDocs()];
    long[] info2 = new long[r2.numDocs()];

    for (; ; ) {
      BytesRef term1 = null, term2 = null;

      // iterate until we get some docs (advance r1 to its next term that has live docs)
      int len1;
      for (; ; ) {
        len1 = 0;
        if (termsEnum1 == null) {
          if (!fields1Enum.hasNext()) {
            break;
          }
          field1 = fields1Enum.next();
          Terms terms = fields1.terms(field1);
          if (terms == null) {
            continue;
          }
          termsEnum1 = terms.iterator(null);
        }
        term1 = termsEnum1.next();
        if (term1 == null) {
          // no more terms in this field
          termsEnum1 = null;
          continue;
        }

        docs1 = TestUtil.docs(random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS);
        while (docs1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          int d = docs1.docID();
          int f = docs1.freq();
          info1[len1] = (((long) d) << 32) | f;
          len1++;
        }
        if (len1 > 0) {
          break;
        }
      }

      // iterate until we get some docs (same for r2, with doc ids translated into r1's id space)
      int len2;
      for (; ; ) {
        len2 = 0;
        if (termsEnum2 == null) {
          if (!fields2Enum.hasNext()) {
            break;
          }
          field2 = fields2Enum.next();
          Terms terms = fields2.terms(field2);
          if (terms == null) {
            continue;
          }
          termsEnum2 = terms.iterator(null);
        }
        term2 = termsEnum2.next();
        if (term2 == null) {
          // no more terms in this field
          termsEnum2 = null;
          continue;
        }

        docs2 = TestUtil.docs(random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS);
        while (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          int d = r2r1[docs2.docID()];
          int f = docs2.freq();
          info2[len2] = (((long) d) << 32) | f;
          len2++;
        }
        if (len2 > 0) {
          break;
        }
      }

      assertEquals(len1, len2);
      if (len1 == 0) {
        break; // no more terms
      }

      assertEquals(field1, field2);
      assertTrue(term1.bytesEquals(term2));

      if (!hasDeletes) {
        assertEquals(termsEnum1.docFreq(), termsEnum2.docFreq());
      }

      assertEquals("len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes, term1, term2);

      // sort info2 to get it into ascending docid
      // (after remapping through r2r1 the entries are no longer guaranteed to be in order)
      Arrays.sort(info2, 0, len2);

      // now compare
      for (int i = 0; i < len1; i++) {
        assertEquals(
            "i="
                + i
                + " len="
                + len1
                + " d1="
                + (info1[i] >>> 32)
                + " f1="
                + (info1[i] & Integer.MAX_VALUE)
                + " d2="
                + (info2[i] >>> 32)
                + " f2="
                + (info2[i] & Integer.MAX_VALUE)
                + " field="
                + field1
                + " term="
                + term1.utf8ToString(),
            info1[i],
            info2[i]);
      }
    }
  }

  /**
   * Prints every field and term of one document's term vectors to stdout, with the term's total
   * frequency and, when a positions enum is available, each position. Used only to produce
   * diagnostics after a term-vector mismatch.
   *
   * @param vectors non-null term vectors of a single document
   * @throws Exception if reading the term vectors fails
   */
  private void dumpTermVectors(Fields vectors) throws Exception {
    DocsAndPositionsEnum dpEnum = null;
    DocsEnum dEnum = null;
    for (String field : vectors) {
      System.out.println("    " + field + ":");
      Terms terms3 = vectors.terms(field);
      assertNotNull(terms3);
      TermsEnum termsEnum3 = terms3.iterator(null);
      BytesRef term2;
      while ((term2 = termsEnum3.next()) != null) {
        System.out.println(
            "      " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
        dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
        if (dpEnum != null) {
          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          final int freq = dpEnum.freq();
          System.out.println("        doc=" + dpEnum.docID() + " freq=" + freq);
          for (int posUpto = 0; posUpto < freq; posUpto++) {
            System.out.println("          pos=" + dpEnum.nextPosition());
          }
        } else {
          // No positions enum was returned for this term; fall back to docs + freqs.
          dEnum = TestUtil.docs(random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
          assertNotNull(dEnum);
          assertTrue(dEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          final int freq = dEnum.freq();
          System.out.println("        doc=" + dEnum.docID() + " freq=" + freq);
        }
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Opens a reader over a translog file, picking the reader implementation from the checkpoint and
   * the header found at the start of the channel.
   *
   * <p>Dispatch, as implemented below:
   *
   * <ul>
   *   <li>checkpoint offset {@code 0} with an unknown op count: an empty legacy file, returned as
   *       a {@code LegacyTranslogReader};
   *   <li>first byte equal to {@code LUCENE_CODEC_HEADER_BYTE}: the codec header is validated and
   *       the translog version selects either a {@code LegacyTranslogReaderBase}
   *       ({@code VERSION_CHECKSUMS}) or an {@code ImmutableTranslogReader}
   *       ({@code VERSION_CHECKPOINTS}, after the stored UUID has been matched against
   *       {@code translogUUID});
   *   <li>first byte equal to {@code UNVERSIONED_TRANSLOG_HEADER_BYTE}: a header-less file,
   *       returned as a {@code LegacyTranslogReader};
   *   <li>anything else: the file is reported as corrupted.
   * </ul>
   *
   * @param channelReference reference to the open channel and path of the translog file; its
   *     generation must equal {@code checkpoint.generation}
   * @param checkpoint checkpoint supplying the generation, end offset and operation count
   * @param translogUUID UUID the file is expected to carry in a {@code VERSION_CHECKPOINTS} header
   * @return a reader positioned after the header of the file
   * @throws TranslogCorruptedException if the header is malformed or truncated, the version is
   *     unknown, or the stored UUID differs from {@code translogUUID}
   * @throws IOException if reading from the channel fails
   */
  public static ImmutableTranslogReader open(
      ChannelReference channelReference, Checkpoint checkpoint, String translogUUID)
      throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation
        : "expected generation: "
            + channelReference.getGeneration()
            + " but got: "
            + checkpoint.generation;

    try {
      if (checkpoint.offset == 0
          && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
        return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
      }

      InputStreamStreamInput headerStream =
          new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
      // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
      // header, in binary this looks like:
      //
      // binary: 0011 1111 1101 0111 0110 1100 0001 0111
      // hex   :    3    f    d    7    6    c    1    7
      //
      // With version 0 of the translog, the first byte is the
      // Operation.Type, which will always be between 0-4, so we know if
      // we grab the first byte, it can be:
      // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
      // 0x00 => version 0 of the translog
      //
      // otherwise the first byte of the translog is corrupted and we
      // should bail
      byte b1 = headerStream.readByte();
      if (b1 == LUCENE_CODEC_HEADER_BYTE) {
        // Read 3 more bytes, meaning a whole integer has been read
        byte b2 = headerStream.readByte();
        byte b3 = headerStream.readByte();
        byte b4 = headerStream.readByte();
        // Convert the 4 bytes that were read into an integer
        int header =
            ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
        // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
        // ourselves here, because it allows us to read the first
        // byte separately
        if (header != CodecUtil.CODEC_MAGIC) {
          throw new TranslogCorruptedException(
              "translog looks like version 1 or later, but has corrupted header");
        }
        // Confirm the rest of the header using CodecUtil, extracting
        // the translog version
        int version =
            CodecUtil.checkHeaderNoMagic(
                new InputStreamDataInput(headerStream),
                TranslogWriter.TRANSLOG_CODEC,
                1,
                Integer.MAX_VALUE);
        switch (version) {
          case TranslogWriter.VERSION_CHECKSUMS:
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT
                : "expected unknown op count but got: " + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path)
                : "offset("
                    + checkpoint.offset
                    + ") != file_size("
                    + Files.size(path)
                    + ") for: "
                    + path;
            // legacy - we still have to support it somehow
            return new LegacyTranslogReaderBase(
                channelReference.getGeneration(),
                channelReference,
                CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC),
                checkpoint.offset);
          case TranslogWriter.VERSION_CHECKPOINTS:
            assert path.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)
                : "new file ends with old suffix: " + path;
            assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT
                : "expected at least 0 operatin but got: " + checkpoint.numOps;
            assert checkpoint.offset <= channel.size()
                : "checkpoint is inconsistent with channel length: "
                    + channel.size()
                    + " "
                    + checkpoint;
            int len = headerStream.readInt();
            // A negative length can only come from a corrupted header; reject it before it
            // reaches the array allocation below.
            if (len < 0) {
              throw new TranslogCorruptedException("uuid length can't be negative, got: " + len);
            }
            if (len > channel.size()) {
              throw new TranslogCorruptedException("uuid length can't be larger than the translog");
            }
            BytesRef ref = new BytesRef(len);
            ref.length = len;
            // InputStream#read may deliver fewer bytes than requested, so keep reading until the
            // whole UUID has arrived or the stream ends.
            int filled = 0;
            while (filled < len) {
              int n = headerStream.read(ref.bytes, ref.offset + filled, len - filled);
              if (n < 0) {
                throw new TranslogCorruptedException(
                    "translog header truncated while reading uuid, expected "
                        + len
                        + " bytes but got "
                        + filled);
              }
              filled += n;
            }
            BytesRef uuidBytes = new BytesRef(translogUUID);
            if (uuidBytes.bytesEquals(ref) == false) {
              throw new TranslogCorruptedException(
                  "expected shard UUID ["
                      + uuidBytes
                      + "] but got: ["
                      + ref
                      + "] this translog file belongs to a different translog");
            }
            // First operation offset = codec header + 4-byte UUID length + UUID bytes.
            return new ImmutableTranslogReader(
                channelReference.getGeneration(),
                channelReference,
                ref.length
                    + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                    + RamUsageEstimator.NUM_BYTES_INT,
                checkpoint.offset,
                checkpoint.numOps);
          default:
            throw new TranslogCorruptedException(
                "No known translog stream version: " + version + " path:" + path);
        }
      } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
        assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT
            : "expected unknown op count but got: " + checkpoint.numOps;
        assert checkpoint.offset == Files.size(path)
            : "offset("
                + checkpoint.offset
                + ") != file_size("
                + Files.size(path)
                + ") for: "
                + path;
        return new LegacyTranslogReader(
            channelReference.getGeneration(), channelReference, checkpoint.offset);
      } else {
        // Mask to an unsigned value: widening a negative byte directly would sign-extend it and
        // print a misleading 16-digit hex string.
        throw new TranslogCorruptedException(
            "Invalid first byte in translog file, got: 0x"
                + Integer.toHexString(b1 & 0xFF)
                + ", expected 0x00 or 0x3f");
      }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
      throw new TranslogCorruptedException("Translog header corrupted", e);
    }
  }
Ejemplo n.º 3
0
 /**
  * Compares this instance with another object for equality.
  *
  * @param _other the object to compare with; may be {@code null} or of any type
  * @return {@code true} if {@code _other} is a {@code FieldAndTerm} whose field equals this
  *     instance's field and whose term has the same bytes; {@code false} otherwise, including
  *     for {@code null} and for objects of other types
  */
 @Override
 public boolean equals(Object _other) {
   if (this == _other) {
     return true;
   }
   // instanceof is false for null, so this guard covers both null and foreign types instead of
   // letting the cast below throw.
   if (!(_other instanceof FieldAndTerm)) {
     return false;
   }
   FieldAndTerm other = (FieldAndTerm) _other;
   return other.field.equals(field) && term.bytesEquals(other.term);
 }