Beispiel #1
1
  /**
   * Return the next element of the json document.
   *
   * <p>The state on top of {@code states} selects which kinds of fragment may come next, and
   * {@code rand} picks one of the alternatives for that state. For every value or field name that
   * is emitted, one entry is appended to each of {@code images}, {@code types}, {@code incr},
   * {@code nodes} and {@code datatypes}. Opening and closing arrays/objects, as well as entering
   * and leaving an object value, is tracked by pushing to and popping from {@code states}.
   *
   * <p>Side effect: the closing alternatives delete the last comma from {@code sb} when only
   * whitespace follows it.
   *
   * @return the JSON text for the chosen element; empty when the chosen alternative was skipped
   *     (nesting beyond {@code MAX_DEPTH} inside an array, or a datatype object requested after
   *     {@code getLastNode()} is already {@code >= 0})
   * @throws IllegalStateException if the state on top of {@code states} is not handled here
   */
  private String getNextNode() {
    final int popState;

    // dispatch on the current lexical state
    switch (states.peek()) {
      case ARRAY:
        switch (rand.nextInt(9)) {
          case 0: // String case
            final String val = "stepha" + this.getWhitespace() + "n" + this.getWhitespace() + "e";
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            images.add(val);
            types.add(ExtendedJsonTokenizer.getTokenTypes()[LITERAL]);
            incr.add(1);
            datatypes.add(XSDDatatype.XSD_STRING);
            return "\"" + val + "\"" + this.getWhitespace() + ",";
          case 1: // DOUBLE case
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            images.add("34.560e-9");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[NUMBER]);
            incr.add(1);
            datatypes.add(XSDDatatype.XSD_DOUBLE);
            return "34.560e-9" + this.getWhitespace() + ",";
          case 2: // LONG case
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            images.add("34560e-9");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[NUMBER]);
            incr.add(1);
            datatypes.add(XSDDatatype.XSD_LONG);
            return "34560e-9" + this.getWhitespace() + ",";
          case 3: // true case
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            images.add("true");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[TRUE]);
            incr.add(1);
            datatypes.add(XSDDatatype.XSD_BOOLEAN);
            return "true" + this.getWhitespace() + ",";
          case 4: // false case
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            images.add("false");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[FALSE]);
            incr.add(1);
            datatypes.add(XSDDatatype.XSD_BOOLEAN);
            return "false" + this.getWhitespace() + ",";
          case 5: // null case
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            images.add("null");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[NULL]);
            incr.add(1);
            datatypes.add(XSDDatatype.XSD_STRING);
            return "null" + this.getWhitespace() + ",";
          case 6: // nested array case
            if (states.size() <= MAX_DEPTH) {
              this.addToLastNode(1);
              this.incrNodeObjectPath();
              states.add(ARRAY);
              return "[";
            }
            // depth limit reached: emit nothing for this round
            return "";
          case 7: // nested object case
            if (states.size() <= MAX_DEPTH) {
              this.addToLastNode(1);
              this.incrNodeObjectPath();
              states.add(ARRAY_OBJECT);
              return "{";
            }
            // depth limit reached: emit nothing for this round
            return "";
          case 8: // closing array case
            this.decrNodeObjectPath();
            popState = states.pop();
            if (popState != ARRAY) {
              // the popped state does not match the bracket being closed: flag it via shouldFail
              shouldFail = true;
            }
            // Remove previous comma, this is not allowed
            final int comma = sb.lastIndexOf(",");
            if (comma != -1 && sb.substring(comma + 1).matches("\\s*")) {
              sb.deleteCharAt(comma);
            }
            return "],";
        }
        // NOTE(review): the inner switch has no default. Every listed case returns, so control
        // only falls through to the next label if rand.nextInt(9) yields an unlisted value —
        // presumably impossible; confirm rand is a java.util.Random.
      case ARRAY_OBJECT:
      case OBJECT_ATT:
        switch (rand.nextInt(3)) {
          case 0: // new object field
            types.add(ExtendedJsonTokenizer.getTokenTypes()[LITERAL]);
            images.add("ste ph ane");
            incr.add(1);
            this.addToLastNode(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            datatypes.add(JSONDatatype.JSON_FIELD);

            // the next call must produce the value of this field
            states.push(OBJECT_VAL);
            return "\"ste ph ane\"" + this.getWhitespace() + ":";
          case 1: // close object
            // an object opened as a field value took two path increments, so undo the extra one
            if (states.peek() == OBJECT_ATT && nestedObjs > 0) {
              this.decrNodeObjectPath();
              nestedObjs--;
            }
            this.decrNodeObjectPath();
            popState = states.pop();
            if (popState != OBJECT_ATT && popState != ARRAY_OBJECT) {
              shouldFail = true;
            }
            // Remove previous comma, this is not allowed
            final int comma = sb.lastIndexOf(",");
            if (comma != -1 && sb.substring(comma + 1).matches("\\s*")) {
              sb.deleteCharAt(comma);
            }
            // no trailing comma once the root object has been closed
            return states.empty() ? "}" : "},";
          case 2: // Datatype
            if (getLastNode() >= 0) {
              // this nested object cannot be a datatype object because other things have been added
              // to it
              return "";
            }
            // text emitted in front of the datatype label; depends on where the object is placed
            final String field;
            if (states.isEmpty()) {
              // datatype object at the root are not possible
              shouldFail = true;
              field = "{";
            } else if (states.peek() == OBJECT_ATT) {
              // field name
              this.addToLastNode(1);
              field = "\"field\":{";
              types.add(ExtendedJsonTokenizer.getTokenTypes()[LITERAL]);
              images.add("field");
              incr.add(1);
              nodes.add(IntsRef.deepCopyOf(curNodePath));
              datatypes.add(JSONDatatype.JSON_FIELD);
              // value
              this.incrNodeObjectPath();
              this.setLastNode(0);
            } else if (states.peek() == ARRAY) {
              this.addToLastNode(1);
              field = "{";
            } else if (states.peek() == ARRAY_OBJECT) {
              // the opening brace was already emitted when ARRAY_OBJECT was pushed
              this.decrNodeObjectPath();
              field = "";
            } else {
              // should not happen
              throw new IllegalStateException("Received unknown state=" + states.peek());
            }

            // the datatype object yields a single token: its value, tagged with the custom datatype
            types.add(ExtendedJsonTokenizer.getTokenTypes()[LITERAL]);
            images.add("Luke Skywalker");
            incr.add(1);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            datatypes.add("jedi");
            // close datatype object
            if (states.peek() == ARRAY_OBJECT) {
              popState = states.pop();
            } else {
              this.decrNodeObjectPath();
            }
            return field
                + this.getWhitespace()
                + "\""
                + ExtendedJsonTokenizer.DATATYPE_LABEL
                + "\":"
                + this.getWhitespace()
                + "\"jedi\","
                + "\""
                + ExtendedJsonTokenizer.DATATYPE_VALUES
                + "\":"
                + this.getWhitespace()
                + "\"Luke Skywalker\""
                + this.getWhitespace()
                + "},";
        }
        // NOTE(review): same fall-through caveat as above for rand.nextInt(3).
      case OBJECT_VAL:
        switch (rand.nextInt(8)) {
          case 0: // String
            return this.doValString(
                "stepha" + this.getWhitespace() + "n" + this.getWhitespace() + "e");
          case 1: // DOUBLE case
            // same bookkeeping as doValString, with a numeric token type and datatype
            images.add("34.560e-9");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[NUMBER]);
            incr.add(1);
            this.incrNodeObjectPath();
            this.setLastNode(0);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            this.decrNodeObjectPath();
            datatypes.add(XSDDatatype.XSD_DOUBLE);

            states.pop(); // remove OBJECT_VAL state
            return "34.560e-9" + this.getWhitespace() + ",";
          case 2: // LONG case
            images.add("34560e-9");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[NUMBER]);
            incr.add(1);
            this.incrNodeObjectPath();
            this.setLastNode(0);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            this.decrNodeObjectPath();
            datatypes.add(XSDDatatype.XSD_LONG);

            states.pop(); // remove OBJECT_VAL state
            return "34560e-9" + this.getWhitespace() + ",";
          case 3: // True
            images.add("true");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[TRUE]);
            incr.add(1);
            this.incrNodeObjectPath();
            this.setLastNode(0);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            this.decrNodeObjectPath();
            datatypes.add(XSDDatatype.XSD_BOOLEAN);

            states.pop(); // remove OBJECT_VAL state
            return "true" + this.getWhitespace() + ",";
          case 4: // False
            images.add("false");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[FALSE]);
            incr.add(1);
            this.incrNodeObjectPath();
            this.setLastNode(0);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            this.decrNodeObjectPath();
            datatypes.add(XSDDatatype.XSD_BOOLEAN);

            states.pop(); // remove OBJECT_VAL state
            return "false" + this.getWhitespace() + ",";
          case 5: // NULL
            images.add("null");
            types.add(ExtendedJsonTokenizer.getTokenTypes()[NULL]);
            incr.add(1);
            this.incrNodeObjectPath();
            this.setLastNode(0);
            nodes.add(IntsRef.deepCopyOf(curNodePath));
            this.decrNodeObjectPath();
            datatypes.add(XSDDatatype.XSD_STRING);

            states.pop(); // remove OBJECT_VAL state
            return "null" + this.getWhitespace() + ",";
          case 6: // New array
            if (states.size() <= MAX_DEPTH) {
              states.pop(); // remove OBJECT_VAL state
              this.incrNodeObjectPath();
              states.add(ARRAY);
              return "[";
            }
            // too deep to nest further: emit an empty string value instead
            return this.doValString("");
          case 7: // new Object
            if (states.size() <= MAX_DEPTH) {
              states.pop(); // remove OBJECT_VAL state
              // Two incrementations, because the object introduce a "blank" node
              nestedObjs++;
              this.incrNodeObjectPath();
              this.setLastNode(0);
              this.incrNodeObjectPath();
              states.add(OBJECT_ATT);
              return "{";
            }
            // too deep to nest further: emit an empty string value instead
            return this.doValString("");
        }
        // NOTE(review): same fall-through caveat as above for rand.nextInt(8); an unlisted value
        // would reach the default branch below.
      default:
        throw new IllegalStateException("Got unknown lexical state: " + states.peek());
    }
  }
Beispiel #2
0
  /**
   * Create a random Json document with random values.
   *
   * <p>All generator state is reset, a root object is opened, and random fragments produced by
   * {@code getNextNode()} are appended until either the state stack is empty (the document has
   * been closed) or {@code nbNodes + 1} fragments have been appended. If the budget runs out
   * while structures are still open, the returned text is incomplete and {@code shouldFail} is
   * set.
   *
   * @param nbNodes upper bound on the number of generation steps; {@code nbNodes + 1} steps are
   *     allowed so that the document can still be closed when {@code nbNodes == 1}. A negative
   *     value performs no step at all.
   * @return the generated JSON text, possibly incomplete
   */
  public String getRandomJson(int nbNodes) {
    // init
    sb.setLength(0);
    sb.append("{");
    states.clear();
    states.add(OBJECT_ATT);
    images.clear();
    nodes.clear();
    incr.clear();
    datatypes.clear();
    types.clear();
    curNodePath.length = 1;
    curNodePath.offset = 0;
    Arrays.fill(curNodePath.ints, -1);
    shouldFail = false;
    nestedObjs = 0;

    // <= so that when nbNodes == 1, the json is still valid
    /*
     * the generated json might be incomplete, if states is not empty, and
     * the maximum number of nodes has been reached.
     */
    // The counter must advance towards the bound: the previous loop kept the counter fixed at 0
    // and incremented nbNodes instead, so the node budget was never enforced.
    for (int i = 0; i <= nbNodes && !states.empty(); i++) {
      sb.append(this.getWhitespace()).append(this.getNextNode()).append(this.getWhitespace());
    }
    // a document left with open structures is expected to be rejected
    shouldFail = shouldFail || !states.empty();
    return sb.toString();
  }
 /**
  * Publishes the current position through the shared {@code result} holder.
  *
  * @return {@code null} when {@code upto} is 0; otherwise {@code result}, after the length of
  *     {@code current} has been set to {@code upto - 1} and the output to {@code output[upto]}
  */
 private InputOutput<T> setResult() {
   // nothing to publish at position 0
   if (upto == 0) {
     return null;
   }
   current.length = upto - 1;
   result.output = output[upto];
   return result;
 }
Beispiel #4
0
  /**
   * Add a string value to an object entry.
   *
   * <p>Records the expected token (image, literal type, increment of 1, node path, string
   * datatype), pops the {@code OBJECT_VAL} state and returns the JSON text of the value.
   *
   * @param val the raw string content, without the surrounding quotes
   * @return {@code val} wrapped in double quotes, followed by random whitespace and a comma
   */
  private String doValString(final String val) {
    // expected token: image, type and position increment
    images.add(val);
    types.add(ExtendedJsonTokenizer.getTokenTypes()[LITERAL]);
    incr.add(1);

    // extend the node path for the value, record a copy of it, then restore the path
    this.incrNodeObjectPath();
    this.setLastNode(0);
    nodes.add(IntsRef.deepCopyOf(curNodePath));
    this.decrNodeObjectPath();
    datatypes.add(XSDDatatype.XSD_STRING);

    // the value has been produced: leave the OBJECT_VAL state
    states.pop();

    final String quoted = "\"" + val + "\"";
    return quoted + this.getWhitespace() + ",";
  }
    /**
     * Reads the compressed term-frequency block from {@code in}, decompresses it into
     * {@code termFreqBuffer} and clears the pending-read flag.
     *
     * @throws IOException propagated from the underlying read
     */
    private void decodeTermFreqs() throws IOException {
      // fetch the raw compressed bytes
      in.readBytes(termFreqCompressedBuffer.bytes, 0, termFreqCompressedBufferLength);

      // expose exactly the bytes that were just read to the decompressor
      termFreqCompressedBuffer.length = termFreqCompressedBufferLength;
      termFreqCompressedBuffer.offset = 0;
      nodDecompressor.decompress(termFreqCompressedBuffer, termFreqBuffer);

      // Clamp the decoded length to the block size: a decompressor with a large window
      // (e.g., AFor) can report more values than the block actually holds.
      termFreqBuffer.length = termFreqBlockSize;

      termFreqReadPending = false;
    }
    /**
     * Reads the compressed node-length block from {@code in}, decompresses it into
     * {@code nodLenBuffer} and clears the pending-read flag.
     *
     * @throws IOException propagated from the underlying read
     */
    private void decodeNodeLengths() throws IOException {
      // fetch the raw compressed bytes
      in.readBytes(nodLenCompressedBuffer.bytes, 0, nodLenCompressedBufferLength);

      // expose exactly the bytes that were just read to the decompressor
      nodLenCompressedBuffer.length = nodLenCompressedBufferLength;
      nodLenCompressedBuffer.offset = 0;
      nodDecompressor.decompress(nodLenCompressedBuffer, nodLenBuffer);

      // Clamp the decoded length to the block size: a decompressor with a large window
      // (e.g., AFor) can report more values than the block actually holds.
      nodLenBuffer.length = nodLenBlockSize;

      nodLenReadPending = false;
    }
    /**
     * Decode the delta-encoded node that starts at the current offset of {@code nodBuffer}.
     *
     * <p>The node length is taken from {@code nodLenBuffer} (stored value plus one). The values
     * of the previous node ({@code currentNode}) are then added to the new node position by
     * position; decoding stops at the first position where the decoded value differs from the
     * previous node's value, or when either node is exhausted. When {@code currentNode.length}
     * is 0 nothing is added.
     *
     * <p>Afterwards {@code nodBuffer.offset} is moved past the node, and {@code currentNode}
     * points at the node just decoded.
     */
    private final void deltaDecoding() {
      final int[] ints = nodBuffer.ints;
      final int start = nodBuffer.offset;
      // lengths are stored minus one
      final int length = nodLenBuffer.ints[nodLenBuffer.offset++] + 1;
      final int end = start + length;

      final int prevEnd = currentNode.offset + currentNode.length;
      int prev = currentNode.offset;
      int cur = start;

      while (cur < end && prev < prevEnd) {
        ints[cur] += ints[prev];
        // once the ids diverge, the remaining values are not delta-encoded
        if (ints[cur] != ints[prev]) {
          break;
        }
        cur++;
        prev++;
      }

      // step over the node in the buffer
      nodBuffer.offset += length;
      // the decoded node becomes the reference for the next call
      currentNode.offset = start;
      currentNode.length = length;
    }
    /**
     * Reads the block header from {@code in}.
     *
     * <p>The header consists of six variable-length ints, read in this order: the block sizes of
     * the node lengths, the nodes and the term frequencies, followed by the compressed lengths
     * of the same three blocks. Every buffer is grown as soon as its size is known, all three
     * blocks are marked as pending, and the node lengths are decoded right away.
     *
     * @throws IOException propagated from the underlying reads
     */
    @Override
    protected void readHeader() throws IOException {
      // Block sizes: each output buffer gets at least the minimum size required for its block
      // size and the decompressor window.
      nodLenBlockSize = in.readVInt();
      nodLenBuffer =
          ArrayUtils.grow(
              nodLenBuffer,
              this.getMinimumBufferSize(nodLenBlockSize, nodDecompressor.getWindowSize()));

      nodBlockSize = in.readVInt();
      nodBuffer =
          ArrayUtils.grow(
              nodBuffer, this.getMinimumBufferSize(nodBlockSize, nodDecompressor.getWindowSize()));

      termFreqBlockSize = in.readVInt();
      termFreqBuffer =
          ArrayUtils.grow(
              termFreqBuffer,
              this.getMinimumBufferSize(termFreqBlockSize, nodDecompressor.getWindowSize()));

      // Compressed lengths: size the input buffers and flag every block as not yet decoded.
      nodLenCompressedBufferLength = in.readVInt();
      nodLenCompressedBuffer =
          ArrayUtils.grow(nodLenCompressedBuffer, nodLenCompressedBufferLength);
      nodLenReadPending = true;

      nodCompressedBufferLength = in.readVInt();
      nodCompressedBuffer = ArrayUtils.grow(nodCompressedBuffer, nodCompressedBufferLength);
      nodReadPending = true;

      termFreqCompressedBufferLength = in.readVInt();
      termFreqCompressedBuffer =
          ArrayUtils.grow(termFreqCompressedBuffer, termFreqCompressedBufferLength);
      termFreqReadPending = true;

      // node lengths are decoded eagerly
      this.decodeNodeLengths();

      // the current node is a view over the (possibly reallocated) node buffer
      currentNode.ints = nodBuffer.ints;
    }
Beispiel #9
0
  /**
   * Increments the byte buffer ({@code seekBytesRef}) to the next String in binary order after
   * its current contents that will not put the machine into a reject state. If such a string does
   * not exist, returns false.
   *
   * <p>The correctness of this method depends upon the automaton being deterministic, and having no
   * transitions to dead states.
   *
   * <p>The state reached after each accepted byte is stored in {@code savedStates}, so that after
   * a backtrack the walk can resume from position {@code pos} instead of from the start.
   *
   * @return true if more possible solutions exist for the DFA
   */
  private boolean nextString() {
    int state;
    int pos = 0;
    // one slot per byte of the seek term, plus one for the initial state
    savedStates.grow(seekBytesRef.length + 1);
    final int[] states = savedStates.ints;
    states[0] = runAutomaton.getInitialState();

    while (true) {
      // a new generation marks which states are visited during this walk
      curGen++;
      linear = false;
      // walk the automaton until a character is rejected.
      for (state = states[pos]; pos < seekBytesRef.length; pos++) {
        visited[state] = curGen;
        int nextState = runAutomaton.step(state, seekBytesRef.bytes[pos] & 0xff);
        if (nextState == -1) break;
        states[pos + 1] = nextState;
        // we found a loop, record it for faster enumeration
        if (!finite && !linear && visited[nextState] == curGen) {
          setLinear(pos);
        }
        state = nextState;
      }

      // take the useful portion, and the last non-reject state, and attempt to
      // append characters that will match.
      if (nextString(state, pos)) {
        return true;
      } else {
        /* no more solutions exist from this useful portion, backtrack */
        if ((pos = backtrack(pos)) < 0) /* no more solutions at all */ return false;
        // re-check the byte at the backtracked position from the state saved for that position
        final int newState = runAutomaton.step(states[pos], seekBytesRef.bytes[pos] & 0xff);
        if (newState >= 0 && runAutomaton.isAccept(newState))
          /* String is good to go as-is */
          return true;
        /* else advance further */
        // TODO: paranoia? if we backtrack thru an infinite DFA, the loop detection is important!
        // for now, restart from scratch for all infinite DFAs
        if (!finite) pos = 0;
      }
    }
  }
    /**
     * Puts the block reader back into its initial state: all three decoded buffers are emptied,
     * the current node is reset, every block is flagged as pending and the compressed lengths
     * are zeroed.
     */
    @Override
    public void initBlock() {
      // empty the decoded buffers
      nodLenBuffer.length = 0;
      nodLenBuffer.offset = 0;
      nodBuffer.length = 0;
      nodBuffer.offset = 0;
      termFreqBuffer.length = 0;
      termFreqBuffer.offset = 0;
      this.resetCurrentNode();

      // nothing has been decoded yet
      nodLenReadPending = nodReadPending = termFreqReadPending = true;

      // no compressed data is known yet
      nodLenCompressedBufferLength = nodCompressedBufferLength = termFreqCompressedBufferLength = 0;
    }
  /**
   * Compresses the content of {@code input} into {@code output}, frame by frame.
   *
   * <p>For each frame a compressor code is obtained from {@code frameCompressorCodes}; the code
   * is written as a single byte at the current output offset and the compressor registered under
   * that code is invoked. Once the input offset has reached the input length, both buffers are
   * flipped: offsets go back to 0 and the output length is set to the number of bytes written.
   *
   * @param input the integers to compress; the length of its backing array must be a multiple
   *     of 32 (checked by an assertion only)
   * @param output the destination buffer
   */
  @Override
  public void compress(final IntsRef input, final BytesRef output) {
    assert input.ints.length % 32 == 0;
    final int[] src = input.ints;
    final byte[] dst = output.bytes;

    // let the input buffer be prepared before any frame is examined
    this.prepareInputBuffer(input);

    while (input.offset < input.length) {
      for (final long code : this.frameCompressorCodes(src, input.offset, input.length)) {
        // frame header: the code of the compressor used for this frame
        dst[output.offset] = (byte) code;
        // presumably the selected compressor advances both offsets — confirm
        this.compressors[(int) code].compress(input, output);
      }
    }

    // flip both buffers for reading
    output.length = output.offset;
    output.offset = 0;
    input.offset = 0;
  }
 /**
  * Creates an enumerator over the given FST.
  *
  * <p>The shared {@code current} buffer is exposed as the input of {@code result}, and its offset
  * starts at 1.
  *
  * <p>NOTE(review): the comment previously attached here described a {@code doFloor} flag ("if
  * it's true, advance positions to the biggest term before target"); this constructor takes no
  * such flag, so that text presumably belongs elsewhere — confirm.
  *
  * @param fst the FST passed on to the superclass constructor
  */
 public IntsRefFSTEnum(FST<T> fst) {
   super(fst);
   current.offset = 1;
   result.input = current;
 }
 /** Grows the backing array of {@code current} so that it can hold at least {@code upto + 1} ints. */
 @Override
 protected void grow() {
   final int minSize = upto + 1;
   current.ints = ArrayUtil.grow(current.ints, minSize);
 }
 /** Sets both the offset and the length of {@code currentNode} to 0. */
 public void resetCurrentNode() {
   currentNode.length = 0;
   currentNode.offset = 0;
 }