Example 1
  private Token nextToken(Token reusableToken) throws IOException {
    assert reusableToken != null;

    // First return any tokens left over from the previous call.
    Token nextToken = tokenQueue.poll();
    if (nextToken != null) {
      return nextToken;
    }

    /*// incrementToken has already been called inside TokenUtils.nextToken
    if(!input.incrementToken()) {
    	return null;
    }*/

    /*TermAttribute termAtt = (TermAttribute)input.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = (OffsetAttribute)input.getAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = (TypeAttribute)input.getAttribute(TypeAttribute.class);

    nextToken = reusableToken.reinit(termAtt.termBuffer(), 0, termAtt.termLength(), offsetAtt.startOffset(), offsetAtt.endOffset(), typeAtt.type());*/

    nextToken = TokenUtils.nextToken(input, reusableToken);

    if (nextToken != null
        && (Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type())
            || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))) {
      final char[] buffer = nextToken.buffer();
      final int length = nextToken.length();
      byte lastType = (byte) Character.getType(buffer[0]); // used to check whether a character has the same category as the previous one
      int termBufferOffset = 0;
      int termBufferLength = 0;
      for (int i = 0; i < length; i++) {
        byte type = (byte) Character.getType(buffer[i]);
        if (type <= Character.MODIFIER_LETTER) {
          type = Character.LOWERCASE_LETTER;
        }
        if (type != lastType) { // different category from the previous character
          addToken(nextToken, termBufferOffset, termBufferLength, lastType);

          termBufferOffset += termBufferLength;
          termBufferLength = 0;

          lastType = type;
        }

        termBufferLength++;
      }
      if (termBufferLength > 0) { // emit the final segment
        addToken(nextToken, termBufferOffset, termBufferLength, lastType);
      }
      nextToken = tokenQueue.poll();
    }

    return nextToken;
  }
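
The loop above cuts a mixed letter/digit token into runs of characters that share a Character.getType category, with all letter categories folded into one. A minimal standalone sketch of that splitting technique, separate from the original filter (class and method names here are illustrative, not part of the code above):

  import java.util.ArrayList;
  import java.util.List;

  public class CharTypeSplitDemo {
    /** Splits e.g. "abc123" into ["abc", "123"] by Character.getType, folding all letter categories together. */
    public static List<String> splitByCharType(String s) {
      List<String> parts = new ArrayList<String>();
      if (s.isEmpty()) {
        return parts;
      }
      int start = 0;
      byte lastType = normalize(Character.getType(s.charAt(0)));
      for (int i = 0; i < s.length(); i++) {
        byte type = normalize(Character.getType(s.charAt(i)));
        if (type != lastType) { // category changed: close the current run
          parts.add(s.substring(start, i));
          start = i;
          lastType = type;
        }
      }
      parts.add(s.substring(start)); // final run
      return parts;
    }

    private static byte normalize(int type) {
      // Mirror the MODIFIER_LETTER check above: treat every letter category as one type.
      return type <= Character.MODIFIER_LETTER ? (byte) Character.LOWERCASE_LETTER : (byte) type;
    }
  }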
Example 2
 public boolean incrementToken() throws IOException {
   clearAttributes();
   Token token = nextToken(reusableToken);
   if (token != null) {
     termAtt.copyBuffer(token.buffer(), 0, token.length());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     typeAtt.setType(token.type());
     return true;
   } else {
     end();
     return false;
   }
 }
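
For context, a consumer of an incrementToken-based stream like the one above registers the attributes it needs up front and follows the reset/incrementToken/end/close contract. A minimal sketch, assuming stream is an already-constructed Lucene TokenStream (for example, the filter above wrapped around a tokenizer); the class and method names are illustrative:

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

  public class TokenStreamDumpDemo {
    static void dumpTokens(TokenStream stream) throws IOException {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
      stream.reset(); // must be called before the first incrementToken()
      while (stream.incrementToken()) {
        System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + "]");
      }
      stream.end();   // records the end-of-input offset state
      stream.close();
    }
  }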
Example 3
  /**
   * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
   * the tokens end up at the same position.
   *
   * <p>Example: [a b] merged with [c d] produces [a/c b/d] ('/' denotes tokens in the same
   * position).
   *
   * <p>Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has
   * posInc=n).
   */
  public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
    ArrayList<Token> result = new ArrayList<Token>();
    if (lst1 == null || lst2 == null) {
      if (lst2 != null) result.addAll(lst2);
      if (lst1 != null) result.addAll(lst1);
      return result;
    }

    int pos = 0;
    Iterator<Token> iter1 = lst1.iterator();
    Iterator<Token> iter2 = lst2.iterator();
    Token tok1 = iter1.hasNext() ? iter1.next() : null;
    Token tok2 = iter2.hasNext() ? iter2.next() : null;
    int pos1 = tok1 != null ? tok1.getPositionIncrement() : 0;
    int pos2 = tok2 != null ? tok2.getPositionIncrement() : 0;
    while (tok1 != null || tok2 != null) {
      while (tok1 != null && (pos1 <= pos2 || tok2 == null)) {
        Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
        tok.copyBuffer(tok1.buffer(), 0, tok1.length());
        tok.setPositionIncrement(pos1 - pos);
        result.add(tok);
        pos = pos1;
        tok1 = iter1.hasNext() ? iter1.next() : null;
        pos1 += tok1 != null ? tok1.getPositionIncrement() : 0;
      }
      while (tok2 != null && (pos2 <= pos1 || tok1 == null)) {
        Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
        tok.copyBuffer(tok2.buffer(), 0, tok2.length());
        tok.setPositionIncrement(pos2 - pos);
        result.add(tok);
        pos = pos2;
        tok2 = iter2.hasNext() ? iter2.next() : null;
        pos2 += tok2 != null ? tok2.getPositionIncrement() : 0;
      }
    }
    return result;
  }
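
A small usage sketch of mergeTokens for the [a b] + [c d] case from the javadoc. It uses only Token methods that already appear above and assumes it sits next to mergeTokens with java.util.Arrays imported; the makeToken helper is illustrative:

  // Illustrative helper, built from the Token constructor and methods used above.
  static Token makeToken(String text, int start, int end, int posInc) {
    Token t = new Token(start, end, "word");
    t.copyBuffer(text.toCharArray(), 0, text.length());
    t.setPositionIncrement(posInc);
    return t;
  }

  static void mergeDemo() {
    // [a b] merged with [c d]: result order is a, c, d, b with position increments 1, 0, 1, 0,
    // i.e. a and c end up at the first position, b and d at the second.
    List<Token> merged = mergeTokens(
        Arrays.asList(makeToken("a", 0, 1, 1), makeToken("b", 2, 3, 1)),
        Arrays.asList(makeToken("c", 0, 1, 1), makeToken("d", 2, 3, 1)));
  }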
Example 4
 @Override
 public boolean incrementToken() throws IOException {
   if (index >= tokens.length) return false;
   else {
     clearAttributes();
     Token token = tokens[index++];
     termAtt.setEmpty().append(token);
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     posIncAtt.setPositionIncrement(token.getPositionIncrement());
     flagsAtt.setFlags(token.getFlags());
     typeAtt.setType(token.type());
     payloadAtt.setPayload(token.getPayload());
     return true;
   }
 }
 public Token next() throws IOException {
   Token candidate;
   while ((candidate = baseTokeniser.next()) != null) {
     try {
       Integer integer = Integer.valueOf(candidate.termText());
       String valueString = NumericEncoder.encode(integer.intValue());
        Token integerToken =
            new Token(
                valueString, candidate.startOffset(), candidate.endOffset(), candidate.type());
       return integerToken;
     } catch (NumberFormatException e) {
       // just ignore and try the next one
     }
   }
   return null;
 }
 public final Token next() throws IOException {
   Token localToken = this.input.next();
   if (localToken == null) return null;
   String str1 = localToken.termText();
   String str2 = localToken.type();
   if ((str2 == APOSTROPHE_TYPE) && ((str1.endsWith("'s")) || (str1.endsWith("'S"))))
     return new Token(
         str1.substring(0, str1.length() - 2),
         localToken.startOffset(),
         localToken.endOffset(),
         str2);
   if (str2 == ACRONYM_TYPE) {
     StringBuffer localStringBuffer = new StringBuffer();
     for (int i = 0; i < str1.length(); i++) {
       char c = str1.charAt(i);
       if (c != '.') localStringBuffer.append(c);
     }
     return new Token(
         localStringBuffer.toString(), localToken.startOffset(), localToken.endOffset(), str2);
   }
   return localToken;
 }
  /**
   * Returns the next token in the stream, or null at EOS.
   *
   * <p>Removes <tt>'s</tt> from the end of words.
   *
   * <p>Removes dots from acronyms.
   *
   * <p>Splits host names ...
   */
  public final org.apache.lucene.analysis.Token next() throws java.io.IOException {
    if (hostTokens == null) {
      org.apache.lucene.analysis.Token t = input.next();

      if (t == null) return null;

      String text = t.termText();
      String type = t.type();

      if (type == APOSTROPHE_TYPE // remove 's
          && (text.endsWith("'s") || text.endsWith("'S"))) {
        return new org.apache.lucene.analysis.Token(
            text.substring(0, text.length() - 2), t.startOffset(), t.endOffset(), type);

      } else if (type == ACRONYM_TYPE) { // remove dots
        StringBuffer trimmed = new StringBuffer();
        for (int i = 0; i < text.length(); i++) {
          char c = text.charAt(i);
          if (c != '.') trimmed.append(c);
        }
        return new org.apache.lucene.analysis.Token(
            trimmed.toString(), t.startOffset(), t.endOffset(), type);

      } else if (type == HOST_TYPE) {
        // <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
        // There must be at least two tokens ....
        hostTokens = new LinkedList<org.apache.lucene.analysis.Token>();
        StringTokenizer tokeniser = new StringTokenizer(text, ".");
        int start = t.startOffset();
        int end;
        while (tokeniser.hasMoreTokens()) {
          String token = tokeniser.nextToken();
          end = start + token.length();
          hostTokens.offer(new org.apache.lucene.analysis.Token(token, start, end, ALPHANUM_TYPE));
          start = end + 1;
        }
        // check if we have an acronym ..... yes a.b.c ends up here ...

        if (text.length() == hostTokens.size() * 2 - 1) {
          hostTokens = null;
          // acronym
          StringBuffer trimmed = new StringBuffer();
          for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c != '.') trimmed.append(c);
          }
          return new org.apache.lucene.analysis.Token(
              trimmed.toString(), t.startOffset(), t.endOffset(), ALPHANUM_TYPE);
        } else {
          return hostTokens.remove();
        }
      } else {
        return t;
      }
    } else {
      org.apache.lucene.analysis.Token token = hostTokens.remove();
      if (hostTokens.isEmpty()) {
        hostTokens = null;
      }
      return token;
    }
  }
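
The HOST branch above splits the host on '.' while keeping character offsets, and falls back to the acronym path when every label is a single character (text.length() == hostTokens.size() * 2 - 1, as with "a.b.c"). A standalone sketch of just that offset bookkeeping; the class name and the base offset of 10 are illustrative assumptions:

  import java.util.StringTokenizer;

  public class HostSplitDemo {
    public static void main(String[] args) {
      String text = "www.example.com";
      int start = 10; // assume the host token started at offset 10 in the original input
      StringTokenizer tokeniser = new StringTokenizer(text, ".");
      while (tokeniser.hasMoreTokens()) {
        String part = tokeniser.nextToken();
        int end = start + part.length();
        System.out.println(part + " [" + start + "," + end + "]");
        start = end + 1; // skip the '.' separator
      }
      // prints: www [10,13]  example [14,21]  com [22,25]
    }
  }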