Exemplos de Token.type em Java

Linguagem de programação: Java

Espaço para nome / nome do pacote: org.apache.lucene.analysis

Classe / Tipo: Token

Método / Função: type

Exemplos em hotexamples.com: 7

Token.type em Java - 7 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de org.apache.lucene.analysis.Token.type em Java extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

startOffset(12)

termText(11)

endOffset(10)

type(7)

term(7)

setPositionIncrement(7)

getPositionIncrement(6)

buffer(5)

length(4)

setType(3)

setPositionLength(3)

getPayload(2)

toString(2)

copyBuffer(2)

termLength(2)

termBuffer(2)

getPositionLength(2)

copyToWithoutPayloadClone(1)

setTermBuffer(1)

equals(1)

setStartOffset(1)

getFlags(1)

clear(1)

setEndOffset(1)

payload(1)

setOffset(1)

Métodos Frequentes

startOffset (12)

termText (11)

endOffset (10)

type (7)

term (7)

setPositionIncrement (7)

getPositionIncrement (6)

buffer (5)

length (4)

setType (3)

Métodos Frequentes

setPositionLength (3)

getPayload (2)

toString (2)

copyBuffer (2)

termLength (2)

termBuffer (2)

getPositionLength (2)

copyToWithoutPayloadClone (1)

setTermBuffer (1)

equals (1)

setStartOffset (1)

getFlags (1)

clear (1)

setEndOffset (1)

payload (1)

setOffset (1)

Métodos Frequentes

setStartOffset (1)

getFlags (1)

clear (1)

setEndOffset (1)

payload (1)

setOffset (1)

Relacionados

Tcf

DetermineDatasetProperties

CFSecurityISOCountryBuff

BooleanIntCalculatable

ParserResult

METRIC_SYSTEM

DialerFragment

HexDump

CFBamPopSubDep3ByUNameIdxKey

BoardService

Relacionados em outras linguagens

Calendar (PHP)

acymailing_checkPluginsFolders (PHP)

Fighter (C#)

UpdateFleetAttributesRequestMarshaller (C#)

fibonacci (C++)

setGeometry (C++)

NewObjectMeta (Go)

NewExpander (Go)

is_data_archive (Python)

applyJ (Python)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: CutLetterDigitFilter.java Projeto: khaliyo/pet

private Token nextToken(Token reusableToken) throws IOException { assert reusableToken != null; // 先使用上次留下来的。 Token nextToken = tokenQueue.poll(); if (nextToken != null) { return nextToken; } /*//在 TokenUtils.nextToken 已经调用了 inc if(!input.incrementToken()) { return null; }*/ /*TermAttribute termAtt = (TermAttribute)input.getAttribute(TermAttribute.class); OffsetAttribute offsetAtt = (OffsetAttribute)input.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = (TypeAttribute)input.getAttribute(TypeAttribute.class); nextToken = reusableToken.reinit(termAtt.termBuffer(), 0, termAtt.termLength(), offsetAtt.startOffset(), offsetAtt.endOffset(), typeAtt.type());*/ nextToken = TokenUtils.nextToken(input, reusableToken); if (nextToken != null && (Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type()) || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type()))) { final char[] buffer = nextToken.buffer(); final int length = nextToken.length(); byte lastType = (byte) Character.getType(buffer[0]); // 与上次的字符是否同类 int termBufferOffset = 0; int termBufferLength = 0; for (int i = 0; i < length; i++) { byte type = (byte) Character.getType(buffer[i]); if (type <= Character.MODIFIER_LETTER) { type = Character.LOWERCASE_LETTER; } if (type != lastType) { // 与上一次的不同 addToken(nextToken, termBufferOffset, termBufferLength, lastType); termBufferOffset += termBufferLength; termBufferLength = 0; lastType = type; } termBufferLength++; } if (termBufferLength > 0) { // 最后一次 addToken(nextToken, termBufferOffset, termBufferLength, lastType); } nextToken = tokenQueue.poll(); } return nextToken; }

Exemplo n.º 2

0

Exibir arquivo

Arquivo: CutLetterDigitFilter.java Projeto: khaliyo/pet

/**
 * Advances the stream by one token.
 *
 * <p>Clears the attributes, asks {@code nextToken} for the next token and, if there is one,
 * copies its term buffer, offsets and type into the corresponding attributes.
 *
 * @return {@code true} if a token was published, {@code false} after {@code end()} was called
 *     because no token was available
 * @throws IOException propagated from {@code nextToken}
 */
public boolean incrementToken() throws IOException {
  clearAttributes();

  final Token current = nextToken(reusableToken);
  if (current == null) {
    // Nothing left: finish the stream.
    end();
    return false;
  }

  termAtt.copyBuffer(current.buffer(), 0, current.length());
  offsetAtt.setOffset(current.startOffset(), current.endOffset());
  typeAtt.setType(current.type());
  return true;
}

Exemplo n.º 3

0

Exibir arquivo

Arquivo: SlowSynonymMap.java Projeto: sudarshang/lucene-solr

/**
 * Merges two token lists into one, rewriting position increments so that tokens which occupied
 * the same absolute position in their own list share a position in the result.
 *
 * <p>Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same
 * position). Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a
 * has posInc=n).
 *
 * <p>If either list is {@code null}, the result simply contains the elements of the other list
 * (the same instances); otherwise every token in the result is a fresh copy.
 *
 * @param lst1 first token list, may be {@code null}
 * @param lst2 second token list, may be {@code null}
 * @return a new list holding the merged tokens; never {@code null}
 */
public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
  ArrayList<Token> merged = new ArrayList<Token>();
  if (lst1 == null || lst2 == null) {
    if (lst2 != null) {
      merged.addAll(lst2);
    }
    if (lst1 != null) {
      merged.addAll(lst1);
    }
    return merged;
  }

  Iterator<Token> first = lst1.iterator();
  Iterator<Token> second = lst2.iterator();

  Token head1 = null;
  Token head2 = null;
  // Absolute positions of the current head of each list.
  int abs1 = 0;
  int abs2 = 0;
  if (first.hasNext()) {
    head1 = first.next();
    abs1 = head1.getPositionIncrement();
  }
  if (second.hasNext()) {
    head2 = second.next();
    abs2 = head2.getPositionIncrement();
  }

  // Absolute position of the token most recently appended to the result.
  int emittedPos = 0;

  while (head1 != null || head2 != null) {
    // Drain list 1 while its head is not behind list 2's head (or list 2 is exhausted).
    while (true) {
      if (head1 == null) {
        break;
      }
      if (abs1 > abs2 && head2 != null) {
        break;
      }
      merged.add(copyWithIncrement(head1, abs1 - emittedPos));
      emittedPos = abs1;
      head1 = first.hasNext() ? first.next() : null;
      if (head1 != null) {
        abs1 += head1.getPositionIncrement();
      }
    }

    // Then the mirror image for list 2.
    while (true) {
      if (head2 == null) {
        break;
      }
      if (abs2 > abs1 && head1 != null) {
        break;
      }
      merged.add(copyWithIncrement(head2, abs2 - emittedPos));
      emittedPos = abs2;
      head2 = second.hasNext() ? second.next() : null;
      if (head2 != null) {
        abs2 += head2.getPositionIncrement();
      }
    }
  }
  return merged;
}

/**
 * Creates a new token with the offsets, type and term text of {@code source} and the given
 * position increment.
 */
private static Token copyWithIncrement(Token source, int positionIncrement) {
  Token copy = new Token(source.startOffset(), source.endOffset(), source.type());
  copy.copyBuffer(source.buffer(), 0, source.length());
  copy.setPositionIncrement(positionIncrement);
  return copy;
}

Exemplo n.º 4

0

Exibir arquivo

Arquivo: TestTrimFilter.java Projeto: jibaro/lucene_solr

/**
 * Publishes the next pre-built token from {@code tokens} through the stream attributes.
 *
 * @return {@code false} once {@code index} has reached the end of {@code tokens}; otherwise
 *     {@code true} after the token's term, offsets, position increment, flags, type and payload
 *     have been copied into the attributes
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
  if (index >= tokens.length) {
    return false;
  }

  clearAttributes();
  final Token current = tokens[index];
  index++;

  termAtt.setEmpty().append(current);
  offsetAtt.setOffset(current.startOffset(), current.endOffset());
  posIncAtt.setPositionIncrement(current.getPositionIncrement());
  flagsAtt.setFlags(current.getFlags());
  typeAtt.setType(current.type());
  payloadAtt.setPayload(current.getPayload());
  return true;
}

Exemplo n.º 5

0

Exibir arquivo

Arquivo: IntegerTokenFilter.java Projeto: bulias/community-edition

public Token next() throws IOException { Token candidate; while ((candidate = baseTokeniser.next()) != null) { try { Integer integer = Integer.valueOf(candidate.termText()); String valueString = NumericEncoder.encode(integer.intValue()); Token integerToken = new Token( valueString, candidate.startOffset(), candidate.startOffset(), candidate.type()); return integerToken; } catch (NumberFormatException e) { // just ignore and try the next one } } return null; }

Exemplo n.º 6

0

Exibir arquivo

Arquivo: StandardFilter.java Projeto: Rashmos/Information_Retrieval_System

/**
 * Returns the next token from {@code input}, normalising apostrophe and acronym tokens.
 *
 * <ul>
 *   <li>A token of type {@code APOSTROPHE_TYPE} ending in {@code 's} or {@code 'S} is returned
 *       without those two trailing characters.
 *   <li>A token of type {@code ACRONYM_TYPE} is returned with every {@code '.'} removed.
 *   <li>Any other token is returned unchanged.
 * </ul>
 *
 * <p>Rewritten tokens keep the original offsets and type. Types are compared with
 * {@code equals} rather than {@code ==}, so a type string that is equal to a constant but is not
 * the same instance is still recognised.
 *
 * @return the (possibly rewritten) next token, or {@code null} at end of stream
 * @throws IOException propagated from the wrapped stream
 */
public final Token next() throws IOException {
  Token token = this.input.next();
  if (token == null) {
    return null;
  }

  String text = token.termText();
  String type = token.type();

  if (APOSTROPHE_TYPE.equals(type) && (text.endsWith("'s") || text.endsWith("'S"))) {
    // Strip the trailing possessive.
    return new Token(
        text.substring(0, text.length() - 2), token.startOffset(), token.endOffset(), type);
  }

  if (ACRONYM_TYPE.equals(type)) {
    // Drop the dots of the acronym.
    StringBuilder withoutDots = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (c != '.') {
        withoutDots.append(c);
      }
    }
    return new Token(withoutDots.toString(), token.startOffset(), token.endOffset(), type);
  }

  return token;
}

Exemplo n.º 7

0

Exibir arquivo

Arquivo: AlfrescoStandardFilter.java Projeto: bulias/community-edition

/** * Returns the next token in the stream, or null at EOS. * * <p>Removes <tt>'s</tt> from the end of words. * * <p>Removes dots from acronyms. * * <p>Splits host names ... */ public final org.apache.lucene.analysis.Token next() throws java.io.IOException { if (hostTokens == null) { org.apache.lucene.analysis.Token t = input.next(); if (t == null) return null; String text = t.termText(); String type = t.type(); if (type == APOSTROPHE_TYPE && // remove 's (text.endsWith("'s") || text.endsWith("'S"))) { return new org.apache.lucene.analysis.Token( text.substring(0, text.length() - 2), t.startOffset(), t.endOffset(), type); } else if (type == ACRONYM_TYPE) { // remove dots StringBuffer trimmed = new StringBuffer(); for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (c != '.') trimmed.append(c); } return new org.apache.lucene.analysis.Token( trimmed.toString(), t.startOffset(), t.endOffset(), type); } else if (type == HOST_TYPE) { // <HOST: <ALPHANUM> ("." <ALPHANUM>)+ > // There must be at least two tokens .... hostTokens = new LinkedList<org.apache.lucene.analysis.Token>(); StringTokenizer tokeniser = new StringTokenizer(text, "."); int start = t.startOffset(); int end; while (tokeniser.hasMoreTokens()) { String token = tokeniser.nextToken(); end = start + token.length(); hostTokens.offer(new org.apache.lucene.analysis.Token(token, start, end, ALPHANUM_TYPE)); start = end + 1; } // check if we have an acronym ..... yes a.b.c ends up here ... 
if (text.length() == hostTokens.size() * 2 - 1) { hostTokens = null; // acronym StringBuffer trimmed = new StringBuffer(); for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (c != '.') trimmed.append(c); } return new org.apache.lucene.analysis.Token( trimmed.toString(), t.startOffset(), t.endOffset(), ALPHANUM_TYPE); } else { return hostTokens.remove(); } } else { return t; } } else { org.apache.lucene.analysis.Token token = hostTokens.remove(); if (hostTokens.isEmpty()) { hostTokens = null; } return token; } }