/**
 * This {@code accept} method in FuzzyTermEnum uses Levenshtein distance to calculate the
 * distance between the given term and the comparing term.
 *
 * <p>If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison. Otherwise,
 * this method uses the following logic to calculate similarity.
 *
 * <pre>
 * similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
 * </pre>
 *
 * where distance is the Levenshtein distance for the two words.
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  if (StringHelper.startsWith(term, prefixBytesRef)) {
    utf32.copyUTF8Bytes(term);
    final int distance =
        calcDistance(utf32.ints(), realPrefixLength, utf32.length() - realPrefixLength);

    // Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
    if (distance == Integer.MIN_VALUE) {
      return AcceptStatus.NO;
    }
    // no need to calc similarity, if raw is true and distance > maxEdits
    if (raw && distance > maxEdits) {
      return AcceptStatus.NO;
    }
    final float similarity =
        calcSimilarity(distance, (utf32.length() - realPrefixLength), text.length);

    // if raw is true, then distance must also be <= maxEdits by now,
    // given the previous if statement
    if (raw || similarity > minSimilarity) {
      boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
      return AcceptStatus.YES;
    } else {
      return AcceptStatus.NO;
    }
  } else {
    return AcceptStatus.END;
  }
}
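// A minimal standalone sketch of the similarity formula documented in the Javadoc above.
// The helper name similaritySketch and the sample pair ("kitten" vs. "sitting", Levenshtein
// distance 3) are illustrative assumptions, not part of the original source.
static float similaritySketch() {
  int distance = 3;     // Levenshtein("kitten", "sitting")
  int prefixLength = 0; // assume no common prefix was stripped
  int textLen = 6;      // "kitten".length()
  int targetLen = 7;    // "sitting".length()
  // similarity = 1 - 3 / (0 + min(6, 7)) = 1 - 0.5 = 0.5
  return 1 - ((float) distance / (float) (prefixLength + Math.min(textLen, targetLen)));
}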
private UserDictionary(List<String[]> featureEntries) throws IOException {
  int wordId = CUSTOM_DICTIONARY_WORD_ID_OFFSET;
  // TODO: should we allow multiple segmentations per input 'phrase'?
  // the old treemap didn't support this either, and i'm not sure if it's needed/useful?

  featureEntries.sort((left, right) -> left[0].compareTo(right[0]));

  List<String> data = new ArrayList<>(featureEntries.size());
  List<int[]> segmentations = new ArrayList<>(featureEntries.size());

  PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
  Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput);
  IntsRefBuilder scratch = new IntsRefBuilder();
  long ord = 0;

  for (String[] values : featureEntries) {
    String[] segmentation = values[1].replaceAll(" +", " ").split(" ");
    String[] readings = values[2].replaceAll(" +", " ").split(" ");
    String pos = values[3];

    if (segmentation.length != readings.length) {
      throw new RuntimeException(
          "Illegal user dictionary entry "
              + values[0]
              + " - the number of segmentations ("
              + segmentation.length
              + ")"
              + " does not match the number of readings ("
              + readings.length
              + ")");
    }

    int[] wordIdAndLength = new int[segmentation.length + 1]; // wordId offset, length, length...
    wordIdAndLength[0] = wordId;
    for (int i = 0; i < segmentation.length; i++) {
      wordIdAndLength[i + 1] = segmentation[i].length();
      data.add(readings[i] + INTERNAL_SEPARATOR + pos);
      wordId++;
    }

    // add mapping to FST
    String token = values[0];
    scratch.grow(token.length());
    scratch.setLength(token.length());
    for (int i = 0; i < token.length(); i++) {
      scratch.setIntAt(i, (int) token.charAt(i));
    }
    fstBuilder.add(scratch.get(), ord);
    segmentations.add(wordIdAndLength);
    ord++;
  }

  this.fst = new TokenInfoFST(fstBuilder.finish(), false);
  this.data = data.toArray(new String[data.size()]);
  this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
}
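// A minimal sketch of how the constructor above packs one entry into wordIdAndLength.
// The method name packSketch and the sample entry are illustrative assumptions: for
// values = {"関西国際空港", "関西 国際 空港", "カンサイ コクサイ クウコウ", "カスタム名詞"}
// and a starting wordId w, this yields {w, 2, 2, 2} (the wordId offset, then the character
// length of each segment), while the constructor also appends one "reading<sep>pos" row
// to data per segment and maps the surface form "関西国際空港" to its ordinal in the FST.
static int[] packSketch(String[] values, int wordId) {
  String[] segmentation = values[1].split(" +"); // split the segmentation field on spaces
  int[] wordIdAndLength = new int[segmentation.length + 1];
  wordIdAndLength[0] = wordId;
  for (int i = 0; i < segmentation.length; i++) {
    wordIdAndLength[i + 1] = segmentation[i].length();
  }
  return wordIdAndLength;
}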