Ejemplo n.º 1
0
  /**
   * Verifies that offsets of tokens read through a {@code MappingCharFilter} are reported relative
   * to the original, unfiltered input rather than to the filtered text.
   *
   * <p>The raw input contains the six-character HTML entity {@code &uuml;}, which the char filter
   * maps to the single character {@code ü}. Each emitted token text is therefore {@code "Günther"}
   * (7 chars), while its expected start/end offsets span the 12-character {@code "G&uuml;nther"}
   * in the raw input.
   *
   * @throws Exception propagated from building or consuming the token stream
   */
  @Test
  public void testOffsetCorrection() throws Exception {
    // The input must carry the literal entity (33 chars in total). With an already-decoded "ü"
    // the mapping below would never fire and the expected offsets would not match the input.
    final String INPUT = "G&uuml;nther G&uuml;nther is here";

    // create MappingCharFilter: "&uuml;" (6 chars) => "ü" (1 char)
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

    // create PatternTokenizer; group -1 uses the pattern as a delimiter, so the input is split
    // on runs of ',', ';', '/' and whitespace
    Tokenizer stream =
        new PatternTokenizer(newAttributeFactory(), Pattern.compile("[,;/\\s]+"), -1);
    stream.setReader(charStream);
    assertTokenStreamContents(
        stream,
        new String[] {"Günther", "Günther", "is", "here"},
        new int[] {0, 13, 26, 29}, // start offsets in the raw input
        new int[] {12, 25, 28, 33}, // end offsets in the raw input
        INPUT.length());

    // A reader can only be consumed once, so build a fresh char filter for the second pass.
    // Group 0 emits each whole match of the pattern as a token instead of splitting on it.
    charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
    stream = new PatternTokenizer(newAttributeFactory(), Pattern.compile("Günther"), 0);
    stream.setReader(charStream);
    assertTokenStreamContents(
        stream,
        new String[] {"Günther", "Günther"},
        new int[] {0, 13},
        new int[] {12, 25},
        INPUT.length());
  }
Ejemplo n.º 2
0
  /**
   * Runs the analysis consistency check against one fixed analyzer chain and one fixed input
   * string.
   *
   * <p>The chain consists of three stacked readers ({@code MockCharFilter}, {@code
   * MappingCharFilter}, and {@code CheckThatYouDidntReadAnythingReaderWrapper}) feeding a {@code
   * MockTokenizer} followed by a {@code CommonGramsFilter}.
   *
   * <p>NOTE(review): the literal words, mappings and the negative tokenizer argument look like a
   * captured reproduction of a randomly generated chain; confirm before changing any of them.
   *
   * @throws Exception propagated from the consistency check
   */
  public void test() throws Exception {
    // Common words handed to the CommonGramsFilter.
    final CharArraySet cas = new CharArraySet(3, false);
    cas.add("jjp");
    cas.add("wlmwoknt");
    cas.add("tcgyreo");

    // Character-level replacements applied by the MappingCharFilter; note that one entry maps
    // to the empty string (a deletion) and another maps to a longer replacement.
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("mtqlpi", "");
    builder.add("mwoknt", "jjp");
    builder.add("tcgyreo", "zpfpajyws");
    final NormalizeCharMap map = builder.build();

    // try-with-resources guarantees the analyzer is closed even when the consistency check
    // throws; a trailing close() call would be skipped on failure.
    try (Analyzer a =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenFilter.ENGLISH_STOPSET, false, -65);
            TokenFilter f = new CommonGramsFilter(t, cas);
            return new TokenStreamComponents(t, f);
          }

          @Override
          protected Reader initReader(String fieldName, Reader reader) {
            // Wrapping order matters: each reader consumes the one built just before it.
            reader = new MockCharFilter(reader, 0);
            reader = new MappingCharFilter(map, reader);
            reader = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);
            return reader;
          }
        }) {
      checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
    }
  }