Example #1
0
 /**
  * Restores the double-array trie from its binary cache on disk.
  *
  * @param path dictionary path; {@code Predefine.BIN_EXT} is appended to locate the cache
  * @return {@code true} if the cache existed, parsed completely, and the trie loaded
  */
 static boolean loadDat(String path) {
   try {
     ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
     if (data == null) {
       return false;
     }
     int total = data.nextInt();
     CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[total];
     final Nature[] natures = Nature.values();
     for (int index = 0; index < total; ++index) {
       // Per-entry layout: total frequency, nature count, then (nature ordinal, frequency) pairs.
       int frequencySum = data.nextInt();
       int natureCount = data.nextInt();
       CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
       attribute.totalFrequency = frequencySum;
       for (int n = 0; n < natureCount; ++n) {
         attribute.nature[n] = natures[data.nextInt()];
         attribute.frequency[n] = data.nextInt();
       }
       attributes[index] = attribute;
     }
     // The serialized trie follows the attribute table; leftover bytes mean a corrupt cache.
     if (!trie.load(data, attributes)) return false;
     if (data.hasMore()) return false;
   } catch (Exception e) {
     logger.warning("读取失败,问题发生在" + e);
     return false;
   }
   return true;
 }
Example #2
0
 /**
  * Predicts the outcome distribution for a context.
  *
  * <p>Each context predicate is mapped to its integer index via {@code pmap} (unknown
  * predicates map to -1), the prior is written into {@code outsums}, and evaluation is
  * delegated to the integer-indexed overload.
  *
  * @param context the environment as predicate strings
  * @param outsums prior distribution buffer, filled by {@code prior.logPrior}
  * @return the probability array produced by the indexed {@code eval} overload
  */
 public final double[] eval(String[] context, double[] outsums) {
   int[] indices = new int[context.length];
   int pos = 0;
   for (String predicate : context) {
     Integer mapped = pmap.get(predicate);
     // Unknown predicates are encoded as -1 for the indexed evaluator.
     indices[pos++] = (mapped == null) ? -1 : mapped;
   }
   prior.logPrior(outsums);
   return eval(indices, outsums, evalParams);
 }
Example #3
0
 // 自动加载词典
 static {
   long start = System.currentTimeMillis();
   if (!load(path)) {
     System.err.printf("核心词典%s加载失败\n", path);
     System.exit(-1);
   } else {
     logger.info(
         path + "加载成功," + trie.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
   }
 }
Example #4
0
 /**
  * Merges a coarse segmentation result against the user (custom) dictionary.
  *
  * <p>Walks the vertex array left to right, driving the custom dictionary's double-array
  * trie with each vertex's {@code realWord}. When a run of consecutive vertices reaches a
  * trie state with a non-null output, the run is collapsed into a single new {@link Vertex}
  * built from the string form of those vertices and the dictionary entry's attribute.
  *
  * @param vertexList coarse segmentation result; cleared and refilled in place
  * @return the same list instance, with matched runs merged
  */
 protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList) {
   Vertex[] wordNet = new Vertex[vertexList.size()];
   vertexList.toArray(wordNet);
   DoubleArrayTrie<CoreDictionary.Attribute> dat = CustomDictionary.dat;
   for (int i = 0; i < wordNet.length; ++i) {
     // State 1 is presumably the trie's initial state; transition() advances it one word
     // at a time — TODO confirm against DoubleArrayTrie.transition's contract.
     int state = 1;
     state = dat.transition(wordNet[i].realWord, state);
     if (state > 0) {
       int start = i;
       int to = i + 1;
       int end = -1;
       CoreDictionary.Attribute value = null;
       // Extend the match greedily over following vertices, remembering the last
       // position whose state produced a dictionary output (longest match wins).
       for (; to < wordNet.length; ++to) {
         state = dat.transition(wordNet[to].realWord, state);
         // NOTE(review): the outer guard requires state > 0 but this loop only breaks on
         // state < 0 — confirm whether state == 0 should also terminate the scan.
         if (state < 0) break;
         CoreDictionary.Attribute output = dat.output(state);
         if (output != null) {
           value = output;
           end = to + 1;
         }
       }
       if (value != null) {
         // Collapse vertices [start, end) into one merged vertex, nulling absorbed slots.
         StringBuilder sbTerm = new StringBuilder();
         for (int j = start; j < end; ++j) {
           sbTerm.append(wordNet[j]);
           wordNet[j] = null;
         }
         wordNet[i] = new Vertex(sbTerm.toString(), value);
         // Resume scanning after the merged run.
         i = end - 1;
       }
     }
   }
   // Rebuild the caller's list from the surviving (non-null) vertices.
   vertexList.clear();
   for (Vertex vertex : wordNet) {
     if (vertex != null) vertexList.add(vertex);
   }
   return vertexList;
 }
Example #5
0
  /**
   * Loads the core dictionary from {@code path}, preferring the binary cache.
   *
   * <p>If {@link #loadDat(String)} succeeds the cached trie is used directly. Otherwise the
   * plain-text dictionary is parsed (one entry per line: word followed by nature/frequency
   * pairs), the trie is built, and a binary cache is written next to the source file.
   *
   * <p>Fix: both the reader and the cache writer are now managed with try-with-resources, so
   * they are closed even when an exception is thrown mid-read or mid-write (previously
   * {@code br.close()} and {@code out.close()} only ran on the success path, leaking the
   * streams on error).
   *
   * @param path path of the plain-text core dictionary
   * @return {@code true} on success; cache-write failure also returns {@code false}
   */
  private static boolean load(String path) {
    logger.info("核心词典开始加载:" + path);
    if (loadDat(path)) return true;
    TreeMap<String, CoreDictionary.Attribute> map = new TreeMap<String, Attribute>();
    try (BufferedReader br =
        new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
      String line;
      int MAX_FREQUENCY = 0;
      long start = System.currentTimeMillis();
      while ((line = br.readLine()) != null) {
        // Line layout: word, then alternating nature / frequency columns.
        String[] param = line.split("\\s");
        int natureCount = (param.length - 1) / 2;
        CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
        for (int i = 0; i < natureCount; ++i) {
          attribute.nature[i] = Enum.valueOf(Nature.class, param[1 + 2 * i]);
          attribute.frequency[i] = Integer.parseInt(param[2 + 2 * i]);
          attribute.totalFrequency += attribute.frequency[i];
        }
        map.put(param[0], attribute);
        MAX_FREQUENCY += attribute.totalFrequency;
      }
      logger.info(
          "核心词典读入词条"
              + map.size()
              + " 全部频次"
              + MAX_FREQUENCY
              + ",耗时"
              + (System.currentTimeMillis() - start)
              + "ms");
      trie.build(map);
      logger.info("核心词典加载成功:" + trie.size() + "个词条,下面将写入缓存……");
      // Write the binary cache so the next startup can take the fast loadDat path.
      try (DataOutputStream out =
          new DataOutputStream(new FileOutputStream(path + Predefine.BIN_EXT))) {
        Collection<CoreDictionary.Attribute> attributeList = map.values();
        out.writeInt(attributeList.size());
        for (CoreDictionary.Attribute attribute : attributeList) {
          out.writeInt(attribute.totalFrequency);
          out.writeInt(attribute.nature.length);
          for (int i = 0; i < attribute.nature.length; ++i) {
            out.writeInt(attribute.nature[i].ordinal());
            out.writeInt(attribute.frequency[i]);
          }
        }
        trie.save(out);
      } catch (Exception e) {
        logger.warning("保存失败" + e);
        return false;
      }
    } catch (FileNotFoundException e) {
      logger.warning("核心词典" + path + "不存在!" + e);
      return false;
    } catch (IOException e) {
      logger.warning("核心词典" + path + "读取错误!" + e);
      return false;
    }

    return true;
  }
Example #6
0
 /**
  * Tests whether the dictionary contains the given word.
  *
  * @param key the word to look up
  * @return {@code true} if the trie holds an entry for {@code key}
  */
 public static boolean contains(String key) {
   Attribute attribute = trie.get(key);
   return attribute != null;
 }
Example #7
0
 /**
  * Retrieves the dictionary entry stored at a word id.
  *
  * @param wordID the word's index in the trie
  * @return the attribute at that index
  */
 public static Attribute get(int wordID) {
   final Attribute attribute = trie.get(wordID);
   return attribute;
 }
Example #8
0
 /**
  * Retrieves the dictionary entry for a word.
  *
  * @param key the word to look up
  * @return the word's attribute, or {@code null} if absent
  */
 public static Attribute get(String key) {
   final Attribute attribute = trie.get(key);
   return attribute;
 }