Exemplo n.º 1
0
  /**
   * Verifies that a user-defined word is honored by the segmenter after
   * insertion and disappears again after removal.
   */
  @Test
  public void test() {
    String newWord = "爸爸去哪儿";
    String nature = "aaaaa";
    String str = "上海电力2012年财务报表如下怎爸爸去哪儿么办";

    // Register the new word with its nature tag and a frequency of 1000.
    UserDefineLibrary.insertWord(newWord, nature, 1000);

    HashMap<String, Term> hs = indexByName(ToAnalysis.parse(str));
    Assert.assertTrue(hs.containsKey(newWord));
    // NOTE: natrue() is the (misspelled) Ansj accessor for the term's Nature.
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals(nature, hs.get(newWord).natrue().natureStr);

    // Remove the word; it must no longer appear as a single segment.
    UserDefineLibrary.removeWord(newWord);
    hs = indexByName(ToAnalysis.parse(str));
    Assert.assertFalse(hs.containsKey(newWord));
  }

  /** Indexes parsed terms by surface form; later duplicates overwrite earlier ones. */
  private HashMap<String, Term> indexByName(List<Term> terms) {
    HashMap<String, Term> hs = new HashMap<String, Term>();
    for (Term term : terms) {
      hs.put(term.getName(), term);
    }
    return hs;
  }
Exemplo n.º 2
0
  /**
   * Segments {@code text} with Ansj and returns the words that survive filtering.
   *
   * <p>Dropped tokens: blank tokens; single characters when {@code filterSingleWord}
   * is set; members of {@code stopWordsSet} when {@code filterStopWords} is set;
   * and any token that contains a digit (matched by {@code pureNum}).
   *
   * @param text input text to segment
   * @return the kept words, in segmentation order
   */
  public List<String> cut(String text) {
    List<Term> terms = ToAnalysis.parse(text);
    List<String> array = new ArrayList<>();
    StringBuilder sbDebug = new StringBuilder();
    for (Term term : terms) {
      String word = term.getName().trim();
      if (word.isEmpty()) {
        continue;
      }
      if (filterSingleWord && word.length() < 2) { // drop single-character words
        continue;
      }
      if (filterStopWords && stopWordsSet.contains(word)) {
        continue;
      }
      if (pureNum.matcher(word).find()) { // drop any word containing a digit
        continue;
      }
      array.add(word);

      if (debug) {
        // Chain appends instead of concatenating inside append().
        sbDebug.append(word).append(',');
      }
    }
    if (debug) {
      logger.info("cut {} into {}", text, sbDebug.toString());
    }
    return array;
  }
  /**
   * Classifies the sentiment of the request text.
   *
   * <p>Emotion codes: 1 = positive (score &gt; 0.5), 2 = negative
   * (score &lt; -0.5), 3 = neutral otherwise.
   */
  @Override
  public EmotionResponse emotionJudge(EmotionRequest req) throws TException {
    long startMillis = System.currentTimeMillis();

    // Segment the request text and collect the raw token strings.
    List<String> words = new ArrayList<>();
    for (Term term : ToAnalysis.parse(req.getText())) {
      words.add(term.getName());
    }

    double score = fbsc.classify(words);

    // Map the classifier score onto the three-valued emotion code.
    int emotion;
    if (score > 0.5) {
      emotion = 1;
    } else if (score < -0.5) {
      emotion = 2;
    } else {
      emotion = 3;
    }

    EmotionResponse response = new EmotionResponse();
    response.setReq(req);
    response.setScore(score);
    response.setEmotion(emotion);

    logger.info("emotion: {}, used:{}ms", emotion, (System.currentTimeMillis() - startMillis));
    return response;
  }
Exemplo n.º 4
0
 /** Prints each segmented token of the {@code line} field on its own line. */
 public void test() {
   for (Term term : ToAnalysis.parse(line)) {
     System.out.println(term.getName());
   }
 }
Exemplo n.º 5
0
 /**
  * Segments {@code sentence} with Ansj and wraps each token as a
  * {@code ParseTerm} carrying the surface form and its nature string.
  */
 @Override
 public List<ParseTerm> parse(String sentence) {
   List<ParseTerm> result = new ArrayList<ParseTerm>();
   for (Term t : ToAnalysis.parse(sentence)) {
     result.add(new ParseTerm(t.getRealName(), t.getNatureStr()));
   }
   return result;
 }
Exemplo n.º 6
0
 /**
  * Segments {@code text} with Ansj and joins the tokens with single spaces.
  *
  * @param text input text to segment
  * @return space-separated tokens with no trailing space; empty string if
  *     segmentation yields nothing
  */
 public String segAnsi(String text) {
   StringBuilder sb = new StringBuilder();
   for (Term term : ToAnalysis.parse(text)) {
     sb.append(term.getName());
     sb.append(" ");
   }
   // trim() removes the separator appended after the last token.
   return sb.toString().trim();
 }
Exemplo n.º 7
0
 /**
  * Segments {@code content} and keeps only noun-like terms.
  *
  * <p>An Ansj term prints as {@code "word/nature"}; terms whose printed form
  * ends in 'n' are kept, the nature tag is stripped, and the words are joined
  * with spaces (a trailing space is preserved, matching the original output).
  *
  * @param content text to segment
  * @return the noun words separated (and terminated) by single spaces
  */
 public static String doSplit(String content) {
   List<Term> lis = ToAnalysis.parse(content);
   // StringBuilder avoids the O(n^2) cost of repeated String concatenation.
   StringBuilder res = new StringBuilder();
   for (int j = 0; j < lis.size(); j++) {
     String tem = lis.get(j).toString();
     if (tem.charAt(tem.length() - 1) != 'n') continue;
     // split() allocates the result itself; no pre-sized throwaway array needed.
     String[] ary = tem.split("/");
     if (ary.length > 0) tem = ary[0];
     res.append(tem).append(" ");
   }
   return res.toString();
 }
Exemplo n.º 8
0
  /**
   * Processes one tab-separated record held in the {@code line} field.
   *
   * <p>Expected layout: exactly 8 fields; field 3 is a numeric count and
   * field 7 the free text to segment. Records with a count of at most 1,
   * or a different field count, are ignored.
   */
  @Override
  public void run() {
    String[] fields = line.split("\t");
    if (fields.length != 8) {
      return;
    }
    int hfd = Integer.parseInt(fields[3]);
    if (hfd > 1) {
      for (Term term : ToAnalysis.parse(fields[7])) {
        System.out.println(term.getName());
      }
    }
  }
Exemplo n.º 9
0
 /**
  * Loads a {@link Vocabulary} from a JSON dictionary file.
  *
  * <p>Also issues a throwaway Ansj parse so the segmentation dictionaries are
  * pre-loaded ("acc load speed", as the original warm-up string says).
  *
  * @param path path of the JSON dictionary file
  * @return the parsed vocabulary, or {@code null} if the file is missing or unreadable
  */
 public static Vocabulary loadFromDB(String path) {
   File file = new File(path);
   if (!file.exists()) {
     logger.error("词典文件不存在, {}", path);
     // Fail fast: the original fell through and hit an IOException that was
     // then logged with a misleading "gson" message.
     return null;
   }
   // Warm-up parse to force Ansj dictionary loading before first real use.
   ToAnalysis.parse("hello world for acc load speed");
   try {
     String text = FileUtils.readFileToString(file);
     Gson gson = new Gson();
     return gson.fromJson(text, Vocabulary.class);
   } catch (IOException e) {
     // Preserve the cause in the log instead of swallowing it.
     logger.error("gson转化失败", e);
     return null;
   }
 }
  /**
   * Segments {@code input_str}, resets the shared token iterator, and tallies
   * term frequencies.
   *
   * <p>Note: the first occurrence of a term is stored with score 0 and each
   * later occurrence adds 1, so the score is (occurrences - 1). This quirk of
   * the original implementation is preserved.
   *
   * @param input_str text to tokenize
   * @return map from term surface form to its {@link TermScore}
   */
  @Override
  public HashMap<String, TermScore> tokenizeTerm(String input_str) {
    tokens = ToAnalysis.parse(input_str);
    token_iterator = tokens.listIterator();

    HashMap<String, TermScore> hash = new HashMap<String, TermScore>();
    while (token_iterator.hasNext()) {
      String name = token_iterator.next().getName();
      TermScore existing = hash.get(name);
      if (existing == null) {
        hash.put(name, new TermScore(name, 0));
      } else {
        // The object is already in the map; mutating it in place suffices —
        // the original's redundant re-put is dropped.
        existing.setScore(existing.getScore() + 1);
      }
    }

    return hash;
  }
 /**
  * Segments {@code input_str} with Ansj, storing the term list in the
  * {@code tokens} field and resetting the shared {@code token_iterator}
  * to the start of that list.
  */
 public void tokenize(String input_str) {
   tokens = ToAnalysis.parse(input_str);
   token_iterator = tokens.listIterator();
 }
Exemplo n.º 12
0
  public static List<Unit> create(String query) throws Exception {
    // TODO Auto-generated method stub
    /*
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

          DocumentBuilder db = dbf.newDocumentBuilder();

          Document document = db.parse(new File(Path + "req_result.xml"));

          NodeList list = document.getElementsByTagName("Pro");
          */
    if (hasInit == 0) {
      init();
      hasInit = 1;
    }
    File file = new File(Path + "TF-IDF_result_x.txt");
    OutputStreamWriter write = new OutputStreamWriter(new FileOutputStream(file), "utf-8");
    BufferedWriter writer = new BufferedWriter(write);

    System.out.println(query);
    List<Term> lis = ToAnalysis.parse(query);

    List<List<Unit>> res1 = new ArrayList<List<Unit>>();
    // List<List<Unit>> res2 = new ArrayList<List<Unit>>();
    for (int i = 0; i < lis.size(); i++) {
      String tem = lis.get(i).toString();
      System.out.println(tem);
      String[] ary = tem.split("/");
      String term = "";
      double e = ((ary[1].contains("w") || ary[1].contains("nr")) ? 100 : 1);
      if (!Stopwords.isstop(ary[0]) && ary.length > 0) {
        term = ary[0];
        System.out.println(e);
        List<Unit> t1 = TFIDF(term);

        res1.add(Normal(t1, e));
        // List<Unit> t2 = BM25(term);
        // res2.add(Normal(t2));
      }
    }
    Map<String, Double> res = new HashMap<String, Double>();
    for (int i = 0; i < res1.size(); i++) {
      for (int j = 0; j < res1.get(i).size(); j++) {
        String iDoc = res1.get(i).get(j).getDocId();
        if (res.containsKey(iDoc)) {
          double tem = res.get(iDoc);
          res.remove(iDoc);
          res.put(iDoc, tem + res1.get(i).get(j).getSrc());
        } else {
          res.put(iDoc, res1.get(i).get(j).getSrc());
        }
      }
    }
    List<Unit> iRes = new ArrayList<Unit>();
    for (String key : res.keySet()) {
      Unit t = new Unit();
      int id = docmap.get(key);
      t.setDocId(key);
      t.setChName(docs[id].getChName());
      t.setAddr(docs[id].getAddr());
      t.setURL(docs[id].getURL());
      t.setType(docs[id].getType());
      t.setSrc(res.get(key));
      iRes.add(t);
    }
    Collections.sort(iRes);
    System.out.println("size = " + iRes.size());
    for (int i = 0; i < iRes.size(); i++) {
      writer.write(iRes.get(i).getDocId() + " " + iRes.get(i).getSrc() + "\n");
    }
    writer.close();
    /*
          double maxx = 0,minn = 1e11;
          for(int i = 0;i < res.size();i ++){
          	//writer.write(res.get(i).getDocId() + " " + res.get(i).getNum() + "\n");
          	double tem = res.get(i).getNum();
          	maxx = Math.max(maxx, tem);minn = Math.min(minn, tem);
          }
          for(int i = 0;i < res.size();i ++){
          	double tem = res.get(i).getNum();
          	res.get(i).setNum(10 * (tem - minn) / (maxx - minn));
          	writer.write(res.get(i).getDocId() + " " + res.get(i).getNum() + "\n");
          }
          writer.close();

          writer.close();
          file = new File("TF-IDF_result.txt");
          write = new OutputStreamWriter(new FileOutputStream(file),"utf-8");
    writer = new BufferedWriter(write);

    */
    System.out.println("done");

    return iRes;
  }
Exemplo n.º 13
0
 /** Compares the two Ansj segmenters on the same sample sentence. */
 public static void main(String[] args) {
   String sample = "一次性交纳五百元送话费,法轮功";
   System.out.println(ToAnalysis.parse(sample));
   System.out.println(NlpAnalysis.parse(sample));
 }
  /**
   * Builds a libsvm-style training file from five document categories.
   *
   * <p>Reads the feature dictionary from {@code 28.txt} (';'-separated), then
   * processes files 500..999 of each category directory, appending one
   * "label 1:c1 2:c2 ..." line per document to {@code 29.txt}. The original
   * repeated the same loop body five times verbatim; the shared logic now
   * lives in {@link #processCategory}.
   */
  public static void main(String[] args) {
    File DictFile = new File("C:/Users/hzzhangchi/Desktop/28.txt");
    String DictString = txt2String(DictFile);

    // Splitting on ';' is what yields the correct dictionary entries.
    String[] Dict = DictString.split(";");
    System.out.println(Dict[0]);

    processCategory(Dict, "C:/Users/hzzhangchi/Desktop/Health/", "1");
    processCategory(Dict, "C:/Users/hzzhangchi/Desktop/Military/", "2");
    processCategory(Dict, "C:/Users/hzzhangchi/Desktop/Net/", "3");
    processCategory(Dict, "C:/Users/hzzhangchi/Desktop/Culture/", "4");
    processCategory(Dict, "C:/Users/hzzhangchi/Desktop/Money/", "5");
    System.out.println(1);
  }

  /**
   * Processes files 500..999 under {@code dir}: segments each file with Ansj,
   * counts occurrences of every dictionary entry (matching the term's printed
   * "word/nature" form, as the original did), and appends one feature line
   * starting with {@code label} to 29.txt.
   *
   * @param dict feature dictionary; entry order fixes the feature indices
   * @param dir category directory ending with a path separator
   * @param label class label written at the start of each output line
   */
  private static void processCategory(String[] dict, String dir, String label) {
    for (int z = 500; z < 1000; z++) {
      String texts = txt2String(new File(dir + z + ".txt"));

      List<Term> al = ToAnalysis.parse(texts);

      // Count occurrences per dictionary entry, preserving dictionary order.
      Map<String, Integer> map = new LinkedHashMap<String, Integer>();
      for (String w : dict) {
        map.put(w, 0);
      }
      for (Term t : al) {
        Integer c = map.get(t.toString());
        if (c != null) {
          map.put(t.toString(), c + 1);
        }
      }

      try {
        String line = System.getProperty("line.separator");
        StringBuilder str = new StringBuilder();
        str.append(label).append(' ');
        int count = 1;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
          str.append(count).append(':').append(entry.getValue()).append(' ');
          count++;
        }
        str.append(line);
        // try-with-resources closes the append-mode writer even if write()
        // throws (the original leaked the FileWriter in that case).
        try (FileWriter fw = new FileWriter("C:/Users/hzzhangchi/Desktop/29.txt", true)) {
          fw.write(str.toString());
        }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }