Пример #1
0
  /**
   * Updates the local jieba user dictionary with security short names and theme names that are
   * not in it yet. Should be called before the getThemeSecPair methods.
   *
   * <p>The dictionary is read from {@code JiebaUserDictPath}, every missing security short name and
   * theme name is added with the value {@code "n"} (presumably jieba's noun tag; confirm against
   * the dictionary format), and the dictionary is written back to the same path.
   *
   * <p>Failures are logged and never propagated to the caller. If the dictionary cannot be loaded
   * ({@code FileIOUtil.readJiebaDict} returns {@code null}), the file is left untouched.
   *
   * @author dengxiang.liu
   */
  public void updateLocalJiebaDictWithSecAndTheme() {
    String jiebaUserDictPath = JiebaUserDictPath;

    logger.info("jieba user dictionary file path: {}", jiebaUserDictPath);
    try {
      List<DatayesdbpSecurity> securityList = datayesdbpMapper.getSecurityList();
      Map<String, String> tokenMap = FileIOUtil.readJiebaDict(jiebaUserDictPath);
      logger.info("original jieba dictionary size: {}", tokenMap == null ? 0 : tokenMap.size());
      if (tokenMap == null) {
        // Without the existing entries we must not write anything back: doing so could replace a
        // dictionary that merely failed to load with one that only contains the new words.
        logger.error(
            "error occurs when update jieba dictionary, {}",
            "dictionary could not be read from " + jiebaUserDictPath);
        return;
      }

      // Primitive counter: avoids re-boxing an Integer on every increment.
      int addedNewWordsCount = 0;
      if (securityList != null) {
        for (DatayesdbpSecurity datayesdbpSecurity : securityList) {
          if (addJiebaWordIfAbsent(tokenMap, datayesdbpSecurity.getSecShortName())) {
            addedNewWordsCount++;
          }
        }
      }

      List<BigdataTheme> bigdataThemeList = bigdataMapper.getThemeList();
      if (bigdataThemeList != null) {
        for (BigdataTheme bigdataTheme : bigdataThemeList) {
          // Theme names count as new words too, so the summary log reflects everything added.
          if (addJiebaWordIfAbsent(tokenMap, bigdataTheme.getThemeName())) {
            addedNewWordsCount++;
          }
        }
      }

      FileIOUtil.writeJiebaDict(jiebaUserDictPath, tokenMap);
      logger.info("Add {} new words into jieba dictionary.", addedNewWordsCount);
    } catch (Exception e) {
      // Passing the exception as the trailing argument makes the logger record the stack trace.
      logger.error("error occurs when update jieba dictionary, {}", e.toString(), e);
    }
  }

  /**
   * Adds {@code word} to {@code tokenMap} with the value {@code "n"} unless it is already a key.
   * {@code null} and empty words are skipped so they never end up as dictionary entries.
   *
   * @param tokenMap dictionary entries to extend; must not be {@code null}
   * @param word candidate dictionary word; may be {@code null}
   * @return {@code true} if the word was added, {@code false} if it was skipped
   */
  private static boolean addJiebaWordIfAbsent(Map<String, String> tokenMap, String word) {
    if (word == null || word.isEmpty() || tokenMap.containsKey(word)) {
      return false;
    }
    tokenMap.put(word, "n");
    return true;
  }
Пример #2
0
  /**
   * Collects the theme-security pairs that still have to pass QA before being inserted into the
   * database.
   *
   * <p>Themes are processed in two groups: the ones returned by {@code getUnhandledThemeList} and
   * the scheduled ones returned by {@code getSchedualThemeList}. For each group, candidate pairs
   * are looked up through both {@code textAnalizer} and {@code mongoDataAnalizer}, compared with
   * the current pairs via {@code getDiffThemeSecPairBetween}, and the four resulting maps are
   * merged into a single map with {@code IntegrateMaps}.
   *
   * @author Dengxiang.Liu
   * @return the merged map of pairs to be QA'd; keys are {@code String}s (presumably theme ids,
   *     verify against {@code IntegrateMaps}) and values are the pair lists for that key
   */
  public Map<String, List<ThemeSecurityObjPair>> getThemeSecPairToBeQA() {
    logger.info("start getThemeSecPairToBeQA();");
    Map<String, Set<String>> existingPairs = getCurrentThemeSecurityPair();
    List<DatayesdbpSecurity> securities = datayesdbpMapper.getSecurityList();

    // ---- group 1: themes that have not been handled yet ----
    List<BigdataTheme> allThemes = bigdataMapper.getThemeList();
    int allThemeCount = 0;
    if (allThemes != null) {
      allThemeCount = allThemes.size();
    }
    logger.info("all themes list size: {};", allThemeCount);

    List<BigdataTheme> pendingThemes = getUnhandledThemeList(allThemes);
    int pendingThemeCount = 0;
    if (pendingThemes != null) {
      pendingThemeCount = pendingThemes.size();
    }
    logger.info("unhandled themes list size: {};", pendingThemeCount);

    // The analyzers receive null (not an empty set) when there is nothing pending.
    Set<BigdataTheme> pendingThemeSet = null;
    if (pendingThemes != null && !pendingThemes.isEmpty()) {
      pendingThemeSet = new HashSet<BigdataTheme>(pendingThemes);
    }

    Map<String, List<ThemeSecurityObjPair>> pendingFromNews =
        textAnalizer.findThemesSecPair(pendingThemeSet, securities, false);
    Map<String, List<ThemeSecurityObjPair>> pendingFromCrawl =
        mongoDataAnalizer.findThemeSecPair(pendingThemeSet, securities, false);

    Map<String, List<ThemeSecurityObjPair>> pendingNewsDiff =
        getDiffThemeSecPairBetween(existingPairs, pendingFromNews);
    Map<String, List<ThemeSecurityObjPair>> pendingCrawlDiff =
        getDiffThemeSecPairBetween(existingPairs, pendingFromCrawl);

    // ---- group 2: scheduled themes (all themes minus the unhandled ones) ----
    List<BigdataTheme> scheduledThemes = getSchedualThemeList(allThemes, pendingThemes);
    if (scheduledThemes == null) {
      scheduledThemes = new ArrayList<BigdataTheme>();
    }
    Set<BigdataTheme> scheduledThemeSet = new HashSet<BigdataTheme>(scheduledThemes);
    logger.info("schedule themes Set size: {}", scheduledThemeSet.size());

    Map<String, List<ThemeSecurityObjPair>> scheduledFromNews =
        textAnalizer.findThemesSecPair(scheduledThemeSet, securities, true);
    Map<String, List<ThemeSecurityObjPair>> scheduledFromCrawl =
        mongoDataAnalizer.findThemeSecPair(scheduledThemeSet, securities, true);

    Map<String, List<ThemeSecurityObjPair>> scheduledNewsDiff =
        getDiffThemeSecPairBetween(existingPairs, scheduledFromNews);
    Map<String, List<ThemeSecurityObjPair>> scheduledCrawlDiff =
        getDiffThemeSecPairBetween(existingPairs, scheduledFromCrawl);

    // ---- merge: news results first, then crawl results ----
    Map<String, List<ThemeSecurityObjPair>> pairsToBeQA =
        new HashMap<String, List<ThemeSecurityObjPair>>();
    IntegrateMaps(pairsToBeQA, pendingNewsDiff);
    IntegrateMaps(pairsToBeQA, scheduledNewsDiff);
    IntegrateMaps(pairsToBeQA, pendingCrawlDiff);
    IntegrateMaps(pairsToBeQA, scheduledCrawlDiff);

    logger.info("finished getThemeSecPairToBeQA();");
    return pairsToBeQA;
  }