示例#1
0
  /**
   * Adds security short names and theme names that are missing from the local jieba user
   * dictionary; should be called before the getThemeSecPair methods.
   *
   * <p>The dictionary at {@code JiebaUserDictPath} is read, every missing name is inserted with
   * the tag {@code "n"}, and the merged dictionary is written back to the same path. If the
   * dictionary cannot be read ({@code FileIOUtil.readJiebaDict} returns {@code null}) the update
   * is aborted and nothing is written, so an existing dictionary file is never replaced by a
   * partial one. Any exception is logged and not propagated to the caller.
   *
   * @author dengxiang.liu
   */
  public void updateLocalJiebaDictWithSecAndTheme() {
    String jiebaUserDictPath = JiebaUserDictPath;

    logger.info("jieba user dictionary file path: {}", jiebaUserDictPath);
    try {
      List<DatayesdbpSecurity> securityList = datayesdbpMapper.getSecurityList();
      Map<String, String> tokenMap = FileIOUtil.readJiebaDict(jiebaUserDictPath);
      logger.info("original jieba dictionary size: {}", tokenMap == null ? 0 : tokenMap.size());
      if (tokenMap == null) {
        // Without the existing entries we must not write: that would drop user-defined words.
        logger.error("jieba dictionary could not be read, skip update: {}", jiebaUserDictPath);
        return;
      }

      // Counts every word added below, securities and themes alike.
      int addedNewsWordsCount = 0;
      if (securityList != null) {
        for (DatayesdbpSecurity datayesdbpSecurity : securityList) {
          String secShortName = datayesdbpSecurity.getSecShortName();
          if (secShortName != null
              && !secShortName.isEmpty()
              && !tokenMap.containsKey(secShortName)) {
            tokenMap.put(secShortName, "n");
            addedNewsWordsCount++;
          }
        }
      }

      List<BigdataTheme> bigdataThemeList = bigdataMapper.getThemeList();
      if (bigdataThemeList != null) {
        for (BigdataTheme bigdataTheme : bigdataThemeList) {
          String themeName = bigdataTheme.getThemeName();
          if (themeName != null && !themeName.isEmpty() && !tokenMap.containsKey(themeName)) {
            tokenMap.put(themeName, "n");
            addedNewsWordsCount++;
          }
        }
      }

      FileIOUtil.writeJiebaDict(jiebaUserDictPath, tokenMap);
      logger.info("Add {} new words into jieba dictionary.", addedNewsWordsCount);
    } catch (Exception e) {
      // Pass the throwable itself so the stack trace is kept in the log.
      logger.error("error occurs when update jieba dictionary", e);
    }
  }
示例#2
0
 /**
  * Groups the theme-security relations returned by
  * {@code bigdataMapper.getThemeSecurityPairList()} by theme name.
  *
  * <p>Per the original author's note the rows are the QAed and active theme-security
  * relationships in the database; any such filtering happens in the mapper query, not here.
  *
  * @author dengxiang.liu
  * @return a map from theme name to the set of ticker symbols related to that theme; an empty
  *     map when the mapper returns {@code null} or no rows
  */
 public Map<String, Set<String>> getCurrentThemeSecurityPair() {
   List<BigdataThemeSecRel> themeSecPairList = bigdataMapper.getThemeSecurityPairList();
   Map<String, Set<String>> themeSecSetMap = new HashMap<String, Set<String>>();
   if (themeSecPairList == null) {
     return themeSecSetMap;
   }
   for (BigdataThemeSecRel bigdataThemeSecRel : themeSecPairList) {
     // The map is keyed by theme name, so lookup and insertion must both use the name;
     // looking up by the Long theme ID can never match a String key.
     String themeName = bigdataThemeSecRel.getThemeName();
     Set<String> tickerSymbols = themeSecSetMap.get(themeName);
     if (tickerSymbols == null) {
       tickerSymbols = new HashSet<String>();
       themeSecSetMap.put(themeName, tickerSymbols);
     }
     tickerSymbols.add(bigdataThemeSecRel.getTickerSymbol());
   }
   return themeSecSetMap;
 }
示例#3
0
  /**
   * Collects the theme-security pairs that still have to be QAed before they are inserted into
   * the database.
   *
   * <p>Themes are processed in two groups: the unhandled themes (as selected by
   * {@code getUnhandledThemeList}) and the schedule themes (as selected by
   * {@code getSchedualThemeList} from the full list and the unhandled list). Each group is run
   * through both {@code textAnalizer} and {@code mongoDataAnalizer}; every result is reduced with
   * {@code getDiffThemeSecPairBetween} against the pairs currently in the database, and the four
   * reduced maps are merged into one with {@code IntegrateMaps}.
   *
   * @author Dengxiang.Liu
   * @return the merged map of candidate pairs, keyed by the String key the analyzers produce
   */
  public Map<String, List<ThemeSecurityObjPair>> getThemeSecPairToBeQA() {
    logger.info("start getThemeSecPairToBeQA();");
    Map<String, Set<String>> existingPairs = getCurrentThemeSecurityPair();
    List<DatayesdbpSecurity> securities = datayesdbpMapper.getSecurityList();

    // ---- group 1: themes that have not been handled yet ----
    List<BigdataTheme> allThemes = bigdataMapper.getThemeList();

    int allThemeCount = 0;
    if (allThemes != null) {
      allThemeCount = allThemes.size();
    }
    logger.info("all themes list size: {};", allThemeCount);

    List<BigdataTheme> pendingThemes = getUnhandledThemeList(allThemes);
    int pendingThemeCount = 0;
    if (pendingThemes != null) {
      pendingThemeCount = pendingThemes.size();
    }
    logger.info("unhandled themes list size: {};", pendingThemeCount);

    // The analyzers receive null, not an empty set, when there is nothing unhandled.
    Set<BigdataTheme> pendingThemeSet = null;
    if (pendingThemes != null && !pendingThemes.isEmpty()) {
      pendingThemeSet = new HashSet<BigdataTheme>(pendingThemes);
    }

    Map<String, List<ThemeSecurityObjPair>> pendingFromNews =
        textAnalizer.findThemesSecPair(pendingThemeSet, securities, false);
    Map<String, List<ThemeSecurityObjPair>> pendingFromCrawl =
        mongoDataAnalizer.findThemeSecPair(pendingThemeSet, securities, false);

    Map<String, List<ThemeSecurityObjPair>> pendingNewsToQa =
        getDiffThemeSecPairBetween(existingPairs, pendingFromNews);
    Map<String, List<ThemeSecurityObjPair>> pendingCrawlToQa =
        getDiffThemeSecPairBetween(existingPairs, pendingFromCrawl);

    // ---- group 2: schedule themes = all themes - unhandled themes ----
    List<BigdataTheme> scheduledThemes = getSchedualThemeList(allThemes, pendingThemes);
    Set<BigdataTheme> scheduledThemeSet;
    if (scheduledThemes == null) {
      scheduledThemeSet = new HashSet<BigdataTheme>(new ArrayList<BigdataTheme>());
    } else {
      scheduledThemeSet = new HashSet<BigdataTheme>(scheduledThemes);
    }
    logger.info("schedule themes Set size: {}", scheduledThemeSet.size());

    Map<String, List<ThemeSecurityObjPair>> scheduledFromNews =
        textAnalizer.findThemesSecPair(scheduledThemeSet, securities, true);
    Map<String, List<ThemeSecurityObjPair>> scheduledFromCrawl =
        mongoDataAnalizer.findThemeSecPair(scheduledThemeSet, securities, true);

    Map<String, List<ThemeSecurityObjPair>> scheduledNewsToQa =
        getDiffThemeSecPairBetween(existingPairs, scheduledFromNews);
    Map<String, List<ThemeSecurityObjPair>> scheduledCrawlToQa =
        getDiffThemeSecPairBetween(existingPairs, scheduledFromCrawl);

    // ---- merge: news results first, then crawl results ----
    Map<String, List<ThemeSecurityObjPair>> mergedToQa =
        new HashMap<String, List<ThemeSecurityObjPair>>();

    IntegrateMaps(mergedToQa, pendingNewsToQa);
    IntegrateMaps(mergedToQa, scheduledNewsToQa);
    IntegrateMaps(mergedToQa, pendingCrawlToQa);
    IntegrateMaps(mergedToQa, scheduledCrawlToQa);

    logger.info("finished getThemeSecPairToBeQA();");
    return mergedToQa;
  }