Esempio n. 1
0
  /**
   * Updates the local jieba user dictionary file with the short names of securities and the names
   * of themes that are not in the dictionary yet. Should be called before the getThemeSecPair
   * methods.
   *
   * @author dengxiang.liu
   */
  public void updateLocalJiebaDictWithSecAndTheme() {
    // Reads the jieba user dictionary, adds every security short name and theme name that is
    // missing from it, and writes the dictionary back to the same path. Failures are logged and
    // never propagated to the caller.
    String jiebaUserDictPath = JiebaUserDictPath;

    logger.info("jieba user dictionary file path: {}", jiebaUserDictPath);
    try {
      List<DatayesdbpSecurity> securityList = datayesdbpMapper.getSecurityList();
      Map<String, String> tokenMap = FileIOUtil.readJiebaDict(jiebaUserDictPath);
      logger.info("original jieba dictionary size: {}", tokenMap == null ? 0 : tokenMap.size());
      if (tokenMap == null) {
        // Nothing is written back in this case, so a dictionary that could not be read is never
        // overwritten with a partial one.
        logger.error(
            "jieba dictionary could not be read from {}, skip updating", jiebaUserDictPath);
        return;
      }

      int addedNewsWordsCount = 0;
      if (securityList != null) {
        for (DatayesdbpSecurity datayesdbpSecurity : securityList) {
          if (addTokenIfAbsent(tokenMap, datayesdbpSecurity.getSecShortName())) {
            addedNewsWordsCount++;
          }
        }
      }

      List<BigdataTheme> bigdataThemeList = bigdataMapper.getThemeList();
      if (bigdataThemeList != null) {
        for (BigdataTheme bigdataTheme : bigdataThemeList) {
          // Theme names count as new words too, so the summary log reflects every addition.
          if (addTokenIfAbsent(tokenMap, bigdataTheme.getThemeName())) {
            addedNewsWordsCount++;
          }
        }
      }

      FileIOUtil.writeJiebaDict(jiebaUserDictPath, tokenMap);
      logger.info("Add {} new words into jieba dictionary.", addedNewsWordsCount);
    } catch (Exception e) {
      // The trailing throwable keeps the stack trace in the log.
      logger.error("error occurs when update jieba dictionary, {}", e.toString(), e);
    }
  }

  /**
   * Puts {@code word} into {@code tokenMap} with the tag {@code "n"} unless the word is null,
   * empty or already present.
   *
   * @param tokenMap dictionary entries keyed by word; must not be null
   * @param word candidate word, may be null
   * @return true if the word was added, false if it was skipped
   */
  private boolean addTokenIfAbsent(Map<String, String> tokenMap, String word) {
    if (word == null || word.isEmpty() || tokenMap.containsKey(word)) {
      return false;
    }
    // NOTE(review): "n" is presumably the jieba part-of-speech tag for nouns -- confirm.
    tokenMap.put(word, "n");
    return true;
  }
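  /**
   * Gets theme-security pairs from mongo data about the target themes. A pair is produced only
   * when the record's theme name is found in {@code bigdataThemeSet} and the ticker symbol is
   * found in {@code securityList}; each (theme ID, ticker symbol) combination is returned at most
   * once.
   *
   * @author dengxiang.liu
   * @param mongoThemeStockList mongo records, each providing a theme name, a date, a web site and
   *     a list of related ticker symbols
   * @param securityList securities used to look up ticker symbols
   * @param bigdataThemeSet themes used to map theme names to theme IDs
   * @return theme security pairs list; empty when {@code mongoThemeStockList} or {@code
   *     securityList} is null
   */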
  public List<ThemeSecurityObjPair> getThemeSecurityPairList(
      List<MongoThemeStock> mongoThemeStockList,
      List<DatayesdbpSecurity> securityList,
      Set<BigdataTheme> bigdataThemeSet) {
    // Builds one pair per distinct (theme ID, ticker symbol) found in the mongo records. Records
    // whose theme name is not in bigdataThemeSet and tickers that are not in securityList are
    // skipped.

    List<ThemeSecurityObjPair> themeSecurityPairList = new ArrayList<ThemeSecurityObjPair>();
    // Without themes no theme name can be resolved to an ID, so a null theme set yields no pairs.
    if (mongoThemeStockList == null || securityList == null || bigdataThemeSet == null) {
      return themeSecurityPairList;
    }

    // security ticker symbol ---> DatayesdbpSecurity object
    Map<String, DatayesdbpSecurity> tickerSymbol2Obj = new HashMap<String, DatayesdbpSecurity>();
    for (DatayesdbpSecurity security : securityList) {
      tickerSymbol2Obj.put(security.getTickerSymbol(), security);
    }
    // theme name ---> BigdataTheme themeID
    Map<String, Long> themeName2ID = new HashMap<String, Long>();
    for (BigdataTheme bigdataTheme : bigdataThemeSet) {
      themeName2ID.put(bigdataTheme.getThemeName(), bigdataTheme.getThemeID());
    }

    // Guarantees uniqueness of theme-security pairs: themeID ---> ticker symbols already paired
    Map<Long, Set<String>> globalThemeSecPairMap = new HashMap<Long, Set<String>>();
    String pairType = "crawl";

    for (MongoThemeStock mongoThemeStock : mongoThemeStockList) {
      String themeName = mongoThemeStock.getThemeName();
      // The theme ID depends only on the record, so resolve it once per record. Themes unknown
      // to the database (or carrying the -1 sentinel / no ID) never produce pairs.
      Long themeID = themeName2ID.get(themeName);
      if (themeID == null || themeID.longValue() == -1L) {
        continue;
      }

      Date findTime = parseFindTime(mongoThemeStock.getDate());
      // theme-security pair description
      String webSite = mongoThemeStock.getWebSite();
      try {
        List<String> tickerSymbolList = mongoThemeStock.getRelateStock();
        if (tickerSymbolList == null || tickerSymbolList.isEmpty()) {
          continue;
        }

        Set<String> pairedTickers = globalThemeSecPairMap.get(themeID);
        if (pairedTickers == null) {
          pairedTickers = new HashSet<String>();
          globalThemeSecPairMap.put(themeID, pairedTickers);
        }

        for (String tickerSymbol : tickerSymbolList) {
          DatayesdbpSecurity curSecurity = tickerSymbol2Obj.get(tickerSymbol);
          if (curSecurity == null || pairedTickers.contains(tickerSymbol)) {
            continue;
          }
          themeSecurityPairList.add(
              new ThemeSecurityObjPair(
                  themeID, themeName, curSecurity, webSite, findTime, pairType));
          // Mark as paired only after the pair has actually been added.
          pairedTickers.add(tickerSymbol);
        }
      } catch (Exception e) {
        // One bad record must not abort the whole run; keep the stack trace in the log.
        logger.error("error occurs when building theme-security pairs for theme {}", themeName, e);
      }
    }

    return themeSecurityPairList;
  }

  /**
   * Parses the date string of a mongo record with the {@code DateUtil.DatePattern.day} pattern.
   *
   * @param dateStr date string taken from the mongo record
   * @return the parsed date, or null when the string cannot be parsed
   */
  private Date parseFindTime(String dateStr) {
    try {
      return DateUtil.strToDate(dateStr, DateUtil.DatePattern.day);
    } catch (ParseException e) {
      // The pair is still created, just without a find time.
      logger.error("cannot parse find time '{}', leaving it null", dateStr, e);
      return null;
    }
  }