Esempio n. 1
0
  /**
   * Updates the jieba user dictionary with the short names of securities and the names of themes
   * that are not in the dictionary yet. Should be called before the getThemeSecPair methods.
   *
   * @author dengxiang.liu
   */
  public void updateLocalJiebaDictWithSecAndTheme() {
    // Adds every security short name and every theme name that is missing from the jieba user
    // dictionary (tagged "n") and writes the dictionary back to the same file.
    String jiebaUserDictPath = JiebaUserDictPath;

    logger.info("jieba user dictionary file path: {}", jiebaUserDictPath);
    try {
      Map<String, String> tokenMap = FileIOUtil.readJiebaDict(jiebaUserDictPath);
      logger.info("original jieba dictionary size: {}", tokenMap == null ? 0 : tokenMap.size());
      if (tokenMap == null) {
        // NOTE(review): a null result is taken to mean the dictionary could not be loaded;
        // nothing is written so that an existing file is never replaced by a partial one.
        logger.error(
            "jieba user dictionary could not be read from {}, skip update", jiebaUserDictPath);
        return;
      }

      // Counts security names AND theme names, so the final log line matches what was added.
      int addedNewWordsCount = 0;

      List<DatayesdbpSecurity> securityList = datayesdbpMapper.getSecurityList();
      if (securityList != null) {
        for (DatayesdbpSecurity datayesdbpSecurity : securityList) {
          String secShortName =
              datayesdbpSecurity == null ? null : datayesdbpSecurity.getSecShortName();
          // A null name is not a usable dictionary token.
          if (secShortName != null && !tokenMap.containsKey(secShortName)) {
            tokenMap.put(secShortName, "n");
            addedNewWordsCount++;
          }
        }
      }

      List<BigdataTheme> bigdataThemeList = bigdataMapper.getThemeList();
      if (bigdataThemeList != null) {
        for (BigdataTheme bigdataTheme : bigdataThemeList) {
          String themeName = bigdataTheme == null ? null : bigdataTheme.getThemeName();
          if (themeName != null && !tokenMap.containsKey(themeName)) {
            tokenMap.put(themeName, "n");
            addedNewWordsCount++;
          }
        }
      }

      FileIOUtil.writeJiebaDict(jiebaUserDictPath, tokenMap);
      logger.info("Add {} new words into jieba dictionary.", addedNewWordsCount);
    } catch (Exception e) {
      // Best-effort update: failures are logged, not propagated. The trailing throwable keeps
      // the stack trace in the log.
      logger.error("error occurs when update jieba dictionary, {}", e.toString(), e);
    }
  }
Esempio n. 2
0
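  /**
   * Returns the themes that have not been handled yet: the difference between the given theme list
   * (from the database) and the themes recorded in the local handled-themes file. See also
   * updateHandledThemeFile().
   *
   * @author dengxiang.liu
   * @param bigdataThemeList candidate themes; may be null
   * @return the themes whose ID is not in the handled-themes file; empty if the input is null
   */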
  public List<BigdataTheme> getUnhandledThemeList(List<BigdataTheme> bigdataThemeList) {

    /** themes to be handled */
    List<BigdataTheme> unHandledThemeList = new ArrayList<BigdataTheme>();

    // Nothing to classify: return before touching the handled-themes file.
    if (null == bigdataThemeList) {
      return unHandledThemeList;
    }

    /** get themes have been handled from the file */
    Map<Long, BigdataTheme> handledThemeMap = FileIOUtil.readThemeMap(HANDLED_THEMES_FILE_PATH);
    if (handledThemeMap == null) {
      // NOTE(review): a null map is taken to mean the file could not be read. The result stays
      // empty (as before), but the problem is reported once instead of once per theme.
      logger.error("handled theme map could not be read from {}", HANDLED_THEMES_FILE_PATH);
      return unHandledThemeList;
    }

    for (BigdataTheme bigdataTheme : bigdataThemeList) {
      if (bigdataTheme == null) {
        // Null entries carry no theme ID and cannot be classified.
        continue;
      }
      try {
        Long themeID = bigdataTheme.getThemeID();
        if (!handledThemeMap.containsKey(themeID)) {
          unHandledThemeList.add(bigdataTheme);
        }
      } catch (Exception e) {
        // Best-effort per theme: one bad entry must not abort the whole scan.
        logger.error(e.toString(), e);
      }
    }
    logger.info("get unhandled theme list size: {}", unHandledThemeList.size());
    return unHandledThemeList;
  }
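  /**
   * Builds theme-security pairs from mongo data about the target themes.
   *
   * @author dengxiang.liu
   * @param mongoThemeStockList mongo records, each naming a theme and its related ticker symbols
   * @param securityList known securities, looked up by ticker symbol
   * @param bigdataThemeSet known themes, used to resolve a theme name to its theme ID
   * @return list of theme-security pairs, at most one per (theme ID, ticker symbol)
   */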
  public List<ThemeSecurityObjPair> getThemeSecurityPairList(
      List<MongoThemeStock> mongoThemeStockList,
      List<DatayesdbpSecurity> securityList,
      Set<BigdataTheme> bigdataThemeSet) {

    List<ThemeSecurityObjPair> themeSecurityPairList = new ArrayList<ThemeSecurityObjPair>();

    // Without themes no theme name can be resolved to an ID, so a null theme set yields the same
    // empty result as a null record list or a null security list.
    if (mongoThemeStockList == null || securityList == null || bigdataThemeSet == null) {
      return themeSecurityPairList;
    }

    /** @Map: security ticker symbol ---> DatayesdbpSecurity Object */
    Map<String, DatayesdbpSecurity> tickerSymbol2Obj = new HashMap<String, DatayesdbpSecurity>();
    for (DatayesdbpSecurity security : securityList) {
      if (security != null) {
        tickerSymbol2Obj.put(security.getTickerSymbol(), security);
      }
    }

    /** @Map: theme name ---> BigdataTheme themeID */
    Map<String, Long> themeName2ID = new HashMap<String, Long>();
    for (BigdataTheme bigdataTheme : bigdataThemeSet) {
      if (bigdataTheme != null) {
        themeName2ID.put(bigdataTheme.getThemeName(), bigdataTheme.getThemeID());
      }
    }

    /** used to make sure the uniqueness of theme-security pair: themeID ---> tickerSymbol set */
    Map<Long, Set<String>> globalThemeSecPairMap = new HashMap<Long, Set<String>>();
    String pairType = "crawl";

    for (MongoThemeStock mongoThemeStock : mongoThemeStockList) {
      if (mongoThemeStock == null) {
        continue;
      }
      try {
        String themeName = mongoThemeStock.getThemeName();

        // The theme is the same for every ticker of this record, so resolve it once. Records
        // whose theme is unknown to the database (or carries the sentinel ID -1) are skipped.
        Long themeID = themeName2ID.get(themeName);
        if (themeID == null || themeID.longValue() == -1L) {
          continue;
        }

        List<String> tickerSymbolList = mongoThemeStock.getRelateStock();
        if (tickerSymbolList == null || tickerSymbolList.isEmpty()) {
          continue;
        }

        // An absent or unparseable date leaves findTime null; the pair is still produced.
        Date findTime = null;
        String dateStr = mongoThemeStock.getDate();
        if (dateStr != null) {
          try {
            findTime = DateUtil.strToDate(dateStr, DateUtil.DatePattern.day);
          } catch (ParseException e) {
            logger.error("can not parse mongo theme stock date: " + dateStr, e);
          }
        }

        /** theme-security pair description */
        String webSite = mongoThemeStock.getWebSite();

        Set<String> pairedTickerSymbols = globalThemeSecPairMap.get(themeID);
        if (pairedTickerSymbols == null) {
          pairedTickerSymbols = new HashSet<String>();
          globalThemeSecPairMap.put(themeID, pairedTickerSymbols);
        }

        for (String tickerSymbol : tickerSymbolList) {
          DatayesdbpSecurity curSecurity = tickerSymbol2Obj.get(tickerSymbol);
          if (curSecurity == null) {
            // ticker symbol is not a known security
            continue;
          }
          String curTickerSymbol = curSecurity.getTickerSymbol();
          if (pairedTickerSymbols.contains(curTickerSymbol)) {
            // this (theme, security) pair has already been emitted
            continue;
          }
          themeSecurityPairList.add(
              new ThemeSecurityObjPair(
                  themeID, themeName, curSecurity, webSite, findTime, pairType));
          // Recorded only after the pair was created successfully.
          pairedTickerSymbols.add(curTickerSymbol);
        }
      } catch (Exception e) {
        // Best-effort per record: a broken record is logged and the remaining ones still run.
        logger.error(e.toString(), e);
      }
    }

    return themeSecurityPairList;
  }
  /**
   * Finds theme-security pairs for each theme from mongo data. The result will be QA'd before it
   * is inserted into the database.
   *
   * @author Dengxiang.Liu
   * @param themeSet themes to search for; either themes that have never been handled (see
   *     getUnhandledThemeList()) or themes that have already been handled, depending on
   *     {@code isSchedule}
   * @param securityList securities that the related ticker symbols are matched against
   * @param isSchedule true if the themes are not new to the system
   * @return a map from theme name to that theme's ThemeSecurityObjPair list
   */
  public Map<String, List<ThemeSecurityObjPair>> findThemeSecPair(
      Set<BigdataTheme> themeSet, List<DatayesdbpSecurity> securityList, Boolean isSchedule) {

    Map<String, List<ThemeSecurityObjPair>> themeSecurityPairMap =
        new HashMap<String, List<ThemeSecurityObjPair>>();

    logger.info("findThemeSecPair with mongo data");
    logger.info(
        "themeSet size: {}; securityList size: {}, isSchedule: {}",
        themeSet == null ? 0 : themeSet.size(),
        securityList == null ? 0 : securityList.size(),
        isSchedule);

    if (null == themeSet) {
      return themeSecurityPairMap;
    }

    // A null flag is treated as "not scheduled" instead of failing on auto-unboxing.
    boolean scheduled = Boolean.TRUE.equals(isSchedule);

    /** configure data scale: how many days back the mongo query reaches */
    int mongoTimeInterval = scheduled ? 5 : 50;
    try {
      mongoTimeInterval =
          Integer.parseInt(
              scheduled
                  ? ConfigConst.MONGO_TIME_INTERVAL_SCHEDULE
                  : ConfigConst.MONGO_TIME_INTERVAL_UNSCHEDULE);
    } catch (NumberFormatException e) {
      // Missing or malformed configuration: keep the built-in default assigned above.
      logger.error(
          "invalid mongo time interval configuration, use default " + mongoTimeInterval, e);
    }

    Date endDate = new Date();
    Date mongoStartDate = DateUtil.addDay(endDate, -1 * mongoTimeInterval);

    // The range is the same for every theme: format it once, and log exactly the strings that
    // are passed to the query.
    String mongoStartDateStr = DateUtil.dateToStr(mongoStartDate, DateUtil.DatePattern.day);
    String mongoEndDateStr = DateUtil.dateToStr(endDate, DateUtil.DatePattern.day);
    logger.info("mongo data from: {}, to {}", mongoStartDateStr, mongoEndDateStr);

    for (BigdataTheme bigdataTheme : themeSet) {
      if (bigdataTheme == null) {
        continue;
      }
      try {
        String themeName = bigdataTheme.getThemeName();

        /** get mongo data ; with theme_name */
        List<MongoThemeStock> mongoThemeStockList =
            mongoThemeStockDao.searchByThemeName(themeName, mongoStartDateStr, mongoEndDateStr);

        logger.info(
            "themeName: {}, mongoThemeStockList size: {}",
            themeName,
            mongoThemeStockList == null ? 0 : mongoThemeStockList.size());

        /** get theme-security pair from mongo data */
        List<ThemeSecurityObjPair> themeSecurityPairList = new ArrayList<ThemeSecurityObjPair>();
        List<ThemeSecurityObjPair> themeSecurityPairListMongo =
            getThemeSecurityPairList(mongoThemeStockList, securityList, themeSet);
        if (null != themeSecurityPairListMongo) {
          themeSecurityPairList.addAll(themeSecurityPairListMongo);
        }

        /** @Map themeName ---> ThemeSecurityPair List */
        themeSecurityPairMap.put(themeName, themeSecurityPairList);

      } catch (Exception e) {
        // Best-effort per theme: a failing lookup is logged and the remaining themes still run.
        logger.error(e.toString(), e);
      }
    }
    return themeSecurityPairMap;
  }