/**
 * Updates the local jieba user dictionary with the short names of all securities returned by
 * {@code datayesdbpMapper.getSecurityList()} and the names of all themes returned by
 * {@code bigdataMapper.getThemeList()}. Should be called before the getThemeSecPair methods.
 *
 * <p>Every name that is not yet a key of the dictionary is added with the value {@code "n"}
 * (presumably the jieba part-of-speech tag for nouns; confirm against the dictionary format).
 * Existing entries are never modified. The merged dictionary is written back to the same path.
 *
 * <p>If the dictionary cannot be read (the reader returns {@code null}) the update is skipped
 * and the file is left untouched, so a failed read can never truncate the existing dictionary.
 * Any exception is logged and swallowed; this method never throws.
 *
 * @author dengxiang.liu
 */
public void updateLocalJiebaDictWithSecAndTheme() {
    String jiebaUserDictPath = JiebaUserDictPath;
    logger.info("jieba user dictionary file path: {}", jiebaUserDictPath);
    try {
        List<DatayesdbpSecurity> securityList = datayesdbpMapper.getSecurityList();
        Map<String, String> tokenMap = FileIOUtil.readJiebaDict(jiebaUserDictPath);
        logger.info("original jieba dictionary size: {}", tokenMap == null ? 0 : tokenMap.size());
        if (tokenMap == null) {
            // Without the original content we must not write: that would replace the user
            // dictionary with only the security/theme names.
            logger.error("jieba dictionary could not be read from {}, skip updating", jiebaUserDictPath);
            return;
        }

        // Counts every word added below, securities and themes alike.
        int addedWordsCount = 0;

        if (securityList != null) {
            for (DatayesdbpSecurity datayesdbpSecurity : securityList) {
                String secShortName =
                    datayesdbpSecurity == null ? null : datayesdbpSecurity.getSecShortName();
                // A null name is not a usable dictionary word; skip it.
                if (secShortName != null && !tokenMap.containsKey(secShortName)) {
                    tokenMap.put(secShortName, "n");
                    addedWordsCount++;
                }
            }
        }

        List<BigdataTheme> bigdataThemeList = bigdataMapper.getThemeList();
        if (bigdataThemeList != null) {
            for (BigdataTheme bigdataTheme : bigdataThemeList) {
                String themeName = bigdataTheme == null ? null : bigdataTheme.getThemeName();
                if (themeName != null && !tokenMap.containsKey(themeName)) {
                    tokenMap.put(themeName, "n");
                    addedWordsCount++;
                }
            }
        }

        FileIOUtil.writeJiebaDict(jiebaUserDictPath, tokenMap);
        logger.info("Add {} new words into jieba dictionary.", addedWordsCount);
    } catch (Exception e) {
        // Trailing throwable argument makes the logger print the stack trace as well.
        logger.error("error occurs when update jieba dictionary, {}", e.toString(), e);
    }
}
/** * @author dengxiang.liu * @param mongoThemeStockList * @param securityList * @param bigdataThemeSet * @return theme security pairs list @Desc get theme security pairs from mongo data about target * theme; */ public List<ThemeSecurityObjPair> getThemeSecurityPairList( List<MongoThemeStock> mongoThemeStockList, List<DatayesdbpSecurity> securityList, Set<BigdataTheme> bigdataThemeSet) { List<ThemeSecurityObjPair> themeSecurityPairList = new ArrayList<ThemeSecurityObjPair>(); /** used to make sure the uniqueness of theme-security pair: themeID ---> tickerSymbol set */ Map<Long, Set<String>> globalThemeSecPairMap = new HashMap<Long, Set<String>>(); if (mongoThemeStockList == null || securityList == null) return themeSecurityPairList; /** @Map: security ticer symbol ---> DatayesdbpSecurity Object */ Map<String, DatayesdbpSecurity> tickerSymbol2Obj = new HashMap<String, DatayesdbpSecurity>(); for (DatayesdbpSecurity security : securityList) { String tickerSymbol = security.getTickerSymbol(); tickerSymbol2Obj.put(tickerSymbol, security); } /** @Map: theme name ---> BigdataTheme themeID */ Map<String, Long> themeName2ID = new HashMap<String, Long>(); for (BigdataTheme bigdataTheme : bigdataThemeSet) { Long themeID = bigdataTheme.getThemeID(); String themeName = bigdataTheme.getThemeName(); themeName2ID.put(themeName, themeID); } for (MongoThemeStock mongoThemeStock : mongoThemeStockList) { String themeName = mongoThemeStock.getThemeName(); String dateStr = mongoThemeStock.getDate(); Date findTime = null; try { findTime = DateUtil.strToDate(dateStr, DateUtil.DatePattern.day); } catch (ParseException e) { e.printStackTrace(); } /** theme-security pair description */ String webSite = mongoThemeStock.getWebSite(); String pairType = "crawl"; // List<String> tickerSymbolList = new ArrayList<String>(); try { List<String> tickerSymbolList = mongoThemeStock.getRelateStock(); if (tickerSymbolList == null || tickerSymbolList.isEmpty()) continue; for (String tickerSymbol : 
tickerSymbolList) { if (!tickerSymbol2Obj.containsKey(tickerSymbol)) continue; DatayesdbpSecurity curSecurity = tickerSymbol2Obj.get(tickerSymbol); /** * if themes came from database do not contains the current theme (comes from mongo db), * set mongo theme's id to -1 */ Long themeID = themeName2ID.containsKey(themeName) ? themeName2ID.get(themeName) : -1L; if (themeID == -1L || (globalThemeSecPairMap.containsKey(themeID) && globalThemeSecPairMap.get(themeID).contains(curSecurity.getTickerSymbol()))) continue; ThemeSecurityObjPair themeSecurityPair = new ThemeSecurityObjPair( themeID, themeName, curSecurity, webSite, findTime, pairType); themeSecurityPairList.add(themeSecurityPair); if (!globalThemeSecPairMap.containsKey(themeID)) globalThemeSecPairMap.put(themeID, new HashSet<String>()); globalThemeSecPairMap.get(themeID).add(curSecurity.getTickerSymbol()); } } catch (Exception e) { logger.error(e.toString()); } } return themeSecurityPairList; }