/** csv格式 */ @Test public void testImportCsv() throws IOException { long beginTime = System.currentTimeMillis(); File file = new File("D:\\Backup\\test.csv"); InputStream is = new BufferedInputStream(new FileInputStream(file)); String encoding = FileCharset.getCharset(file); LineIterator iterator = IOUtils.lineIterator(is, encoding); String separator = ","; int batchSize = 100; // 批处理大小 int totalSize = 0; // 总大小 final List<ExcelData> dataList = Lists.newArrayList(); if (iterator.hasNext()) { iterator.nextLine(); // 跳过第一行标题 } while (iterator.hasNext()) { totalSize++; String line = iterator.nextLine(); String[] dataArray = StringUtils.split(line, separator); ExcelData data = new ExcelData(); data.setId(Long.valueOf(dataArray[0])); data.setContent(dataArray[1]); dataList.add(data); if (totalSize % batchSize == 0) { try { doBatchSave(dataList); } catch (Exception e) { Long fromId = dataList.get(0).getId(); Long endId = dataList.get(dataList.size() - 1).getId(); log.error("from " + fromId + " to " + endId + ", error", e); } dataList.clear(); } } IOUtils.closeQuietly(is); long endTime = System.currentTimeMillis(); log.info("耗时(秒):" + (endTime - beginTime) / 1000); }
/**
 * Builds a one-row score matrix for the words that occur in the given file.
 *
 * <p>Each line is homogenized and tokenized, and every token is counted. For each vocabulary
 * entry in {@code currVocab} that occurs at least once in the file, the column with the same
 * index is set to that word's count in {@code wordScores}; other columns are left untouched.
 *
 * @param file the document to score
 * @return a matrix with one row and {@code currVocab.size()} columns
 * @throws IllegalStateException if the file cannot be read
 */
public DoubleMatrix getScoreMatrix(File file) {
  Counter<String> docWords = new Counter<String>();
  LineIterator iter = null;
  try {
    iter = FileUtils.lineIterator(file);
    while (iter.hasNext()) {
      Tokenizer t =
          tokenizerFactory.create((new InputHomogenization(iter.nextLine()).transform()));
      while (t.hasMoreTokens()) {
        docWords.incrementCount(t.nextToken(), 1.0);
      }
    }
  } catch (IOException e) {
    throw new IllegalStateException("Unable to read file", e);
  } finally {
    // Also reached when tokenizing throws, so the file handle is never leaked.
    LineIterator.closeQuietly(iter);
  }

  DoubleMatrix ret = new DoubleMatrix(1, currVocab.size());
  for (int i = 0; i < currVocab.size(); i++) {
    String word = currVocab.get(i).toString();
    if (docWords.getCount(word) > 0) {
      ret.put(i, wordScores.getCount(word));
    }
  }
  return ret;
}
/** * All files containing serialization policy are located during construction of this object. * Serialization policies are loaded from them (and cached) as needed. * * @param servletContext * @throws IOException */ @Autowired(required = false) public MultiModuleSerializationPolicyProvider(ServletContext servletContext) throws IOException { for (File rpcPolicyManifest : listRpcPolicyManifestFiles(servletContext.getRealPath("/"))) { File moduleDir = rpcPolicyManifest.getParentFile().getParentFile(); LineIterator entries = FileUtils.lineIterator(rpcPolicyManifest); while (entries.hasNext()) { String line = entries.nextLine(); if (line.startsWith("#") || line.trim().length() == 0) continue; String[] entry = line.split(","); assert entry.length == 2 : "Invalid format of file: " + rpcPolicyManifest.getAbsolutePath(); String rpcServiceInterfaceName = entry[0].trim(); String rpcPolicyStrongFileName = entry[1].trim(); if (serializationPolicyFiles.containsKey(rpcServiceInterfaceName)) { assert serializationPolicyFiles .get(rpcServiceInterfaceName) .getName() .equals(rpcPolicyStrongFileName); } else { File serializationPolicyFile = new File(moduleDir, rpcPolicyStrongFileName); assert serializationPolicyFile.exists(); serializationPolicyFiles.put(rpcServiceInterfaceName, serializationPolicyFile); } } LineIterator.closeQuietly(entries); } }
// For example, here is a line from the 5kb chr1 MAPQGE30 raw observed contact matrix // (GM12878_combined/5kb_resolution_intrachromosomal/chr1/MAPQGE30/chr1_5kb.RAWobserved): // 40000000 40100000 59.0 private static void processRawContactInformation( String fileToRead, double minValue, ArrayList<DesiredChrContact> contactsToCheck, boolean intra) throws IOException { // Check if sorted version is available // If not make sorted available. if (!Gpio.exists(fileToRead + ".sorted")) { if (intra) { umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } else { umcg.genetica.io.chrContacts.SortInterChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { double contact = org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]); if (contact >= minValue) { contactsToCheck.get(numberToBeMatched).setContact(); numberToBeMatched++; } else { numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }
/** * Helper function to create DataMatrix. * * @param data InputStream * @return DataMatrix */ private DataMatrix getDataMatrix(InputStream data) throws Exception { // iterate over all lines in byte[] List<String> columnNames = null; List<LinkedList<String>> rowData = null; LineIterator it = IOUtils.lineIterator(data, null); try { int count = -1; while (it.hasNext()) { // first row is our column heading, create column vector if (++count == 0) { columnNames = new LinkedList(Arrays.asList(it.nextLine().split(Converter.VALUE_DELIMITER, -1))); } // all other rows are rows in the table else { rowData = (rowData == null) ? new LinkedList<LinkedList<String>>() : rowData; rowData.add( new LinkedList(Arrays.asList(it.nextLine().split(Converter.VALUE_DELIMITER, -1)))); } } } finally { LineIterator.closeQuietly(it); } // problem reading from data? if (columnNames == null || rowData == null) { if (LOG.isInfoEnabled()) { LOG.info( "getDataMatrix(), problem creating DataMatrix from file, data file probably missing data, returning null"); } return null; } // made it here, we can create DataMatrix if (LOG.isInfoEnabled()) { LOG.info("creating new DataMatrix(), from file data"); } // outta here return new DataMatrix(rowData, columnNames); }
/**
 * Returns the next line of the delegate that can be parsed into a map.
 *
 * <p>Lines the mapper rejects with an {@link IOException} are skipped.
 *
 * @return the parsed content of the next parsable line
 * @throws NoSuchElementException if the delegate runs out of lines before one can be parsed
 */
public Map<String, Object> next() {
  while (true) {
    String rawLine = delegate.nextLine();
    try {
      Map<String, Object> parsed = mapper.readValue(rawLine, Map.class);
      return parsed;
    } catch (IOException ignored) {
      // Unparsable line: fall through and try the following one.
    }
    if (!delegate.hasNext()) {
      throw new NoSuchElementException();
    }
  }
}
/**
 * Loads the IDF value file and runs one {@code SummaryBuilderByVector} task for a fixed document
 * and question.
 *
 * <p>Each line of the IDF file has the form {@code word###count}; the stored value is the count
 * divided by {@code TOTAL_PAGE_COUNT}. Lines that do not split into exactly two parts are
 * skipped with a warning.
 *
 * @param args unused
 * @throws IOException if the IDF file cannot be read
 */
public static void main(String[] args) throws IOException {
  String workDir = "E:/dev_workspace/tmp/workspace/duc2007";
  String idfFilename = "duc2007.idf";
  final double TOTAL_PAGE_COUNT = 30000000000.0D;

  Map<String, Double> idfValues = new HashMap<String, Double>();
  File idfFIle = FileUtils.getFile(workDir + "/" + DIR_IDF_FILE, idfFilename);
  log.info("Loading idf value file[" + idfFIle.getAbsolutePath() + "]");
  LineIterator lineIterator = null;
  try {
    lineIterator = FileUtils.lineIterator(idfFIle, DEFAULT_CHARSET.toString());
    while (lineIterator.hasNext()) {
      String line = lineIterator.nextLine();
      String[] strs = line.split("###");
      if (strs.length != 2) {
        log.warn("Line[" + line + "] format is illegal, ignore it!");
        continue;
      }
      // Trim the count as well as the word: stray whitespace would make parseLong fail.
      idfValues.put(strs[0].trim(), Long.parseLong(strs[1].trim()) / TOTAL_PAGE_COUNT);
    }
    log.info("Load idf value file[" + idfFIle.getAbsolutePath() + "] finished!");
  } catch (IOException e) {
    log.error("Load idf value file[" + idfFIle.getAbsolutePath() + "] error!", e);
    throw e;
  } finally {
    if (lineIterator != null) {
      lineIterator.close();
    }
  }

  String question =
      "Describe the legal battle between various recording artists and members of the record industry and the Internet music site Napster. What support, or lack thereof, have the litigants received?";

  EhCacheUtil ehCacheUtil = new EhCacheUtil("db_cache_vec", "lab");
  SummaryBuilderByVector summaryBuilder =
      new SummaryBuilderByVector(
          workDir, "0", "D0714D.txt", 10, idfValues, question, ehCacheUtil, 1.0f, 1.6f);

  ExecutorService es = Executors.newSingleThreadExecutor();
  try {
    Future<Boolean> future = es.submit(summaryBuilder);
    future.get();
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    log.error("Interrupted while waiting for the summary builder", e);
  } catch (ExecutionException e) {
    log.error("Summary builder failed", e);
  } finally {
    // Always stop the worker thread and the cache, otherwise the JVM may not exit.
    es.shutdown();
    EhCacheUtil.close();
  }
}
/**
 * Reads the resource's file as UTF-8 and turns every line into a {@code DiagnosGrupp}.
 *
 * @param resource resource whose file is read
 * @return one {@code DiagnosGrupp} per line, in file order
 * @throws IOException if the file cannot be opened
 */
protected List<DiagnosGrupp> getDiagnosGrupperInternal(Resource resource) throws IOException {
  final LineIterator lines = FileUtils.lineIterator(resource.getFile(), "UTF-8");
  try {
    final List<DiagnosGrupp> groups = new ArrayList<>();
    while (lines.hasNext()) {
      groups.add(new DiagnosGrupp(lines.nextLine()));
    }
    return groups;
  } finally {
    LineIterator.closeQuietly(lines);
  }
}
/**
 * Prints the help text.
 *
 * <p>The text is read as UTF-8 from {@code conf/help.info} below the current working directory
 * and written line by line to standard output. A short notice is printed instead when the file
 * does not exist.
 */
private static void showHelpInfo() {
  File helpFile =
      new File(
          System.getProperty("user.dir") + File.separator + "conf" + File.separator + "help.info");
  if (!helpFile.exists()) {
    System.out.println("help.info not exists");
    return;
  }
  try {
    LineIterator lines = FileUtils.lineIterator(helpFile, "UTF-8");
    while (lines.hasNext()) {
      System.out.println(lines.nextLine());
    }
    lines.close();
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Creates a hash code from the source code of the warning line and the surrounding context.
 *
 * <p>Lines are counted from zero. Every line whose index is at least {@code line - 3} is
 * appended to the context, and reading stops after the first line whose index exceeds
 * {@code line + 3}.
 *
 * @param fileName the absolute path of the file to read
 * @param line the line of the warning
 * @param encoding the encoding of the file, if <code>null</code> or empty then the default
 *     encoding of the platform is used
 * @return a hash code of the source code
 * @throws IOException if the contents of the file could not be read
 */
public int create(final String fileName, final int line, final String encoding)
    throws IOException {
  final LineIterator lineIterator = EncodingValidator.readFile(fileName, encoding);
  final StringBuilder context = new StringBuilder(1000);
  final int firstContextLine = line - 3;
  final int lastContextLine = line + 3;

  int index = 0;
  while (lineIterator.hasNext()) {
    final String currentLine = lineIterator.nextLine();
    if (index >= firstContextLine) {
      context.append(currentLine);
    }
    if (index > lastContextLine) {
      break;
    }
    index++;
  }
  lineIterator.close();

  return context.toString().hashCode();
}
/**
 * Loads an in memory cache from the given path (sets syn0 and the vocab)
 *
 * <p>Every line is split on single spaces: the first token is the word and the remaining tokens
 * are parsed as the float components of its vector.
 *
 * @param vectorsFile the path of the file to load
 * @return the lookup table holding the loaded vectors, paired with the vocabulary cache
 * @throws FileNotFoundException if {@code vectorsFile} cannot be opened
 * @throws IllegalArgumentException if the file contains no lines
 */
public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile)
    throws FileNotFoundException {
  BufferedReader reader = new BufferedReader(new FileReader(vectorsFile));
  LineIterator iter = IOUtils.lineIterator(reader);
  try {
    VocabCache cache = new InMemoryLookupCache();
    List<INDArray> arrays = new ArrayList<>();
    while (iter.hasNext()) {
      String line = iter.nextLine();
      String[] split = line.split(" ");
      String word = split[0];
      VocabWord word1 = new VocabWord(1.0, word);
      cache.addToken(word1);
      cache.addWordToIndex(cache.numWords(), word);
      word1.setIndex(cache.numWords());
      cache.putVocabWord(word);
      INDArray row = Nd4j.create(Nd4j.createBuffer(split.length - 1));
      for (int i = 1; i < split.length; i++) {
        row.putScalar(i - 1, Float.parseFloat(split[i]));
      }
      arrays.add(row);
    }

    // Fail with a clear message instead of an IndexOutOfBoundsException from arrays.get(0).
    if (arrays.isEmpty()) {
      throw new IllegalArgumentException("No word vectors found in " + vectorsFile);
    }

    INDArray syn = Nd4j.create(new int[] {arrays.size(), arrays.get(0).columns()});
    for (int i = 0; i < syn.rows(); i++) {
      syn.putRow(i, arrays.get(i));
    }

    InMemoryLookupTable lookupTable =
        (InMemoryLookupTable)
            new InMemoryLookupTable.Builder()
                .vectorLength(arrays.get(0).columns())
                .useAdaGrad(false)
                .cache(cache)
                .build();
    Nd4j.clearNans(syn);
    lookupTable.setSyn0(syn);

    return new Pair<>(lookupTable, cache);
  } finally {
    // Closes the underlying reader too, including when a line cannot be parsed.
    LineIterator.closeQuietly(iter);
  }
}
/**
 * Provides the test data read from the classpath resource {@code testdata/fibonacci.txt}.
 *
 * <p>Lines starting with {@code #} are skipped. Every other line is split on {@code ;}; the
 * first column is used as key and the second as value.
 *
 * @return the key/value pairs of the resource
 * @throws IllegalStateException if the resource is missing or cannot be read
 */
@TestData
@Provides
Map<String, String> provideTestData() {
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  try (InputStream is = cl.getResourceAsStream("testdata/fibonacci.txt")) {
    // getResourceAsStream signals a missing resource with null, not with an exception.
    if (is == null) {
      throw new IllegalStateException("Test data resource testdata/fibonacci.txt not found.");
    }
    Map<String, String> result = newHashMapWithExpectedSize(20);
    for (LineIterator it = lineIterator(is, "UTF-8"); it.hasNext(); ) {
      String line = it.nextLine();
      if (line.startsWith("#")) {
        continue;
      }
      String[] columns = line.split(";");
      result.put(columns[0], columns[1]);
    }
    return result;
  } catch (IOException ex) {
    throw new IllegalStateException("Error reading test data.", ex);
  }
}
/**
 * Reads all lines of a file as UTF-8.
 *
 * <p>An I/O error is logged as a warning; the lines collected up to that point are returned.
 *
 * @param file the file to read
 * @return the lines of the file, or an empty list when the file does not exist
 */
public static List<String> lines(final File file) {
  final List<String> result = new LinkedList<String>();
  if (!file.exists()) {
    return result;
  }

  LineIterator reader = null;
  try {
    reader = FileUtils.lineIterator(file, "UTF-8");
    while (reader.hasNext()) {
      result.add(reader.nextLine());
    }
  } catch (IOException e) {
    logger.warn("I/O error with file " + file.getPath(), e);
  } finally {
    LineIterator.closeQuietly(reader);
  }
  return result;
}
/**
 * Builds {@code Update.zip} and {@code del.bat} from a path-info listing.
 *
 * <p>Each line of {@code fPathInfo} is a file name, optionally followed by a run of ten dashes
 * and a CRC value. All names are first recorded with status {@code -1}; {@code listDir} is then
 * run over {@code home}. Afterwards every entry still at status {@code -1} yields a
 * {@code delete} line in {@code del.bat}, and every entry at status {@code 1} is added to the
 * zip archive. I/O errors are printed to standard error.
 *
 * @param fPathInfo the listing file to read
 */
public void update(File fPathInfo) {
  final String crcMarker = "----------";
  LineIterator lineIterator = null;
  ZipOutputStream out = null;
  try {
    updateinfo = new Hashtable<String, Long[]>();
    lineIterator = FileUtils.lineIterator(fPathInfo);
    while (lineIterator.hasNext()) {
      String line = lineIterator.nextLine();
      String name = line;
      long crc = 0;
      int markerAt = line.indexOf(crcMarker);
      if (markerAt > 0) {
        name = line.substring(0, markerAt);
        crc = Long.parseLong(line.substring(markerAt + crcMarker.length()));
      }
      updateinfo.put(name, new Long[] {-1L, crc});
    }

    System.out.println("updateinfo " + updateinfo.size());
    listDir(new File(home), 2);
    System.out.println("updateinfo " + updateinfo.size());

    File foUpdate = new File(home + "\\Update.zip");
    if (foUpdate.exists()) {
      foUpdate.delete();
    }
    out = new ZipOutputStream(new FileOutputStream(home + "\\Update.zip"));
    StringBuffer sb = new StringBuffer();
    for (String fname : updateinfo.keySet()) {
      Long[] status = updateinfo.get(fname);
      if (status[0] == -1) {
        sb.append("delete " + fname + "\n");
      } else if (status[0] == 1) {
        out.putNextEntry(new ZipEntry(fname));
        // Close each source file right after copying; otherwise one handle leaks per entry.
        FileInputStream in = new FileInputStream(fname);
        try {
          IOUtils.copy(in, out);
        } finally {
          IOUtils.closeQuietly(in);
        }
      }
    }
    // Close explicitly on the success path so a failure to finish the archive is reported.
    out.close();
    FileUtils.writeStringToFile(new File("del.bat"), sb.toString());
  } catch (IOException e) {
    e.printStackTrace(System.err);
  } finally {
    LineIterator.closeQuietly(lineIterator);
    IOUtils.closeQuietly(out);
  }
}
/**
 * Load a look up cache from an input stream delimited by \n
 *
 * <p>Every non-empty line is taken as one word. Words are indexed in the order they are read,
 * starting at zero. The stream is read with the platform default charset and is not closed by
 * this method.
 *
 * @param from the input stream to read from
 * @return the in memory lookup cache
 */
public static InMemoryLookupCache load(InputStream from) {
  Reader reader = new InputStreamReader(from);
  LineIterator lines = IOUtils.lineIterator(reader);
  InMemoryLookupCache cache = new InMemoryLookupCache();

  int nextIndex = 0;
  while (lines.hasNext()) {
    String token = lines.nextLine();
    if (!token.isEmpty()) {
      cache.incrementWordCount(token);
      VocabWord vocabWord = new VocabWord(1.0, token);
      vocabWord.setIndex(nextIndex);
      cache.addToken(vocabWord);
      cache.addWordToIndex(nextIndex, token);
      cache.putVocabWord(token);
      nextIndex++;
    }
  }
  return cache;
}
/**
 * Reads headers from the batch starting from the given position.
 *
 * <p>Retrieved headers will be added to the map given by target parameter. The batch part is
 * first copied into memory, then each non-blank, trimmed line is passed to
 * {@code addHeaderLine}.
 *
 * @param iterator batch iterator.
 * @param target destination of the retrieved headers.
 * @throws IllegalStateException if anything goes wrong while reading the headers
 */
public static void readHeaders(
    final ODataBatchLineIterator iterator, final Map<String, Collection<String>> target) {
  try {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    readBatchPart(new ODataBatchController(iterator, null), buffer, true);

    final byte[] rawHeaders = buffer.toByteArray();
    final LineIterator headerLines =
        IOUtils.lineIterator(new ByteArrayInputStream(rawHeaders), Constants.UTF8);
    while (headerLines.hasNext()) {
      final String headerLine = headerLines.nextLine().trim();
      if (!StringUtils.isNotBlank(headerLine)) {
        continue;
      }
      addHeaderLine(headerLine, target);
    }
  } catch (Exception e) {
    LOG.error("Error retrieving headers", e);
    throw new IllegalStateException(e);
  }
}
/**
 * Parses the routes contained in a UTF-8 encoded file.
 *
 * <p>A blank line ends the current route. A line starting with a space is handed, together with
 * the current route, to {@code loadModelEntryForRoute}. Any other line is trimmed and parsed
 * with {@code parseLine}; a non-null result becomes the current route and is added to the
 * result.
 *
 * @param file the routes file
 * @return the parsed routes in file order
 * @throws IOException if the file cannot be opened
 */
@Override
public List<Route> parseRoutes(File file) throws IOException {
  List<Route> routes = new LinkedList<Route>();
  LineIterator it = FileUtils.lineIterator(file, "UTF-8");
  try {
    Route currentRoute = null;
    while (it.hasNext()) {
      String line = it.nextLine();
      if (line.trim().isEmpty()) {
        currentRoute = null;
      } else if (line.startsWith(" ")) {
        loadModelEntryForRoute(currentRoute, line);
      } else {
        currentRoute = parseLine(line.trim());
        if (currentRoute != null) {
          routes.add(currentRoute);
        }
      }
    }
  } finally {
    // The iterator owns the file handle, which would otherwise stay open.
    LineIterator.closeQuietly(it);
  }
  return routes;
}
/**
 * Loads the lexicon from the gzip-compressed file {@code data/lexicon.txt.gz}.
 *
 * <p>Each line is split on whitespace characters: the first token is the word and every further
 * token is a tag name. Known tags are stored in {@code lexiconMap} under the word; unknown tags
 * are logged as warnings.
 *
 * @throws IOException if the file cannot be opened or is not valid gzip data
 */
public void load() throws IOException {
  log.info("Loading lexicon...");

  File dataFile = new File("data/lexicon.txt.gz");
  FileInputStream fileStream = new FileInputStream(dataFile);
  LineIterator iterator = null;
  try {
    Reader reader =
        new BufferedReader(new InputStreamReader(new GZIPInputStream(fileStream)));
    iterator = IOUtils.lineIterator(reader);
    while (iterator.hasNext()) {
      String line = iterator.nextLine();
      String[] splits = line.split("\\s");
      for (int x = 1; x < splits.length; ++x) {
        POSTag tag = POSTag.fromString(splits[x]);
        if (tag == null) {
          log.warn("Unknown tag: {0}", splits[x]);
        } else {
          lexiconMap.put(splits[0], tag);
        }
      }
    }
  } finally {
    LineIterator.closeQuietly(iterator);
    // Also covers the case where the GZIPInputStream constructor rejected the file, so that no
    // iterator exists yet to close the underlying stream.
    IOUtils.closeQuietly(fileStream);
  }

  log.info("Lexicon loaded!");
}
private TitleNameNormalizer(String pathToEvaluationRedirectsData) throws IOException { if (useBloomFilter) { redirectFilter = BloomFilter.create(Funnels.stringFunnel(), ESTIMATED_REDIRECTS); redirects = new LRUCache<String, String>(5000) { protected String loadValue(String src) { String normalized = TitleNameIndexer.normalize(src); if (normalized == null) return src; return TitleNameIndexer.normalize(src); } }; } else redirects = new StringMap<String>(); if (showInitProgress) System.out.println( "Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version"); if (pathToEvaluationRedirectsData != null) { InputStream is = CompressionUtils.readSnappyCompressed(pathToEvaluationRedirectsData); LineIterator iterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8); long linecount = 0; while (iterator.hasNext()) { String line = iterator.nextLine(); if (showInitProgress && linecount++ % 100000 == 0) System.out.println("loading the latest redirects; linecount=" + linecount); String[] parts = StringUtils.split(line, '\t'); String src = parts[0].trim().replace(' ', '_'); String trg = parts[1].trim().replace(' ', '_'); if (useBloomFilter) redirectFilter.put(src); else redirects.put(src, trg); } iterator.close(); } redirects = Collections.unmodifiableMap(redirects); if (showInitProgress) System.out.println( "Done - Loading the most recent redirect pages from Wikipedia to normalize the output links to the latest version"); }
/** * Reads the precomputed md5 digest out of a .md5 file (firehose). Assume the file only contains * one line wit checksum. * * @param file File * @return String * @throws Exception */ @Override public String getPrecomputedMD5Digest(File file) throws Exception { if (LOG.isInfoEnabled()) { LOG.info("getPrecomputedMD5Digest(): " + file.getCanonicalPath()); } String toReturn = ""; LineIterator it = org.apache.commons.io.FileUtils.lineIterator(file); try { while (it.hasNext()) { String content = it.nextLine(); if (content.split(" ").length == 2) { toReturn = content.split(" ")[0].toUpperCase(); } } } finally { LineIterator.closeQuietly(it); } // outta here return toReturn; }
/**
 * Reads dates from separated text.
 *
 * <p>Every line is split with {@code separator} (used as a regular expression). All columns
 * except the last one are parsed with the {@code FORMATO_DATA} pattern in the GMT time zone; the
 * first column of a line that fails to parse ends processing of that line. The reader is always
 * closed.
 *
 * @param reader source of the text
 * @param separator column separator
 * @return the parsed dates in reading order
 * @throws Exception if reading fails
 */
@Override
public List<Date> read(java.io.Reader reader, String separator) throws Exception {
  final List<Date> parsed = new ArrayList<Date>();
  try {
    final SimpleDateFormat format = new SimpleDateFormat(FORMATO_DATA);
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    final LineIterator rows = IOUtils.lineIterator(reader);
    while (rows.hasNext()) {
      final String[] cells = rows.nextLine().split(separator);
      final int dateColumns = cells.length - 1; // the last column is never parsed
      for (int col = 0; col < dateColumns; col++) {
        try {
          parsed.add(format.parse(cells[col]));
        } catch (ParseException e) {
          break;
        }
      }
    }
  } finally {
    IOUtils.closeQuietly(reader);
  }
  return parsed;
}
/**
 * Appends the next input line, followed by a newline character, to the output.
 *
 * @param output buffer that receives the line
 * @param lineIterator source of the line
 */
private void copyLine(final StringBuilder output, final LineIterator lineIterator) {
  output.append(lineIterator.nextLine()).append("\n");
}
/** * 加载压缩后的句子,按类别组织 * * @param count: 每个类别下选取的句子数量 * @return * @throws IOException */ private Map<String, ClustItem> loadSentences(int count) throws IOException { Map<String, ClustItem> clustedSentences = new HashMap<String, ClustItem>(); Pattern pattern = Pattern.compile("(classes_\\d+):"); try { log.info( "Loading msc file[" + this.workDir + "/" + GlobalConstant.DIR_SENTENCES_COMPRESSION + "/" + this.filename + "]"); LineIterator lineIterator = FileUtils.lineIterator( FileUtils.getFile( this.workDir + '/' + GlobalConstant.DIR_SENTENCES_COMPRESSION, this.filename), GlobalConstant.DEFAULT_CHARSET.toString()); String currentKey = ""; int sentCount = 0; // 存储当前选择的句子数 int totalCount = 0; // 总句子数 while (lineIterator.hasNext()) { String line = lineIterator.nextLine(); Matcher matcher = pattern.matcher(line); if (matcher.find()) { // 当前为classes_ currentKey = matcher.group(1); ClustItem clustItem = new ClustItem(); clustItem.setName(currentKey); clustedSentences.put(currentKey, clustItem); totalCount += sentCount; sentCount = 0; } else { ClustItem ci = clustedSentences.get(currentKey); ci.setSize(ci.getSize() + 1); if (sentCount > count) { continue; } List<Pair<Float, String>> sentences = ci.getSentences(); if (null == sentences) { sentences = new ArrayList<Pair<Float, String>>(); ci.setSentences(sentences); } // 将score#sentence转换成(score, sentence) int flagNum = line.indexOf("#"); sentences.add( new Pair<Float, String>( Float.parseFloat(line.substring(0, flagNum)), line.substring(flagNum + 1))); ++sentCount; } } log.info("Load msc file finished[sentence count:" + totalCount + "]"); } catch (IOException e) { log.error( "Load msc file[" + this.workDir + "/" + GlobalConstant.DIR_SENTENCES_COMPRESSION + "/" + this.filename + "] error!", e); throw e; } return clustedSentences; }
/**
 * Parses the user item rating data and stores them into collections.
 *
 * <p>The file is read as a sequence of {@code key: value} lines. The {@code product/productId},
 * {@code product/title}, {@code review/userId}, {@code review/profileName} and
 * {@code review/score} lines update the current review; a {@code review/text} line completes it
 * and records it. Reviews with a score above 3.0 go into the "relevant" matrices, all others
 * into the "non relevant" item matrix; every review is recorded in {@code userItemMatrix},
 * {@code itemDetails} and {@code userDetails}. A user id of {@code unknown} gets the running
 * review number appended.
 *
 * @param filePath path of the review file; nothing is parsed unless it is a regular file
 */
@SuppressWarnings("unused")
private void parseDataIntoItemUserMatrix(String filePath) {
  File file = new File(filePath);
  int i = 0;
  if (file.isFile()) {
    LineIterator content = null;
    try {
      // Read as UTF-8: the charset was declared for this purpose before but never applied.
      content = FileUtils.lineIterator(file, "UTF-8");
      String productId = null;
      String productTitle = null;
      String profileName = null;
      String profileId = null;
      Double score = null;
      while (content.hasNext()) {
        String line = content.nextLine();
        if (line.startsWith("product/productId:")) {
          productId = line.split("product/productId:")[1].trim();
        } else if (line.startsWith("product/title:")) {
          productTitle = line.split("product/title:")[1].trim();
        } else if (line.startsWith("review/userId:")) {
          profileId = line.split("review/userId:")[1].trim();
          if (profileId.trim().equals("unknown")) {
            profileId = profileId.concat("" + i).trim();
          }
        } else if (line.startsWith("review/profileName:")) {
          profileName = line.split("review/profileName:")[1].trim();
        } else if (line.startsWith("review/score:")) {
          score = Double.valueOf(line.split("review/score:")[1].trim());
        } else if (line.startsWith("review/text:")) {
          if (score != null && score > 3.0) {
            addUniqueValue(itemUserMatrixRelevant, productId, profileId);
            addUniqueValue(userItemMatrixRelevant, profileId, productId);
          } else {
            addUniqueValue(itemUserMatrixNonRelevant, productId, profileId);
          }
          addUniqueValue(userItemMatrix, profileId, productId);
          i++;
          if (productId.equals("B00006690A")) {
            System.out.println(productId);
          }
          itemDetails.put(productId, productTitle);
          userDetails.put(profileId, profileName);
        }
      }
      removeNoise();
      if (itemDetails.containsKey("B00006690A")) {
        System.out.println(productId);
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      LineIterator.closeQuietly(content);
    }
  }
  System.out.println(
      "total size of the itemUserMatrixRelevant is " + itemUserMatrixRelevant.size());
  System.out.println(
      "total size of the itemUserMatrixNonRelevant is " + itemUserMatrixNonRelevant.size());
  System.out.println("total size of the userItemMatrix is " + userItemMatrix.size());
  System.out.println("total size of ratings for all items " + i);
}

/** Adds {@code value} to the list stored under {@code key}, creating the list when needed. */
private static void addUniqueValue(
    java.util.Map<String, ArrayList<String>> index, String key, String value) {
  ArrayList<String> values = index.get(key);
  if (values == null) {
    values = new ArrayList<String>();
    index.put(key, values);
  }
  if (!values.contains(value)) {
    values.add(value);
  }
}
public int run() { int rt = -1; String step = leadsfile.split("_")[2]; if (!"01".equals(step)) { logger.info( "Leads setp :" + step + " skip leadsInsert", leadsfile, "", "2-1", "", "名单营销波次为:" + step + ",跳过LeadsInsert."); return 0; } LineIterator it = null; JDBCExecute jdbcExecute = new JDBCExecute( conf.getProperty("leads.insert.db.driverClass"), conf.getProperty("leads.insert.db.databaseURL"), new KeyMapResource().get(conf.getProperty("leads.insert.db.databaseUser")), new KeyMapResource().get(conf.getProperty("leads.insert.db.databasePassword"))); String getCampSQL = conf.getProperty("leads.insert.get.camp.sql"); // JDBCExecute jdbcExecute1 = new JDBCExecute( // conf.getProperty("leads.insert.dwdb.driverClass"), // conf.getProperty("leads.insert.dwdb.databaseURL"), // new KeyMapResource().get(conf.getProperty("leads.insert.dwdb.databaseUser")), // new // KeyMapResource().get(conf.getProperty("leads.insert.dwdb.databasePassword")) // ); // logger.info("LeadsInsert.getCampSQL:"+getCampSQL, // leadsfile, // "", // "2-1", // "", // "LeadsInsert.getCampSQL:"+getCampSQL); List<String[]> sqls = new ArrayList<String[]>(); List<String> file_sqls = new ArrayList<String>(); try { File readfile = new File(conf.getProperty("leads.insert.readpath") + leadsfile); File outfile = new File(conf.getProperty("leads.insert.savepath") + insertfile); String tableName = conf.getProperty("leads.insert.db.table.name"); logger.info( "Leads Insert Table Name:" + tableName, leadsfile, "", "2-1", "", "名单要插入的表为:" + tableName); // INSERT INTO ABC(C1,C1) VALUES ('1','2'); StringBuffer sqlColumnBuffer = new StringBuffer("INSERT INTO"); sqlColumnBuffer.append(" "); sqlColumnBuffer.append(tableName); sqlColumnBuffer.append("("); StringBuffer sqlValuesBuffer = new StringBuffer("VALUES ("); int i = 1; String tempStr = conf.getProperty("leads.insert.column." 
+ i); String[] tempArr; List<InsertColumn> insertColumns = new ArrayList<InsertColumn>(); InsertColumn ic; while (tempStr != null) { ic = new InsertColumn(); tempArr = tempStr.split("\\|", -1); // System.out.println(tempStr); if (i != 1) { sqlColumnBuffer.append(","); } ic.setName(tempArr[0]); ic.setLctype(tempArr[1]); ic.setExpstr(tempArr[2]); insertColumns.add(ic); // System.out.println(tempArr[0]); sqlColumnBuffer.append(tempArr[0]); i++; tempStr = conf.getProperty("leads.insert.column." + i); // System.out.println(tempArr); } sqlColumnBuffer.append(") "); it = FileUtils.lineIterator(readfile, conf.getProperty("leads.insert.file.encoding")); String[] sql; String line; String[] file_columns; String[] oneObjects = null; String value; String selectSQL = conf.getProperty("leads.insert.check.sql"); String newSelectSQL; boolean isOneQuery = false; while (it.hasNext()) { line = it.nextLine(); file_columns = line.split("\\|", -1); if (!isOneQuery) { // 文件读取循环开始时,执行一次。 String tempsql = SqlStrUtils.toSQL(getCampSQL, file_columns); logger.info( "LeadsInsert.getCampSQL:" + tempsql.replaceAll("\'", ""), leadsfile, "", "2-1", "", "LeadsInsert.getCampSQL:" + tempsql.replaceAll("\'", "")); oneObjects = jdbcExecute.getOneObject(tempsql); isOneQuery = true; } for (int j = 0; j < insertColumns.size(); j++) { // int index : indexStrs){ // indexStrs[j]; if (j != 0) { sqlValuesBuffer.append(","); } value = toValue(oneObjects, file_columns, insertColumns.get(j)); if (value != null && !value.equals("")) { // toValue(camp_resultSet,file_columns,insertColumns.get(j)) sqlValuesBuffer.append("'" + value + "'"); } else { sqlValuesBuffer.append("NULL"); } } sqlValuesBuffer.append(")"); newSelectSQL = toExpstrValue(selectSQL, file_columns); sql = new String[2]; sql[0] = sqlColumnBuffer.toString() + sqlValuesBuffer.toString(); sql[1] = newSelectSQL; file_sqls.add(sql[0]); sqls.add(sql); sqlValuesBuffer.delete(8, sqlValuesBuffer.length()); } // 
FileUtils.writeLines(outfile,sqls,conf.getProperty("leads.insert.file.encoding")); FileUtils.writeLines(outfile, conf.getProperty("leads.insert.file.encoding"), file_sqls); jdbcExecute.execute(sqls); logger.info( "Leads Insert Table[" + tableName + "] success. row count:" + sqls.size(), leadsfile, "", "2-1", "", "名单要插入的表[" + tableName + "]完成!插入笔数:" + sqls.size()); logger.info( "Leads Insert run success!gen .SQL file:" + insertfile + " ", leadsfile, "", "2-1", "LINST00001", "LeadsInsert运行成功!生成 " + insertfile + " 完成!"); rt = 0; // List<String[]> leadsList = new ArrayList<String[]>(); } catch (Exception e) { if (e instanceof FileNotFoundException) { rt = 805; logger.error( e.getMessage().replaceAll("\'", ""), leadsfile, "", "2-1", "805", "没有找到指定文件!" + e.getMessage().replaceAll("\'", "")); } else if (e instanceof SQLIntegrityConstraintViolationException) { rt = 815; logger.error( e.getMessage().replaceAll("\'", ""), leadsfile, "", "2-1", "815", "插入数据库发生异常!" + e.getMessage().replaceAll("\'", "")); } else { rt = 809; e.printStackTrace(); logger.error( "leadsInsert error:" + e.getMessage().replaceAll("\'", ""), leadsfile, "", "2-1", "809", "程序发生异常!" + e.getMessage().replaceAll("\'", "")); } } finally { LineIterator.closeQuietly(it); jdbcExecute.closeConnection(); } return rt; }
private static void processNormalizedIntraContactInformation( String fileToRead, String baseName, String normMethod, String chrSmaller, ArrayList<DesiredChrContact> contactsToCheck, String resolution, double minValue, TextFile outWriter) throws IOException { // ReadIn normalization chr1 TextFile inputNormChr1 = new TextFile( baseName + "\\chr" + chrSmaller + "_" + resolution + "." + normMethod, TextFile.R); ArrayList<String> normFactorSmallerChr = inputNormChr1.readAsArrayList(); inputNormChr1.close(); // System.out.println("Done reading norm factor 1"); if (!Gpio.exists(fileToRead + ".sorted")) { umcg.genetica.io.chrContacts.SortIntraChrContacts.readNonSortedWriteSorted( fileToRead, fileToRead + ".sorted"); } int numberToBeMatched = 0; LineIterator it = FileUtils.lineIterator(new File(fileToRead + ".sorted"), "UTF-8"); try { while (it.hasNext()) { String[] parts = StringUtils.split(it.nextLine(), '\t'); int posChr1 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[0]); int posChr2 = org.apache.commons.lang.math.NumberUtils.createInteger(parts[1]); while (numberToBeMatched < contactsToCheck.size()) { if (posChr1 < contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { break; } else if (posChr1 == contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { if (posChr2 < contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { break; } if (posChr2 == contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { String factor1Base = normFactorSmallerChr.get((posChr1 / getNumericResolution(resolution)) + 1); String factor2Base = normFactorSmallerChr.get((posChr2 / getNumericResolution(resolution)) + 1); double factor1; double factor2; if (StringUtils.isNumeric(factor1Base) && StringUtils.isNumeric(factor2Base)) { factor1 = org.apache.commons.lang.math.NumberUtils.createDouble(factor1Base); factor2 = org.apache.commons.lang.math.NumberUtils.createDouble(factor2Base); double contact = 
org.apache.commons.lang.math.NumberUtils.createDouble(parts[2]) / (factor1 * factor2); if (contact >= minValue) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\tContact\t" + contact + "\t" + org.apache.commons.lang.math.NumberUtils.createDouble(parts[2])); numberToBeMatched++; } else { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else { System.out.println("Error in files."); numberToBeMatched++; } } else if (posChr2 > contactsToCheck.get(numberToBeMatched).getChrLocationLarger()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } else if (posChr1 > contactsToCheck.get(numberToBeMatched).getChrLocationSmaller()) { outWriter.writeln( contactsToCheck.get(numberToBeMatched).getSnpName() + "\t" + contactsToCheck.get(numberToBeMatched).getProbeName() + "\t" + posChr1 + "\t" + posChr2 + "\t-\t-\t-"); numberToBeMatched++; } } } } finally { LineIterator.closeQuietly(it); } }
/** * Runs a MAF file through the Oncotator and OMA tools. * * @param inputMAFURL String * @param outputMAFURL String * @throws Exception */ @Override public void oncotateMAF(String inputMAFURL, String outputMAFURL) throws Exception { // sanity check if (inputMAFURL == null || inputMAFURL.length() == 0 || outputMAFURL == null || outputMAFURL.length() == 0) { throw new IllegalArgumentException( "oncotateMAFdownloadFile(): url or urlDestination argument is null..."); } URL inputMAF = new URL(inputMAFURL); URL outputMAF = new URL(outputMAFURL); // determine if we have to call liftover boolean cleanOncotatorInputFile = false; File oncotatorInputFile = new File(inputMAF.getFile()); org.apache.commons.io.LineIterator it = org.apache.commons.io.FileUtils.lineIterator(oncotatorInputFile); it.nextLine(); // skip header String[] parts = it.nextLine().split("\t"); if (parts[3].contains("36") || parts[3].equals("hg18")) { it.close(); File liftoverInputFile = org.apache.commons.io.FileUtils.getFile( org.apache.commons.io.FileUtils.getTempDirectory(), "liftoverInputFile"); org.apache.commons.io.FileUtils.copyFile(oncotatorInputFile, liftoverInputFile); oncotatorInputFile = new File(inputMAF.getFile()); // call lift over if (LOG.isInfoEnabled()) { LOG.info("oncotateMAF(), calling Hg18ToHg19..."); } Hg18ToHg19.driver( liftoverInputFile.getCanonicalPath(), oncotatorInputFile.getCanonicalPath(), getLiftOverBinary(), getLiftOverChain()); org.apache.commons.io.FileUtils.forceDelete(liftoverInputFile); cleanOncotatorInputFile = true; } // create a temp output file from the oncotator File oncotatorOutputFile = org.apache.commons.io.FileUtils.getFile( org.apache.commons.io.FileUtils.getTempDirectory(), "oncotatorOutputFile"); // call oncotator if (LOG.isInfoEnabled()) { LOG.info("oncotateMAF(), calling OncotateTool..."); } OncotateTool.driver( oncotatorInputFile.getCanonicalPath(), oncotatorOutputFile.getCanonicalPath(), true, true, true); // we call OMA here - // we use output from 
oncotator as input file if (LOG.isInfoEnabled()) { LOG.info("oncotateMAF(), calling MutationAssessorTool..."); } File outputMAFFile = new File(outputMAF.getFile()); outputMAFFile.createNewFile(); MutationAssessorTool.driver( oncotatorOutputFile.getCanonicalPath(), outputMAFFile.getCanonicalPath(), false, true, true); // clean up org.apache.commons.io.FileUtils.forceDelete(oncotatorOutputFile); if (cleanOncotatorInputFile) org.apache.commons.io.FileUtils.forceDelete(oncotatorInputFile); }
/** * Get the case list from the staging file. * * @param caseIDs CaseIDs; * @param portalMetadata PortalMetadata * @param cancerStudyMetadata CancerStudyMetadata * @param stagingFilename String * @return List<String> * @throws Exception */ @Override public List<String> getCaseListFromStagingFile( CaseIDs caseIDs, PortalMetadata portalMetadata, CancerStudyMetadata cancerStudyMetadata, String stagingFilename) throws Exception { if (LOG.isInfoEnabled()) { LOG.info("getCaseListFromStagingFile(): " + stagingFilename); } // we use set here HashSet<String> caseSet = new HashSet<String>(); // staging file File stagingFile = org.apache.commons.io.FileUtils.getFile( portalMetadata.getStagingDirectory(), cancerStudyMetadata.getStudyPath(), stagingFilename); // sanity check if (!stagingFile.exists()) { return new ArrayList<String>(); } // iterate over all rows in file org.apache.commons.io.LineIterator it = org.apache.commons.io.FileUtils.lineIterator(stagingFile); try { int mafCaseIDColumnIndex = 0; boolean processHeader = true; while (it.hasNext()) { // create a string list from row in file List<String> thisRow = Arrays.asList(it.nextLine().split(Converter.VALUE_DELIMITER)); // is this the header file? if (processHeader) { // look for MAF file case id column header mafCaseIDColumnIndex = thisRow.indexOf(Converter.MUTATION_CASE_ID_COLUMN_HEADER); // this is not a MAF file, header contains the case ids, return here if (mafCaseIDColumnIndex == -1) { for (String potentialCaseID : thisRow) { if (caseIDs.isTumorCaseID(potentialCaseID)) { caseSet.add(caseIDs.convertCaseID(potentialCaseID)); } } break; } processHeader = false; continue; } // we want to add the value at mafCaseIDColumnIndex into return set - this is a case ID String potentialCaseID = thisRow.get(mafCaseIDColumnIndex); if (caseIDs.isTumorCaseID(potentialCaseID)) { caseSet.add(caseIDs.convertCaseID(potentialCaseID)); } } } finally { it.close(); } // outta here return new ArrayList<String>(caseSet); }