/**
 * Builds the training data set from the query/document file and writes it to disk.
 *
 * <p>The input at {@code MIRPath.TREC_CDS_QUERY_DOC_FILE} is consumed one group of lines at a
 * time (one call to {@code reader.getNextLines()} per group). Every line is tab-separated:
 * <ul>
 *   <li>column 1 is parsed as an integer on the first line of a group (the query line) and as a
 *       double relevance value on every following line (the document lines);</li>
 *   <li>column 2 is a space-separated list of {@code key:value} tokens, where each value is
 *       parsed as a double and accumulated into a counter.</li>
 * </ul>
 *
 * <p>For each document line, the query vector and the document vector are combined with
 * {@code VectorMath.add}, the result is labelled with the document's relevance (truncated to
 * {@code int}), and appended to the output list. The list is written to {@code DATA_FILE} and
 * the word indexer to {@code WORD_INDEXER_FILE}.
 *
 * @throws Exception if reading the input, parsing a line, or writing the output fails
 */
public static void generateData1() throws Exception {
    System.out.println("generate data.");

    Indexer<String> wordIndexer = new Indexer<String>();
    List<SparseVector> data = new ArrayList<SparseVector>();

    // NOTE(review): the original also opened a TextFileWriter on DATA_FILE that was never
    // written to. It is dropped here because SparseVector.write(DATA_FILE, ...) below is the
    // only real writer of that path -- presumably it creates the file itself; confirm.
    TextFileReader reader = new TextFileReader(MIRPath.TREC_CDS_QUERY_DOC_FILE);
    try {
        while (reader.hasNext()) {
            List<String> lines = reader.getNextLines();
            List<SparseVector> svs = new ArrayList<SparseVector>(lines.size());

            for (int i = 0; i < lines.size(); i++) {
                String[] parts = lines.get(i).split("\t");

                double relevance = -1;
                if (i == 0) {
                    // Query line: the id itself is not needed downstream, but it is still
                    // parsed so that a malformed query line fails here as it did before.
                    Integer.parseInt(parts[1]);
                } else {
                    relevance = Double.parseDouble(parts[1]);
                }

                // Accumulate the "key:value" tokens of this line into a counter.
                StrCounter c = new StrCounter();
                for (String tok : parts[2].split(" ")) {
                    String[] two = StrUtils.split2Two(":", tok);
                    c.incrementCount(two[0], Double.parseDouble(two[1]));
                }

                // NOTE(review): the trailing 'true' presumably lets the indexer grow with
                // unseen words -- confirm against VectorUtils.toSparseVector.
                SparseVector sv = VectorUtils.toSparseVector(c, wordIndexer, true);
                if (i > 0) {
                    // Only document lines carry a relevance label.
                    sv.setLabel((int) relevance);
                }
                svs.add(sv);
            }

            // The first vector of the group is the query; pair it with every document.
            SparseVector q = svs.get(0);
            for (SparseVector d : svs.subList(1, svs.size())) {
                SparseVector qd = VectorMath.add(q, d);
                qd.setLabel(d.label());
                data.add(qd);
            }
        }
    } finally {
        // Release the input file even when a line fails to parse.
        reader.close();
    }

    SparseVector.write(DATA_FILE, data);
    IOUtils.write(WORD_INDEXER_FILE, wordIndexer);
}