/**
 * Ad-hoc driver: loads the idf value file from a hard-coded work directory, then runs a
 * single {@code SummaryBuilderByVector} task on a single-thread executor and waits for it.
 *
 * <p>Each line of the idf file is expected to look like {@code key###count}. The stored
 * value for {@code key} is {@code count / TOTAL_PAGE_COUNT}. Lines that do not split into
 * exactly two fields, or whose count is not a valid {@code long}, are logged and skipped.
 *
 * @param args ignored
 * @throws IOException if the idf value file cannot be read
 */
public static void main(String[] args) throws IOException {
    String workDir = "E:/dev_workspace/tmp/workspace/duc2007";
    String idfFilename = "duc2007.idf";
    final double TOTAL_PAGE_COUNT = 30000000000.0D;

    Map<String, Double> idfValues = new HashMap<String, Double>();
    File idfFIle = FileUtils.getFile(workDir + "/" + DIR_IDF_FILE, idfFilename);
    log.info("Loading idf value file[" + idfFIle.getAbsolutePath() + "]");

    LineIterator lineIterator = null;
    try {
        lineIterator = FileUtils.lineIterator(idfFIle, DEFAULT_CHARSET.toString());
        while (lineIterator.hasNext()) {
            String line = lineIterator.nextLine();
            String[] strs = line.split("###");
            if (strs.length != 2) {
                log.warn("Line[" + line + "] format is illegal, ignore it!");
                continue;
            }

            // Trim the count like the key is trimmed, and treat a non-numeric count as a
            // malformed line instead of letting NumberFormatException abort the whole load.
            long count;
            try {
                count = Long.parseLong(strs[1].trim());
            } catch (NumberFormatException e) {
                log.warn("Line[" + line + "] has an invalid count, ignore it!");
                continue;
            }
            idfValues.put(strs[0].trim(), count / TOTAL_PAGE_COUNT);
        }
        log.info("Load idf value file[" + idfFIle.getAbsolutePath() + "] finished!");
    } catch (IOException e) {
        log.error("Load idf value file[" + idfFIle.getAbsolutePath() + "] error!", e);
        throw e;
    } finally {
        if (lineIterator != null) {
            lineIterator.close();
        }
    }

    String question = "Describe the legal battle between various recording artists and members of the record industry and the Internet music site Napster. What support, or lack thereof, have the litigants received?";

    EhCacheUtil ehCacheUtil = new EhCacheUtil("db_cache_vec", "lab");
    ExecutorService es = Executors.newSingleThreadExecutor();
    try {
        SummaryBuilderByVector summaryBuilder = new SummaryBuilderByVector(
                workDir, "0", "D0714D.txt", 10, idfValues, question, ehCacheUtil, 1.0f, 1.6f);
        Future<Boolean> future = es.submit(summaryBuilder);
        future.get();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        log.error("Interrupted while waiting for the summary task to finish", e);
    } catch (ExecutionException e) {
        log.error("Summary task failed", e);
    } finally {
        // Always release the worker thread and the cache, even if the task setup or
        // execution throws; otherwise the non-daemon executor thread keeps the JVM alive.
        try {
            es.shutdown();
        } finally {
            EhCacheUtil.close();
        }
    }
}
/**
 * Creates a stream that is backed by the given writer.
 *
 * <p>The decoder is obtained from {@code DEFAULT_CHARSET} and configured to substitute
 * the replacement value for malformed input and for unmappable characters rather than
 * reporting an error.
 *
 * @param out the writer stored as this stream's target
 */
public WriterOutputStream(Writer out) {
    this.writer = out;
    // Both on...() configuration calls return the decoder itself, so they can be chained.
    this.decoder = DEFAULT_CHARSET.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
}