/** Runs the GB features. */
public void runGB() {
    MTOutputProcessor mtop = null;
    if (gbMode == 1) {
        gbXML = initialiseGBResources();
    }
    String nbestSentPath = resourceManager.getString("input") + File.separator + targetLang + File.separator + "temp";
    String ngramExecPath = resourceManager.getString("tools.ngram.path");
    mtop = new MTOutputProcessor(gbXML, nbestSentPath, ngramExecPath, ngramSize);
    // MorphAnalysisProcessor map = new MorphAnalysisProcessor(madaFile);
    File f = new File(sourceFile);
    String sourceFileName = f.getName();
    f = new File(targetFile);
    String targetFileName = f.getName();
    String outputFileName = sourceFileName + "_to_" + targetFileName + ".out";
    String out = resourceManager.getString("output") + File.separator + getMod() + outputFileName;
    System.out.println("Output will be: " + out);
    String lineTarget;
    try {
        BufferedReader brSource = new BufferedReader(new FileReader(sourceFile));
        BufferedReader brTarget = new BufferedReader(new FileReader(targetFile));
        BufferedWriter output = new BufferedWriter(new FileWriter(out));
        ResourceManager.printResources();
        Sentence targetSent;
        Sentence sourceSent;
        int sentCount = 0;
        String lineSource;
        while (((lineSource = brSource.readLine()) != null) && ((lineTarget = brTarget.readLine()) != null)) {
            // Strip the leading sentence ID token from the source line (index computed on the trimmed string).
            lineSource = lineSource.trim();
            lineSource = lineSource.substring(lineSource.indexOf(" "));
            sourceSent = new Sentence(lineSource, sentCount);
            targetSent = new Sentence(lineTarget, sentCount);
            // map.processNextSentence(sourceSent);
            mtop.processNextSentence(sourceSent);
            ++sentCount;
            output.write(featureManager.runFeatures(sourceSent, targetSent));
            output.write("\r\n");
        }
        brSource.close();
        brTarget.close();
        output.close();
        featureManager.printFeatureIndeces();
        Logger.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
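// A hedged sketch of a helper that factors out the sentence-ID stripping used in runGB() and
// runAll(). The name stripLeadingId is hypothetical and not part of the original class; it assumes
// input lines of the form "<id> <token> <token> ...".
private static String stripLeadingId(String line) {
    String trimmed = line.trim();
    int firstSpace = trimmed.indexOf(' ');
    // If there is no space, the line holds only an ID (or is empty): return an empty sentence.
    if (firstSpace < 0) {
        return "";
    }
    // Drop the leading ID token and the space that follows it.
    return trimmed.substring(firstSpace + 1);
}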
public static void main(String args[]) throws IOException, ParseException {
    Options options = new Options();
    options.addOption("u", "uniquehits", false, "only output hits with a single mapping");
    options.addOption("s", "nosuboptimal", false,
            "do not include hits whose score is not equal to the best score for the read");
    CommandLineParser parser = new GnuParser();
    CommandLine cl = parser.parse(options, args, false);
    boolean uniqueOnly = cl.hasOption("uniquehits");
    boolean filterSubOpt = cl.hasOption("nosuboptimal");
    ArrayList<String[]> lines = new ArrayList<String[]>();
    String line;
    String lastRead = "";
    BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
    // Input is read from stdin as tab-separated lines, grouped by read name (first column).
    // Whenever the read name changes, the buffered group is flushed through printLines().
    while ((line = reader.readLine()) != null) {
        String pieces[] = line.split("\t");
        if (!pieces[0].equals(lastRead)) {
            printLines(lines, uniqueOnly, filterSubOpt);
            lines.clear();
        }
        lines.add(pieces);
        lastRead = pieces[0];
    }
    // Flush the final group.
    printLines(lines, uniqueOnly, filterSubOpt);
}
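// printLines(...) is called above but not defined in this section. printLinesSketch below is a
// hypothetical stand-in illustrating the filtering the flags imply, under two assumptions that are
// NOT confirmed by the original code: "unique" means exactly one mapping line for the read, and the
// mapping score sits in a fixed column, arbitrarily taken here to be index 4.
private static void printLinesSketch(List<String[]> lines, boolean uniqueOnly, boolean filterSubOpt) {
    if (lines.isEmpty() || (uniqueOnly && lines.size() > 1)) {
        return;
    }
    double best = Double.NEGATIVE_INFINITY;
    for (String[] l : lines) {
        best = Math.max(best, Double.parseDouble(l[4]));
    }
    for (String[] l : lines) {
        if (filterSubOpt && Double.parseDouble(l[4]) < best) {
            continue; // drop hits that score below the best hit for this read
        }
        System.out.println(String.join("\t", l));
    }
}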
public static String[] generateFileNamesFromOptions(CommandLine cli) {
    String[] filenames = cli.getArgs();
    List<String> fileList = new ArrayList<String>(filenames.length);
    boolean errors = false;
    for (String filename : filenames) {
        if (filename.startsWith("@")) {
            // An argument of the form "@list.txt" names a file containing one filename per line.
            try {
                BufferedReader br = new BufferedReader(new FileReader(filename.substring(1)));
                String file;
                while ((file = br.readLine()) != null) {
                    fileList.add(file);
                }
                br.close();
            } catch (IOException ioe) {
                System.err.println("error: file not readable: " + filename.substring(1));
                errors = true;
            }
        } else {
            fileList.add(filename);
        }
    }
    if (errors) {
        return null;
    }
    return fileList.toArray(new String[fileList.size()]);
}
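// A small usage sketch (not from the original source) showing how the "@" expansion above behaves:
// an argument "@batch.txt" is replaced by the lines of batch.txt, while plain arguments pass
// through unchanged. The method name demoFileNameExpansion is hypothetical.
public static void demoFileNameExpansion(String[] args) throws ParseException {
    CommandLineParser parser = new GnuParser();
    CommandLine cli = parser.parse(new Options(), args);
    String[] files = generateFileNamesFromOptions(cli);
    if (files == null) {
        System.err.println("one or more @list files could not be read");
        return;
    }
    for (String f : files) {
        System.out.println("would process: " + f);
    }
}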
private Map<String, List<String>> loadHistoSectors(String sectorFile) {
    FileReader input;
    Map<String, List<String>> sectors = new HashMap<String, List<String>>();
    try {
        input = new FileReader(sectorFile);
        BufferedReader bufRead = new BufferedReader(input);
        String line;
        int i = 1;
        while ((line = bufRead.readLine()) != null) {
            Bin sectorRecord = Utils.readBin(line);
            List<String> stockList = sectorRecord.symbols;
            String startEnd = formatter.format(sectorRecord.end);
            // Key is "<bin index>:<end date>"; also maintain the symbol -> key inverse map.
            String key = intgerFormaterr.format(i) + ":" + startEnd;
            sectors.put(key, stockList);
            for (String s : stockList) {
                invertedSectors.put(s, key);
            }
            i++;
        }
        bufRead.close();
    } catch (IOException e) {
        throw new RuntimeException("Failed to load sector file", e);
    }
    return sectors;
}
private Map<String, Integer> loadFixedClasses(String file) {
    FileReader input;
    try {
        Map<Integer, List<String>> fixedClasses = new HashMap<Integer, List<String>>();
        Map<String, Integer> invertedFixedClasses = new HashMap<String, Integer>();
        File f = new File(file);
        if (!f.exists()) {
            System.out.println("Extra classes file doesn't exist: " + file);
            return invertedFixedClasses;
        }
        input = new FileReader(f);
        BufferedReader bufRead = new BufferedReader(input);
        String line;
        // Each line is "<class>,<symbol>,<symbol>,...".
        while ((line = bufRead.readLine()) != null) {
            String parts[] = line.split(",");
            int clazz = Integer.parseInt(parts[0]);
            List<String> symbols = new ArrayList<String>();
            symbols.addAll(Arrays.asList(parts).subList(1, parts.length));
            fixedClasses.put(clazz, symbols);
        }
        bufRead.close();
        // Invert the mapping: symbol -> class.
        for (Map.Entry<Integer, List<String>> e : fixedClasses.entrySet()) {
            for (String s : e.getValue()) {
                invertedFixedClasses.put(s, e.getKey());
            }
        }
        return invertedFixedClasses;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
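// The loaders above close their readers only on the success path. On Java 7+, try-with-resources
// closes the reader on every path; a minimal, stand-alone sketch of the same line-by-line loop,
// assuming nothing about the surrounding class (the helper name is hypothetical):
private static List<String> readLinesWithAutoClose(String path) throws IOException {
    List<String> lines = new ArrayList<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(path))) {
        String line;
        while ((line = br.readLine()) != null) {
            lines.add(line);
        }
    } // br is closed here even if readLine() throws
    return lines;
}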
private void applyLabel(String inPointsFile, String outPointsFile, List<String> symbols) {
    System.out.println("Applying labels for points file: " + inPointsFile);
    FileReader input;
    BufferedWriter bufWriter = null;
    try {
        FileOutputStream fos = new FileOutputStream(outPointsFile);
        bufWriter = new BufferedWriter(new OutputStreamWriter(fos));
        File inFile = new File(inPointsFile);
        if (!inFile.exists()) {
            System.out.println("ERROR: Input file doesn't exist: " + inPointsFile);
            return;
        }
        input = new FileReader(inPointsFile);
        BufferedReader bufRead = new BufferedReader(input);
        String inputLine;
        int index = 0;
        while ((inputLine = bufRead.readLine()) != null && index < symbols.size()) {
            Point p = Utils.readPoint(inputLine);
            String symbol = symbols.get(index);
            int clazz = 0;
            if (this.invertedFixedClases.containsKey(symbol)) {
                // Fixed class assignments take precedence.
                clazz = this.invertedFixedClases.get(symbol);
            } else {
                // Otherwise look up the symbol's sector and map the sector to its class.
                String sector = invertedSectors.get(symbol);
                if (sector != null) {
                    clazz = sectorToClazz.get(sector);
                }
                // Symbols with no sector keep the default class 0.
            }
            p.setClazz(clazz);
            String s = p.serialize();
            bufWriter.write(s);
            bufWriter.newLine();
            index++;
        }
        bufRead.close();
        System.out.println("Read lines: " + index);
    } catch (Exception e) {
        throw new RuntimeException("Failed to read/write file", e);
    } finally {
        if (bufWriter != null) {
            try {
                bufWriter.close();
            } catch (IOException ignore) {
            }
        }
    }
}
public static String getLicensee() {
    InputStream is = Q2.class.getResourceAsStream(LICENSEE);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    if (is != null) {
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        PrintStream p = new PrintStream(baos);
        p.println();
        p.println();
        try {
            while (br.ready()) {
                p.println(br.readLine());
            }
        } catch (Exception e) {
            e.printStackTrace(p);
        }
    }
    return baos.toString();
}
private Map<String, List<String>> loadStockSectors(String sectorFile) {
    FileReader input;
    Map<String, List<String>> sectors = new HashMap<String, List<String>>();
    try {
        input = new FileReader(sectorFile);
        BufferedReader bufRead = new BufferedReader(input);
        String line;
        while ((line = bufRead.readLine()) != null) {
            SectorRecord sectorRecord = Utils.readSectorRecord(line);
            // Group symbols by sector, and maintain the symbol -> sector inverse map.
            List<String> stockList = sectors.get(sectorRecord.getSector());
            if (stockList == null) {
                stockList = new ArrayList<String>();
                sectors.put(sectorRecord.getSector(), stockList);
            }
            stockList.add(sectorRecord.getSymbol());
            invertedSectors.put(sectorRecord.getSymbol(), sectorRecord.getSector());
        }
        bufRead.close();
    } catch (IOException e) {
        throw new RuntimeException("Failed to load sector file", e);
    }
    return sectors;
}
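// On Java 8+, the get-then-put grouping in loadStockSectors() can be expressed with
// Map.computeIfAbsent. A minimal stand-alone sketch of the same step; the sector/symbol pair is
// passed in directly so the sketch does not depend on SectorRecord, and the method name is
// hypothetical.
private static void addToSector(Map<String, List<String>> sectors, String sector, String symbol) {
    sectors.computeIfAbsent(sector, k -> new ArrayList<String>()).add(symbol);
}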
/** Runs the BB features. */
public void runBB() {
    File f = new File(sourceFile);
    String sourceFileName = f.getName();
    f = new File(targetFile);
    String targetFileName = f.getName();
    String outputFileName = sourceFileName + "_to_" + targetFileName + ".out";
    File file = new File(resourceManager.getString("output"));
    if (!file.exists()) {
        System.err.println("Creating dir: " + resourceManager.getString("output"));
        Logger.log("Creating dir: " + resourceManager.getString("output"));
        file.mkdirs();
    } else {
        Logger.log("output dir exists: " + resourceManager.getString("output"));
    }
    String out = resourceManager.getString("output") + File.separator + outputFileName;
    System.out.println("Output will be: " + out);
    String pplSourcePath = resourceManager.getString("input") + File.separator + sourceLang
            + File.separator + sourceFileName + resourceManager.getString("tools.ngram.output.ext");
    String pplTargetPath = resourceManager.getString("input") + File.separator + targetLang
            + File.separator + targetFileName + resourceManager.getString("tools.ngram.output.ext");
    String pplPOSTargetPath = resourceManager.getString("input") + File.separator + targetLang
            + File.separator + targetFileName + PosTagger.getXPOS()
            + resourceManager.getString("tools.ngram.output.ext");
    runNGramPPL();
    FileModel fm = new FileModel(sourceFile, resourceManager.getString(sourceLang + ".corpus"));
    // FileModel fm = new FileModel(sourceFile, resourceManager.getString("source" + ".corpus"));
    PPLProcessor pplProcSource = new PPLProcessor(pplSourcePath, new String[] {"logprob", "ppl", "ppl1"});
    PPLProcessor pplProcTarget = new PPLProcessor(pplTargetPath, new String[] {"logprob", "ppl", "ppl1"});
    String sourcePosOutput = null;
    String targetPosOutput = null;
    PPLProcessor pplPosTarget = null;
    if (!isBaseline) {
        sourcePosOutput = runPOS(sourceFile, sourceLang, "source");
        targetPosOutput = runPOS(targetFile, targetLang, "target");
        String targetPPLPos = runNGramPPLPos(targetPosOutput + PosTagger.getXPOS());
        System.out.println("---------TARGET PPLPOS: " + targetPPLPos);
        pplPosTarget = new PPLProcessor(targetPPLPos, new String[] {"poslogprob", "posppl", "posppl1"});
    }
    loadGiza();
    processNGrams();
    boolean gl = false;
    String temp0 = resourceManager.getString("GL");
    if (null != temp0 && temp0.equals("1")) {
        gl = true;
    }
    if (gl) {
        loadGlobalLexicon();
    }
    // Prepare the indices for the IR similarity features (feature 1700).
    Lucene sourceLuc = null;
    Lucene targetLuc = null;
    if (featureManager.hasFeature("1700")) {
        // The indices reside under the lang_resources path, in the luceneIndex folder.
        String lang_resources = workDir + File.separator + "lang_resources";
        String source_lucene_path = lang_resources + File.separator + sourceLang + File.separator + "luceneIndex";
        // The corpus to index.
        String source_lucene_corpus = source_lucene_path + File.separator + sourceLang + ".corpus";
        // System.out.println("SOURCE: " + source_lucene_path + " ||| " + source_lucene_corpus);
        try {
            sourceLuc = new Lucene(source_lucene_path, source_lucene_corpus, true, true, "Source");
        } catch (Exception e) {
            e.printStackTrace();
        }
        String target_lucene_path = lang_resources + File.separator + targetLang + File.separator + "luceneIndex";
        String target_lucene_corpus = target_lucene_path + File.separator + targetLang + ".corpus";
        // System.out.println("TARGET: " + target_lucene_path + " ||| " + target_lucene_corpus);
        try {
            targetLuc = new Lucene(target_lucene_path, target_lucene_corpus, true, true, "Target");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // MQM kicks in.
    MQMManager.getInstance().initialize(resourceManager);
    Context context = new Context();
    context.setSourceFilePath(sourceFile);
    context.setTargetFilePath(targetFile);
    MQMManager.getInstance().globalProcessing(context);
    try {
        BufferedReader brSource = new BufferedReader(new FileReader(sourceFile));
        BufferedReader brTarget = new BufferedReader(new FileReader(targetFile));
        BufferedWriter output = new BufferedWriter(new FileWriter(out));
        BufferedReader posSource = null;
        BufferedReader posTarget = null;
        boolean posSourceExists = ResourceManager.isRegistered("sourcePosTagger");
        boolean posTargetExists = ResourceManager.isRegistered("targetPosTagger");
        POSProcessor posSourceProc = null;
        POSProcessor posTargetProc = null;
        // lefterav: Berkeley parser modifications start here.
        // Check whether the user has defined the grammar files for the source and target language.
        // if (ResourceManager.isRegistered("BParser")) {
        boolean bp = false;
        String temp = resourceManager.getString("BP");
        if (null != temp && temp.equals("1")) {
            bp = true;
        }
        BParserProcessor sourceParserProcessor = null;
        BParserProcessor targetParserProcessor = null;
        if (bp) {
            sourceParserProcessor = new BParserProcessor();
            targetParserProcessor = new BParserProcessor();
            sourceParserProcessor.initialize(sourceFile, resourceManager, sourceLang);
            targetParserProcessor.initialize(targetFile, resourceManager, targetLang);
        }
        // }
        /* BEGIN: Added by Raphael Rubino for the Topic Model Features */
        boolean tm = false;
        String temp1 = resourceManager.getString("TM");
        if (temp1 != null && temp1.equals("1")) {
            tm = true;
        }
        TopicDistributionProcessor sourceTopicDistributionProcessor = null;
        TopicDistributionProcessor targetTopicDistributionProcessor = null;
        if (tm) {
            String sourceTopicDistributionFile = resourceManager.getString(sourceLang + ".topic.distribution");
            String targetTopicDistributionFile = resourceManager.getString(targetLang + ".topic.distribution");
            sourceTopicDistributionProcessor =
                    new TopicDistributionProcessor(sourceTopicDistributionFile, "sourceTopicDistribution");
            targetTopicDistributionProcessor =
                    new TopicDistributionProcessor(targetTopicDistributionFile, "targetTopicDistribution");
        }
        /* END: Added by Raphael Rubino for the Topic Model Features */
        if (!isBaseline) {
            if (posSourceExists) {
                posSourceProc = new POSProcessor(sourcePosOutput);
                posSource = new BufferedReader(new InputStreamReader(new FileInputStream(sourcePosOutput), "utf-8"));
            }
            if (posTargetExists) {
                posTargetProc = new POSProcessor(targetPosOutput);
                posTarget = new BufferedReader(new InputStreamReader(new FileInputStream(targetPosOutput)));
            }
        }
        ResourceManager.printResources();
        Sentence sourceSent;
        Sentence targetSent;
        int sentCount = 0;
        String lineSource = brSource.readLine();
        String lineTarget = brTarget.readLine();
        /* Triggers (by David Langlois) */
        boolean tr = false;
        String temp2 = resourceManager.getString("TR");
        if (temp2 != null && temp2.equals("1")) {
            tr = true;
        }
        Triggers itl_target = null;
        TriggersProcessor itl_target_p = null;
        Triggers itl_source = null;
        TriggersProcessor itl_source_p = null;
        Triggers itl_source_target = null;
        TriggersProcessor itl_source_target_p = null;
        if (tr) {
            itl_target = new Triggers(
                    resourceManager.getString("target.intra.triggers.file"),
                    Integer.parseInt(resourceManager.getString("nb.max.triggers.target.intra")),
                    resourceManager.getString("phrase.separator"));
            itl_target_p = new TriggersProcessor(itl_target);
            itl_source = new Triggers(
                    resourceManager.getString("source.intra.triggers.file"),
                    Integer.parseInt(resourceManager.getString("nb.max.triggers.source.intra")),
                    resourceManager.getString("phrase.separator"));
            itl_source_p = new TriggersProcessor(itl_source);
            itl_source_target = new Triggers(
                    resourceManager.getString("source.target.inter.triggers.file"),
                    Integer.parseInt(resourceManager.getString("nb.max.triggers.source.target.inter")),
                    resourceManager.getString("phrase.separator"));
            itl_source_target_p = new TriggersProcessor(itl_source_target);
        }
        /* End modification for Triggers */
        // Read a line from the source and target files, create a sentence from each,
        // process the sentences, and run the features on the sentence pair.
        while ((lineSource != null) && (lineTarget != null)) {
            // lineSource = lineSource.trim().substring(lineSource.indexOf(" ")).replace("+", "");
            sourceSent = new Sentence(lineSource, sentCount);
            targetSent = new Sentence(lineTarget, sentCount);
            // System.out.println("Processing sentence " + sentCount);
            // System.out.println("SOURCE: " + sourceSent.getText());
            // System.out.println("TARGET: " + targetSent.getText());
            if (posSourceExists) {
                posSourceProc.processSentence(sourceSent);
            }
            if (posTargetExists) {
                posTargetProc.processSentence(targetSent);
            }
            sourceSent.computeNGrams(3);
            targetSent.computeNGrams(3);
            pplProcSource.processNextSentence(sourceSent);
            pplProcTarget.processNextSentence(targetSent);
            if (!isBaseline) {
                pplPosTarget.processNextSentence(targetSent);
            }
            // lefterav: parsing happens here.
            if (bp) {
                sourceParserProcessor.processNextSentence(sourceSent);
                targetParserProcessor.processNextSentence(targetSent);
            }
            if (tm) {
                sourceTopicDistributionProcessor.processNextSentence(sourceSent);
                targetTopicDistributionProcessor.processNextSentence(targetSent);
            }
            // Modified by David: triggers.
            if (tr) {
                itl_source_p.processNextSentence(sourceSent);
                itl_target_p.processNextSentence(targetSent);
                itl_source_target_p.processNextParallelSentences(sourceSent, targetSent);
            }
            // End modification by David.
            // MQM kicks in.
            MQMManager.getInstance().processNextParallelSentences(sourceSent, targetSent);
            // Ergun: IR similarity features.
            if (featureManager.hasFeature("1700")) {
                sourceLuc.processNextSentence(sourceSent);
                targetLuc.processNextSentence(targetSent);
            }
            ++sentCount;
            output.write(featureManager.runFeatures(sourceSent, targetSent));
            output.newLine();
            lineSource = brSource.readLine();
            lineTarget = brTarget.readLine();
        }
        if (posSource != null) {
            posSource.close();
        }
        if (posTarget != null) {
            posTarget.close();
        }
        brSource.close();
        brTarget.close();
        output.close();
        Logger.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
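// runBB() repeats the same pattern to read the "GL", "BP", "TM" and "TR" switches from the
// resource manager. A hedged sketch of a helper that could express those checks; the name
// isFlagEnabled is hypothetical and not part of the original class.
private boolean isFlagEnabled(String key) {
    String value = resourceManager.getString(key);
    return value != null && value.equals("1");
}
// Usage sketch: boolean gl = isFlagEnabled("GL"); boolean tr = isFlagEnabled("TR");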
public void runAll() {
    File f = new File(sourceFile);
    String sourceFileName = f.getName();
    f = new File(targetFile);
    String targetFileName = f.getName();
    String outputFileName = sourceFileName + "_to_" + targetFileName + ".out";
    String out = resourceManager.getString("output") + File.separator + outputFileName;
    System.out.println("Output will be: " + out);
    MTOutputProcessor mtop = null;
    if (gbMode == 1) {
        gbXML = initialiseGBResources();
    }
    String nbestSentPath = resourceManager.getString("input") + File.separator + targetLang + File.separator + "temp";
    String ngramExecPath = resourceManager.getString("tools.ngram.path");
    mtop = new MTOutputProcessor(gbXML, nbestSentPath, ngramExecPath, ngramSize);
    // wlv.mt.features.coherence.Coherence coh = new wlv.mt.features.coherence.Coherence(getTargetFile());
    String pplSourcePath = resourceManager.getString("input") + File.separator + sourceLang
            + File.separator + sourceFileName + resourceManager.getString("tools.ngram.output.ext");
    String pplTargetPath = resourceManager.getString("input") + File.separator + targetLang
            + File.separator + targetFileName + resourceManager.getString("tools.ngram.output.ext");
    String pplPOSTargetPath = resourceManager.getString("input") + File.separator + targetLang
            + File.separator + targetFileName + PosTagger.getXPOS()
            + resourceManager.getString("tools.ngram.output.ext");
    runNGramPPL();
    PPLProcessor pplProcSource = new PPLProcessor(pplSourcePath, new String[] {"logprob", "ppl", "ppl1"});
    PPLProcessor pplProcTarget = new PPLProcessor(pplTargetPath, new String[] {"logprob", "ppl", "ppl1"});
    FileModel fm = new FileModel(sourceFile, resourceManager.getString(sourceLang + ".corpus"));
    String sourcePosOutput = runPOS(sourceFile, sourceLang, "source");
    String targetPosOutput = runPOS(targetFile, targetLang, "target");
    String targetPPLPos = runNGramPPLPos(targetPosOutput + PosTagger.getXPOS());
    System.out.println("---------TARGET PPLPOS: " + targetPPLPos);
    PPLProcessor pplPosTarget = new PPLProcessor(targetPPLPos, new String[] {"poslogprob", "posppl", "posppl1"});
    loadGiza();
    processNGrams();
    try {
        // Read from the full input paths, as in runGB() and runBB().
        BufferedReader brSource = new BufferedReader(new FileReader(sourceFile));
        BufferedReader brTarget = new BufferedReader(new FileReader(targetFile));
        BufferedWriter output = new BufferedWriter(new FileWriter(out));
        BufferedReader posSource = null;
        BufferedReader posTarget = null;
        boolean posSourceExists = ResourceManager.isRegistered("sourcePosTagger");
        boolean posTargetExists = ResourceManager.isRegistered("targetPosTagger");
        POSProcessor posSourceProc = null;
        POSProcessor posTargetProc = null;
        if (posSourceExists) {
            posSourceProc = new POSProcessor(sourcePosOutput);
            posSource = new BufferedReader(new InputStreamReader(new FileInputStream(sourcePosOutput), "utf-8"));
        }
        if (posTargetExists) {
            posTargetProc = new POSProcessor(targetPosOutput);
            posTarget = new BufferedReader(new InputStreamReader(new FileInputStream(targetPosOutput)));
        }
        ResourceManager.printResources();
        Sentence targetSent; // HACK
        Sentence sourceSent;
        int sentCount = 0; // HACK
        String lineSource = brSource.readLine();
        String lineTarget = brTarget.readLine();
        int result; // HACK
        while ((lineSource != null) && (lineTarget != null)) {
            // The MADA-tokenised source files start each sentence with the sentence ID, so strip
            // it here (and drop the "+" morpheme separators).
            lineSource = lineSource.trim();
            lineSource = lineSource.substring(lineSource.indexOf(" ")).replace("+", "");
            sourceSent = new Sentence(lineSource, sentCount);
            targetSent = new Sentence(lineTarget, sentCount);
            System.out.println("Processing sentence " + sentCount);
            if (posSourceExists) {
                posSourceProc.processSentence(sourceSent);
            }
            if (posTargetExists) {
                posTargetProc.processSentence(targetSent);
            }
            sourceSent.computeNGrams(3);
            targetSent.computeNGrams(3);
            pplProcSource.processNextSentence(sourceSent);
            pplProcTarget.processNextSentence(targetSent);
            pplPosTarget.processNextSentence(targetSent);
            // coh.processNextSentence(targetSent);
            mtop.processNextSentence(sourceSent);
            ++sentCount;
            output.write(featureManager.runFeatures(sourceSent, targetSent));
            output.write("\r\n");
            lineSource = brSource.readLine();
            lineTarget = brTarget.readLine();
        }
        // featureManager.printFeatureIndeces();
        if (posSource != null) {
            posSource.close();
        }
        if (posTarget != null) {
            posTarget.close();
        }
        brSource.close();
        brTarget.close();
        output.close();
        Logger.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}