private InstanceList readFile() throws IOException { String NL = System.getProperty("line.separator"); Scanner scanner = new Scanner(new FileInputStream(fileName), encoding); ArrayList<Pipe> pipeList = new ArrayList<Pipe>(); pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}\\p{L}+"))); pipeList.add(new TokenSequence2FeatureSequence()); InstanceList testing = new InstanceList(new SerialPipes(pipeList)); try { while (scanner.hasNextLine()) { String text = scanner.nextLine(); text = text.replaceAll("\\x0d", ""); Pattern patten = Pattern.compile("^(.*?),(.*?),(.*)$"); Matcher matcher = patten.matcher(text); if (matcher.find()) { docIds.add(matcher.group(1)); testing.addThruPipe(new Instance(matcher.group(3), null, "test instance", null)); } } } finally { scanner.close(); } return testing; }
private InstanceList generateInstanceList() throws Exception { ArrayList<Pipe> pipeList = new ArrayList<Pipe>(); pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}\\p{L}+"))); pipeList.add(new TokenSequence2FeatureSequence()); Reader fileReader = new InputStreamReader(new FileInputStream(new File(fileName)), "UTF-8"); InstanceList instances = new InstanceList(new SerialPipes(pipeList)); instances.addThruPipe( new CsvIterator( fileReader, Pattern.compile("^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$"), 3, 2, 1)); // data, label, name fields return instances; }
public void test() throws Exception { ParallelTopicModel model = ParallelTopicModel.read(new File(inferencerFile)); TopicInferencer inferencer = model.getInferencer(); ArrayList<Pipe> pipeList = new ArrayList<Pipe>(); pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}\\p{L}+"))); pipeList.add(new TokenSequence2FeatureSequence()); InstanceList instances = new InstanceList(new SerialPipes(pipeList)); Reader fileReader = new InputStreamReader(new FileInputStream(new File(fileName)), "UTF-8"); instances.addThruPipe( new CsvIterator( fileReader, Pattern.compile("^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$"), 3, 2, 1)); // data, label, name fields double[] testProbabilities = inferencer.getSampledDistribution(instances.get(1), 10, 1, 5); for (int i = 0; i < 1000; i++) System.out.println(i + ": " + testProbabilities[i]); }