Beispiel #1
0
  private InstanceList readFile() throws IOException {

    String NL = System.getProperty("line.separator");
    Scanner scanner = new Scanner(new FileInputStream(fileName), encoding);

    ArrayList<Pipe> pipeList = new ArrayList<Pipe>();
    pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}\\p{L}+")));
    pipeList.add(new TokenSequence2FeatureSequence());

    InstanceList testing = new InstanceList(new SerialPipes(pipeList));

    try {
      while (scanner.hasNextLine()) {

        String text = scanner.nextLine();
        text = text.replaceAll("\\x0d", "");

        Pattern patten = Pattern.compile("^(.*?),(.*?),(.*)$");
        Matcher matcher = patten.matcher(text);

        if (matcher.find()) {
          docIds.add(matcher.group(1));
          testing.addThruPipe(new Instance(matcher.group(3), null, "test instance", null));
        }
      }
    } finally {
      scanner.close();
    }

    return testing;
  }