@Override public Collection<Writable> next() { Text t = (Text) super.next().iterator().next(); String val = new String(t.getBytes()); Collection<Writable> ret = new ArrayList<>(); StringTokenizer tok; int index, max; String col; double value; // actual data try { // determine max index max = 0; tok = new StringTokenizer(val, " \t"); tok.nextToken(); // skip class while (tok.hasMoreTokens()) { col = tok.nextToken(); // finished? if (col.startsWith("#")) break; // qid is not supported if (col.startsWith("qid:")) continue; // actual value index = Integer.parseInt(col.substring(0, col.indexOf(":"))); if (index > max) max = index; } // read values into array tok = new StringTokenizer(val, " \t"); // 1. class double classVal = Double.parseDouble(tok.nextToken()); int numRecordsAdded = 0; // 2. attributes while (tok.hasMoreTokens()) { col = tok.nextToken(); // finished? if (col.startsWith("#")) break; // qid is not supported if (col.startsWith("qid:")) continue; // actual value index = Integer.parseInt(col.substring(0, col.indexOf(":"))); if (index > numRecordsAdded) { int totalDiff = Math.abs(numRecordsAdded - index); for (int i = numRecordsAdded; i < index; i++) { ret.add(new DoubleWritable(0.0)); } numRecordsAdded += totalDiff; } value = Double.parseDouble(col.substring(col.indexOf(":") + 1)); ret.add(new DoubleWritable(value)); numRecordsAdded++; } if (numAttributes >= 1 && ret.size() < numAttributes) { int totalDiff = Math.abs(ret.size() - numAttributes); for (int i = 0; i < totalDiff; i++) { ret.add(new DoubleWritable(0.0)); } } ret.add(new DoubleWritable(classVal)); } catch (Exception e) { log.error("Error parsing line '" + val + "': ", e); } return ret; }