void readData(DataInput in) throws IOException { int rowNum = in.readInt(); if (rowNum > 0) { int n = in.readInt(); rowIndices = new ArrayList<int[]>(rowNum); if (this.useDictionary == true) { this.dict = new TrieDictionary<String>(); dict.readFields(in); for (int i = 0; i < rowNum; i++) { int[] row = new int[n]; this.rowIndices.add(row); for (int j = 0; j < n; j++) { row[j] = in.readInt(); } } } else { List<String[]> rows = new ArrayList<String[]>(rowNum); TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); for (int i = 0; i < rowNum; i++) { String[] row = new String[n]; rows.add(row); for (int j = 0; j < n; j++) { row[j] = in.readUTF(); if (row[j] != null) b.addValue(row[j]); } } this.dict = b.build(0); for (String[] row : rows) { int[] rowIndex = new int[n]; for (int i = 0; i < n; i++) { rowIndex[i] = dict.getIdFromValue(row[i]); } this.rowIndices.add(rowIndex); } } } }
public void takeSnapshot(ReadableTable table, TableDesc tableDesc) throws IOException { this.signature = table.getSignature(); this.columnDelimeter = table.getColumnDelimeter(); int maxIndex = tableDesc.getMaxColumnIndex(); TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); TableReader reader = table.getReader(); while (reader.next()) { String[] row = reader.getRow(); if (row.length <= maxIndex) { throw new IllegalStateException( "Bad hive table row, " + tableDesc + " expect " + (maxIndex + 1) + " columns, but got " + Arrays.toString(row)); } for (String cell : row) { if (cell != null) b.addValue(cell); } } this.dict = b.build(0); reader = table.getReader(); ArrayList<int[]> allRowIndices = new ArrayList<int[]>(); while (reader.next()) { String[] row = reader.getRow(); int[] rowIndex = new int[row.length]; for (int i = 0; i < row.length; i++) { rowIndex[i] = dict.getIdFromValue(row[i]); } allRowIndices.add(rowIndex); } this.rowIndices = allRowIndices; }