/**
 * Stores the contents of Text object {@code t} in entry {@code i} of the output vector.
 * When {@code t} is a null reference the entry is recorded as a SQL NULL instead:
 * its isNull flag is raised and the vector's noNulls flag is cleared.
 */
private static void setString(BytesColumnVector outV, int i, Text t) {
  if (t != null) {
    // Hand over the backing array together with the explicit length reported by t.
    outV.setVal(i, t.getBytes(), 0, t.getLength());
  } else {
    outV.noNulls = false;
    outV.isNull[i] = true;
  }
}
@Override public Void call() throws Exception { int batchSize = 1024; VectorUDFDateString udf = new VectorUDFDateString(0, 1); VectorizedRowBatch batch = new VectorizedRowBatch(2, batchSize); BytesColumnVector in = new BytesColumnVector(batchSize); LongColumnVector out = new LongColumnVector(batchSize); batch.cols[0] = in; batch.cols[1] = out; for (int i = 0; i < batchSize; i++) { byte[] data = String.format("1999-%02d-%02d", 1 + (i % 12), 1 + (i % 15)).getBytes("UTF-8"); in.setRef(i, data, 0, data.length); in.isNull[i] = false; } udf.evaluate(batch); // bug if it throws an exception return (Void) null; }
@Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; outV.initBuffer(); Text t; if (n == 0) { // Nothing to do return; } // Design Note: In the future, if this function can be implemented // directly to translate input to output without creating new // objects, performance can probably be improved significantly. // It's implemented in the simplest way now, just calling the // existing built-in function. if (inputColVector.noNulls) { outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; s.set(vector[0], start[0], length[0]); t = func.evaluate(s); setString(outV, 0, t); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; /* Fill output isNull with false for selected elements since there is a chance we'll * convert to noNulls == false in setString(); */ outV.isNull[i] = false; s.set(vector[i], start[i], length[i]); t = func.evaluate(s); setString(outV, i, t); } outV.isRepeating = false; } else { // Set all elements to not null. The setString call can override this. Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); setString(outV, i, t); } outV.isRepeating = false; } } else { // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
outV.noNulls = false; if (inputColVector.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inputColVector.isNull[0]; // setString can override this if (!inputColVector.isNull[0]) { s.set(vector[0], start[0], length[0]); t = func.evaluate(s); setString(outV, 0, t); } } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; // setString can override this if (!inputColVector.isNull[i]) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); setString(outV, i, t); } } outV.isRepeating = false; } else { // setString can override this null propagation System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); for (int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); setString(outV, i, t); } } outV.isRepeating = false; } } }
@Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; int[] sel = batch.selected; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; if (n == 0) { // Nothing to do return; } // initialize output vector buffer to receive data outV.initBuffer(); if (inputColVector.noNulls) { outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } outV.isRepeating = false; } else { for (int i = 0; i != n; i++) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } outV.isRepeating = false; } } else { /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. */ outV.noNulls = false; if (inputColVector.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inputColVector.isNull[0]; if (!inputColVector.isNull[0]) { outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); } } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; if (!inputColVector.isNull[i]) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } outV.isNull[i] = inputColVector.isNull[i]; } outV.isRepeating = false; } else { for (int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } outV.isNull[i] = inputColVector.isNull[i]; } outV.isRepeating = false; } } }