public String getCreateVarInstructions(String outputFileName, String outputLabel) throws LopsException { if (getDataType() == DataType.MATRIX || getDataType() == DataType.FRAME) { if (isTransient()) throw new LopsException("getInstructions() should not be called for transient nodes."); OutputParameters oparams = getOutputParameters(); String fmt = ""; // TODO: following logic should change once we LOPs encode key-value-class information. if (oparams.getFormat() == Format.TEXT) fmt = "textcell"; else if (oparams.getFormat() == Format.MM) fmt = "matrixmarket"; else if (oparams.getFormat() == Format.CSV) fmt = "csv"; else { if (oparams.getRowsInBlock() > 0 || oparams.getColsInBlock() > 0) fmt = "binaryblock"; else fmt = "binarycell"; } StringBuilder sb = new StringBuilder(); sb.append("CP"); sb.append(OPERAND_DELIMITOR); sb.append("createvar"); sb.append(OPERAND_DELIMITOR); sb.append(outputLabel); sb.append(OPERAND_DELIMITOR); sb.append(outputFileName); sb.append(OPERAND_DELIMITOR); sb.append(false); sb.append(OPERAND_DELIMITOR); // only persistent reads come here! sb.append(fmt); sb.append(OPERAND_DELIMITOR); sb.append(oparams.getNumRows()); sb.append(OPERAND_DELIMITOR); sb.append(oparams.getNumCols()); sb.append(OPERAND_DELIMITOR); sb.append(oparams.getRowsInBlock()); sb.append(OPERAND_DELIMITOR); sb.append(oparams.getColsInBlock()); sb.append(OPERAND_DELIMITOR); sb.append(oparams.getNnz()); /* Format-specific properties */ if (oparams.getFormat() == Format.CSV) { sb.append(OPERAND_DELIMITOR); sb.append(createVarCSVHelper()); } return sb.toString(); } else { throw new LopsException( this.printErrorLocation() + "In Data Lop, Unexpected data type " + getDataType()); } }
/** * Method to get CP instructions for reading/writing scalars and matrices from/to HDFS. This * method generates CP read/write instructions. */ @Override public String getInstructions(String input1, String input2) throws LopsException { if (getOutputParameters().getFile_name() == null && operation == OperationTypes.READ) throw new LopsException( this.printErrorLocation() + "Data.getInstructions(): Exepecting a SCALAR data type, encountered " + getDataType()); StringBuilder sb = new StringBuilder(); if (this.getExecType() == ExecType.SPARK) sb.append("SPARK"); else sb.append("CP"); sb.append(OPERAND_DELIMITOR); if (operation == OperationTypes.READ) { sb.append("read"); sb.append(OPERAND_DELIMITOR); sb.append(this.prepInputOperand(input1)); } else if (operation == OperationTypes.WRITE) { sb.append("write"); sb.append(OPERAND_DELIMITOR); sb.append(getInputs().get(0).prepInputOperand(input1)); } else throw new LopsException( this.printErrorLocation() + "In Data Lop, Unknown operation: " + operation); sb.append(OPERAND_DELIMITOR); Lop fnameLop = _inputParams.get(DataExpression.IO_FILENAME); boolean literal = (fnameLop instanceof Data && ((Data) fnameLop).isLiteral()); sb.append(prepOperand(input2, DataType.SCALAR, ValueType.STRING, literal)); // attach outputInfo in case of matrices OutputParameters oparams = getOutputParameters(); if (operation == OperationTypes.WRITE) { sb.append(OPERAND_DELIMITOR); String fmt = ""; if (getDataType() == DataType.MATRIX) { if (oparams.getFormat() == Format.MM) fmt = "matrixmarket"; else if (oparams.getFormat() == Format.TEXT) fmt = "textcell"; else if (oparams.getFormat() == Format.CSV) fmt = "csv"; else if (oparams.getFormat() == Format.BINARY) { if (oparams.getRowsInBlock() > 0 || oparams.getColsInBlock() > 0) fmt = "binaryblock"; else fmt = "binarycell"; } else { throw new LopsException("Unexpected format: " + oparams.getFormat()); } } else { // scalars will always be written in text format fmt = "textcell"; } sb.append(prepOperand(fmt, DataType.SCALAR, ValueType.STRING, true)); if (oparams.getFormat() == Format.CSV) { Data headerLop = (Data) getNamedInputLop(DataExpression.DELIM_HAS_HEADER_ROW); Data delimLop = (Data) getNamedInputLop(DataExpression.DELIM_DELIMITER); Data sparseLop = (Data) getNamedInputLop(DataExpression.DELIM_SPARSE); if (headerLop.isVariable()) throw new LopsException( this.printErrorLocation() + "Parameter " + DataExpression.DELIM_HAS_HEADER_ROW + " must be a literal for a seq operation."); if (delimLop.isVariable()) throw new LopsException( this.printErrorLocation() + "Parameter " + DataExpression.DELIM_DELIMITER + " must be a literal for a seq operation."); if (sparseLop.isVariable()) throw new LopsException( this.printErrorLocation() + "Parameter " + DataExpression.DELIM_SPARSE + " must be a literal for a seq operation."); sb.append(OPERAND_DELIMITOR); sb.append(headerLop.getBooleanValue()); sb.append(OPERAND_DELIMITOR); sb.append(delimLop.getStringValue()); sb.append(OPERAND_DELIMITOR); sb.append(sparseLop.getBooleanValue()); if (this.getExecType() == ExecType.SPARK) { boolean isInputMatrixBlock = true; Lop input = getInputs().get(0); if (input instanceof ParameterizedBuiltin && ((ParameterizedBuiltin) input).getOp() == ParameterizedBuiltin.OperationTypes.TRANSFORM) { // in the case of transform input, the input will be Text strings insteadof MatrixBlocks // This information is used to have correct class information while accessing RDDs from // the symbol table isInputMatrixBlock = false; } sb.append(OPERAND_DELIMITOR); sb.append(isInputMatrixBlock); } } } return sb.toString(); }
/** * Method to generate appropriate MR write instructions. Explicit write instructions are generated * only in case of external file formats (e.g., CSV) except for MatrixMarket. MM format is * overridden by TextCell, instead. */ @Override public String getInstructions(int input_index, int output_index) throws LopsException { OutputParameters oparams = getOutputParameters(); if (operation != OperationTypes.WRITE) throw new LopsException( "This method should only be executed for generating MR Write instructions."); if (oparams.getFormat() != Format.CSV) throw new LopsException( "MR Write instructions can not be generated for the output format: " + oparams.getFormat()); StringBuilder sb = new StringBuilder(); sb.append("MR"); sb.append(OPERAND_DELIMITOR); // Generate opcode based on the output format if (oparams.getFormat() == Format.CSV) sb.append("csvwrite"); else throw new LopsException( "MR Write instructions can not be generated for the output format: " + oparams.getFormat()); sb.append(OPERAND_DELIMITOR); sb.append(getInputs().get(0).prepInputOperand(input_index)); sb.append(OPERAND_DELIMITOR); sb.append(this.prepOutputOperand(output_index)); // Attach format-specific properties if (oparams.getFormat() == Format.CSV) { Data headerLop = (Data) getNamedInputLop(DataExpression.DELIM_HAS_HEADER_ROW); Data delimLop = (Data) getNamedInputLop(DataExpression.DELIM_DELIMITER); Data sparseLop = (Data) getNamedInputLop(DataExpression.DELIM_SPARSE); if (headerLop.isVariable()) throw new LopsException( this.printErrorLocation() + "Parameter " + DataExpression.DELIM_HAS_HEADER_ROW + " must be a literal."); if (delimLop.isVariable()) throw new LopsException( this.printErrorLocation() + "Parameter " + DataExpression.DELIM_DELIMITER + " must be a literal."); if (sparseLop.isVariable()) throw new LopsException( this.printErrorLocation() + "Parameter " + DataExpression.DELIM_SPARSE + " must be a literal."); sb.append(OPERAND_DELIMITOR); sb.append(headerLop.getBooleanValue()); sb.append(OPERAND_DELIMITOR); sb.append(delimLop.getStringValue()); sb.append(OPERAND_DELIMITOR); sb.append(sparseLop.getBooleanValue()); } else { throw new LopsException( "MR Write instructions can not be generated for the output format: " + oparams.getFormat()); } return sb.toString(); }