/** Helper method to represent an Attribute as a single-element vector. */ private int[] toValueIndexArray(Attribute attribute) { TIntArrayList out = new TIntArrayList(); for (String value : values) { int valueIndex = attribute.indexOfValue(value); if (valueIndex < 0) throw new NoSuchElementException( "no such value: '" + value + "' in attribute '" + attribute.toString() + "'"); out.add(valueIndex); } return out.toNativeArray(); }
/** * Constructs an instance suitable for passing to the model for scoring * * @param incoming the incoming instance * @return an instance with values mapped to be consistent with what the model is expecting */ protected Instance mapIncomingFieldsToModelFields(Instance incoming) { Instances modelHeader = m_model.getHeader(); double[] vals = new double[modelHeader.numAttributes()]; for (int i = 0; i < modelHeader.numAttributes(); i++) { if (m_attributeMap[i] < 0) { // missing or type mismatch vals[i] = Utils.missingValue(); continue; } Attribute modelAtt = modelHeader.attribute(i); Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]); if (incoming.isMissing(incomingAtt.index())) { vals[i] = Utils.missingValue(); continue; } if (modelAtt.isNumeric()) { vals[i] = incoming.value(m_attributeMap[i]); } else if (modelAtt.isNominal()) { String incomingVal = incoming.stringValue(m_attributeMap[i]); int modelIndex = modelAtt.indexOfValue(incomingVal); if (modelIndex < 0) { vals[i] = Utils.missingValue(); } else { vals[i] = modelIndex; } } else if (modelAtt.isString()) { vals[i] = 0; modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i])); } } if (modelHeader.classIndex() >= 0) { // set class to missing value vals[modelHeader.classIndex()] = Utils.missingValue(); } Instance newInst = null; if (incoming instanceof SparseInstance) { newInst = new SparseInstance(incoming.weight(), vals); } else { newInst = new DenseInstance(incoming.weight(), vals); } newInst.setDataset(modelHeader); return newInst; }
/** * Input an instance for filtering. The instance is processed and made available for output * immediately. * * @param instance the input instance. * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined. */ public boolean input(Instance instance) { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isOutputFormatDefined()) { Instance newInstance = (Instance) instance.copy(); // make sure that we get the right indexes set for the converted // string attributes when operating on a second batch of instances for (int i = 0; i < newInstance.numAttributes(); i++) { if (newInstance.attribute(i).isString() && !newInstance.isMissing(i) && m_AttIndices.isInRange(i)) { Attribute outAtt = getOutputFormat().attribute(newInstance.attribute(i).name()); String inVal = newInstance.stringValue(i); int outIndex = outAtt.indexOfValue(inVal); if (outIndex < 0) { newInstance.setMissing(i); } else { newInstance.setValue(i, outIndex); } } } push(newInstance); return true; } bufferInput(instance); return false; }
@Override public void init(Instances structure, Environment env) { super.init(structure, env); m_resolvedLhsName = m_lhsAttributeName; m_resolvedRhsOperand = m_rhsOperand; try { m_resolvedLhsName = m_env.substitute(m_resolvedLhsName); m_resolvedRhsOperand = m_env.substitute(m_resolvedRhsOperand); } catch (Exception ex) { } Attribute lhs = null; // try as an index or "special" label first if (m_resolvedLhsName.toLowerCase().startsWith("/first")) { lhs = structure.attribute(0); } else if (m_resolvedLhsName.toLowerCase().startsWith("/last")) { lhs = structure.attribute(structure.numAttributes() - 1); } else { // try as an index try { int indx = Integer.parseInt(m_resolvedLhsName); indx--; lhs = structure.attribute(indx); } catch (NumberFormatException ex) { } } if (lhs == null) { lhs = structure.attribute(m_resolvedLhsName); } if (lhs == null) { throw new IllegalArgumentException( "Data does not contain attribute " + "\"" + m_resolvedLhsName + "\""); } m_lhsAttIndex = lhs.index(); if (m_rhsIsAttribute) { Attribute rhs = null; // try as an index or "special" label first if (m_resolvedRhsOperand.toLowerCase().equals("/first")) { rhs = structure.attribute(0); } else if (m_resolvedRhsOperand.toLowerCase().equals("/last")) { rhs = structure.attribute(structure.numAttributes() - 1); } else { // try as an index try { int indx = Integer.parseInt(m_resolvedRhsOperand); indx--; rhs = structure.attribute(indx); } catch (NumberFormatException ex) { } } if (rhs == null) { rhs = structure.attribute(m_resolvedRhsOperand); } if (rhs == null) { throw new IllegalArgumentException( "Data does not contain attribute " + "\"" + m_resolvedRhsOperand + "\""); } m_rhsAttIndex = rhs.index(); } else if (m_operator != ExpressionType.CONTAINS && m_operator != ExpressionType.STARTSWITH && m_operator != ExpressionType.ENDSWITH && m_operator != ExpressionType.REGEX && m_operator != ExpressionType.ISMISSING) { // make sure the operand is parseable as a number (unless missing has // been specified - equals only) if (lhs.isNominal()) { m_numericOperand = lhs.indexOfValue(m_resolvedRhsOperand); if (m_numericOperand < 0) { throw new IllegalArgumentException( "Unknown nominal value '" + m_resolvedRhsOperand + "' for attribute '" + lhs.name() + "'"); } } else { try { m_numericOperand = Double.parseDouble(m_resolvedRhsOperand); } catch (NumberFormatException e) { throw new IllegalArgumentException( "\"" + m_resolvedRhsOperand + "\" is not parseable as a number!"); } } } if (m_operator == ExpressionType.REGEX) { m_regexPattern = Pattern.compile(m_resolvedRhsOperand); } }
public MappingInfo(Instances dataSet, MiningSchema miningSchema, Logger log) throws Exception { m_log = log; // miningSchema.convertStringAttsToNominal(); Instances fieldsI = miningSchema.getMiningSchemaAsInstances(); m_fieldsMap = new int[fieldsI.numAttributes()]; m_nominalValueMaps = new int[fieldsI.numAttributes()][]; for (int i = 0; i < fieldsI.numAttributes(); i++) { String schemaAttName = fieldsI.attribute(i).name(); boolean found = false; for (int j = 0; j < dataSet.numAttributes(); j++) { if (dataSet.attribute(j).name().equals(schemaAttName)) { Attribute miningSchemaAtt = fieldsI.attribute(i); Attribute incomingAtt = dataSet.attribute(j); // check type match if (miningSchemaAtt.type() != incomingAtt.type()) { throw new Exception( "[MappingInfo] type mismatch for field " + schemaAttName + ". Mining schema type " + miningSchemaAtt.toString() + ". Incoming type " + incomingAtt.toString() + "."); } // check nominal values (number, names...) if (miningSchemaAtt.numValues() != incomingAtt.numValues()) { String warningString = "[MappingInfo] WARNING: incoming nominal attribute " + incomingAtt.name() + " does not have the same " + "number of values as the corresponding mining " + "schema attribute."; if (m_log != null) { m_log.logMessage(warningString); } else { System.err.println(warningString); } } if (miningSchemaAtt.isNominal() || miningSchemaAtt.isString()) { int[] valuesMap = new int[incomingAtt.numValues()]; for (int k = 0; k < incomingAtt.numValues(); k++) { String incomingNomVal = incomingAtt.value(k); int indexInSchema = miningSchemaAtt.indexOfValue(incomingNomVal); if (indexInSchema < 0) { String warningString = "[MappingInfo] WARNING: incoming nominal attribute " + incomingAtt.name() + " has value " + incomingNomVal + " that doesn't occur in the mining schema."; if (m_log != null) { m_log.logMessage(warningString); } else { System.err.println(warningString); } valuesMap[k] = UNKNOWN_NOMINAL_VALUE; } else { valuesMap[k] = indexInSchema; } } m_nominalValueMaps[i] = valuesMap; } /*if (miningSchemaAtt.isNominal()) { for (int k = 0; k < miningSchemaAtt.numValues(); k++) { if (!miningSchemaAtt.value(k).equals(incomingAtt.value(k))) { throw new Exception("[PMMLUtils] value " + k + " (" + miningSchemaAtt.value(k) + ") does not match " + "incoming value (" + incomingAtt.value(k) + ") for attribute " + miningSchemaAtt.name() + "."); } } }*/ found = true; m_fieldsMap[i] = j; } } if (!found) { throw new Exception( "[MappingInfo] Unable to find a match for mining schema " + "attribute " + schemaAttName + " in the " + "incoming instances!"); } } // check class attribute (if set) if (fieldsI.classIndex() >= 0) { if (dataSet.classIndex() < 0) { // first see if we can find a matching class String className = fieldsI.classAttribute().name(); Attribute classMatch = dataSet.attribute(className); if (classMatch == null) { throw new Exception( "[MappingInfo] Can't find match for target field " + className + "in incoming instances!"); } dataSet.setClass(classMatch); } else if (!fieldsI.classAttribute().name().equals(dataSet.classAttribute().name())) { throw new Exception( "[MappingInfo] class attribute in mining schema does not match " + "class attribute in incoming instances!"); } } // Set up the textual description of the mapping fieldsMappingString(fieldsI, dataSet); }