/**
 * Serializes one training instance as a single tab-separated line and writes it to the instance
 * output.
 *
 * <p>The line starts with the decision code, followed by one column per feature and a terminating
 * newline. Each feature column is followed by a tab and is rendered as:
 *
 * <ul>
 *   <li>{@code -1} when the feature value is {@code null}, or when it is a null value and
 *       {@code excludeNullValues} is set;
 *   <li>for a {@code SingleFeatureValue}: the index code when the value is 1, {@code -1} when the
 *       value is 0, otherwise {@code indexCode:value};
 *   <li>for a {@code MultipleFeatureValue}: its codes joined with {@code |}.
 * </ul>
 *
 * <p>The shared line buffer is always emptied before this method returns, including when it exits
 * with an exception, so a failed instance can never leak into the next line that is written.
 *
 * @param decision the decision whose code becomes the first column
 * @param featureVector the feature vector to serialize
 * @throws MaltChainedException if either argument is {@code null}, if a feature value has an
 *     unrecognized type, or if the instance output cannot be written
 */
public void addInstance(SingleDecision decision, FeatureVector featureVector)
    throws MaltChainedException {
  if (featureVector == null) {
    throw new LibException("The feature vector cannot be found");
  } else if (decision == null) {
    throw new LibException("The decision cannot be found");
  }
  try {
    sb.append(decision.getDecisionCode() + "\t");
    final int n = featureVector.size();
    for (int i = 0; i < n; i++) {
      final FeatureValue featureValue = featureVector.getFeatureValue(i);
      if (featureValue == null || (excludeNullValues && featureValue.isNullValue())) {
        sb.append("-1");
      } else if (featureValue instanceof SingleFeatureValue) {
        final SingleFeatureValue singleFeatureValue = (SingleFeatureValue) featureValue;
        if (singleFeatureValue.getValue() == 1) {
          // Value 1 is written in the compact form: the index code alone.
          sb.append(singleFeatureValue.getIndexCode());
        } else if (singleFeatureValue.getValue() == 0) {
          sb.append("-1");
        } else {
          sb.append(singleFeatureValue.getIndexCode());
          sb.append(":");
          sb.append(singleFeatureValue.getValue());
        }
      } else if (featureValue instanceof MultipleFeatureValue) {
        final Set<Integer> values = ((MultipleFeatureValue) featureValue).getCodes();
        boolean first = true;
        for (Integer value : values) {
          if (!first) {
            sb.append("|");
          }
          sb.append(value.toString());
          first = false;
        }
      } else {
        throw new LibException(
            "Don't recognize the type of feature value: " + featureValue.getClass());
      }
      sb.append('\t');
    }
    sb.append('\n');
    instanceOutput.write(sb.toString());
    instanceOutput.flush();
    increaseNumberOfInstances();
  } catch (IOException e) {
    throw new LibException("The learner cannot write to the instance file. ", e);
  } finally {
    // Reset the shared buffer on every exit path; previously a failure left a partial line
    // behind that was prepended to the next instance.
    sb.setLength(0);
  }
}
public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException { if (featureVector == null) { throw new LibException( "The learner cannot predict the next class, because the feature vector cannot be found. "); } final FeatureList featureList = new FeatureList(); final int size = featureVector.size(); for (int i = 1; i <= size; i++) { final FeatureValue featureValue = featureVector.getFeatureValue(i - 1); if (featureValue != null && !(excludeNullValues == true && featureValue.isNullValue())) { if (featureValue instanceof SingleFeatureValue) { SingleFeatureValue singleFeatureValue = (SingleFeatureValue) featureValue; int index = featureMap.getIndex(i, singleFeatureValue.getIndexCode()); if (index != -1 && singleFeatureValue.getValue() != 0) { featureList.add(index, singleFeatureValue.getValue()); } } else if (featureValue instanceof MultipleFeatureValue) { for (Integer value : ((MultipleFeatureValue) featureValue).getCodes()) { int v = featureMap.getIndex(i, value); if (v != -1) { featureList.add(v, 1); } } } } } try { decision.getKBestList().addList(model.predict(featureList.toArray())); // decision.getKBestList().addList(prediction(featureList)); } catch (OutOfMemoryError e) { throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); } return true; }
/** * Initialize the feature split parameters and the split feature vector and master feature vector * according to the behavior strategy. * * @param featureVector the parent guide model's feature vector. * @throws MaltChainedException */ protected void initSplitParam(FeatureVector featureVector) throws MaltChainedException { if (getGuide().getConfiguration().getOptionValue("guide", "data_split_column") == null || getGuide() .getConfiguration() .getOptionValue("guide", "data_split_column") .toString() .length() == 0) { throw new GuideException( "The option '--guide-data_split_column' cannot be found, when initializing the data split. "); } if (getGuide().getConfiguration().getOptionValue("guide", "data_split_structure") == null || getGuide() .getConfiguration() .getOptionValue("guide", "data_split_structure") .toString() .length() == 0) { throw new GuideException( "The option '--guide-data_split_structure' cannot be found, when initializing the data split. "); } try { final String spec = "InputColumn(" + getGuide() .getConfiguration() .getOptionValue("guide", "data_split_column") .toString() .trim() + ", " + getGuide() .getConfiguration() .getOptionValue("guide", "data_split_structure") .toString() .trim() + ")"; divideFeature = featureVector.getFeatureModel().identifyFeature(spec); } catch (FeatureException e) { throw new GuideException( "The data split feature 'InputColumn(" + getGuide() .getConfiguration() .getOptionValue("guide", "data_split_column") .toString() + ", " + getGuide() .getConfiguration() .getOptionValue("guide", "data_split_structure") .toString() + ") cannot be initialized. ", e); } if (!(divideFeature instanceof Modifiable)) { throw new GuideException( "The data split feature 'InputColumn(" + getGuide() .getConfiguration() .getOptionValue("guide", "data_split_column") .toString() + ", " + getGuide() .getConfiguration() .getOptionValue("guide", "data_split_structure") .toString() + ") does not implement Modifiable interface. 
"); } divideFeatureIndexVector = new ArrayList<Integer>(); for (int i = 0; i < featureVector.size(); i++) { if (featureVector.get(i).equals(divideFeature)) { divideFeatureIndexVector.add(i); } } // if ((Boolean)getGuide().getConfiguration().getOptionValue("malt0.4", "behavior") == true) { // /* MaltParser 0.4 removes the divide feature for all divide models. For the "Sum-up" model // or // * master model adds the divide feature in the end of the feature vector. // */ // masterFeatureVector = (FeatureVector)featureVector.clone(); // for (Integer i : divideFeatureIndexVector) { // masterFeatureVector.remove(masterFeatureVector.get(i)); // } // for (Integer i : divideFeatureIndexVector) { // masterFeatureVector.add(featureVector.get(i)); // } // // divideFeatureVector = (FeatureVector)featureVector.clone(); // for (Integer i : divideFeatureIndexVector) { // divideFeatureVector.remove(divideFeatureVector.get(i)); // } // } else { masterFeatureVector = featureVector; divideFeatureVector = (FeatureVector) featureVector.clone(); for (Integer i : divideFeatureIndexVector) { divideFeatureVector.remove(divideFeatureVector.get(i)); } // } try { if (getGuide().getConfiguration().getOptionValue("guide", "data_split_threshold").toString() != null) { divideThreshold = Integer.parseInt( getGuide() .getConfiguration() .getOptionValue("guide", "data_split_threshold") .toString()); } else { divideThreshold = 0; } } catch (NumberFormatException e) { throw new GuideException( "The --guide-data_split_threshold option is not an integer value. ", e); } }