Beispiel #1
0
  /**
   * Appends one training instance to the instance output as a single tab-separated line.
   *
   * <p>The line starts with the decision code, followed by one column per feature value (each
   * column is terminated by a tab) and a trailing newline. A column is written as:
   *
   * <ul>
   *   <li>{@code -1} when the feature value is missing, is an excluded null value, or is a single
   *       feature value whose value is 0;
   *   <li>the index code when a single feature value has the value 1;
   *   <li>{@code indexCode:value} for any other single feature value;
   *   <li>the codes joined with {@code |} for a multiple feature value.
   * </ul>
   *
   * <p>The shared line buffer is always emptied before this method returns, including when it
   * fails, so a failed call cannot leak a partial line into the next instance.
   *
   * @param decision the decision whose code labels the instance
   * @param featureVector the feature vector to serialize
   * @throws MaltChainedException if an argument is null, a feature value has an unrecognized
   *     type, or the instance output cannot be written
   */
  public void addInstance(SingleDecision decision, FeatureVector featureVector)
      throws MaltChainedException {
    if (featureVector == null) {
      throw new LibException("The feature vector cannot be found");
    } else if (decision == null) {
      throw new LibException("The decision cannot be found");
    }

    try {
      sb.append(decision.getDecisionCode());
      sb.append('\t');
      final int n = featureVector.size();
      for (int i = 0; i < n; i++) {
        final FeatureValue featureValue = featureVector.getFeatureValue(i);
        if (featureValue == null || (excludeNullValues && featureValue.isNullValue())) {
          sb.append("-1");
        } else if (featureValue instanceof SingleFeatureValue) {
          final SingleFeatureValue singleFeatureValue = (SingleFeatureValue) featureValue;
          if (singleFeatureValue.getValue() == 1) {
            sb.append(singleFeatureValue.getIndexCode());
          } else if (singleFeatureValue.getValue() == 0) {
            sb.append("-1");
          } else {
            sb.append(singleFeatureValue.getIndexCode());
            sb.append(":");
            sb.append(singleFeatureValue.getValue());
          }
        } else if (featureValue instanceof MultipleFeatureValue) {
          final Set<Integer> values = ((MultipleFeatureValue) featureValue).getCodes();
          boolean first = true;
          for (Integer value : values) {
            // Separator goes between codes only, never after the last one.
            if (!first) {
              sb.append("|");
            }
            sb.append(value.toString());
            first = false;
          }
        } else {
          throw new LibException(
              "Don't recognize the type of feature value: " + featureValue.getClass());
        }
        sb.append('\t');
      }
      sb.append('\n');
      instanceOutput.write(sb.toString());
      instanceOutput.flush();
      increaseNumberOfInstances();
    } catch (IOException e) {
      throw new LibException("The learner cannot write to the instance file. ", e);
    } finally {
      // Reset on every exit path; otherwise a failure would leave a partial line in the shared
      // buffer and corrupt the next instance written.
      sb.setLength(0);
    }
  }
Beispiel #2
0
  /**
   * Predicts the next class for the given feature vector and adds the prediction to the
   * decision's k-best list.
   *
   * <p>Each feature value is translated through the feature map using its 1-based position in
   * the vector. Missing values, excluded null values, features unknown to the feature map (index
   * -1), single feature values with value 0, and feature values that are neither single nor
   * multiple are skipped.
   *
   * @param featureVector the feature vector to classify
   * @param decision the decision that receives the predicted list
   * @return always {@code true}
   * @throws MaltChainedException if an argument is null or the prediction runs out of memory
   */
  public boolean predict(FeatureVector featureVector, SingleDecision decision)
      throws MaltChainedException {
    if (featureVector == null) {
      throw new LibException(
          "The learner cannot predict the next class, because the feature vector cannot be found. ");
    }
    if (decision == null) {
      // Fail early with a descriptive error instead of a NullPointerException after the feature
      // list has already been built.
      throw new LibException(
          "The learner cannot predict the next class, because the decision cannot be found. ");
    }

    final FeatureList featureList = new FeatureList();
    final int size = featureVector.size();
    for (int i = 0; i < size; i++) {
      final FeatureValue featureValue = featureVector.getFeatureValue(i);
      if (featureValue == null || (excludeNullValues && featureValue.isNullValue())) {
        continue;
      }
      // The feature map is addressed with 1-based feature positions.
      final int position = i + 1;
      if (featureValue instanceof SingleFeatureValue) {
        final SingleFeatureValue singleFeatureValue = (SingleFeatureValue) featureValue;
        final int index = featureMap.getIndex(position, singleFeatureValue.getIndexCode());
        if (index != -1 && singleFeatureValue.getValue() != 0) {
          featureList.add(index, singleFeatureValue.getValue());
        }
      } else if (featureValue instanceof MultipleFeatureValue) {
        for (Integer value : ((MultipleFeatureValue) featureValue).getCodes()) {
          final int index = featureMap.getIndex(position, value);
          if (index != -1) {
            featureList.add(index, 1);
          }
        }
      }
    }
    try {
      decision.getKBestList().addList(model.predict(featureList.toArray()));
    } catch (OutOfMemoryError e) {
      throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
    }
    return true;
  }
  /**
   * Initializes the data split parameters: the split (divide) feature, the positions at which it
   * occurs in the feature vector, the master and divide feature vectors, and the split threshold.
   *
   * <p>The master feature vector is the given vector itself; the divide feature vector is a clone
   * of it with every occurrence of the split feature removed. When the option
   * {@code data_split_threshold} is absent the threshold defaults to 0.
   *
   * @param featureVector the parent guide model's feature vector.
   * @throws MaltChainedException if the options {@code data_split_column} or
   *     {@code data_split_structure} are missing or empty, if the split feature cannot be
   *     identified or does not implement {@code Modifiable}, or if the threshold option is not an
   *     integer
   */
  protected void initSplitParam(FeatureVector featureVector) throws MaltChainedException {
    final Object splitColumnOption =
        getGuide().getConfiguration().getOptionValue("guide", "data_split_column");
    if (splitColumnOption == null || splitColumnOption.toString().length() == 0) {
      throw new GuideException(
          "The option '--guide-data_split_column' cannot be found, when initializing the data split. ");
    }
    final Object splitStructureOption =
        getGuide().getConfiguration().getOptionValue("guide", "data_split_structure");
    if (splitStructureOption == null || splitStructureOption.toString().length() == 0) {
      throw new GuideException(
          "The option '--guide-data_split_structure' cannot be found, when initializing the data split. ");
    }

    final String splitColumn = splitColumnOption.toString();
    final String splitStructure = splitStructureOption.toString();
    // Untrimmed description used in error messages only.
    final String featureLabel = "InputColumn(" + splitColumn + ", " + splitStructure + ")";
    try {
      // The actual feature specification uses the trimmed option values.
      final String spec = "InputColumn(" + splitColumn.trim() + ", " + splitStructure.trim() + ")";
      divideFeature = featureVector.getFeatureModel().identifyFeature(spec);
    } catch (FeatureException e) {
      throw new GuideException(
          "The data split feature '" + featureLabel + " cannot be initialized. ", e);
    }
    if (!(divideFeature instanceof Modifiable)) {
      throw new GuideException(
          "The data split feature '" + featureLabel + " does not implement Modifiable interface. ");
    }

    divideFeatureIndexVector = new ArrayList<Integer>();
    for (int i = 0; i < featureVector.size(); i++) {
      if (featureVector.get(i).equals(divideFeature)) {
        divideFeatureIndexVector.add(i);
      }
    }

    masterFeatureVector = featureVector;
    divideFeatureVector = (FeatureVector) featureVector.clone();
    for (Integer i : divideFeatureIndexVector) {
      // Look the element up in the unmodified original vector: the recorded indices refer to it,
      // whereas positions in the clone shift after each removal (which could remove the wrong
      // feature or run past the end when the split feature occurs more than once).
      divideFeatureVector.remove(featureVector.get(i));
    }

    try {
      final Object thresholdOption =
          getGuide().getConfiguration().getOptionValue("guide", "data_split_threshold");
      // Check the option itself for null before converting it; calling toString() on a missing
      // option would throw instead of falling back to the default.
      final String thresholdText = thresholdOption == null ? null : thresholdOption.toString();
      if (thresholdText != null) {
        divideThreshold = Integer.parseInt(thresholdText);
      } else {
        divideThreshold = 0;
      }
    } catch (NumberFormatException e) {
      throw new GuideException(
          "The --guide-data_split_threshold option is not an integer value. ", e);
    }
  }