Пример #1
0
 /**
  * Builds an {@code svm_problem} from all instances of the given lexelt.
  *
  * <p>Every instance is turned into a sparse feature vector with
  * {@code getVector}. An instance carrying one or more tags contributes one row
  * per tag: the tag, parsed as a double, becomes the class label and each row
  * receives its own copy of the vector array (the nodes themselves are shared).
  * An instance without tags contributes a single row labelled {@code 0}.
  *
  * @param p_Lexelt lexelt whose instances are converted
  * @return the populated {@code svm_problem} with {@code l}, {@code x} and
  *     {@code y} set
  * @throws IllegalArgumentException if {@code loadStatistic} returns no index
  *     table for {@code p_Lexelt}
  * @throws NumberFormatException if a tag cannot be parsed as a double
  * @throws ClassNotFoundException declared by the signature; this body contains
  *     no explicit throw of it
  * @see lexelt.ILexeltWriter#getInstances(lexelt.ILexelt)
  */
 public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
   int[][] indice = this.loadStatistic(p_Lexelt);
   if (indice == null) {
     throw new IllegalArgumentException("the input lexelt should not be null.");
   }
   IStatistic stat = p_Lexelt.getStatistic();
   ArrayList<svm_node[]> featureVectors = new ArrayList<svm_node[]>();
   ArrayList<Double> classes = new ArrayList<Double>();
   int size = p_Lexelt.size(); // instance count
   for (int i = 0; i < size; i++) {
     IInstance instance = p_Lexelt.getInstance(i);
     svm_node[] featureVector = this.getVector(instance, stat, indice);
     ArrayList<String> tags = instance.getTag();
     if (tags.isEmpty()) {
       // Untagged instance: keep a single row with the placeholder label 0.
       featureVectors.add(featureVector);
       classes.add(0.0);
     } else {
       // One row per tag; each row gets its own array so rows stay independent.
       for (String tag : tags) {
         double c = Double.parseDouble(tag);
         featureVectors.add(Arrays.copyOf(featureVector, featureVector.length));
         classes.add(c);
       }
     }
   }
   // Flatten the collected rows and labels into the array fields of svm_problem.
   svm_problem retVal = new svm_problem();
   retVal.l = featureVectors.size();
   retVal.x = new svm_node[retVal.l][];
   retVal.y = new double[retVal.l];
   for (int i = 0; i < retVal.l; i++) {
     retVal.x[i] = featureVectors.get(i);
     retVal.y[i] = classes.get(i);
   }
   return retVal;
 }
Пример #2
0
  /**
   * Converts one instance into a sparse vector of {@code svm_node}s, ordered by
   * ascending node index.
   *
   * <p>Features whose key is unknown to {@code p_Stat} (negative key index) are
   * skipped. A numeric feature stores its parsed value at
   * {@code p_Indice[keyIndex][0]}. A binary feature stores {@code 1.0} at
   * {@code p_Indice[keyIndex][0]} only when its value is {@code "1"}. Any other
   * feature stores {@code 1.0} at {@code p_Indice[keyIndex][i]}, where {@code i}
   * is the position of its value in {@code p_Stat.getValue(keyIndex)}; a value
   * that is null or not contained in the statistic is replaced by
   * {@code p_Stat.getDefaultValue()} first. If several features map to the same
   * node index, the last one wins.
   *
   * @param p_Instance instance whose features are encoded
   * @param p_Stat statistic used to resolve feature keys, value lists and the
   *     default value
   * @param p_Indice table mapping {@code [keyIndex][valueIndex]} to the node
   *     index to emit
   * @return one node per distinct node index that was set, sorted by index
   * @throws NumberFormatException if a numeric feature's value is not a
   *     parseable double
   */
  private svm_node[] getVector(IInstance p_Instance, IStatistic p_Stat, int[][] p_Indice) {
    Hashtable<Integer, Double> exist = new Hashtable<Integer, Double>();
    int featureSize = p_Instance.size();

    for (int fIndex = 0; fIndex < featureSize; fIndex++) {
      IFeature feature = p_Instance.getFeature(fIndex);
      int kIndex = p_Stat.getIndex(feature.getKey());
      if (kIndex < 0) {
        continue; // key is not known to the statistic
      }
      if (feature instanceof ANumericFeature) {
        exist.put(p_Indice[kIndex][0], Double.parseDouble(feature.getValue()));
      } else if (feature instanceof ABinaryFeature) {
        // Constant-first comparison: a null value counts as "not set" instead of
        // raising a NullPointerException.
        if ("1".equals(feature.getValue())) {
          exist.put(p_Indice[kIndex][0], 1.0);
        }
      } else {
        List<String> values = p_Stat.getValue(kIndex);
        String value = feature.getValue();
        if (value == null || !p_Stat.contains(kIndex, value)) {
          value = p_Stat.getDefaultValue();
        }
        // Only the first matching position is encoded.
        for (int i = 0; i < values.size(); i++) {
          if (values.get(i).equals(value)) {
            exist.put(p_Indice[kIndex][i], 1.0);
            break;
          }
        }
      }
    }

    // Emit the nodes in ascending index order.
    ArrayList<Integer> indice = new ArrayList<Integer>(exist.keySet());
    Collections.sort(indice);
    svm_node[] retVal = new svm_node[indice.size()];
    for (int i = 0; i < retVal.length; i++) {
      Integer index = indice.get(i);
      svm_node node = new svm_node();
      node.index = index;
      node.value = exist.get(index);
      retVal[i] = node;
    }
    return retVal;
  }
Пример #3
0
 /**
  * Renders an instance as text, one line per processed tag.
  *
  * <p>Each line consists of the tag, followed by a {@code " index:value"} pair
  * for every node returned by {@code getVector}, followed by a newline
  * (presumably the libsvm sparse text layout; confirm against the consumer).
  *
  * @param p_Instance instance to render
  * @param p_Stat statistic forwarded to {@code getVector} and {@code processTags}
  * @param p_Indice index table forwarded to {@code getVector}
  * @return the concatenated lines; empty when {@code processTags} yields no tags
  */
 private String toString(IInstance p_Instance, IStatistic p_Stat, int[][] p_Indice) {
   // The feature part is the same for every tag, so it is built once up front.
   StringBuilder pairs = new StringBuilder();
   for (svm_node node : this.getVector(p_Instance, p_Stat, p_Indice)) {
     pairs.append(" ").append(node.index).append(":").append(node.value);
   }
   String lineTail = pairs.append("\n").toString();

   StringBuilder lines = new StringBuilder();
   for (Integer tag : this.processTags(p_Stat, p_Instance.getTag())) {
     lines.append(tag.toString()).append(lineTail);
   }
   return lines.toString();
 }