/* (non-Javadoc) * @see lexelt.ILexeltWriter#getInstances(lexelt.ILexelt) */ public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException { svm_problem retVal = new svm_problem(); ArrayList<svm_node[]> featureVectors = new ArrayList<svm_node[]>(); ArrayList<Double> classes = new ArrayList<Double>(); int[][] indice = this.loadStatistic(p_Lexelt); if (indice == null) { throw new IllegalArgumentException("the input lexelt should not be null."); } IStatistic stat = p_Lexelt.getStatistic(); int size = p_Lexelt.size(); // instance count for (int i = 0; i < size; i++) { IInstance instance = p_Lexelt.getInstance(i); svm_node[] featureVector = this.getVector(instance, stat, indice); ArrayList<String> tags = instance.getTag(); if (tags.size() > 0) { for (String tag : tags) { double c = Double.parseDouble(tag); featureVectors.add(Arrays.copyOf(featureVector, featureVector.length)); classes.add(c); } } else { featureVectors.add(featureVector); classes.add(new Double(0)); } } retVal.l = featureVectors.size(); retVal.x = new svm_node[retVal.l][]; retVal.y = new double[retVal.l]; for (int i = 0; i < featureVectors.size(); i++) { retVal.x[i] = featureVectors.get(i); retVal.y[i] = classes.get(i); } return retVal; }
/**
 * Builds the sparse feature vector of one instance.
 *
 * <p>Features whose key is unknown to the statistic (negative index) are
 * skipped. The remaining features are encoded as follows:
 * <ul>
 * <li>numeric feature: slot {@code p_Indice[k][0]} holds the parsed value;</li>
 * <li>binary feature: slot {@code p_Indice[k][0]} is set to 1 only when the
 *     value equals {@code "1"} (a null value counts as "not set");</li>
 * <li>any other feature: the value (or the statistic's default value when it
 *     is null or unknown) is looked up in the value list of key {@code k},
 *     and slot {@code p_Indice[k][i]} of the matching position is set to 1.</li>
 * </ul>
 *
 * @param p_Instance the instance to convert
 * @param p_Stat     statistic giving key indices and known values
 * @param p_Indice   table mapping (key index, value position) to a vector index
 * @return the non-empty slots as {@code svm_node}s, sorted by ascending index
 * @throws NumberFormatException if a numeric feature value cannot be parsed
 * @throws NullPointerException  if a numeric feature value is null
 */
private svm_node[] getVector(IInstance p_Instance, IStatistic p_Stat, int[][] p_Indice) {
    // vector index -> value; a later feature mapping to the same index overwrites.
    Hashtable<Integer, Double> exist = new Hashtable<Integer, Double>();

    int featureSize = p_Instance.size();
    for (int fIndex = 0; fIndex < featureSize; fIndex++) {
        IFeature feature = p_Instance.getFeature(fIndex);
        int kIndex = p_Stat.getIndex(feature.getKey());
        if (kIndex < 0) {
            continue; // key not present in the statistic
        }

        if (feature instanceof ANumericFeature) {
            exist.put(p_Indice[kIndex][0], Double.parseDouble(feature.getValue()));
        } else if (feature instanceof ABinaryFeature) {
            // Constant-first comparison: a null value must not raise a NullPointerException.
            if ("1".equals(feature.getValue())) {
                exist.put(p_Indice[kIndex][0], 1.0);
            }
        } else {
            List<String> values = p_Stat.getValue(kIndex);
            String value = feature.getValue();
            if (value == null || !p_Stat.contains(kIndex, value)) {
                value = p_Stat.getDefaultValue();
            }
            int position = values.indexOf(value);
            if (position >= 0) {
                exist.put(p_Indice[kIndex][position], 1.0);
            }
        }
    }

    // Emit the nodes in ascending index order.
    ArrayList<Integer> indice = new ArrayList<Integer>(exist.keySet());
    Collections.sort(indice);
    svm_node[] retVal = new svm_node[indice.size()];
    for (int i = 0; i < retVal.length; i++) {
        Integer index = indice.get(i);
        svm_node node = new svm_node();
        node.index = index;
        node.value = exist.get(index);
        retVal[i] = node;
    }
    return retVal;
}
/**
 * Renders one instance as text, one line per processed tag.
 *
 * <p>Every line consists of the tag followed by the instance's sparse
 * feature vector written as {@code " index:value"} pairs, and ends with a
 * newline. All lines of one instance share the same feature part.
 *
 * @param p_Instance the instance to render
 * @param p_Stat     statistic passed to {@code getVector} and {@code processTags}
 * @param p_Indice   index table passed to {@code getVector}
 * @return the concatenated lines; empty when {@code processTags} yields no tag
 */
private String toString(IInstance p_Instance, IStatistic p_Stat, int[][] p_Indice) {
    // Feature part is built once and reused for every tag line.
    StringBuilder sparse = new StringBuilder();
    for (svm_node node : this.getVector(p_Instance, p_Stat, p_Indice)) {
        sparse.append(" ").append(node.index).append(":").append(node.value);
    }
    String featureOnly = sparse.toString();

    StringBuilder lines = new StringBuilder();
    for (Integer tag : this.processTags(p_Stat, p_Instance.getTag())) {
        lines.append(tag.toString()).append(featureOnly).append("\n");
    }
    return lines.toString();
}