/* (non-Javadoc) * @see lexelt.ILexeltWriter#getInstances(lexelt.ILexelt) */ public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException { svm_problem retVal = new svm_problem(); ArrayList<svm_node[]> featureVectors = new ArrayList<svm_node[]>(); ArrayList<Double> classes = new ArrayList<Double>(); int[][] indice = this.loadStatistic(p_Lexelt); if (indice == null) { throw new IllegalArgumentException("the input lexelt should not be null."); } IStatistic stat = p_Lexelt.getStatistic(); int size = p_Lexelt.size(); // instance count for (int i = 0; i < size; i++) { IInstance instance = p_Lexelt.getInstance(i); svm_node[] featureVector = this.getVector(instance, stat, indice); ArrayList<String> tags = instance.getTag(); if (tags.size() > 0) { for (String tag : tags) { double c = Double.parseDouble(tag); featureVectors.add(Arrays.copyOf(featureVector, featureVector.length)); classes.add(c); } } else { featureVectors.add(featureVector); classes.add(new Double(0)); } } retVal.l = featureVectors.size(); retVal.x = new svm_node[retVal.l][]; retVal.y = new double[retVal.l]; for (int i = 0; i < featureVectors.size(); i++) { retVal.x[i] = featureVectors.get(i); retVal.y[i] = classes.get(i); } return retVal; }
/* (non-Javadoc) * @see lexelt.ILexeltWriter#getString(lexelt.ILexelt) */ public String toString(ILexelt p_iLexelt) throws ClassNotFoundException { int[][] indice = this.loadStatistic(p_iLexelt); if (indice == null) { throw new IllegalArgumentException("the input lexelt should not be null."); } StringBuilder builder = new StringBuilder(); IStatistic stat = p_iLexelt.getStatistic(); int size = p_iLexelt.size(); // instance count for (int i = 0; i < size; i++) { IInstance instance = p_iLexelt.getInstance(i); builder.append(this.toString(instance, stat, indice)); } return builder.toString(); }
/* (non-Javadoc) * @see lexelt.ILexeltWriter#write(java.lang.String, lexelt.ILexelt) */ public void write(String p_Filename, ILexelt p_iLexelt) throws IOException, ClassNotFoundException { int[][] indice = this.loadStatistic(p_iLexelt); if (indice == null) { throw new IllegalArgumentException("the input lexelt should not be null."); } BufferedWriter writer = new BufferedWriter(new FileWriter(p_Filename)); IStatistic stat = p_iLexelt.getStatistic(); int size = p_iLexelt.size(); // instance count for (int i = 0; i < size; i++) { IInstance instance = p_iLexelt.getInstance(i); writer.write(this.toString(instance, stat, indice)); } writer.flush(); writer.close(); }
/**
 * Builds the feature index table for a lexelt from its statistic.
 *
 * <p>Each key of the statistic gets one row. If the key's type is a numeric or binary feature
 * class, the row holds a single index; otherwise the row holds one index per value recorded for
 * that key. Indices are handed out consecutively, starting at 1, in key order.
 *
 * @param p_iLexelt lexelt whose statistic is read
 * @return the index table, or {@code null} when {@code p_iLexelt} is {@code null}
 * @throws ClassNotFoundException if a feature type name in the statistic cannot be loaded
 */
protected int[][] loadStatistic(ILexelt p_iLexelt) throws ClassNotFoundException {
  if (p_iLexelt == null) {
    return null;
  }
  IStatistic statistic = p_iLexelt.getStatistic();
  int featureCount = statistic.getKeys().size();
  int[][] table = new int[featureCount][0];
  int nextIndex = 1;
  for (int feature = 0; feature < featureCount; feature++) {
    Class<?> featureClass = Class.forName(statistic.getType(feature));
    boolean singleColumn =
        ANumericFeature.class.isAssignableFrom(featureClass)
            || ABinaryFeature.class.isAssignableFrom(featureClass);
    if (!singleColumn) {
      // one index per distinct value of this feature
      List<String> values = statistic.getValue(feature);
      int[] columns = new int[values.size()];
      for (int v = 0; v < columns.length; v++) {
        columns[v] = nextIndex;
        nextIndex++;
      }
      table[feature] = columns;
    } else {
      table[feature] = new int[] {nextIndex};
      nextIndex++;
    }
  }
  return table;
}