/**
 * Builds the sparse vector of one instance as an array of {@code svm_node}, ordered by
 * ascending node index.
 *
 * <p>Features whose key is unknown to the statistic (key index below zero) are skipped. A numeric
 * feature stores its parsed value at column 0 of its row in {@code p_Indice}. A binary feature
 * stores 1.0 at column 0 only when its value is the string "1". Any other feature is treated as a
 * list feature: its value (or the statistic's default value when the value is null or not known
 * for that key) is looked up among the values recorded for the key, and 1.0 is stored at the
 * column of the first match; nothing is stored when there is no match.
 *
 * @param p_Instance instance whose features are encoded
 * @param p_Stat statistic used to resolve feature keys and known feature values
 * @param p_Indice node indices per key: row is the key index, column is the value position
 * @return one node per index that received a value, sorted by index
 */
private svm_node[] getVector(IInstance p_Instance, IStatistic p_Stat, int[][] p_Indice) {
    Hashtable<Integer, Double> valueByIndex = new Hashtable<Integer, Double>();
    int total = p_Instance.size();

    for (int pos = 0; pos < total; pos++) {
        IFeature current = p_Instance.getFeature(pos);
        int key = p_Stat.getIndex(current.getKey());
        if (key >= 0) {
            if (current instanceof ANumericFeature) {
                valueByIndex.put(p_Indice[key][0], Double.parseDouble(current.getValue()));
            } else if (current instanceof ABinaryFeature) {
                // Only an active binary feature contributes a node.
                if (current.getValue().equals("1")) {
                    valueByIndex.put(p_Indice[key][0], 1.0);
                }
            } else {
                List<String> known = p_Stat.getValue(key);
                String actual = current.getValue();
                // Missing or unseen values fall back to the statistic's default value.
                if (actual == null || !p_Stat.contains(key, actual)) {
                    actual = p_Stat.getDefaultValue();
                }
                int column = 0;
                while (column < known.size()) {
                    if (known.get(column).equals(actual)) {
                        valueByIndex.put(p_Indice[key][column], 1.0);
                        break;
                    }
                    column++;
                }
            }
        }
    }

    // Emit the nodes in ascending index order.
    ArrayList<Integer> ordered = new ArrayList<Integer>(valueByIndex.keySet());
    Collections.sort(ordered);
    svm_node[] nodes = new svm_node[valueByIndex.size()];
    int slot = 0;
    for (Integer nodeIndex : ordered) {
        svm_node node = new svm_node();
        node.index = nodeIndex;
        node.value = valueByIndex.get(nodeIndex);
        nodes[slot] = node;
        slot++;
    }
    return nodes;
}
/**
 * Assigns consecutive vector indices (starting from 1) to the feature keys found in the
 * statistic of {@code p_iLexelt}.
 *
 * <p>A key whose type is assignable to {@code ANumericFeature} or {@code ABinaryFeature} receives
 * exactly one index. Any other key receives one index per value recorded for it in the statistic,
 * in the order of that value list.
 *
 * @param p_iLexelt lexelt whose statistic is read; may be null
 * @return one row of indices per key, or null when {@code p_iLexelt} is null
 * @throws ClassNotFoundException if the type name stored for a key cannot be loaded
 */
protected int[][] loadStatistic(ILexelt p_iLexelt) throws ClassNotFoundException {
    if (p_iLexelt == null) {
        return null;
    }

    IStatistic statistic = p_iLexelt.getStatistic();
    int keyCount = statistic.getKeys().size();
    int[][] table = new int[keyCount][0];
    int next = 1;

    for (int key = 0; key < keyCount; key++) {
        Class<?> featureType = Class.forName(statistic.getType(key));
        boolean singleSlot = ANumericFeature.class.isAssignableFrom(featureType)
                || ABinaryFeature.class.isAssignableFrom(featureType);
        if (singleSlot) {
            table[key] = new int[] {next};
            next++;
        } else {
            // One index per known value of this key.
            List<String> values = statistic.getValue(key);
            int[] row = new int[values.size()];
            for (int column = 0; column < row.length; column++) {
                row[column] = next;
                next++;
            }
            table[key] = row;
        }
    }
    return table;
}
/**
 * Converts tag strings into integer tags.
 *
 * <p>A tag found in {@code p_Stat.getTagsInOrder()} maps to its position plus one, so known tags
 * start from 1. The markers "?" and "'?'" and any tag not found there map to 0. A null or empty
 * tag list yields a set containing only 0.
 *
 * @param p_Stat statistic providing the ordered tag list
 * @param p_Tags real tags; may be null or empty
 * @return the set of integer tags
 */
protected HashSet<Integer> processTags(IStatistic p_Stat, ArrayList<String> p_Tags) {
    HashSet<Integer> converted = new HashSet<Integer>();
    if (p_Tags == null || p_Tags.isEmpty()) {
        converted.add(0);
        return converted;
    }

    for (String tag : p_Tags) {
        boolean unknownMarker = tag.equals("'?'") || tag.equals("?");
        if (unknownMarker) {
            converted.add(0);
            continue;
        }
        // The lookup stays inside the loop so the statistic is only touched for real tags.
        int position = p_Stat.getTagsInOrder().indexOf(tag);
        converted.add(position < 0 ? 0 : position + 1);
    }
    return converted;
}