/** Load input file */ private void loadInputFile() { URI_ID = new TObjectIntHashMap<String>(); // load uri--id from input file TextFileUtils.loadInputURIs(inputFile, URI_ID, false); logger.debug("Input URIs loading. " + URI_ID.size() + " URIs loaded."); }
/**
 * Returns the inverse of the word-to-id map supplied by {@code DataAccess.getAllWordIds()}.
 *
 * <p>Every (word, id) entry of the source map is stored as (id, word) in the result. If two
 * words carry the same id, whichever is visited last by the iterator is the one kept.
 *
 * @return a new map from id to word, pre-sized to the number of source entries
 */
public static TIntObjectHashMap<String> getAllWordIds() {
  TObjectIntHashMap<String> wordToId = DataAccess.getAllWordIds();
  TIntObjectHashMap<String> idToWord = new TIntObjectHashMap<String>(wordToId.size());

  TObjectIntIterator<String> cursor = wordToId.iterator();
  while (cursor.hasNext()) {
    // Trove iterators must be advanced before key()/value() can be read.
    cursor.advance();
    idToWord.put(cursor.value(), cursor.key());
  }

  return idToWord;
}
public TreeMap<Integer, Integer> addStringFeatureVector( String[] strFeatures, String label, boolean flagTest) { HashSet<String> setFeatures = new HashSet<String>(); TreeMap<Integer, Integer> vector = new TreeMap<Integer, Integer>(); for (String feature : strFeatures) { setFeatures.add(feature); } if (setFeatures.size() == 0) return null; if (!label.equals("")) if (labels.contains(label)) { vector.put(labelKey, labels.indexOf(label)); } else { if (!flagTest) { labels.add(label); vector.put(labelKey, labels.indexOf(label)); } else { // throw new IllegalArgumentException("Label of Testing Data is error!!!"); return null; } } for (String feature : setFeatures) { if (wordlist.contains(feature)) { vector.put(wordlist.get(feature), 1); } else { if (!flagTest) { wordlist.put(feature, wordlist.size()); vector.put(wordlist.get(feature), 1); } } } return vector; }