/**
   * Resets {@code URI_ID} to a fresh, empty map and fills it from {@code inputFile} via
   * {@code TextFileUtils.loadInputURIs}, then logs how many entries the map holds.
   */
  private void loadInputFile() {
    // The field is replaced before loading, so any previously loaded mapping is discarded.
    URI_ID = new TObjectIntHashMap<String>();
    TextFileUtils.loadInputURIs(inputFile, URI_ID, false);

    final int loadedCount = URI_ID.size();
    logger.debug("Input URIs loading. " + loadedCount + " URIs loaded.");
  }
 /**
  * Builds the inverse of the word-to-id map obtained from {@code DataAccess.getAllWordIds()}.
  *
  * @return a map whose keys are the word ids and whose values are the corresponding words
  */
 public static TIntObjectHashMap<String> getAllWordIds() {
   TObjectIntHashMap<String> wordToId = DataAccess.getAllWordIds();
   // Pre-size the inverse map with the number of entries in the source map.
   TIntObjectHashMap<String> idToWord = new TIntObjectHashMap<String>(wordToId.size());

   TObjectIntIterator<String> cursor = wordToId.iterator();
   while (cursor.hasNext()) {
     cursor.advance();
     // Swap roles: the source value (id) becomes the key, the source key (word) the value.
     idToWord.put(cursor.value(), cursor.key());
   }
   return idToWord;
 }
Example #3
0
  /**
   * Turns a bag of string features (plus an optional label) into a sparse vector.
   *
   * <p>Duplicate features are collapsed. Every feature found in {@code wordlist} contributes an
   * entry {@code (wordlist index -> 1)}. A non-empty label contributes an entry
   * {@code (labelKey -> index of the label in labels)}.
   *
   * <p>When {@code flagTest} is {@code false}, labels and features not seen before are
   * registered in {@code labels} / {@code wordlist}. When it is {@code true}, nothing is
   * registered: unseen features are skipped and an unseen label aborts the call.
   *
   * @param strFeatures the feature strings of one instance
   * @param label the instance label; the empty string means "no label entry"
   * @param flagTest {@code true} to leave {@code labels} and {@code wordlist} unmodified
   * @return the sparse vector, or {@code null} if there are no features or if
   *     {@code flagTest} is set and the non-empty label is not in {@code labels}
   */
  public TreeMap<Integer, Integer> addStringFeatureVector(
      String[] strFeatures, String label, boolean flagTest) {
    // Collapse duplicates. Elements are added one at a time to a default-sized set so that the
    // iteration order below (which decides the ids given to new features) stays the same.
    HashSet<String> uniqueFeatures = new HashSet<String>();
    for (int i = 0; i < strFeatures.length; i++) {
      uniqueFeatures.add(strFeatures[i]);
    }
    if (uniqueFeatures.isEmpty()) {
      return null;
    }

    TreeMap<Integer, Integer> sparse = new TreeMap<Integer, Integer>();

    if (!label.isEmpty()) {
      if (!labels.contains(label)) {
        if (flagTest) {
          // Unknown label while testing: the instance cannot be encoded.
          return null;
        }
        labels.add(label);
      }
      sparse.put(labelKey, labels.indexOf(label));
    }

    for (String token : uniqueFeatures) {
      if (!wordlist.contains(token)) {
        if (flagTest) {
          // Unknown feature while testing: ignore it.
          continue;
        }
        // New feature gets the next free index.
        wordlist.put(token, wordlist.size());
      }
      sparse.put(wordlist.get(token), 1);
    }
    return sparse;
  }