Ejemplo n.º 1
0
 /**
  * Counts the words found in the dumped search results of a request and returns the first
  * entries polled from the queue created by {@code createPriorityQueue()}.
  *
  * <p>The dump file produced by {@code dumpSearchResults} is read line by line, each line is
  * tokenized (with stop-word removal enabled), and every token that is not contained in
  * {@code request.query} is counted. At most {@code request.pageSize} entries are returned.
  *
  * @param user the user passed on to {@code dumpSearchResults}
  * @param request the request; its {@code query} and {@code pageSize} fields are read here
  * @return a {@code Keywords} object whose {@code keywords} array holds the polled entries
  * @throws ParseException declared by the signature; presumably raised by
  *     {@code dumpSearchResults} (not visible here)
  * @throws IOException if the dump file cannot be opened
  */
 public Keywords findKeywords(User user, Request request) throws ParseException, IOException {
   Path dumpFile = dumpSearchResults(user, request);
   TObjectIntHashMap<String> words = new TObjectIntHashMap<>();
   // try-with-resources releases the scanner (and the file behind it) even when
   // tokenizing or counting throws.
   try (Scanner scanner = new Scanner(dumpFile)) {
     while (scanner.hasNextLine()) {
       String line = scanner.nextLine();
       for (String word : StringUtils.tokenize(line, /* Remove stop words */ true)) {
         if (request.query.contains(word)) {
           continue;
         }
         // get() on a primitive map never yields null, so a null check cannot detect a
         // missing key; adjustOrPutValue adds 1 to an existing count or inserts 1.
         words.adjustOrPutValue(word, 1, 1);
       }
     }
   }
   PriorityQueue<WordAndCount> pq = createPriorityQueue();
   words.forEachEntry((word, count) -> pq.add(new WordAndCount(word, count)));
   Keywords kw = new Keywords();
   WordAndCount[] wc = new WordAndCount[Math.min(request.pageSize, pq.size())];
   for (int i = 0; i < wc.length; i++) {
     wc[i] = pq.poll();
   }
   kw.keywords = wc;
   return kw;
 }
Ejemplo n.º 2
0
 /**
  * Returns the number stored for {@code text}, assigning a new one when the lookup yields 0.
  *
  * <p>A lookup result of 0 is treated as "not yet known". In that case the next value of
  * {@code current} is assigned, stored under a copy of {@code text}, and the pair is appended
  * to {@code writer}.
  *
  * @param text the key to look up
  * @return the number already stored for {@code text}, or the newly assigned one
  * @throws IOException if appending the new pair to the writer fails
  */
 public int get(Text text) throws IOException {
   final int known = numbers.get(text);
   if (known != 0) {
     return known;
   }

   final int assigned = current++;
   // Store a copy so later changes to the caller's Text instance do not affect the key.
   numbers.put(new Text(text), assigned);
   value.set(assigned);
   writer.append(text, value);
   return assigned;
 }
Ejemplo n.º 3
0
 public ValidityChecker(int ari, IntVar[] vars) {
   arity = ari;
   sortedvs = new IntVar[arity];
   mapinit = new TObjectIntHashMap<IntVar>(arity);
   position = new int[arity];
   for (int i = 0; i < vars.length; i++) {
     sortedvs[i] = vars[i];
     mapinit.put(vars[i], i);
     position[i] = i;
   }
 }
Ejemplo n.º 4
0
 /**
  * Restores this alphabet from a serialization stream.
  *
  * <p>The stream is read in this order: an {@code int} version, an {@code int} entry count,
  * then that many objects. Each object is appended to {@code list} and mapped to its index in
  * {@code map}. When the version is greater than 0, one more object is read and stored as the
  * instance id.
  *
  * @param in the stream to read from
  * @throws IOException if reading from the stream fails
  * @throws ClassNotFoundException if the class of a serialized object cannot be found
  */
 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
   final int version = in.readInt();
   final int size = in.readInt();

   list = new ArrayList(size);
   map = new TObjectIntHashMap(size);

   int index = 0;
   while (index < size) {
     final Object entry = in.readObject();
     map.put(entry, index);
     list.add(entry);
     index++;
   }

   // The instance id is only present in streams written with a version above 0.
   if (version > 0) {
     instanceId = (VMID) in.readObject();
   }
 }
Ejemplo n.º 5
0
  /**
   * Looks up the id of an entry, optionally registering the entry when it is unknown.
   *
   * @param entry the item to look up; must not be {@code null}
   * @param isUpdate when {@code true}, an unknown entry is added and receives the next id
   *     (the current size of the list)
   * @return the id of the entry, or {@code -1} if it is unknown and {@code isUpdate} is
   *     {@code false}
   * @throws IllegalArgumentException if {@code entry} is {@code null}
   */
  public int lookup(Object entry, boolean isUpdate) {
    if (entry == null) {
      throw new IllegalArgumentException("Can't find \"null\" entry in Dictionary");
    }

    if (map.containsKey(entry)) {
      return map.get(entry);
    }
    if (!isUpdate) {
      return -1;
    }

    // Unknown entry: its id is the position it is about to take in the list.
    final int newId = list.size();
    map.put(entry, newId);
    list.add(entry);
    return newId;
  }
Ejemplo n.º 6
0
  public Dictionary(SequenceFile.Reader reader, boolean reverse) throws IOException {
    this.writer = null;
    this.numbers = new TObjectIntHashMap<Text>();
    if (!reverse) {
      this.reverse = null;
    } else {
      this.reverse = new TIntObjectHashMap<Text>();
    }
    Text text;

    while (reader.next(text = new Text(), value)) {
      numbers.put(text, value.get());
      if (reverse) {
        this.reverse.put(value.get(), text);
      }
    }
  }
Ejemplo n.º 7
0
  /**
   * Converts string features and a label into a sparse vector of feature indices.
   *
   * <p>Duplicate features are collapsed. A non-empty label is stored under {@code labelKey} as
   * its index in {@code labels}. Each known feature is stored as {@code index -> 1}. In
   * training mode ({@code flagTest == false}) unknown labels and features are registered
   * first; in test mode unknown features are skipped.
   *
   * @param strFeatures the feature strings
   * @param label the label, or the empty string for no label
   * @param flagTest {@code true} for test data, in which case nothing new is registered
   * @return the vector, or {@code null} if there are no features or if a test-mode label is
   *     unknown
   */
  public TreeMap<Integer, Integer> addStringFeatureVector(
      String[] strFeatures, String label, boolean flagTest) {
    HashSet<String> uniqueFeatures = new HashSet<String>();
    TreeMap<Integer, Integer> result = new TreeMap<Integer, Integer>();

    for (String f : strFeatures) {
      uniqueFeatures.add(f);
    }
    if (uniqueFeatures.isEmpty()) {
      return null;
    }

    if (!label.equals("")) {
      int labelIndex = labels.indexOf(label);
      if (labelIndex < 0) {
        if (flagTest) {
          // An unknown label in test data cannot be encoded.
          return null;
        }
        labels.add(label);
        labelIndex = labels.indexOf(label);
      }
      result.put(labelKey, labelIndex);
    }

    for (String f : uniqueFeatures) {
      final boolean known = wordlist.contains(f);
      if (!known && flagTest) {
        continue;
      }
      if (!known) {
        // New features receive the next free index.
        wordlist.put(f, wordlist.size());
      }
      result.put(wordlist.get(f), 1);
    }
    return result;
  }
  /**
   * Annotates predicate-argument labels of dependency trees read from standard input.
   *
   * <p>For every argument of every predicate-argument structure, the dependency type and the
   * argument position type are appended to the argument label as {@code ":depType:argPos"}.
   * Each tree is then written to standard output through the CaboCha output pipe. After all
   * input is consumed, the number of occurrences of each {@code depType:argPos} combination
   * per original label is printed to standard error, tab-separated.
   *
   * <p>The pattern files {@code ipa_head_pat.txt} and {@code ipa_func_pat.txt} are read from
   * the working directory. An {@link IOException} is printed and otherwise ignored.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    HashMap<String, TObjectIntHashMap<String>> stat =
        new HashMap<String, TObjectIntHashMap<String>>();

    try {
      HashSet<String> headPat = PatternFileParser.parse(new File("ipa_head_pat.txt"));
      HashSet<String> funcPat = PatternFileParser.parse(new File("ipa_func_pat.txt"));

      CaboCha2Dep pipe = new CaboCha2Dep(System.in);
      JapaneseDependencyTree2CaboCha caboChaOutPipe = new JapaneseDependencyTree2CaboCha();
      caboChaOutPipe.setFormat(CaboChaFormat.OLD);

      BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, "utf-8"));
      try {
        while (!pipe.eof()) {
          DependencyTree tree = pipe.pipePerSentence();
          if (tree == null) {
            continue;
          }
          JapaneseDependencyTreeLib.setBunsetsuHead(tree, funcPat, headPat);
          PredicateArgumentStructure[] pasList = tree.getPASList();

          for (int j = 0; j < pasList.length; j++) {
            int predId = pasList[j].getPredicateId();
            String predType = pasList[j].predicateType;
            int[] aIds = pasList[j].getIds();
            String[] aLabels = pasList[j].getLabels();
            for (int k = 0; k < aIds.length; k++) {
              DepType dt = getDepType(tree, predId, aIds[k]);
              ArgPositionType apt = getArgPositionType(tree, aIds[k]);
              // Build the statistics key once instead of re-concatenating it per use.
              String statKey = dt.toString() + ":" + apt.toString();

              // Statistics are grouped by the label as it was before the suffix is added.
              TObjectIntHashMap<String> inner = stat.get(aLabels[k]);
              if (inner == null) {
                inner = new TObjectIntHashMap<String>();
                stat.put(aLabels[k], inner);
              }
              if (!inner.containsKey(statKey)) {
                inner.put(statKey, 0);
              }
              inner.increment(statKey);
              aLabels[k] += ":" + statKey;
            }
          }
          StringBuilder resultStr = new StringBuilder();
          resultStr.append(caboChaOutPipe.pipePerSentence(tree));
          writer.write(resultStr.toString());
        }
      } finally {
        // The writer buffers in memory; without a flush the tail of the output would be
        // lost when the JVM exits. System.out itself is deliberately left open.
        writer.flush();
      }

      // print statistics
      for (String key : stat.keySet()) {
        TObjectIntHashMap<String> inner = stat.get(key);
        for (TObjectIntIterator<String> iit = inner.iterator(); iit.hasNext(); ) {
          iit.advance();
          System.err.print(key + "\t" + iit.key() + "\t" + iit.value() + "\n");
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Ejemplo n.º 9
0
  /**
   * Returns the encoded output stream of the underlying {@link Stream}'s encoder.
   *
   * <p>On the first call the shared encoded stream and its main iterator are created from the
   * delegate's input stream. Every call then registers a new fan-out list and returns a
   * sequential stream backed by a {@code Copy} iterator over that list.
   *
   * @return the encoded output stream, or {@code null} if an exception was raised while
   *     building it (the exception is printed and not rethrown)
   * @throws IllegalStateException if this stream is already terminal, or if no encoder has
   *     been configured yet
   */
  public Stream<int[]> getOutputStream() {
    if (isTerminal()) {
      throw new IllegalStateException("Stream is already \"terminal\" (operated upon or empty)");
    }

    final MultiEncoder encoder = (MultiEncoder) getEncoder();
    if (encoder == null) {
      throw new IllegalStateException(
          "setLocalParameters(Parameters) must be called before calling this method.");
    }

    // Protect outputStream formation and creation of "fan out" also make sure
    // that no other thread is trying to update the fan out lists
    Stream<int[]> retVal = null;
    // Acquire the lock BEFORE entering the try block: if lock() itself failed inside the
    // try, the finally clause would call unlock() on a lock this thread does not hold.
    criticalAccessLock.lock();
    try {
      final String[] fieldNames = getFieldNames();
      final FieldMetaType[] fieldTypes = getFieldTypes();

      if (outputStream == null) {
        if (indexFieldMap.isEmpty()) {
          for (int i = 0; i < fieldNames.length; i++) {
            indexFieldMap.put(fieldNames[i], i);
          }
        }

        // NOTE: The "inputMap" here is a special local implementation
        //       of the "Map" interface, overridden so that we can access
        //       the keys directly (without hashing). This map is only used
        //       for this use case so it is ok to use this optimization as
        //       a convenience.
        if (inputMap == null) {
          inputMap = new InputMap();
          inputMap.fTypes = fieldTypes;
        }

        final boolean isParallel = delegate.getInputStream().isParallel();

        output = new ArrayList<>();

        outputStream =
            delegate
                .getInputStream()
                .map(
                    row -> {
                      String[] arr = (String[]) row;
                      inputMap.arr = arr;
                      return input(arr, fieldNames, fieldTypes, output, isParallel);
                    });

        mainIterator = outputStream.iterator();
      }

      // Each caller gets its own fan-out list, wrapped by a Copy iterator.
      LinkedList<int[]> fanOut = new LinkedList<int[]>();
      fanOuts.add(fanOut);
      Copy copy = new Copy(fanOut);

      retVal =
          StreamSupport.stream(
              Spliterators.spliteratorUnknownSize(
                  copy, Spliterator.ORDERED | Spliterator.NONNULL | Spliterator.IMMUTABLE),
              false);

    } catch (Exception e) {
      // Existing best-effort behaviour: report the failure and return null to the caller.
      e.printStackTrace();
    } finally {
      criticalAccessLock.unlock();
    }

    return retVal;
  }
Ejemplo n.º 10
0
  /**
   * Serializes the input into the collection type configured for this UDF.
   *
   * <p>The input type and target collection type are read from the UDF properties. Array
   * types are built from the tuple returned by {@code getTupleToEncode}; map and set types are
   * built from the bag in the first field of {@code input}, using field 0 of each tuple as
   * the key or element and field 1 as the map value.
   *
   * @param input the tuple to encode
   * @return the serialized collection
   * @throws IOException declared by the signature
   * @throws RuntimeException if the configured collection type is not supported, or wrapping
   *     an {@code ExecException} raised while reading the input
   */
  public DataByteArray exec(Tuple input) throws IOException {
    try {
      final Properties udfProps = UDFContext.getUDFContext().getUDFProperties(this.getClass());
      final byte inputType = Byte.parseByte(udfProps.getProperty(INPUT_TYPE_SIGNATURE));
      final byte arrayType = Byte.parseByte(udfProps.getProperty(COLLECTION_TYPE_SIGNATURE));

      if (arrayType == PigCollection.INT_ARRAY) {
        final Tuple source = getTupleToEncode(input, inputType);
        final int[] ints = new int[source.size()];
        for (int idx = 0; idx < source.size(); idx++) {
          ints[idx] = (Integer) source.get(idx);
        }
        return PigCollection.serialize(ints);
      }

      if (arrayType == PigCollection.FLOAT_ARRAY) {
        final Tuple source = getTupleToEncode(input, inputType);
        final float[] floats = new float[source.size()];
        for (int idx = 0; idx < source.size(); idx++) {
          floats[idx] = (Float) source.get(idx);
        }
        return PigCollection.serialize(floats);
      }

      if (arrayType == PigCollection.INT_INT_MAP) {
        final DataBag pairs = (DataBag) input.get(0);
        final TIntIntHashMap intToInt = new TIntIntHashMap((int) pairs.size());
        for (Tuple pair : pairs) {
          final int key = (Integer) pair.get(0);
          final int val = (Integer) pair.get(1);
          intToInt.put(key, val);
        }
        return PigCollection.serialize(intToInt);
      }

      if (arrayType == PigCollection.INT_FLOAT_MAP) {
        final DataBag pairs = (DataBag) input.get(0);
        final TIntFloatHashMap intToFloat = new TIntFloatHashMap((int) pairs.size());
        for (Tuple pair : pairs) {
          final int key = (Integer) pair.get(0);
          final float val = (Float) pair.get(1);
          intToFloat.put(key, val);
        }
        return PigCollection.serialize(intToFloat);
      }

      if (arrayType == PigCollection.STRING_INT_MAP) {
        final DataBag pairs = (DataBag) input.get(0);
        final TObjectIntHashMap stringToInt = new TObjectIntHashMap((int) pairs.size());
        for (Tuple pair : pairs) {
          final String key = (String) pair.get(0);
          final int val = (Integer) pair.get(1);
          stringToInt.put(key, val);
        }
        return PigCollection.serialize(stringToInt);
      }

      if (arrayType == PigCollection.STRING_FLOAT_MAP) {
        final DataBag pairs = (DataBag) input.get(0);
        final TObjectFloatHashMap stringToFloat = new TObjectFloatHashMap((int) pairs.size());
        for (Tuple pair : pairs) {
          final String key = (String) pair.get(0);
          final float val = (Float) pair.get(1);
          stringToFloat.put(key, val);
        }
        return PigCollection.serialize(stringToFloat);
      }

      if (arrayType == PigCollection.INT_SET) {
        final DataBag elements = (DataBag) input.get(0);
        final TIntHashSet intSet = new TIntHashSet((int) elements.size());
        for (Tuple element : elements) {
          intSet.add((Integer) element.get(0));
        }
        return PigCollection.serialize(intSet);
      }

      if (arrayType == PigCollection.STRING_SET) {
        final DataBag elements = (DataBag) input.get(0);
        final Set<String> stringSet = new HashSet<String>();
        for (Tuple element : elements) {
          stringSet.add((String) element.get(0));
        }
        return PigCollection.serialize(stringSet);
      }

      throw new RuntimeException("Invalid PigCollection type requested");
    } catch (ExecException e) {
      throw new RuntimeException(e);
    }
  }
Ejemplo n.º 11
0
 /**
  * Creates a feature set with an empty label list and a word list that already contains the
  * placeholder entry {@code "NO_USE"} at index 0.
  */
 public FeatureSet() {
   this.labels = new ArrayList<String>();
   this.wordlist = new TObjectIntHashMap<String>();
   // Index 0 is taken by the placeholder, so the first real word gets index 1.
   this.wordlist.put("NO_USE", 0);
 }
Ejemplo n.º 12
0
  /**
   * Appends start and destination coordinates to every row of a tab-separated file.
   *
   * <p>The first line of the input is the header. Its column names are indexed and passed to
   * {@code coordinates} together with the row's tokens and the prefix {@code "S"} (start) or
   * {@code "Z"} (destination). The output repeats each input line followed by the columns
   * {@code x_start}, {@code y_start}, {@code x_dest} and {@code y_dest}; coordinates that
   * could not be resolved are left empty.
   *
   * <p>When a {@code RequestLimitException} occurs, the output is closed, the header plus the
   * current and all unprocessed rows are written to {@code <output>.remaining}, and the JVM
   * exits with status 0.
   *
   * @param args {@code args[0]} is the input file, {@code args[1]} is the output file
   * @throws IOException if reading or writing fails, or if the input has no header line
   */
  public static void main(String[] args) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(args[0]))) {
      /*
       * read header
       */
      String header = reader.readLine();
      if (header == null) {
        throw new IOException("Input file has no header line: " + args[0]);
      }
      TObjectIntHashMap<String> colIndices = new TObjectIntHashMap<String>();
      int idx = 0;
      for (String token : header.split("\t")) {
        colIndices.put(token, idx);
        idx++;
      }

      try (BufferedWriter writer = new BufferedWriter(new FileWriter(args[1]))) {
        /*
         * create new header
         */
        StringBuilder builder = new StringBuilder(header.length() + 50);
        builder.append(header);
        builder.append("\t");
        builder.append("x_start");
        builder.append("\t");
        builder.append("y_start");
        builder.append("\t");
        builder.append("x_dest");
        builder.append("\t");
        builder.append("y_dest");

        writer.write(builder.toString());
        writer.newLine();
        /*
         * parse file
         */
        logger.info("Starting geo coding...");
        int lineCount = 0;
        int invalid = 0;
        String line;
        while ((line = reader.readLine()) != null) {
          String[] tokens = line.split("\t");
          /*
           * get coordinates
           */
          try {
            double[] start = coordinates(tokens, colIndices, "S");
            double[] dest = coordinates(tokens, colIndices, "Z");

            /*
             * write new line
             */
            builder = new StringBuilder(line.length() + 50);
            builder.append(line);
            builder.append("\t");

            if (start != null) {
              builder.append(String.valueOf(start[0]));
              builder.append("\t");
              builder.append(String.valueOf(start[1]));
              builder.append("\t");
            } else {
              // Two empty columns for the missing start coordinates.
              builder.append("\t");
              builder.append("\t");
            }

            if (dest != null) {
              builder.append(String.valueOf(dest[0]));
              builder.append("\t");
              builder.append(String.valueOf(dest[1]));
            } else {
              // One separator is enough: it leaves both trailing columns empty.
              builder.append("\t");
            }

            writer.write(builder.toString());
            writer.newLine();
            // Flush per row so that already geocoded rows survive an abnormal termination.
            writer.flush();

            lineCount++;
            if (start == null || dest == null) {
              invalid++;
            }

            if (lineCount % 20 == 0) {
              logger.info(
                  String.format(
                      "Parsed %1$s lines. %2$s addresses not found.", lineCount, invalid));
            }
          } catch (RequestLimitException e) {
            e.printStackTrace();

            // Close explicitly: System.exit below terminates the JVM without running the
            // try-with-resources cleanup.
            writer.close();

            // The remaining writer must be closed before exiting, otherwise its buffered
            // rows never reach the file.
            try (BufferedWriter remainingWriter =
                new BufferedWriter(new FileWriter(args[1] + ".remaining"))) {
              remainingWriter.write(header);
              remainingWriter.newLine();

              // The row that hit the limit was not written to the output, so it goes first.
              remainingWriter.write(line);
              remainingWriter.newLine();
              while ((line = reader.readLine()) != null) {
                remainingWriter.write(line);
                remainingWriter.newLine();
              }
            }
            logger.info("Writing remaining file done.");
            System.exit(0);
          }
        }
      }
    }

    logger.info("Done.");
  }