/**
 * Extracts the most frequent keywords from the dumped search results for {@code request},
 * excluding stop words and words already present in the query.
 *
 * @param user the user whose search results are dumped
 * @param request carries the query (for exclusion) and {@code pageSize} (max keywords returned)
 * @return a {@code Keywords} holder with at most {@code request.pageSize} entries, ordered by
 *     the priority queue produced by {@code createPriorityQueue()}
 * @throws ParseException if dumping the search results fails to parse
 * @throws IOException if the dump file cannot be read
 */
public Keywords findKeywords(User user, Request request) throws ParseException, IOException {
  Path dumpFile = dumpSearchResults(user, request);
  TObjectIntHashMap<String> words = new TObjectIntHashMap<>();
  // try-with-resources: the original leaked the Scanner when tokenize() threw.
  try (Scanner scanner = new Scanner(dumpFile)) {
    while (scanner.hasNextLine()) {
      String line = scanner.nextLine();
      for (String word : StringUtils.tokenize(line, /* Remove stop words */ true)) {
        // Words that appear in the query itself are not interesting keywords.
        if (request.query.contains(word)) continue;
        // BUG FIX: Trove's get() returns a primitive int (0 when absent), so the old
        // "if (cnt == null)" branch was dead code. adjustOrPutValue performs the
        // increment-or-insert in a single hash lookup.
        words.adjustOrPutValue(word, 1, 1);
      }
    }
  }
  PriorityQueue<WordAndCount> pq = createPriorityQueue();
  words.forEachEntry(
      (a, b) -> pq.add(new WordAndCount(a, b)));
  Keywords kw = new Keywords();
  // Take the top pageSize entries (or fewer if we found fewer distinct words).
  WordAndCount[] wc = new WordAndCount[Math.min(request.pageSize, pq.size())];
  for (int i = 0; i < wc.length; i++) wc[i] = pq.poll();
  kw.keywords = wc;
  return kw;
}
/**
 * Returns the id for {@code text}, assigning and persisting a fresh id on first sight.
 *
 * <p>Trove's no-entry value 0 doubles as the "absent" marker here, so id 0 must never be
 * handed out — NOTE(review): this assumes {@code current} is initialized to a value >= 1;
 * confirm at the site that initializes it.
 *
 * @param text the key to look up
 * @return the existing or freshly assigned id (never 0 under the assumption above)
 * @throws IOException if appending the new mapping via {@code writer} fails
 */
public int get(Text text) throws IOException {
  int res = numbers.get(text);
  if (res == 0) {
    res = current++;
    // Defensive copy: the caller may reuse/mutate its Text instance after this call.
    numbers.put(new Text(text), res);
    value.set(res);
    // Persist the new (text, id) pair so the mapping survives restarts.
    writer.append(text, value);
  }
  return res;
}
/**
 * Builds the checker's bookkeeping structures over the given variables: a working copy of the
 * variables ({@code sortedvs}), each variable's original index ({@code mapinit}), and the
 * identity permutation ({@code position}).
 *
 * @param ari the arity used to size all internal arrays
 * @param vars the constrained variables; NOTE(review): the loop runs to {@code vars.length}
 *     while the arrays are sized {@code ari} — this assumes {@code vars.length == ari};
 *     confirm with callers (a shorter {@code vars} leaves trailing slots null/zero, a longer
 *     one throws {@code ArrayIndexOutOfBoundsException}).
 */
public ValidityChecker(int ari, IntVar[] vars) {
  arity = ari;
  sortedvs = new IntVar[arity];
  mapinit = new TObjectIntHashMap<IntVar>(arity);
  position = new int[arity];
  for (int i = 0; i < vars.length; i++) {
    sortedvs[i] = vars[i];
    mapinit.put(vars[i], i);
    position[i] = i;
  }
}
/**
 * Deserializes the alphabet: rebuilds {@code list} (index -> entry) and {@code map}
 * (entry -> index) from the stream written by the matching {@code writeObject}.
 *
 * <p>Stream layout: version int, size int, {@code size} serialized entries, then (format
 * version > 0 only) the {@code VMID} instance id.
 *
 * @param in the deserialization stream
 * @throws IOException on stream failure
 * @throws ClassNotFoundException if a serialized entry's class cannot be resolved
 */
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
  int version = in.readInt();
  int size = in.readInt();
  // NOTE(review): raw types retained to match the field declarations elsewhere in the file.
  list = new ArrayList(size);
  map = new TObjectIntHashMap(size);
  for (int i = 0; i < size; i++) {
    Object o = in.readObject();
    // Keep both directions in sync: map gives entry -> index, list gives index -> entry.
    map.put(o, i);
    list.add(o);
  }
  // instanceId was added in format version 1; older dumps don't carry it.
  if (version > 0) instanceId = (VMID) in.readObject();
}
/**
 * Looks up the id of {@code entry} in the dictionary.
 *
 * @param entry the item to look up; must not be null
 * @param isUpdate when true, an unknown entry is inserted with the next free id
 * @return the entry's id, or -1 if absent and {@code isUpdate} is false
 * @throws IllegalArgumentException if {@code entry} is null
 */
public int lookup(Object entry, boolean isUpdate) {
  if (entry == null) {
    throw new IllegalArgumentException("Can't find \"null\" entry in Dictionary");
  }
  if (map.containsKey(entry)) {
    return map.get(entry);
  }
  if (!isUpdate) {
    return -1;
  }
  // New entry: the next id is the current size of the backing list.
  int id = list.size();
  map.put(entry, id);
  list.add(entry);
  return id;
}
public Dictionary(SequenceFile.Reader reader, boolean reverse) throws IOException { this.writer = null; this.numbers = new TObjectIntHashMap<Text>(); if (!reverse) { this.reverse = null; } else { this.reverse = new TIntObjectHashMap<Text>(); } Text text; while (reader.next(text = new Text(), value)) { numbers.put(text, value.get()); if (reverse) { this.reverse.put(value.get(), text); } } }
public TreeMap<Integer, Integer> addStringFeatureVector( String[] strFeatures, String label, boolean flagTest) { HashSet<String> setFeatures = new HashSet<String>(); TreeMap<Integer, Integer> vector = new TreeMap<Integer, Integer>(); for (String feature : strFeatures) { setFeatures.add(feature); } if (setFeatures.size() == 0) return null; if (!label.equals("")) if (labels.contains(label)) { vector.put(labelKey, labels.indexOf(label)); } else { if (!flagTest) { labels.add(label); vector.put(labelKey, labels.indexOf(label)); } else { // throw new IllegalArgumentException("Label of Testing Data is error!!!"); return null; } } for (String feature : setFeatures) { if (wordlist.contains(feature)) { vector.put(wordlist.get(feature), 1); } else { if (!flagTest) { wordlist.put(feature, wordlist.size()); vector.put(wordlist.get(feature), 1); } } } return vector; }
public static void main(String[] args) { // String file = args[0]; IntInt2IntHashMap lEr = new IntInt2IntHashMap(); HashMap<String, TObjectIntHashMap<String>> stat = new HashMap<String, TObjectIntHashMap<String>>(); try { HashSet<String> headPat = PatternFileParser.parse(new File("ipa_head_pat.txt")); HashSet<String> funcPat = PatternFileParser.parse(new File("ipa_func_pat.txt")); CaboCha2Dep pipe = new CaboCha2Dep(System.in); JapaneseDependencyTree2CaboCha caboChaOutPipe = new JapaneseDependencyTree2CaboCha(); caboChaOutPipe.setFormat(CaboChaFormat.OLD); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, "utf-8")); while (!pipe.eof()) { DependencyTree tree = pipe.pipePerSentence(); if (tree == null) { continue; } JapaneseDependencyTreeLib.setBunsetsuHead(tree, funcPat, headPat); PredicateArgumentStructure[] pasList = tree.getPASList(); for (int j = 0; j < pasList.length; j++) { int predId = pasList[j].getPredicateId(); String predType = pasList[j].predicateType; int[] aIds = pasList[j].getIds(); String[] aLabels = pasList[j].getLabels(); for (int k = 0; k < aIds.length; k++) { DepType dt = getDepType(tree, predId, aIds[k]); ArgPositionType apt = getArgPositionType(tree, aIds[k]); if (!stat.containsKey(aLabels[k])) { stat.put(aLabels[k], new TObjectIntHashMap<String>()); } TObjectIntHashMap<String> inner = stat.get(aLabels[k]); if (!inner.containsKey(dt.toString() + ":" + apt.toString())) { inner.put(dt.toString() + ":" + apt.toString(), 0); } inner.increment(dt.toString() + ":" + apt.toString()); aLabels[k] += ":" + dt + ":" + apt; } } StringBuilder resultStr = new StringBuilder(); resultStr.append(caboChaOutPipe.pipePerSentence(tree)); writer.write(resultStr.toString()); } // print statistics for (Iterator it = stat.keySet().iterator(); it.hasNext(); ) { String key = (String) it.next(); TObjectIntHashMap inner = stat.get(key); for (TObjectIntIterator iit = inner.iterator(); iit.hasNext(); ) { iit.advance(); System.err.print(key + "\t" + 
iit.key() + "\t" + iit.value() + "\n"); } } } catch (IOException e) { e.printStackTrace(); } }
/**
 * Returns the encoded output stream of the underlying {@link Stream}'s encoder.
 *
 * <p>The first caller lazily builds the shared pipeline (field-name index map, the shared
 * {@code inputMap}, and the mapped delegate stream); every caller then receives its own
 * "fan out" stream backed by a private queue, so multiple consumers can iterate
 * independently off the single shared iterator.
 *
 * @return the encoded output stream, or {@code null} if pipeline construction threw
 *     (NOTE(review): the exception is only printed, not propagated).
 * @throws IllegalStateException if the stream is already terminal, or if no encoder has been
 *     configured via setLocalParameters(Parameters)
 */
public Stream<int[]> getOutputStream() {
  if (isTerminal()) {
    throw new IllegalStateException("Stream is already \"terminal\" (operated upon or empty)");
  }
  // 'encoder' is only used for this configuration check; the cast result is not used below.
  final MultiEncoder encoder = (MultiEncoder) getEncoder();
  if (encoder == null) {
    throw new IllegalStateException(
        "setLocalParameters(Parameters) must be called before calling this method.");
  }
  // Protect outputStream formation and creation of "fan out" also make sure
  // that no other thread is trying to update the fan out lists
  Stream<int[]> retVal = null;
  try {
    criticalAccessLock.lock();
    final String[] fieldNames = getFieldNames();
    final FieldMetaType[] fieldTypes = getFieldTypes();
    if (outputStream == null) {
      // First caller: build the shared pipeline exactly once.
      if (indexFieldMap.isEmpty()) {
        for (int i = 0; i < fieldNames.length; i++) {
          indexFieldMap.put(fieldNames[i], i);
        }
      }
      // NOTE: The "inputMap" here is a special local implementation
      // of the "Map" interface, overridden so that we can access
      // the keys directly (without hashing). This map is only used
      // for this use case so it is ok to use this optimization as
      // a convenience.
      if (inputMap == null) {
        inputMap = new InputMap();
        inputMap.fTypes = fieldTypes;
      }
      final boolean isParallel = delegate.getInputStream().isParallel();
      output = new ArrayList<>();
      outputStream =
          delegate
              .getInputStream()
              .map(
                  l -> {
                    String[] arr = (String[]) l;
                    inputMap.arr = arr;
                    return input(arr, fieldNames, fieldTypes, output, isParallel);
                  });
      mainIterator = outputStream.iterator();
    }
    // Every caller: register a private queue ("fan out") and wrap it in a fresh stream.
    LinkedList<int[]> l = new LinkedList<int[]>();
    fanOuts.add(l);
    Copy copy = new Copy(l);
    retVal =
        StreamSupport.stream(
            Spliterators.spliteratorUnknownSize(
                copy, Spliterator.ORDERED | Spliterator.NONNULL | Spliterator.IMMUTABLE),
            false);
  } catch (Exception e) {
    // NOTE(review): swallowed — the caller receives null on failure.
    e.printStackTrace();
  } finally {
    criticalAccessLock.unlock();
  }
  return retVal;
}
/**
 * Pig UDF entry point: serializes the input tuple/bag into the binary PigCollection form
 * selected by the COLLECTION_TYPE_SIGNATURE property (int/float arrays, int/float-valued
 * maps keyed by int or String, or int/String sets).
 *
 * @param input for the array types, a tuple of scalars (possibly wrapped, see
 *     {@code getTupleToEncode}); for map/set types, a bag of 2-field (key, value) or
 *     1-field (element) tuples at position 0
 * @return the serialized collection
 * @throws IOException declared for the Pig EvalFunc contract
 * @throws RuntimeException wrapping any ExecException, or for an unknown collection type
 */
public DataByteArray exec(Tuple input) throws IOException {
  try {
    // Both type codes were stashed in the UDF context at front-end time.
    Properties prop = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    byte inputType = Byte.parseByte(prop.getProperty(INPUT_TYPE_SIGNATURE));
    byte arrayType = Byte.parseByte(prop.getProperty(COLLECTION_TYPE_SIGNATURE));
    if (arrayType == PigCollection.INT_ARRAY) {
      Tuple t = getTupleToEncode(input, inputType);
      int arr[] = new int[t.size()];
      for (int i = 0; i < t.size(); i++) {
        arr[i] = (Integer) t.get(i);
      }
      return PigCollection.serialize(arr);
    } else if (arrayType == PigCollection.FLOAT_ARRAY) {
      Tuple t = getTupleToEncode(input, inputType);
      float arr[] = new float[t.size()];
      for (int i = 0; i < t.size(); i++) {
        arr[i] = (Float) t.get(i);
      }
      return PigCollection.serialize(arr);
    } else if (arrayType == PigCollection.INT_INT_MAP) {
      DataBag bag = (DataBag) input.get(0);
      // Presize with the bag size to avoid rehashing.
      TIntIntHashMap map = new TIntIntHashMap((int) bag.size());
      for (Tuple t : bag) {
        map.put((Integer) t.get(0), (Integer) t.get(1));
      }
      return PigCollection.serialize(map);
    } else if (arrayType == PigCollection.INT_FLOAT_MAP) {
      DataBag bag = (DataBag) input.get(0);
      TIntFloatHashMap map = new TIntFloatHashMap((int) bag.size());
      for (Tuple t : bag) {
        map.put((Integer) t.get(0), (Float) t.get(1));
      }
      return PigCollection.serialize(map);
    } else if (arrayType == PigCollection.STRING_INT_MAP) {
      DataBag bag = (DataBag) input.get(0);
      TObjectIntHashMap map = new TObjectIntHashMap((int) bag.size());
      for (Tuple t : bag) {
        map.put((String) t.get(0), (Integer) t.get(1));
      }
      return PigCollection.serialize(map);
    } else if (arrayType == PigCollection.STRING_FLOAT_MAP) {
      DataBag bag = (DataBag) input.get(0);
      TObjectFloatHashMap map = new TObjectFloatHashMap((int) bag.size());
      for (Tuple t : bag) {
        map.put((String) t.get(0), (Float) t.get(1));
      }
      return PigCollection.serialize(map);
    } else if (arrayType == PigCollection.INT_SET) {
      DataBag bag = (DataBag) input.get(0);
      TIntHashSet set = new TIntHashSet((int) bag.size());
      for (Tuple t : bag) {
        set.add((Integer) t.get(0));
      }
      return PigCollection.serialize(set);
    } else if (arrayType == PigCollection.STRING_SET) {
      DataBag bag = (DataBag) input.get(0);
      Set<String> set = new HashSet<String>();
      for (Tuple t : bag) {
        set.add((String) t.get(0));
      }
      return PigCollection.serialize(set);
    } else {
      throw new RuntimeException("Invalid PigCollection type requested");
    }
  } catch (ExecException e) {
    // Wrap as unchecked per Pig UDF convention, preserving the cause.
    throw new RuntimeException(e);
  }
}
/**
 * Creates an empty feature set. Feature id 0 is reserved by the "NO_USE" placeholder so
 * that real features start at id 1.
 */
public FeatureSet() {
  labels = new ArrayList<String>();
  wordlist = new TObjectIntHashMap<String>();
  wordlist.put("NO_USE", 0);
}
/** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(args[0])); /* * read header */ String header = reader.readLine(); TObjectIntHashMap<String> colIndices = new TObjectIntHashMap<String>(); int idx = 0; for (String token : header.split("\t")) { colIndices.put(token, idx); idx++; } /* * create new header */ BufferedWriter writer = new BufferedWriter(new FileWriter(args[1])); StringBuilder builder = new StringBuilder(header.length() + 50); builder.append(header); builder.append("\t"); builder.append("x_start"); builder.append("\t"); builder.append("y_start"); builder.append("\t"); builder.append("x_dest"); builder.append("\t"); builder.append("y_dest"); writer.write(builder.toString()); writer.newLine(); /* * parse file */ logger.info("Starting geo coding..."); int lineCount = 0; int invalid = 0; String line; while ((line = reader.readLine()) != null) { String[] tokens = line.split("\t"); /* * get coordinates */ try { double[] start = coordinates(tokens, colIndices, "S"); double[] dest = coordinates(tokens, colIndices, "Z"); /* * write new line */ builder = new StringBuilder(line.length() + 50); builder.append(line); builder.append("\t"); if (start != null) { builder.append(String.valueOf(start[0])); builder.append("\t"); builder.append(String.valueOf(start[1])); builder.append("\t"); } else { builder.append("\t"); builder.append("\t"); } if (dest != null) { builder.append(String.valueOf(dest[0])); builder.append("\t"); builder.append(String.valueOf(dest[1])); } else { builder.append("\t"); } writer.write(builder.toString()); writer.newLine(); writer.flush(); lineCount++; if (start == null || dest == null) invalid++; if (lineCount % 20 == 0) logger.info( String.format("Parsed %1$s lines. 
%2$s addresses not found.", lineCount, invalid)); } catch (RequestLimitException e) { e.printStackTrace(); writer.close(); BufferedWriter remainingWriter = new BufferedWriter(new FileWriter(args[1] + ".remaining")); remainingWriter.write(header); remainingWriter.newLine(); remainingWriter.write(line); remainingWriter.newLine(); while ((line = reader.readLine()) != null) { remainingWriter.write(line); remainingWriter.newLine(); } logger.info("Writing remaining file done."); System.exit(0); } } writer.close(); logger.info("Done."); }