/**
 * Collects the most frequent words found in the dumped search results of a request.
 *
 * <p>The results are dumped to a file via {@code dumpSearchResults}, read line by line and
 * tokenized with stop-word removal enabled. Words for which {@code request.query.contains(word)}
 * is true are skipped; every other word is counted. At most {@code request.pageSize} entries are
 * then polled from the queue returned by {@code createPriorityQueue()} (presumably ordered by
 * descending count - confirm against that helper).
 *
 * @param user user on whose behalf the search results are dumped
 * @param request request supplying the query text and the maximum number of keywords
 * @return a {@code Keywords} instance whose {@code keywords} array holds the polled entries
 * @throws ParseException propagated from {@code dumpSearchResults}
 * @throws IOException if the dump file cannot be created or opened
 */
public Keywords findKeywords(User user, Request request) throws ParseException, IOException {
   Path dumpFile = dumpSearchResults(user, request);
   TObjectIntHashMap<String> words = new TObjectIntHashMap<>();
   // try-with-resources: the scanner is closed even when tokenizing or counting throws.
   try (Scanner scanner = new Scanner(dumpFile)) {
     while (scanner.hasNextLine()) {
       String line = scanner.nextLine();
       for (String word : StringUtils.tokenize(line, /* Remove stop words */ true)) {
         if (request.query.contains(word)) continue;
         // The map stores primitive ints, so a lookup never yields null; increment an existing
         // count by one or insert 1 for a word seen for the first time.
         words.adjustOrPutValue(word, 1, 1);
       }
     }
   }
   PriorityQueue<WordAndCount> pq = createPriorityQueue();
   words.forEachEntry((a, b) -> pq.add(new WordAndCount(a, b)));
   Keywords kw = new Keywords();
   WordAndCount[] wc = new WordAndCount[Math.min(request.pageSize, pq.size())];
   for (int i = 0; i < wc.length; i++) wc[i] = pq.poll();
   kw.keywords = wc;
   return kw;
  }
 public int get(Text text) throws IOException {
   int res = numbers.get(text);
   if (res == 0) {
     res = current++;
     numbers.put(new Text(text), res);
     value.set(res);
     writer.append(text, value);
   }
   return res;
 }
Esempio n. 3
0
  /**
   * Looks up the id of an entry, optionally registering the entry when it is unknown.
   *
   * @param entry item to look up; must not be {@code null}
   * @param isUpdate when {@code true}, an unknown entry is appended to the list and mapped to its
   *     index in that list
   * @return the id mapped to {@code entry}, or -1 if it is unknown and {@code isUpdate} is
   *     {@code false}
   * @throws IllegalArgumentException if {@code entry} is {@code null}
   */
  public int lookup(Object entry, boolean isUpdate) {
    if (entry == null) {
      throw new IllegalArgumentException("Can't find \"null\" entry in Dictionary");
    }

    if (map.containsKey(entry)) {
      return map.get(entry);
    }
    if (!isUpdate) {
      return -1;
    }

    // New entries receive the next free index, i.e. the current size of the list.
    int id = list.size();
    map.put(entry, id);
    list.add(entry);
    return id;
  }
Esempio n. 4
0
  /**
   * Builds a sparse feature vector from string features and an optional label.
   *
   * <p>Duplicate features are collapsed; each known feature is stored under its {@code wordlist}
   * index with value 1. A non-empty label is stored under {@code labelKey} as its index in
   * {@code labels}.
   *
   * @param strFeatures feature strings of one instance
   * @param label label of the instance; an empty string means "no label"
   * @param flagTest when {@code true}, unknown labels and features are not registered: an
   *     unknown label makes the method return {@code null}, unknown features are skipped
   * @return the vector, or {@code null} if there are no features or the label is unknown while
   *     {@code flagTest} is set
   */
  public TreeMap<Integer, Integer> addStringFeatureVector(
      String[] strFeatures, String label, boolean flagTest) {
    HashSet<String> uniqueFeatures = new HashSet<String>();
    for (int i = 0; i < strFeatures.length; i++) {
      uniqueFeatures.add(strFeatures[i]);
    }
    if (uniqueFeatures.isEmpty()) {
      return null;
    }

    TreeMap<Integer, Integer> vector = new TreeMap<Integer, Integer>();

    if (!label.equals("")) {
      if (!labels.contains(label)) {
        if (flagTest) {
          // Unknown label while testing: the instance cannot be encoded.
          return null;
        }
        labels.add(label);
      }
      vector.put(labelKey, labels.indexOf(label));
    }

    for (String feature : uniqueFeatures) {
      if (!wordlist.contains(feature)) {
        if (flagTest) {
          // Unknown features are ignored while testing.
          continue;
        }
        wordlist.put(feature, wordlist.size());
      }
      vector.put(wordlist.get(feature), 1);
    }
    return vector;
  }
Esempio n. 5
0
  /**
   * Resolves the address stored in one row to a coordinate pair.
   *
   * <p>The address is assembled from the columns {@code prefix + "_STRA"}, {@code "_HNR"},
   * {@code "_PLZ"} (omitted when its value is "-97"), {@code "_ORT"} and {@code "_LAND"},
   * separated by single spaces, and handed to {@code googleLookup}.
   *
   * @param tokens values of one row
   * @param colNames maps a column name to its index in {@code tokens}
   * @param prefix column-name prefix selecting which address of the row to use
   * @return {@code {x, y}} built from the returned longitude and latitude, or {@code null} when
   *     the address is blank or the lookup returns nothing (a warning is logged)
   */
  private static double[] coordinates(
      String tokens[], TObjectIntHashMap<String> colNames, String prefix) {
    StringBuilder address = new StringBuilder();
    address.append(tokens[colNames.get(prefix + "_STRA")]).append(" ");
    address.append(tokens[colNames.get(prefix + "_HNR")]).append(" ");

    String plz = tokens[colNames.get(prefix + "_PLZ")];
    if (!plz.equalsIgnoreCase("-97")) {
      address.append(plz).append(" ");
    }

    address.append(tokens[colNames.get(prefix + "_ORT")]).append(" ");
    address.append(tokens[colNames.get(prefix + "_LAND")]).append(" ");

    // The untrimmed text is what gets sent to the lookup; the trimmed form is used only for the
    // emptiness check and the log message.
    String rawQuery = address.toString();
    String query = rawQuery.trim();

    if (!query.isEmpty()) {
      LatLng coord = googleLookup.requestCoordinate(rawQuery);
      if (coord != null) {
        Coord c = new Coord(coord.getLng().doubleValue(), coord.getLat().doubleValue());
        return new double[] {c.getX(), c.getY()};
      }
    }

    logger.warn(String.format("No results for query \"%1$s\".", query));
    return null;
  }
Esempio n. 6
0
 /**
  * Sorts {@code sortedvs} using this object as the comparator, then stores in
  * {@code position[i]} the value {@code mapinit} holds for the i-th sorted variable.
  */
 public void sortvars() {
   Arrays.sort(sortedvs, this);
   int idx = 0;
   while (idx < arity) {
     position[idx] = mapinit.get(sortedvs[idx]);
     idx++;
   }
 }
  /**
   * Start RDF triple extraction.
   *
   * <p>Creates one {@code QueryExecutor} per item (the first {@code num_items} keys of
   * {@code URI_ID}), runs them on a fixed-size thread pool, waits for all of them to finish,
   * closes the output files and finally writes the metadata and properties index files.
   * Failures are logged rather than propagated; the index files are written in either case.
   */
  private void run() {

    // Size of the worker pool.
    // NOTE(review): the previous code queried Runtime.availableProcessors() and immediately
    // overwrote the result with 4. The effective value (4) is kept and only the dead call is
    // dropped - confirm whether the pool was meant to scale with the machine.
    int n_threads = 4;
    logger.debug("Threads number: " + n_threads);

    ExecutorService executor = Executors.newFixedThreadPool(n_threads);

    metadata_counter = new SynchronizedCounter();
    properties_counter = new SynchronizedCounter();
    metadata_index = new TObjectIntHashMap<String>();
    props_index = new TObjectIntHashMap<String>();

    logger.info("Risorse da interrogare: " + num_items);

    TextFileManager textWriter = null;
    ItemFileManager fileManager = null;

    try {

      if (outputTextFormat) textWriter = new TextFileManager(textFile);

      fileManager = new ItemFileManager(metadataFile, ItemFileManager.WRITE);

      // Fetch the key array once instead of calling keys() again on every iteration.
      Object[] uris = num_items > 0 ? URI_ID.keys() : new Object[0];

      for (int i = 0; i < num_items; i++) {

        String uri = (String) uris[i];

        Runnable worker;

        if (model == null) {
          // create worker thread - extraction from endpoint
          worker =
              new QueryExecutor(
                  uri,
                  URI_ID.get(uri),
                  props_index,
                  graphURI,
                  endpoint,
                  metadata_counter,
                  properties_counter,
                  metadata_index,
                  textWriter,
                  fileManager,
                  depth);
        } else {
          // create worker thread - extraction from tdb local dataset
          worker =
              new QueryExecutor(
                  uri,
                  URI_ID.get(uri),
                  props_index,
                  graphURI,
                  endpoint,
                  metadata_counter,
                  properties_counter,
                  metadata_index,
                  textWriter,
                  fileManager,
                  depth,
                  model);
        }

        executor.execute(worker);
      }

      // This will make the executor accept no new tasks
      // and finish all tasks already in the queue
      executor.shutdown();
      // Wait until all tasks are finished
      executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);

    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers can still observe the interruption.
      Thread.currentThread().interrupt();
      logger.error("Interrupted while waiting for extraction workers to finish", e);
    } catch (Exception e) {
      logger.error("RDF triple extraction failed", e);
    } finally {
      // On the failure paths the pool may still hold queued or running workers; stop them so
      // the pool threads do not outlive this method or write to files that are about to close.
      if (!executor.isTerminated()) {
        executor.shutdownNow();
      }

      // Close each output independently so a failure in one does not leave the other open.
      if (textWriter != null) {
        try {
          textWriter.close();
        } catch (Exception e) {
          logger.error("Failed to close text output file", e);
        }
      }
      if (fileManager != null) {
        try {
          fileManager.close();
        } catch (Exception e) {
          logger.error("Failed to close metadata output file", e);
        }
      }
    }

    // write metadata index file
    TextFileUtils.writeData(metadataFile + "_index", metadata_index);
    // write properties index file
    TextFileUtils.writeData("props_index", props_index);
  }
 /**
  * Returns the number stored for {@code text} without registering anything.
  *
  * @param text key to look up
  * @return whatever {@code numbers.get(text)} yields (presumably 0 when the key is unknown -
  *     confirm against the map's no-entry value)
  */
 public int getOnly(Text text) {
   final int stored = numbers.get(text);
   return stored;
 }
Esempio n. 9
0
  /**
   * Imports bookmarks and their label assignments into the database inside one transaction.
   *
   * <p>Phase 1 writes the bookmarks: for each one an existing row with the same {@code ari} and
   * {@code jenis} is looked up, and depending on {@code tumpuk} the row is kept, or deleted and
   * re-inserted. Phase 2 attaches labels to the resulting row ids using the relative-id maps.
   *
   * @param xbukmak bookmarks to import
   * @param tumpuk when {@code true}, existing bookmarks and existing label assignments are
   *     replaced; when {@code false}, existing ones are left untouched
   * @param bukmakToRelIdMap maps each bookmark to its relative id within the import
   * @param labelRelIdToAbsIdMap maps a relative label id to the label's database id
   * @param bukmak2RelIdToLabelRelIdsMap maps a relative bookmark id to its relative label ids
   */
  public void importBukmak(
      List<Bookmark2> xbukmak,
      boolean tumpuk,
      TObjectIntHashMap<Bookmark2> bukmakToRelIdMap,
      TIntLongHashMap labelRelIdToAbsIdMap,
      TIntObjectHashMap<TIntList> bukmak2RelIdToLabelRelIdsMap) {
    SQLiteDatabase db = helper.getWritableDatabase();
    db.beginTransaction();
    try {
      TIntLongHashMap bukmakRelIdToAbsIdMap = new TIntLongHashMap();

      { // write the new bukmak2 rows
        String[] params1 = new String[1];
        String[] params2 = new String[2];
        for (Bookmark2 bukmak : xbukmak) {
          int bukmak2_relId = bukmakToRelIdMap.get(bukmak);

          params2[0] = String.valueOf(bukmak.ari);
          params2[1] = String.valueOf(bukmak.jenis);

          long _id = -1;

          boolean ada = false;
          Cursor cursor =
              db.query(
                  Db.TABEL_Bukmak2,
                  null,
                  Db.Bukmak2.ari + "=? and " + Db.Bukmak2.jenis + "=?",
                  params2,
                  null,
                  null,
                  null); //$NON-NLS-1$ //$NON-NLS-2$
          // Close the cursor even if reading the _id column throws, matching the handling of
          // the label-count cursor further down.
          try {
            if (cursor.moveToNext()) {
              ada = true;
              _id = cursor.getLong(cursor.getColumnIndexOrThrow(BaseColumns._ID)); /* [1] */
            }
          } finally {
            cursor.close();
          }

          // --------------------------------- _id obtained from
          //  exists  overwrite:     delete insert     [2]
          //  exists !overwrite: (nop)                 [1]
          // !exists  overwrite:            insert     [2]
          // !exists !overwrite:            insert     [2]

          if (ada && tumpuk) {
            params1[0] = String.valueOf(_id);
            db.delete(Db.TABEL_Bukmak2, "_id=?", params1); // $NON-NLS-1$
            db.delete(
                Db.TABEL_Bukmak2_Label, Db.Bukmak2_Label.bukmak2_id + "=?", params1); // $NON-NLS-1$
          }
          if ((ada && tumpuk) || (!ada)) {
            _id = db.insert(Db.TABEL_Bukmak2, null, bukmak.toContentValues()); /* [2] */
          }

          // map it
          bukmakRelIdToAbsIdMap.put(bukmak2_relId, _id);
        }
      }

      { // now attach the labels
        String where = Db.Bukmak2_Label.bukmak2_id + "=?"; // $NON-NLS-1$
        String[] params = {null};
        ContentValues cv = new ContentValues();

        // nlabel>0  overwrite:  delete insert
        // nlabel>0 !overwrite: (nop)
        // nlabel=0  overwrite:         insert
        // nlabel=0 !overwrite:         insert

        for (int bukmak2_relId : bukmak2RelIdToLabelRelIdsMap.keys()) {
          TIntList label_relIds = bukmak2RelIdToLabelRelIdsMap.get(bukmak2_relId);

          long bukmak2_id = bukmakRelIdToAbsIdMap.get(bukmak2_relId);

          if (bukmak2_id > 0) {
            params[0] = String.valueOf(bukmak2_id);

            // check how many labels already exist for this bukmak2_id
            int nlabel = 0;
            Cursor c =
                db.rawQuery(
                    "select count(*) from " + Db.TABEL_Bukmak2_Label + " where " + where,
                    params); //$NON-NLS-1$ //$NON-NLS-2$
            try {
              c.moveToNext();
              nlabel = c.getInt(0);
            } finally {
              c.close();
            }

            if (nlabel > 0 && tumpuk) {
              db.delete(Db.TABEL_Bukmak2_Label, where, params);
            }
            if ((nlabel > 0 && tumpuk) || (!(nlabel > 0))) {
              for (int label_relId : label_relIds.toArray()) {
                long label_id = labelRelIdToAbsIdMap.get(label_relId);
                if (label_id > 0) {
                  cv.put(Db.Bukmak2_Label.bukmak2_id, bukmak2_id);
                  cv.put(Db.Bukmak2_Label.label_id, label_id);
                  db.insert(Db.TABEL_Bukmak2_Label, null, cv);
                } else {
                  // a non-positive label id is treated as invalid and skipped
                  Log.w(TAG, "label_id ngaco!: " + label_id); // $NON-NLS-1$
                }
              }
            }
          } else {
            // a non-positive bookmark id is treated as invalid and skipped
            Log.w(TAG, "bukmak2_id ngaco!: " + bukmak2_id); // $NON-NLS-1$
          }
        }
      }

      db.setTransactionSuccessful();
    } finally {
      db.endTransaction();
    }
  }
  /**
   * Starts two nodes tagged {@code rack_1}, creates two indices, then adds a third node tagged
   * {@code rack_2} and expects 10 shards to be located on {@code node3}. The cluster state is
   * polled for up to roughly ten seconds before the final assertion.
   */
  @Test
  public void testSimpleAwareness() throws Exception {
    Settings commonSettings =
        ImmutableSettings.settingsBuilder()
            .put("cluster.routing.schedule", "10ms")
            .put("cluster.routing.allocation.awareness.attributes", "rack_id")
            .build();

    logger.info("--> starting 2 nodes on the same rack");
    startNode(
        "node1",
        ImmutableSettings.settingsBuilder().put(commonSettings).put("node.rack_id", "rack_1"));
    startNode(
        "node2",
        ImmutableSettings.settingsBuilder().put(commonSettings).put("node.rack_id", "rack_1"));

    client("node1").admin().indices().prepareCreate("test1").execute().actionGet();
    client("node1").admin().indices().prepareCreate("test2").execute().actionGet();

    ClusterHealthResponse initialHealth =
        client("node1")
            .admin()
            .cluster()
            .prepareHealth()
            .setWaitForEvents(Priority.LANGUID)
            .setWaitForGreenStatus()
            .execute()
            .actionGet();
    assertThat(initialHealth.isTimedOut(), equalTo(false));

    logger.info("--> starting 1 node on a different rack");
    startNode(
        "node3",
        ImmutableSettings.settingsBuilder().put(commonSettings).put("node.rack_id", "rack_2"));

    final long startedAt = System.currentTimeMillis();
    TObjectIntHashMap<String> shardsPerNode;
    // On slow machines the initial relocation might be delayed, so poll until node3 holds the
    // expected number of shards or the time budget is used up.
    do {
      Thread.sleep(100);
      logger.info("--> waiting for no relocation");
      ClusterHealthResponse relocationHealth =
          client("node1")
              .admin()
              .cluster()
              .prepareHealth()
              .setWaitForEvents(Priority.LANGUID)
              .setWaitForGreenStatus()
              .setWaitForNodes("3")
              .setWaitForRelocatingShards(0)
              .execute()
              .actionGet();
      assertThat(relocationHealth.isTimedOut(), equalTo(false));

      logger.info("--> checking current state");
      ClusterState clusterState =
          client("node1").admin().cluster().prepareState().execute().actionGet().getState();

      // count the shards per node name; 10 of them are expected on node3
      shardsPerNode = new TObjectIntHashMap<String>();
      for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
        for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
          for (ShardRouting shardRouting : indexShardRoutingTable) {
            String nodeName = clusterState.nodes().get(shardRouting.currentNodeId()).name();
            shardsPerNode.adjustOrPutValue(nodeName, 1, 1);
          }
        }
      }
    } while (shardsPerNode.get("node3") != 10
        && (System.currentTimeMillis() - startedAt) < 10000);
    assertThat(shardsPerNode.get("node3"), equalTo(10));
  }