/**
 * Calls a CKAN action URL and returns the {@code result.count} value of its JSON reply.
 *
 * <p>The request is sent with the crawler User-Agent and a retry handler configured for three
 * retries. A non-200 status, an I/O error, or a reply that cannot be parsed is logged and
 * reported as 0; the connection is always released.
 *
 * @param url full URL of the CKAN action to call
 * @return the integer found at {@code result.count}, or 0 when it is absent or the call failed
 */
public static int GetCountByCkan3(String url) {
    int count = 0;
    HttpClient client = new HttpClient();
    LOG.info("**** INPUT SPLIT COUNT *** " + url);
    GetMethod method = new GetMethod(url);
    method
        .getParams()
        .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false));

    method.setRequestHeader("User-Agent", "Hammer Project - SantaMaria crawler");
    method
        .getParams()
        .setParameter(HttpMethodParams.USER_AGENT, "Hammer Project - SantaMaria crawler");

    try {
      int statusCode = client.executeMethod(method);
      if (statusCode != HttpStatus.SC_OK) {
        // Same status check as the other CKAN calls: do not try to parse an error page.
        LOG.error("Method failed: " + method.getStatusLine());
        return count;
      }
      byte[] responseBody = method.getResponseBody();
      // Decode explicitly as UTF-8 (the JSON interchange encoding) instead of relying on the
      // platform default charset.
      Document doc = Document.parse(new String(responseBody, "UTF-8"));
      if (doc.containsKey("result")) {
        Integer found = ((Document) doc.get("result")).getInteger("count");
        if (found != null) {
          count = found;
          LOG.info("Find --> " + count);
        }
      }
    } catch (Exception e) {
      // Pass the throwable separately so the logger records the stack trace.
      LOG.error("Unable to read count from " + url, e);
    } finally {
      method.releaseConnection();
    }
    return count;
  }
// Example #2
// 0
  /**
   * Translates an {@code IndexOptions} annotation into the driver's
   * {@code com.mongodb.client.model.IndexOptions}.
   *
   * <p>String attributes that are empty and an {@code expireAfterSeconds} of -1 are treated as
   * "not set" and are not copied. A warning is logged when {@code dropDups} is requested.
   *
   * @param options the annotation values to convert
   * @param background forces a background build when true, in addition to the annotation's flag
   * @return the populated driver index options
   */
  @SuppressWarnings("deprecation")
  com.mongodb.client.model.IndexOptions convert(
      final IndexOptions options, final boolean background) {
    if (options.dropDups()) {
      LOG.warning(
          "dropDups value is no longer supported by the server.  Please set this value to false and "
              + "validate your system behaves as expected.");
    }

    final com.mongodb.client.model.IndexOptions converted =
        new com.mongodb.client.model.IndexOptions();
    converted.background(options.background() || background);
    converted.sparse(options.sparse());
    converted.unique(options.unique());

    // Optional attributes: copied only when they differ from their "unset" marker.
    final String language = options.language();
    if (!language.isEmpty()) {
      converted.defaultLanguage(language);
    }

    final String languageOverride = options.languageOverride();
    if (!languageOverride.isEmpty()) {
      converted.languageOverride(languageOverride);
    }

    final String indexName = options.name();
    if (!indexName.isEmpty()) {
      converted.name(indexName);
    }

    if (options.expireAfterSeconds() != -1) {
      converted.expireAfter((long) options.expireAfterSeconds(), TimeUnit.SECONDS);
    }

    final String partialFilter = options.partialFilter();
    if (!partialFilter.isEmpty()) {
      converted.partialFilterExpression(Document.parse(partialFilter));
    }

    if (!options.collation().locale().isEmpty()) {
      converted.collation(convert(options.collation()));
    }

    return converted;
  }
  /**
   * Builds an aggregation pipeline from raw JSON stage strings, runs it against the zip-codes
   * collection and prints every resulting document as JSON.
   */
  private void fromRawString() {
    // Stage 1 sums "pop" per state; stage 2 keeps totals of at least one million.
    final Document sumPopulationByState =
        Document.parse("{ $group: { _id: \"$state\", totalPop: { $sum: \"$pop\"}}}");
    final Document atLeastOneMillion = Document.parse("{ $match: { totalPop: {$gte: 1000000}}}");
    final List<Document> stages = Arrays.asList(sumPopulationByState, atLeastOneMillion);

    // Run the pipeline and collect every result before printing anything.
    final List<Document> states = new ArrayList<Document>();
    DBHelper.getZipCodesCollection().aggregate(stages).into(states);

    System.out.println("\nStates with population greater than one million");
    for (int i = 0; i < states.size(); i++) {
      System.out.println(states.get(i).toJson());
    }
  }
    /**
     * Connects to MongoDB using the source's read spec, opens a cursor over the configured
     * collection (filtered when the spec carries a filter) and moves to the first document.
     *
     * @return the result of the initial {@code advance()} call
     */
    @Override
    public boolean start() {
      final Read readSpec = source.spec;
      client = new MongoClient(new MongoClientURI(readSpec.uri()));

      final MongoCollection<Document> collection =
          client.getDatabase(readSpec.database()).getCollection(readSpec.collection());

      // A null filter means "read everything"; otherwise the filter is a JSON query string.
      final String filterJson = readSpec.filter();
      cursor =
          (filterJson == null)
              ? collection.find().iterator()
              : collection.find(Document.parse(filterJson)).iterator();

      return advance();
    }
  /**
   * Gets the data set list from the CKAN repository.
   *
   * <p>Requests the action URL of the current split, stores the raw reply through
   * {@code setOutput}, parses it as JSON and adds the {@code id} of every entry of
   * {@code result.results} to {@code dataset}. Failures are logged and leave {@code dataset}
   * as it was; the connection is always released.
   *
   * <p>4 Big Source --> Direct Link
   */
  @SuppressWarnings("unchecked")
  private void getPackageList() {
    HttpClient client = new HttpClient();
    LOG.info(split.getAction());
    GetMethod method = new GetMethod(split.getAction());
    method
        .getParams()
        .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false));

    method.setRequestHeader("User-Agent", "Hammer Project - SantaMaria crawler");
    method
        .getParams()
        .setParameter(HttpMethodParams.USER_AGENT, "Hammer Project - SantaMaria crawler");

    try {
      int statusCode = client.executeMethod(method);

      if (statusCode != HttpStatus.SC_OK) {
        // Report and stop; no exception is needed just to reach the log statement.
        LOG.error("Method failed: " + method.getStatusLine());
        return;
      }
      // Decode once, explicitly as UTF-8 (the JSON interchange encoding), instead of decoding
      // twice with the platform default charset.
      String body = new String(method.getResponseBody(), "UTF-8");
      LOG.debug(body);
      setOutput(body);

      Document document = Document.parse(getOutput());

      if (document.containsKey("result")) {

        ArrayList<Document> docs =
            (ArrayList<Document>) ((Document) document.get("result")).get("results");

        if (docs == null) {
          LOG.warn("CKAN reply has no result.results array: " + split.getAction());
        } else {
          for (Document doc : docs) {
            this.dataset.add(doc.getString("id"));
          }
        }

        LOG.info("SANTA MARIA CKAN3 RECORD READER found" + this.dataset.size());
      }
    } catch (Exception e) {
      // Pass the throwable separately so the logger records the stack trace.
      LOG.error("Unable to read package list from " + split.getAction(), e);
    } finally {
      method.releaseConnection();
    }
  }
  /**
   * Manual test entry point: calls {@code GetCountByCkan3} once, then fetches ten packages from
   * the catalog.data.gov CKAN search action and logs the raw reply and each package id.
   *
   * @param pArgs command-line arguments (not used)
   * @throws Exception declared by the signature; failures of the HTTP call are caught and logged
   */
  @SuppressWarnings("unchecked")
  public static void main(String[] pArgs) throws Exception {
    GetCountByCkan3("http://catalog.data.gov/api/action/package_search?start=0&rows=1");

    HttpClient client = new HttpClient();
    GetMethod method =
        new GetMethod("http://catalog.data.gov/api/action/package_search?start=0&rows=10");
    method
        .getParams()
        .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false));

    method.setRequestHeader("User-Agent", "Hammer Project - SantaMaria crawler");
    method
        .getParams()
        .setParameter(HttpMethodParams.USER_AGENT, "Hammer Project - SantaMaria crawler");

    try {
      int statusCode = client.executeMethod(method);

      if (statusCode != HttpStatus.SC_OK) {
        // Report and stop; no exception is needed just to reach the log statement.
        LOG.error("Method failed: " + method.getStatusLine());
        return;
      }
      // Decode once, explicitly as UTF-8 (the JSON interchange encoding), instead of decoding
      // twice with the platform default charset.
      String body = new String(method.getResponseBody(), "UTF-8");
      LOG.info(body);

      Document document = Document.parse(body);

      if (document.containsKey("result")) {

        ArrayList<Document> docs =
            (ArrayList<Document>) ((Document) document.get("result")).get("results");

        if (docs == null) {
          LOG.warn("CKAN reply has no result.results array");
        } else {
          for (Document doc : docs) {
            LOG.info(doc.getString("id"));
          }
        }
      }
    } catch (Exception e) {
      // Pass the throwable separately so the logger records the stack trace.
      LOG.error("Package search request failed", e);
    } finally {
      method.releaseConnection();
    }
  }
  /**
   * Caches one aggregated result by adding it to the matching event-detail document.
   *
   * <p>The tuple key is a colon-separated string laid out as
   * {@code tableName:fieldName:timeValue:appId:appVersion:appChannel:appPlatform:eventId:paramKey:paramValue[:tenantId]}.
   * The document is taken from {@code cacheMap} when the key is cached, otherwise looked up
   * through {@code eventDetailInfoDao}, otherwise created. The tuple value is then added to the
   * document's {@code fieldName} counter and the document is put back into {@code cacheMap}.
   *
   * @param cacheMap documents keyed by the identifying parts of the tuple key; updated in place
   * @param tuple the key described above and the amount to add to the counter
   */
  private void doCacheEventDetailInfo(
      Map<String, Document> cacheMap, Tuple2<String, Integer> tuple) {
    logger.debug("come into doCacheEventDetailInfo ==> " + tuple._1() + " <--> " + tuple._2());
    // Parse
    // tableName:fieldName:timeValue:appId:appVersion:appChannel:appPlatform:eventId:paramKey:paramValue(tenantId)
    String[] keys = tuple._1().split(":");
    String tableName = keys[0];
    String fieldName = keys[1];
    String timeValue = keys[2];
    String appId = keys[3];
    String appVersion = keys[4];
    String appChannel = keys[5];
    String appPlatform = keys[6];
    String eventId = keys[7];
    String paramKey = keys[8];
    String paramValue = keys[9];
    boolean hasTenant = keys.length == 11 && keys[10] != null && !keys[10].trim().isEmpty();
    String tenantId = hasTenant ? keys[10] : "";
    // When a tenantId is present it becomes part of the cache key.
    String tenantIdKey = hasTenant ? (":" + keys[10]) : "";

    Document eventDetailInfoDoc = null;

    String keyStr =
        tableName
            + ":"
            + timeValue
            + ":"
            + appId
            + ":"
            + appVersion
            + ":"
            + appChannel
            + ":"
            + appPlatform
            + ":"
            + eventId
            + ":"
            + paramKey
            + ":"
            + paramValue
            + tenantIdKey;
    // On a cache hit, use the cached object.
    if (cacheMap.containsKey(keyStr)) {
      eventDetailInfoDoc = cacheMap.get(keyStr);
    } else {
      eventDetailInfoDoc =
          eventDetailInfoDao.findOneBy(
              tableName,
              timeValue,
              appId,
              appVersion,
              appChannel,
              appPlatform,
              eventId,
              paramKey,
              paramValue,
              tenantId);
    }

    if (eventDetailInfoDoc == null) {
      // Neither cached nor stored yet: build a fresh document carrying the identifying fields.
      EventDetailInfo eventDetailInfoTemp = new EventDetailInfo();
      eventDetailInfoTemp.setEventId(eventId);
      eventDetailInfoTemp.setTimeValue(timeValue);
      eventDetailInfoTemp.setAppId(appId);
      if (hasTenant) {
        eventDetailInfoTemp.setTenantId(tenantId);
      }
      Gson gson = new Gson();
      eventDetailInfoDoc = Document.parse(gson.toJson(eventDetailInfoTemp));
      ObjectId objId = new ObjectId();
      eventDetailInfoDoc.put("_id", objId);
      eventDetailInfoDoc.put("appVersion", appVersion);
      eventDetailInfoDoc.put("appChannel", appChannel);
      eventDetailInfoDoc.put("appPlatform", appPlatform);
      eventDetailInfoDoc.put("paramKey", paramKey);
      eventDetailInfoDoc.put("paramValue", paramValue);
    }

    Object current = eventDetailInfoDoc.get(fieldName);
    if (current == null) {
      eventDetailInfoDoc.put(fieldName, tuple._2().longValue());
    } else {
      // The stored counter may be an Integer or a Long; read it through Number instead of
      // catching ClassCastException.
      long fieldValue = ((Number) current).longValue();
      eventDetailInfoDoc.put(fieldName, fieldValue + tuple._2());
    }
    cacheMap.put(keyStr, eventDetailInfoDoc);
  }
  /**
   * Caches one aggregated result by adding it to the matching version-detail document.
   *
   * <p>The tuple key is a colon-separated string laid out as
   * {@code tableName:fieldName1:fieldName2:fieldName1Value:timeValue:appId:appVersion[:tenantId]}.
   * The document is taken from {@code cacheMap} when the key is cached, otherwise looked up
   * through {@code versionDetailDao}, otherwise created. The tuple value is then added to the
   * document's {@code fieldName2} counter, which is always stored as a {@code long}, and the
   * document is put back into {@code cacheMap}.
   *
   * @param cacheMap documents keyed by the identifying parts of the tuple key; updated in place
   * @param tuple the key described above and the amount to add to the counter
   */
  private void doCacheEventDetailInfo(
      Map<String, Document> cacheMap, Tuple2<String, Integer> tuple) {
    logger.debug("come into doCacheEventDetailInfo ==> " + tuple._1() + " <--> " + tuple._2());
    // Parse tableName:fieldName1:fieldName2:fieldName1Value:timeValue:appId:appVersion(tenantId)
    String[] keys = tuple._1().split(":");
    String tableName = keys[0];
    String fieldName1 = keys[1];
    String fieldName2 = keys[2];
    String fieldName1Value = keys[3];
    String timeValue = keys[4];
    String appId = keys[5];
    String appVersion = keys[6];
    boolean hasTenant = keys.length == 8 && keys[7] != null && !keys[7].trim().isEmpty();
    String tenantId = hasTenant ? keys[7] : "";
    // When a tenantId is present it becomes part of the cache key.
    String tenantIdKey = hasTenant ? (":" + keys[7]) : "";

    Document versionDetailDoc = null;

    // Only the lookup argument that matches fieldName1 is filled; the other two stay null.
    String newUserFromChannal = null;
    String updateUserFromChannal = null;
    String updateUserFromVersion = null;
    if ("newUserFromChannal".equals(fieldName1)) {
      newUserFromChannal = fieldName1Value;
    }
    if ("updateUserFromChannal".equals(fieldName1)) {
      updateUserFromChannal = fieldName1Value;
    }
    if ("updateUserFromVersion".equals(fieldName1)) {
      updateUserFromVersion = fieldName1Value;
    }
    String keyStr =
        tableName
            + ":"
            + timeValue
            + ":"
            + appId
            + ":"
            + appVersion
            + ":"
            + fieldName1
            + ":"
            + fieldName1Value
            + tenantIdKey;
    // On a cache hit, use the cached object.
    if (cacheMap.containsKey(keyStr)) {
      versionDetailDoc = cacheMap.get(keyStr);
    } else {
      versionDetailDoc =
          versionDetailDao.findOneBy(
              tableName,
              timeValue,
              appId,
              appVersion,
              newUserFromChannal,
              updateUserFromChannal,
              updateUserFromVersion,
              tenantId);
    }

    if (versionDetailDoc == null) {
      // Neither cached nor stored yet: build a fresh document carrying the identifying fields.
      VersionDetail versionDetail = new VersionDetail();
      versionDetail.setAppId(appId);
      versionDetail.setAppVersion(appVersion);
      versionDetail.setTimeValue(timeValue);
      if (hasTenant) {
        versionDetail.setTenantId(tenantId);
      }
      Gson gson = new Gson();
      versionDetailDoc = Document.parse(gson.toJson(versionDetail));
      ObjectId objId = new ObjectId();
      versionDetailDoc.put("_id", objId);
      versionDetailDoc.put(fieldName1, fieldName1Value);
    }

    Object current = versionDetailDoc.get(fieldName2);
    if (current == null) {
      // Store the first value as a long too, so the counter has one type from the first write.
      versionDetailDoc.put(fieldName2, tuple._2().longValue());
    } else {
      // The stored counter may be an Integer or a Long; read it through Number instead of
      // catching ClassCastException.
      long fieldValue = ((Number) current).longValue();
      versionDetailDoc.put(fieldName2, fieldValue + tuple._2());
    }
    cacheMap.put(keyStr, versionDetailDoc);
  }
  /**
   * Executes one "upload" test case and checks its outcome against the given assertion.
   *
   * <p>The arguments are first passed through {@code parseHexDocument} for the "source" key, then
   * the source bytes are uploaded to {@code gridFSBucket} under the given filename with the
   * optional {@code chunkSizeBytes} and {@code metadata} options. Any throwable raised while
   * doing so is captured rather than propagated. When the assertion has an "error" key, a
   * captured throwable is required unless the error is "ExtraChunk"; otherwise no throwable is
   * allowed and the files and chunks collections are compared with the assertion's "data" items.
   *
   * @param rawArguments the test case arguments (filename, source, options)
   * @param assertion the expected outcome: either an "error" or a "data" array
   */
  private void doUpload(final BsonDocument rawArguments, final BsonDocument assertion) {
    final BsonDocument arguments = parseHexDocument(rawArguments, "source");
    Throwable failure = null;
    ObjectId uploadedId = null;

    try {
      final String filename = arguments.getString("filename").getValue();
      final InputStream source = new ByteArrayInputStream(arguments.getBinary("source").getData());
      final GridFSUploadOptions uploadOptions = new GridFSUploadOptions();
      final BsonDocument rawOptions = arguments.getDocument("options", new BsonDocument());
      if (rawOptions.containsKey("chunkSizeBytes")) {
        uploadOptions.chunkSizeBytes(rawOptions.getInt32("chunkSizeBytes").getValue());
      }
      if (rawOptions.containsKey("metadata")) {
        uploadOptions.metadata(Document.parse(rawOptions.getDocument("metadata").toJson()));
      }

      uploadedId =
          new MongoOperation<ObjectId>() {
            @Override
            public void execute() {
              gridFSBucket.uploadFromStream(
                  filename, toAsyncInputStream(source), uploadOptions, getCallback());
            }
          }.get();
    } catch (Throwable thrown) {
      failure = thrown;
    }

    // Error expectation: nothing else needs to be read from the collections.
    if (assertion.containsKey("error")) {
      // We don't need to read anything more so don't see the extra chunk
      final boolean extraChunk = assertion.getString("error").getValue().equals("ExtraChunk");
      if (!extraChunk) {
        assertNotNull("Should have thrown an exception", failure);
      }
      return;
    }

    assertNull("Should not have thrown an exception", failure);
    for (BsonValue rawItem : assertion.getArray("data", new BsonArray())) {
      final BsonDocument item = rawItem.asDocument();
      final String target = item.getString("insert", new BsonString("none")).getValue();

      if ("expected.files".equals(target)) {
        final List<BsonDocument> expectedFiles =
            processFiles(
                item.getArray("documents", new BsonArray()), new ArrayList<BsonDocument>());

        assertEquals(getFilesCount(new BsonDocument()), expectedFiles.size());
        final BsonDocument storedFile =
            new MongoOperation<BsonDocument>() {
              @Override
              public void execute() {
                filesCollection.find().first(getCallback());
              }
            }.get();
        // Every expected file document is compared with the first stored files document.
        for (BsonDocument expectedFile : expectedFiles) {
          assertEquals(expectedFile.get("length"), storedFile.get("length"));
          assertEquals(expectedFile.get("chunkSize"), storedFile.get("chunkSize"));
          assertEquals(expectedFile.get("md5"), storedFile.get("md5"));
          assertEquals(expectedFile.get("filename"), storedFile.get("filename"));

          if (expectedFile.containsKey("metadata")) {
            assertEquals(expectedFile.get("metadata"), storedFile.get("metadata"));
          }
        }
        continue;
      }

      if ("expected.chunks".equals(target)) {
        final List<BsonDocument> expectedChunks =
            processChunks(
                item.getArray("documents", new BsonArray()), new ArrayList<BsonDocument>());
        assertEquals(getChunksCount(new BsonDocument()), expectedChunks.size());

        final List<BsonDocument> storedChunks =
            new MongoOperation<List<BsonDocument>>() {
              @Override
              public void execute() {
                chunksCollection.find().into(new ArrayList<BsonDocument>(), getCallback());
              }
            }.get();

        // Chunks are compared position by position.
        int position = 0;
        while (position < expectedChunks.size()) {
          final BsonDocument expectedChunk = expectedChunks.get(position);
          final BsonDocument storedChunk = storedChunks.get(position);
          assertEquals(new BsonObjectId(uploadedId), storedChunk.getObjectId("files_id"));
          assertEquals(expectedChunk.get("n"), storedChunk.get("n"));
          assertEquals(expectedChunk.get("data"), storedChunk.get("data"));
          position++;
        }
      }
    }
  }