/**
 * Queries a CKAN v3 {@code package_search} endpoint and returns the total result count.
 *
 * <p>Parses the JSON response and reads {@code result.count}. Returns 0 when the request
 * fails, the status is not 200, or the response has no {@code result} key.
 *
 * @param url full CKAN API url (e.g. {@code .../api/action/package_search?start=0&rows=1})
 * @return the reported result count, or 0 on any error
 */
public static int GetCountByCkan3(String url) {
  int count = 0;
  HttpClient client = new HttpClient();
  LOG.info("**** INPUT SPLIT COUNT *** " + url);
  GetMethod method = new GetMethod(url);
  method
      .getParams()
      .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false));
  method.setRequestHeader("User-Agent", "Hammer Project - SantaMaria crawler");
  method
      .getParams()
      .setParameter(HttpMethodParams.USER_AGENT, "Hammer Project - SantaMaria crawler");
  try {
    int statusCode = client.executeMethod(method);
    // Fail fast on non-200 responses, consistent with the other CKAN calls in this class.
    if (statusCode != HttpStatus.SC_OK) {
      throw new Exception("Method failed: " + method.getStatusLine());
    }
    byte[] responseBody = method.getResponseBody();
    Document doc = Document.parse(new String(responseBody));
    if (doc.containsKey("result")) {
      count = ((Document) doc.get("result")).getInteger("count");
      LOG.info("Find --> " + count);
    }
  } catch (Exception e) {
    // Log through the project logger only; printStackTrace() bypasses log configuration.
    LOG.error(e);
  } finally {
    method.releaseConnection();
  }
  return count;
}
/**
 * Converts a mapped {@link IndexOptions} instance into the driver's
 * {@link com.mongodb.client.model.IndexOptions}, copying only the options that were
 * explicitly set (non-empty strings / non-default numeric values).
 *
 * <p>Logs a warning when {@code dropDups} is set, since the server no longer supports it.
 *
 * @param options the mapped index options to convert
 * @param background forces the background flag on when {@code true}
 * @return the equivalent driver-level index options
 */
@SuppressWarnings("deprecation")
com.mongodb.client.model.IndexOptions convert(
    final IndexOptions options, final boolean background) {
  if (options.dropDups()) {
    LOG.warning(
        "dropDups value is no longer supported by the server. Please set this value to false and "
            + "validate your system behaves as expected.");
  }
  final com.mongodb.client.model.IndexOptions converted =
      new com.mongodb.client.model.IndexOptions()
          .background(background || options.background())
          .sparse(options.sparse())
          .unique(options.unique());
  final String language = options.language();
  if (!language.isEmpty()) {
    converted.defaultLanguage(language);
  }
  final String languageOverride = options.languageOverride();
  if (!languageOverride.isEmpty()) {
    converted.languageOverride(languageOverride);
  }
  final String indexName = options.name();
  if (!indexName.isEmpty()) {
    converted.name(indexName);
  }
  final int expireAfterSeconds = options.expireAfterSeconds();
  // -1 is the "not set" sentinel for TTL indexes.
  if (expireAfterSeconds != -1) {
    converted.expireAfter((long) expireAfterSeconds, TimeUnit.SECONDS);
  }
  final String partialFilter = options.partialFilter();
  if (!partialFilter.isEmpty()) {
    converted.partialFilterExpression(Document.parse(partialFilter));
  }
  if (!options.collation().locale().isEmpty()) {
    converted.collation(convert(options.collation()));
  }
  return converted;
}
/** Parse a raw string into json for the aggregation framework */ private void fromRawString() { String totalPop = "{ $group: { _id: \"$state\", totalPop: { $sum: \"$pop\"}}}"; String gtOneMillion = "{ $match: { totalPop: {$gte: 1000000}}}"; // // Compose aggregation pipeline List<Document> pipeline = Arrays.asList(Document.parse(totalPop), Document.parse(gtOneMillion)); // // Executes aggregation query List<Document> results = DBHelper.getZipCodesCollection().aggregate(pipeline).into(new ArrayList<Document>()); // // Display aggregation results System.out.println("\nStates with population greater than one million"); for (Document result : results) { System.out.println(result.toJson()); } }
/**
 * Opens the MongoDB connection described by the source spec and positions the cursor,
 * applying the optional JSON filter when one is configured.
 *
 * @return the result of {@link #advance()}, i.e. whether a first document is available
 */
@Override
public boolean start() {
  Read spec = source.spec;
  client = new MongoClient(new MongoClientURI(spec.uri()));
  MongoCollection<Document> collection =
      client.getDatabase(spec.database()).getCollection(spec.collection());
  String filter = spec.filter();
  // No filter configured means a full-collection scan.
  cursor =
      (filter == null)
          ? collection.find().iterator()
          : collection.find(Document.parse(filter)).iterator();
  return advance();
}
/**
 * Gets the data set list from a CKAN repository.
 *
 * <p>Performs an HTTP GET against the split's action URL, stores the raw JSON response via
 * {@link #setOutput(String)}, then collects every {@code result.results[*].id} into
 * {@code this.dataset}. Errors are logged and leave {@code dataset} unchanged.
 *
 * <p>4 Big Source --> Direct Link
 */
@SuppressWarnings("unchecked")
private void getPackageList() {
  HttpClient client = new HttpClient();
  LOG.info(split.getAction());
  GetMethod method = new GetMethod(split.getAction());
  method
      .getParams()
      .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false));
  method.setRequestHeader("User-Agent", "Hammer Project - SantaMaria crawler");
  method
      .getParams()
      .setParameter(HttpMethodParams.USER_AGENT, "Hammer Project - SantaMaria crawler");
  try {
    int statusCode = client.executeMethod(method);
    if (statusCode != HttpStatus.SC_OK) {
      throw new Exception("Method failed: " + method.getStatusLine());
    }
    // Decode the body once and reuse it instead of rebuilding the String three times.
    String responseBody = new String(method.getResponseBody());
    LOG.debug(responseBody);
    setOutput(responseBody);
    Document document = Document.parse(getOutput());
    if (document.containsKey("result")) {
      ArrayList<Document> docs =
          (ArrayList<Document>) ((Document) document.get("result")).get("results");
      for (Document doc : docs) {
        this.dataset.add(doc.getString("id"));
      }
      LOG.info("SANTA MARIA CKAN3 RECORD READER found " + this.dataset.size());
    }
  } catch (Exception e) {
    LOG.error(e);
  } finally {
    method.releaseConnection();
  }
}
/**
 * Manual smoke test: queries the data.gov CKAN endpoint for a count, then fetches and
 * logs the first ten dataset ids.
 *
 * @param pArgs unused command-line arguments
 * @throws Exception never in practice; request errors are caught and logged
 */
@SuppressWarnings("unchecked")
public static void main(String[] pArgs) throws Exception {
  GetCountByCkan3("http://catalog.data.gov/api/action/package_search?start=0&rows=1");
  HttpClient httpClient = new HttpClient();
  GetMethod get =
      new GetMethod("http://catalog.data.gov/api/action/package_search?start=0&rows=10");
  get.getParams()
      .setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false));
  get.setRequestHeader("User-Agent", "Hammer Project - SantaMaria crawler");
  get.getParams()
      .setParameter(HttpMethodParams.USER_AGENT, "Hammer Project - SantaMaria crawler");
  try {
    int statusCode = httpClient.executeMethod(get);
    if (statusCode != HttpStatus.SC_OK) {
      throw new Exception("Method failed: " + get.getStatusLine());
    }
    byte[] body = get.getResponseBody();
    LOG.info(new String(body));
    Document parsed = Document.parse(new String(body));
    if (parsed.containsKey("result")) {
      ArrayList<Document> results =
          (ArrayList<Document>) ((Document) parsed.get("result")).get("results");
      for (Document result : results) {
        LOG.info(result.getString("id"));
      }
    }
  } catch (Exception e) {
    LOG.error(e);
  } finally {
    get.releaseConnection();
  }
}
/**
 * Caches one aggregated event-detail result (a per-event/param counter increment).
 *
 * <p>The tuple key is colon-separated:
 * {@code tableName:fieldName:timeValue:appId:appVersion:appChannel:appPlatform:eventId:paramKey:paramValue[:tenantId]}.
 * The target document is looked up first in {@code cacheMap}, then via
 * {@code eventDetailInfoDao}; if neither has it, a fresh document is built. The counter
 * field {@code fieldName} is then incremented by {@code tuple._2()} and the document is
 * (re)stored in the cache under its composite key.
 *
 * @param cacheMap in-memory cache of documents, keyed by the composite key (without fieldName)
 * @param tuple (composite key, increment) pair
 */
private void doCacheEventDetailInfo(
    Map<String, Document> cacheMap, Tuple2<String, Integer> tuple) {
  logger.debug("come into doCacheEventDetailInfo ==> " + tuple._1() + " <--> " + tuple._2());
  // Split the composite key:
  // tableName:fieldName:timeValue:appId:appVersion:appChannel:appPlatform:eventId:paramKey:paramValue(:tenantId)
  String keys[] = tuple._1().split(":");
  String tableName = keys[0];
  String fieldName = keys[1];
  String timeValue = keys[2];
  String appId = keys[3];
  String appVersion = keys[4];
  String appChannel = keys[5];
  String appPlatform = keys[6];
  String eventId = keys[7];
  String paramKey = keys[8];
  String paramValue = keys[9];
  // tenantId is optional: present only when the key carries an 11th, non-blank segment.
  String tenantId =
      (keys.length == 11 && keys[10] != null && !keys[10].trim().isEmpty()) ? (keys[10]) : "";
  // ":tenantId" suffix appended to the cache key only when a tenant id exists.
  String tenantIdKey =
      (keys.length == 11 && keys[10] != null && !keys[10].trim().isEmpty())
          ? (":" + keys[10])
          : "";
  Document eventDetailInfoDoc = null;
  // Cache key deliberately omits fieldName: all counters for the same
  // event/param combination share a single document.
  String keyStr =
      tableName + ":" + timeValue + ":" + appId + ":" + appVersion + ":" + appChannel + ":"
          + appPlatform + ":" + eventId + ":" + paramKey + ":" + paramValue + tenantIdKey;
  // On a cache hit, reuse the cached document ...
  if (cacheMap.containsKey(keyStr)) {
    eventDetailInfoDoc = cacheMap.get(keyStr);
  } else {
    // ... otherwise fall back to the persisted document (may be null).
    eventDetailInfoDoc =
        eventDetailInfoDao.findOneBy(
            tableName,
            timeValue,
            appId,
            appVersion,
            appChannel,
            appPlatform,
            eventId,
            paramKey,
            paramValue,
            tenantId);
  }
  if (eventDetailInfoDoc == null) {
    // First sighting: build a new document from a skeleton entity (Gson -> JSON -> Document)
    // and then fill in the dimension fields the entity setters do not cover.
    EventDetailInfo eventDetailInfoTemp = new EventDetailInfo();
    eventDetailInfoTemp.setEventId(eventId);
    eventDetailInfoTemp.setTimeValue(timeValue);
    eventDetailInfoTemp.setAppId(appId);
    if (tenantId != null && !tenantId.trim().isEmpty()) {
      eventDetailInfoTemp.setTenantId(tenantId);
    }
    Gson gson = new Gson();
    eventDetailInfoDoc = Document.parse(gson.toJson(eventDetailInfoTemp));
    ObjectId objId = new ObjectId();
    eventDetailInfoDoc.put("_id", objId);
    eventDetailInfoDoc.put("appVersion", appVersion);
    eventDetailInfoDoc.put("appChannel", appChannel);
    eventDetailInfoDoc.put("appPlatform", appPlatform);
    eventDetailInfoDoc.put("paramKey", paramKey);
    eventDetailInfoDoc.put("paramValue", paramValue);
  }
  if (eventDetailInfoDoc.get(fieldName) == null) {
    // New counter: store the increment as a long.
    eventDetailInfoDoc.put(fieldName, (long) tuple._2());
  } else {
    long fieldValue = 0;
    try {
      fieldValue = eventDetailInfoDoc.getLong(fieldName);
    } catch (ClassCastException e) {
      // NOTE(review): fallback suggests some documents store the counter as an Integer.
      fieldValue = (long) eventDetailInfoDoc.getInteger(fieldName);
    }
    eventDetailInfoDoc.put(fieldName, (long) (fieldValue + tuple._2()));
  }
  cacheMap.put(keyStr, eventDetailInfoDoc);
  return;
}
/**
 * Caches one aggregated version-detail result (a per-version counter increment).
 *
 * <p>The tuple key is colon-separated:
 * {@code tableName:fieldName1:fieldName2:fieldName1Value:timeValue:appId:appVersion[:tenantId]},
 * where {@code fieldName1} names the dimension column (one of newUserFromChannal /
 * updateUserFromChannal / updateUserFromVersion) and {@code fieldName2} names the counter
 * to increment by {@code tuple._2()}. The target document is looked up first in
 * {@code cacheMap}, then via {@code versionDetailDao}; if absent, a fresh document is
 * built. The updated document is (re)stored in the cache under its composite key.
 *
 * @param cacheMap in-memory cache of documents, keyed by the composite key (without fieldName2)
 * @param tuple (composite key, increment) pair
 */
private void doCacheEventDetailInfo(
    Map<String, Document> cacheMap, Tuple2<String, Integer> tuple) {
  logger.debug("come into doCacheEventDetailInfo ==> " + tuple._1() + " <--> " + tuple._2());
  // Split the composite key:
  // tableName:fieldName1:fieldName2:fieldName1Value:timeValue:appId:appVersion(:tenantId)
  String keys[] = tuple._1().split(":");
  String tableName = keys[0];
  String fieldName1 = keys[1];
  String fieldName2 = keys[2];
  String fieldName1Value = keys[3];
  String timeValue = keys[4];
  String appId = keys[5];
  String appVersion = keys[6];
  // tenantId is optional: present only when the key carries an 8th, non-blank segment.
  String tenantId =
      (keys.length == 8 && keys[7] != null && !keys[7].trim().isEmpty()) ? (keys[7]) : "";
  // ":tenantId" suffix appended to the cache key only when a tenant id exists.
  String tenantIdKey =
      (keys.length == 8 && keys[7] != null && !keys[7].trim().isEmpty()) ? (":" + keys[7]) : "";
  Document versionDetailDoc = null;
  // Exactly one of these three is set, depending on which dimension fieldName1 names;
  // the other two stay null so findOneBy can match on the right column.
  String newUserFromChannal = null;
  String updateUserFromChannal = null;
  String updateUserFromVersion = null;
  if ("newUserFromChannal".equals(fieldName1)) {
    newUserFromChannal = fieldName1Value;
  }
  if ("updateUserFromChannal".equals(fieldName1)) {
    updateUserFromChannal = fieldName1Value;
  }
  if ("updateUserFromVersion".equals(fieldName1)) {
    updateUserFromVersion = fieldName1Value;
  }
  // Cache key deliberately omits fieldName2: all counters for the same
  // version/dimension combination share a single document.
  String keyStr =
      tableName + ":" + timeValue + ":" + appId + ":" + appVersion + ":" + fieldName1 + ":"
          + fieldName1Value + tenantIdKey;
  // On a cache hit, reuse the cached document ...
  if (cacheMap.containsKey(keyStr)) {
    versionDetailDoc = cacheMap.get(keyStr);
  } else {
    // ... otherwise fall back to the persisted document (may be null).
    versionDetailDoc =
        versionDetailDao.findOneBy(
            tableName,
            timeValue,
            appId,
            appVersion,
            newUserFromChannal,
            updateUserFromChannal,
            updateUserFromVersion,
            tenantId);
  }
  if (versionDetailDoc == null) {
    // First sighting: build a new document from a skeleton entity (Gson -> JSON -> Document)
    // and then set the dimension field the entity setters do not cover.
    VersionDetail versionDetail = new VersionDetail();
    versionDetail.setAppId(appId);
    versionDetail.setAppVersion(appVersion);
    versionDetail.setTimeValue(timeValue);
    if (tenantId != null && !tenantId.trim().isEmpty()) {
      versionDetail.setTenantId(tenantId);
    }
    Gson gson = new Gson();
    versionDetailDoc = Document.parse(gson.toJson(versionDetail));
    ObjectId objId = new ObjectId();
    versionDetailDoc.put("_id", objId);
    versionDetailDoc.put(fieldName1, fieldName1Value);
  }
  if (versionDetailDoc.get(fieldName2) == null) {
    // New counter: stored as the raw Integer increment here
    // (subsequent updates below normalize it to a long).
    versionDetailDoc.put(fieldName2, tuple._2());
  } else {
    long fieldValue = 0;
    try {
      fieldValue = versionDetailDoc.getLong(fieldName2);
    } catch (ClassCastException e) {
      // Counter may still be stored as an Integer (see the branch above).
      fieldValue = (long) versionDetailDoc.getInteger(fieldName2);
    }
    versionDetailDoc.put(fieldName2, (long) (fieldValue + tuple._2()));
  }
  cacheMap.put(keyStr, versionDetailDoc);
  return;
}
/**
 * Runs one GridFS "upload" spec test: uploads the hex-decoded source bytes through the
 * async bucket, then verifies either the expected error or the resulting contents of the
 * files and chunks collections.
 *
 * @param rawArguments spec arguments; "source" is hex-encoded and decoded via
 *     {@code parseHexDocument}; "options" may carry chunkSizeBytes and metadata
 * @param assertion expected outcome: either an "error" entry, or "data" inserts to compare
 *     against the stored files/chunks documents
 */
private void doUpload(final BsonDocument rawArguments, final BsonDocument assertion) {
  Throwable error = null;
  ObjectId objectId = null;
  BsonDocument arguments = parseHexDocument(rawArguments, "source");
  try {
    final String filename = arguments.getString("filename").getValue();
    final InputStream inputStream =
        new ByteArrayInputStream(arguments.getBinary("source").getData());
    final GridFSUploadOptions options = new GridFSUploadOptions();
    BsonDocument rawOptions = arguments.getDocument("options", new BsonDocument());
    if (rawOptions.containsKey("chunkSizeBytes")) {
      options.chunkSizeBytes(rawOptions.getInt32("chunkSizeBytes").getValue());
    }
    if (rawOptions.containsKey("metadata")) {
      options.metadata(Document.parse(rawOptions.getDocument("metadata").toJson()));
    }
    // Bridge the async upload into a synchronous call; capture the new file's id.
    objectId =
        new MongoOperation<ObjectId>() {
          @Override
          public void execute() {
            gridFSBucket.uploadFromStream(
                filename, toAsyncInputStream(inputStream), options, getCallback());
          }
        }.get();
  } catch (Throwable e) {
    // Saved and matched against the spec's "error" expectation below.
    error = e;
  }
  if (assertion.containsKey("error")) {
    // We don't need to read anything more so don't see the extra chunk
    if (!assertion.getString("error").getValue().equals("ExtraChunk")) {
      assertNotNull("Should have thrown an exception", error);
    }
  } else {
    assertNull("Should not have thrown an exception", error);
    // Verify each expected insert block ("expected.files" / "expected.chunks").
    for (BsonValue rawDataItem : assertion.getArray("data", new BsonArray())) {
      BsonDocument dataItem = rawDataItem.asDocument();
      String insert = dataItem.getString("insert", new BsonString("none")).getValue();
      if (insert.equals("expected.files")) {
        List<BsonDocument> documents =
            processFiles(
                dataItem.getArray("documents", new BsonArray()), new ArrayList<BsonDocument>());
        assertEquals(getFilesCount(new BsonDocument()), documents.size());
        // Fetch the stored files document synchronously.
        BsonDocument actual =
            new MongoOperation<BsonDocument>() {
              @Override
              public void execute() {
                filesCollection.find().first(getCallback());
              }
            }.get();
        // NOTE(review): every expected document is compared against the same (first)
        // stored files document — presumably the spec inserts a single file here.
        for (BsonDocument expected : documents) {
          assertEquals(expected.get("length"), actual.get("length"));
          assertEquals(expected.get("chunkSize"), actual.get("chunkSize"));
          assertEquals(expected.get("md5"), actual.get("md5"));
          assertEquals(expected.get("filename"), actual.get("filename"));
          if (expected.containsKey("metadata")) {
            assertEquals(expected.get("metadata"), actual.get("metadata"));
          }
        }
      } else if (insert.equals("expected.chunks")) {
        List<BsonDocument> documents =
            processChunks(
                dataItem.getArray("documents", new BsonArray()), new ArrayList<BsonDocument>());
        assertEquals(getChunksCount(new BsonDocument()), documents.size());
        // Fetch all stored chunks synchronously.
        List<BsonDocument> actualDocuments =
            new MongoOperation<List<BsonDocument>>() {
              @Override
              public void execute() {
                chunksCollection.find().into(new ArrayList<BsonDocument>(), getCallback());
              }
            }.get();
        // Compare chunks positionally; files_id must be the id returned by the upload.
        for (int i = 0; i < documents.size(); i++) {
          BsonDocument expected = documents.get(i);
          BsonDocument actual;
          actual = actualDocuments.get(i);
          assertEquals(new BsonObjectId(objectId), actual.getObjectId("files_id"));
          assertEquals(expected.get("n"), actual.get("n"));
          assertEquals(expected.get("data"), actual.get("data"));
        }
      }
    }
  }
}