/**
 * Writes the puts derived from a batch of requests to a single table.
 *
 * <p>The target table name is taken from the first request in the batch and every put is sent to
 * that table. Requests whose {@code getPut()} is empty are skipped. I/O failures are printed and
 * not propagated.
 *
 * @param putRequests the requests to write; nothing is written when absent or empty
 * @param optimize when {@code true}, each put uses {@code Durability.SKIP_WAL} and, for
 *     {@code HTable} instances, auto-flush is switched off before writing
 */
public void putBatch(Optional<List<Request>> putRequests, boolean optimize) {
  if (!valid) {
    Logger.error("CANNOT PUT! NO VALID CONNECTION");
    return;
  }
  if (!putRequests.isPresent() || putRequests.get().isEmpty()) {
    return;
  }

  List<Request> requests = putRequests.get();
  String tableName = requests.get(0).table;

  List<Put> puts = new ArrayList<>();
  for (Request request : requests) {
    request
        .getPut()
        .ifPresent(
            put -> {
              if (optimize) {
                put.setDurability(Durability.SKIP_WAL);
              }
              puts.add(put);
            });
  }

  // try-with-resources guarantees the table is released even when put() throws.
  try (Table table = connection.getTable(TableName.valueOf(tableName))) {
    if (optimize && table instanceof HTable) {
      ((HTable) table).setAutoFlush(false, true);
    }
    table.put(puts);
  } catch (IOException e) {
    e.printStackTrace();
  }
}
/**
 * Rebuilds this table's state from its serialized form.
 *
 * <p>Fields are consumed in a fixed order: name bytes, type ordinal, sequence number, time stamp,
 * primary-key name bytes (zero length means no name), the column list, and finally the guide-post
 * map, after which {@code init} is invoked with the decoded values.
 *
 * @param input the stream to read from
 * @throws IOException if reading from {@code input} fails
 */
@Override
public void readFields(DataInput input) throws IOException {
  PName parsedName = new PNameImpl(Bytes.readByteArray(input));
  PTableType parsedType = PTableType.values()[WritableUtils.readVInt(input)];
  long parsedSequenceNumber = WritableUtils.readVLong(input);
  long parsedTimeStamp = input.readLong();

  // A zero-length byte array stands for "no primary key name".
  byte[] rawPkName = Bytes.readByteArray(input);
  String parsedPkName = null;
  if (rawPkName.length != 0) {
    parsedPkName = Bytes.toString(rawPkName);
  }

  int columnCount = WritableUtils.readVInt(input);
  List<PColumn> parsedColumns = Lists.newArrayListWithExpectedSize(columnCount);
  for (int remaining = columnCount; remaining > 0; remaining--) {
    PColumn parsedColumn = new PColumnImpl();
    parsedColumn.readFields(input);
    parsedColumns.add(parsedColumn);
  }

  // Guide posts: entry count, then for each entry a key, a value count and the values.
  int guidePostCount = WritableUtils.readVInt(input);
  Map<String, byte[][]> guidePostsByKey = new HashMap<String, byte[][]>();
  for (int entry = 0; entry < guidePostCount; entry++) {
    String guidePostKey = WritableUtils.readString(input);
    byte[][] posts = new byte[WritableUtils.readVInt(input)][];
    for (int p = 0; p < posts.length; p++) {
      posts[p] = Bytes.readByteArray(input);
    }
    guidePostsByKey.put(guidePostKey, posts);
  }

  init(
      parsedName,
      parsedType,
      parsedTimeStamp,
      parsedSequenceNumber,
      parsedPkName,
      parsedColumns,
      new PTableStatsImpl(guidePostsByKey));
}
@Override public List<Mutation> toRowMutations() { // TODO: change to List<Mutation> once it implements Row List<Mutation> mutations = new ArrayList<Mutation>(3); if (deleteRow != null) { // Include only deleteRow mutation if present because it takes precedence over all others mutations.add(deleteRow); } else { // Because we cannot enforce a not null constraint on a KV column (since we don't know if // the row exists when // we upsert it), se instead add a KV that is always emtpy. This allows us to imitate SQL // semantics given the // way HBase works. setValues.add( SchemaUtil.getEmptyColumnFamily(getColumnFamilies()), QueryConstants.EMPTY_COLUMN_BYTES, ts, ByteUtil.EMPTY_BYTE_ARRAY); mutations.add(setValues); if (!unsetValues.isEmpty()) { mutations.add(unsetValues); } } return mutations; }
@Override public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh) throws StorageException { final long delTS = System.currentTimeMillis(); final long putTS = delTS + 1; Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS); List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation // convert sorted commands into representation required for 'batch' operation for (Pair<Put, Delete> commands : commandsPerKey.values()) { if (commands.getFirst() != null) batch.add(commands.getFirst()); if (commands.getSecond() != null) batch.add(commands.getSecond()); } try { HTableInterface table = null; try { table = connectionPool.getTable(tableName); table.batch(batch); table.flushCommits(); } finally { IOUtils.closeQuietly(table); } } catch (IOException e) { throw new TemporaryStorageException(e); } catch (InterruptedException e) { throw new TemporaryStorageException(e); } waitUntil(putTS); }
/**
 * Runs {@code getAllVariantsByRegionAndStudy} once per region and collects the results.
 *
 * @param regions the regions to query, processed in iteration order
 * @param sourceId passed through to each per-region query
 * @param options passed through to each per-region query
 * @return one query result per region, in the same order as {@code regions}
 */
@Override
public List<QueryResult> getAllVariantsByRegionList(
    List<Region> regions, String sourceId, QueryOptions options) {
  List<QueryResult> perRegionResults = new LinkedList<>();
  for (Region region : regions) {
    perRegionResults.add(getAllVariantsByRegionAndStudy(region, sourceId, options));
  }
  return perRegionResults;
}
/**
 * Performs a {@code get} for every request in the batch.
 *
 * @param requests the requests to execute; an absent value yields an empty response list
 * @return an empty {@code Optional} when there is no valid connection, otherwise the responses of
 *     all requests for which {@code get} produced a value
 */
public Optional<List<Response>> getBatch(Optional<List<Request>> requests) {
  if (!valid) {
    Logger.error("CANNOT GET! NO VALID CONNECTION");
    return Optional.empty();
  }

  List<Response> collected = new ArrayList<>();
  if (requests.isPresent()) {
    for (Request request : requests.get()) {
      get(Optional.of(request)).ifPresent(collected::add);
    }
  }
  return Optional.of(collected);
}
/**
 * Removes from the mutation every key value of {@code family} whose qualifier equals
 * {@code qualifier}. Does nothing when the mutation has no entry for the family.
 *
 * @param m the mutation whose family map is edited in place
 * @param family the column family to look up
 * @param qualifier the qualifier to remove
 */
private void removeIfPresent(Mutation m, byte[] family, byte[] qualifier) {
  List<KeyValue> familyCells = m.getFamilyMap().get(family);
  if (familyCells == null) {
    return;
  }
  for (Iterator<KeyValue> cursor = familyCells.iterator(); cursor.hasNext(); ) {
    KeyValue cell = cursor.next();
    if (Bytes.compareTo(cell.getQualifier(), qualifier) == 0) {
      cursor.remove();
    }
  }
}
/**
 * Serializes this table: name bytes, type ordinal, sequence number, time stamp, primary-key name
 * (an empty byte array when there is none), the column count followed by each column, and
 * finally the statistics.
 *
 * @param output the stream to write to
 * @throws IOException if writing to {@code output} fails
 */
@Override
public void write(DataOutput output) throws IOException {
  Bytes.writeByteArray(output, name.getBytes());
  WritableUtils.writeVInt(output, type.ordinal());
  WritableUtils.writeVLong(output, sequenceNumber);
  output.writeLong(timeStamp);

  byte[] pkNameBytes;
  if (pkName == null) {
    pkNameBytes = ByteUtil.EMPTY_BYTE_ARRAY;
  } else {
    pkNameBytes = Bytes.toBytes(pkName);
  }
  Bytes.writeByteArray(output, pkNameBytes);

  WritableUtils.writeVInt(output, allColumns.size());
  for (PColumn column : allColumns) {
    column.write(output);
  }

  stats.write(output);
}
/**
 * Builds the put for this request.
 *
 * @return a put keyed by {@code key} carrying one cell per column, or an empty {@code Optional}
 *     when {@code valid()} is false
 */
public Optional<Put> getPut() {
  if (!valid()) {
    return Optional.empty();
  }
  Put put = new Put(key);
  columns.forEach(
      column ->
          put.addColumn(column.family.getBytes(), column.qualifier.getBytes(), column.value));
  return Optional.of(put);
}
@Override public PColumn getColumn(String name) throws ColumnNotFoundException, AmbiguousColumnException { List<PColumn> columns = columnsByName.get(name); int size = columns.size(); if (size == 0) { throw new ColumnNotFoundException(name); } if (size > 1) { for (PColumn column : columns) { if (QueryConstants.DEFAULT_COLUMN_FAMILY.equals(column.getFamilyName().getString())) { // Allow ambiguity with default column, since a user would not know how to prefix it. return column; } } throw new AmbiguousColumnException(name); } return columns.get(0); }
/**
 * Looks up a primary-key column by name.
 *
 * <p>When exactly one column has the name it is returned as is. When several share it, the
 * candidates are scanned from last to first and the first one without a family name wins.
 *
 * @param name the column name
 * @return the matching column
 * @throws ColumnNotFoundException if no column has that name, or several do and all of them have
 *     a family name
 */
@Override
public PColumn getPKColumn(String name) throws ColumnNotFoundException {
  List<PColumn> matches = columnsByName.get(name);
  int matchCount = matches.size();
  if (matchCount == 0) {
    throw new ColumnNotFoundException(name);
  }
  if (matchCount == 1) {
    return matches.get(0);
  }
  for (int idx = matchCount - 1; idx >= 0; idx--) {
    PColumn candidate = matches.get(idx);
    if (candidate.getFamilyName() == null) {
      return candidate;
    }
  }
  throw new ColumnNotFoundException(name);
}
@Override public QueryResult<Variant> getAllVariantsByRegionAndStudy( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new LinkedList<>(); boolean includeSamples; boolean includeStats; boolean includeEffects; if (!options.containsKey("samples") && !options.containsKey("stats") && !options.containsKey("effects")) { includeSamples = true; includeStats = true; includeEffects = true; } else { includeSamples = options.containsKey("samples") && options.getBoolean("samples"); includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } try { String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); HTable table = new HTable(admin.getConfiguration(), tableName); dbstart = System.currentTimeMillis(); Scan regionScan = new Scan(startRow.getBytes(), stopRow.getBytes()); ResultScanner scanres = table.getScanner(regionScan); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); // Iterate over results and, optionally, their samples and statistics for (Result result : scanres) { String[] rowkeyParts = new String(result.getRow(), CHARSET_UTF_8).split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); // Get basic result fields from Protocol Buffers message NavigableMap<byte[], byte[]> infoMap = result.getFamilyMap("i".getBytes()); byte[] byteInfo = infoMap.get((sourceId + "_data").getBytes()); VariantFieldsProtos.VariantInfo protoInfo = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String reference = protoInfo.getReference(); String 
alternate = StringUtils.join(protoInfo.getAlternateList(), ","); String format = StringUtils.join(protoInfo.getFormatList(), ":"); Variant variant = new Variant(chromosome, position, position, reference, alternate); // Set samples if requested if (includeSamples) { NavigableMap<byte[], byte[]> sampleMap = result.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // Set samples for (byte[] s : sampleMap.keySet()) { String sampleName = (new String(s, CHARSET_UTF_8)).replaceAll(sourceId + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } // TODO // variant.addSampleData(sampleName, singleSampleMap); } } // Set stats if requested if (includeStats) { byte[] byteStats = infoMap.get((sourceId + "_stats").getBytes()); VariantFieldsProtos.VariantStats protoStats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); VariantStats variantStats = new VariantStats( chromosome, position, reference, alternate, protoStats.getMaf(), protoStats.getMgf(), protoStats.getMafAllele(), protoStats.getMgfGenotype(), protoStats.getMissingAlleles(), protoStats.getMissingGenotypes(), protoStats.getMendelianErrors(), protoStats.getIsIndel(), protoStats.getCasesPercentDominant(), protoStats.getControlsPercentDominant(), protoStats.getCasesPercentRecessive(), protoStats.getControlsPercentRecessive()); variant.setStats(variantStats); } if (includeEffects) { QueryResult<VariantEffect> queryEffects = getEffectsByVariant(variant, options); variant.setEffect(queryEffects.getResult()); } results.add(variant); } } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } queryResult.setResult(results); 
queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
/**
 * Populates every field of this table from the given metadata.
 *
 * <p>Columns without a family name are treated as primary-key columns and also feed the row-key
 * schema. All columns are stored ordered by their position, indexed by name, and grouped into
 * column families in the order in which the families first appear.
 *
 * @param name table name
 * @param type table type
 * @param timeStamp table time stamp
 * @param sequenceNumber table sequence number
 * @param pkName primary-key name, may be {@code null}
 * @param columns all columns of the table; each column's position is used as its index
 * @param stats table statistics
 */
private void init(
    PName name,
    PTableType type,
    long timeStamp,
    long sequenceNumber,
    String pkName,
    List<PColumn> columns,
    PTableStats stats) {
  this.name = name;
  this.type = type;
  this.timeStamp = timeStamp;
  this.sequenceNumber = sequenceNumber;
  this.columnsByName = ArrayListMultimap.create(columns.size(), 1);
  this.pkName = pkName;

  List<PColumn> keyColumns = Lists.newArrayListWithExpectedSize(columns.size() - 1);
  PColumn[] orderedColumns = new PColumn[columns.size()];
  RowKeySchemaBuilder schemaBuilder = new RowKeySchemaBuilder();
  for (PColumn column : columns) {
    orderedColumns[column.getPosition()] = column;
    if (column.getFamilyName() == null) {
      keyColumns.add(column);
      schemaBuilder.addField(column);
    }
    columnsByName.put(column.getName().getString(), column);
  }
  this.pkColumns = ImmutableList.copyOf(keyColumns);
  this.rowKeySchema = schemaBuilder.setMinNullable(keyColumns.size()).build();
  this.allColumns = ImmutableList.copyOf(orderedColumns);

  // Second pass over the position-ordered array so that the column order inside each family
  // matches the overall column order and the family order stays constant.
  int expectedFamilySize = orderedColumns.length - keyColumns.size();
  // Linked map: families keep the order in which they are first listed.
  Map<PName, List<PColumn>> columnsByFamily = Maps.newLinkedHashMap();
  for (PColumn column : orderedColumns) {
    PName familyName = column.getFamilyName();
    if (familyName == null) {
      continue;
    }
    List<PColumn> members = columnsByFamily.get(familyName);
    if (members == null) {
      members = Lists.newArrayListWithExpectedSize(expectedFamilySize);
      columnsByFamily.put(familyName, members);
    }
    members.add(column);
  }

  PColumnFamily[] familyArray = new PColumnFamily[columnsByFamily.size()];
  ImmutableMap.Builder<String, PColumnFamily> byString = ImmutableMap.builder();
  ImmutableSortedMap.Builder<byte[], PColumnFamily> byBytes =
      ImmutableSortedMap.orderedBy(Bytes.BYTES_COMPARATOR);
  int slot = 0;
  for (Map.Entry<PName, List<PColumn>> grouped : columnsByFamily.entrySet()) {
    PColumnFamily family = new PColumnFamilyImpl(grouped.getKey(), grouped.getValue());
    familyArray[slot++] = family;
    byString.put(family.getName().getString(), family);
    byBytes.put(family.getName().getBytes(), family);
  }
  this.families = ImmutableList.copyOf(familyArray);
  this.familyByBytes = byBytes.build();
  this.familyByString = byString.build();
  this.stats = stats;
}
public List<Variant> getRecordSimpleStats( String study, int missing_gt, float maf, String maf_allele) { BasicDBObject compare = new BasicDBObject("studies.stats.allele_maf", maf_allele) .append("studies.stats.MAF", maf) .append("studies.stats.missing", missing_gt); List<Get> hbaseQuery = new ArrayList<>(); DBCollection collection = db.getCollection("variants"); Iterator<DBObject> result = collection.find(compare); String chromosome = new String(); while (result.hasNext()) { DBObject variant = result.next(); String position = variant.get("_id").toString(); // hbase query construction Get get = new Get(position.getBytes()); hbaseQuery.add(get); } // Complete results, from HBase tableName = study; effectTableName = tableName + "effect"; Map<String, Variant> resultsMap = new HashMap<>(); try { HTable table = new HTable(admin.getConfiguration(), tableName); HTable effectTable = new HTable(admin.getConfiguration(), effectTableName); Result[] hbaseResultEffect = effectTable.get(hbaseQuery); Result[] hbaseResultStats = table.get(hbaseQuery); // List<Variant> results = new LinkedList<>(); for (Result r : hbaseResultStats) { String position = new String(r.getRow(), CHARSET_UTF_8); String[] aux = position.split("_"); String inner_position = aux[1]; String chr = aux[0]; // position parsing if (chr.startsWith("0")) { chr = chr.substring(1); } while (inner_position.startsWith("0")) { inner_position = inner_position.substring(1); } List<VariantFieldsProtos.VariantSample> samples = new LinkedList<>(); NavigableMap<byte[], byte[]> infoMap = r.getFamilyMap("i".getBytes()); byte[] byteStats = infoMap.get((study + "_stats").getBytes()); VariantFieldsProtos.VariantStats stats = VariantFieldsProtos.VariantStats.parseFrom(byteStats); byte[] byteInfo = infoMap.get((study + "_data").getBytes()); VariantFieldsProtos.VariantInfo info = VariantFieldsProtos.VariantInfo.parseFrom(byteInfo); String alternate = StringUtils.join(info.getAlternateList(), ", "); String reference = 
info.getReference(); Variant partialResult = new Variant( chr, Integer.parseInt(inner_position), Integer.parseInt(inner_position), reference, alternate); String format = StringUtils.join(info.getFormatList(), ":"); NavigableMap<byte[], byte[]> sampleMap = r.getFamilyMap("d".getBytes()); Map<String, Map<String, String>> resultSampleMap = new HashMap<>(); // StringBuilder sampleRaw = new StringBuilder(); for (byte[] s : sampleMap.keySet()) { String qual = (new String(s, CHARSET_UTF_8)).replaceAll(study + "_", ""); VariantFieldsProtos.VariantSample sample = VariantFieldsProtos.VariantSample.parseFrom(sampleMap.get(s)); String sample1 = sample.getSample(); String[] values = sample1.split(":"); String[] fields = format.split(":"); Map<String, String> singleSampleMap = new HashMap<>(); for (int i = 0; i < fields.length; i++) { singleSampleMap.put(fields[i], values[i]); } resultSampleMap.put(qual, singleSampleMap); } VariantStats variantStats = new VariantStats( chromosome, Integer.parseInt(inner_position), reference, alternate, stats.getMaf(), stats.getMgf(), stats.getMafAllele(), stats.getMgfGenotype(), stats.getMissingAlleles(), stats.getMissingGenotypes(), stats.getMendelianErrors(), stats.getIsIndel(), stats.getCasesPercentDominant(), stats.getControlsPercentDominant(), stats.getCasesPercentRecessive(), stats.getControlsPercentRecessive()); partialResult.setStats(variantStats); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } for (Result r : hbaseResultEffect) { if (!r.isEmpty()) { NavigableMap<byte[], byte[]> effectMap = r.getFamilyMap("e".getBytes()); Variant partialResult = resultsMap.get(new String(r.getRow(), CHARSET_UTF_8)); System.out.println("Recuperado " + partialResult.toString()); String s = partialResult.getReference() + "_" + partialResult.getAlternate(); VariantEffectProtos.EffectInfo effectInfo = VariantEffectProtos.EffectInfo.parseFrom(effectMap.get(s.getBytes())); VariantEffect variantEffect = new VariantEffect( 
partialResult.getChromosome(), (int) partialResult.getStart(), partialResult.getReference(), partialResult.getAlternate(), effectInfo.getFeatureId(), effectInfo.getFeatureName(), effectInfo.getFeatureType(), effectInfo.getFeatureBiotype(), effectInfo.getFeatureChromosome(), effectInfo.getFeatureStart(), effectInfo.getFeatureEnd(), effectInfo.getFeatureStrand(), effectInfo.getSnpId(), effectInfo.getAncestral(), effectInfo.getAlternative(), effectInfo.getGeneId(), effectInfo.getTranscriptId(), effectInfo.getGeneName(), effectInfo.getConsequenceType(), effectInfo.getConsequenceTypeObo(), effectInfo.getConsequenceTypeDesc(), effectInfo.getConsequenceTypeType(), effectInfo.getAaPosition(), effectInfo.getAminoacidChange(), effectInfo.getCodonChange()); resultsMap.put(new String(r.getRow(), CHARSET_UTF_8), partialResult); } } } catch (InvalidProtocolBufferException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } catch (IOException e) { System.err.println(e.getClass().getName() + ": " + e.getMessage()); } List<Variant> results = new ArrayList<>(resultsMap.values()); return results; }
public QueryResult getSimpleVariantsByRegion( Region region, String sourceId, QueryOptions options) { Long start, end, dbstart, dbend; start = System.currentTimeMillis(); boolean includeStats; boolean includeEffects; if (!options.containsKey("stats") && !options.containsKey("effects")) { includeStats = true; includeEffects = true; } else { includeStats = options.containsKey("stats") && options.getBoolean("stats"); includeEffects = options.containsKey("effects") && options.getBoolean("effects"); } QueryResult<Variant> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<Variant> results = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("sources.sourceId", sourceId); DBCollection collection = db.getCollection("variants"); dbstart = System.currentTimeMillis(); DBCursor variantInStudies = collection.find(query); dbend = System.currentTimeMillis(); queryResult.setDbTime(dbend - dbstart); for (DBObject result : variantInStudies) { String[] rowkeyParts = result.get("position").toString().split("_"); String chromosome = rowkeyParts[0].replaceFirst("^0+(?!$)", ""); int position = Integer.parseInt(rowkeyParts[1]); BasicDBList studies = (BasicDBList) result.get("sources"); BasicDBObject st = (BasicDBObject) studies.get(0); String ref = (String) st.get("ref"); String alt = StringUtils.join((ArrayList<String>) st.get("alt"), ","); // TODO Needs rework Variant variant = new Variant(chromosome, position, position, ref, alt); // Set stats informations if (includeStats) { VariantStats stats = new VariantStats(); BasicDBObject mongoStats = (BasicDBObject) st.get("stats"); stats.setMaf((float) (double) mongoStats.get("maf")); 
stats.setMafAllele((String) mongoStats.get("alleleMaf")); stats.setMissingGenotypes((int) mongoStats.get("missing")); List<Genotype> genotypeCount = new ArrayList<>(); for (BasicDBObject s : (List<BasicDBObject>) mongoStats.get("genotypeCount")) { for (Map.Entry<String, Object> entry : s.entrySet()) { Genotype genotype = new Genotype(entry.getKey()); genotype.setCount((Integer) entry.getValue()); genotypeCount.add(genotype); } } stats.setGenotypes(genotypeCount); variant.setStats(stats); } // TODO Set consequence type names if (includeEffects) { BasicDBList mongoEffects = (BasicDBList) st.get("effects"); if (mongoEffects != null) { for (Object e : mongoEffects) { String effectObo = e.toString(); VariantEffect effect = new VariantEffect(); effect.setConsequenceTypeObo(effectObo); variant.addEffect(effect); } } } results.add(variant); } queryResult.setResult(results); queryResult.setNumResults(results.size()); end = System.currentTimeMillis(); queryResult.setTime(end - start); return queryResult; }
@Override public QueryResult getVariantsHistogramByRegion( Region region, String sourceId, boolean histogramLogarithm, int histogramMax) { QueryResult<ObjectMap> queryResult = new QueryResult<>( String.format("%s:%d-%d", region.getChromosome(), region.getStart(), region.getEnd())); List<ObjectMap> data = new ArrayList<>(); String startRow = buildRowkey(region.getChromosome(), Long.toString(region.getStart())); String stopRow = buildRowkey(region.getChromosome(), Long.toString(region.getEnd())); long startTime = System.currentTimeMillis(); long startDbTime = System.currentTimeMillis(); BasicDBObject query = new BasicDBObject("position", new BasicDBObject("$gte", startRow).append("$lte", stopRow)) .append("studies.studyId", sourceId); DBCollection collection = db.getCollection("variants"); DBCursor queryResults = collection.find(query); queryResult.setDbTime(System.currentTimeMillis() - startDbTime); int resultSize = queryResults.size(); if (resultSize > histogramMax) { // Need to group results to fit maximum size of the histogram int sumChunkSize = resultSize / histogramMax; int i = 0, j = 0; int featuresCount = 0; ObjectMap item = null; for (DBObject result : queryResults) { // featuresCount += result.getInt("features_count"); // if (i == 0) { // item = new ObjectMap("chromosome", result.getString("chromosome")); // item.put("chunkId", result.getInt("chunk_id")); // item.put("start", result.getInt("start")); // } else if (i == sumChunkSize - 1 || j == resultSize - 1) { // if (histogramLogarithm) { // item.put("featuresCount", (featuresCount > 0) ? 
// Math.log(featuresCount) : 0); // } else { // item.put("featuresCount", featuresCount); // } // item.put("end", result.getInt("end")); // data.add(item); // i = -1; // featuresCount = 0; // } // j++; // i++; } } else { for (DBObject result : queryResults) { // ObjectMap item = new ObjectMap("chromosome", // result.getString("chromosome")); // item.put("chunkId", result.getInt("chunk_id")); // item.put("start", result.getInt("start")); // if (histogramLogarithm) { // int features_count = result.getInt("features_count"); // result.put("featuresCount", (features_count > 0) ? // Math.log(features_count) : 0); // } else { // item.put("featuresCount", result.getInt("features_count")); // } // item.put("end", result.getInt("end")); // data.add(item); } } queryResult.setResult(data); queryResult.setNumResults(data.size()); queryResult.setTime(System.currentTimeMillis() - startTime); return queryResult; }
@Override public int newKey(ImmutableBytesWritable key, byte[][] values) { int i = 0; TrustedByteArrayOutputStream os = new TrustedByteArrayOutputStream(SchemaUtil.estimateKeyLength(this)); try { List<PColumn> columns = getPKColumns(); int nColumns = columns.size(); PColumn lastPKColumn = columns.get(nColumns - 1); while (i < values.length && i < nColumns) { PColumn column = columns.get(i); PDataType type = column.getDataType(); // This will throw if the value is null and the type doesn't allow null byte[] byteValue = values[i++]; if (byteValue == null) { byteValue = ByteUtil.EMPTY_BYTE_ARRAY; } // An empty byte array return value means null. Do this, // since a type may have muliple representations of null. // For example, VARCHAR treats both null and an empty string // as null. This way we don't need to leak that part of the // implementation outside of PDataType by checking the value // here. if (byteValue.length == 0 && !column.isNullable()) { throw new ConstraintViolationException( name.getString() + "." + column.getName().getString() + " may not be null"); } Integer byteSize = column.getByteSize(); if (type.isFixedWidth()) { // TODO: handle multi-byte characters if (byteValue.length != byteSize) { throw new ConstraintViolationException( name.getString() + "." + column.getName().getString() + " must be " + byteSize + " bytes (" + SchemaUtil.toString(type, byteValue) + ")"); } } else if (byteSize != null && byteValue.length > byteSize) { throw new ConstraintViolationException( name.getString() + "." 
+ column.getName().getString() + " may not exceed " + byteSize + " bytes (" + SchemaUtil.toString(type, byteValue) + ")"); } os.write(byteValue, 0, byteValue.length); // Separate variable length column values in key with zero byte if (!type.isFixedWidth() && column != lastPKColumn) { os.write(SEPARATOR_BYTE); } } // If some non null pk values aren't set, then throw if (i < nColumns) { PColumn column = columns.get(i); PDataType type = column.getDataType(); if (type.isFixedWidth() || !column.isNullable()) { throw new ConstraintViolationException( name.getString() + "." + column.getName().getString() + " may not be null"); } // Separate variable length column values in key with zero byte if (column != lastPKColumn) { os.write(SEPARATOR_BYTE); } } key.set(os.getBuffer(), 0, os.size()); return i; } finally { try { os.close(); } catch (IOException e) { throw new RuntimeException(e); // Impossible } } }
/**
 * Initializes the station code map and the list of station pairs still to be processed.
 *
 * <p>A progress file named {@code <file>.process} is created when missing; otherwise each of its
 * lines is added to {@code processed}. The station file is then read as tab-separated
 * {@code name<TAB>code} lines (lines with any other field count are skipped). Finally every
 * ordered pair of distinct stations whose {@code start:end} key is not in {@code processed} is
 * appended to {@code lstAllProcessStation}. I/O errors are printed and otherwise ignored.
 *
 * @param file station code file path
 */
private static void initStationCode(String file) {
  List<String> lstStation = new ArrayList<>();

  strConfig = file + ".process";
  File fsP = new File(strConfig);
  if (!fsP.exists()) {
    try {
      fsP.createNewFile();
    } catch (IOException e) {
      e.printStackTrace();
    }
  } else {
    try (BufferedReader readp = new BufferedReader(new FileReader(fsP))) {
      String line;
      while ((line = readp.readLine()) != null) {
        processed.add(line);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  // try-with-resources closes the reader on every path. The previous hand-written finally block
  // tested "reader == null" before calling close(), which threw when the file was missing and
  // leaked the reader otherwise.
  try (BufferedReader reader = new BufferedReader(new FileReader(new File(file)))) {
    String line;
    while ((line = reader.readLine()) != null) {
      String[] splitLine = line.split("\t");
      if (splitLine.length != 2) {
        continue;
      }
      String stationName = splitLine[0];
      String stationCode = splitLine[1];
      mapStationCode.put(stationName, stationCode);
      lstStation.add(stationName);
    }
  } catch (IOException e) {
    // Also covers FileNotFoundException.
    e.printStackTrace();
  }

  for (String start : lstStation) {
    for (String end : lstStation) {
      if (start.equals(end) || processed.contains(start + ":" + end)) {
        continue;
      }
      lstAllProcessStation.add(new DefaultKeyValue(start, end));
    }
  }
}
/**
 * Creates a table with a single column family by delegating to the list-based overload.
 *
 * @param tableName name of the table to create
 * @param columnFamily the only column family of the table
 */
public void createTable(String tableName, String columnFamily) {
  List<String> singleFamily = new ArrayList<>();
  singleFamily.add(columnFamily);
  createTable(tableName, singleFamily);
}
/**
 * Runs one file move between local storage and HDFS, recording it in the files and runs tables.
 *
 * <p>Outline, as visible in this method:
 *
 * <ol>
 *   <li>Read the configuration into {@code confArg} via {@code setup} and open ZooKeeper.
 *   <li>Determine the direction with {@code checkArgs}. A LOCAL2REMOTE move whose format is not
 *       {@code "unknown"} is handed to {@code adddb.main} under a lock, after which the JVM
 *       exits.
 *   <li>Register the databases, resolve the source type and work out the start timestamp from
 *       the previous run entry.
 *   <li>Under a lock, make sure the requested file exists (generating it for REMOTE2LOCAL when
 *       possible), write the file/run entries and copy the file in the requested direction.
 * </ol>
 *
 * @param args command-line arguments; not read directly by this method
 * @return 0 on success or when there is no new version of the file, 1 when the remote file
 *     cannot be found or generated
 * @throws Exception propagated from any of the called helpers
 */
public int run(String[] args) throws Exception {
  // printUsage();
  /*
   * SETUP
   */
  Configuration argConf = getConf();
  Hashtable<String, String> confArg = new Hashtable<String, String>();
  setup(confArg, argConf);
  // NOTE(review): currentTime is never read in this method.
  Date currentTime = new Date();
  Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
  // Both flags are true when the configured value contains "true" in any letter case.
  Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
  Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
  logger.info("Running GeStore");

  // ZooKeeper setup
  Configuration config = HBaseConfiguration.create();
  zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
  zkInstance =
      new ZooKeeper(
          ZKConfig.getZKQuorumServersString(config),
          config.getInt("zookeeper.session.timeout", -1),
          zkWatcher);

  // A task id, when given, is appended to the temporary path.
  if (!confArg.get("task_id").isEmpty()) {
    confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
  }

  // Lock name: file id, optionally followed by "_<run_id>_" and "_<task_id>_".
  String lockRequest = confArg.get("file_id");
  if (!confArg.get("run_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
  if (!confArg.get("task_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

  // Get type of movement
  toFrom type_move = checkArgs(confArg);
  if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
    // Known format going to remote storage: delegate the whole job to adddb.
    List<String> arguments = new ArrayList<String>();
    arguments.add("-Dinput=" + confArg.get("local_path"));
    arguments.add("-Dtable=" + confArg.get("file_id"));
    arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
    arguments.add("-Dtype=" + confArg.get("format"));
    arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
    arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
    arguments.add("-Drun_id=" + confArg.get("run_id"));
    // NOTE(review): -Drun_id was already added unconditionally on the previous line, so a
    // non-empty run id is passed twice.
    if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
    if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
    if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));

    String lockName = lock(lockRequest);
    String[] argumentString = arguments.toArray(new String[arguments.size()]);
    adddb.main(argumentString);
    // NOTE(review): unlock is skipped if adddb.main throws.
    unlock(lockName);
    // Terminates the JVM; nothing below runs for this case.
    System.exit(0);
  }

  // Database registration
  dbutil db_util = new dbutil(config);
  db_util.register_database(confArg.get("db_name_files"), true);
  db_util.register_database(confArg.get("db_name_runs"), true);
  db_util.register_database(confArg.get("db_name_updates"), true);
  FileSystem hdfs = FileSystem.get(config);
  // NOTE(review): localFS is never read in this method.
  FileSystem localFS = FileSystem.getLocal(config);

  // Get source type
  confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  confArg.put(
      "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  // For a non-local source being fetched with an explicit stop timestamp (anything other than
  // Integer.MAX_VALUE), replace that timestamp with the value returned by latestVersion.
  if (!confArg.get("source").equals("local")
      && type_move == toFrom.REMOTE2LOCAL
      && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
    confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
  }

  /*
   * Get previous timestamp
   */
  Get run_id_get = new Get(confArg.get("run_id").getBytes());
  Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
  KeyValue run_file_prev =
      run_get.getColumnLatest(
          "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
  String last_timestamp = new String("0");
  if (null != run_file_prev && !confArg.get("source").equals("local")) {
    long last_timestamp_real = run_file_prev.getTimestamp();
    Long current_timestamp = new Long(confArg.get("timestamp_real"));
    // Only consider the previous run when it is more than 36000 older than timestamp_real
    // (units are not visible here).
    if ((current_timestamp - last_timestamp_real) > 36000) {
      // Start one past the timestamp stored by the previous run.
      last_timestamp = new String(run_file_prev.getValue());
      Integer lastTimestamp = new Integer(last_timestamp);
      lastTimestamp += 1;
      last_timestamp = lastTimestamp.toString();
      logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
      Date last_run = new Date(run_file_prev.getTimestamp());
      if (last_run.before(endDate) && !full_run) {
        confArg.put("timestamp_start", last_timestamp);
      }
    }
  }

  // Nothing to do when the start timestamp lies beyond the stop timestamp.
  Integer tse = new Integer(confArg.get("timestamp_stop"));
  Integer tss = new Integer(confArg.get("timestamp_start"));
  if (tss > tse) {
    logger.info("No new version of requested file.");
    return 0;
  }

  /*
   * Generate file
   */
  // NOTE(review): the lock is released explicitly on each return path below, but not when a
  // helper throws.
  String lockName = lock(lockRequest);

  Get file_id_get = new Get(confArg.get("file_id").getBytes());
  Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
  if (!file_get.isEmpty()) {
    boolean found =
        hasFile(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg));
    // A "fullfile" source is always treated as not found.
    if (confArg.get("source").equals("fullfile")) {
      found = false;
    }
    // NOTE(review): filenames_put is never read in this method.
    String filenames_put =
        getFileNames(
            db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
    // Filename not found in file database
    if (!found && type_move == toFrom.REMOTE2LOCAL) {
      if (!confArg.get("source").equals("local")) {
        // Generate intermediate file
        if (getFile(hdfs, confArg, db_util) == null) {
          unlock(lockName);
          return 1;
        }
        // Put generated file into file database
        if (!confArg.get("format").equals("fullfile")) {
          putFileEntry(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              confArg.get("full_file_name"),
              confArg.get("source"));
        }
      } else {
        logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
        unlock(lockName);
        return 1;
      }
    }
  } else {
    // No entry at all for this file id: a fetch cannot succeed.
    if (type_move == toFrom.REMOTE2LOCAL) {
      logger.warn("Remote file not found, and cannot be generated.");
      unlock(lockName);
      return 1;
    }
  }

  /*
   * Copy file
   * Update tables
   */
  if (type_move == toFrom.LOCAL2REMOTE) {
    if (!confArg.get("format").equals("fullfile")) {
      putFileEntry(
          db_util,
          hdfs,
          confArg.get("db_name_files"),
          confArg.get("file_id"),
          getFullPath(confArg),
          confArg.get("source"));
    }
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
  } else if (type_move == toFrom.REMOTE2LOCAL) {
    // Every HDFS file whose path starts with the full path is a candidate for copying.
    FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    // The lock is released before the copy loop.
    // NOTE(review): unlock(lockName) is called again at the end of the method, so this branch
    // unlocks twice.
    unlock(lockName);
    for (FileStatus file : files) {
      Path cur_file = file.getPath();
      Path cur_local_path =
          new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
      String suffix = getSuffix(getFileName(confArg), cur_file.getName());
      if (suffix.length() > 0) {
        cur_local_path = cur_local_path.suffix(new String("." + suffix));
      }
      if (confArg.get("copy").equals("true")) {
        // Skip the copy when checksumLocalTest accepts the local file for this checksum.
        String crc = hdfs.getFileChecksum(cur_file).toString();
        if (checksumLocalTest(cur_local_path, crc)) {
          continue;
        } else {
          hdfs.copyToLocalFile(cur_file, cur_local_path);
          writeChecksum(cur_local_path, crc);
        }
      } else {
        // Copying disabled: only print the local/remote path pair.
        System.out.println(cur_local_path + "\t" + cur_file);
      }
    }
  }
  unlock(lockName);
  return 0;
}
/**
 * Reports whether this object is complete enough to be used.
 *
 * <p>The object is valid when {@code table} is non-null and non-empty, {@code key} is non-null
 * and has at least one element, and every entry of {@code columns} reports itself valid. An
 * empty {@code columns} collection is accepted. A null {@code table} or null {@code columns}
 * yields {@code false} rather than a {@link NullPointerException}.
 *
 * @return {@code true} if all of the checks above pass, {@code false} otherwise
 */
public boolean valid() {
  if (table == null || table.isEmpty()) {
    return false;
  }
  if (key == null || key.length == 0) {
    return false;
  }
  if (columns == null) {
    return false;
  }
  // allMatch stops at the first invalid column instead of counting all of them.
  return columns.stream().allMatch(c -> c.valid());
}
public static List<KeyValue> getAllTrainInfo(Configuration config, String date) { List<KeyValue> result = new ArrayList<>(); String strJson = null; BufferedWriter writer = null; Table table = null; try (Connection connect = ConnectionFactory.createConnection(config); Admin admin = connect.getAdmin()) { TableName tablename = TableName.valueOf(TABLE_NAME); if (!admin.tableExists(tablename)) { System.out.println("Table does not exist."); return null; } table = connect.getTable(tablename); Put put = null; String start = null; String end = null; writer = new BufferedWriter(new FileWriter(new File(strConfig), true)); for (KeyValue item : lstAllProcessStation) { start = (String) item.getKey(); end = (String) item.getValue(); try { try { Thread.sleep(200); } catch (InterruptedException e1) { e1.printStackTrace(); } System.out.println("process : " + start + ":" + end); strJson = getFromAPIX(mapStationCode.get(start), mapStationCode.get(end), date); writer.write(start + ":" + end); writer.newLine(); } catch (Exception e) { System.out.println(start + ":" + end + "error"); e.printStackTrace(); break; } JSONObject jo = new JSONObject(strJson); if (jo.has("httpstatus") && (jo.getInt("httpstatus") == 200)) { JSONObject joData = jo.getJSONObject("data"); if (joData.has("flag") && joData.getBoolean("flag")) { result.add(new DefaultKeyValue(start, end)); // 插入到hbase String rowkey = start + ":" + end; put = new Put(rowkey.getBytes()); put.addColumn( CF_JSON.getBytes(), "json".getBytes(), joData.toString().getBytes("utf-8")); table.put(put); System.out.println("start " + start + "\t end " + end + "\t has ticket"); } } } } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) { try { writer.flush(); writer.close(); } catch (IOException e) { e.printStackTrace(); } } if (table != null) { try { table.close(); } catch (IOException e) { e.printStackTrace(); } } } return result; }