private void computeViolationsPerRules(DecoratorContext context) {
  Map<RulePriority, Multiset<Rule>> rulesPerSeverity = Maps.newHashMap();
  for (Violation violation : context.getViolations()) {
    Multiset<Rule> rulesBag = initRules(rulesPerSeverity, violation.getSeverity());
    rulesBag.add(violation.getRule());
  }

  for (RulePriority severity : RulePriority.values()) {
    Metric metric = SeverityUtils.severityToViolationMetric(severity);

    Collection<Measure> children = context.getChildrenMeasures(MeasuresFilters.rules(metric));
    for (Measure child : children) {
      RuleMeasure childRuleMeasure = (RuleMeasure) child;
      Rule rule = childRuleMeasure.getRule();
      if (rule != null && MeasureUtils.hasValue(childRuleMeasure)) {
        Multiset<Rule> rulesBag = initRules(rulesPerSeverity, severity);
        rulesBag.add(rule, childRuleMeasure.getIntValue());
      }
    }

    Multiset<Rule> rulesBag = rulesPerSeverity.get(severity);
    if (rulesBag != null) {
      for (Multiset.Entry<Rule> entry : rulesBag.entrySet()) {
        RuleMeasure measure =
            RuleMeasure.createForRule(metric, entry.getElement(), (double) entry.getCount());
        measure.setSeverity(severity);
        context.saveMeasure(measure);
      }
    }
  }
}
private ScoredCandidates<Container> scoreContainers(
    Multiset<String> parents, int children, ResultDescription desc) {
  Builder<Container> candidates = DefaultScoredCandidates.fromSource(NAME);

  ResolvedContent containers = resolver.findByCanonicalUris(parents.elementSet());

  for (Multiset.Entry<String> parent : parents.entrySet()) {
    Maybe<Identified> possibledContainer = containers.get(parent.getElement());
    if (possibledContainer.hasValue()) {
      Identified identified = possibledContainer.requireValue();
      if (identified instanceof Container) {
        Container container = (Container) identified;
        Score score = score(parent.getCount(), children);
        candidates.addEquivalent(container, score);
        desc.appendText(
            "%s: scored %s (%s)", container.getCanonicalUri(), score, container.getTitle());
      } else {
        desc.appendText("%s: %s not container", parent, identified.getClass().getSimpleName());
      }
    } else {
      desc.appendText("%s: missing", parent);
    }
  }

  return candidates.build();
}
public static void main(String args[]) {
  // create a multiset collection
  Multiset<String> multiset = HashMultiset.create();

  multiset.add("a");
  multiset.add("b");
  multiset.add("c");
  multiset.add("d");
  multiset.add("a");
  multiset.add("b");
  multiset.add("c");
  multiset.add("b");
  multiset.add("b");
  multiset.add("b");

  // print the occurrence count of an element
  System.out.println("Occurrence of 'b' : " + multiset.count("b"));

  // print the total size of the multiset
  System.out.println("Total Size : " + multiset.size());

  // get the distinct elements of the multiset as a set
  Set<String> set = multiset.elementSet();

  // display the elements of the set
  System.out.println("Set [");
  for (String s : set) {
    System.out.println(s);
  }
  System.out.println("]");

  // display all the elements of the multiset using an iterator
  Iterator<String> iterator = multiset.iterator();
  System.out.println("MultiSet [");
  while (iterator.hasNext()) {
    System.out.println(iterator.next());
  }
  System.out.println("]");

  // display the distinct elements of the multiset with their occurrence count
  System.out.println("MultiSet [");
  for (Multiset.Entry<String> entry : multiset.entrySet()) {
    System.out.println("Element: " + entry.getElement() + ", Occurrence(s): " + entry.getCount());
  }
  System.out.println("]");

  // remove two occurrences of "b"
  multiset.remove("b", 2);

  // print the occurrence count of the element again
  System.out.println("Occurrence of 'b' : " + multiset.count("b"));
}
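// Expected output of the example above (a sketch, not captured program output; HashMultiset does
// not guarantee iteration order, so the Set/MultiSet listings may appear in any order):
//   Occurrence of 'b' : 5      // "b" was added five times
//   Total Size : 10            // ten add() calls in total
//   ...element listings...
//   Occurrence of 'b' : 3      // after multiset.remove("b", 2)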
@Override
public void printResults() {
  Multiset<String> storage = (Multiset<String>) this.storage;
  System.out.println(category);
  for (Multiset.Entry<String> entry : storage.entrySet()) {
    System.out.printf("%s:%s\n", entry.getElement(), entry.getCount());
  }
  super.printResults();
}
public static void printResult(
    Ordering<Multiset.Entry<Word>> comparator, int top, Multiset<Word> result) {
  System.out.println("**********sort result**************");
  System.out.println("\tword\t\tcount");
  for (Multiset.Entry<Word> wordEntry : comparator.greatestOf(result.entrySet(), top)) {
    System.out.printf("\t%s\t%6d\n", wordEntry.getElement().getWord(), wordEntry.getCount());
  }
  System.out.println("**********done...**************");
}
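// A minimal sketch of a count-based comparator that could be passed to printResult above
// (illustrative only; it assumes the surrounding Word type and Guava's
// com.google.common.primitives.Ints on the classpath).
Ordering<Multiset.Entry<Word>> byCount =
    new Ordering<Multiset.Entry<Word>>() {
      @Override
      public int compare(Multiset.Entry<Word> left, Multiset.Entry<Word> right) {
        // order entries by their occurrence count
        return Ints.compare(left.getCount(), right.getCount());
      }
    };
// printResult(byCount, 10, wordCounts) would then print the ten most frequent words.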
private void snapshotTimers(DataOutputView out) throws IOException {
  out.writeInt(watermarkTimersQueue.size());
  for (Timer<K, W> timer : watermarkTimersQueue) {
    keySerializer.serialize(timer.key, out);
    windowSerializer.serialize(timer.window, out);
    out.writeLong(timer.timestamp);
  }

  out.writeInt(processingTimeTimers.size());
  for (Timer<K, W> timer : processingTimeTimers) {
    keySerializer.serialize(timer.key, out);
    windowSerializer.serialize(timer.window, out);
    out.writeLong(timer.timestamp);
  }

  out.writeInt(processingTimeTimerTimestamps.entrySet().size());
  for (Multiset.Entry<Long> timerTimestampCounts : processingTimeTimerTimestamps.entrySet()) {
    out.writeLong(timerTimestampCounts.getElement());
    out.writeInt(timerTimestampCounts.getCount());
  }
}
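// A hypothetical restore counterpart for the multiset portion of the snapshot above. This is a
// sketch under the assumption that processingTimeTimerTimestamps is a Multiset<Long>; the method
// name is invented for illustration and is not the framework's actual API.
private void restoreTimerTimestamps(DataInputView in) throws IOException {
  int distinctTimestamps = in.readInt();
  for (int i = 0; i < distinctTimestamps; i++) {
    long timestamp = in.readLong();
    int count = in.readInt();
    // add(element, occurrences) restores the exact multiplicity written during the snapshot
    processingTimeTimerTimestamps.add(timestamp, count);
  }
}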
public static void main(String[] args) {
  String filename = "d://hhtord.csv";

  try {
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
    String s = null;

    List<Multiset<String>> valuesMultisetArray = Lists.newCopyOnWriteArrayList();
    final int MAX_FIELD = 180;
    for (int i = 0; i < MAX_FIELD; i++) {
      Multiset<String> o = HashMultiset.create();
      valuesMultisetArray.add(o);
    }

    s = reader.readLine();
    final int MAX = 1000 * 1000;
    for (int i = 0; i < MAX && s != null; i++, s = reader.readLine()) {
      String[] sd = s.split(",");
      for (int j = 0; j < sd.length; j++) {
        valuesMultisetArray.get(j).add(sd[j]);
      }
      if (i % 1000 == 0) {
        System.out.println(i);
      }
    }

    System.out.println("=============");
    for (int i = 0; i < MAX_FIELD; i++) {
      Multiset<String> o = valuesMultisetArray.get(i);
      System.out.println(o.entrySet().size());
    }
  } catch (FileNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
public void add(Multiset<BoolArray> multiset) {
  lock.lock();
  for (HashMultiset.Entry<BoolArray> entry : multiset.entrySet()) {
    BoolArray label =
        new SubGraphStructure(entry.getElement().getArray()).getOrderedForm().getAdjacencyArray();
    labelMap.add(label, entry.getCount());
  }
  if (isVerbose()) {
    System.out.printf(
        "Added %,d new signatures. LabelMap size:%,d\n", multiset.elementSet().size(), size());
  }
  if (size() > capacity) {
    try {
      flush();
    } catch (IOException exp) {
      exp.printStackTrace();
      System.exit(-1);
    }
  }
  lock.unlock();
}
/**
 * @param orConditions StatisticsQueryOrConditions<StatisticsQueryCondition>
 * @param statisticsStorage
 * @param scoringExps set of experiments that have at least one non-zero score for
 *     statisticsQuery. This is used when retrieving efos to be displayed in the heatmap when no
 *     query efvs exist (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
 * @return Multiset<Integer> containing experiment counts corresponding to all attributes in each
 *     StatisticsQueryCondition in orConditions
 */
private static Multiset<Integer> getScoresForOrConditions(
    final StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions,
    StatisticsStorage statisticsStorage,
    Set<ExperimentInfo> scoringExps) {

  Multiset<Integer> scores = HashMultiset.create();
  for (StatisticsQueryCondition orCondition : orConditions.getConditions()) {
    orCondition.setBioEntityIdRestrictionSet(orConditions.getBioEntityIdRestrictionSet());
    scores.addAll(scoreQuery(orCondition, statisticsStorage, scoringExps));
  }

  // Now apply orConditions' min experiments restriction to scores
  Multiset<Integer> qualifyingScores = HashMultiset.create();
  for (Multiset.Entry<Integer> entry : scores.entrySet()) {
    if (entry.getCount() >= orConditions.getMinExperiments()) {
      qualifyingScores.setCount(entry.getElement(), entry.getCount());
    }
  }

  return qualifyingScores;
}
private void checkAttributeNamesForDuplicates(ValueType type, Protoclass protoclass) {
  if (!type.attributes.isEmpty()) {
    Multiset<String> attributeNames = HashMultiset.create(type.attributes.size());
    for (ValueAttribute attribute : type.attributes) {
      attributeNames.add(attribute.name());
    }

    List<String> duplicates = Lists.newArrayList();
    for (Multiset.Entry<String> entry : attributeNames.entrySet()) {
      if (entry.getCount() > 1) {
        duplicates.add(entry.getElement());
      }
    }

    if (!duplicates.isEmpty()) {
      protoclass
          .report()
          .error(
              "Duplicate attribute names %s. You should check if correct @Value.Style applied",
              duplicates);
    }
  }
}
public void MagicProcessor() {
  if (modelChanges == null) {
    modelChanges = new ArrayList<ModelChange>();
    return;
  } else if (modelChanges.size() == 0) return;
  // else go!
  // gets all model deltas and processes!
  // System.out.println("[MAGIC] Update match set based on model change started...");
  long start = System.currentTimeMillis();

  Set<PQuery> affecteds = new HashSet<PQuery>();
  for (ModelChange change : modelChanges) {
    if (change instanceof EFeatureChange)
      affecteds.addAll(
          LookaheadMatcherTreat.RelativeSet.get(((EFeatureChange) change).getChangedFeature()));
    if (change instanceof EClassChange)
      affecteds.addAll(LookaheadMatcherTreat.RelativeSet.get(((EClassChange) change).getChange()));
    if (change instanceof EDataTypeChange)
      affecteds.addAll(
          LookaheadMatcherTreat.RelativeSet.get(((EDataTypeChange) change).getChange()));
  }

  ArrayList<ModelDelta> deltas = new ArrayList<ModelDelta>();
  for (PQuery affectedQuery : affecteds) {
    ArrayList<AheadStructure> cachedStructures =
        LookaheadMatcherTreat.GodSetStructures.get(affectedQuery);

    // deliver deltas for pattern!
    for (ModelChange change : modelChanges) {
      for (AheadStructure aSn : cachedStructures) {
        for (AxisConstraint rC : aSn.SearchedConstraints) {
          if (rC instanceof RelationConstraint && change instanceof EFeatureChange) {
            EFeatureChange changenow = (EFeatureChange) change;
            if (((RelationConstraint) rC).getEdge().equals(changenow.getChangedFeature()))
              rC.putToMailbox(change);
          } else if (rC instanceof TypeConstraint && change instanceof EDataTypeChange) {
            EDataTypeChange changenow = (EDataTypeChange) change;
            if (((TypeConstraint) rC).getType().equals(changenow.getChange()))
              rC.putToMailbox(change);
          }
          if (rC instanceof TypeConstraint && change instanceof EClassChange) {
            EClassChange changenow = (EClassChange) change;
            if (((TypeConstraint) rC).getType().equals(changenow.getChange()))
              rC.putToMailbox(change);
          }
        }
      }
    }

    for (ModelChange change : modelChanges) {
      // process this change: first remove all deltas from constraints with this change
      for (AheadStructure aSn : cachedStructures) {
        for (AxisConstraint rC : aSn.SearchedConstraints) {
          if (rC.hasMailboxContent()) {
            if (rC.getMailboxContent().contains(change)) rC.removeFromMailbox(change);
          }
        }
      }

      // apply modelchange:
      HashMap<PVariable, Object> knownLocalAndParameters = new HashMap<PVariable, Object>();
      for (AheadStructure aSn : cachedStructures) {
        // find all relationConstraints
        for (AxisConstraint rC : aSn.SearchedConstraints) {
          if (rC instanceof RelationConstraint && change instanceof EFeatureChange) {
            EFeatureChange changenow = (EFeatureChange) change;
            if (((RelationConstraint) rC).getEdge().equals(changenow.getChangedFeature())) {
              // affected relaconstraint's lookvariables should be bound!!
              knownLocalAndParameters.put(
                  ((RelationConstraint) rC).getSource(), changenow.getHost());
              knownLocalAndParameters.put(
                  ((RelationConstraint) rC).getTarget(), changenow.getInstance());
            }
          } else if (rC instanceof TypeConstraint && change instanceof EDataTypeChange) {
            EDataTypeChange changenow = (EDataTypeChange) change;
            if (((TypeConstraint) rC).getType().equals(changenow.getChange())) {
              // affected typeconstraint's lookvariable should be bound!!
              knownLocalAndParameters.put(
                  ((TypeConstraint) rC).getTypedVariable(), changenow.getInstance());
            }
          }
          if (rC instanceof TypeConstraint && change instanceof EClassChange) {
            EClassChange changenow = (EClassChange) change;
            if (((TypeConstraint) rC).getType().equals(changenow.getChange())) {
              // affected typeconstraint's lookvariable should be bound!!
              knownLocalAndParameters.put(
                  ((TypeConstraint) rC).getTypedVariable(), changenow.getInstance());
            }
          }
        }
      }

      // manual satisfy and clone cachedStructures (createNew* clones input):
      ArrayList<AheadStructure> newStructs = null;
      isModified = false;
      if (change instanceof EFeatureChange) {
        EFeatureChange changenow = (EFeatureChange) change;
        newStructs =
            createNewFromOldRelaC(
                changenow.getHost(),
                changenow.getInstance(),
                changenow.getChangedFeature(),
                cachedStructures);
      } else if (change instanceof EDataTypeChange) {
        EDataTypeChange changenow = (EDataTypeChange) change;
        newStructs =
            createNewFromOldTypeC(
                false, changenow.getChange(), changenow.getInstance(), cachedStructures);
      }
      if (change instanceof EClassChange) {
        EClassChange changenow = (EClassChange) change;
        newStructs =
            createNewFromOldTypeC(
                false, changenow.getChange(), changenow.getInstance(), cachedStructures);
      }

      if (isModified) {
        // the new matches that'll appear in matching based on manually satisfied structure
        Multiset<LookaheadMatching> newbies_toExamine =
            (new LookaheadMatcherInterface(this.navHelper))
                .searchChangesAll(
                    treat.getIncQueryEngine(),
                    affectedQuery,
                    newStructs,
                    knownLocalAndParameters,
                    new TreatConstraintEnumerator(this.navHelper));

        // a new map to store a matching and whether it is added or removed
        HashMultimap<LookaheadMatching, Boolean> newMatchingsAndChange = HashMultimap.create();

        // iterate over the multiset and create a delta
        for (com.google.common.collect.Multiset.Entry<LookaheadMatching> inners :
            newbies_toExamine.entrySet()) {
          for (int pi = 0; pi < inners.getCount(); pi++)
            newMatchingsAndChange.put(inners.getElement(), change.isAddition());
        }

        // delta needed to propagate the changes
        if (newMatchingsAndChange.size() > 0) {
          ModelDelta d = new ModelDelta(affectedQuery, newMatchingsAndChange);
          deltas.add(d);
        }
      }
    }
  }

  // apply deltas
  for (ModelDelta delta : deltas) {
    // System.out.println("Propagate a delta: " + delta.getPattern().getFullyQualifiedName());
    AdvancedDeltaProcessor.getInstance().ReceiveDelta(delta);
  }
  AdvancedDeltaProcessor.getInstance().ProcessReceivedDeltaSet();
  // System.out.println("[MAGIC] Update match set based on model change ended! Time:" +
  //     Long.toString(System.currentTimeMillis() - start));

  // finally:
  modelChanges = new ArrayList<ModelChange>();
}
/** @since 2.7 */
public static <K> String format(Multiset<K> multiset, Converter<K> keyConverter) {
  return formatEntries(multiset.entrySet(), keyConverter);
}
@Override
public void merge(final Iterator<DataFile> inFileIterator, DataFile outFile) throws IOException {

  final Multiset<String> counts = HashMultiset.create();
  final Set<String> emptyCounts = new HashSet<>();

  while (inFileIterator.hasNext()) {

    // Get input file
    final DataFile inFile = inFileIterator.next();

    EoulsanLogger.getLogger().info("Merge " + inFile.getName() + " to " + outFile.getName());

    boolean first = true;

    try (BufferedReader reader = new BufferedReader(new InputStreamReader(inFile.open()))) {

      String line = null;

      while ((line = reader.readLine()) != null) {

        // Do not handle the header
        if (first) {
          first = false;
          continue;
        }

        final int tabPos = line.indexOf('\t');

        // Do not handle empty or invalid lines
        if (tabPos == -1) {
          continue;
        }

        try {
          final String id = line.substring(0, tabPos).trim();
          final int count = Integer.parseInt(line.substring(tabPos).trim());

          if (count == 0) {
            emptyCounts.add(id);
          }

          counts.add(id, count);
        } catch (NumberFormatException e) {
          // Do not handle parsing errors
        }
      }
    }
  }

  // Write the result file
  try (Writer writer = new OutputStreamWriter(outFile.create())) {

    writer.write(ExpressionSplitter.EXPRESSION_FILE_HEADER);

    // Write the non-empty counts
    for (Multiset.Entry<String> e : counts.entrySet()) {

      final String id = e.getElement();

      // Remove the id from the empty counts
      emptyCounts.remove(id);

      // Write the entry
      writer.write(id + '\t' + e.getCount() + '\n');
    }

    // Write the empty counts
    for (String id : emptyCounts) {
      writer.write(id + "\t0\n");
    }
  }
}
public static void main(String[] args) throws Exception {
  if (args.length < 5) {
    System.out.println(
        "Arguments: [model] [label index] [dictionary] [document frequency] [Customer description]");
    return;
  }
  String modelPath = args[0];
  String labelIndexPath = args[1];
  String dictionaryPath = args[2];
  String documentFrequencyPath = args[3];
  String carsPath = args[4];

  Configuration configuration = new Configuration();

  // model is a matrix (wordId, labelId) => probability score
  NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);
  StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

  // labels is a map label => classId
  Map<Integer, String> labels =
      BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
  Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
  Map<Integer, Long> documentFrequency =
      readDocumentFrequency(configuration, new Path(documentFrequencyPath));

  // analyzer used to extract words from the complaint description
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

  int labelCount = labels.size();
  int documentCount = documentFrequency.get(-1).intValue();

  System.out.println("Number of labels: " + labelCount);
  System.out.println("Number of documents in training set: " + documentCount);

  BufferedReader reader = new BufferedReader(new FileReader(carsPath));
  while (true) {
    String line = reader.readLine();
    if (line == null) {
      break;
    }

    String[] tokens = line.split("\t", 47);
    String cmplid = tokens[0];
    String cdescr = tokens[19];

    System.out.println("Complaint id: " + cmplid + "\t" + cdescr);

    Multiset<String> words = ConcurrentHashMultiset.create();

    // extract words from the complaint description
    TokenStream ts = analyzer.tokenStream("text", new StringReader(cdescr));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    int wordCount = 0;
    while (ts.incrementToken()) {
      if (termAtt.length() > 0) {
        String word = ts.getAttribute(CharTermAttribute.class).toString();
        Integer wordId = dictionary.get(word);
        // if the word is not in the dictionary, skip it
        if (wordId != null) {
          words.add(word);
          wordCount++;
        }
      }
    }

    // create vector wordId => weight using tf-idf
    Vector vector = new RandomAccessSparseVector(1000);
    TFIDF tfidf = new TFIDF();
    for (Multiset.Entry<String> entry : words.entrySet()) {
      String word = entry.getElement();
      int count = entry.getCount();
      Integer wordId = dictionary.get(word);
      Long freq = documentFrequency.get(wordId);
      double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
      vector.setQuick(wordId, tfIdfValue);
    }

    // With the classifier, we get one score for each label.
    // The label with the highest score is the one the description is most likely associated with.
    Vector resultVector = classifier.classifyFull(vector);
    double bestScore = -Double.MAX_VALUE;
    int bestCategoryId = -1;
    for (Element element : resultVector.all()) {
      int categoryId = element.index();
      double score = element.get();
      if (score > bestScore) {
        bestScore = score;
        bestCategoryId = categoryId;
      }
      System.out.print(" " + labels.get(categoryId) + ": " + score);
    }
    System.out.println(" => " + labels.get(bestCategoryId));
  }
  analyzer.close();
  reader.close();
}
/** Transform this raw statement into a CreateTableStatement. */
public ParsedStatement.Prepared prepare() throws RequestValidationException {
  // Column family name
  if (!columnFamily().matches("\\w+"))
    throw new InvalidRequestException(
        String.format(
            "\"%s\" is not a valid column family name (must be alphanumeric character only: [0-9A-Za-z]+)",
            columnFamily()));
  if (columnFamily().length() > Schema.NAME_LENGTH)
    throw new InvalidRequestException(
        String.format(
            "Column family names shouldn't be more than %s characters long (got \"%s\")",
            Schema.NAME_LENGTH, columnFamily()));

  for (Multiset.Entry<ColumnIdentifier> entry : definedNames.entrySet())
    if (entry.getCount() > 1)
      throw new InvalidRequestException(
          String.format("Multiple definition of identifier %s", entry.getElement()));

  properties.validate();

  CreateTableStatement stmt =
      new CreateTableStatement(cfName, properties, ifNotExists, staticColumns);

  Map<ByteBuffer, CollectionType> definedCollections = null;
  for (Map.Entry<ColumnIdentifier, CQL3Type> entry : definitions.entrySet()) {
    ColumnIdentifier id = entry.getKey();
    CQL3Type pt = entry.getValue();
    if (pt.isCollection()) {
      if (definedCollections == null)
        definedCollections = new HashMap<ByteBuffer, CollectionType>();
      definedCollections.put(id.key, (CollectionType) pt.getType());
    }
    stmt.columns.put(id, pt.getType()); // we'll remove what is not a column below
  }

  if (keyAliases.isEmpty())
    throw new InvalidRequestException("No PRIMARY KEY specifed (exactly one required)");
  else if (keyAliases.size() > 1)
    throw new InvalidRequestException("Multiple PRIMARY KEYs specifed (exactly one required)");

  List<ColumnIdentifier> kAliases = keyAliases.get(0);

  List<AbstractType<?>> keyTypes = new ArrayList<AbstractType<?>>(kAliases.size());
  for (ColumnIdentifier alias : kAliases) {
    stmt.keyAliases.add(alias.key);
    AbstractType<?> t = getTypeAndRemove(stmt.columns, alias);
    if (t instanceof CounterColumnType)
      throw new InvalidRequestException(
          String.format("counter type is not supported for PRIMARY KEY part %s", alias));
    if (staticColumns.contains(alias))
      throw new InvalidRequestException(
          String.format("Static column %s cannot be part of the PRIMARY KEY", alias));
    keyTypes.add(t);
  }
  stmt.keyValidator =
      keyTypes.size() == 1 ? keyTypes.get(0) : CompositeType.getInstance(keyTypes);

  // Dense means that no part of the comparator stores a CQL column name. This means
  // COMPACT STORAGE with at least one columnAliases (otherwise it's a thrift "static" CF).
  stmt.isDense = useCompactStorage && !columnAliases.isEmpty();

  // Handle column aliases
  if (columnAliases.isEmpty()) {
    if (useCompactStorage) {
      // There should remain some column definition since it is a non-composite "static" CF
      if (stmt.columns.isEmpty())
        throw new InvalidRequestException(
            "No definition found that is not part of the PRIMARY KEY");
      if (definedCollections != null)
        throw new InvalidRequestException(
            "Collection types are not supported with COMPACT STORAGE");
      stmt.comparator = CFDefinition.definitionType;
    } else {
      List<AbstractType<?>> types =
          new ArrayList<AbstractType<?>>(definedCollections == null ? 1 : 2);
      types.add(CFDefinition.definitionType);
      if (definedCollections != null)
        types.add(ColumnToCollectionType.getInstance(definedCollections));
      stmt.comparator = CompositeType.getInstance(types);
    }
  } else {
    // If we use compact storage and have only one alias, it is a
    // standard "dynamic" CF, otherwise it's a composite
    if (useCompactStorage && columnAliases.size() == 1) {
      if (definedCollections != null)
        throw new InvalidRequestException(
            "Collection types are not supported with COMPACT STORAGE");
      ColumnIdentifier alias = columnAliases.get(0);
      stmt.columnAliases.add(alias.key);
      stmt.comparator = getTypeAndRemove(stmt.columns, alias);
      if (stmt.comparator instanceof CounterColumnType)
        throw new InvalidRequestException(
            String.format("counter type is not supported for PRIMARY KEY part %s", alias));
      if (staticColumns.contains(alias))
        throw new InvalidRequestException(
            String.format("Static column %s cannot be part of the PRIMARY KEY", alias));
    } else {
      List<AbstractType<?>> types = new ArrayList<AbstractType<?>>(columnAliases.size() + 1);
      for (ColumnIdentifier t : columnAliases) {
        stmt.columnAliases.add(t.key);

        AbstractType<?> type = getTypeAndRemove(stmt.columns, t);
        if (type instanceof CounterColumnType)
          throw new InvalidRequestException(
              String.format("counter type is not supported for PRIMARY KEY part %s", t));
        if (staticColumns.contains(t))
          throw new InvalidRequestException(
              String.format("Static column %s cannot be part of the PRIMARY KEY", t));
        types.add(type);
      }

      if (useCompactStorage) {
        if (definedCollections != null)
          throw new InvalidRequestException(
              "Collection types are not supported with COMPACT STORAGE");
      } else {
        // For sparse, we must add the last UTF8 component
        // and the collection type if there is one
        types.add(CFDefinition.definitionType);
        if (definedCollections != null)
          types.add(ColumnToCollectionType.getInstance(definedCollections));
      }

      if (types.isEmpty())
        throw new IllegalStateException("Nonsensical empty parameter list for CompositeType");
      stmt.comparator = CompositeType.getInstance(types);
    }
  }

  if (!staticColumns.isEmpty()) {
    // Only CQL3 tables can have static columns
    if (useCompactStorage)
      throw new InvalidRequestException(
          "Static columns are not supported in COMPACT STORAGE tables");
    // Static columns only make sense if we have at least one clustering column. Otherwise
    // everything is static anyway
    if (columnAliases.isEmpty())
      throw new InvalidRequestException(
          "Static columns are only useful (and thus allowed) if the table has at least one clustering column");
  }

  if (useCompactStorage && !stmt.columnAliases.isEmpty()) {
    if (stmt.columns.isEmpty()) {
      // The only value we'll insert will be the empty one, so the default validator doesn't
      // matter
      stmt.defaultValidator = BytesType.instance;
      // We need to distinguish between
      //   * I'm upgrading from thrift so the valueAlias is null
      //   * I've defined my table with only a PK (and the column value will be empty)
      // So, we use an empty valueAlias (rather than null) for the second case
      stmt.valueAlias = ByteBufferUtil.EMPTY_BYTE_BUFFER;
    } else {
      if (stmt.columns.size() > 1)
        throw new InvalidRequestException(
            String.format(
                "COMPACT STORAGE with composite PRIMARY KEY allows no more than one column not part of the PRIMARY KEY (got: %s)",
                StringUtils.join(stmt.columns.keySet(), ", ")));

      Map.Entry<ColumnIdentifier, AbstractType> lastEntry =
          stmt.columns.entrySet().iterator().next();
      stmt.defaultValidator = lastEntry.getValue();
      stmt.valueAlias = lastEntry.getKey().key;
      stmt.columns.remove(lastEntry.getKey());
    }
  } else {
    // For compact, we are in the "static" case, so we need at least one column defined. For
    // non-compact however, having just the PK is fine since we have the CQL3 row marker.
    if (useCompactStorage && stmt.columns.isEmpty())
      throw new InvalidRequestException(
          "COMPACT STORAGE with non-composite PRIMARY KEY require one column not part of the PRIMARY KEY, none given");

    // There is no way to insert/access a column that is not defined for non-compact storage, so
    // the actual validator doesn't matter much (except that we want to recognize counter CF as
    // limitations apply to them).
    stmt.defaultValidator =
        !stmt.columns.isEmpty()
                && (stmt.columns.values().iterator().next() instanceof CounterColumnType)
            ? CounterColumnType.instance
            : BytesType.instance;
  }

  // If we give a clustering order, we must explicitly do so for all aliases and in the order of
  // the PK
  if (!definedOrdering.isEmpty()) {
    if (definedOrdering.size() > columnAliases.size())
      throw new InvalidRequestException(
          "Only clustering key columns can be defined in CLUSTERING ORDER directive");

    int i = 0;
    for (ColumnIdentifier id : definedOrdering.keySet()) {
      ColumnIdentifier c = columnAliases.get(i);
      if (!id.equals(c)) {
        if (definedOrdering.containsKey(c))
          throw new InvalidRequestException(
              String.format(
                  "The order of columns in the CLUSTERING ORDER directive must be the one of the clustering key (%s must appear before %s)",
                  c, id));
        else
          throw new InvalidRequestException(
              String.format("Missing CLUSTERING ORDER for column %s", c));
      }
      ++i;
    }
  }

  return new ParsedStatement.Prepared(stmt);
}
/**
 * The core scoring method for statistics queries
 *
 * @param statisticsQuery query to be performed on statisticsStorage
 * @param statisticsStorage core data for Statistics queries
 * @param scoringExps an out parameter.
 *     <p>- If null, experiment counts result of statisticsQuery should be returned. - If
 *     non-null, it serves as a flag that an optimised statisticsQuery should be performed to just
 *     collect Experiments for which non-zero counts exist for Statistics query. A typical call
 *     scenario in this case is just one efv per statisticsQuery, in which we can both: 1. check
 *     if the efv Attribute itself is a scoring one 2. map this Attribute and Experiments in
 *     scoringExps to efo terms - via the reverse mapping efv-experiment-> efo term in EfoIndex
 *     (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
 * @return Multiset of aggregated experiment counts, where the set of scored genes is intersected
 *     across statisticsQuery.getConditions(), and union-ed across attributes within each
 *     condition in statisticsQuery.getConditions().
 */
public static Multiset<Integer> scoreQuery(
    StatisticsQueryCondition statisticsQuery,
    final StatisticsStorage statisticsStorage,
    Set<ExperimentInfo> scoringExps) {

  // !gatherScoringExpsOnly -> experiment counts should be calculated for statisticsQuery
  // gatherScoringExpsOnly -> scoring experiments should be collected (into scoringExps) only
  boolean gatherScoringExpsOnly = scoringExps != null;
  Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andStatisticsQueryConditions =
      statisticsQuery.getConditions();

  Multiset<Integer> results = null;

  if (andStatisticsQueryConditions.isEmpty()) { // End of recursion
    Set<Integer> bioEntityIdRestrictionSet = statisticsQuery.getBioEntityIdRestrictionSet();

    Set<EfAttribute> attributes = statisticsQuery.getAttributes();
    if (attributes.isEmpty()) {
      // No attributes were provided - we have to use pre-computed scores across all attributes
      Multiset<Integer> scoresAcrossAllEfos =
          statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType());
      results = intersect(scoresAcrossAllEfos, bioEntityIdRestrictionSet);
    } else {
      results = HashMultiset.create();
      setQueryExperiments(statisticsQuery, statisticsStorage);

      // For each experiment in the query, traverse through all attributes and add all gene
      // indexes into one ConciseSet. This way a gene can score only once for a single
      // experiment - across all OR attributes in this query. Once all attributes have been
      // traversed for a single experiment, add the ConciseSet to the Multiset results
      for (ExperimentInfo exp : statisticsQuery.getExperiments()) {
        FastSet statsForExperiment = new FastSet();
        for (EfAttribute attr : attributes) {
          Map<ExperimentInfo, ConciseSet> expsToStats =
              getStatisticsForAttribute(
                  statisticsQuery.getStatisticsType(), attr, statisticsStorage);
          if (expsToStats != null) {
            if (expsToStats.isEmpty()) {
              log.debug(
                  "Failed to retrieve stats for stat: "
                      + statisticsQuery.getStatisticsType()
                      + " and attr: "
                      + attr);
            } else {
              if (expsToStats.get(exp) != null) {
                if (!gatherScoringExpsOnly) {
                  statsForExperiment.addAll(
                      intersect(expsToStats.get(exp), bioEntityIdRestrictionSet));
                } else if (containsAtLeastOne(expsToStats.get(exp), bioEntityIdRestrictionSet)) {
                  // exp contains at least one non-zero score for at least one gene index in
                  // bioEntityIdRestrictionSet -> add it to scoringExps
                  scoringExps.add(exp);
                }
              } else {
                log.debug(
                    "Failed to retrieve stats for stat: "
                        + statisticsQuery.getStatisticsType()
                        + " exp: "
                        + exp.getAccession()
                        + " and attr: "
                        + attr);
              }
            }
          }
        }
        if (!gatherScoringExpsOnly) {
          results.addAll(statsForExperiment);
        }
      }
    }
  } else {
    // run over all AND conditions, do "OR" inside (cf. scoreOrStatisticsQueryConditions()),
    // "AND"'ing over the whole thing
    for (StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions :
        andStatisticsQueryConditions) {

      // Pass gene restriction set down to orConditions
      orConditions.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet());

      // process OR conditions
      Multiset<Integer> condGenes =
          getScoresForOrConditions(orConditions, statisticsStorage, scoringExps);

      if (results == null) results = condGenes;
      else {
        Iterator<Multiset.Entry<Integer>> resultGenes = results.entrySet().iterator();

        while (resultGenes.hasNext()) {
          Multiset.Entry<Integer> entry = resultGenes.next();
          if (!condGenes.contains(entry.getElement()))
            // AND operation between different top query conditions
            resultGenes.remove();
          else
            // for all gene ids belonging to the intersection of all conditions seen so far,
            // accumulate experiment counts
            results.setCount(
                entry.getElement(), entry.getCount() + condGenes.count(entry.getElement()));
        }
      }
    }
  }

  if (results == null) {
    results = HashMultiset.create();
  }
  return results;
}
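// A minimal sketch of what the intersect(...) helper used in scoreQuery might do: keep only the
// scores whose element is in the restriction set, preserving their counts. This is an assumption
// for illustration; the actual Atlas helper (which is also applied to ConciseSet inputs above) is
// not shown here.
private static Multiset<Integer> intersect(Multiset<Integer> scores, Set<Integer> restrictionSet) {
  Multiset<Integer> result = HashMultiset.create();
  for (Multiset.Entry<Integer> entry : scores.entrySet()) {
    if (restrictionSet.contains(entry.getElement())) {
      // copy the element together with its multiplicity
      result.add(entry.getElement(), entry.getCount());
    }
  }
  return result;
}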