Ejemplo n.º 1
0
  /**
   * Saves one {@link RuleMeasure} per (severity, rule) pair, combining the violations reported by
   * the context with the per-rule values already measured on its children.
   *
   * <p>The bag for a severity is obtained through {@code initRules}, which is defined elsewhere;
   * it is expected to register the bag in the map it is given (otherwise the final lookup finds
   * nothing to save).
   *
   * @param context decorator context that supplies the violations and child measures and receives
   *     the resulting measures
   */
  private void computeViolationsPerRules(DecoratorContext context) {
    Map<RulePriority, Multiset<Rule>> countsBySeverity = Maps.newHashMap();

    // Each violation from the context counts once for its rule.
    for (Violation violation : context.getViolations()) {
      initRules(countsBySeverity, violation.getSeverity()).add(violation.getRule());
    }

    for (RulePriority severity : RulePriority.values()) {
      Metric metric = SeverityUtils.severityToViolationMetric(severity);

      // Fold in the per-rule values the children carry for this severity's metric.
      Collection<Measure> childMeasures =
          context.getChildrenMeasures(MeasuresFilters.rules(metric));
      for (Measure childMeasure : childMeasures) {
        RuleMeasure ruleMeasure = (RuleMeasure) childMeasure;
        Rule rule = ruleMeasure.getRule();
        if (rule == null || !MeasureUtils.hasValue(ruleMeasure)) {
          continue;
        }
        initRules(countsBySeverity, severity).add(rule, ruleMeasure.getIntValue());
      }

      // Nothing was counted for this severity: no measure to save.
      Multiset<Rule> counts = countsBySeverity.get(severity);
      if (counts == null) {
        continue;
      }

      for (Multiset.Entry<Rule> ruleCount : counts.entrySet()) {
        RuleMeasure measure =
            RuleMeasure.createForRule(
                metric, ruleCount.getElement(), (double) ruleCount.getCount());
        measure.setSeverity(severity);
        context.saveMeasure(measure);
      }
    }
  }
  /**
   * Builds scored candidates from the parent URIs that resolve to a {@link Container}.
   *
   * <p>Every parent entry is looked up by its URI. Containers are scored from the entry's
   * occurrence count and {@code children}; entries that are missing or resolve to something other
   * than a container are only noted in {@code desc}.
   *
   * @param parents parent URIs with their occurrence counts
   * @param children value passed to {@code score} together with each parent's count
   * @param desc description that receives one line of text per parent entry
   * @return the candidates built from all parents that resolved to containers
   */
  private ScoredCandidates<Container> scoreContainers(
      Multiset<String> parents, int children, ResultDescription desc) {
    Builder<Container> scored = DefaultScoredCandidates.fromSource(NAME);
    ResolvedContent resolved = resolver.findByCanonicalUris(parents.elementSet());

    for (Multiset.Entry<String> parent : parents.entrySet()) {
      Maybe<Identified> lookup = resolved.get(parent.getElement());
      if (!lookup.hasValue()) {
        desc.appendText("%s: missing", parent);
        continue;
      }

      Identified identified = lookup.requireValue();
      if (!(identified instanceof Container)) {
        desc.appendText("%s: %s not container", parent, identified.getClass().getSimpleName());
        continue;
      }

      Container container = (Container) identified;
      Score containerScore = score(parent.getCount(), children);
      scored.addEquivalent(container, containerScore);
      desc.appendText(
          "%s: scored %s (%s)",
          container.getCanonicalUri(), containerScore, container.getTitle());
    }

    return scored.build();
  }
Ejemplo n.º 3
0
  /**
   * Demonstrates the basic {@code Multiset} operations: counting occurrences, total size, the
   * distinct element view, plain iteration, entry iteration and removing occurrences.
   *
   * @param args unused
   */
  public static void main(String[] args) {

    // Fill the multiset; the insertion sequence contains "a" x2, "b" x5, "c" x2 and "d" x1.
    Multiset<String> multiset = HashMultiset.create();
    String[] inserted = {"a", "b", "c", "d", "a", "b", "c", "b", "b", "b"};
    for (String value : inserted) {
      multiset.add(value);
    }

    // How often a single element occurs, and how many occurrences there are in total.
    System.out.println("Occurrence of 'b' : " + multiset.count("b"));
    System.out.println("Total Size : " + multiset.size());

    // The distinct elements, viewed as a set.
    Set<String> distinct = multiset.elementSet();
    System.out.println("Set [");
    for (String element : distinct) {
      System.out.println(element);
    }
    System.out.println("]");

    // Iterating the multiset itself yields every occurrence.
    System.out.println("MultiSet [");
    for (String occurrence : multiset) {
      System.out.println(occurrence);
    }
    System.out.println("]");

    // Iterating the entry set yields each distinct element once, with its count.
    System.out.println("MultiSet [");
    for (Multiset.Entry<String> entry : multiset.entrySet()) {
      String element = entry.getElement();
      int occurrences = entry.getCount();
      System.out.println("Element: " + element + ", Occurrence(s): " + occurrences);
    }
    System.out.println("]");

    // Drop two occurrences of "b" and show the remaining count.
    multiset.remove("b", 2);
    System.out.println("Occurence of 'b' : " + multiset.count("b"));
  }
Ejemplo n.º 4
0
 /**
  * Prints the category, then one {@code element:count} line per distinct stored element, and
  * finally delegates to the superclass implementation.
  */
 @Override
 public void printResults() {
   // The storage field is viewed as a multiset of strings (unchecked cast).
   Multiset<String> counted = (Multiset<String>) this.storage;
   System.out.println(category);
   for (Multiset.Entry<String> counter : counted.entrySet()) {
     System.out.printf("%s:%s\n", counter.getElement(), counter.getCount());
   }
   super.printResults();
 }
Ejemplo n.º 5
0
 /**
  * Prints the {@code top} greatest entries of {@code result}, as ranked by {@code comparator},
  * as a small tab-separated table between a start and an end banner.
  *
  * @param comparator ordering used to pick the greatest entries
  * @param top maximum number of entries to print
  * @param result word counts to report on
  */
 public static void printResult(
     Ordering<Multiset.Entry<Word>> comparator, int top, Multiset<Word> result) {
   System.out.println("**********sort result**************");
   System.out.println("\tword\t\tcount");

   for (Multiset.Entry<Word> ranked : comparator.greatestOf(result.entrySet(), top)) {
     Word word = ranked.getElement();
     int occurrences = ranked.getCount();
     System.out.printf("\t%s\t%6d\n", word.getWord(), occurrences);
   }

   System.out.println("**********done...**************");
 }
Ejemplo n.º 6
0
  /**
   * Writes the timer state to {@code out} in three sections, each prefixed with its element
   * count: the watermark timers, the processing-time timers, and the distinct processing-time
   * timestamps with their occurrence counts.
   *
   * @param out target view the state is written to
   * @throws IOException if writing to {@code out} fails
   */
  private void snapshotTimers(DataOutputView out) throws IOException {
    // Section 1: watermark timers.
    out.writeInt(watermarkTimersQueue.size());
    for (Timer<K, W> watermarkTimer : watermarkTimersQueue) {
      snapshotSingleTimer(watermarkTimer, out);
    }

    // Section 2: processing-time timers.
    out.writeInt(processingTimeTimers.size());
    for (Timer<K, W> processingTimer : processingTimeTimers) {
      snapshotSingleTimer(processingTimer, out);
    }

    // Section 3: one (timestamp, count) pair per distinct processing-time timestamp.
    out.writeInt(processingTimeTimerTimestamps.entrySet().size());
    for (Multiset.Entry<Long> timestampCount : processingTimeTimerTimestamps.entrySet()) {
      out.writeLong(timestampCount.getElement());
      out.writeInt(timestampCount.getCount());
    }
  }

  /** Writes a single timer as key, window and timestamp, in that order. */
  private void snapshotSingleTimer(Timer<K, W> timer, DataOutputView out) throws IOException {
    keySerializer.serialize(timer.key, out);
    windowSerializer.serialize(timer.window, out);
    out.writeLong(timer.timestamp);
  }
Ejemplo n.º 7
0
  /**
   * Reads up to one million lines of a comma-separated file and prints, for every column, the
   * number of distinct values seen in that column.
   *
   * <p>Progress (the zero-based line index) is printed every 1000 lines. At least 180 column
   * statistics are always printed; if a row has more fields than that, additional columns are
   * tracked and printed as well instead of failing with an {@link IndexOutOfBoundsException}.
   *
   * @param args unused; the input path is hard-coded
   * @throws RuntimeException wrapping the {@link IOException} if the file is missing or cannot be
   *     read
   */
  public static void main(String[] args) {
    String filename = "d://hhtord.csv";
    final int MAX_FIELD = 180;
    final int MAX = 1000 * 1000;

    // One multiset of values per column, pre-created for the expected number of fields.
    List<Multiset<String>> valuesMultisetArray = Lists.newCopyOnWriteArrayList();
    for (int i = 0; i < MAX_FIELD; i++) {
      Multiset<String> o = HashMultiset.create();
      valuesMultisetArray.add(o);
    }

    // try-with-resources guarantees the file is closed even when reading fails half-way.
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(filename)))) {

      String s = reader.readLine();
      for (int i = 0; i < MAX && s != null; i++, s = reader.readLine()) {
        String[] sd = s.split(",");

        // A row wider than anything seen so far: add the missing column statistics on demand.
        while (valuesMultisetArray.size() < sd.length) {
          Multiset<String> extra = HashMultiset.create();
          valuesMultisetArray.add(extra);
        }

        for (int j = 0; j < sd.length; j++) {
          valuesMultisetArray.get(j).add(sd[j]);
        }

        if (i % 1000 == 0) {
          System.out.println(i);
        }
      }
    } catch (IOException e) {
      // Also covers FileNotFoundException, which is an IOException.
      throw new RuntimeException(e);
    }

    System.out.println("=============");

    // Number of distinct values per column.
    for (Multiset<String> o : valuesMultisetArray) {
      System.out.println(o.entrySet().size());
    }
  }
Ejemplo n.º 8
0
  /**
   * Adds every signature of {@code multiset} to the label map under its ordered-form adjacency
   * array, keeping the occurrence counts, and flushes once the map has grown beyond
   * {@code capacity}.
   *
   * <p>The whole operation runs while holding {@code lock}. The lock is released in a
   * {@code finally} block so that an exception thrown while converting or adding a signature
   * cannot leave the lock held forever.
   *
   * <p>If flushing fails with an {@link IOException} the stack trace is printed and the JVM is
   * terminated with exit code -1.
   *
   * @param multiset signatures to add, with their occurrence counts
   */
  public void add(Multiset<BoolArray> multiset) {
    lock.lock();
    try {
      for (Multiset.Entry<BoolArray> entry : multiset.entrySet()) {
        BoolArray label =
            new SubGraphStructure(entry.getElement().getArray())
                .getOrderedForm()
                .getAdjacencyArray();
        labelMap.add(label, entry.getCount());
      }

      if (isVerbose()) {
        System.out.printf(
            "Added %,d new signatures. LabelMap size:%,d\n", multiset.elementSet().size(), size());
      }

      if (size() > capacity) {
        try {
          flush();
        } catch (IOException exp) {
          exp.printStackTrace();
          System.exit(-1);
        }
      }
    } finally {
      lock.unlock();
    }
  }
Ejemplo n.º 9
0
  /**
   * Scores every condition of an OR group and keeps only the entries that meet the group's
   * minimum experiment count.
   *
   * <p>Each condition first receives the group's bio entity id restriction set, is then scored
   * via {@code scoreQuery}, and the results of all conditions are added up in one multiset.
   *
   * @param orConditions the OR group whose conditions are scored
   * @param statisticsStorage storage passed through to {@code scoreQuery}
   * @param scoringExps Set of experiments that have at least one non-zero score for
   *     statisticsQuery. This is used when retrieving efos to be displayed in the heatmap when no
   *     query efvs exist (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
   * @return Multiset<Integer> containing the accumulated experiment counts for all conditions in
   *     orConditions, restricted to entries whose count is at least
   *     {@code orConditions.getMinExperiments()}
   */
  private static Multiset<Integer> getScoresForOrConditions(
      final StatisticsQueryOrConditions<StatisticsQueryCondition> orConditions,
      StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // Accumulate the scores of every alternative in the OR group.
    Multiset<Integer> accumulated = HashMultiset.create();
    for (StatisticsQueryCondition condition : orConditions.getConditions()) {
      condition.setBioEntityIdRestrictionSet(orConditions.getBioEntityIdRestrictionSet());
      Multiset<Integer> conditionScores = scoreQuery(condition, statisticsStorage, scoringExps);
      accumulated.addAll(conditionScores);
    }

    // Drop everything below the group's minimum experiment count.
    Multiset<Integer> qualifying = HashMultiset.create();
    for (Multiset.Entry<Integer> scored : accumulated.entrySet()) {
      if (scored.getCount() < orConditions.getMinExperiments()) {
        continue;
      }
      qualifying.setCount(scored.getElement(), scored.getCount());
    }

    return qualifying;
  }
Ejemplo n.º 10
0
  /**
   * Reports an error on {@code protoclass} when two or more attributes of {@code type} share the
   * same name.
   *
   * @param type value type whose attribute names are checked
   * @param protoclass receives the error report listing the duplicated names
   */
  private void checkAttributeNamesForDuplicates(ValueType type, Protoclass protoclass) {
    if (type.attributes.isEmpty()) {
      return;
    }

    // Count how often each attribute name occurs.
    Multiset<String> nameCounts = HashMultiset.create(type.attributes.size());
    for (ValueAttribute attribute : type.attributes) {
      nameCounts.add(attribute.name());
    }

    // Collect every name that occurs more than once.
    List<String> repeatedNames = Lists.newArrayList();
    for (Multiset.Entry<String> nameCount : nameCounts.entrySet()) {
      if (nameCount.getCount() > 1) {
        repeatedNames.add(nameCount.getElement());
      }
    }

    if (repeatedNames.isEmpty()) {
      return;
    }

    protoclass
        .report()
        .error(
            "Duplicate attribute names %s. You should check if correct @Value.Style applied",
            repeatedNames);
  }
  /**
   * Processes the model changes collected in {@code modelChanges}: finds the queries affected by
   * them, computes the matchings each change yields for each affected query, hands the resulting
   * deltas to {@code AdvancedDeltaProcessor} and finally resets {@code modelChanges} to an empty
   * list.
   *
   * <p>Despite the capitalised name this is an ordinary {@code void} method, not a constructor.
   *
   * <p>If {@code modelChanges} is {@code null} it is initialised to an empty list and the method
   * returns; if it is empty the method returns without doing anything.
   */
  public void MagicProcessor() {
    if (modelChanges == null) {
      modelChanges = new ArrayList<ModelChange>();
      return;
    } else if (modelChanges.size() == 0) return;
    // else go!

    // gets all model deltas and processes!
    // System.out.println("[MAGIC] Update match set based on model change started...");

    // Only referenced by the commented-out timing output at the end of the method.
    long start = System.currentTimeMillis();

    // Collect every query registered in RelativeSet for a changed feature, class or data type.
    Set<PQuery> affecteds = new HashSet<PQuery>();
    for (ModelChange change : modelChanges) {
      if (change instanceof EFeatureChange)
        affecteds.addAll(
            LookaheadMatcherTreat.RelativeSet.get(((EFeatureChange) change).getChangedFeature()));
      if (change instanceof EClassChange)
        affecteds.addAll(
            LookaheadMatcherTreat.RelativeSet.get(((EClassChange) change).getChange()));
      if (change instanceof EDataTypeChange)
        affecteds.addAll(
            LookaheadMatcherTreat.RelativeSet.get(((EDataTypeChange) change).getChange()));
    }

    // Deltas are gathered for all queries first and only propagated after the main loop.
    ArrayList<ModelDelta> deltas = new ArrayList<ModelDelta>();

    for (PQuery affectedQuery : affecteds) {

      ArrayList<AheadStructure> cachedStructures =
          LookaheadMatcherTreat.GodSetStructures.get(affectedQuery);
      // deliver deltas for pattern!
      // Phase 1: put every change into the mailbox of each constraint it concerns
      // (relation constraints by edge, type constraints by type).
      for (ModelChange change : modelChanges) {
        for (AheadStructure aSn : cachedStructures) {
          for (AxisConstraint rC : aSn.SearchedConstraints) {
            if (rC instanceof RelationConstraint && change instanceof EFeatureChange) {
              EFeatureChange changenow = (EFeatureChange) change;
              if (((RelationConstraint) rC).getEdge().equals(changenow.getChangedFeature()))
                rC.putToMailbox(change);
            } else if (rC instanceof TypeConstraint && change instanceof EDataTypeChange) {
              EDataTypeChange changenow = (EDataTypeChange) change;
              if (((TypeConstraint) rC).getType().equals(changenow.getChange()))
                rC.putToMailbox(change);
            }
            // Note: a separate `if`, not part of the else-if chain above.
            if (rC instanceof TypeConstraint && change instanceof EClassChange) {
              EClassChange changenow = (EClassChange) change;
              if (((TypeConstraint) rC).getType().equals(changenow.getChange()))
                rC.putToMailbox(change);
            }
          }
        }
      }

      // Phase 2: handle the changes one by one, in list order.
      for (ModelChange change : modelChanges) {
        // process this change: first remove all deltas from constraints with this change
        for (AheadStructure aSn : cachedStructures) {
          for (AxisConstraint rC : aSn.SearchedConstraints) {
            if (rC.hasMailboxContent()) {
              if (rC.getMailboxContent().contains(change)) rC.removeFromMailbox(change);
            }
          }
        }
        // apply modelchange:
        // Variables whose values are fixed by this change; a later matching constraint
        // overwrites the binding of an earlier one for the same variable.
        HashMap<PVariable, Object> knownLocalAndParameters = new HashMap<PVariable, Object>();
        for (AheadStructure aSn : cachedStructures) {
          // find all relationConstraints
          for (AxisConstraint rC : aSn.SearchedConstraints) {
            if (rC instanceof RelationConstraint && change instanceof EFeatureChange) {
              EFeatureChange changenow = (EFeatureChange) change;
              if (((RelationConstraint) rC).getEdge().equals(changenow.getChangedFeature())) {
                // affected relaconstraint's lookvariables should be bound!!
                knownLocalAndParameters.put(
                    ((RelationConstraint) rC).getSource(), changenow.getHost());
                knownLocalAndParameters.put(
                    ((RelationConstraint) rC).getTarget(), changenow.getInstance());
              }
            } else if (rC instanceof TypeConstraint && change instanceof EDataTypeChange) {
              EDataTypeChange changenow = (EDataTypeChange) change;
              if (((TypeConstraint) rC).getType().equals(changenow.getChange())) {
                // affected typeconstraint's lookvariable should be bound!!
                knownLocalAndParameters.put(
                    ((TypeConstraint) rC).getTypedVariable(), changenow.getInstance());
              }
            }
            // Note: a separate `if`, not part of the else-if chain above.
            if (rC instanceof TypeConstraint && change instanceof EClassChange) {
              EClassChange changenow = (EClassChange) change;
              if (((TypeConstraint) rC).getType().equals(changenow.getChange())) {
                // affected typeconstraint's lookvariable should be bound!!
                knownLocalAndParameters.put(
                    ((TypeConstraint) rC).getTypedVariable(), changenow.getInstance());
              }
            }
          }
        }

        // manual satisfy and clone cachedStructures (createNew* clones input):
        ArrayList<AheadStructure> newStructs = null;
        // Reset before the createNew* call; nothing in this method sets it to true, so it is
        // presumably updated by createNewFromOldRelaC / createNewFromOldTypeC -- TODO confirm.
        isModified = false;
        if (change instanceof EFeatureChange) {
          EFeatureChange changenow = (EFeatureChange) change;
          newStructs =
              createNewFromOldRelaC(
                  changenow.getHost(),
                  changenow.getInstance(),
                  changenow.getChangedFeature(),
                  cachedStructures);
        } else if (change instanceof EDataTypeChange) {
          EDataTypeChange changenow = (EDataTypeChange) change;
          newStructs =
              createNewFromOldTypeC(
                  false, changenow.getChange(), changenow.getInstance(), cachedStructures);
        }
        // Note: a separate `if`, not part of the else-if chain above.
        if (change instanceof EClassChange) {
          EClassChange changenow = (EClassChange) change;
          newStructs =
              createNewFromOldTypeC(
                  false, changenow.getChange(), changenow.getInstance(), cachedStructures);
        }
        if (isModified) {
          // the new matches that'll appear in matching based on manually satisfied structure
          Multiset<LookaheadMatching> newbies_toExamine =
              (new LookaheadMatcherInterface(this.navHelper))
                  .searchChangesAll(
                      treat.getIncQueryEngine(),
                      affectedQuery,
                      newStructs,
                      knownLocalAndParameters,
                      new TreatConstraintEnumerator(this.navHelper));

          // a new map to store a matching and whether it is added or removed
          HashMultimap<LookaheadMatching, Boolean> newMatchingsAndChange = HashMultimap.create();

          // iterate over multiset and create delta
          // NOTE(review): HashMultimap keeps each key/value pair at most once, so repeating the
          // put getCount() times looks redundant -- confirm whether the multiplicity of a
          // matching was meant to be preserved here.
          for (com.google.common.collect.Multiset.Entry<LookaheadMatching> inners :
              newbies_toExamine.entrySet()) {
            for (int pi = 0; pi < inners.getCount(); pi++)
              newMatchingsAndChange.put(inners.getElement(), change.isAddition());
          }
          // delta needed to propagate the changes
          if (newMatchingsAndChange.size() > 0) {
            ModelDelta d = new ModelDelta(affectedQuery, newMatchingsAndChange);
            deltas.add(d);
          }
        }
      }
    }

    // apply deltas
    for (ModelDelta delta : deltas) {
      // System.out.println("Propagate a delta: " + delta.getPattern().getFullyQualifiedName());
      AdvancedDeltaProcessor.getInstance().ReceiveDelta(delta);
    }
    AdvancedDeltaProcessor.getInstance().ProcessReceivedDeltaSet();

    // System.out.println("[MAGIC] Update match set based on model change ended! Time:" +
    // Long.toString(System.currentTimeMillis() - start));

    // finally:
    // Start the next round with a fresh, empty change list.
    modelChanges = new ArrayList<ModelChange>();
  }
Ejemplo n.º 12
0
 /**
  * Formats a multiset by delegating its entry set to {@code formatEntries}.
  *
  * @param multiset the multiset whose entries are formatted
  * @param keyConverter converter handed to {@code formatEntries} for the multiset's elements
  * @return the string produced by {@code formatEntries}
  * @since 2.7
  */
 public static <K> String format(Multiset<K> multiset, Converter<K> keyConverter) {
   return formatEntries(multiset.entrySet(), keyConverter);
 }
  /**
   * Merges several expression files into one by summing, per identifier, the counts found in all
   * input files.
   *
   * <p>In every input file the first line is treated as a header and skipped. Lines without a tab
   * and lines whose count is not an integer are ignored. Identifiers that were only ever seen with
   * a count of zero are still written, with a count of {@code 0}, after all non-zero totals.
   *
   * @param inFileIterator the files to merge
   * @param outFile the file that receives the header and the merged counts
   * @throws IOException if reading an input file or writing the output file fails
   */
  @Override
  public void merge(final Iterator<DataFile> inFileIterator, DataFile outFile) throws IOException {

    final Multiset<String> totals = HashMultiset.create();
    final Set<String> zeroCountIds = new HashSet<>();

    while (inFileIterator.hasNext()) {

      final DataFile source = inFileIterator.next();

      EoulsanLogger.getLogger().info("Merge " + source.getName() + " to " + outFile.getName());

      try (BufferedReader reader = new BufferedReader(new InputStreamReader(source.open()))) {

        boolean headerPending = true;

        for (String line = reader.readLine(); line != null; line = reader.readLine()) {

          // The first line of each file is the header
          if (headerPending) {
            headerPending = false;
            continue;
          }

          // Lines without a tab are empty or invalid
          final int separator = line.indexOf('\t');
          if (separator < 0) {
            continue;
          }

          final String id = line.substring(0, separator).trim();

          final int count;
          try {
            count = Integer.parseInt(line.substring(separator).trim());
          } catch (NumberFormatException e) {
            // Lines with an unparsable count are ignored
            continue;
          }

          // Remember ids seen with a zero count: adding zero occurrences leaves no entry
          if (count == 0) {
            zeroCountIds.add(id);
          }

          totals.add(id, count);
        }
      }
    }

    // Write the result file
    try (Writer writer = new OutputStreamWriter(outFile.create())) {

      writer.write(ExpressionSplitter.EXPRESSION_FILE_HEADER);

      // First the ids that have a total
      for (Multiset.Entry<String> total : totals.entrySet()) {

        final String id = total.getElement();

        // This id has a total, so it must not be written again as a zero count
        zeroCountIds.remove(id);

        writer.write(id + '\t' + total.getCount() + '\n');
      }

      // Then the ids that only ever had a zero count
      for (String id : zeroCountIds) {
        writer.write(id + "\t0\n");
      }
    }
  }
Ejemplo n.º 14
0
  /**
   * Classifies every row of a tab-separated complaints file with a trained naive Bayes model and
   * prints, per row, the score of each label followed by the best-scoring label.
   *
   * <p>For each row the text in column 19 is tokenized, the words known to the dictionary are
   * counted, turned into a TF-IDF weighted sparse vector and passed to the classifier.
   *
   * <p>Rows with fewer than 20 columns are reported on standard error and skipped. Dictionary
   * words without a document frequency are left out of the vector. The token stream, the input
   * reader and the analyzer are released even when processing fails.
   *
   * @param args model path, label index path, dictionary path, document frequency path and
   *     complaints file path, in that order
   * @throws Exception if a model or index file cannot be loaded, or the complaints file cannot be
   *     read or tokenized
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 5) {
      System.out.println(
          "Arguments: [model] [label index] [dictionnary] [document frequency] [Customer description]");
      return;
    }
    String modelPath = args[0];
    String labelIndexPath = args[1];
    String dictionaryPath = args[2];
    String documentFrequencyPath = args[3];
    String carsPath = args[4];

    Configuration configuration = new Configuration();

    // model is a matrix (wordId, labelId) => probability score
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelPath), configuration);

    StandardNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

    // labels maps a label index to its name
    Map<Integer, String> labels =
        BayesUtils.readLabelIndex(configuration, new Path(labelIndexPath));
    Map<String, Integer> dictionary = readDictionnary(configuration, new Path(dictionaryPath));
    Map<Integer, Long> documentFrequency =
        readDocumentFrequency(configuration, new Path(documentFrequencyPath));

    // analyzer used to extract words from the complaint description
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    try {
      int labelCount = labels.size();
      // the entry stored under key -1 is read as the number of documents
      int documentCount = documentFrequency.get(-1).intValue();

      System.out.println("Number of labels: " + labelCount);
      System.out.println("Number of documents in training set: " + documentCount);

      BufferedReader reader = new BufferedReader(new FileReader(carsPath));
      try {
        String line;
        while ((line = reader.readLine()) != null) {

          String[] tokens = line.split("\t", 47);
          // Columns 0 (id) and 19 (description) are needed; skip rows that are too short.
          if (tokens.length <= 19) {
            System.err.println("Skipping line with fewer than 20 columns: " + line);
            continue;
          }
          String cmplid = tokens[0];
          String cdescr = tokens[19];

          System.out.println("Complaint id: " + cmplid + "\t" + cdescr);

          Multiset<String> words = ConcurrentHashMultiset.create();
          int wordCount = 0;

          // extract words from complaint description
          TokenStream ts = analyzer.tokenStream("text", new StringReader(cdescr));
          try {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
              if (termAtt.length() > 0) {
                String word = termAtt.toString();
                // if the word is not in the dictionary, skip it
                if (dictionary.get(word) != null) {
                  words.add(word);
                  wordCount++;
                }
              }
            }
            // The TokenStream contract requires end() after the last token and close() before
            // the analyzer hands out the next stream.
            ts.end();
          } finally {
            ts.close();
          }

          // create vector wordId => weight using tfidf
          Vector vector = new RandomAccessSparseVector(1000);
          TFIDF tfidf = new TFIDF();
          for (Multiset.Entry<String> entry : words.entrySet()) {
            String word = entry.getElement();
            int count = entry.getCount();
            Integer wordId = dictionary.get(word);
            Long freq = documentFrequency.get(wordId);
            // No document frequency for this word: it cannot be weighted, so leave it out
            // instead of failing with a NullPointerException.
            if (freq == null) {
              continue;
            }
            double tfIdfValue = tfidf.calculate(count, freq.intValue(), wordCount, documentCount);
            vector.setQuick(wordId, tfIdfValue);
          }

          // With the classifier, we get one score for each label.
          // The label with the highest score is the one the complaint is most likely to
          // be associated with.
          Vector resultVector = classifier.classifyFull(vector);
          double bestScore = -Double.MAX_VALUE;
          int bestCategoryId = -1;
          for (Element element : resultVector.all()) {
            int categoryId = element.index();
            double score = element.get();
            if (score > bestScore) {
              bestScore = score;
              bestCategoryId = categoryId;
            }
            System.out.print("  " + labels.get(categoryId) + ": " + score);
          }
          System.out.println(" => " + labels.get(bestCategoryId));
        }
      } finally {
        reader.close();
      }
    } finally {
      analyzer.close();
    }
  }
    /**
     * Transform this raw statement into a CreateTableStatement.
     *
     * <p>The checks run in this order, and the first one that fails determines the error:
     * column family name syntax and length, identifiers defined more than once, table properties,
     * presence of exactly one PRIMARY KEY, types of the partition key parts, clustering columns
     * and comparator, static column restrictions, COMPACT STORAGE value column rules and finally
     * the CLUSTERING ORDER directive.
     *
     * @return the prepared statement wrapping the populated {@code CreateTableStatement}
     * @throws RequestValidationException an {@code InvalidRequestException} is thrown for every
     *     validation failure detected here; {@code properties.validate()} may throw as well
     */
    public ParsedStatement.Prepared prepare() throws RequestValidationException {
      // Column family name
      // NOTE(review): "\\w+" also accepts the underscore, which the message's [0-9A-Za-z]+ does
      // not mention.
      if (!columnFamily().matches("\\w+"))
        throw new InvalidRequestException(
            String.format(
                "\"%s\" is not a valid column family name (must be alphanumeric character only: [0-9A-Za-z]+)",
                columnFamily()));
      if (columnFamily().length() > Schema.NAME_LENGTH)
        throw new InvalidRequestException(
            String.format(
                "Column family names shouldn't be more than %s characters long (got \"%s\")",
                Schema.NAME_LENGTH, columnFamily()));

      // An identifier counted more than once in definedNames was defined several times.
      for (Multiset.Entry<ColumnIdentifier> entry : definedNames.entrySet())
        if (entry.getCount() > 1)
          throw new InvalidRequestException(
              String.format("Multiple definition of identifier %s", entry.getElement()));

      properties.validate();
      CreateTableStatement stmt =
          new CreateTableStatement(cfName, properties, ifNotExists, staticColumns);

      // Stays null when no collection column is defined; later checks rely on that.
      Map<ByteBuffer, CollectionType> definedCollections = null;
      for (Map.Entry<ColumnIdentifier, CQL3Type> entry : definitions.entrySet()) {

        ColumnIdentifier id = entry.getKey();
        CQL3Type pt = entry.getValue();
        if (pt.isCollection()) {
          if (definedCollections == null)
            definedCollections = new HashMap<ByteBuffer, CollectionType>();
          definedCollections.put(id.key, (CollectionType) pt.getType());
        }
        stmt.columns.put(id, pt.getType()); // we'll remove what is not a column below
      }

      if (keyAliases.isEmpty())
        throw new InvalidRequestException("No PRIMARY KEY specifed (exactly one required)");
      else if (keyAliases.size() > 1)
        throw new InvalidRequestException("Multiple PRIMARY KEYs specifed (exactly one required)");

      List<ColumnIdentifier> kAliases = keyAliases.get(0);

      // Partition key: each alias is taken out of stmt.columns via getTypeAndRemove and may be
      // neither a counter nor a static column.
      List<AbstractType<?>> keyTypes = new ArrayList<AbstractType<?>>(kAliases.size());
      for (ColumnIdentifier alias : kAliases) {
        stmt.keyAliases.add(alias.key);
        AbstractType<?> t = getTypeAndRemove(stmt.columns, alias);
        if (t instanceof CounterColumnType)
          throw new InvalidRequestException(
              String.format("counter type is not supported for PRIMARY KEY part %s", alias));
        if (staticColumns.contains(alias))
          throw new InvalidRequestException(
              String.format("Static column %s cannot be part of the PRIMARY KEY", alias));
        keyTypes.add(t);
      }
      // A single key part is used directly; several parts are combined into a CompositeType.
      stmt.keyValidator =
          keyTypes.size() == 1 ? keyTypes.get(0) : CompositeType.getInstance(keyTypes);

      // Dense means that no part of the comparator stores a CQL column name. This means
      // COMPACT STORAGE with at least one columnAliases (otherwise it's a thrift "static" CF).
      stmt.isDense = useCompactStorage && !columnAliases.isEmpty();

      // Handle column aliases
      if (columnAliases.isEmpty()) {
        if (useCompactStorage) {
          // There should remain some column definition since it is a non-composite "static" CF
          if (stmt.columns.isEmpty())
            throw new InvalidRequestException(
                "No definition found that is not part of the PRIMARY KEY");

          if (definedCollections != null)
            throw new InvalidRequestException(
                "Collection types are not supported with COMPACT STORAGE");

          stmt.comparator = CFDefinition.definitionType;
        } else {
          // Without COMPACT STORAGE: the definition type, plus the collection type if any
          // collection column was defined.
          List<AbstractType<?>> types =
              new ArrayList<AbstractType<?>>(definedCollections == null ? 1 : 2);
          types.add(CFDefinition.definitionType);
          if (definedCollections != null)
            types.add(ColumnToCollectionType.getInstance(definedCollections));
          stmt.comparator = CompositeType.getInstance(types);
        }
      } else {
        // If we use compact storage and have only one alias, it is a
        // standard "dynamic" CF, otherwise it's a composite
        if (useCompactStorage && columnAliases.size() == 1) {
          if (definedCollections != null)
            throw new InvalidRequestException(
                "Collection types are not supported with COMPACT STORAGE");
          ColumnIdentifier alias = columnAliases.get(0);
          stmt.columnAliases.add(alias.key);
          stmt.comparator = getTypeAndRemove(stmt.columns, alias);
          if (stmt.comparator instanceof CounterColumnType)
            throw new InvalidRequestException(
                String.format("counter type is not supported for PRIMARY KEY part %s", alias));
          if (staticColumns.contains(alias))
            throw new InvalidRequestException(
                String.format("Static column %s cannot be part of the PRIMARY KEY", alias));
        } else {
          // Composite comparator: one component per clustering column, checked like the
          // partition key parts above.
          List<AbstractType<?>> types = new ArrayList<AbstractType<?>>(columnAliases.size() + 1);
          for (ColumnIdentifier t : columnAliases) {
            stmt.columnAliases.add(t.key);

            AbstractType<?> type = getTypeAndRemove(stmt.columns, t);
            if (type instanceof CounterColumnType)
              throw new InvalidRequestException(
                  String.format("counter type is not supported for PRIMARY KEY part %s", t));
            if (staticColumns.contains(t))
              throw new InvalidRequestException(
                  String.format("Static column %s cannot be part of the PRIMARY KEY", t));
            types.add(type);
          }

          if (useCompactStorage) {
            if (definedCollections != null)
              throw new InvalidRequestException(
                  "Collection types are not supported with COMPACT STORAGE");
          } else {
            // For sparse, we must add the last UTF8 component
            // and the collection type if there is one
            types.add(CFDefinition.definitionType);
            if (definedCollections != null)
              types.add(ColumnToCollectionType.getInstance(definedCollections));
          }

          // Defensive check: this branch is only entered with a non-empty columnAliases, and
          // each alias adds a type unless an exception is thrown first.
          if (types.isEmpty())
            throw new IllegalStateException("Nonsensical empty parameter list for CompositeType");
          stmt.comparator = CompositeType.getInstance(types);
        }
      }

      if (!staticColumns.isEmpty()) {
        // Only CQL3 tables can have static columns
        if (useCompactStorage)
          throw new InvalidRequestException(
              "Static columns are not supported in COMPACT STORAGE tables");
        // Static columns only make sense if we have at least one clustering column. Otherwise
        // everything is static anyway
        if (columnAliases.isEmpty())
          throw new InvalidRequestException(
              "Static columns are only useful (and thus allowed) if the table has at least one clustering column");
      }

      if (useCompactStorage && !stmt.columnAliases.isEmpty()) {
        if (stmt.columns.isEmpty()) {
          // The only value we'll insert will be the empty one, so the default validator don't
          // matter
          stmt.defaultValidator = BytesType.instance;
          // We need to distinguish between
          //   * I'm upgrading from thrift so the valueAlias is null
          //   * I've defined my table with only a PK (and the column value will be empty)
          // So, we use an empty valueAlias (rather than null) for the second case
          stmt.valueAlias = ByteBufferUtil.EMPTY_BYTE_BUFFER;
        } else {
          if (stmt.columns.size() > 1)
            throw new InvalidRequestException(
                String.format(
                    "COMPACT STORAGE with composite PRIMARY KEY allows no more than one column not part of the PRIMARY KEY (got: %s)",
                    StringUtils.join(stmt.columns.keySet(), ", ")));

          // Exactly one column is left at this point: it becomes the value column and is
          // removed from the regular columns.
          Map.Entry<ColumnIdentifier, AbstractType> lastEntry =
              stmt.columns.entrySet().iterator().next();
          stmt.defaultValidator = lastEntry.getValue();
          stmt.valueAlias = lastEntry.getKey().key;
          stmt.columns.remove(lastEntry.getKey());
        }
      } else {
        // For compact, we are in the "static" case, so we need at least one column defined. For
        // non-compact however, having
        // just the PK is fine since we have CQL3 row marker.
        if (useCompactStorage && stmt.columns.isEmpty())
          throw new InvalidRequestException(
              "COMPACT STORAGE with non-composite PRIMARY KEY require one column not part of the PRIMARY KEY, none given");

        // There is no way to insert/access a column that is not defined for non-compact storage, so
        // the actual validator don't matter much (except that we want to recognize counter CF as
        // limitation apply to them).
        // Only the first remaining column is inspected to decide between counter and bytes.
        stmt.defaultValidator =
            !stmt.columns.isEmpty()
                    && (stmt.columns.values().iterator().next() instanceof CounterColumnType)
                ? CounterColumnType.instance
                : BytesType.instance;
      }

      // If we give a clustering order, we must explicitly do so for all aliases and in the order of
      // the PK
      if (!definedOrdering.isEmpty()) {
        if (definedOrdering.size() > columnAliases.size())
          throw new InvalidRequestException(
              "Only clustering key columns can be defined in CLUSTERING ORDER directive");

        // The size check above keeps i within the bounds of columnAliases.
        int i = 0;
        for (ColumnIdentifier id : definedOrdering.keySet()) {
          ColumnIdentifier c = columnAliases.get(i);
          if (!id.equals(c)) {
            // Either the expected column appears later in the directive (wrong order) or it is
            // absent from it altogether.
            if (definedOrdering.containsKey(c))
              throw new InvalidRequestException(
                  String.format(
                      "The order of columns in the CLUSTERING ORDER directive must be the one of the clustering key (%s must appear before %s)",
                      c, id));
            else
              throw new InvalidRequestException(
                  String.format("Missing CLUSTERING ORDER for column %s", c));
          }
          ++i;
        }
      }

      return new ParsedStatement.Prepared(stmt);
    }
Ejemplo n.º 16
0
  /**
   * Core scoring routine for statistics queries.
   *
   * <p>A query without nested conditions is a leaf and is scored directly against {@code
   * statisticsStorage}. A query with nested conditions is scored by evaluating each OR-group of
   * conditions and AND-ing the per-group results together.
   *
   * @param statisticsQuery query to be performed on {@code statisticsStorage}
   * @param statisticsStorage core data for statistics queries
   * @param scoringExps an out parameter.
   *     <ul>
   *       <li>If {@code null}, experiment counts for {@code statisticsQuery} are computed and
   *           returned.
   *       <li>If non-null, it acts as a flag that an optimised query should be performed which
   *           only collects (into this set) the experiments for which non-zero counts exist. A
   *           typical call scenario in this case is a single efv per query, which allows the
   *           caller both to check whether the efv attribute itself is a scoring one and to map
   *           the attribute and the experiments in {@code scoringExps} to efo terms via the
   *           reverse mapping efv-experiment -> efo term in EfoIndex (c.f.
   *           atlasStatisticsQueryService.getScoringAttributesForGenes()).
   *     </ul>
   *
   * @return Multiset of aggregated experiment counts, where the set of scored genes is intersected
   *     across {@code statisticsQuery.getConditions()} and union-ed across attributes within each
   *     condition; never {@code null} (an empty multiset is returned when nothing was scored)
   */
  public static Multiset<Integer> scoreQuery(
      StatisticsQueryCondition statisticsQuery,
      final StatisticsStorage statisticsStorage,
      Set<ExperimentInfo> scoringExps) {

    // scoringExps == null -> experiment counts are calculated for statisticsQuery
    // scoringExps != null -> scoring experiments are only collected (into scoringExps)
    final boolean countExperiments = scoringExps == null;
    Set<StatisticsQueryOrConditions<StatisticsQueryCondition>> andConditions =
        statisticsQuery.getConditions();

    Multiset<Integer> scores = null;

    if (!andConditions.isEmpty()) {
      // Walk all AND-ed groups; each group is OR-ed internally by getScoresForOrConditions().
      for (StatisticsQueryOrConditions<StatisticsQueryCondition> orGroup : andConditions) {

        // Hand the gene restriction set down to the OR-group before scoring it
        orGroup.setGeneRestrictionSet(statisticsQuery.getBioEntityIdRestrictionSet());
        Multiset<Integer> groupScores =
            getScoresForOrConditions(orGroup, statisticsStorage, scoringExps);

        if (scores == null) {
          // First group seen so far: its scores seed the running intersection
          scores = groupScores;
          continue;
        }

        Iterator<Multiset.Entry<Integer>> remaining = scores.entrySet().iterator();
        while (remaining.hasNext()) {
          Multiset.Entry<Integer> scored = remaining.next();
          Integer geneId = scored.getElement();
          if (groupScores.contains(geneId)) {
            // Gene survives the AND: accumulate its experiment counts across groups
            scores.setCount(geneId, scored.getCount() + groupScores.count(geneId));
          } else {
            // Gene is absent from this group, so it drops out of the intersection
            remaining.remove();
          }
        }
      }
    } else {
      // Leaf query - end of recursion
      Set<Integer> geneRestriction = statisticsQuery.getBioEntityIdRestrictionSet();
      Set<EfAttribute> attributes = statisticsQuery.getAttributes();

      if (attributes.isEmpty()) {
        // No attributes given - fall back to the scores pre-computed across all attributes
        Multiset<Integer> allEfoScores =
            statisticsStorage.getScoresAcrossAllEfos(statisticsQuery.getStatisticsType());
        scores = intersect(allEfoScores, geneRestriction);
      } else {
        scores = HashMultiset.create();
        setQueryExperiments(statisticsQuery, statisticsStorage);

        // Per experiment, pool the gene indexes of all attributes into one set, so that a gene
        // scores at most once per experiment across all OR-ed attributes; the pooled set is then
        // added to the resulting multiset.
        for (ExperimentInfo exp : statisticsQuery.getExperiments()) {
          FastSet genesInExperiment = new FastSet();

          for (EfAttribute attr : attributes) {
            Map<ExperimentInfo, ConciseSet> statsByExperiment =
                getStatisticsForAttribute(
                    statisticsQuery.getStatisticsType(), attr, statisticsStorage);
            if (statsByExperiment == null) {
              continue;
            }
            if (statsByExperiment.isEmpty()) {
              log.debug(
                  "Failed to retrieve stats for stat: "
                      + statisticsQuery.getStatisticsType()
                      + " and attr: "
                      + attr);
              continue;
            }

            ConciseSet expStats = statsByExperiment.get(exp);
            if (expStats == null) {
              log.debug(
                  "Failed to retrieve stats for stat: "
                      + statisticsQuery.getStatisticsType()
                      + " exp: "
                      + exp.getAccession()
                      + " and attr: "
                      + attr);
              continue;
            }

            if (countExperiments) {
              genesInExperiment.addAll(intersect(expStats, geneRestriction));
            } else if (containsAtLeastOne(expStats, geneRestriction)) {
              // exp has a non-zero score for at least one gene index in the restriction set
              scoringExps.add(exp);
            }
          }

          if (countExperiments) {
            scores.addAll(genesInExperiment);
          }
        }
      }
    }

    // Callers always get a multiset back, even when no branch produced one
    return scores != null ? scores : HashMultiset.<Integer>create();
  }