Пример #1
0
 /**
  * Parses a comma-delimited sequence of values into a set of longs.
  *
  * @param values delimited text; every decoded field must be parseable by {@link Long#parseLong}
  * @return a new set holding each parsed value
  * @throws NumberFormatException if a decoded field is not a valid long
  */
 private static LongSet stringToSet(CharSequence values) {
   LongSet parsed = new LongSet();
   for (String token : DelimitedDataUtils.decode(values, ',')) {
     long value = Long.parseLong(token);
     parsed.add(value);
   }
   return parsed;
 }
Пример #2
0
 /**
  * Writes the given items to the response as CSV, one item per line. Every line has the form
  * {@code itemID,strength}, for example {@code "ABC",0.53}; the strength is an opaque indicator
  * of the relative quality of the recommendation.
  *
  * @param response servlet response whose writer receives the output
  * @param items ID/value pairs to write, in iteration order
  * @throws IOException if the response writer cannot be obtained or written to
  */
 final void output(ServletResponse response, Iterable<IDValue> items) throws IOException {
   Writer out = response.getWriter();
   for (IDValue idValue : items) {
     String line = DelimitedDataUtils.encode(idValue.getID(), Float.toString(idValue.getValue()));
     out.write(line);
     out.write('\n');
   }
 }
Пример #3
0
 /**
  * Encodes the keys of the given map as a single comma-delimited string. Keys appear in the order
  * produced by the map's key iterator; the map's values are not used.
  *
  * @param map map whose long keys are to be encoded
  * @return the keys, rendered in decimal and joined by {@code DelimitedDataUtils.encode}
  */
 private static String setToString(LongFloatMap map) {
   LongPrimitiveIterator keys = map.keySetIterator();
   // Pre-size the buffer: exactly one string per key.
   Collection<String> encodedKeys = Lists.newArrayListWithCapacity(map.size());
   while (keys.hasNext()) {
     long key = keys.nextLong();
     encodedKeys.add(String.valueOf(key));
   }
   return DelimitedDataUtils.encode(',', encodedKeys);
 }
Пример #4
0
 /**
  * Selects the top {@code numRecs} entries from the input's values and emits one comma-delimited
  * line per selected entry: the string form of the input key (the user ID), the string form of
  * the entry's ID, and the entry's value.
  *
  * @param input pair of a numeric key and the candidate values for that key
  * @param emitter receives one encoded line per selected entry
  */
 @Override
 public void process(Pair<Long, Iterable<NumericIDValue>> input, Emitter<String> emitter) {
   StringLongMapping idToString = idMapping.getIDMapping();
   Iterable<NumericIDValue> topRecs = TopN.selectTopN(input.second().iterator(), numRecs);
   // The key is the same for every emitted line, so map it to its string form once.
   String user = idToString.toString(input.first());
   for (NumericIDValue topRec : topRecs) {
     String recID = idToString.toString(topRec.getID());
     String strength = Float.toString(topRec.getValue());
     emitter.emit(DelimitedDataUtils.encode(',', user, recID, strength));
   }
 }
Пример #5
0
  /**
   * Handles a POST whose body holds one delimited example per line. Each line is appended to the
   * generation manager, decoded into tokens, converted into an {@code Example} and used to update
   * the current generation's forest.
   *
   * <p>Responds with 503 when there is no current generation, and with 400 when a line has the
   * wrong number of columns or a token cannot be turned into a feature. Processing stops at the
   * first rejected line; lines before it have already been applied to the forest.
   *
   * <p>NOTE(review): a line is appended to the generation manager before it is validated, so a
   * line that is rejected with 400 has still been appended. Confirm this is intended.
   *
   * @param request request whose reader supplies the input lines
   * @param response response used to report errors
   * @throws IOException if reading the request or sending an error fails
   */
  @Override
  protected void doPost(HttpServletRequest request, HttpServletResponse response)
      throws IOException {

    RDFGenerationManager manager = getGenerationManager();
    Generation current = manager.getCurrentGeneration();
    if (current == null) {
      response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE);
      return;
    }

    InboundSettings settings = getInboundSettings();

    TreeBasedClassifier forest = current.getForest();
    Map<Integer, BiMap<String, Integer>> categoryMapping =
        current.getColumnToCategoryNameToIDMapping();

    int expectedColumns = getTotalColumns();

    for (CharSequence line : CharStreams.readLines(request.getReader())) {

      manager.append(line);

      String[] tokens = DelimitedDataUtils.decode(line);
      if (tokens.length != expectedColumns) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Wrong column count");
        return;
      }

      Feature target = null;
      // One slot per column, target column included (it holds a placeholder), so that column
      // indices can be used as feature indices directly.
      Feature[] features = new Feature[expectedColumns];
      try {
        for (int col = 0; col < features.length; col++) {
          boolean isTarget = col == settings.getTargetColumn();
          Feature built = buildFeature(col, tokens[col], categoryMapping);
          if (isTarget) {
            target = built;
            features[col] = IgnoredFeature.INSTANCE;
          } else {
            features[col] = built;
          }
        }
      } catch (IllegalArgumentException iae) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Bad input line");
        return;
      }

      // Fails if the configured target column never matched a column index.
      Preconditions.checkNotNull(target);
      forest.update(new Example(target, features));
    }
  }
Пример #6
0
  /**
   * Handles a POST whose body holds one delimited data point per line. Each line is appended to
   * the generation manager and then converted to a vector by the current generation; the vector
   * is not yet used to update the model (see the TODO below).
   *
   * <p>Responds with 503 when there is no current generation, and with 400 as soon as a line
   * cannot be converted to a vector.
   *
   * <p>NOTE(review): a line is appended to the generation manager before it is validated, so a
   * line that is rejected with 400 has still been appended. Confirm this is intended.
   *
   * @param request request whose reader supplies the input lines
   * @param response response used to report errors
   * @throws IOException if reading the request or sending an error fails
   */
  @Override
  protected void doPost(HttpServletRequest request, HttpServletResponse response)
      throws IOException {

    KMeansGenerationManager manager = getGenerationManager();
    Generation current = manager.getCurrentGeneration();
    if (current == null) {
      response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE);
      return;
    }

    for (CharSequence line : CharStreams.readLines(request.getReader())) {
      manager.append(line);

      String[] tokens = DelimitedDataUtils.decode(line);
      RealVector point = current.toVector(tokens);
      if (point == null) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Wrong column count");
        return;
      }

      // TODO update the centers, along the lines of Meyerson et al.
    }
  }