public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
Beispiel #2
0
    private static void WriteNewCentroids(Context context) throws IOException {
      FileSystem fs = FileSystem.get(context.getConfiguration());
      Path nextCFile = new Path(context.getConfiguration().get("NEXTCFILE"));
      DataOutputStream d = new DataOutputStream(fs.create(nextCFile, false));
      BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(d));

      for (String centroid : newCentroids) {
        writer.write(centroid + "\n");
      }

      writer.close();
    }
Beispiel #3
0
    protected void setup(Context context) throws IOException, InterruptedException {
      FileSystem fs = FileSystem.get(context.getConfiguration());

      Path cFile = new Path(context.getConfiguration().get("CFILE"));
      DataInputStream d = new DataInputStream(fs.open(cFile));
      BufferedReader reader = new BufferedReader(new InputStreamReader(d));

      String line;
      while ((line = reader.readLine()) != null) {
        StringTokenizer tokenizer = new StringTokenizer(line.toString());

        if (tokenizer.hasMoreTokens()) {
          List<Double> centroid = new ArrayList<Double>(58);

          while (tokenizer.hasMoreTokens()) {
            centroid.add(Double.parseDouble(tokenizer.nextToken()));
          }

          centroids.add(centroid);
        }
      }

      k = centroids.size();
    }
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      Configuration c = context.getConfiguration();
      String s = value.toString();
      String input[] = s.split(",");
      Text outputkey = new Text();
      Text outputvalue = new Text();
      double result = 0.0;

      /* multiplies matrix and vector entry with matching column value */

      result = (Double.parseDouble(input[2])) * (vector.get(Long.parseLong(input[1])));
      outputkey.set(input[0]);
      outputvalue.set(Double.toString(result));
      context.write(outputkey, outputvalue);
    }
    /* called once at the beginning of the task */
    public void setup(Context context) throws IOException, InterruptedException {
      BufferedReader br = null;
      Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
      if (files != null && files.length > 0)
        br = new BufferedReader(new FileReader(files[0].toString()));
      String line = null;

      /* reads the cached file into a hashmap */
      try {
        while ((line = br.readLine()) != null) {
          String input[] = line.split(",");
          vector.put(Long.valueOf(input[0]), Double.valueOf(input[1]));
        }
      } finally {
        br.close();
      }
    }
 protected void setup(Context context) {
   Configuration config = context.getConfiguration();
   inMovies = config.get("inParameter");
 }