コード例 #1
1
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
コード例 #2
1
ファイル: HadoopJoin.java プロジェクト: yaol9/gbkws-hadoop
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String keyS = key.toString();
      if (keyS.startsWith("O") || keyS.startsWith("P") || keyS.startsWith("S")) {
        String sum = new String();

        for (Text val : values) {

          sum += (" " + val.toString());
        }

        // String subKey = keyS.substring(0,keyS.length()-1);

        // Text t = new Text();
        // t.set(subKey);
        result.set(sum);
        context.write(key, result);
      }
      if (keyS.startsWith("L")) {
        //	String [] keyIdS = keyS.substring(1).split("[+]");

        result.set(" ");
        context.write(key, result);

        // String KeyIdS1 = keyIdS[1];
        // result.set(KeyIdS1);
        // context.write(key, result);

        // String KeyIdS2 = keyIdS[2];
        // result.set(KeyIdS2);
        // context.write(key, result);

      }
    }
コード例 #3
0
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      boolean filtered = true;
      String keypair = key.toString();
      String[] keys = keypair.split(" ");
      if (keys.length == 1) {
        filtered = false;
      } else {
        if (keys[0].equals("*") || keys[1].equals("*")) {
          filtered = false;
        }
      }

      if (!filtered) {
        for (IntWritable val : values) {
          sum += val.get();
        }
        context.write(key, new IntWritable(sum));
        return;
      }

      for (IntWritable val : values) {
        if (val.get() == -1) {
          filtered = false;
          continue;
        }
        sum += val.get();
      }
      // filter non-needed events
      if (filtered) return;
      context.write(key, new IntWritable(sum));
    }
コード例 #4
0
    public void reduce(GFKey key, Iterable<PEIWritable> values, Context context)
        throws IOException, InterruptedException {
      // For a particular key ... process all records and output what we would have expected in this
      // concKnownKeys test
      // Note that we either
      // 1. do a single create
      // 2. create + update
      // 3. create + destroy
      // look at all ops ... and output either
      // 1. create
      // 2. create (with value from update)
      // 3. do nothing (overall result is destroy, so do not create the entry in the gemfire
      // validation region
      String keyStr = (String) key.getKey();
      ValueHolder updateValue = null;
      ValueHolder createValue = null;
      boolean destroyed = false;
      System.out.println("KnownKeysMRv2.reduce() invoked with " + keyStr);
      for (PEIWritable value : values) {
        PersistedEventImpl event = value.getEvent();
        Operation op = event.getOperation();

        ValueHolder vh = null;
        if (op.isDestroy()) {
          destroyed = true;
        } else {
          try {
            vh = (ValueHolder) event.getDeserializedValue();
          } catch (ClassNotFoundException e) {
            System.out.println(
                "KnownKeysMRv2.map() caught " + e + " : " + TestHelper.getStackTrace(e));
          }
          if (op.isUpdate()) {
            updateValue = vh;
          } else {
            createValue = vh;
          }
        }
        System.out.println(
            "KnownKeysMRv2.reduce() record: "
                + op.toString()
                + ": key = "
                + keyStr
                + " and op "
                + op.toString());
      }
      if (!destroyed) {
        if (updateValue != null) {
          context.write(key.getKey(), updateValue);
        } else {
          context.write(key.getKey(), createValue);
        }
      }
    }
コード例 #5
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
    private static void WriteNewCentroids(Context context) throws IOException {
      FileSystem fs = FileSystem.get(context.getConfiguration());
      Path nextCFile = new Path(context.getConfiguration().get("NEXTCFILE"));
      DataOutputStream d = new DataOutputStream(fs.create(nextCFile, false));
      BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(d));

      for (String centroid : newCentroids) {
        writer.write(centroid + "\n");
      }

      writer.close();
    }
コード例 #6
0
  public void reduce(LongWritable key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    LongWritable curNodeId = key;
    double previousPRValue = 1;
    double nextPRValue = 0;
    double localResidual = 0;
    String edgeListOfCurNode = "";
    long localResidualTransformed = 0;

    for (Text value : values) {
      String[] inputInfo = value.toString().split("\\s+");

      // incoming pagerank value
      if (inputInfo.length == 1) {
        nextPRValue += Double.parseDouble(inputInfo[0]);
      }
      // current node info
      else if (inputInfo.length == 3) {
        edgeListOfCurNode = inputInfo[2];
        previousPRValue = Double.parseDouble(inputInfo[1]);
      } else if (inputInfo.length == 2) {
        previousPRValue = Double.parseDouble(inputInfo[1]);
      } else {
        System.out.println("ERROR: received unexpected TEXT in length");
      }
    }
    if (previousPRValue == 1) System.out.println("No node info has been received by a reducer");
    // calculate the pagerank value according to the given formula
    nextPRValue = pagerankFormula(nextPRValue);

    // should also iterate sink nodes list, add the evenly splitted value

    // reducer should store the updated node info(NPR) to output directory
    context.write(null, new Text(curNodeId + " " + nextPRValue + " " + edgeListOfCurNode));

    // then compare PPR with NPR
    try {
      localResidual = Math.abs(previousPRValue - nextPRValue) / nextPRValue;
      localResidualTransformed = (long) (localResidual * 10000);
      // System.out.println("Make sure you got the right transformed residual :
      // "+localResidualTransformed);

    } catch (ArithmeticException e) {
      System.out.println("PPR is zero. Check where you get the value!");
    }

    // assume there is a global counter called residualCounter;

    context.getCounter(myCounter.ResidualCounter.RESIDUAL_SUM).increment(localResidualTransformed);
  }
コード例 #7
0
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      Configuration c = context.getConfiguration();
      String s = value.toString();
      String input[] = s.split(",");
      Text outputkey = new Text();
      Text outputvalue = new Text();
      double result = 0.0;

      /* multiplies matrix and vector entry with matching column value */

      result = (Double.parseDouble(input[2])) * (vector.get(Long.parseLong(input[1])));
      outputkey.set(input[0]);
      outputvalue.set(Double.toString(result));
      context.write(outputkey, outputvalue);
    }
コード例 #8
0
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int sum = 0;
   for (IntWritable val : values) {
     sum += val.get();
   }
   context.write(key, new IntWritable(sum));
 }
コード例 #9
0
ファイル: AuthorCounter.java プロジェクト: akhfa/hadoop-test
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String file = value.toString();
      String[] lines = file.split("\n");

      for (String line : lines) {
        if (line.contains("<author>") && line.contains("</author>")) {
          String author = line.substring(8, line.indexOf("</a"));
          word.set(author);
          context.write(word, one);
        } else if (line.contains("<author>")) {
          String author = line.substring(8);
          word.set(author);
          context.write(word, one);
        }
      }
    }
コード例 #10
0
ファイル: X.java プロジェクト: jivesoftware/codesquare
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      System.out.println("HELLO" + key + value);

      String line = value.toString();
      String[] components = line.split("\\s+");
      context.write(new Text(components[0]), new Text(components[1]));
    }
コード例 #11
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     context.write(word, one);
   }
 }
コード例 #12
0
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   int sum = 0;
   for (IntWritable val : values) {
     sum = sum + val.get();
   }
   System.out.println("----------inside-----------");
   context.write(key, new Text(Integer.toString(sum)));
 }
コード例 #13
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   Text word = new Text();
   StringTokenizer s = new StringTokenizer(value.toString());
   while (s.hasMoreTokens()) {
     word.set(s.nextToken());
     context.write(word, one);
   }
 }
コード例 #14
0
ファイル: AuthorCounter.java プロジェクト: akhfa/hadoop-test
 public void reduce(Text key, Iterable<LongWritable> values, Context context)
     throws IOException, InterruptedException {
   int sum = 0;
   for (LongWritable val : values) {
     sum += val.get();
   }
   result.set(sum);
   context.write(key, result);
 }
コード例 #15
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   String[] currentUserTuples = line.split("::");
   int currentAgeValue = Integer.parseInt(currentUserTuples[2].trim());
   if (currentUserTuples[1].trim().equalsIgnoreCase("M") && currentAgeValue <= targetAge) {
     context.write(new Text("UserId :: " + currentUserTuples[0].trim()), one);
   }
 }
コード例 #16
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // read in a document (point in 58-dimensional space)

      List<Double> p = GetPoint(value.toString());

      int idxClosestCentoid = IndexOfClosestCentroid(p);

      context.write(new IntWritable(idxClosestCentoid), value);
    }
コード例 #17
0
ファイル: X.java プロジェクト: jivesoftware/codesquare
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      for (Text val : values) {
        context.write(key, val);

        System.out.println("Pass2 RED KEY: " + key);
        System.out.println("Pass2 RED VAL: " + val);
      }
    }
コード例 #18
0
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      String input[];
      double result = 0.0;

      /* adds all the values corresponding to a key */
      for (Text value : values) {
        result += Double.parseDouble(value.toString());
      }

      context.write(null, new Text(key + "," + Double.toString(result)));
    }
コード例 #19
0
ファイル: WordCount_e3.java プロジェクト: reishin/CS9223
    protected void cleanup(Context context) throws IOException, InterruptedException {

      Map<Text, IntWritable> sortedMap = sortByValues(countMap);

      int counter = 0;
      for (Text key : sortedMap.keySet()) {
        if (counter++ == 100) {
          break;
        }
        context.write(key, sortedMap.get(key));
      }
    }
コード例 #20
0
    // Identity mapper (log and write out processed key/value pairs, the value is the
    // PersistedEventImpl)
    public void map(GFKey key, PersistedEventImpl value, Context context)
        throws IOException, InterruptedException {

      String keyStr = (String) key.getKey();
      Operation op = value.getOperation();
      ValueHolder entryValue = null;
      System.out.println("map method invoked with " + keyStr + " " + op.toString());
      try {
        entryValue = (ValueHolder) value.getDeserializedValue();
      } catch (ClassNotFoundException e) {
        System.out.println("KnownKeysMRv2.map() caught " + e + " : " + TestHelper.getStackTrace(e));
      }
      context.write(key, new PEIWritable(value));
    }
コード例 #21
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
    protected void setup(Context context) throws IOException, InterruptedException {
      FileSystem fs = FileSystem.get(context.getConfiguration());

      Path cFile = new Path(context.getConfiguration().get("CFILE"));
      DataInputStream d = new DataInputStream(fs.open(cFile));
      BufferedReader reader = new BufferedReader(new InputStreamReader(d));

      String line;
      while ((line = reader.readLine()) != null) {
        StringTokenizer tokenizer = new StringTokenizer(line.toString());

        if (tokenizer.hasMoreTokens()) {
          List<Double> centroid = new ArrayList<Double>(58);

          while (tokenizer.hasMoreTokens()) {
            centroid.add(Double.parseDouble(tokenizer.nextToken()));
          }

          centroids.add(centroid);
        }
      }

      k = centroids.size();
    }
コード例 #22
0
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();

      splitposition = line.indexOf("\t");
      labels = line.substring(0, splitposition);
      content = line.substring(splitposition + 1, line.length());
      if (content.length() <= 5) {
        word.set(labels);
        int counter = Integer.parseInt(content);
        context.write(word, new IntWritable(counter));
        return;
      }
      labeltokens = labels.split(",");
      contenttokens = tokenizeDoc(content);

      for (String label : labelspace) {
        for (String token : contenttokens) {
          // filter token, denotes the needed events;
          word.set(label + " " + token);
          context.write(word, new IntWritable(-1));
        }
      }
    }
コード例 #23
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
    public void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      List<Double> newCentroid = null;
      int numPoints = 0;

      for (Text value : values) {
        ++numPoints;

        List<Double> p = GetPoint(value.toString());

        if (newCentroid == null) {
          // initialize the new centroid to the first element
          newCentroid = new ArrayList<Double>(p);
        } else {
          for (int i = 0; i < newCentroid.size(); i++) {
            newCentroid.set(i, newCentroid.get(i) + p.get(i));
          }
        }
      }

      // now the newCentroid contains the sum of all the points
      // so to get the average of all the points, we need to
      // divide each entry by the total number of points

      for (int i = 0; i < newCentroid.size(); i++) {
        newCentroid.set(i, newCentroid.get(i) / (double) numPoints);
      }

      // now create a string containing all the new centroid's coordinates

      String s = null;
      for (Double d : newCentroid) {
        if (s == null) {
          s = d.toString();
        } else {
          s += " " + d.toString();
        }
      }

      newCentroids.add(s);

      if (newCentroids.size() == 10) {
        WriteNewCentroids(context);
      }

      // output the centroid ID and the centroid data
      context.write(key, new Text(s));
    }
コード例 #24
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   String[] currentMovieTuples = line.split("::");
   String[] inMovieList = inMovies.split(",");
   for (String s : inMovieList) {
     if (currentMovieTuples[1].trim().equalsIgnoreCase(s))
       context.write(
           new Text(
               "Movie :: "
                   + currentMovieTuples[1].trim()
                   + " Genre :: "
                   + currentMovieTuples[2].trim()),
           one);
   }
 }
コード例 #25
0
    /* called once at the beginning of the task */
    public void setup(Context context) throws IOException, InterruptedException {
      BufferedReader br = null;
      Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
      if (files != null && files.length > 0)
        br = new BufferedReader(new FileReader(files[0].toString()));
      String line = null;

      /* reads the cached file into a hashmap */
      try {
        while ((line = br.readLine()) != null) {
          String input[] = line.split(",");
          vector.put(Long.valueOf(input[0]), Double.valueOf(input[1]));
        }
      } finally {
        br.close();
      }
    }
コード例 #26
0
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String[] pair = new String[2];
      int count = 0;
      for (Text txt : values) {
        pair[count] = txt.toString();
        count++;
      }

      // word exists in training
      if (count == 2) {
        StringTokenizer st_one, st_two;
        if (pair[0].contains(dlt)) {
          st_one = new StringTokenizer(pair[1]);
          st_two = new StringTokenizer(pair[0]);
        } else {
          st_one = new StringTokenizer(pair[0]);
          st_two = new StringTokenizer(pair[1]);
        }

        // outputting the data
        String f_id = st_one.nextToken();

        StringBuilder builder = new StringBuilder(dlt);
        builder.append(f_id);
        builder.append(dlt);
        while (st_two.hasMoreTokens()) {
          String filename = st_two.nextToken();
          String tf_idf = st_two.nextToken();
          builder.append(filename);
          builder.append(dlt);
          builder.append(tf_idf);
          builder.append("\t");
        }
        myVal.set(builder.toString());
        context.write(key, myVal);
      }
    }
コード例 #27
0
 protected void setup(Context context) {
   Configuration config = context.getConfiguration();
   inMovies = config.get("inParameter");
 }
コード例 #28
0
 public void reduce(Text key, Iterable<IntWritable> values, Context context)
     throws IOException, InterruptedException {
   for (IntWritable value : values) context.write(key, new IntWritable());
 }
コード例 #29
0
ファイル: HadoopJoin.java プロジェクト: yaol9/gbkws-hadoop
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {

      String line = value.toString();

      String[] attributes = line.split("[|]");

      String tableName = getTableName(line);

      if (tableName.equalsIgnoreCase("lineitem")) {
        word.set(
            "LO"
                + attributes[0]
                + "+P"
                + attributes[1]
                + "+S"
                + attributes[2]); // orderkey+partkey+supplykey
        Text v = new Text(" ");
        context.write(word, v);
      } else if (tableName.equalsIgnoreCase("supplier")) {
        if (line.contains(k2)) {
          Text v = new Text(attributes[6]);
          word.set("S" + attributes[0] + "A");
          context.write(word, v);
          word.set("S" + attributes[0] + "B");
          context.write(word, v);
          word.set("S" + attributes[0] + "C");
          context.write(word, v);
          word.set("S" + attributes[0] + "D");
          context.write(word, v);
        }
      } else if (tableName.equalsIgnoreCase("part")) {
        if (line.contains(k1)) {
          Text v =
              new Text(
                  attributes[1] + " " + attributes[4] + " " + attributes[6] + " " + attributes[8]);
          word.set("P" + attributes[0] + "A");
          context.write(word, v);
          word.set("P" + attributes[0] + "B");
          context.write(word, v);
          word.set("P" + attributes[0] + "C");
          context.write(word, v);
          word.set("P" + attributes[0] + "D");
          context.write(word, v);
        }
      } else if (tableName.equalsIgnoreCase("order")) {
        if (line.contains(k0)) {
          Text v = new Text(attributes[8]);
          word.set("O" + attributes[0] + "A");
          context.write(word, v);
          word.set("O" + attributes[0] + "B");
          context.write(word, v);
          word.set("O" + attributes[0] + "C");
          context.write(word, v);
          word.set("O" + attributes[0] + "D");
          context.write(word, v);
        }
      } else {
      }
    }