public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
示例#2
0
 public static List<InetSocketAddress> readAddresses(Path path, Configuration conf)
     throws IOException {
   final List<InetSocketAddress> addrs = new ArrayList<InetSocketAddress>();
   for (final String line : readConfig(path, conf)) {
     final StringTokenizer tokens = new StringTokenizer(line);
     if (tokens.hasMoreTokens()) {
       final String host = tokens.nextToken();
       if (tokens.hasMoreTokens()) {
         final String port = tokens.nextToken();
         addrs.add(new InetSocketAddress(host, Integer.parseInt(port)));
       }
     }
   }
   return addrs;
 }
示例#3
0
  /**
   * Make a path relative with respect to a root path. absPath is always assumed to descend from
   * root. Otherwise returned path is null.
   */
  static String makeRelative(Path root, Path absPath) {
    if (!absPath.isAbsolute()) {
      throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
    }
    String p = absPath.toUri().getPath();

    StringTokenizer pathTokens = new StringTokenizer(p, "/");
    for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/");
        rootTokens.hasMoreTokens(); ) {
      if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
        return null;
      }
    }
    StringBuilder sb = new StringBuilder();
    for (; pathTokens.hasMoreTokens(); ) {
      sb.append(pathTokens.nextToken());
      if (pathTokens.hasMoreTokens()) {
        sb.append(Path.SEPARATOR);
      }
    }
    return sb.length() == 0 ? "." : sb.toString();
  }
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String[] pair = new String[2];
      int count = 0;
      for (Text txt : values) {
        pair[count] = txt.toString();
        count++;
      }

      // word exists in training
      if (count == 2) {
        StringTokenizer st_one, st_two;
        if (pair[0].contains(dlt)) {
          st_one = new StringTokenizer(pair[1]);
          st_two = new StringTokenizer(pair[0]);
        } else {
          st_one = new StringTokenizer(pair[0]);
          st_two = new StringTokenizer(pair[1]);
        }

        // outputting the data
        String f_id = st_one.nextToken();

        StringBuilder builder = new StringBuilder(dlt);
        builder.append(f_id);
        builder.append(dlt);
        while (st_two.hasMoreTokens()) {
          String filename = st_two.nextToken();
          String tf_idf = st_two.nextToken();
          builder.append(filename);
          builder.append(dlt);
          builder.append(tf_idf);
          builder.append("\t");
        }
        myVal.set(builder.toString());
        context.write(key, myVal);
      }
    }