public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
예제 #2
0
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String line = value.toString(); // define new variable to be string

      ArrayList<Integer> range = new ArrayList<Integer>();
      for (int i = 2000; i <= 2010; i++) {
        range.add(i);
      }

      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      try {

        int year = Integer.parseInt(inputs[165]);

        if (range.contains(year)) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException e) {
        // do nothing
      }
    }
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    /*
     * It implements the mapper. It outputs the numbers of weight and updated weights.
     *
     * Note that the format of intermediate output is <IntWritable, DoubleWritable>,
     * because the key is the number of weight (an integer), and the value is the weight's value (double)
     */
    inputData = value.toString();

    // go through the process
    initialize();
    getposphase();
    getnegphase();
    update();

    // output the intermediate data
    // The <key, value> pairs are <weightID, weightUpdate>
    double[][] vishidinc_array = vishidinc.getArray();
    for (int i = 0; i < numdims; i++) {
      for (int j = 0; j < numhid; j++) {
        weightPos.set(i * numhid + j);
        weightValue.set(vishidinc_array[i][j]);
        output.collect(weightPos, weightValue);
      }
    }
  }
예제 #4
0
 // Identity
 public void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   System.out.println("I'm in Job1 reduce");
   for (Text val : values) {
     System.out.println("job1:redInp:-" + val.toString());
     context.write(new Text(""), val);
   }
 }
예제 #5
0
    public void reduce(
        Text key,
        Iterator<CrawlDatum> values,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      boolean oldSet = false;
      boolean injectedSet = false;
      while (values.hasNext()) {
        CrawlDatum val = values.next();
        if (val.getStatus() == CrawlDatum.STATUS_INJECTED) {
          injected.set(val);
          injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
          injectedSet = true;
        } else {
          old.set(val);
          oldSet = true;
        }
      }
      CrawlDatum res = null;

      /**
       * Whether to overwrite, ignore or update existing records
       *
       * @see https://issues.apache.org/jira/browse/NUTCH-1405
       */

      // Injected record already exists and overwrite but not update
      if (injectedSet && oldSet && overwrite) {
        res = injected;

        if (update) {
          LOG.info(key.toString() + " overwritten with injected record but update was specified.");
        }
      }

      // Injected record already exists and update but not overwrite
      if (injectedSet && oldSet && update && !overwrite) {
        res = old;
        old.putAllMetaData(injected);
        old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore());
        old.setFetchInterval(
            injected.getFetchInterval() != interval
                ? injected.getFetchInterval()
                : old.getFetchInterval());
      }

      // Old default behaviour
      if (injectedSet && !oldSet) {
        res = injected;
      } else {
        res = old;
      }

      output.collect(key, res);
    }
 public void map(
     IntWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String dataRow = value.toString();
   StringTokenizer tk = new StringTokenizer(dataRow);
   String label = tk.nextToken();
   String image = tk.nextToken();
   dataString.set(label + "\t" + image);
   output.collect(sameKey, dataString);
 }
예제 #7
0
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // read in a document (point in 58-dimensional space)

      List<Double> p = GetPoint(value.toString());

      int idxClosestCentoid = IndexOfClosestCentroid(p);

      context.write(new IntWritable(idxClosestCentoid), value);
    }
예제 #8
0
 @Override
 public void map(Object key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   int movieId = Integer.parseInt(tokenizer.nextToken());
   while (tokenizer.hasMoreTokens()) {
     String word = tokenizer.nextToken();
     context.write(new Text("1"), new IntWritable(1));
   }
 }
  /*
   * Finds a full file and sets it as the value.
   */
  public synchronized boolean next(LongWritable key, Text value) throws IOException {
    Text line = new Text();
    boolean retrieved = true;

    String result = "";

    value.clear();

    while (retrieved) {
      retrieved = recordReader.next(key, line);

      if (line.toString().length() > 0) {
        String lineValue = line.toString();
        result += lineValue + "\n";
      }
    }

    value.set(result);
    return true;
  }
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      String input[];
      double result = 0.0;

      /* adds all the values corresponding to a key */
      for (Text value : values) {
        result += Double.parseDouble(value.toString());
      }

      context.write(null, new Text(key + "," + Double.toString(result)));
    }
예제 #11
0
 /** Implement readFields of Writable */
 public void readFields(DataInput in) throws IOException {
   this.offset = in.readLong();
   this.length = in.readLong();
   int numNames = in.readInt();
   this.names = new String[numNames];
   for (int i = 0; i < numNames; i++) {
     Text name = new Text();
     name.readFields(in);
     names[i] = name.toString();
   }
   int numHosts = in.readInt();
   for (int i = 0; i < numHosts; i++) {
     Text host = new Text();
     host.readFields(in);
     hosts[i] = host.toString();
   }
   int numTops = in.readInt();
   Text path = new Text();
   for (int i = 0; i < numTops; i++) {
     path.readFields(in);
     topologyPaths[i] = path.toString();
   }
 }
예제 #12
0
    public void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      List<Double> newCentroid = null;
      int numPoints = 0;

      for (Text value : values) {
        ++numPoints;

        List<Double> p = GetPoint(value.toString());

        if (newCentroid == null) {
          // initialize the new centroid to the first element
          newCentroid = new ArrayList<Double>(p);
        } else {
          for (int i = 0; i < newCentroid.size(); i++) {
            newCentroid.set(i, newCentroid.get(i) + p.get(i));
          }
        }
      }

      // now the newCentroid contains the sum of all the points
      // so to get the average of all the points, we need to
      // divide each entry by the total number of points

      for (int i = 0; i < newCentroid.size(); i++) {
        newCentroid.set(i, newCentroid.get(i) / (double) numPoints);
      }

      // now create a string containing all the new centroid's coordinates

      String s = null;
      for (Double d : newCentroid) {
        if (s == null) {
          s = d.toString();
        } else {
          s += " " + d.toString();
        }
      }

      newCentroids.add(s);

      if (newCentroids.size() == 10) {
        WriteNewCentroids(context);
      }

      // output the centroid ID and the centroid data
      context.write(key, new Text(s));
    }
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      Configuration c = context.getConfiguration();
      String s = value.toString();
      String input[] = s.split(",");
      Text outputkey = new Text();
      Text outputvalue = new Text();
      double result = 0.0;

      /* multiplies matrix and vector entry with matching column value */

      result = (Double.parseDouble(input[2])) * (vector.get(Long.parseLong(input[1])));
      outputkey.set(input[0]);
      outputvalue.set(Double.toString(result));
      context.write(outputkey, outputvalue);
    }
예제 #14
0
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String str = "1\t" + value.toString();
    Kvector vector = Kvector.fromString(str);

    int code = -1, d = Integer.MAX_VALUE;
    for (Kvector e : m_kVectors) {
      int t = vector.distance(e);
      if (t < d) {
        d = t;
        code = e.code;
      }
    }

    context.write(new KcodeWritable(code), new Text(str));
  }
예제 #15
0
    public void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      System.out.println(PREFIX + "Collecting all the matched results");
      for (Text val : values) {
        String[] tmp =
            val.toString()
                .split("\\|"); //  The \\ here is very important. Cannot use "|" since split()
        //  need a regex(regular expression), and the vertical bar is
        //  special character.
        System.out.println("filename:" + tmp[0] + " ratio:" + tmp[1]);
        String filename = tmp[0];
        double ratio = Double.valueOf(tmp[1]);

        // Key:filename     Value:ratio
        context.write(new Text(filename), new DoubleWritable(ratio));
      }
    }
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String line = value.toString();
    String[] tokens = line.split("\\s");
    String hour = new String();
    long numRequests = 0;

    if (tokens.length > 4) {
      // get the project name field
      hour = tokens[4];

      // get the number of requests field
      numRequests = Long.parseLong(tokens[2]);

      // output the key, value pairs where the key is a combination of
      // project name and datetime and the value is number of requests
      context.write(new Text(hour), new LongWritable(numRequests));
    }
  }
예제 #17
0
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, DoubleWritable> output,
        Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      int rowIdx = 0;
      double xValue = 0;

      if (tokenizer.hasMoreTokens()) {
        rowIdx = Integer.parseInt(tokenizer.nextToken());
        xValue = Double.parseDouble(tokenizer.nextToken());
      }

      double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
      output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
    }
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String[] pair = new String[2];
      int count = 0;
      for (Text txt : values) {
        pair[count] = txt.toString();
        count++;
      }

      // word exists in training
      if (count == 2) {
        StringTokenizer st_one, st_two;
        if (pair[0].contains(dlt)) {
          st_one = new StringTokenizer(pair[1]);
          st_two = new StringTokenizer(pair[0]);
        } else {
          st_one = new StringTokenizer(pair[0]);
          st_two = new StringTokenizer(pair[1]);
        }

        // outputting the data
        String f_id = st_one.nextToken();

        StringBuilder builder = new StringBuilder(dlt);
        builder.append(f_id);
        builder.append(dlt);
        while (st_two.hasMoreTokens()) {
          String filename = st_two.nextToken();
          String tf_idf = st_two.nextToken();
          builder.append(filename);
          builder.append(dlt);
          builder.append(tf_idf);
          builder.append("\t");
        }
        myVal.set(builder.toString());
        context.write(key, myVal);
      }
    }
  /* Finds a full sentence and sets it as the value.
   * If the sentence is shorter than the full line, the rest is stored to use later.
   */
  public synchronized boolean next(LongWritable key, Text value) throws IOException {
    Text line = new Text();
    boolean getMore = true;
    boolean retrieved = false;

    String result = leftovers;
    leftovers = "";

    value.clear();

    while (getMore) {
      retrieved = recordReader.next(key, line);

      if (retrieved) {
        String lineValue = line.toString();

        // here, we assume sentences run until the period.
        int endOfSentence = lineValue.indexOf('.');

        if (endOfSentence == -1) {
          result += " " + lineValue;
        } else {
          result += " " + lineValue.substring(0, endOfSentence + 1);
          leftovers = lineValue.substring(endOfSentence + 1);
          getMore = false;
        }
      } else {
        getMore = false;
        value.set(result);
        return false;
      }
    }

    value.set(result);
    return true;
  }
예제 #20
0
    // The input video files are split into chunks of 64MB here...
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      System.out.println("job1:mapInp:-" + line);
      String[] info = line.split(" ");
      info[0] = info[0].trim();
      info[1] = info[1].trim();
      String lstfnames = "", fname = "";
      try {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        String prefixPath = "", fnm = "";
        Pattern x = Pattern.compile("(.*)/(.*)");
        Matcher xm = x.matcher(info[0]);
        while (xm.find()) {
          prefixPath = xm.group(1);
          fnm = xm.group(2);
        }
        String dst = "/home/" + fnm; // dst is path of the file on local system.
        hdfs.copyToLocalFile(new Path(info[0]), new Path(dst));

        Process p = Runtime.getRuntime().exec("ffmpeg -i " + dst);
        String s;

        BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
        Pattern D = Pattern.compile("Duration:[ ]*([0-9]+):([0-9]+):([0-9]+)");
        long time = 0; // "time" is the duration of the input video file
        long sps = 0; // "sps" is the number of seconds(duration) of each video split
        while ((s = stdError.readLine()) != null) {
          Matcher md = D.matcher(s);
          while (md.find()) {
            time =
                Long.parseLong(md.group(1)) * 3600
                    + Long.parseLong(md.group(2)) * 60
                    + Long.parseLong(md.group(3));
          }
        }
        Process p1 = Runtime.getRuntime().exec("du -s " + dst);
        BufferedReader stdInput1 = new BufferedReader(new InputStreamReader(p1.getInputStream()));
        String s1 = "", size = ""; // "size" is the size of input video file
        while ((s1 = stdInput1.readLine()) != null) {
          String s11[] = s1.split("\t");
          size = s11[0];
        }
        sps = (64 * 1024) * time / (Long.parseLong(size)); // chunk size is 64MB
        String hr, min, sc;
        hr = Long.toString((sps / 3600));
        min = Long.toString((sps % 3600) / 60);
        sc = Long.toString(sps % 60);
        if (hr.length() < 2) hr = "0" + hr;
        if (min.length() < 2) min = "0" + min;
        if (sc.length() < 2) sc = "0" + sc;
        String splt = hr + ":" + min + ":" + sc;

        String query =
            "mencoder -oac copy -ovc copy -ss "; // building query to split the input video file
        String app = "", inpExt = "";
        Pattern xx = Pattern.compile("(.*)\\.(.*)");
        Matcher xxm = xx.matcher(dst);
        while (xxm.find()) {
          fname = xxm.group(1);
          inpExt = xxm.group(2);
        }
        String[] tmpArr = fname.split("/");
        String hdfsFname = "";
        long stSrt = 0;
        int cnt = 0;

        while (true) {
          if (stSrt > time) break;
          if (stSrt + sps > time) {
            long t = time - stSrt;
            hr = Long.toString((t / 3600));
            min = Long.toString((t % 3600) / 60);
            sc = Long.toString(t % 60);
            if (hr.length() < 2) hr = "0" + hr;
            if (min.length() < 2) min = "0" + min;
            if (sc.length() < 2) sc = "0" + sc;
            splt = hr + ":" + min + ":" + sc;
          }
          cnt++;
          hr = Long.toString((stSrt / 3600));
          min = Long.toString((stSrt % 3600) / 60);
          sc = Long.toString(stSrt % 60);
          if (hr.length() < 2) hr = "0" + hr;
          if (min.length() < 2) min = "0" + min;
          if (sc.length() < 2) sc = "0" + sc;
          app =
              hr
                  + ":"
                  + min
                  + ":"
                  + sc
                  + " -endPos "
                  + splt
                  + " "
                  + dst
                  + " -o "
                  + fname
                  + "_"
                  + Integer.toString(cnt)
                  + "."
                  + inpExt;

          Process p2 = Runtime.getRuntime().exec(query + app);
          String ls_str = "";
          DataInputStream ls_in = new DataInputStream(p2.getInputStream());
          while ((ls_str = ls_in.readLine()) != null) {}
          p2.destroy();
          String[] tmpArr1 = fnm.split("\\.");
          hdfs.copyFromLocalFile(
              true,
              true,
              new Path(fname + "_" + Integer.toString(cnt) + "." + inpExt),
              new Path(prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt));
          lstfnames +=
              prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt + " #!# ";
          stSrt += sps;
        }
        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm " + dst}; // delete the file after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();
        lstfnames += "*" + info[1];

        context.write(
            new Text(fname),
            new Text(
                lstfnames)); // "fname" contains name of the input video file with
                             // extension(eg.".avi") removed #### "lstfnames" is a string, contains
                             // all the names of video splits(concatenated)
        System.out.println("lstfnames : " + lstfnames);
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }
예제 #21
0
    public void map(Text key, Text value, Context context)
        throws InterruptedException, IOException {

      String filename = key.toString();
      String json = value.toString();

      // Make sure the input is valid
      if (!(filename.isEmpty() || json.isEmpty())) {

        // Change the json-type feature to Mat-type feature
        Mat descriptor = json2mat(json);
        if (descriptor != null) {
          // Read the query feature from the cache in Hadoop
          Mat query_features;
          String pathStr = context.getConfiguration().get("featureFilePath");
          FileSystem fs = FileSystem.get(context.getConfiguration());
          FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr));
          StringBuilder sb = new StringBuilder();

          // Use a buffer to read the query_feature
          int remain = fsDataInputStream.available();
          while (remain > 0) {
            int read;
            byte[] buf = new byte[BUF_SIZE];
            read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE);
            sb.append(new String(buf, 0, read, StandardCharsets.UTF_8));
            remain = remain - read;
            System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length());
          }

          // Read the query_feature line by line
          //                    Scanner sc = new Scanner(fsDataInputStream, "UTF-8");
          //                    StringBuilder sb = new StringBuilder();
          //                    while (sc.hasNextLine()) {
          //                        sb.append(sc.nextLine());
          //                    }
          //                    String query_json = sb.toString();
          //                    String query_json = new String(buf, StandardCharsets.UTF_8);

          String query_json = sb.toString();
          fsDataInputStream.close();
          query_features = json2mat(query_json);

          // Get the similarity of the current database image against the query image
          DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED);
          MatOfDMatch matches = new MatOfDMatch();

          // Ensure the two features have same length of cols (the feature extracted are all 128
          // cols(at least in this case))
          if (query_features.cols() == descriptor.cols()) {

            matcher.match(query_features, descriptor, matches);
            DMatch[] dMatches = matches.toArray();

            // Calculate the max/min distances
            //                    double max_dist = Double.MAX_VALUE;
            //                    double min_dist = Double.MIN_VALUE;
            double max_dist = 0;
            double min_dist = 100;
            for (int i = 0; i < dMatches.length; i++) {
              double dist = dMatches[i].distance;
              if (min_dist > dist) min_dist = dist;
              if (max_dist < dist) max_dist = dist;
            }
            // Only distances ≤ threshold are good matches
            double threshold = max_dist * THRESHOLD_FACTOR;
            //                    double threshold = min_dist * 2;
            LinkedList<DMatch> goodMatches = new LinkedList<DMatch>();

            for (int i = 0; i < dMatches.length; i++) {
              if (dMatches[i].distance <= threshold) {
                goodMatches.addLast(dMatches[i]);
              }
            }

            // Get the ratio of good_matches to all_matches
            double ratio = (double) goodMatches.size() / (double) dMatches.length;

            System.out.println("*** current_record_filename:" + filename + " ***");
            System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features);
            System.out.println(
                "min_dist of keypoints:" + min_dist + "  max_dist of keypoints:" + max_dist);
            System.out.println(
                "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size());
            //                    System.out.println("type:" + descriptor.type() + " channels:" +
            // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols());
            //                    System.out.println("qtype:" + query_features.type() + "
            // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + "
            // qcols:" + query_features.cols());
            System.out.println();

            if (ratio > PERCENTAGE_THRESHOLD) {
              // Key:1        Value:filename|ratio
              context.write(ONE, new Text(filename + "|" + ratio));
              //                        context.write(ONE, new Text(filename + "|" +
              // String.valueOf(goodMatches.size())));
            }
          } else {
            System.out.println("The size of the features are not equal");
          }
        } else {
          // a null pointer, do nothing
          System.out.println("A broken/null feature:" + filename);
          System.out.println();
        }
      }
    }
예제 #22
0
  // ## operation readReactorOutputFile(ReactionModel)
  public SystemSnapshot readReactorOutputFile(ReactionModel p_reactionModel) {
    // #[ operation readReactorOutputFile(ReactionModel)
    try {
      // open output file and build the DOM tree
      String dir = System.getProperty("RMG.workingDirectory");
      String filename = "chemkin/reactorOutput.xml";
      File inputFile = new File(filename);

      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      factory.setValidating(true); // validate the document with the DTD
      factory.setIgnoringElementContentWhitespace(true); // ignore whitespace
      DocumentBuilder builder = factory.newDocumentBuilder();
      Document doc = builder.parse(inputFile);

      // get root element and its children
      Element root = doc.getDocumentElement();
      NodeList rootchildren = root.getChildNodes();

      // header is rootchildren.item(0)

      // get return message and check for successful run
      Element returnmessageElement = (Element) rootchildren.item(1);
      Text returnmessageText = (Text) returnmessageElement.getFirstChild();
      String returnmessage = returnmessageText.toString();
      returnmessage = returnmessage.trim();
      if (!returnmessage.contains("SUCCESSFULLY COMPLETED RUN.")) {
        System.out.println("External reactor model failed!");
        System.out.println("Reactor model error message: " + returnmessage);
        System.exit(0);
      }

      // get outputvalues element and its children
      Element outputvaluesElement = (Element) rootchildren.item(2);
      NodeList children = outputvaluesElement.getChildNodes();

      // get time
      Element timeElement = (Element) children.item(0);
      Text timeText = (Text) timeElement.getFirstChild();
      double time = Double.parseDouble(timeText.getData());
      String timeUnits = timeElement.getAttribute("units");

      // get systemstate element and its children
      Element systemstateElement = (Element) children.item(1);
      NodeList states = systemstateElement.getChildNodes();

      // get temperature and its units
      Element temperatureElement = (Element) states.item(0);
      String tempUnits = temperatureElement.getAttribute("units");
      Text temperatureText = (Text) temperatureElement.getFirstChild();
      double temp = Double.parseDouble(temperatureText.getData());
      Temperature T = new Temperature(temp, tempUnits);

      // get pressure and its units
      Element pressureElement = (Element) states.item(1);
      String presUnits = pressureElement.getAttribute("units");
      Text pressureText = (Text) pressureElement.getFirstChild();
      double pres = Double.parseDouble(pressureText.getData());
      Pressure P = new Pressure(pres, presUnits);

      // get species amounts (e.g. concentrations)
      ArrayList speciesIDs = new ArrayList();
      ArrayList amounts = new ArrayList();
      ArrayList fluxes = new ArrayList();
      String amountUnits = null;
      String fluxUnits = null;

      // loop thru all the species
      // begin at i=2, since T and P take already the first two position of states
      int nSpe = (states.getLength() - 2) / 2;
      int index = 0;
      LinkedHashMap inertGas = new LinkedHashMap();
      for (int i = 2; i < nSpe + 2; i++) {
        // get amount element and the units
        Element amountElement = (Element) states.item(i);
        amountUnits = amountElement.getAttribute("units");

        Element fluxElement = (Element) states.item(i + nSpe);
        fluxUnits = fluxElement.getAttribute("units");

        // get speciesid and store in an array list
        String thisSpeciesID = amountElement.getAttribute("speciesid");

        // get amount (e.g. concentraion) and store in an array list
        Text amountText = (Text) amountElement.getFirstChild();
        double thisAmount = Double.parseDouble(amountText.getData());
        if (thisAmount < 0) {
          double aTol = ReactionModelGenerator.getAtol();
          // if (Math.abs(thisAmount) < aTol) thisAmount = 0;
          // else throw new NegativeConcentrationException("Negative concentration in
          // reactorOutput.xml: " + thisSpeciesID);
          if (thisAmount < -100.0 * aTol)
            throw new NegativeConcentrationException(
                "Species "
                    + thisSpeciesID
                    + " has negative concentration: "
                    + String.valueOf(thisAmount));
        }

        // get amount (e.g. concentraion) and store in an array list
        Text fluxText = (Text) fluxElement.getFirstChild();
        double thisFlux = Double.parseDouble(fluxText.getData());

        if (thisSpeciesID.compareToIgnoreCase("N2") == 0
            || thisSpeciesID.compareToIgnoreCase("Ne") == 0
            || thisSpeciesID.compareToIgnoreCase("Ar") == 0) {
          inertGas.put(thisSpeciesID, new Double(thisAmount));
        } else {
          speciesIDs.add(index, thisSpeciesID);
          amounts.add(index, new Double(thisAmount));
          fluxes.add(index, new Double(thisFlux));
          index++;
        }
      }

      // print results for debugging purposes
      /**
       * System.out.println(returnmessage); System.out.println("Temp = " + temp + " " + tempUnits);
       * System.out.println("Pres = " + pres + " " + presUnits); for (int i = 0; i < amounts.size();
       * i++) { System.out.println(speciesIDs.get(i) + " " + amounts.get(i) + " " + amountUnits); }
       */
      ReactionTime rt = new ReactionTime(time, timeUnits);
      LinkedHashMap speStatus = generateSpeciesStatus(p_reactionModel, speciesIDs, amounts, fluxes);
      SystemSnapshot ss = new SystemSnapshot(rt, speStatus, T, P);
      ss.inertGas = inertGas;
      return ss;
    } catch (Exception e) {
      System.out.println("Error reading reactor model output: " + e.getMessage());
      System.exit(0);
      return null;
    }

    // #]
  }
예제 #23
0
  /** Delete the dst files/dirs which do not exist in src */
  private static void deleteNonexisting(
      FileSystem dstfs,
      FileStatus dstroot,
      Path dstsorted,
      FileSystem jobfs,
      Path jobdir,
      JobConf jobconf,
      Configuration conf)
      throws IOException {
    if (!dstroot.isDir()) {
      throw new IOException(
          "dst must be a directory when option "
              + Options.DELETE.cmd
              + " is set, but dst (= "
              + dstroot.getPath()
              + ") is not a directory.");
    }

    // write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer =
        SequenceFile.createWriter(
            jobfs,
            jobconf,
            dstlsr,
            Text.class,
            FileStatus.class,
            SequenceFile.CompressionType.NONE);
    try {
      // do lsr to get all file statuses in dstroot
      final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
      for (lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
        final FileStatus status = lsrstack.pop();
        if (status.isDir()) {
          for (FileStatus child : dstfs.listStatus(status.getPath())) {
            String relative = makeRelative(dstroot.getPath(), child.getPath());
            writer.append(new Text(relative), child);
            lsrstack.push(child);
          }
        }
      }
    } finally {
      checkAndClose(writer);
    }

    // sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter =
        new SequenceFile.Sorter(
            jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    // compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
      lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
      dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

      // compare sorted lsr list and sorted dst list
      final Text lsrpath = new Text();
      final FileStatus lsrstatus = new FileStatus();
      final Text dstpath = new Text();
      final Text dstfrom = new Text();
      final FsShell shell = new FsShell(conf);
      final String[] shellargs = {"-rmr", null};

      boolean hasnext = dstin.next(dstpath, dstfrom);
      for (; lsrin.next(lsrpath, lsrstatus); ) {
        int dst_cmp_lsr = dstpath.compareTo(lsrpath);
        for (; hasnext && dst_cmp_lsr < 0; ) {
          hasnext = dstin.next(dstpath, dstfrom);
          dst_cmp_lsr = dstpath.compareTo(lsrpath);
        }

        if (dst_cmp_lsr == 0) {
          // lsrpath exists in dst, skip it
          hasnext = dstin.next(dstpath, dstfrom);
        } else {
          // lsrpath does not exist, delete it
          String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
          if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
            shellargs[1] = s;
            int r = 0;
            try {
              r = shell.run(shellargs);
            } catch (Exception e) {
              throw new IOException("Exception from shell.", e);
            }
            if (r != 0) {
              throw new IOException(
                  "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
            }
          }
        }
      }
    } finally {
      checkAndClose(lsrin);
      checkAndClose(dstin);
    }
  }
예제 #24
0
    // Video splts are converted to target format here...
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      try {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);

        String st = value.toString();
        st = st.trim();
        System.out.println("job2:mapInp:-" + st);
        String[] fmt = st.split(" #!# \\*");
        String[] lst = fmt[0].split(" #!# ");

        String out = "", dlt = "";
        int flag = 1;
        for (String st1 : lst) {

          Pattern x = Pattern.compile("(.*)/(.*)");
          Matcher xm = x.matcher(st1);
          String prefixPath = "", fnm = "", inpExt = "";
          while (xm.find()) {
            prefixPath = xm.group(1);
            fnm = xm.group(2);
          }
          String[] tmpArr = fnm.split("\\.");
          fnm = tmpArr[0];
          inpExt = tmpArr[1];
          hdfs.copyToLocalFile(true, new Path(st1), new Path("/home/" + fnm + "." + inpExt));
          String fname = "/home/" + fnm;
          if (flag == 1) {
            flag = 0;
            out += prefixPath + "/" + fnm + "." + fmt[1];
          } else {
            out += " #!# " + prefixPath + "/" + fnm + "." + fmt[1];
          }

          if (fmt[1].equals("mpg") || fmt[1].equals("mpeg") || fmt[1].equals("mp4")) {

            Process p =
                Runtime.getRuntime()
                    .exec(
                        "mencoder -of mpeg -ovc lavc -lavcopts vcodec=mpeg1video -oac copy "
                            + "/home/"
                            + fnm
                            + "."
                            + inpExt
                            + " -o "
                            + fname
                            + "."
                            + fmt[1]);

            String ls_str = "";
            DataInputStream ls_in = new DataInputStream(p.getInputStream());
            while ((ls_str = ls_in.readLine()) != null) {}

            p.destroy();
            dlt += " /home/" + fnm + "." + inpExt;
          } else if (fmt[1].equals("avi")) {

            Process p =
                Runtime.getRuntime()
                    .exec(
                        "mencoder -ovc lavc -oac mp3lame -o "
                            + fname
                            + "."
                            + fmt[1]
                            + " "
                            + "/home/"
                            + fnm
                            + "."
                            + inpExt);

            String ls_str = "";
            DataInputStream ls_in = new DataInputStream(p.getInputStream());
            while ((ls_str = ls_in.readLine()) != null) {}

            p.destroy();
            dlt += " /home/" + fnm + "." + inpExt;
          } else {
            // TBD
            System.out.println("Unsupported target format!!!!!");
          }
          hdfs.copyFromLocalFile(
              true,
              true,
              new Path(fname + "." + fmt[1]),
              new Path(prefixPath + "/" + fnm + "." + fmt[1]));
        }

        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt}; // delete the files after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();

        System.out.println("Job2 mapOut:" + out);
        context.write(new Text(lst[0]), new Text(out));
        System.out.println(out);
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }
예제 #25
0
    public void map(
        WritableComparable<?> key,
        Text value,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      String url = value.toString(); // value is line of text

      if (url != null && url.trim().startsWith("#")) {
        /* Ignore line that start with # */
        return;
      }

      // if tabs : metadata that could be stored
      // must be name=value and separated by \t
      float customScore = -1f;
      int customInterval = interval;
      int fixedInterval = -1;
      Map<String, String> metadata = new TreeMap<String, String>();
      if (url.indexOf("\t") != -1) {
        String[] splits = url.split("\t");
        url = splits[0];
        for (int s = 1; s < splits.length; s++) {
          // find separation between name and value
          int indexEquals = splits[s].indexOf("=");
          if (indexEquals == -1) {
            // skip anything without a =
            continue;
          }
          String metaname = splits[s].substring(0, indexEquals);
          String metavalue = splits[s].substring(indexEquals + 1);
          if (metaname.equals(nutchScoreMDName)) {
            try {
              customScore = Float.parseFloat(metavalue);
            } catch (NumberFormatException nfe) {
            }
          } else if (metaname.equals(nutchFetchIntervalMDName)) {
            try {
              customInterval = Integer.parseInt(metavalue);
            } catch (NumberFormatException nfe) {
            }
          } else if (metaname.equals(nutchFixedFetchIntervalMDName)) {
            try {
              fixedInterval = Integer.parseInt(metavalue);
            } catch (NumberFormatException nfe) {
            }
          } else metadata.put(metaname, metavalue);
        }
      }
      try {
        url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
        url = filters.filter(url); // filter the url
      } catch (Exception e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Skipping " + url + ":" + e);
        }
        url = null;
      }
      if (url == null) {
        reporter.getCounter("injector", "urls_filtered").increment(1);
      } else { // if it passes
        value.set(url); // collect it
        CrawlDatum datum = new CrawlDatum();
        datum.setStatus(CrawlDatum.STATUS_INJECTED);

        // Is interval custom? Then set as meta data
        if (fixedInterval > -1) {
          // Set writable using float. Flaot is used by AdaptiveFetchSchedule
          datum
              .getMetaData()
              .put(Nutch.WRITABLE_FIXED_INTERVAL_KEY, new FloatWritable(fixedInterval));
          datum.setFetchInterval(fixedInterval);
        } else {
          datum.setFetchInterval(customInterval);
        }

        datum.setFetchTime(curTime);
        // now add the metadata
        Iterator<String> keysIter = metadata.keySet().iterator();
        while (keysIter.hasNext()) {
          String keymd = keysIter.next();
          String valuemd = metadata.get(keymd);
          datum.getMetaData().put(new Text(keymd), new Text(valuemd));
        }
        if (customScore != -1) datum.setScore(customScore);
        else datum.setScore(scoreInjected);
        try {
          scfilters.injectedScore(value, datum);
        } catch (ScoringFilterException e) {
          if (LOG.isWarnEnabled()) {
            LOG.warn(
                "Cannot filter injected score for url "
                    + url
                    + ", using default ("
                    + e.getMessage()
                    + ")");
          }
        }
        reporter.getCounter("injector", "urls_injected").increment(1);
        output.collect(value, datum);
      }
    }
예제 #26
0
 public void write(DataOutput out) throws IOException {
   out.writeUTF(leftBigram.toString());
   out.writeUTF(rightBigram.toString());
 }
예제 #27
0
    // merge the converted files here
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      System.out.println("I'm in Job2 reduce");

      Configuration config = new Configuration();
      FileSystem hdfs = FileSystem.get(config);
      try {
        String out = "";
        for (Text t : values) {
          out = t.toString();
          out = out.trim();
          System.out.println("job2:redInp:-" + out);
          break;
        }
        String[] outl = out.split(" #!# ");

        Pattern x = Pattern.compile("(.*)/(.*)\\.(.*)");
        Matcher xm = x.matcher(outl[0]);
        String prefixPath = "", fnm = "", ext = "";
        while (xm.find()) {
          prefixPath = xm.group(1);
          fnm = xm.group(2);
          ext = xm.group(3);
        }
        String foutname = fnm.split("_")[0];
        foutname += "." + ext;
        String query = "mencoder -oac copy -ovc copy";
        int cnt = 0;
        for (String st : outl) {
          cnt++;
          hdfs.copyToLocalFile(
              true,
              new Path(st),
              new Path("/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext));
          query += " " + "/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext;
        }
        query += " -o " + "/home/" + foutname;
        Process p2 =
            Runtime.getRuntime().exec(query); // query for merging the video files is executed here
        String ls_str = "";
        DataInputStream ls_in = new DataInputStream(p2.getInputStream());
        while ((ls_str = ls_in.readLine()) != null) {}
        p2.destroy();
        hdfs.copyFromLocalFile(
            true, true, new Path("/home/" + foutname), new Path(prefixPath + "/" + foutname));
        cnt = 0;
        String dlt1 = "";
        for (String st3 : outl) {
          cnt++;
          dlt1 += " " + "/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext;
        }
        Runtime rt1 = Runtime.getRuntime();
        String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt1}; // delete the files after use
        Process pr1 = rt1.exec(cmd1);
        pr1.waitFor();
        context.write(new Text(""), new Text(prefixPath + "/" + foutname));
      } catch (IOException e) {
        System.out.println("exception happened - here's what I know: ");
        e.printStackTrace();
        System.exit(-1);
      }
    }