Esempio n. 1
0
 /**
  * Builds a record reader that forwards to the reader of the input format named by a tagged
  * split.
  *
  * @param split the split to read; it is cast to {@code TaggedInputSplit}
  * @param context the task attempt context whose configuration is used to instantiate the
  *     input format
  * @throws IOException if the underlying input format fails to create its record reader
  * @throws InterruptedException if creating the underlying record reader is interrupted
  */
 @SuppressWarnings("unchecked")
 public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
     throws IOException, InterruptedException {
   TaggedInputSplit tagged = (TaggedInputSplit) split;

   // Instantiate the input format class recorded in the tagged split.
   Object instance =
       ReflectionUtils.newInstance(tagged.getInputFormatClass(), context.getConfiguration());
   InputFormat<K, V> delegateFormat = (InputFormat<K, V>) instance;

   // The wrapped reader works on the split carried inside the tagged split.
   originalRR = delegateFormat.createRecordReader(tagged.getInputSplit(), context);
 }
 public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter)
     throws IOException {
   InputFormat indirIF =
       (InputFormat)
           ReflectionUtils.newInstance(
               job.getClass(
                   org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
                   SequenceFileInputFormat.class),
               job);
   IndirectSplit is = ((IndirectSplit) split);
   return indirIF.getRecordReader(
       new FileSplit(is.getPath(), 0, is.getLength(), (String[]) null), job, reporter);
 }
Esempio n. 3
0
  /**
   * Computes the input splits of a job, sorts them with {@code SplitComparator} and writes the
   * split files into the job submission directory.
   *
   * @param job the job whose input format produces the splits
   * @param jobSubmitDir the directory the split files are created in
   * @return the number of splits written
   * @throws IOException if computing or writing the splits fails
   * @throws InterruptedException if computing the splits is interrupted
   * @throws ClassNotFoundException if the job's input format class cannot be resolved
   */
  @SuppressWarnings("unchecked")
  private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir)
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> inputFormat =
        ReflectionUtils.newInstance(job.getInputFormatClass(), conf);

    List<InputSplit> splitList = inputFormat.getSplits(job);
    InputSplit[] rawSplits = splitList.toArray(new InputSplit[splitList.size()]);
    T[] ordered = (T[]) rawSplits;

    // Order the splits with SplitComparator (intended: biggest first) before writing them out.
    Arrays.sort(ordered, new SplitComparator());
    JobSplitWriter.createSplitFiles(
        jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), ordered);

    // The number of splits equals the number of map tasks.
    return ordered.length;
  }
Esempio n. 4
0
  /**
   * Replaces the current drawing with one parsed from the given text.
   *
   * <p>The text is offered to each input format of a freshly created drawing, in order, until
   * one reads it without an {@link IOException}. If the last attempted format fails, or anything
   * else goes wrong, the current drawing is cleared and shows the error message in a text figure
   * instead.
   *
   * @param text the serialized drawing; {@code null} or empty text is ignored
   */
  public void setData(String text) {
    if (text == null || text.length() == 0) {
      return;
    }

    InputStream in = null;
    try {
      Drawing drawing = createDrawing();
      byte[] bytes = text.getBytes("UTF8");
      IOException lastFailure = null;

      // Try to read the data using all known input formats.
      for (InputFormat fmt : drawing.getInputFormats()) {
        try {
          // The stream must exist before the format reads from it, and every attempt gets a
          // fresh stream so a failed partial read cannot affect the next format.
          in = new ByteArrayInputStream(bytes);
          fmt.read(in, drawing);
          lastFailure = null;
          break;
        } catch (IOException e) {
          lastFailure = e;
        }
      }
      if (lastFailure != null) {
        throw lastFailure;
      }

      setDrawing(drawing);
    } catch (Throwable e) {
      // Show the failure inside the drawing itself.
      getDrawing().removeAllChildren();
      SVGTextFigure tf = new SVGTextFigure();
      // Some throwables carry no message; fall back to their string form.
      String message = e.getMessage();
      tf.setText(message != null ? message : e.toString());
      tf.setBounds(new Point2D.Double(10, 10), new Point2D.Double(100, 100));
      getDrawing().add(tf);
      e.printStackTrace();
    } finally {
      if (in != null) {
        try {
          in.close();
        } catch (IOException ex) {
          ex.printStackTrace();
        }
      }
    }
  }
Esempio n. 5
0
  /**
   * Adds (or replaces) a named graph in the repository.
   *
   * <p>Sends an HTTP {@code PUT} to the URL formed by appending {@code graphName} to the data
   * URL, with the format's MIME type as {@code Content-Type} and the graph data as the request
   * body, encoded as UTF-8. The given format is also stored in {@code inFormat}.
   *
   * @param graphName name of the graph, appended verbatim to the data URL
   * @param graphData the serialized graph to upload
   * @param format the format of {@code graphData}; supplies the MIME type
   * @return the HTML string returned by the server - could be made prettier.
   * @throws MalformedURLException if the data URL plus graph name is not a valid URL
   * @throws ProtocolException if the request method cannot be set on the connection
   * @throws IOException if connecting, writing the request or reading the response fails
   */
  public String add(String graphName, String graphData, InputFormat format)
      throws MalformedURLException, ProtocolException, IOException {
    HttpURLConnection connection =
        (HttpURLConnection) new URL(dataURL + graphName).openConnection();

    connection.setDoOutput(true);
    connection.setDoInput(true);
    connection.setRequestMethod("PUT");
    connection.setRequestProperty("Content-Type", format.getMimeType());
    inFormat = format;

    DataOutputStream ps = new DataOutputStream(connection.getOutputStream());
    try {
      // writeBytes(String) would drop the high byte of every char and corrupt non-ASCII
      // data, so the body is encoded explicitly.
      ps.write(graphData.getBytes("UTF-8"));
      ps.flush();
    } finally {
      // Close the request stream even when writing fails.
      ps.close();
    }

    return readResponse(connection);
  }
Esempio n. 6
0
  /**
   * Appends the given graph to a named graph.
   *
   * <p>Sends an HTTP {@code POST} to the data URL with an
   * {@code application/x-www-form-urlencoded} body carrying the fields {@code mime-type},
   * {@code graph} and {@code data}. Every field value is URL-encoded as UTF-8.
   *
   * @param graphName - name of the graph to append to
   * @param graphData - the graph to append
   * @param format - the format of the graph to append
   * @return the HTML string returned by the server - could be made prettier.
   * @throws MalformedURLException declared for callers; not raised directly by this method
   * @throws ProtocolException if the request method cannot be set on the connection
   * @throws IOException if connecting, writing the request or reading the response fails
   */
  public String append(String graphName, String graphData, InputFormat format)
      throws MalformedURLException, ProtocolException, IOException {

    HttpURLConnection connection = (HttpURLConnection) dataURL.openConnection();

    connection.setDoOutput(true);
    connection.setDoInput(true);
    connection.setRequestMethod("POST");
    connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");

    // The MIME type is encoded like the other fields: an unencoded '+' (as in
    // "application/rdf+xml") would be decoded as a space by a form-urlencoded parser.
    String body =
        "mime-type="
            + URLEncoder.encode(format.getMimeType(), "UTF-8")
            + "&graph="
            + URLEncoder.encode(graphName, "UTF-8")
            + "&data="
            + URLEncoder.encode(graphData, "UTF-8");

    DataOutputStream ps = new DataOutputStream(connection.getOutputStream());
    try {
      // The body is pure ASCII after URL encoding, so writeBytes loses nothing here.
      ps.writeBytes(body);
      ps.flush();
    } finally {
      // Close the request stream even when writing fails.
      ps.close();
    }

    return readResponse(connection);
  }
  /**
   * Writes sequence files of varying, randomly stepped record counts and checks that, for
   * several randomly chosen split counts, every key is read back by exactly one split.
   *
   * @throws Exception if file system access or reading the splits fails
   */
  public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    // Seed a generator from a random seed; the seed itself is not recorded anywhere.
    Random random = new Random(new Random().nextInt());

    // Start from a clean input directory.
    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    int length = 0;
    while (length < MAX_LENGTH) {
      // Create a file holding `length` records with keys 0..length-1.
      SequenceFile.Writer writer =
          SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
      try {
        for (int recordIndex = 0; recordIndex < length; recordIndex++) {
          byte[] payload = new byte[random.nextInt(10)];
          random.nextBytes(payload);
          writer.append(new IntWritable(recordIndex), new BytesWritable(payload));
        }
      } finally {
        writer.close();
      }

      // Split the file three times, each time requesting a random number of splits.
      InputFormat<IntWritable, BytesWritable> format =
          new SequenceFileInputFormat<IntWritable, BytesWritable>();
      IntWritable key = new IntWritable();
      BytesWritable value = new BytesWritable();
      for (int round = 0; round < 3; round++) {
        int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        InputSplit[] splits = format.getSplits(job, numSplits);

        // Track which keys have been seen across all splits of this round.
        BitSet seenKeys = new BitSet(length);
        for (InputSplit split : splits) {
          RecordReader<IntWritable, BytesWritable> reader =
              format.getRecordReader(split, job, reporter);
          try {
            while (reader.next(key, value)) {
              assertFalse("Key in multiple partitions.", seenKeys.get(key.get()));
              seenKeys.set(key.get());
            }
          } finally {
            reader.close();
          }
        }
        assertEquals("Some keys in no partition.", length, seenKeys.cardinality());
      }

      // Advance to the next record count by a random step of at least one.
      length += random.nextInt(MAX_LENGTH / 10) + 1;
    }
  }
 /**
  * Verifies that asking the repository for an input format for the wildcard media type, and
  * then using it, ends in a {@code MediaTypeNotSupportedException}. If both calls return
  * normally, the test fails explicitly.
  */
 @Test(expected = MediaTypeNotSupportedException.class)
 public void canNotGetInputFormatBasedOnWildcardMediaType() throws Exception {
   final InputFormat wildcardFormat = repository.inputFormat(MediaType.WILDCARD_TYPE);

   // Reading through the format is still expected to be rejected.
   wildcardFormat.readValue("foo");

   fail("Got InputFormat based on wild card type: " + wildcardFormat);
 }