@SuppressWarnings("unchecked") public DelegatingRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the TaggedInputSplit. TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split; InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils.newInstance( taggedInputSplit.getInputFormatClass(), context.getConfiguration()); originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context); }
/**
 * Returns a record reader over the file that an {@code IndirectSplit} points at.
 *
 * <p>The input format to delegate to is looked up in the job under
 * {@code GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT}, defaulting to
 * {@code SequenceFileInputFormat}. The delegate reads the whole file (offset 0 up to the
 * split's length) with no host hints.
 *
 * @param split the split to read; must be an {@code IndirectSplit}
 * @param job the job configuration
 * @param reporter progress reporter passed through to the delegate
 * @return the delegate format's record reader for the referenced file
 * @throws IOException if the delegate cannot create its reader
 */
public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter)
    throws IOException {
  Class<?> delegateClass =
      job.getClass(
          org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
          SequenceFileInputFormat.class);
  InputFormat delegate = (InputFormat) ReflectionUtils.newInstance(delegateClass, job);

  IndirectSplit indirectSplit = (IndirectSplit) split;
  FileSplit wholeFile =
      new FileSplit(indirectSplit.getPath(), 0, indirectSplit.getLength(), (String[]) null);

  return delegate.getRecordReader(wholeFile, job, reporter);
}
@SuppressWarnings("unchecked") private <T extends InputSplit> int writeNewSplits(JobContext job, Path jobSubmitDir) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); T[] array = (T[]) splits.toArray(new InputSplit[splits.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(array, new SplitComparator()); JobSplitWriter.createSplitFiles(jobSubmitDir, conf, jobSubmitDir.getFileSystem(conf), array); //// num of split. the same as num of maps return array.length; }
public void setData(String text) { if (text != null && text.length() > 0) { InputStream in = null; try { Object result = null; Drawing drawing = createDrawing(); // Try to read the data using all known input formats. for (InputFormat fmt : drawing.getInputFormats()) { try { fmt.read(in, drawing); in = new ByteArrayInputStream(text.getBytes("UTF8")); result = drawing; break; } catch (IOException e) { result = e; } } if (result instanceof IOException) { throw (IOException) result; } setDrawing(drawing); } catch (Throwable e) { getDrawing().removeAllChildren(); SVGTextFigure tf = new SVGTextFigure(); tf.setText(e.getMessage()); tf.setBounds(new Point2D.Double(10, 10), new Point2D.Double(100, 100)); getDrawing().add(tf); e.printStackTrace(); } finally { if (in != null) { try { in.close(); } catch (IOException ex) { ex.printStackTrace(); } } } } }
/**
 * Adds (or replaces) a named graph in the repository by issuing an HTTP PUT to
 * {@code dataURL + graphName}.
 *
 * <p>The graph data is sent as UTF-8 bytes with the format's mime type as the
 * {@code Content-Type}. The given format is also remembered in {@code inFormat}.
 *
 * @param graphName name of the graph, appended to the data URL
 * @param graphData the serialized graph to upload
 * @param format the format of {@code graphData}; supplies the mime type
 * @return the HTML string returned from the server - could be made prettier.
 * @throws MalformedURLException if the target URL cannot be formed
 * @throws ProtocolException if the request method cannot be set
 * @throws IOException if the connection or the transfer fails
 */
public String add(String graphName, String graphData, InputFormat format)
    throws MalformedURLException, ProtocolException, IOException {
  HttpURLConnection connection =
      (HttpURLConnection) new URL(dataURL + graphName).openConnection();
  connection.setDoOutput(true);
  connection.setDoInput(true);
  connection.setRequestMethod("PUT");
  connection.setRequestProperty("Content-Type", format.getMimeType());
  inFormat = format;

  DataOutputStream ps = new DataOutputStream(connection.getOutputStream());
  try {
    // DataOutputStream.writeBytes() keeps only the low eight bits of every char and
    // would corrupt non-ASCII graph data, so the text is encoded explicitly.
    ps.write(graphData.getBytes("UTF-8"));
    ps.flush();
  } finally {
    // Release the stream even when the upload fails part-way.
    ps.close();
  }
  return readResponse(connection);
}
/**
 * Appends the given graph to a named graph by POSTing a form to the data URL.
 *
 * <p>The form carries three fields: {@code mime-type}, {@code graph} and {@code data}. All
 * three values are URL-encoded as UTF-8.
 *
 * @param graphName name of the graph to append to
 * @param graphData the graph to append
 * @param format the format of the graph to append; supplies the mime type
 * @return the HTML string returned from the server - could be made prettier.
 * @throws MalformedURLException declared for callers; see the connection setup
 * @throws ProtocolException if the request method cannot be set
 * @throws IOException if the connection or the transfer fails
 */
public String append(String graphName, String graphData, InputFormat format)
    throws MalformedURLException, ProtocolException, IOException {
  HttpURLConnection connection = (HttpURLConnection) dataURL.openConnection();
  connection.setDoOutput(true);
  connection.setDoInput(true);
  connection.setRequestMethod("POST");
  connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");

  // The mime type must be encoded as well: in a form body a literal '+' (as in
  // "application/rdf+xml") is decoded as a space.
  String form =
      "mime-type=" + URLEncoder.encode(format.getMimeType(), "UTF-8")
          + "&graph=" + URLEncoder.encode(graphName, "UTF-8")
          + "&data=" + URLEncoder.encode(graphData, "UTF-8");

  DataOutputStream ps = new DataOutputStream(connection.getOutputStream());
  try {
    // The encoded form is pure ASCII, so writeBytes() loses nothing here.
    ps.writeBytes(form);
    ps.flush();
  } finally {
    // Release the stream even when the upload fails part-way.
    ps.close();
  }
  return readResponse(connection);
}
public void testFormat() throws Exception { JobConf job = new JobConf(conf); FileSystem fs = FileSystem.getLocal(conf); Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Reporter reporter = Reporter.NULL; int seed = new Random().nextInt(); // LOG.info("seed = "+seed); Random random = new Random(seed); fs.delete(dir, true); FileInputFormat.setInputPaths(job, dir); // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) { // LOG.info("creating; entries = " + length); // create a file with length entries SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class); try { for (int i = 0; i < length; i++) { IntWritable key = new IntWritable(i); byte[] data = new byte[random.nextInt(10)]; random.nextBytes(data); BytesWritable value = new BytesWritable(data); writer.append(key, value); } } finally { writer.close(); } // try splitting the file in a variety of sizes InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>(); IntWritable key = new IntWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1; // LOG.info("splitting: requesting = " + numSplits); InputSplit[] splits = format.getSplits(job, numSplits); // LOG.info("splitting: got = " + splits.length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.length; j++) { RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter); try { int count = 0; while (reader.next(key, value)) { // if (bits.get(key.get())) { // LOG.info("splits["+j+"]="+splits[j]+" : " + // key.get()); // LOG.info("@"+reader.getPos()); // } assertFalse("Key in multiple partitions.", bits.get(key.get())); bits.set(key.get()); count++; } // 
LOG.info("splits["+j+"]="+splits[j]+" count=" + // count); } finally { reader.close(); } } assertEquals("Some keys in no partition.", length, bits.cardinality()); } } }
/**
 * Asking the repository for an input format for the wildcard media type and using it must end
 * in a {@code MediaTypeNotSupportedException}; reaching the final {@code fail} means a format
 * was handed out and used without that exception.
 */
@Test(expected = MediaTypeNotSupportedException.class)
public void canNotGetInputFormatBasedOnWildcardMediaType() throws Exception {
  final InputFormat wildcardFormat = repository.inputFormat(MediaType.WILDCARD_TYPE);

  wildcardFormat.readValue("foo");

  fail("Got InputFormat based on wild card type: " + wildcardFormat);
}