예제 #1
    /**
     * Look for a tag whose text is getStartTag(), then read until it closes.
     *
     * @return true if there is data
     * @throws java.io.IOException if reading the input fails
     */
    public boolean nextKeyValue() throws IOException {
      String current = m_Sb.toString();
      if (current.contains("<scan num=\"67\"")) current = m_Sb.toString(); // break here

      if (readFromCurrentBuffer()) return true;
      int newSize;
      if (m_Current > m_End) { // we are the the end of the split
        m_Key = null;
        m_Value = null;
        return false;

      newSize = m_Input.read(m_Buffer);

      while (newSize > 0) {
        m_Current += newSize;
        String read = new String(m_Buffer, 0, newSize);
        if (readFromCurrentBuffer()) return true;
        if (m_Current > m_End) { // we are the the end of the split
          String s = m_Sb.toString();
          if (bufferHasStartTag() == -1) { // not working on a tag
            m_Key = null;
            m_Value = null;
            return false;
          if (m_Sb.length() > getMaxTagLength()) {
            m_Key = null;
            m_Value = null;
            return false;

        newSize = m_Input.read(m_Buffer);
      // exit because we are at the m_End
      if (newSize <= 0) {
        m_Key = null;
        m_Value = null;
        return false;
      if (m_Current > m_End) { // we are the the end of the split
        m_Key = null;
        m_Value = null;
        return false;

      return true;
예제 #2
 public void map(
     Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   // id color
   // id color 1/0 "COLOR"
   String[] tokens = value.toString().split("\\s+");
   IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0]));
   StringBuilder sb = new StringBuilder();
   for (int i = 1; i < tokens.length; i++) {
     if (sb.length() != 0) sb.append(" ");
   output.collect(SourceId, new Text(sb.toString()));
예제 #3
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      //  getFileStatus fileStatus = fs.getFileStatus(split.getPath());
      //noinspection deprecation
      @SuppressWarnings(value = "deprecated")
      long length = fs.getLength(file);
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (m_Start > 0) fileIn.seek(m_Start);
      if (codec != null) {
        CompressionInputStream inputStream = codec.createInputStream(fileIn);
        m_Input = new BufferedReader(new InputStreamReader(inputStream));
        m_End = length;
      } else {
        m_Input = new BufferedReader(new InputStreamReader(fileIn));
      m_Current = m_Start;
      m_Key = split.getPath().getName();
예제 #4
 protected int bufferHasStartTag() {
   String startText = m_Sb.toString();
   String startTag = getStartTag() + " ";
   String startTag2 = getStartTag() + ">";
   int index = startText.indexOf(startTag);
   if (index > -1) return index;
   index = startText.indexOf(startTag2);
   if (index > -1) return index;
   return -1;
예제 #5
    protected boolean readFromCurrentBuffer() {
      String endTag = getEndTag();
      String startText = m_Sb.toString();
      if (!startText.contains(endTag)) return false; // need more read
      int index = bufferHasStartTag();
      if (index == -1) return false;
      startText = startText.substring(index);

      String s = m_Sb.toString();
      index = s.indexOf(endTag);
      if (index == -1) return false; // need more read
      // throw new IllegalStateException("unmatched tag " + getBaseTag());
      index += endTag.length();
      m_Value = s.substring(0, index).trim();

      // keep the remaining text to add to the next tag
      String rest = s.substring(index);
      return true;
예제 #6
    public void map(
        Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
        throws IOException {

      // vid neighbors_num n1 n2 ...
      // vid color 1/0 "COLOR"
      String str = value.toString();
      if (str.endsWith(COLOR)) {
        // color table
        String[] tokens = str.substring(0, str.length() - 5).split("\\s+");
        int change = Integer.parseInt(tokens[2]);
        if (change == 1) {
          IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0]));
          StringBuilder sb = new StringBuilder();
          sb.append(" ");
          output.collect(SourceId, new Text(sb.toString()));
      } else {
        // edge table
        String[] tokens = value.toString().split("\\s+");
        IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0]));
        StringBuilder sb = new StringBuilder();
        for (int i = 1; i < tokens.length; i++) {
          if (sb.length() != 0) sb.append(" ");
        output.collect(SourceId, new Text(sb.toString()));