/** * look for a tag whose text is getStartTag() then read until it closes * * @return true if there is data * @throws java.io.IOException */ public boolean nextKeyValue() throws IOException { String current = m_Sb.toString(); if (current.contains("<scan num=\"67\"")) current = m_Sb.toString(); // break here if (readFromCurrentBuffer()) return true; int newSize; if (m_Current > m_End) { // we are the the end of the split m_Key = null; m_Value = null; m_Sb.setLength(0); return false; } newSize = m_Input.read(m_Buffer); while (newSize > 0) { m_Current += newSize; String read = new String(m_Buffer, 0, newSize); m_Sb.append(read); if (readFromCurrentBuffer()) return true; if (m_Current > m_End) { // we are the the end of the split String s = m_Sb.toString(); if (bufferHasStartTag() == -1) { // not working on a tag m_Key = null; m_Value = null; m_Sb.setLength(0); return false; } if (m_Sb.length() > getMaxTagLength()) { m_Key = null; m_Value = null; m_Sb.setLength(0); return false; } } newSize = m_Input.read(m_Buffer); } // exit because we are at the m_End if (newSize <= 0) { m_Key = null; m_Value = null; m_Sb.setLength(0); return false; } if (m_Current > m_End) { // we are the the end of the split m_Key = null; m_Value = null; m_Sb.setLength(0); return false; } return true; }
public void map( Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { // id color // id color 1/0 "COLOR" String[] tokens = value.toString().split("\\s+"); IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); for (int i = 1; i < tokens.length; i++) { if (sb.length() != 0) sb.append(" "); sb.append(tokens[i]); } output.collect(SourceId, new Text(sb.toString())); }
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); m_Sb.setLength(0); m_Start = split.getStart(); m_End = m_Start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the m_Start of the split FileSystem fs = file.getFileSystem(job); // getFileStatus fileStatus = fs.getFileStatus(split.getPath()); //noinspection deprecation @SuppressWarnings(value = "deprecated") long length = fs.getLength(file); FSDataInputStream fileIn = fs.open(split.getPath()); if (m_Start > 0) fileIn.seek(m_Start); if (codec != null) { CompressionInputStream inputStream = codec.createInputStream(fileIn); m_Input = new BufferedReader(new InputStreamReader(inputStream)); m_End = length; } else { m_Input = new BufferedReader(new InputStreamReader(fileIn)); } m_Current = m_Start; m_Key = split.getPath().getName(); }
/**
 * Locates the first start tag in the buffered text.
 * <p>
 * A start tag is the text of {@code getStartTag()} followed by either a
 * space (attributes follow) or {@code >} (no attributes). The position of
 * whichever form occurs earliest in the buffer is returned, so an element
 * written without attributes is not skipped in favour of a later one that
 * has attributes.
 *
 * @return index of the earliest start tag in the buffer, or -1 if there is none
 */
protected int bufferHasStartTag() {
    String buffered = m_Sb.toString();
    String tag = getStartTag();
    int withAttributes = buffered.indexOf(tag + " ");
    int withoutAttributes = buffered.indexOf(tag + ">");
    if (withAttributes == -1)
        return withoutAttributes;
    if (withoutAttributes == -1)
        return withAttributes;
    return Math.min(withAttributes, withoutAttributes);
}
protected boolean readFromCurrentBuffer() { String endTag = getEndTag(); String startText = m_Sb.toString(); if (!startText.contains(endTag)) return false; // need more read int index = bufferHasStartTag(); if (index == -1) return false; startText = startText.substring(index); m_Sb.setLength(0); m_Sb.append(startText); String s = m_Sb.toString(); ; index = s.indexOf(endTag); if (index == -1) return false; // need more read // throw new IllegalStateException("unmatched tag " + getBaseTag()); index += endTag.length(); m_Value = s.substring(0, index).trim(); // keep the remaining text to add to the next tag m_Sb.setLength(0); String rest = s.substring(index); m_Sb.append(rest); return true; }
public void map( Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { // vid neighbors_num n1 n2 ... // vid color 1/0 "COLOR" String str = value.toString(); if (str.endsWith(COLOR)) { // color table String[] tokens = str.substring(0, str.length() - 5).split("\\s+"); int change = Integer.parseInt(tokens[2]); if (change == 1) { IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); sb.append(tokens[1]); sb.append(" "); sb.append(tokens[2]); sb.append(COLOR); output.collect(SourceId, new Text(sb.toString())); } } else { // edge table String[] tokens = value.toString().split("\\s+"); IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); for (int i = 1; i < tokens.length; i++) { if (sb.length() != 0) sb.append(" "); sb.append(tokens[i]); } output.collect(SourceId, new Text(sb.toString())); } }