Java DParseTask.phaseOneInitialize 예제들

프로그래밍 언어: Java

네임스페이스/패키지 이름: water.parser

클래스/타입: DParseTask

메소드/함수: phaseOneInitialize

hotexamples.com에서의 예제들: 2

Java DParseTask.phaseOneInitialize - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 water.parser.DParseTask.phaseOneInitialize의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

addInvalidCol(4)

pow10i(3)

setColumnNames(2)

phaseOneInitialize(2)

pow10(2)

phaseTwoInitialize(2)

createEnums(2)

calculateColumnEncodings(1)

_colDomains(1)

createValueArrayHeader(1)

fitsIntoInt(1)

newLine(1)

addStrCol(1)

addNumCol(1)

addCol(1)

_phase(1)

_bases(1)

예제 #1

파일 보기

파일: DParseTask.java 프로젝트: patricktoohey/h2o

  /**
   * Records the column names and the column count for the first parsing pass.
   *
   * <p>The call is a no-op unless the task is currently in {@code Pass.ONE}. For the XLS and XLSX
   * parser types it additionally calls {@code createEnums()} and {@code phaseOneInitialize()}
   * right away; for the other parser types those statistics are set up in the map method, since
   * they must be different for each distributed invocation.
   *
   * @param colNames the column names; must not be {@code null} (checked by an assertion only)
   */
  public void setColumnNames(String[] colNames) {
    // Column names are only taken during the first pass.
    if (_phase != Pass.ONE) {
      return;
    }

    assert colNames != null;
    _ncolumns = colNames.length;
    _colNames = colNames;

    // Only the XLS/XLSX parsers get their statistics initialized here.
    boolean isExcelParser =
        _parserType == CustomParser.Type.XLS || _parserType == CustomParser.Type.XLSX;
    if (!isExcelParser) {
      return;
    }
    createEnums();
    phaseOneInitialize();
  }

예제 #2

파일 보기

파일: DParseTask.java 프로젝트: patricktoohey/h2o

  /**
   * Map function for distributed parsing of the CSV files.
   *
   * <p>In first phase it calculates the min, max, means, encodings and other statistics about the
   * dataset, determines the number of columns.
   *
   * <p>The second pass then encodes the parsed dataset to the result key, splitting it into equal
   * sized chunks.
   *
   * <p>No exception escapes this method: any {@link Exception} raised while parsing is printed
   * and a description of it is stored in {@code _error}. The description is never {@code null},
   * even when the exception carries no message.
   *
   * @param key key of the value to parse; when its first key byte equals
   *     {@code Key.ARRAYLET_CHUNK} it is treated as one chunk of a {@code ValueArray}
   */
  @Override
  public void map(Key key) {
    try {
      Key aryKey = null;
      boolean arraylet = key._kb[0] == Key.ARRAYLET_CHUNK;
      boolean skipFirstLine = _skipFirstLine;
      if (arraylet) {
        aryKey = ValueArray.getArrayKey(key);
        _chunkId = ValueArray.getChunkIndex(key);
        // Every chunk other than chunk 0 is parsed with skipFirstLine set.
        skipFirstLine = skipFirstLine || (ValueArray.getChunkIndex(key) != 0);
      }
      switch (_phase) {
        case ONE:
          assert (_ncolumns != 0);
          // initialize the column statistics
          phaseOneInitialize();
          // perform the parse
          CsvParser p = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine);
          p.parse(key);
          if (arraylet) {
            // Record this chunk's row count in its own slot; the slot must still be unset (0).
            long idx = ValueArray.getChunkIndex(key);
            int idx2 = (int) idx;
            assert idx2 == idx; // the chunk index must fit into an int
            assert (_nrows[idx2] == 0)
                : idx
                    + ": "
                    + Arrays.toString(_nrows)
                    + " ("
                    + _nrows[idx2]
                    + " -- "
                    + _myrows
                    + ")";
            _nrows[idx2] = _myrows;
          }
          break;
        case TWO:
          assert (_ncolumns != 0);
          // initialize statistics - invalid rows, sigma and row size
          phaseTwoInitialize();
          // calculate the first row and the number of rows to parse
          int firstRow = 0;
          int lastRow = _myrows; // read before the counter is reset below
          _myrows = 0;
          if (arraylet) {
            // NOTE(review): this reads _nrows as cumulative row counts (the end row of each
            // chunk) - presumably converted between the two passes; confirm against the caller.
            long origChunkIdx = ValueArray.getChunkIndex(key);
            firstRow = (origChunkIdx == 0) ? 0 : _nrows[(int) origChunkIdx - 1];
            lastRow = _nrows[(int) origChunkIdx];
          }
          int rowsToParse = lastRow - firstRow;
          // create the output streams
          _outputStreams2 = createRecords(firstRow, rowsToParse);
          assert (_outputStreams2.length > 0);
          _ab = _outputStreams2[0].initialize();
          // perform the second parse pass
          CsvParser p2 = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine);
          p2.parse(key);
          // store the last stream if not stored during the parse
          if (_ab != null) _outputStreams2[_outputIdx].store();
          break;
        default:
          assert (false);
      }

      ParseStatus.update(_resultKey, DKV.get(key).length(), _phase);
    } catch (Exception e) {
      e.printStackTrace();
      // getMessage() is null for exceptions created without a message (e.g. a typical
      // NullPointerException). Fall back to toString() so the failure is still recorded;
      // presumably a null _error is read as "no error" elsewhere - confirm against callers.
      String message = e.getMessage();
      _error = (message != null) ? message : e.toString();
    }
  }