Java ColumnMetadata примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.freeeed.main

Класс/Тип: ColumnMetadata

Примеров на hotexamples.com: 3

Java ColumnMetadata - 3 примера найдено. Это лучшие примеры Java кода для org.freeeed.main.ColumnMetadata, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

delimiterSeparatedHeaders(2)

addMetadata(1)

addMetadataValue(1)

delimiterSeparatedValues(1)

reinit(1)

setAllMetadata(1)

setFieldSeparator(1)

Пример #1

Показать файл

Файл: FreeEedReducer.java Проект: McDeCoderDude/FreeEed

  /**
   * Prepares this reducer before any records are processed.
   *
   * <p>Loads the settings and the project from the job configuration, recreates the metadata names
   * file when the project runs in the Hadoop environment, configures the column metadata, writes
   * the header row, opens the output zip and initializes the Lucene index.
   *
   * @param context the reducer context supplying the job configuration and the output writer
   * @throws IOException if writing the metadata names file or the header row fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void setup(Reducer.Context context) throws IOException, InterruptedException {
    this.context = context;

    // Settings are installed globally before the project is loaded.
    Settings settings =
        Settings.loadFromString(context.getConfiguration().get(ParameterProcessing.SETTINGS_STR));
    Settings.setSettings(settings);

    Project project =
        Project.loadFromString(context.getConfiguration().get(ParameterProcessing.PROJECT));
    if (project.isEnvHadoop()) {
      restoreMetadataNamesFile(context);
    }

    columnMetadata = new ColumnMetadata();
    char delimiter = Delimiter.getDelim(project.getFieldSeparator());
    columnMetadata.setFieldSeparator(String.valueOf(delimiter));
    columnMetadata.setAllMetadata(project.getMetadataCollect());

    // The header row is emitted with a null key, so only the value is passed to the output.
    Text headerRow = new Text(columnMetadata.delimiterSeparatedHeaders());
    context.write(null, headerRow);

    zipFileWriter.setup();
    zipFileWriter.openZipForWriting();

    luceneIndex = new LuceneIndex(settings.getLuceneIndexDir(), project.getProjectCode(), null);
    luceneIndex.init();
  }

  /**
   * Writes the metadata names file contents carried in the job configuration to the local path
   * given by {@code ColumnMetadata.metadataNamesFile}, creating parent directories first.
   *
   * @param context the reducer context whose configuration holds the file contents
   * @throws IOException if the file cannot be written
   */
  private void restoreMetadataNamesFile(Reducer.Context context) throws IOException {
    String contents = context.getConfiguration().get(ParameterProcessing.METADATA_FILE);
    File namesFile = new File(ColumnMetadata.metadataNamesFile);
    namesFile.getParentFile().mkdirs();
    // NOTE(review): getBytes() uses the platform default charset; confirm the reader of this file
    // decodes with the same charset before pinning an explicit one here.
    Files.write(contents.getBytes(), namesFile);
  }

Пример #2

Показать файл

Файл: FreeEedReducer.java Проект: McDeCoderDude/FreeEed

  /**
   * Finishes this reducer after all records have been processed.
   *
   * <p>Writes the summary header row when the project does not collect only standard metadata,
   * closes the output zip, merges the Lucene index when enabled and, in the Hadoop environment,
   * copies the resulting zip to HDFS or uploads it to S3. Finally marks the job as finished in the
   * statistics.
   *
   * @param context the reducer context used for output and for the task attempt id
   * @throws IOException if writing the header row or closing the zip fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
    if (!Project.getProject().isMetadataCollectStandard()) {
      // write summary headers with all metadata, but for standard metadata don't write the last
      // line
      context.write(new Text("Hash"), new Text(columnMetadata.delimiterSeparatedHeaders()));
    }
    zipFileWriter.closeZip();

    if (Project.getProject().isLuceneIndexEnabled()) {
      mergeLuceneIndex();
    }

    Project project = Project.getProject();
    if (project.isEnvHadoop()) {
      String outputPath = project.getProperty(ParameterProcessing.OUTPUT_DIR_HADOOP);
      String zipFileName = zipFileWriter.getZipFileName();
      if (project.isFsHdfs()) {
        // HDFS paths always use '/' as the separator, independent of the local platform.
        // NOTE(review): the command is assembled by plain concatenation; paths containing
        // whitespace would presumably be split incorrectly - confirm how OsUtil.runCommand
        // tokenizes its argument.
        String cmd =
            "hadoop fs -copyFromLocal "
                + zipFileName
                + " "
                + outputPath
                + "/"
                + context.getTaskAttemptID()
                + ".zip";
        OsUtil.runCommand(cmd);
      } else if (project.isFsS3()) {
        S3Agent s3agent = new S3Agent();
        String run = project.getRun();
        if (!run.isEmpty()) {
          run = run + "/";
        }
        // S3 object keys use '/' as the delimiter, so the key must not depend on File.separator.
        String s3key =
            project.getProjectCode()
                + "/output/"
                + run
                + "results/"
                + context.getTaskAttemptID()
                + ".zip";
        // Keep updating the hadoop progress
        int refreshInterval = 60000;
        Timer timer = new Timer(refreshInterval, this);
        timer.start();
        try {
          s3agent.putFileInS3(zipFileName, s3key);
        } finally {
          // Stop the timer even when the upload fails, so it does not keep firing afterwards.
          timer.stop();
        }
      }
    }
    Stats.getInstance().setJobFinished();
  }

Пример #3

Показать файл

Файл: FreeEedReducer.java Проект: McDeCoderDude/FreeEed

  /**
   * Processes one document record: collects its metadata columns, adds its text, native, PDF and
   * exception files to the output zip, and writes the delimiter-separated metadata row.
   *
   * <p>The first record handled for a key becomes the master; later records are linked to it as
   * either an attachment (when the metadata reports a parent) or a duplicate.
   *
   * @param value the map holding the document metadata and, optionally, the native and PDF bytes
   * @throws IOException if adding a zip entry or writing the output row fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  protected void processMap(MapWritable value) throws IOException, InterruptedException {
    columnMetadata.reinit();
    ++outputFileCount;
    DocumentMetadata allMetadata = getAllMetadata(value);
    Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount);
    columnMetadata.addMetadata(standardMetadata);
    columnMetadata.addMetadata(allMetadata);
    // documents other than the first one in this loop are either duplicates or attachments
    if (first) {
      masterOutputFileCount = outputFileCount;
    } else {
      if (allMetadata.hasParent()) {
        columnMetadata.addMetadataValue(
            DocumentMetadataKeys.ATTACHMENT_PARENT, UPIFormat.format(masterOutputFileCount));
      } else {
        columnMetadata.addMetadataValue(
            DocumentMetadataKeys.MASTER_DUPLICATE, UPIFormat.format(masterOutputFileCount));
      }
    }

    // Both values are shared by every entry name below, so they are computed once.
    String originalFileName =
        new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
    String entryBaseName = UPIFormat.format(outputFileCount) + "_" + originalFileName;

    // add the text to the text folder
    String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
    String textEntryName = ParameterProcessing.TEXT + "/" + entryBaseName + ".txt";
    // The entry name is the result of a string concatenation and can never be null, so the text
    // entry is always added.
    // NOTE(review): documentText may be null here; confirm addTextFile handles that.
    zipFileWriter.addTextFile(textEntryName, documentText);
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName);

    // add the native file to the native folder
    String nativeEntryName = ParameterProcessing.NATIVE + "/" + entryBaseName;
    BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
    if (bytesWritable != null) { // some large exception files are not passed
      zipFileWriter.addBinaryFile(
          nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
      logger.trace("Processing file: {}", nativeEntryName);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);

    // add the pdf made from native to the PDF folder
    String pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + entryBaseName + ".pdf";
    BytesWritable pdfBytesWritable =
        (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
    if (pdfBytesWritable != null) {
      zipFileWriter.addBinaryFile(
          pdfNativeEntryName, pdfBytesWritable.getBytes(), pdfBytesWritable.getLength());
      logger.trace("Processing file: {}", pdfNativeEntryName);
    }

    processHtmlContent(value, allMetadata, UPIFormat.format(outputFileCount));

    // add exception to the exception folder
    String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
    if (exception != null) {
      String exceptionEntryName = "exception/" + entryBaseName;
      if (bytesWritable != null) {
        zipFileWriter.addBinaryFile(
            exceptionEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
      }
      columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
    }

    // write this all to the reduce map
    // drop the key altogether, because it messes up the format - but put it in the value
    // TODO use NullWritable
    // NOTE(review): the row is written only when OsUtil.isNix() is true, so on other platforms
    // nothing is emitted here - confirm this is intentional.
    if (OsUtil.isNix()) {
      context.write(null, new Text(columnMetadata.delimiterSeparatedValues()));
    }
    // prepare for the next file with the same key, if there is any
    first = false;
  }