Java RawDocument Examples

Programming Language: Java

Namespace/Package Name: net.sf.okapi.common.resource

Class/Type: RawDocument

Examples at hotexamples.com: 3

Java RawDocument - 3 examples found. These are the top-rated real-world Java examples of net.sf.okapi.common.resource.RawDocument extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

setFilterConfigId(2)

createOutputFile(1)

finalizeOutput(1)

getEncoding(1)

getReader(1)

getSourceLocale(1)

getStream(1)

getTargetLocale(1)

setEncoding(1)

Example #1

Show file

File: CtsApp.java Project: barunhalderkolkata/bk_Hulk

  /**
   * Adds one batch item per project file to the pipeline driver for the given locale.
   *
   * <p>When {@code project.createTM} is false, the raw document is built from the file's default
   * URI and encoding, with {@code project.sourceLocale} as source locale and {@code locale} as
   * target locale, and the output URI comes from {@code getLocalXliffPath(locale)}. When it is
   * true, the locale-specific URI and encoding are used, {@code locale} is passed as both source
   * and target locale, and the output URI comes from {@code
   * getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)}.
   *
   * @param locale the locale whose documents are added to the pipeline
   */
  private void addRawDocumentsIntoPipeline(LocaleId locale) {
    // This message used to be formatted and then thrown away; it is now actually logged.
    projetLogger.debug(String.format("adding RawDocuments (locale: [%s]) to pipeline", locale));

    for (L10nFile f : project.getFiles()) {
      // Both the input document and the output location depend on the same createTM switch,
      // so they are selected together.
      final RawDocument rd;
      final URI targetUri;
      if (project.createTM) {
        rd = new RawDocument(f.getURI(locale), f.getEncoding(locale), locale, locale);
        targetUri =
            new File(f.getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)).toURI();
      } else {
        rd = new RawDocument(f.getURI(), f.getEncoding(), project.sourceLocale, locale);
        targetUri = new File(f.getLocalXliffPath(locale)).toURI();
      }
      rd.setFilterConfigId(f.getFilterConfigurationId());

      // The output encoding is always the locale-specific one, regardless of createTM.
      driver.addBatchItem(rd, targetUri, f.getEncoding(locale));

      projetLogger.debug(
          String.format(
              "[%s] added to the pipeline driver (target uri: [%s])", f.getURI(), targetUri));
    }
  }

Example #2

Show file

File: CtsApp.java Project: barunhalderkolkata/bk_Hulk

  /**
   * Runs a pipeline that processes the translated XLIFF of every project file for every target
   * locale.
   *
   * <p>For each (file, target locale) pair a {@link BatchItemContext} is queued that carries the
   * XLIFF document as input, the URI of {@code file.getLocalPath(locale)} and {@code
   * file.getEncoding(locale)} as output location and encoding, and the original source document
   * as a second input. The pipeline consists of an {@link OriginalDocumentXliffMergerStep}
   * followed by a {@link RawDocumentWriterStep}. {@code ExtractingFromXliffStarted} and {@code
   * ExtractingFromXliffFinished} events are fired immediately before and after the batch runs.
   *
   * <p>The driver is destroyed even when building or processing the batch throws.
   */
  public void extractTranslatedFilesFromXliffs() {
    IPipelineDriver driverX = new PipelineDriver();
    try {
      driverX.setFilterConfigurationMapper(fcMapper);

      for (L10nFile file : project.getFiles()) {
        for (LocaleId locale : project.getTargetLocales()) {

          // represents source document (e.g. file.resx)
          RawDocument originalDoc =
              new RawDocument(
                  new File(file.getLocalPath()).toURI(),
                  file.getEncoding(),
                  project.sourceLocale,
                  locale);
          originalDoc.setFilterConfigId(file.getFilterConfigurationId());

          // represents translated xliff file
          RawDocument xliffDoc =
              new RawDocument(
                  new File(file.getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale))
                      .toURI(),
                  "UTF-8",
                  project.sourceLocale,
                  locale);

          BatchItemContext bic =
              new BatchItemContext(
                  xliffDoc,
                  Util.toURI(file.getLocalPath(locale)),
                  file.getEncoding(locale),
                  originalDoc);
          driverX.addBatchItem(bic);
        }
      }

      driverX.addStep(new OriginalDocumentXliffMergerStep());
      driverX.addStep(new RawDocumentWriterStep());

      project.getEventCannon().fireEvent(CtsAppEventType.ExtractingFromXliffStarted);
      driverX.processBatch();
      project.getEventCannon().fireEvent(CtsAppEventType.ExtractingFromXliffFinished);
    } finally {
      // Previously only reached on success; a failing batch left the driver undestroyed.
      driverX.destroy();
    }
  }

Example #3

Show file

File: EncodingConversionStep.java Project: asgeirf/okapi

  /**
   * Re-encodes the raw document carried by {@code event} into {@code outputEncoding} and replaces
   * the event's resource with a new {@link RawDocument} that points at the written file.
   *
   * <p>The input encoding starts from the document's declared encoding, is adjusted by BOM
   * detection, and, when the detector reports no auto-detection, by {@code checkDeclaration}.
   * Output goes to {@code rawDoc.createOutputFile(outputURI)} when this is the last output step,
   * otherwise to a temporary file. Characters the output encoder cannot encode, and characters
   * above 127 when {@code params.escapeAll} is set, are written in an escaped form; all other
   * characters are written as-is.
   *
   * @param event the event whose resource is cast to {@link RawDocument}
   * @return the same event instance, with its resource replaced by the re-encoded document
   * @throws OkapiIOException if the temporary output file cannot be created
   * @throws RuntimeException wrapping any {@link IOException} raised while reading, writing or
   *     closing the streams
   */
  @Override
  protected Event handleRawDocument(Event event) {
    RawDocument rawDoc = (RawDocument) event.getResource();
    BufferedReader reader = null;
    OutputStreamWriter writer = null;
    try {
      // Try to detect the type of file from extension
      isXML = false;
      isHTML = false;
      String ext = Util.getExtension(inputURI.getPath());
      if (!Util.isEmpty(ext)) {
        // Any extension starting with ".htm" (case-insensitive) counts as HTML
        isHTML = (ext.toLowerCase().indexOf(".htm") == 0);
        isXML = ext.equalsIgnoreCase(".xml");
      }

      // === Try to detect the encoding

      InputStream is = rawDoc.getStream();
      // First: guess from a possible BOM
      BOMNewlineEncodingDetector detector =
          new BOMNewlineEncodingDetector(is, rawDoc.getEncoding());
      detector.detectAndRemoveBom();
      rawDoc.setEncoding(detector.getEncoding());

      String inputEncoding = rawDoc.getEncoding();
      // Then try internal detection for XML/HTML type files
      if (!detector.isAutodetected()) {
        reader = new BufferedReader(rawDoc.getReader());
        // Fill the shared buffer with the first block of the document.
        // NOTE(review): checkDeclaration() presumably inspects that buffer - confirm.
        reader.read(buffer);
        String detectedEncoding = checkDeclaration(inputEncoding);
        // Adopt the declared encoding only when it differs (ignoring case) from the current one
        if (!detectedEncoding.equalsIgnoreCase(inputEncoding)) {
          inputEncoding = detectedEncoding;
        }
        reader.close();
      }

      // Open the input document
      // TODO: Where did we reset the reader - can't call this twice unless we reset it
      reader = new BufferedReader(rawDoc.getReader());
      logger.info("Input encoding: " + inputEncoding);

      // Open the output document
      File outFile;
      if (isLastOutputStep()) {
        outFile = rawDoc.createOutputFile(outputURI);
      } else {
        // Not the last output step: write to a temporary file instead
        try {
          outFile = File.createTempFile("okp-enc_", ".tmp");
        } catch (Throwable e) {
          throw new OkapiIOException("Cannot create temporary output.", e);
        }
        // Ask the JVM to remove the temporary file when it exits
        outFile.deleteOnExit();
      }
      writer =
          new OutputStreamWriter(
              new BufferedOutputStream(new FileOutputStream(outFile)), outputEncoding);
      outputEncoder = Charset.forName(outputEncoding).newEncoder();
      logger.info("Output encoding: " + outputEncoding);
      Util.writeBOMIfNeeded(writer, params.BOMonUTF8, outputEncoding);

      int n;
      // One-character scratch buffer used when a single character is escaped byte by byte
      CharBuffer tmpBuf = CharBuffer.allocate(1);
      ByteBuffer encBuf;
      boolean canEncode;
      // checkDeclaration(...) is invoked only on the first pass through the loop below
      boolean checkDeclaration = true;

      while (true) {
        buffer.clear();
        // Start with previous buffer remains if needed
        if (prevBuf != null) {
          buffer.append(prevBuf);
        }
        // Read the next block
        n = reader.read(buffer);
        // Check if we need to stop here
        boolean needSplitCheck = true;
        if (n == -1) {
          // Make sure we do not start an endless loop by
          // re-checking the last previous buffer
          if (prevBuf != null) {
            needSplitCheck = false;
            prevBuf = null;
            buffer.limit(buffer.position());
          } else break; // No previous, no read: Done
        }

        if (checkDeclaration) {
          // Return value is ignored here, unlike the detection call earlier in this method
          checkDeclaration(inputEncoding);
          checkDeclaration = false;
        }

        // Un-escape if requested
        if (pattern != null) {
          if (needSplitCheck) checkSplitSequence();
          unescape();
        }

        // Output
        // From here on n is the number of characters currently held in the buffer
        n = buffer.position();
        buffer.position(0);
        for (int i = 0; i < n; i++) {
          if (!(canEncode = outputEncoder.canEncode(buffer.get(i)))) {
            if (params.reportUnsupported) {
              logger.warning(
                  String.format(
                      "Un-supported character: U+%04X ('%c')", (int) buffer.get(i), buffer.get(i)));
            }
          }

          // Escape when the character is not encodable, or when escapeAll is set and it is > 127
          if ((params.escapeAll && (buffer.get(i) > 127)) || !canEncode) {
            boolean fallBack = false;
            // Write escape form
            if (useCER) {
              // Named entity reference; fall back when no name is known for the character
              String tmp = entities.getName(buffer.get(i));
              if (tmp == null) fallBack = true;
              else writer.write("&" + tmp + ";");
            } else {
              if (params.useBytes) { // Escape bytes
                if (canEncode) {
                  tmpBuf.put(0, buffer.get(i));
                  tmpBuf.position(0);
                  encBuf = outputEncoder.encode(tmpBuf);
                  for (int j = 0; j < encBuf.limit(); j++) {
                    // Negative bytes are mapped to their unsigned value (128..255) before formatting
                    writer.write(
                        String.format(
                            outFormat,
                            (encBuf.get(j) < 0 ? (0xFF ^ ~encBuf.get(j)) : encBuf.get(j))));
                  }
                } else fallBack = true;
              } else { // Escape character
                writer.write(String.format(outFormat, (int) buffer.get(i)));
              }
            }
            if (fallBack) { // Default escaping when nothing else works
              writer.write(String.format("&#x%X;", (int) buffer.get(i)));
            }
          } else { // Normal raw forms
            writer.write(buffer.get(i));
          }
        }
      }

      // Done: close the files
      // Nulling the references keeps the finally block from closing them a second time
      reader.close();
      reader = null;
      writer.close();
      writer = null;
      rawDoc.finalizeOutput();

      // Set the new raw-document URI and the encoding (in case one was auto-detected)
      // Other info stays the same
      RawDocument newDoc =
          new RawDocument(
              outFile.toURI(), outputEncoding, rawDoc.getSourceLocale(), rawDoc.getTargetLocale());
      event.setResource(newDoc);

    } catch (FileNotFoundException e) {
      // NOTE(review): FileNotFoundException is an IOException and is handled identically below
      throw new RuntimeException(e);
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      // Only non-null on a failure path; the success path nulls both references above.
      // NOTE(review): if writer.close() throws, reader.close() is skipped.
      try {
        if (writer != null) {
          writer.close();
          writer = null;
        }
        if (reader != null) {
          reader.close();
          reader = null;
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    return event;
  }