Example #1
0
  /**
   * Adds one batch item per project file to the pipeline driver for the given locale.
   *
   * <p>When {@code project.createTM} is {@code false}, the raw document is built from the file's
   * default URI and encoding, with {@code project.sourceLocale} as source locale and {@code locale}
   * as target locale, and the output goes to {@code getLocalXliffPath(locale)}. Otherwise the
   * locale-specific URI and encoding are used, {@code locale} serves as both source and target
   * locale, and the output goes to the path returned by
   * {@code getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)}.
   *
   * @param locale the locale the batch items are created for
   */
  private void addRawDocumentsIntoPipeline(LocaleId locale) {
    // The formatted message used to be built and discarded; actually log it.
    projetLogger.debug(String.format("adding RawDocuments (locale: [%s]) to pipeline", locale));

    for (L10nFile f : project.getFiles()) {

      RawDocument rd;
      if (!project.createTM) {
        rd = new RawDocument(f.getURI(), f.getEncoding(), project.sourceLocale, locale);
      } else {
        rd = new RawDocument(f.getURI(locale), f.getEncoding(locale), locale, locale);
      }
      rd.setFilterConfigId(f.getFilterConfigurationId());

      URI targetUri;
      if (!project.createTM) {
        targetUri = new File(f.getLocalXliffPath(locale)).toURI();
      } else {
        targetUri =
            new File(f.getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)).toURI();
      }

      // The output encoding is always the locale-specific one, in both modes.
      driver.addBatchItem(rd, targetUri, f.getEncoding(locale));

      projetLogger.debug(
          String.format(
              "[%s] added to the pipeline driver (target uri: [%s])",
              f.getURI(), targetUri.toString()));
    }
  }
Example #2
0
  /**
   * Merges the translated XLIFF files back into their original formats.
   *
   * <p>For every project file and every target locale, a batch item is queued that pairs the
   * translated XLIFF document (read as UTF-8) with the original source document, and names the
   * locale-specific local path and encoding as output. The batch is then run through an
   * {@link OriginalDocumentXliffMergerStep} followed by a {@link RawDocumentWriterStep}.
   *
   * <p>{@code ExtractingFromXliffStarted} is fired right before the batch is processed and
   * {@code ExtractingFromXliffFinished} right after it completes normally. The driver is destroyed
   * in all cases, including when setup or processing throws.
   */
  public void extractTranslatedFilesFromXliffs() {

    IPipelineDriver driverX = new PipelineDriver();
    try {
      driverX.setFilterConfigurationMapper(fcMapper);

      for (L10nFile file : project.getFiles()) {
        for (LocaleId locale : project.getTargetLocales()) {

          // represents source document (e.g. file.resx)
          RawDocument originalDoc =
              new RawDocument(
                  new File(file.getLocalPath()).toURI(),
                  file.getEncoding(),
                  project.sourceLocale,
                  locale);
          originalDoc.setFilterConfigId(file.getFilterConfigurationId());

          // represents translated xliff file
          RawDocument xliffDoc =
              new RawDocument(
                  new File(file.getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale))
                      .toURI(),
                  "UTF-8",
                  project.sourceLocale,
                  locale);

          BatchItemContext bic =
              new BatchItemContext(
                  xliffDoc,
                  Util.toURI(file.getLocalPath(locale)),
                  file.getEncoding(locale),
                  originalDoc);
          driverX.addBatchItem(bic);
        }
      }

      driverX.addStep(new OriginalDocumentXliffMergerStep());
      driverX.addStep(new RawDocumentWriterStep());

      project.getEventCannon().fireEvent(CtsAppEventType.ExtractingFromXliffStarted);
      driverX.processBatch();
      project.getEventCannon().fireEvent(CtsAppEventType.ExtractingFromXliffFinished);
    } finally {
      // Always destroy the driver, even when batch setup or processing fails.
      driverX.destroy();
    }
  }
  /**
   * Re-encodes the raw document carried by the event into {@code outputEncoding}.
   *
   * <p>The input encoding is taken from a BOM when one is detected; otherwise the start of the
   * document is read into {@code buffer} and passed through {@code checkDeclaration}. The content
   * is then copied block by block to the output file, optionally un-escaping existing sequences
   * (when {@code pattern} is set) and escaping characters that are either above U+007F (when
   * {@code params.escapeAll} is set) or not representable in the output encoding.
   *
   * <p>The output goes to {@code outputURI} when this is the last output step, and to a temporary
   * file (deleted on JVM exit) otherwise. On success the event's resource is replaced by a new
   * {@link RawDocument} pointing at the output file, with the output encoding and the original
   * source and target locales.
   *
   * @param event the event whose resource is the {@link RawDocument} to convert
   * @return the same event, now carrying the re-encoded document
   * @throws OkapiIOException if the temporary output file cannot be created
   * @throws RuntimeException wrapping any {@link IOException} raised while reading, writing or
   *     closing the documents
   */
  @Override
  protected Event handleRawDocument(Event event) {
    RawDocument rawDoc = (RawDocument) event.getResource();
    BufferedReader reader = null;
    OutputStreamWriter writer = null;
    try {
      // Try to detect the type of file from extension
      isXML = false;
      isHTML = false;
      String ext = Util.getExtension(inputURI.getPath());
      if (!Util.isEmpty(ext)) {
        // Matches any extension starting with ".htm" (e.g. ".htm", ".html")
        isHTML = (ext.toLowerCase().indexOf(".htm") == 0);
        isXML = ext.equalsIgnoreCase(".xml");
      }

      // === Try to detect the encoding

      InputStream is = rawDoc.getStream();
      // First: guess from a possible BOM
      BOMNewlineEncodingDetector detector =
          new BOMNewlineEncodingDetector(is, rawDoc.getEncoding());
      detector.detectAndRemoveBom();
      rawDoc.setEncoding(detector.getEncoding());

      String inputEncoding = rawDoc.getEncoding();
      // Then try internal detection for XML/HTML type files
      if (!detector.isAutodetected()) {
        reader = new BufferedReader(rawDoc.getReader());
        // Fill the shared buffer with the start of the document for checkDeclaration
        reader.read(buffer);
        String detectedEncoding = checkDeclaration(inputEncoding);
        if (!detectedEncoding.equalsIgnoreCase(inputEncoding)) {
          inputEncoding = detectedEncoding;
        }
        reader.close();
      }

      // Open the input document
      // TODO: Where did we reset the reader - can't call this twice unless we reset it
      reader = new BufferedReader(rawDoc.getReader());
      logger.info("Input encoding: " + inputEncoding);

      // Open the output document
      File outFile;
      if (isLastOutputStep()) {
        outFile = rawDoc.createOutputFile(outputURI);
      } else {
        try {
          outFile = File.createTempFile("okp-enc_", ".tmp");
        } catch (Throwable e) {
          throw new OkapiIOException("Cannot create temporary output.", e);
        }
        outFile.deleteOnExit();
      }
      writer =
          new OutputStreamWriter(
              new BufferedOutputStream(new FileOutputStream(outFile)), outputEncoding);
      outputEncoder = Charset.forName(outputEncoding).newEncoder();
      logger.info("Output encoding: " + outputEncoding);
      Util.writeBOMIfNeeded(writer, params.BOMonUTF8, outputEncoding);

      int n;
      CharBuffer tmpBuf = CharBuffer.allocate(1);
      ByteBuffer encBuf;
      boolean canEncode;
      // The declaration is only looked at once, on the first block
      boolean checkDeclaration = true;

      while (true) {
        buffer.clear();
        // Start with previous buffer remains if needed
        if (prevBuf != null) {
          buffer.append(prevBuf);
        }
        // Read the next block
        n = reader.read(buffer);
        // Check if we need to stop here
        boolean needSplitCheck = true;
        if (n == -1) {
          // Make sure we do not start an endless loop by
          // re-checking the last previous buffer
          if (prevBuf != null) {
            needSplitCheck = false;
            prevBuf = null;
            buffer.limit(buffer.position());
          } else break; // No previous, no read: Done
        }

        if (checkDeclaration) {
          checkDeclaration(inputEncoding);
          checkDeclaration = false;
        }

        // Un-escape if requested
        if (pattern != null) {
          if (needSplitCheck) checkSplitSequence();
          unescape();
        }

        // Output: everything up to the current position is pending content
        n = buffer.position();
        buffer.position(0);
        for (int i = 0; i < n; i++) {
          if (!(canEncode = outputEncoder.canEncode(buffer.get(i)))) {
            if (params.reportUnsupported) {
              logger.warning(
                  String.format(
                      "Un-supported character: U+%04X ('%c')", (int) buffer.get(i), buffer.get(i)));
            }
          }

          if ((params.escapeAll && (buffer.get(i) > 127)) || !canEncode) {
            boolean fallBack = false;
            // Write escape form
            if (useCER) {
              String tmp = entities.getName(buffer.get(i));
              if (tmp == null) fallBack = true;
              else writer.write("&" + tmp + ";");
            } else {
              if (params.useBytes) { // Escape bytes
                if (canEncode) {
                  tmpBuf.put(0, buffer.get(i));
                  tmpBuf.position(0);
                  encBuf = outputEncoder.encode(tmpBuf);
                  for (int j = 0; j < encBuf.limit(); j++) {
                    // Negative bytes are mapped to their unsigned 0-255 value
                    writer.write(
                        String.format(
                            outFormat,
                            (encBuf.get(j) < 0 ? (0xFF ^ ~encBuf.get(j)) : encBuf.get(j))));
                  }
                } else fallBack = true;
              } else { // Escape character
                writer.write(String.format(outFormat, (int) buffer.get(i)));
              }
            }
            if (fallBack) { // Default escaping when nothing else works
              writer.write(String.format("&#x%X;", (int) buffer.get(i)));
            }
          } else { // Normal raw forms
            writer.write(buffer.get(i));
          }
        }
      }

      // Done: close the files
      reader.close();
      reader = null;
      writer.close();
      writer = null;
      rawDoc.finalizeOutput();

      // Set the new raw-document URI and the encoding (in case one was auto-detected)
      // Other info stays the same
      RawDocument newDoc =
          new RawDocument(
              outFile.toURI(), outputEncoding, rawDoc.getSourceLocale(), rawDoc.getTargetLocale());
      event.setResource(newDoc);

    } catch (IOException e) {
      // Also covers FileNotFoundException, which is an IOException
      throw new RuntimeException(e);
    } finally {
      // Close both resources independently so that a failure while closing the
      // writer does not leave the reader open; the first failure is reported.
      IOException closeFailure = null;
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          closeFailure = e;
        }
      }
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          if (closeFailure == null) {
            closeFailure = e;
          }
        }
      }
      if (closeFailure != null) {
        throw new RuntimeException(closeFailure);
      }
    }

    return event;
  }