/**
 * Adds one batch item per project file to the pipeline driver for the given locale.
 *
 * <p>When {@code project.createTM} is false, the raw document is built from the file's
 * default URI and encoding with {@code project.sourceLocale} and {@code locale}, and the
 * output goes to {@code getLocalXliffPath(locale)}. When it is true, the raw document is built
 * from the locale-specific URI and encoding with {@code locale} passed for both locale
 * arguments, and the output goes to
 * {@code getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)}.
 *
 * @param locale the locale the batch items are created for
 */
private void addRawDocumentsIntoPipeline(LocaleId locale) {
  // The formatted message used to be discarded; log it like the per-file message below.
  projetLogger.debug(String.format("adding RawDocuments (locale: [%s]) to pipeline", locale));

  for (L10nFile f : project.getFiles()) {
    RawDocument rd;
    URI targetUri;
    if (project.createTM) {
      rd = new RawDocument(f.getURI(locale), f.getEncoding(locale), locale, locale);
      targetUri =
          new File(f.getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)).toURI();
    } else {
      rd = new RawDocument(f.getURI(), f.getEncoding(), project.sourceLocale, locale);
      targetUri = new File(f.getLocalXliffPath(locale)).toURI();
    }
    rd.setFilterConfigId(f.getFilterConfigurationId());

    // The output encoding is always the locale-specific one, whatever the mode.
    driver.addBatchItem(rd, targetUri, f.getEncoding(locale));
    projetLogger.debug(
        String.format(
            "[%s] added to the pipeline driver (target uri: [%s])",
            f.getURI(), targetUri.toString()));
  }
}
public void extractTranslatedFilesFromXliffs() { IPipelineDriver driverX = new PipelineDriver(); driverX.setFilterConfigurationMapper(fcMapper); // L10nFile file = files.get(0); // LocaleId locale = LocaleId.FRENCH; for (L10nFile file : project.getFiles()) { for (LocaleId locale : project.getTargetLocales()) { // represents source document (e.g. file.resx) RawDocument originalDoc = new RawDocument( new File(file.getLocalPath()).toURI(), file.getEncoding(), project.sourceLocale, locale); originalDoc.setFilterConfigId(file.getFilterConfigurationId()); // reprosents translated xliff file RawDocument xliffDoc = new RawDocument( new File(file.getLocalXliffPath_SourceWithLocaleCodeBeforeXliffExtension(locale)) .toURI(), "UTF-8", project.sourceLocale, locale); BatchItemContext bic = new BatchItemContext( xliffDoc, Util.toURI(file.getLocalPath(locale)), file.getEncoding(locale), originalDoc); driverX.addBatchItem(bic); } } driverX.addStep(new OriginalDocumentXliffMergerStep()); driverX.addStep(new RawDocumentWriterStep()); project.getEventCannon().fireEvent(CtsAppEventType.ExtractingFromXliffStarted); driverX.processBatch(); project.getEventCannon().fireEvent(CtsAppEventType.ExtractingFromXliffFinished); driverX.destroy(); }
@Override protected Event handleRawDocument(Event event) { RawDocument rawDoc = (RawDocument) event.getResource(); BufferedReader reader = null; OutputStreamWriter writer = null; try { // Try to detect the type of file from extension isXML = false; isHTML = false; String ext = Util.getExtension(inputURI.getPath()); if (!Util.isEmpty(ext)) { isHTML = (ext.toLowerCase().indexOf(".htm") == 0); isXML = ext.equalsIgnoreCase(".xml"); } // === Try to detect the encoding InputStream is = rawDoc.getStream(); // First: guess from a possible BOM BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(is, rawDoc.getEncoding()); detector.detectAndRemoveBom(); rawDoc.setEncoding(detector.getEncoding()); String inputEncoding = rawDoc.getEncoding(); // Then try internal detection for XML/HTML type files if (!detector.isAutodetected()) { reader = new BufferedReader(rawDoc.getReader()); reader.read(buffer); String detectedEncoding = checkDeclaration(inputEncoding); if (!detectedEncoding.equalsIgnoreCase(inputEncoding)) { inputEncoding = detectedEncoding; } reader.close(); } // Open the input document // TODO: Where did we reset the reader - can't call this twice unless we reset it reader = new BufferedReader(rawDoc.getReader()); logger.info("Input encoding: " + inputEncoding); // Open the output document File outFile; if (isLastOutputStep()) { outFile = rawDoc.createOutputFile(outputURI); } else { try { outFile = File.createTempFile("okp-enc_", ".tmp"); } catch (Throwable e) { throw new OkapiIOException("Cannot create temporary output.", e); } outFile.deleteOnExit(); } writer = new OutputStreamWriter( new BufferedOutputStream(new FileOutputStream(outFile)), outputEncoding); outputEncoder = Charset.forName(outputEncoding).newEncoder(); logger.info("Output encoding: " + outputEncoding); Util.writeBOMIfNeeded(writer, params.BOMonUTF8, outputEncoding); int n; CharBuffer tmpBuf = CharBuffer.allocate(1); ByteBuffer encBuf; boolean canEncode; boolean checkDeclaration = true; 
while (true) { buffer.clear(); // Start with previous buffer remains if needed if (prevBuf != null) { buffer.append(prevBuf); } // Read the next block n = reader.read(buffer); // Check if we need to stop here boolean needSplitCheck = true; if (n == -1) { // Make sure we do not start an endless loop by // re-checking the last previous buffer if (prevBuf != null) { needSplitCheck = false; prevBuf = null; buffer.limit(buffer.position()); } else break; // No previous, no read: Done } if (checkDeclaration) { checkDeclaration(inputEncoding); checkDeclaration = false; } // Un-escape if requested if (pattern != null) { if (needSplitCheck) checkSplitSequence(); unescape(); } // Output n = buffer.position(); buffer.position(0); for (int i = 0; i < n; i++) { if (!(canEncode = outputEncoder.canEncode(buffer.get(i)))) { if (params.reportUnsupported) { logger.warning( String.format( "Un-supported character: U+%04X ('%c')", (int) buffer.get(i), buffer.get(i))); } } if ((params.escapeAll && (buffer.get(i) > 127)) || !canEncode) { boolean fallBack = false; // Write escape form if (useCER) { String tmp = entities.getName(buffer.get(i)); if (tmp == null) fallBack = true; else writer.write("&" + tmp + ";"); } else { if (params.useBytes) { // Escape bytes if (canEncode) { tmpBuf.put(0, buffer.get(i)); tmpBuf.position(0); encBuf = outputEncoder.encode(tmpBuf); for (int j = 0; j < encBuf.limit(); j++) { writer.write( String.format( outFormat, (encBuf.get(j) < 0 ? 
(0xFF ^ ~encBuf.get(j)) : encBuf.get(j)))); } } else fallBack = true; } else { // Escape character writer.write(String.format(outFormat, (int) buffer.get(i))); } } if (fallBack) { // Default escaping when nothing else works writer.write(String.format("&#x%X;", (int) buffer.get(i))); } } else { // Normal raw forms writer.write(buffer.get(i)); } } } // Done: close the files reader.close(); reader = null; writer.close(); writer = null; rawDoc.finalizeOutput(); // Set the new raw-document URI and the encoding (in case one was auto-detected) // Other info stays the same RawDocument newDoc = new RawDocument( outFile.toURI(), outputEncoding, rawDoc.getSourceLocale(), rawDoc.getTargetLocale()); event.setResource(newDoc); } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } finally { try { if (writer != null) { writer.close(); writer = null; } if (reader != null) { reader.close(); reader = null; } } catch (IOException e) { throw new RuntimeException(e); } } return event; }