/** * Validate TMX Format,and pilot to Body XMLElement * * @param vg * @throws TmxReadException ; */ private void validateTmxAndParseHeader(VTDGen vg) throws TmxReadException { VTDNav vn = vg.getNav(); AutoPilot ap = new AutoPilot(vn); String rootPath = "/tmx"; vu = new VTDUtils(); try { vu.bind(vn); ap.selectXPath(rootPath); if (ap.evalXPath() == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } ap.resetXPath(); ap.selectXPath("/tmx/header"); if (ap.evalXPath() == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } int id = vu.getVTDNav().getAttrVal("srclang"); if (id == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } header.setSrclang(vu.getVTDNav().toString(id).trim()); if (vu.pilot("/tmx/body") == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } // compute total tu number this.totalTu = vu.getChildElementsCount(); } catch (VTDException e) { logger.error("", e); throw new TmxReadException( Messages.getString("document.TmxReader.parseTmxFileError") + e.getMessage()); } finally { vg.clear(); } }
private void readTuPropElement(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); ap.selectXPath("./prop"); while (ap.evalXPath() != -1) { String content = vu.getElementContent(); if (content == null) { continue; } int inx = vn.getAttrVal("type"); String typeValue = inx != -1 ? vn.toString(inx) : null; if (typeValue == null) { continue; } if (typeValue.equals(TmxContexts.PRE_CONTEXT_NAME)) { tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, content.trim()); } else if (typeValue.equals(TmxContexts.NEXT_CONTEXT_NAME)) { tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, content.trim()); } else if (typeValue.equals("x-Context")) { // Trados TMX file String[] contexts = content.split(","); if (contexts.length == 2) { tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, contexts[0].trim()); tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, contexts[1].trim()); } } else { TmxProp p = new TmxProp(typeValue, content); tu.appendProp(p); } } vn.pop(); }
private void readTuElementAttribute(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot apAttributes = new AutoPilot(vu.getVTDNav()); apAttributes.selectXPath("@*"); int inx = -1; while ((inx = apAttributes.evalXPath()) != -1) { String name = vn.toString(inx); inx = vn.getAttrVal(name); String value = inx != -1 ? vn.toString(inx) : ""; // tuid, o-encoding, datatype, usagecount, lastusagedate, creationtool, creationtoolversion, // creationdate, // creationid, changedate, segtype, changeid, o-tmf, srclang. if (name.equals("tuid")) { tu.setTuId(value); } else if (name.equals("creationtool")) { tu.setCreationTool(value); } else if (name.equals("creationtoolversion")) { tu.setCreationToolVersion(value); } else if (name.equals("creationdate")) { tu.setCreationDate(value); } else if (name.equals("creationid")) { tu.setCreationUser(value); } else if (name.equals("changedate")) { tu.setChangeDate(value); } else if (name.equals("changeid")) { tu.setChangeUser(value); } else { tu.appendAttribute(name, value); } } vn.pop(); }
/** * 获取 tmxfile 中的所有语言 * * @return */ public List<String> getLangs() { List<String> langs = new LinkedList<String>(); langs.add(LanguageUtils.convertLangCode(header.getSrclang())); VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); try { ap.selectXPath("/tmx/body/tu/tuv"); String lang; int index = -1; while (ap.evalXPath() != -1) { index = vn.getAttrVal("xml:lang"); if (index == -1) { index = vn.getAttrVal("lang"); // version 1.1 if (index == -1) { continue; } } lang = LanguageUtils.convertLangCode(vn.toRawString(index)); if (!langs.contains(lang)) { langs.add(lang); } } } catch (Exception e) { e.printStackTrace(); } vn.pop(); return langs; }
public TmxReader(String tmxContent) throws TmxReadException { // 解析文件 VTDGen vg = new VTDGen(); vg.setDoc(tmxContent.getBytes()); String message = ""; try { vg.parse(true); } catch (EncodingException e) { logger.error(Messages.getString("document.ImportAbstract.logger1"), e); message = Messages.getString("document.ImportAbstract.msg1"); throw new TmxReadException(message + e.getMessage()); } catch (ParseException e) { logger.error(Messages.getString("document.ImportAbstract.logger3"), e); String errMsg = e.getMessage(); if (errMsg.indexOf("invalid encoding") != -1) { // 编码异常 message = Messages.getString("document.ImportAbstract.msg1"); } else { message = Messages.getString("document.ImportAbstract.msg3"); } throw new TmxReadException(message + e.getMessage()); } header = new TmxHeader(); validateTmxAndParseHeader(vg); tuAp = new AutoPilot(vu.getVTDNav()); try { tuAp.selectXPath("./tu"); } catch (XPathParseException e) { throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } }
private void readTuTuvElement(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); ap.selectXPath("./tuv"); // TUV 节点下的Note,Prop节点暂时不处理,所以此处暂时不解析 while (ap.evalXPath() != -1) { int inx = vn.getAttrVal("xml:lang"); inx = inx == -1 ? vn.getAttrVal("lang") : inx; String lang = inx != -1 ? vn.toString(inx) : null; if (lang == null) { continue; } vn.push(); if (vu.pilot("./seg") != -1) { String fullText = vu.getElementContent().trim(); String pureText = DocUtils.getTmxTbxPureText(vu).trim(); if (fullText == null || pureText == null || fullText.equals("") || pureText.equals("")) { // fix Bug #2928 by Jason SQLite--导入TMX异常, 导入程序正常退出,但是未完全导入所有内容,此处在continue时应该先调用vn.pop() vn.pop(); continue; } TmxSegement segment = new TmxSegement(); segment.setLangCode(Utils.convertLangCode(lang)); if (tmxFilter == null) segment.setFullText(fullText); else { String text = tmxFilter.clearString(fullText); segment.setFullText(text); } segment.setPureText(pureText); if (lang.equalsIgnoreCase(header.getSrclang())) { tu.setSource(segment); } else { tu.appendSegement(segment); } } vn.pop(); } vn.pop(); }
/**
 * Enables removal of third-party markup while importing TMX (for example the
 * {@code ut} tags written by SDL 2007). Support for further tools should be
 * added in this method.
 * <p>
 * Nothing happens when {@code isClear} is {@code false}, when the reader is
 * not fully initialised, or when the header lacks {@code creationtool} or
 * {@code creationtoolversion}. When the creation tool equals
 * {@code TmxFilterInterface.SDL_2007_FOR_WIN}, an SDL 2007 filter is installed
 * in {@code tmxFilter}. A failure while reading the header attributes is
 * logged and leaves the filter unchanged.
 *
 * @param isClear
 *            {@code true} to try to install a matching tag filter
 */
public void tryToClearTags(boolean isClear) {
    if (!isClear) {
        return;
    }
    if (header == null || vu == null || tuAp == null) {
        return;
    }
    try {
        String creationtool = vu.getElementAttribute("/tmx/header", "creationtool");
        String creationtoolversion = vu.getElementAttribute("/tmx/header", "creationtoolversion");
        if (creationtool == null || creationtoolversion == null) {
            return;
        }
        if (creationtool.equals(TmxFilterInterface.SDL_2007_FOR_WIN)) {
            tmxFilter = new TmxFilterSDL2007Impl();
        }
    } catch (VTDException e) {
        // Covers the XPath parse/eval and navigation failures that were caught
        // individually before; report through the class logger instead of
        // printing the stack trace.
        logger.error("Failed to read creation tool information from the TMX header", e);
    }
}
/**
 * Reads the {@code note} children of the element the navigator is currently
 * on and appends one {@link TmxNote} per note to the given translation unit.
 * <p>
 * The note content is the element fragment returned by
 * {@code vu.getElementFragment()}. If the note carries an {@code xml:lang}
 * attribute, its value is stored as the note's language.
 *
 * @param tu
 *            translation unit that receives the notes
 * @throws VTDException
 *             if XPath selection or navigation fails
 */
private void readTuNoteElement(TmxTU tu) throws VTDException {
    VTDNav vn = vu.getVTDNav();
    vn.push();
    try {
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("./note");
        while (ap.evalXPath() != -1) {
            TmxNote note = new TmxNote();
            note.setContent(vu.getElementFragment());

            int langIdx = vn.getAttrVal("xml:lang");
            if (langIdx != -1) {
                note.setXmlLang(vn.toString(langIdx));
            }

            // The o-encoding attribute used to be written through setXmlLang,
            // which replaced the note's language with an encoding name. It is
            // no longer copied into the language field.
            // TODO(review): store o-encoding through a dedicated TmxNote
            // setter if one exists - not visible from this method.

            tu.appendNote(note);
        }
    } finally {
        // Restore the cursor even when navigation fails part-way through.
        vn.pop();
    }
}
public TmxReader(File file) throws TmxReadException { // 解析文件 VTDGen vg = null; try { vg = VTDLoader.loadVTDGen(file, FileEncodingDetector.detectFileEncoding(file)); } catch (IOException e) { logger.error(Messages.getString("document.DocUtils.logger1"), e); throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } catch (EncodingException e) { logger.error(Messages.getString("document.ImportAbstract.logger1"), e); String message = Messages.getString("document.ImportAbstract.msg1"); throw new TmxReadException(message + e.getMessage()); } catch (ParseException e) { logger.error(Messages.getString("document.ImportAbstract.logger3"), e); String errMsg = e.getMessage(); String message; if (errMsg.indexOf("invalid encoding") != -1) { // 编码异常 message = Messages.getString("document.ImportAbstract.msg1"); } else { message = Messages.getString("document.ImportAbstract.msg3"); } throw new TmxReadException(message + e.getMessage()); } catch (EmptyFileException e) { logger.error(Messages.getString("document.DocUtils.logger1"), e); throw new TmxReadException(Messages.getString("document.TmxReader.EmptyTmxFileError")); } if (vg == null) { throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } // 验证TMX ,解析Header XMLElement,将节点导航到Body XMLElement header = new TmxHeader(); validateTmxAndParseHeader(vg); tuAp = new AutoPilot(vu.getVTDNav()); try { tuAp.selectXPath("./tu"); } catch (XPathParseException e) { throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } }
/** * 分析xliff文件的每一个 trans-unit 节点 * * @throws Exception */ private void ananysisXlfTU(IProgressMonitor monitor) throws Exception { if (monitor == null) { monitor = new NullProgressMonitor(); } AutoPilot ap = new AutoPilot(xlfVN); AutoPilot childAP = new AutoPilot(xlfVN); VTDUtils vu = new VTDUtils(xlfVN); String xpath = "count(/xliff/file/body//trans-unit)"; ap.selectXPath(xpath); int totalTuNum = (int) ap.evalXPathToNumber(); if (totalTuNum > 500) { workInterval = totalTuNum / 500; } int matchWorkUnit = totalTuNum % workInterval == 0 ? (totalTuNum / workInterval) : (totalTuNum / workInterval) + 1; monitor.beginTask("", matchWorkUnit); xpath = "/xliff/file/body//trans-unit"; String srcXpath = "./source"; String tgtXpath = "./target"; ap.selectXPath(xpath); int attrIdx = -1; // trans-unit的id,对应sdl文件的占位符如%%%1%%% 。 String segId = ""; TuMrkBean srcBean = null; TuMrkBean tgtBean = null; int traversalTuIndex = 0; while (ap.evalXPath() != -1) { traversalTuIndex++; if ((attrIdx = xlfVN.getAttrVal("id")) == -1) { continue; } srcBean = new TuMrkBean(); tgtBean = new TuMrkBean(); segId = xlfVN.toString(attrIdx); // 处理source节点 xlfVN.push(); childAP.selectXPath(srcXpath); if (childAP.evalXPath() != -1) { String srcContent = vu.getElementContent(); srcContent = srcContent == null ? "" : srcContent; srcBean.setContent(srcContent); srcBean.setSource(true); } xlfVN.pop(); // 处理target节点 String status = ""; // 状态,针对target节点,空字符串为未翻译 xlfVN.push(); tgtBean.setSource(false); String tgtContent = null; childAP.selectXPath(tgtXpath); if (childAP.evalXPath() != -1) { tgtContent = vu.getElementContent(); if ((attrIdx = xlfVN.getAttrVal("state")) != -1) { status = xlfVN.toString(attrIdx); } } tgtContent = tgtContent == null ? 
"" : tgtContent; tgtBean.setContent(tgtContent); xlfVN.pop(); // 处理批注 getNotes(xlfVN, tgtBean); // 判断是否处于锁定状态 if ((attrIdx = xlfVN.getAttrVal("translate")) != -1) { if ("no".equalsIgnoreCase(xlfVN.toString(attrIdx))) { tgtBean.setLocked(true); } } // 判断是否处于批准状态,若是签发,就没有必要判断了,因为签发了的一定就批准了的 if (!"signed-off".equalsIgnoreCase(status)) { if ((attrIdx = xlfVN.getAttrVal("approved")) != -1) { if ("yes".equalsIgnoreCase(xlfVN.toString(attrIdx))) { status = "approved"; // 批准 } } } tgtBean.setStatus(status); replaceSegment(segId, srcBean, tgtBean); monitorWork(monitor, traversalTuIndex, false); } monitorWork(monitor, traversalTuIndex, true); }