/** * Validate TMX Format,and pilot to Body XMLElement * * @param vg * @throws TmxReadException ; */ private void validateTmxAndParseHeader(VTDGen vg) throws TmxReadException { VTDNav vn = vg.getNav(); AutoPilot ap = new AutoPilot(vn); String rootPath = "/tmx"; vu = new VTDUtils(); try { vu.bind(vn); ap.selectXPath(rootPath); if (ap.evalXPath() == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } ap.resetXPath(); ap.selectXPath("/tmx/header"); if (ap.evalXPath() == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } int id = vu.getVTDNav().getAttrVal("srclang"); if (id == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } header.setSrclang(vu.getVTDNav().toString(id).trim()); if (vu.pilot("/tmx/body") == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } // compute total tu number this.totalTu = vu.getChildElementsCount(); } catch (VTDException e) { logger.error("", e); throw new TmxReadException( Messages.getString("document.TmxReader.parseTmxFileError") + e.getMessage()); } finally { vg.clear(); } }
/**
 * Collects the way members matched by {@code MEMBER_WAY_PATH}, starting from the element the
 * cursor currently points to (expected to be a relation element).
 *
 * <p>Only members whose {@code role} attribute is {@code inner} or {@code outer} (compared
 * case-insensitively and independently of the default locale) are returned; members with any
 * other role are skipped. For every returned member an empty list is stored in {@code ways}
 * under the referenced way id, replacing any value already stored for that id.
 *
 * <p>The cursor position is restored and {@code MEMBER_WAY_PATH} is reset before returning, even
 * when an exception is thrown, so the shared AutoPilot can be evaluated again for the next
 * relation.
 *
 * @param vn navigator positioned on the relation element
 * @param ways map that receives a placeholder entry for every referenced way id
 * @return the (role, way id) pairs in document order
 * @throws NavException if navigation fails
 * @throws XPathEvalException if the XPath evaluation fails
 * @throws NumberFormatException if a {@code ref} attribute is not a valid long
 */
private List<Tuple2<WayRole, Long>> extractWayRefs(VTDNav vn, Hashtable<Long, List<Long>> ways)
    throws NavException, XPathEvalException {
    vn.push();
    try {
        List<Tuple2<WayRole, Long>> refs = new ArrayList<>();
        while (MEMBER_WAY_PATH.evalXPath() != -1) {
            long ref = Long.parseLong(vn.toString(vn.getAttrVal("ref")));
            // Locale.ROOT: with a Turkish default locale "INNER".toLowerCase() yields a dotless
            // i and would never match "inner".
            String roleAsString =
                vn.toString(vn.getAttrVal("role")).toLowerCase(java.util.Locale.ROOT);
            WayRole role;
            switch (roleAsString) {
                case "inner":
                    role = WayRole.INNER;
                    break;
                case "outer":
                    role = WayRole.OUTER;
                    break;
                default:
                    // Roles other than inner/outer are ignored.
                    continue;
            }
            refs.add(Tuple.tuple(role, ref));
            ways.put(ref, Collections.emptyList());
        }
        return refs;
    } finally {
        // Always leave the shared AutoPilot and the cursor in a reusable state.
        MEMBER_WAY_PATH.resetXPath();
        vn.pop();
    }
}
private void extractReferencedWays( VTDNav vn, Hashtable<Long, List<Long>> ways, Hashtable<Long, Point2D> nodes) throws XPathEvalException, NavException { vn.push(); for (int i = WAY_PATH.evalXPath(); i != -1; i = WAY_PATH.evalXPath()) { long id = Long.parseLong(vn.toString(vn.getAttrVal("id"))); // By checking that we already referenced the id we can reduce memory pressure if (ways.containsKey(id)) { ways.put(id, extractNodeRefs(vn, nodes)); } } vn.pop(); }
/** * 创建所有批注节点 * * @throws Exception */ private void createComments() throws Exception { String xpath = ""; // 先生成全局批注的定义节点 if (fileCommentsList.size() > 0) { String fileCommentId = CommonFunction.createUUID(); String fileCommentStr = "<sdl:cmt id=\"" + fileCommentId + "\" />"; xpath = "/xliff/file/header"; outputAP.selectXPath(xpath); while (outputAP.evalXPath() != -1) { outputXM.insertBeforeTail(fileCommentStr.getBytes("utf-8")); commentMap.put(fileCommentId, fileCommentsList); } } // 开始生成Comments节点 if (commentMap.size() == 0) { return; } xpath = "/xliff/doc-info"; outputAP.selectXPath(xpath); if (outputAP.evalXPath() != -1) { StringBuffer commentSB = new StringBuffer(); commentSB.append("<cmt-defs>"); for (Entry<String, List<CommentBean>> entry : commentMap.entrySet()) { String id = entry.getKey(); commentSB.append("<cmt-def id=\"" + id + "\">"); commentSB.append("<Comments xmlns=\"\">"); for (CommentBean bean : entry.getValue()) { commentSB.append( "<Comment severity=\"" + bean.getSeverity() + "\" " + "user=\"" + bean.getUser() + "\" date=\"" + bean.getDate() + "\" version=\"1.0\">" + bean.getCommentText() + "</Comment>"); } commentSB.append("</Comments>"); commentSB.append("</cmt-def>"); } commentSB.append("</cmt-defs>"); outputXM.insertBeforeTail(commentSB.toString().getBytes("utf-8")); } }
private void readTuPropElement(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); ap.selectXPath("./prop"); while (ap.evalXPath() != -1) { String content = vu.getElementContent(); if (content == null) { continue; } int inx = vn.getAttrVal("type"); String typeValue = inx != -1 ? vn.toString(inx) : null; if (typeValue == null) { continue; } if (typeValue.equals(TmxContexts.PRE_CONTEXT_NAME)) { tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, content.trim()); } else if (typeValue.equals(TmxContexts.NEXT_CONTEXT_NAME)) { tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, content.trim()); } else if (typeValue.equals("x-Context")) { // Trados TMX file String[] contexts = content.split(","); if (contexts.length == 2) { tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, contexts[0].trim()); tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, contexts[1].trim()); } } else { TmxProp p = new TmxProp(typeValue, content); tu.appendProp(p); } } vn.pop(); }
/**
 * Collects the node ids matched by {@code NODE_REF_PATH}, starting from the element the cursor
 * currently points to (expected to be a way element).
 *
 * <p>For every id found, a placeholder {@code Point2D(0, 0)} is stored in {@code nodes},
 * replacing any value already stored for that id. The cursor is restored and
 * {@code NODE_REF_PATH} is reset before returning.
 *
 * @param vn navigator positioned on the way element
 * @param nodes map that receives a placeholder point for every referenced node id
 * @return the referenced node ids in match order
 * @throws NavException if navigation fails
 * @throws XPathEvalException if the XPath evaluation fails
 */
private List<Long> extractNodeRefs(VTDNav vn, Hashtable<Long, Point2D> nodes)
    throws NavException, XPathEvalException {
    vn.push();
    List<Long> nodeIds = new ArrayList<>();
    int matchIdx;
    while ((matchIdx = NODE_REF_PATH.evalXPath()) != -1) {
        // The id is read from the token right after the match; presumably the XPath selects the
        // attribute name and the following token is its value -- verify against NODE_REF_PATH.
        String idText = vn.toString(matchIdx + 1);
        long nodeId = Long.parseLong(idText);
        nodeIds.add(nodeId);
        nodes.put(nodeId, new Point2D(0, 0));
    }
    NODE_REF_PATH.resetXPath();
    vn.pop();
    return nodeIds;
}
private List<List<Long>> extractWaysOfBuildings(VTDNav vn, Hashtable<Long, Point2D> nodes) throws XPathEvalException, NavException { vn.push(); List<List<Long>> ways = new ArrayList<>(); for (int i = BUILDING_WAY_PATH.evalXPath(); i != -1; i = BUILDING_WAY_PATH.evalXPath()) { // The lambda will put in a dummy value for each encountered node, // so that we know later which nodes we need to parse. ways.add(extractNodeRefs(vn, nodes)); } vn.pop(); BUILDING_WAY_PATH.resetXPath(); return ways; }
private void readTuElementAttribute(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot apAttributes = new AutoPilot(vu.getVTDNav()); apAttributes.selectXPath("@*"); int inx = -1; while ((inx = apAttributes.evalXPath()) != -1) { String name = vn.toString(inx); inx = vn.getAttrVal(name); String value = inx != -1 ? vn.toString(inx) : ""; // tuid, o-encoding, datatype, usagecount, lastusagedate, creationtool, creationtoolversion, // creationdate, // creationid, changedate, segtype, changeid, o-tmf, srclang. if (name.equals("tuid")) { tu.setTuId(value); } else if (name.equals("creationtool")) { tu.setCreationTool(value); } else if (name.equals("creationtoolversion")) { tu.setCreationToolVersion(value); } else if (name.equals("creationdate")) { tu.setCreationDate(value); } else if (name.equals("creationid")) { tu.setCreationUser(value); } else if (name.equals("changedate")) { tu.setChangeDate(value); } else if (name.equals("changeid")) { tu.setChangeUser(value); } else { tu.appendAttribute(name, value); } } vn.pop(); }
/** * 获取 tmxfile 中的所有语言 * * @return */ public List<String> getLangs() { List<String> langs = new LinkedList<String>(); langs.add(LanguageUtils.convertLangCode(header.getSrclang())); VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); try { ap.selectXPath("/tmx/body/tu/tuv"); String lang; int index = -1; while (ap.evalXPath() != -1) { index = vn.getAttrVal("xml:lang"); if (index == -1) { index = vn.getAttrVal("lang"); // version 1.1 if (index == -1) { continue; } } lang = LanguageUtils.convertLangCode(vn.toRawString(index)); if (!langs.contains(lang)) { langs.add(lang); } } } catch (Exception e) { e.printStackTrace(); } vn.pop(); return langs; }
private List<List<Tuple2<WayRole, Long>>> extractWayRefsOfMultipolygons( VTDNav vn, Hashtable<Long, List<Long>> ways) throws NavException, XPathEvalException { vn.push(); List<List<Tuple2<WayRole, Long>>> multipolygons = new ArrayList<>(); for (int i = BUILDING_MULTIPOLYGON_PATH.evalXPath(); i != -1; i = BUILDING_MULTIPOLYGON_PATH.evalXPath()) { // For an explanation for the lambda see extractWaysOfBuildings multipolygons.add(extractWayRefs(vn, ways)); } vn.pop(); BUILDING_WAY_PATH.resetXPath(); return multipolygons; }
private void extractReferencedNodes(VTDNav vn, Hashtable<Long, Point2D> nodes) throws XPathEvalException, NavException { vn.push(); for (int i = NODE_PATH.evalXPath(); i != -1; i = NODE_PATH.evalXPath()) { long id = Long.parseLong(vn.toString(vn.getAttrVal("id"))); // By checking that we already referenced the id we can reduce memory pressure if (nodes.containsKey(id)) { nodes.put( id, new Point2D( Double.parseDouble(vn.toString(vn.getAttrVal("lon"))) * (1 << 10), Double.parseDouble(vn.toString(vn.getAttrVal("lat"))) * (1 << 10))); } } // Make sure we have all referenced nodes extracted // assert !nodes.containsValue(DUMMY_POINT); vn.pop(); }
/** * 获取 R8 xliff文件的所有批注信息 * * @param vn * @param tgtbeBean */ private void getNotes(VTDNav vn, TuMrkBean tgtbeBean) throws Exception { vn.push(); List<CommentBean> segCommentList = new LinkedList<CommentBean>(); AutoPilot ap = new AutoPilot(vn); String xpath = "./note"; ap.selectXPath(xpath); int atttIdx = -1; CommentBean bean; while (ap.evalXPath() != -1) { boolean isCurrent = true; if ((atttIdx = vn.getAttrVal("hs:apply-current")) != -1) { if ("no".equalsIgnoreCase(vn.toString(atttIdx))) { isCurrent = false; } } String user = ""; String date = ""; String commentText = ""; // R8 xliff 文件中没有提示级别一属性,故此处皆为供参考 String severity = "Low"; if ((atttIdx = vn.getAttrVal("from")) != -1) { user = vn.toString(atttIdx); } if (vn.getText() != -1) { String r8NoteText = vn.toString(vn.getText()); if (r8NoteText.indexOf(":") != -1) { date = r8NoteText.substring(0, r8NoteText.indexOf(":")); commentText = r8NoteText.substring(r8NoteText.indexOf(":") + 1, r8NoteText.length()); } else { commentText = r8NoteText; } } bean = new CommentBean(user, date, severity, commentText, true); if (isCurrent) { segCommentList.add(new CommentBean(user, date, severity, commentText, true)); } else { if (!fileCommentsList.contains(bean)) { fileCommentsList.add(bean); } } } tgtbeBean.setCommentList(segCommentList); vn.pop(); }
/**
 * Reads the next translation unit from the shared {@code tuAp} iterator.
 *
 * <p>NOTE(review): the caught {@code VTDException} is discarded without being logged; callers
 * only see the {@code READ_EXCEPTION} state.
 *
 * @return an event with state {@code NORMAL_READ} and the populated unit; an event with state
 *     {@code END_FILE} and no unit when no further unit is matched; or an event with state
 *     {@code READ_EXCEPTION} and no unit when a {@code VTDException} occurs
 */
public TmxReaderEvent read() {
    try {
        if (tuAp.evalXPath() == -1) {
            return new TmxReaderEvent(null, TmxReaderEvent.END_FILE);
        }
        TmxTU unit = new TmxTU();
        readTuElementAttribute(unit);
        readTuNoteElement(unit);
        readTuPropElement(unit);
        readTuTuvElement(unit);
        return new TmxReaderEvent(unit, TmxReaderEvent.NORMAL_READ);
    } catch (VTDException e) {
        return new TmxReaderEvent(null, TmxReaderEvent.READ_EXCEPTION);
    }
}
private void readTuTuvElement(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); ap.selectXPath("./tuv"); // TUV 节点下的Note,Prop节点暂时不处理,所以此处暂时不解析 while (ap.evalXPath() != -1) { int inx = vn.getAttrVal("xml:lang"); inx = inx == -1 ? vn.getAttrVal("lang") : inx; String lang = inx != -1 ? vn.toString(inx) : null; if (lang == null) { continue; } vn.push(); if (vu.pilot("./seg") != -1) { String fullText = vu.getElementContent().trim(); String pureText = DocUtils.getTmxTbxPureText(vu).trim(); if (fullText == null || pureText == null || fullText.equals("") || pureText.equals("")) { // fix Bug #2928 by Jason SQLite--导入TMX异常, 导入程序正常退出,但是未完全导入所有内容,此处在continue时应该先调用vn.pop() vn.pop(); continue; } TmxSegement segment = new TmxSegement(); segment.setLangCode(Utils.convertLangCode(lang)); if (tmxFilter == null) segment.setFullText(fullText); else { String text = tmxFilter.clearString(fullText); segment.setFullText(text); } segment.setPureText(pureText); if (lang.equalsIgnoreCase(header.getSrclang())) { tu.setSource(segment); } else { tu.appendSegement(segment); } } vn.pop(); } vn.pop(); }
/**
 * Reads the {@code note} children of the current element and appends them to {@code tu}.
 *
 * <p>Each note stores the element fragment as its content and, when present, the value of the
 * {@code xml:lang} attribute as its language. The cursor position is restored before returning.
 *
 * @param tu translation unit that receives the notes
 * @throws VTDException if navigation or XPath evaluation fails
 */
private void readTuNoteElement(TmxTU tu) throws VTDException {
    VTDNav vn = vu.getVTDNav();
    vn.push();
    AutoPilot ap = new AutoPilot(vn);
    ap.selectXPath("./note");
    while (ap.evalXPath() != -1) {
        String fragment = vu.getElementFragment();
        TmxNote note = new TmxNote();
        note.setContent(fragment);

        int inx = vn.getAttrVal("xml:lang");
        if (inx != -1) {
            note.setXmlLang(vn.toString(inx));
        }

        // The o-encoding attribute must not be written through setXmlLang: doing so replaces
        // the note's language with an encoding name.
        // TODO(review): store o-encoding through TmxNote's own encoding setter once its name is
        // confirmed; TmxNote is not visible from here.

        tu.appendNote(note);
    }
    vn.pop();
}
/** * 分析xliff文件的每一个 trans-unit 节点 * * @throws Exception */ private void ananysisXlfTU(IProgressMonitor monitor) throws Exception { if (monitor == null) { monitor = new NullProgressMonitor(); } AutoPilot ap = new AutoPilot(xlfVN); AutoPilot childAP = new AutoPilot(xlfVN); VTDUtils vu = new VTDUtils(xlfVN); String xpath = "count(/xliff/file/body//trans-unit)"; ap.selectXPath(xpath); int totalTuNum = (int) ap.evalXPathToNumber(); if (totalTuNum > 500) { workInterval = totalTuNum / 500; } int matchWorkUnit = totalTuNum % workInterval == 0 ? (totalTuNum / workInterval) : (totalTuNum / workInterval) + 1; monitor.beginTask("", matchWorkUnit); xpath = "/xliff/file/body//trans-unit"; String srcXpath = "./source"; String tgtXpath = "./target"; ap.selectXPath(xpath); int attrIdx = -1; // trans-unit的id,对应sdl文件的占位符如%%%1%%% 。 String segId = ""; TuMrkBean srcBean = null; TuMrkBean tgtBean = null; int traversalTuIndex = 0; while (ap.evalXPath() != -1) { traversalTuIndex++; if ((attrIdx = xlfVN.getAttrVal("id")) == -1) { continue; } srcBean = new TuMrkBean(); tgtBean = new TuMrkBean(); segId = xlfVN.toString(attrIdx); // 处理source节点 xlfVN.push(); childAP.selectXPath(srcXpath); if (childAP.evalXPath() != -1) { String srcContent = vu.getElementContent(); srcContent = srcContent == null ? "" : srcContent; srcBean.setContent(srcContent); srcBean.setSource(true); } xlfVN.pop(); // 处理target节点 String status = ""; // 状态,针对target节点,空字符串为未翻译 xlfVN.push(); tgtBean.setSource(false); String tgtContent = null; childAP.selectXPath(tgtXpath); if (childAP.evalXPath() != -1) { tgtContent = vu.getElementContent(); if ((attrIdx = xlfVN.getAttrVal("state")) != -1) { status = xlfVN.toString(attrIdx); } } tgtContent = tgtContent == null ? 
"" : tgtContent; tgtBean.setContent(tgtContent); xlfVN.pop(); // 处理批注 getNotes(xlfVN, tgtBean); // 判断是否处于锁定状态 if ((attrIdx = xlfVN.getAttrVal("translate")) != -1) { if ("no".equalsIgnoreCase(xlfVN.toString(attrIdx))) { tgtBean.setLocked(true); } } // 判断是否处于批准状态,若是签发,就没有必要判断了,因为签发了的一定就批准了的 if (!"signed-off".equalsIgnoreCase(status)) { if ((attrIdx = xlfVN.getAttrVal("approved")) != -1) { if ("yes".equalsIgnoreCase(xlfVN.toString(attrIdx))) { status = "approved"; // 批准 } } } tgtBean.setStatus(status); replaceSegment(segId, srcBean, tgtBean); monitorWork(monitor, traversalTuIndex, false); } monitorWork(monitor, traversalTuIndex, true); }
/**
 * Replaces the placeholders of one segment in the skeleton (output) file.
 *
 * <p>The placeholder text is {@code %%%segId%%%}. The first {@code mrk} under
 * {@code seg-source} whose text equals the placeholder gets the source content; the first
 * {@code mrk} under {@code target} whose text equals the placeholder gets the target content
 * (wrapped in an {@code x-sdl-comment} mrk and registered in {@code commentMap} when the
 * segment has comments). Afterwards the {@code sdl:seg} definition whose id equals the
 * target mrk's {@code mid} attribute is updated with the lock flag and the confirmation level
 * derived from the segment status.
 *
 * @param segId id of the trans-unit, used to build the placeholder text
 * @param srcBean bean holding the source content
 * @param tgtbeBean bean holding the target content, comments, lock flag and status
 * @throws Exception if XPath evaluation, navigation, or modification of the output fails
 */
private void replaceSegment(String segId, TuMrkBean srcBean, TuMrkBean tgtbeBean) throws Exception {
    String segStr = "%%%" + segId + "%%%";
    String srcXpath = "/xliff/file/body//trans-unit/seg-source//mrk[text()='" + segStr + "']";
    // Handle the source text first.
    outputAP.selectXPath(srcXpath);
    if (outputAP.evalXPath() != -1) {
        int textIdx = outputVN.getText();
        outputXM.updateToken(textIdx, srcBean.getContent().getBytes("utf-8"));
    }
    // Handle the translation.
    String tgtXpath = "/xliff/file/body//trans-unit/target//mrk[text()='" + segStr + "']";
    outputAP.selectXPath(tgtXpath);
    if (outputAP.evalXPath() != -1) {
        String content = tgtbeBean.getContent();
        if (tgtbeBean.getCommentList().size() > 0) {
            // Segment has comments: register them under a new id and wrap the content in a
            // comment marker that carries that id.
            String uuId = CommonFunction.createUUID();
            commentMap.put(uuId, tgtbeBean.getCommentList());
            content = "<mrk mtype=\"x-sdl-comment\" sdl:cid=\"" + uuId + "\">" + tgtbeBean.getContent() + "</mrk>";
        }
        int textIdx = outputVN.getText();
        outputXM.updateToken(textIdx, content.getBytes("utf-8"));
        // Start handling the status.
        int attrIdx = -1;
        if ((attrIdx = outputVN.getAttrVal("mid")) != -1) {
            // Set when a locked attribute has to be added because none exists yet.
            boolean needLocked = false;
            String mid = outputVN.toString(attrIdx);
            // Use mid to find the matching sdl:seg node, which stores the status of each text
            // segment. The path is relative (ancestor::), so it presumably resolves from the
            // mrk matched above -- verify against the AutoPilot semantics.
            String xpath = "ancestor::trans-unit/sdl:seg-defs/sdl:seg[@id='" + mid + "']";
            outputAP.selectXPath(xpath);
            if (outputAP.evalXPath() != -1) {
                // First decide the lock state.
                if (tgtbeBean.isLocked()) {
                    if ((attrIdx = outputVN.getAttrVal("locked")) != -1) {
                        if (!"true".equals(outputVN.toString(attrIdx))) {
                            outputXM.updateToken(attrIdx, "true");
                        }
                    } else {
                        // No locked attribute yet; it is inserted further below.
                        needLocked = true;
                    }
                } else {
                    if ((attrIdx = outputVN.getAttrVal("locked")) != -1) {
                        if ("true".equals(outputVN.toString(attrIdx))) {
                            outputXM.updateToken(attrIdx, "false");
                        }
                    }
                }
                // Next, map the R8 status to the sdl confirmation level.
                String conf = "";
                String status = tgtbeBean.getStatus();
                if ("new".equals(status)) {
                    conf = "Draft";
                } else if ("translated".equals(status)) {
                    conf = "Translated";
                } else if ("approved".equals(status)) {
                    conf = "ApprovedTranslation";
                } else if ("signed-off".equals(status)) {
                    conf = "ApprovedSignOff";
                }
                if ("".equals(conf)) {
                    // Unknown or empty status: blank out an existing conf attribute.
                    if ((attrIdx = outputVN.getAttrVal("conf")) != -1) {
                        outputXM.updateToken(attrIdx, "");
                    }
                } else {
                    if ((attrIdx = outputVN.getAttrVal("conf")) != -1) {
                        if (!conf.equals(outputVN.toString(attrIdx))) {
                            outputXM.updateToken(attrIdx, conf);
                        }
                    } else {
                        // No conf attribute yet: insert it, together with a pending locked
                        // attribute so that only one insertAttribute call is made.
                        String attributeStr = "";
                        if (needLocked) {
                            attributeStr = " locked=\"true\" ";
                        }
                        attributeStr += " conf=\"" + conf + "\" ";
                        outputXM.insertAttribute(attributeStr.getBytes("utf-8"));
                        needLocked = false;
                    }
                }
                // The locked attribute is still pending: insert it on its own.
                if (needLocked) {
                    outputXM.insertAttribute(" locked=\"true\" ".getBytes("utf-8"));
                }
            }
        }
    }
}