private void readTuElementAttribute(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot apAttributes = new AutoPilot(vu.getVTDNav()); apAttributes.selectXPath("@*"); int inx = -1; while ((inx = apAttributes.evalXPath()) != -1) { String name = vn.toString(inx); inx = vn.getAttrVal(name); String value = inx != -1 ? vn.toString(inx) : ""; // tuid, o-encoding, datatype, usagecount, lastusagedate, creationtool, creationtoolversion, // creationdate, // creationid, changedate, segtype, changeid, o-tmf, srclang. if (name.equals("tuid")) { tu.setTuId(value); } else if (name.equals("creationtool")) { tu.setCreationTool(value); } else if (name.equals("creationtoolversion")) { tu.setCreationToolVersion(value); } else if (name.equals("creationdate")) { tu.setCreationDate(value); } else if (name.equals("creationid")) { tu.setCreationUser(value); } else if (name.equals("changedate")) { tu.setChangeDate(value); } else if (name.equals("changeid")) { tu.setChangeUser(value); } else { tu.appendAttribute(name, value); } } vn.pop(); }
/** * Validate TMX Format,and pilot to Body XMLElement * * @param vg * @throws TmxReadException ; */ private void validateTmxAndParseHeader(VTDGen vg) throws TmxReadException { VTDNav vn = vg.getNav(); AutoPilot ap = new AutoPilot(vn); String rootPath = "/tmx"; vu = new VTDUtils(); try { vu.bind(vn); ap.selectXPath(rootPath); if (ap.evalXPath() == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } ap.resetXPath(); ap.selectXPath("/tmx/header"); if (ap.evalXPath() == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } int id = vu.getVTDNav().getAttrVal("srclang"); if (id == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } header.setSrclang(vu.getVTDNav().toString(id).trim()); if (vu.pilot("/tmx/body") == -1) { throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError")); } // compute total tu number this.totalTu = vu.getChildElementsCount(); } catch (VTDException e) { logger.error("", e); throw new TmxReadException( Messages.getString("document.TmxReader.parseTmxFileError") + e.getMessage()); } finally { vg.clear(); } }
/** * 获取 tmxfile 中的所有语言 * * @return */ public List<String> getLangs() { List<String> langs = new LinkedList<String>(); langs.add(LanguageUtils.convertLangCode(header.getSrclang())); VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); try { ap.selectXPath("/tmx/body/tu/tuv"); String lang; int index = -1; while (ap.evalXPath() != -1) { index = vn.getAttrVal("xml:lang"); if (index == -1) { index = vn.getAttrVal("lang"); // version 1.1 if (index == -1) { continue; } } lang = LanguageUtils.convertLangCode(vn.toRawString(index)); if (!langs.contains(lang)) { langs.add(lang); } } } catch (Exception e) { e.printStackTrace(); } vn.pop(); return langs; }
private void readTuPropElement(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); ap.selectXPath("./prop"); while (ap.evalXPath() != -1) { String content = vu.getElementContent(); if (content == null) { continue; } int inx = vn.getAttrVal("type"); String typeValue = inx != -1 ? vn.toString(inx) : null; if (typeValue == null) { continue; } if (typeValue.equals(TmxContexts.PRE_CONTEXT_NAME)) { tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, content.trim()); } else if (typeValue.equals(TmxContexts.NEXT_CONTEXT_NAME)) { tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, content.trim()); } else if (typeValue.equals("x-Context")) { // Trados TMX file String[] contexts = content.split(","); if (contexts.length == 2) { tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, contexts[0].trim()); tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, contexts[1].trim()); } } else { TmxProp p = new TmxProp(typeValue, content); tu.appendProp(p); } } vn.pop(); }
public TmxReader(String tmxContent) throws TmxReadException { // 解析文件 VTDGen vg = new VTDGen(); vg.setDoc(tmxContent.getBytes()); String message = ""; try { vg.parse(true); } catch (EncodingException e) { logger.error(Messages.getString("document.ImportAbstract.logger1"), e); message = Messages.getString("document.ImportAbstract.msg1"); throw new TmxReadException(message + e.getMessage()); } catch (ParseException e) { logger.error(Messages.getString("document.ImportAbstract.logger3"), e); String errMsg = e.getMessage(); if (errMsg.indexOf("invalid encoding") != -1) { // 编码异常 message = Messages.getString("document.ImportAbstract.msg1"); } else { message = Messages.getString("document.ImportAbstract.msg3"); } throw new TmxReadException(message + e.getMessage()); } header = new TmxHeader(); validateTmxAndParseHeader(vg); tuAp = new AutoPilot(vu.getVTDNav()); try { tuAp.selectXPath("./tu"); } catch (XPathParseException e) { throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } }
private void readTuTuvElement(TmxTU tu) throws VTDException { VTDNav vn = vu.getVTDNav(); vn.push(); AutoPilot ap = new AutoPilot(vn); ap.selectXPath("./tuv"); // TUV 节点下的Note,Prop节点暂时不处理,所以此处暂时不解析 while (ap.evalXPath() != -1) { int inx = vn.getAttrVal("xml:lang"); inx = inx == -1 ? vn.getAttrVal("lang") : inx; String lang = inx != -1 ? vn.toString(inx) : null; if (lang == null) { continue; } vn.push(); if (vu.pilot("./seg") != -1) { String fullText = vu.getElementContent().trim(); String pureText = DocUtils.getTmxTbxPureText(vu).trim(); if (fullText == null || pureText == null || fullText.equals("") || pureText.equals("")) { // fix Bug #2928 by Jason SQLite--导入TMX异常, 导入程序正常退出,但是未完全导入所有内容,此处在continue时应该先调用vn.pop() vn.pop(); continue; } TmxSegement segment = new TmxSegement(); segment.setLangCode(Utils.convertLangCode(lang)); if (tmxFilter == null) segment.setFullText(fullText); else { String text = tmxFilter.clearString(fullText); segment.setFullText(text); } segment.setPureText(pureText); if (lang.equalsIgnoreCase(header.getSrclang())) { tu.setSource(segment); } else { tu.appendSegement(segment); } } vn.pop(); } vn.pop(); }
public TmxReader(File file) throws TmxReadException { // 解析文件 VTDGen vg = null; try { vg = VTDLoader.loadVTDGen(file, FileEncodingDetector.detectFileEncoding(file)); } catch (IOException e) { logger.error(Messages.getString("document.DocUtils.logger1"), e); throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } catch (EncodingException e) { logger.error(Messages.getString("document.ImportAbstract.logger1"), e); String message = Messages.getString("document.ImportAbstract.msg1"); throw new TmxReadException(message + e.getMessage()); } catch (ParseException e) { logger.error(Messages.getString("document.ImportAbstract.logger3"), e); String errMsg = e.getMessage(); String message; if (errMsg.indexOf("invalid encoding") != -1) { // 编码异常 message = Messages.getString("document.ImportAbstract.msg1"); } else { message = Messages.getString("document.ImportAbstract.msg3"); } throw new TmxReadException(message + e.getMessage()); } catch (EmptyFileException e) { logger.error(Messages.getString("document.DocUtils.logger1"), e); throw new TmxReadException(Messages.getString("document.TmxReader.EmptyTmxFileError")); } if (vg == null) { throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } // 验证TMX ,解析Header XMLElement,将节点导航到Body XMLElement header = new TmxHeader(); validateTmxAndParseHeader(vg); tuAp = new AutoPilot(vu.getVTDNav()); try { tuAp.selectXPath("./tu"); } catch (XPathParseException e) { throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError")); } }
/**
 * Reads the {@code note} children of the current element and appends them to
 * the given translation unit.
 * <p>
 * Each note's content is set to the element fragment returned by
 * {@code vu.getElementFragment()}. When the note has an {@code xml:lang}
 * attribute, its value is stored as the note's language.
 *
 * @param tu the translation unit that receives the notes
 * @throws VTDException propagated from the VTD-XML selection and navigation calls
 */
private void readTuNoteElement(TmxTU tu) throws VTDException {
    VTDNav vn = vu.getVTDNav();
    vn.push();
    AutoPilot ap = new AutoPilot(vn);
    ap.selectXPath("./note");
    while (ap.evalXPath() != -1) {
        TmxNote note = new TmxNote();
        note.setContent(vu.getElementFragment());

        int inx = vn.getAttrVal("xml:lang");
        String lang = inx != -1 ? vn.toString(inx) : null;
        if (lang != null) {
            note.setXmlLang(lang);
        }

        // The o-encoding attribute is deliberately NOT written via setXmlLang():
        // doing so replaced the language read above with an encoding name.
        // NOTE(review): if TmxNote offers a dedicated encoding setter, store the
        // o-encoding value there instead of dropping it — confirm against TmxNote.

        tu.appendNote(note);
    }
    vn.pop();
}