Example #1
0
 /**
  * Creates a reader over TMX content that is already held in memory.
  *
  * <p>The content is handed to VTD-XML for parsing, then {@code validateTmxAndParseHeader} is
  * invoked and an {@code AutoPilot} selecting {@code ./tu} is prepared for later iteration.
  *
  * @param tmxContent the complete TMX document text
  * @throws TmxReadException if VTD-XML rejects the content or the {@code ./tu} XPath cannot be
  *     compiled
  */
 public TmxReader(String tmxContent) throws TmxReadException {
   // Parse the document.
   VTDGen vg = new VTDGen();
   // NOTE(review): getBytes() encodes with the platform default charset, which may disagree with
   // the encoding declared inside the XML itself — confirm whether a fixed charset is intended.
   vg.setDoc(tmxContent.getBytes());
   try {
     vg.parse(true);
   } catch (EncodingException e) {
     logger.error(Messages.getString("document.ImportAbstract.logger1"), e);
     throw new TmxReadException(
         Messages.getString("document.ImportAbstract.msg1") + e.getMessage());
   } catch (ParseException e) {
     logger.error(Messages.getString("document.ImportAbstract.logger3"), e);
     // getMessage() may legally be null; guard so the real parse failure is not replaced by a
     // NullPointerException.
     String errMsg = e.getMessage();
     boolean encodingProblem = errMsg != null && errMsg.contains("invalid encoding");
     String message =
         encodingProblem
             ? Messages.getString("document.ImportAbstract.msg1") // encoding error
             : Messages.getString("document.ImportAbstract.msg3");
     throw new TmxReadException(errMsg == null ? message : message + errMsg);
   }
   header = new TmxHeader();
   validateTmxAndParseHeader(vg);
   // NOTE(review): vu is not assigned in this constructor; presumably
   // validateTmxAndParseHeader initialises it — confirm.
   tuAp = new AutoPilot(vu.getVTDNav());
   try {
     tuAp.selectXPath("./tu");
   } catch (XPathParseException e) {
     // Log the underlying failure; the exception thrown to the caller carries only the
     // localized message.
     logger.error(Messages.getString("document.TmxReader.parseTmxFileError"), e);
     throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError"));
   }
  }
Example #2
0
  /**
   * Creates a reader over a TMX file.
   *
   * <p>The file is loaded through {@code VTDLoader.loadVTDGen} using the encoding reported by
   * {@code FileEncodingDetector}, then {@code validateTmxAndParseHeader} is invoked and an
   * {@code AutoPilot} selecting {@code ./tu} is prepared for later iteration.
   *
   * @param file the TMX file to read
   * @throws TmxReadException if the file cannot be read, is empty, cannot be parsed, or the
   *     {@code ./tu} XPath cannot be compiled
   */
  public TmxReader(File file) throws TmxReadException {
    // Parse the file.
    VTDGen vg = null;
    try {
      vg = VTDLoader.loadVTDGen(file, FileEncodingDetector.detectFileEncoding(file));
    } catch (IOException e) {
      logger.error(Messages.getString("document.DocUtils.logger1"), e);
      throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError"));
    } catch (EncodingException e) {
      logger.error(Messages.getString("document.ImportAbstract.logger1"), e);
      throw new TmxReadException(
          Messages.getString("document.ImportAbstract.msg1") + e.getMessage());
    } catch (ParseException e) {
      logger.error(Messages.getString("document.ImportAbstract.logger3"), e);
      // getMessage() may legally be null; guard so the real parse failure is not replaced by a
      // NullPointerException.
      String errMsg = e.getMessage();
      boolean encodingProblem = errMsg != null && errMsg.contains("invalid encoding");
      String message =
          encodingProblem
              ? Messages.getString("document.ImportAbstract.msg1") // encoding error
              : Messages.getString("document.ImportAbstract.msg3");
      throw new TmxReadException(errMsg == null ? message : message + errMsg);
    } catch (EmptyFileException e) {
      logger.error(Messages.getString("document.DocUtils.logger1"), e);
      throw new TmxReadException(Messages.getString("document.TmxReader.EmptyTmxFileError"));
    }
    if (vg == null) {
      throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError"));
    }

    // Validate the TMX, parse the header element, and move the navigator to the body element.
    header = new TmxHeader();
    validateTmxAndParseHeader(vg);

    // NOTE(review): vu is not assigned in this constructor; presumably
    // validateTmxAndParseHeader initialises it — confirm.
    tuAp = new AutoPilot(vu.getVTDNav());
    try {
      tuAp.selectXPath("./tu");
    } catch (XPathParseException e) {
      // Log the underlying failure; the exception thrown to the caller carries only the
      // localized message.
      logger.error(Messages.getString("document.TmxReader.parseTmxFileError"), e);
      throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError"));
    }
  }
Example #3
0
  /**
   * Reads a file into memory, removes bytes and numeric character references that denote
   * characters not allowed in XML 1.0, and parses the cleaned bytes with VTD-XML.
   *
   * @param file the file to read
   * @return the {@code VTDGen} that parsed the cleaned document
   * @throws TmxReadException if the file is too large to buffer in a single array, cannot be
   *     read, or is rejected by the VTD-XML parser
   */
  private VTDGen paseFile(File file) throws TmxReadException {
    long fileLength = file.length();
    if (fileLength > Integer.MAX_VALUE) {
      // The whole file is buffered in one byte[]; without this guard the narrowing cast below
      // would yield a negative or truncated size.
      throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError"));
    }

    String encoding = FileEncodingDetector.detectFileEncoding(file);
    // Constant-first comparison tolerates a null encoding from the detector.
    boolean utf16 =
        "UTF-16LE".equalsIgnoreCase(encoding) || "UTF-16BE".equalsIgnoreCase(encoding);

    VTDGen vg = new VTDGen();
    FileInputStream fis = null;
    try {
      fis = new FileInputStream(file);
      byte[] raw = new byte[(int) fileLength];

      // Read in chunks of at most 1 MiB until the buffer is full or the stream ends.
      final int chunkSize = 1048576;
      int filled = 0;
      while (filled < raw.length) {
        int numRead = fis.read(raw, filled, Math.min(chunkSize, raw.length - filled));
        if (numRead < 0) {
          break;
        }
        filled += numRead;
      }

      // Only the bytes actually read are cleaned, so a short read does not leave zero padding.
      byte[] cleaned = stripInvalidXmlChars(raw, filled, utf16);

      // Parse with VTD-XML.
      vg.setDoc(cleaned);
      vg.parse(true);
    } catch (IOException e) {
      logger.error(Messages.getString("document.DocUtils.logger1"), e);
      throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError"));
    } catch (EncodingException e) {
      logger.error(Messages.getString("document.ImportAbstract.logger1"), e);
      throw new TmxReadException(
          Messages.getString("document.ImportAbstract.msg1") + e.getMessage());
    } catch (ParseException e) {
      logger.error(Messages.getString("document.ImportAbstract.logger3"), e);
      // getMessage() may legally be null; guard so the real parse failure is not replaced by a
      // NullPointerException.
      String errMsg = e.getMessage();
      boolean encodingProblem = errMsg != null && errMsg.contains("invalid encoding");
      String message =
          encodingProblem
              ? Messages.getString("document.ImportAbstract.msg1") // encoding error
              : Messages.getString("document.ImportAbstract.msg3");
      throw new TmxReadException(errMsg == null ? message : message + errMsg);
    } finally {
      if (fis != null) {
        try {
          fis.close();
        } catch (Exception ignored) {
          // Best-effort close: the parse outcome is already decided at this point.
        }
      }
    }
    return vg;
  }

  /**
   * Copies {@code src[0..length)} while dropping raw control bytes and numeric character
   * references whose value is a control character other than TAB, LF and CR.
   *
   * <p>Well-formed references to any other character are copied through unchanged; text that
   * merely starts with {@code &#} but is not a complete reference is copied byte by byte.
   *
   * @param src the bytes to clean
   * @param length number of leading bytes of {@code src} to process
   * @param utf16 when true, 0x00 bytes are kept
   * @return a new array containing only the retained bytes
   */
  private static byte[] stripInvalidXmlChars(byte[] src, int length, boolean utf16) {
    // NOTE(review): the filter inspects single bytes. For UTF-16 input a byte in the stripped
    // range that is one half of a legitimate code unit is dropped too, and the ASCII "&#"
    // pattern below presumably never matches — confirm whether UTF-16 needs a dedicated pass.
    int lowestStripped = utf16 ? 1 : 0;
    byte[] out = new byte[length];
    int outLen = 0;
    for (int i = 0; i < length; i++) {
      byte b = src[i];
      if ((b >= lowestStripped && b <= 8) || b == 11 || b == 12 || (b >= 14 && b <= 31)) {
        continue;
      }
      if (b == '&' && i + 2 < length && src[i + 1] == '#') {
        int end = findNumericCharRefEnd(src, length, i);
        if (end >= 0) {
          if (!isInvalidXmlCharRef(src, i, end)) {
            // Keep the complete reference, not just its terminating ';'.
            int refLength = end - i + 1;
            System.arraycopy(src, i, out, outLen, refLength);
            outLen += refLength;
          }
          i = end;
          continue;
        }
      }
      out[outLen++] = b;
    }
    return Arrays.copyOf(out, outLen);
  }

  /**
   * Locates the terminating {@code ;} of a numeric character reference.
   *
   * @param src the bytes being scanned
   * @param length number of valid bytes in {@code src}
   * @param start index of the {@code &}; the caller guarantees {@code src[start + 1] == '#'} and
   *     {@code start + 2 < length}
   * @return the index of the terminating {@code ;}, or -1 if the bytes do not form a reference
   *     with at least one digit
   */
  private static int findNumericCharRefEnd(byte[] src, int length, int start) {
    int pos = start + 2;
    boolean hex = src[pos] == 'x';
    if (hex) {
      pos++;
    }
    int firstDigit = pos;
    while (pos < length && charRefDigitValue(src[pos], hex) >= 0) {
      pos++;
    }
    if (pos == firstDigit || pos >= length || src[pos] != ';') {
      return -1;
    }
    return pos;
  }

  /**
   * Tells whether the numeric character reference spanning {@code src[start..end]} denotes one
   * of the control characters that the raw-byte filter also removes (0-8, 11, 12, 14-31).
   *
   * @param src the bytes being scanned
   * @param start index of the {@code &}
   * @param end index of the terminating {@code ;}
   * @return true if the referenced value is one of those control characters
   */
  private static boolean isInvalidXmlCharRef(byte[] src, int start, int end) {
    int pos = start + 2;
    boolean hex = src[pos] == 'x';
    int radix = hex ? 16 : 10;
    if (hex) {
      pos++;
    }
    int value = 0;
    // Stop accumulating once the value exceeds the Unicode range so the int cannot overflow.
    for (; pos < end && value <= 0x10FFFF; pos++) {
      value = value * radix + charRefDigitValue(src[pos], hex);
    }
    return value <= 8 || value == 11 || value == 12 || (value >= 14 && value <= 31);
  }

  /**
   * Converts one ASCII digit byte to its numeric value.
   *
   * @param b the byte to convert
   * @param hex whether {@code a-f}/{@code A-F} are accepted in addition to {@code 0-9}
   * @return the digit value, or -1 if {@code b} is not a digit in the requested radix
   */
  private static int charRefDigitValue(byte b, boolean hex) {
    if (b >= '0' && b <= '9') {
      return b - '0';
    }
    if (hex) {
      if (b >= 'a' && b <= 'f') {
        return b - 'a' + 10;
      }
      if (b >= 'A' && b <= 'F') {
        return b - 'A' + 10;
      }
    }
    return -1;
  }