Example #1
0
 /**
  * Picks, among the {@code ParsedTuv} entries of the current translation unit, the one that best
  * matches the requested language.
  *
  * <p>Candidates are ranked in this order:<br>
  * - same language and same country<br>
  * - otherwise, same language without any country<br>
  * - otherwise, same language with any other country<br>
  *
  * <p>Only the first two characters of the language code are compared, ignoring case; the country
  * is taken from the two characters starting at offset 3 of the entry's language string. When
  * several entries fall into the same rank, the one that appears last in the list wins.
  *
  * @param lang language to look for
  * @return the best matching entry, or {@code null} when no entry shares the language
  */
 protected ParsedTuv getTuvByLang(Language lang) {
   String wantedLanguage = lang.getLanguageCode();
   String wantedCountry = lang.getCountryCode();

   ParsedTuv exactMatch = null; // same language and same country
   ParsedTuv languageOnlyMatch = null; // same language, entry carries no country
   ParsedTuv otherCountryMatch = null; // same language, some other country

   for (int idx = 0; idx < currentTu.tuvs.size(); idx++) {
     ParsedTuv candidate = currentTu.tuvs.get(idx);
     String candidateLang = candidate.lang;
     boolean sameLanguage = wantedLanguage.regionMatches(true, 0, candidateLang, 0, 2);
     if (sameLanguage) {
       if (candidateLang.length() < 3) {
         languageOnlyMatch = candidate;
       } else if (wantedCountry.regionMatches(true, 0, candidateLang, 3, 2)) {
         exactMatch = candidate;
       } else {
         otherCountryMatch = candidate;
       }
     }
     // entries in a different language are simply ignored
   }

   if (exactMatch != null) {
     return exactMatch;
   }
   if (languageOnlyMatch != null) {
     return languageOnlyMatch;
   }
   // may still be null when nothing matched at all
   return otherCountryMatch;
 }
Example #2
0
  /**
   * Glues the sentences back to paragraph.
   *
   * <p>As sentences are returned by {@link #segment(String, List)} without spaces before and after
   * them, this method adds spaces if needed:
   *
   * <ul>
   *   <li>When the target language is listed in {@code CJK_LANGUAGES}, the spaces are dropped,
   *       with two exceptions: the text glued so far ends with an ASCII period ({@code '.'}), or
   *       the break rule breaks on space, i.e. both its before-break and after-break patterns
   *       match {@code PatternConsts.SPACY_REGEX}. In those cases the original spaces are kept.
   *   <li>Otherwise, when the source language is listed in {@code CJK_LANGUAGES} and the original
   *       paragraph had no spaces at that position, a single space is added.
   *   <li>For all other language combinations the spaces are restored as they were in the
   *       paragraph before.
   * </ul>
   *
   * <p>If the text glued so far is empty (for example an empty first sentence), it is treated as
   * not ending with a period instead of failing with an index error.
   *
   * @param sourceLang source language of the paragraph
   * @param targetLang language the sentences were translated to
   * @param sentences list of translated sentences
   * @param spaces information about spaces in original paragraph; entries {@code 2 * i - 1} and
   *     {@code 2 * i} are concatenated to form the gap placed before sentence {@code i}
   * @param brules rules that account to breaks; entry {@code i - 1} is the rule for the break
   *     before sentence {@code i}
   * @return glued translated paragraph, or an empty string when there are no sentences
   */
  public static String glue(
      Language sourceLang,
      Language targetLang,
      List<String> sentences,
      List<StringBuffer> spaces,
      List<Rule> brules) {
    if (sentences.isEmpty()) {
      return "";
    }

    StringBuilder res = new StringBuilder();
    res.append(sentences.get(0));

    for (int i = 1; i < sentences.size(); i++) {
      StringBuilder sp = new StringBuilder();
      sp.append(spaces.get(2 * i - 1));
      sp.append(spaces.get(2 * i));

      if (CJK_LANGUAGES.contains(targetLang.getLanguageCode().toUpperCase(Locale.ENGLISH))) {
        Rule rule = brules.get(i - 1);
        // Guard against an empty buffer: charAt(-1) would throw.
        boolean endsWithPeriod = res.length() > 0 && res.charAt(res.length() - 1) == '.';
        if (!endsWithPeriod
            && !(PatternConsts.SPACY_REGEX.matcher(rule.getBeforebreak()).matches()
                && PatternConsts.SPACY_REGEX.matcher(rule.getAfterbreak()).matches())) {
          sp.setLength(0);
        }
      } else if (CJK_LANGUAGES.contains(sourceLang.getLanguageCode().toUpperCase(Locale.ENGLISH))
          && sp.length() == 0) {
        sp.append(" ");
      }

      res.append(sp);
      res.append(sentences.get(i));
    }
    return res.toString();
  }
Example #3
0
 /**
  * Compares this language with another object for equality.
  *
  * <p>Two languages are considered equal when the values returned by {@code getLocaleCode()} are
  * equal. Any object that is not a {@code Language} (including {@code null}) is never equal.
  */
 @Override
 public boolean equals(Object lang) {
   if (lang == this) {
     return true;
   }
   if (!(lang instanceof Language)) {
     // instanceof is false for null, so this also rejects a null argument
     return false;
   }
   Language other = (Language) lang;
   return getLocaleCode().equals(other.getLocaleCode());
 }
 /**
  * Runs an import: creates a {@code TMXSaver} for the given language pair and output file, runs a
  * {@code FolderScanner} over the folder using the target locale, and finally saves the result.
  *
  * @param sourcelang source language, passed to the {@code Language} constructor
  * @param targetlang target language, passed to the {@code Language} constructor
  * @param folder folder handed to the scanner
  * @param tmxfile TMX file handed to the saver
  * @throws IOException declared by this method; presumably raised by scanning or saving
  */
 public static void doImport(String sourcelang, String targetlang, String folder, String tmxfile)
     throws IOException {
   final Language srcLanguage = new Language(sourcelang);
   final Language tgtLanguage = new Language(targetlang);

   final TMXSaver tmxSaver =
       new TMXSaver(srcLanguage.getLanguage(), tgtLanguage.getLanguage(), tmxfile);

   // the scanner is only needed for this single pass
   new FolderScanner(folder, tgtLanguage.getLocale(), tmxSaver).scan();

   tmxSaver.save();
 }
Example #5
0
  /**
   * Reads the attributes of the given start element (presumably the TMX {@code <header>}), records
   * the segmentation type and creation tool, logs the details and emits warnings.
   *
   * <p>Sets {@code isParagraphSegtype} and {@code isOmegaT} from the {@code segtype} and {@code
   * creationtool} attributes. A warning is logged when the {@code srclang} attribute differs
   * (ignoring case) from the project source language, including when the attribute yields
   * {@code null}, and when segmenting is enabled while the file uses paragraph segmentation.
   *
   * @param element start element whose attributes are inspected
   * @param sourceLanguage source language of the project, compared with {@code srclang}
   */
  protected void parseHeader(StartElement element, final Language sourceLanguage) {
    String segType = getAttributeValue(element, "segtype");
    String creationTool = getAttributeValue(element, "creationtool");
    String tmxSourceLanguage = getAttributeValue(element, "srclang");

    isParagraphSegtype = SEG_PARAGRAPH.equals(segType);
    isOmegaT = CT_OMEGAT.equals(creationTool);

    // log some details
    Log.logRB("TMXR_INFO_CREATION_TOOL", new Object[] {creationTool});
    Log.logRB(
        "TMXR_INFO_CREATION_TOOL_VERSION",
        new Object[] {getAttributeValue(element, "creationtoolversion")});
    Log.logRB("TMXR_INFO_SEG_TYPE", new Object[] {segType});
    Log.logRB("TMXR_INFO_SOURCE_LANG", new Object[] {tmxSourceLanguage});

    // give a warning if the TMX source language is
    // different from the project source language.
    // The comparison is made on the project language so that a null srclang value
    // results in a warning instead of a NullPointerException.
    if (!sourceLanguage.getLanguage().equalsIgnoreCase(tmxSourceLanguage)) {
      Log.logWarningRB(
          "TMXR_WARNING_INCORRECT_SOURCE_LANG", new Object[] {tmxSourceLanguage, sourceLanguage});
    }

    // give a warning that TMX file will be upgraded to sentence segmentation
    if (isSegmentingEnabled && isParagraphSegtype) {
      Log.logWarningRB("TMXR_WARNING_UPGRADE_SENTSEG");
    }
  }
Example #6
0
  /**
   * Translates {@code text} through the Yandex request helper and returns the translated text.
   *
   * <p>Behaviour, in order:
   *
   * <ul>
   *   <li>returns an empty string when no API key is configured;
   *   <li>texts longer than 10000 characters are cut to 9999 characters plus an ellipsis;
   *   <li>a previously cached translation of the (possibly shortened) text is returned as is;
   *   <li>for the known error codes the result is {@code "<code>: <localized message>"} and
   *       nothing is cached;
   *   <li>otherwise the text is extracted from the response with the XPath expression {@code
   *       /Translation[@code='200']/text}, stored in the cache and returned.
   * </ul>
   *
   * @param sLang source language; the first two characters of its language code are used
   * @param tLang target language; the first two characters of its language code are used
   * @param text text to translate
   * @return translated text, an error description, or an empty string when no key is set
   * @throws Exception declared by this method; presumably from the request or XPath evaluation
   */
  @Override
  protected String translate(Language sLang, Language tLang, String text) throws Exception {
    if (mvYandexKey == null) {
      return "";
    }

    // Language codes are ASCII identifiers: lower-case them with a fixed locale so the result
    // does not depend on the JVM default locale (under a Turkish locale "IT" would become "ıt").
    String lvSourceLang =
        sLang.getLanguageCode().substring(0, 2).toLowerCase(java.util.Locale.ROOT);
    String lvTargetLang =
        tLang.getLanguageCode().substring(0, 2).toLowerCase(java.util.Locale.ROOT);

    String lvShorText = text.length() > 10000 ? text.substring(0, 9999) + "…" : text;
    String prev = getFromCache(sLang, tLang, lvShorText);
    if (prev != null) {
      return prev;
    }

    Map<String, String> p = new TreeMap<String, String>();
    p.put("key", mvYandexKey);
    p.put("lang", lvSourceLang + '-' + lvTargetLang);
    p.put("text", lvShorText);

    YTPPostResponse response = requestTranslate(p);

    // Map the known error codes to their message keys; ERR_OK and unknown codes
    // fall through to response parsing.
    String errorKey;
    switch (response.code) {
      case ERR_KEY_INVALID:
        errorKey = "MT_ENGINE_YANDEX_INVALID_KEY";
        break;
      case ERR_KEY_BLOCKED:
        errorKey = "MT_ENGINE_YANDEX_API_BLOCKED";
        break;
      case ERR_DAILY_REQ_LIMIT_EXCEEDED:
        errorKey = "MT_ENGINE_YANDEX_DAILY_LIMIT_DETECT";
        break;
      case ERR_DAILY_CHAR_LIMIT_EXCEEDED:
        errorKey = "MT_ENGINE_YANDEX_DAILY_LIMIT_VOLUME";
        break;
      case ERR_TEXT_TOO_LONG:
        errorKey = "MT_ENGINE_YANDEX_MAZ_SIZE";
        break;
      case ERR_UNPROCESSABLE_TEXT:
        errorKey = "MT_ENGINE_YANDEX_TRANSLATION_NOT_POSSIBLE";
        break;
      case ERR_LANG_NOT_SUPPORTED:
        errorKey = "MT_ENGINE_YANDEX_DIRECTION_NOT_SUPPORTED";
        break;
      case ERR_OK:
      default:
        errorKey = null;
        break;
    }
    if (errorKey != null) {
      return response.code + ": " + OStrings.getString(errorKey);
    }

    // NOTE(review): the response body comes from a remote service and is parsed as XML here;
    // confirm that the parser behind XPath.evaluate is acceptable for untrusted input.
    XPath xPathText = XPathFactory.newInstance().newXPath();
    String pathText = "/Translation[@code='200']/text";

    String result =
        (String) xPathText.evaluate(pathText, new InputSource(new StringReader(response.response)));

    putToCache(sLang, tLang, lvShorText, result);
    return result;
  }
 /**
  * Builds the text displayed for a language: its string form, followed by {@code " - "} and the
  * value of {@code getDisplayName()}.
  *
  * @param value language to render, may be {@code null}
  * @return the display text, or {@code null} when {@code value} is {@code null}
  */
 @Override
 protected String getDisplayText(Language value) {
   if (value == null) {
     return null;
   }
   return value + " - " + value.getDisplayName();
 }