/**
 * Glues translated sentences back into a paragraph.
 *
 * <p>As sentences are returned by {@link #segment(String, List)} without spaces before and after
 * them, this method re-inserts the inter-sentence spacing:
 *
 * <ul>
 *   <li>When the target language code is listed in {@code CJK_LANGUAGES}, the original spaces
 *       are dropped. They are kept only if the text glued so far ends with {@code '.'}, or if
 *       both the before-break and the after-break pattern of the corresponding break rule
 *       match {@code PatternConsts.SPACY_REGEX}.
 *   <li>Otherwise, when the source language code is listed in {@code CJK_LANGUAGES} and the
 *       original paragraph had no spaces at that position, a single space is inserted.
 *   <li>In all other cases the spaces are restored exactly as they were in the original
 *       paragraph.
 * </ul>
 *
 * <p>If the text glued so far is empty (for example, the first sentence is an empty string), it
 * is treated as not ending with a period instead of failing with an index error.
 *
 * @param sourceLang language of the original paragraph
 * @param targetLang language of the translated sentences
 * @param sentences list of translated sentences
 * @param spaces information about spaces in the original paragraph; entries {@code 2*i-1} and
 *     {@code 2*i} together form the gap placed before sentence {@code i}
 * @param brules rules that account for the breaks; entry {@code i-1} belongs to the break before
 *     sentence {@code i}
 * @return glued translated paragraph, or an empty string if {@code sentences} is empty
 */
public static String glue(
        Language sourceLang,
        Language targetLang,
        List<String> sentences,
        List<StringBuffer> spaces,
        List<Rule> brules) {
    if (sentences.isEmpty()) {
        return "";
    }

    StringBuilder result = new StringBuilder();
    result.append(sentences.get(0));
    if (sentences.size() == 1) {
        // Nothing to glue; the languages are deliberately not consulted in this case.
        return result.toString();
    }

    // The language checks do not change between iterations, so evaluate them once.
    // The source language is only consulted when the target is not CJK.
    boolean targetIsCjk =
            CJK_LANGUAGES.contains(targetLang.getLanguageCode().toUpperCase(Locale.ENGLISH));
    boolean sourceIsCjk = !targetIsCjk
            && CJK_LANGUAGES.contains(sourceLang.getLanguageCode().toUpperCase(Locale.ENGLISH));

    for (int i = 1; i < sentences.size(); i++) {
        StringBuilder gap = new StringBuilder();
        gap.append(spaces.get(2 * i - 1));
        gap.append(spaces.get(2 * i));

        if (targetIsCjk) {
            Rule rule = brules.get(i - 1);
            // Guard against an empty buffer: charAt(length - 1) would throw otherwise.
            boolean endsWithPeriod =
                    result.length() > 0 && result.charAt(result.length() - 1) == '.';
            if (!endsWithPeriod
                    && !(PatternConsts.SPACY_REGEX.matcher(rule.getBeforebreak()).matches()
                            && PatternConsts.SPACY_REGEX.matcher(rule.getAfterbreak()).matches())) {
                gap.setLength(0);
            }
        } else if (sourceIsCjk && gap.length() == 0) {
            gap.append(" ");
        }

        result.append(gap);
        result.append(sentences.get(i));
    }
    return result.toString();
}
/** * Get ParsedTuv from list of Tuv for specific language. * * <p>Language choosed by:<br> * - with the same language+country<br> * - if not exist, then with the same language but without country<br> * - if not exist, then with the same language with whatever country<br> */ protected ParsedTuv getTuvByLang(Language lang) { String langLanguage = lang.getLanguageCode(); String langCountry = lang.getCountryCode(); ParsedTuv tuvLC = null; // Tuv with the same language+country ParsedTuv tuvL = null; // Tuv with the same language only, without country ParsedTuv tuvLW = null; // Tuv with the same language+whatever country for (int i = 0; i < currentTu.tuvs.size(); i++) { ParsedTuv tuv = currentTu.tuvs.get(i); String tuvLang = tuv.lang; if (!langLanguage.regionMatches(true, 0, tuvLang, 0, 2)) { // language not equals - there is no sense to processing continue; } if (tuvLang.length() < 3) { // language only, without country tuvL = tuv; } else if (langCountry.regionMatches(true, 0, tuvLang, 3, 2)) { // the same country tuvLC = tuv; } else { // other country tuvLW = tuv; } } ParsedTuv bestTuv; if (tuvLC != null) { bestTuv = tuvLC; } else if (tuvL != null) { bestTuv = tuvL; } else { bestTuv = tuvLW; } return bestTuv; }
/**
 * Translates {@code text} through {@code requestTranslate} and caches successful lookups.
 *
 * <p>Behavior, in order:
 *
 * <ul>
 *   <li>Returns an empty string when no API key ({@code mvYandexKey}) is set.
 *   <li>Texts longer than 10000 characters are cut and terminated with an ellipsis so that the
 *       submitted text is at most 10000 characters long. The cut never splits a surrogate pair.
 *   <li>Returns the cached translation for the (possibly shortened) text if there is one.
 *   <li>For the error codes handled below, returns {@code "<code>: <localized message>"}; such
 *       results are not cached.
 *   <li>Otherwise extracts {@code /Translation[@code='200']/text} from the response body, caches
 *       it and returns it.
 * </ul>
 *
 * @param sLang source language; only the first two characters of its language code are used
 * @param tLang target language; only the first two characters of its language code are used
 * @param text text to translate
 * @return the translation, an error description, or an empty string as described above
 * @throws Exception if the request or the evaluation of the response fails
 */
@Override
protected String translate(Language sLang, Language tLang, String text) throws Exception {
    if (mvYandexKey == null) {
        return "";
    }

    // Lower-case with a fixed locale: the default-locale variant turns "I" into a dotless
    // "ı" under e.g. a Turkish locale, which would corrupt codes such as "IT" or "ID".
    String lvSourceLang =
            sLang.getLanguageCode().substring(0, 2).toLowerCase(java.util.Locale.ENGLISH);
    String lvTargetLang =
            tLang.getLanguageCode().substring(0, 2).toLowerCase(java.util.Locale.ENGLISH);

    final int maxLength = 10000;
    String lvShorText = text;
    if (text.length() > maxLength) {
        // Leave room for the trailing ellipsis.
        int cut = maxLength - 1;
        if (Character.isHighSurrogate(text.charAt(cut - 1))) {
            // Do not leave a dangling high surrogate in front of the ellipsis.
            cut--;
        }
        lvShorText = text.substring(0, cut) + "…";
    }

    String prev = getFromCache(sLang, tLang, lvShorText);
    if (prev != null) {
        return prev;
    }

    Map<String, String> p = new TreeMap<String, String>();
    p.put("key", mvYandexKey);
    p.put("lang", lvSourceLang + '-' + lvTargetLang);
    p.put("text", lvShorText);

    YTPPostResponse response = requestTranslate(p);

    switch (response.code) {
        case ERR_OK:
            break;
        case ERR_KEY_INVALID:
            return response.code + ": " + OStrings.getString("MT_ENGINE_YANDEX_INVALID_KEY");
        case ERR_KEY_BLOCKED:
            return response.code + ": " + OStrings.getString("MT_ENGINE_YANDEX_API_BLOCKED");
        case ERR_DAILY_REQ_LIMIT_EXCEEDED:
            return response.code + ": " + OStrings.getString("MT_ENGINE_YANDEX_DAILY_LIMIT_DETECT");
        case ERR_DAILY_CHAR_LIMIT_EXCEEDED:
            return response.code + ": " + OStrings.getString("MT_ENGINE_YANDEX_DAILY_LIMIT_VOLUME");
        case ERR_TEXT_TOO_LONG:
            // NOTE(review): the key is spelled "MAZ"; kept as-is because it presumably has to
            // match the entry in the resource bundle - confirm before renaming.
            return response.code + ": " + OStrings.getString("MT_ENGINE_YANDEX_MAZ_SIZE");
        case ERR_UNPROCESSABLE_TEXT:
            return response.code + ": "
                    + OStrings.getString("MT_ENGINE_YANDEX_TRANSLATION_NOT_POSSIBLE");
        case ERR_LANG_NOT_SUPPORTED:
            return response.code + ": "
                    + OStrings.getString("MT_ENGINE_YANDEX_DIRECTION_NOT_SUPPORTED");
        default:
            // Any other code falls through to the XPath lookup below.
            break;
    }

    // Created only once we know the response body is going to be evaluated.
    XPath xPathText = XPathFactory.newInstance().newXPath();
    String pathText = "/Translation[@code='200']/text";
    String result = (String) xPathText.evaluate(
            pathText, new InputSource(new StringReader(response.response)));

    putToCache(sLang, tLang, lvShorText, result);
    return result;
}