/**
 * Renders this grapheme cluster as a quasi-XML element meant for human eyes: the validity flag
 * plus a pretty-printed form of the NFTHDL string.
 */
public String toString() {
  StringBuffer xml = new StringBuffer("<GC valid=");
  xml.append(valid);
  xml.append(" pretty=\"");
  xml.append(UnicodeUtils.unicodeStringToPrettyString(getNfthdl()));
  xml.append("\"/>");
  return xml.toString();
}
/** Returns NFTHDL-decomposed Unicode representing this grapheme cluster. */ private void setNfthdl(String nfthdl) { if (debug) { System.out.println("debug: GC is " + UnicodeUtils.unicodeStringToPrettyString(nfthdl)); } this.nfthdl = nfthdl; ThdlDebug.verify(nfthdl.length() > 0); // TODO(dchandler): assert only if (nfthdl.length() < 1) valid = false; valid = validGcRegex.matcher(nfthdl).matches(); }
/** * Returns EWTS that is valid but not beautiful. It's better suited for consumption by computer * programs than by humans, though it'll do in a pinch. (Humans like to see [rnams] instead of * [r+namasa].) * * @return null if this grapheme cluster has no valid EWTS representation or valid-but-ugly EWTS * otherwise */ public StringBuffer getEwtsForComputers() { if (!valid) { return null; } StringBuffer sb = new StringBuffer(); // We use ch after the loop. Initialization is not really // needed; it's just to avoid compiler errors. char ch = 'X'; boolean seenVowel = false; String lastEwts = ""; boolean added_aVOWEL = false; for (int i = 0; i < nfthdl.length(); i++) { ch = nfthdl.charAt(i); String ewts = UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(ch); if (i + 1 < nfthdl.length()) { // lookahead // Even computers want to see kI because the spec // isn't (or at least hasn't always been) crystal // clear that kA+i is equivalent to kI. if (('\u0f55' == ch || '\u0fa5' == ch) && '\u0f39' == nfthdl.charAt(i + 1)) { ++i; ewts = "f"; // TODO(dchandler): hard-coded EWTS } else if (('\u0f56' == ch || '\u0fa6' == ch) && '\u0f39' == nfthdl.charAt(i + 1)) { ++i; ewts = "v"; // TODO(dchandler): hard-coded EWTS } else if ('\u0f71' == ch && '\u0f72' == nfthdl.charAt(i + 1)) { ++i; ewts = THDLWylieConstants.I_VOWEL; // NOTE: we could normalize to 0f73 and 0f75 when // possible in NFTHDL. That's closer to EWTS and // would avoid these two special cases. 
} else if ('\u0f71' == ch && '\u0f74' == nfthdl.charAt(i + 1)) { ++i; ewts = THDLWylieConstants.U_VOWEL; } } if (null == ewts && UnicodeUtils.isInTibetanRange(ch)) { return null; } if (UnicodeUtils.isSubjoinedConsonant(ch) || (seenVowel && isVowel(ch))) sb.append(THDLWylieConstants.WYLIE_SANSKRIT_STACKING_KEY); if (isWowelRequiringPrecedingVowel(ch) && !seenVowel) { if (!added_aVOWEL) { added_aVOWEL = true; sb.append(THDLWylieConstants.WYLIE_aVOWEL); // paM, no pM } } if (isVowel(ch)) { seenVowel = true; if (lastEwts == "a") { sb.deleteCharAt(sb.length() - 1); } } sb.append(ewts); lastEwts = ewts; } if ((UnicodeUtils.isNonSubjoinedConsonant(ch) || UnicodeUtils.isSubjoinedConsonant(ch) || '\u0f39' == ch) && '\u0f68' != ch) { ThdlDebug.verify(!added_aVOWEL); sb.append(THDLWylieConstants.WYLIE_aVOWEL); } return sb; }