private void initParam() { lg.entering(TyPun.class.getName(), "initParam"); /* keep track of parameter’s values that will be used */ lg.config( String.format("Use typographic punctuation : %s", tc.do_UsePunctuation() ? "YES" : "NO")); lg.config(String.format("Use ligatures : %s", tc.do_UseLigatures() ? "YES" : "NO")); lg.config(String.format("Use old ligatures : %s", tc.do_UseOldLigatures() ? "YES" : "NO")); lg.config(String.format("Use French quotes : %s", tc.do_UseFrenchQuotes() ? "YES" : "NO")); lg.config(String.format("Use old style numbers : %s", tc.do_UseOldStyleNums() ? "YES" : "NO")); lg.config(String.format("Use French spacing : %s", tc.do_UseFrenchSpacing() ? "YES" : "NO")); lg.config(String.format("Use typographic dashes : %s", tc.do_UseTypoDash() ? "YES" : "NO")); /* left and right double quotes are currently defined at compile-time * but the code needs very few changes to make it dynamic. That way we * may use English or French quotes running the exact same code */ // ldquo = bUseFrenchQuotes ? "\u00ab" : "\u201c"; /* U+00ab « U+201c “ */ // rdquo = bUseFrenchQuotes ? "\u00bb" : "\u201d"; /* U+00bb » U+201d ” */ ldquo = tc.do_UseFrenchQuotes() ? "\u00ab" : "\u201c"; /* U+00ab « U+201c “ */ rdquo = tc.do_UseFrenchQuotes() ? "\u00bb" : "\u201d"; /* U+00bb » U+201d ” */ tydash = "\u2013"; /* tydash = "\u2014"; */ lg.exiting(TyPun.class.getName(), "initParam"); }
private String processLineSeq(String line) { /* should probably be left commented out (unless debugging) */ // lg.entering (getClass ().getName (), String.format ("processLineSeq (%s)", line)); /* keep track of line length so that we avoid overflows and/or * reallocations */ int len = line.length(); /* StringBuilder is a faster StringBuffer for single-threaded apps, we * define the capacity as length of line (+ a margin of 7 since * ligatures are multibytes characters) before processing which avoids * any unneeded realloc */ StringBuilder pline = new StringBuilder(len + 7); /* processed line */ /* For instance, the f_i ligature code point is U+fb01, which needs * 3 bytes to be written in UTF-8 (you may check with any hexdump * program that fi is indeed written as 0xef 0xac 0x81 so we are going to * need (3-2=)1 more byte. For the f_f_i ligature (ffi) whose codepoint is * U+fb03 and UTF-8 encoding is 0xef 0xac 0x83 we are using exactly the * same number of bytes (in UTF-8). Same principle applies to the * ellipsis, in ASCII encoding we need 3 bytes (one for each dot 0x2e), * in UTF-8 we also need 3 bytes to encode … / U+2026 i.e., 0xe2 0x80 * 0xa6 */ for (int index = 0; index < len; index++) { char c = line.charAt(index); if (bIsOpenTag) { /* if we are inside a tag we don’t want to change anything * otherwise we might end up with an unreadable hexadecimal * number (using ff ligature), an unknow tag name, … */ pline.append(c); /* TBD: we could add a test to check for malformed HTML tags * e.g., <xxx <yyy> zzz> */ if (c == '>') bIsOpenTag = false; } else { switch (c) { // case ',': /* to speed up processing a bit as we don’t need to */ // case ' ': /* pass every test on these glyphs */ // pline.append (c); // break; // ij and IJ ligatures deactivated since with some fonts // e.g., New Century Schoolbook, kerning is *really* bad // for instance in “ouija” or “mija” the ij is overlapping // the letter right after while being very far from the // letter before. // // case 'i': /* most frequent case first */ // // System.out.println ("we found a i"); // /* check first it is not the end */ // if (tc.do_UseLigatures () && index + 1 < len) { // /* then check following char for a ‘j’ */ // if (line.charAt (index + 1) == 'j') { // pline.append ('\u0133'); /* ij */ // ++index; // } else pline.append (c); // } else pline.append (c); // break; // case 'I': // // System.out.println ("we found a I"); // if (tc.do_UseLigatures () && index + 1 < len) { // if (line.charAt (index + 1) == 'J') { // pline.append ('\u0132'); /* IJ */ // ++index; // } else pline.append (c); // } else pline.append (c); // break; case 'a': case 'A': /* if user requires ligatures and we’re not at the end of * the line and we haven’t found ‘ae’, ‘AE’, ‘Ae’ or ‘aE’ yet… */ if (tc.do_UseLigatures() && index + 1 < len && !baePresent) { if (Character.toLowerCase(line.charAt(index + 1)) == 'e') { baePresent = true; } } /* Never make any change here. As we never know when ae * ligature is possible or not it is up to the user to do it */ pline.append(c); break; /* NB: we cannot do the same for œ because it is too common, * we find it almost everywhere e.g., «does», «doesn’t» */ case '\'': if (tc.do_UsePunctuation()) { pline.append('\u2019'); /* ’ */ } else pline.append(c); break; case '.': if (tc.do_UsePunctuation()) { if (index + 2 < len) { /* [ 0 .. (n-3) ] */ if (line.charAt(index + 1) == '.') { if (line.charAt(index + 2) == '.') { pline.append('\u2026'); /* … */ index += 2; } else { /* case only two points, assume user wanted * a (missing) third */ pline.append('\u2026'); /* … */ ++index; } } else pline.append(c); } else if (index + 1 < len) { /* [ (n-2) .. (n-1) ], (n-1) <=> EOL */ if (line.charAt(index + 1) == '.') { /* case only two points at end of line, assume * user wanted a (missing) third */ pline.append('\u2026'); /* … */ ++index; } else pline.append(c); } else pline.append(c); } else pline.append(c); break; case ';': case '?': /* case ':': /* temporarily deactivated extra spacing in front of a colon * ’cause it looks really bad in front of ‘:’ after name (HI subs) or when * displaying a time between hours and minutes */ if (tc.do_UseFrenchSpacing() && index > 0) if (line.charAt(index - 1) != ' ') pline.append('\u2009'); /* U+2009 is a thin space */ /* pline.append (' '); */ /* U+0020 is a normal (ASCII) space */ /* no change to the character itself */ pline.append(c); break; case '!': if (tc.do_UseFrenchSpacing() && index > 0) { char prevc = line.charAt(index - 1); if (prevc != ' ' && prevc != '?') pline.append('\u2009'); /* U+2009 is a thin space */ /* pline.append (' '); */ /* U+0020 is a normal (ASCII) space */ } /* no change to the character itself */ pline.append(c); break; case 'f': if (tc.do_UseLigatures()) { if (index + 2 < len) { if (line.charAt(index + 1) == 'f') { /* always check for ffi before fi */ switch (line.charAt(index + 2)) { // case 't': // pline.append ('\ue097'); /* */ // index += 2; // break; case 'i': pline.append('\ufb03'); /* ffi */ index += 2; break; case 'l': pline.append('\ufb04'); /* ffl */ index += 2; break; default: pline.append('\ufb00'); /* ff */ ++index; } } else { switch (line.charAt(index + 1)) { case 'i': pline.append('\ufb01'); /* fi */ ++index; break; case 'l': pline.append('\ufb02'); /* fl */ ++index; break; // case 't': // pline.append ('\ufb05'); /* ſt */ // ++index; // break; default: pline.append(c); } } } else if (index + 1 < len) { /* case two last chars of line */ switch (line.charAt(index + 1)) { case 'f': pline.append('\ufb00'); /* ff */ ++index; break; case 'i': pline.append('\ufb01'); /* fi */ ++index; break; case 'l': pline.append('\ufb02'); /* fl */ ++index; break; // case 't': // pline.append ('\ufb05'); /* ſt */ // ++index; // break; default: pline.append(c); } } else pline.append(c); /* case last char of line is ‘f’ */ } else pline.append(c); /* case not using ligatures */ break; case 'c': if (tc.do_UseOldLigatures()) { if (index + 1 < len) { switch (line.charAt(index + 1)) { case 'h': pline.append('\ue085'); /* */ ++index; break; case 'k': pline.append('\ue086'); /* */ ++index; break; case 't': pline.append('\ue087'); /* */ ++index; break; default: pline.append(c); } } else pline.append(c); } else pline.append(c); break; case 'T': if (tc.do_UseOldLigatures()) { if (index + 1 < len) { if (line.charAt(index + 1) == 'h') { pline.append('\ue062'); /* */ ++index; } else pline.append(c); } else pline.append(c); } else pline.append(c); break; case '"': /* if first char is a quote then make it a left quote * always, fix a bug when users write many left quotes and * only one right for a long quote that runs on several lines */ if (tc.do_UsePunctuation()) { if (index == 0) { pline.append(ldquo); } else pline.append(bIsOpenQuote ? rdquo : ldquo); } else pline.append(c); bIsOpenQuote = (index == 0) ? true : (!bIsOpenQuote); break; case '<': bIsOpenTag = true; pline.append(c); break; case '-': /* TBD: replace '--' by en-dash U+2013 and '---' by em-dash U+2014 */ if (tc.do_UseTypoDash()) { if (index + 1 < len) { char nextc = line.charAt(index + 1); switch (nextc) { case '-': /* or '\u002d' */ pline.append(tydash); ++index; break; case '<': /* HTML tag, '\u003c' */ case '\u2009': /* Unicode thin space */ case ' ': /* ' ' or '\u0020' */ pline.append(tydash); break; default: /* between words: always a simple dash unless it’s the first char */ pline.append(index == 0 ? tydash : c); } } else pline.append(tydash); /* case End Of Line */ } else pline.append(c); /* case no typographic dash */ break; case '`': /* TBD: replace `` by “ and '' by ” */ if (tc.do_UsePunctuation()) { pline.append('\u2018'); /* ‘ */ } else pline.append(c); break; case '0': if (tc.do_UseOldStyleNums()) { pline.append('\uf730'); /* */ } else pline.append(c); break; case '1': if (tc.do_UseOldStyleNums()) { pline.append('\uf731'); /* */ } else pline.append(c); break; case '2': if (tc.do_UseOldStyleNums()) { pline.append('\uf732'); /* */ } else pline.append(c); break; case '3': if (tc.do_UseOldStyleNums()) { pline.append('\uf733'); /* */ } else pline.append(c); break; case '4': if (tc.do_UseOldStyleNums()) { pline.append('\uf734'); /* */ } else pline.append(c); break; case '5': if (tc.do_UseOldStyleNums()) { pline.append('\uf735'); /* */ } else pline.append(c); break; case '6': if (tc.do_UseOldStyleNums()) { pline.append('\uf736'); /* */ } else pline.append(c); break; case '7': if (tc.do_UseOldStyleNums()) { pline.append('\uf737'); /* */ } else pline.append(c); break; case '8': if (tc.do_UseOldStyleNums()) { pline.append('\uf738'); /* */ } else pline.append(c); break; case '9': if (tc.do_UseOldStyleNums()) { pline.append('\uf739'); /* */ } else pline.append(c); break; default: pline.append(c); } } } /* should probably be left commented out (unless debugging) */ // lg.exiting (getClass ().getName (), String.format ("processLineSeq (%s)", pline)); // return reference of modified string return new String(pline); }