/** * 转换输入汉字的首字母,多个汉字是只反回第一个汉字的首字母 * * @param chinese * @return */ public static String getFirstLetter(String chines) { chines = replaceString(chines); String pinyinName = ""; char[] nameChar = chines.toCharArray(); HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat(); defaultFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE); defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); // 如果传入的第一个不是汉字 则取第二个汉字的首字母 否则只反回第个汉字 if (nameChar[0] > 128) { try { String[] pys = PinyinHelper.toHanyuPinyinStringArray(nameChar[0], defaultFormat); if (pys != null) { char nchar = pys[0].charAt(0); pinyinName += nchar; } } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } } else if (nameChar[1] > 128) { try { String[] pys = PinyinHelper.toHanyuPinyinStringArray(nameChar[1], defaultFormat); if (pys != null) { char nchar = pys[0].charAt(0); pinyinName += nchar; } } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } } else { pinyinName += "#"; } return pinyinName; }
/**
 * Converts the input to full pinyin where each syllable is capitalised and followed by a dash.
 *
 * <p>The input is first passed through {@code replaceString}. Characters with a code point of
 * 128 or below are copied unchanged. For every other character the first available reading is
 * used (lower-case, no tone), its first letter is passed through
 * {@code conversionHeadUppercase}, and {@code "-"} is appended. If no reading is available the
 * character itself is treated as the syllable.
 *
 * @param chines the text to convert
 * @return the converted text
 */
public static String converterToPingYingHeadUppercase(String chines) {
    chines = replaceString(chines);

    HanyuPinyinOutputFormat lowerNoTone = new HanyuPinyinOutputFormat();
    lowerNoTone.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    lowerNoTone.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder result = new StringBuilder();
    for (char current : chines.toCharArray()) {
        if (current <= 128) {
            result.append(current);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(current, lowerNoTone);
            String syllable = (readings != null) ? readings[0] : String.valueOf(current);
            result.append(
                    conversionHeadUppercase(syllable.charAt(0)) + syllable.substring(1) + "-");
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return result.toString();
}
/** * 将汉字转换为全拼 * * @param src * @return String */ public static String getPinYin(String src) { char[] t1 = null; t1 = src.toCharArray(); // System.out.println(t1.length); String[] t2 = new String[t1.length]; // System.out.println(t2.length); // 设置汉字拼音输出的格式 HanyuPinyinOutputFormat t3 = new HanyuPinyinOutputFormat(); t3.setCaseType(HanyuPinyinCaseType.LOWERCASE); t3.setToneType(HanyuPinyinToneType.WITHOUT_TONE); t3.setVCharType(HanyuPinyinVCharType.WITH_V); String t4 = ""; int t0 = t1.length; try { for (int i = 0; i < t0; i++) { // 判断是否为汉字字符 // System.out.println(t1[i]); if (Character.toString(t1[i]).matches("[\\u4E00-\\u9FA5]+")) { t2 = PinyinHelper.toHanyuPinyinStringArray(t1[i], t3); // 将汉字的几种全拼都存到t2数组中 t4 += t2[0]; // 取出该汉字全拼的第一种读音并连接到字符串t4后 } else { // 如果不是汉字字符,直接取出字符并连接到字符串t4后 t4 += Character.toString(t1[i]); } } } catch (BadHanyuPinyinOutputFormatCombination e) { // TODO Auto-generated catch block e.printStackTrace(); } return t4; }
/**
 * Builds an upper-case index code from a name, one symbol per input character.
 *
 * <ul>
 *   <li>a character in U+4E00..U+9FA5 contributes the first letter of its first pinyin reading;
 *   <li>an ASCII letter contributes its upper-case form;
 *   <li>anything else, including a Chinese character with no known reading, contributes
 *       {@code '#'}.
 * </ul>
 *
 * @param name the text to encode; must not be {@code null}
 * @return the index code
 */
public String toPinYin(String name) {
    HanyuPinyinOutputFormat hanyuPinyin = new HanyuPinyinOutputFormat();
    // Upper-case output.
    hanyuPinyin.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    // No tones: with tone marks the first letter of a vowel-initial syllable would carry an
    // accent instead of being a plain letter like the ASCII branches below produce.
    hanyuPinyin.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    // Render "ü" as the Unicode character.
    hanyuPinyin.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);

    StringBuilder code = new StringBuilder(name.length());
    for (int i = 0; i < name.length(); i++) {
        char hanzi = name.charAt(i);
        if (hanzi >= 0x4e00 && hanzi <= 0x9fa5) {
            // Inside the Chinese character range.
            try {
                String[] pinyinArray = PinyinHelper.toHanyuPinyinStringArray(hanzi, hanyuPinyin);
                if (pinyinArray != null && pinyinArray.length > 0 && !pinyinArray[0].isEmpty()) {
                    code.append(pinyinArray[0].charAt(0));
                } else {
                    // No reading known for this character.
                    code.append('#');
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                e.printStackTrace();
            }
        } else if (hanzi >= 0x61 && hanzi <= 0x7a) {
            // a-z: shift to upper case.
            code.append((char) (hanzi - 32));
        } else if (hanzi >= 0x41 && hanzi <= 0x5a) {
            // A-Z: keep.
            code.append(hanzi);
        } else {
            code.append('#');
        }
    }
    return code.toString();
}
/**
 * Reads every row of the supplied cursor into a list of {@code ContactInfo} objects.
 *
 * <p>Column 1 is read as the name (spaces removed), column 2 as the phone number and column 4
 * as the contact id. A row without a name uses the phone number as its name. For each contact
 * the formatted number comes from {@code getNameNum} and the pinyin from
 * {@code ToPinYin.getPinYin}.
 *
 * @param params the cursor to read is expected as the first element
 * @return the contacts that could be converted; empty when there is no cursor or no rows
 */
@Override
protected ArrayList<ContactInfo> doInBackground(Cursor... params) {
    ArrayList<ContactInfo> ciList = new ArrayList<ContactInfo>();
    if (params == null || params.length == 0) {
        return ciList;
    }
    Cursor cursor = params[0];
    if (cursor == null || cursor.getCount() <= 0) {
        return ciList;
    }

    int rowCount = cursor.getCount();
    for (int i = 0; i < rowCount; i++) {
        cursor.moveToPosition(i);
        String rawName = cursor.getString(1);
        // Strip spaces only when a name exists; a null name falls back to the number below.
        String name = (rawName == null) ? null : rawName.replace(" ", "");
        String number = cursor.getString(2);
        long contactId = cursor.getLong(4);

        ContactInfo contactInfo = new ContactInfo();
        contactInfo.setId(contactId);
        contactInfo.setPhoneNum(number);
        contactInfo.setName(name);
        if (contactInfo.getName() == null) {
            contactInfo.setName(contactInfo.getPhoneNum());
        }

        // Handle a conversion failure per row so one bad contact does not drop the rest.
        try {
            contactInfo.setFormattedNumber(getNameNum(contactInfo.getName() + ""));
            contactInfo.setPinyin(ToPinYin.getPinYin(contactInfo.getName() + ""));
            ciList.add(contactInfo);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return ciList;
}
/** * 汉字转换为汉语拼音首字母,英文字符不�? * * @author gzs * @param chines 汉字 * @return 拼音 */ public static String getPinyinFrt(String chines) { String pinyinName = ""; // 转化为字�? char[] nameChar = chines.toCharArray(); // 汉语拼音格式输出�? HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat(); // 输出设置,大小�?音标方式�? defaultFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE); defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); for (int i = 0; i < nameChar.length; i++) { // 如果是中�? if (nameChar[i] > 128) { try { String[] temp = PinyinHelper.toHanyuPinyinStringArray(nameChar[i], defaultFormat); if (temp != null && temp.length > 0 && temp[0].length() > 0) { pinyinName += temp[0].charAt(0); } } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } } else { // 为英文字�? pinyinName += nameChar[i]; } } return pinyinName; }
/** * 得到 全拼 * * @param src * @return */ public static String getPingYin(String src) { char[] t1 = null; t1 = src.toCharArray(); String[] t2 = new String[t1.length]; HanyuPinyinOutputFormat t3 = new HanyuPinyinOutputFormat(); t3.setCaseType(HanyuPinyinCaseType.LOWERCASE); t3.setToneType(HanyuPinyinToneType.WITHOUT_TONE); t3.setVCharType(HanyuPinyinVCharType.WITH_V); String t4 = ""; int t0 = t1.length; try { for (int i = 0; i < t0; i++) { // 判断是否为汉字字符 if (Character.toString(t1[i]).matches("[\\u4E00-\\u9FA5]+")) { t2 = PinyinHelper.toHanyuPinyinStringArray(t1[i], t3); t4 += t2[0]; } else { t4 += Character.toString(t1[i]); } } return t4; } catch (BadHanyuPinyinOutputFormatCombination e1) { e1.printStackTrace(); } return t4; }
/**
 * Token filtering step.<br>
 * The caller invokes this method in a loop until it returns false.
 *
 * <p>For each token pulled from {@code input} this method may emit, over successive calls, the
 * original token (when {@code isOutChinese} is set) followed by the strings produced by
 * {@code getPyShort} or {@code GetPyString} (chosen by {@code firstChar}). State is carried
 * between calls in {@code curTermBuffer}, {@code hasCurOut}, {@code termIte} and
 * {@code termIteRead}.
 *
 * @return true when a token has been written to the attributes, false when the input is
 *     exhausted
 * @throws IOException declared by this method; presumably raised by {@code input.incrementToken()}
 */
public final boolean incrementToken() throws IOException {
    while (true) {
        if (this.curTermBuffer == null) { // Starting, or the previous input token is fully processed.
            if (!this.input.incrementToken()) { // Fetch the next input token.
                return false; // No further input tokens: done; false ends the caller's loop.
            }
            // Cache the input token.
            this.curTermBuffer = this.termAtt.buffer().clone();
            this.curTermLength = this.termAtt.length();
            this.inputTermPosInc = this.posIncrAtt.getPositionIncrement();
        }
        // Handle the original input token.
        if ((this.isOutChinese) && (!this.hasCurOut) && (this.termIte == null)) {
            // Emitting the original token is enabled, it has not been emitted yet for the
            // current input token, and the pinyin result set has not been started.
            this.hasCurOut = true; // Mark it so the next pass does not emit it again.
            // Write the original input token.
            this.termAtt.copyBuffer(this.curTermBuffer, 0, this.curTermLength);
            this.posIncrAtt.setPositionIncrement(this.inputTermPosInc);
            return true; // Continue.
        }
        // NOTE(review): this reads the live term attribute, not curTermBuffer. Once a pinyin
        // string has been copied into termAtt below, later calls see that string here; the
        // loop appears to rely on such text failing the check below so that termIte is not
        // rebuilt - confirm before changing.
        String chinese = this.termAtt.toString();
        // Pinyin processing.
        if (chineseCharCount(chinese) >= this._minTermLength) {
            // Contains Chinese and satisfies the length limit.
            try {
                // Produce pinyin (abbreviation or full spelling).
                Collection<String> terms = this.firstChar ? getPyShort(chinese) : GetPyString(chinese);
                if (terms != null) {
                    this.termIte = terms.iterator();
                    this.termIteRead = false;
                }
            } catch (BadHanyuPinyinOutputFormatCombination badHanyuPinyinOutputFormatCombination) {
                badHanyuPinyinOutputFormatCombination.printStackTrace();
            }
        }
        if (this.termIte != null) {
            if (this.termIte.hasNext()) { // A pinyin result set exists and is not exhausted.
                String pinyin = this.termIte.next();
                this.termAtt.copyBuffer(pinyin.toCharArray(), 0, pinyin.length());
                if (this.isOutChinese) {
                    // The original token was emitted first, so every pinyin term uses increment 0.
                    this.posIncrAtt.setPositionIncrement(0);
                } else {
                    // Only the first pinyin term carries the input token's position increment.
                    this.posIncrAtt.setPositionIncrement(this.termIteRead ? 0 : this.inputTermPosInc);
                }
                this.typeAtt.setType(this.firstChar ? "short_pinyin" : "pinyin");
                this.termIteRead = true;
                return true;
            }
        }
        // No Chinese, pinyin conversion failed, or the result set is exhausted: nothing to emit.
        // Clear the cache so the next pass fetches a new token.
        this.curTermBuffer = null;
        this.termIte = null;
        this.hasCurOut = false; // After the next fetch, emit the original token again (if enabled).
    }
}
// 此处异常一定要捕获、、、不然在spring的service内抛出异常不完成事务性。。除非也catch。。不过外一那。。给他扼杀在摇篮里 public static String[] Name2PinYinArray(String name) { try { String value = PinYinUtil.Name2Pinyin(name); if (value == null) { return null; } return value.split(";"); } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } return null; }
public String getCharacterPinYin(char c) { HanyuPinyinOutputFormat format = null; format = new HanyuPinyinOutputFormat(); format.setToneType(HanyuPinyinToneType.WITHOUT_TONE); String[] pinyin = null; try { pinyin = PinyinHelper.toHanyuPinyinStringArray(c, format); } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } // 如果c不是汉字,toHanyuPinyinStringArray会返回null if (pinyin == null) return null; // 只取一个发音,如果是多音字,仅取第一个发音 return pinyin[0]; }
/**
 * Maps every position of a name to one character obtained from {@code getOneNumFromAlpha}.
 *
 * <p>For position {@code i} the suffix of the name starting at {@code i} is converted with
 * {@code ToPinYin.getPinYin}, lower-cased, and its first character is handed to
 * {@code getOneNumFromAlpha}.
 *
 * @param name the name to encode
 * @return a string with one character per input position, or {@code null} when {@code name}
 *     is {@code null} or empty, or when the pinyin conversion fails
 */
private String getNameNum(String name) {
    if (name == null || name.length() == 0) {
        return null;
    }
    try {
        final int length = name.length();
        char[] digits = new char[length];
        int index = 0;
        while (index < length) {
            // NOTE(review): the whole remaining suffix is converted although only its first
            // letter is used; presumably a single character would do - verify against
            // ToPinYin.getPinYin before changing.
            String suffixPinyin = ToPinYin.getPinYin(name.substring(index));
            char firstLetter = suffixPinyin.toLowerCase().charAt(0);
            digits[index] = getOneNumFromAlpha(firstLetter);
            index++;
        }
        return new String(digits);
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Collects the first letter of the first toneless pinyin reading of every character.
 *
 * <p>Characters for which the library returns no reading contribute nothing to the result.
 *
 * @param string the text to scan
 * @return the concatenated initials
 */
public static String getFirstSpellByString(String string) {
    HanyuPinyinOutputFormat toneless = new HanyuPinyinOutputFormat();
    toneless.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder initials = new StringBuilder();
    for (int i = 0; i < string.length(); i++) {
        String[] readings;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(string.charAt(i), toneless);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
            continue;
        }
        if (readings == null || readings.length == 0) {
            continue;
        }
        initials.append(readings[0].charAt(0));
    }
    return initials.toString();
}
/** * 获取指定汉字的拼音数组<br> * 注:对声调支持不是很好,有些会显示乱码,例如“很” * * @param ch 汉字字符 * @param isShowMark 是否显示声调 * @return 返回该汉字拼音字符串数组,非汉字返回null */ public static String[] getPinyin(char ch, boolean isShowMark) { HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat(); // 为true则设置输出格式带声调 if (isShowMark) { format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK); format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE); } else { format.setToneType(HanyuPinyinToneType.WITHOUT_TONE); } String[] pinyins = null; try { pinyins = PinyinHelper.toHanyuPinyinStringArray(ch, format); } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } return null == pinyins ? null : pinyins; }
/**
 * Converts Chinese characters to lower-case toneless pinyin and leaves other characters
 * unchanged.
 *
 * <p>Characters with a code point of 128 or below are copied as is. A character above 128 is
 * replaced by its first reading; if no reading is available (for example punctuation or other
 * non-Chinese symbols above 128) the character itself is kept.
 *
 * @param chines the text to convert; must not be {@code null}
 * @return the converted text
 */
public static String converterToSpell(String chines) {
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder pinyinName = new StringBuilder();
    for (char c : chines.toCharArray()) {
        if (c <= 128) {
            pinyinName.append(c);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, defaultFormat);
            if (readings != null && readings.length > 0) {
                pinyinName.append(readings[0]);
            } else {
                // The lookup yields null when it has no reading; keep the character instead
                // of dereferencing null.
                pinyinName.append(c);
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return pinyinName.toString();
}
/**
 * Converts Chinese characters to upper-case toneless pinyin and leaves other characters
 * unchanged.
 *
 * <p>Characters with a code point of 128 or below are copied as is. A character above 128 is
 * replaced by its first reading; if no reading is available the character itself is kept.
 *
 * @param chinese the text to convert; must not be {@code null}
 * @return the converted text
 */
private static String getFullSpell(String chinese) {
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder pybf = new StringBuilder();
    char[] arr = chinese.toCharArray();
    for (int i = 0; i < arr.length; i++) {
        if (arr[i] > 128) {
            try {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
                // A null result means no reading is known; fall back to the raw character
                // rather than indexing into null.
                if (readings != null && readings.length > 0) {
                    pybf.append(readings[0]);
                } else {
                    pybf.append(arr[i]);
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                e.printStackTrace();
            }
        } else {
            pybf.append(arr[i]);
        }
    }
    return pybf.toString();
}
/** * 获取指定字符串的拼音字符串<br> * 注:对声调支持不是很好,有些会显示乱码,例如“很” * * @param str 字符串 * @param isShowMark 是否显示声调 * @return 返回拼音字符串,非汉字则照原样输出 */ public static String getPinyins(String str, boolean isShowMark) { StringBuffer pinyinStr = new StringBuffer(); HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat(); // 为true则设置输出格式带声调 if (isShowMark) { format.setToneType(HanyuPinyinToneType.WITH_TONE_MARK); format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE); } else { format.setToneType(HanyuPinyinToneType.WITHOUT_TONE); } for (int i = 0; i < str.length(); i++) { try { String[] pinyins = PinyinHelper.toHanyuPinyinStringArray(str.charAt(i), format); if (null == pinyins) pinyinStr.append(str.charAt(i)); else pinyinStr.append(pinyins[0]); } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } } return pinyinStr.toString(); }
/**
 * Converts the Chinese characters of a string to pinyin and leaves other characters unchanged.
 *
 * <p>The input is trimmed first. Each character in U+4E00..U+9FA5 is replaced by its first
 * reading (lower-case, no tone, "ü" written as "v"); a character in that range without a known
 * reading is kept as is.
 *
 * @param inputString the text to convert; must not be {@code null}
 * @return the converted text; if the pinyin library rejects the output format, the part
 *     converted so far is returned
 */
public static String getPingYin(String inputString) {
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    format.setVCharType(HanyuPinyinVCharType.WITH_V);

    char[] input = inputString.trim().toCharArray();
    StringBuilder output = new StringBuilder();
    try {
        for (int i = 0; i < input.length; i++) {
            char current = input[i];
            // Plain range comparison in place of the original per-character regex
            // "[\u4E00-\u9FA5]+".
            if (current >= 0x4E00 && current <= 0x9FA5) {
                String[] temp = PinyinHelper.toHanyuPinyinStringArray(current, format);
                if (temp != null && temp.length > 0) {
                    output.append(temp[0]);
                    continue;
                }
                // No reading known: fall through and keep the character.
            }
            output.append(current);
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
    }
    return output.toString();
}
/**
 * Builds the string of pinyin initials for the input.
 *
 * <p>Characters with a code point of 128 or below are copied; other characters contribute the
 * first letter of their first upper-case toneless reading, or nothing when no reading is
 * available. Finally every character matching the regex {@code \W} is removed and the result
 * is trimmed.
 *
 * @param chinese the text to convert
 * @return the initials with non-word characters stripped
 */
private static String getFirstSpell(String chinese) {
    HanyuPinyinOutputFormat upperNoTone = new HanyuPinyinOutputFormat();
    upperNoTone.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    upperNoTone.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder initials = new StringBuilder();
    for (char ch : chinese.toCharArray()) {
        if (ch <= 128) {
            initials.append(ch);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, upperNoTone);
            if (readings != null) {
                initials.append(readings[0].charAt(0));
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    String wordCharsOnly = initials.toString().replaceAll("\\W", "");
    return wordCharsOnly.trim();
}
/** * 获取拼音集合 * * @author wyh * @param src * @return Set<String> */ public static Set<String> getPinyin(String src) { if (src != null && !src.trim().equalsIgnoreCase("")) { char[] srcChar; srcChar = src.toCharArray(); // 汉语拼音格式输出类 HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat(); // 输出设置,大小写,音标方式等 hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_V); String[][] temp = new String[src.length()][]; for (int i = 0; i < srcChar.length; i++) { char c = srcChar[i]; // 是中文或者a-z或者A-Z转换拼音(我的需求,是保留中文或者a-z或者A-Z) if (String.valueOf(c).matches("[\\u4E00-\\u9FA5]+")) { try { temp[i] = PinyinHelper.toHanyuPinyinStringArray(srcChar[i], hanYuPinOutputFormat); } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } } else if (((int) c >= 65 && (int) c <= 90) || ((int) c >= 97 && (int) c <= 122) || (int) c <= '9' && (int) c >= '0') { temp[i] = new String[] {String.valueOf(srcChar[i])}; } else { temp[i] = new String[] {""}; } } String[] pingyinArray = Exchange(temp); Set<String> pinyinSet = new HashSet<String>(); for (int i = 0; i < pingyinArray.length; i++) { pinyinSet.add(pingyinArray[i]); } return pinyinSet; } return null; }
/**
 * Returns the lower-case pinyin initials of the input.
 *
 * <p>The input is first passed through {@code replaceString} (defined elsewhere; presumably
 * this is where non-Chinese text is removed, as the original description states - verify).
 * Characters with a code point of 128 or below are then copied unchanged; other characters
 * contribute the first letter of their first toneless reading, or nothing when no reading is
 * available.
 *
 * @param chines the text to convert
 * @return the string of initials
 */
public static String converterToAllFirstSpellsLowercase(String chines) {
    chines = replaceString(chines);

    HanyuPinyinOutputFormat lowerNoTone = new HanyuPinyinOutputFormat();
    lowerNoTone.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    lowerNoTone.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder initials = new StringBuilder();
    for (char ch : chines.toCharArray()) {
        if (ch <= 128) {
            initials.append(ch);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, lowerNoTone);
            if (readings == null) {
                continue;
            }
            initials.append(readings[0].charAt(0));
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return initials.toString();
}
@Override public final boolean incrementToken() throws IOException { clearAttributes(); if (!done) { done = true; int upto = 0; char[] buffer = termAtt.buffer(); while (true) { final int length = input.read(buffer, upto, buffer.length - upto); if (length == -1) break; upto += length; if (upto == buffer.length) buffer = termAtt.resizeBuffer(1 + buffer.length); } termAtt.setLength(upto); String str = termAtt.toString(); termAtt.setEmpty(); StringBuilder stringBuilder = new StringBuilder(); StringBuilder firstLetters = new StringBuilder(); for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); if (c < 128) { stringBuilder.append(c); } else { try { String[] strs = PinyinHelper.toHanyuPinyinStringArray(c, format); if (strs != null) { // get first result by default String first_value = strs[0]; // TODO more than one pinyin stringBuilder.append(first_value); if (this.padding_char.length() > 0) { stringBuilder.append(this.padding_char); } firstLetters.append(first_value.charAt(0)); } } catch (BadHanyuPinyinOutputFormatCombination badHanyuPinyinOutputFormatCombination) { badHanyuPinyinOutputFormatCombination.printStackTrace(); } } } // let's join them if (first_letter.equals("prefix")) { termAtt.append(firstLetters.toString()); if (this.padding_char.length() > 0) { termAtt.append(this.padding_char); // TODO splitter } termAtt.append(stringBuilder.toString()); } else if (first_letter.equals("append")) { termAtt.append(stringBuilder.toString()); if (this.padding_char.length() > 0) { if (!stringBuilder.toString().endsWith(this.padding_char)) { termAtt.append(this.padding_char); } } termAtt.append(firstLetters.toString()); } else if (first_letter.equals("none")) { termAtt.append(stringBuilder.toString()); } else if (first_letter.equals("only")) { termAtt.append(firstLetters.toString()); } finalOffset = correctOffset(upto); offsetAtt.setOffset(correctOffset(0), finalOffset); return true; } return false; }