Java CharType Exemples

Langage de programmation: Java

Espace de nommage/Paquet : com.hankcs.hanlp.dictionary.other

Class/Type: CharType

Exemples sur hotexamples.com : 2

Java CharType - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de com.hankcs.hanlp.dictionary.other.CharType extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

get(2)

Méthodes fréquemment utilisées

get (2)

Associées

CoffeeBreakBean

SystemViewpoint

CascadeState

ColumnReorderLayer

ObjectReader

Rndm_bldr_wkr

UserUtils

TilesetPropertiesView

SvnFileUrlMappingImpl

RepairServiceForm

Associés dans d'autres langages

DisplayTorrentFluxLink (PHP)

cms_current_language (PHP)

CoroutineInstance (C#)

GUIElement (C#)

roundedIntSize (C++)

MyClient (C++)

HWND (Go)

NewClient (Go)

CommandLine (Python)

processingnid (Python)

Exemple #1

0

Afficher le fichier

Fichier : Segment.java Projet : zhoufeng/HanLP

/** * 快速原子分词，希望用这个方法替换掉原来缓慢的方法 * * @param charArray * @param start * @param end * @return */ protected static List<AtomNode> quickAtomSegment(char[] charArray, int start, int end) { List<AtomNode> atomNodeList = new LinkedList<AtomNode>(); int offsetAtom = start; int preType = CharType.get(charArray[offsetAtom]); int curType; while (++offsetAtom < end) { curType = CharType.get(charArray[offsetAtom]); if (curType != preType) { // 浮点数识别 if (charArray[offsetAtom] == '.' && preType == CharType.CT_NUM) { while (++offsetAtom < end) { curType = CharType.get(charArray[offsetAtom]); if (curType != CharType.CT_NUM) break; } } atomNodeList.add(new AtomNode(new String(charArray, start, offsetAtom - start), preType)); start = offsetAtom; } preType = curType; } if (offsetAtom == end) atomNodeList.add(new AtomNode(new String(charArray, start, offsetAtom - start), preType)); return atomNodeList; }

Exemple #2

0

Afficher le fichier

Fichier : Segment.java Projet : zhoufeng/HanLP

/** * 原子分词 * * @param charArray * @param start 从start开始（包含） * @param end 到end结束（不包含end） * @return 一个列表，代表从start到from的所有字构成的原子节点 */ protected static List<AtomNode> atomSegment(char[] charArray, int start, int end) { List<AtomNode> atomSegment = new ArrayList<AtomNode>(); int pCur = start, nCurType, nNextType; StringBuilder sb = new StringBuilder(); char c; int[] charTypeArray = new int[end - start]; // 生成对应单个汉字的字符类型数组 for (int i = 0; i < charTypeArray.length; ++i) { c = charArray[i + start]; charTypeArray[i] = CharType.get(c); if (c == '.' && i + start < (charArray.length - 1) && CharType.get(charArray[i + start + 1]) == CharType.CT_NUM) charTypeArray[i] = CharType.CT_NUM; else if (c == '.' && i + start < (charArray.length - 1) && charArray[i + start + 1] >= '0' && charArray[i + start + 1] <= '9') charTypeArray[i] = CharType.CT_SINGLE; else if (charTypeArray[i] == CharType.CT_LETTER) charTypeArray[i] = CharType.CT_SINGLE; } // 根据字符类型数组中的内容完成原子切割 while (pCur < end) { nCurType = charTypeArray[pCur - start]; if (nCurType == CharType.CT_CHINESE || nCurType == CharType.CT_INDEX || nCurType == CharType.CT_DELIMITER || nCurType == CharType.CT_OTHER) { String single = String.valueOf(charArray[pCur]); if (single.length() != 0) atomSegment.add(new AtomNode(single, nCurType)); pCur++; } // 如果是字符、数字或者后面跟随了数字的小数点“.”则一直取下去。 else if (pCur < end - 1 && ((nCurType == CharType.CT_SINGLE) || nCurType == CharType.CT_NUM)) { sb.delete(0, sb.length()); sb.append(charArray[pCur]); boolean reachEnd = true; while (pCur < end - 1) { nNextType = charTypeArray[++pCur - start]; if (nNextType == nCurType) sb.append(charArray[pCur]); else { reachEnd = false; break; } } atomSegment.add(new AtomNode(sb.toString(), nCurType)); if (reachEnd) pCur++; } // 对于所有其它情况 else { atomSegment.add(new AtomNode(charArray[pCur], nCurType)); pCur++; } } return atomSegment; }