/**
 * Parse the {@code <p>} elements.
 *
 * <p>Skips to the {@code COMPILER_LEFT_ELEM} element and reads its content into the left-hand
 * side, then skips to the {@code COMPILER_RIGHT_ELEM} element and reads its content into the
 * right-hand side, and finally skips to {@code COMPILER_PAIR_ELEM}.
 *
 * @return an entry token holding the single transduction built from the two sides
 * @throws javax.xml.stream.XMLStreamException propagated from the reader and the parsing helpers
 * @throws RuntimeException if an event that is neither a start element, character data nor an
 *     end element is met while reading a side
 */
EntryToken procTransduction() throws XMLStreamException {
  ArrayList<Integer> lhs = new ArrayList<Integer>();
  ArrayList<Integer> rhs = new ArrayList<Integer>();

  skipUntil("", COMPILER_LEFT_ELEM);
  reader.next();
  String name = readTransductionSide(lhs, COMPILER_LEFT_ELEM, "");

  // The name of the last start element seen on the left side is what gets handed to skipUntil.
  skipUntil(name, COMPILER_RIGHT_ELEM);
  reader.next();
  name = readTransductionSide(rhs, COMPILER_RIGHT_ELEM, name);

  skipUntil(name, COMPILER_PAIR_ELEM);

  EntryToken e = new EntryToken();
  e.setSingleTransduction(lhs, rhs);
  return e;
}

/**
 * Reads one side of a transduction, appending its symbols to {@code symbols} until the reader
 * is positioned on the end tag named {@code closingElement}.
 *
 * @param symbols the list that {@code readString} appends to
 * @param closingElement local name of the end tag that terminates this side
 * @param lastName value to return when no start element is met while reading
 * @return the local name of the last start element met, or {@code lastName} if there was none
 * @throws XMLStreamException propagated from the reader and {@code readString}
 */
private String readTransductionSide(
    ArrayList<Integer> symbols, String closingElement, String lastName)
    throws XMLStreamException {
  String name = lastName;
  while (!(reader.isEndElement() && reader.getLocalName().equals(closingElement))) {
    if (reader.isStartElement()) {
      name = reader.getLocalName();
      readString(symbols, name);
    } else if (reader.isCharacters()) {
      readString(symbols, "");
    } else if (!reader.isEndElement()) {
      throw new RuntimeException(
          "Error ("
              + reader.getLocation().getLineNumber()
              + ","
              + reader.getLocation().getColumnNumber()
              + "): unexpected type of event.");
    }
    // Start elements, character data and nested end tags all advance by exactly one event.
    reader.next();
  }
  return name;
}
/** * Parse the <f> elements * * @return a list of tokens from the dictionary's entry */ EntryToken procFlag() throws XMLStreamException { ArrayList<Integer> both_sides = new ArrayList<Integer>(); // String n = attrib(COMPILER_N_ATTR); // String v = attrib(COMPILER_VALUE_ATTR); String name = ""; reader.next(); while (true) { if (reader.isEndElement() && reader.getLocalName().equals(COMPILER_FLAG_ELEM)) { break; } if (reader.isStartElement()) { name = reader.getLocalName(); readString(both_sides, name); reader.next(); } else if (reader.isCharacters()) { readString(both_sides, ""); reader.next(); } else if (reader.isEndElement()) { reader.next(); } else { throw new RuntimeException( "Error (" + reader.getLocation().getLineNumber() + "," + reader.getLocation().getColumnNumber() + "): unexpected type of event."); } } /* * while (true) { * reader.next(); * int type = reader.getEventType(); * if (type == XMLStreamConstants.END_ELEMENT || type == XMLStreamConstants.START_ELEMENT) { * name = reader.getLocalName(); * * System.err.println("name = " + name); * } * if (name.equals(COMPILER_FLAG_ELEM)) { * break; * } * readString(both_sides, name); * } */ EntryToken e = new EntryToken(); if (direction.equals(COMPILER_RESTRICTION_LR_VAL)) { e.setSingleTransduction(new ArrayList<Integer>(), both_sides); } else { e.setSingleTransduction(both_sides, new ArrayList<Integer>()); } return e; }
/**
 * Parse the {@code <par>} elements.
 *
 * <p>Reads the {@code COMPILER_N_ATTR} attribute and checks that a paradigm with that name is
 * already registered in {@code paradigms}.
 *
 * @return an entry token referring to the named paradigm
 * @throws javax.xml.stream.XMLStreamException declared for the parsing helpers
 * @throws RuntimeException if no paradigm with the given name is registered
 */
EntryToken procPar() throws XMLStreamException {
  final String requested = attrib(COMPILER_N_ATTR);

  if (paradigms.containsKey(requested)) {
    EntryToken token = new EntryToken();
    token.setParadigm(requested);
    return token;
  }

  throw new RuntimeException(
      "Error ("
          + reader.getLocation().getLineNumber()
          + "): Undefined paradigm '"
          + requested
          + "'.");
}
/**
 * Parse the {@code <re>} elements.
 *
 * <p>Advances past the current event, then concatenates the text of every consecutive
 * character event. On return the reader is positioned on the first event that is not character
 * data. An element without any text yields an empty regular expression.
 *
 * @return an entry token holding the collected regular expression text
 * @throws javax.xml.stream.XMLStreamException propagated from the reader
 */
EntryToken procRegexp() throws XMLStreamException {
  reader.next();

  // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
  StringBuilder re = new StringBuilder();

  // The text accessors are queried only inside the loop, where the cursor is known to be on a
  // character event; XMLStreamReader specifies IllegalStateException for getTextStart() and
  // getTextLength() in non-text states (e.g. on the end tag of an empty element).
  while (reader.isCharacters()) {
    int start = reader.getTextStart();
    int length = reader.getTextLength();
    re.append(reader.getTextCharacters(), start, length);
    reader.next();
  }

  EntryToken et = new EntryToken();
  et.setRegexp(re.toString());
  return et;
}
/** * Parse the <i> elements * * @return a list of tokens from the dictionary's entry * @throws javax.xml.stream.XMLStreamException */ EntryToken procIdentity() throws XMLStreamException { ArrayList<Integer> both_sides = new ArrayList<Integer>(); if (!(reader.isStartElement() && reader.isEndElement())) { // not an emoty node } String name = ""; reader.next(); while (true) { if (reader.isEndElement() && reader.getLocalName().equals(COMPILER_IDENTITY_ELEM)) { break; } if (reader.isStartElement()) { name = reader.getLocalName(); readString(both_sides, name); reader.next(); } else if (reader.isCharacters()) { readString(both_sides, ""); reader.next(); } else if (reader.isEndElement()) { reader.next(); } else { throw new RuntimeException( "Error (" + reader.getLocation().getLineNumber() + "," + reader.getLocation().getColumnNumber() + "): unexpected type of event."); } } /* * while (true) { * reader.next(); * int type = reader.getEventType(); * if (type == XMLStreamConstants.END_ELEMENT || type == XMLStreamConstants.START_ELEMENT) { * name = reader.getLocalName(); * } * if (name.equals(COMPILER_IDENTITY_ELEM)) { * break; * } * readString(both_sides, name); * } */ EntryToken e = new EntryToken(); e.setSingleTransduction(both_sides, both_sides); return e; }
/** * Insert a list of tokens into the paradigm / section being processed * * @param elements the list */ private void insertEntryTokens(ArrayList<EntryToken> elements) { if (DEBUG) System.err.println("insertEntryTokens( " + elements); if (!current_paradigm.equals("")) { // compilation of paradigms TransducerComp t = paradigms.get(current_paradigm); if (t == null) { t = new TransducerComp(); paradigms.put(current_paradigm, t); } Integer e = t.getInitial(); for (int i = 0, limit = elements.size(); i < limit; i++) { EntryToken entry = elements.get(i); if (entry.isParadigm()) { if (!paradigms.containsKey(entry.paradigmName)) { paradigms.put(entry.paradigmName, new TransducerComp()); } e = t.insertTransducer(e, paradigms.get(entry.paradigmName)); } else if (entry.isSingleTransduction()) { e = matchTransduction(entry.leftSide, entry.rightSide, e, t); } else if (entry.isRegexp()) { RegexpCompiler analyzer = new RegexpCompiler(); analyzer.initialize(alphabet); analyzer.compile(entry.regexp); t.setEpsilon_Tag(alphabet_cast00); e = t.insertTransducer(e, analyzer.getTransducer()); } else { throw new RuntimeException( "Error (" + reader.getLocation().getLineNumber() + "): Invalid entry token."); } } t.setFinal(e); } else { // compilation of the dictionary TransducerComp t; if (!sections.containsKey(current_section)) { t = new TransducerComp(); sections.put(current_section, t); } else { t = sections.get(current_section); } int e = t.getInitial(); for (int i = 0, limit = elements.size(); i < limit; i++) { EntryToken entry = elements.get(i); if (entry.isParadigm()) { final String paradigmName = entry.paradigmName; if (i == elements.size() - 1) { // paradigm sufix if (!suffix_paradigms.containsKey(current_section)) { suffix_paradigms.put(current_section, new HashMap<String, Integer>()); } if (suffix_paradigms.get(current_section).containsKey(paradigmName)) { t.linkStates(e, suffix_paradigms.get(current_section).get(paradigmName), 0); e = 
postsuffix_paradigms.get(current_section).get(paradigmName); } else { e = t.insertNewSingleTransduction(alphabet_cast00, e); suffix_paradigms.get(current_section).put(paradigmName, e); t.setEpsilon_Tag(0); e = t.insertTransducer(e, paradigms.get(paradigmName)); if (!postsuffix_paradigms.containsKey(current_section)) { postsuffix_paradigms.put(current_section, new HashMap<String, Integer>()); } postsuffix_paradigms.get(current_section).put(paradigmName, e); } } else if (i == 0) { // paradigm prefix if (!prefix_paradigms.containsKey(current_section)) { prefix_paradigms.put(current_section, new HashMap<String, Integer>()); } if (prefix_paradigms.get(current_section).containsKey(paradigmName)) { e = prefix_paradigms.get(current_section).get(paradigmName); } else { t.setEpsilon_Tag(0); e = t.insertTransducer(e, paradigms.get(paradigmName)); prefix_paradigms.get(current_section).put(paradigmName, e); } } else { // paradigm intermediate if (!paradigms.containsKey(paradigmName)) { paradigms.put(paradigmName, new TransducerComp()); } t.setEpsilon_Tag(0); e = t.insertTransducer(e, paradigms.get(paradigmName)); } } else if (entry.isRegexp()) { RegexpCompiler analyzer = new RegexpCompiler(); analyzer.initialize(alphabet); analyzer.compile(entry.regexp); t.setEpsilon_Tag(alphabet_cast00); e = t.insertTransducer(e, analyzer.getTransducer()); } else { e = matchTransduction(entry.leftSide, entry.rightSide, e, t); } } t.setFinal(e); } }