Beispiel #1
0
  /**
   * Parse a {@code <p>} (pair) element.
   *
   * <p>Calls {@code skipUntil} for {@code COMPILER_LEFT_ELEM} and collects that element's contents
   * into the left-hand side, then does the same with {@code COMPILER_RIGHT_ELEM} for the
   * right-hand side, and finally calls {@code skipUntil} for {@code COMPILER_PAIR_ELEM}.
   *
   * @return an entry token configured as a single transduction from the left to the right side
   * @throws XMLStreamException if the XML reader fails while advancing
   * @throws RuntimeException if an event other than a start element, character data or an end
   *     element is encountered while reading either side
   */
  EntryToken procTransduction() throws XMLStreamException {
    final ArrayList<Integer> left = new ArrayList<Integer>();
    final ArrayList<Integer> right = new ArrayList<Integer>();

    skipUntil("", COMPILER_LEFT_ELEM);
    reader.next();
    String lastName = readTransductionSide(left, COMPILER_LEFT_ELEM, "");

    // The name of the last start element seen so far is what gets handed to skipUntil.
    skipUntil(lastName, COMPILER_RIGHT_ELEM);
    reader.next();
    lastName = readTransductionSide(right, COMPILER_RIGHT_ELEM, lastName);

    skipUntil(lastName, COMPILER_PAIR_ELEM);
    EntryToken token = new EntryToken();
    token.setSingleTransduction(left, right);
    return token;
  }

  /**
   * Feed every event up to (but not including) the end tag named {@code endElement} to
   * {@code readString}, appending to {@code side}.
   *
   * @param side the list that {@code readString} fills
   * @param endElement local name of the end element that stops the loop
   * @param lastName element name to return when no start element is encountered
   * @return the local name of the last start element seen, or {@code lastName} if there was none
   * @throws XMLStreamException if the XML reader fails while advancing
   */
  private String readTransductionSide(ArrayList<Integer> side, String endElement, String lastName)
      throws XMLStreamException {
    String name = lastName;
    while (!(reader.isEndElement() && reader.getLocalName().equals(endElement))) {
      if (reader.isStartElement()) {
        name = reader.getLocalName();
        readString(side, name);
      } else if (reader.isCharacters()) {
        readString(side, "");
      } else if (!reader.isEndElement()) {
        throw new RuntimeException(
            "Error ("
                + reader.getLocation().getLineNumber()
                + ","
                + reader.getLocation().getColumnNumber()
                + "): unexpected type of event.");
      }
      // Start elements, character data and nested end elements all advance by one event.
      reader.next();
    }
    return name;
  }
Beispiel #2
0
  /**
   * Parse an {@code <f>} element.
   *
   * <p>Every event up to the closing {@code COMPILER_FLAG_ELEM} tag is passed to
   * {@code readString}. The collected symbols become the right side (with an empty left side) when
   * {@code direction} equals {@code COMPILER_RESTRICTION_LR_VAL}; otherwise they become the left
   * side (with an empty right side).
   *
   * @return an entry token configured as a single transduction
   * @throws XMLStreamException if the XML reader fails while advancing
   * @throws RuntimeException if an event other than a start element, character data or an end
   *     element is encountered
   */
  EntryToken procFlag() throws XMLStreamException {
    final ArrayList<Integer> symbols = new ArrayList<Integer>();

    reader.next();
    while (!(reader.isEndElement() && reader.getLocalName().equals(COMPILER_FLAG_ELEM))) {
      if (reader.isStartElement()) {
        readString(symbols, reader.getLocalName());
      } else if (reader.isCharacters()) {
        readString(symbols, "");
      } else if (!reader.isEndElement()) {
        throw new RuntimeException(
            "Error ("
                + reader.getLocation().getLineNumber()
                + ","
                + reader.getLocation().getColumnNumber()
                + "): unexpected type of event.");
      }
      // Every accepted event type moves on by exactly one event.
      reader.next();
    }

    EntryToken token = new EntryToken();
    final boolean leftToRight = direction.equals(COMPILER_RESTRICTION_LR_VAL);
    final ArrayList<Integer> nothing = new ArrayList<Integer>();
    final ArrayList<Integer> leftSide = leftToRight ? nothing : symbols;
    final ArrayList<Integer> rightSide = leftToRight ? symbols : nothing;
    token.setSingleTransduction(leftSide, rightSide);
    return token;
  }
Beispiel #3
0
 /**
  * Parse a {@code <par>} element.
  *
  * <p>Reads the paradigm name from the {@code COMPILER_N_ATTR} attribute and checks that it is a
  * key of {@code paradigms}.
  *
  * @return an entry token referring to the named paradigm
  * @throws XMLStreamException declared by the signature; nothing in this method's visible body
  *     raises it directly
  * @throws RuntimeException if the named paradigm is not present in {@code paradigms}
  */
 EntryToken procPar() throws XMLStreamException {
   final String requested = attrib(COMPILER_N_ATTR);

   if (paradigms.containsKey(requested)) {
     EntryToken token = new EntryToken();
     token.setParadigm(requested);
     return token;
   }

   // Unknown paradigm: report the current line of the reader.
   final int line = reader.getLocation().getLineNumber();
   throw new RuntimeException("Error (" + line + "): Undefined paradigm '" + requested + "'.");
 }
Beispiel #4
0
  /**
   * Parse a {@code <re>} element.
   *
   * <p>Advances the reader by one event and concatenates all consecutive character-data events
   * into the regular expression text. If the event after advancing is not character data (for
   * example an empty element), the resulting expression is the empty string.
   *
   * @return an entry token carrying the collected regular expression text
   * @throws XMLStreamException if the XML reader fails while advancing
   */
  EntryToken procRegexp() throws XMLStreamException {
    reader.next();

    // Local, single-threaded buffer: StringBuilder is sufficient.
    StringBuilder re = new StringBuilder();

    // The text accessors are only queried while the reader is positioned on character data;
    // XMLStreamReader.getTextStart()/getTextLength() throw IllegalStateException on other events.
    while (reader.isCharacters()) {
      int start = reader.getTextStart();
      int length = reader.getTextLength();
      re.append(reader.getTextCharacters(), start, length);
      reader.next();
    }

    EntryToken et = new EntryToken();
    et.setRegexp(re.toString());
    return et;
  }
Beispiel #5
0
 /**
  * Parse an {@code <i>} element.
  *
  * <p>Every event up to the closing {@code COMPILER_IDENTITY_ELEM} tag is passed to
  * {@code readString}; the collected symbols are then used for both sides of the transduction.
  *
  * @return an entry token configured as a single transduction whose two sides are the same list
  * @throws XMLStreamException if the XML reader fails while advancing
  * @throws RuntimeException if an event other than a start element, character data or an end
  *     element is encountered
  */
 EntryToken procIdentity() throws XMLStreamException {
   ArrayList<Integer> both_sides = new ArrayList<Integer>();

   reader.next();
   while (!(reader.isEndElement() && reader.getLocalName().equals(COMPILER_IDENTITY_ELEM))) {
     if (reader.isStartElement()) {
       readString(both_sides, reader.getLocalName());
     } else if (reader.isCharacters()) {
       readString(both_sides, "");
     } else if (!reader.isEndElement()) {
       throw new RuntimeException(
           "Error ("
               + reader.getLocation().getLineNumber()
               + ","
               + reader.getLocation().getColumnNumber()
               + "): unexpected type of event.");
     }
     // Start elements, character data and nested end elements all advance by one event.
     reader.next();
   }

   EntryToken e = new EntryToken();
   // The very same list instance is handed over as left and right side.
   e.setSingleTransduction(both_sides, both_sides);
   return e;
 }
Beispiel #6
0
  /**
   * Insert a list of entry tokens into the transducer currently being built.
   *
   * <p>When {@code current_paradigm} is the empty string the tokens go into the transducer stored
   * in {@code sections} under {@code current_section}; otherwise they go into the transducer
   * stored in {@code paradigms} under {@code current_paradigm}. In both cases a missing transducer
   * is created and registered first, the tokens are chained starting at the transducer's initial
   * state, and the state reached after the last token is marked final.
   *
   * @param elements the tokens of one entry, in order
   * @throws RuntimeException while compiling a paradigm, if a token is neither a paradigm
   *     reference, a single transduction nor a regular expression
   */
  private void insertEntryTokens(ArrayList<EntryToken> elements) {
    if (DEBUG) {
      System.err.println("insertEntryTokens( " + elements);
    }

    if (current_paradigm.equals("")) {
      // ---- compilation of the dictionary (a section) ----
      final TransducerComp section;
      if (sections.containsKey(current_section)) {
        section = sections.get(current_section);
      } else {
        section = new TransducerComp();
        sections.put(current_section, section);
      }

      int state = section.getInitial();
      final int lastIndex = elements.size() - 1;

      for (int idx = 0; idx <= lastIndex; idx++) {
        final EntryToken token = elements.get(idx);

        if (!token.isParadigm()) {
          if (token.isRegexp()) {
            RegexpCompiler regexpCompiler = new RegexpCompiler();
            regexpCompiler.initialize(alphabet);
            regexpCompiler.compile(token.regexp);
            section.setEpsilon_Tag(alphabet_cast00);
            state = section.insertTransducer(state, regexpCompiler.getTransducer());
          } else {
            // Anything that is neither a paradigm nor a regexp is matched as a transduction.
            state = matchTransduction(token.leftSide, token.rightSide, state, section);
          }
          continue;
        }

        final String paradigmName = token.paradigmName;

        // The "last position" test comes first, so a single-token entry is handled as a suffix.
        if (idx == lastIndex) {
          // paradigm suffix
          if (!suffix_paradigms.containsKey(current_section)) {
            suffix_paradigms.put(current_section, new HashMap<String, Integer>());
          }
          if (suffix_paradigms.get(current_section).containsKey(paradigmName)) {
            // Already inserted for this section: link to the recorded state and reuse its exit.
            section.linkStates(
                state, suffix_paradigms.get(current_section).get(paradigmName), 0);
            state = postsuffix_paradigms.get(current_section).get(paradigmName);
          } else {
            state = section.insertNewSingleTransduction(alphabet_cast00, state);
            suffix_paradigms.get(current_section).put(paradigmName, state);
            section.setEpsilon_Tag(0);
            state = section.insertTransducer(state, paradigms.get(paradigmName));
            if (!postsuffix_paradigms.containsKey(current_section)) {
              postsuffix_paradigms.put(current_section, new HashMap<String, Integer>());
            }
            postsuffix_paradigms.get(current_section).put(paradigmName, state);
          }
        } else if (idx == 0) {
          // paradigm prefix
          if (!prefix_paradigms.containsKey(current_section)) {
            prefix_paradigms.put(current_section, new HashMap<String, Integer>());
          }
          if (prefix_paradigms.get(current_section).containsKey(paradigmName)) {
            state = prefix_paradigms.get(current_section).get(paradigmName);
          } else {
            section.setEpsilon_Tag(0);
            state = section.insertTransducer(state, paradigms.get(paradigmName));
            prefix_paradigms.get(current_section).put(paradigmName, state);
          }
        } else {
          // paradigm in an intermediate position
          if (!paradigms.containsKey(paradigmName)) {
            paradigms.put(paradigmName, new TransducerComp());
          }
          section.setEpsilon_Tag(0);
          state = section.insertTransducer(state, paradigms.get(paradigmName));
        }
      }

      section.setFinal(state);
      return;
    }

    // ---- compilation of a paradigm ----
    TransducerComp paradigm = paradigms.get(current_paradigm);
    if (paradigm == null) {
      paradigm = new TransducerComp();
      paradigms.put(current_paradigm, paradigm);
    }

    Integer state = paradigm.getInitial();

    for (EntryToken token : elements) {
      if (token.isParadigm()) {
        // Register an empty transducer for a referenced paradigm that is not known yet.
        final String referenced = token.paradigmName;
        if (!paradigms.containsKey(referenced)) {
          paradigms.put(referenced, new TransducerComp());
        }
        state = paradigm.insertTransducer(state, paradigms.get(referenced));
      } else if (token.isSingleTransduction()) {
        state = matchTransduction(token.leftSide, token.rightSide, state, paradigm);
      } else if (token.isRegexp()) {
        RegexpCompiler regexpCompiler = new RegexpCompiler();
        regexpCompiler.initialize(alphabet);
        regexpCompiler.compile(token.regexp);
        paradigm.setEpsilon_Tag(alphabet_cast00);
        state = paradigm.insertTransducer(state, regexpCompiler.getTransducer());
      } else {
        throw new RuntimeException(
            "Error (" + reader.getLocation().getLineNumber() + "): Invalid entry token.");
      }
    }

    paradigm.setFinal(state);
  }