Ejemplo n.º 1
0
  /**
   * Builds symbol pairs by aligning both sides of a transduction from their first symbol and
   * inserts the resulting path into a transducer.
   *
   * <p>When {@code direction} equals {@code COMPILER_RESTRICTION_LR_VAL}, {@code pi} supplies the
   * first component of every pair and {@code pd} the second; for any other direction the roles are
   * swapped. If one side is shorter than the other, its missing symbols are paired as 0. If both
   * sides are empty, one new transition labelled {@code alphabet_cast00} is inserted.
   *
   * <p>On every step that consumes a symbol from the first-component side, each entry that
   * {@code acx_map} holds for that symbol is additionally linked between the same two states,
   * paired with the same second-component symbol.
   *
   * @param pi left part of the transduction
   * @param pd right part of the transduction
   * @param state the state from which the new transduction is inserted
   * @param t the transducer
   * @return the last state of the inserted transduction
   */
  int matchTransduction(ArrayList<Integer> pi, ArrayList<Integer> pd, int state, TransducerComp t) {
    final boolean leftToRight = direction.equals(COMPILER_RESTRICTION_LR_VAL);
    // "first" feeds the first component of each pair, "second" the second component.
    final ArrayList<Integer> first = leftToRight ? pi : pd;
    final ArrayList<Integer> second = leftToRight ? pd : pi;

    if (first.isEmpty() && second.isEmpty()) {
      // This trace has only ever been emitted for the LR direction; kept that way.
      if (DEBUG && leftToRight) System.err.println("e = " + t.toString());
      return t.insertNewSingleTransduction(alphabet_cast00, state);
    }

    final int firstSize = first.size();
    final int secondSize = second.size();
    int firstPos = 0;
    int secondPos = 0;

    while (firstPos < firstSize || secondPos < secondSize) {
      // Both are re-initialised on every step. Keeping them across iterations would re-link the
      // alternatives of an earlier symbol on steps that only consume the second side.
      HashSet<Integer> alternatives = null;
      int rsymbol = 0;
      int label;

      if (firstPos == firstSize) {
        // First side exhausted: nothing is consumed from it, so no alternatives apply.
        label = alphabet.cast(0, second.get(secondPos));
        secondPos++;
      } else if (secondPos == secondSize) {
        // Second side exhausted.
        Integer firstSymbol = first.get(firstPos);
        label = alphabet.cast(firstSymbol, 0);
        alternatives = acx_map.get(firstSymbol); // may be null
        firstPos++;
      } else {
        Integer firstSymbol = first.get(firstPos);
        Integer secondSymbol = second.get(secondPos);
        label = alphabet.cast(firstSymbol, secondSymbol);
        alternatives = acx_map.get(firstSymbol); // may be null
        rsymbol = secondSymbol;
        firstPos++;
        secondPos++;
      }

      int nextState = t.insertSingleTransduction(label, state);
      if (alternatives != null) {
        for (Integer alternative : alternatives) {
          t.linkStates(state, nextState, alphabet.cast(alternative, rsymbol));
        }
      }
      state = nextState;
    }
    return state;
  }
Ejemplo n.º 2
0
  /**
   * Inserts a list of entry tokens into the paradigm or section being processed.
   *
   * <p>When {@code current_paradigm} is non-empty the tokens are appended to that paradigm's
   * transducer; otherwise they are appended to the transducer of {@code current_section}. In both
   * cases the transducer is created and registered on first use, and the state reached after the
   * last token is marked as final.
   *
   * <p>A paradigm that is referenced before it has been registered is registered as a new, empty
   * transducer in every position (prefix, intermediate, suffix) instead of being passed on as
   * {@code null}.
   *
   * @param elements the list of tokens, in order
   * @throws RuntimeException if, while compiling a paradigm, a token is neither a paradigm
   *     reference, a single transduction nor a regular expression
   */
  private void insertEntryTokens(ArrayList<EntryToken> elements) {
    if (DEBUG) System.err.println("insertEntryTokens( " + elements);
    if (!current_paradigm.equals("")) {
      insertParadigmTokens(elements);
    } else {
      insertSectionTokens(elements);
    }
  }

  /** Appends the tokens to the transducer of {@code current_paradigm} and marks the end final. */
  private void insertParadigmTokens(ArrayList<EntryToken> elements) {
    TransducerComp t = paradigmOrEmpty(current_paradigm);
    int e = t.getInitial();

    for (EntryToken entry : elements) {
      if (entry.isParadigm()) {
        e = t.insertTransducer(e, paradigmOrEmpty(entry.paradigmName));
      } else if (entry.isSingleTransduction()) {
        e = matchTransduction(entry.leftSide, entry.rightSide, e, t);
      } else if (entry.isRegexp()) {
        e = insertRegexpToken(entry, e, t);
      } else {
        throw new RuntimeException(
            "Error (" + reader.getLocation().getLineNumber() + "): Invalid entry token.");
      }
    }
    t.setFinal(e);
  }

  /** Appends the tokens to the transducer of {@code current_section} and marks the end final. */
  private void insertSectionTokens(ArrayList<EntryToken> elements) {
    TransducerComp t = sections.get(current_section);
    if (t == null) {
      t = new TransducerComp();
      sections.put(current_section, t);
    }
    int e = t.getInitial();

    final int last = elements.size() - 1;
    for (int i = 0; i <= last; i++) {
      EntryToken entry = elements.get(i);
      if (entry.isParadigm()) {
        final String paradigmName = entry.paradigmName;
        // The "last" test comes first, so a lone paradigm token is handled as a suffix.
        if (i == last) {
          // Suffix paradigm: inserted once per section, later entries link into the stored states.
          if (!suffix_paradigms.containsKey(current_section)) {
            suffix_paradigms.put(current_section, new HashMap<String, Integer>());
          }
          if (suffix_paradigms.get(current_section).containsKey(paradigmName)) {
            t.linkStates(e, suffix_paradigms.get(current_section).get(paradigmName), 0);
            e = postsuffix_paradigms.get(current_section).get(paradigmName);
          } else {
            e = t.insertNewSingleTransduction(alphabet_cast00, e);
            suffix_paradigms.get(current_section).put(paradigmName, e);
            t.setEpsilon_Tag(0);
            e = t.insertTransducer(e, paradigmOrEmpty(paradigmName));
            if (!postsuffix_paradigms.containsKey(current_section)) {
              postsuffix_paradigms.put(current_section, new HashMap<String, Integer>());
            }
            postsuffix_paradigms.get(current_section).put(paradigmName, e);
          }
        } else if (i == 0) {
          // Prefix paradigm: inserted once per section, later entries resume from its end state.
          if (!prefix_paradigms.containsKey(current_section)) {
            prefix_paradigms.put(current_section, new HashMap<String, Integer>());
          }
          if (prefix_paradigms.get(current_section).containsKey(paradigmName)) {
            e = prefix_paradigms.get(current_section).get(paradigmName);
          } else {
            t.setEpsilon_Tag(0);
            e = t.insertTransducer(e, paradigmOrEmpty(paradigmName));
            prefix_paradigms.get(current_section).put(paradigmName, e);
          }
        } else {
          // Intermediate paradigm: always inserted in place.
          t.setEpsilon_Tag(0);
          e = t.insertTransducer(e, paradigmOrEmpty(paradigmName));
        }
      } else if (entry.isRegexp()) {
        e = insertRegexpToken(entry, e, t);
      } else {
        e = matchTransduction(entry.leftSide, entry.rightSide, e, t);
      }
    }
    t.setFinal(e);
  }

  /** Returns the transducer registered for a paradigm, registering a new empty one if absent. */
  private TransducerComp paradigmOrEmpty(String name) {
    TransducerComp paradigm = paradigms.get(name);
    if (paradigm == null) {
      paradigm = new TransducerComp();
      paradigms.put(name, paradigm);
    }
    return paradigm;
  }

  /** Compiles the token's regular expression and inserts it into {@code t} after {@code state}. */
  private int insertRegexpToken(EntryToken entry, int state, TransducerComp t) {
    RegexpCompiler analyzer = new RegexpCompiler();
    analyzer.initialize(alphabet);
    analyzer.compile(entry.regexp);
    t.setEpsilon_Tag(alphabet_cast00);
    return t.insertTransducer(state, analyzer.getTransducer());
  }