Пример #1
0
 /**
  * Serializes the constants table to {@code output}.
  *
  * <p>The entry count is written first with {@code Compression.multibyte_write}; then, for each
  * entry in the iteration order of {@code constants.entrySet()}, the name is written with
  * {@code Compression.String_write} followed by its value with
  * {@code Compression.multibyte_write}.
  *
  * @param output stream that receives the encoded table
  * @throws IOException declared for failures of the underlying writes
  */
 public void write(OutputStream output) throws IOException {
   final int entryCount = constants.size();
   Compression.multibyte_write(entryCount, output);

   for (Map.Entry<String, Integer> constant : constants.entrySet()) {
     final String name = constant.getKey();
     Compression.String_write(name, output);
     Compression.multibyte_write(constant.getValue(), output);
   }
 }
Пример #2
0
  /**
   * Creates the node for state {@code node_no}, registers it in {@code node_list2}, and fills in
   * its transitions by decoding the state's record from {@code byteBuffer}.
   *
   * <p>The record's start offset is looked up in {@code byteBufferPositions} at byte index
   * {@code node_no * 4}. The record holds a transition count followed by one (tag increment,
   * target increment) pair per transition.
   *
   * @param node_no number of the state to load
   * @return the newly created node (also stored in {@code node_list2} under {@code node_no})
   */
  Node loadNode(int node_no) {
    Node loaded = new Node();
    // Register the node before decoding its transitions.
    node_list2.put(node_no, loaded);

    // Seek to the start of this state's record.
    int recordStart = byteBufferPositions.getInt(node_no * 4);
    byteBuffer.position(recordStart);

    // Typically 20-40 transitions per state, max seen is 694.
    int transitionCount = Compression.multibyte_read(byteBuffer);
    loaded.initTransitions(transitionCount);

    // Tags are stored as increments over the previous transition's tag.
    int tag = 0;
    for (int remaining = transitionCount; remaining > 0; remaining--) {
      tag += Compression.multibyte_read(byteBuffer);

      // The target is stored relative to this state and wraps around the state count.
      int targetIncrement = Compression.multibyte_read(byteBuffer);
      int targetState = (node_no + targetIncrement) % number_of_states;

      IntegerPair symbols = alphabet.decode(tag);
      int inputSymbol = symbols.first;
      int outputSymbol = symbols.second;
      loaded.addTransition(inputSymbol, outputSymbol, targetState);
    }

    return loaded;
  }
Пример #3
0
 /**
  * Replaces the contents of the constants table with the table encoded in {@code input}.
  *
  * <p>Any existing entries are cleared first. The stream is expected to hold an entry count
  * followed by that many (name, value) pairs, each read with {@code Compression.String_read}
  * and {@code Compression.multibyte_read} and stored through {@code setConstant}.
  *
  * <p>Failures are propagated to the caller instead of being printed and swallowed, so a
  * truncated or unreadable stream can no longer leave a silently half-filled table.
  *
  * @param input stream positioned at the start of an encoded constants table
  * @throws IOException if reading from {@code input} fails
  */
 public void read(InputStream input) throws IOException {
   if (constants != null && !constants.isEmpty()) {
     constants.clear();
   }

   int size = Compression.multibyte_read(input);
   // "<" rather than "!=" so that a negative decoded size cannot keep the loop running.
   for (int i = 0; i < size; i++) {
     String str = Compression.String_read(input);
     int constant = Compression.multibyte_read(input);
     setConstant(str, constant);
   }
 }
  /**
   * Round-trip check for the compiler output: parses a dictionary, prints per-section statistics,
   * writes the compiled transducers to {@code testTransducer2.bin}, reads that file back, and
   * compares every section read back with the one held in memory.
   *
   * <p>The input stream on the written file is closed in a {@code finally} block, so it is
   * released even when reading or a section comparison fails.
   *
   * @param args ignored
   * @throws IOException declared for the file operations performed here
   * @throws RuntimeException if a section read back does not compare equal to the original
   */
  public static void main(String[] args) throws IOException {

    Compile c = new Compile();
    c.parse("testdata/apertium-fr-es.fr.dix", Compile.COMPILER_RESTRICTION_LR_VAL);

    for (String s : c.sections.keySet()) {
      int stateCount = c.sections.get(s).transitions.size();
      System.out.println("considering transducer of section " + s);
      System.out.println("number of states : " + stateCount);
      int max = 0;
      float average = 0; // running sum here; divided by the state count when printed
      for (int i = 0; i < stateCount; i++) {
        int outgoing = c.sections.get(s).transitions.get(i).size();
        average += outgoing;
        max = Math.max(max, outgoing);
      }
      System.out.println("maximal number of transitions leaving a state " + max);
      System.out.println(
          "average number of transitions leaving a state " + average / ((float) stateCount));
    }

    c.write("testTransducer2.bin");

    Map<String, TransducerComp> sections = new HashMap<String, TransducerComp>();
    InputStream input = new BufferedInputStream(new FileInputStream("testTransducer2.bin"));
    try {
      // The letters string and the alphabet are not used below, but both must be consumed so
      // the stream is positioned at the section count.
      Compression.String_read(input);
      Alphabet.read(input);

      for (int remaining = Compression.multibyte_read(input); remaining > 0; remaining--) {
        String name = Compression.String_read(input);

        System.out.println("reading : " + name);
        TransducerComp t = new TransducerComp();
        Transducer.read(t, input, 0);
        sections.put(name, t);

        // Compare right away when the section also exists in the compiled original.
        if (c.sections.get(name) != null && sections.get(name) != null) {
          boolean same = c.sections.get(name).DEBUG_compare(sections.get(name));
          if (!same) {
            throw new RuntimeException(name + " didnt compare");
          }
          System.out.println(name + " passed comparison");
        }
      }
    } finally {
      input.close();
    }

    for (String s : c.sections.keySet()) {
      int count1 = 0;
      int max1 = 0;
      int count2 = 0;
      int max2 = 0;

      // Original transducer: total transition count and highest state index visited.
      for (int i = 0; i < c.sections.get(s).transitions.size(); i++) {
        max1 = Math.max(max1, i);
        for (Integer j : c.sections.get(s).transitions.get(i).keySet()) {
          count1 += c.sections.get(s).transitions.get(i).get(j).size();
        }
      }

      // Transducer read back from disk: same two figures.
      for (int i = 0; i < sections.get(s).transitions.size(); i++) {
        max2 = Math.max(max2, i);
        for (Integer j : sections.get(s).transitions.get(i).keySet()) {
          count2 += sections.get(s).transitions.get(i).get(j).size();
        }
      }

      System.out.println("comparing transducers of section " + s);
      System.out.println("original transducer : " + c.sections.get(s));
      System.out.println("original transducer has " + count1 + " transitions");
      System.out.println("original transducer higher state is " + max1);
      System.out.println("DEBUG_read transducer : " + sections.get(s));
      System.out.println("read transducer has " + count2 + " transitions");
      System.out.println("read transducer higher state is " + max2);

      boolean same = c.sections.get(s).DEBUG_compare(sections.get(s));
      if (!same) {
        throw new RuntimeException(s + " didnt compare");
      }
      System.out.println(s + " passed comparison");
    }
  }
Пример #5
0
  /**
   * Reads this transducer's header from {@code input} and prepares the per-state offset index in
   * {@code byteBufferPositions}.
   *
   * <p>Steps, in order: read the initial state id; read and decode the list of final state ids
   * into {@code final_ids}; read the number of states; map {@code cachedFile} as the offset
   * index. If the mapped index is read-only, its last int is used to move {@code input} to the
   * end of the transducer and the method returns. Otherwise every state record is walked once,
   * its start position is appended to the index, and the end position is appended last.
   *
   * @param input buffer positioned at the start of the transducer; kept for later node loading
   * @param alphabet alphabet stored on this transducer for decoding transition tags
   * @param cachedFile file handed to {@code IOUtils.mapByteBuffer} to back the offset index
   * @throws IOException declared for failures while mapping or reading
   */
  public void read(ByteBuffer input, Alphabet alphabet, File cachedFile) throws IOException {

    initial_id = Compression.multibyte_read(input); // 0 for eo-en.dix
    final int finalCount = Compression.multibyte_read(input); // 5 for eo-en.dix

    this.alphabet = alphabet;

    // The final state ids come first, each stored as an increment over the previous one
    // ([679, 14875, 27426, 27883, 35871] for eo-en.dix once decoded).
    final int[] decodedFinals = new int[finalCount];
    int runningId = 0;
    for (int k = 0; k < finalCount; k++) {
      runningId += Compression.multibyte_read(input);
      decodedFinals[k] = runningId;
    }
    for (int finalId : decodedFinals) {
      final_ids.add(finalId);
    }

    number_of_states = Compression.multibyte_read(input); // 46191 for eo-en.dix

    node_list2 = new HashMap<Integer, Node>(1000);

    // Keep reference to the buffer for delayed node loading.
    byteBuffer = input;

    // One 4-byte int per state, plus one extra int holding the index of the end of transducer.
    final int cacheFileSize = 4 * (number_of_states + 1);
    byteBufferPositions = IOUtils.mapByteBuffer(cachedFile, cacheFileSize);

    final boolean indexIsReadOnly = byteBufferPositions.isReadOnly();

    if (FSTProcessor.DEBUG) {
      System.err.println(
          "TransducerExe read states:"
              + number_of_states
              + "  cachedFile="
              + cachedFile
              + " "
              + indexIsReadOnly
              + " "
              + byteBufferPositions);
    }

    if (indexIsReadOnly) {
      // A read-only index is treated as an existing cache: jump straight to the end position.
      final int endOfTransducer = byteBufferPositions.getInt(number_of_states * 4);
      input.position(endOfTransducer);
      return;
    }

    // No cache. Walk every state record and remember where it starts.
    for (int state = 0; state < number_of_states; state++) {
      byteBufferPositions.putInt(input.position());

      // Typically 20-40 transitions per state, max seen is 694.
      final int transitionCount = Compression.multibyte_read(input);

      if (!DELAYED_NODE_LOADING) {
        loadNode(state); // skips the correct number of positions
      } else {
        // Two multibyte values per transition are skipped without decoding them.
        Compression.multibyte_skip(input, 2 * transitionCount);
      }
    }

    byteBufferPositions.putInt(input.position()); // Remember end position
  }