Exemplo n.º 1
0
  public static void findViterbiPath() {
    try {
      System.out.println("name of forward bitext");
      String fwBitext = UTIL_UserInput.fileNameInput();
      ArrayList<Bitext> fwList = (ArrayList<Bitext>) UTIL_FileOperations.openObject(fwBitext);

      System.out.println("name of backward bitext");
      String bwBitext = UTIL_UserInput.fileNameInput();
      ArrayList<Bitext> bwList = (ArrayList<Bitext>) UTIL_FileOperations.openObject(bwBitext);

      System.out.println("name of forward map of the corpus");
      String fwm = UTIL_UserInput.fileNameInput();
      HashMap<WordPair, Double> fwMap =
          (HashMap<WordPair, Double>) UTIL_FileOperations.openObject(fwm);

      System.out.println("name of backward map of the corpus");
      String bwm = UTIL_UserInput.fileNameInput();
      HashMap<WordPair, Double> bwMap =
          (HashMap<WordPair, Double>) UTIL_FileOperations.openObject(bwm);

      IBM1EM.extractViterbi(fwMap, bwMap, fwList, bwList);
    } catch (Exception e) {
      e.printStackTrace(System.out);
    }
  }
Exemplo n.º 2
0
  public static void extractPhrase() {

    System.out.println("name of forward alignment");
    String fwA = UTIL_UserInput.fileNameInput();
    HashMap<Bitext, AlignPair> currentMap =
        (HashMap<Bitext, AlignPair>) UTIL_FileOperations.openObject(fwA);
    System.out.println("name of backward alignment");
    String bwA = UTIL_UserInput.fileNameInput();
    HashMap<Bitext, AlignPair> bwAlignment =
        (HashMap<Bitext, AlignPair>) UTIL_FileOperations.openObject(bwA);
    String mapName = "forward.aMap";
    for (int i = 0; i < 2; i++) {
      if (i == 1) {
        currentMap = bwAlignment;
        mapName = "reverse.aMap";
      }
      // Iterator<Entry<Bitext, AlignPair>> fIt = fwAlignment.entrySet()
      // .iterator();
      HashMap<Bitext, HashMap<Integer, Integer>> alignmentMap =
          new HashMap<Bitext, HashMap<Integer, Integer>>();
      Iterator<Entry<Bitext, AlignPair>> cait = currentMap.entrySet().iterator();
      while (cait.hasNext()) {
        HashMap<Integer, Integer> sourceTargetPair = new HashMap<Integer, Integer>();
        Map.Entry<Bitext, AlignPair> pair = cait.next();
        AlignPair forwardAlign = pair.getValue();
        Bitext currentBitext = pair.getKey();
        AlignPair backwardAlign = bwAlignment.get(currentBitext);
        ArrayList<WordPair> forwardViterbi = forwardAlign.getAlignments();
        Integer sourceIndex = 0;

        ArrayList<Integer> targetsUsed = new ArrayList<Integer>();
        // for forward alignment
        for (String s : currentBitext.getSource()) {
          Integer targetIndex = 0;
          Integer closestTarget = currentBitext.getTarget().length;
          for (WordPair wp : forwardViterbi) {
            if (wp.e.equals(s)) {
              String target = wp.f;
              ArrayList<Integer> targetPositions = new ArrayList<Integer>();
              for (String t : currentBitext.getTarget()) {
                if (t.equals(target)) {
                  targetPositions.add(targetIndex);
                }
                targetIndex++;
              }
              Integer source = sourceIndex;

              if (targetPositions.size() > 1) {
                System.out.println("multiple target words");
                Double sourcePercentage = (double) (sourceIndex / currentBitext.getSource().length);
                Double smallest = 100000000000000d;
                for (Integer tp : targetPositions) {
                  Double targetPercentage = (double) (tp / currentBitext.getTarget().length);
                  Double distance = Math.abs(sourcePercentage - targetPercentage);
                  if (smallest > distance) {
                    smallest = distance;
                    closestTarget = tp;
                  }
                }

              } else if (targetPositions.size() == 1) {
                closestTarget = targetPositions.get(0);
              }

              break;
            }
          }
          System.out.println(sourceIndex);
          System.out.println(closestTarget);
          if (closestTarget >= currentBitext.getTarget().length) {
            System.out.println("Something is wrong");
            System.exit(1);
          }
          sourceTargetPair.put(sourceIndex, closestTarget);
          sourceIndex++;
        }
        System.out.println("For bitext id: " + currentBitext.hashCode());
        Iterator<Entry<Integer, Integer>> ait = sourceTargetPair.entrySet().iterator();
        while (ait.hasNext()) {
          Map.Entry<Integer, Integer> ip = ait.next();
          System.out.println(
              ip.getKey()
                  + " : "
                  + ip.getValue()
                  + "----"
                  + currentBitext.getSource()[ip.getKey()]
                  + " : "
                  + currentBitext.getTarget()[ip.getValue()]);
        }
        alignmentMap.put(currentBitext, sourceTargetPair);
      }

      UTIL_FileOperations.store(alignmentMap, mapName);
    }
  }