Esempio n. 1
0
  /**
   * Sets up the target-language resources that are configured, and attaches a processor for each
   * one that gets loaded.
   *
   * <p>Two resources are considered, both created for the target language: a slang dictionary
   * (paired with a {@code VariantsSlangProcessor}) and an abbreviation dictionary (paired with an
   * {@code AbbreviationsProcessor}). For each one, when {@code isConfigured} accepts the given
   * properties, the resource is loaded, its {@code register()} is called, and the matching
   * processor is appended to {@code trgResourceProcessors}. Otherwise the resource is skipped.
   *
   * @param propertiesManager configuration source; stored on this instance. Must not be null
   *     (enforced by an assertion only).
   * @return {@code true} when setup finished without an exception, {@code false} otherwise; the
   *     same value is stored in {@code isInitialized}
   */
  public boolean initialize(PropertiesManager propertiesManager) {
    assert propertiesManager != null;
    this.propertiesManager = propertiesManager;
    // TODO: discover the resources and processors by reflection from the feature sets; this
    // requires changing how the framework starts up.
    boolean succeeded;
    try {
      // The source language is fetched but nothing below uses it yet.
      String sourceLanguage = this.getSrcLang();
      String targetLanguage = this.getTrgLang();

      // Slang dictionary: target side only.
      SlangDictionary slang = new SlangDictionary(targetLanguage);
      if (slang.isConfigured(propertiesManager)) {
        slang.load(propertiesManager);
        slang.register();
        this.trgResourceProcessors.add(new VariantsSlangProcessor(slang));
      }

      // Abbreviation dictionary.
      AbbreviationDictionary abbreviations = new AbbreviationDictionary(targetLanguage);
      if (abbreviations.isConfigured(propertiesManager)) {
        abbreviations.load(propertiesManager);
        abbreviations.register();
        this.trgResourceProcessors.add(new AbbreviationsProcessor(abbreviations));
      }

      // Further features would be set up here.

      succeeded = true;
    } catch (Exception e) {
      // NOTE(review): the exception is discarded, so callers only learn that setup failed, not
      // why. Confirm whether this is intentional.
      succeeded = false;
    }
    isInitialized = succeeded;
    return succeeded;
  }
 /**
  * Counts possible abbreviation conflicts for one sentence and stores the result on it.
  *
  * <p>For every token of the sentence that equals a dictionary abbreviation, each entry of
  * {@code position2abbrev} holding a <em>different</em> abbreviation whose meaning set shares at
  * least one meaning with the token's abbreviation counts as one conflict.
  *
  * <p>Two values are written to the sentence:
  *
  * <ul>
  *   <li>{@code abbrev_conflicts}: the number of conflicts found;
  *   <li>{@code abbrev_conflicts_divided_by_count}: that number divided by the number of entries
  *       in {@code position2abbrev}, or {@code 0.0} when the map is empty.
  * </ul>
  *
  * @param sentence the sentence whose tokens are inspected and on which the values are set
  */
 @Override
 public void processNextSentence(Sentence sentence) {
   assert abbreviationDictionary != null;
   int abbrevConflicts = 0;
   Set<String> abbrevs = abbreviationDictionary.getAbbrevSet();
   for (String abbrev : abbrevs) {
     for (String word : sentence.getTokens()) {
       if (!word.equals(abbrev)) {
         continue;
       }
       // Only the recorded abbreviations matter here; the position keys are not consulted.
       for (String aAbbrev : position2abbrev.values()) {
         // Compare by content. '!=' on Strings tests object identity, so an equal abbreviation
         // held in a distinct String object would wrongly be treated as a different one.
         if (abbrev.equals(aAbbrev)) {
           continue; // same abbreviation, nothing to compare
         }
         // retainAll mutates its receiver, so intersect on a fresh copy.
         Set<String> meaningSetA =
             new HashSet<String>(abbreviationDictionary.getMeaningSetOfAbbreviation(aAbbrev));
         Set<String> meaningSetB = abbreviationDictionary.getMeaningSetOfAbbreviation(abbrev);
         meaningSetA.retainAll(meaningSetB);
         if (!meaningSetA.isEmpty()) {
           // The two abbreviations share at least one meaning.
           abbrevConflicts++;
         }
       }
     }
   }
   sentence.setValue("abbrev_conflicts", abbrevConflicts); // number of conflicts
   if (position2abbrev.size() > 0) {
     sentence.setValue(
         "abbrev_conflicts_divided_by_count", abbrevConflicts * 1.0 / position2abbrev.size());
   } else {
     // Avoid dividing by zero when no occurrence has been recorded.
     sentence.setValue("abbrev_conflicts_divided_by_count", 0.0);
   }
 }
 /**
  * Reads the target file line by line and records every occurrence of a dictionary abbreviation.
  *
  * <p>Each line is trimmed and split on runs of whitespace. For every token equal to an
  * abbreviation from {@code abbreviationDictionary.getAbbrevSet()}, an entry is put into
  * {@code position2abbrev} under the key {@code "<line>-<token>"}, where both numbers are
  * zero-based. Existing entries of the map are not cleared first.
  *
  * <p>Any exception raised while reading is printed to standard error and ends the scan; entries
  * recorded up to that point are kept. The reader is always closed.
  *
  * @param context supplies the path of the target file via {@code getTargetFilePath()}
  */
 @Override
 public void globalProcessing(Context context) {
   assert abbreviationDictionary != null;
   Set<String> abbrevs = abbreviationDictionary.getAbbrevSet();
   BufferedReader br = null;
   try {
     // NOTE(review): no charset is given, so the platform default is used to decode the file;
     // confirm this matches the encoding of the target file.
     br =
         new BufferedReader(
             new InputStreamReader(new FileInputStream(context.getTargetFilePath())));
     String strLine;
     int lineCount = 0;
     while ((strLine = br.readLine()) != null) {
       // Tokenize once per line: the tokens do not depend on the abbreviation being matched,
       // so splitting inside the abbreviation loop only repeated the same regex work.
       String[] words = strLine.trim().split("\\s+");
       for (String abbrev : abbrevs) {
         for (int pos = 0; pos < words.length; pos++) {
           if (words[pos].equals(abbrev)) {
             position2abbrev.put(lineCount + "-" + pos, abbrev);
           }
         }
       }
       lineCount++;
     }
   } catch (Exception e) {
     e.printStackTrace();
   } finally {
     if (br != null) {
       try {
         br.close();
       } catch (IOException e) {
         e.printStackTrace();
       }
     }
   }
 }