/** * Initialize resources and processors that has been config, and register them in Resource Manager * * @param propertiesManager * @return */ public boolean initialize(PropertiesManager propertiesManager) { assert propertiesManager != null; this.propertiesManager = propertiesManager; // this.configurables = new ArrayList<Configurable>(); // read the config and initialize those resources and processors that has been config // TODO; only initialize the resources and processors by reflection from the feature sets, has // to change the framework to start try { String srcLang = this.getSrcLang(); String trgLang = this.getTrgLang(); // slang dict only for the target SlangDictionary trgSlangDict = new SlangDictionary(trgLang); if (trgSlangDict.isConfigured(propertiesManager)) { trgSlangDict.load(propertiesManager); trgSlangDict.register(); VariantsSlangProcessor p1 = new VariantsSlangProcessor(trgSlangDict); this.trgResourceProcessors.add(p1); } // abbreviation AbbreviationDictionary abbrevDict = new AbbreviationDictionary(trgLang); if (abbrevDict.isConfigured(propertiesManager)) { abbrevDict.load(propertiesManager); abbrevDict.register(); AbbreviationsProcessor p2 = new AbbreviationsProcessor(abbrevDict); this.trgResourceProcessors.add(p2); } // other features } catch (Exception e) { isInitialized = false; return false; } isInitialized = true; return true; }
@Override public void processNextSentence(Sentence sentence) { assert abbreviationDictionary != null; String strLine = sentence.getText(); int abbrevConflicts = 0; Set<String> abbrevs = abbreviationDictionary.getAbbrevSet(); for (String abbrev : abbrevs) { int pos = 0; for (String word : sentence.getTokens()) { if (word.equals(abbrev)) { String position = sentence.getIndex() + "-" + pos; for (Map.Entry<String, String> entry : position2abbrev.entrySet()) { String aPos = entry.getKey(); String aAbbrev = entry.getValue(); if (aAbbrev != abbrev) { // not the same one // find how close they are by meaning Set<String> meaningSetA = new HashSet<String>(abbreviationDictionary.getMeaningSetOfAbbreviation(aAbbrev)); Set<String> meaningSetB = abbreviationDictionary.getMeaningSetOfAbbreviation(abbrev); meaningSetA.retainAll(meaningSetB); if (meaningSetA.size() > 0) { abbrevConflicts++; } } } } } } sentence.setValue("abbrev_conflicts", abbrevConflicts); // number of conflicts if (position2abbrev.size() > 0) { sentence.setValue( "abbrev_conflicts_divided_by_count", abbrevConflicts * 1.0 / position2abbrev.size()); } else { sentence.setValue("abbrev_conflicts_divided_by_count", 0.0); } }
/**
 * Scans the target file and records where each known abbreviation occurs.
 *
 * <p>The file at {@code context.getTargetFilePath()} is read line by line. Each line is trimmed
 * and split on runs of whitespace; every token equal to an abbreviation from the dictionary is
 * stored in {@code position2abbrev} under the key {@code "<lineIndex>-<tokenIndex>"} (both
 * zero-based).
 *
 * <p>Any exception raised while reading is printed to standard error and otherwise ignored, so
 * {@code position2abbrev} may be left partially filled. Existing entries are not cleared first.
 *
 * @param context supplies the path of the target file to scan
 */
@Override
public void globalProcessing(Context context) {
  assert abbreviationDictionary != null;
  Set<String> abbrevs = abbreviationDictionary.getAbbrevSet();
  BufferedReader br = null;
  try {
    // NOTE(review): no charset is given, so the platform default is used to decode the file —
    // confirm that this matches the encoding of the target files.
    br =
        new BufferedReader(
            new InputStreamReader(new FileInputStream(context.getTargetFilePath())));
    String strLine;
    int lineCount = 0;
    while ((strLine = br.readLine()) != null) {
      // Tokenize once per line instead of once per abbreviation.
      String[] words = strLine.trim().split("\\s+");
      for (String abbrev : abbrevs) {
        for (int pos = 0; pos < words.length; pos++) {
          if (words[pos].equals(abbrev)) {
            position2abbrev.put(lineCount + "-" + pos, abbrev);
          }
        }
      }
      lineCount++;
    }
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    if (br != null) {
      try {
        br.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
}