/**
 * Scans the file at {@code context.getTargetFilePath()} line by line and records every
 * whitespace-delimited word that equals an abbreviation from the dictionary.
 *
 * <p>Each hit is stored in {@code position2abbrev} under the key
 * {@code "<lineIndex>-<wordIndex>"}, where both indices are zero-based and the word index
 * counts the words of the trimmed line.
 *
 * <p>Any exception raised while reading is printed to standard error and not propagated, so
 * a failure leaves {@code position2abbrev} with whatever was collected up to that point.
 *
 * <p>NOTE(review): the file is decoded with the platform default charset because no charset
 * is passed to {@link InputStreamReader}; confirm this matches the input files.
 *
 * @param context supplies the path of the file to scan
 */
@Override
public void globalProcessing(Context context) {
    assert abbreviationDictionary != null;
    Set<String> abbrevs = abbreviationDictionary.getAbbrevSet();
    BufferedReader br = null;
    try {
        br = new BufferedReader(
                new InputStreamReader(new FileInputStream(context.getTargetFilePath())));
        String strLine;
        int lineCount = 0;
        while ((strLine = br.readLine()) != null) {
            // Split once per line; the result does not depend on the abbreviation.
            String[] words = strLine.trim().split("\\s+");
            for (String abbrev : abbrevs) {
                for (int pos = 0; pos < words.length; pos++) {
                    if (words[pos].equals(abbrev)) {
                        // Store the dictionary's own String instance, not the token
                        // read from the file.
                        position2abbrev.put(lineCount + "-" + pos, abbrev);
                    }
                }
            }
            lineCount++;
        }
    } catch (Exception e) {
        // Best effort: report the problem and keep whatever was collected so far.
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
@Override public void processNextSentence(Sentence sentence) { assert abbreviationDictionary != null; String strLine = sentence.getText(); int abbrevConflicts = 0; Set<String> abbrevs = abbreviationDictionary.getAbbrevSet(); for (String abbrev : abbrevs) { int pos = 0; for (String word : sentence.getTokens()) { if (word.equals(abbrev)) { String position = sentence.getIndex() + "-" + pos; for (Map.Entry<String, String> entry : position2abbrev.entrySet()) { String aPos = entry.getKey(); String aAbbrev = entry.getValue(); if (aAbbrev != abbrev) { // not the same one // find how close they are by meaning Set<String> meaningSetA = new HashSet<String>(abbreviationDictionary.getMeaningSetOfAbbreviation(aAbbrev)); Set<String> meaningSetB = abbreviationDictionary.getMeaningSetOfAbbreviation(abbrev); meaningSetA.retainAll(meaningSetB); if (meaningSetA.size() > 0) { abbrevConflicts++; } } } } } } sentence.setValue("abbrev_conflicts", abbrevConflicts); // number of conflicts if (position2abbrev.size() > 0) { sentence.setValue( "abbrev_conflicts_divided_by_count", abbrevConflicts * 1.0 / position2abbrev.size()); } else { sentence.setValue("abbrev_conflicts_divided_by_count", 0.0); } }