/**
 * Builds argument-identification features for every frame-identification line and passes them to
 * the decoder.
 *
 * <p>For each entry of {@code idResult} a {@link DataPointWithFrameElements} is created over
 * {@code sentence}. Candidate spans come from {@code DataPrep.findSpans}, and the frame elements
 * are looked up by the data point's frame name. Features are then extracted for every
 * (frame element, candidate span) pair and converted to indices with {@code convertToIdxs}.
 *
 * @param sentence the sentence each line of {@code idResult} refers to
 * @param idResult one line per identified frame, in the format accepted by the
 *     {@link DataPointWithFrameElements} constructor
 * @param kBest forwarded unchanged to {@code decoder.decodeAll}
 * @return whatever {@code decoder.decodeAll} returns for the collected frame features
 * @throws IOException declared by this method; presumably raised by decoding (confirm in Decoding)
 */
public List<String> predictArgumentLines(Sentence sentence, List<String> idResult, int kBest)
    throws IOException {
  final List<FrameFeatures> allFrameFeatures = Lists.newArrayList();
  final FeatureExtractor extractor = new FeatureExtractor();

  for (String idLine : idResult) {
    final DataPointWithFrameElements point = new DataPointWithFrameElements(sentence, idLine);
    final String frameName = point.getFrameName();
    final DependencyParses allParses = point.getParses();
    // The target is described by its first and last token index.
    final int firstTargetIdx = point.getTargetTokenIdxs()[0];
    final int lastTargetIdx = point.getTargetTokenIdxs()[point.getTargetTokenIdxs().length - 1];
    // NOTE(review): the meaning of the literal 1 is defined by DataPrep.findSpans.
    final List<SpanAndParseIdx> candidates = DataPrep.findSpans(point, 1);
    final List<String> roles =
        Lists.newArrayList(frameElementsForFrame.lookupFrameElements(frameName));

    // One array per frame element, holding one entry per candidate span (same order as above).
    final List<SpanAndCorrespondingFeatures[]> featuresPerRole = Lists.newArrayList();
    for (String role : roles) {
      final SpanAndCorrespondingFeatures[] row =
          new SpanAndCorrespondingFeatures[candidates.size()];
      int slot = 0;
      for (SpanAndParseIdx candidate : candidates) {
        final Range0Based range = candidate.span;
        final DependencyParse candidateParse = allParses.get(candidate.parseIdx);
        final Set<String> featureNames =
            extractor.extractFeatures(point, frameName, role, range, candidateParse).elementSet();
        final int[] featureIdxs = convertToIdxs(featureNames);
        final int[] bounds = {range.start, range.end};
        row[slot] = new SpanAndCorrespondingFeatures(bounds, featureIdxs);
        slot++;
      }
      featuresPerRole.add(row);
    }

    final FrameFeatures frameFeatures =
        new FrameFeatures(frameName, firstTargetIdx, lastTargetIdx, roles, featuresPerRole);
    allFrameFeatures.add(frameFeatures);
  }

  // NOTE(review): the meaning of the literal 0 is defined by Decoding.decodeAll.
  return decoder.decodeAll(allFrameFeatures, idResult, 0, kBest);
}
public static Semafor getSemaforInstance(String modelDirectory) throws IOException, ClassNotFoundException, URISyntaxException { final String requiredDataFilename = new File(modelDirectory, REQUIRED_DATA_FILENAME).getAbsolutePath(); final String alphabetFilename = new File(modelDirectory, ALPHABET_FILENAME).getAbsolutePath(); final String frameElementMapFilename = new File(modelDirectory, FRAME_ELEMENT_MAP_FILENAME).getAbsolutePath(); final String argModelFilename = new File(modelDirectory, ARG_MODEL_FILENAME).getAbsolutePath(); // unpack required data final RequiredDataForFrameIdentification r = readObject(requiredDataFilename); final Set<String> allRelatedWords = r.getAllRelatedWords(); final GraphBasedFrameIdentifier idModel = GraphBasedFrameIdentifier.getInstance(modelDirectory); final RoteSegmenter segmenter = new RoteSegmenter(allRelatedWords); System.err.println("Initializing alphabet for argument identification.."); final Map<String, Integer> argIdFeatureIndex = DataPrep.readFeatureIndex(new File(alphabetFilename)); final FEDict frameElementsForFrame = FEDict.fromFile(frameElementMapFilename); final Decoding decoder = Decoding.fromFile(argModelFilename, alphabetFilename); return new Semafor( allRelatedWords, frameElementsForFrame, segmenter, idModel, decoder, argIdFeatureIndex); }