Пример #1
0
 /**
  * Builds one {@link FrameFeatures} entry per line of {@code idResult} and hands the collected
  * entries to the decoder.
  *
  * <p>For each line a {@link DataPointWithFrameElements} is created from {@code sentence} and the
  * line. The frame elements looked up for the data point's frame are then paired with every
  * candidate span returned by {@code DataPrep.findSpans}, and the extracted feature set of each
  * (frame element, span) pair is converted to index form via {@code convertToIdxs}.
  *
  * @param sentence the sentence every data point is built from
  * @param idResult the lines to process; also forwarded unchanged to the decoder
  * @param kBest forwarded unchanged to {@code decoder.decodeAll}
  * @return whatever {@code decoder.decodeAll} returns for the collected frame features
  * @throws IOException if one of the underlying calls raises it
  */
 public List<String> predictArgumentLines(Sentence sentence, List<String> idResult, int kBest)
     throws IOException {
   final FeatureExtractor extractor = new FeatureExtractor();
   final List<FrameFeatures> allFrameFeatures = Lists.newArrayList();

   for (String idLine : idResult) {
     final DataPointWithFrameElements dataPoint = new DataPointWithFrameElements(sentence, idLine);
     final String frameName = dataPoint.getFrameName();
     final DependencyParses parses = dataPoint.getParses();

     // The target is described by its first and last token index.
     final int[] targetIdxs = dataPoint.getTargetTokenIdxs();
     final int targetFirst = targetIdxs[0];
     final int targetLast = targetIdxs[targetIdxs.length - 1];

     // NOTE(review): the meaning of the literal 1 passed to findSpans is not visible here.
     final List<SpanAndParseIdx> candidateSpans = DataPrep.findSpans(dataPoint, 1);
     final List<String> roles =
         Lists.newArrayList(frameElementsForFrame.lookupFrameElements(frameName));

     // One array per frame element, in the same order as `roles`; each array holds one entry
     // per candidate span, in the same order as `candidateSpans`.
     final List<SpanAndCorrespondingFeatures[]> candidatesByRole = Lists.newArrayList();
     for (String role : roles) {
       final SpanAndCorrespondingFeatures[] candidates =
           new SpanAndCorrespondingFeatures[candidateSpans.size()];
       int slot = 0;
       for (SpanAndParseIdx candidate : candidateSpans) {
         final Range0Based range = candidate.span;
         final DependencyParse parse = parses.get(candidate.parseIdx);
         final Set<String> features =
             extractor.extractFeatures(dataPoint, frameName, role, range, parse).elementSet();
         final int[] featureIdxs = convertToIdxs(features);
         final int[] bounds = {range.start, range.end};
         candidates[slot++] = new SpanAndCorrespondingFeatures(bounds, featureIdxs);
       }
       candidatesByRole.add(candidates);
     }

     final FrameFeatures frameFeatures =
         new FrameFeatures(frameName, targetFirst, targetLast, roles, candidatesByRole);
     allFrameFeatures.add(frameFeatures);
   }

   // NOTE(review): the meaning of the literal 0 passed to decodeAll is not visible here.
   return decoder.decodeAll(allFrameFeatures, idResult, 0, kBest);
 }
Пример #2
0
 /**
  * Assembles a {@link Semafor} from the model files located under {@code modelDirectory}.
  *
  * <p>The method loads, in this order: the required-data object (from which the related-word set
  * is taken), the frame identifier via {@code GraphBasedFrameIdentifier.getInstance}, a
  * {@link RoteSegmenter} built from the related words, the argument-identification feature index
  * from the alphabet file, the frame element dictionary, and the decoder (from the argument model
  * file together with the alphabet file). A progress message is written to {@code System.err}
  * before the feature index is read.
  *
  * @param modelDirectory directory that contains the model files
  * @return a {@code Semafor} wired up with the loaded components
  * @throws IOException if one of the loading calls raises it
  * @throws ClassNotFoundException if one of the loading calls raises it
  * @throws URISyntaxException if one of the loading calls raises it
  */
 public static Semafor getSemaforInstance(String modelDirectory)
     throws IOException, ClassNotFoundException, URISyntaxException {
   // Locate each model file relative to the model directory.
   final File requiredDataFile = new File(modelDirectory, REQUIRED_DATA_FILENAME);
   final File alphabetFile = new File(modelDirectory, ALPHABET_FILENAME);
   final File frameElementMapFile = new File(modelDirectory, FRAME_ELEMENT_MAP_FILENAME);
   final File argModelFile = new File(modelDirectory, ARG_MODEL_FILENAME);
   final String alphabetPath = alphabetFile.getAbsolutePath();

   // Unpack the required data; only the related-word set is used here.
   final RequiredDataForFrameIdentification requiredData =
       readObject(requiredDataFile.getAbsolutePath());
   final Set<String> relatedWords = requiredData.getAllRelatedWords();

   final GraphBasedFrameIdentifier frameIdentifier =
       GraphBasedFrameIdentifier.getInstance(modelDirectory);
   final RoteSegmenter roteSegmenter = new RoteSegmenter(relatedWords);

   System.err.println("Initializing alphabet for argument identification..");
   final Map<String, Integer> featureIndex = DataPrep.readFeatureIndex(new File(alphabetPath));
   final FEDict feDict = FEDict.fromFile(frameElementMapFile.getAbsolutePath());
   final Decoding argDecoder = Decoding.fromFile(argModelFile.getAbsolutePath(), alphabetPath);

   return new Semafor(
       relatedWords, feDict, roteSegmenter, frameIdentifier, argDecoder, featureIndex);
 }