Code example #1
    public boolean addSortedExclusive(Annotation annot) {
      Annotation currAnot = null;

      // overlapping check
      for (int i = 0; i < size(); ++i) {
        currAnot = (Annotation) get(i);
        if (annot.overlaps(currAnot)) {
          return false;
        } // if
      } // for

      long annotStart = annot.getStartNode().getOffset().longValue();
      long currStart;
      // insert
      for (int i = 0; i < size(); ++i) {
        currAnot = (Annotation) get(i);
        currStart = currAnot.getStartNode().getOffset().longValue();
        if (annotStart < currStart) {
          insertElementAt(annot, i);
          /*
          Out.prln("Insert start: "+annotStart+" at position: "+i+" size="+size());
          Out.prln("Current start: "+currStart);
          */
          return true;
        } // if
      } // for

      int size = size();
      insertElementAt(annot, size);
      // Out.prln("Insert start: "+annotStart+" at size position: "+size);
      return true;
    } // addSortedExclusive
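A minimal usage sketch (assuming, as in example #15 below, that this method lives on a Vector-based SortedAnnotationList and that annots is an iterable of GATE Annotation objects); an annotation that overlaps one already in the list is rejected and the method returns false:

    // Sketch only: SortedAnnotationList is assumed to be the Vector subclass
    // that declares addSortedExclusive(Annotation) as shown above.
    SortedAnnotationList sorted = new SortedAnnotationList();
    for (Annotation a : annots) {          // annots: assumed Iterable<Annotation>
      if (!sorted.addSortedExclusive(a)) {
        // 'a' overlaps an annotation already in the list and was skipped
      }
    }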
Code example #2
File: SeminarDatabase.java Project: bklimt/irlab
 public SeminarDatabase(String filename) throws FileNotFoundException, IOException {
   this.filename = filename;
   seminars = new HashMap();
   BufferedReader reader = new BufferedReader(new FileReader(filename));
   String line = reader.readLine();
   while (line != null) {
     String[] parts = line.split("\t");
     String id = parts[0];
     int count = (new Integer(parts[1])).intValue();
     Vector annotationVector = new Vector();
     for (int i = 0; i < count; i++) {
       String line2 = reader.readLine();
       String[] parts2 = line2.split("\t");
       String type = parts2[0];
       int offset = (new Integer(parts2[1])).intValue();
       int length = (new Integer(parts2[2])).intValue();
       Annotation annotation = new Annotation(type, offset, length);
       annotation.text = parts2[3];
       annotationVector.add(annotation);
     }
     Annotation[] annotations = (Annotation[]) annotationVector.toArray(new Annotation[0]);
     Seminar seminar = new Seminar(annotations);
     if (seminars.get(id) == null) {
       seminars.put(id, new Vector());
     }
     ((Vector) seminars.get(id)).add(seminar);
     line = reader.readLine();
   }
   reader.close(); // close the reader once all seminar records have been read
 }
Code example #3
File: APFtoXML.java Project: rgrishman/jet
 /** splits document 'doc' into sentences, adding 'sentence' annotations */
 static void addSentences(Document doc) {
   SpecialZoner.findSpecialZones(doc);
   Vector<Annotation> textSegments = doc.annotationsOfType("TEXT");
   if (textSegments == null) {
     System.out.println("No <TEXT> in document");
     return;
   }
   for (Annotation ann : textSegments) {
     Span textSpan = ann.span();
     // check document case
     Ace.monocase = Ace.allLowerCase(doc);
     // split into sentences
     SentenceSplitter.split(doc, textSpan);
   }
   Vector<Annotation> sentences = doc.annotationsOfType("sentence");
   if (sentences != null) {
     int sentNo = 0;
     for (Annotation sentence : sentences) {
       sentNo++;
       sentence.put("ID", "SENT-" + sentNo);
     }
   }
   doc.removeAnnotationsOfType("dateline");
   doc.removeAnnotationsOfType("textBreak");
   doc.shrink("sentence");
 }
Code example #4
    private boolean hasParamAnnotation(Method method) {
      Annotation[][] paramAnnotationArrays = method.getParameterAnnotations();
      for (Annotation[] paramAnnotations : paramAnnotationArrays)
        for (Annotation paramAnnotation : paramAnnotations)
          if (paramAnnotation.annotationType().isAssignableFrom(Param.class)) return true;

      return false;
    }
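For the check above to ever succeed, the Param annotation must be retained at runtime and applicable to parameters; a hypothetical declaration is sketched below (the real Param type in that project may differ and carry members):

    import java.lang.annotation.ElementType;
    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;
    import java.lang.annotation.Target;

    // Hypothetical parameter annotation; shown only to illustrate what
    // getParameterAnnotations() can report back to hasParamAnnotation().
    @Retention(RetentionPolicy.RUNTIME)
    @Target(ElementType.PARAMETER)
    public @interface Param {}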
Code example #5
File: Distributor.java Project: superyfwy/db4o
 public static boolean hasAnnotation(Annotation[] anns, Class<? extends Annotation> clazz) {
   for (Annotation ann : anns) {
     if (clazz == ann.annotationType()) {
       return true;
     }
   }
   return false;
 }
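A small usage sketch for the helper above, assuming hasAnnotation is in scope (e.g. via the Distributor class) and using @Deprecated, which is runtime-retained, as the annotation to look for:

    import java.lang.annotation.Annotation;
    import java.lang.reflect.Method;

    // Sketch: report whether a reflected method carries @Deprecated.
    static boolean isDeprecated(Method m) {
      Annotation[] anns = m.getAnnotations();   // all runtime-visible annotations on the method
      return hasAnnotation(anns, Deprecated.class);
    }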
Code example #6
  /**
   * train the tagger using the DocumentCollection in file 'trainingCollection'.
   * 'trainingCollection' should consist of documents which have been explicitly tagged with
   * part-of-speech information.
   */
  void train(String trainingCollection) {

    for (int i = 0; i < posTable.length; i++)
      tagTable[i] = new String[] {"constit", "cat", posTable[i], posTable[i]};

    // build ergodic HMM with one state for each POS (plus start and end states)

    HMMstate startState = new HMMstate("start", "", WordFeatureHMMemitter.class);
    posh.addState(startState);
    for (int j = 0; j < posTable.length; j++) startState.addArc(new HMMarc(posTable[j], 0));
    HMMstate endState = new HMMstate("end", "", WordFeatureHMMemitter.class);
    posh.addState(endState);
    for (int i = 0; i < posTable.length; i++) {
      String pos = posTable[i];
      HMMstate state = new HMMstate(pos, pos, WordFeatureHMMemitter.class);
      posh.addState(state);
      for (int j = 0; j < posTable.length; j++) state.addArc(new HMMarc(posTable[j], 0));
      state.addArc(new HMMarc("end", 0));
    }
    posh.resolveNames();

    posh.resetForTraining();
    annotator = new HMMannotator(posh);
    annotator.setTagTable(tagTable);
    annotator.setBItag(false);

    DocumentCollection col = new DocumentCollection(trainingCollection);
    col.open();
    for (int i = 0; i < col.size(); i++) {
      ExternalDocument doc = col.get(i);
      doc.open();
      System.out.println("Training from " + doc.fileName());

      // divide at endmarks (constit cat="."), adding "S" marks

      int posn = 0;
      int start = posn;
      Vector anns;
      while ((anns = doc.annotationsAt(posn, "constit")) != null) {
        Annotation ann = (Annotation) anns.get(0);
        posn = ann.span().end();
        String pos = (String) ann.get("cat");
        if (pos.equals(".")) {
          doc.annotate("S", new Span(start, posn), new FeatureSet());
          start = posn;
        }
      }
      annotator.train(doc);
      //  free up space taken by annotations on document
      doc.clearAnnotations();
    }
    posh.computeProbabilities();
  }
Code example #7
 /**
  * hides (adds the 'hidden' feature to) all annotations of type <I>type</I> that begin within
  * span <I>span</I>.
  */
 public static void hideAnnotations(Document doc, String type, Span span) {
   for (int posn = span.start(); posn < span.end(); posn++) {
     Vector annotations = doc.annotationsAt(posn, type);
     if (annotations != null) {
       for (int i = 0; i < annotations.size(); i++) {
         Annotation ann = (Annotation) annotations.elementAt(i);
         ann.put("hidden", "true");
         // Console.println ("Hiding " + ann);
       }
     }
   }
 }
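A usage sketch, relying only on calls that appear in the other examples here (annotationsOfType and span()); it would mark every "constit" annotation that starts inside a sentence as hidden:

    // Sketch: hide all "constit" annotations beginning inside each sentence span.
    Vector<Annotation> sentences = doc.annotationsOfType("sentence");
    if (sentences != null) {
      for (Annotation sentence : sentences) {
        hideAnnotations(doc, "constit", sentence.span());
      }
    }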
Code example #8
File: EntityFinder.java Project: rgrishman/jet
 static void writeDoc1(Document doc, PrintStream out) throws IOException {
   Vector<Annotation> entities = doc.annotationsOfType("entity");
   if (entities == null) {
     System.err.println("No Entity: " + doc);
     return;
   }
   Iterator<Annotation> entityIt = entities.iterator();
   int i = 0;
   while (entityIt.hasNext()) {
     Annotation entity = entityIt.next();
     Vector mentions = (Vector) entity.get("mentions");
     Iterator mentionIt = mentions.iterator();
     String nameType = (String) entity.get("nameType");
     while (mentionIt.hasNext()) {
       Annotation mention1 = (Annotation) mentionIt.next();
       Annotation mention2 = new Annotation("refobj", mention1.span(), new FeatureSet());
       mention2.put("objid", Integer.toString(i));
       if (nameType != null) {
         mention2.put("netype", nameType);
       }
       doc.addAnnotation(mention2);
     }
     i++;
   }
   // remove other annotations.
   String[] annotypes = doc.getAnnotationTypes();
   for (i = 0; i < annotypes.length; i++) {
     String t = annotypes[i];
     if (!(t.equals("tagger") || t.equals("refobj") || t.equals("ENAMEX"))) {
       doc.removeAnnotationsOfType(t);
     }
   }
   writeDocRaw(doc, out);
   return;
 }
Code example #9
File: PeakMLParser.java Project: joewandy/HDP-Align
  private static Vector<Annotation> parseAnnotations(Node parent) throws XmlParserException {
    Vector<Annotation> annotations = new Vector<Annotation>();

    NodeList nodes = parent.getChildNodes();
    for (int nodeid = 0; nodeid < nodes.getLength(); ++nodeid) {
      Node node = nodes.item(nodeid);
      if (node.getNodeType() != Node.ELEMENT_NODE) continue;

      Element element = (Element) node;
      if (element.getTagName().equals("annotation")) {
        String label = null, value = null, valuetype = null, unit = null;
        NodeList annotation_nodes = element.getChildNodes();
        for (int annotationid = 0; annotationid < annotation_nodes.getLength(); ++annotationid) {
          Node annotation_node = annotation_nodes.item(annotationid);
          if (annotation_node.getNodeType() != Node.ELEMENT_NODE) continue;

          Element annotation_element = (Element) annotation_node;
          if (annotation_element.getTagName().equals("label"))
            label = annotation_element.getTextContent();
          else if (annotation_element.getTagName().equals("value"))
            value = annotation_element.getTextContent();
          else if (annotation_element.getTagName().equals("valuetype"))
            valuetype = annotation_element.getTextContent();
        }

        if (label == null || value == null || valuetype == null)
          throw new XmlParserException("Annotation is missing either: label, value or valuetype");

        Annotation annotation =
            new Annotation(label, value, Annotation.ValueType.valueOf(valuetype));
        annotation.setUnit(unit);
        if (annotation.getValueType() == Annotation.ValueType.ONTOLOGY)
          annotation.setOntologyRef(element.getAttribute("ontologyref"));
        if (element.getAttribute("unit") != null) annotation.setUnit(element.getAttribute("unit"));
        annotations.add(annotation);
      }
    }

    return annotations;
  }
Code example #10
 protected Annotation parseAnnotation(Reader input, String fileName) throws IOException {
   BufferedReader in = new BufferedReader(input);
   Annotation ret = new Annotation(fileName);
   String line = "";
   int lineno = 0;
   Matcher matcher = BLAME_PATTERN.matcher(line);
   while ((line = in.readLine()) != null) {
     ++lineno;
     matcher.reset(line);
     if (matcher.find()) {
       String rev = matcher.group(1);
       String author = matcher.group(2).trim();
       ret.addLine(rev, author, true);
     } else {
       OpenGrokLogger.getLogger()
           .log(
               Level.SEVERE,
               "Error: did not find annotation in line {0}: [{1}]",
               new Object[] {String.valueOf(lineno), line});
     }
   }
   return ret;
 }
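BLAME_PATTERN is defined elsewhere in the class; purely as an illustration of the shape the loop expects (group 1 = revision, group 2 = author), a pattern along the following lines would satisfy it, though this is not the actual OpenGrok definition:

    import java.util.regex.Pattern;

    // Illustrative only: two capture groups, revision then author,
    // matching lines shaped like "<rev> <author> ...".
    private static final Pattern BLAME_PATTERN =
        Pattern.compile("^\\s*(\\S+)\\s+(\\S.*?)\\s*$");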
Code example #11
  private void runTest(Annotation annotation) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    serializer.write(annotation, out);
    byte[] serialized = out.toByteArray();

    ByteArrayInputStream in = new ByteArrayInputStream(serialized);
    Annotation deserialized = serializer.read(in).first();

    assertEquals(
        annotation.get(CoreAnnotations.SentencesAnnotation.class).size(),
        deserialized.get(CoreAnnotations.SentencesAnnotation.class).size());
    for (int i = 0; i < annotation.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) {
      verifySentence(
          annotation.get(CoreAnnotations.SentencesAnnotation.class).get(i),
          deserialized.get(CoreAnnotations.SentencesAnnotation.class).get(i));
    }
  }
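The serializer field above is an AnnotationSerializer held by the test class; a sketch of driving the round-trip check with CoreNLP's protobuf serializer (pipeline settings and the input text are placeholders, classes from edu.stanford.nlp.pipeline):

    // Sketch: one concrete serializer this round-trip test could run against.
    AnnotationSerializer serializer = new ProtobufAnnotationSerializer();

    StanfordCoreNLP pipeline = new StanfordCoreNLP();      // default annotators
    Annotation annotation = new Annotation("A short test sentence.");
    pipeline.annotate(annotation);
    runTest(annotation);                                   // serialize, read back, compare sentence counts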
Code example #12
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props =
                      new Properties() {
                        {
                          setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                        }
                      };
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty()
                          || "true".equalsIgnoreCase(filterStr.toLowerCase());
                  // (create the matcher)
                  final SemgrexPattern regex = SemgrexPattern.compile(pattern);

                  // Run TokensRegex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      SemanticGraphCoreAnnotations
                                                          .CollapsedCCProcessedDependenciesAnnotation
                                                          .class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                SemgrexMatcher matcher =
                                                    regex.matcher(
                                                        sentence.get(
                                                            SemanticGraphCoreAnnotations
                                                                .CollapsedCCProcessedDependenciesAnnotation
                                                                .class));
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            IndexedWord match = matcher.getMatch();
                                                            matchWriter.set("text", match.word());
                                                            matchWriter.set(
                                                                "begin", match.index() - 1);
                                                            matchWriter.set("end", match.index());
                                                            for (String capture :
                                                                matcher.getNodeNames()) {
                                                              matchWriter.set(
                                                                  "$" + capture,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        IndexedWord node =
                                                                            matcher.getNode(
                                                                                capture);
                                                                        groupWriter.set(
                                                                            "text", node.word());
                                                                        groupWriter.set(
                                                                            "begin",
                                                                            node.index() - 1);
                                                                        groupWriter.set(
                                                                            "end", node.index());
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                  }
                }
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        response = json.get(5, TimeUnit.SECONDS).getBytes();
      } catch (InterruptedException | ExecutionException | TimeoutException e) {
        respondError("Timeout when executing Semgrex query", httpExchange);
      }
      if (response.length > 0) {
        httpExchange.getResponseHeaders().add("Content-Type", "text/json");
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      }
    }
Code example #13
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      // Get sentence.
      Properties props;
      Annotation ann;
      StanfordCoreNLP.OutputFormat of;
      log("[" + httpExchange.getRemoteAddress() + "] Received message");
      try {
        props = getProperties(httpExchange);
        ann = getDocument(props, httpExchange);
        of =
            StanfordCoreNLP.OutputFormat.valueOf(
                props.getProperty("outputFormat", "json").toUpperCase());
        // Handle direct browser connections (i.e., not a POST request).
        if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) {
          log("[" + httpExchange.getRemoteAddress() + "] Interactive connection");
          staticPageHandle.handle(httpExchange);
          return;
        }
        log("[" + httpExchange.getRemoteAddress() + "] API call");
      } catch (Exception e) {
        // Return error message.
        e.printStackTrace();
        String response = e.getMessage();
        httpExchange.getResponseHeaders().add("Content-Type", "text/plain");
        httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length());
        httpExchange.getResponseBody().write(response.getBytes());
        httpExchange.close();
        return;
      }

      try {
        // Annotate
        StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
        Future<Annotation> completedAnnotationFuture =
            corenlpExecutor.submit(
                () -> {
                  pipeline.annotate(ann);
                  return ann;
                });
        Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS);

        // Get output
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline))
            .accept(completedAnnotation, os);
        os.close();
        byte[] response = os.toByteArray();

        httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of));
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      } catch (TimeoutException e) {
        respondError("CoreNLP request timed out", httpExchange);
      } catch (Exception e) {
        // Return error message.
        respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
      }
    }
Code example #14
File: GenerateGffFile.java Project: bg7/BG7
  public static void main(String[] args) {

    if (args.length != 2) {
      System.out.println(
          "This program expects two parameters: \n"
              + "1. Input predicted genes XML filename \n"
              + "2. Output GFF filename\n");
    } else {

      String inFileString = args[0];
      String outFileString = args[1];

      File inFile = new File(inFileString);
      File outFile = new File(outFileString);

      try {

        BufferedWriter outBuff = new BufferedWriter(new FileWriter(outFile));

        // writing header first
        outBuff.write(GFF_HEADER + "\n");
        Date currentDate = new Date();
        outBuff.write(DATE_HEADER + currentDate.toString() + "\n");
        outBuff.write(TYPE_HEADER + "\n");

        BufferedReader reader = new BufferedReader(new FileReader(inFile));
        String tempSt;
        StringBuilder stBuilder = new StringBuilder();
        while ((tempSt = reader.readLine()) != null) {
          stBuilder.append(tempSt);
        }
        // closing input file reader
        reader.close();

        Annotation annotation = new Annotation(stBuilder.toString());

        HashMap<String, TreeSet<GffLine>> linesPerContig = new HashMap<String, TreeSet<GffLine>>();

        // -----------POTATIZING GENES----------------
        List<Element> contigsGenes =
            annotation
                .asJDomElement()
                .getChild(PredictedGenes.TAG_NAME)
                .getChildren(ContigXML.TAG_NAME);
        for (Element element : contigsGenes) {
          ContigXML contig = new ContigXML(element);
          TreeSet<GffLine> lines = new TreeSet<GffLine>();
          linesPerContig.put(contig.getId(), lines);
          List<XMLElement> genes = contig.getChildrenWith(PredictedGene.TAG_NAME);
          for (XMLElement xMLElement : genes) {
            PredictedGene gene = new PredictedGene(xMLElement.asJDomElement());

            String geneLine =
                contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + GENE + SEPARATOR;
            int beginPos = gene.getStartPosition();
            int endPos = gene.getEndPosition();
            int initPos = beginPos;
            if (beginPos < endPos) {
              geneLine += beginPos + SEPARATOR + endPos + SEPARATOR;
            } else {
              geneLine += endPos + SEPARATOR + beginPos + SEPARATOR;
              initPos = endPos;
            }
            geneLine +=
                gene.getEvalue()
                    + SEPARATOR
                    + gene.getStrand()
                    + SEPARATOR
                    + "."
                    + SEPARATOR
                    + LOCUS_TAG
                    + gene.getId()
                    + ";\n";
            lines.add(new GffLine(initPos, geneLine));
            // outBuff.write(geneLine);

            String cdsLine = contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + CDS + SEPARATOR;
            if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) {
              cdsLine +=
                  gene.getStartPosition() + SEPARATOR + (gene.getEndPosition() - 3) + SEPARATOR;
            } else {
              cdsLine +=
                  (gene.getEndPosition() - 3) + SEPARATOR + gene.getStartPosition() + SEPARATOR;
            }
            cdsLine +=
                gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR;
            cdsLine +=
                LOCUS_TAG
                    + gene.getId()
                    + ";"
                    + PRODUCT
                    + gene.getProteinNames()
                    + ";"
                    + CHORIZO_INFERENCE
                    + gene.getAccession()
                    + "\n";
            // outBuff.write(cdsLine);
            lines.add(new GffLine(initPos, cdsLine));

            String startCodonLine =
                contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + START_CODON + SEPARATOR;
            if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) {
              startCodonLine +=
                  gene.getStartPosition() + SEPARATOR + (gene.getStartPosition() + 2) + SEPARATOR;
            } else {
              startCodonLine +=
                  (gene.getStartPosition() - 2) + SEPARATOR + gene.getStartPosition() + SEPARATOR;
            }
            startCodonLine +=
                gene.getEvalue()
                    + SEPARATOR
                    + gene.getStrand()
                    + SEPARATOR
                    + "0"
                    + SEPARATOR
                    + LOCUS_TAG
                    + gene.getId()
                    + ";";
            startCodonLine +=
                PRODUCT
                    + gene.getProteinNames()
                    + ";"
                    + CHORIZO_INFERENCE
                    + gene.getAccession()
                    + "\n";
            // outBuff.write(startCodonLine);
            lines.add(new GffLine(initPos, startCodonLine));

            String stopCodonLine =
                contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + STOP_CODON + SEPARATOR;
            if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) {
              stopCodonLine +=
                  (gene.getEndPosition() + 1) + SEPARATOR + (gene.getEndPosition() + 3) + SEPARATOR;
            } else {
              stopCodonLine +=
                  (gene.getEndPosition() - 3) + SEPARATOR + (gene.getEndPosition() - 1) + SEPARATOR;
            }
            stopCodonLine +=
                gene.getEvalue()
                    + SEPARATOR
                    + gene.getStrand()
                    + SEPARATOR
                    + "0"
                    + SEPARATOR
                    + LOCUS_TAG
                    + gene.getId()
                    + ";";
            stopCodonLine +=
                PRODUCT
                    + gene.getProteinNames()
                    + ";"
                    + CHORIZO_INFERENCE
                    + gene.getAccession()
                    + "\n";
            // outBuff.write(stopCodonLine);
            lines.add(new GffLine(initPos, stopCodonLine));
          }
        }

        // -----------POTATIZING RNAS-----------------
        List<Element> contigsRnas =
            annotation
                .asJDomElement()
                .getChild(PredictedRnas.TAG_NAME)
                .getChildren(ContigXML.TAG_NAME);
        for (Element element : contigsRnas) {
          ContigXML contig = new ContigXML(element);
          List<XMLElement> rnas = contig.getChildrenWith(PredictedRna.TAG_NAME);

          TreeSet<GffLine> lines = linesPerContig.get(contig.getId());
          if (lines == null) {
            lines = new TreeSet<GffLine>();
            linesPerContig.put(contig.getId(), lines);
          }

          for (XMLElement xMLElement : rnas) {
            PredictedRna rna = new PredictedRna(xMLElement.asJDomElement());

            String rnaLine = contig.getId() + SEPARATOR + CHORIZO_RNA + SEPARATOR + RNA + SEPARATOR;
            int beginPos = rna.getStartPosition();
            int endPos = rna.getEndPosition();
            int initPos = beginPos;
            if (beginPos < endPos) {
              rnaLine += beginPos + SEPARATOR + endPos + SEPARATOR;
            } else {
              rnaLine += endPos + SEPARATOR + beginPos + SEPARATOR;
              initPos = endPos;
            }
            rnaLine +=
                rna.getEvalue()
                    + SEPARATOR
                    + rna.getStrand()
                    + SEPARATOR
                    + "."
                    + SEPARATOR
                    + LOCUS_TAG
                    + rna.getId()
                    + ";";
            String columns[] = rna.getAnnotationUniprotId().split("\\|");
            String rnaProduct = columns[3];
            String refSeqId = columns[1];
            String positions = columns[2].substring(1);
            // ref|NC_007413|:3894075-3895562|16S ribosomal RNA| [locus_tag=Ava_R0035]
            rnaLine +=
                PRODUCT + rnaProduct + "," + "rna:RefSeq:" + refSeqId + " " + positions + "\n";
            // outBuff.write(rnaLine);
            lines.add(new GffLine(initPos, rnaLine));
          }
        }

        Set<String> keys = linesPerContig.keySet();
        for (String key : keys) {
          TreeSet<GffLine> lines = linesPerContig.get(key);
          GffLine line = lines.pollFirst();
          while (line != null) {
            outBuff.write(line.getLine());
            line = lines.pollFirst();
          }
        }

        outBuff.close();
        System.out.println("Done!!! :D");

      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }
Code example #15
  /**
   * Run from the command-line, with a list of URLs as argument.
   *
   * <p><B>NOTE:</B><br>
   * This code will run with all the documents in memory - if you want to unload each from memory
   * after use, add code to store the corpus in a DataStore.
   */
  public static void main(String args[]) throws GateException, IOException {
    // initialise the GATE library
    Out.prln("Initialising GATE...");
    Gate.init();
    Out.prln("...GATE initialised");

    // initialise ANNIE (this may take several minutes)
    StandAloneAnnie annie = new StandAloneAnnie();
    annie.initAnnie();

    // create a GATE corpus and add a document for each command-line
    // argument
    Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus");
    for (int i = 0; i < args.length; i++) {
      URL u = new URL(args[i]);
      FeatureMap params = Factory.newFeatureMap();
      params.put("sourceUrl", u);
      params.put("preserveOriginalContent", new Boolean(true));
      params.put("collectRepositioningInfo", new Boolean(true));
      Out.prln("Creating doc for " + u);
      Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
      corpus.add(doc);
    } // for each of args

    // tell the pipeline about the corpus and run it
    annie.setCorpus(corpus);
    annie.execute();

    // for each document, get an XML document with the
    // person and location names added
    Iterator iter = corpus.iterator();
    int count = 0;
    String startTagPart_1 = "<span GateID=\"";
    String startTagPart_2 = "\" title=\"";
    String startTagPart_3 = "\" style=\"background:Red;\">";
    String endTag = "</span>";

    while (iter.hasNext()) {
      Document doc = (Document) iter.next();
      AnnotationSet defaultAnnotSet = doc.getAnnotations();
      Set annotTypesRequired = new HashSet();
      annotTypesRequired.add("Person");
      annotTypesRequired.add("Location");
      Set<Annotation> peopleAndPlaces =
          new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired));

      FeatureMap features = doc.getFeatures();
      String originalContent =
          (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
      RepositioningInfo info =
          (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);

      ++count;
      File file = new File("StANNIE_" + count + ".HTML");
      Out.prln("File name: '" + file.getAbsolutePath() + "'");
      if (originalContent != null && info != null) {
        Out.prln("OrigContent and reposInfo existing. Generate file...");

        Iterator it = peopleAndPlaces.iterator();
        Annotation currAnnot;
        SortedAnnotationList sortedAnnotations = new SortedAnnotationList();

        while (it.hasNext()) {
          currAnnot = (Annotation) it.next();
          sortedAnnotations.addSortedExclusive(currAnnot);
        } // while

        StringBuffer editableContent = new StringBuffer(originalContent);
        long insertPositionEnd;
        long insertPositionStart;
        // insert annotation tags backward
        Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
        Out.prln("Sorted annotations count: " + sortedAnnotations.size());
        for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
          currAnnot = (Annotation) sortedAnnotations.get(i);
          insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
          insertPositionStart = info.getOriginalPos(insertPositionStart);
          insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
          insertPositionEnd = info.getOriginalPos(insertPositionEnd, true);
          if (insertPositionEnd != -1 && insertPositionStart != -1) {
            editableContent.insert((int) insertPositionEnd, endTag);
            editableContent.insert((int) insertPositionStart, startTagPart_3);
            editableContent.insert((int) insertPositionStart, currAnnot.getType());
            editableContent.insert((int) insertPositionStart, startTagPart_2);
            editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
            editableContent.insert((int) insertPositionStart, startTagPart_1);
          } // if
        } // for

        FileWriter writer = new FileWriter(file);
        writer.write(editableContent.toString());
        writer.close();
      } // if - should generate
      else if (originalContent != null) {
        Out.prln("OrigContent existing. Generate file...");

        Iterator it = peopleAndPlaces.iterator();
        Annotation currAnnot;
        SortedAnnotationList sortedAnnotations = new SortedAnnotationList();

        while (it.hasNext()) {
          currAnnot = (Annotation) it.next();
          sortedAnnotations.addSortedExclusive(currAnnot);
        } // while

        StringBuffer editableContent = new StringBuffer(originalContent);
        long insertPositionEnd;
        long insertPositionStart;
        // insert annotation tags backward
        Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
        Out.prln("Sorted annotations count: " + sortedAnnotations.size());
        for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
          currAnnot = (Annotation) sortedAnnotations.get(i);
          insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
          insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
          if (insertPositionEnd != -1 && insertPositionStart != -1) {
            editableContent.insert((int) insertPositionEnd, endTag);
            editableContent.insert((int) insertPositionStart, startTagPart_3);
            editableContent.insert((int) insertPositionStart, currAnnot.getType());
            editableContent.insert((int) insertPositionStart, startTagPart_2);
            editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
            editableContent.insert((int) insertPositionStart, startTagPart_1);
          } // if
        } // for

        FileWriter writer = new FileWriter(file);
        writer.write(editableContent.toString());
        writer.close();
      } else {
        Out.prln("Content : " + originalContent);
        Out.prln("Repositioning: " + info);
      }

      String xmlDocument = doc.toXml(peopleAndPlaces, false);
      String fileName = new String("StANNIE_toXML_" + count + ".HTML");
      FileWriter writer = new FileWriter(fileName);
      writer.write(xmlDocument);
      writer.close();
    } // for each doc
  } // main