public SpanChunk create(final List<SpanEvent> flushData) { if (flushData == null) { throw new NullPointerException("flushData must not be null"); } // TODO must be equals to or greater than 1 final int size = flushData.size(); if (size < 1) { throw new IllegalArgumentException("flushData.size() < 1 size:" + size); } final SpanEvent first = flushData.get(0); if (first == null) { throw new IllegalStateException("first SpanEvent is null"); } final Span parentSpan = first.getSpan(); final String agentId = this.agentInformation.getAgentId(); final SpanChunk spanChunk = new SpanChunk(flushData); spanChunk.setAgentId(agentId); spanChunk.setApplicationName(this.agentInformation.getApplicationName()); spanChunk.setAgentStartTime(this.agentInformation.getStartTime()); spanChunk.setServiceType(parentSpan.getServiceType()); final byte[] transactionId = parentSpan.getTransactionId(); spanChunk.setTransactionId(transactionId); spanChunk.setSpanId(parentSpan.getSpanId()); spanChunk.setEndPoint(parentSpan.getEndPoint()); return spanChunk; }
/** generate mention annotations (with entity numbers) based on the ACE entities and mentions. */ static void addMentionTags(Document doc, AceDocument aceDoc) { ArrayList<AceEntity> entities = aceDoc.entities; for (int i = 0; i < entities.size(); i++) { AceEntity entity = entities.get(i); ArrayList<AceEntityMention> mentions = entity.mentions; for (int j = 0; j < mentions.size(); j++) { AceEntityMention mention = (AceEntityMention) mentions.get(j); // we compute a jetSpan not including trailing whitespace Span aceSpan = mention.head; // skip mentions in ChEnglish APF not aligned to any English text if (aceSpan.start() < 0) continue; Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1); FeatureSet features = new FeatureSet("entity", new Integer(i)); if (flags.contains("types")) { features.put("type", entity.type.substring(0, 3)); if (entity.subtype != null) features.put("subtype", entity.subtype); } if (flags.contains("extents")) { String cleanExtent = mention.text.replaceAll("\n", " "); features.put("extent", AceEntityMention.addXmlEscapes(cleanExtent)); } doc.annotate("mention", jetSpan, features); } } }
/**
 * Parses {@code file} as XML and turns every {@code span} element found under the document
 * root into a {@code Span}.
 *
 * <p>For each element the {@code type} attribute, the integer {@code start} and {@code end}
 * attributes and the element's text content are copied onto a new span.
 *
 * @param file the XML file to read
 * @return the spans in the order the elements are returned by the parser
 * @throws IllegalStateException wrapping any {@link IOException},
 *     {@link ParserConfigurationException} or {@link SAXException} raised while parsing
 */
private List<Span> readSpans(File file) {
  final List<Span> result = Lists.newArrayList();
  try {
    final NodeList spanNodes =
        factory.newDocumentBuilder().parse(file).getDocumentElement().getElementsByTagName("span");
    for (int idx = 0, count = spanNodes.getLength(); idx < count; idx++) {
      final Element node = (Element) spanNodes.item(idx);
      final Span parsed = new Span();
      parsed.setType(node.getAttribute("type"));
      parsed.setStart(Integer.parseInt(node.getAttribute("start")));
      parsed.setEnd(Integer.parseInt(node.getAttribute("end")));
      parsed.setValue(node.getTextContent());
      result.add(parsed);
    }
  } catch (ParserConfigurationException e) {
    throw new IllegalStateException(e);
  } catch (SAXException e) {
    throw new IllegalStateException(e);
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
  return result;
}
/**
 * Records the exception details on the span and, when the span has no error code yet, sets
 * its error code to 1.
 *
 * @param exceptionClassId id of the exception class
 * @param exceptionMessage message of the exception
 */
@Override
void setExceptionInfo(int exceptionClassId, String exceptionMessage) {
  span.setExceptionInfo(exceptionClassId, exceptionMessage);
  if (span.isSetErrCode()) {
    // An error code is already present; leave it untouched.
    return;
  }
  span.setErrCode(1);
}
/** * performs the action, adding the specified Annotation. Returns the position of the end of the * Annotation. */ @Override public int perform(Document doc, PatternApplication patap) { Span span; HashMap bindings = patap.bestBindings; // System.out.println ("bindings (for new annotation): " + bindings); if (spanVariable == null) { span = new Span(patap.startPosition, patap.bestPosition); } else if (spanVariable.name.toString() == "0") { span = new Span(patap.startPosition, patap.startPosition); } else { Object value = bindings.get(spanVariable.name); if (value instanceof Span) { span = (Span) value; } else if (value instanceof Annotation) { span = ((Annotation) value).span(); } else { System.out.println("Value of " + spanVariable.toString() + " is not a span.or annotation"); return -1; } } if (Pat.trace) Console.println( "Annotating " + doc.text(span) + " as " + type + " " + features.substitute(bindings).toSGMLString()); hideAnnotations(doc, type, span); hideAnnotations(doc, "token", span); Annotation newAnnotation = new Annotation(type, span, features.substitute(bindings)); doc.addAnnotation(newAnnotation); if (bindingVariable != null) bindings.put(bindingVariable.name, newAnnotation); return span.end(); }
/** * Get the text value of this entity. The headTokenSpan MUST be set before calling this method! */ public String getValue() { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); // int lastEnd = -1; StringBuilder sb = new StringBuilder(); for (int i = headTokenSpan.start(); i < headTokenSpan.end(); i++) { CoreLabel token = tokens.get(i); // we are not guaranteed to have CharacterOffsets so we can't use them... /* Integer start = token.get(CharacterOffsetBeginAnnotation.class); Integer end = token.get(CharacterOffsetEndAnnotation.class); if (start != null && end != null) { if (lastEnd != -1 && !start.equals(lastEnd)) { sb.append(StringUtils.repeat(" ", start - lastEnd)); lastEnd = end; } } else { if (lastEnd != -1) sb.append(" "); lastEnd = 0; } */ if (i > headTokenSpan.start()) sb.append(" "); sb.append(token.word()); } return sb.toString(); }
/**
 * Builds the parent-level id to hand to a child of {@code spanData}.
 *
 * @param spanData the span whose parent level and level id are combined
 * @return {@code parentLevel + "." + levelId}, or just the level id as a string when the
 *     span's parent level is null or empty
 */
private String generateSubParentLevelId(Span spanData) {
  boolean hasParentLevel =
      spanData.getParentLevel() != null && spanData.getParentLevel().length() != 0;
  if (hasParentLevel) {
    return spanData.getParentLevel() + "." + spanData.getLevelId();
  }
  return spanData.getLevelId() + "";
}
/** * Returns true is the specified span crosses this span. * * @param s The span to compare with this span. * @return true is the specified span overlaps this span and contains a non-overlapping section; * false otherwise. */ public boolean crosses(Span s) { int sstart = s.getStart(); // either s's start is in this or this' start is in s return !this.contains(s) && !s.contains(this) && (getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd()); }
/** * Returns true if the specified span intersects with this span. * * @param s The span to compare with this span. * @return true is the spans overlap; false otherwise. */ public boolean intersects(Span s) { int sstart = s.getStart(); // either s's start is in this or this' start is in s return this.contains(s) || s.contains(this) || getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd(); }
/**
 * Stores the parent application's name and type on the span, logging the name at debug level
 * when debug is enabled.
 *
 * @param parentApplicationName name of the parent application
 * @param parentApplicationType type code of the parent application
 */
@Override
public void recordParentApplication(String parentApplicationName, short parentApplicationType) {
  span.setParentApplicationName(parentApplicationName);
  span.setParentApplicationType(parentApplicationType);
  if (!isDebug) {
    return;
  }
  logger.debug("ParentApplicationName marked. parentApplicationName={}", parentApplicationName);
}
/**
 * Records the exception details on the span. When {@code markError} is true and the span has
 * no error code yet, the error code is set to 1.
 *
 * @param markError whether the span should also be flagged as failed
 * @param exceptionClassId id of the exception class
 * @param exceptionMessage message of the exception
 */
@Override
void setExceptionInfo(boolean markError, int exceptionClassId, String exceptionMessage) {
  span.setExceptionInfo(exceptionClassId, exceptionMessage);
  // isSetErrCode() is only consulted when the caller asked for the error to be marked.
  if (markError && !span.isSetErrCode()) {
    span.setErrCode(1);
  }
}
/**
 * Returns the words of the tokens covered by the extent token span, joined by single spaces.
 *
 * @return the space-joined extent words (empty when the extent span covers no tokens)
 */
public String getExtentString() {
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  StringBuilder text = new StringBuilder();
  final int first = extentTokenSpan.start();
  final int limit = extentTokenSpan.end();
  for (int idx = first; idx < limit; idx++) {
    CoreLabel current = tokens.get(idx);
    if (idx != first) {
      text.append(" ");
    }
    text.append(current.word());
  }
  return text.toString();
}
// 2 * intersect / (length of s1 + length of s2) public static double score(Span s1, Span s2) { double a = 0; double b = 0; // there is a more efficient way for (int i = 0; i < s1.size(); i++) for (int j = 0; j < s2.size(); j++) a += overlap(s1.get(i), s2.get(j)); for (int i = 0; i < s1.size(); i++) b += s1.get(i)[1] - s1.get(i)[0]; for (int i = 0; i < s2.size(); i++) b += s2.get(i)[1] - s2.get(i)[0]; return a == 0 ? -1 : a / b; }
/**
 * Always returns the text corresponding to the extent of this object, even when getValue is
 * overridden by a subclass.
 *
 * @return the words of the extent tokens joined by single spaces, or an empty string when the
 *     sentence has no tokens annotation or the extent span is not set
 */
public final String getFullValue() {
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  StringBuilder text = new StringBuilder();
  if (tokens == null || extentTokenSpan == null) {
    return text.toString();
  }
  final int first = extentTokenSpan.start();
  final int limit = extentTokenSpan.end();
  for (int idx = first; idx < limit; idx++) {
    if (idx != first) {
      text.append(" ");
    }
    text.append(tokens.get(idx).word());
  }
  return text.toString();
}
/** * hides (adds the 'hidden' feature) to all annotations of type <I>type</I> beginning at the * starting position of span <I>span</I>. */ public static void hideAnnotations(Document doc, String type, Span span) { for (int posn = span.start(); posn < span.end(); posn++) { Vector annotations = doc.annotationsAt(posn, type); if (annotations != null) { for (int i = 0; i < annotations.size(); i++) { Annotation ann = (Annotation) annotations.elementAt(i); ann.put("hidden", "true"); // Console.println ("Hiding " + ann); } } } }
/** * Creates annotations for each node in parse tree <CODE>node</NODE>. * These annotations are added to the parse tree and to the document * <CODE>doc</CODE>. In constrast to <CODE>setAnnotations</CODE>, * the categories used for terminal nodes are Jet categories obtained by * Jet tokenization and lexical look-up. This means that hyphenated * items are split, and multi-word names are reduced to a single node. * * @param node the root of the parse tree * @param treeSpan the span of the document matching the parse tree * @param doc the document to which annotations will be added */ private void setJetAnnotations(ParseTreeNode node, Span treeSpan, Document doc) { StatParser.buildParserInput(doc, treeSpan.start(), treeSpan.end(), false); StatParser.fixHyphenatedItems(doc); int nameConstitEnd = -1; List<ParseTreeNode> terminals = getTerminalNodes(node); for (ParseTreeNode terminal : terminals) { int terminalEnd = terminal.end; // is there a 'name' constituent or 'hyphword' constituent here? 
Vector<Annotation> constits = doc.annotationsAt(terminal.start, "constit"); Annotation constit = null; Annotation nameConstit = null; Annotation hyphword = null; if (constits != null) { for (Annotation c : constits) { if (c.get("cat") == "name") { nameConstit = c; } else if (c.get("cat") == "hyphword") { hyphword = c; } if (constit == null) constit = c; } } if (hyphword != null) { nameConstit = null; constit = hyphword; } // if there is a name which is not part of a hyphword, associate the // name with this (first) terminal node, and mark any remaining terminal // nodes which match tokens in the name as empty if (nameConstit != null) { terminal.end = nameConstit.end(); terminal.ann = nameConstit; nameConstitEnd = nameConstit.end(); } else if (nameConstitEnd >= 0) { terminal.word = null; } else { Span span = new Span(terminal.start, terminal.end); String pennPOS = ((String) terminal.category).toUpperCase().intern(); String word = terminal.word; terminal.ann = StatParser.buildWordDefn(doc, word, span, constit, pennPOS); } if (nameConstitEnd == terminalEnd) nameConstitEnd = -1; } // prune parse tree: remove a node if it has no word or children pruneTree(node); determineNonTerminalSpans(node, treeSpan.start()); // add head links if (hr == null) hr = HeadRule.createDefaultRule(); hr.apply(node); // add annotations for non-terminals: Jet.Parser.ParseTreeNode.makeParseAnnotations(doc, node); }
/**
 * Copies annotation information from one <code>TextAnnotation</code> to another. Despite the
 * method name the transfer is one-directional: <code>fromTA</code> is only read. The fields
 * that are transferred are:
 *
 * <ul>
 *   <li>annotation ID
 *   <li>annotation sets
 *   <li>annotator
 *   <li>covered text
 *   <li>document collection ID
 *   <li>document ID
 *   <li>document section ID
 *   <li>spans (each span is cloned and added to <code>toTA</code>)
 * </ul>
 *
 * <p>Note: class information does not get transferred.
 *
 * @param fromTA the text annotation from which to transfer annotation information
 * @param toTA the text annotation to which to transfer annotation information
 */
public static void swapAnnotationInfo(TextAnnotation fromTA, TextAnnotation toTA) {
  // Annotation-level metadata.
  toTA.setAnnotationID(fromTA.getAnnotationID());
  toTA.setAnnotationSets(fromTA.getAnnotationSets());
  toTA.setAnnotator(fromTA.getAnnotator());
  toTA.setCoveredText(fromTA.getCoveredText());

  // Document-level identifiers.
  toTA.setDocumentCollectionID(fromTA.getDocumentCollectionID());
  toTA.setDocumentID(fromTA.getDocumentID());
  toTA.setDocumentSectionID(fromTA.getDocumentSectionID());

  // Spans are added as clones of the source spans.
  for (Span sourceSpan : fromTA.getSpans()) {
    toTA.addSpan(sourceSpan.clone());
  }
}
public ContextData beforeInvoke(Identification id) { try { Span spanData = ContextGenerator.generateSpanFromThreadLocal(id); // 设置SpanType的类型 spanData.setTag(Tag.SPAN_TYPE, SpanType.RPC_CLIENT); if (Config.BuriedPoint.PRINTF) { logger.debug( "TraceId:" + spanData.getTraceId() + "\tParentLevelId:" + spanData.getParentLevel() + "\tLevelId:" + spanData.getLevelId() + "\tbusinessKey:" + spanData.getBusinessKey()); } CurrentThreadSpanStack.push(spanData); sendRequestSpan(spanData, id); return new ContextData( spanData.getTraceId(), generateSubParentLevelId(spanData), spanData.getRouteKey()); } catch (Throwable t) { logger.error(t.getMessage(), t); return new EmptyContextData(); } }
/** * Finds all the {@link Span}s that match this query. This is what actually scans the HBase table * and loads the data into {@link Span}s. * * @return A map from HBase row key to the {@link Span} for that row key. Since a {@link Span} * actually contains multiple HBase rows, the row key stored in the map has its timestamp * zero'ed out. * @throws HBaseException if there was a problem communicating with HBase to perform the search. * @throws IllegalArgumentException if bad data was retreived from HBase. */ private TreeMap<byte[], Span> findSpans() throws HBaseException { final short metric_width = tsdb.metrics.width(); final TreeMap<byte[], Span> spans = // The key is a row key from HBase. new TreeMap<byte[], Span>(new SpanCmp(metric_width)); int nrows = 0; int hbase_time = 0; // milliseconds. long starttime = System.nanoTime(); final Scanner scanner = getScanner(); try { ArrayList<ArrayList<KeyValue>> rows; while ((rows = scanner.nextRows().joinUninterruptibly()) != null) { hbase_time += (System.nanoTime() - starttime) / 1000000; for (final ArrayList<KeyValue> row : rows) { final byte[] key = row.get(0).key(); if (Bytes.memcmp(metric, key, 0, metric_width) != 0) { throw new IllegalDataException( "HBase returned a row that doesn't match" + " our scanner (" + scanner + ")! " + row + " does not start" + " with " + Arrays.toString(metric)); } Span datapoints = spans.get(key); if (datapoints == null) { datapoints = new Span(tsdb); spans.put(key, datapoints); } datapoints.addRow(tsdb.compact(row)); nrows++; starttime = System.nanoTime(); } } } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new RuntimeException("Should never be here", e); } finally { hbase_time += (System.nanoTime() - starttime) / 1000000; scanlatency.add(hbase_time); } LOG.info(this + " matched " + nrows + " rows in " + spans.size() + " spans"); if (nrows == 0) { return null; } return spans; }
/**
 * Compares the specified span to the current span. Spans are ordered by ascending start;
 * among spans with the same start, the one with the larger end comes first.
 *
 * @param s the span to compare with
 * @return -1, 0 or 1 as this span sorts before, equal to or after {@code s}
 */
public int compareTo(Span s) {
  if (getStart() < s.getStart()) {
    return -1;
  }
  if (getStart() != s.getStart()) {
    return 1;
  }
  // Same start: the longer span sorts first.
  if (getEnd() > s.getEnd()) {
    return -1;
  }
  if (getEnd() < s.getEnd()) {
    return 1;
  }
  return 0;
}
/** * Compares the text spans of the two entity mentions * * @param otherEnt */ public boolean textEquals(EntityMention otherEnt) { // // we attempt three comparisons: // a) if syntactic heads are defined we consider two texts similar if they have the same // syntactic head // (this is necessary because in NFL we compare entities with different spans but same heads, // e.g. "49ers" vs "San Francisco 49ers" // b) if head spans are defined we consider two texts similar if they have the same head span // c) if extent spans are defined we consider two texts similar if they have the same extent // span // if (syntacticHeadTokenPosition != -1 && otherEnt.syntacticHeadTokenPosition != -1) { if (syntacticHeadTokenPosition == otherEnt.syntacticHeadTokenPosition) return true; return false; } if (headTokenSpan != null && otherEnt.headTokenSpan != null) { if (headTokenSpan.equals(otherEnt.headTokenSpan)) return true; return false; } if (extentTokenSpan != null && otherEnt.extentTokenSpan != null) { if (extentTokenSpan.equals(otherEnt.extentTokenSpan)) return true; return false; } return false; }
/**
 * Adds <B>constit</B> annotations to an existing Document <CODE>doc</CODE> to represent the
 * parse tree structure of a set of trees <CODE>trees</CODE>. This version is provided for
 * parse tree files which include sentence offsets.
 *
 * <p>Each tree is annotated over the span from its own offset to the next offset in
 * <CODE>offsets</CODE> (or to the end of <CODE>span</CODE> for the last one). A tree with a
 * negative offset is reported on standard error and skipped; if the two lists differ in size
 * an error is reported and nothing is annotated. After a tree is annotated, the first
 * <CODE>targetAnnotation</CODE> annotation found at its offset, if any, receives a 'parse'
 * feature pointing to the tree's annotation.
 *
 * @param trees list of parse trees
 * @param offsets list of the starting position (in doc) of the text corresponding to each
 *     parse tree
 * @param doc document to which annotations should be added
 * @param targetAnnotation name of annotation to get 'parse' feature pointing to parse tree
 * @param span target span.
 * @param jetCategories if false, use lexical categories from Penn Tree Bank; if true, use
 *     categories from Jet
 */
public void addAnnotations(
    List<ParseTreeNode> trees,
    List<Integer> offsets,
    Document doc,
    String targetAnnotation,
    Span span,
    boolean jetCategories) {
  if (trees.size() != offsets.size()) {
    System.err.println(
        "PTBReader.addAnnotations: mismatch between number of "
            + "trees ("
            + trees.size()
            + ") and number of offsets ("
            + offsets.size()
            + ")");
    return;
  }

  for (int idx = 0; idx < trees.size(); idx++) {
    ParseTreeNode currentTree = trees.get(idx);
    int sentenceStart = offsets.get(idx);
    if (sentenceStart < 0) {
      System.err.println("PTBReader.addAnnotations: offset missing for " + " parse tree " + idx);
      continue;
    }

    // A sentence ends where the next one starts; the last one ends with the target span.
    int sentenceEnd;
    if (idx + 1 == offsets.size()) {
      sentenceEnd = span.end();
    } else {
      sentenceEnd = offsets.get(idx + 1);
    }
    addAnnotations(currentTree, doc, new Span(sentenceStart, sentenceEnd), jetCategories);

    Vector<Annotation> targets = doc.annotationsAt(sentenceStart, targetAnnotation);
    if (targets == null || targets.isEmpty()) {
      continue;
    }
    targets.get(0).put("parse", currentTree.ann);
  }
}
@Override public void recordAcceptorHost(String host) { span.setAcceptorHost(host); // me if (isDebug) { logger.debug("Acceptor host received. host={}", host); } }
/**
 * Asserts that calling {@code text()} on the given span raises an
 * {@code UnknownObjectException}; if the call returns normally,
 * {@code failUnknownObjectException()} is invoked.
 *
 * @param span the span expected to be unknown
 */
protected void assertRaisesUnknownObjectExceptionForMethodText(Span span) throws Exception {
  try {
    span.text();
    // Reaching this line means text() did not throw.
    failUnknownObjectException();
  } catch (UnknownObjectException expected) {
    // This is the outcome the assertion is looking for; nothing to do.
  }
}
/** * Adds <B>constit</B> annotations to an existing Document <CODE>doc</CODE> to represent the parse * tree structure <CODE>tree</CODE>. * * @param tree the parse tree (for a portion of Document doc) * @param doc the document * @param span the portion of doc covered by the parse tree * @param jetCategories if true, use Jet categories as terminal categories (if false, use * categories read from parse trees) */ public void addAnnotations(ParseTreeNode tree, Document doc, Span span, boolean jetCategories) { List<ParseTreeNode> terminalNodes = getTerminalNodes(tree); String text = doc.text(); int offset = span.start(); for (ParseTreeNode terminal : terminalNodes) { while (offset < span.end() && Character.isWhitespace(text.charAt(offset))) { offset++; } for (String skipString : skip) { if (text.startsWith(skipString, offset)) { offset += skipString.length(); while (offset < span.end() && Character.isWhitespace(text.charAt(offset))) { offset++; } break; } } // match next terminal node against next word in text int matchLength = matchTextToTree(text, offset, terminal.word); if (matchLength > 0) { int endOffset = offset + matchLength; while (endOffset < span.end() && Character.isWhitespace(text.charAt(endOffset))) { endOffset++; } terminal.start = offset; terminal.end = endOffset; offset = endOffset; } else { System.err.println( "PTBReader.addAnnotations: " + "Cannot determine parse tree offset for word " + terminal.word); System.err.println(" at document offset " + offset + " in sentence"); System.err.println(" " + doc.text(span)); return; } } if (jetCategories) { setJetAnnotations(tree, span, doc); StatParser.deleteUnusedConstits(doc, span, tree.ann); // <<< } else { determineNonTerminalSpans(tree, span.start()); setAnnotations(tree, doc); } }
/**
 * Generates a "timex2" annotation on {@code doc} for each time expression in {@code aceDoc},
 * using the extent of the expression's first mention.
 *
 * <p>The annotation carries the features "val", "anchor_val", "anchor_dir", "set" and "mod",
 * each only when the corresponding timex field is neither null nor the empty string.
 *
 * @param doc the document to annotate
 * @param aceDoc the ACE document supplying the time expressions
 */
static void addTimexTags(Document doc, AceDocument aceDoc) {
  List<AceTimex> timeExpressions = aceDoc.timeExpressions;
  for (AceTimex timex : timeExpressions) {
    AceTimexMention firstMention = (AceTimexMention) timex.mentions.get(0);
    Span extent = firstMention.extent;
    // NOTE(review): the +1 presumably converts an inclusive ACE end offset into an
    // exclusive Jet end offset -- confirm against the Span conventions.
    Span jetSpan = new Span(extent.start(), extent.end() + 1);

    FeatureSet features = new FeatureSet();
    if (!(timex.val == null || timex.val.equals(""))) {
      features.put("val", timex.val);
    }
    if (!(timex.anchorVal == null || timex.anchorVal.equals(""))) {
      features.put("anchor_val", timex.anchorVal);
    }
    if (!(timex.anchorDir == null || timex.anchorDir.equals(""))) {
      features.put("anchor_dir", timex.anchorDir);
    }
    if (!(timex.set == null || timex.set.equals(""))) {
      features.put("set", timex.set);
    }
    if (!(timex.mod == null || timex.mod.equals(""))) {
      features.put("mod", timex.mod);
    }

    doc.annotate("timex2", jetSpan, features);
  }
}
/**
 * Checks if the specified span is equal to the current span.
 *
 * <p>Two spans are equal when they have the same start, the same end and the same type, where
 * two absent (null) types count as the same. The type comparison is symmetric: previously a
 * span without a type was considered equal to a span of any type, while the reverse
 * comparison returned false, which violates the {@code equals} contract.
 *
 * <p>NOTE(review): {@code hashCode()} is not visible here -- confirm it stays consistent with
 * this definition.
 *
 * @param o the object to compare with
 * @return true if {@code o} is a span with the same start, end and type; false otherwise
 */
@Override
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (!(o instanceof Span)) {
    return false;
  }
  Span s = (Span) o;
  if (getStart() != s.getStart() || getEnd() != s.getEnd()) {
    return false;
  }
  // Null-safe, symmetric type comparison.
  if (getType() == null) {
    return s.getType() == null;
  }
  return getType().equals(s.getType());
}
/** generate mention annotations (with entity numbers) based on the ACE entities and mentions. */ static void addMentionTags(Document doc, AceDocument aceDoc) { ArrayList<AceEntity> entities = aceDoc.entities; for (int i = 0; i < entities.size(); i++) { AceEntity entity = (AceEntity) entities.get(i); ArrayList<AceEntityMention> mentions = entity.mentions; for (int j = 0; j < mentions.size(); j++) { AceEntityMention mention = mentions.get(j); // we compute a jetSpan not including trailing whitespace Span aceSpan = mention.head; Span jetSpan = new Span(aceSpan.start(), aceSpan.end() + 1); FeatureSet features = new FeatureSet("entity", new Integer(i)); if (showTypes) { features.put("type", entity.type.substring(0, 3)); if (entity.subtype != null) features.put("subtype", entity.subtype); } doc.annotate("mention", jetSpan, features); } } }
/**
 * Renders this mention as {@code EntityMention [type=..., ..., corefID=...]}. The type and
 * the coref id are always printed; every other field appears only when it is set (non-null,
 * or non-negative for the syntactic head position). The value is printed only when the head
 * token span is set.
 */
@Override
public String toString() {
  StringBuilder out = new StringBuilder("EntityMention [type=");
  out.append(type);
  if (subType != null) {
    out.append(", subType=").append(subType);
  }
  if (mentionType != null) {
    out.append(", mentionType=").append(mentionType);
  }
  if (objectId != null) {
    out.append(", objectId=").append(objectId);
  }
  if (headTokenSpan != null) {
    out.append(", hstart=").append(headTokenSpan.start());
    out.append(", hend=").append(headTokenSpan.end());
  }
  if (extentTokenSpan != null) {
    out.append(", estart=").append(extentTokenSpan.start());
    out.append(", eend=").append(extentTokenSpan.end());
  }
  if (syntacticHeadTokenPosition >= 0) {
    out.append(", headPosition=").append(syntacticHeadTokenPosition);
  }
  if (headTokenSpan != null) {
    out.append(", value=\"").append(getValue()).append("\"");
  }
  if (normalizedName != null) {
    out.append(", normalizedName=\"").append(normalizedName).append("\"");
  }
  out.append(", corefID=").append(corefID);
  if (typeProbabilities != null) {
    out.append(", probs=").append(probsToString());
  }
  out.append("]");
  return out.toString();
}
/**
 * Test of inject method, of class AbstractTracer: injecting a span's context through a
 * registered text-map injector must put the expected "test-marker" entry into the carrier's
 * backing map.
 */
@Test
public void testInject() {
  System.out.println("inject");

  // Tracer with the test injector registered for the TextMap carrier type.
  AbstractTracer tracer = new TestTracerImpl();
  tracer.register(TextMap.class, new TestTextMapInjectorImpl());

  // Span whose context is a fixed test context.
  Span span =
      new AbstractSpan("test-inject-span") {
        SpanContext spanContext = new TestSpanContextImpl("whatever");

        @Override
        public SpanContext context() {
          return spanContext;
        }
      };

  Map<String, String> backingMap = new HashMap<>();
  TextMap carrier = new TextMapInjectAdapter(backingMap);

  tracer.inject(span.context(), Format.Builtin.TEXT_MAP, carrier);

  assertEquals(
      "marker should have been injected into map", "whatever", backingMap.get("test-marker"));
}