boolean load(Object[] props, int offset) { if (_values.size() > 0) { int p = _values.size() - 1; while (_index[p] > _values.get(p).size() - 1 && p > 0) { _index[p] = 0; --p; ++_index[p]; } if (_index[p] > _values.get(p).size() - 1) { return false; } // System.err.print("\t TypedValueGroup.load{"); for (int i = 0; i < _values.size(); ++i) { props[i + offset] = _values.get(i).get(_index[i]).data; // System.err.print("[" + _index[i] + "](" + props[i+offset].getClass().getSimpleName() + // ')'); } // System.err.println("}"); ++_index[_values.size() - 1]; return true; } else { return false; } }
private QueryResult gatherResultInfoForSelectQuery( String queryString, int queryNr, boolean sorted, Document doc, String[] rows) { Element root = doc.getRootElement(); // Get head information Element child = root.getChild("head", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); // Get result rows (<head>) List headChildren = child.getChildren( "variable", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); Iterator it = headChildren.iterator(); ArrayList<String> headList = new ArrayList<String>(); while (it.hasNext()) { headList.add(((Element) it.next()).getAttributeValue("name")); } List resultChildren = root.getChild("results", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")) .getChildren( "result", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); int nrResults = resultChildren.size(); QueryResult queryResult = new QueryResult(queryNr, queryString, nrResults, sorted, headList); it = resultChildren.iterator(); while (it.hasNext()) { Element resultElement = (Element) it.next(); String result = ""; // get the row values and paste it together to one String for (int i = 0; i < rows.length; i++) { List bindings = resultElement.getChildren( "binding", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); String rowName = rows[i]; for (int j = 0; j < bindings.size(); j++) { Element binding = (Element) bindings.get(j); if (binding.getAttributeValue("name").equals(rowName)) if (result.equals("")) result += rowName + ": " + ((Element) binding.getChildren().get(0)).getTextNormalize(); else result += "\n" + rowName + ": " + ((Element) binding.getChildren().get(0)).getTextNormalize(); } } queryResult.addResult(result); } return queryResult; }
/**
 * Moves every content node out of a JDOM Element and hands them back as a list.
 *
 * @param elt the element whose content is taken.
 * @return a (possibly empty) list of JDOM nodes, detached from their parent.
 */
private List getDetachedContent(Element elt) {
  List live = elt.getContent();
  List detached = new ArrayList(live.size());

  // Drain the element's content list from the front, keeping document order.
  while (!live.isEmpty()) {
    detached.add(live.remove(0));
  }
  return detached;
}
// returns list of statements public static void replaceMult(Statement st, NodeFactory f, Map o2n, Collection result) throws ModelException { List l1 = new ArrayList(); replaceMultSPO(st, f, o2n, l1, st.subject(), 0); List l2 = new ArrayList(); for (int i = 0; i < l1.size(); i++) replaceMultSPO((Statement) l1.get(i), f, o2n, l2, st.predicate(), 1); for (int i = 0; i < l2.size(); i++) replaceMultSPO((Statement) l2.get(i), f, o2n, result, st.object(), 2); }
/**
 * Handles {@code <?assemble name="value" ...?>} processing instructions for the element currently
 * on top of the stack.
 *
 * <p>Each name/value pair matched by {@code PROCESSING_INSTRUCTION} is stored in the element's
 * {@code inst} map when the name starts with {@code '@'}; otherwise it is added to the element's
 * {@code args} as a guessed typed value. Instructions with another target, with no data, or
 * arriving while no element is open are ignored.
 *
 * @param target the processing instruction target
 * @param data the processing instruction data; SAX passes {@code null} when none was supplied
 * @throws SAXException declared by the SAX callback contract
 */
@Override
public void processingInstruction(String target, String data) throws SAXException {
  _logger.fine("Processing Instruction " + target);
  _logger.fine("Processing Instruction data: " + data);

  // A null data string would make Pattern.matcher throw, so treat it as "nothing to do".
  if (!"assemble".equals(target) || data == null || _stack.isEmpty()) {
    return;
  }

  ElementInfo element = _stack.get(_stack.size() - 1);
  Matcher matcher = PROCESSING_INSTRUCTION.matcher(data);
  while (matcher.find()) {
    if (matcher.groupCount() != 2) {
      continue;
    }
    String name = matcher.group(1);
    String value = matcher.group(2);
    if (name.charAt(0) == '@') {
      element.inst.put(name, value);
    } else {
      element.args.add(guessUntypedValue(name, value));
    }

    // element.data is null when the element's class could not be loaded in lenient mode;
    // logging must not fail in that case.
    Object owner = element.data;
    _logger.fine(
        "Processing Instruction for "
            + (owner != null ? owner.getClass() : null)
            + "\n\ttarget = "
            + target
            + "\n\t"
            + name
            + "="
            + value);
  }
}
/**
 * Copies the current contents of {@code libraryList} into a new array while holding the list's
 * monitor.
 *
 * @return an array holding the elements of {@code libraryList} at the time of the call
 */
public AlbumBean[] getResult() {
  synchronized (libraryList) {
    return libraryList.toArray(new AlbumBean[libraryList.size()]);
  }
}
/** * This will output a list of JDOM nodes as a fragment of an XML document, firing off the SAX * events that have been registered. * * <p><strong>Warning</strong>: This method does not call the {@link * ContentHandler#setDocumentLocator}, {@link ContentHandler#startDocument} and {@link * ContentHandler#endDocument} callbacks on the {@link #setContentHandler ContentHandler}. The * user shall invoke these methods directly prior/after outputting the document fragments. * * @param nodes <code>List</code> of JDOM nodes to output. * @throws JDOMException if any error occurred. * @see #outputFragment(org.jdom2.Content) */ public void outputFragment(List<? extends Content> nodes) throws JDOMException { if ((nodes == null) || (nodes.size() == 0)) { return; } // Output node list as a document fragment. elementContent(nodes, new NamespaceStack()); }
// returns list of statements protected static void replaceMultSPO( Statement st, NodeFactory f, Map o2n, Collection result, RDFNode toReplace, int position) throws ModelException { Collection replacements; if (toReplace instanceof Statement) { List l = new ArrayList(); replaceMult((Statement) toReplace, f, o2n, l); if (l.size() == 1 && toReplace == l.get(0)) { result.add(st); return; // keep the same } else replacements = l; } else { Object ro = o2n.get(toReplace); if (ro instanceof Collection) replacements = (Collection) ro; else if (ro != null) { replacements = new ArrayList(); replacements.add(ro); } else { // no replacement needed result.add(st); // keep the same statement return; } } for (Iterator it = replacements.iterator(); it.hasNext(); ) { Statement rs = null; Object rr = it.next(); switch (position) { case 0: rs = f.createStatement((Resource) rr, st.predicate(), st.object()); break; case 1: rs = f.createStatement(st.subject(), (Resource) rr, st.object()); break; case 2: rs = f.createStatement(st.subject(), st.predicate(), (RDFNode) rr); break; } result.add(rs); } }
/** * Returns the result of an XSL Transformation as a list of JDOM nodes. * * <p>If the result of the transformation is a JDOM document, this method converts it into a list * of JDOM nodes; any subsequent call to {@link #getDocument} will return <code>null</code>. * * @return the transformation result as a (possibly empty) list of JDOM nodes (Elements, Texts, * Comments, PIs...). */ public List getResult() { List nodes = Collections.EMPTY_LIST; // Retrieve result from the document builder if not set. this.retrieveResult(); if (result instanceof List) { nodes = (List) result; } else { if ((result instanceof Document) && (queried == false)) { List content = ((Document) result).getContent(); nodes = new ArrayList(content.size()); while (content.size() != 0) { Object o = content.remove(0); nodes.add(o); } result = nodes; } } queried = true; return (nodes); }
/** * This will output a list of JDOM nodes as a document, firing off the SAX events that have been * registered. * * <p><strong>Warning</strong>: This method may output ill-formed XML documents if the list * contains top-level objects that are not legal at the document level (e.g. Text or CDATA nodes, * multiple Element nodes, etc.). Thus, it should only be used to output document portions towards * ContentHandlers capable of accepting such ill-formed documents (such as XSLT processors). * * @param nodes <code>List</code> of JDOM nodes to output. * @throws JDOMException if any error occurred. * @see #output(org.jdom2.Document) */ public void output(List<? extends Content> nodes) throws JDOMException { if ((nodes == null) || (nodes.size() == 0)) { return; } // contentHandler.setDocumentLocator() documentLocator(null); // contentHandler.startDocument() startDocument(); // Process node list. elementContent(nodes, new NamespaceStack()); // contentHandler.endDocument() endDocument(); }
/**
 * Reads the size of {@code libraryList} while holding the list's monitor.
 *
 * @return the number of elements currently in {@code libraryList}
 */
public int getCount() {
  final int count;
  synchronized (libraryList) {
    count = libraryList.size();
  }
  return count;
}
/** Receive notification of the start of an element. */ @Override public void startElement(String uri, String l, String q, Attributes a) { /* * 1. Load a class that matches the element name. * 2. If no class found, assume the element maps to a String. * 3. Otherwise, construct a new object of the class with element attributes. */ _logger.fine( S.fine(_logger) ? "Consider element " + l + "\n uri " + uri + "\n q " + q : null); ElementInfo info = new ElementInfo(); // Record java packages defined on this element as xmlns for (int i = 0; i < a.getLength(); ++i) { _logger.fine( S.fine(_logger) ? " attr " + a.getQName(i) + "=" + a.getValue(i) + "\n " + a.getQName(i) + ":" + a.getURI(i) : null); if (a.getQName(i).startsWith("xmlns:") && a.getValue(i).startsWith("java://")) { info.pkgs.put(a.getQName(i).substring(6), a.getValue(i).substring(7)); } } // Resolve the package name of this element, which could be empty (default package) int colon = q.indexOf(':'); if (colon > 0) { String xmlns = q.substring(0, colon); // is it defined right here? info.jpkg = info.pkgs.get(xmlns); // find a matching namespace from ancesters if (info.jpkg == null && !_stack.isEmpty()) { for (int i = _stack.size() - 1; i >= 0; --i) { info.jpkg = _stack.get(i).pkgs.get(xmlns); if (info.jpkg != null) { break; } } } } else if (isPrimitiveType(q)) { info.jpkg = "java.lang"; } else if (!_stack.isEmpty()) { info.jpkg = _stack.get(_stack.size() - 1).jpkg; } else { info.jpkg = _jpkg; } _logger.fine("to create element with package = " + info.jpkg); try { info.name = (info.jpkg != null) ? info.jpkg + '.' 
+ Strings.toCamelCase(l) : Strings.toCamelCase(l); try { if (info.name.endsWith("...")) { // Array construction info.type = Class.forName(info.name.substring(0, info.name.length() - 3)); info.data = new ArrayList<Object>(); } else { // Non-array construction int size = a.getLength(); TypedValueGroup arguments = new TypedValueGroup(); for (int i = 0; i < size; ++i) { if (!a.getQName(i).startsWith("xmlns:") && !a.getQName(i).equals("xmlns")) { arguments.add(guessUntypedValue(a.getQName(i), a.getValue(i))); } } arguments.complete(); _logger.fine(S.fine(_logger) ? "arguments=" + arguments : null); if (arguments.size() > 0) { if (arguments.size() == 1 && "java.lang".equals(info.jpkg)) { info.inst.put( "@as", Strings.toCamelCase( arguments.get(0).name, '-', false)); // respect original spelling info.data = arguments.get(0).get(0).data; info.type = arguments.get(0).get(0).type; } else { Exception last = null; Object[] args = new Object[arguments.size()]; while (arguments.load(args, 0)) { try { _logger.fine( S.fine(_logger) ? 
"to create " + info.name + " with args: " + args.length + args(args) : null); info.data = _factory.create(info.name, args); info.type = info.data.getClass(); break; } catch (InvocationTargetException x) { throw x; } catch (Exception x) { last = x; _logger.fine( "failure in creating " + info.name + ": probing for other constructors"); } } if (info.data == null) { throw last; } } } else { _logger.fine("Create " + info.name + " with the default constructor"); info.data = _factory.create(info.name); info.type = info.data.getClass(); } } } catch (ClassNotFoundException x) { // no class by the element name is found, assumed String if (!_lenient) { throw new BeanAssemblyException("No class associated with element " + q); } else { _logger.log(Level.WARNING, "can't find class " + info.name, x); } } _stack.add(info); // _logger.fine(">>ElementInfo: " + info.type.getName() + " in " + info); // all other exceptions indicate mismatches between the beans and the XML schema } catch (Exception x) { if (!_lenient) { throw new BeanAssemblyException("Failed to assemble bean from element " + q, x); } else { _logger.log(Level.SEVERE, "can't create object for this element", x); } } }
protected void init(Element root) { NodeList children = root.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeName().equals("meta")) { Element meta = (Element) child; String value = XMLUtil.getText(meta); this.metaAttributes.put(meta.getAttribute("name"), value); } } // handle registers - OPTIONAL Element r = XMLUtil.getChildElement(root, "registers"); if (r != null) { List registers = XMLUtil.getChildElements(r, "register"); for (int i = 0; i < registers.size(); i++) { Element register = (Element) registers.get(i); RegisterDescriptor registerDescriptor = DescriptorFactory.getFactory().createRegisterDescriptor(register); registerDescriptor.setParent(this); this.registers.add(registerDescriptor); } } // handle global-conditions - OPTIONAL Element globalConditionsElement = XMLUtil.getChildElement(root, "global-conditions"); if (globalConditionsElement != null) { Element globalConditions = XMLUtil.getChildElement(globalConditionsElement, "conditions"); ConditionsDescriptor conditionsDescriptor = DescriptorFactory.getFactory().createConditionsDescriptor(globalConditions); conditionsDescriptor.setParent(this); this.globalConditions = conditionsDescriptor; } // handle initial-steps - REQUIRED Element intialActionsElement = XMLUtil.getChildElement(root, "initial-actions"); List initialActions = XMLUtil.getChildElements(intialActionsElement, "action"); for (int i = 0; i < initialActions.size(); i++) { Element initialAction = (Element) initialActions.get(i); ActionDescriptor actionDescriptor = DescriptorFactory.getFactory().createActionDescriptor(initialAction); actionDescriptor.setParent(this); this.initialActions.add(actionDescriptor); } // handle global-actions - OPTIONAL Element globalActionsElement = XMLUtil.getChildElement(root, "global-actions"); if (globalActionsElement != null) { List globalActions = XMLUtil.getChildElements(globalActionsElement, "action"); for (int i = 0; i < globalActions.size(); 
i++) { Element globalAction = (Element) globalActions.get(i); ActionDescriptor actionDescriptor = DescriptorFactory.getFactory().createActionDescriptor(globalAction); actionDescriptor.setParent(this); this.globalActions.add(actionDescriptor); } } // handle common-actions - OPTIONAL // - Store actions in HashMap for now. When parsing Steps, we'll resolve // any common actions into local references. Element commonActionsElement = XMLUtil.getChildElement(root, "common-actions"); if (commonActionsElement != null) { List commonActions = XMLUtil.getChildElements(commonActionsElement, "action"); for (int i = 0; i < commonActions.size(); i++) { Element commonAction = (Element) commonActions.get(i); ActionDescriptor actionDescriptor = DescriptorFactory.getFactory().createActionDescriptor(commonAction); actionDescriptor.setParent(this); addCommonAction(actionDescriptor); } } // handle timer-functions - OPTIONAL Element timerFunctionsElement = XMLUtil.getChildElement(root, "trigger-functions"); if (timerFunctionsElement != null) { List timerFunctions = XMLUtil.getChildElements(timerFunctionsElement, "trigger-function"); for (int i = 0; i < timerFunctions.size(); i++) { Element timerFunction = (Element) timerFunctions.get(i); Integer id = new Integer(timerFunction.getAttribute("id")); FunctionDescriptor function = DescriptorFactory.getFactory() .createFunctionDescriptor(XMLUtil.getChildElement(timerFunction, "function")); function.setParent(this); this.timerFunctions.put(id, function); } } // handle steps - REQUIRED Element stepsElement = XMLUtil.getChildElement(root, "steps"); List steps = XMLUtil.getChildElements(stepsElement, "step"); for (int i = 0; i < steps.size(); i++) { Element step = (Element) steps.get(i); StepDescriptor stepDescriptor = DescriptorFactory.getFactory().createStepDescriptor(step, this); this.steps.add(stepDescriptor); } // handle splits - OPTIONAL Element splitsElement = XMLUtil.getChildElement(root, "splits"); if (splitsElement != null) { List split = 
XMLUtil.getChildElements(splitsElement, "split"); for (int i = 0; i < split.size(); i++) { Element s = (Element) split.get(i); SplitDescriptor splitDescriptor = DescriptorFactory.getFactory().createSplitDescriptor(s); splitDescriptor.setParent(this); this.splits.add(splitDescriptor); } } // handle joins - OPTIONAL: Element joinsElement = XMLUtil.getChildElement(root, "joins"); if (joinsElement != null) { List join = XMLUtil.getChildElements(joinsElement, "join"); for (int i = 0; i < join.size(); i++) { Element s = (Element) join.get(i); JoinDescriptor joinDescriptor = DescriptorFactory.getFactory().createJoinDescriptor(s); joinDescriptor.setParent(this); this.joins.add(joinDescriptor); } } }
/**
 * Writes this workflow as a {@code <workflow>} XML element.
 *
 * <p>Sections are emitted in this order: meta attributes, registers, trigger-functions,
 * global-conditions, initial-actions, global-actions, common-actions, steps, splits and joins.
 * The {@code initial-actions} and {@code steps} sections are always written; the others only
 * when they have content. Every section's closing tag is printed at the same indent as its
 * opening tag.
 *
 * @param out the writer to print to
 * @param indent the indent level of the {@code <workflow>} tag; nested content is written one
 *     level deeper per nesting
 */
public void writeXML(PrintWriter out, int indent) {
  XMLUtil.printIndent(out, indent++);
  out.println("<workflow>");

  Iterator iter = metaAttributes.entrySet().iterator();
  while (iter.hasNext()) {
    Map.Entry entry = (Map.Entry) iter.next();
    XMLUtil.printIndent(out, indent);
    out.print("<meta name=\"");
    out.print(XMLUtil.encode(entry.getKey()));
    out.print("\">");
    out.print(XMLUtil.encode(entry.getValue()));
    out.println("</meta>");
  }

  if (registers.size() > 0) {
    XMLUtil.printIndent(out, indent++);
    out.println("<registers>");
    for (int i = 0; i < registers.size(); i++) {
      RegisterDescriptor register = (RegisterDescriptor) registers.get(i);
      register.writeXML(out, indent);
    }
    XMLUtil.printIndent(out, --indent);
    out.println("</registers>");
  }

  if (timerFunctions.size() > 0) {
    XMLUtil.printIndent(out, indent++);
    out.println("<trigger-functions>");
    Iterator iterator = timerFunctions.entrySet().iterator();
    while (iterator.hasNext()) {
      Map.Entry entry = (Map.Entry) iterator.next();
      XMLUtil.printIndent(out, indent++);
      out.println("<trigger-function id=\"" + entry.getKey() + "\">");
      FunctionDescriptor trigger = (FunctionDescriptor) entry.getValue();
      trigger.writeXML(out, indent);
      XMLUtil.printIndent(out, --indent);
      out.println("</trigger-function>");
    }
    XMLUtil.printIndent(out, --indent);
    out.println("</trigger-functions>");
  }

  if (getGlobalConditions() != null) {
    XMLUtil.printIndent(out, indent++);
    out.println("<global-conditions>");
    getGlobalConditions().writeXML(out, indent);
    // Restore the indent level and indent the closing tag, like every other section;
    // otherwise everything written after this section is shifted one level too deep.
    XMLUtil.printIndent(out, --indent);
    out.println("</global-conditions>");
  }

  XMLUtil.printIndent(out, indent++);
  out.println("<initial-actions>");
  for (int i = 0; i < initialActions.size(); i++) {
    ActionDescriptor action = (ActionDescriptor) initialActions.get(i);
    action.writeXML(out, indent);
  }
  XMLUtil.printIndent(out, --indent);
  out.println("</initial-actions>");

  if (globalActions.size() > 0) {
    XMLUtil.printIndent(out, indent++);
    out.println("<global-actions>");
    for (int i = 0; i < globalActions.size(); i++) {
      ActionDescriptor action = (ActionDescriptor) globalActions.get(i);
      action.writeXML(out, indent);
    }
    XMLUtil.printIndent(out, --indent);
    out.println("</global-actions>");
  }

  if (commonActions.size() > 0) {
    XMLUtil.printIndent(out, indent++);
    out.println("<common-actions>");
    Iterator iterator = getCommonActions().values().iterator();
    while (iterator.hasNext()) {
      ActionDescriptor action = (ActionDescriptor) iterator.next();
      action.writeXML(out, indent);
    }
    XMLUtil.printIndent(out, --indent);
    out.println("</common-actions>");
  }

  XMLUtil.printIndent(out, indent++);
  out.println("<steps>");
  for (int i = 0; i < steps.size(); i++) {
    StepDescriptor step = (StepDescriptor) steps.get(i);
    step.writeXML(out, indent);
  }
  XMLUtil.printIndent(out, --indent);
  out.println("</steps>");

  if (splits.size() > 0) {
    XMLUtil.printIndent(out, indent++);
    out.println("<splits>");
    for (int i = 0; i < splits.size(); i++) {
      SplitDescriptor split = (SplitDescriptor) splits.get(i);
      split.writeXML(out, indent);
    }
    XMLUtil.printIndent(out, --indent);
    out.println("</splits>");
  }

  if (joins.size() > 0) {
    XMLUtil.printIndent(out, indent++);
    out.println("<joins>");
    for (int i = 0; i < joins.size(); i++) {
      JoinDescriptor join = (JoinDescriptor) joins.get(i);
      join.writeXML(out, indent);
    }
    XMLUtil.printIndent(out, --indent);
    out.println("</joins>");
  }

  XMLUtil.printIndent(out, --indent);
  out.println("</workflow>");
}
/** Receive notification of the end of an element. */ @Override public void endElement(String uri, String l, String q) { /* * 1. If current element is a String, update its value from the string buffer. * 2. Add the element to parent. */ ElementInfo element = _stack.remove(_stack.size() - 1); _logger.fine("endElement " + element); if (element.type == null) { _logger.warning("Element " + element.name + " not created "); return; } else if (_chars.length() > 0) { try { injectProperty(element.data, String.class, _chars.toString(), null, null); } catch (Exception x) { if (!_lenient) { throw new BeanAssemblyException( "Failed to set characters to object " + element.type.getName(), x); } else { _logger.warning("Failed to set characters to parent " + element.data); } } } _chars.setLength(0); _logger.fine( "<<ElementInfo: " + element.type.getName() + " in " + element + "\n @as is " + element.inst.get("@as") + "\n @id is " + element.inst.get("@id")); if (List.class.isAssignableFrom(element.data.getClass()) && element.name.endsWith("...")) { List<?> list = (List<?>) element.data; Object array = Array.newInstance(element.type, list.size()); for (int i = 0; i < list.size(); ++i) { Array.set(array, i, list.get(i)); } element.data = array; } String id = element.inst.get("@id"); if (id != null) { // locally stored object - not added to the parent _local.put(id, element); } else if (!_stack.isEmpty()) { // inject into the parent as a property ElementInfo parent = _stack.get(_stack.size() - 1); _logger.fine("Parent is " + parent.data.getClass().getName()); try { String as = element.inst.get("@as"); if (as != null) { injectProperty( parent.data, element.type, element.data, Strings.toCamelCase(as, '-', false), element.args.complete()); } else { injectProperty(parent.data, element.type, element.data, null, element.args.complete()); } } catch (Exception x) { if (!_lenient) { throw new BeanAssemblyException( "Failed to set value " + element.data + " to parent " + parent.data, x); } else { 
_logger.log( Level.WARNING, "Failed to set value " + element.data + " to parent " + parent.data, x); } } } _top = element.data; }
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (qName.equals("patent-document") || qName.equals("fulltext-document")) { nbNPLRef = 0; nbPatentRef = 0; nbAllRef = 0; int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if (name != null) { if (name.equals("lang")) { // Global_Language_Code = value.toLowerCase(); } if (name.equals("doc-number")) { PatentNumber = "EP" + value; } if (name.equals("kind")) { CodeType = value; } if (name.equals("date")) { PublicDate = value; } } } CitedPatentNumber = new ArrayList<String>(); accumulatedText = new StringBuffer(); allContent = new StringBuffer(); accumulator.setLength(0); } else if (qName.equals("description")) { accumulator.setLength(0); } else if (qName.equals("ref") || qName.equals("bibl")) { int length = atts.getLength(); nbAllRef++; // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if (name != null) { if (name.equals("type") || name.equals("typ")) { if (value.equals("npl") || value.equals("book") || value.equals("journal")) { String content = getText(); // we output what has been read so far in the description // we tokenize the text // ArrayList<String> tokens = // StringTokenizer st = new StringTokenizer(content, delimiters, true); List<String> tokenizations = new ArrayList<String>(); try { // TBD: pass a language object to the tokenize method call tokenizations = analyzer.tokenize(content); } catch (Exception e) { LOGGER.debug("Tokenization for XML patent document has failed."); } // int nbTokens = st.countTokens(); int nbTokens = tokenizations.size(); int j = 0; // while (st.hasMoreTokens()) { for (String token : tokenizations) { // String token = st.nextToken().trim(); if 
((token.trim().length() == 0) || (token.equals(" ")) || (token.equals("\t")) || (token.equals("\n")) || (token.equals("\r"))) { continue; } if ((j > (nbTokens - N) && (N != -1)) || (refFound && (j < N) && (N != -1))) { try { accumulatedText.append(token + "\t" + "<other>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } else { try { accumulatedText.append(token + "\t" + "<ignore>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } j++; } accumulator.setLength(0); npl = true; ref = true; } else if (value.equals("patent") || value.equals("pl")) { String content = getText(); // we output what has been read so far in the description // we tokenize the text // ArrayList<String> tokens = // TextUtilities.segment(content,"[("+TextUtilities.punctuations); // StringTokenizer st = new StringTokenizer(content, delimiters, true); List<String> tokenizations = new ArrayList<String>(); try { // TBD: pass a language object to the tokenize method call tokenizations = analyzer.tokenize(content); } catch (Exception e) { LOGGER.debug("Tokenization for XML patent document has failed."); } // int nbTokens = st.countTokens(); int nbTokens = tokenizations.size(); int j = 0; for (String token : tokenizations) { // while (st.hasMoreTokens()) { // String token = st.nextToken().trim(); if ((token.trim().length() == 0) || (token.equals(" ")) || (token.equals("\t")) || (token.equals("\n")) || (token.equals("\r"))) { continue; } if ((j > (nbTokens - N)) | (refFound & (j < N))) { try { accumulatedText.append(token + "\t" + "<other>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } else { try { accumulatedText.append(token + "\t" + "<ignore>\n"); 
allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } j++; } accumulator.setLength(0); npl = false; ref = true; } else { System.out.println("Warning: unknown attribute value for ref or bibl: " + value); ref = false; npl = false; } } } } accumulatorRef.setLength(0); } else if (qName.equals("claim")) { accumulator.setLength(0); } else if (qName.equals("invention-title")) { accumulator.setLength(0); } else if (qName.equals("patcit")) { int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if (name != null) { if (name.equals("ucid")) { cited_number = value; // we normally need to normalize a little bit this patent nummer } } } } }
/**
 * Reports how many entries this group holds.
 *
 * @return the size of {@code _values}
 */
int size() {
  final int count = _values.size();
  return count;
}
public String getText() { String text = accumulator.toString(); if (text.trim().length() == 0) { return ""; } /*text = text.replace("\n", " "); text = text.replace(" ", " ");*/ if (counting) { /* StringTokenizer st = new StringTokenizer(text, delimiters, true); int count = 0; while(st.hasMoreTokens()) { String token = st.nextToken().trim(); if (token.length() == 0) { continue; } count++; } */ int i = currentPatentIndex; int count = text.length(); while (i < patents.size()) { PatentItem currentPatent = patents.get(i); if (currentPatent != null) { int startOffset = currentPatent.getOffsetBegin(); int endOffset = currentPatent.getOffsetEnd(); if ((startOffset >= offset) && (endOffset <= offset + count)) { String context = currentPatent.getContext(); /*System.out.println("OFFSET: " + offset); System.out.println("count: " + count); System.out.println("startOffset: " + startOffset); System.out.println("endOffset: " + endOffset); System.out.println("context: " + context); System.out.println("text: " + text);*/ String target = ""; if (context.charAt(0) == ' ') { target = " <ref type=\"patent\">" + context.substring(1, context.length()) + "</ref>"; } else { target = "<ref type=\"patent\">" + context + "</ref>"; } text = text.replace(context, target); currentPatentIndex = i; } } i++; } // i = currentArticleIndex; i = 0; while (i < articles.size()) { BibDataSet currentArticle = articles.get(i); if (currentArticle != null) { List<Integer> offsets = currentArticle.getOffsets(); int startOffset = -1; int endOffset = -1; String context = currentArticle.getRawBib().trim(); if (offsets.size() > 0) { if (offsets.get(0) != null) { startOffset = offsets.get(0).intValue(); /*StringTokenizer stt = new StringTokenizer(context, delimiters, true); int count2 = 0; while(stt.hasMoreTokens()) { String token2 = stt.nextToken().trim(); if (token2.length() == 0) { continue; } count2++; }*/ // endOffset = offsets.get(1).intValue(); endOffset = startOffset + context.length(); } } // if ( 
(startOffset >= offset) && (endOffset <= offset+count) ) { if ((startOffset >= offset)) { /*System.out.println("OFFSET: " + offset); System.out.println("count: " + count); System.out.println("startOffset: " + startOffset); System.out.println("endOffset: " + endOffset); System.out.println("context: " + context); System.out.println("text: " + text);*/ String target = " <ref type=\"npl\">" + context + "</ref> "; text = text.replace(context, target); currentArticleIndex = i; } } i++; } offset += count; } return text; }
/** * This reports the occurrence of an actual element. It will include the element's attributes, * with the exception of XML vocabulary specific attributes, such as <code> * xmlns:[namespace prefix]</code> and <code>xsi:schemaLocation</code>. * * @param namespaceURI <code>String</code> namespace URI this element is associated with, or an * empty <code>String</code> * @param localName <code>String</code> name of element (with no namespace prefix, if one is * present) * @param qName <code>String</code> XML 1.0 version of element name: [namespace * prefix]:[localName] * @param atts <code>Attributes</code> list for this element * @throws SAXException when things go wrong */ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (suppress) return; Element element = null; if ((namespaceURI != null) && (!namespaceURI.equals(""))) { String prefix = ""; // Determine any prefix on the Element if (!qName.equals(localName)) { int split = qName.indexOf(":"); prefix = qName.substring(0, split); } Namespace elementNamespace = Namespace.getNamespace(prefix, namespaceURI); element = factory.element(localName, elementNamespace); } else { element = factory.element(localName); } // Take leftover declared namespaces and add them to this element's // map of namespaces if (declaredNamespaces.size() > 0) { transferNamespaces(element); } // Handle attributes for (int i = 0, len = atts.getLength(); i < len; i++) { Attribute attribute = null; String attLocalName = atts.getLocalName(i); String attQName = atts.getQName(i); int attType = getAttributeType(atts.getType(i)); // Bypass any xmlns attributes which might appear, as we got // them already in startPrefixMapping(). // This is sometimes necessary when SAXHandler is used with // another source than SAXBuilder, as with JDOMResult. 
if (attQName.startsWith("xmlns:") || attQName.equals("xmlns")) { continue; } // First clause per http://markmail.org/message/2p245ggcjst27xe6 // patch from Mattias Jiderhamn if ("".equals(attLocalName) && attQName.indexOf(":") == -1) { attribute = factory.attribute(attQName, atts.getValue(i), attType); } else if (!attQName.equals(attLocalName)) { String attPrefix = attQName.substring(0, attQName.indexOf(":")); Namespace attNs = Namespace.getNamespace(attPrefix, atts.getURI(i)); attribute = factory.attribute(attLocalName, atts.getValue(i), attType, attNs); } else { attribute = factory.attribute(attLocalName, atts.getValue(i), attType); } factory.setAttribute(element, attribute); } flushCharacters(); if (atRoot) { document.setRootElement(element); // XXX should we use a factory call? atRoot = false; } else { factory.addContent(getCurrentElement(), element); } currentElement = element; }
/**
 * Replaces {@code _index} with a new, zero-filled array holding one slot per entry of {@code
 * _values}.
 *
 * @return this group, so the call can be chained
 */
TypedValueGroup complete() {
  final int slots = _values.size();
  _index = new int[slots];
  return this;
}