public static String saxElementToDebugString(String uri, String qName, Attributes attributes) { // Open start tag final StringBuilder sb = new StringBuilder("<"); sb.append(qName); final Set<String> declaredPrefixes = new HashSet<String>(); mapPrefixIfNeeded(declaredPrefixes, uri, qName, sb); // Attributes if any for (int i = 0; i < attributes.getLength(); i++) { mapPrefixIfNeeded(declaredPrefixes, attributes.getURI(i), attributes.getQName(i), sb); sb.append(' '); sb.append(attributes.getQName(i)); sb.append("=\""); sb.append(attributes.getValue(i)); sb.append('\"'); } // Close start tag sb.append('>'); // Content sb.append("[...]"); // Close element with end tag sb.append("</"); sb.append(qName); sb.append('>'); return sb.toString(); }
/** * @param attributes source attributes * @return new AttributesImpl containing all attributes that were in src attributes and that were * in the default name space. */ public static AttributesImpl getAttributesFromDefaultNamespace(final Attributes attributes) { final AttributesImpl ret = new AttributesImpl(); final int size = attributes.getLength(); for (int i = 0; i < size; i++) { final String ns = attributes.getURI(i); if (!"".equals(ns)) continue; final String lnam = attributes.getLocalName(i); final String qnam = attributes.getQName(i); final String typ = attributes.getType(i); final String val = attributes.getValue(i); ret.addAttribute(ns, lnam, qnam, typ, val); } return ret; }
protected void collectExtensionAttributes(Attributes attributes) { for (int i = 0; i < attributes.getLength(); i++) { String key = attributes.getURI(i); if (key.length() == 0 || key.startsWith("http://www.osgi.org/xmlns/metatype/v")) // $NON-NLS-1$ continue; Map<String, String> value = extensionAttributes.get(key); if (value == null) { value = new HashMap<String, String>(); extensionAttributes.put(key, value); } value.put( getName(attributes.getLocalName(i), attributes.getQName(i)), attributes.getValue(i)); } }
private Map attributeMap(String tagName, Attributes atts) throws ParserException { if (null == tagName || null == atts) return null; Map mapping = null; try { mapping = (Map) attributeMaps.get(tagName); } catch (Exception e) { throw new ParserException( "Typecast error, unknown element found in attribute list mappings! " + e.getMessage()); } if (null == mapping) return null; Map resultMapping = new HashMap(); for (int i = 0; i < atts.getLength(); i++) { String xmlName = atts.getQName(i); String value = atts.getValue(i); TagMap.AttributeMapping aMap = null; try { aMap = (TagMap.AttributeMapping) mapping.get(xmlName); } catch (Exception e) { throw new ParserException( "Typecast error, unknown element found in property mapping! " + e.getMessage()); } if (null == aMap) throw new ParserException( "No attribute mapping specified for attribute: " + xmlName + " in tag: " + tagName); String propertyName = aMap.getPropertyName(); try { resultMapping.put(propertyName, aMap.convertValue(value)); } catch (Exception e) { throw new ParserException( "Can not convert given value: \"" + value + "\" to specified type: " + aMap.getType() + " for attribute: " + xmlName + " in tag: " + tagName + "! " + e.getMessage()); } } checkForRequiredAttributes(tagName, resultMapping, mapping); addDefaultValues(resultMapping, mapping); return resultMapping; }
public static AttributesImpl removeAttribute( Attributes attributes, String uri, String localname) { final AttributesImpl newAttributes = new AttributesImpl(); for (int i = 0; i < attributes.getLength(); i++) { final String attributeURI = attributes.getURI(i); final String attributeValue = attributes.getValue(i); final String attributeType = attributes.getType(i); final String attributeQName = attributes.getQName(i); final String attributeLocalname = attributes.getLocalName(i); if (!uri.equals(attributeURI) || !localname.equals(attributeLocalname)) { // Not a matched attribute newAttributes.addAttribute( attributeURI, attributeLocalname, attributeQName, attributeType, attributeValue); } } return newAttributes; }
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { try { // we output the remaining text if (!counting) { writer.write(getText()); accumulator.setLength(0); } if (!counting) { writer.write("<" + qName); int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { writer.write(" " + name + "=\"" + value + "\""); } } writer.write(">"); } if (qName.equals("description")) { offset = 0; counting = true; } else if (qName.equals("patent-document")) { counting = false; } } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } }
public static AttributesImpl addOrReplaceAttribute( Attributes attributes, String uri, String prefix, String localname, String value) { final AttributesImpl newAttributes = new AttributesImpl(); boolean replaced = false; for (int i = 0; i < attributes.getLength(); i++) { final String attributeURI = attributes.getURI(i); final String attributeValue = attributes.getValue(i); final String attributeType = attributes.getType(i); final String attributeQName = attributes.getQName(i); final String attributeLocalname = attributes.getLocalName(i); if (uri.equals(attributeURI) && localname.equals(attributeLocalname)) { // Found existing attribute replaced = true; newAttributes.addAttribute( uri, localname, XMLUtils.buildQName(prefix, localname), ContentHandlerHelper.CDATA, value); } else { // Not a matched attribute newAttributes.addAttribute( attributeURI, attributeLocalname, attributeQName, attributeType, attributeValue); } } if (!replaced) { // Attribute did not exist already so add it newAttributes.addAttribute( uri, localname, XMLUtils.buildQName(prefix, localname), ContentHandlerHelper.CDATA, value); } return newAttributes; }
/** * This reports the occurrence of an actual element. It will include the element's attributes, * with the exception of XML vocabulary specific attributes, such as <code> * xmlns:[namespace prefix]</code> and <code>xsi:schemaLocation</code>. * * @param namespaceURI <code>String</code> namespace URI this element is associated with, or an * empty <code>String</code> * @param localName <code>String</code> name of element (with no namespace prefix, if one is * present) * @param qName <code>String</code> XML 1.0 version of element name: [namespace * prefix]:[localName] * @param atts <code>Attributes</code> list for this element * @throws SAXException when things go wrong */ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (suppress) return; Element element = null; if ((namespaceURI != null) && (!namespaceURI.equals(""))) { String prefix = ""; // Determine any prefix on the Element if (!qName.equals(localName)) { int split = qName.indexOf(":"); prefix = qName.substring(0, split); } Namespace elementNamespace = Namespace.getNamespace(prefix, namespaceURI); element = factory.element(localName, elementNamespace); } else { element = factory.element(localName); } // Take leftover declared namespaces and add them to this element's // map of namespaces if (declaredNamespaces.size() > 0) { transferNamespaces(element); } // Handle attributes for (int i = 0, len = atts.getLength(); i < len; i++) { Attribute attribute = null; String attLocalName = atts.getLocalName(i); String attQName = atts.getQName(i); int attType = getAttributeType(atts.getType(i)); // Bypass any xmlns attributes which might appear, as we got // them already in startPrefixMapping(). // This is sometimes necessary when SAXHandler is used with // another source than SAXBuilder, as with JDOMResult. if (attQName.startsWith("xmlns:") || attQName.equals("xmlns")) { continue; } // First clause per http://markmail.org/message/2p245ggcjst27xe6 // patch from Mattias Jiderhamn if ("".equals(attLocalName) && attQName.indexOf(":") == -1) { attribute = factory.attribute(attQName, atts.getValue(i), attType); } else if (!attQName.equals(attLocalName)) { String attPrefix = attQName.substring(0, attQName.indexOf(":")); Namespace attNs = Namespace.getNamespace(attPrefix, atts.getURI(i)); attribute = factory.attribute(attLocalName, atts.getValue(i), attType, attNs); } else { attribute = factory.attribute(attLocalName, atts.getValue(i), attType); } factory.setAttribute(element, attribute); } flushCharacters(); if (atRoot) { document.setRootElement(element); // XXX should we use a factory call? atRoot = false; } else { factory.addContent(getCurrentElement(), element); } currentElement = element; }
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (qName.equals("patent-document") || qName.equals("fulltext-document")) { nbNPLRef = 0; nbPatentRef = 0; nbAllRef = 0; int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if (name != null) { if (name.equals("lang")) { // Global_Language_Code = value.toLowerCase(); } if (name.equals("doc-number")) { PatentNumber = "EP" + value; } if (name.equals("kind")) { CodeType = value; } if (name.equals("date")) { PublicDate = value; } } } CitedPatentNumber = new ArrayList<String>(); accumulatedText = new StringBuffer(); allContent = new StringBuffer(); accumulator.setLength(0); } else if (qName.equals("description")) { accumulator.setLength(0); } else if (qName.equals("ref") || qName.equals("bibl")) { int length = atts.getLength(); nbAllRef++; // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if (name != null) { if (name.equals("type") || name.equals("typ")) { if (value.equals("npl") || value.equals("book") || value.equals("journal")) { String content = getText(); // we output what has been read so far in the description // we tokenize the text // ArrayList<String> tokens = // StringTokenizer st = new StringTokenizer(content, delimiters, true); List<String> tokenizations = new ArrayList<String>(); try { // TBD: pass a language object to the tokenize method call tokenizations = analyzer.tokenize(content); } catch (Exception e) { LOGGER.debug("Tokenization for XML patent document has failed."); } // int nbTokens = st.countTokens(); int nbTokens = tokenizations.size(); int j = 0; // while (st.hasMoreTokens()) { for (String token : tokenizations) { // String token = st.nextToken().trim(); if ((token.trim().length() == 0) || (token.equals(" ")) || (token.equals("\t")) || (token.equals("\n")) || (token.equals("\r"))) { continue; } if ((j > (nbTokens - N) && (N != -1)) || (refFound && (j < N) && (N != -1))) { try { accumulatedText.append(token + "\t" + "<other>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } else { try { accumulatedText.append(token + "\t" + "<ignore>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } j++; } accumulator.setLength(0); npl = true; ref = true; } else if (value.equals("patent") || value.equals("pl")) { String content = getText(); // we output what has been read so far in the description // we tokenize the text // ArrayList<String> tokens = // TextUtilities.segment(content,"[("+TextUtilities.punctuations); // StringTokenizer st = new StringTokenizer(content, delimiters, true); List<String> tokenizations = new ArrayList<String>(); try { // TBD: pass a language object to the tokenize method call tokenizations = analyzer.tokenize(content); } catch (Exception e) { LOGGER.debug("Tokenization for XML patent document has failed."); } // int nbTokens = st.countTokens(); int nbTokens = tokenizations.size(); int j = 0; for (String token : tokenizations) { // while (st.hasMoreTokens()) { // String token = st.nextToken().trim(); if ((token.trim().length() == 0) || (token.equals(" ")) || (token.equals("\t")) || (token.equals("\n")) || (token.equals("\r"))) { continue; } if ((j > (nbTokens - N)) | (refFound & (j < N))) { try { accumulatedText.append(token + "\t" + "<other>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } else { try { accumulatedText.append(token + "\t" + "<ignore>\n"); allContent.append(token + " "); } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } } j++; } accumulator.setLength(0); npl = false; ref = true; } else { System.out.println("Warning: unknown attribute value for ref or bibl: " + value); ref = false; npl = false; } } } } accumulatorRef.setLength(0); } else if (qName.equals("claim")) { accumulator.setLength(0); } else if (qName.equals("invention-title")) { accumulator.setLength(0); } else if (qName.equals("patcit")) { int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if (name != null) { if (name.equals("ucid")) { cited_number = value; // we normally need to normalize a little bit this patent nummer } } } } }
/** Receive notification of the start of an element. */ @Override public void startElement(String uri, String l, String q, Attributes a) { /* * 1. Load a class that matches the element name. * 2. If no class found, assume the element maps to a String. * 3. Otherwise, construct a new object of the class with element attributes. */ _logger.fine( S.fine(_logger) ? "Consider element " + l + "\n uri " + uri + "\n q " + q : null); ElementInfo info = new ElementInfo(); // Record java packages defined on this element as xmlns for (int i = 0; i < a.getLength(); ++i) { _logger.fine( S.fine(_logger) ? " attr " + a.getQName(i) + "=" + a.getValue(i) + "\n " + a.getQName(i) + ":" + a.getURI(i) : null); if (a.getQName(i).startsWith("xmlns:") && a.getValue(i).startsWith("java://")) { info.pkgs.put(a.getQName(i).substring(6), a.getValue(i).substring(7)); } } // Resolve the package name of this element, which could be empty (default package) int colon = q.indexOf(':'); if (colon > 0) { String xmlns = q.substring(0, colon); // is it defined right here? info.jpkg = info.pkgs.get(xmlns); // find a matching namespace from ancesters if (info.jpkg == null && !_stack.isEmpty()) { for (int i = _stack.size() - 1; i >= 0; --i) { info.jpkg = _stack.get(i).pkgs.get(xmlns); if (info.jpkg != null) { break; } } } } else if (isPrimitiveType(q)) { info.jpkg = "java.lang"; } else if (!_stack.isEmpty()) { info.jpkg = _stack.get(_stack.size() - 1).jpkg; } else { info.jpkg = _jpkg; } _logger.fine("to create element with package = " + info.jpkg); try { info.name = (info.jpkg != null) ? info.jpkg + '.' + Strings.toCamelCase(l) : Strings.toCamelCase(l); try { if (info.name.endsWith("...")) { // Array construction info.type = Class.forName(info.name.substring(0, info.name.length() - 3)); info.data = new ArrayList<Object>(); } else { // Non-array construction int size = a.getLength(); TypedValueGroup arguments = new TypedValueGroup(); for (int i = 0; i < size; ++i) { if (!a.getQName(i).startsWith("xmlns:") && !a.getQName(i).equals("xmlns")) { arguments.add(guessUntypedValue(a.getQName(i), a.getValue(i))); } } arguments.complete(); _logger.fine(S.fine(_logger) ? "arguments=" + arguments : null); if (arguments.size() > 0) { if (arguments.size() == 1 && "java.lang".equals(info.jpkg)) { info.inst.put( "@as", Strings.toCamelCase( arguments.get(0).name, '-', false)); // respect original spelling info.data = arguments.get(0).get(0).data; info.type = arguments.get(0).get(0).type; } else { Exception last = null; Object[] args = new Object[arguments.size()]; while (arguments.load(args, 0)) { try { _logger.fine( S.fine(_logger) ? "to create " + info.name + " with args: " + args.length + args(args) : null); info.data = _factory.create(info.name, args); info.type = info.data.getClass(); break; } catch (InvocationTargetException x) { throw x; } catch (Exception x) { last = x; _logger.fine( "failure in creating " + info.name + ": probing for other constructors"); } } if (info.data == null) { throw last; } } } else { _logger.fine("Create " + info.name + " with the default constructor"); info.data = _factory.create(info.name); info.type = info.data.getClass(); } } } catch (ClassNotFoundException x) { // no class by the element name is found, assumed String if (!_lenient) { throw new BeanAssemblyException("No class associated with element " + q); } else { _logger.log(Level.WARNING, "can't find class " + info.name, x); } } _stack.add(info); // _logger.fine(">>ElementInfo: " + info.type.getName() + " in " + info); // all other exceptions indicate mismatches between the beans and the XML schema } catch (Exception x) { if (!_lenient) { throw new BeanAssemblyException("Failed to assemble bean from element " + q, x); } else { _logger.log(Level.SEVERE, "can't create object for this element", x); } } }
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (qName.equals("PAGE")) { int length = atts.getLength(); currentPage++; // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { if (name.equals("id")) {; } else if (name.equals("number")) { } else if (name.equals("width")) { } else if (name.equals("height")) { } } } /* * if (block != null) { blabla.append("\n"); * tokenizations.add("\n"); block.setText(blabla.toString()); * block.setNbTokens(nbTokens); doc.addBlock(block); } Block block0 * = new Block(); block0.setText("@PAGE\n"); block0.setNbTokens(0); * doc.addBlock(block0); */ /* * block = new Block(); blabla = new StringBuffer(); nbTokens = 0; * //blabla.append("\n@block\n"); tokenizations.add("\n"); */ } else if (qName.equals("BLOCK")) { block = new Block(); blabla = new StringBuffer(); nbTokens = 0; block.setPage(currentPage); // blabla.append("\n@block\n"); } else if (qName.equals("IMAGE")) { int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { if (name.equals("href")) { // if (images == null) // images = new ArrayList<String>(); images.add(value); } else if (name.equals("x")) { double x = Double.parseDouble(value); if (x != currentX) { currentX = x; } } else if (name.equals("y")) { double y = Double.parseDouble(value); if (y != currentY) { currentY = y; } } else if (name.equals("width")) { double width = Double.parseDouble(value); if (width != currentWidth) { currentWidth = width; } } else if (name.equals("height")) { double height = Double.parseDouble(value); if (height != currentHeight) { currentHeight = height; } } } } } else if (qName.equals("TEXT")) { int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { if (name.equals("id")) { } else if (name.equals("x")) { } else if (name.equals("y")) { } else if (name.equals("width")) { } else if (name.equals("height")) { } } } } else if (qName.equals("TOKEN")) { int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { if (name.equals("id")) {; } else if (name.equals("font-name")) { if (!value.equals(currentFont)) { currentFont = value; blabla.append(" "); } } else if (name.equals("font-size")) { double fontSize = Double.parseDouble(value); if (fontSize != currentFontSize) { currentFontSize = fontSize; blabla.append(" "); } } else if (name.equals("bold")) { if (value.equals("yes")) { currentBold = true; } else { currentBold = false; } } else if (name.equals("italic")) { if (value.equals("yes")) { currentItalic = true; } else { currentItalic = false; } } else if (name.equals("font-color")) { if (!value.equals(colorFont)) { colorFont = value; } } else if (name.equals("rotation")) { if (value.equals("0")) currentRotation = false; else currentRotation = true; } else if (name.equals("x")) { double x = Double.parseDouble(value); if (x != currentX) { currentX = x; } } else if (name.equals("y")) { double y = Double.parseDouble(value); if (y != currentY) { currentY = y; } } else if (name.equals("base")) { double base = Double.parseDouble(value); } else if (name.equals("width")) { double width = Double.parseDouble(value); if (width != currentWidth) { currentWidth = width; } } else if (name.equals("height")) { double height = Double.parseDouble(value); if (height != currentHeight) { currentHeight = height; } } } } } else if (qName.equals("xi:include")) { int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { if (name.equals("href")) { // if (images == null) // images = new ArrayList<String>(); images.add(value); } } } } // accumulator.setLength(0); }