public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws SAXException { try { if (qName.equals("p") || qName.equals("description")) { writer.write(getText()); accumulator.setLength(0); } if (qName.equals("description")) { counting = false; } if (!counting) { writer.write(getText()); accumulator.setLength(0); writer.write("</" + qName + ">\n"); } else { if (qName.equals("row")) { accumulator.append(" "); } if (qName.equals("p")) { writer.write("\n"); accumulator.append(" "); } } } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } }
public static void main(String[] args) { if (args.length <= 0) { System.out.println("Usage: java ExtractorDriver url"); return; } try { XMLReader parser = XMLReaderFactory.createXMLReader(); // Since this just writes onto the console, it's best // to use the system default encoding, which is what // we get by not specifying an explicit encoding here. Writer out = new OutputStreamWriter(System.out); ContentHandler handler = new TextExtractor(out); parser.setContentHandler(handler); // parser.parse("file:////c://myxml.xml"); parser.parse(args[0]); out.flush(); } catch (Exception e) { System.err.println(e); } }
// Wrap I/O exceptions in SAX exceptions, to // suit handler signature requirements private void emit(String s) throws SAXException { try { out.write(s); out.flush(); } catch (IOException e) { throw new SAXException("I/O error", e); } }
public void endDocument() throws SAXException { try { out.write(eol); out.flush(); out = null; } catch (IOException e) { throw new SAXException("I/O error", e); // NOT LOCALIZABLE } }
// Start a new line // and indent the next line appropriately private void nl() throws SAXException { String lineEnd = System.getProperty("line.separator"); try { out.write(lineEnd); for (int i = 0; i < indentLevel; i++) out.write(indentString); } catch (IOException e) { throw new SAXException("I/O error", e); } }
// helpers ... wrap I/O exceptions in SAX exceptions, to // suit handler signature requirements private void emit(String s) throws SAXException { try { if (s.equals("MULTIPLICITY")) { // NOT LOCALIZABLE int i = 0; } out.write(s); out.flush(); } catch (IOException e) { throw new SAXException("I/O error", e); // NOT LOCALIZABLE } }
/**
 * SAX end-of-document callback: starts a final new line via {@code nl()} and then flushes
 * {@code out}.
 *
 * @throws SAXException if {@code nl()} fails, or wrapping an I/O failure from the flush
 */
public void endDocument() throws SAXException {
    nl();
    try {
        out.flush();
    } catch (IOException ioFailure) {
        throw new SAXException("I/O error", ioFailure);
    }
}
// =========================================================== // SAX DocumentHandler methods // =========================================================== public void setDocumentLocator(Locator l) { // Save this to resolve relative URIs or to give diagnostics. try { out.flush(); } catch (IOException e) { // Ignore errors } }
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { try { // we output the remaining text if (!counting) { writer.write(getText()); accumulator.setLength(0); } if (!counting) { writer.write("<" + qName); int length = atts.getLength(); // Process each attribute for (int i = 0; i < length; i++) { // Get names and values for each attribute String name = atts.getQName(i); String value = atts.getValue(i); if ((name != null) && (value != null)) { writer.write(" " + name + "=\"" + value + "\""); } } writer.write(">"); } if (qName.equals("description")) { offset = 0; counting = true; } else if (qName.equals("patent-document")) { counting = false; } } catch (Exception e) { // e.printStackTrace(); throw new GrobidException("An exception occured while running Grobid.", e); } }