/**
 * Handles the end of an XML element.
 *
 * <p>When a {@code p} or {@code description} element closes, the text returned by
 * {@code getText()} is written out and the accumulator is emptied. Closing a
 * {@code description} also switches {@code counting} off. While {@code counting} is off the
 * remaining text and the closing tag (followed by a newline) are echoed to the writer; while
 * it is on, a space is appended to the accumulator after a {@code row}, and a newline is
 * written plus a space appended after a {@code p}.
 *
 * @param uri namespace URI of the element (unused here)
 * @param localName local name of the element (unused here)
 * @param qName qualified name of the element being closed
 * @throws SAXException declared by the handler signature
 */
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName)
      throws SAXException {
    try {
      final boolean closingParagraph = qName.equals("p");
      final boolean closingDescription = qName.equals("description");

      // Paragraph and description boundaries flush whatever text has been gathered so far.
      if (closingParagraph || closingDescription) {
        writer.write(getText());
        accumulator.setLength(0);
      }

      // Leaving the description ends the counting mode.
      if (closingDescription) {
        counting = false;
      }

      if (counting) {
        // Inside the counted region no tags are echoed; only separators are inserted.
        if (qName.equals("row")) {
          accumulator.append(" ");
        }
        if (closingParagraph) {
          writer.write("\n");
          accumulator.append(" ");
        }
        return;
      }

      // Outside the counted region: echo pending text followed by the closing tag.
      writer.write(getText());
      accumulator.setLength(0);
      writer.write("</" + qName + ">\n");
    } catch (Exception failure) {
      // Any failure (I/O or otherwise) is reported as a GrobidException carrying the cause.
      throw new GrobidException("An exception occured while running Grobid.", failure);
    }
  }
예제 #2
0
  /**
   * Command-line entry point: parses the XML document named by the first argument and routes
   * the SAX events to a {@code TextExtractor} that writes to standard output.
   *
   * <p>With no arguments a usage line is printed and the method returns. Any exception raised
   * while setting up or running the parse is printed to standard error.
   *
   * @param args command-line arguments; {@code args[0]} is passed to the parser as the
   *     document to read
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.out.println("Usage: java ExtractorDriver url");
    } else {
      try {
        XMLReader reader = XMLReaderFactory.createXMLReader();

        // Output goes to the console, so the platform default encoding is
        // intentionally used: no explicit charset is given to the writer.
        Writer console = new OutputStreamWriter(System.out);
        reader.setContentHandler(new TextExtractor(console));

        reader.parse(args[0]);

        console.flush();
      } catch (Exception failure) {
        System.err.println(failure);
      }
    }
  }
예제 #3
0
 /**
  * Writes {@code s} to {@code out} and flushes immediately.
  *
  * <p>I/O failures are rethrown as {@link SAXException} so that the SAX handler
  * methods calling this helper can keep their required signatures.
  *
  * @param s the text to write
  * @throws SAXException wrapping the underlying {@link IOException}
  */
 private void emit(String s) throws SAXException {
   try {
     out.write(s);
     out.flush();
   } catch (IOException ioFailure) {
     throw new SAXException("I/O error", ioFailure);
   }
 }
예제 #4
0
 /**
  * Finishes the document: writes {@code eol}, flushes {@code out}, and then drops the
  * reference to {@code out} by setting it to {@code null}.
  *
  * @throws SAXException wrapping the {@link IOException} if writing or flushing fails; in
  *     that case {@code out} is left untouched
  */
 public void endDocument() throws SAXException {
   try {
     out.write(eol);
     out.flush();
     // Only reached when the write and flush succeeded.
     out = null;
   } catch (IOException ioFailure) {
     throw new SAXException("I/O error", ioFailure); // NOT LOCALIZABLE
   }
 }
예제 #5
0
  /**
   * Starts a new output line and indents it: writes the {@code line.separator} system
   * property followed by {@code indentString} repeated {@code indentLevel} times.
   *
   * @throws SAXException wrapping the {@link IOException} if writing fails
   */
  private void nl() throws SAXException {
    final String separator = System.getProperty("line.separator");

    try {
      out.write(separator);

      int depth = 0;
      while (depth < indentLevel) {
        out.write(indentString);
        depth++;
      }
    } catch (IOException ioFailure) {
      throw new SAXException("I/O error", ioFailure);
    }
  }
예제 #6
0
  /**
   * Writes {@code s} to {@code out} and flushes immediately.
   *
   * <p>I/O failures are rethrown as {@link SAXException} so that the SAX handler methods
   * calling this helper can keep their required signatures.
   *
   * @param s the text to write; must not be {@code null}
   * @throws SAXException wrapping the underlying {@link IOException}
   * @throws NullPointerException if {@code s} is {@code null}
   */
  private void emit(String s) throws SAXException {
    // The former "MULTIPLICITY" comparison only declared an unused local (a leftover
    // debugger hook) and has been removed. Its one observable effect, failing fast on a
    // null argument before anything is written, is kept explicitly.
    if (s == null) {
      throw new NullPointerException("s");
    }

    try {
      out.write(s);
      out.flush();
    } catch (IOException e) {
      throw new SAXException("I/O error", e); // NOT LOCALIZABLE
    }
  }
예제 #7
0
 /**
  * Finishes the document by starting a final new line via {@code nl()} and flushing
  * {@code out}.
  *
  * @throws SAXException if {@code nl()} fails, or wrapping the {@link IOException} raised
  *     by the flush
  */
 public void endDocument() throws SAXException {
   nl();

   try {
     out.flush();
   } catch (IOException ioFailure) {
     throw new SAXException("I/O error", ioFailure);
   }
 }
예제 #8
0
 // ===========================================================
 // SAX DocumentHandler methods
 // ===========================================================
 /**
  * Receives the document locator from the parser.
  *
  * <p>The locator itself is not stored by this implementation; the method only flushes
  * {@code out}. A failed flush is deliberately ignored.
  *
  * @param l the locator supplied by the parser (unused here)
  */
 public void setDocumentLocator(Locator l) {
   try {
     out.flush();
   } catch (IOException ignored) {
     // Best effort: this callback declares no exception, so a failed flush is dropped.
   }
 }
  /**
   * Handles the start of an XML element.
   *
   * <p>While {@code counting} is off, the text returned by {@code getText()} is written out,
   * the accumulator is emptied, and the opening tag is echoed to the writer together with
   * every attribute whose qualified name and value are both non-null. Attribute values are
   * XML-escaped so the echoed tag stays well-formed. Afterwards, a {@code description}
   * element resets {@code offset} to 0 and switches {@code counting} on, while a
   * {@code patent-document} element switches {@code counting} off.
   *
   * @param namespaceURI namespace URI of the element (unused here)
   * @param localName local name of the element (unused here)
   * @param qName qualified name of the element being opened
   * @param atts attributes attached to the element
   * @throws SAXException declared by the handler signature
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
      throws SAXException {
    try {
      if (!counting) {
        // Output the remaining text before opening the new tag.
        writer.write(getText());
        accumulator.setLength(0);

        writer.write("<" + qName);

        int length = atts.getLength();
        for (int i = 0; i < length; i++) {
          String name = atts.getQName(i);
          String value = atts.getValue(i);

          if ((name != null) && (value != null)) {
            // SAX hands over attribute values with entities already resolved, so they
            // must be escaped again before being placed inside double quotes.
            writer.write(" " + name + "=\"" + escapeAttributeValue(value) + "\"");
          }
        }

        writer.write(">");
      }

      // The mode switch happens after the echo, so the <description> tag itself is still
      // written when counting was off on entry.
      if (qName.equals("description")) {
        offset = 0;
        counting = true;
      } else if (qName.equals("patent-document")) {
        counting = false;
      }
    } catch (Exception e) {
      // Any failure (I/O or otherwise) is reported as a GrobidException carrying the cause.
      throw new GrobidException("An exception occured while running Grobid.", e);
    }
  }

  /**
   * Escapes the characters that may not appear literally inside a double-quoted XML
   * attribute value ({@code &}, {@code <}, {@code >} and {@code "}).
   *
   * @param value the raw attribute value; must not be {@code null}
   * @return the value with those characters replaced by their predefined entities
   */
  private String escapeAttributeValue(String value) {
    StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        default:
          escaped.append(c);
          break;
      }
    }
    return escaped.toString();
  }