Beispiel #1
0
  /**
   * Copies the contents of an input source to a file.
   *
   * <p>The source is consulted in this order: its character stream, then its byte stream, then
   * its system id. If none of the three is set, the target file is still created via the
   * {@link PrintOutput} but nothing is written to it.
   *
   * @param in input source to read from
   * @param file target file to write to
   * @throws IOException I/O exception
   */
  public static void store(final InputSource in, final IOFile file) throws IOException {
    // create the parent directory first (per the original note: only if it does not exist yet)
    file.dir().md();

    final PrintOutput out = new PrintOutput(file.path());
    try {
      final Reader reader = in.getCharacterStream();
      final InputStream stream = in.getByteStream();
      final String systemId = in.getSystemId();

      if (reader != null) {
        // characters are written through the UTF-8 encoder of the output
        int ch = reader.read();
        while (ch != -1) {
          out.utf8(ch);
          ch = reader.read();
        }
      } else if (stream != null) {
        // raw bytes are copied unchanged
        int b = stream.read();
        while (b != -1) {
          out.write(b);
          b = stream.read();
        }
      } else if (systemId != null) {
        // no stream supplied: open the resource addressed by the system id
        final BufferInput input = new BufferInput(IO.get(systemId));
        try {
          int b = input.read();
          while (b != -1) {
            out.write(b);
            b = input.read();
          }
        } finally {
          input.close();
        }
      }
    } finally {
      out.close();
    }
  }
Beispiel #2
0
  /**
   * Creates a SAX input source that reads the given file as a byte stream and carries the
   * file's URL (as produced by {@code FileUtils.createFileURL}) as its system id.
   *
   * @param pAppFile file to read
   * @return input source backed by a newly opened {@link FileInputStream}; the caller is
   *     responsible for closing the stream
   * @throws FileNotFoundException if the file cannot be opened for reading
   */
  private static InputSource getConfigSource(File pAppFile) throws FileNotFoundException {
    final String systemId = FileUtils.createFileURL(pAppFile);
    final InputSource source = new InputSource(new FileInputStream(pAppFile));
    source.setSystemId(systemId);
    return source;
  }
Beispiel #3
0
 /**
  * Reads characters from a reader into SAX events by wrapping the reader in an
  * {@link InputSource} and delegating to {@code inputSourceToSAX}.
  *
  * @param reader character stream to parse
  * @param systemId system id to set on the input source
  * @param xmlReceiver receiver to output to
  * @param parserConfiguration parser configuration
  * @param handleLexical passed through unchanged to {@code inputSourceToSAX}
  */
 public static void readerToSAX(
     Reader reader,
     String systemId,
     XMLReceiver xmlReceiver,
     XMLUtils.ParserConfiguration parserConfiguration,
     boolean handleLexical) {
   final InputSource source = new InputSource();
   source.setCharacterStream(reader);
   source.setSystemId(systemId);
   inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
 }
Beispiel #4
0
 /**
  * Creates a SAX input source for the configuration resource {@code CONFIG_FILE_NAME},
  * looked up relative to the given class.
  *
  * @param pAppClass class whose resource lookup is used to locate the configuration
  * @return input source backed by the resource stream; the caller is responsible for closing it
  * @throws DataNotFoundException if the resource cannot be found
  */
 private static InputSource getConfigSource(Class pAppClass) throws DataNotFoundException {
   URL resource = pAppClass.getResource(CONFIG_FILE_NAME);
   // Class.getResource returns null for a missing resource; check before dereferencing so the
   // caller gets the declared DataNotFoundException instead of a NullPointerException.
   InputStream configResourceStream =
       (null == resource) ? null : pAppClass.getResourceAsStream(CONFIG_FILE_NAME);
   if (null == configResourceStream) {
     throw new DataNotFoundException(
         "unable to find XML configuration file resource: "
             + CONFIG_FILE_NAME
             + " for class: "
             + pAppClass.getName());
   }

   String resourceFileName = resource.toExternalForm();
   File resourceFile = new File(resourceFileName);
   InputSource inputSource = new InputSource(configResourceStream);
   // NOTE(review): resourceFileName is a URL string (e.g. "file:/..."), not a plain path, so
   // this existence check is presumably false in most cases — behavior kept as-is; confirm
   // the intent.
   if (!resourceFile.exists()) {
     inputSource.setSystemId(resourceFileName);
   }
   return inputSource;
 }
Beispiel #5
0
  /**
   * Parses the RDF document at the given file name or URL into the given model.
   *
   * <p>The argument is normalized with {@code normalizeURI}, the resulting URL is recorded as
   * the model's source URI and as the system id of the input source, and the parser feeds a
   * {@code ModelConsumer} wrapping the model.
   *
   * @param fileNameOrURL file name or URL of the document to parse
   * @param parser parser to run
   * @param model model that receives the parsed data
   * @throws IOException if the URL cannot be opened or read
   * @throws SAXException if the parser reports an error
   * @throws MalformedURLException if the normalized argument is not a valid URL
   * @throws ModelException declared by the original signature
   */
  public static void parse(String fileNameOrURL, RDFParser parser, Model model)
      throws IOException, SAXException, MalformedURLException, ModelException {

    URL url = new URL(normalizeURI(fileNameOrURL));

    // Prepare input source
    model.setSourceURI(url.toString());
    InputStream in = url.openStream();
    try {
      InputSource source = new InputSource(in);
      source.setSystemId(url.toString());

      parser.parse(source, new ModelConsumer(model));
    } finally {
      // close the stream even when parsing fails, so the connection is not leaked
      in.close();
    }
  }
Beispiel #6
0
 /**
  * Opens the URL identified by a system id and turns its content into SAX events.
  *
  * <p>The opened stream is always closed once {@code inputSourceToSAX} returns or throws.
  *
  * @param systemId system id of the document; also used to build the URL to open
  * @param xmlReceiver receiver to output to
  * @param parserConfiguration parser configuration
  * @param handleLexical whether the XML parser must output SAX LexicalHandler events, including
  *     comments
  * @throws OXFException wrapping any {@link IOException} raised while opening or closing the
  *     stream
  */
 public static void urlToSAX(
     String systemId,
     XMLReceiver xmlReceiver,
     XMLUtils.ParserConfiguration parserConfiguration,
     boolean handleLexical) {
   try {
     final InputStream stream = URLFactory.createURL(systemId).openStream();
     try {
       final InputSource source = new InputSource(stream);
       source.setSystemId(systemId);
       inputSourceToSAX(source, xmlReceiver, parserConfiguration, handleLexical);
     } finally {
       stream.close();
     }
   } catch (IOException ioException) {
     throw new OXFException(ioException);
   }
 }
Beispiel #7
0
 /**
  * Fetches the given URL via {@code HTMLTools.inputStream_GET} (second argument 5000) and
  * parses the response as an XML document, decoding it with {@code ENCODING_UTF8}.
  *
  * @param url URL to fetch
  * @return the parsed document, or {@code null} if the parser could not be configured, the
  *     content could not be parsed, or an I/O error occurred (the error is logged)
  */
 private Document GetXMLDocument(String url) {
   InputStream input = null;
   try {
     DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
     DocumentBuilder db = dbf.newDocumentBuilder();
     input = HTMLTools.inputStream_GET(url, 5000);
     InputStreamReader reader = new InputStreamReader(input, ENCODING_UTF8);
     InputSource inSrc = new InputSource(reader);
     inSrc.setEncoding(ENCODING_UTF8);
     return db.parse(inSrc);
   } catch (ParserConfigurationException pce) {
     Print.logError("Parse error: " + pce);
     return null;
   } catch (SAXException se) {
     Print.logError("Parse error: " + se);
     return null;
   } catch (IOException ioe) {
     Print.logError("IO error: " + ioe);
     return null;
   } finally {
     // always release the connection's stream, including on parse failures
     if (input != null) {
       try {
         input.close();
       } catch (IOException ignored) {
         // nothing useful can be done if closing fails; the result is already determined
       }
     }
   }
 }
Beispiel #8
0
    /**
     * Resolves an external entity by opening a connection to the URL built from its system id.
     *
     * <p>A warning is logged when the URL's protocol is exactly {@code http}.
     *
     * @param publicId public id of the entity; copied onto the returned source
     * @param systemId system id of the entity; copied onto the returned source and used to
     *     build the URL
     * @return input source whose byte stream reads from the opened connection
     * @throws SAXException declared by the resolver contract
     * @throws IOException if the URL cannot be created or the connection cannot be opened
     */
    public InputSource resolveEntity(String publicId, String systemId)
        throws SAXException, IOException {
      final InputSource source = new InputSource();
      source.setSystemId(systemId);
      source.setPublicId(publicId);

      final URL entityURL = URLFactory.createURL(systemId);

      // Would be nice to support XML Catalogs or similar here. See:
      // http://xerces.apache.org/xerces2-j/faq-xcatalogs.html
      final boolean overHttp = "http".equals(entityURL.getProtocol());
      if (overHttp) {
        logger.warn(
            "XML entity resolver for public id: "
                + publicId
                + " is accessing external entity via HTTP: "
                + entityURL.toExternalForm());
      }

      source.setByteStream(entityURL.openConnection().getInputStream());
      return source;
    }
Beispiel #9
0
    /**
     * SAX entity resolver.
     *
     * <p>An explicit URI mapping from {@code name2uri} is preferred; otherwise the stream
     * returned by {@code mapResource} is used. The returned source always carries {@code name}
     * as its public id.
     *
     * @param name identifier of the entity, used for both lookups
     * @param uri system id supplied by the parser; not used for resolution
     * @return input source for the entity, or {@code null} if neither lookup yields a result
     * @throws IOException declared by the resolver contract
     * @throws SAXException declared by the resolver contract
     */
    public InputSource resolveEntity(String name, String uri) throws IOException, SAXException {

      // prefer explicit URI mappings...
      String mappedURI = name2uri(name);
      if (mappedURI != null) {
        InputSource retval = new InputSource(mappedURI);
        retval.setPublicId(name);
        return retval;
      }

      // ...then bundled resources. The stream is requested only now, so that no stream is
      // obtained and then abandoned unclosed when the URI mapping above already matched.
      InputStream stream = mapResource(name);
      if (stream != null) {
        InputSource retval = new InputSource(stream);
        retval.setPublicId(name);
        return retval;
      }

      return null;
    }
  /**
   * Extracts the full text from an HTML page by running it through the {@code Parser} with an
   * {@code HTMLSchema} and collecting the output of a
   * {@code RejuvenationResearchParserHandler}.
   *
   * @param html HTML source of the page
   * @return the content collected by the handler, or {@code null} if {@code judge(html)}
   *     rejects the page, the parser cannot be configured, or parsing fails (failures are
   *     logged)
   */
  @Override
  public String parseFulltext(String html) {
    if (!judge(html)) return null;

    XMLReader r = new Parser();
    try {
      r.setProperty(Parser.schemaProperty, new HTMLSchema());
      r.setFeature(Parser.namespacesFeature, false);
    } catch (SAXNotRecognizedException e) {
      logger.fatal(e.getMessage());
      return null;
    } catch (SAXNotSupportedException e) {
      logger.fatal(e.getMessage());
      return null;
    }

    RejuvenationResearchParserHandler h = new RejuvenationResearchParserHandler();
    r.setContentHandler(h);
    try {
      // Encode explicitly as UTF-8 so the bytes match the encoding declared on the input
      // source; the no-argument getBytes() would use the platform default charset instead.
      InputStream fin = new ByteArrayInputStream(html.getBytes("UTF-8"));
      InputSource s = new InputSource(fin);
      s.setEncoding("utf8");
      r.parse(s);
      fin.close();
      return h.getContent();
    } catch (IOException e) {
      logger.fatal(e.getMessage());
    } catch (SAXException e) {
      logger.fatal(e.getMessage());
    }

    return null;
  }
Beispiel #11
0
  /**
   * Converts an HTML document to XML.
   *
   * <p>The input is read completely, its encoding is determined (from the text input itself,
   * overridden by a {@code charset=} declaration found in the content), and the content is
   * parsed with the reader instantiated from {@code READER}, whose events are sent to the
   * writer instantiated from {@code WRITER}.
   *
   * @param io io reference
   * @param opts html options
   * @return the converted content under the name of the original input, or the unchanged
   *     {@code io} if {@code READER} is not available or a SAX error occurs
   * @throws IOException I/O exception
   */
  private static IO toXML(final IO io, final HtmlOptions opts) throws IOException {
    // reader could not be initialized; fall back to XML
    if (READER == null) return io;

    try {
      // tries to extract the encoding from the input
      final TextInput ti = new TextInput(io);
      String enc = ti.encoding();
      final byte[] content = ti.content();

      // looks for a charset definition
      final byte[] encoding = token("charset=");
      int cs = indexOf(content, encoding);
      if (cs > 0) {
        // extracts the encoding string
        cs += encoding.length;
        int ce = cs;
        final int cl = content.length;
        // advance to the end of the content or to the first byte <= 0x28
        // (this range includes space, double quote and single quote)
        while (++ce < cl && content[ce] > 0x28) ;
        enc = string(substring(content, cs, ce));
      }

      // define input; unsupported encodings fall back to UTF8
      final InputSource is = new InputSource(new ArrayInput(content));
      is.setEncoding(supported(enc) ? normEncoding(enc) : UTF8);
      // define output; the writer receives the parse events and writes to the string buffer
      final StringWriter sw = new StringWriter();
      final XMLReader reader = (XMLReader) Reflect.get(READER);
      final Object writer = Reflect.get(WRITER, sw);

      // set TagSoup options
      // NOTE: statement order matters here, as later options may override earlier ones
      // (e.g. METHOD after HTML, ENCODING after the detected encoding)
      if (opts.get(HtmlOptions.HTML)) {
        reader.setFeature("http://xml.org/sax/features/namespaces", false);
        opt("method", "html");
        opt("omit-xml-declaration", "yes");
      }
      if (opts.get(HtmlOptions.NONS))
        reader.setFeature("http://xml.org/sax/features/namespaces", false);
      if (opts.get(HtmlOptions.OMITXML)) opt("omit-xml-declaration", "yes");
      if (opts.get(HtmlOptions.NOBOGONS)) reader.setFeature(FEATURES + "ignore-bogons", true);
      if (opts.get(HtmlOptions.NODEFAULTS))
        reader.setFeature(FEATURES + "default-attributes", false);
      if (opts.get(HtmlOptions.NOCOLONS)) reader.setFeature(FEATURES + "translate-colons", true);
      if (opts.get(HtmlOptions.NORESTART)) reader.setFeature(FEATURES + "restart-elements", false);
      if (opts.get(HtmlOptions.IGNORABLE))
        reader.setFeature(FEATURES + "ignorable-whitespace", true);
      if (opts.get(HtmlOptions.EMPTYBOGONS)) reader.setFeature(FEATURES + "bogons-empty", true);
      if (opts.get(HtmlOptions.ANY)) reader.setFeature(FEATURES + "bogons-empty", false);
      if (opts.get(HtmlOptions.NOROOTBOGONS)) reader.setFeature(FEATURES + "root-bogons", false);
      if (opts.get(HtmlOptions.NOCDATA)) reader.setFeature(FEATURES + "cdata-elements", false);
      if (opts.get(HtmlOptions.LEXICAL))
        reader.setProperty("http://xml.org/sax/properties/lexical-handler", writer);
      if (opts.contains(HtmlOptions.METHOD)) opt("method", opts.get(HtmlOptions.METHOD));
      if (opts.contains(HtmlOptions.DOCTYPESYS))
        opt("doctype-system", opts.get(HtmlOptions.DOCTYPESYS));
      if (opts.contains(HtmlOptions.DOCTYPEPUB))
        opt("doctype-public", opts.get(HtmlOptions.DOCTYPEPUB));
      // an explicitly specified encoding overrides the detected one
      if (opts.contains(HtmlOptions.ENCODING)) is.setEncoding(opts.get(HtmlOptions.ENCODING));
      // end TagSoup options

      // parse the input and return the serialized result under the original name
      reader.setContentHandler((ContentHandler) writer);
      reader.parse(is);
      return new IOContent(token(sw.toString()), io.name());

    } catch (final SAXException ex) {
      // conversion failed: report the error and return the original input
      Util.errln(ex);
      return io;
    }
  }
Beispiel #12
0
  /**
   * Imports an AML export from the given stream and converts it into an EPC result.
   *
   * <p>The document is parsed without validation; references to a DTD whose system id contains
   * {@code "ARIS-Export"} are redirected to the bundled {@code ARIS-Export101.dtd}. The root
   * element must be {@code AML}. For every object definition, the first linked model of type
   * {@code MT_EEPC} is kept as its link; object definitions without such a link are dropped
   * from the link table before the document is traversed.
   *
   * @param input stream providing the AML document
   * @return the result built by {@code traverseAML}
   * @throws IOException if anything goes wrong during parsing or conversion; the original
   *     failure is attached as the cause
   */
  public static MiningResult importFile(InputStream input) throws IOException {
    try {
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      Document doc;
      dbf.setValidating(false);
      dbf.setIgnoringComments(true);
      dbf.setIgnoringElementContentWhitespace(true);

      DocumentBuilder db = dbf.newDocumentBuilder();

      db.setEntityResolver(
          new EntityResolver() {
            public InputSource resolveEntity(String publicId, String systemId) {
              // guard against a missing system id instead of failing with a NullPointerException
              if (systemId != null && systemId.indexOf("ARIS-Export") != -1) {
                return new InputSource("file:" + About.EXTLIBLOCATION() + "ARIS-Export101.dtd");
              } else {
                // null lets the parser fall back to its default resolution
                return null;
              }
            }
          });

      InputSource inpStream = new InputSource(input);
      inpStream.setSystemId("file:" + System.getProperty("user.dir", ""));
      doc = db.parse(inpStream);

      // check if root element is a aml tag
      Message.add("parsing done" + doc, Message.DEBUG);
      if (!(doc.getDocumentElement().getNodeName().equals("AML"))) {
        Message.add("aml tag not found", Message.ERROR);
        throw new Exception("aml tag not found");
      } else {
        Message.add("aml root element found");
      }

      EPCResult result = new EPCResult(null, (EPC) null);
      HashMap ObjDef_LinkId = new HashMap();
      HashMap modelid_net = new HashMap();
      HashMap ObjDef_Name = new HashMap();
      HashMap function_LinkId = new HashMap();
      HashMap ModelId_ModelType = new HashMap();
      traverseAMLforObjectNames(
          ObjDef_Name, doc.getDocumentElement(), ObjDef_LinkId, ModelId_ModelType);

      // reduce each whitespace-separated link list to the first link pointing to an EPC model
      Iterator findLinkToEpc = ObjDef_LinkId.keySet().iterator();
      while (findLinkToEpc.hasNext()) {
        String currentObjDef = (String) findLinkToEpc.next();
        String Links = (String) ObjDef_LinkId.get(currentObjDef);
        StringTokenizer linkSet = new StringTokenizer(Links);
        String realEpcLink = "";
        while (linkSet.hasMoreTokens()) {
          String currentLink = linkSet.nextToken();
          // constant-first comparison: a link to an unknown model id is simply not an EPC link
          if ("MT_EEPC".equals(ModelId_ModelType.get(currentLink))) {
            realEpcLink = currentLink;
            break;
          }
        }
        if (realEpcLink.length() == 0) {
          // no EPC link found: drop the entry. The previous check against " " could never
          // match the "" sentinel; removal goes through the iterator because removing from
          // the map directly while iterating its key set is not permitted.
          findLinkToEpc.remove();
        } else {
          // replacing the value of an existing key is not a structural modification
          ObjDef_LinkId.put(currentObjDef, realEpcLink);
        }
      }

      result =
          traverseAML(
              result,
              doc.getDocumentElement(),
              null,
              ObjDef_Name,
              ObjDef_LinkId,
              modelid_net,
              function_LinkId);

      // connect every hierarchical function with the EPC of the model it links to
      Iterator hierarchicalFunctions = function_LinkId.keySet().iterator();
      while (hierarchicalFunctions.hasNext()) {
        EPCSubstFunction f = (EPCSubstFunction) hierarchicalFunctions.next();
        f.setSubstitutedEPC((EPC) modelid_net.get(function_LinkId.get(f)));
      }

      return result;

    } catch (Throwable x) {
      Message.add(x.toString());
      // keep the message as before, but preserve the original failure as the cause
      IOException wrapped = new IOException(x.getMessage());
      wrapped.initCause(x);
      throw wrapped;
    }
  }