/**
 * Stores the configuration and immediately runs the whole pipeline: obtain the Drucksachen,
 * parse the Fraktionen, build the RDF model and write it out.
 *
 * <p>All work (including any scraping and the final write) happens inside this constructor.
 *
 * @param baseUrl base URL of the site; passed through {@code formatUrl} before use
 * @param allrisUrl path segment appended to the base URL; passed through {@code formatUrl}
 * @param scrape if {@code true} the Drucksachen are scraped from the site, otherwise they are
 *     read back via {@code readDrucksachenFromFolder()}
 * @param scrapeStartDate stored on the instance; not otherwise used by this constructor
 * @param cacheFolder folder handed to the {@code DrucksachenScraper}
 * @param timeout value handed to {@code FraktionScraper.parseFraktionen}
 * @param outputFormat format handed to {@code RDFWriter.print}
 * @param outputFolder folder handed to {@code RDFWriter.print}
 * @param outputUrlBase URL base handed to the {@code RDFWriter}
 */
public AllrisScraper(
      String baseUrl,
      String allrisUrl,
      boolean scrape,
      String scrapeStartDate,
      String cacheFolder,
      int timeout,
      String outputFormat,
      String outputFolder,
      String outputUrlBase) {
    this.baseUrl = formatUrl(baseUrl);
    this.allrisUrl = formatUrl(allrisUrl);
    this.scrape = scrape;
    this.scrapeStartDate = scrapeStartDate;
    this.cacheFolder = cacheFolder;
    this.timeout = timeout;
    this.outputFormat = outputFormat;
    this.outputFolder = outputFolder;
    this.outputUrlBase = formatUrl(outputUrlBase);

    // Step 1: Drucksachen, either scraped live or read back from the cache folder.
    System.out.println("Hole Drucksachen.");
    DrucksachenScraper druckScraper = new DrucksachenScraper(this.baseUrl, this.cacheFolder);
    List<Drucksache> drucksachen;
    if (scrape) {
      druckScraper.init(
          this.baseUrl + "/" + this.allrisUrl + "/vo040.asp?selfaction=ws&template=xyz");
      List<String> failedUris = druckScraper.scrapeDrucksachen(true);
      // Failures are only reported; the run continues with whatever was scraped.
      if (!failedUris.isEmpty()) {
        System.out.println("Failed to parse " + failedUris.size() + " URLS:");
        for (String failed : failedUris) {
          System.out.println(failed);
        }
      }
      drucksachen = druckScraper.getDrucksachen();
    } else {
      System.out.println("Lese Drucksachen aus " + this.cacheFolder);
      drucksachen = druckScraper.readDrucksachenFromFolder();
    }

    // Step 2: Fraktionen are always parsed from the site.
    System.out.println("Scrape Fraktionen.");
    FraktionScraper fraktionScraper = new FraktionScraper(this.baseUrl);
    List<Fraktion> fraktionen =
        fraktionScraper.parseFraktionen(
            this.baseUrl + "/" + this.allrisUrl + "/fr010.asp", this.timeout);

    // Step 3: build the RDF model and write it out.
    System.out.println("Writer erstellt Datenmodell.");
    // NOTE(review): this passes the raw constructor argument, not this.outputUrlBase (the
    // formatUrl-normalised copy stored above). Confirm which one RDFWriter should receive.
    RDFWriter writer = new RDFWriter(drucksachen, fraktionen, outputUrlBase);
    Model rdfModel = writer.getRdfModel();
    System.out.println("Schreibe Datenmodell in " + this.outputFolder);
    writer.print(this.outputFormat, this.outputFolder, rdfModel);
  }
 /**
  * Runs the test harness and writes the collected results to a file as RDF/XML-ABBREV.
  *
  * @param args optional; {@code args[0]}, if present, replaces the default result file name
  *     {@code owl-results.rdf}
  * @throws IOException if the result file cannot be opened or closed
  */
 public static void main(String[] args) throws IOException {
   String resultFile = "owl-results.rdf";
   if (args.length >= 1) {
     resultFile = args[0];
   }
   WebOntTestHarness harness = new WebOntTestHarness();
   harness.runTests();
   // A single manifest entry can be run instead of the whole suite, e.g.:
   //   harness.runTest("http://www.w3.org/2002/03owlt/Thing/Manifest001#test");

   RDFWriter writer = harness.testResults.getWriter("RDF/XML-ABBREV");
   writer.setProperty("showXmlDeclaration", "true");
   harness.testResults.setNsPrefix("", "http://www.w3.org/1999/xhtml");

   // Open the file only once the writer is configured, and always close it so the
   // handle is released and buffered output reaches disk even if writing fails.
   OutputStream stream = new FileOutputStream(resultFile);
   try {
     writer.write(harness.testResults, stream, BASE_RESULTS_URI);
   } finally {
     stream.close();
   }
 }
Exemple #3
0
  /**
   * Serializes all statements of an iteration to an RDF writer, LDP style.
   *
   * <p>Between {@code startRDF()} and {@code endRDF()} the method first registers the LDP, RDF,
   * XSD and DCTERMS prefixes, then three prefixes derived from the subject: {@code parent} (the
   * subject's namespace), {@code child} (the subject with any run of trailing slashes replaced by
   * a single {@code /}) and {@code this} (the same, but ending in {@code #}). Finally every
   * statement of the iteration is handed to the writer. The iteration is not closed here.
   *
   * @param writer the writer to serialize to
   * @param subject the resource to serialize; used to derive the subject-relative prefixes
   * @param iteration the iteration containing the data
   * @throws RDFHandlerException propagated from the writer
   * @throws RepositoryException propagated from the iteration
   */
  public static void exportIteration(
      RDFWriter writer, URI subject, CloseableIteration<Statement, RepositoryException> iteration)
      throws RDFHandlerException, RepositoryException {
    writer.startRDF();

    // Fixed vocabulary prefixes.
    writer.handleNamespace(LDP.PREFIX, LDP.NAMESPACE);
    writer.handleNamespace(RDF.PREFIX, RDF.NAMESPACE);
    writer.handleNamespace(XSD.PREFIX, XSD.NAMESPACE);
    writer.handleNamespace(DCTERMS.PREFIX, DCTERMS.NAMESPACE);

    // Prefixes relative to the subject. "/*$" matches zero or more slashes at the very end,
    // so the result carries exactly one terminator whether or not the subject ended in "/".
    final String parentNamespace = subject.getNamespace();
    final String subjectValue = subject.stringValue();
    final String childNamespace = subjectValue.replaceFirst("/*$", "/");
    final String selfNamespace = subjectValue.replaceFirst("/*$", "#");
    writer.handleNamespace("parent", parentNamespace);
    writer.handleNamespace("child", childNamespace);
    writer.handleNamespace("this", selfNamespace);

    for (; iteration.hasNext(); ) {
      final Statement statement = iteration.next();
      writer.handleStatement(statement);
    }

    writer.endRDF();
  }