コード例 #1
0
  public static void main(String[] args) throws Exception {
    boolean createSampleSet = Boolean.parseBoolean(args[2]);
    int sampleSetSize = 0;
    if (createSampleSet) {
      sampleSetSize = Integer.parseInt(args[3]);
    }
    boolean dryRun = Boolean.parseBoolean(args[1]);
    File basePath = new File(args[0]);
    if (basePath.exists()) {

      File artistsFile = null;
      File labelsFile = null;
      File mastersFile = null;
      File releasesFile = null;

      for (File child : basePath.listFiles()) {
        if (child.isFile()) {
          String filename = child.getName().toLowerCase();
          if (filename.endsWith("_artists.xml")) {
            artistsFile = child;
          } else if (filename.endsWith("_labels.xml")) {
            labelsFile = child;
          } else if (filename.endsWith("_masters.xml")) {
            mastersFile = child;
          } else if (filename.endsWith("_releases.xml")) {
            releasesFile = child;
          }
        }
      }

      if (artistsFile == null
          || labelsFile == null
          || mastersFile == null
          || releasesFile == null) {
        throw new IllegalStateException("Not all needed files found");
      }

      List<Artist> artists = null;
      ParserResult<Artist> artistParserResult = null;
      try (FileInputStream is = new FileInputStream(artistsFile)) {
        StaxParser<Artist> parser = new ArtistStaxParser(dryRun);
        artistParserResult = parser.parse(is);
        artists = artistParserResult.getResult();
      }

      List<Label> labels = null;
      ParserResult<Label> labelParserResult = null;
      try (FileInputStream is = new FileInputStream(labelsFile)) {
        StaxParser<Label> parser = new LabelStaxParser(dryRun);
        labelParserResult = parser.parse(is);
        labels = labelParserResult.getResult();
      }

      List<Master> masters = null;
      ParserResult<Master> masterParserResult = null;
      try (FileInputStream is = new FileInputStream(mastersFile)) {
        StaxParser<Master> parser = new MasterStaxParser(dryRun);
        masterParserResult = parser.parse(is);
        masters = masterParserResult.getResult();
      }

      List<Release> releases = null;
      ParserResult<Release> releaseParserResult = null;
      try (FileInputStream is = new FileInputStream(releasesFile)) {
        StaxParser<Release> parser = new ReleaseStaxParser(dryRun, sampleSetSize);
        releaseParserResult = parser.parse(is);
        releases = releaseParserResult.getResult();
      }

      System.out.println("Found " + artistParserResult);
      System.out.println("Found " + labelParserResult);
      System.out.println("Found " + masterParserResult);
      System.out.println("Found " + releaseParserResult);

      if (createSampleSet) {
        System.out.println("Collecting data for sample dataset...");

        Set<Artist> neededArtist = new HashSet<>(artists.size() / 1000);
        for (Release release : releases) {
          for (ArtistRef ref : release.getArtists()) {
            Artist artist = findArtist(ref.getArtistId(), artists);
            if (artist != null) {
              neededArtist.add(artist);
            }
          }
          for (ArtistRef ref : release.getExtraArtists()) {
            Artist artist = findArtist(ref.getArtistId(), artists);
            if (artist != null) {
              neededArtist.add(artist);
            }
          }
        }

        File file = new File(basePath, "dataset.sample");
        FileOutputStream fos = new FileOutputStream(file);
        SerializationServiceBuilder builder = new SerializationServiceBuilder();
        SerializationService ss =
            builder
                .setAllowUnsafe(true)
                .setClassLoader(StaxParser.class.getClassLoader())
                .setEnableCompression(true)
                .setPartitioningStrategy(new DefaultPartitioningStrategy())
                .setCheckClassDefErrors(true)
                .setByteOrder(ByteOrder.BIG_ENDIAN)
                .build();

        BufferedOutputStream bos = new BufferedOutputStream(fos);
        ObjectDataOutputStream out = new ObjectDataOutputStream(bos, ss, ByteOrder.BIG_ENDIAN);

        System.out.println("Writing " + neededArtist.size() + " artists to sample dataset...");
        out.writeInt(neededArtist.size());
        for (Artist artist : neededArtist) {
          out.writeObject(artist);
        }
        System.out.println("Writing " + labels.size() + " labels to sample dataset...");
        out.writeInt(labels.size());
        for (Label label : labels) {
          out.writeObject(label);
        }
        System.out.println("Writing " + masters.size() + " masters to sample dataset...");
        out.writeInt(masters.size());
        for (Master master : masters) {
          out.writeObject(master);
        }
        System.out.println("Writing " + releases.size() + " releases to sample dataset...");
        out.writeInt(releases.size());
        for (Release release : releases) {
          out.writeObject(release);
        }
        out.flush();
        out.close();
      }
    }
  }