Beispiel #1
0
  private void import_biographies(File directory) throws IOException, SQLException {
    reader = new ListFileReader(new File(directory, "biographies.list"));
    reader.skipUntil("^---*$");

    Batch batch =
        schema.createBatch(
            "UPDATE People SET real_name = ?, nick_name = ?, "
                + "birth_date = ?, death_date = ? WHERE imdb_name = ?");

    while (true) {
      List<String> lines = reader.readUntil("^---*$", false, false);
      if (lines == null) {
        break;
      }
      String imdb_name = null;
      String nick_name = null;
      String real_name = null;
      Date bdate = null;
      Date ddate = null;

      for (String ln : lines) {
        ln = ln.trim();
        if (ln.isEmpty()) {
          continue;
        }
        if (ln.charAt(2) != ':') {
          continue;
        }
        String key = ln.substring(0, 2);
        String value = ln.substring(3).trim();

        if (key.equals("NM")) {
          imdb_name = value;
        } else if (key.equals("RN")) {
          if (value.length() > 90) {
            real_name = value.substring(0, 90);
          } else {
            real_name = value;
          }
        } else if (key.equals("NK")) {
          if (value.length() > 90) {
            nick_name = value.substring(0, 90);
          } else {
            nick_name = value;
          }
        } else if (key.equals("DB")) {
          bdate = strToDate(value);
        } else if (key.equals("DD")) {
          ddate = strToDate(value);
        }
      }
      if (imdb_name != null) {
        batch.add(real_name, nick_name, bdate, ddate, imdb_name);
      }
    }

    batch.close();
    reader.close();
  }
Beispiel #2
0
  private void import_movies(File directory) throws IOException, SQLException {
    reader = new ListFileReader(new File(directory, "movies.list"));
    reader.skipUntil("^MOVIES\\s+LIST\\s*$");
    reader.skipUntil("^=+\\s*$");

    final Pattern linePattern = Pattern.compile("^(.+?)\t\\s*(.+?)$");
    final Pattern tvshowPattern = Pattern.compile("\"(.+?)\"\\s.*?\\{(.+?)\\}");
    final Pattern filmPattern = Pattern.compile("(.+?)\\s\\(\\d+\\)");

    Batch batch =
        schema.createBatch(
            "INSERT IGNORE INTO movies (imdb_name, is_film, name, episode, year) "
                + "VALUES (?, ?, ?, ?, ?)");

    while (true) {
      String line = reader.readLine();
      if (line == null) {
        break;
      }
      line = line.trim();
      if (line.isEmpty()) {
        continue;
      }
      Matcher m = linePattern.matcher(line);
      if (!m.matches()) {
        continue;
      }
      String imdb_name = m.group(1);
      String name = null;
      String episode = null;
      boolean tvshow = false;
      int year;
      try {
        year = Integer.parseInt(m.group(2));
      } catch (NumberFormatException ex) {
        year = -1;
      }
      if (imdb_name.charAt(0) == '"') {
        tvshow = true;
        m = tvshowPattern.matcher(imdb_name);
        if (!m.matches()) {
          continue;
        }
        name = m.group(1);
        episode = m.group(2);
      } else {
        m = filmPattern.matcher(imdb_name);
        if (!m.matches()) {
          continue;
        }
        name = m.group(1);
      }
      if (isEnglish(name)) {
        batch.add(imdb_name, tvshow ? 0 : 1, name, episode, (year > 1900) ? year : null);
      }
    }
    batch.close();
    reader.close();
  }
Beispiel #3
0
  private void import_ratings(File directory) throws IOException, SQLException {
    reader = new ListFileReader(new File(directory, "ratings.list"));
    reader.skipUntil("New\\s+Distribution\\s+Votes\\s+Rank\\s+Title");

    final Pattern linePattern = Pattern.compile("^.+?\\s+(\\d+)\\s+(\\d\\.\\d)\\s+(.+)$");

    Batch batch =
        schema.createBatch("UPDATE movies SET rating = ?, votes = ? " + "WHERE imdb_name = ?");

    while (true) {
      String line = reader.readLine();
      if (line == null) {
        break;
      }
      line = line.trim();
      if (line.isEmpty()) {
        continue;
      }
      Matcher m = linePattern.matcher(line);
      if (!m.matches()) {
        continue;
      }
      int votes = -1;
      double rank = -1;
      String imdb_name = m.group(3);
      try {
        votes = Integer.parseInt(m.group(1));
      } catch (NumberFormatException ex) {
      }
      try {
        rank = Double.parseDouble(m.group(2));
      } catch (NumberFormatException ex) {
      }
      batch.add(rank, votes, imdb_name);
    }
    batch.close();
    reader.close();
  }
Beispiel #4
0
 public void close() throws SQLException {
   if (batch != null) {
     batch.close();
   }
   people.close();
 }
Beispiel #5
0
  private void import_genres(File directory) throws IOException, SQLException {
    reader = new ListFileReader(new File(directory, "genres.list"));
    reader.skipUntil("^8: THE GENRES LIST\\s*$");

    final Pattern linePattern = Pattern.compile("^(.+?)\\t\\s*(.+)$");
    HashMap<String, Integer> genresMap = new HashMap<String, Integer>();

    Batch batch =
        schema.createBatch(
            "INSERT IGNORE MovieGenres (movie, genre) VALUES "
                + "((SELECT movie_id FROM Movies WHERE imdb_name = ? LIMIT 1), ?)");

    SimpleInsert insertGenre = schema.createInsert("Genres", true, "name");
    SimpleQuery findGenre = schema.createQuery("genre_id", "Genres", "name = ?");

    // speedup: drop the FKs
    Statement stmt = schema.createStatement();
    try {
      stmt.executeUpdate("ALTER TABLE MovieGenres DROP FOREIGN KEY `mg_genre`");
    } catch (SQLException ex) {
    }
    try {
      stmt.executeUpdate("ALTER TABLE MovieGenres DROP FOREIGN KEY `mg_movie`");
    } catch (SQLException ex) {
    }
    stmt.close();

    while (true) {
      String line = reader.readLine();
      if (line == null) {
        break;
      }
      line = line.trim();
      if (line.isEmpty()) {
        continue;
      }
      Matcher m = linePattern.matcher(line);
      if (!m.matches()) {
        continue;
      }
      String imdb_name = m.group(1);
      String genre = m.group(2);
      int genre_id;
      genre = genre.toLowerCase();
      if (genresMap.containsKey(genre)) {
        genre_id = genresMap.get(genre);
      } else {
        genre_id = insertGenre.insert(genre);
        if (genre_id < 0) {
          genre_id = findGenre.queryGetKey(genre);
        }
        genresMap.put(genre, genre_id);
      }
      batch.add(imdb_name, genre_id);
    }
    insertGenre.close();
    findGenre.close();
    batch.close();

    // remove anomalies and re-add the FKs
    debug("re-normalizing...");
    stmt = schema.createStatement();
    stmt.executeUpdate("DELETE FROM MovieGenres WHERE movie = 0 OR genre = 0");
    stmt.executeUpdate(
        "ALTER TABLE MovieGenres ADD CONSTRAINT `mg_movie` "
            + "FOREIGN KEY (`movie`) REFERENCES `Movies` (`movie_id`) "
            + "ON DELETE CASCADE ON UPDATE NO ACTION, "
            + "ADD CONSTRAINT `mg_genre` "
            + "FOREIGN KEY (`genre`) REFERENCES `Genres` (`genre_id`) "
            + "ON DELETE CASCADE ON UPDATE NO ACTION");
    stmt.close();

    reader.close();
  }