Example #1
0
  /**
   * Parses the command line and hands the collected settings to {@code indexSolr}.
   *
   * <p>Expected layout: {@code args[0]} is the Solr URL, {@code args[1]} is the crawl db path, and
   * every following argument is either one of the recognised options or a segment path.
   * {@code -linkdb}, {@code -dir} and {@code -params} each consume the argument that follows them.
   * {@code -dir} adds every directory listed under the given path as a segment.
   *
   * @param args raw command-line arguments
   * @return {@code 0} when {@code indexSolr} completes; {@code -1} when the arguments are
   *     incomplete or {@code indexSolr} throws (the exception is logged, not rethrown)
   * @throws Exception if listing the {@code -dir} directory fails; failures inside
   *     {@code indexSolr} are caught and reported through the return value instead
   */
  public int run(String[] args) throws Exception {
    final String usage =
        "Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] [-filter] [-normalize]";

    if (args.length < 3) {
      System.err.println(usage);
      return -1;
    }

    final Path crawlDb = new Path(args[1]);
    Path linkDb = null;

    final List<Path> segments = new ArrayList<Path>();
    String params = null;

    boolean noCommit = false;
    boolean deleteGone = false;
    boolean filter = false;
    boolean normalize = false;

    for (int i = 2; i < args.length; i++) {
      final String arg = args[i];

      // Options that consume the next argument must not be the last token; without this check
      // args[++i] below would throw ArrayIndexOutOfBoundsException instead of a usage error.
      final boolean takesValue =
          arg.equals("-linkdb") || arg.equals("-dir") || arg.equals("-params");
      if (takesValue && i + 1 >= args.length) {
        System.err.println("Missing value for option: " + arg);
        System.err.println(usage);
        return -1;
      }

      if (arg.equals("-linkdb")) {
        linkDb = new Path(args[++i]);
      } else if (arg.equals("-dir")) {
        Path dir = new Path(args[++i]);
        FileSystem fs = dir.getFileSystem(getConf());
        FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
        Path[] files = HadoopFSUtil.getPaths(fstats);
        // Defensive: skip the loop rather than fail if the listing helper yields no array.
        if (files != null) {
          for (Path p : files) {
            segments.add(p);
          }
        }
      } else if (arg.equals("-noCommit")) {
        noCommit = true;
      } else if (arg.equals("-deleteGone")) {
        deleteGone = true;
      } else if (arg.equals("-filter")) {
        filter = true;
      } else if (arg.equals("-normalize")) {
        normalize = true;
      } else if (arg.equals("-params")) {
        params = args[++i];
      } else {
        // Anything that is not a recognised option is treated as a segment path.
        segments.add(new Path(arg));
      }
    }

    try {
      indexSolr(
          args[0], crawlDb, linkDb, segments, noCommit, deleteGone, params, filter, normalize);
      return 0;
    } catch (final Exception e) {
      LOG.error("SolrIndexer: " + StringUtils.stringifyException(e));
      return -1;
    }
  }
  /**
   * Command-line entry point. {@code args[0]} selects the operation ({@code -dump}, {@code -list}
   * or {@code -get}); the remaining arguments are operation-specific.
   *
   * <p>The general options {@code -nocontent}, {@code -nofetch}, {@code -nogenerate},
   * {@code -noparse}, {@code -noparsedata} and {@code -noparsetext} may appear anywhere after the
   * operation. Each one turns the matching boolean passed to the {@code SegmentReader} constructor
   * to {@code false}, and its slot in {@code args} is set to {@code null} so that the positional
   * parsing below does not mistake it for a path or key.
   *
   * <p>Any missing or invalid argument is reported on {@code System.err} followed by the usage
   * text, and the method returns without performing the operation.
   *
   * @param args raw command-line arguments
   * @throws Exception propagated from configuration/file-system setup or from the selected
   *     {@code SegmentReader} operation
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      usage();
      return;
    }

    int mode = -1;
    if (args[0].equals("-dump")) {
      mode = MODE_DUMP;
    } else if (args[0].equals("-list")) {
      mode = MODE_LIST;
    } else if (args[0].equals("-get")) {
      mode = MODE_GET;
    }

    boolean co = true;
    boolean fe = true;
    boolean ge = true;
    boolean pa = true;
    boolean pd = true;
    boolean pt = true;
    // Collect general options; consumed entries are nulled so later parsing can skip them.
    for (int i = 1; i < args.length; i++) {
      if (args[i].equals("-nocontent")) {
        co = false;
        args[i] = null;
      } else if (args[i].equals("-nofetch")) {
        fe = false;
        args[i] = null;
      } else if (args[i].equals("-nogenerate")) {
        ge = false;
        args[i] = null;
      } else if (args[i].equals("-noparse")) {
        pa = false;
        args[i] = null;
      } else if (args[i].equals("-noparsedata")) {
        pd = false;
        args[i] = null;
      } else if (args[i].equals("-noparsetext")) {
        pt = false;
        args[i] = null;
      }
    }

    Configuration conf = NutchConfiguration.create();
    final FileSystem fs = FileSystem.get(conf);
    SegmentReader segmentReader = new SegmentReader(conf, co, fe, ge, pa, pd, pt);

    // Collect the arguments required by the selected operation and run it.
    switch (mode) {
      case MODE_DUMP: {
        String input = args[1];
        if (input == null) {
          System.err.println("Missing required argument: <segment_dir>");
          usage();
          return;
        }
        String output = args.length > 2 ? args[2] : null;
        if (output == null) {
          System.err.println("Missing required argument: <output>");
          usage();
          return;
        }
        segmentReader.dump(new Path(input), new Path(output));
        return;
      }
      case MODE_LIST: {
        ArrayList<Path> dirs = new ArrayList<Path>();
        for (int i = 1; i < args.length; i++) {
          if (args[i] == null) {
            continue; // slot was a general option consumed above
          }
          if (args[i].equals("-dir")) {
            // -dir needs a following path. It may be absent (last token) or null (the next
            // token was a general option); either way report it instead of failing on
            // args[++i] or on a null path string.
            if (i + 1 >= args.length || args[i + 1] == null) {
              System.err.println("Missing required argument: <segments> after -dir");
              usage();
              return;
            }
            Path dir = new Path(args[++i]);
            FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
            Path[] files = HadoopFSUtil.getPaths(fstats);
            if (files != null && files.length > 0) {
              dirs.addAll(Arrays.asList(files));
            }
          } else {
            dirs.add(new Path(args[i]));
          }
        }
        segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
        return;
      }
      case MODE_GET: {
        String input = args[1];
        if (input == null) {
          System.err.println("Missing required argument: <segment_dir>");
          usage();
          return;
        }
        String key = args.length > 2 ? args[2] : null;
        if (key == null) {
          System.err.println("Missing required argument: <keyValue>");
          usage();
          return;
        }
        segmentReader.get(
            new Path(input),
            new Text(key),
            new OutputStreamWriter(System.out, "UTF-8"),
            new HashMap<String, List<Writable>>());
        return;
      }
      default:
        System.err.println("Invalid operation: " + args[0]);
        usage();
        return;
    }
  }