예제 #1
0
  /**
   * Reset the transformer in preparation for a new object.
   *
   * <p>On the first successful call the one-off settings are read from the system config: the
   * output directory ("outputPath"), the rendition extensions to exclude ("excludeRenditionExt")
   * and the conversion service URL ("url"). Every call then refreshes the priority flag, replaces
   * the SAX reader and clears the thumbnails/previews left from the last object.
   *
   * @throws TransformerException if the output path or the conversion service URL is not
   *     configured
   */
  private void reset() throws TransformerException {
    if (firstRun) {
      // Output directory
      String outputPath = config.getString(null, "outputPath");
      if (outputPath == null) {
        throw new TransformerException("Output path not specified!");
      }
      outputDir = new File(outputPath);
      outputDir.mkdirs();

      // Rendition exclusions. A missing setting means "exclude nothing"; guard it here
      // rather than handing a null value on to split()/asList().
      String excludedExtensions = config.getString(null, "excludeRenditionExt");
      String[] excluded =
          excludedExtensions == null ? new String[0] : StringUtils.split(excludedExtensions, ',');
      excludeList = Arrays.asList(excluded);

      // Conversion Service URL
      convertUrl = config.getString(null, "url");
      if (convertUrl == null) {
        throw new TransformerException("No ICE URL provided!");
      }

      // Only mark the one-off setup as done once it has fully succeeded, so that a
      // configuration error is reported again on the next call instead of leaving the
      // transformer running with half-initialised state.
      firstRun = false;
    }

    // Priority: the item config wins, the system config (default true) is the fallback
    Boolean itemPriority = itemConfig.getBoolean(null, "priority");
    if (itemPriority != null) {
      priority = itemPriority;
    } else {
      priority = config.getBoolean(true, "priority");
    }

    // Clear the old SAX reader
    reader = new SafeSAXReader();

    // Remove the last object
    thumbnails = null;
    previews = null;
  }
  /**
   * Initialise the CSV harvester plugin.
   *
   * <p>Reads the "harvester" / "csv" section of the JSON config, checks that the data file exists,
   * loads the parsing options, opens the file as UTF-8 and determines the data fields (from the
   * header row, or from the "headerList" option when "headerRow" is false). The optional ID column
   * and every configured filter column must be present in the data fields.
   *
   * <p>If initialisation fails after the file has been opened, the file is closed again before the
   * exception is propagated.
   *
   * @throws HarvesterException if no data file is configured or it does not exist, a delimiter
   *     option is empty, the CSV and multi-value delimiters are identical, the header row cannot
   *     be read, the ID column or a filter column is not among the data fields, or an I/O error
   *     occurs while reading the file
   */
  @Override
  public void init() throws HarvesterException {
    JsonSimple options = new JsonSimple(getJsonConfig().getObject("harvester", "csv"));

    String filePath = options.getString(null, "fileLocation");
    if (filePath == null) {
      throw new HarvesterException("No data file provided!");
    }
    // A freshly constructed File is never null, so only its existence needs checking
    File csvDataFile = new File(filePath);
    if (!csvDataFile.exists()) {
      throw new HarvesterException("Could not find CSV file '" + filePath + "'");
    }
    filename = csvDataFile.getName();

    idPrefix = options.getString("", "recordIDPrefix");
    maxRows = options.getInteger(-1, "maxRows");

    // An empty delimiter setting would make charAt(0) fail with an unchecked exception
    String delimiterOption = options.getString(String.valueOf(DEFAULT_DELIMITER), "delimiter");
    if (delimiterOption == null || delimiterOption.length() == 0) {
      throw new HarvesterException("Cannot parse CSV: The 'delimiter' option must not be empty");
    }
    delimiter = delimiterOption.charAt(0);

    ignoredFields = getStringList(options, "ignoreFields");
    includedFields = getStringList(options, "includedFields");
    multiValueFields = getStringList(options, "multiValueFields");

    String multiValueDelimiterOption =
        options.getString(
            String.valueOf(DEFAULT_MULTI_VALUE_FIELD_DELIMITER), "multiValueFieldDelimiter");
    if (multiValueDelimiterOption == null || multiValueDelimiterOption.length() == 0) {
      throw new HarvesterException(
          "Cannot parse CSV: The 'multiValueFieldDelimiter' option must not be empty");
    }
    multiValueFieldDelimiter = multiValueDelimiterOption.charAt(0);

    payloadId = options.getString(DEFAULT_PAYLOAD_ID, "payloadId");
    batchSize = options.getInteger(DEFAULT_BATCH_SIZE, "batchSize");
    hasMore = true;

    if (delimiter == multiValueFieldDelimiter) {
      throw new HarvesterException(
          "Cannot parse CSV: The requested delimiters for the CSV and multivalue fields are the same: "
              + delimiter);
    }

    Reader fileReader = null;
    boolean initialised = false;
    try {
      // open the CSV file for reading
      fileReader = new InputStreamReader(new FileInputStream(csvDataFile), "UTF-8");
      csvReader = new CSVReader(fileReader, delimiter);

      // configure the data fields
      if (options.getBoolean(true, "headerRow")) {
        // NOTE(review): readNext() presumably returns null when there is nothing left to
        // read (i.e. the file is empty) - confirm against the CSVReader in use.
        String[] headerRow = csvReader.readNext();
        if (headerRow == null) {
          throw new HarvesterException(
              "CSV file '" + filePath + "' is empty: no header row could be read");
        }
        dataFields = Arrays.asList(headerRow);
      } else {
        dataFields = getStringList(options, "headerList");
      }

      // check that the specified id column is valid
      idColumn = options.getString(null, "idColumn");
      if (idColumn != null && !dataFields.contains(idColumn)) {
        throw new HarvesterException(
            "ID column '" + idColumn + "' was invalid or not found in the data!");
      }

      // load filters, all filters must pass for the row to be considered
      filters = new HashMap<String, List<Filter>>();
      List<JsonSimple> filterConfig = options.getJsonSimpleList("filters");
      if (filterConfig != null) {
        for (JsonSimple singleFilterConfig : filterConfig) {
          Filter filter = new Filter(singleFilterConfig);
          String field = filter.getField();
          if (!dataFields.contains(field)) {
            throw new HarvesterException("Filter column '" + field + "' was not found in the data");
          }
          // group the filters by the field they apply to
          List<Filter> existingFilters = filters.get(field);
          if (existingFilters == null) {
            existingFilters = new ArrayList<Filter>();
            filters.put(field, existingFilters);
          }
          existingFilters.add(filter);
        }
      }

      initialised = true;
    } catch (IOException ioe) {
      throw new HarvesterException(ioe);
    } finally {
      // Do not leak the open file when initialisation is abandoned part-way through
      if (!initialised && fileReader != null) {
        try {
          fileReader.close();
        } catch (IOException ignored) {
          // best effort: the original failure is the one worth reporting
        }
      }
    }
  }