Exemplo n.º 1
0
  /**
   * Runs a genetic-algorithm search over the space of attribute subsets.
   *
   * <p>A random initial population is created, evaluated and scaled, and its report is stored as
   * generation 0. Up to {@code m_maxGenerations} further generations are then bred. A population
   * report is appended for the last generation, for every generation that is a multiple of
   * {@code m_reportFrequency}, and for the generation in which {@code checkBest()} signals
   * convergence (which also ends the search).
   *
   * @param ASEval the attribute evaluator to guide the search; must be a SubsetEvaluator
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if the evaluator is not a subset evaluator or the search can't be completed
   */
  @Override
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    // reset the state left over from any previous run
    m_best = null;
    m_generationReports = new StringBuffer();

    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
    }
    final SubsetEvaluator subsetEvaluator = (SubsetEvaluator) ASEval;

    // the class index is only recorded for supervised evaluators
    m_hasClass = !(ASEval instanceof UnsupervisedSubsetEvaluator);
    if (m_hasClass) {
      m_classIndex = data.classIndex();
    }

    m_numAttribs = data.numAttributes();

    m_startRange.setUpper(m_numAttribs - 1);
    final boolean startSetGiven = !getStartSet().equals("");
    if (startSetGiven) {
      m_starting = m_startRange.getSelection();
    }

    // allocate the lookup table, random source and population storage
    m_lookupTable = new Hashtable<BitSet, GABitSet>(m_lookupTableSize);
    m_random = new Random(m_seed);
    m_population = new GABitSet[m_popSize];

    // generation 0: random initial population
    initPopulation();
    evaluatePopulation(subsetEvaluator);
    populationStatistics();
    scalePopulation();
    checkBest();
    m_generationReports.append(populationReport(0));

    for (int gen = 1; gen <= m_maxGenerations; gen++) {
      generation();
      evaluatePopulation(subsetEvaluator);
      populationStatistics();
      scalePopulation();

      // find the best population member and check for convergence
      final boolean converged = checkBest();

      final boolean reportDue =
          (gen == m_maxGenerations) || ((gen % m_reportFrequency) == 0) || converged;
      if (reportDue) {
        m_generationReports.append(populationReport(gen));
      }
      if (converged) {
        break;
      }
    }

    return attributeList(m_best.getChromosome());
  }
Exemplo n.º 2
0
  /**
   * Sets the format of the input instances and derives the output format by inserting one new
   * attribute (of the configured type and name) at the configured position.
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the format couldn't be set successfully
   * @throws IllegalArgumentException if the attribute type is unknown or the insertion index is
   *     out of range
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);

    // the new attribute may also be appended after the last existing one
    m_Insert.setUpper(instanceInfo.numAttributes());
    final Instances header = new Instances(instanceInfo, 0);

    // build the attribute to add according to the requested type
    final Attribute added;
    switch (m_AttributeType) {
      case Attribute.NUMERIC:
        added = new Attribute(m_Name);
        break;
      case Attribute.NOMINAL:
        added = new Attribute(m_Name, m_Labels);
        break;
      case Attribute.STRING:
        added = new Attribute(m_Name, (FastVector) null);
        break;
      case Attribute.DATE:
        added = new Attribute(m_Name, m_DateFormat);
        break;
      default:
        throw new IllegalArgumentException("Unknown attribute type in Add");
    }

    final int insertAt = m_Insert.getIndex();
    if (!((insertAt >= 0) && (insertAt <= getInputFormat().numAttributes()))) {
      throw new IllegalArgumentException("Index out of range");
    }
    header.insertAttributeAt(added, insertAt);
    setOutputFormat(header);

    // all attributes, except index of added attribute
    // (otherwise the length of the input/output indices differ)
    final Range retained = new Range(m_Insert.getSingleIndex());
    retained.setInvert(true);
    retained.setUpper(header.numAttributes() - 1);
    initOutputLocators(header, retained.getSelection());

    return true;
  }
Exemplo n.º 3
0
  /**
   * Searches the attribute subset space by best first search.
   *
   * <p>The search starts from the supplied start set or, for a backward search without a start
   * set, from all non-class attributes; otherwise it starts from the empty subset. It repeatedly
   * removes the subset at the head of the open list, expands it by adding (forward) and/or
   * removing (backward) one attribute at a time, evaluates each resulting subset (reusing cached
   * merits where available) and puts it on the open list. The search stops once {@code m_maxStale}
   * consecutive expansions have not improved on the best subset, or the open list is empty.
   *
   * <p>Side effects: updates {@code m_totalEvals}, {@code m_hasClass}, {@code m_classIndex},
   * {@code m_numAttribs}, {@code m_starting} and {@code m_bestMerit}.
   *
   * @param ASEval the attribute evaluator to guide the search; must be a SubsetEvaluator
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if the evaluator is not a subset evaluator or the search can't be completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    m_totalEvals = 0;
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
    }

    // the class index is only recorded for supervised evaluators
    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      m_hasClass = false;
    } else {
      m_hasClass = true;
      m_classIndex = data.classIndex();
    }

    SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval;
    m_numAttribs = data.numAttributes();
    int i, j;
    int best_size = 0;
    int size = 0;
    // number of expansion passes still to run for the current node (2 when bi-directional)
    int done;
    // direction used for the current expansion pass
    int sd = m_searchDirection;
    BitSet best_group, temp_group;
    // number of consecutive node expansions that produced no improvement
    int stale;
    double best_merit;
    double merit;
    boolean z;
    boolean added;
    Link2 tl;
    // cache of already evaluated subsets: BitSet.toString() -> merit
    Hashtable lookup = new Hashtable(m_cacheSize * m_numAttribs);
    int insertCount = 0;
    // incremented on every cache hit but never read within this method
    int cacheHits = 0;
    // open list of subsets awaiting expansion
    LinkedList2 bfList = new LinkedList2(m_maxStale);
    best_merit = -Double.MAX_VALUE;
    stale = 0;
    best_group = new BitSet(m_numAttribs);

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    }
    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
      for (i = 0; i < m_starting.length; i++) {
        if ((m_starting[i]) != m_classIndex) {
          best_group.set(m_starting[i]);
        }
      }

      // NOTE(review): this counts every entry of the start set, including the class index if it
      // was listed, although the class attribute is not set in best_group
      best_size = m_starting.length;
      m_totalEvals++;
    } else {
      if (m_searchDirection == SELECTION_BACKWARD) {
        setStartSet("1-last");
        m_starting = new int[m_numAttribs];

        // init initial subset to all attributes
        for (i = 0, j = 0; i < m_numAttribs; i++) {
          if (i != m_classIndex) {
            best_group.set(i);
            m_starting[j++] = i;
          }
        }

        best_size = m_numAttribs - 1;
        m_totalEvals++;
      }
    }

    // evaluate the initial subset
    // NOTE(review): when no start set is given and the direction is not backward, this
    // evaluation (of the empty subset) is not counted in m_totalEvals
    best_merit = ASEvaluator.evaluateSubset(best_group);
    // add the initial group to the list and the hash table
    Object[] best = new Object[1];
    best[0] = best_group.clone();
    bfList.addToList(best, best_merit);
    BitSet tt = (BitSet) best_group.clone();
    String hashC = tt.toString();
    lookup.put(hashC, new Double(best_merit));

    while (stale < m_maxStale) {
      added = false;

      if (m_searchDirection == SELECTION_BIDIRECTIONAL) {
        // bi-directional search: a forward pass followed by a backward pass
        done = 2;
        sd = SELECTION_FORWARD;
      } else {
        done = 1;
      }

      // finished search?
      if (bfList.size() == 0) {
        stale = m_maxStale;
        break;
      }

      // copy the attribute set at the head of the list
      tl = bfList.getLinkAt(0);
      temp_group = (BitSet) (tl.getData()[0]);
      temp_group = (BitSet) temp_group.clone();
      // remove the head of the list
      bfList.removeLinkAt(0);
      // count the number of bits set (attributes)
      int kk;

      for (kk = 0, size = 0; kk < m_numAttribs; kk++) {
        if (temp_group.get(kk)) {
          size++;
        }
      }

      do {
        for (i = 0; i < m_numAttribs; i++) {
          // z: is attribute i a candidate for this pass? (absent when going forward, present
          // when going backward; the class attribute is never a candidate)
          if (sd == SELECTION_FORWARD) {
            z = ((i != m_classIndex) && (!temp_group.get(i)));
          } else {
            z = ((i != m_classIndex) && (temp_group.get(i)));
          }

          if (z) {
            // set the bit (attribute to add/delete)
            if (sd == SELECTION_FORWARD) {
              temp_group.set(i);
              size++;
            } else {
              temp_group.clear(i);
              size--;
            }

            /* if this subset has been seen before, then it is already
            in the list (or has been fully expanded) */
            tt = (BitSet) temp_group.clone();
            hashC = tt.toString();

            if (lookup.containsKey(hashC) == false) {
              merit = ASEvaluator.evaluateSubset(temp_group);
              m_totalEvals++;

              // insert this one in the hashtable; once the cache has received more than
              // m_cacheSize * m_numAttribs insertions it is discarded and started afresh
              if (insertCount > m_cacheSize * m_numAttribs) {
                lookup = new Hashtable(m_cacheSize * m_numAttribs);
                insertCount = 0;
              }
              hashC = tt.toString();
              lookup.put(hashC, new Double(merit));
              insertCount++;
            } else {
              merit = ((Double) lookup.get(hashC)).doubleValue();
              cacheHits++;
            }

            // insert this one in the list (done for cached subsets as well)
            Object[] add = new Object[1];
            add[0] = tt.clone();
            bfList.addToList(add, merit);

            if (m_debug) {
              System.out.print("Group: ");
              printGroup(tt, m_numAttribs);
              System.out.println("Merit: " + merit);
            }

            // is this better than the best? Forward passes require an improvement of more than
            // 0.00001; backward passes also accept an equal merit with fewer attributes
            if (sd == SELECTION_FORWARD) {
              z = ((merit - best_merit) > 0.00001);
            } else {
              if (merit == best_merit) {
                z = (size < best_size);
              } else {
                z = (merit > best_merit);
              }
            }

            if (z) {
              added = true;
              stale = 0;
              best_merit = merit;
              // the best size is the size of the subset just evaluated
              best_size = size;
              best_group = (BitSet) (temp_group.clone());
            }

            // unset this addition(deletion) so the next candidate starts from the same node
            if (sd == SELECTION_FORWARD) {
              temp_group.clear(i);
              size--;
            } else {
              temp_group.set(i);
              size++;
            }
          }
        }

        // second pass of a bi-directional expansion runs backward
        if (done == 2) {
          sd = SELECTION_BACKWARD;
        }

        done--;
      } while (done > 0);

      /* if we haven't added a new attribute subset then full expansion
      of this node hasn't resulted in anything better */
      if (!added) {
        stale++;
      }
    }

    m_bestMerit = best_merit;
    return attributeList(best_group);
  }
  /**
   * Sets the format of the input instances and builds the output format, in which the labels of
   * the selected nominal attributes are replaced according to the configured rename
   * specification.
   *
   * <p>The rename specification is a comma-separated list of {@code old:new} pairs. The attribute
   * selection is first interpreted as a numeric range; if that fails it is interpreted as a
   * comma-separated list of attribute names. The invert setting is applied in both cases.
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the format couldn't be set successfully, a replacement pair is malformed,
   *     or a named attribute cannot be found
   */
  @Override
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    super.setInputFormat(instanceInfo);

    int classIndex = instanceInfo.classIndex();

    // setup the map
    if (m_renameVals != null && m_renameVals.length() > 0) {
      for (String val : m_renameVals.split(",")) {
        String[] parts = val.split(":");
        if (parts.length != 2 || parts[0].length() == 0 || parts[1].length() == 0) {
          throw new WekaException("Invalid replacement string: " + val);
        }

        m_renameMap.put(
            m_ignoreCase ? parts[0].toLowerCase().trim() : parts[0].trim(), parts[1].trim());
      }
    }

    // try selected atts as a numeric range first
    Range tempRange = new Range();
    tempRange.setInvert(m_invert);
    if (m_selectedColsString == null) {
      m_selectedColsString = "";
    }

    try {
      tempRange.setRanges(m_selectedColsString);
      tempRange.setUpper(instanceInfo.numAttributes() - 1);
      m_selectedAttributes = tempRange.getSelection();
      m_selectedCols = tempRange;
    } catch (Exception r) {
      // OK, now try as named attributes: translate each name into its 1-based index
      StringBuilder indexes = new StringBuilder();
      for (String n : m_selectedColsString.split(",")) {
        n = n.trim();
        Attribute found = instanceInfo.attribute(n);
        if (found == null) {
          throw new WekaException(
              "Unable to find attribute '" + n + "' in the incoming instances'");
        }
        if (indexes.length() > 0) {
          indexes.append(',');
        }
        indexes.append(found.index() + 1);
      }

      tempRange = new Range();
      // the replacement range must honour the invert setting too; without this the option is
      // silently ignored whenever attributes are specified by name
      tempRange.setInvert(m_invert);
      tempRange.setRanges(indexes.toString());
      tempRange.setUpper(instanceInfo.numAttributes() - 1);
      m_selectedAttributes = tempRange.getSelection();
      m_selectedCols = tempRange;
    }

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 0; i < instanceInfo.numAttributes(); i++) {
      Attribute current = instanceInfo.attribute(i);
      if (m_selectedCols.isInRange(i) && current.isNominal()) {
        List<String> valsForAtt = new ArrayList<String>();
        for (int j = 0; j < current.numValues(); j++) {
          String origV = current.value(j);

          String replace =
              m_ignoreCase ? m_renameMap.get(origV.toLowerCase()) : m_renameMap.get(origV);
          // keep the original label when there is no replacement, or when the replacement is
          // already present in the new label list
          if (replace != null && !valsForAtt.contains(replace)) {
            valsForAtt.add(replace);
          } else {
            valsForAtt.add(origV);
          }
        }
        attributes.add(new Attribute(current.name(), valsForAtt));
      } else {
        // unselected attributes, and selected attributes that are not nominal, are copied as is
        attributes.add((Attribute) current.copy());
      }
    }

    Instances outputFormat = new Instances(instanceInfo.relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);

    return true;
  }
Exemplo n.º 5
0
  /**
   * Kind of a dummy search algorithm. Calls an attribute evaluator to evaluate each attribute not
   * excluded by {@code inStarting} and then ranks them via {@code rankedAttributes()}.
   *
   * <p>Side effects: updates {@code m_numAttribs}, {@code m_hasClass}, {@code m_classIndex},
   * {@code m_starting}, {@code m_attributeList} and {@code m_attributeMerit}.
   *
   * @param ASEval the attribute evaluator to guide the search; must be an AttributeEvaluator
   * @param data the training instances.
   * @return the attribute indexes in the order given by {@code rankedAttributes()}
   * @throws Exception if the evaluator is not an attribute evaluator or the search can't be
   *     completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    int i, j;

    if (!(ASEval instanceof AttributeEvaluator)) {
      // the original literals were joined without a space ("is not aAttribute evaluator!")
      throw new Exception(ASEval.getClass().getName() + " is not an Attribute evaluator!");
    }

    m_numAttribs = data.numAttributes();

    if (ASEval instanceof UnsupervisedAttributeEvaluator) {
      m_hasClass = false;
    } else {
      m_classIndex = data.classIndex();
      m_hasClass = (m_classIndex >= 0);
    }

    // get the transformed data and check to see if the transformer
    // preserves a class index
    if (ASEval instanceof AttributeTransformer) {
      data = ((AttributeTransformer) ASEval).transformedHeader();
      if (m_classIndex >= 0 && data.classIndex() >= 0) {
        m_classIndex = data.classIndex();
        m_hasClass = true;
      }
    }

    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
      m_starting = m_startRange.getSelection();
    }

    // sl = number of attributes left out of the ranking: the start (omit) set plus the class
    // attribute, unless the class is already part of the start set
    int sl = (m_starting != null) ? m_starting.length : 0;
    if (m_hasClass) {
      boolean classInStartSet = false;
      if (m_starting != null) {
        for (i = 0; i < m_starting.length; i++) {
          if (m_starting[i] == m_classIndex) {
            classInStartSet = true;
            break;
          }
        }
      }
      if (!classInStartSet) {
        sl++;
      }
    }

    m_attributeList = new int[m_numAttribs - sl];
    m_attributeMerit = new double[m_numAttribs - sl];

    // add in those attributes not in the starting (omit list)
    for (i = 0, j = 0; i < m_numAttribs; i++) {
      if (!inStarting(i)) {
        m_attributeList[j++] = i;
      }
    }

    AttributeEvaluator ASEvaluator = (AttributeEvaluator) ASEval;

    for (i = 0; i < m_attributeList.length; i++) {
      m_attributeMerit[i] = ASEvaluator.evaluateAttribute(m_attributeList[i]);
    }

    // column 0 of each ranked row holds the attribute index
    double[][] tempRanked = rankedAttributes();
    int[] rankedAttributes = new int[m_attributeList.length];

    for (i = 0; i < m_attributeList.length; i++) {
      rankedAttributes[i] = (int) tempRanked[i][0];
    }

    return rankedAttributes;
  }
Exemplo n.º 6
0
  /**
   * Searches the attribute subset space by linear forward selection.
   *
   * <p>The evaluator is built on the data, the attributes are optionally ranked, and then either
   * a plain or a floating forward search is delegated to an {@code LFSMethods} helper. The number
   * of evaluations and the best merit reported by the helper are stored in {@code m_totalEvals}
   * and {@code m_bestMerit}.
   *
   * @param ASEval the attribute evaluator to guide the search; must be a SubsetEvaluator
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if the evaluator is not a subset evaluator or the search can't be completed
   */
  public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
    m_totalEvals = 0;

    if (!(ASEval instanceof SubsetEvaluator)) {
      throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
    }
    final SubsetEvaluator subsetEvaluator = (SubsetEvaluator) ASEval;

    // the class index is only recorded for supervised evaluators
    m_hasClass = !(ASEval instanceof UnsupervisedSubsetEvaluator);
    if (m_hasClass) {
      m_classIndex = data.classIndex();
    }

    ASEval.buildEvaluator(data);

    m_numAttribs = data.numAttributes();

    // never use more top-ranked attributes than the data actually has
    if (m_numUsedAttributes > m_numAttribs) {
      System.out.println(
          "Decreasing number of top-ranked attributes to total number of attributes: "
              + data.numAttributes());
      m_numUsedAttributes = m_numAttribs;
    }

    final BitSet startGroup = new BitSet(m_numAttribs);
    m_startRange.setUpper(m_numAttribs - 1);

    if (!getStartSet().equals("")) {
      m_starting = m_startRange.getSelection();
    }

    // If a starting subset has been supplied, then initialise the bitset
    if (m_starting != null) {
      for (int startIndex : m_starting) {
        if (startIndex != m_classIndex) {
          startGroup.set(startIndex);
        }
      }
    }

    final LFSMethods lfs = new LFSMethods();

    // either rank the attributes with the evaluator or keep their natural order
    final int[] ranking;
    if (m_performRanking) {
      ranking = lfs.rankAttributes(data, subsetEvaluator, m_verbose);
    } else {
      ranking = new int[m_numAttribs];
      for (int pos = 0; pos < ranking.length; pos++) {
        ranking[pos] = pos;
      }
    }

    if (m_forwardSearchMethod == SEARCH_METHOD_FORWARD) {
      lfs.forwardSearch(
          m_cacheSize,
          startGroup,
          ranking,
          m_numUsedAttributes,
          m_linearSelectionType == TYPE_FIXED_WIDTH,
          m_maxStale,
          -1,
          data,
          subsetEvaluator,
          m_verbose);
    } else if (m_forwardSearchMethod == SEARCH_METHOD_FLOATING) {
      lfs.floatingForwardSearch(
          m_cacheSize,
          startGroup,
          ranking,
          m_numUsedAttributes,
          m_linearSelectionType == TYPE_FIXED_WIDTH,
          m_maxStale,
          data,
          subsetEvaluator,
          m_verbose);
    }

    m_totalEvals = lfs.getNumEvalsTotal();
    m_bestMerit = lfs.getBestMerit();

    return attributeList(lfs.getBestGroup());
  }
Exemplo n.º 7
0
  /**
   * Determines the output format based on the input format and returns this. In case the output
   * format cannot be returned immediately, i.e., hasImmediateOutputFormat() returns false, then
   * this method will be called from batchFinished() after the call of preprocess(Instances), in
   * which, e.g., statistics for the actual processing step can be gathered.
   *
   * <p>The output keeps all input attributes and appends nominal "no"/"yes" indicator
   * attributes: either one {@code Outlier}/{@code ExtremeValue} pair for the whole instance, or
   * one {@code <name>_Outlier}/{@code <name>_ExtremeValue} pair (plus an optional numeric
   * {@code <name>_Offset}) per selected numeric, non-class attribute. The position of each
   * first appended attribute is recorded in {@code m_OutlierAttributePosition}.
   *
   * @param inputFormat the input format to base the output format on
   * @return the output format
   * @throws Exception in case the determination goes wrong
   * @see #hasImmediateOutputFormat()
   * @see #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    // attributes must be numeric: mark the class and every non-numeric selection as unusable
    m_Attributes.setUpper(inputFormat.numAttributes() - 1);
    m_AttributeIndices = m_Attributes.getSelection();
    for (int k = 0; k < m_AttributeIndices.length; k++) {
      final int attIndex = m_AttributeIndices[k];
      if (attIndex == inputFormat.classIndex() || !inputFormat.attribute(attIndex).isNumeric()) {
        m_AttributeIndices[k] = NON_NUMERIC;
      }
    }

    // get old attributes
    final FastVector atts = new FastVector();
    for (int k = 0; k < inputFormat.numAttributes(); k++) {
      atts.addElement(inputFormat.attribute(k));
    }

    if (getDetectionPerAttribute()) {
      m_OutlierAttributePosition = new int[m_AttributeIndices.length];

      for (int k = 0; k < m_AttributeIndices.length; k++) {
        if (m_AttributeIndices[k] == NON_NUMERIC) {
          continue;
        }

        m_OutlierAttributePosition[k] = atts.size();
        final String baseName = inputFormat.attribute(m_AttributeIndices[k]).name();

        // add new attributes
        FastVector labels = new FastVector();
        labels.addElement("no");
        labels.addElement("yes");
        atts.addElement(new Attribute(baseName + "_Outlier", labels));

        labels = new FastVector();
        labels.addElement("no");
        labels.addElement("yes");
        atts.addElement(new Attribute(baseName + "_ExtremeValue", labels));

        if (getOutputOffsetMultiplier()) {
          atts.addElement(new Attribute(baseName + "_Offset"));
        }
      }
    } else {
      m_OutlierAttributePosition = new int[] {atts.size()};

      // add 2 new attributes
      FastVector labels = new FastVector();
      labels.addElement("no");
      labels.addElement("yes");
      atts.addElement(new Attribute("Outlier", labels));

      labels = new FastVector();
      labels.addElement("no");
      labels.addElement("yes");
      atts.addElement(new Attribute("ExtremeValue", labels));
    }

    // generate header
    final Instances header = new Instances(inputFormat.relationName(), atts, 0);
    header.setClassIndex(inputFormat.classIndex());

    return header;
  }
Exemplo n.º 8
0
  /**
   * Reads the first row of the source, builds the initial attribute structure from it and then
   * reads data to refine that structure.
   *
   * <p>Steps, in order: reset the reader state and open the temp files; read the first row
   * (keeping it in the row buffer when there is no header row); tokenize it to obtain one
   * attribute per field (named from the header, or {@code att1}, {@code att2}, ... when there is
   * no header row); assign each attribute a type from the configured nominal/string/date/numeric
   * ranges; apply any nominal label specifications; finally hand over to {@code readData} and
   * {@code makeStructure}.
   *
   * @throws IOException if the source contains no rows or reading fails
   */
  private void readHeader() throws IOException {
    m_rowCount = 1;
    m_incrementalReader = null;
    m_current = new ArrayList<Object>();
    openTempFiles();

    m_rowBuffer = new ArrayList<String>();

    String firstRow = m_sourceReader.readLine();
    if (firstRow == null) {
      throw new IOException("No data in the file!");
    }
    // without a header row the first line is data, so keep it for later processing
    if (m_noHeaderRow) {
      m_rowBuffer.add(firstRow);
    }

    ArrayList<Attribute> attribNames = new ArrayList<Attribute>();

    // now tokenize to determine attribute names (or create att names if
    // no header row)
    StringReader sr = new StringReader(firstRow + "\n");
    // System.out.print(firstRow + "\n");
    m_st = new StreamTokenizer(sr);
    initTokenizer(m_st);

    // only the first character of the field separator is used as the delimiter
    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    int attNum = 1;
    StreamTokenizerUtils.getFirstToken(m_st);
    if (m_st.ttype == StreamTokenizer.TT_EOF) {
      StreamTokenizerUtils.errms(m_st, "premature end of file");
    }
    boolean first = true;
    boolean wasSep;

    while (m_st.ttype != StreamTokenizer.TT_EOL && m_st.ttype != StreamTokenizer.TT_EOF) {
      // Get next token (the first token was already fetched before the loop)

      if (!first) {
        StreamTokenizerUtils.getToken(m_st);
      }

      if (m_st.ttype == m_FieldSeparator.charAt(0) || m_st.ttype == StreamTokenizer.TT_EOL) {
        wasSep = true;
      } else {
        wasSep = false;

        String attName = null;

        if (m_noHeaderRow) {
          attName = "att" + attNum;
          attNum++;
        } else {
          attName = m_st.sval;
        }

        // every attribute is created with a null value list at this stage
        // NOTE(review): presumably this yields string attributes that are refined later - confirm
        attribNames.add(new Attribute(attName, (java.util.List<String>) null));
      }
      // after a field value, consume the token that follows it (separator or end of line)
      if (!wasSep) {
        StreamTokenizerUtils.getToken(m_st);
      }
      first = false;
    }
    // relation name: the source file name without a trailing ".csv" (any case), or "stream"
    String relationName;
    if (m_sourceFile != null) {
      relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
    } else {
      relationName = "stream";
    }
    m_structure = new Instances(relationName, attribNames, 0);
    m_NominalAttributes.setUpper(m_structure.numAttributes() - 1);
    m_StringAttributes.setUpper(m_structure.numAttributes() - 1);
    m_dateAttributes.setUpper(m_structure.numAttributes() - 1);
    m_numericAttributes.setUpper(m_structure.numAttributes() - 1);
    m_nominalVals = new HashMap<Integer, LinkedHashSet<String>>();

    // assign a type per attribute; when an index is in several ranges the first match in the
    // order nominal, string, date, numeric wins
    m_types = new TYPE[m_structure.numAttributes()];
    for (int i = 0; i < m_structure.numAttributes(); i++) {
      if (m_NominalAttributes.isInRange(i)) {
        m_types[i] = TYPE.NOMINAL;
        LinkedHashSet<String> ts = new LinkedHashSet<String>();
        m_nominalVals.put(i, ts);
      } else if (m_StringAttributes.isInRange(i)) {
        m_types[i] = TYPE.STRING;
      } else if (m_dateAttributes.isInRange(i)) {
        m_types[i] = TYPE.DATE;
      } else if (m_numericAttributes.isInRange(i)) {
        m_types[i] = TYPE.NUMERIC;
      } else {
        m_types[i] = TYPE.UNDETERMINED;
      }
    }

    // explicit nominal label specs of the form "<attributes>:<label>,<label>,..." override the
    // type chosen above; specs that do not split into exactly two parts on ':' are skipped
    if (m_nominalLabelSpecs.size() > 0) {
      for (String spec : m_nominalLabelSpecs) {
        String[] attsAndLabels = spec.split(":");
        if (attsAndLabels.length == 2) {
          String[] labels = attsAndLabels[1].split(",");
          try {
            // try as a range string first
            Range tempR = new Range();
            tempR.setRanges(attsAndLabels[0].trim());
            tempR.setUpper(m_structure.numAttributes() - 1);

            int[] rangeIndexes = tempR.getSelection();
            for (int i = 0; i < rangeIndexes.length; i++) {
              m_types[rangeIndexes[i]] = TYPE.NOMINAL;
              LinkedHashSet<String> ts = new LinkedHashSet<String>();
              for (String lab : labels) {
                ts.add(lab);
              }
              m_nominalVals.put(rangeIndexes[i], ts);
            }
          } catch (IllegalArgumentException e) {
            // one or more named attributes? Names that match no attribute are skipped
            String[] attNames = attsAndLabels[0].split(",");
            for (String attN : attNames) {
              Attribute a = m_structure.attribute(attN.trim());
              if (a != null) {
                int attIndex = a.index();
                m_types[attIndex] = TYPE.NOMINAL;
                LinkedHashSet<String> ts = new LinkedHashSet<String>();
                for (String lab : labels) {
                  ts.add(lab);
                }
                m_nominalVals.put(attIndex, ts);
              }
            }
          }
        }
      }
    }

    // Prevents the first row from getting lost in the
    // case where there is no header row and we're
    // running in batch mode
    if (m_noHeaderRow && getRetrieval() == BATCH) {
      StreamTokenizer tempT = new StreamTokenizer(new StringReader(firstRow));
      initTokenizer(tempT);
      tempT.ordinaryChar(m_FieldSeparator.charAt(0));
      String checked = getInstance(tempT);
      dumpRow(checked);
    }

    // from here on tokens are read from the remaining source rather than the first row
    m_st = new StreamTokenizer(m_sourceReader);
    initTokenizer(m_st);
    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    // try and determine a more accurate structure from the first batch
    // (the argument reduces to getRetrieval() == BATCH)
    readData(false || getRetrieval() == BATCH);
    makeStructure();
  }