Example #1
0
  /**
   * Splits a comma separated list of regular expressions into compiled patterns. Each expression
   * may be wrapped in double quotes. Empty entries are dropped, and space, tab, newline and
   * carriage-return characters are registered as the tokenizer's ignored characters.
   *
   * @param inStr pattern string to parse
   * @return the compiled patterns, in the order the tokenizer produced them
   */
  private ArrayList<Pattern> parsePatternString(String inStr) {
    StrTokenizer splitter =
        new StrTokenizer(inStr, ',', '"')
            .setIgnoreEmptyTokens(true)
            .setIgnoredMatcher(StrMatcher.charSetMatcher(" \t\n\r"));

    ArrayList<Pattern> compiled = new ArrayList<Pattern>();
    for (String regex : splitter.getTokenArray()) {
      compiled.add(Pattern.compile(regex));
    }
    return compiled;
  }
  /**
   * Populates the declared fields of {@code segment} from one delimited segment line.
   *
   * <p>The line is split on {@code ediMessage.elementDelimiter()}. The first token (the segment
   * tag) is skipped, and every following token is paired, in order, with the next entry of
   * {@code segment.getClass().getDeclaredFields()}. Processing stops as soon as either the tokens
   * or the fields run out. A line that yields no tokens at all leaves the segment untouched
   * rather than failing with a {@code NoSuchElementException} while skipping the tag.
   *
   * <p>Fields annotated with {@code EDIComponent} are split a second time, using the annotation's
   * delimiter or, when that equals {@code Character.UNASSIGNED}, the message's component
   * delimiter. Each component is converted with {@code FieldAwareConverter.convertFromString}
   * and collected into an instance obtained from {@code CollectionFactory.newInstance}, which is
   * then assigned through {@code BeanUtils.setProperty}.
   *
   * <p>For all other fields a {@code null} token (an empty element) is skipped; any other token
   * is converted and assigned through {@code BeanUtils.setProperty}.
   *
   * @param ediMessage supplies the element delimiter and the default component delimiter
   * @param segment object whose declared fields are populated
   * @param segmentLine raw segment text, starting with the segment tag
   * @throws ConversionException if converting or assigning a non-component field fails; the
   *     original exception is attached as the cause
   * @throws IllegalAccessException declared for the bean/reflection helpers used on component
   *     fields
   * @throws InvocationTargetException declared for the bean/reflection helpers used on component
   *     fields
   * @throws ClassNotFoundException presumably raised while resolving a component collection's
   *     element type (verify against the helper signatures)
   * @throws InstantiationException presumably raised while creating the component collection
   *     (verify against the helper signatures)
   */
  protected static <T> void parseEDISegmentFields(
      EDIMessage ediMessage, Object segment, String segmentLine)
      throws IllegalAccessException, InvocationTargetException, ClassNotFoundException,
          ConversionException, InstantiationException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Before Field Values: " + ReflectionToStringBuilder.toString(segment));
      LOG.debug("Segment Values: " + segmentLine);
    }

    // Tokenize the line. Empty elements are kept (as null) so that the remaining tokens stay
    // aligned with their field positions.
    StrTokenizer tokenizer = new StrTokenizer(segmentLine, ediMessage.elementDelimiter());

    tokenizer.setEmptyTokenAsNull(true);
    tokenizer.setIgnoreEmptyTokens(false);

    // Move past the initial tag. When the line yields no tokens there is no tag to skip and the
    // field loop below simply does not run.
    if (tokenizer.hasNext()) {
      tokenizer.next();
    }

    Iterator<Field> fieldIterator =
        Arrays.asList(segment.getClass().getDeclaredFields()).iterator();
    while (tokenizer.hasNext() && fieldIterator.hasNext()) {
      Field field = fieldIterator.next();
      String val = tokenizer.nextToken();

      // Component fields hold a collection of sub-elements; everything else is a single value.
      if (field.isAnnotationPresent(EDIComponent.class)) {
        EDIComponent ediComponent = field.getAnnotation(EDIComponent.class);
        Collection obj = CollectionFactory.newInstance(field.getType());
        Class objType = getCollectionType(field);

        // Character.UNASSIGNED (value 0) on the annotation means: fall back to the message-level
        // component delimiter.
        char componentDelimiter =
            ediComponent.delimiter() == Character.UNASSIGNED
                ? ediMessage.componentDelimiter()
                : ediComponent.delimiter();

        // Parse each sub-element into the collection, again keeping empty ones as null.
        StrTokenizer componentTokenizer = new StrTokenizer(val, componentDelimiter);
        componentTokenizer.setEmptyTokenAsNull(true);
        componentTokenizer.setIgnoreEmptyTokens(false);

        while (componentTokenizer.hasNext()) {
          String component = componentTokenizer.nextToken();
          Object fieldObj =
              objType.cast(FieldAwareConverter.convertFromString(objType, field, component));
          obj.add(fieldObj);
        }
        BeanUtils.setProperty(segment, field.getName(), obj);
      } else {
        if (val == null) {
          // Empty element: leave the field at its current value.
          if (LOG.isDebugEnabled()) {
            LOG.debug("  " + field.getName() + " -> null");
          }
          continue;
        }

        // Try to populate the field; any failure is reported with the field and value involved.
        try {
          Object fieldObj = FieldAwareConverter.convertFromString(field.getType(), field, val);
          if (LOG.isDebugEnabled()) {
            LOG.debug("  " + field.getName() + " -> " + val);
          }

          BeanUtils.setProperty(segment, field.getName(), fieldObj);
        } catch (Exception e) {
          throw new ConversionException(
              "Exception setting: "
                  + segment.getClass()
                  + "."
                  + field.getName()
                  + " with value: "
                  + val,
              e);
        }
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("After Field Values: " + ReflectionToStringBuilder.toString(segment));
    }
  }
  /**
   * Reads every remaining line from {@code br}, splits it on tab characters and converts each
   * line accepted by {@code isNonEmptyLine} into a row of doubles.
   *
   * <p>The first accepted line fixes the expected column count; every later accepted line must
   * have the same number of columns. Source column {@code i} is stored at index {@code
   * remappedColumns[i]} of the row, and columns mapped to {@code -1} are dropped. Empty cells
   * become {@code 0}; cells that cannot be parsed as a double become {@code Double.NaN}.
   *
   * @param br reader positioned at the first data line
   * @param remappedColumns target index for each source column ({@code -1} to drop the column);
   *     when {@code null} an identity mapping is built from the first accepted line
   * @param mappedColumnCount length of each returned row; replaced by the source column count
   *     when {@code remappedColumns} is {@code null}
   * @return one {@code double[]} per accepted line, in file order
   * @throws IllegalStateException if an accepted line has a different column count than the
   *     first accepted line
   * @throws NumberFormatException declared for compatibility; unparsable cells are stored as
   *     {@code NaN} instead of raising it
   * @throws IOException if reading from {@code br} fails
   */
  static List<double[]> readDataBodyAsDouble(
      BufferedReader br, int[] remappedColumns, int mappedColumnCount)
      throws NumberFormatException, IOException {
    List<double[]> rows = new ArrayList<double[]>(500);
    int colCount = 0; // Column count of the first accepted line; 0 until that line is seen
    int curRow = 0; // 0-based index of the current line (skipped lines are counted too)
    String s; // Single line from file

    while ((s = br.readLine()) != null) {
      // Empty tokens are kept so that empty cells do not shift the columns after them.
      StrTokenizer strTk = new StrTokenizer(s);
      strTk.setIgnoreEmptyTokens(false);
      strTk.setDelimiterChar('\t');
      String[] src = strTk.getTokenArray();

      // Lines rejected by isNonEmptyLine are skipped but still advance curRow.
      if (isNonEmptyLine(src)) {

        if (colCount == 0) {
          colCount = src.length; // initialize column count
          if (remappedColumns == null) {
            // assign default (identity) mapping now
            remappedColumns = new int[colCount];
            for (int i = 0; i < colCount; i++) {
              remappedColumns[i] = i;
            }

            mappedColumnCount = colCount;
          }
        } else if (src.length != colCount) {
          // Dump the offending cells through the logger (not stdout) before failing.
          for (int i = 0; i < src.length; i++) {
            log.debug(i + ": " + src[i]);
          }

          throw new IllegalStateException(
              "Parse Error: Row "
                  + curRow
                  + " has "
                  + src.length
                  + " columns, previous columns had "
                  + colCount
                  + " columns.");
        }

        double[] row = new double[mappedColumnCount];

        for (int i = 0; i < src.length; i++) {
          if (remappedColumns[i] != -1) {
            if (src[i].length() > 0) {
              // Simple hack to ignore text columns in the predict.txt file: anything that is
              // not a number is recorded as NaN.
              try {
                row[remappedColumns[i]] = Double.parseDouble(src[i]);
              } catch (NumberFormatException e) {
                row[remappedColumns[i]] = Double.NaN;
              }
            } else {
              row[remappedColumns[i]] = 0;
            }
          }
        }

        rows.add(row);
      }

      curRow++;
    }

    log.debug("Found " + curRow + " rows in predict.txt file.");
    return rows;
  }