/**
 * Splits a pattern string into its individual regular expressions and compiles each of them.
 *
 * <p>The input is treated as a comma separated list in which an entry may be wrapped in double
 * quotes. Empty entries are skipped, and space, tab, newline and carriage-return characters are
 * registered with the tokenizer as ignored characters.
 *
 * @param inStr pattern string to parse
 * @return the compiled patterns, in the order their expressions appear in {@code inStr}
 */
private ArrayList<Pattern> parsePatternString(String inStr) {
  StrTokenizer splitter = new StrTokenizer(inStr, ',', '"');
  splitter.setIgnoreEmptyTokens(true);
  splitter.setIgnoredMatcher(StrMatcher.charSetMatcher(" \t\n\r"));

  ArrayList<Pattern> compiled = new ArrayList<Pattern>();
  for (String regex : splitter.getTokenArray()) {
    compiled.add(Pattern.compile(regex));
  }
  return compiled;
}
protected static <T> void parseEDISegmentFields( EDIMessage ediMessage, Object segment, String segmentLine) throws IllegalAccessException, InvocationTargetException, ClassNotFoundException, ConversionException, InstantiationException { if (LOG.isDebugEnabled()) { LOG.debug("Before Field Values: " + ReflectionToStringBuilder.toString(segment)); LOG.debug("Segment Values: " + segmentLine); } // now, tokenize the line, and set the fields. StrTokenizer tokenizer = new StrTokenizer(segmentLine, ediMessage.elementDelimiter()); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); // move past the initial tag. tokenizer.next(); Iterator<Field> fieldIterator = Arrays.asList(segment.getClass().getDeclaredFields()).iterator(); while (tokenizer.hasNext() && fieldIterator.hasNext()) { Field field = fieldIterator.next(); String val = tokenizer.nextToken(); // check field to see if it is a component of regular field type. if (field.isAnnotationPresent(EDIComponent.class)) { EDIComponent ediComponent = field.getAnnotation(EDIComponent.class); Collection obj = CollectionFactory.newInstance(field.getType()); Class objType = getCollectionType(field); char componentDelimiter = ediComponent.delimiter() == Character.UNASSIGNED ? ediMessage.componentDelimiter() : ediComponent.delimiter(); // parse each element to the collection. StrTokenizer componentTokenizer = new StrTokenizer(val, componentDelimiter); componentTokenizer.setEmptyTokenAsNull(true); componentTokenizer.setIgnoreEmptyTokens(false); while (componentTokenizer.hasNext()) { String component = componentTokenizer.nextToken(); Object fieldObj = objType.cast(FieldAwareConverter.convertFromString(objType, field, component)); obj.add(fieldObj); } BeanUtils.setProperty(segment, field.getName(), obj); } else { if (val == null) { LOG.debug(" " + field.getName() + " -> null"); continue; } // try and populate the field. 
try { Object fieldObj = FieldAwareConverter.convertFromString(field.getType(), field, val); LOG.debug(" " + field.getName() + " -> " + val); BeanUtils.setProperty(segment, field.getName(), fieldObj); } catch (Exception e) { throw new ConversionException( "Exception setting: " + segment.getClass() + "." + field.getName() + " with value: " + val, e); } } } if (LOG.isDebugEnabled()) { LOG.debug("After Field Values: " + ReflectionToStringBuilder.toString(segment)); } }
static List<double[]> readDataBodyAsDouble( BufferedReader br, int[] remappedColumns, int mappedColumnCount) throws NumberFormatException, IOException { List<double[]> rows = new ArrayList<double[]>(500); int colCount = 0; // Number of columns (minus one) - must match in each row int curRow = 0; // Current row number - 0 based; String s = null; // Single line from file while ((s = br.readLine()) != null) { StrTokenizer strTk = new StrTokenizer(s); strTk.setIgnoreEmptyTokens(false); strTk.setDelimiterChar('\t'); String src[] = strTk.getTokenArray(); // String src[] = s.split("\t"); if (isNonEmptyLine(src)) { if (colCount == 0) { colCount = src.length; // initialize column count if (remappedColumns == null) { // assign default mapping now remappedColumns = new int[colCount]; for (int i = 0; i < colCount; i++) { remappedColumns[i] = i; } mappedColumnCount = colCount; } } else { if (src.length != colCount) { for (int i = 0; i < src.length; i++) { System.out.println(i + ": " + src[i]); } throw new IllegalStateException( "Parse Error: Row " + curRow + " has " + src.length + " columns, previous columns had " + colCount + " columns."); } } double[] row = new double[mappedColumnCount]; for (int i = 0; i < src.length; i++) { if (remappedColumns[i] != -1) { if (src[i].length() > 0) { // Simple hack to ignore text columns in the predict.txt file try { row[remappedColumns[i]] = Double.parseDouble(src[i]); } catch (Exception e) { row[remappedColumns[i]] = Double.NaN; } } else { row[remappedColumns[i]] = 0; } } } rows.add(row); } else { // ignore empty lines } curRow++; } log.debug("Found " + curRow + " rows in predict.txt file."); return rows; }