// Should a field be ignored? public void guessIgnore() { // If the string contains only spaces? boolean stop = false; for (int i = 0; i < samples.length && !stop; i++) { if (!Const.onlySpaces(samples[i])) stop = true; } if (!stop) { ignore = true; return; } // If all the strings are empty stop = false; for (int i = 0; i < samples.length && !stop; i++) { if (samples[i].length() > 0) stop = true; } if (!stop) { ignore = true; return; } // If all the strings are equivalent to NULL stop = false; for (int i = 0; i < samples.length && !stop; i++) { if (!samples[i].equalsIgnoreCase(nullString)) stop = true; } if (!stop) { ignore = true; return; } }
/**
 * Guesses the type, format and precision of this field from {@code samples}.
 *
 * <p>The method tries, in order:
 * <ol>
 *   <li><b>Date</b>: every sample that is not the {@code nullString} marker must parse
 *       (non-leniently) with at least one common pattern from {@code date_formats} and
 *       yield a year between 1800 and 2200. A null, empty or spaces-only sample rules out
 *       all date patterns. The first surviving pattern becomes {@code format}.</li>
 *   <li><b>Number</b>: every sample may contain only digits, {@code '.'}, {@code ','},
 *       {@code 'E'}/{@code 'e'} and a leading {@code '-'}. Samples without separators are
 *       tried as {@code long} first (precision 0); everything else is parsed with each
 *       pattern from {@code number_formats}, and the precision is taken from
 *       {@code guessPrecision}. The first surviving pattern becomes {@code format} and its
 *       largest observed precision becomes {@code precision}.</li>
 *   <li><b>String</b>: the fallback; format and symbols are cleared.</li>
 * </ol>
 *
 * <p>If {@code samples} is {@code null}, or if all samples are {@code nullString} markers,
 * the type is String. Integers are reported as numbers with precision 0.
 */
public void guessType() {
    nf = NumberFormat.getInstance();
    df = (DecimalFormat) nf;
    dfs = new DecimalFormatSymbols();
    daf = new SimpleDateFormat();
    daf.setLenient(false);

    // Start with a string...
    type = ValueMetaInterface.TYPE_STRING;

    // If we have no samples, we assume a String...
    if (samples == null) {
        return;
    }

    //////////////////////////////
    // DATES
    //////////////////////////////

    // See if all samples can be transformed into a date...
    int datefmt_cnt = date_formats.length;
    boolean datefmt[] = new boolean[date_formats.length];
    for (int i = 0; i < date_formats.length; i++) {
        datefmt[i] = true;
    }
    int datenul = 0;

    for (int i = 0; i < samples.length; i++) {
        String sample = samples[i];

        if (sample != null && sample.length() > 0 && sample.equalsIgnoreCase(nullString)) {
            datenul++;
            continue;
        }

        if (sample == null || Const.onlySpaces(sample) || sample.length() == 0) {
            // A blank sample can never be a date: rule out every pattern at once and
            // reset the counter instead of decrementing it once per pattern.
            for (int x = 0; x < date_formats.length; x++) {
                datefmt[x] = false;
            }
            datefmt_cnt = 0;
            continue;
        }

        for (int x = 0; x < date_formats.length; x++) {
            if (!datefmt[x]) {
                continue;
            }
            try {
                daf.applyPattern(date_formats[x]);
                Date date = daf.parse(sample);

                Calendar cal = Calendar.getInstance();
                cal.setTime(date);
                int year = cal.get(Calendar.YEAR);

                if (year < 1800 || year > 2200) {
                    datefmt[x] = false; // Don't try it again in the future.
                    datefmt_cnt--; // One less that works..
                }
            } catch (Exception e) {
                datefmt[x] = false; // Don't try it again in the future.
                datefmt_cnt--; // One less that works..
            }
        }
    }

    // If it is a date, copy info over to the format etc. Then return with the info.
    // If all samples were NULL values, we can't really decide what the type is.
    // So we're certainly not going to take a date, just take a string in that case.
    if (datefmt_cnt > 0 && datenul != samples.length) {
        int first = -1;
        for (int i = 0; i < date_formats.length && first < 0; i++) {
            if (datefmt[i]) {
                first = i;
            }
        }

        type = ValueMetaInterface.TYPE_DATE;
        format = date_formats[first];

        return;
    }

    //////////////////////////////
    // NUMBERS
    //////////////////////////////

    boolean isnumber = true;

    // Set decimal symbols to default
    decimalSymbol = "" + dfs.getDecimalSeparator();
    groupSymbol = "" + dfs.getGroupingSeparator();

    boolean numfmt[] = new boolean[number_formats.length];
    int maxprecision[] = new int[number_formats.length];
    for (int i = 0; i < numfmt.length; i++) {
        numfmt[i] = true;
        maxprecision[i] = -1;
    }
    int numfmt_cnt = number_formats.length;
    int numnul = 0;

    for (int i = 0; i < samples.length && isnumber; i++) {
        // A null sample is handled like an empty one, which no number format accepts.
        String field = samples[i] == null ? "" : samples[i];

        if (field.length() > 0 && field.equalsIgnoreCase(nullString)) {
            numnul++;
            continue;
        }

        boolean containsDot = false;
        boolean containsComma = false;

        for (int x = 0; x < field.length() && isnumber; x++) {
            char ch = field.charAt(x);
            boolean allowed = Character.isDigit(ch)
                || ch == '.'
                || ch == ','
                || (ch == '-' && x == 0) // sign only in the first position
                || ch == 'E'
                || ch == 'e'; // exponential
            if (!allowed) {
                isnumber = false;
                numfmt_cnt = 0;
            } else if (ch == '.') {
                containsDot = true;
            } else if (ch == ',') {
                containsComma = true;
            }
        }

        if (!isnumber) {
            continue; // the loop condition ends the scan
        }

        // Pick the decimal/grouping symbols from the separators seen in this sample.
        if (containsDot || containsComma) {
            boolean dotIsDecimal;
            if (containsDot && containsComma) {
                // Both appear: the separator that shows up later is the decimal point.
                dotIsDecimal = field.indexOf('.') > field.indexOf(',');
            } else {
                // Only one appears: American style 174.5 or European style 174,5.
                dotIsDecimal = containsDot;
            }
            char decimalChar = dotIsDecimal ? '.' : ',';
            char groupChar = dotIsDecimal ? ',' : '.';

            dfs.setDecimalSeparator(decimalChar);
            decimalSymbol = String.valueOf(decimalChar);
            dfs.setGroupingSeparator(groupChar);
            groupSymbol = String.valueOf(groupChar);
        }

        // Try the remaining possible number formats!
        for (int x = 0; x < number_formats.length; x++) {
            if (!numfmt[x]) {
                continue;
            }
            try {
                int prec = -1;
                boolean parsedAsLong = false;

                // Try long integers first, but only when there is no separator at all.
                if (!containsDot && !containsComma) {
                    try {
                        Long.parseLong(field);
                        prec = 0;
                        parsedAsLong = true;
                    } catch (NumberFormatException ignored) {
                        // Not a plain long (e.g. exponent notation or overflow): fall
                        // through to the decimal parse below.
                    }
                }

                // Anything that is not a plain long, including every sample containing
                // a separator, must be validated against the pattern.
                if (!parsedAsLong) {
                    df.setDecimalFormatSymbols(dfs);
                    df.applyPattern(number_formats[x]);

                    double d = df.parse(field).doubleValue();
                    prec = guessPrecision(d);
                }

                if (prec > maxprecision[x]) {
                    maxprecision[x] = prec;
                }
            } catch (Exception e) {
                numfmt[x] = false; // Don't try it again in the future.
                numfmt_cnt--; // One less that works..
            }
        }
    }

    // Still a number? Grab the result and return.
    // If all sample strings are empty or represent NULL values we can't take a number as type.
    if (numfmt_cnt > 0 && numnul != samples.length) {
        int first = -1;
        for (int i = 0; i < number_formats.length && first < 0; i++) {
            if (numfmt[i]) {
                first = i;
            }
        }

        type = ValueMetaInterface.TYPE_NUMBER;
        format = number_formats[first];
        precision = maxprecision[first];

        // Integers are deliberately left as TYPE_NUMBER with precision 0.
        return;
    }

    //
    // Assume it's a string...
    //
    type = ValueMetaInterface.TYPE_STRING;
    format = "";
    precision = -1;
    decimalSymbol = "";
    groupSymbol = "";
    currencySymbol = "";
}