/**
  * Reads identifiers from the request and adds them to the temporary bag in batches.
  *
  * <p>Every line supplied by the request reader is tokenised using the matcher returned by
  * {@code getMatcher()} as the delimiter and double quotes as the quote character. Each token is
  * collected into {@code ids}. After a line has been consumed, if {@code ids} holds at least
  * {@code BAG_QUERY_MAX_BATCH_SIZE} entries, the batch is handed to {@code addIdsToList} and
  * {@code ids} is cleared. Whatever is left once the reader is exhausted is handed over as a
  * final batch. The reader is closed before that final batch is processed.
  *
  * @param type The type of thing these identifiers are.
  * @param input The creation input; supplies the extra value and the issue collection.
  * @param ids The identifiers; used as the working batch and mutated by this method.
  * @param unmatchedIds A collector for unmatched identifiers.
  * @param tempBag The temporary bag to add results to.
  * @throws IOException If we can't read from the request.
  * @throws ClassNotFoundException if the type is not valid.
  * @throws InterMineException If something goes wrong building the bag.
  * @throws ObjectStoreException If there is a problem on the database level.
  */
 protected void processIdentifiers(
     final String type,
     ListCreationInput input,
     final Set<String> ids,
     final Set<String> unmatchedIds,
     final InterMineBag tempBag)
     throws IOException, ClassNotFoundException, InterMineException, ObjectStoreException {
   final Collection<String> issues = input.getAddIssues();
   final StrMatcher delimiters = getMatcher();
   final BufferedReader reader = getReader(request);
   try {
     for (String current = reader.readLine(); current != null; current = reader.readLine()) {
       final StrTokenizer tokens =
           new StrTokenizer(current, delimiters, StrMatcher.doubleQuoteMatcher());
       while (tokens.hasNext()) {
         ids.add(tokens.nextToken());
       }
       // Keep accumulating until a full batch is available.
       if (ids.size() < BAG_QUERY_MAX_BATCH_SIZE) {
         continue;
       }
       addIdsToList(ids, tempBag, type, input.getExtraValue(), unmatchedIds, issues);
       ids.clear();
     }
   } finally {
     if (reader != null) {
       reader.close();
     }
   }
   // Flush the final, partially filled batch.
   if (!ids.isEmpty()) {
     addIdsToList(ids, tempBag, type, input.getExtraValue(), unmatchedIds, issues);
   }
 }
 // Example #2
 // 0
  /**
   * Compiles each regular expression contained in a comma separated pattern string.
   *
   * <p>Individual expressions may be wrapped in double quotes. The tokenizer is configured to
   * skip empty tokens and to treat space, tab, line feed and carriage return as ignored
   * characters.
   *
   * @param inStr pattern string to parse
   * @return the compiled patterns, in the order the tokenizer produced them
   */
  private ArrayList<Pattern> parsePatternString(String inStr) {
    StrTokenizer splitter = new StrTokenizer(inStr, ',', '"');
    splitter.setIgnoreEmptyTokens(true);
    splitter.setIgnoredMatcher(StrMatcher.charSetMatcher(" \t\n\r"));

    ArrayList<Pattern> compiled = new ArrayList<Pattern>();
    while (splitter.hasNext()) {
      compiled.add(Pattern.compile(splitter.nextToken()));
    }
    return compiled;
  }
  /**
   * Links {@code output} to every pathway listed in the pathway section of a KEGG gene record.
   *
   * <p>The record is split into sections by {@code KeggUtils.getSectionsFromKeggRecord}. For each
   * line of the section stored under {@code PATHWAY_RECORD_SECTION}, the first token produced by
   * a default-configured {@code StrTokenizer} is used as the pathway identifier, and a
   * {@code SIO.is_participant_in} property pointing at the corresponding pathway instance is
   * added to {@code output}.
   *
   * <p>Nothing is added when the section is absent. Lines that yield no token (for example blank
   * lines) are skipped, because {@code StrTokenizer.nextToken()} returns {@code null} in that
   * case and a {@code null} identifier must not be turned into a pathway node.
   *
   * @param keggGeneId the KEGG gene identifier (not used by this implementation)
   * @param keggGeneRecord the raw KEGG gene record text
   * @param output the resource that receives the pathway links
   */
  @Override
  protected void processInput(String keggGeneId, String keggGeneRecord, Resource output) {
    Map<String, String> recordSections = KeggUtils.getSectionsFromKeggRecord(keggGeneRecord);
    String pathwaySection = recordSections.get(PATHWAY_RECORD_SECTION);
    if (pathwaySection == null) {
      return;
    }

    StrTokenizer tokenizer = new StrTokenizer();
    for (String line : pathwaySection.split("\\r?\\n")) {
      String keggPathwayId = tokenizer.reset(line).nextToken();
      if (keggPathwayId == null) {
        // No token on this line (e.g. it is blank); there is no pathway to link.
        continue;
      }
      Resource keggPathwayNode =
          LSRNUtils.createInstance(
              output.getModel(), LSRNUtils.getClass(LSRN.Namespace.KEGG_PATHWAY), keggPathwayId);
      output.addProperty(SIO.is_participant_in, keggPathwayNode);
    }
  }
 /**
  * Initialises this rule from the supplied configuration.
  *
  * <p>Reads the minimum order value and the fixed discount amount as decimal numbers, and the
  * category and client lists as comma separated values converted with
  * {@code StringToIntegerList.convert}. The two monetary values are logged at info level once
  * everything has been read.
  *
  * @param ruleConfig the configuration to read the rule settings from
  */
 @Override
 public void init(RuleConfiguration ruleConfig) {
   String minOrderText = ruleConfig.getConfigItemValue(RuleConfigConstants.MIN_ORDER_VALUE);
   minOrderValue = new Money(BigDecimal.valueOf(Double.parseDouble(minOrderText)));

   String rsOffText = ruleConfig.getConfigItemValue(RuleConfigConstants.FIXED_DISCOUNT_RS_OFF);
   fixedRsOff = new Money(BigDecimal.valueOf(Double.parseDouble(rsOffText)));

   String categoryCsv = ruleConfig.getConfigItemValue(RuleConfigConstants.CATEGORY_LIST);
   StrTokenizer categoryTokens = new StrTokenizer(categoryCsv, ",");
   categories = StringToIntegerList.convert((List<String>) categoryTokens.getTokenList());

   String clientCsv = ruleConfig.getConfigItemValue(RuleConfigConstants.CLIENT_LIST);
   StrTokenizer clientTokens = new StrTokenizer(clientCsv, ",");
   client_list = StringToIntegerList.convert((List<String>) clientTokens.getTokenList());

   log.info("minOrderValue : " + minOrderValue + ", fixedRsOff : " + fixedRsOff);
 }
 /**
  * Rebuilds connection parameters from their single-string form.
  *
  * <p>The input is tokenised as CSV; each resulting entry is tokenised again with {@code '='}
  * as the delimiter. Only entries that split into exactly two tokens are stored, as key and
  * value; all other entries are ignored.
  *
  * @param input String from configuration
  * @return JDBC connection parameters, or {@code null} when {@code input} is {@code null} or
  *     empty
  */
 protected static Properties propertiesFromString(String input) {
   if (input == null || input.isEmpty()) {
     return null;
   }

   Properties parsed = new Properties();
   StrTokenizer entries = StrTokenizer.getCSVInstance(input);
   StrTokenizer keyValue = StrTokenizer.getCSVInstance();
   keyValue.setDelimiterChar('=');

   while (entries.hasNext()) {
     String[] parts = keyValue.reset(entries.nextToken()).getTokenArray();
     if (parts.length != 2) {
       continue;
     }
     parsed.put(parts[0], parts[1]);
   }
   return parsed;
 }
  /**
   * Fills the declared fields of a segment object from one delimited segment line.
   *
   * <p>The line is split on the message's element delimiter; empty elements are kept and
   * reported as {@code null}. The first token is discarded as the segment tag. Each following
   * token is paired with the next field in the order returned by
   * {@code getDeclaredFields()}, until either the tokens or the fields run out.
   *
   * <ul>
   *   <li>A field annotated with {@code EDIComponent} receives a new collection whose entries
   *       are the converted sub-tokens of the element, split on the annotation's delimiter or,
   *       when that is unassigned, on the message's component delimiter.
   *   <li>Any other field is converted with {@code FieldAwareConverter} and assigned through
   *       {@code BeanUtils}; a {@code null} element leaves the field untouched.
   * </ul>
   *
   * @param ediMessage supplies the element and component delimiters
   * @param segment the object whose fields are populated
   * @param segmentLine the raw segment text, including the leading tag
   * @throws ConversionException if converting or assigning a non-component field fails
   */
  protected static <T> void parseEDISegmentFields(
      EDIMessage ediMessage, Object segment, String segmentLine)
      throws IllegalAccessException, InvocationTargetException, ClassNotFoundException,
          ConversionException, InstantiationException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Before Field Values: " + ReflectionToStringBuilder.toString(segment));
      LOG.debug("Segment Values: " + segmentLine);
    }

    StrTokenizer elements = new StrTokenizer(segmentLine, ediMessage.elementDelimiter());
    elements.setEmptyTokenAsNull(true);
    elements.setIgnoreEmptyTokens(false);

    // The leading token is the segment tag, not a field value.
    elements.next();

    Field[] declaredFields = segment.getClass().getDeclaredFields();
    for (int idx = 0; elements.hasNext() && idx < declaredFields.length; idx++) {
      Field field = declaredFields[idx];
      String val = elements.nextToken();

      if (!field.isAnnotationPresent(EDIComponent.class)) {
        // Plain field: a missing element simply leaves the field as it is.
        if (val == null) {
          LOG.debug("  " + field.getName() + " -> null");
          continue;
        }

        try {
          Object converted = FieldAwareConverter.convertFromString(field.getType(), field, val);
          LOG.debug("  " + field.getName() + " -> " + val);

          BeanUtils.setProperty(segment, field.getName(), converted);
        } catch (Exception e) {
          throw new ConversionException(
              "Exception setting: "
                  + segment.getClass()
                  + "."
                  + field.getName()
                  + " with value: "
                  + val,
              e);
        }
        continue;
      }

      // Component field: split the element further and collect the converted parts.
      EDIComponent ediComponent = field.getAnnotation(EDIComponent.class);
      Collection parts = CollectionFactory.newInstance(field.getType());
      Class partType = getCollectionType(field);

      char componentDelimiter = ediComponent.delimiter();
      if (componentDelimiter == Character.UNASSIGNED) {
        componentDelimiter = ediMessage.componentDelimiter();
      }

      StrTokenizer components = new StrTokenizer(val, componentDelimiter);
      components.setEmptyTokenAsNull(true);
      components.setIgnoreEmptyTokens(false);

      while (components.hasNext()) {
        String component = components.nextToken();
        Object converted =
            partType.cast(FieldAwareConverter.convertFromString(partType, field, component));
        parts.add(converted);
      }
      BeanUtils.setProperty(segment, field.getName(), parts);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("After Field Values: " + ReflectionToStringBuilder.toString(segment));
    }
  }
  /**
   * Parses one segment group (or a collection of segment groups) and assigns it to the field
   * described by {@code fm} on {@code object}.
   *
   * <p>The group class is resolved from the matched field and must carry the
   * {@code EDISegmentGroup} annotation. Lines are taken from {@code lookAhead} first and from
   * {@code segmentIterator} once the queue is empty; lines that turn out not to belong to the
   * group are put back onto {@code lookAhead}.
   *
   * <p>If the field's type is a {@code Collection}, group instances are parsed repeatedly and
   * added to a new collection; otherwise a single group instance is parsed and assigned.
   *
   * @param ediMessage supplies the element delimiter used when comparing lines
   * @param object the object that owns the segment-group field
   * @param lookAhead queue of lines that were read but not yet consumed
   * @param segmentIterator source of further segment lines
   * @param fm the matched field together with the line that matched it
   * @throws EDIMessageException if the resolved group class is not annotated with
   *     {@code EDISegmentGroup}
   */
  protected static <T> void processSegmentGroup(
      EDIMessage ediMessage,
      T object,
      Queue<String> lookAhead,
      SegmentIterator segmentIterator,
      FieldMatch fm)
      throws InstantiationException, IllegalAccessException, InvocationTargetException,
          ClassNotFoundException, ConversionException, EDIMessageException {

    LOG.debug("Object: " + ReflectionToStringBuilder.toString(object));
    LOG.debug("Field: " + fm.getField().getName());

    Class<?> segmentGroupClass = getEDISegmentOrGroupType(fm.getField());
    if (!segmentGroupClass.isAnnotationPresent(EDISegmentGroup.class)) {
      throw new EDIMessageException("Segment Group should have annotation.");
    }

    LOG.debug("Segment Group Type: " + segmentGroupClass);

    String line = fm.getLine();
    EDISegmentGroup es = segmentGroupClass.getAnnotation(EDISegmentGroup.class);

    if (StringUtils.equals(es.header(), line)) {
      // feed line.
      // The matched line is exactly the group header, so it is consumed here and not re-queued.
    } else {
      // The matched line is group content; queue it so the field parsing below sees it.
      LOG.debug("Adding to Look Ahead: " + line);
      lookAhead.add(line);
    }

    if (Collection.class.isAssignableFrom(fm.getField().getType())) {
      // Collection-valued field: the (initially empty) collection is assigned to the owner first
      // and then filled in place by the loop below.
      Collection obj = CollectionFactory.newInstance(fm.getField().getType());
      BeanUtils.setProperty(object, fm.getField().getName(), obj);

      String segmentTag = getSegmentTag(fm.getField(), true);

      // Each pass of this loop builds one group instance and adds it to the collection.
      while (true) {
        LOG.debug("Looping to collect Collection of Segment Groups");
        // parse the group...
        Field[] fields = segmentGroupClass.getDeclaredFields();
        final List<Field> fieldsList = Arrays.asList(fields);
        ListIterator<Field> fieldIterator = fieldsList.listIterator(0);

        Object collectionObj = segmentGroupClass.newInstance();
        while (fieldIterator.hasNext() && (segmentIterator.hasNext() || lookAhead.size() > 0)) {
          if (startOfNewRecursiveObject(fieldIterator, fieldsList, segmentGroupClass)) {
            // Inspect the next line without losing it: it is taken and immediately re-queued.
            // NOTE(review): when lookAhead already holds more than one line, remove() followed
            // by add() moves the head to the tail of the queue — confirm that reordering is
            // intended.
            String next = lookAhead.size() > 0 ? lookAhead.remove() : segmentIterator.next();
            lookAhead.add(next);
            if (!isSegmentHeirarchicalLevelAndDeeperLevel(
                line, next, Character.toString(ediMessage.elementDelimiter()))) {
              LOG.debug("Reaching new instance of list.");
              break;
            }
          }
          parseEDISegmentOrSegmentGroup(
              ediMessage, collectionObj, fieldIterator, lookAhead, segmentIterator);
        }

        obj.add(collectionObj);

        // look to next line...
        // NOTE(review): segmentIterator.next() is called here without a preceding hasNext()
        // check when lookAhead is empty — confirm how SegmentIterator behaves when exhausted.
        String nextLine = lookAhead.size() > 0 ? lookAhead.remove() : segmentIterator.next();
        // get the first element of the line.
        StrTokenizer nextLineTokenizer = new StrTokenizer(nextLine, ediMessage.elementDelimiter());

        // In every branch below nextLine is put back on lookAhead; the branches differ only in
        // whether the collection loop continues.
        if (StringUtils.equals(segmentTag, nextLineTokenizer.nextToken())) {
          if (!isSegmentEqual(line, nextLine, Character.toString(ediMessage.elementDelimiter()))) {
            LOG.debug("Reaching new collection");
            lookAhead.add(nextLine);
            break;
          }
          LOG.debug("Might be a repeat..");
          // NOTE(review): this logs `line` (the originally matched line) under the label
          // "Next line" — possibly `nextLine` was meant; confirm.
          LOG.debug("Next line: " + line);
          lookAhead.add(nextLine);
        } else {
          // Different segment tag: the collection is complete.
          lookAhead.add(nextLine);
          break;
        }

        // now, look ahead to see whether the next line is of the same
        // object type..
        if (!segmentIterator.hasNext() && lookAhead.size() == 0) {
          break;
        }
      }

    } else {
      // Single-valued field: parse one group instance and assign it once populated.
      Field[] fields = segmentGroupClass.getDeclaredFields();
      Iterator<Field> fieldIterator = Arrays.asList(fields).iterator();

      Object obj = segmentGroupClass.newInstance();
      while (fieldIterator.hasNext() && (segmentIterator.hasNext() || lookAhead.size() > 0)) {
        parseEDISegmentOrSegmentGroup(ediMessage, obj, fieldIterator, lookAhead, segmentIterator);
      }

      BeanUtils.setProperty(object, fm.getField().getName(), obj);
    }

    // look at next...
    // NOTE(review): the footer handling is guarded by header() being non-blank, and the match
    // uses endsWith(es.footer(), line), i.e. "footer ends with line" rather than equality —
    // confirm both are intended. As above, next() is called without a hasNext() check.
    if (StringUtils.isNotBlank(es.header())) {
      line = lookAhead.size() > 0 ? lookAhead.remove() : segmentIterator.next();

      if (StringUtils.endsWith(es.footer(), line)) {
        // feed line.
        LOG.debug("Popping footer off of the line iterator.");
      } else {
        // Not the footer: put the line back for whoever parses next.
        lookAhead.add(line);
      }
    }
  }
  /**
   * Reads tab-separated numeric rows from {@code br} until the end of the stream.
   *
   * <p>Each line is split on tab characters, keeping empty cells. Lines for which
   * {@code isNonEmptyLine} returns {@code false} are skipped (but still counted as line numbers).
   * The first accepted line fixes the expected column count; every later accepted line must have
   * the same number of cells.
   *
   * <p>Cell {@code i} of a line is written to position {@code remappedColumns[i]} of the output
   * row, unless that mapping is {@code -1}, in which case the cell is dropped. An empty cell is
   * stored as {@code 0}; a cell that cannot be parsed as a double is stored as
   * {@code Double.NaN}.
   *
   * @param br the reader to consume; it is not closed by this method
   * @param remappedColumns source-column to output-column mapping with {@code -1} meaning "skip",
   *     or {@code null} to map every column to itself
   * @param mappedColumnCount length of each output row; replaced by the detected column count
   *     when {@code remappedColumns} is {@code null}
   * @return one {@code double[]} per accepted line, in file order
   * @throws IOException if reading from {@code br} fails
   * @throws IllegalStateException if an accepted line has a different number of cells than the
   *     first accepted line
   */
  static List<double[]> readDataBodyAsDouble(
      BufferedReader br, int[] remappedColumns, int mappedColumnCount)
      throws NumberFormatException, IOException {
    List<double[]> rows = new ArrayList<double[]>(500);
    int colCount = 0; // Cells per row - 0 until the first non-empty line has been seen
    int curRow = 0; // Current line number - 0 based; empty lines are counted as well
    String s; // Single line from file

    while ((s = br.readLine()) != null) {
      // StrTokenizer is used (rather than String.split("\t")) so that trailing empty cells are
      // kept and every row reports its true number of columns.
      StrTokenizer strTk = new StrTokenizer(s);
      strTk.setIgnoreEmptyTokens(false);
      strTk.setDelimiterChar('\t');
      String[] src = strTk.getTokenArray();

      if (isNonEmptyLine(src)) {
        if (colCount == 0) {
          colCount = src.length; // initialize column count
          if (remappedColumns == null) {
            // assign default (identity) mapping now
            remappedColumns = new int[colCount];
            for (int i = 0; i < colCount; i++) {
              remappedColumns[i] = i;
            }

            mappedColumnCount = colCount;
          }
        } else if (src.length != colCount) {
          // Dump the offending cells through the logger instead of stdout.
          for (int i = 0; i < src.length; i++) {
            log.debug(i + ": " + src[i]);
          }

          throw new IllegalStateException(
              "Parse Error: Row "
                  + curRow
                  + " has "
                  + src.length
                  + " columns, previous rows had "
                  + colCount
                  + " columns.");
        }

        double[] row = new double[mappedColumnCount];

        for (int i = 0; i < src.length; i++) {
          int target = remappedColumns[i];
          if (target == -1) {
            continue; // column is not mapped to any output position
          }

          if (src[i].length() > 0) {
            // Simple hack to ignore text columns in the predict.txt file:
            // anything that is not a number becomes NaN.
            try {
              row[target] = Double.parseDouble(src[i]);
            } catch (NumberFormatException e) {
              row[target] = Double.NaN;
            }
          } else {
            row[target] = 0;
          }
        }

        rows.add(row);
      }

      curRow++;
    }

    log.debug("Found " + curRow + " rows in predict.txt file.");
    return rows;
  }