コード例 #1
0
  /**
   * Searches the tree data being parsed for an element with the given tag name and reports the
   * path of element names leading to it.
   *
   * <p>If the parser is currently on an ignorable token it is advanced by one token first. The
   * current element is then compared with {@code tag}, both by its field name and by the name
   * composed from its prefix and field name. When neither matches, every nested start entity is
   * searched recursively until an end-entity token is read or the parser has no more tokens.
   *
   * @param parser the reader positioned on the element where the search starts
   * @param tag the element name (can be qualified) to search for
   * @return a mutable list holding the field names from the current element down to the matching
   *     element (a single-item list when the current element itself matches), or {@code null} if
   *     the tag is not found
   * @throws TreeReaderException propagated from the parser
   */
  protected static List<String> detectRecordElement(TreeReader parser, String tag)
      throws TreeReaderException {
    if (parser.current() == Token.Ignorable) {
      parser.next();
    }

    String elementName = parser.getFieldName();
    String qualifiedName = composeName(parser.getPrefix(), elementName);
    boolean isMatch = tag.equals(parser.getFieldName()) || tag.equals(qualifiedName);
    if (isMatch) {
      // The current element is the one being looked for: the path is just its own name.
      List<String> found = new LinkedList<String>();
      found.add(elementName);
      return found;
    }

    while (parser.hasNext()) {
      Token token = parser.next();
      if (token == Token.StartEntity) {
        List<String> childPath = detectRecordElement(parser, tag);
        if (childPath != null) {
          // Found below us: prepend our own name so the path reads top-down.
          childPath.add(0, elementName);
          return childPath;
        }
      } else if (token == Token.EndEntity) {
        // End of the current element without a match.
        return null;
      }
    }
    return null;
  }
コード例 #2
0
  /**
   * Seeks recurring elements in a parsed document which are likely candidates for being data
   * records.
   *
   * <p>Every start entity read from the parser is analysed by {@code detectRecordElement(parser,
   * path)} with a one-element path holding that entity's field name. All non-null candidates are
   * collected, ordered with {@code sortRecordElementCandidates}, and the path of the first one is
   * returned.
   *
   * <p>Detection is best-effort: a {@link TreeReaderException} raised while reading is logged and
   * the candidates gathered up to that point are still used.
   *
   * @param parser The parser loaded with tree data
   * @return The path of the first candidate after sorting (presumably the most numerous one;
   *     the ordering is defined by {@code sortRecordElementCandidates}). {@code null} if no
   *     candidates were found (less than 6 recurrences)
   */
  public static String[] detectRecordElement(TreeReader parser) {
    logger.trace("detectRecordElement(inputStream)");
    List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();

    try {
      while (parser.hasNext()) {
        Token eventType = parser.next();
        if (eventType == Token.StartEntity) {
          RecordElementCandidate candidate =
              detectRecordElement(parser, new String[] {parser.getFieldName()});

          if (candidate != null) {
            candidates.add(candidate);
          }
        }
      }
    } catch (TreeReaderException e) {
      // Do not abort detection, but route the failure through the logger instead of stderr so
      // it is recorded the same way as other parse errors in this class.
      logger.error("Exception while detecting record element", e);
    }

    if (!candidates.isEmpty()) {
      sortRecordElementCandidates(candidates);

      return candidates.get(0).path;
    }
    logger.info(
        "No candidate elements were found in data - at least 6 similar elements are required");
    return null;
  }
コード例 #3
0
 /**
  * Consumes tokens from the parser up to and including the next end-entity token at the current
  * nesting level. Nested start entities are skipped recursively, so their end tokens do not
  * terminate the outer skip. Returns early if the parser runs out of tokens.
  *
  * @param parser the reader to advance
  * @throws TreeReaderException propagated from the parser
  */
 protected static void skip(TreeReader parser) throws TreeReaderException {
   boolean closed = false;
   while (!closed && parser.hasNext()) {
     Token token = parser.next();
     if (token == Token.EndEntity) {
       // Matching end of the entity being skipped.
       closed = true;
     } else if (token == Token.StartEntity) {
       // A nested entity: swallow it entirely before continuing.
       skip(parser);
     }
   }
 }
コード例 #4
0
  /**
   * Walks the parser down {@code recordPath}, starting from the element the parser is currently
   * positioned on, and hands matching elements to the record processors.
   *
   * <p>The current element's field name and its prefix-composed name are compared with {@code
   * recordPath[pathIndex]}:
   *
   * <ul>
   *   <li>no match: the element is consumed with {@code skip};
   *   <li>match on the last path segment: the element is passed to {@code processRecord};
   *   <li>match on an earlier segment: tokens are read until an end-entity token. Each nested
   *       start entity is examined recursively with {@code pathIndex + 1}. When the next segment
   *       is the last one, a value token whose field name equals that segment is passed to {@code
   *       processFieldAsRecord}.
   * </ul>
   *
   * <p>Nothing is done (apart from a warning) when the parser is on an ignorable token.
   *
   * @param project passed through to the record processors
   * @param parser the reader positioned on the element to examine
   * @param recordPath element names leading to the record element
   * @param pathIndex index into {@code recordPath} of the segment the current element must match
   * @param rootColumnGroup passed through to the record processors
   * @param limit the child loop stops once this reaches 0; it is decremented after every nested
   *     start entity visited at this level, and the recursive call receives the value from before
   *     the decrement
   * @throws TreeReaderException propagated from the parser
   */
  protected static void findRecord(
      Project project,
      TreeReader parser,
      String[] recordPath,
      int pathIndex,
      ImportColumnGroup rootColumnGroup,
      int limit)
      throws TreeReaderException {
    logger.trace(
        "findRecord(Project, TreeReader, String[], int, ImportColumnGroup - path:"
            + Arrays.toString(recordPath));

    if (parser.current() == Token.Ignorable) { // XMLStreamConstants.START_DOCUMENT){
      logger.warn("Cannot use findRecord method for START_DOCUMENT event");
      return;
    }

    String expectedSegment = recordPath[pathIndex];
    String elementName = parser.getFieldName();
    String qualifiedName = composeName(parser.getPrefix(), elementName);
    boolean onPath =
        expectedSegment.equals(elementName) || expectedSegment.equals(qualifiedName);

    if (!onPath) {
      // Not part of the record path: consume the whole element.
      skip(parser);
      return;
    }

    int lastIndex = recordPath.length - 1;
    if (pathIndex >= lastIndex) {
      // Reached the end of the path: this element is a record.
      processRecord(project, parser, rootColumnGroup);
      return;
    }

    // Still above the record level: descend into the children.
    while (parser.hasNext() && limit != 0) {
      Token token = parser.next();
      if (token == Token.EndEntity) {
        break;
      }
      if (token == Token.StartEntity) {
        findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit);
        limit--;
      } else if (token == Token.Value && pathIndex == lastIndex - 1) {
        // This is when the user picks a specific field to import, not a whole object or
        // element.
        String wantedField = recordPath[pathIndex + 1];
        String actualField = parser.getFieldName();
        if (wantedField.equals(actualField)) {
          processFieldAsRecord(project, parser, rootColumnGroup);
        }
      }
    }
  }
コード例 #5
0
  /**
   * Scans the parser for the first start entity under which {@code tag} can be found and returns
   * the path to it as an array.
   *
   * @param parser the reader to scan
   * @param tag the element name to look for, as accepted by {@code detectRecordElement(parser,
   *     tag)}
   * @return the path reported by {@code detectRecordElement(parser, tag)} copied into an array, or
   *     {@code null} if the parser is exhausted without a match
   * @throws TreeReaderException propagated from the parser
   */
  public static String[] detectPathFromTag(TreeReader parser, String tag)
      throws TreeReaderException {
    while (parser.hasNext()) {
      if (parser.next() != Token.StartEntity) { // XMLStreamConstants.START_ELEMENT
        continue;
      }
      List<String> segments = detectRecordElement(parser, tag);
      if (segments != null) {
        return segments.toArray(new String[segments.size()]);
      }
    }

    return null;
  }
コード例 #6
0
 /**
  * Reads the whole parser and feeds every start entity found at this level to {@code findRecord},
  * starting at index 0 of {@code recordPath}.
  *
  * <p>A {@link TreeReaderException} is logged and ends the import; it is not rethrown.
  *
  * @param parser the reader supplying the tree data
  * @param project passed through to {@code findRecord}
  * @param recordPath element names leading to the record element
  * @param rootColumnGroup passed through to {@code findRecord}
  * @param limit value handed to {@code findRecord}; it is decremented after each start entity
  *     handled here
  */
 public static void importTreeData(
     TreeReader parser,
     Project project,
     String[] recordPath,
     ImportColumnGroup rootColumnGroup,
     int limit) {
   logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
   try {
     while (parser.hasNext()) {
       if (parser.next() != Token.StartEntity) {
         continue;
       }
       findRecord(project, parser, recordPath, 0, rootColumnGroup, limit);
       limit--;
     }
   } catch (TreeReaderException e) {
     // TODO: This error needs to be reported to the browser/user
     logger.error("Exception from XML parse", e);
   }
 }
コード例 #7
0
  /**
   * Reads the element the parser is positioned on into {@code record}, recursing into nested
   * elements.
   *
   * <p>A column group named after the element (prefix-composed name) is obtained from {@code
   * columnGroup}. Each attribute with non-blank text and each value token with non-blank text is
   * added as a cell to that group, and each nested start entity is processed recursively with
   * {@code level + 1}. Reading stops at the element's end-entity token or when the parser is
   * exhausted. Finally the group's {@code nextRowIndex} is raised to the maximum found among
   * itself, its columns and its subgroups.
   *
   * <p>Returns immediately when the parser is on an ignorable token.
   *
   * @param project passed through to {@code getColumnGroup} and {@code addCell}
   * @param parser the reader positioned on the element to process
   * @param columnGroup the parent column group
   * @param record the record receiving the cells
   * @param level nesting depth, used for trace logging and incremented on recursion
   * @throws TreeReaderException propagated from the parser
   */
  protected static void processSubRecord(
      Project project,
      TreeReader parser,
      ImportColumnGroup columnGroup,
      ImportRecord record,
      int level)
      throws TreeReaderException {
    logger.trace(
        "processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord) lvl:"
            + level
            + " "
            + columnGroup);

    if (parser.current() == Token.Ignorable) {
      return;
    }

    String groupName = composeName(parser.getPrefix(), parser.getFieldName());
    ImportColumnGroup thisColumnGroup = getColumnGroup(project, columnGroup, groupName);

    // Never start below the row the parent group has already reached.
    thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);

    // Attributes of the element become cells of this group.
    for (int i = 0, n = parser.getAttributeCount(); i < n; i++) {
      String attributeText = parser.getAttributeValue(i).trim();
      if (attributeText.length() > 0) {
        String attributeName =
            composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i));
        addCell(project, thisColumnGroup, record, attributeName, attributeText);
      }
    }

    // Content of the element: nested elements and values, up to the closing token.
    while (parser.hasNext()) {
      Token token = parser.next();
      if (token == Token.EndEntity) {
        break;
      }

      if (token == Token.StartEntity) {
        processSubRecord(project, parser, thisColumnGroup, record, level + 1);
      } else if (token == Token.Value) { // XMLStreamConstants.CHARACTERS (CDATA not handled)
        String rawText = parser.getFieldValue();
        String colName = parser.getFieldName();
        if (rawText != null) {
          String trimmed = rawText.trim();
          if (trimmed.length() > 0) {
            addCell(project, thisColumnGroup, record, colName, trimmed);
          }
        }
      } else if (token != Token.Ignorable) {
        logger.info("unknown event type " + token);
      }
    }

    // Propagate the furthest row reached by any column or subgroup up to this group.
    int furthestRow = thisColumnGroup.nextRowIndex;
    for (ImportColumn childColumn : thisColumnGroup.columns.values()) {
      furthestRow = Math.max(furthestRow, childColumn.nextRowIndex);
    }
    for (ImportColumnGroup childGroup : thisColumnGroup.subgroups.values()) {
      furthestRow = Math.max(furthestRow, childGroup.nextRowIndex);
    }
    thisColumnGroup.nextRowIndex = furthestRow;
  }
コード例 #8
0
  /**
   * Analyses the element the parser is positioned in and proposes a record element candidate
   * found among its children or deeper descendants.
   *
   * <p>Tokens are read until the element's end-entity token. For every child start entity the
   * occurrence count of its field name is recorded and the child is analysed recursively with the
   * path extended by that name. Afterwards:
   *
   * <ol>
   *   <li>child names occurring more than once become immediate candidates. If there are between
   *       one and four of them, they are sorted and the first is taken; it is returned directly
   *       when its count divided (integer division) by the number of immediate candidates exceeds
   *       5, otherwise it joins the descendant candidates;
   *   <li>if there are descendant candidates, they are sorted and the first is returned when its
   *       count divided (integer division) by the number of descendant candidates exceeds 5.
   * </ol>
   *
   * <p>Detection is best-effort: a {@link TreeReaderException} raised while reading is logged and
   * the counts gathered up to that point are still evaluated.
   *
   * @param parser the reader positioned inside the element to analyse
   * @param path field names leading to the element being analysed
   * @return the selected candidate, or {@code null} if none passes the thresholds above
   */
  protected static RecordElementCandidate detectRecordElement(TreeReader parser, String[] path) {
    logger.trace("detectRecordElement(TreeReader, String[])");
    List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();

    Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();

    try {
      while (parser.hasNext()) {
        Token eventType = parser.next();
        if (eventType == Token.EndEntity) {
          break;
        } else if (eventType == Token.StartEntity) {
          String tagName = parser.getFieldName();

          // Count how often each child name occurs directly under this element.
          Integer seen = immediateChildCandidateMap.get(tagName);
          immediateChildCandidateMap.put(tagName, seen == null ? 1 : seen + 1);

          RecordElementCandidate c =
              detectRecordElement(parser, appendPathSegment(path, tagName));
          if (c != null) {
            descendantCandidates.add(c);
          }
        }
      }
    } catch (TreeReaderException e) {
      // Do not abort detection, but route the failure through the logger instead of stderr so
      // it is recorded the same way as other parse errors in this class.
      logger.error("Exception while detecting record element", e);
    }

    if (!immediateChildCandidateMap.isEmpty()) {
      List<RecordElementCandidate> immediateChildCandidates =
          new ArrayList<RecordElementCandidate>(immediateChildCandidateMap.size());
      for (Entry<String, Integer> entry : immediateChildCandidateMap.entrySet()) {
        int count = entry.getValue();
        if (count > 1) {
          RecordElementCandidate candidate = new RecordElementCandidate();
          candidate.path = appendPathSegment(path, entry.getKey());
          candidate.count = count;
          immediateChildCandidates.add(candidate);
        }
      }

      if (!immediateChildCandidates.isEmpty() && immediateChildCandidates.size() < 5) {
        // There are some promising immediate child elements, but not many,
        // that can serve as record elements.

        sortRecordElementCandidates(immediateChildCandidates);

        RecordElementCandidate ourCandidate = immediateChildCandidates.get(0);
        logger.trace(
            "ourCandidate.count : "
                + ourCandidate.count
                + "; immediateChildCandidates.size() : "
                + immediateChildCandidates.size());
        if (ourCandidate.count / immediateChildCandidates.size() > 5) {
          return ourCandidate;
        }

        descendantCandidates.add(ourCandidate);
      }
    }

    if (!descendantCandidates.isEmpty()) {
      sortRecordElementCandidates(descendantCandidates);

      RecordElementCandidate candidate = descendantCandidates.get(0);
      if (candidate.count / descendantCandidates.size() > 5) {
        return candidate;
      }
    }

    return null;
  }

  /** Returns a new array holding the elements of {@code path} followed by {@code segment}. */
  private static String[] appendPathSegment(String[] path, String segment) {
    String[] extended = new String[path.length + 1];
    System.arraycopy(path, 0, extended, 0, path.length);
    extended[path.length] = segment;
    return extended;
  }