/**
   * Tells whether a document must be handled through its in-memory content because its source URL
   * cannot be used.
   *
   * <p>The source URL is considered unusable when it is {@code null} or when calling {@code
   * openConnection()} on it fails with an {@link IOException}.
   *
   * @param doc the document to inspect
   * @return {@code true} if the source URL is unusable but the document has content; {@code false}
   *     if a connection to the source URL could be opened
   * @throws DocumentFormatException if the source URL is unusable and the document content is
   *     {@code null}, i.e. there is nothing to work with
   */
  protected static boolean hasContentButNoValidUrl(Document doc) throws DocumentFormatException {
    final String nothingUsable = "The document doesn't have a valid URL and also no content";

    if (doc.getSourceUrl() == null) {
      // No URL at all: without content there is nothing we can do. Report that with the declared
      // exception instead of dereferencing the null URL below.
      if (doc.getContent() == null) {
        throw new DocumentFormatException(nothingUsable);
      }
      // The doc's URL is null but there is content.
      return true;
    }

    try {
      // Only checks that a connection object can be created for the URL.
      doc.getSourceUrl().openConnection();
    } catch (IOException ex1) {
      // The URL is not null but is not valid.
      if (doc.getContent() == null) {
        // The document content is also null. There is nothing we can do.
        throw new DocumentFormatException(nothingUsable);
      }
      return true;
    }

    return false;
  }
Exemplo n.º 2
0
  /**
   * Checks two documents for equality.
   *
   * <p>The following aspects are compared, in this order, using this class's {@code check} helper:
   * content, default annotation set, textual-ness and encoding, features, named annotation sets,
   * source URL, and source URL start/end offsets. The comparison stops at the first difference and
   * stores a description of it in the static {@code message} field; {@code message} is reset to the
   * empty string at the start of every call.
   *
   * <p>Two {@code null} references are considered equal; a {@code null} and a non-{@code null}
   * reference are not.
   *
   * @param doc1 a document, may be {@code null}
   * @param doc2 another document, may be {@code null}
   * @return {@code true} if no difference was found, {@code false} otherwise
   */
  public static boolean documentsEqual(Document doc1, Document doc2) {
    message = "";
    if (doc1 == null ^ doc2 == null) {
      message = "Documents not equal: null<>non-null!";
      return false;
    }
    // After the XOR test above, doc1 == null implies doc2 == null as well.
    if (doc1 == null) return true;

    if (!check(doc1.getContent(), doc2.getContent())) {
      message = "Document contents different!";
      return false;
    }

    if (!check(doc1.getAnnotations(), doc2.getAnnotations())) {
      message = "Documents default AS not equal!";
      return false;
    }

    // Textual-ness must match in BOTH directions so that the result does not depend on the
    // argument order.
    boolean firstIsTextual = doc1 instanceof TextualDocument;
    boolean secondIsTextual = doc2 instanceof TextualDocument;
    if (firstIsTextual != secondIsTextual) {
      message = "Documents not equal: textual<>non-textual!";
      return false;
    }
    if (firstIsTextual
        && !check(
            ((TextualDocument) doc1).getEncoding(), ((TextualDocument) doc2).getEncoding())) {
      message = "Textual documents with different encodings!";
      return false;
    }

    if (!check(doc1.getFeatures(), doc2.getFeatures())) {
      message = "Documents features not equal!";
      return false;
    }

    // NOTE: the markupAware flag is not compared (it is not accessible from here), and neither
    // are the DocumentImpl next-node / next-annotation ID counters; those checks were disabled
    // in the original implementation.

    if (!check(doc1.getNamedAnnotationSets(), doc2.getNamedAnnotationSets())) {
      message = "Documents named annots not equal!";
      return false;
    }

    if (!check(doc1.getSourceUrl(), doc2.getSourceUrl())) {
      message = "Documents sourceURLs not equal!";
      return false;
    }

    boolean sameOffsets =
        check(doc1.getSourceUrlStartOffset(), doc2.getSourceUrlStartOffset())
            && check(doc1.getSourceUrlEndOffset(), doc2.getSourceUrlEndOffset());
    if (!sameOffsets) {
      message = "Documents sourceURLOffsets not equal!";
      return false;
    }
    return true;
  }