コード例 #1
0
  /*
   * Test the functionality of the MetadataUtilities
   *
   */
  public void testNormalizeTitleValue() throws Exception {

    assertEquals(
        BaseAtyponMetadataUtil.normalizeTitle("The title goes here"),
        BaseAtyponMetadataUtil.normalizeTitle("Title Goes Here"));
    assertEquals(
        BaseAtyponMetadataUtil.normalizeTitle("Title    with     random spaces"),
        BaseAtyponMetadataUtil.normalizeTitle("Title with random spaces"));
    assertEquals(
        BaseAtyponMetadataUtil.normalizeTitle("Normalize -- hyphen"),
        BaseAtyponMetadataUtil.normalizeTitle("normalize \u2013\u2013 hyphen"));
    assertEquals(
        BaseAtyponMetadataUtil.normalizeTitle("Title and title"),
        BaseAtyponMetadataUtil.normalizeTitle("Title & title"));
    assertEquals(
        BaseAtyponMetadataUtil.normalizeTitle("   leading spaces"),
        BaseAtyponMetadataUtil.normalizeTitle("leading spaces"));

    // now checking the fall-back last ditch attempt
    assertEquals(
        BaseAtyponMetadataUtil.generateRawTitle("leading spaces:colon?"),
        BaseAtyponMetadataUtil.generateRawTitle("leadingspacescolon"));
    assertEquals(
        BaseAtyponMetadataUtil.generateRawTitle("relapsing-remitting"),
        BaseAtyponMetadataUtil.generateRawTitle("relapsing?remitting"));
    assertEquals(
        BaseAtyponMetadataUtil.generateRawTitle("foo\"blah"),
        BaseAtyponMetadataUtil.generateRawTitle("foo-blah"));
  }
コード例 #2
0
    // override this to do some additional attempts to get valid data before emitting
    @Override
    public void extract(MetadataTarget target, CachedUrl cu, FileMetadataExtractor.Emitter emitter)
        throws IOException, PluginException {
      ArticleMetadata am = extract(target, cu);

      /*
       * if, due to overcrawl, we got to a page that didn't have anything
       * valid, eg "this page not found" html page
       * don't emit empty metadata (because defaults would get put in
       * Must do this after cooking, because it checks size of cooked info
       */
      if (am.isEmpty()) {
        return;
      }

      /*
       * RIS data can be variable.  We don't have any way to add priority to
       * the cooking of data, so fallback to alternate values manually
       *
       * There are differences between books and journals, so fork for titles
       * and metadata check
       */
      if (am.get(MetadataField.FIELD_DATE) == null) {
        if (am.getRaw("Y1") != null) { // if DA wasn't there, use Y1
          am.put(MetadataField.FIELD_DATE, am.getRaw("Y1"));
        }
      }

      /*
       * Determine if this is a book item or a journal item.
       * set the appropriate article type once the daemon passes along the TY
       */
      String ris_type = am.getRaw("TY");
      if (ris_type == null) {
        // pre 1.69, do an alternate check because TY wasn't passed through
        ris_type = "JOUR"; // set a default
        if (am.get(MetadataField.FIELD_ISBN) != null) {
          // it is a bad value, but it was recognized as an isbn because of TY type
          ris_type = "BOOK"; // it could be a chapter but until TY is passed through...
        }
      }

      // Modify or try alternate RIS tag values based after cooking
      postCookProcess(cu, am, ris_type);

      // Only emit if this item is likely to be from this AU
      // protect against counting overcrawled articles by checking against
      // values from the TDB file - differentiate between book items and journal itesm
      ArchivalUnit au = cu.getArchivalUnit();
      if (ris_type.contains("BOOK") || ris_type.contains("CHAP")) {
        if (!BaseAtyponMetadataUtil.metadataMatchesBookTdb(au, am)) {
          return;
        }
      } else {
        // JOURNAL default is to assume it's a journal for backwards compatibility
        if (!BaseAtyponMetadataUtil.metadataMatchesTdb(au, am)) {
          return;
        }
      }

      /*
       * Fill in DOI, publisher, other information available from
       * the URL or TDB
       * CORRECT the access.url if it is not in the AU
       */
      BaseAtyponMetadataUtil.completeMetadata(cu, am);
      emitter.emitMetadata(cu, am);
    }