Esempio n. 1
0
  /**
   * Pulls the "contents" phrase out of a description and stores it on {@code ms}.
   *
   * <p>A contents phrase starts either with an article ({@code a}, {@code an}, {@code the}) or with
   * the word {@code incomplete}, may carry an {@code of ...} part, and is terminated by a comma or
   * by the word {@code located}. The article form is tried first; the {@code incomplete} form is
   * only used when the article form does not match anywhere in {@code desc}.
   *
   * @param desc the description text that is still to be parsed
   * @return {@code desc} with the contents phrase removed (the terminating comma or
   *     {@code located} clause is left in place), or {@code desc} unchanged when no contents
   *     phrase is found
   */
  private String extractContents(String desc) {
    // The leading \b stops the article from matching the tail of a longer word: without it
    // "Evangelia of Luke, ..." would yield the contents "a of Luke" and leave "Evangeli" behind.
    Pattern articlePattern =
        Pattern.compile("\\b((?:an?|the) (.*?)(?: of (.*))?)(?:,|\\s*located)");
    Pattern incompletePattern =
        Pattern.compile("((incomplete .*?)(?: of (.*))?)(?:,|\\s*located)");

    Matcher mat = articlePattern.matcher(desc);
    if (!mat.find()) {
      mat = incompletePattern.matcher(desc);
      if (!mat.find()) {
        return desc;
      }
    }

    String matchedText = mat.group(1);
    int start = mat.start(1);

    // The greedy "of (.*)" part can run past an earlier "located"; if it did, cut the phrase
    // at the first "located" so the location clause stays in the remaining text.
    int locatedAt = matchedText.indexOf("located");
    int end = (locatedAt >= 0) ? start + locatedAt : mat.end(1);

    String contents = StringUtils.trimToEmpty(desc.substring(start, end));
    if (contents.endsWith(",")) {
      contents = contents.substring(0, contents.length() - 1);
    }
    ms.setContents(contents);
    LOGGER.debug("       Contents: " + contents);

    return desc.substring(0, start) + desc.substring(end);
  }
Esempio n. 2
0
  /**
   * Parses {@code this.description} and records what it finds on {@code ms}.
   *
   * <p>The raw description is stored on {@code ms} first. A "see German footnote" marker is then
   * stripped, and parsing stops if the rest is blank or is exactly {@code paraphrase}. Otherwise
   * the text is handed through a fixed chain of extractors, each of which returns the text it was
   * given minus whatever it recognised.
   *
   * <p>When {@code debug} is off, text that still contains word characters after the chain and
   * does not look like a "same ..." back-reference increments {@code unparsedDateCt}. When
   * {@code debug} is on, the input and the leftover text are printed to standard output instead.
   */
  private void parseDescription() {
    final String original = this.description;
    ms.setDescription(original);

    String remaining = original.replaceFirst("-+see German footnotes?;?-*\\s*", "");
    if (StringUtils.isBlank(remaining) || remaining.equals("paraphrase")) {
      return;
    }

    if (debug) {
      System.out.println("Parsing: " + remaining);
    }

    // Order matters: every extractor works on what the previous one left over.
    remaining = stripSeeAlso(remaining);
    remaining = extractDate(remaining);
    remaining = extractCodexName(remaining);
    remaining = extractContents(remaining);
    remaining = extractLocation(remaining);
    remaining = extractRuinedMs(remaining);
    remaining = extractClassification(remaining);

    if (debug) {
      System.out.println("         Result: " + remaining);
      System.out.println();
      return;
    }

    // Leftover text consisting only of non-word characters counts as fully parsed.
    // TODO parse related MS info ("same ..." / "from the same ..." leftovers are skipped for now).
    boolean hasLeftover = !remaining.matches("\\W*");
    if (hasLeftover && !remaining.matches("\\W*(from( the)?)? ?same.*")) {
      unparsedDateCt++;
    }
  }
Esempio n. 3
0
  /**
   * Extracts holding-institution phrases from a description and records them on {@code ms}.
   *
   * <p>Two passes are made:
   *
   * <ol>
   *   <li>Every {@code located ...} phrase (also matching the misspellings allowed by
   *       {@code lo?cate?d}) is looked up via {@code institutionRepo.findOrCreate} and set as the
   *       current institution; if several phrases match, the last one wins.
   *   <li>In the text left over from the first pass, every {@code formerly at|in|with ...} phrase
   *       is looked up the same way and added as a previous institution.
   * </ol>
   *
   * <p>NOTE(review): the optional {@code formerly|possibly} qualifier captured by the first
   * pattern is never read, so "located formerly at X" is recorded as the current institution.
   * Confirm whether that is intended.
   *
   * @param desc the description text that is still to be parsed
   * @return {@code desc} with every matched location phrase removed
   */
  private String extractLocation(String desc) {
    Pattern locationPattern =
        Pattern.compile(
            "(?:, located, )?"
                + "lo?cate?d "
                + "(formerly|possibly)? ?"
                + "(?:at|on|in(?: the)?|with(?: the)?)? ?"
                + "((?:[^,]|(?:, [A-Z]{2}))*),? ?");

    // Copy the unmatched segments instead of re-slicing the input on each match, so that every
    // matched phrase is dropped rather than only the last one.
    StringBuilder withoutCurrent = new StringBuilder();
    int copiedUpTo = 0;
    Matcher mat = locationPattern.matcher(desc);
    while (mat.find()) {
      String location = mat.group(2);

      Institution inst = institutionRepo.findOrCreate(location);
      ms.setCurrentInstitution(inst);
      LOGGER.debug("       Location: " + location);

      withoutCurrent.append(desc, copiedUpTo, mat.start());
      copiedUpTo = mat.end();
    }
    withoutCurrent.append(desc, copiedUpTo, desc.length());
    String afterCurrent = withoutCurrent.toString();

    Pattern oldLocationPattern =
        Pattern.compile("formerly " + "(?:at|in(?: the)?|with(?: the)?) " + "(.*)?");

    // Same segment-copy approach: the matcher's offsets refer to afterCurrent, which must
    // therefore stay untouched while the matches are being walked.
    StringBuilder withoutFormer = new StringBuilder();
    copiedUpTo = 0;
    mat = oldLocationPattern.matcher(afterCurrent);
    while (mat.find()) {
      String institution = mat.group(1);

      Institution inst = institutionRepo.findOrCreate(institution);
      ms.addPreviousInstitution(inst);
      LOGGER.debug("Former Location: " + institution);

      withoutFormer.append(afterCurrent, copiedUpTo, mat.start());
      copiedUpTo = mat.end();
    }
    withoutFormer.append(afterCurrent, copiedUpTo, afterCurrent.length());

    return withoutFormer.toString();
  }
Esempio n. 4
0
  /**
   * Extracts a leading date from a description and records it on {@code ms}.
   *
   * <p>The date is only recognised when {@code datePattern} matches at the very start of
   * {@code desc}. Capture group 1 becomes the text of a new {@link HistoricalDate}, which is
   * persisted through {@code em} and set on {@code ms}. Unless that text matches
   * {@code UNKNOWN_DATE_RE}, groups 2 to 6 are then read as prefix, first date, separator, last
   * date and suffix:
   *
   * <ul>
   *   <li>a suffix of {@code c} or {@code c.} sets the precision to {@code Precision.CENTURY};
   *   <li>a prefix of {@code ca.} or {@code possibly} (case-insensitive) sets the certainty to
   *       {@code Certainty.APPROXIMATE} or {@code Certainty.POSSIBLE} respectively;
   *   <li>the start and end dates are filled in by {@code setStartDate} and {@code setEndDate}.
   * </ul>
   *
   * @param desc the description text that is still to be parsed
   * @return the text following the matched date, or {@code desc} unchanged when it does not start
   *     with a date
   */
  public String extractDate(String desc) {
    Matcher mat = datePattern.matcher(desc);
    boolean startsWithDate = mat.find() && mat.start() == 0;
    if (!startsWithDate) {
      return desc;
    }

    HistoricalDate date = new HistoricalDate(mat.group(1));
    em.persist(date);
    ms.setDate(date);

    String remainder = desc.substring(mat.end());
    if (date.getText().matches(UNKNOWN_DATE_RE)) {
      // An unknown date carries no further detail to interpret.
      return remainder;
    }

    String prefix = StringUtils.trimToNull(mat.group(2));
    String firstDate = StringUtils.trimToNull(mat.group(3));
    String separator = StringUtils.trimToNull(mat.group(4));
    String lastDate = StringUtils.trimToNull(mat.group(5));
    String suffix = StringUtils.trimToNull(mat.group(6));

    // Other suffixes are left uninterpreted.
    if (suffix != null && suffix.matches("c\\.?")) {
      date.setPrecision(Precision.CENTURY);
    }

    // Constant-first comparison is null-safe; other prefixes are left uninterpreted.
    if ("ca.".equalsIgnoreCase(prefix)) {
      date.setCertainty(Certainty.APPROXIMATE);
    } else if ("possibly".equalsIgnoreCase(prefix)) {
      date.setCertainty(Certainty.POSSIBLE);
    }

    setStartDate(firstDate, date);
    setEndDate(firstDate, lastDate, separator, date);
    LOGGER.debug("           Date: " + date.getText());

    return remainder;
  }