Ejemplo n.º 1
0
  /**
   * Looks up an already stored disambiguated org that corresponds to the given RDF organization.
   *
   * <p>The first attempt matches on name, city, region (the organization's state code), country
   * and the FundRef source type. If that finds nothing, a second attempt matches on the
   * organization's DOI (used as source id) and the FundRef source type.
   *
   * @param org the organization read from the RDF file
   * @return the entity found by the first lookup, otherwise whatever the DOI lookup returns
   *     (presumably {@code null} when nothing matches; confirm against the DAO)
   */
  private OrgDisambiguatedEntity findByDetails(RDFOrganization org) {
    // Resolve the country with fromValue, the same way the create and update paths do, so the
    // lookup key is built exactly like the stored value.
    Iso3166Country country =
        StringUtils.isBlank(org.country) ? null : Iso3166Country.fromValue(org.country);
    // Find the org by name, city, region (state code) and country. The city argument must be the
    // organization's city: entities are persisted with city and region as separate values.
    OrgDisambiguatedEntity existingEntity =
        orgDisambiguatedDao.findByNameCityRegionCountryAndSourceType(
            org.name, org.city, org.stateCode, country, FUNDREF_SOURCE_TYPE);
    // If no match is found, try with the doi and source type
    if (existingEntity == null) {
      existingEntity =
          orgDisambiguatedDao.findBySourceIdAndSourceType(org.doi, FUNDREF_SOURCE_TYPE);
    }

    return existingEntity;
  }
Ejemplo n.º 2
0
 /**
  * Builds a new {@link OrgDisambiguatedEntity} from the given RDF organization, persists it
  * through {@code orgDisambiguatedDao} and returns it.
  *
  * @param organization the organization read from the RDF file
  * @return the entity that was handed to the DAO for persisting
  */
 private OrgDisambiguatedEntity createDisambiguatedOrg(RDFOrganization organization) {
   LOGGER.info("Creating disambiguated org {}", organization.name);

   // The org type is "type", or "type/subtype" when a non-empty subtype is present.
   String subtypeSuffix = "";
   if (!StringUtils.isEmpty(organization.subtype)) {
     subtypeSuffix = "/" + organization.subtype;
   }
   String typeLabel = organization.type + subtypeSuffix;

   // A blank country is stored as null rather than being resolved.
   Iso3166Country isoCountry = null;
   if (StringUtils.isNotBlank(organization.country)) {
     isoCountry = Iso3166Country.fromValue(organization.country);
   }

   OrgDisambiguatedEntity created = new OrgDisambiguatedEntity();
   created.setName(organization.name);
   created.setCountry(isoCountry);
   created.setCity(organization.city);
   created.setRegion(organization.stateCode);
   created.setOrgType(typeLabel);
   // The DOI serves as both the source id and the source URL.
   created.setSourceId(organization.doi);
   created.setSourceUrl(organization.doi);
   created.setSourceType(FUNDREF_SOURCE_TYPE);

   orgDisambiguatedDao.persist(created);
   return created;
 }
Ejemplo n.º 3
0
  /**
   * Executes the import process.
   *
   * <p>Parses the XML document in {@code fileToLoad}, evaluates {@code conceptsExpression} against
   * it and, for every resulting node, builds an {@link RDFOrganization} and then:
   *
   * <ol>
   *   <li>looks for an existing disambiguated org;
   *   <li>if one exists and {@code entityChanged} reports a difference, overwrites its fields
   *       and merges it;
   *   <li>otherwise, if {@code idChanged} reports a difference, creates an external identifier
   *       for the organization's DOI;
   *   <li>if no org exists, creates a new one.
   * </ol>
   *
   * <p>File, parser and XPath failures are logged and end the run; they are not rethrown. The
   * counters are logged in all cases once processing stops.
   */
  private void execute() {
    long start = System.currentTimeMillis();
    // try-with-resources guarantees the input stream is closed, including when parsing fails.
    try (FileInputStream file = new FileInputStream(fileToLoad)) {
      DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
      DocumentBuilder builder = builderFactory.newDocumentBuilder();
      Document xmlDocument = builder.parse(file);
      // Parent node
      NodeList nodeList =
          (NodeList)
              xPath.compile(conceptsExpression).evaluate(xmlDocument, XPathConstants.NODESET);
      for (int i = 0; i < nodeList.getLength(); i++) {
        RDFOrganization rdfOrganization =
            getOrganization(xmlDocument, nodeList.item(i).getAttributes());
        LOGGER.info(
            "Processing organization from RDF, doi:{}, name:{}, country:{}, state:{}, stateCode:{}, type:{}, subtype:{}",
            new Object[] {
              rdfOrganization.doi,
              rdfOrganization.name,
              rdfOrganization.country,
              rdfOrganization.state,
              rdfOrganization.stateCode,
              rdfOrganization.type,
              rdfOrganization.subtype
            });
        // #1: Look for an existing org
        OrgDisambiguatedEntity existingEntity = findByDetails(rdfOrganization);
        if (existingEntity != null) {
          // #2: If the entity differs from the RDF data, overwrite its values
          if (entityChanged(rdfOrganization, existingEntity)) {
            existingEntity.setCity(rdfOrganization.city);
            Iso3166Country country =
                StringUtils.isNotBlank(rdfOrganization.country)
                    ? Iso3166Country.fromValue(rdfOrganization.country)
                    : null;
            existingEntity.setCountry(country);
            existingEntity.setName(rdfOrganization.name);
            // Org type is "type", or "type/subtype" when a non-blank subtype is present.
            String orgType =
                rdfOrganization.type
                    + (StringUtils.isNotBlank(rdfOrganization.subtype)
                        ? "/" + rdfOrganization.subtype
                        : "");
            existingEntity.setOrgType(orgType);
            existingEntity.setRegion(rdfOrganization.stateCode);
            existingEntity.setSourceId(rdfOrganization.doi);
            existingEntity.setSourceType(FUNDREF_SOURCE_TYPE);
            existingEntity.setSourceUrl(rdfOrganization.doi);
            orgDisambiguatedDao.merge(existingEntity);
            updatedOrgs += 1;
          } else if (idChanged(rdfOrganization, existingEntity)) {
            // #3: If the ID changed, create an external identifier
            createExternalIdentifier(existingEntity, rdfOrganization.doi);
            addedExternalIdentifiers += 1;
          }
        } else {
          // #4: Else, create the new org
          createDisambiguatedOrg(rdfOrganization);
          addedDisambiguatedOrgs += 1;
        }
      }

      long end = System.currentTimeMillis();
      LOGGER.info("Time taken to process the files: {}", (end - start));
    } catch (FileNotFoundException fne) {
      // The exception is passed as the last argument so the logger records the stack trace.
      LOGGER.error("Unable to read file {}", fileToLoad, fne);
    } catch (ParserConfigurationException pce) {
      LOGGER.error("Unable to initialize the DocumentBuilder", pce);
    } catch (IOException | SAXException e) {
      LOGGER.error("Unable to parse document {}", fileToLoad, e);
    } catch (XPathExpressionException xpe) {
      LOGGER.error("XPathExpressionException {}", xpe.getMessage(), xpe);
    } finally {
      // One placeholder per argument: the total was previously passed but never printed.
      LOGGER.info(
          "Number new Disambiguated Orgs={}, Updated Orgs={}, new External Identifiers={}, Total={}",
          new Object[] {addedDisambiguatedOrgs, updatedOrgs, addedExternalIdentifiers, getTotal()});
    }
  }