예제 #1
0
 /**
  * Parses the XML supplied by the reader with a {@code FullDataXmlHandler}, after resetting the
  * timing fields ({@code start}, {@code time} and the 20-slot {@code times} array).
  *
  * @param in a Reader containing the XML text input
  * @throws Exception if an error occurs while parsing
  */
 public void process(Reader in) throws Exception {
   // Take a single clock reading and use it for both timing fields.
   final long now = System.currentTimeMillis();
   start = now;
   time = now;

   // Every slot of the timing array starts out as -1.
   final long[] slots = new long[20];
   java.util.Arrays.fill(slots, -1L);
   times = slots;

   final InputSource source = new InputSource(in);
   SAXParser.parse(source, new FullDataXmlHandler());
 }
예제 #2
0
 /**
  * Parse saved queries from XML.
  *
  * @param reader the XML containing the saved queries
  * @param savedBags Map from bag name to bag
  * @param version the version of the XML, an attribute on the profile manager
  * @return a Map from query name to SavedQuery, iterating in the order entries were added
  * @throws RuntimeException wrapping any exception raised while parsing
  */
 public static Map<String, SavedQuery> unmarshal(Reader reader, Map savedBags, int version) {
   // LinkedHashMap keeps the entries in the order the handler inserts them.
   Map<String, SavedQuery> queries = new LinkedHashMap<String, SavedQuery>();
   try {
     SAXParser.parse(new InputSource(reader), new SavedQueryHandler(queries, savedBags, version));
   } catch (Exception e) {
     // The original exception travels as the cause; printing it here as well would report the
     // same failure twice and bypass whatever logging the caller uses.
     throw new RuntimeException(e);
   }
   return queries;
 }
예제 #3
0
 /**
  * Parse Tags from XML and write them to the userprofile object store
  *
  * @param pm a ProfileManager used to get UserProfile objects for a given username
  * @param userName the user whose tags are being unmarshalled
  * @param reader the saved Tags
  * @return number of new tags created, as reported by the handler once parsing has finished
  * @throws RuntimeException wrapping any exception raised while parsing
  */
 public int unmarshal(ProfileManager pm, String userName, Reader reader) {
   TagHandler handler = new TagHandler(pm, userName);
   try {
     SAXParser.parse(new InputSource(reader), handler);
   } catch (Exception e) {
     // The original exception travels as the cause; printing it here as well would report the
     // same failure twice and bypass whatever logging the caller uses.
     throw new RuntimeException(e);
   }
   return handler.getCount();
 }
예제 #4
0
 /**
  * Read a ProfileManager from an XML stream Reader
  *
  * @param reader contains the ProfileManager XML
  * @param profileManager the ProfileManager to store the unmarshalled Profiles to
  * @param osw ObjectStoreWriter used to resolve object ids and write bags corresponding to
  *     objects in old bags.
  * @param abortOnError if true, throw an exception if there is a problem. If false, log the
  *     problem and continue if possible (used by read-userprofile-xml). The flag is handed to the
  *     handler; an exception that escapes the parse is always rethrown by this method.
  * @throws RuntimeException wrapping any exception raised while parsing
  */
 public static void unmarshal(
     Reader reader, ProfileManager profileManager, ObjectStoreWriter osw, boolean abortOnError) {
   try {
     ProfileManagerHandler profileManagerHandler =
         new ProfileManagerHandler(profileManager, osw, abortOnError);
     SAXParser.parse(new InputSource(reader), profileManagerHandler);
   } catch (Exception e) {
     // The original exception travels as the cause; printing it here as well would report the
     // same failure twice and bypass whatever logging the caller uses.
     throw new RuntimeException(e);
   }
 }
  /**
   * Synchronize publications with PubMed using their PubMed ids.
   *
   * <p>Publications are read from the object store named by {@code osAlias}. Each numeric PubMed
   * id is looked up in the "publications_db" database kept under {@code cacheDirName}: entries
   * found there are written straight to {@code outputFile}; the remaining ids (and ids whose
   * cached entry cannot be read) are requested in batches of {@code BATCH_SIZE}, parsed, written
   * to the output and handed to {@code addToDb}.
   *
   * @throws Exception if an error occurs; any failure in the main body is rethrown as a
   *     RuntimeException with the message "failed to get all publications"
   */
  public void execute() throws Exception {
    // Needed so that STAX can find its implementation classes
    ClassLoader cl = Thread.currentThread().getContextClassLoader();

    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    // Declared before the try so the finally block can release exactly what was opened.
    Environment env = null;
    Database db = null;
    Transaction txn = null;
    Writer writer = null;
    try {
      if (osAlias == null) {
        throw new BuildException("osAlias attribute is not set");
      }
      if (outputFile == null) {
        throw new BuildException("outputFile attribute is not set");
      }

      // environment is transactional
      EnvironmentConfig envConfig = new EnvironmentConfig();
      envConfig.setTransactional(true);
      envConfig.setAllowCreate(true);

      env = new Environment(new File(cacheDirName), envConfig);

      DatabaseConfig dbConfig = new DatabaseConfig();
      dbConfig.setTransactional(true);
      dbConfig.setAllowCreate(true);
      dbConfig.setSortedDuplicates(true);

      db = env.openDatabase(null, "publications_db", dbConfig);

      txn = env.beginTransaction(null, null);

      LOG.info("Starting EntrezPublicationsRetriever");

      writer = new FileWriter(outputFile);
      ObjectStore os = ObjectStoreFactory.getObjectStore(osAlias);

      Set<Integer> idsToFetch = new HashSet<Integer>();
      itemFactory = new ItemFactory(os.getModel(), "-1_");
      writer.write(FullRenderer.getHeader() + ENDL);
      for (Iterator<Publication> iter = getPublications(os).iterator(); iter.hasNext(); ) {
        String pubMedId = iter.next().getPubMedId();
        Integer pubMedIdInteger;
        try {
          pubMedIdInteger = Integer.valueOf(pubMedId);
        } catch (NumberFormatException e) {
          // not a pubmed id (this also covers a null id)
          continue;
        }

        if (seenPubMeds.contains(pubMedIdInteger)) {
          continue;
        }
        DatabaseEntry key = new DatabaseEntry(pubMedId.getBytes());
        DatabaseEntry data = new DatabaseEntry();
        if (db.get(txn, key, data, null).equals(OperationStatus.SUCCESS)) {
          try {
            ByteArrayInputStream mapInputStream = new ByteArrayInputStream(data.getData());
            ObjectInputStream deserializer = new ObjectInputStream(mapInputStream);
            // The cast cannot be checked at runtime; the entry is expected to be a Map that was
            // serialized into the database earlier.
            @SuppressWarnings("unchecked")
            Map<String, Object> pubMap = (Map<String, Object>) deserializer.readObject();
            writeItems(writer, mapToItems(itemFactory, pubMap));
            seenPubMeds.add(pubMedIdInteger);
          } catch (EOFException e) {
            System.err.println(
                "found in cache, but igored due to cache problem: " + pubMedIdInteger);
            // The cached copy is unusable, so queue the id to be fetched again; without this
            // the publication would be missing from the output altogether.
            idsToFetch.add(pubMedIdInteger);
          }
        } else {
          idsToFetch.add(pubMedIdInteger);
        }
      }

      Iterator<Integer> idIter = idsToFetch.iterator();
      Set<Integer> thisBatch = new HashSet<Integer>();
      while (idIter.hasNext()) {
        thisBatch.add(idIter.next());
        // Flush when the batch is full or when the last id has just been added (the batch is
        // never empty here because an id was added on the line above).
        if (thisBatch.size() == BATCH_SIZE || !idIter.hasNext()) {
          try {
            for (int i = 0; i < MAX_TRIES; i++) {
              // Read the whole response up front so it can be quoted in the error message.
              StringBuilder buf = new StringBuilder();
              BufferedReader br = new BufferedReader(getReader(thisBatch));
              try {
                String line;
                while ((line = br.readLine()) != null) {
                  buf.append(line).append('\n');
                }
              } finally {
                br.close();
              }
              String xml = buf.toString();

              // the server may return fewer publications than we ask for, so keep a Map
              Map<String, Map<String, Object>> fromServerMap =
                  new HashMap<String, Map<String, Object>>();
              Throwable throwable = null;
              try {
                if (loadFullRecord) {
                  SAXParser.parse(
                      new InputSource(new StringReader(xml)),
                      new FullRecordHandler(fromServerMap),
                      false);
                } else {
                  SAXParser.parse(
                      new InputSource(new StringReader(xml)),
                      new SummaryRecordHandler(fromServerMap),
                      false);
                }
              } catch (Throwable e) {
                LOG.error("Couldn't parse PubMed XML", e);
                // try again or re-throw the Throwable
                throwable = e;
              }

              if (throwable != null) {
                // The loop index never reaches MAX_TRIES, so the last attempt is MAX_TRIES - 1.
                if (i == MAX_TRIES - 1) {
                  throw new RuntimeException(
                      "failed to parse: " + xml + " - tried " + MAX_TRIES + " times", throwable);
                }
                // try again
                continue;
              }

              for (Map<String, Object> publication : fromServerMap.values()) {
                writeItems(writer, mapToItems(itemFactory, publication));
              }
              addToDb(txn, db, fromServerMap);
              break;
            }
            thisBatch.clear();
          } finally {
            txn.commit();
            // start a new transaction in case there is an exception while parsing
            txn = env.beginTransaction(null, null);
          }
        }
      }
      writeItems(writer, authorMap.values());
      writeItems(writer, meshTerms.values());
      writer.write(FullRenderer.getFooter() + ENDL);
      writer.flush();
      writer.close();
    } catch (Throwable e) {
      throw new RuntimeException("failed to get all publications", e);
    } finally {
      try {
        if (writer != null) {
          try {
            // A no-op after a successful run (the writer was closed above); on failure this
            // releases the file handle without masking the exception that is propagating.
            writer.close();
          } catch (java.io.IOException closeError) {
            LOG.error("Couldn't close " + outputFile, closeError);
          }
        }
        releasePublicationCache(txn, db, env);
      } finally {
        Thread.currentThread().setContextClassLoader(cl);
      }
    }
  }

  /**
   * Commits the open transaction, then closes the database and finally the environment. Each
   * argument may be null if it was never opened; every later step is still attempted when an
   * earlier one throws.
   *
   * @param txn the transaction to commit, or null
   * @param db the database to close, or null
   * @param env the environment to close, or null
   * @throws Exception if committing or closing fails
   */
  private static void releasePublicationCache(Transaction txn, Database db, Environment env)
      throws Exception {
    try {
      if (txn != null) {
        // Committed even after a failure, so entries cached before the error are kept.
        txn.commit();
      }
    } finally {
      try {
        if (db != null) {
          db.close();
        }
      } finally {
        // The environment goes last: its database handles must be closed first.
        if (env != null) {
          env.close();
        }
      }
    }
  }