/**
 * Parse the Items XML supplied by the given Reader with a {@code FullDataXmlHandler}.
 *
 * <p>Before parsing starts the timing state is reset: {@code start} and {@code time} are both set
 * to the current clock value and {@code times} becomes a fresh 20-slot array filled with -1.
 *
 * @param in a Reader containing the XML text input
 * @throws Exception if an error occurs
 */
public void process(Reader in) throws Exception {
    // reset the timing state before any parsing happens
    time = start = System.currentTimeMillis();
    times = new long[20];
    int slot = 0;
    while (slot < times.length) {
        times[slot++] = -1;
    }

    InputSource xmlSource = new InputSource(in);
    FullDataXmlHandler itemHandler = new FullDataXmlHandler();
    SAXParser.parse(xmlSource, itemHandler);
}
/**
 * Parse SavedQuery objects from XML.
 *
 * <p>The result map is handed to a {@code SavedQueryHandler}, which fills it while the XML is
 * parsed. Any exception raised during parsing has its stack trace printed and is then rethrown
 * wrapped in a RuntimeException.
 *
 * @param reader the saved queries
 * @param savedBags Map from bag name to bag
 * @param version the version of the XML, an attribute on the profile manager
 * @return a Map from query name to SavedQuery (a LinkedHashMap, so insertion order is kept)
 */
public static Map<String, SavedQuery> unmarshal(Reader reader, Map savedBags, int version) {
    Map<String, SavedQuery> parsedQueries = new LinkedHashMap<String, SavedQuery>();
    try {
        InputSource xmlSource = new InputSource(reader);
        SavedQueryHandler queryHandler =
                new SavedQueryHandler(parsedQueries, savedBags, version);
        SAXParser.parse(xmlSource, queryHandler);
        return parsedQueries;
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
/**
 * Parse Tags from XML using a {@code TagHandler} created for the given ProfileManager and user.
 *
 * <p>Any exception raised during parsing has its stack trace printed and is then rethrown
 * wrapped in a RuntimeException.
 *
 * @param pm a ProfileManager used to get UserProfile objects for a given username
 * @param userName the user whose tags are being unmarshalled
 * @param reader the saved Tags
 * @return number of new tags created, as reported by the handler's {@code getCount()}
 */
public int unmarshal(ProfileManager pm, String userName, Reader reader) {
    final TagHandler tagHandler = new TagHandler(pm, userName);
    try {
        InputSource xmlSource = new InputSource(reader);
        SAXParser.parse(xmlSource, tagHandler);
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    int createdCount = tagHandler.getCount();
    return createdCount;
}
/**
 * Read a ProfileManager from an XML stream Reader.
 *
 * <p>Parsing is delegated to a {@code ProfileManagerHandler} built from the arguments. Any
 * exception raised while building the handler or parsing has its stack trace printed and is then
 * rethrown wrapped in a RuntimeException.
 *
 * @param reader contains the ProfileManager XML
 * @param profileManager the ProfileManager to store the unmarshalled Profiles to
 * @param osw ObjectStoreWriter used to resolve object ids and write bags correspond to object in
 *     old bags.
 * @param abortOnError if true, throw an exception if there is a problem. If false, log the
 *     problem and continue if possible (used by read-userprofile-xml). The flag is passed
 *     through to the handler.
 */
public static void unmarshal(
        Reader reader,
        ProfileManager profileManager,
        ObjectStoreWriter osw,
        boolean abortOnError) {
    try {
        ProfileManagerHandler handler =
                new ProfileManagerHandler(profileManager, osw, abortOnError);
        InputSource xmlSource = new InputSource(reader);
        SAXParser.parse(xmlSource, handler);
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
/** * Synchronize publications with pubmed using pmid * * @throws Exception if an error occurs */ public void execute() throws Exception { // Needed so that STAX can find it's implementation classes ClassLoader cl = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); Database db = null; Transaction txn = null; try { if (osAlias == null) { throw new BuildException("osAlias attribute is not set"); } if (outputFile == null) { throw new BuildException("outputFile attribute is not set"); } // environment is transactional EnvironmentConfig envConfig = new EnvironmentConfig(); envConfig.setTransactional(true); envConfig.setAllowCreate(true); Environment env = new Environment(new File(cacheDirName), envConfig); DatabaseConfig dbConfig = new DatabaseConfig(); dbConfig.setTransactional(true); dbConfig.setAllowCreate(true); dbConfig.setSortedDuplicates(true); db = env.openDatabase(null, "publications_db", dbConfig); txn = env.beginTransaction(null, null); LOG.info("Starting EntrezPublicationsRetriever"); Writer writer = new FileWriter(outputFile); ObjectStore os = ObjectStoreFactory.getObjectStore(osAlias); Set<Integer> idsToFetch = new HashSet<Integer>(); itemFactory = new ItemFactory(os.getModel(), "-1_"); writer.write(FullRenderer.getHeader() + ENDL); for (Iterator<Publication> iter = getPublications(os).iterator(); iter.hasNext(); ) { String pubMedId = iter.next().getPubMedId(); Integer pubMedIdInteger; try { pubMedIdInteger = Integer.valueOf(pubMedId); } catch (NumberFormatException e) { // not a pubmed id continue; } if (seenPubMeds.contains(pubMedIdInteger)) { continue; } DatabaseEntry key = new DatabaseEntry(pubMedId.getBytes()); DatabaseEntry data = new DatabaseEntry(); if (db.get(txn, key, data, null).equals(OperationStatus.SUCCESS)) { try { ByteArrayInputStream mapInputStream = new ByteArrayInputStream(data.getData()); ObjectInputStream deserializer = new ObjectInputStream(mapInputStream); 
Map<String, Object> pubMap = (Map) deserializer.readObject(); writeItems(writer, mapToItems(itemFactory, pubMap)); seenPubMeds.add(pubMedIdInteger); } catch (EOFException e) { // ignore and fetch it again System.err.println( "found in cache, but igored due to cache problem: " + pubMedIdInteger); } } else { idsToFetch.add(pubMedIdInteger); } } Iterator<Integer> idIter = idsToFetch.iterator(); Set<Integer> thisBatch = new HashSet<Integer>(); while (idIter.hasNext()) { Integer pubMedIdInteger = idIter.next(); thisBatch.add(pubMedIdInteger); if (thisBatch.size() == BATCH_SIZE || !idIter.hasNext() && thisBatch.size() > 0) { try { // the server may return less publications than we ask for, so keep a Map Map<String, Map<String, Object>> fromServerMap = null; for (int i = 0; i < MAX_TRIES; i++) { BufferedReader br = new BufferedReader(getReader(thisBatch)); StringBuffer buf = new StringBuffer(); String line; while ((line = br.readLine()) != null) { buf.append(line + "\n"); } fromServerMap = new HashMap<String, Map<String, Object>>(); Throwable throwable = null; try { if (loadFullRecord) { SAXParser.parse( new InputSource(new StringReader(buf.toString())), new FullRecordHandler(fromServerMap), false); } else { SAXParser.parse( new InputSource(new StringReader(buf.toString())), new SummaryRecordHandler(fromServerMap), false); } } catch (Throwable e) { LOG.error("Couldn't parse PubMed XML", e); // try again or re-throw the Throwable throwable = e; } if (i == MAX_TRIES) { throw new RuntimeException( "failed to parse: " + buf.toString() + " - tried " + MAX_TRIES + " times", throwable); } else { if (throwable != null) { // try again continue; } } for (String id : fromServerMap.keySet()) { writeItems(writer, mapToItems(itemFactory, fromServerMap.get(id))); } addToDb(txn, db, fromServerMap); break; } thisBatch.clear(); } finally { txn.commit(); // start a new transaction incase there is an exception while parsing txn = env.beginTransaction(null, null); } } } writeItems(writer, 
authorMap.values()); writeItems(writer, meshTerms.values()); writer.write(FullRenderer.getFooter() + ENDL); writer.flush(); writer.close(); } catch (Throwable e) { throw new RuntimeException("failed to get all publications", e); } finally { txn.commit(); db.close(); Thread.currentThread().setContextClassLoader(cl); } }