/**
 * Builds a {@link DetailledItem} from a record detail page.
 * <p>
 * Reads, in this order: an error message (which aborts parsing), the title, the cover image,
 * the rows of the first table inside {@code .record} as details, and finally either the
 * volumes (when an element with id {@code Volumes} exists) or the copies.
 *
 * @param id   identifier stored on the returned item
 * @param doc  parsed detail page
 * @param data library configuration; {@code baseurl} is read when a detail cell contains a
 *             link whose {@code href} does not start with {@code /}
 * @return the populated item
 * @throws OpacErrorException if the page contains an element matching
 *                            {@code p.error, p.errorMsg, .alert-error}; the message is that
 *                            element's text
 * @throws JSONException      if {@code baseurl} is needed but missing from {@code data}
 */
static DetailledItem parseDetail(String id, Document doc, JSONObject data)
        throws OpacErrorException, JSONException {
    Elements errors = doc.select("p.error, p.errorMsg, .alert-error");
    if (errors.size() > 0) {
        throw new OpacErrorException(errors.text());
    }

    DetailledItem res = new DetailledItem();
    res.setId(id);

    Elements title = doc.select(".record h1, .record [itemprop=name], .record [property=name]");
    if (title.size() > 0) {
        res.setTitle(title.first().text());
    }

    // Only the first image whose URL contains "over" is considered; it is used as the cover
    // unless its URL also contains "Unavailable".
    for (Element img : doc.select(".record img, #cover img")) {
        String src = img.absUrl("src");
        if (src.contains("over")) {
            if (!src.contains("Unavailable")) {
                res.setCover(src);
            }
            break;
        }
    }

    // first() returns null when the page has no table inside .record; treat that as
    // "no details" instead of failing with a NullPointerException.
    Element detailTable = doc.select(".record table").first();
    if (detailTable != null) {
        for (Element tr : detailTable.select("tr")) {
            // Rows without a label cell and a value cell cannot be turned into a detail.
            if (tr.children().size() < 2) {
                continue;
            }
            Element valueCell = tr.child(1);
            String text = valueCell.text();
            Elements links = valueCell.select("a");
            if (links.size() > 0) {
                String href = links.attr("href");
                // Append the link target unless it starts with "/" or the cell text
                // already contains the base URL.
                if (!href.startsWith("/") && !text.contains(data.getString("baseurl"))) {
                    text += " " + href;
                }
            }
            res.addDetail(new Detail(tr.child(0).text(), text));
        }
    }

    try {
        if (doc.select("#Volumes").size() > 0) {
            parseVolumes(res, doc, data);
        } else {
            parseCopies(res, doc, data);
        }
    } catch (JSONException e) {
        // Deliberately best-effort: a configuration problem while parsing copies must not
        // discard the title, cover and details collected above.
        e.printStackTrace();
    }
    return res;
}
private static void parseVolumes(DetailledItem res, Document doc, JSONObject data) { // only tested in Münster // e.g. https://www.stadt-muenster.de/opac2/Record/0900944 Element table = doc.select(".recordsubcontent, .tab-container").first().select("table").first(); for (Element link : table.select("tr a")) { Volume volume = new Volume(); Matcher matcher = idPattern.matcher(link.attr("href")); if (matcher.find()) volume.setId(matcher.group(1)); volume.setTitle(link.text()); res.addVolume(volume); } }
static void parseCopies(DetailledItem res, Document doc, JSONObject data) throws JSONException { if ("doublestacked".equals(data.optString("copystyle"))) { // e.g. http://vopac.nlg.gr/Record/393668/Holdings#tabnav // for Athens_GreekNationalLibrary Element container = doc.select(".tab-container").first(); String branch = ""; for (Element child : container.children()) { if (child.tagName().equals("h5")) { branch = child.text(); } else if (child.tagName().equals("table")) { int i = 0; String callNumber = ""; for (Element row : child.select("tr")) { if (i == 0) { callNumber = row.child(1).text(); } else { Copy copy = new Copy(); copy.setBranch(branch); copy.setShelfmark(callNumber); copy.setBarcode(row.child(0).text()); copy.setStatus(row.child(1).text()); res.addCopy(copy); } i++; } } } } else if ("stackedtable".equals(data.optString("copystyle"))) { // e.g. http://search.lib.auth.gr/Record/376356 // or https://katalog.ub.uni-leipzig.de/Record/0000196115 // or https://www.stadt-muenster.de/opac2/Record/0367968 Element container = doc.select(".recordsubcontent, .tab-container").first(); // .tab-container is used in Muenster. 
String branch = ""; JSONObject copytable = data.getJSONObject("copytable"); for (Element child : container.children()) { if (child.tagName().equals("div")) { child = child.child(0); } if (child.tagName().equals("h3")) { branch = child.text(); } else if (child.tagName().equals("table")) { if (child.select("caption").size() > 0) { // Leipzig_Uni branch = child.select("caption").first().ownText(); } int i = 0; String callNumber = null; if ("headrow".equals(copytable.optString("signature"))) { callNumber = child.select("tr").get(0).child(1).text(); } for (Element row : child.select("tr")) { if (i < copytable.optInt("_offset", 0)) { i++; continue; } Copy copy = new Copy(); if (callNumber != null) { copy.setShelfmark(callNumber); } copy.setBranch(branch); Iterator<?> keys = copytable.keys(); while (keys.hasNext()) { String key = (String) keys.next(); if (key.startsWith("_")) continue; if (copytable.optString(key, "").contains("/")) { // Leipzig_Uni String[] splitted = copytable.getString(key).split("/"); int col = Integer.parseInt(splitted[0]); int line = Integer.parseInt(splitted[1]); int j = 0; for (Node node : row.child(col).childNodes()) { if (node instanceof Element) { if (((Element) node).tagName().equals("br")) { j++; } else if (j == line) { copy.set(key, ((Element) node).text()); } } else if (node instanceof TextNode && j == line && !((TextNode) node).text().trim().equals("")) { copy.set(key, ((TextNode) node).text()); } } } else { // Thessaloniki_University if (copytable.optInt(key, -1) == -1) continue; String value = row.child(copytable.getInt(key)).text(); copy.set(key, value); } } res.addCopy(copy); i++; } } } } }