/**
 * Loads one page of the nodes that belong to the given xpath, ordered by ascending node id.
 *
 * <p>Deep paging is slow: reaching the offset {@code from} requires the database to sort all
 * matching rows and then skip over the leading ones.
 *
 * @param xp xpath whose nodes are fetched; its XmlObject selects the {@code xml_node_<dbID>}
 *     table and is also used as the {@code xml_object_id} filter
 * @param from offset of the first row to return (narrowed to {@code int})
 * @param count maximum number of rows to return (narrowed to {@code int})
 * @return the matching nodes of the requested page
 */
public List<XMLNode> getByXpathHolder(XpathHolder xp, long from, long count) {
  Query page =
      getSession()
          .createSQLQuery(
              "select * from xml_node_"
                  + xp.getXmlObject().getDbID()
                  + " where xpath_summary_id = :xpath and xml_object_id = :xo "
                  + "order by xml_node_id asc")
          .addEntity(XMLNode.class);
  page.setEntity("xpath", xp);
  page.setEntity("xo", xp.getXmlObject());
  page.setMaxResults((int) count);
  page.setFirstResult((int) from);
  return page.list();
}
/** * Retrieve the given node with subtree and all parent elements. No actual value wrapping is done * in this method! Only empty element wrapping to adjust for straight xpaths. * * @param parent * @return */ public XMLNode wrappedDOMTree(XMLNode parent) { XMLNode result = getDOMTree(parent); do { XpathHolder parentPath = result.getXpathHolder().getParent(); if (parentPath == null) break; // shouldnt happen if (parentPath.getParent() == null) break; // this should happen XMLNode newParent = new XMLNode(); newParent.getChildren().add(result); result.setParentNode(newParent); newParent.setXmlObject(result.getXmlObject()); newParent.setXpathHolder(parentPath); result = newParent; } while (true); return result; }
/**
 * Lists the distinct content values stored under the given xpath together with how often each
 * occurs, ordered by content.
 *
 * @param xp xpath whose node contents are grouped; its XmlObject selects the
 *     {@code xml_node_<dbID>} table
 * @param start offset of the first row; only applied when {@code count > 0}
 * @param count maximum number of rows; values {@code <= 0} disable paging altogether
 * @param filter optional substring the content has to contain (matched with
 *     {@code like '%filter%'}); {@code null} for no filtering
 * @return rows of {@code [content, count]}
 */
public List<Object[]> getValues(XpathHolder xp, int start, int count, String filter) {
  boolean filtered = filter != null;
  Query q =
      getSession()
          .createSQLQuery(
              "select content, count(*) "
                  + "from xml_node_"
                  + xp.getXmlObject().getDbID()
                  + " where xpath_summary_id = :xpath "
                  + (filtered ? "and content like :filter " : "")
                  + "group by content "
                  + "order by content ")
          .setEntity("xpath", xp);

  if (count > 0) {
    q.setMaxResults(count);
    q.setFirstResult(start);
  }
  if (filtered) {
    q.setString("filter", "%" + filter + "%");
  }
  return q.list();
}
/**
 * Computes the average content length of the nodes stored under the given xpath.
 *
 * <p>If the holder is neither a text node nor an attribute node, its text node (as returned by
 * {@code getTextNode()}) is measured instead.
 *
 * @param xpathHolder xpath to measure; may be {@code null}
 * @return the average length of {@code content}, or {@code -1f} when no value is available:
 *     the holder is {@code null}, it has no text node to fall back to, or no matching rows
 *     exist so the query yields no average
 */
public float getAvgLength(XpathHolder xpathHolder) {
  if (xpathHolder == null) {
    return -1f;
  }

  XpathHolder target = xpathHolder;
  if (!(target.isTextNode() || target.isAttributeNode())) {
    target = target.getTextNode();
    if (target == null) {
      return -1f;
    }
  }

  Double val =
      (Double)
          getSession()
              .createQuery(
                  "select avg( length (content) ) from XMLNode "
                      + "where xmlObject = :xo and xpathHolder = :xp ")
              .setEntity("xo", target.getXmlObject())
              .setEntity("xp", target)
              .uniqueResult();

  // avg() over zero rows is null; report "not available" instead of failing with an NPE.
  if (val == null) {
    return -1f;
  }
  return val.floatValue();
}
/**
 * Same paged lookup as {@code getByXpathHolder}, but executed on the stateless session obtained
 * from {@code DB.getStatelessSession()}.
 *
 * <p>After loading, the XpathHolder of every returned node is replaced by the instance that
 * {@code DB.getXpathHolderDAO().findById(...)} delivers for the same id.
 *
 * @param xp xpath whose nodes are fetched; its XmlObject selects the {@code xml_node_<dbID>}
 *     table and is also used as the {@code xml_object_id} filter
 * @param from offset of the first row to return (narrowed to {@code int})
 * @param count maximum number of rows to return (narrowed to {@code int})
 * @return the matching nodes of the requested page, ordered by ascending node id
 */
public List<XMLNode> getStatelessByXpathHolder(XpathHolder xp, long from, long count) {
  Query page =
      DB.getStatelessSession()
          .createSQLQuery(
              "select * from xml_node_"
                  + xp.getXmlObject().getDbID()
                  + " where xpath_summary_id = :xpath and xml_object_id = :xo "
                  + "order by xml_node_id asc")
          .addEntity(XMLNode.class);
  page.setEntity("xpath", xp);
  page.setEntity("xo", xp.getXmlObject());
  page.setMaxResults((int) count);
  page.setFirstResult((int) from);
  List<XMLNode> nodes = page.list();

  // Swap the holder that came with each stateless row for the one the DAO provides.
  for (int i = 0; i < nodes.size(); i++) {
    XMLNode node = nodes.get(i);
    node.setXpathHolder(DB.getXpathHolderDAO().findById(node.getXpathHolder().getDbID(), false));
  }
  return nodes;
}
/** * Quickly build a dom tree. Use per item, not for trees with more than 2000 nodes (or about * that). The XMLNodes are not in the Hibernate session and don't lazy load their parent or * anything else. They should not need to, though. The attached XpathHolders are in the session * and behave normally. * * @param parent * @return */ public XMLNode getDOMTree(XMLNode parent) { StatelessSession ss = DB.getStatelessSession(); List<XMLNode> l; int maxNodes = 10000; if (parent.getSize() < maxNodes) maxNodes = (int) parent.getSize(); Stack<XMLNode> stack = new Stack<XMLNode>(); HashMap<Long, XpathHolder> xpathCache = new HashMap<Long, XpathHolder>(); l = ss.createSQLQuery( "select * from xml_node_" + parent.getXmlObject().getDbID() + " where xml_node_id >= :parentId order by xml_node_id") .addEntity(XMLNode.class) .setLong("parentId", parent.getNodeId()) .setMaxResults(maxNodes) .list(); // now every node has the wrong parent and XpathHolder and no Children.. for (XMLNode x : l) { // find the right place in stack x.setChildren(new ArrayList<XMLNode>()); while (!stack.isEmpty()) { if (stack.peek().getNodeId() != x.getParentNode().getNodeId()) stack.pop(); else break; } if (!stack.isEmpty()) { x.setParentNode(stack.peek()); stack.peek().getChildren().add(x); } stack.push(x); // now the xpathholder XpathHolder path = xpathCache.get(x.getXpathHolder().getDbID()); if (path == null) { path = DB.getXpathHolderDAO().findById(x.getXpathHolder().getDbID(), false); xpathCache.put(path.getDbID(), path); } x.setXpathHolder(path); x.setXmlObject(parent.getXmlObject()); } if (l.size() > 0) return l.get(0); else return null; }
/** * Cursor over all nodes. (You still have to get XpathHolders the normal way) Call the index * function on given object. Proceeds in node order. * * @param xo * @param ni */ public void indexNodes(XmlObject xo, NodeIndexer ni) { StatelessSession ss = DB.getStatelessSession(); Stack<XMLNode> stack = new Stack<XMLNode>(); HashMap<Long, XpathHolder> xpathCache = new HashMap<Long, XpathHolder>(); ScrollableResults sr = null; try { sr = ss.createSQLQuery("select * from xml_node_" + xo.getDbID() + " order by xml_node_id") .addEntity(XMLNode.class) .scroll(ScrollMode.FORWARD_ONLY); while (sr.next()) { XMLNode x = (XMLNode) sr.get()[0]; while (!stack.isEmpty()) { if ((x.getParentNode() == null) || (stack.peek().getNodeId() != x.getParentNode().getNodeId())) stack.pop(); else break; } stack.push(x); // now the xpathholder XpathHolder path = xpathCache.get(x.getXpathHolder().getDbID()); if (path == null) { path = DB.getXpathHolderDAO().findById(x.getXpathHolder().getDbID(), false); xpathCache.put(path.getDbID(), path); } x.setXpathHolder(path); x.setXmlObject(xo); // node ready to index ni.index(x); } ni.index(null); } catch (Exception e) { log.error("Error while scrolling XMLNodes for indexing.", e); } finally { if (sr != null) sr.close(); } }
/**
 * Counts the distinct values below the given xpath. Simple nodes (name {@code text()} or a name
 * starting with {@code @}) are compared by content, everything else by checksum.
 *
 * @param xp xpath to inspect; its XmlObject restricts the counted nodes
 * @return number of distinct contents or checksums
 */
public long countDistinct(XpathHolder xp) {
  boolean simpleNode = xp.name.equals("text()") || xp.name.startsWith("@");
  String hql =
      simpleNode
          ? "select count( distinct content ) from XMLNode where xpathHolder = :xpath and xmlObject = :xo"
          : "select count( distinct checksum ) from XMLNode where xpathHolder = :xpath and xmlObject = :xo ";

  Long distinct =
      (Long)
          getSession()
              .createQuery(hql)
              .setEntity("xpath", xp)
              .setEntity("xo", xp.getXmlObject())
              .uniqueResult();
  return distinct.longValue();
}
/**
 * Value/frequency pairs for the given xpath, most frequent value first. Only simple nodes (name
 * {@code text()} or a name starting with {@code @}) are evaluated; for any other xpath the
 * result is empty.
 *
 * @param xp xpath whose contents are counted; its XmlObject restricts the counted nodes
 * @param limit maximum number of values to list
 * @return rows of {@code [content, count]} ordered by descending count, or an empty list for
 *     non-simple xpaths
 */
public List<Object[]> getCountByValue(XpathHolder xp, int limit) {
  boolean simpleNode = xp.name.equals("text()") || xp.name.startsWith("@");
  if (!simpleNode) {
    return Collections.emptyList();
  }

  List<Object[]> frequencies =
      (List<Object[]>)
          getSession()
              .createQuery(
                  "select content, count( * ) "
                      + "from XMLNode where xpathHolder = :xpath "
                      + "and xmlObject = :xo "
                      + "group by content "
                      + "order by count(*) desc")
              .setEntity("xpath", xp)
              .setEntity("xo", xp.getXmlObject())
              .setMaxResults(limit)
              .list();
  return frequencies;
}