/**
 * Converts a {@link NutchDocument} into a {@code SolrInputDocument} and
 * buffers it, sending the buffer to Solr once it holds at least
 * {@code commitSize} documents.
 *
 * <p>Values of the {@code content} and {@code title} fields are passed
 * through {@code SolrUtils.stripNonCharCodepoints} before being added. Each
 * value is added under the mapped field name and, when the copy mapping
 * yields a name different from the source key, under that copy field too.
 *
 * @param doc the document to index
 * @throws IOException if sending the buffered documents to Solr fails with
 *         a {@code SolrServerException}; in that case the buffer is not
 *         cleared
 */
@Override
public void write(NutchDocument doc) throws IOException {
  final SolrInputDocument inputDoc = new SolrInputDocument();

  for (final Entry<String, List<String>> e : doc) {
    // Everything derived from the key is invariant across the value list.
    final String key = e.getKey();
    final boolean stripNonChars = key.equals("content") || key.equals("title");
    final String mappedKey = solrMapping.mapKey(key);
    final String copyKey = solrMapping.mapCopyKey(key);
    // Compare by value: a reference comparison (!=) would treat an equal
    // but distinct String as a different field and add the value twice.
    final boolean hasCopyField = !key.equals(copyKey);

    for (final String val : e.getValue()) {
      final Object fieldValue =
          stripNonChars ? SolrUtils.stripNonCharCodepoints(val) : val;
      inputDoc.addField(mappedKey, fieldValue);
      if (hasCopyField) {
        inputDoc.addField(copyKey, fieldValue);
      }
    }
  }

  inputDoc.setDocumentBoost(doc.getScore());
  inputDocs.add(inputDoc);

  if (inputDocs.size() >= commitSize) {
    try {
      LOG.info("Adding " + inputDocs.size() + " documents");
      solr.add(inputDocs);
    } catch (final SolrServerException e) {
      throw new IOException(e);
    }
    inputDocs.clear();
  }
}
/**
 * Initialises this writer from the job's configuration.
 *
 * <p>Obtains the Solr server via {@code SolrUtils.getCommonsHttpSolrServer},
 * reads the batch size from {@code SolrConstants.COMMIT_SIZE} (falling back
 * to 1000 when unset), and loads the field mapping through
 * {@code SolrMappingReader.getInstance}.
 *
 * @param job the task attempt context whose configuration is used
 * @throws IOException declared by the overridden method
 */
@Override
public void open(TaskAttemptContext job) throws IOException {
  final Configuration configuration = job.getConfiguration();

  this.solr = SolrUtils.getCommonsHttpSolrServer(configuration);
  this.commitSize = configuration.getInt(SolrConstants.COMMIT_SIZE, 1000);
  this.solrMapping = SolrMappingReader.getInstance(configuration);
}