/**
 * Re-attaches nodes until {@link #getSmallestNonProjectiveArc(IntArrayList)} no longer returns
 * one.
 *
 * <p>Every node returned by that helper is moved to the head of its current head and labelled
 * with {@code DEPLib.DEP_NON_PROJ}. Candidate ids run from 1 (inclusive) to {@code size()}
 * (exclusive); index 0 is never considered (presumably the artificial root - confirm).
 */
public void projectivize() {
  IntArrayList candidateIds = new IntArrayList();
  final int nodeCount = size();

  for (int id = 1; id < nodeCount; id++) {
    candidateIds.add(id);
  }

  for (;;) {
    // The helper also prunes ids it considers done from candidateIds.
    DEPNode offender = getSmallestNonProjectiveArc(candidateIds);
    if (offender == null) {
      break;
    }
    offender.setHead(offender.getHead().getHead(), DEPLib.DEP_NON_PROJ);
  }
}
/** Builds an array, with potential null elements, with docs to load. */ public void fillDocIdsToLoad(AtomicArray<IntArrayList> docsIdsToLoad, ScoreDoc[] shardDocs) { for (ScoreDoc shardDoc : shardDocs) { IntArrayList list = docsIdsToLoad.get(shardDoc.shardIndex); if (list == null) { list = new IntArrayList(); // can't be shared!, uses unsafe on it later on docsIdsToLoad.set(shardDoc.shardIndex, list); } list.add(shardDoc.doc); } }
/**
 * Records the failure of a whole shard for every item slot that was routed to it.
 *
 * <p>For each slot registered under {@code shardId} in {@code shardToSlots}, the shard's entry
 * in that slot's result array is set to a {@code BroadcastShardOperationFailedException}, the
 * slot's outstanding-operation counter is decremented, and the slot is reduced once the counter
 * reaches zero. Slots without a result array are skipped. Any {@code Throwable} raised while
 * doing so is logged and handed to {@code finalListener}.
 *
 * @param shardId the shard whose request failed
 * @param e the failure cause
 */
@SuppressWarnings("unchecked")
void onShardFailure(ShardId shardId, Throwable e) {
  logger.debug("{} Shard multi percolate failure", e, shardId);
  try {
    IntArrayList affectedSlots = shardToSlots.get(shardId);
    for (int idx = 0; idx < affectedSlots.size(); idx++) {
      final int slot = affectedSlots.get(idx);
      final AtomicReferenceArray perShardResults = responsesByItemAndShard.get(slot);
      if (perShardResults != null) {
        perShardResults.set(
            shardId.id(), new BroadcastShardOperationFailedException(shardId, e));
        assert expectedOperationsPerItem.get(slot).get() >= 1
            : "slot[" + slot + "] can't be lower than one. Caused by: " + e.getMessage();
        // Last outstanding shard operation for this item: build its final response.
        if (expectedOperationsPerItem.get(slot).decrementAndGet() == 0) {
          reduce(slot);
        }
      }
    }
  } catch (Throwable t) {
    logger.error("{} Percolate original reduce error, original error {}", t, shardId, e);
    finalListener.onFailure(t);
  }
}
/**
 * Called by {@link DEPTree#projectivize()}.
 *
 * <p>Evaluates {@code isNonProjective} for every id in {@code ids}. Ids whose node yields 0 are
 * removed from {@code ids} before returning. Among the remaining nodes, the first one with the
 * largest strictly positive value is returned.
 *
 * <p>NOTE(review): the method name says "smallest" while the selection keeps the largest value;
 * whether that matches the meaning of {@code isNonProjective}'s result should be confirmed.
 *
 * @param ids candidate node ids; pruned in place
 * @return the selected node, or {@code null} when no node yields a positive value
 */
private DEPNode getSmallestNonProjectiveArc(IntArrayList ids) {
  IntOpenHashSet settledIds = new IntOpenHashSet();
  DEPNode selected = null;
  int bestScore = 0;

  for (IntCursor cursor : ids) {
    final int id = cursor.value;
    final DEPNode node = get(id);
    final int score = isNonProjective(node);

    if (score == 0) {
      settledIds.add(id);
    } else if (score > bestScore) {
      selected = node;
      bestScore = score;
    }
  }

  ids.removeAll(settledIds);
  return selected;
}
@Override protected void doExecute( final MultiPercolateRequest request, final ActionListener<MultiPercolateResponse> listener) { final ClusterState clusterState = clusterService.state(); clusterState.blocks().globalBlockedRaiseException(ClusterBlockLevel.READ); final List<Object> percolateRequests = new ArrayList<>(request.requests().size()); // Can have a mixture of percolate requests. (normal percolate requests & percolate existing // doc), // so we need to keep track for what percolate request we had a get request final IntArrayList getRequestSlots = new IntArrayList(); List<GetRequest> existingDocsRequests = new ArrayList<>(); for (int slot = 0; slot < request.requests().size(); slot++) { PercolateRequest percolateRequest = request.requests().get(slot); percolateRequest.startTime = System.currentTimeMillis(); percolateRequests.add(percolateRequest); if (percolateRequest.getRequest() != null) { existingDocsRequests.add(percolateRequest.getRequest()); getRequestSlots.add(slot); } } if (!existingDocsRequests.isEmpty()) { final MultiGetRequest multiGetRequest = new MultiGetRequest(request); for (GetRequest getRequest : existingDocsRequests) { multiGetRequest.add( new MultiGetRequest.Item(getRequest.index(), getRequest.type(), getRequest.id()) .routing(getRequest.routing())); } multiGetAction.execute( multiGetRequest, new ActionListener<MultiGetResponse>() { @Override public void onResponse(MultiGetResponse multiGetItemResponses) { for (int i = 0; i < multiGetItemResponses.getResponses().length; i++) { MultiGetItemResponse itemResponse = multiGetItemResponses.getResponses()[i]; int slot = getRequestSlots.get(i); if (!itemResponse.isFailed()) { GetResponse getResponse = itemResponse.getResponse(); if (getResponse.isExists()) { PercolateRequest originalRequest = (PercolateRequest) percolateRequests.get(slot); percolateRequests.set( slot, new PercolateRequest(originalRequest, getResponse.getSourceAsBytesRef())); } else { logger.trace("mpercolate existing doc, item[{}] 
doesn't exist", slot); percolateRequests.set( slot, new DocumentMissingException( null, getResponse.getType(), getResponse.getId())); } } else { logger.trace( "mpercolate existing doc, item[{}] failure {}", slot, itemResponse.getFailure()); percolateRequests.set(slot, itemResponse.getFailure()); } } new ASyncAction(request, percolateRequests, listener, clusterState).run(); } @Override public void onFailure(Throwable e) { listener.onFailure(e); } }); } else { new ASyncAction(request, percolateRequests, listener, clusterState).run(); } }
ASyncAction( MultiPercolateRequest multiPercolateRequest, List<Object> percolateRequests, ActionListener<MultiPercolateResponse> finalListener, ClusterState clusterState) { this.finalListener = finalListener; this.multiPercolateRequest = multiPercolateRequest; this.percolateRequests = percolateRequests; responsesByItemAndShard = new AtomicReferenceArray<>(percolateRequests.size()); expectedOperationsPerItem = new AtomicReferenceArray<>(percolateRequests.size()); reducedResponses = new AtomicArray<>(percolateRequests.size()); // Resolving concrete indices and routing and grouping the requests by shard requestsByShard = new HashMap<>(); // Keep track what slots belong to what shard, in case a request to a shard fails on all // copies shardToSlots = new HashMap<>(); int expectedResults = 0; for (int slot = 0; slot < percolateRequests.size(); slot++) { Object element = percolateRequests.get(slot); assert element != null; if (element instanceof PercolateRequest) { PercolateRequest percolateRequest = (PercolateRequest) element; String[] concreteIndices; try { concreteIndices = indexNameExpressionResolver.concreteIndices(clusterState, percolateRequest); } catch (IndexNotFoundException e) { reducedResponses.set(slot, e); responsesByItemAndShard.set(slot, new AtomicReferenceArray(0)); expectedOperationsPerItem.set(slot, new AtomicInteger(0)); continue; } Map<String, Set<String>> routing = indexNameExpressionResolver.resolveSearchRouting( clusterState, percolateRequest.routing(), percolateRequest.indices()); // TODO: I only need shardIds, ShardIterator(ShardRouting) is only needed in // TransportShardMultiPercolateAction GroupShardsIterator shards = clusterService .operationRouting() .searchShards( clusterState, concreteIndices, routing, percolateRequest.preference()); if (shards.size() == 0) { reducedResponses.set(slot, new UnavailableShardsException(null, "No shards available")); responsesByItemAndShard.set(slot, new AtomicReferenceArray(0)); 
expectedOperationsPerItem.set(slot, new AtomicInteger(0)); continue; } // The shard id is used as index in the atomic ref array, so we need to find out how many // shards there are regardless of routing: int numShards = clusterService .operationRouting() .searchShardsCount(clusterState, concreteIndices, null); responsesByItemAndShard.set(slot, new AtomicReferenceArray(numShards)); expectedOperationsPerItem.set(slot, new AtomicInteger(shards.size())); for (ShardIterator shard : shards) { ShardId shardId = shard.shardId(); TransportShardMultiPercolateAction.Request requests = requestsByShard.get(shardId); if (requests == null) { requestsByShard.put( shardId, requests = new TransportShardMultiPercolateAction.Request( multiPercolateRequest, shardId.getIndex(), shardId.getId(), percolateRequest.preference())); } logger.trace("Adding shard[{}] percolate request for item[{}]", shardId, slot); requests.add( new TransportShardMultiPercolateAction.Request.Item( slot, new PercolateShardRequest(shardId, percolateRequest))); IntArrayList items = shardToSlots.get(shardId); if (items == null) { shardToSlots.put(shardId, items = new IntArrayList()); } items.add(slot); } expectedResults++; } else if (element instanceof Throwable || element instanceof MultiGetResponse.Failure) { logger.trace("item[{}] won't be executed, reason: {}", slot, element); reducedResponses.set(slot, element); responsesByItemAndShard.set(slot, new AtomicReferenceArray(0)); expectedOperationsPerItem.set(slot, new AtomicInteger(0)); } } expectedOperations = new AtomicInteger(expectedResults); }
/** Performs tokenization and saves the results to the <code>context</code>. */ public void tokenize(PreprocessingContext context) { // Documents to tokenize final List<Document> documents = context.documents; // Fields to tokenize final String[] fieldNames = documentFields.toArray(new String[documentFields.size()]); if (fieldNames.length > 8) { throw new ProcessingException("Maximum number of tokenized fields is 8."); } // Prepare arrays images = Lists.newArrayList(); tokenTypes = new ShortArrayList(); documentIndices = new IntArrayList(); fieldIndices = new ByteArrayList(); final Iterator<Document> docIterator = documents.iterator(); int documentIndex = 0; final ITokenizer ts = context.language.getTokenizer(); final MutableCharArray wrapper = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY); while (docIterator.hasNext()) { final Document doc = docIterator.next(); boolean hadTokens = false; for (int i = 0; i < fieldNames.length; i++) { final byte fieldIndex = (byte) i; final String fieldName = fieldNames[i]; final String fieldValue = doc.getField(fieldName); if (!StringUtils.isEmpty(fieldValue)) { try { short tokenType; ts.reset(new StringReader(fieldValue)); if ((tokenType = ts.nextToken()) != ITokenizer.TT_EOF) { if (hadTokens) addFieldSeparator(documentIndex); do { ts.setTermBuffer(wrapper); add(documentIndex, fieldIndex, context.intern(wrapper), tokenType); } while ((tokenType = ts.nextToken()) != ITokenizer.TT_EOF); hadTokens = true; } } catch (IOException e) { // Not possible (StringReader above)? 
throw ExceptionUtils.wrapAsRuntimeException(e); } } } if (docIterator.hasNext()) { addDocumentSeparator(); } documentIndex++; } addTerminator(); // Save results in the PreprocessingContext context.allTokens.documentIndex = documentIndices.toArray(); context.allTokens.fieldIndex = fieldIndices.toArray(); context.allTokens.image = images.toArray(new char[images.size()][]); context.allTokens.type = tokenTypes.toArray(); context.allFields.name = fieldNames; // Clean up images = null; fieldIndices = null; tokenTypes = null; documentIndices = null; }
/**
 * Appends one entry to the token sequence by adding a value to each of the four working buffers
 * ({@code images}, {@code tokenTypes}, {@code documentIndices}, {@code fieldIndices}). May be
 * used to add separator constants as well as regular tokens.
 *
 * @param documentIndex value appended to {@code documentIndices}
 * @param fieldIndex value appended to {@code fieldIndices}
 * @param image value appended to {@code images}
 * @param tokenTypeCode value appended to {@code tokenTypes}
 */
void add(int documentIndex, byte fieldIndex, char[] image, short tokenTypeCode) {
  // What the token is...
  images.add(image);
  tokenTypes.add(tokenTypeCode);

  // ...and where it came from.
  documentIndices.add(documentIndex);
  fieldIndices.add(fieldIndex);
}