Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Protect against multiple concurrent downloads of the same model #116869

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
package org.elasticsearch.xpack.inference;

import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseListener;
import org.elasticsearch.common.Strings;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService;
Expand All @@ -16,9 +19,12 @@
import org.junit.Before;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;
Expand Down Expand Up @@ -110,4 +116,39 @@ private static void assertDefaultE5Config(Map<String, Object> modelConfig) {
Matchers.is(Map.of("enabled", true, "min_number_of_allocations", 0, "max_number_of_allocations", 32))
);
}

/**
 * Sends several inference requests to the default ELSER endpoint in parallel and
 * asserts that none of them fails.
 * <p>
 * Going by the test name, these requests are expected to race to trigger the model
 * download and deployment. The test is skipped unless the default ELSER feature
 * flag is enabled.
 *
 * @throws InterruptedException if the thread is interrupted while waiting for the responses
 */
public void testMultipleInferencesTiggeringDownloadAndDeploy() throws InterruptedException {
    assumeTrue("Default config requires a feature flag", DefaultElserFeatureFlag.isEnabled());

    int numParallelRequests = 4;
    var latch = new CountDownLatch(numParallelRequests);
    // The same listener instance is shared by all async requests and its callbacks may be
    // invoked from different client threads, so the error sink must tolerate concurrent adds.
    // (Fully qualified to avoid touching the file's import block.)
    var errors = java.util.Collections.synchronizedList(new ArrayList<Exception>());

    var listener = new ResponseListener() {
        @Override
        public void onSuccess(Response response) {
            latch.countDown();
        }

        @Override
        public void onFailure(Exception exception) {
            // Record the failure before releasing the latch so the main thread sees it after await().
            errors.add(exception);
            latch.countDown();
        }
    };

    var inputs = List.of("Hello World", "Goodnight moon");
    var queryParams = Map.of("timeout", "120s");
    for (int i = 0; i < numParallelRequests; i++) {
        var request = createInferenceRequest(
            Strings.format("_inference/%s", ElasticsearchInternalService.DEFAULT_ELSER_ID),
            inputs,
            queryParams
        );
        client().performRequestAsync(request, listener);
    }

    // Every request ends in exactly one callback, each of which counts the latch down once.
    latch.await();
    assertThat(errors.toString(), errors, empty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -373,12 +373,17 @@ protected Map<String, Object> infer(String modelId, TaskType taskType, List<Stri
return inferInternal(endpoint, input, queryParameters);
}

private Map<String, Object> inferInternal(String endpoint, List<String> input, Map<String, String> queryParameters) throws IOException {
/**
 * Builds a {@code POST} request for the given inference endpoint.
 *
 * @param endpoint        the request path
 * @param input           the input strings, serialized into the JSON body via {@code jsonBody}
 * @param queryParameters query-string parameters; only added when the map is non-empty
 * @return the prepared, not yet executed, request
 */
protected Request createInferenceRequest(String endpoint, List<String> input, Map<String, String> queryParameters) {
    final Request inferenceRequest = new Request("POST", endpoint);
    inferenceRequest.setJsonEntity(jsonBody(input));
    if (queryParameters.isEmpty()) {
        // Nothing to append to the query string.
        return inferenceRequest;
    }
    inferenceRequest.addParameters(queryParameters);
    return inferenceRequest;
}

private Map<String, Object> inferInternal(String endpoint, List<String> input, Map<String, String> queryParameters) throws IOException {
var request = createInferenceRequest(endpoint, input, queryParameters);
var response = client().performRequest(request);
assertOkOrCreated(response);
return entityAsMap(response);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,9 @@

package org.elasticsearch.xpack.inference.services.elasticsearch;

import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.Model;
import org.elasticsearch.inference.TaskSettings;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;

public class CustomElandModel extends ElasticsearchInternalModel {

Expand All @@ -39,31 +34,10 @@ public CustomElandModel(
}

@Override
public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener(
Model model,
ActionListener<Boolean> listener
) {

return new ActionListener<>() {
@Override
public void onResponse(CreateTrainedModelAssignmentAction.Response response) {
listener.onResponse(Boolean.TRUE);
}

@Override
public void onFailure(Exception e) {
if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) {
listener.onFailure(
new ResourceNotFoundException(
"Could not start the inference as the custom eland model [{0}] for this platform cannot be found."
+ " Custom models need to be loaded into the cluster with eland before they can be started.",
internalServiceSettings.modelId()
)
);
return;
}
listener.onFailure(e);
}
};
/**
 * Builds the error message used when the custom model to deploy cannot be found,
 * including a hint that custom models must first be loaded with Eland.
 *
 * @param modelId the id of the model that could not be found
 * @return the user-facing error message
 */
protected String modelNotFoundErrorMessage(String modelId) {
    final String problem = "Could not deploy model [" + modelId + "] as the model cannot be found.";
    final String remedy = " Custom models need to be loaded into the cluster with Eland before they can be started.";
    return problem + remedy;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentA
throw new IllegalStateException("cannot start model that uses an existing deployment");
}

/**
 * Not supported for this model: always throws rather than returning a message.
 *
 * @param modelId the id of the model, included in the exception message
 * @return never returns normally
 * @throws IllegalStateException always
 */
@Override
protected String modelNotFoundErrorMessage(String modelId) {
    final String reason = "cannot start model [" + modelId + "] that uses an existing deployment";
    throw new IllegalStateException(reason);
}

@Override
public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener(
Model model,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

package org.elasticsearch.xpack.inference.services.elasticsearch;

import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.ResourceAlreadyExistsException;
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.TimeValue;
Expand All @@ -15,8 +18,10 @@
import org.elasticsearch.inference.ModelConfigurations;
import org.elasticsearch.inference.TaskSettings;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction;
import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;

import static org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus.State.STARTED;

Expand Down Expand Up @@ -79,10 +84,38 @@ public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentA
return startRequest;
}

public abstract ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener(
public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener(
Model model,
ActionListener<Boolean> listener
);
) {
return new ActionListener<>() {
// Any response to the create-assignment action is reported to the wrapped listener as TRUE;
// the response content itself is not inspected.
@Override
public void onResponse(CreateTrainedModelAssignmentAction.Response response) {
listener.onResponse(Boolean.TRUE);
}

/**
 * Translates a failure of the create-assignment action for the wrapped listener.
 * <ul>
 *   <li>A {@link ResourceNotFoundException} cause is replaced by one carrying the
 *       message from {@code modelNotFoundErrorMessage}.</li>
 *   <li>A {@code CONFLICT} status whose root cause is a
 *       {@link ResourceAlreadyExistsException} is treated as success, since the
 *       deployment is already started.</li>
 *   <li>Every other failure is passed to the listener unchanged.</li>
 * </ul>
 *
 * @param e the failure reported by the action
 */
@Override
public void onFailure(Exception e) {
    var cause = ExceptionsHelper.unwrapCause(e);
    if (cause instanceof ResourceNotFoundException) {
        listener.onFailure(new ResourceNotFoundException(modelNotFoundErrorMessage(internalServiceSettings.modelId())));
        return;
    }
    if (cause instanceof ElasticsearchStatusException statusException
        && statusException.status() == RestStatus.CONFLICT
        && statusException.getRootCause() instanceof ResourceAlreadyExistsException) {
        // Deployment is already started
        listener.onResponse(Boolean.TRUE);
        return;
    }
    // Any other failure, including other ElasticsearchStatusExceptions, must still reach the
    // listener; otherwise the caller would wait forever for a completion that never arrives.
    listener.onFailure(e);
}
};
}

/**
 * Builds the error message used when the model to deploy cannot be found.
 * Subclasses may override this to add model-specific guidance.
 *
 * @param modelId the id of the model that could not be found
 * @return the user-facing error message
 */
protected String modelNotFoundErrorMessage(String modelId) {
    final StringBuilder message = new StringBuilder("Could not deploy model [");
    message.append(modelId).append("] as the model cannot be found.");
    return message.toString();
}

public boolean usesExistingDeployment() {
return internalServiceSettings.getDeploymentId() != null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,8 @@

package org.elasticsearch.xpack.inference.services.elasticsearch;

import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.Model;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;

public class ElserInternalModel extends ElasticsearchInternalModel {

Expand All @@ -37,31 +32,4 @@ public ElserInternalServiceSettings getServiceSettings() {
/**
 * Returns the task settings held by the superclass, narrowed to the ELSER-specific type.
 *
 * @return the task settings cast to {@link ElserMlNodeTaskSettings}
 */
public ElserMlNodeTaskSettings getTaskSettings() {
    final var inheritedSettings = super.getTaskSettings();
    return (ElserMlNodeTaskSettings) inheritedSettings;
}

@Override
public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener(
Model model,
ActionListener<Boolean> listener
) {
return new ActionListener<>() {
@Override
public void onResponse(CreateTrainedModelAssignmentAction.Response response) {
listener.onResponse(Boolean.TRUE);
}

@Override
public void onFailure(Exception e) {
if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) {
listener.onFailure(
new ResourceNotFoundException(
"Could not start the ELSER service as the ELSER model for this platform cannot be found."
+ " ELSER needs to be downloaded before it can be started."
)
);
return;
}
listener.onFailure(e);
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,8 @@

package org.elasticsearch.xpack.inference.services.elasticsearch;

import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.Model;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;

public class MultilingualE5SmallModel extends ElasticsearchInternalModel {

Expand All @@ -31,34 +26,4 @@ public MultilingualE5SmallModel(
/**
 * Returns the service settings held by the superclass, narrowed to the
 * Multilingual-E5-Small-specific type.
 *
 * @return the service settings cast to {@link MultilingualE5SmallInternalServiceSettings}
 */
public MultilingualE5SmallInternalServiceSettings getServiceSettings() {
    final var inheritedSettings = super.getServiceSettings();
    return (MultilingualE5SmallInternalServiceSettings) inheritedSettings;
}

@Override
public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener(
Model model,
ActionListener<Boolean> listener
) {

return new ActionListener<>() {
@Override
public void onResponse(CreateTrainedModelAssignmentAction.Response response) {
listener.onResponse(Boolean.TRUE);
}

@Override
public void onFailure(Exception e) {
if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) {
listener.onFailure(
new ResourceNotFoundException(
"Could not start the TextEmbeddingService service as the "
+ "Multilingual-E5-Small model for this platform cannot be found."
+ " Multilingual-E5-Small needs to be downloaded before it can be started"
)
);
return;
}
listener.onFailure(e);
}
};
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.ml.packageloader.action;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.tasks.RemovedTaskListener;
import org.elasticsearch.tasks.Task;

/**
 * A {@link RemovedTaskListener} that completes an {@link ActionListener} when one
 * specific download task is removed.
 * <p>
 * Removal notifications for other tasks are ignored. For the tracked task, the
 * listener is failed with the exception recorded on the task, or acknowledged
 * if none was recorded.
 *
 * @param trackedTask the download task whose removal is being awaited
 * @param listener    the listener to complete once {@code trackedTask} is removed
 */
public record DownloadTaskRemovedListener(ModelDownloadTask trackedTask, ActionListener<AcknowledgedResponse> listener)
    implements
        RemovedTaskListener {

    /**
     * Completes the listener if {@code task} has the same id and action as the tracked task.
     *
     * @param task the task that has been removed
     */
    @Override
    public void onRemoved(Task task) {
        boolean isTrackedTask = task.getId() == trackedTask.getId() && task.getAction().equals(trackedTask.getAction());
        if (isTrackedTask == false) {
            return;
        }
        // Read the exception exactly once so the null check and the value handed to
        // onFailure cannot disagree if the field changes between two reads.
        Exception taskException = trackedTask.getTaskException();
        if (taskException == null) {
            listener.onResponse(AcknowledgedResponse.TRUE);
        } else {
            listener.onFailure(taskException);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.tasks.Task;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xpack.core.ml.MlTasks;

import java.io.IOException;
import java.util.Map;
Expand Down Expand Up @@ -51,9 +52,12 @@ public void writeTo(StreamOutput out) throws IOException {
}

private final AtomicReference<DownLoadProgress> downloadProgress = new AtomicReference<>(new DownLoadProgress(0, 0));
private final String modelId;
private volatile Exception taskException;

public ModelDownloadTask(long id, String type, String action, String description, TaskId parentTaskId, Map<String, String> headers) {
super(id, type, action, description, parentTaskId, headers);
/**
 * Creates a download task for the given model. The task description passed to the
 * superclass is derived from the model id via {@link #taskDescription(String)}.
 *
 * @param id           the task id, passed to the superclass
 * @param type         the task type, passed to the superclass
 * @param action       the action name, passed to the superclass
 * @param modelId      the id of the model being downloaded; retained for {@link #getModelId()}
 * @param parentTaskId the parent task id, passed to the superclass
 * @param headers      the task headers, passed to the superclass
 */
public ModelDownloadTask(long id, String type, String action, String modelId, TaskId parentTaskId, Map<String, String> headers) {
super(id, type, action, taskDescription(modelId), parentTaskId, headers);
this.modelId = modelId;
}

void setProgress(int totalParts, int downloadedParts) {
Expand All @@ -65,4 +69,19 @@ public DownloadStatus getStatus() {
return new DownloadStatus(downloadProgress.get());
}

/**
 * @return the id of the model this task was created for
 */
public String getModelId() {
return modelId;
}

/**
 * Records an exception on this task. The backing field is volatile, so the value
 * is visible to other threads that later call {@link #getTaskException()}.
 *
 * @param exception the exception to record
 */
public void setTaskException(Exception exception) {
this.taskException = exception;
}

/**
 * @return the exception recorded via {@link #setTaskException(Exception)},
 *         or {@code null} if none has been set
 */
public Exception getTaskException() {
return taskException;
}

/**
 * Returns the task description for a download of the given model, as produced by
 * {@code MlTasks.downloadModelTaskDescription}.
 *
 * @param modelId the id of the model being downloaded
 * @return the task description string
 */
public static String taskDescription(String modelId) {
return MlTasks.downloadModelTaskDescription(modelId);
}
}
Loading