Skip to content

Commit e06dd19

Browse files
committed
Treat download error during input prefetching as lost input.
... so that bazel can correctly rewind the build.
1 parent 88facd4 commit e06dd19

File tree

5 files changed

+102
-7
lines changed

5 files changed

+102
-7
lines changed

src/main/java/com/google/devtools/build/lib/remote/AbstractActionInputPrefetcher.java

+15-7
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import com.google.devtools.build.lib.profiler.ProfilerTask;
5050
import com.google.devtools.build.lib.remote.common.CacheNotFoundException;
5151
import com.google.devtools.build.lib.remote.util.AsyncTaskCache;
52+
import com.google.devtools.build.lib.remote.util.DigestUtil;
5253
import com.google.devtools.build.lib.util.TempPathGenerator;
5354
import com.google.devtools.build.lib.vfs.FileSymlinkLoopException;
5455
import com.google.devtools.build.lib.vfs.FileSystemUtils;
@@ -569,26 +570,33 @@ private Completable downloadFileNoCheckRx(
569570
}
570571

571572
Path finalPath = path;
573+
PathFragment execPath = finalPath.relativeTo(execRoot);
572574

573575
Completable download =
574576
usingTempPath(
575577
(tempPath, alreadyDeleted) ->
576578
toCompletable(
577579
() ->
578580
doDownloadFile(
579-
action,
580-
reporter,
581-
tempPath,
582-
finalPath.relativeTo(execRoot),
583-
metadata,
584-
priority,
585-
reason),
581+
action, reporter, tempPath, execPath, metadata, priority, reason),
586582
directExecutor())
587583
.doOnComplete(
588584
() -> {
589585
finalizeDownload(
590586
metadata, tempPath, finalPath, dirsWithOutputPermissions);
591587
alreadyDeleted.set(true);
588+
})
589+
.onErrorResumeNext(
590+
error -> {
591+
if (error instanceof CacheNotFoundException) {
592+
return Completable.error(error);
593+
}
594+
595+
// Treat other download error as CacheNotFoundException so that Bazel can correctly rewind the
596+
// action/build.
597+
var digest =
598+
DigestUtil.buildDigest(metadata.getDigest(), metadata.getSize());
599+
return Completable.error(new CacheNotFoundException(digest, execPath));
592600
}));
593601

594602
return downloadCache.executeIfNot(

src/main/java/com/google/devtools/build/lib/remote/BUILD

+2
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ java_library(
246246
"//src/main/java/com/google/devtools/build/lib/remote/common:cache_not_found_exception",
247247
"//src/main/java/com/google/devtools/build/lib/remote/common:lost_inputs_event",
248248
"//src/main/java/com/google/devtools/build/lib/remote/util",
249+
"//src/main/java/com/google/devtools/build/lib/remote/util:digest_utils",
249250
"//src/main/java/com/google/devtools/build/lib/util:temp_path_generator",
250251
"//src/main/java/com/google/devtools/build/lib/vfs",
251252
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
@@ -255,6 +256,7 @@ java_library(
255256
"//third_party:guava",
256257
"//third_party:jsr305",
257258
"//third_party:rxjava3",
259+
"@remoteapis//:build_bazel_remote_execution_v2_remote_execution_java_proto",
258260
],
259261
)
260262

src/test/shell/bazel/remote/build_without_the_bytes_test.sh

+59
Original file line numberDiff line numberDiff line change
@@ -2062,6 +2062,65 @@ EOF
20622062
fi
20632063
}
20642064

2065+
function test_retry_build_if_remote_executor_is_unavailable() {
2066+
mkdir -p a
2067+
2068+
cat > a/BUILD <<'EOF'
2069+
genrule(
2070+
name = 'foo',
2071+
srcs = ['foo.in'],
2072+
outs = ['foo.out'],
2073+
cmd = 'cat $(SRCS) > $@',
2074+
)
2075+
2076+
genrule(
2077+
name = 'bar',
2078+
srcs = ['foo.out', 'bar.in'],
2079+
outs = ['bar.out'],
2080+
cmd = 'cat $(SRCS) > $@',
2081+
tags = ['no-remote-exec'],
2082+
)
2083+
EOF
2084+
2085+
echo foo > a/foo.in
2086+
echo bar > a/bar.in
2087+
2088+
# Populate remote cache
2089+
bazel build \
2090+
--remote_executor=grpc://localhost:${worker_port} \
2091+
--remote_download_minimal \
2092+
//a:bar >& $TEST_log || fail "Failed to build"
2093+
2094+
bazel clean
2095+
2096+
# Clean build, foo.out isn't downloaded
2097+
bazel build \
2098+
--remote_executor=grpc://localhost:${worker_port} \
2099+
--remote_download_minimal \
2100+
//a:bar >& $TEST_log || fail "Failed to build"
2101+
2102+
if [[ -f bazel-bin/a/foo.out ]]; then
2103+
fail "Expected intermediate output bazel-bin/a/foo.out to not be downloaded"
2104+
fi
2105+
2106+
# Make the remote worker unavailable
2107+
stop_worker
2108+
start_worker --unavailable
2109+
2110+
echo "updated bar" > a/bar.in
2111+
2112+
# Incremental build triggers remote cache error but Bazel automatically retries the build and reruns the generating
2113+
# actions locally for missing blobs
2114+
bazel build \
2115+
--remote_executor=grpc://localhost:${worker_port} \
2116+
--remote_local_fallback \
2117+
--remote_download_minimal \
2118+
--experimental_remote_cache_eviction_retries=1 \
2119+
//a:bar >& $TEST_log || fail "Failed to build"
2120+
2121+
expect_log "Found transient remote cache error, retrying the build..."
2122+
}
2123+
20652124
function test_remote_cache_eviction_retries_toplevel_artifacts() {
20662125
mkdir -p a
20672126

src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/RemoteWorker.java

+16
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,19 @@ public <ReqT, RespT> Listener<ReqT> interceptCall(
150150
}
151151
}
152152

153+
private static class UnavailableInterceptor implements ServerInterceptor {
154+
155+
@Override
156+
public <ReqT, RespT> Listener<ReqT> interceptCall(
157+
ServerCall<ReqT, RespT> call, Metadata headers, ServerCallHandler<ReqT, RespT> next) {
158+
if (!call.getMethodDescriptor().getServiceName().contains("Capabilities")) {
159+
call.close(Status.UNAVAILABLE, new Metadata());
160+
return new ServerCall.Listener<ReqT>() {};
161+
}
162+
return Contexts.interceptCall(Context.current(), call, headers, next);
163+
}
164+
}
165+
153166
public RemoteWorker(
154167
FileSystem fs,
155168
RemoteWorkerOptions workerOptions,
@@ -193,6 +206,9 @@ public RemoteWorker(
193206

194207
public Server startServer() throws IOException {
195208
List<ServerInterceptor> interceptors = new ArrayList<>();
209+
if (workerOptions.unavailable) {
210+
interceptors.add(new UnavailableInterceptor());
211+
}
196212
interceptors.add(new TracingMetadataUtils.ServerHeadersInterceptor());
197213
if (workerOptions.expectedAuthorizationToken != null) {
198214
interceptors.add(new AuthorizationTokenInterceptor(workerOptions.expectedAuthorizationToken));

src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/RemoteWorkerOptions.java

+10
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,16 @@ public class RemoteWorkerOptions extends OptionsBase {
188188
+ " testing only.")
189189
public String expectedAuthorizationToken;
190190

191+
@Option(
192+
name = "unavailable",
193+
defaultValue = "false",
194+
documentationCategory = OptionDocumentationCategory.UNCATEGORIZED,
195+
effectTags = {OptionEffectTag.UNKNOWN},
196+
help =
197+
"If true, all gRPC services, except Capabilities, return UNAVAILABLE. This is useful for"
198+
+ " testing only.")
199+
public boolean unavailable;
200+
191201
private static final int MAX_JOBS = 16384;
192202

193203
/**

0 commit comments

Comments
 (0)