[go: up one dir, main page]

Store SHA-256 hash of wire bytes for WASM V8 Code Cache

This CL stores and validates the SHA-256 hash of the wire bytes
in the WASM V8 Code Cache to prevent mismatches between
wire bytes and code cache metadata.

On a code cache hit, this CL calculates the hash
while streaming the wire bytes, and
invalidates the code cache metadata on hash mismatch,
by setting `can_use_compiled_module` to `false`
in `WasmStreaming::Finish()`.

The cached metadata is passed to V8 by
`SetCompiledModuleBytes()` when the first bytes of
the wire bytes are received, but this only triggers
suppression of streaming compilation and the
content of the cached metadata is not used until
`WasmStreaming::Finish(true)` is called later.

This CL also adds tracing events to measure hashing overhead
and observe cache rejection by hash mismatch.

This CL depends on V8 side CL:
https://chromium-review.googlesource.com/c/v8/v8/+/3297548

(cherry picked from commit 7077e03a4e02c198ac9fdb0814d195df7a75c229)

Bug: 1260939
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3282643
Cr-Original-Commit-Position: refs/heads/main@{#944867}
No-Try: true
Change-Id: If074cbeeb7015a359566821cae544ff8f1cd3589
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3310065
Commit-Queue: Hiroshige Hayashizaki <hiroshige@chromium.org>
Reviewed-by: Kouhei Ueno <kouhei@chromium.org>
Cr-Commit-Position: refs/branch-heads/4664@{#1207}
Cr-Branched-From: 24dc4ee75e01a29d390d43c9c264372a169273a7-refs/heads/main@{#929512}
diff --git a/third_party/blink/renderer/bindings/core/v8/v8_wasm_response_extensions.cc b/third_party/blink/renderer/bindings/core/v8/v8_wasm_response_extensions.cc
index 7973030..8f14746 100644
--- a/third_party/blink/renderer/bindings/core/v8/v8_wasm_response_extensions.cc
+++ b/third_party/blink/renderer/bindings/core/v8/v8_wasm_response_extensions.cc
@@ -24,6 +24,7 @@
 #include "third_party/blink/renderer/platform/bindings/exception_state.h"
 #include "third_party/blink/renderer/platform/bindings/script_state.h"
 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
+#include "third_party/blink/renderer/platform/crypto.h"
 #include "third_party/blink/renderer/platform/heap/handle.h"
 #include "third_party/blink/renderer/platform/heap/heap.h"
 #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
@@ -36,8 +37,14 @@
 
 namespace {
 
+// The first `kWireBytesDigestSize` bytes of the CachedMetadata body store the
+// SHA-256 hash of the wire bytes and are created/consumed here in Blink. The
+// remaining part of CachedMetadata is created/consumed by V8.
+static const size_t kWireBytesDigestSize = 32;
+
 // Wasm only has a single metadata type, but we need to tag it.
-static const int kWasmModuleTag = 1;
+// `2` is used to invalidate old cached data (which used kWasmModuleTag = 1).
+static const int kWasmModuleTag = 2;
 
 void SendCachedData(String response_url,
                     base::Time response_time,
@@ -100,12 +107,27 @@
                          "v8.wasm.cachedModule", TRACE_EVENT_SCOPE_THREAD,
                          "producedCacheSize", serialized_module.size);
 
+    DigestValue wire_bytes_digest;
+    {
+      TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"),
+                   "v8.wasm.compileDigestForCreate");
+      if (!ComputeDigest(kHashAlgorithmSha256,
+                         reinterpret_cast<const char*>(wire_bytes.data()),
+                         wire_bytes.size(), wire_bytes_digest))
+        return;
+      if (wire_bytes_digest.size() != kWireBytesDigestSize)
+        return;
+    }
+
     // The resources needed for caching may have been GC'ed, but we should still
     // save the compiled module. Use the platform API directly.
-    Vector<uint8_t> serialized_data = CachedMetadata::GetSerializedData(
+    Vector<uint8_t> serialized_data = CachedMetadata::GetSerializedDataHeader(
         kWasmModuleTag,
+        kWireBytesDigestSize + SafeCast<wtf_size_t>(serialized_module.size));
+    serialized_data.Append(wire_bytes_digest.data(), kWireBytesDigestSize);
+    serialized_data.Append(
         reinterpret_cast<const uint8_t*>(serialized_module.buffer.get()),
-        serialized_module.size);
+        SafeCast<wtf_size_t>(serialized_module.size));
 
     // Make sure the data could be copied.
     if (serialized_data.size() < serialized_module.size)
@@ -148,7 +170,9 @@
         streaming_(std::move(streaming)),
         script_state_(script_state),
         cache_handler_(cache_handler),
-        streaming_client_(streaming_client) {}
+        streaming_client_(streaming_client),
+        code_cache_state_(CodeCacheState::kBeforeFirstByte),
+        digestor_(kHashAlgorithmSha256) {}
 
   v8::WasmStreaming* streaming() const { return streaming_.get(); }
 
@@ -162,7 +186,15 @@
     OnStateChange();
   }
 
+  enum class CodeCacheState {
+    kBeforeFirstByte,
+    kUseCodeCache,
+    kNoCodeCache,
+  };
+
   void OnStateChange() override {
+    TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"),
+                 "v8.wasm.compileConsume");
     while (true) {
       // |buffer| is owned by |consumer_|.
       const char* buffer = nullptr;
@@ -173,11 +205,16 @@
         return;
       if (result == BytesConsumer::Result::kOk) {
         if (available > 0) {
-          if (!seen_first_byte) {
-            seen_first_byte = true;
-            MaybeConsumeCodeCache();
-          }
+          if (code_cache_state_ == CodeCacheState::kBeforeFirstByte)
+            code_cache_state_ = MaybeConsumeCodeCache();
+
           DCHECK_NE(buffer, nullptr);
+          if (code_cache_state_ == CodeCacheState::kUseCodeCache) {
+            TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"),
+                         "v8.wasm.compileDigestForConsume");
+            digestor_.Update(
+                base::as_bytes(base::make_span(buffer, available)));
+          }
           streaming_->OnBytesReceived(reinterpret_cast<const uint8_t*>(buffer),
                                       available);
         }
@@ -191,9 +228,11 @@
           break;
         }
         case BytesConsumer::Result::kDone: {
+          TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"),
+                       "v8.wasm.compileConsumeDone");
           {
             ScriptState::Scope scope(script_state_);
-            streaming_->Finish();
+            streaming_->Finish(HasValidCodeCache());
           }
           client_->DidFetchDataLoadedCustomFormat();
           return;
@@ -258,9 +297,9 @@
     }
   }
 
-  void MaybeConsumeCodeCache() {
+  CodeCacheState MaybeConsumeCodeCache() {
     if (!cache_handler_)
-      return;
+      return CodeCacheState::kNoCodeCache;
 
     // We must wait until we see the first byte of the response body before
     // checking for GetCachedMetadata().  The serialized cache metadata is
@@ -273,14 +312,20 @@
                            "v8.wasm.moduleCacheHit", TRACE_EVENT_SCOPE_THREAD,
                            "url", url_.Utf8(), "consumedCacheSize",
                            cached_module->size());
-      bool is_valid = streaming_->SetCompiledModuleBytes(
-          reinterpret_cast<const uint8_t*>(cached_module->Data()),
-          cached_module->size());
+
+      bool is_valid =
+          cached_module->size() >= kWireBytesDigestSize &&
+          streaming_->SetCompiledModuleBytes(
+              reinterpret_cast<const uint8_t*>(cached_module->Data()) +
+                  kWireBytesDigestSize,
+              cached_module->size() - kWireBytesDigestSize);
+
       if (is_valid) {
         // Keep the buffer alive until V8 is ready to deserialize it.
         // TODO(bbudge) V8 should notify us if deserialization fails, so we
         // can release the data and reset the cache.
         streaming_client_->SetBuffer(cached_module);
+        return CodeCacheState::kUseCodeCache;
       } else {
         TRACE_EVENT_INSTANT0(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"),
                              "v8.wasm.moduleCacheInvalid",
@@ -295,10 +340,42 @@
         cache_handler_->ClearCachedMetadata(
             /*code_cache_host*/ nullptr,
             CachedMetadataHandler::kClearPersistentStorage);
+        return CodeCacheState::kNoCodeCache;
       }
+    } else {
+      return CodeCacheState::kNoCodeCache;
     }
   }
 
+  bool HasValidCodeCache() {
+    if (code_cache_state_ != CodeCacheState::kUseCodeCache)
+      return false;
+    if (!cache_handler_)
+      return false;
+    scoped_refptr<CachedMetadata> cached_module =
+        cache_handler_->GetCachedMetadata(kWasmModuleTag);
+    if (!cached_module)
+      return false;
+    if (cached_module->size() < kWireBytesDigestSize)
+      return false;
+
+    DigestValue wire_bytes_digest;
+    digestor_.Finish(wire_bytes_digest);
+    if (digestor_.has_failed() ||
+        memcmp(wire_bytes_digest.data(), cached_module->Data(),
+               kWireBytesDigestSize) != 0) {
+      TRACE_EVENT_INSTANT0(TRACE_DISABLED_BY_DEFAULT("devtools.timeline"),
+                           "v8.wasm.moduleCacheInvalidDigest",
+                           TRACE_EVENT_SCOPE_THREAD);
+      cache_handler_->ClearCachedMetadata(
+          /*code_cache_host*/ nullptr,
+          CachedMetadataHandler::kClearPersistentStorage);
+      return false;
+    }
+
+    return true;
+  }
+
   const String url_;
   Member<BytesConsumer> consumer_;
   Member<FetchDataLoader::Client> client_;
@@ -306,7 +383,8 @@
   const Member<ScriptState> script_state_;
   Member<ScriptCachedMetadataHandler> cache_handler_;
   std::shared_ptr<WasmStreamingClient> streaming_client_;
-  bool seen_first_byte = false;
+  CodeCacheState code_cache_state_;
+  Digestor digestor_;
 };
 
 // TODO(mtrofin): WasmDataLoaderClient is necessary so we may provide an
diff --git a/third_party/blink/renderer/platform/loader/fetch/cached_metadata.cc b/third_party/blink/renderer/platform/loader/fetch/cached_metadata.cc
index 04141eb..0eefe8f 100644
--- a/third_party/blink/renderer/platform/loader/fetch/cached_metadata.cc
+++ b/third_party/blink/renderer/platform/loader/fetch/cached_metadata.cc
@@ -62,7 +62,8 @@
   DCHECK(data_type_id);
   DCHECK(data);
 
-  vector_ = CachedMetadata::GetSerializedData(data_type_id, data, size);
+  vector_ = CachedMetadata::GetSerializedDataHeader(data_type_id, size);
+  vector_.Append(data, size);
 }
 
 CachedMetadata::CachedMetadata(mojo_base::BigBuffer data) {
diff --git a/third_party/blink/renderer/platform/loader/fetch/cached_metadata.h b/third_party/blink/renderer/platform/loader/fetch/cached_metadata.h
index 9b58818d..99677b7 100644
--- a/third_party/blink/renderer/platform/loader/fetch/cached_metadata.h
+++ b/third_party/blink/renderer/platform/loader/fetch/cached_metadata.h
@@ -63,17 +63,19 @@
         new CachedMetadata(data_type_id, data, SafeCast<wtf_size_t>(size)));
   }
 
-  static Vector<uint8_t> GetSerializedData(uint32_t data_type_id,
-                                           const uint8_t* data,
-                                           size_t size) {
+  // Returns a Vector containing the header of serialized metadata.
+  // Callers should append the body to the Vector to get the full serialized
+  // metadata.
+  // The actual body size can be different from `estimated_body_size`.
+  static Vector<uint8_t> GetSerializedDataHeader(
+      uint32_t data_type_id,
+      wtf_size_t estimated_body_size) {
     Vector<uint8_t> vector;
-    vector.ReserveInitialCapacity(kCachedMetaDataStart +
-                                  SafeCast<wtf_size_t>(size));
+    vector.ReserveInitialCapacity(kCachedMetaDataStart + estimated_body_size);
     uint32_t marker = CachedMetadataHandler::kSingleEntry;
     vector.Append(reinterpret_cast<const uint8_t*>(&marker), sizeof(uint32_t));
     vector.Append(reinterpret_cast<const uint8_t*>(&data_type_id),
                   sizeof(uint32_t));
-    vector.Append(data, SafeCast<wtf_size_t>(size));
     return vector;
   }