From 9a03f3763af672e29f4ac1798e087b5b35b6465d Mon Sep 17 00:00:00 2001 From: The 8472 Date: Sun, 29 Sep 2024 00:27:50 +0200 Subject: [PATCH 1/2] avoid phi node for pointers flowing into Vec appends --- library/alloc/src/vec/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 1984cfeefc14b..5b318ec12fb3d 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2519,6 +2519,14 @@ impl Vec { #[inline] unsafe fn append_elements(&mut self, other: *const [T]) { let count = unsafe { (*other).len() }; + if count == 0 { + // The early return is not necessary for correctness, but in cases + // where LLVM sees all the way to the allocation site of `other` + // this can avoid a phi-node merging the two different pointers + // when zero-length allocations are special-cased. + // That in turn can enable more optimizations around the memcpy below. + return; + } self.reserve(count); let len = self.len(); unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; From c44e0f45cd80bbf981c409eec9fc5895205c1576 Mon Sep 17 00:00:00 2001 From: The 8472 Date: Mon, 30 Sep 2024 23:19:03 +0200 Subject: [PATCH 2/2] test that the phi node got eliminated --- library/alloc/src/slice.rs | 6 +++ library/alloc/src/vec/mod.rs | 8 ++-- .../lib-optimizations/append-elements.rs | 37 +++++++++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 tests/codegen/lib-optimizations/append-elements.rs diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs index f636f10d5c08c..97353fd0964d5 100644 --- a/library/alloc/src/slice.rs +++ b/library/alloc/src/slice.rs @@ -160,6 +160,12 @@ pub(crate) mod hack { impl ConvertVec for T { #[inline] fn to_vec(s: &[Self], alloc: A) -> Vec { + if s.is_empty() { + // The early return is not necessary for correctness, but it helps + // LLVM by avoiding phi nodes flowing into memcpy. + // See codegen/lib-optimizations/append-elements.rs + return Vec::new_in(alloc); + } let mut v = Vec::with_capacity_in(s.len(), alloc); // SAFETY: // allocated above with the capacity of `s`, and initialize to `s.len()` in diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 5b318ec12fb3d..4212a9bb28fbf 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2520,11 +2520,9 @@ impl Vec { unsafe fn append_elements(&mut self, other: *const [T]) { let count = unsafe { (*other).len() }; if count == 0 { - // The early return is not necessary for correctness, but in cases - // where LLVM sees all the way to the allocation site of `other` - // this can avoid a phi-node merging the two different pointers - // when zero-length allocations are special-cased. - // That in turn can enable more optimizations around the memcpy below. + // The early return is not necessary for correctness, but it helps + // LLVM by avoiding phi nodes flowing into memcpy. + // See codegen/lib-optimizations/append-elements.rs return; } self.reserve(count); diff --git a/tests/codegen/lib-optimizations/append-elements.rs b/tests/codegen/lib-optimizations/append-elements.rs new file mode 100644 index 0000000000000..85a9eca2c8912 --- /dev/null +++ b/tests/codegen/lib-optimizations/append-elements.rs @@ -0,0 +1,37 @@ +//@ compile-flags: -O +#![crate_type = "lib"] + +//! Check that pointer flowing into vec.append comes straight from an allocation +//! and not through a phi node that merges the allocation and zero-length cases. +//! With this and https://github.com/llvm/llvm-project/pull/110280 the intermediate +//! allocation should be optimized away in the future. + +// CHECK-LABEL: @vec_append_with_temp_alloc +#[no_mangle] +pub fn vec_append_with_temp_alloc(dst: &mut Vec, src: &[u8]) { + // CHECK: %[[TEMP:.+]] = tail call noalias noundef ptr @__rust_alloc + + // First memcpy, it uses the src pointer directly + // CHECK: call void @llvm.memcpy.{{.*}}%src.0 + let temp = src.to_vec(); + + // final memcpy to destination + // CHECK: call void @llvm.memcpy.{{.*}}%dst.i{{.*}}%[[TEMP]] + dst.extend(&temp); + // CHECK: ret +} + +// CHECK-LABEL: @string_append_with_temp_alloc +#[no_mangle] +pub fn string_append_with_temp_alloc(dst: &mut String, src: &str) { + // CHECK: %[[TEMP:.+]] = tail call noalias noundef ptr @__rust_alloc + + // First memcpy, it uses the src pointer directly + // CHECK: call void @llvm.memcpy.{{.*}}%src.0 + let temp = src.to_string(); + + // final memcpy to destination + // CHECK: call void @llvm.memcpy.{{.*}}%dst.i{{.*}}%[[TEMP]] + dst.push_str(&temp); + // CHECK: ret +}