From 4d3349a24375c86048e5187c2d6166650394e7d4 Mon Sep 17 00:00:00 2001 From: Aviram Hassan Date: Sun, 24 Dec 2023 21:36:15 +0200 Subject: [PATCH] Add realpath hook (#2137) * wip * Added hook for realpath function, fixing issues on files not found in Java * .. --- changelog.d/+realpath-impl.fixed.md | 1 + mirrord/layer/src/file/hooks.rs | 63 +++++++++++++++++- mirrord/layer/src/file/ops.rs | 62 +++++++++++++++++- mirrord/layer/src/lib.rs | 1 + mirrord/layer/tests/apps/realpath/realpath.c | 67 ++++++++++++++++++++ mirrord/layer/tests/common/mod.rs | 4 ++ 6 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 changelog.d/+realpath-impl.fixed.md create mode 100644 mirrord/layer/tests/apps/realpath/realpath.c diff --git a/changelog.d/+realpath-impl.fixed.md b/changelog.d/+realpath-impl.fixed.md new file mode 100644 index 00000000000..eefb14ef869 --- /dev/null +++ b/changelog.d/+realpath-impl.fixed.md @@ -0,0 +1 @@ +Added hook for realpath function, fixing issues on files not found in Java \ No newline at end of file diff --git a/mirrord/layer/src/file/hooks.rs b/mirrord/layer/src/file/hooks.rs index f62b6203e3c..8324411124d 100644 --- a/mirrord/layer/src/file/hooks.rs +++ b/mirrord/layer/src/file/hooks.rs @@ -5,7 +5,7 @@ use core::ffi::{c_size_t, c_ssize_t}; /// /// NOTICE: If a file operation fails, it might be because it depends on some `libc` function /// that is not being hooked (`strace` the program to check). -use std::{os::unix::io::RawFd, ptr, slice, time::Duration}; +use std::{ffi::CString, os::unix::io::RawFd, ptr, slice, time::Duration}; use errno::{set_errno, Errno}; use libc::{ @@ -875,6 +875,51 @@ unsafe extern "C" fn fstatfs_detour(fd: c_int, out_stat: *mut statfs) -> c_int { .unwrap_or_bypass_with(|_| FN_FSTATFS(fd, out_stat)) } +unsafe fn realpath_logic( + source_path: *const c_char, + output_path: *mut c_char, +) -> Detour<*mut c_char> { + let path = source_path.checked_into(); + + realpath(path).map(|res| { + let path = CString::new(res.to_string_lossy().to_string()).unwrap(); + let path_len = path.as_bytes_with_nul().len(); + let output = if output_path.is_null() { + let res = libc::malloc(path_len) as *mut c_char; + if res.is_null() { + return std::ptr::null_mut(); + } + res + } else { + output_path + }; + + output + .copy_from_nonoverlapping(path.as_ptr(), usize::min(libc::PATH_MAX as usize, path_len)); + output + }) +} + +/// When path is handled by us, just make it absolute and return, since resolving it remotely +/// doesn't really matter for our case atm (might be in the future) +#[hook_guard_fn] +unsafe extern "C" fn realpath_detour( + source_path: *const c_char, + output_path: *mut c_char, +) -> *mut c_char { + realpath_logic(source_path, output_path) + .unwrap_or_bypass_with(|_| FN_REALPATH(source_path, output_path)) +} + +#[hook_guard_fn] +unsafe extern "C" fn realpath_darwin_extsn_detour( + source_path: *const c_char, + output_path: *mut c_char, +) -> *mut c_char { + realpath_logic(source_path, output_path) + .unwrap_or_bypass_with(|_| FN_REALPATH_DARWIN_EXTSN(source_path, output_path)) +} + // this requires newer libc which we don't link with to support old libc.. // leaving this in code so we can enable it when libc is updated. // #[cfg(target_os = "linux")] @@ -1014,6 +1059,22 @@ pub(crate) unsafe fn enable_file_hooks(hook_manager: &mut HookManager) { FN_FDATASYNC ); + replace!( + hook_manager, + "realpath", + realpath_detour, + FnRealpath, + FN_REALPATH + ); + + replace!( + hook_manager, + "realpath$DARWIN_EXTSN", + realpath_darwin_extsn_detour, + FnRealpath_darwin_extsn, + FN_REALPATH_DARWIN_EXTSN + ); + // this requires newer libc which we don't link with to support old libc.. // leaving this in code so we can enable it when libc is updated. // #[cfg(target_os = "linux")] diff --git a/mirrord/layer/src/file/ops.rs b/mirrord/layer/src/file/ops.rs index b3f2adb68be..e97c1e47d8d 100644 --- a/mirrord/layer/src/file/ops.rs +++ b/mirrord/layer/src/file/ops.rs @@ -382,7 +382,7 @@ pub(crate) fn fsync(fd: RawFd) -> Detour { /// that. /// rawish_path is Option> because we need to differentiate between null pointer /// and non existing argument (For error handling) -#[tracing::instrument(level = "trace")] +#[tracing::instrument(level = "trace", ret)] pub(crate) fn xstat( rawish_path: Option>, fd: Option, @@ -461,3 +461,63 @@ pub(crate) fn getdents64(fd: RawFd, buffer_size: u64) -> Detour PathBuf { + use std::path::Component; + let mut temp_path = PathBuf::new(); + temp_path.push("/"); + for c in path.components() { + match c { + Component::RootDir => {} + Component::CurDir => {} + Component::Normal(p) => temp_path.push(p), + Component::ParentDir => { + temp_path.pop(); + } + Component::Prefix(_) => {} + } + } + temp_path +} + +#[tracing::instrument(level = "trace")] +pub(crate) fn realpath(path: Detour) -> Detour { + let path = path?; + + if path.is_relative() { + // Calls with non absolute paths are sent to libc::open. + Detour::Bypass(Bypass::RelativePath(path.clone()))? + }; + + let realpath = absolute_path(path); + + ensure_not_ignored!(realpath, false); + + // check that file exists + xstat(Some(Detour::Success(realpath.clone())), None, true)?; + + Detour::Success(realpath) +} + +#[cfg(test)] +mod test { + use std::path::PathBuf; + + use super::absolute_path; + #[test] + fn test_absolute_normal() { + assert_eq!( + absolute_path(PathBuf::from("/a/b/c")), + PathBuf::from("/a/b/c") + ); + assert_eq!( + absolute_path(PathBuf::from("/a/b/../c")), + PathBuf::from("/a/c") + ); + assert_eq!( + absolute_path(PathBuf::from("/a/b/./c")), + PathBuf::from("/a/b/c") + ) + } +} diff --git a/mirrord/layer/src/lib.rs b/mirrord/layer/src/lib.rs index b76e903e168..8be5d7f2820 100644 --- a/mirrord/layer/src/lib.rs +++ b/mirrord/layer/src/lib.rs @@ -454,6 +454,7 @@ fn enable_hooks(enabled_file_ops: bool, enabled_remote_dns: bool, patch_binaries /// /// Removes the `fd` key from either [`SOCKETS`] or [`OPEN_FILES`]. /// **DON'T ADD LOGS HERE SINCE CALLER MIGHT CLOSE STDOUT/STDERR CAUSING THIS TO CRASH** +#[tracing::instrument(level = "trace")] pub(crate) fn close_layer_fd(fd: c_int) { // Remove from sockets. if let Some((_, socket)) = SOCKETS.remove(&fd) { diff --git a/mirrord/layer/tests/apps/realpath/realpath.c b/mirrord/layer/tests/apps/realpath/realpath.c new file mode 100644 index 00000000000..d426eb766d0 --- /dev/null +++ b/mirrord/layer/tests/apps/realpath/realpath.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include +#include +#include + +// first case: path is relative, should be resolved locally +// we trust the runner to abort if it is not the case +void relative_locally() { + const char path[] = "./fun/me"; + char *resolved_path = realpath(path, NULL); + assert(resolved_path == NULL); + assert(errno == ENOENT); +} + + +// second case: path is absolute, should be resolved locally +// we trust the runner to abort if it is not the case +void absolute_locally() { + const char path[] = "/dev/good/cake"; + char *resolved_path = realpath(path, NULL); + assert(resolved_path == NULL); + assert(errno == ENOENT); +} + +// third case: path is absolute, should be resolved remotely +void absolute_remotely() { + // /etc/hosts is usually resolved remotely + const char path[] = "/etc/../etc/hosts"; + char output_path[PATH_MAX] = {0}; + char *resolved_path = realpath(path, output_path); + assert(resolved_path == output_path); + // assert that resolved path is handled correctly + printf("resolved_path\n %s", resolved_path); + assert(strcmp(resolved_path, "/etc/hosts") == 0); +} + +// fourth case: path is absolute, we put null in output buffer, should be resolved remotely and allocate for us, and we free it. +void absolute_remotely_malloc() { + // /etc/hosts is usually resolved remotely + const char path[] = "/etc/../etc/hosts"; + char *resolved_path = realpath(path, NULL); + assert(resolved_path != NULL); + // assert that resolved path is handled correctly + assert(strcmp(resolved_path, "/etc/hosts") == 0); + free(resolved_path); +} + +// fifth case: path is absolute, doesn't exist - should return null and set appropriate errno +void absolute_remotely_doesnt_exist() { + const char path[] = "/etc/../etc/hosts"; + char *resolved_path = realpath(path, NULL); + assert(resolved_path == NULL); + printf("%d ernno", errno); + assert(errno == ENOENT); +} + +/// Test few cases of using realpath +int main() { + relative_locally(); + absolute_locally(); + absolute_remotely(); + absolute_remotely_malloc(); + absolute_remotely_doesnt_exist(); +} diff --git a/mirrord/layer/tests/common/mod.rs b/mirrord/layer/tests/common/mod.rs index 896dae2d238..8748387dbf7 100644 --- a/mirrord/layer/tests/common/mod.rs +++ b/mirrord/layer/tests/common/mod.rs @@ -653,6 +653,7 @@ pub enum Application { OpenFile, CIssue2055, RustIssue2058, + Realpath, // For running applications with the executable and arguments determined at runtime. DynamicApp(String, Vec), } @@ -686,6 +687,7 @@ impl Application { | Application::PythonListen => Self::get_python3_executable().await, Application::PythonFastApiHTTP => String::from("uvicorn"), Application::Fork => String::from("tests/apps/fork/out.c_test_app"), + Application::Realpath => String::from("tests/apps/realpath/out.c_test_app"), Application::NodeHTTP => String::from("node"), Application::JavaTemurinSip => format!( "{}/.sdkman/candidates/java/17.0.6-tem/bin/java", @@ -865,6 +867,7 @@ impl Application { | Application::Go19FAccessAt | Application::Go18FAccessAt | Application::Fork + | Application::Realpath | Application::RustFileOps | Application::RustIssue1123 | Application::RustIssue1054 @@ -919,6 +922,7 @@ impl Application { | Application::NodeSpawn | Application::BashShebang | Application::Fork + | Application::Realpath | Application::Go20Issue834 | Application::Go19Issue834 | Application::Go18Issue834