commit 0fec7e4a742f001c9816a8b58a1120fb44230867 Author: aszlig Date: Thu May 16 14:17:56 2013 +0200 zygote: Add support for user namespaces on Linux. The implementation is done by patching the Zygote host to execute the sandbox binary with CLONE_NEWUSER and setting the uid and gid mapping so that the child process is using uid 0 and gid 0 which map to the current user of the parent. Afterwards, the sandbox will continue as if it was called as a setuid binary. In addition, this adds new_user_namespace as an option in process_util in order to set the UID and GID mapping correctly. The reason for this is that just passing CLONE_NEWUSER to clone_flags doesn't help in LaunchProcess(), because without setting the mappings exec*() will clear the process's capability sets. If the kernel doesn't support unprivileged user namespaces and the sandbox binary doesn't have the setuid flag, the Zygote main process will run without a sandbox. This is to mimic the behaviour if no SUID sandbox binary path is set. Signed-off-by: aszlig diff --git a/base/process/launch.cc b/base/process/launch.cc index 81748f5..930f20f 100644 --- a/base/process/launch.cc +++ b/base/process/launch.cc @@ -26,6 +26,7 @@ LaunchOptions::LaunchOptions() #if defined(OS_LINUX) , clone_flags(0) , allow_new_privs(false) + , new_user_namespace(false) #endif // OS_LINUX #if defined(OS_CHROMEOS) , ctrl_terminal_fd(-1) diff --git a/base/process/launch.h b/base/process/launch.h index 9e39fba..00e4c79 100644 --- a/base/process/launch.h +++ b/base/process/launch.h @@ -115,6 +115,9 @@ struct BASE_EXPORT LaunchOptions { // By default, child processes will have the PR_SET_NO_NEW_PRIVS bit set. If // true, then this bit will not be set in the new child process. bool allow_new_privs; + + // If true, start the process in a new user namespace. + bool new_user_namespace; #endif // defined(OS_LINUX) #if defined(OS_CHROMEOS) diff --git a/base/process/launch_posix.cc b/base/process/launch_posix.cc index 457234f..a99ce9b 100644 --- a/base/process/launch_posix.cc +++ b/base/process/launch_posix.cc @@ -40,6 +40,10 @@ #if defined(OS_LINUX) #include +#include +#if !defined(CLONE_NEWUSER) +#define CLONE_NEWUSER 0x10000000 +#endif #endif #if defined(OS_CHROMEOS) @@ -301,13 +305,23 @@ bool LaunchProcess(const std::vector& argv, pid_t pid; #if defined(OS_LINUX) - if (options.clone_flags) { + int map_pipe_fd[2]; + int flags = options.clone_flags; + + if (options.new_user_namespace) { + flags |= CLONE_NEWUSER; + if (pipe(map_pipe_fd) < 0) { + DPLOG(ERROR) << "user namespace pipe"; + return false; + } + } + + if (options.clone_flags || options.new_user_namespace) { // Signal handling in this function assumes the creation of a new // process, so we check that a thread is not being created by mistake // and that signal handling follows the process-creation rules. - RAW_CHECK( - !(options.clone_flags & (CLONE_SIGHAND | CLONE_THREAD | CLONE_VM))); - pid = syscall(__NR_clone, options.clone_flags, 0, 0, 0); + RAW_CHECK(!(flags & (CLONE_SIGHAND | CLONE_THREAD | CLONE_VM))); + pid = syscall(__NR_clone, flags, 0, 0, 0); } else #endif { @@ -328,6 +342,21 @@ bool LaunchProcess(const std::vector& argv, // DANGER: no calls to malloc or locks are allowed from now on: // http://crbug.com/36678 +#if defined(OS_LINUX) + if (options.new_user_namespace) { + // Close the write end of the pipe so we get an EOF when the parent closes + // the FD. This is to avoid race conditions when the UID/GID mappings are + // written _after_ execvp(). + close(map_pipe_fd[1]); + + char dummy; + if (HANDLE_EINTR(read(map_pipe_fd[0], &dummy, 1)) != 0) { + RAW_LOG(ERROR, "Unexpected input in uid/gid mapping pipe."); + _exit(127); + } + } +#endif + // DANGER: fork() rule: in the child, if you don't end up doing exec*(), // you call _exit() instead of exit(). This is because _exit() does not // call any previously-registered (in the parent) exit handlers, which @@ -452,6 +481,40 @@ bool LaunchProcess(const std::vector& argv, _exit(127); } else { // Parent process +#if defined(OS_LINUX) + if (options.new_user_namespace) { + // We need to write UID/GID mapping here to map the current user outside + // the namespace to the root user inside the namespace in order to + // correctly "fool" the child process. + char buf[256]; + int map_fd, map_len; + + snprintf(buf, sizeof(buf), "/proc/%d/uid_map", pid); + map_fd = open(buf, O_RDWR); + DPCHECK(map_fd >= 0); + snprintf(buf, sizeof(buf), "0 %d 1", geteuid()); + map_len = strlen(buf); + if (write(map_fd, buf, map_len) != map_len) { + RAW_LOG(WARNING, "Can't write to uid_map."); + } + close(map_fd); + + snprintf(buf, sizeof(buf), "/proc/%d/gid_map", pid); + map_fd = open(buf, O_RDWR); + DPCHECK(map_fd >= 0); + snprintf(buf, sizeof(buf), "0 %d 1", getegid()); + map_len = strlen(buf); + if (write(map_fd, buf, map_len) != map_len) { + RAW_LOG(WARNING, "Can't write to gid_map."); + } + close(map_fd); + + // Close the pipe on the parent, so the child can continue doing the + // execvp() call. + close(map_pipe_fd[1]); + } +#endif + if (options.wait) { // While this isn't strictly disk IO, waiting for another process to // finish is the sort of thing ThreadRestrictions is trying to prevent. diff --git a/content/browser/zygote_host/zygote_host_impl_linux.cc b/content/browser/zygote_host/zygote_host_impl_linux.cc index 9d63ad9..0885705 100644 --- a/content/browser/zygote_host/zygote_host_impl_linux.cc +++ b/content/browser/zygote_host/zygote_host_impl_linux.cc @@ -144,6 +144,9 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) { // A non empty sandbox_cmd means we want a SUID sandbox. using_suid_sandbox_ = !sandbox_cmd.empty(); + bool userns_sandbox = false; + const std::vector cmd_line_unwrapped(cmd_line.argv()); + // Start up the sandbox host process and get the file descriptor for the // renderers to talk to it. const int sfd = RenderSandboxHostLinux::GetInstance()->GetRendererSocket(); @@ -156,11 +159,24 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) { sandbox_client->PrependWrapper(&cmd_line); sandbox_client->SetupLaunchOptions(&options, &fds_to_map, &dummy_fd); sandbox_client->SetupLaunchEnvironment(); + userns_sandbox = sandbox_client->IsNoSuid(); } base::ProcessHandle process = -1; options.fds_to_remap = &fds_to_map; + if (userns_sandbox) + options.new_user_namespace = true; base::LaunchProcess(cmd_line.argv(), options, &process); + + if (process == -1 && userns_sandbox) { + LOG(ERROR) << "User namespace sandbox failed to start, running without " + << "sandbox! You need at least kernel 3.8.0 with CONFIG_USER_NS " + << "enabled in order to use the sandbox without setuid bit."; + using_suid_sandbox_ = false; + options.new_user_namespace = false; + base::LaunchProcess(cmd_line_unwrapped, options, &process); + } + CHECK(process != -1) << "Failed to launch zygote process"; dummy_fd.reset(); diff --git a/content/zygote/zygote_main_linux.cc b/content/zygote/zygote_main_linux.cc index 11f0602..b7b8214 100644 --- a/content/zygote/zygote_main_linux.cc +++ b/content/zygote/zygote_main_linux.cc @@ -389,6 +389,13 @@ static bool EnterSuidSandbox(sandbox::SetuidSandboxClient* setuid_sandbox) { CHECK(CreateInitProcessReaper()); } + // Don't set non-dumpable, as it causes trouble when the host tries to find + // the zygote process (XXX: Not quite sure why this happens with user + // namespaces). Fortunately, we also have the seccomp filter sandbox which + // should disallow the use of ptrace. + if (setuid_sandbox->IsNoSuid()) + return true; + #if !defined(OS_OPENBSD) // Previously, we required that the binary be non-readable. This causes the // kernel to mark the process as non-dumpable at startup. The thinking was diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.cc b/sandbox/linux/suid/client/setuid_sandbox_client.cc index fc03cdd..a972faa 100644 --- a/sandbox/linux/suid/client/setuid_sandbox_client.cc +++ b/sandbox/linux/suid/client/setuid_sandbox_client.cc @@ -229,6 +229,10 @@ bool SetuidSandboxClient::IsInNewNETNamespace() const { return env_->HasVar(kSandboxNETNSEnvironmentVarName); } +bool SetuidSandboxClient::IsNoSuid() const { + return env_->HasVar(kSandboxNoSuidVarName); +} + bool SetuidSandboxClient::IsSandboxed() const { return sandboxed_; } @@ -277,8 +281,7 @@ void SetuidSandboxClient::PrependWrapper(base::CommandLine* cmd_line) { "LinuxSUIDSandboxDevelopment."; } - if (access(sandbox_binary.c_str(), X_OK) != 0 || (st.st_uid != 0) || - ((st.st_mode & S_ISUID) == 0) || ((st.st_mode & S_IXOTH)) == 0) { + if (access(sandbox_binary.c_str(), X_OK) != 0) { LOG(FATAL) << "The SUID sandbox helper binary was found, but is not " "configured correctly. Rather than run without sandboxing " "I'm aborting now. You need to make sure that " @@ -286,6 +289,12 @@ void SetuidSandboxClient::PrependWrapper(base::CommandLine* cmd_line) { } cmd_line->PrependWrapper(sandbox_binary); + + if (!((st.st_uid == 0) && + (st.st_mode & S_ISUID) && + (st.st_mode & S_IXOTH))) { + env_->SetVar(kSandboxNoSuidVarName, "1"); + } } void SetuidSandboxClient::SetupLaunchOptions( diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.h b/sandbox/linux/suid/client/setuid_sandbox_client.h index 2bbad7a..8605475 100644 --- a/sandbox/linux/suid/client/setuid_sandbox_client.h +++ b/sandbox/linux/suid/client/setuid_sandbox_client.h @@ -66,6 +66,8 @@ class SANDBOX_EXPORT SetuidSandboxClient { bool IsInNewPIDNamespace() const; // Did the setuid helper create a new network namespace ? bool IsInNewNETNamespace() const; + // Is sandboxed without SUID binary ? + bool IsNoSuid() const; // Are we done and fully sandboxed ? bool IsSandboxed() const; diff --git a/sandbox/linux/suid/common/sandbox.h b/sandbox/linux/suid/common/sandbox.h index 9345287..2db659e 100644 --- a/sandbox/linux/suid/common/sandbox.h +++ b/sandbox/linux/suid/common/sandbox.h @@ -15,6 +15,7 @@ static const char kAdjustOOMScoreSwitch[] = "--adjust-oom-score"; static const char kSandboxDescriptorEnvironmentVarName[] = "SBX_D"; static const char kSandboxHelperPidEnvironmentVarName[] = "SBX_HELPER_PID"; +static const char kSandboxNoSuidVarName[] = "SBX_NO_SUID"; static const long kSUIDSandboxApiNumber = 1; static const char kSandboxEnvironmentApiRequest[] = "SBX_CHROME_API_RQ"; diff --git a/sandbox/linux/suid/sandbox.c b/sandbox/linux/suid/sandbox.c index 7410b71..a83593d 100644 --- a/sandbox/linux/suid/sandbox.c +++ b/sandbox/linux/suid/sandbox.c @@ -330,7 +330,7 @@ static bool DropRoot() { return false; } - if (setresgid(rgid, rgid, rgid)) { + if (egid != rgid && setresgid(rgid, rgid, rgid)) { perror("setresgid"); return false; }