summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongsu Park <dongsu@kinvolk.io>2017-11-24 18:22:17 +0100
committerIago López Galeiras <iago@kinvolk.io>2017-12-13 10:21:06 +0000
commitd7bea6b6299677fe0b5ddc73ff313f93c3d453c7 (patch)
tree2756c012eaa6a89d5e13669b2f754f49d3a924c5
parentcb9eeb062c323391fcd98da0c30e844fa5162e90 (diff)
downloadsystemd-d7bea6b6299677fe0b5ddc73ff313f93c3d453c7.tar.gz
systemd-d7bea6b6299677fe0b5ddc73ff313f93c3d453c7.tar.bz2
systemd-d7bea6b6299677fe0b5ddc73ff313f93c3d453c7.zip
nspawn: introduce an option for specifying network namespace path
Add a new option `--network-namespace-path` to systemd-nspawn to allow users to specify an arbitrary network namespace, e.g. `/run/netns/foo`. Then systemd-nspawn will open the netns file, pass the fd to outer_child, and enter the namespace represented by the fd before running inner_child. ``` $ sudo ip netns add foo $ mount | grep /run/netns/foo nsfs on /run/netns/foo type nsfs (rw) ... $ sudo systemd-nspawn -D /srv/fc27 --network-namespace-path=/run/netns/foo \ /bin/readlink -f /proc/self/ns/net /proc/1/ns/net:[4026532009] ``` Note that the option `--network-namespace-path=` cannot be used together with other network-related options such as `--private-network` so that the options do not conflict with each other. Fixes https://github.com/systemd/systemd/issues/7361
-rw-r--r--man/systemd-nspawn.xml17
-rw-r--r--src/basic/missing.h8
-rw-r--r--src/basic/stat-util.c12
-rw-r--r--src/basic/stat-util.h1
-rw-r--r--src/nspawn/nspawn.c155
5 files changed, 141 insertions, 52 deletions
diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index cd45ceb2a1..3dbdf376d3 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -523,6 +523,23 @@
</varlistentry>
<varlistentry>
+ <term><option>--network-namespace-path=</option></term>
+
+ <listitem><para>Takes the path to a file representing a kernel
+ network namespace that the container shall run in. The specified path
+ should refer to a (possibly bind-mounted) network namespace file, as
+ exposed by the kernel below <filename>/proc/$PID/ns/net</filename>.
+ This makes the container enter the given network namespace. One of the
+ typical use cases is to give a network namespace under
+ <filename>/run/netns</filename> created by <citerefentry
+ project='man-pages'><refentrytitle>ip-netns</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
+ for example, <option>--network-namespace-path=/run/netns/foo</option>.
+ Note that this option cannot be used together with other
+ network-related options, such as <option>--private-network</option>
+ or <option>--network-interface=</option>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>--network-interface=</option></term>
<listitem><para>Assign the specified network interface to the
diff --git a/src/basic/missing.h b/src/basic/missing.h
index bbdded984f..790f9f55a5 100644
--- a/src/basic/missing.h
+++ b/src/basic/missing.h
@@ -1271,4 +1271,12 @@ struct fib_rule_uid_range {
# define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#endif
+#ifndef NSFS_MAGIC
+#define NSFS_MAGIC 0x6e736673
+#endif
+
+#ifndef NS_GET_NSTYPE
+#define NS_GET_NSTYPE _IO(0xb7, 0x3)
+#endif
+
#include "missing_syscall.h"
diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c
index c6b8507e9d..96fc8b3787 100644
--- a/src/basic/stat-util.c
+++ b/src/basic/stat-util.c
@@ -226,6 +226,18 @@ int fd_is_temporary_fs(int fd) {
return is_temporary_fs(&s);
}
+int fd_is_network_ns(int fd) {
+ int r;
+
+ r = fd_is_fs_type(fd, NSFS_MAGIC);
+ if (r <= 0)
+ return r;
+ r = ioctl(fd, NS_GET_NSTYPE);
+ if (r < 0)
+ return -errno;
+ return r == CLONE_NEWNET;
+}
+
int path_is_temporary_fs(const char *path) {
_cleanup_close_ int fd = -1;
diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h
index 8b8d128121..d8d3c20496 100644
--- a/src/basic/stat-util.h
+++ b/src/basic/stat-util.h
@@ -62,6 +62,7 @@ int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
bool is_temporary_fs(const struct statfs *s) _pure_;
int fd_is_temporary_fs(int fd);
+int fd_is_network_ns(int fd);
int path_is_temporary_fs(const char *path);
/* Because statfs.t_type can be int on some architectures, we have to cast
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index f217def92d..64819b5e85 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -190,6 +190,7 @@ static bool arg_network_veth = false;
static char **arg_network_veth_extra = NULL;
static char *arg_network_bridge = NULL;
static char *arg_network_zone = NULL;
+static char *arg_network_namespace_path = NULL;
static unsigned long arg_personality = PERSONALITY_INVALID;
static char *arg_image = NULL;
static VolatileMode arg_volatile_mode = VOLATILE_NO;
@@ -260,6 +261,9 @@ static void help(void) {
" and attach it to an existing bridge on the host\n"
" --network-zone=NAME Similar, but attach the new interface to an\n"
" an automatically managed bridge interface\n"
+ " --network-namespace-path=PATH\n"
+ " Set network namespace to the one represented by\n"
+ " the specified kernel namespace file node\n"
" -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
" Expose a container IP port on the host\n"
" -Z --selinux-context=SECLABEL\n"
@@ -434,6 +438,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_NETWORK_BRIDGE,
ARG_NETWORK_ZONE,
ARG_NETWORK_VETH_EXTRA,
+ ARG_NETWORK_NAMESPACE_PATH,
ARG_PERSONALITY,
ARG_VOLATILE,
ARG_TEMPLATE,
@@ -450,55 +455,56 @@ static int parse_argv(int argc, char *argv[]) {
};
static const struct option options[] = {
- { "help", no_argument, NULL, 'h' },
- { "version", no_argument, NULL, ARG_VERSION },
- { "directory", required_argument, NULL, 'D' },
- { "template", required_argument, NULL, ARG_TEMPLATE },
- { "ephemeral", no_argument, NULL, 'x' },
- { "user", required_argument, NULL, 'u' },
- { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
- { "as-pid2", no_argument, NULL, 'a' },
- { "boot", no_argument, NULL, 'b' },
- { "uuid", required_argument, NULL, ARG_UUID },
- { "read-only", no_argument, NULL, ARG_READ_ONLY },
- { "capability", required_argument, NULL, ARG_CAPABILITY },
- { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
- { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
- { "bind", required_argument, NULL, ARG_BIND },
- { "bind-ro", required_argument, NULL, ARG_BIND_RO },
- { "tmpfs", required_argument, NULL, ARG_TMPFS },
- { "overlay", required_argument, NULL, ARG_OVERLAY },
- { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
- { "machine", required_argument, NULL, 'M' },
- { "slice", required_argument, NULL, 'S' },
- { "setenv", required_argument, NULL, 'E' },
- { "selinux-context", required_argument, NULL, 'Z' },
- { "selinux-apifs-context", required_argument, NULL, 'L' },
- { "quiet", no_argument, NULL, 'q' },
- { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */
- { "register", required_argument, NULL, ARG_REGISTER },
- { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
- { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
- { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
- { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
- { "network-veth", no_argument, NULL, 'n' },
- { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA },
- { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
- { "network-zone", required_argument, NULL, ARG_NETWORK_ZONE },
- { "personality", required_argument, NULL, ARG_PERSONALITY },
- { "image", required_argument, NULL, 'i' },
- { "volatile", optional_argument, NULL, ARG_VOLATILE },
- { "port", required_argument, NULL, 'p' },
- { "property", required_argument, NULL, ARG_PROPERTY },
- { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
- { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
- { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
- { "settings", required_argument, NULL, ARG_SETTINGS },
- { "chdir", required_argument, NULL, ARG_CHDIR },
- { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
- { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
- { "root-hash", required_argument, NULL, ARG_ROOT_HASH },
- { "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "directory", required_argument, NULL, 'D' },
+ { "template", required_argument, NULL, ARG_TEMPLATE },
+ { "ephemeral", no_argument, NULL, 'x' },
+ { "user", required_argument, NULL, 'u' },
+ { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
+ { "as-pid2", no_argument, NULL, 'a' },
+ { "boot", no_argument, NULL, 'b' },
+ { "uuid", required_argument, NULL, ARG_UUID },
+ { "read-only", no_argument, NULL, ARG_READ_ONLY },
+ { "capability", required_argument, NULL, ARG_CAPABILITY },
+ { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
+ { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
+ { "bind", required_argument, NULL, ARG_BIND },
+ { "bind-ro", required_argument, NULL, ARG_BIND_RO },
+ { "tmpfs", required_argument, NULL, ARG_TMPFS },
+ { "overlay", required_argument, NULL, ARG_OVERLAY },
+ { "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
+ { "machine", required_argument, NULL, 'M' },
+ { "slice", required_argument, NULL, 'S' },
+ { "setenv", required_argument, NULL, 'E' },
+ { "selinux-context", required_argument, NULL, 'Z' },
+ { "selinux-apifs-context", required_argument, NULL, 'L' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */
+ { "register", required_argument, NULL, ARG_REGISTER },
+ { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
+ { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
+ { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
+ { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
+ { "network-veth", no_argument, NULL, 'n' },
+ { "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA },
+ { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
+ { "network-zone", required_argument, NULL, ARG_NETWORK_ZONE },
+ { "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH },
+ { "personality", required_argument, NULL, ARG_PERSONALITY },
+ { "image", required_argument, NULL, 'i' },
+ { "volatile", optional_argument, NULL, ARG_VOLATILE },
+ { "port", required_argument, NULL, 'p' },
+ { "property", required_argument, NULL, ARG_PROPERTY },
+ { "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
+ { "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
+ { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
+ { "settings", required_argument, NULL, ARG_SETTINGS },
+ { "chdir", required_argument, NULL, ARG_CHDIR },
+ { "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
+ { "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
+ { "root-hash", required_argument, NULL, ARG_ROOT_HASH },
+ { "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
{}
};
@@ -644,6 +650,13 @@ static int parse_argv(int argc, char *argv[]) {
arg_settings_mask |= SETTING_NETWORK;
break;
+ case ARG_NETWORK_NAMESPACE_PATH:
+ r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path);
+ if (r < 0)
+ return r;
+
+ break;
+
case 'b':
if (arg_start_mode == START_PID2) {
log_error("--boot and --as-pid2 may not be combined.");
@@ -1103,6 +1116,17 @@ static int parse_argv(int argc, char *argv[]) {
assert_not_reached("Unhandled option");
}
+ /* If --network-namespace-path is given with any other network-related option,
+ * we need to error out, to avoid conflicts between different network options. */
+ if (arg_network_namespace_path &&
+ (arg_network_interfaces || arg_network_macvlan ||
+ arg_network_ipvlan || arg_network_veth_extra ||
+ arg_network_bridge || arg_network_zone ||
+ arg_network_veth || arg_private_network)) {
+ log_error("--network-namespace-path cannot be combined with other network options.");
+ return -EINVAL;
+ }
+
parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC);
parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID);
parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS);
@@ -2532,12 +2556,14 @@ static int outer_child(
int rtnl_socket,
int uid_shift_socket,
int unified_cgroup_hierarchy_socket,
- FDSet *fds) {
+ FDSet *fds,
+ int netns_fd) {
pid_t pid;
ssize_t l;
int r;
_cleanup_close_ int fd = -1;
+ bool create_netns;
assert(barrier);
assert(directory);
@@ -2788,9 +2814,11 @@ static int outer_child(
if (fd < 0)
return fd;
+ create_netns = !arg_network_namespace_path && arg_private_network;
+
pid = raw_clone(SIGCHLD|CLONE_NEWNS|
arg_clone_ns_flags |
- (arg_private_network ? CLONE_NEWNET : 0) |
+ (create_netns ? CLONE_NEWNET : 0) |
(arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0));
if (pid < 0)
return log_error_errno(errno, "Failed to fork inner child: %m");
@@ -2804,6 +2832,12 @@ static int outer_child(
* requested, so that we all are owned by the user if
* user namespaces are turned on. */
+ if (arg_network_namespace_path) {
+ r = namespace_enter(-1, -1, netns_fd, -1, -1);
+ if (r < 0)
+ return r;
+ }
+
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
if (r < 0)
_exit(EXIT_FAILURE);
@@ -2836,6 +2870,7 @@ static int outer_child(
notify_socket = safe_close(notify_socket);
kmsg_socket = safe_close(kmsg_socket);
rtnl_socket = safe_close(rtnl_socket);
+ netns_fd = safe_close(netns_fd);
return 0;
}
@@ -3311,6 +3346,7 @@ static int run(int master,
int ifi = 0, r;
ssize_t l;
sigset_t mask_chld;
+ _cleanup_close_ int netns_fd = -1;
assert_se(sigemptyset(&mask_chld) == 0);
assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
@@ -3365,6 +3401,20 @@ static int run(int master,
if (r < 0)
return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
+ if (arg_network_namespace_path) {
+ netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netns_fd < 0)
+ return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path);
+
+ r = fd_is_network_ns(netns_fd);
+ if (r < 0 && r != -ENOTTY)
+ return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path);
+ if (r == 0) {
+ log_error("Path %s doesn't refer to a network namespace", arg_network_namespace_path);
+ return -EINVAL;
+ }
+ }
+
*pid = raw_clone(SIGCHLD|CLONE_NEWNS);
if (*pid < 0)
return log_error_errno(errno, "clone() failed%s: %m",
@@ -3401,7 +3451,8 @@ static int run(int master,
rtnl_socket_pair[1],
uid_shift_socket_pair[1],
unified_cgroup_hierarchy_socket_pair[1],
- fds);
+ fds,
+ netns_fd);
if (r < 0)
_exit(EXIT_FAILURE);