Imported Upstream version 2.69 (refs: upstream/2.69, upstream)

author: Dariusz Michaluk <d.michaluk@samsung.com> 2024-02-14 13:15:33 +0100
committer: Dariusz Michaluk <d.michaluk@samsung.com> 2024-02-14 13:15:33 +0100
commit: 46a71608a1c1f277922adf2a82c4ab1e4d7ad320 (patch)
tree: 13e7d9070ff0f463f1ebb644cf5afa3d90277043
parent: bcca19a055cbdb0d67985ac08fef491f6d4bb42b (diff)
download: libcap-upstream.tar.gz
libcap-upstream.tar.bz2
libcap-upstream.zip
258 files changed, 20505 insertions, 1334 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8698f19
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+patches/
+*.o
+*~
+*.cf
diff --git a/CHANGELOG b/CHANGELOG
index 70e5faf..5489c48 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,17 +1,15 @@
-For release notes and other info pointers:
+For release notes and other info pointers for submitting patches etc.:
 
-  http://sites.google.com/site/fullycapable/
+	http://sites.google.com/site/fullycapable/
 
-See GIT repository for detailed source history
+See GIT repository for detailed source history:
 
-  https://git.kernel.org/cgit/linux/kernel/git/morgan/libcap.git/
+	https://git.kernel.org/pub/scm/libs/libcap/libcap.git/
 
 Or simply download the source:
 
-  git clone git://git.kernel.org/pub/scm/linux/kernel/git/morgan/libcap.git
+	git clone git://git.kernel.org/pub/scm/libs/libcap/libcap.git
 
 The license for this library is here:
 
-  https://git.kernel.org/cgit/linux/kernel/git/morgan/libcap.git/tree/License
-
-please submit patches compatible with this to morgan at kernel.org.
+	https://git.kernel.org/pub/scm/libs/libcap/libcap.git/tree/License
diff --git a/License b/License
index 8a352bc..2398977 100644
--- a/License
+++ b/License
@@ -1,8 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */
+
 Unless otherwise *explicitly* stated, the following text describes the
 licensed conditions under which the contents of this libcap release
-may be used and distributed:
+may be used and distributed.
+
+The licensed conditions are one or the other of these two Licenses:
+
+  - BSD 3-clause
+  - GPL v2.0
 
 -------------------------------------------------------------------------
+BSD 3-clause:
+-------------
+
 Redistribution and use in source and binary forms of libcap, with
 or without modification, are permitted provided that the following
 conditions are met:
@@ -20,13 +30,6 @@ conditions are met:
    products derived from this software without their specific prior
    written permission.
 
-ALTERNATIVELY, this product may be distributed under the terms of the
-GNU General Public License (v2.0 - see below), in which case the
-provisions of the GNU GPL are required INSTEAD OF the above
-restrictions.  (This clause is necessary due to a potential conflict
-between the GNU GPL and the restrictions contained in a BSD-style
-copyright.)
-
 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
@@ -38,7 +41,15 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 DAMAGE.
+
 -------------------------------------------------------------------------
+GPL v2.0:
+---------
+
+ALTERNATIVELY, this product may be distributed under the terms of the
+GNU General Public License (v2.0 - see below), in which case the
+provisions of the GNU GPL are required INSTEAD OF the above
+restrictions.
 
 -------------------------
 Full text of gpl-2.0.txt:
diff --git a/Make.Rules b/Make.Rules
index ec1c474..721fc7a 100644
--- a/Make.Rules
+++ b/Make.Rules
@@ -1,8 +1,13 @@
+# Common version number defines for libcap
+LIBTITLE=libcap
+VERSION=2
+MINOR=69
+
 #
 ## Optional prefixes:
 #
 
-# common 'packaging' directoty
+# common 'packaging' directory
 
 FAKEROOT=$(DESTDIR)
 
@@ -13,7 +18,15 @@ FAKEROOT=$(DESTDIR)
 # administrative operations that could be needed to recover a system.
 
 ifndef lib
-lib=$(shell ldd /usr/bin/ld|egrep "ld-linux|ld.so"|cut -d/ -f2)
+lib=$(shell ldd /usr/bin/ld|grep -E "ld-linux|ld.so"|cut -d/ -f2)
+endif
+
+ifndef sbin
+sbin=sbin
+endif
+
+ifdef sbindir
+sbin=$(sbindir)
 endif
 
 ifdef prefix
@@ -31,55 +44,156 @@ endif
 
 # Target directories
 
-MANDIR=$(FAKEROOT)$(man_prefix)/man
-SBINDIR=$(FAKEROOT)$(exec_prefix)/sbin
-INCDIR=$(FAKEROOT)$(inc_prefix)/include
-LIBDIR=$(FAKEROOT)$(lib_prefix)/$(lib)
-PKGCONFIGDIR=$(FAKEROOT)$(prefix)/$(lib)/pkgconfig
+MANDIR=$(man_prefix)/man
+SBINDIR=$(exec_prefix)/$(sbin)
+INCDIR=$(inc_prefix)/include
+LIBDIR=$(lib_prefix)/$(lib)
+PKGCONFIGDIR=$(LIBDIR)/pkgconfig
+GOPKGDIR=$(prefix)/share/gocode/src
 
-# common defines for libcap
-LIBTITLE=libcap
-VERSION=2
-MINOR=24
-#
+# From here on out, the Go module packages should always remain
+# backwardly compatible. I will only resort to using major version 2
+# etc if Go's syntax dramatically changes in a backwards incompatible
+# manner. (Let's hope not. If that happens, I'll also drop deprecated
+# API functions.)
+GOMAJOR=1
 
 # Compilation specifics
 
 KERNEL_HEADERS := $(topdir)/libcap/include/uapi
-IPATH += -fPIC -I$(KERNEL_HEADERS) -I$(topdir)/libcap/include
-
-CC := gcc
-CFLAGS := -O2 -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-BUILD_CC := $(CC)
-BUILD_CFLAGS := $(CFLAGS) $(IPATH)
-AR := ar
-RANLIB := ranlib
-DEBUG = -g #-DDEBUG
-WARNINGS=-Wall -Wwrite-strings \
-        -Wpointer-arith -Wcast-qual -Wcast-align \
-        -Wstrict-prototypes -Wmissing-prototypes \
-        -Wnested-externs -Winline -Wshadow
-LD=$(CC) -Wl,-x -shared
-LDFLAGS := #-g
-
+LIBCAP_INCLUDES = -I$(KERNEL_HEADERS) -I$(topdir)/libcap/include
+DEFINES := -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 SYSTEM_HEADERS = /usr/include
+
+SUDO := sudo
+CC := $(CROSS_COMPILE)gcc
+LD := $(CC) -Wl,-x -shared
+AR := $(CROSS_COMPILE)ar
+RANLIB := $(CROSS_COMPILE)ranlib
+OBJCOPY := $(CROSS_COMPILE)objcopy
+
+# Reference:
+#   CPPFLAGS used for building .o files from .c & .h files
+#   CFLAGS used when building libraries from .o, .c and .h files
+
+DEBUG = # -g -DDEBUG
+WARNINGS=-Wall -Wwrite-strings -Wpointer-arith -Wcast-qual -Wcast-align \
+	-Wstrict-prototypes -Wmissing-prototypes -Wnested-externs       \
+	-Winline -Wshadow -Wunreachable-code
+COPTS ?= -O2
+CFLAGS ?= $(COPTS) $(DEBUG)
+CFLAGS += $(WARNINGS)
+CPPFLAGS += -Dlinux $(DEFINES) $(LIBCAP_INCLUDES)
+LDFLAGS ?= # -g
+
+BUILD_CC ?= $(CC)
+BUILD_LD ?= $(BUILD_CC) -Wl,-x -shared
+BUILD_COPTS ?= $(COPTS)
+BUILD_CFLAGS ?= $(BUILD_COPTS)
+BUILD_CPPFLAGS += -Dlinux $(WARNINGS) $(DEBUG) $(DEFINES) $(LIBCAP_INCLUDES)
+BUILD_LDFLAGS ?= $(LDFLAGS)
+BUILD_SED ?= sed
+BUILD_GREP ?= grep
+BUILD_EGREP ?= $(BUILD_GREP) -E
+BUILD_FGREP ?= $(BUILD_GREP) -F
+
+# Plan to eventually redefine BUILD_GPERF to be the actual gperf tool
+# alias as per above. Typical distributions are up to a year behind
+# HEAD so we'll not do that before 2023-01-01.
+ifdef BUILD_GPERF
+$(error BUILD_GPERF is now reserved, please use USE_GPERF=yes or no instead)
+endif
+
+USE_GPERF ?= $(shell which gperf >/dev/null 2>/dev/null && echo yes)
+
+LIBCAPLIB := -L$(topdir)/libcap -lcap
+PSXLINKFLAGS :=  -lpthread -Wl,-wrap,pthread_create
+LIBPSXLIB := -L$(topdir)/libcap -lpsx $(PSXLINKFLAGS)
+
 INCS=$(topdir)/libcap/include/sys/capability.h
-LDFLAGS += -L$(topdir)/libcap
-CFLAGS += -Dlinux $(WARNINGS) $(DEBUG)
-PAM_CAP := $(shell if [ -f /usr/include/security/pam_modules.h ]; then echo yes ; else echo no ; fi)
-INDENT := $(shell if [ -n "$(which indent 2>/dev/null)" ]; then echo "| indent -kr" ; fi)
-DYNAMIC := $(shell if [ ! -d "$(topdir)/.git" ]; then echo yes; fi)
-LIBATTR := yes
-
-# When installing setcap, set its inheritable bit to be able to place
-# capabilities on files. It can be used in conjunction with pam_cap
-# (associated with su and certain users say) to make it useful for
-# specially blessed users. If you wish to drop this install feature,
-# use this command when running install
-#
-#    make RAISE_SETFCAP=no install
-#
-RAISE_SETFCAP := $(LIBATTR)
+INDENT := $(shell if [ -n "$$(which indent 2>/dev/null)" ]; then echo "| indent -kr" ; fi)
+
+# SHARED tracks whether or not the SHARED libraries (libcap.so,
+# libpsx.so and pam_cap.so) are built. (Some environments don't
+# support shared libraries.)
+SHARED ?= yes
+# DYNAMIC controls how capsh etc are linked - to shared or static libraries
+# Force enabled with "make DYNAMIC=yes ...".
+DYNAMIC := $(shell if [ ! -d "$(topdir)/.git" ]; then echo $(SHARED); else echo no ; fi)
+
+PAM_CAP ?= $(shell if [ -f /usr/include/security/pam_modules.h ]; then echo $(SHARED) ; else echo no ; fi)
+
+# If your system does not support pthreads, override this as "no".
+#
+#    make PTHREADS=no ...
+#
+# This implies no Go support and no C/C++ libpsx build. Why might you
+# need libpsx for non-Go use? Tl;dr for POSIX semantics security:
+#
+#    https://sites.google.com/site/fullycapable/who-ordered-libpsx
+#
+PTHREADS ?= yes
+
+ifeq ($(PTHREADS),yes)
+GO ?= go
+GOLANG ?= $(shell if [ -n "$(shell $(GO) version 2>/dev/null)" ]; then echo yes ; else echo no ; fi)
+ifeq ($(GOLANG),yes)
+GOROOT ?= $(shell $(GO) env GOROOT)
+GOCGO ?= $(shell if [ "$(shell $(GO) env CGO_ENABLED)" = 1 ]; then echo yes ; else echo no ; fi)
+GOOSARCH ?= $(shell $(GO) env GOHOSTOS)_$(shell $(GO) env GOHOSTARCH)
+CGO_REQUIRED := $(shell $(topdir)/go/cgo-required.sh $(GO))
+ifeq ($(CGO_REQUIRED),1)
+# Strictly speaking go1.15 doesn't need this, but 1.16 is when the
+# real golang support arrives for non-cgo support, so drop the last
+# vestige of legacy workarounds then.
+CGO_LDFLAGS_ALLOW := CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
+endif
+CGO_CFLAGS := $(LIBCAP_INCLUDES)
+CGO_LDFLAGS := -L$(topdir)/libcap
+GO_BUILD_FLAGS :=
+endif
+endif
+
+# If you want capsh to launch with something other than /bin/bash
+# build like this:
+#
+#   make CAPSH_SHELL='-DSHELL=\"/bin/sh\"'
+#
+# or uncomment the following:
+#CAPSH_SHELL := '-DSHELL="/bin/sh"'
+
+# When installing setcap, you can arrange for the installation process
+# to set its inheritable bit to be able to place capabilities on files.
+# It can be used in conjunction with pam_cap (associated with su and
+# certain users say) to make it useful for specially blessed users.
+#
+#    make RAISE_SETFCAP=yes install
+#
+# This is now defaulted to no because some distributions started
+# shipping with all users blessed with full inheritable sets which
+# makes no sense whatsoever!
+#
+# Indeed, it looked alarmingly like these distributions were recreating
+# the environment for what became known as the sendmail-capabilities
+# bug from 2000:
+#
+#  https://sites.google.com/site/fullycapable/Home/thesendmailcapabilitiesissue
+#
+# they are also nullifying the difference between a p-bit and an i-bit.
+#
+# Folk really should read this document, which explains there is a really
+# important difference being lost here:
+#
+#  https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/33528.pdf
+#
+# In the context of this tree, on such systems, a yes setting will
+# guarantee that every user, by default, is able to bless any binary with
+# any capability - a ready made local exploit mechanism.
+RAISE_SETFCAP := no
+
+# If set to yes, this will cause the go "web" demo app to force the needed p
+# bit to be able to bind to port 80 without running as root.
+RAISE_GO_FILECAP := no
 
 # Global cleanup stuff
 
diff --git a/Makefile b/Makefile
index 124d10d..fca2b58 100644
--- a/Makefile
+++ b/Makefile
@@ -8,16 +8,25 @@ include Make.Rules
 # flags
 #
 
-all install clean: %: %-here
+all test sudotest install clean: %: %-here
 	$(MAKE) -C libcap $@
 ifneq ($(PAM_CAP),no)
 	$(MAKE) -C pam_cap $@
 endif
+ifeq ($(GOLANG),yes)
+	$(MAKE) -C go $@
+	rm -f cap/go.sum
+endif
+	$(MAKE) -C tests $@
 	$(MAKE) -C progs $@
 	$(MAKE) -C doc $@
 
 all-here:
 
+test-here:
+
+sudotest-here:
+
 install-here:
 
 clean-here:
@@ -25,13 +34,52 @@ clean-here:
 
 distclean: clean
 	$(DISTCLEAN)
+	@echo "CONFIRM Go package cap has right version dependency on cap/psx:"
+	for x in $$(find . -name go.mod); do $(BUILD_FGREP) -v "module" $$x | $(BUILD_FGREP) "kernel.org/pub/linux/libs/security/libcap" > /dev/null || continue ; $(BUILD_FGREP) "v$(GOMAJOR).$(VERSION).$(MINOR)" $$x  > /dev/null && continue ; echo "$$x is not updated. Try running: ./gomods.sh v$(GOMAJOR).$(VERSION).$(MINOR)" ; exit 1 ; done
+	@echo "ALL go.mod files updated"
+	@echo "Confirm headers export current version"
+	$(BUILD_FGREP) "#define LIBCAP_MAJOR $(VERSION)" libcap/include/sys/capability.h
+	$(BUILD_FGREP) "#define LIBCAP_MINOR $(MINOR)" libcap/include/sys/capability.h
+	@echo "Now validate that everything is checked in to a clean tree.."
+	test -z "$$(git status --ignored -s)"
+	@echo "All good!"
 
 release: distclean
-	cd .. && ln -s libcap libcap-$(VERSION).$(MINOR) && tar cvf libcap-$(VERSION).$(MINOR).tar libcap-$(VERSION).$(MINOR)/* && rm libcap-$(VERSION).$(MINOR)
-	cd .. && gpg -sba -u E2CCF3F4 libcap-$(VERSION).$(MINOR).tar
+	cd .. && ln -s libcap libcap-$(VERSION).$(MINOR) && tar cvf libcap-$(VERSION).$(MINOR).tar --exclude patches libcap-$(VERSION).$(MINOR)/* && rm libcap-$(VERSION).$(MINOR)
 
-tagrelease: distclean
-	@echo "sign the tag twice: older DSA key; and newer RSA kernel.org key"
-	git tag -u D41A6DF2 -s libcap-$(VERSION).$(MINOR)
-	git tag -u E2CCF3F4 -s libcap-korg-$(VERSION).$(MINOR)
-	make release
+ktest: all
+	$(MAKE) -C kdebug test
+
+distcheck: distclean
+	./distcheck.sh
+	$(MAKE) DYNAMIC=no COPTS="-D_FORTIFY_SOURCE=2 -O1 -g" clean test
+	$(MAKE) DYNAMIC=yes clean all test sudotest
+	$(MAKE) DYNAMIC=no COPTS="-O2 -std=c89" clean all test sudotest
+	$(MAKE) PAM_CAP=no CC=musl-gcc clean all test sudotest
+	$(MAKE) CC=clang clean all test sudotest
+	$(MAKE) clean all test sudotest
+	$(MAKE) distclean
+
+morgangodoc:
+	@echo "Now the release is made, you want to remember to run one of:"
+	@echo
+	@echo "  GOPROXY=https://proxy.golang.org GO111MODULE=on go get kernel.org/pub/linux/libs/security/libcap/cap@v$(GOMAJOR).$(VERSION).$(MINOR)"
+	@echo
+	@echo or press the request button on this page:
+	@echo
+	@echo "  https://pkg.go.dev/kernel.org/pub/linux/libs/security/libcap/cap@v$(GOMAJOR).$(VERSION).$(MINOR)"
+	@echo
+	@echo "This will cause a go.dev documentation update."
+
+morganrelease: distcheck
+	@echo "sign the main library tag twice: older DSA key; and newer RSA (kernel.org) key"
+	git tag -u D41A6DF2 -s libcap-$(VERSION).$(MINOR) -m "This is libcap-$(VERSION).$(MINOR)"
+	git tag -u E2CCF3F4 -s libcap-korg-$(VERSION).$(MINOR) -m "This is libcap-$(VERSION).$(MINOR)"
+	@echo "The following are for the Go module tracking."
+	git tag -u D41A6DF2 -s v$(GOMAJOR).$(VERSION).$(MINOR) -m "This is the version tag for the 'libcap' Go base directory associated with libcap-$(VERSION).$(MINOR)."
+	git tag -u D41A6DF2 -s psx/v$(GOMAJOR).$(VERSION).$(MINOR) -m "This is the (stable) version tag for the 'psx' Go package associated with libcap-$(VERSION).$(MINOR)."
+	git tag -u D41A6DF2 -s cap/v$(GOMAJOR).$(VERSION).$(MINOR) -m "This is the (stable) version tag for the 'cap' Go package associated with libcap-$(VERSION).$(MINOR)."
+	$(MAKE) release
+	@echo "sign the tar file using korg key"
+	cd .. && gpg -sba -u E2CCF3F4 libcap-$(VERSION).$(MINOR).tar
+	$(MAKE) morgangodoc
diff --git a/README b/README
index 7356eac..9c4a3ea 100644
--- a/README
+++ b/README
@@ -1,28 +1,56 @@
 This is a library for getting and setting POSIX.1e (formerly POSIX 6)
 draft 15 capabilities.
 
+Natively supported languages are C/C++ and Go.
+
 This library would not have been possible without the help of 
 
-	Aleph1, Roland Buresund and Andrew Main, Alexander Kjeldaas.
+    Aleph1, Roland Buresund and Andrew Main, Alexander Kjeldaas.
+
+More information on capabilities in the Linux kernel, links to the
+official git repository for libcap, release notes and how to report
+bugs can be found at:
+
+    http://sites.google.com/site/fullycapable/
+
+The primary upstream git repository is this one:
+
+    https://git.kernel.org/pub/scm/libs/libcap/libcap.git/
+
+# BUILDING AND INSTALLATION
+
+    $ make
+
+       builds the library and the programs that are expected to work
+       on your system. For example, if you have Linux-PAM installed,
+       pam_cap is built. A golang installation is required to build
+       the Go packages.
+
+    $ make test
 
-More information on capabilities in the Linux kernel can be found at
+       runs all of the tests not requiring privilege
 
-	http://sites.google.com/site/fullycapable/
+    $ make sudotest
 
-# INSTALLATION
+       runs all of the tests including those that require privilege.
 
-	Linux-Caps % make
+    $ sudo make install
 
-		builds the library and the programs
+       default installs the library libcap.XX.Y in /lib[64]/
+       the binaries in /sbin/
+       the header files in /usr/include
+       the {libcap,libpsx}.pc files in /usr/lib[64]/pkgconfig
+       the Go packages (if built) under /usr/share/gocode/src
 
-	Linux-Caps % make install
+For some example C programs look in the progs/ directory.
+Specifically, capsh, getpcaps, setcap and getcap. There are some C
+tests in the tests/ directory.
 
-		installs the library libcap.XX.Y in /lib[64]/
-		the binaries in /sbin/
-		the <sys/capability.h> file in /usr/include
-		the libcap.pc file in /usr/lib[64]/pkgconfig
+Go example programs are to be found in the goapps/ directory. There
+are also some more complicated integration tests in the go/ directory.
 
-* for some example programs look in progs.
+There are also some oddball experimental things in the contrib/
+directory, but they are mostly curiosities.
 
 Cheers
 
diff --git a/cap/License b/cap/License
new file mode 100644
index 0000000..095f754
--- /dev/null
+++ b/cap/License
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */
+
+Unless otherwise *explicitly* stated, the following text describes the
+licensed conditions under which the contents of this libcap/cap release
+may be used and distributed.
+
+The licensed conditions are one or the other of these two Licenses:
+
+  - BSD 3-clause
+  - GPL v2.0
+
+-------------------------------------------------------------------------
+BSD 3-clause:
+-------------
+
+Redistribution and use in source and binary forms of libcap/cap, with
+or without modification, are permitted provided that the following
+conditions are met:
+
+1. Redistributions of source code must retain any existing copyright
+   notice, and this entire permission notice in its entirety,
+   including the disclaimer of warranties.
+
+2. Redistributions in binary form must reproduce all prior and current
+   copyright notices, this list of conditions, and the following
+   disclaimer in the documentation and/or other materials provided
+   with the distribution.
+
+3. The name of any author may not be used to endorse or promote
+   products derived from this software without their specific prior
+   written permission.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
+-------------------------------------------------------------------------
+GPL v2.0:
+---------
+
+ALTERNATIVELY, this product may be distributed under the terms of the
+GNU General Public License (v2.0 - see below), in which case the
+provisions of the GNU GPL are required INSTEAD OF the above
+restrictions.  (This clause is necessary due to a potential conflict
+between the GNU GPL and the restrictions contained in a BSD-style
+copyright.)
+
+-------------------------
+Full text of gpl-2.0.txt:
+-------------------------
+
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/cap/README b/cap/README
new file mode 100644
index 0000000..f257d44
--- /dev/null
+++ b/cap/README
@@ -0,0 +1,10 @@
+Package cap is the libcap API for Linux Capabilities written in
+Go. The official release announcement site for libcap is:
+
+   https://sites.google.com/site/fullycapable/
+
+Like libcap, the cap package is distributed with a "you choose"
+License. Specifically: BSD three clause, or GPL2. See the License
+file.
+
+Andrew G. Morgan <morgan@kernel.org>
diff --git a/cap/cap.go b/cap/cap.go
new file mode 100644
index 0000000..784bf65
--- /dev/null
+++ b/cap/cap.go
@@ -0,0 +1,503 @@
+// Package cap provides all the Linux Capabilities userspace library API
+// bindings in native Go.
+//
+// Capabilities are a feature of the Linux kernel that allow fine
+// grain permissions to perform privileged operations. Privileged
+// operations are required to do irregular system level operations
+// from code. You can read more about how Capabilities are intended to
+// work here:
+//
+//   https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/33528.pdf
+//
+// This package supports native Go bindings for all the features
+// described in that paper as well as supporting subsequent changes to
+// the kernel for other styles of inheritable Capability.
+//
+// Some simple things you can do with this package are:
+//
+//   // Read and display the capabilities of the running process
+//   c := cap.GetProc()
+//   log.Printf("this process has these caps: %q", c)
+//
+//   // Drop any privilege a process might have (including for root,
+//   // but note root 'owns' a lot of system files so a cap-limited
+//   // root can still do considerable damage to a running system).
+//   old := cap.GetProc()
+//   empty := cap.NewSet()
+//   if err := empty.SetProc(); err != nil {
+//       log.Fatalf("failed to drop privilege: %q -> %q: %v", old, empty, err)
+//   }
+//   now := cap.GetProc()
+//   if cf, _ := now.Cf(empty); cf != 0 {
+//       log.Fatalf("failed to fully drop privilege: have=%q, wanted=%q", now, empty)
+//   }
+//
+// The "cap" package operates with POSIX semantics for security
+// state. That is all OS threads are kept in sync at all times. The
+// package "kernel.org/pub/linux/libs/security/libcap/psx" is used to
+// implement POSIX semantics system calls that manipulate thread state
+// uniformly over the whole Go (and any CGo linked) process runtime.
+//
+// Note, if the Go runtime syscall interface contains the Linux
+// variant syscall.AllThreadsSyscall() API (it debuted in go1.16 see
+// https://github.com/golang/go/issues/1435 for its history) then the
+// "libcap/psx" package will use that to invoke Capability setting
+// system calls in pure Go binaries. With such an enhanced Go runtime,
+// to force this behavior, use the CGO_ENABLED=0 environment variable.
+//
+// POSIX semantics are more secure than trying to manage privilege at
+// a thread level when those threads share a common memory image as
+// they do under Linux: it is trivial to exploit a vulnerability in
+// one thread of a process to cause execution on any other
+// thread. So, any imbalance in security state, in such cases will
+// readily create an opportunity for a privilege escalation
+// vulnerability.
+//
+// POSIX semantics also work well with Go, which deliberately tries to
+// insulate the user from worrying about the number of OS threads that
+// are actually running in their program. Indeed, Go can efficiently
+// launch and manage tens of thousands of concurrent goroutines
+// without bogging the program or wider system down. It does this by
+// aggressively migrating idle threads to make progress on unblocked
+// goroutines. So, inconsistent security state across OS threads can
+// also lead to program misbehavior.
+//
+// The only exception to this process-wide common security state is
+// the cap.Launcher related functionality. This briefly locks an OS
+// thread to a goroutine in order to launch another executable - the
+// robust implementation of this kind of support is quite subtle, so
+// please read its documentation carefully, if you find that you need
+// it.
+//
+// See https://sites.google.com/site/fullycapable/ for recent updates,
+// some more complete walk-through examples of ways of using
+// 'cap.Set's etc and information on how to file bugs.
+//
+// Copyright (c) 2019-21 Andrew G. Morgan <morgan@kernel.org>
+//
+// The cap and psx packages are licensed with a (you choose) BSD
+// 3-clause or GPL2. See the License file for details.
+package cap // import "kernel.org/pub/linux/libs/security/libcap/cap"
+
+import (
+	"errors"
+	"sort"
+	"sync"
+	"syscall"
+	"unsafe"
+)
+
+// Value is the type of a single capability (or permission) bit.
+type Value uint
+
+// Flag is the type of one of the three Value dimensions held in a
+// Set.  It is also used in the (*IAB).Fill() method for changing the
+// Bounding and Ambient Vectors.
+type Flag uint
+
+// Effective, Permitted, Inheritable are the three Flags of Values
+// held in a Set.
+const (
+	Effective Flag = iota
+	Permitted
+	Inheritable
+)
+
+// Diff summarizes the result of the (*Set).Cf() function.
+type Diff uint
+
+const (
+	effectiveDiff   Diff = 1 << Effective
+	permittedDiff   Diff = 1 << Permitted
+	inheritableDiff Diff = 1 << Inheritable
+)
+
+// String identifies a Flag value by its conventional "e", "p" or "i"
+// string abbreviation.
+func (f Flag) String() string {
+	switch f {
+	case Effective:
+		return "e"
+	case Permitted:
+		return "p"
+	case Inheritable:
+		return "i"
+	default:
+		return "<Error>"
+	}
+}
+
+// data holds a 32-bit slice of the compressed bitmaps of capability
+// sets as understood by the kernel.
+type data [Inheritable + 1]uint32
+
+// Set is an opaque capabilities container for a set of system
+// capabilities. It holds individually addressable capability Value's
+// for the three capability Flag's. See GetFlag() and SetFlag() for
+// how to adjust them individually, and Clear() and ClearFlag() for
+// how to do bulk operations.
+//
+// For admin tasks associated with managing namespace specific file
+// capabilities, Set can also support a namespace-root-UID value which
+// defaults to zero. See GetNSOwner() and SetNSOwner().
+type Set struct {
+	// mu protects all other members of a Set.
+	mu sync.RWMutex
+
+	// flat holds Flag Value bitmaps for all capabilities
+	// associated with this Set.
+	flat []data
+
+	// Linux specific
+	nsRoot int
+}
+
+// Various known kernel magic values.
+const (
+	kv1 = 0x19980330 // First iteration of process capabilities (32 bits).
+	kv2 = 0x20071026 // First iteration of process and file capabilities (64 bits) - deprecated.
+	kv3 = 0x20080522 // Most recently supported process and file capabilities (64 bits).
+)
+
+var (
+	// startUp protects setting of the following values: magic,
+	// words, maxValues.
+	startUp sync.Once
+
+	// magic holds the preferred magic number for the kernel ABI.
+	magic uint32
+
+	// words holds the number of uint32's associated with each
+	// capability Flag for this session.
+	words int
+
+	// maxValues holds the number of bit values that are named by
+	// the running kernel. This is generally expected to match
+	// ValueCount which is autogenerated at packaging time.
+	maxValues uint
+)
+
+type header struct {
+	magic uint32
+	pid   int32
+}
+
+// syscaller is a type for abstracting syscalls. The r* variants are
+// for reading state, and can be parallelized, the w* variants need to
+// be serialized so all OS threads can share state.
+type syscaller struct {
+	r3 func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
+	w3 func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
+	r6 func(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+	w6 func(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+}
+
+// caprcall provides a pointer etc wrapper for the system calls
+// associated with getcap.
+//go:uintptrescapes
+func (sc *syscaller) caprcall(call uintptr, h *header, d []data) error {
+	x := uintptr(0)
+	if d != nil {
+		x = uintptr(unsafe.Pointer(&d[0]))
+	}
+	_, _, err := sc.r3(call, uintptr(unsafe.Pointer(h)), x, 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
+
+// capwcall provides a pointer etc wrapper for the system calls
+// associated with setcap.
+//go:uintptrescapes
+func (sc *syscaller) capwcall(call uintptr, h *header, d []data) error {
+	x := uintptr(0)
+	if d != nil {
+		x = uintptr(unsafe.Pointer(&d[0]))
+	}
+	_, _, err := sc.w3(call, uintptr(unsafe.Pointer(h)), x, 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
+
+// prctlrcall provides a wrapper for the prctl system calls that only
+// read kernel state. There is a limited number of arguments needed
+// and the caller should use 0 for those not needed.
+func (sc *syscaller) prctlrcall(prVal, v1, v2 uintptr) (int, error) {
+	r, _, err := sc.r3(syscall.SYS_PRCTL, prVal, v1, v2)
+	if err != 0 {
+		return int(r), err
+	}
+	return int(r), nil
+}
+
+// prctlrcall6 provides a wrapper for the prctl system calls that only
+// read kernel state and require 6 arguments - ambient cap API, I'm
+// looking at you. There is a limited number of arguments needed and
+// the caller should use 0 for those not needed.
+func (sc *syscaller) prctlrcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
+	r, _, err := sc.r6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
+	if err != 0 {
+		return int(r), err
+	}
+	return int(r), nil
+}
+
+// prctlwcall provides a wrapper for the prctl system calls that
+// write/modify kernel state. Where available, these will use the
+// POSIX semantics fixup system calls. There is a limited number of
+// arguments needed and the caller should use 0 for those not needed.
+func (sc *syscaller) prctlwcall(prVal, v1, v2 uintptr) (int, error) {
+	r, _, err := sc.w3(syscall.SYS_PRCTL, prVal, v1, v2)
+	if err != 0 {
+		return int(r), err
+	}
+	return int(r), nil
+}
+
+// prctlwcall6 provides a wrapper for the prctl system calls that
+// write/modify kernel state and require 6 arguments - ambient cap
+// API, I'm looking at you. (Where available, these will use the POSIX
+// semantics fixup system calls). There is a limited number of
+// arguments needed and the caller should use 0 for those not needed.
+func (sc *syscaller) prctlwcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
+	r, _, err := sc.w6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
+	if err != 0 {
+		return int(r), err
+	}
+	return int(r), nil
+}
+
+// cInit performs the lazy identification of the capability vintage of
+// the running system.
+func (sc *syscaller) cInit() {
+	h := &header{
+		magic: kv3,
+	}
+	sc.caprcall(syscall.SYS_CAPGET, h, nil)
+	magic = h.magic
+	switch magic {
+	case kv1:
+		words = 1
+	case kv2, kv3:
+		words = 2
+	default:
+		// Fall back to a known good version.
+		magic = kv3
+		words = 2
+	}
+	// Use the bounding set to evaluate which capabilities exist.
+	maxValues = uint(sort.Search(32*words, func(n int) bool {
+		_, err := GetBound(Value(n))
+		return err != nil
+	}))
+	if maxValues == 0 {
+		// Fall back to using the largest value defined at build time.
+		maxValues = NamedCount
+	}
+}
+
+// MaxBits returns the number of kernel-named capabilities discovered
+// at runtime in the current system.
+func MaxBits() Value {
+	startUp.Do(multisc.cInit)
+	return Value(maxValues)
+}
+
+// NewSet returns an empty capability set.
+func NewSet() *Set {
+	startUp.Do(multisc.cInit)
+	return &Set{
+		flat: make([]data, words),
+	}
+}
+
+// ErrBadSet indicates a nil pointer was used for a *Set, or the
+// request of the Set is invalid in some way.
+var ErrBadSet = errors.New("bad capability set")
+
+// good confirms that c looks valid.
+func (c *Set) good() error {
+	if c == nil || len(c.flat) == 0 {
+		return ErrBadSet
+	}
+	return nil
+}
+
+// Dup returns a copy of the specified capability set.
+func (c *Set) Dup() (*Set, error) {
+	if err := c.good(); err != nil {
+		return nil, err
+	}
+	n := NewSet()
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	copy(n.flat, c.flat)
+	n.nsRoot = c.nsRoot
+	return n, nil
+}
+
+// GetPID returns the capability set associated with the target process
+// id; pid=0 is an alias for current.
+func GetPID(pid int) (*Set, error) {
+	v := NewSet()
+	if err := multisc.caprcall(syscall.SYS_CAPGET, &header{magic: magic, pid: int32(pid)}, v.flat); err != nil {
+		return nil, err
+	}
+	return v, nil
+}
+
+// GetProc returns the capability Set of the current process. If the
+// kernel is unable to determine the Set associated with the current
+// process, the function panic()s.
+func GetProc() *Set {
+	c, err := GetPID(0)
+	if err != nil {
+		panic(err)
+	}
+	return c
+}
+
+// setProc uses syscaller to set process capabilities.  Note, c is
+// either private to or (read) locked by the caller.
+func (sc *syscaller) setProc(c *Set) error {
+	return sc.capwcall(syscall.SYS_CAPSET, &header{magic: magic}, c.flat)
+}
+
+// SetProc attempts to set the capability Set of the current
+// process. The kernel will perform permission checks and an error
+// will be returned if the attempt fails. Should the attempt fail
+// no process capabilities will have been modified.
+//
+// Note, the general behavior of this call is to set the
+// process-shared capabilities. However, when called from a callback
+// function as part of a (*Launcher).Launch(), the call only sets the
+// capabilities of the thread being used to perform the launch.
+func (c *Set) SetProc() error {
+	if err := c.good(); err != nil {
+		return err
+	}
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	return sc.setProc(c)
+}
+
+// defines from uapi/linux/prctl.h
+const (
+	prCapBSetRead = 23
+	prCapBSetDrop = 24
+)
+
+// GetBound determines if a specific capability is currently part of
+// the local bounding set. On systems where the bounding set Value is
+// not present, this function returns an error.
+func GetBound(val Value) (bool, error) {
+	v, err := multisc.prctlrcall(prCapBSetRead, uintptr(val), 0)
+	if err != nil {
+		return false, err
+	}
+	return v > 0, nil
+}
+
+//go:uintptrescapes
+func (sc *syscaller) dropBound(val ...Value) error {
+	for _, v := range val {
+		if _, err := sc.prctlwcall(prCapBSetDrop, uintptr(v), 0); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// DropBound attempts to suppress bounding set Values. The kernel will
+// never allow a bounding set Value bit to be raised once successfully
+// dropped. However, dropping requires the current process is
+// sufficiently capable (usually via cap.SETPCAP being raised in the
+// Effective flag of the process' Set). Note, the drops are performed
+// in order and if one bounding value cannot be dropped, the function
+// returns immediately with an error which may leave the system in an
+// ill-defined state. The caller can determine where things went wrong
+// using GetBound().
+func DropBound(val ...Value) error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.dropBound(val...)
+}
+
+// defines from uapi/linux/prctl.h
+const (
+	prCapAmbient = 47
+
+	prCapAmbientIsSet    = 1
+	prCapAmbientRaise    = 2
+	prCapAmbientLower    = 3
+	prCapAmbientClearAll = 4
+)
+
+// GetAmbient determines if a specific capability is currently part of
+// the local ambient set. On systems where the ambient set Value is
+// not present, this function returns an error.
+func GetAmbient(val Value) (bool, error) {
+	r, err := multisc.prctlrcall6(prCapAmbient, prCapAmbientIsSet, uintptr(val), 0, 0, 0)
+	return r > 0, err
+}
+
+//go:uintptrescapes
+func (sc *syscaller) setAmbient(enable bool, val ...Value) error {
+	dir := uintptr(prCapAmbientLower)
+	if enable {
+		dir = prCapAmbientRaise
+	}
+	for _, v := range val {
+		_, err := sc.prctlwcall6(prCapAmbient, dir, uintptr(v), 0, 0, 0)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// SetAmbient attempts to set a specific Value bit to the state,
+// enable. This function will return an error if insufficient
+// permission is available to perform this task. The settings are
+// performed in order and the function returns immediately an error is
+// detected. Use GetAmbient() to unravel where things went
+// wrong. Note, the cap package manages an abstraction IAB that
+// captures all three inheritable vectors in a single type. Consider
+// using that.
+func SetAmbient(enable bool, val ...Value) error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.setAmbient(enable, val...)
+}
+
+func (sc *syscaller) resetAmbient() error {
+	var v bool
+	var err error
+
+	for c := Value(0); !v; c++ {
+		if v, err = GetAmbient(c); err != nil {
+			// no non-zero values found.
+			return nil
+		}
+	}
+	_, err = sc.prctlwcall6(prCapAmbient, prCapAmbientClearAll, 0, 0, 0, 0)
+	return err
+}
+
+// ResetAmbient attempts to ensure the Ambient set is fully
+// cleared. It works by first reading the set and if it finds any bits
+// raised it will attempt a reset. The test before attempting a reset
+// behavior is a workaround for situations where the Ambient API is
+// locked, but a reset is not actually needed. No Ambient bit not
+// already raised in both the Permitted and Inheritable Set is allowed
+// to be raised by the kernel.
+func ResetAmbient() error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.resetAmbient()
+}
diff --git a/cap/cap_test.go b/cap/cap_test.go
new file mode 100644
index 0000000..52afd43
--- /dev/null
+++ b/cap/cap_test.go
@@ -0,0 +1,302 @@
+package cap
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestAllMask(t *testing.T) {
+	oldMask := maxValues
+	oldWords := words
+	defer func() {
+		maxValues = oldMask
+		words = oldWords
+	}()
+
+	maxValues = 35
+	words = 3
+
+	vs := []struct {
+		val   Value
+		index uint
+		bit   uint32
+		mask  uint32
+	}{
+		{val: CHOWN, index: 0, bit: 0x1, mask: ^uint32(0)},
+		{val: 38, index: 1, bit: (1 << 6), mask: 0x7},
+		{val: 34, index: 1, bit: (1 << 2), mask: 0x7},
+		{val: 65, index: 2, bit: (1 << 1), mask: 0},
+	}
+	for i, v := range vs {
+		index, bit, err := bitOf(Inheritable, v.val)
+		if err != nil {
+			t.Fatalf("[%d] %v(%d) - not bitOf: %v", i, v.val, v.val, err)
+		} else if index != v.index {
+			t.Errorf("[%d] %v(%d) - index: got=%d want=%d", i, v.val, v.val, index, v.index)
+		}
+		if bit != v.bit {
+			t.Errorf("[%d] %v(%d) - bit: got=%b want=%b", i, v.val, v.val, bit, v.bit)
+		}
+		if mask := allMask(index); mask != v.mask {
+			t.Errorf("[%d] %v(%d) - mask: got=%b want=%b", i, v.val, v.val, mask, v.mask)
+		}
+	}
+}
+
+func TestString(t *testing.T) {
+	a := CHOWN
+	if got, want := a.String(), "cap_chown"; got != want {
+		t.Fatalf("pretty basic failure: got=%q, want=%q", got, want)
+	}
+}
+
+func TestText(t *testing.T) {
+	vs := []struct {
+		from, to string
+		err      error
+	}{
+		{"", "", ErrBadText},
+		{"=", "=", nil},
+		{"= cap_chown+iep cap_chown-i", "cap_chown=ep", nil},
+		{"= cap_setfcap,cap_chown+iep cap_chown-i", "cap_setfcap=eip cap_chown+ep", nil},
+		{"cap_setfcap,cap_chown=iep cap_chown-i", "cap_setfcap=eip cap_chown+ep", nil},
+		{"=i =p", "=p", nil},
+		{"all+pie", "=eip", nil},
+		{"all=p+ie-e", "=ip", nil},
+	}
+	for i, v := range vs {
+		c, err := FromText(v.from)
+		if err != v.err {
+			t.Errorf("[%d] parsing %q failed: got=%v, want=%v", i, v.from, err, v.err)
+			continue
+		}
+		if err != nil {
+			continue
+		}
+		to := c.String()
+		if to != v.to {
+			t.Errorf("[%d] failed to stringify cap: %q -> got=%q, want=%q", i, v.from, to, v.to)
+		}
+		if d, err := FromText(to); err != nil {
+			t.Errorf("[%d] failed to reparse %q: %v", i, to, err)
+		} else if got := d.String(); got != to {
+			t.Errorf("[%d] failed to stringify %q getting %q", i, to, got)
+		}
+	}
+}
+
+func same(a, b *Set) error {
+	if (a == nil) != (b == nil) {
+		return fmt.Errorf("nil-ness miscompare: %q vs %v", a, b)
+	}
+	if a == nil {
+		return nil
+	}
+	if a.nsRoot != b.nsRoot {
+		return fmt.Errorf("capabilities differ in nsRoot: a=%d b=%d", a.nsRoot, b.nsRoot)
+	}
+	for i, f := range a.flat {
+		g := b.flat[i]
+		for s := Effective; s <= Inheritable; s++ {
+			if got, want := f[s], g[s]; got != want {
+				return fmt.Errorf("capabilities differ: a[%d].flat[%v]=0x%08x b[%d].flat[%v]=0x%08x", i, s, got, i, s, want)
+			}
+		}
+	}
+	return nil
+}
+
+func confirmExpectedExport(t *testing.T, info string, c *Set, size uint) {
+	if ex, err := c.Export(); err != nil {
+		t.Fatalf("[%s] failed to export empty set: %v", info, err)
+	} else if n := 5 + 3*size; uint(len(ex)) != n {
+		t.Fatalf("[%s] wrong length: got=%d [%0x] want=%d", info, len(ex), ex, n)
+	} else if im, err := Import(ex); err != nil {
+		t.Fatalf("[%s] failed to import empty set: %v", info, err)
+	} else if got, want := im.String(), c.String(); got != want {
+		t.Fatalf("[%s] import != export: got=%q want=%q [%02x]", info, got, want, ex)
+	}
+}
+
+func TestImportExport(t *testing.T) {
+	wantQ := "=ep cap_chown-e 63+ip"
+	if q, err := FromText(wantQ); err != nil {
+		t.Fatalf("failed to parse %q: %v", wantQ, err)
+	} else if gotQ := q.String(); gotQ != wantQ {
+		t.Fatalf("static test failed %q -> q -> %q", wantQ, gotQ)
+	}
+
+	// Sanity check empty import/export.
+	c := NewSet()
+	confirmExpectedExport(t, "empty", c, MinExtFlagSize)
+	// Now keep flipping bits on and off and validate that all
+	// forms of import/export work.
+	for i := uint(0); i < 7000; i += 13 {
+		s := Flag(i % 3)
+		v := Value(i % (maxValues + 3))
+		c.SetFlag(s, i&17 < 8, v)
+		if ex, err := c.Export(); err != nil {
+			t.Fatalf("[%d] failed to export (%q): %v", i, c, err)
+		} else if im, err := Import(ex); err != nil {
+			t.Fatalf("[%d] failed to import (%q) set: %v", i, c, err)
+		} else if got, want := im.String(), c.String(); got != want {
+			t.Fatalf("[%d] import != export: got=%q want=%q [%02x]", i, got, want, ex)
+		} else if parsed, err := FromText(got); err != nil {
+			t.Fatalf("[%d] failed to parse %q: %v", i, got, err)
+		} else if err := same(c, parsed); err != nil {
+			t.Fatalf("[%d] miscompare (%q vs. %q): %v", i, got, parsed, err)
+		}
+	}
+
+	oMin := MinExtFlagSize
+	for j := uint(0); j < 5; j++ {
+		t.Logf("exporting with min flag size %d", j)
+		MinExtFlagSize = j
+		c = NewSet()
+		for i := uint(0); i < maxValues; i++ {
+			s := Flag(i % 3)
+			v := Value(i)
+			c.SetFlag(s, true, v)
+			size := 1 + i/8
+			if size < MinExtFlagSize {
+				size = MinExtFlagSize
+			}
+			confirmExpectedExport(t, fmt.Sprintf("%d added %d %v %v", j, i, s, v), c, size)
+		}
+	}
+	MinExtFlagSize = oMin
+}
+
+func TestIAB(t *testing.T) {
+	vs := []struct {
+		text string
+		bad  bool
+	}{
+		{text: "cup_full", bad: true},
+		{text: ""},
+		{text: "!%cap_chown"},
+		{text: "!cap_chown,^cap_setuid"},
+		{text: "cap_chown,cap_setuid"},
+		{text: "^cap_chown,cap_setuid"},
+		{text: "^cap_chown,!cap_setuid"},
+	}
+	for i, v := range vs {
+		want := v.text
+		iab, err := IABFromText(want)
+		if err != nil {
+			if v.bad {
+				continue
+			}
+			t.Errorf("[%d] want=%q, got=%q", i, want, iab)
+			continue
+		}
+		if got := iab.String(); got != want {
+			t.Errorf("[%d] got=%q want=%q", i, got, want)
+		}
+	}
+
+	one, err := GetPID(1)
+	if err != nil {
+		t.Fatalf("failed to get init's capabilities: %v", err)
+	}
+	iab := NewIAB()
+	if err := iab.Fill(Amb, one, Permitted); err != nil {
+		t.Fatalf("failed to fill Amb from Permitted: %v", err)
+	}
+	for i := 0; i < words; i++ {
+		if iab.i[i] != iab.a[i] {
+			t.Errorf("[%d: %q] i=0x%08x != a=0x%08x", i, one, iab.i[i], iab.a[i])
+		}
+	}
+	one.ClearFlag(Inheritable)
+	iab.Fill(Inh, one, Inheritable)
+	for i := 0; i < words; i++ {
+		if iab.i[i] != iab.a[i] {
+			t.Errorf("[%d: %q] i=0x%08x != a=0x%08x", i, one, iab.i[i], iab.a[i])
+		}
+	}
+
+	for n := uint(0); n < 1000; n += 13 {
+		enabled := ((n % 5) & 2) != 0
+		vec := Vector(n % 3)
+		c := Value(n % maxValues)
+		if err := iab.SetVector(vec, enabled, c); err != nil {
+			t.Errorf("[%d] failed to set vec=%v enabled=%v %q in %q", n, vec, enabled, c, iab)
+			continue
+		}
+		replay, err := IABFromText(iab.String())
+		if err != nil {
+			t.Errorf("failed to replay: %v", err)
+			continue
+		}
+		for i := 0; i < words; i++ {
+			if replay.i[i] != iab.i[i] || replay.a[i] != iab.a[i] || replay.nb[i] != iab.nb[i] {
+				t.Errorf("[%d,%d] got=%q want=%q", n, i, replay, iab)
+			}
+		}
+	}
+}
+
+func TestFuncLaunch(t *testing.T) {
+	if _, err := FuncLauncher(func(data interface{}) error {
+		return nil
+	}).Launch(nil); err != nil {
+		t.Fatalf("trivial launcher failed: %v", err)
+	}
+
+	for i := 0; i < 100; i++ {
+		expect := i & 1
+		before, err := Prctl(prGetKeepCaps)
+		if err != nil {
+			t.Fatalf("failed to get PR_KEEP_CAPS: %v", err)
+		}
+		if before != expect {
+			t.Fatalf("invalid initial state: got=%d want=%d", before, expect)
+		}
+
+		if _, err := FuncLauncher(func(data interface{}) error {
+			was, ok := data.(int)
+			if !ok {
+				return fmt.Errorf("data was not an int: %v", data)
+			}
+			if _, err := Prctlw(prSetKeepCaps, uintptr(1-was)); err != nil {
+				return err
+			}
+			if v, err := Prctl(prGetKeepCaps); err != nil {
+				return err
+			} else if v == was {
+				return fmt.Errorf("PR_KEEP_CAPS unchanged: got=%d, want=%v", v, 1-was)
+			}
+			// All good.
+			return nil
+		}).Launch(before); err != nil {
+			t.Fatalf("trivial launcher failed: %v", err)
+		}
+
+		// Now validate that the main process is still OK.
+		if after, err := Prctl(prGetKeepCaps); err != nil {
+			t.Fatalf("failed to get PR_KEEP_CAPS: %v", err)
+		} else if before != after {
+			t.Fatalf("FuncLauncher leaked privileged state: got=%v want=%v", after, before)
+		}
+
+		// Now force the other way
+		if _, err := Prctlw(prSetKeepCaps, uintptr(1-expect)); err != nil {
+			t.Fatalf("[%d] attempt to flip PR_KEEP_CAPS failed: %v", i, err)
+		}
+	}
+}
+
+func TestFill(t *testing.T) {
+	c, err := FromText("cap_setfcap=p")
+	if err != nil {
+		t.Fatalf("failed to parse: %v", err)
+	}
+	c.Fill(Effective, Permitted)
+	c.ClearFlag(Permitted)
+	c.Fill(Inheritable, Effective)
+	c.ClearFlag(Effective)
+	if got, want := c.String(), "cap_setfcap=i"; got != want {
+		t.Errorf("Fill failed: got=%q want=%q", got, want)
+	}
+}
diff --git a/cap/convenience.go b/cap/convenience.go
new file mode 100644
index 0000000..a31ac09
--- /dev/null
+++ b/cap/convenience.go
@@ -0,0 +1,340 @@
+package cap
+
+import (
+	"errors"
+	"fmt"
+	"syscall"
+	"unsafe"
+)
+
+// This file contains convenience functions for libcap, to help
+// users do the right thing with respect to capabilities for
+// common actions.
+
+// Secbits capture the prctl settable secure-bits of a process.
+type Secbits uint
+
+// SecbitNoRoot etc are the bitmasks associated with the supported
+// Secbit masks.  Source: uapi/linux/securebits.h
+const (
+	SecbitNoRoot Secbits = 1 << iota
+	SecbitNoRootLocked
+	SecbitNoSetUIDFixup
+	SecbitNoSetUIDFixupLocked
+	SecbitKeepCaps
+	SecbitKeepCapsLocked
+	SecbitNoCapAmbientRaise
+	SecbitNoCapAmbientRaiseLocked
+)
+
+const (
+	securedBasicBits   = SecbitNoRoot | SecbitNoRootLocked | SecbitNoSetUIDFixup | SecbitNoSetUIDFixupLocked | SecbitKeepCapsLocked
+	securedAmbientBits = securedBasicBits | SecbitNoCapAmbientRaise | SecbitNoCapAmbientRaiseLocked
+)
+
+// defines from uapi/linux/prctl.h
+const (
+	prGetKeepCaps   = 7
+	prSetKeepCaps   = 8
+	prGetSecureBits = 27
+	prSetSecureBits = 28
+	prSetNoNewPrivs = 38
+)
+
+// GetSecbits reports the Secbits value currently in force for the
+// process. A failure of the underlying prGetSecureBits read is not
+// returned to the caller; it causes a panic.
+func GetSecbits() Secbits {
+	bits, err := multisc.prctlrcall(prGetSecureBits, 0, 0)
+	if err == nil {
+		return Secbits(bits)
+	}
+	panic(err)
+}
+
+// setSecbits issues a prSetSecureBits prctl through sc requesting s
+// as the new secure-bits value, and returns any error from that call.
+func (sc *syscaller) setSecbits(s Secbits) (err error) {
+	_, err = sc.prctlwcall(prSetSecureBits, uintptr(s), 0)
+	return
+}
+
+// Set attempts to force the process Secbits to a value.
+//
+// NOTE(review): this comment previously stated that the function
+// raises cap.SETPCAP and completely lowers the Effective Flag upon
+// returning. The code visible here only forwards s to setSecbits(),
+// which issues the prctl and nothing else. Unless scwStateSC() or
+// scwSetState() do that work, the caller needs cap.SETPCAP to be
+// effective already, and the Effective Flag is left untouched.
+// Confirm before relying on either behavior.
+func (s Secbits) Set() error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.setSecbits(s)
+}
+
+// Mode summarizes a complicated secure-bits and capability mode in a
+// libcap preferred way.
+type Mode uint
+
+// ModeUncertain etc are how libcap summarizes security modes
+// involving capabilities and secure-bits.
+const (
+	ModeUncertain Mode = iota
+	ModeNoPriv
+	ModePure1EInit
+	ModePure1E
+	ModeHybrid
+)
+
+// GetMode assesses the current process state and summarizes it as
+// a Mode. This function always succeeds. Unfamiliar modes are
+// declared ModeUncertain.
+//
+// Note that GetSecbits() panics if the secure-bits cannot be read, so
+// "always succeeds" refers only to the absence of an error return.
+func GetMode() Mode {
+	b := GetSecbits()
+	if b == 0 {
+		// No secure-bits set at all.
+		return ModeHybrid
+	}
+	if b&securedBasicBits != securedBasicBits {
+		// Some, but not all, of the basic secured bits are set.
+		return ModeUncertain
+	}
+
+	// Walk the ambient Values until GetAmbient() reports an error.
+	for c := Value(0); ; c++ {
+		v, err := GetAmbient(c)
+		if err != nil {
+			// An error on the very first Value (c == 0) is
+			// tolerated whatever b is; setMode() treats the same
+			// condition as ambient being unavailable. Otherwise
+			// the ambient secure-bits must be exactly as expected.
+			if c != 0 && b != securedAmbientBits {
+				return ModeUncertain
+			}
+			break
+		}
+		if v {
+			// Any raised ambient Value is unfamiliar territory.
+			return ModeUncertain
+		}
+	}
+
+	// Compare the process Set against an empty Set to learn which
+	// Flags hold any Values. The error from Cf() is ignored.
+	w := GetProc()
+	e := NewSet()
+	cf, _ := w.Cf(e)
+
+	if cf.Has(Inheritable) {
+		return ModePure1E
+	}
+	if cf.Has(Permitted) || cf.Has(Effective) {
+		return ModePure1EInit
+	}
+
+	// With all three Flags empty, any Value that GetBound() still
+	// reports as set keeps the process out of ModeNoPriv.
+	for c := Value(0); ; c++ {
+		v, err := GetBound(c)
+		if err != nil {
+			break
+		}
+		if v {
+			return ModePure1EInit
+		}
+	}
+
+	return ModeNoPriv
+}
+
+// ErrBadMode is the error returned when an attempt is made to set an
+// unrecognized libcap security mode.
+var ErrBadMode = errors.New("unsupported mode")
+
+// setMode performs the work of (Mode).Set() using the syscaller sc.
+// It raises SETPCAP in the Effective Flag of a copy of the process
+// Set, applies the secure-bits (and, for ModeNoPriv, the bounding set
+// and no-new-privs changes) that m calls for, and on every return
+// path clears the Effective Flag and re-applies the Set. It returns
+// ErrBadMode for a Mode it does not know how to enter.
+func (sc *syscaller) setMode(m Mode) error {
+	w := GetProc()
+	// Runs on every return path. Besides lowering Effective, this is
+	// also what applies any ClearFlag() edits made to w below. The
+	// error from this final setProc() is not reported.
+	defer func() {
+		w.ClearFlag(Effective)
+		sc.setProc(w)
+	}()
+
+	if err := w.SetFlag(Effective, true, SETPCAP); err != nil {
+		return err
+	}
+	if err := sc.setProc(w); err != nil {
+		return err
+	}
+
+	if m == ModeHybrid {
+		return sc.setSecbits(0)
+	}
+
+	if m == ModeNoPriv || m == ModePure1EInit {
+		// Only edits w; takes effect in the deferred setProc().
+		w.ClearFlag(Inheritable)
+	} else if m != ModePure1E {
+		return ErrBadMode
+	}
+
+	// If GetAmbient() fails for Value 0, fall back to the basic
+	// secured bits and skip resetting the ambient vector.
+	sb := securedAmbientBits
+	if _, err := GetAmbient(0); err != nil {
+		sb = securedBasicBits
+	} else if err := sc.resetAmbient(); err != nil {
+		return err
+	}
+
+	if err := sc.setSecbits(sb); err != nil {
+		return err
+	}
+
+	if m != ModeNoPriv {
+		return nil
+	}
+
+	// Drop bounding Values one at a time until dropBound() fails.
+	for c := Value(0); sc.dropBound(c) == nil; c++ {
+	}
+	w.ClearFlag(Permitted)
+
+	// For good measure. The result of this call is ignored.
+	sc.prctlwcall6(prSetNoNewPrivs, 1, 0, 0, 0, 0)
+
+	return nil
+}
+
+// Set attempts to enter the specified mode. An attempt is always made
+// to enter the mode, so if you prefer this operation to be a no-op
+// when the process is already in that mode, call it only if GetMode()
+// disagrees with the desired mode.
+//
+// This function will raise cap.SETPCAP in order to achieve this
+// operation, and will completely lower the Effective Flag of the
+// process' Set before returning. This function may fail for lack of
+// permission or because (some of) the Secbits are already locked for
+// the current process. It returns ErrBadMode for a Mode that is not
+// one of ModeHybrid, ModeNoPriv, ModePure1EInit or ModePure1E.
+func (m Mode) Set() error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.setMode(m)
+}
+
+// String gives the conventional libcap name of the mode m. A value
+// that is not one of the declared Mode constants yields "UNKNOWN".
+func (m Mode) String() string {
+	names := [...]string{
+		ModeUncertain:  "UNCERTAIN",
+		ModeNoPriv:     "NOPRIV",
+		ModePure1EInit: "PURE1E_INIT",
+		ModePure1E:     "PURE1E",
+		ModeHybrid:     "HYBRID",
+	}
+	if m < Mode(len(names)) {
+		return names[m]
+	}
+	return "UNKNOWN"
+}
+
+// setUID performs the work of SetUID() using the syscaller sc. It
+// raises SETUID in the Effective Flag of a copy of the process Set,
+// issues the setuid system call, and on every return path clears the
+// Effective Flag and re-applies the Set.
+func (sc *syscaller) setUID(uid int) error {
+	w := GetProc()
+	// Deferred calls run last-in first-out, so this one runs after
+	// the deferred prSetKeepCaps reset below. Its setProc() error is
+	// not reported.
+	defer func() {
+		w.ClearFlag(Effective)
+		sc.setProc(w)
+	}()
+
+	if err := w.SetFlag(Effective, true, SETUID); err != nil {
+		return err
+	}
+
+	// these may or may not work depending on whether or not they
+	// are locked. We try them just in case. The results of both
+	// prctl calls are deliberately ignored.
+	sc.prctlwcall(prSetKeepCaps, 1, 0)
+	defer sc.prctlwcall(prSetKeepCaps, 0, 0)
+
+	if err := sc.setProc(w); err != nil {
+		return err
+	}
+
+	// w3 reports failure as a non-zero errno value rather than a nil
+	// or non-nil error, hence the comparison against 0.
+	if _, _, err := sc.w3(syscall.SYS_SETUID, uintptr(uid), 0, 0); err != 0 {
+		return err
+	}
+	return nil
+}
+
+// SetUID is a convenience function for robustly setting the UID and
+// all other variants of UID (EUID etc) to the specified value without
+// dropping the privilege of the current process. This function will
+// raise cap.SETUID in order to achieve this operation, and will
+// completely lower the Effective Flag of the process before
+// returning. Unlike the traditional method of dropping privilege when
+// changing from [E]UID=0 to some other UID, this function only can
+// perform any change of UID if cap.SETUID is available, and this
+// operation will not alter the Permitted Flag of the process' Set.
+func SetUID(uid int) error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.setUID(uid)
+}
+
+// setGroups performs the work of SetGroups() using the syscaller sc.
+// It raises SETGID in the Effective Flag of a copy of the process
+// Set, sets the GID and then the supplementary groups, and on every
+// return path clears the Effective Flag and re-applies the Set. An
+// empty suppl clears the supplementary group list.
+//
+//go:uintptrescapes
+func (sc *syscaller) setGroups(gid int, suppl []int) error {
+	w := GetProc()
+	// The error from this final setProc() is not reported.
+	defer func() {
+		w.ClearFlag(Effective)
+		sc.setProc(w)
+	}()
+
+	if err := w.SetFlag(Effective, true, SETGID); err != nil {
+		return err
+	}
+	if err := sc.setProc(w); err != nil {
+		return err
+	}
+
+	if _, _, err := sc.w3(syscall.SYS_SETGID, uintptr(gid), 0, 0); err != 0 {
+		return err
+	}
+	if len(suppl) == 0 {
+		// A zero count with a zero pointer: there is no &gs[0] to
+		// take for an empty list.
+		if _, _, err := sc.w3(sysSetGroupsVariant, 0, 0, 0); err != 0 {
+			return err
+		}
+		return nil
+	}
+
+	// On linux gid values are 32-bits.
+	gs := make([]uint32, len(suppl))
+	for i, g := range suppl {
+		gs[i] = uint32(g)
+	}
+	if _, _, err := sc.w3(sysSetGroupsVariant, uintptr(len(suppl)), uintptr(unsafe.Pointer(&gs[0])), 0); err != 0 {
+		return err
+	}
+	return nil
+}
+
+// SetGroups is a convenience function for robustly setting the GID
+// and all other variants of GID (EGID etc) to the specified value, as
+// well as setting all of the supplementary groups. This function will
+// raise cap.SETGID in order to achieve this operation, and will
+// completely lower the Effective Flag of the process Set before
+// returning.
+func SetGroups(gid int, suppl ...int) error {
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	return sc.setGroups(gid, suppl)
+}
+
+//go:uintptrescapes
+
+// Prctlw performs a prctl that is executed on all the threads of the
+// process. The trailing "w" is a reminder that it is only appropriate
+// for calls that write thread state which the caller wants every OS
+// thread of the process to share, so as to observe POSIX semantics
+// where Linux does not natively honor them. (Check the prctl
+// documentation for when this is appropriate vs. a normal
+// syscall.Prctl() call.) At most 5 arguments may follow prVal;
+// omitted ones are passed as zero.
+func Prctlw(prVal uintptr, args ...uintptr) (int, error) {
+	if n := len(args); n > 5 {
+		return -1, fmt.Errorf("prctl supports up to 5 arguments (not %d)", n)
+	}
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	var padded [5]uintptr
+	copy(padded[:], args)
+	return sc.prctlwcall6(prVal, padded[0], padded[1], padded[2], padded[3], padded[4])
+}
+
+//go:uintptrescapes
+
+// Prctl performs a prctl that either reads state using a single OS
+// thread, or makes a setting that is treated as process wide. It is
+// provided for symmetry with Prctlw, but is equivalent to simply
+// calling the corresponding syscall function. At most 5 arguments may
+// follow prVal; omitted ones are passed as zero.
+func Prctl(prVal uintptr, args ...uintptr) (int, error) {
+	if n := len(args); n > 5 {
+		return -1, fmt.Errorf("prctl supports up to 5 arguments (not %d)", n)
+	}
+	var padded [5]uintptr
+	copy(padded[:], args)
+	return singlesc.prctlrcall6(prVal, padded[0], padded[1], padded[2], padded[3], padded[4])
+}
diff --git a/cap/file.go b/cap/file.go
new file mode 100644
index 0000000..d92548f
--- /dev/null
+++ b/cap/file.go
@@ -0,0 +1,419 @@
+package cap
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"io"
+	"os"
+	"syscall"
+	"unsafe"
+)
+
+// uapi/linux/xattr.h defined.
+var (
+	xattrNameCaps, _ = syscall.BytePtrFromString("security.capability")
+)
+
+// uapi/linux/capability.h defined.
+const (
+	vfsCapRevisionMask   = uint32(0xff000000)
+	vfsCapFlagsMask      = ^vfsCapRevisionMask
+	vfsCapFlagsEffective = uint32(1)
+
+	vfsCapRevision1 = uint32(0x01000000)
+	vfsCapRevision2 = uint32(0x02000000)
+	vfsCapRevision3 = uint32(0x03000000)
+)
+
+// Data types stored in little-endian order.
+
+type vfsCaps1 struct {
+	MagicEtc uint32
+	Data     [1]struct {
+		Permitted, Inheritable uint32
+	}
+}
+
+type vfsCaps2 struct {
+	MagicEtc uint32
+	Data     [2]struct {
+		Permitted, Inheritable uint32
+	}
+}
+
+type vfsCaps3 struct {
+	MagicEtc uint32
+	Data     [2]struct {
+		Permitted, Inheritable uint32
+	}
+	RootID uint32
+}
+
+// ErrBadSize indicates the loaded file capability has
+// an invalid number of bytes in it.
+var ErrBadSize = errors.New("filecap bad size")
+
+// ErrBadMagic indicates that the kernel preferred magic number for
+// capability Set values is not supported by this package. This
+// generally implies you are using an exceptionally old
+// "../libcap/cap" package. An upgrade is needed, or failing that see
+// https://sites.google.com/site/fullycapable/ for how to file a bug.
+var ErrBadMagic = errors.New("unsupported magic")
+
+// ErrBadPath indicates a failed attempt to set a file capability on
+// an irregular (non-executable) file.
+var ErrBadPath = errors.New("file is not a regular executable")
+
+// ErrOutOfRange indicates an erroneous value for MinExtFlagSize.
+var ErrOutOfRange = errors.New("flag length invalid for export")
+
+// digestFileCap unpacks a file capability and returns it in a *Set
+// form.
+//
+// d holds the raw extended attribute bytes, of which the first sz are
+// valid. err is the outcome of the read that produced them; a non-nil
+// err is returned unchanged, so callers can pass their results
+// straight through. ErrBadSize is returned when sz lies outside the
+// range spanned by the revision 1 and revision 3 layouts, and
+// ErrBadMagic when the revision in the header is not recognized.
+func digestFileCap(d []byte, sz int, err error) (*Set, error) {
+	if err != nil {
+		return nil, err
+	}
+	var raw1 vfsCaps1
+	var raw2 vfsCaps2
+	var raw3 vfsCaps3
+	if sz < binary.Size(raw1) || sz > binary.Size(raw3) {
+		return nil, ErrBadSize
+	}
+	b := bytes.NewReader(d[:sz])
+	// Peek at the leading word to learn which layout follows.
+	var magicEtc uint32
+	if err = binary.Read(b, binary.LittleEndian, &magicEtc); err != nil {
+		return nil, err
+	}
+
+	c := NewSet()
+	// Rewind so that each case below decodes the whole structure,
+	// header included.
+	b.Seek(0, io.SeekStart)
+	switch magicEtc & vfsCapRevisionMask {
+	case vfsCapRevision1:
+		if err = binary.Read(b, binary.LittleEndian, &raw1); err != nil {
+			return nil, err
+		}
+		data := raw1.Data[0]
+		c.flat[0][Permitted] = data.Permitted
+		c.flat[0][Inheritable] = data.Inheritable
+		// The file stores a single effective flag bit. When it is
+		// the only flag bit set, Effective is rebuilt as the union
+		// of the two stored vectors.
+		if raw1.MagicEtc&vfsCapFlagsMask == vfsCapFlagsEffective {
+			c.flat[0][Effective] = data.Inheritable | data.Permitted
+		}
+	case vfsCapRevision2:
+		// A short read (sz smaller than this layout) surfaces here
+		// as an error from binary.Read.
+		if err = binary.Read(b, binary.LittleEndian, &raw2); err != nil {
+			return nil, err
+		}
+		for i, data := range raw2.Data {
+			c.flat[i][Permitted] = data.Permitted
+			c.flat[i][Inheritable] = data.Inheritable
+			if raw2.MagicEtc&vfsCapFlagsMask == vfsCapFlagsEffective {
+				c.flat[i][Effective] = data.Inheritable | data.Permitted
+			}
+		}
+	case vfsCapRevision3:
+		if err = binary.Read(b, binary.LittleEndian, &raw3); err != nil {
+			return nil, err
+		}
+		for i, data := range raw3.Data {
+			c.flat[i][Permitted] = data.Permitted
+			c.flat[i][Inheritable] = data.Inheritable
+			if raw3.MagicEtc&vfsCapFlagsMask == vfsCapFlagsEffective {
+				c.flat[i][Effective] = data.Inheritable | data.Permitted
+			}
+		}
+		// Revision 3 additionally carries the namespace owner.
+		c.nsRoot = int(raw3.RootID)
+	default:
+		return nil, ErrBadMagic
+	}
+	return c, nil
+}
+
+//go:uintptrescapes
+
+// GetFd returns the file capabilities of an open (*os.File).Fd().
+func GetFd(file *os.File) (*Set, error) {
+	var raw3 vfsCaps3
+	d := make([]byte, binary.Size(raw3))
+	sz, _, oErr := multisc.r6(syscall.SYS_FGETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+	var err error
+	if oErr != 0 {
+		err = oErr
+	}
+	return digestFileCap(d, int(sz), err)
+}
+
+//go:uintptrescapes
+
+// GetFile returns the file capabilities of a named file.
+func GetFile(path string) (*Set, error) {
+	p, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return nil, err
+	}
+	var raw3 vfsCaps3
+	d := make([]byte, binary.Size(raw3))
+	sz, _, oErr := multisc.r6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+	if oErr != 0 {
+		err = oErr
+	}
+	return digestFileCap(d, int(sz), err)
+}
+
+// GetNSOwner returns the namespace owner UID of the capability Set.
+func (c *Set) GetNSOwner() (int, error) {
+	if magic < kv3 {
+		return 0, ErrBadMagic
+	}
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	return c.nsRoot, nil
+}
+
+// SetNSOwner adds an explicit namespace owner UID to the capability
+// Set. This is only honored when generating file capabilities, and is
+// generally for use by a setup process when installing binaries that
+// use file capabilities to become capable inside a namespace to be
+// administered by that UID. If capability aware code within that
+// namespace writes file capabilities without explicitly setting such
+// a UID, the kernel will fix-up the capabilities to be specific to
+// that owner. In this way, the kernel prevents filesystem
+// capabilities from leaking out of that restricted namespace.
+func (c *Set) SetNSOwner(uid int) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.nsRoot = uid
+}
+
+// packFileCap transforms a system capability into a VFS form. Because
+// of the way Linux stores capabilities in the file extended
+// attributes, the process is a little lossy with respect to effective
+// bits: only a single effective flag bit is recorded.
+//
+// The revision written depends on the package-level words value and
+// on whether a namespace owner (nsRoot) is set. ErrBadSet is returned
+// if nsRoot is set while only one word is in use, and ErrBadSize if
+// words is neither 1 nor 2. The function takes c.mu for reading
+// itself, so callers must not already hold it.
+func (c *Set) packFileCap() ([]byte, error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	var magic uint32
+	switch words {
+	case 1:
+		if c.nsRoot != 0 {
+			return nil, ErrBadSet // nsRoot not supported for single DWORD caps.
+		}
+		magic = vfsCapRevision1
+	case 2:
+		if c.nsRoot == 0 {
+			magic = vfsCapRevision2
+			break
+		}
+		magic = vfsCapRevision3
+	}
+	if magic == 0 {
+		return nil, ErrBadSize
+	}
+	eff := uint32(0)
+	for _, f := range c.flat {
+		eff |= (f[Permitted] | f[Inheritable]) & f[Effective]
+	}
+	if eff != 0 {
+		magic |= vfsCapFlagsEffective
+	}
+	b := new(bytes.Buffer)
+	binary.Write(b, binary.LittleEndian, magic)
+	for _, f := range c.flat {
+		binary.Write(b, binary.LittleEndian, f[Permitted])
+		binary.Write(b, binary.LittleEndian, f[Inheritable])
+	}
+	if c.nsRoot != 0 {
+		// binary.Write only accepts fixed-size values. Passing the
+		// plain int nsRoot made it fail, and with the error
+		// discarded the revision 3 header went out with no RootID.
+		// The on-disk field is 32 bits wide (see vfsCaps3).
+		binary.Write(b, binary.LittleEndian, uint32(c.nsRoot))
+	}
+	return b.Bytes(), nil
+}
+
+//go:uintptrescapes
+
+// SetFd attempts to set the file capabilities of an open
+// (*os.File).Fd(). This function can also be used to delete a file's
+// capabilities, by calling with c = nil.
+//
+// Note, Linux does not store the full Effective Flag in the metadata
+// for the file. Only a single Effective bit is stored in this
+// metadata. This single bit is non-zero if the Effective Flag has any
+// overlapping bits with the Permitted or Inheritable Flags of c. This
+// may appear suboptimal, but the reasoning behind it is sound.
+// Namely, the purpose of the Effective bit is to support
+// capability-unaware binaries that will only work if they magically
+// launch with the needed Values already raised (this bit is sometimes
+// referred to simply as the 'legacy' bit).
+//
+// Historical note: without *full* support for runtime capability
+// manipulation, as it is provided in this "../libcap/cap" package,
+// this was previously the only way for Go programs to make use of
+// file capabilities.
+//
+// The preferred way that a binary will actually manipulate its
+// file-acquired capabilities is to carefully and deliberately use
+// this package (or libcap, assisted by libpsx, for threaded C/C++
+// family code).
+func (c *Set) SetFd(file *os.File) error {
+	if c == nil {
+		if _, _, err := multisc.r6(syscall.SYS_FREMOVEXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+			return err
+		}
+		return nil
+	}
+	// c.mu is deliberately not taken here: packFileCap() read-locks
+	// it itself, and sync.RWMutex prohibits recursive read locking (a
+	// writer queued between the two RLock calls would deadlock). The
+	// returned slice is a private copy, so no lock is needed for the
+	// system call either.
+	d, err := c.packFileCap()
+	if err != nil {
+		return err
+	}
+	if _, _, err := multisc.r6(syscall.SYS_FSETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+		return err
+	}
+	return nil
+}
+
+//go:uintptrescapes
+
+// SetFile attempts to set the file capabilities of the specified
+// filename. This function can also be used to delete a file's
+// capabilities, by calling with c = nil.
+//
+// ErrBadPath is returned, for both setting and deleting, unless path
+// names a regular file with at least one execute permission bit set.
+//
+// Note, Linux does not store the full Effective Flag in the file's
+// metadata, only a single bit. That bit is written as non-zero if the
+// Effective Flag of c overlaps its Permitted or Inheritable Flags.
+func (c *Set) SetFile(path string) error {
+	fi, err := os.Stat(path)
+	if err != nil {
+		return err
+	}
+	mode := fi.Mode()
+	if mode&os.ModeType != 0 {
+		return ErrBadPath
+	}
+	if mode&os.FileMode(0111) == 0 {
+		return ErrBadPath
+	}
+	p, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return err
+	}
+	if c == nil {
+		if _, _, err := multisc.r6(syscall.SYS_REMOVEXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+			return err
+		}
+		return nil
+	}
+	// c.mu is deliberately not taken here: packFileCap() read-locks
+	// it itself, and sync.RWMutex prohibits recursive read locking (a
+	// writer queued between the two RLock calls would deadlock). The
+	// returned slice is a private copy, so no lock is needed for the
+	// system call either.
+	d, err := c.packFileCap()
+	if err != nil {
+		return err
+	}
+	if _, _, err := multisc.r6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+		return err
+	}
+	return nil
+}
+
+// ExtMagic is the 32-bit (little endian) magic for an external
+// capability set. It can be used to transmit capabilities in binary
+// format in a Linux portable way. The format is:
+// <ExtMagic><byte:length><length-bytes*3-of-cap-data>.
+const ExtMagic = uint32(0x5101c290)
+
+// Import imports a Set from a byte array where it has been stored in
+// a portable (lossless) way. That is values exported by
+// libcap.cap_copy_ext() and Export().
+//
+// ErrBadMagic is returned if d does not start with ExtMagic.
+// ErrBadSize is returned if d is too short for its declared length,
+// or declares more flag bytes than this package has room for.
+func Import(d []byte) (*Set, error) {
+	b := bytes.NewBuffer(d)
+	var m uint32
+	if err := binary.Read(b, binary.LittleEndian, &m); err != nil {
+		return nil, ErrBadSize
+	} else if m != ExtMagic {
+		return nil, ErrBadMagic
+	}
+	// n counts the 3-byte groups that follow the header.
+	var n byte
+	if err := binary.Read(b, binary.LittleEndian, &n); err != nil {
+		return nil, ErrBadSize
+	}
+	c := NewSet()
+	if int(n) > 4*words {
+		return nil, ErrBadSize
+	}
+	// Each group holds one byte each of Effective, Permitted and
+	// Inheritable, in that order. Four consecutive groups, least
+	// significant byte first, fill one 32-bit word of each Flag.
+	f := make([]byte, 3)
+	for i := 0; i < words; i++ {
+		for j := uint(0); n > 0 && j < 4; j++ {
+			n--
+			// A short read means d was truncated.
+			if x, err := b.Read(f); err != nil || x != 3 {
+				return nil, ErrBadSize
+			}
+			sh := 8 * j
+			c.flat[i][Effective] |= uint32(f[0]) << sh
+			c.flat[i][Permitted] |= uint32(f[1]) << sh
+			c.flat[i][Inheritable] |= uint32(f[2]) << sh
+		}
+	}
+	return c, nil
+}
+
+// MinExtFlagSize defaults to 8 in order to be equivalent to libcap
+// defaults. Setting it to zero can generate smaller external
+// representations. Such smaller representations can be imported by
+// libcap and the Go package just fine, we just default to the default
+// libcap representation for legacy reasons.
+var MinExtFlagSize = uint(8)
+
+// Export exports a Set into a lossless byte array format where it is
+// stored in a portable way. Note, any namespace owner in the Set
+// content is not exported by this function.
+//
+// Note, Export() generates exported byte streams that are importable
+// by libcap.cap_copy_int() as well as Import().
+//
+// ErrOutOfRange is returned if MinExtFlagSize exceeds 255, the
+// largest value the single length byte of the format can hold.
+func (c *Set) Export() ([]byte, error) {
+	if err := c.good(); err != nil {
+		return nil, err
+	}
+	if MinExtFlagSize > 255 {
+		return nil, ErrOutOfRange
+	}
+	b := new(bytes.Buffer)
+	binary.Write(b, binary.LittleEndian, ExtMagic)
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	// Find n, the number of bytes per Flag needed to reach the most
+	// significant non-zero byte of any of the three Flags.
+	var n = uint(0)
+	for i, f := range c.flat {
+		if nn := 4 * uint(i); nn+4 > n {
+			if u := f[Effective] | f[Permitted] | f[Inheritable]; u != 0 {
+				n = nn
+				for ; u != 0; u >>= 8 {
+					n++
+				}
+			}
+		}
+	}
+	if n < MinExtFlagSize {
+		n = MinExtFlagSize
+	}
+	b.Write([]byte{byte(n)})
+	// Emit n groups of (Effective, Permitted, Inheritable) bytes,
+	// least significant byte of each word first.
+	for _, f := range c.flat {
+		if n == 0 {
+			break
+		}
+		eff, per, inh := f[Effective], f[Permitted], f[Inheritable]
+		for i := 0; n > 0 && i < 4; i++ {
+			n--
+			b.Write([]byte{
+				byte(eff & 0xff),
+				byte(per & 0xff),
+				byte(inh & 0xff),
+			})
+			eff >>= 8
+			per >>= 8
+			inh >>= 8
+		}
+	}
+	// Zero-fill when MinExtFlagSize asks for more groups than the
+	// Set has words to supply.
+	for n > 0 {
+		n--
+		b.Write([]byte{0, 0, 0})
+	}
+	return b.Bytes(), nil
+}
diff --git a/cap/flags.go b/cap/flags.go
new file mode 100644
index 0000000..c4c2ed8
--- /dev/null
+++ b/cap/flags.go
@@ -0,0 +1,271 @@
+package cap
+
+import "errors"
+
+// GetFlag determines if the requested Value is enabled in the
+// specified Flag of the capability Set.
+func (c *Set) GetFlag(vec Flag, val Value) (bool, error) {
+	if err := c.good(); err != nil {
+		// Checked this first, because otherwise we are sure
+		// cInit has been called.
+		return false, err
+	}
+	offset, mask, err := bitOf(vec, val)
+	if err != nil {
+		return false, err
+	}
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	return c.flat[offset][vec]&mask != 0, nil
+}
+
+// SetFlag sets the requested bits to the indicated enable state. This
+// function does not perform any security checks, so values can be set
+// out-of-order. Only when the Set is used to SetProc() etc., will the
+// bits be checked for validity and permission by the kernel. If the
+// function returns an error, the Set will not be modified.
+func (c *Set) SetFlag(vec Flag, enable bool, val ...Value) error {
+	if err := c.good(); err != nil {
+		// Checked this first, because otherwise we are sure
+		// cInit has been called.
+		return err
+	}
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	// Make a backup.
+	replace := make([]uint32, words)
+	for i := range replace {
+		replace[i] = c.flat[i][vec]
+	}
+	var err error
+	for _, v := range val {
+		offset, mask, err2 := bitOf(vec, v)
+		if err2 != nil {
+			err = err2
+			break
+		}
+		if enable {
+			c.flat[offset][vec] |= mask
+		} else {
+			c.flat[offset][vec] &= ^mask
+		}
+	}
+	if err == nil {
+		return nil
+	}
+	// Clean up.
+	for i, bits := range replace {
+		c.flat[i][vec] = bits
+	}
+	return err
+}
+
+// Clear fully clears a capability set.
+func (c *Set) Clear() error {
+	if err := c.good(); err != nil {
+		return err
+	}
+	// startUp.Do(cInit) is not called here because c cannot be
+	// initialized except via this package and doing that will
+	// perform that call at least once (sic).
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.flat = make([]data, words)
+	c.nsRoot = 0
+	return nil
+}
+
+// FillFlag copies the from flag values of ref into the to flag of
+// c. With this function, you can raise all of the permitted values in
+// the c Set from those in ref with c.FillFlag(cap.Permitted, ref,
+// cap.Permitted). ErrBadValue is returned if either to or from is not
+// one of the Effective, Permitted or Inheritable Flags.
+func (c *Set) FillFlag(to Flag, ref *Set, from Flag) error {
+	if err := c.good(); err != nil {
+		return err
+	}
+	if err := ref.good(); err != nil {
+		return err
+	}
+	if to > Inheritable || from > Inheritable {
+		return ErrBadValue
+	}
+
+	// Avoid deadlock by using a copy. When ref is c itself no copy
+	// is made and ref is read under the write lock taken below.
+	if c != ref {
+		var err error
+		ref, err = ref.Dup()
+		if err != nil {
+			return err
+		}
+	}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	for i := range c.flat {
+		c.flat[i][to] = ref.flat[i][from]
+	}
+	return nil
+}
+
+// Fill copies the from flag values into the to flag. With this
+// function, you can raise all of the permitted values in the
+// effective flag with c.Fill(cap.Effective, cap.Permitted).
+func (c *Set) Fill(to, from Flag) error {
+	return c.FillFlag(to, c, from)
+}
+
+// ErrBadValue indicates a bad capability value was specified.
+var ErrBadValue = errors.New("bad capability value")
+
+// bitOf converts from a Value into the offset and mask for a specific
+// Value bit in the compressed (kernel ABI) representation of a
+// capabilities. The offset is the index of the 32-bit word holding
+// the bit and the mask selects the bit within that word. If the
+// requested bit is unsupported, that is vec is not one of the three
+// Flags or val does not fit in words 32-bit words, ErrBadValue is
+// returned.
+func bitOf(vec Flag, val Value) (uint, uint32, error) {
+	// Valid Values are 0 .. words*32-1. The comparison has to be >=
+	// because val == words*32 would produce an offset equal to words,
+	// one past the last word the callers index.
+	if vec > Inheritable || val >= Value(words*32) {
+		return 0, 0, ErrBadValue
+	}
+	u := uint(val)
+	return u / 32, uint32(1) << (u % 32), nil
+}
+
+// allMask gives the mask of valid bits in the 32-bit word at index,
+// given that the Values 0 .. maxValues-1 are the valid ones. It
+// panics if maxValues has not been initialized.
+func allMask(index uint) (mask uint32) {
+	if maxValues == 0 {
+		panic("uninitialized package")
+	}
+	base := 32 * uint(index)
+	switch {
+	case maxValues <= base:
+		// This word lies wholly above the valid Values.
+		mask = 0
+	case maxValues >= 32+base:
+		// This word lies wholly within the valid Values.
+		mask = ^uint32(0)
+	default:
+		// The boundary falls inside this word.
+		mask = uint32((uint64(1) << (maxValues % 32)) - 1)
+	}
+	return
+}
+
+// forceFlag sets 'all' capability values (supported by the kernel) of
+// a specified Flag to enable.
+func (c *Set) forceFlag(vec Flag, enable bool) error {
+	if err := c.good(); err != nil {
+		return err
+	}
+	if vec > Inheritable {
+		return ErrBadSet
+	}
+	m := uint32(0)
+	if enable {
+		m = ^m
+	}
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	for i := range c.flat {
+		c.flat[i][vec] = m & allMask(uint(i))
+	}
+	return nil
+}
+
+// ClearFlag clears all the Values associated with the specified Flag.
+func (c *Set) ClearFlag(vec Flag) error {
+	return c.forceFlag(vec, false)
+}
+
+// Cf returns 0 if c and d are identical. A non-zero Diff value
+// captures a simple macroscopic summary of how they differ. The
+// (Diff).Has() function can be used to determine how the two
+// capability sets differ.
+func (c *Set) Cf(d *Set) (Diff, error) {
+	if err := c.good(); err != nil {
+		return 0, err
+	}
+	if c == d {
+		return 0, nil
+	}
+	d, err := d.Dup()
+	if err != nil {
+		return 0, err
+	}
+
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	var cf Diff
+	for i := 0; i < words; i++ {
+		if c.flat[i][Effective]^d.flat[i][Effective] != 0 {
+			cf |= effectiveDiff
+		}
+		if c.flat[i][Permitted]^d.flat[i][Permitted] != 0 {
+			cf |= permittedDiff
+		}
+		if c.flat[i][Inheritable]^d.flat[i][Inheritable] != 0 {
+			cf |= inheritableDiff
+		}
+	}
+	return cf, nil
+}
+
+// Compare returns 0 if c and d are identical in content. The result
+// is the Diff value from (*Set).Cf() converted to a uint.
+//
+// Deprecated: Replace with (*Set).Cf().
+//
+// Example, replace this:
+//
+//    diff, err := a.Compare(b)
+//    if err != nil {
+//      return err
+//    }
+//    if diff == 0 {
+//      return nil
+//    }
+//    if diff & (1 << Effective) != 0 {
+//      log.Print("a and b difference includes Effective values")
+//    }
+//
+// with this:
+//
+//    diff, err := a.Cf(b)
+//    if err != nil {
+//      return err
+//    }
+//    if diff == 0 {
+//      return nil
+//    }
+//    if diff.Has(Effective) {
+//      log.Print("a and b difference includes Effective values")
+//    }
+func (c *Set) Compare(d *Set) (uint, error) {
+	u, err := c.Cf(d)
+	return uint(u), err
+}
+
+// Differs processes the result of Compare and determines if the
+// Flag's components were different.
+//
+// Deprecated: Replace with (Diff).Has().
+//
+// Example, replace this:
+//
+//    diff, err := a.Compare(b)
+//    ...
+//    if diff & (1 << Effective) {
+//       ... different effective capabilities ...
+//    }
+//
+// with this:
+//
+//    diff, err := a.Cf(b)
+//    ...
+//    if diff.Has(Effective) {
+//       ... different effective capabilities ...
+//    }
+func Differs(cf uint, vec Flag) bool {
+	return cf&(1<<vec) != 0
+}
+
+// Has processes the Diff result of (*Set).Cf() and determines if the
+// Flag's components were different in that result.
+func (cf Diff) Has(vec Flag) bool {
+	return uint(cf)&(1<<vec) != 0
+}
diff --git a/cap/go.mod b/cap/go.mod
new file mode 100644
index 0000000..bfabf0c
--- /dev/null
+++ b/cap/go.mod
@@ -0,0 +1,5 @@
+module kernel.org/pub/linux/libs/security/libcap/cap
+
+go 1.11
+
+require kernel.org/pub/linux/libs/security/libcap/psx v1.2.69
diff --git a/cap/iab.go b/cap/iab.go
new file mode 100644
index 0000000..da189be
--- /dev/null
+++ b/cap/iab.go
@@ -0,0 +1,483 @@
+package cap
+
+import (
+	"fmt"
+	"io/ioutil"
+	"strconv"
+	"strings"
+	"sync"
+)
+
+// omask returns the offset and mask for a specific capability.
+func omask(c Value) (uint, uint32) {
+	u := uint(c)
+	return u >> 5, uint32(1) << (u & 31)
+}
+
+// IAB holds a summary of all of the inheritable capability vectors:
+// Inh, Amb and Bound. The Bound vector is the bitwise inverse (one's
+// complement) of the process' Bounding set. That is, raising a Value
+// in the Bound (think blocked) vector is equivalent to dropping that
+// Value from the process' Bounding set. This convention is used to
+// support the empty IAB as being mostly harmless.
+type IAB struct {
+	mu       sync.RWMutex
+	a, i, nb []uint32
+}
+
+// Vector enumerates which of the inheritable IAB capability vectors
+// is being manipulated.
+type Vector uint
+
+// Inh, Amb, Bound enumerate the IAB vector components. (Vector) Inh
+// is equivalent to (Flag) Inheritable. They are named differently for
+// syntax/type checking reasons.
+const (
+	Inh Vector = iota
+	Amb
+	Bound
+)
+
+// IABDiff holds the non-error result of an (*IAB).Cf()
+// function call. It can be interpreted with the function
+// (IABDiff).Has().
+type IABDiff uint
+
+// iBits, aBits and bBits track the (semi-)independent parts of an
+// IABDiff.
+const (
+	iBits IABDiff = 1 << Inh
+	aBits IABDiff = 1 << Amb
+	bBits IABDiff = 1 << Bound
+)
+
+// Has determines if an IAB comparison differs in a specific vector.
+func (d IABDiff) Has(v Vector) bool {
+	return d&(1<<v) != 0
+}
+
+// String identifies a Vector value by its conventional I A or B
+// string abbreviation.
+func (v Vector) String() string {
+	switch v {
+	case Inh:
+		return "I"
+	case Amb:
+		return "A"
+	case Bound:
+		return "B"
+	default:
+		return "<Error>"
+	}
+}
+
+// NewIAB returns an empty IAB.
+func NewIAB() *IAB {
+	startUp.Do(multisc.cInit)
+	return &IAB{
+		i:  make([]uint32, words),
+		a:  make([]uint32, words),
+		nb: make([]uint32, words),
+	}
+}
+
+// good confirms the iab looks to be initialized.
+func (iab *IAB) good() error {
+	if iab == nil || len(iab.i) == 0 || len(iab.i) != words || len(iab.a) != words || len(iab.nb) != words {
+		return ErrBadValue
+	}
+	return nil
+}
+
+// Dup returns a duplicate copy of the IAB.
+func (iab *IAB) Dup() (*IAB, error) {
+	if err := iab.good(); err != nil {
+		return nil, err
+	}
+	v := NewIAB()
+	iab.mu.RLock()
+	defer iab.mu.RUnlock()
+	copy(v.i, iab.i)
+	copy(v.a, iab.a)
+	copy(v.nb, iab.nb)
+	return v, nil
+}
+
+// IABInit allocates a new IAB tuple.
+//
+// Deprecated: Replace with NewIAB.
+//
+// Example, replace this:
+//
+//    iab := IABInit()
+//
+// with this:
+//
+//    iab := NewIAB()
+func IABInit() *IAB {
+	return NewIAB()
+}
+
+// IABGetProc summarizes the Inh, Amb and Bound capability vectors of
+// the current process.
+func IABGetProc() *IAB {
+	iab := NewIAB()
+	current := GetProc()
+	iab.Fill(Inh, current, Inheritable)
+	for c := MaxBits(); c > 0; {
+		c--
+		offset, mask := omask(c)
+		if a, _ := GetAmbient(c); a {
+			iab.a[offset] |= mask
+		}
+		if b, err := GetBound(c); err == nil && !b {
+			iab.nb[offset] |= mask
+		}
+	}
+	return iab
+}
+
+// IABFromText parses a string representing an IAB, as generated
+// by IAB.String(), to generate an IAB.
+func IABFromText(text string) (*IAB, error) {
+	iab := NewIAB()
+	if len(text) == 0 {
+		return iab, nil
+	}
+	for _, f := range strings.Split(text, ",") {
+		var i, a, nb bool
+		var j int
+		for j = 0; j < len(f); j++ {
+			switch f[j : j+1] {
+			case "!":
+				nb = true
+			case "^":
+				i = true
+				a = true
+			case "%":
+				i = true
+			default:
+				goto done
+			}
+		}
+	done:
+		c, err := FromName(f[j:])
+		if err != nil {
+			return nil, err
+		}
+		offset, mask := omask(c)
+		if i || !nb {
+			iab.i[offset] |= mask
+		}
+		if a {
+			iab.a[offset] |= mask
+		}
+		if nb {
+			iab.nb[offset] |= mask
+		}
+	}
+	return iab, nil
+}
+
+// String serializes an IAB to a string format.
+func (iab *IAB) String() string {
+	if err := iab.good(); err != nil {
+		return "<invalid>"
+	}
+	var vs []string
+	iab.mu.RLock()
+	defer iab.mu.RUnlock()
+	for c := Value(0); c < Value(maxValues); c++ {
+		offset, mask := omask(c)
+		i := (iab.i[offset] & mask) != 0
+		a := (iab.a[offset] & mask) != 0
+		nb := (iab.nb[offset] & mask) != 0
+		var cs []string
+		if nb {
+			cs = append(cs, "!")
+		}
+		if a {
+			cs = append(cs, "^")
+		} else if nb && i {
+			cs = append(cs, "%")
+		}
+		if nb || a || i {
+			vs = append(vs, strings.Join(cs, "")+c.String())
+		}
+	}
+	return strings.Join(vs, ",")
+}
+
+// iabSetProc uses a syscaller to apply an IAB tuple to the process.
+// The iab is known to be locked by the caller.
+func (sc *syscaller) iabSetProc(iab *IAB) (err error) {
+	temp := GetProc()
+	var raising uint32
+	for i := 0; i < words; i++ {
+		newI := iab.i[i]
+		oldIP := temp.flat[i][Inheritable] | temp.flat[i][Permitted]
+		raising |= (newI & ^oldIP) | iab.a[i] | iab.nb[i]
+		temp.flat[i][Inheritable] = newI
+	}
+	working, err2 := temp.Dup()
+	if err2 != nil {
+		err = err2
+		return
+	}
+	if raising != 0 {
+		if err = working.SetFlag(Effective, true, SETPCAP); err != nil {
+			return
+		}
+		if err = sc.setProc(working); err != nil {
+			return
+		}
+	}
+	defer func() {
+		if err2 := sc.setProc(temp); err == nil {
+			err = err2
+		}
+	}()
+	if err = sc.resetAmbient(); err != nil {
+		return
+	}
+	for c := Value(maxValues); c > 0; {
+		c--
+		offset, mask := omask(c)
+		if iab.a[offset]&mask != 0 {
+			err = sc.setAmbient(true, c)
+		}
+		if err == nil && iab.nb[offset]&mask != 0 {
+			err = sc.dropBound(c)
+		}
+		if err != nil {
+			return
+		}
+	}
+	return
+}
+
+// SetProc attempts to change the Inheritable, Ambient and Bounding
+// capability vectors of the current process using the content,
+// iab. The Bounding vector strongly affects the potential for setting
+// other bits, so this function carefully performs the combined
+// operation in the most flexible manner.
+func (iab *IAB) SetProc() error {
+	if err := iab.good(); err != nil {
+		return err
+	}
+	state, sc := scwStateSC()
+	defer scwSetState(launchBlocked, state, -1)
+	iab.mu.RLock()
+	defer iab.mu.RUnlock()
+	return sc.iabSetProc(iab)
+}
+
+// GetVector returns the raised state of the specific capability bit
+// of the indicated vector.
+func (iab *IAB) GetVector(vec Vector, val Value) (bool, error) {
+	if err := iab.good(); err != nil {
+		return false, err
+	}
+	if val >= MaxBits() {
+		return false, ErrBadValue
+	}
+	iab.mu.RLock()
+	defer iab.mu.RUnlock()
+	offset, mask := omask(val)
+	switch vec {
+	case Inh:
+		return (iab.i[offset] & mask) != 0, nil
+	case Amb:
+		return (iab.a[offset] & mask) != 0, nil
+	case Bound:
+		return (iab.nb[offset] & mask) != 0, nil
+	default:
+		return false, ErrBadValue
+	}
+}
+
+// SetVector sets all of the vals in the specified vector to the
+// raised value.  Note, the Ambient vector cannot contain values not raised
+// in the Inh vector, so setting values directly in one vector may have
+// the side effect of mirroring the value in the other vector to
+// maintain this constraint. Note, raising a Bound vector bit is
+// equivalent to lowering the Bounding vector of the process (when
+// successfully applied with (*IAB).SetProc()).
+func (iab *IAB) SetVector(vec Vector, raised bool, vals ...Value) error {
+	if err := iab.good(); err != nil {
+		return err
+	}
+	iab.mu.Lock()
+	defer iab.mu.Unlock()
+	for _, val := range vals {
+		if val >= Value(maxValues) {
+			return ErrBadValue
+		}
+		offset, mask := omask(val)
+		switch vec {
+		case Inh:
+			if raised {
+				iab.i[offset] |= mask
+			} else {
+				iab.i[offset] &= ^mask
+				iab.a[offset] &= ^mask
+			}
+		case Amb:
+			if raised {
+				iab.a[offset] |= mask
+				iab.i[offset] |= mask
+			} else {
+				iab.a[offset] &= ^mask
+			}
+		case Bound:
+			if raised {
+				iab.nb[offset] |= mask
+			} else {
+				iab.nb[offset] &= ^mask
+			}
+		default:
+			return ErrBadValue
+		}
+	}
+	return nil
+}
+
+// Fill fills one of the Inh, Amb and Bound capability vectors from
+// one of the flag vectors of a Set.  Note, filling the Inh vector
+// will mask the Amb vector, and filling the Amb vector may raise
+// entries in the Inh vector. Further, when filling the Bound vector,
+// the bits are inverted from what you might expect - that is lowered
+// bits from the Set will be raised in the Bound vector.
+func (iab *IAB) Fill(vec Vector, c *Set, flag Flag) error {
+	if err := iab.good(); err != nil {
+		return err
+	}
+	// work with a copy to avoid potential deadlock.
+	s, err := c.Dup()
+	if err != nil {
+		return err
+	}
+	iab.mu.Lock()
+	defer iab.mu.Unlock()
+	for i := 0; i < words; i++ {
+		flat := s.flat[i][flag]
+		switch vec {
+		case Inh:
+			iab.i[i] = flat
+			iab.a[i] &= flat
+		case Amb:
+			iab.a[i] = flat
+			iab.i[i] |= flat
+		case Bound:
+			iab.nb[i] = ^flat
+		default:
+			return ErrBadSet
+		}
+	}
+	return nil
+}
+
+// Cf compares two IAB values. Its return value is 0 if the compared
+// tuples are considered identical. The macroscopic differences can be
+// investigated with (IABDiff).Has().
+func (iab *IAB) Cf(alt *IAB) (IABDiff, error) {
+	if err := iab.good(); err != nil {
+		return 0, err
+	}
+	if iab == alt {
+		return 0, nil
+	}
+	// Avoid holding two locks at once.
+	ref, err := alt.Dup()
+	if err != nil {
+		return 0, err
+	}
+	iab.mu.RLock()
+	defer iab.mu.RUnlock()
+
+	var cf IABDiff
+	for i := 0; i < words; i++ {
+		if iab.i[i] != ref.i[i] {
+			cf |= iBits
+		}
+		if iab.a[i] != ref.a[i] {
+			cf |= aBits
+		}
+		if iab.nb[i] != ref.nb[i] {
+			cf |= bBits
+		}
+	}
+	return cf, nil
+}
+
+// parseHex converts the /proc/*/status string into an array of
+// uint32s suitable for storage in an IAB structure.
+func parseHex(hex string, invert bool) []uint32 {
+	if len(hex) != 8*words {
+		// Invalid string
+		return nil
+	}
+	var result []uint32
+	for i := 0; i < words; i++ {
+		upper := 8 * (words - i)
+		raw, err := strconv.ParseUint(hex[upper-8:upper], 16, 32)
+		if err != nil {
+			return nil
+		}
+		if invert {
+			raw = ^raw
+		}
+		bits := allMask(uint(i)) & uint32(raw)
+		result = append(result, bits)
+	}
+	return result
+}
+
+var procRoot = "/proc"
+
+// ProcRoot sets the local mount point for the Linux /proc filesystem.
+// It defaults to "/proc", but might be mounted elsewhere on any given
+// system. The function returns the previous value of the local mount
+// point. If the user attempts to set it to "", the value is left
+// unchanged.
+func ProcRoot(path string) string {
+	was := procRoot
+	if path != "" {
+		procRoot = path
+	}
+	return was
+}
+
+// IABGetPID returns the IAB tuple of a specified process. The kernel
+// ABI does not support this query via system calls, so the function
+// works by parsing the /proc/<pid>/status file content.
+func IABGetPID(pid int) (*IAB, error) {
+	tf := fmt.Sprintf("%s/%d/status", procRoot, pid)
+	d, err := ioutil.ReadFile(tf)
+	if err != nil {
+		return nil, err
+	}
+	iab := &IAB{}
+	for _, line := range strings.Split(string(d), "\n") {
+		if !strings.HasPrefix(line, "Cap") {
+			continue
+		}
+		flavor := line[3:]
+		if strings.HasPrefix(flavor, "Inh:\t") {
+			iab.i = parseHex(line[8:], false)
+			continue
+		}
+		if strings.HasPrefix(flavor, "Bnd:\t") {
+			iab.nb = parseHex(line[8:], true)
+			continue
+		}
+		if strings.HasPrefix(flavor, "Amb:\t") {
+			iab.a = parseHex(line[8:], false)
+			continue
+		}
+	}
+	if len(iab.i) != words || len(iab.a) != words || len(iab.nb) != words {
+		return nil, ErrBadValue
+	}
+	return iab, nil
+}
diff --git a/cap/launch.go b/cap/launch.go
new file mode 100644
index 0000000..de7fd90
--- /dev/null
+++ b/cap/launch.go
@@ -0,0 +1,423 @@
+package cap
+
+import (
+	"errors"
+	"os"
+	"runtime"
+	"sync"
+	"syscall"
+	"unsafe"
+)
+
+// Launcher holds a configuration for executing an optional callback
+// function and/or launching a child process with capability state
+// different from the parent.
+//
+// Note, go1.10 is the earliest version of the Go toolchain that can
+// support this abstraction.
+type Launcher struct {
+	mu sync.RWMutex
+
+	// Note, path and args must be set, or callbackFn. They cannot
+	// both be empty. In such cases .Launch() will error out.
+	path string
+	args []string
+	env  []string
+
+	callbackFn func(pa *syscall.ProcAttr, data interface{}) error
+
+	// The following are only honored when path is non empty.
+	changeUIDs bool
+	uid        int
+
+	changeGIDs bool
+	gid        int
+	groups     []int
+
+	changeMode bool
+	mode       Mode
+
+	iab *IAB
+
+	chroot string
+}
+
+// NewLauncher returns a new launcher for the specified program path
+// and args with the specified environment.
+func NewLauncher(path string, args []string, env []string) *Launcher {
+	return &Launcher{
+		path: path,
+		args: args,
+		env:  env,
+	}
+}
+
+// FuncLauncher returns a new launcher whose purpose is to only
+// execute fn in a disposable security context. This is a more bare
+// bones variant of the more elaborate program launcher returned by
+// cap.NewLauncher().
+//
+// Note, this launcher will fully ignore any overrides provided by the
+// (*Launcher).SetUID() etc. methods. Should your fn() code want to
+// run with a different capability state or other privilege, it should
+// use the cap.*() functions to set them directly. The cap package
+// will ensure that their effects are limited to the runtime of this
+// individual function invocation. Warning: executing non-cap.*()
+// syscall functions may corrupt the state of the program runtime and
+// lead to unpredictable results.
+//
+// The properties of fn are similar to those supplied via
+// (*Launcher).Callback(fn) method. However, this launcher is bare
+// bones because, when launching, all privilege management performed
+// by the fn() is fully discarded when the fn() completes
+// execution. That is, it does not end by exec()ing some program.
+func FuncLauncher(fn func(interface{}) error) *Launcher {
+	return &Launcher{
+		callbackFn: func(ignored *syscall.ProcAttr, data interface{}) error {
+			return fn(data)
+		},
+	}
+}
+
+// Callback changes the callback function for Launch() to call before
+// changing privilege. The only thing that is assumed is that the OS
+// thread in use to call this callback function at launch time will be
+// the one that ultimately calls fork to complete the launch of a path
+// specified executable. Any returned error value of said function
+// will terminate the launch process.
+//
+// A nil fn causes there to be no callback function invoked during a
+// Launch() sequence - it will remove any pre-existing callback.
+//
+// If the non-nil fn requires any effective capabilities in order to
+// run, they can be raised prior to calling .Launch() or inside the
+// callback function itself.
+//
+// If the specified callback fn should call any "cap" package
+// functions that change privilege state, these calls will only affect
+// the launch goroutine itself. While the launch is in progress, other
+// (non-launch) goroutines will block if they attempt to change
+// privilege state. These routines will unblock once there are no
+// in-flight launches.
+//
+// Note, the first argument provided to the callback function is the
+// *syscall.ProcAttr value to be used when a process launch is taking
+// place. A non-nil structure pointer can be modified by the callback
+// to enhance the launch. For example, the .Files field can be
+// overridden to affect how the launched process' stdin/out/err are
+// handled.
+//
+// Further, the 2nd argument to the callback function is provided at
+// Launch() invocation and can communicate contextual info to and from
+// the callback and the main process.
+func (attr *Launcher) Callback(fn func(*syscall.ProcAttr, interface{}) error) {
+	if attr == nil {
+		return
+	}
+	attr.mu.Lock()
+	defer attr.mu.Unlock()
+	attr.callbackFn = fn
+}
+
+// SetUID specifies the UID to be used by the launched command.
+func (attr *Launcher) SetUID(uid int) {
+	if attr == nil {
+		return
+	}
+	attr.mu.Lock()
+	defer attr.mu.Unlock()
+	attr.changeUIDs = true
+	attr.uid = uid
+}
+
+// SetGroups specifies the GID and supplementary groups for the
+// launched command.
+func (attr *Launcher) SetGroups(gid int, groups []int) {
+	if attr == nil {
+		return
+	}
+	attr.mu.Lock()
+	defer attr.mu.Unlock()
+	attr.changeGIDs = true
+	attr.gid = gid
+	attr.groups = groups
+}
+
+// SetMode specifies the libcap Mode to be used by the launched command.
+func (attr *Launcher) SetMode(mode Mode) {
+	if attr == nil {
+		return
+	}
+	attr.mu.Lock()
+	defer attr.mu.Unlock()
+	attr.changeMode = true
+	attr.mode = mode
+}
+
+// SetIAB specifies the IAB capability vectors to be inherited by the
+// launched command. A nil value means the prevailing vectors of the
+// parent will be inherited. Note, a duplicate of the provided IAB
+// tuple is actually stored, so concurrent modification of the iab
+// value does not affect the launcher.
+func (attr *Launcher) SetIAB(iab *IAB) {
+	if attr == nil {
+		return
+	}
+	attr.mu.Lock()
+	defer attr.mu.Unlock()
+	attr.iab, _ = iab.Dup()
+}
+
+// SetChroot specifies the chroot value to be used by the launched
+// command. An empty value means no-change from the prevailing value.
+func (attr *Launcher) SetChroot(root string) {
+	if attr == nil {
+		return
+	}
+	attr.mu.Lock()
+	defer attr.mu.Unlock()
+	attr.chroot = root
+}
+
+// lResult is used to get the result from the doomed launcher thread.
+type lResult struct {
+	// tgid holds the thread group id, which is an alias for the
+	// shared process id of the parent program.
+	tgid int
+
+	// tid holds the tid of the locked launching thread which dies
+	// as the launch completes.
+	tid int
+
+	// pid is the pid of the launched program (path, args). In
+	// the case of a FuncLauncher() this value is zero on success.
+	// pid holds -1 in the case of error.
+	pid int
+
+	// err is nil on success, but otherwise holds the reason the
+	// launch failed.
+	err error
+}
+
+// ErrLaunchFailed is returned if a launch was aborted with no more
+// specific error.
+var ErrLaunchFailed = errors.New("launch failed")
+
+// ErrNoLaunch indicates the go runtime available to this binary does
+// not reliably support launching. See cap.LaunchSupported.
+var ErrNoLaunch = errors.New("launch not supported")
+
+// ErrAmbiguousChroot indicates that the Launcher is being used in
+// addition to a callback supplied Chroot. The former should be used
+// exclusively for this.
+var ErrAmbiguousChroot = errors.New("use Launcher for chroot")
+
+// ErrAmbiguousIDs indicates that the Launcher is being used in
+// addition to a callback supplied Credentials. The former should be
+// used exclusively for this.
+var ErrAmbiguousIDs = errors.New("use Launcher for uids and gids")
+
+// ErrAmbiguousAmbient indicates that the Launcher is being used in
+// addition to a callback supplied ambient set and the former should
+// be used exclusively in a Launch call.
+var ErrAmbiguousAmbient = errors.New("use Launcher for ambient caps")
+
+// lName is the name we temporarily give to the launcher thread. Note,
+// this will likely stick around in the process tree if the Go runtime
+// is not cleaning up locked launcher OS threads.
+var lName = []byte("cap-launcher\000")
+
+// <uapi/linux/prctl.h>
+const prSetName = 15
+
+//go:uintptrescapes
+func launch(result chan<- lResult, attr *Launcher, data interface{}, quit chan<- struct{}) {
+	if quit != nil {
+		defer close(quit)
+	}
+
+	// Thread group ID is the process ID.
+	tgid := syscall.Getpid()
+
+	// This code waits until we are not scheduled on the parent
+	// thread.  We will exit this thread once the child has
+	// launched.
+	runtime.LockOSThread()
+	tid := syscall.Gettid()
+	if tid == tgid {
+		// Force the go runtime to find a new thread to run
+		// on.  (It is really awkward to have a process'
+		// PID=TID thread in effectively a zombie state. The
+		// Go runtime has support for it, but pstree gives
+		// ugly output since the prSetName value sticks around
+		// after launch completion...)
+		//
+		// (Optimize for time to debug by reducing ugly spam
+		// like this.)
+		quit := make(chan struct{})
+		go launch(result, attr, data, quit)
+
+		// Wait for that go routine to complete.
+		<-quit
+		runtime.UnlockOSThread()
+		return
+	}
+
+	// Provide a way to serialize the caller on the thread
+	// completing. This should be done by the one locked tid that
+	// does the ForkExec(). All the other threads have a different
+	// security context.
+	defer close(result)
+
+	// By never releasing the LockOSThread here, we guarantee that
+	// the runtime will terminate the current OS thread once this
+	// function returns.
+	scwSetState(launchIdle, launchActive, tid)
+
+	// Name the launcher thread - transient, but helps to debug if
+	// the callbackFn or something else hangs up.
+	singlesc.prctlrcall(prSetName, uintptr(unsafe.Pointer(&lName[0])), 0)
+
+	var pa *syscall.ProcAttr
+	var err error
+	var needChroot bool
+
+	// Only prepare a non-nil pa value if a path is provided.
+	if attr.path != "" {
+		// By default the following file descriptors are preserved for
+		// the child. The user should modify them in the callback for
+		// stdin/out/err redirection.
+		pa = &syscall.ProcAttr{
+			Files: []uintptr{0, 1, 2},
+		}
+		if len(attr.env) != 0 {
+			pa.Env = attr.env
+		} else {
+			pa.Env = os.Environ()
+		}
+	}
+
+	var pid int
+	if attr.callbackFn != nil {
+		if err = attr.callbackFn(pa, data); err != nil {
+			goto abort
+		}
+		if attr.path == "" {
+			goto abort
+		}
+	}
+
+	if needChroot, err = validatePA(pa, attr.chroot); err != nil {
+		goto abort
+	}
+	if attr.changeUIDs {
+		if err = singlesc.setUID(attr.uid); err != nil {
+			goto abort
+		}
+	}
+	if attr.changeGIDs {
+		if err = singlesc.setGroups(attr.gid, attr.groups); err != nil {
+			goto abort
+		}
+	}
+	if attr.changeMode {
+		if err = singlesc.setMode(attr.mode); err != nil {
+			goto abort
+		}
+	}
+	if attr.iab != nil {
+		// Note, since .iab is a private copy we don't need to
+		// lock it around this call.
+		if err = singlesc.iabSetProc(attr.iab); err != nil {
+			goto abort
+		}
+	}
+
+	if needChroot {
+		c := GetProc()
+		if err = c.SetFlag(Effective, true, SYS_CHROOT); err != nil {
+			goto abort
+		}
+		if err = singlesc.setProc(c); err != nil {
+			goto abort
+		}
+	}
+	pid, err = syscall.ForkExec(attr.path, attr.args, pa)
+
+abort:
+	if err != nil {
+		pid = -1
+	}
+	result <- lResult{
+		tgid: tgid,
+		tid:  tid,
+		pid:  pid,
+		err:  err,
+	}
+}
+
+// pollForThreadExit waits for a thread to terminate. Only after the
+// thread has safely exited is it safe to resume POSIX semantics
+// security state mirroring for the rest of the process threads.
+func (v lResult) pollForThreadExit() {
+	if v.tid == -1 {
+		return
+	}
+	for syscall.Tgkill(v.tgid, v.tid, 0) == nil {
+		runtime.Gosched()
+	}
+	scwSetState(launchActive, launchIdle, v.tid)
+}
+
+// Launch performs a callback function and/or new program launch with
+// a disposable security state. The data object, when not nil, can be
+// used to communicate with the callback. It can also be used to
+// return details from the callback function's execution.
+//
+// If the attr was created with NewLauncher(), this present function
+// will return the pid of the launched process, or -1 and a non-nil
+// error.
+//
+// If the attr was created with FuncLauncher(), this present function
+// will return 0, nil if the callback function exits without
+// error. Otherwise it will return -1 and the non-nil error of the
+// callback return value.
+//
+// Note, while the disposable security state thread makes some
+// operations seem more isolated - they are *not securely
+// isolated*. Launching is inherently violating the POSIX semantics
+// maintained by the rest of the "libcap/cap" package, so think of
+// launching as a convenience wrapper around fork()ing.
+//
+// Advanced user note: if the caller of this function thinks they know
+// what they are doing by using runtime.LockOSThread() before invoking
+// this function, they should understand that the OS thread invoking
+// (*Launcher).Launch() is *not* guaranteed to be the one used for the
+// disposable security state to perform the launch. If said caller
+// needs to run something on the disposable security state thread,
+// they should do it via the launch callback function mechanism. (The
+// Go runtime is complicated and this is why this Launch mechanism
+// provides the optional callback function.)
+func (attr *Launcher) Launch(data interface{}) (int, error) {
+	if !LaunchSupported {
+		return -1, ErrNoLaunch
+	}
+	if attr == nil {
+		return -1, ErrLaunchFailed
+	}
+	attr.mu.RLock()
+	defer attr.mu.RUnlock()
+	if attr.callbackFn == nil && (attr.path == "" || len(attr.args) == 0) {
+		return -1, ErrLaunchFailed
+	}
+
+	result := make(chan lResult)
+	go launch(result, attr, data, nil)
+	v, ok := <-result
+	if !ok {
+		return -1, ErrLaunchFailed
+	}
+	<-result // blocks until the launch() goroutine exits
+	v.pollForThreadExit()
+	return v.pid, v.err
+}
diff --git a/cap/legacy.go b/cap/legacy.go
new file mode 100644
index 0000000..0bfd93e
--- /dev/null
+++ b/cap/legacy.go
@@ -0,0 +1,7 @@
+// +build linux,arm linux,386
+
+package cap
+
+import "syscall"
+
+var sysSetGroupsVariant = uintptr(syscall.SYS_SETGROUPS32)
diff --git a/cap/modern.go b/cap/modern.go
new file mode 100644
index 0000000..ad89416
--- /dev/null
+++ b/cap/modern.go
@@ -0,0 +1,8 @@
+// +build linux,!arm
+// +build linux,!386
+
+package cap
+
+import "syscall"
+
+var sysSetGroupsVariant = uintptr(syscall.SYS_SETGROUPS)
diff --git a/cap/names.go b/cap/names.go
new file mode 100644
index 0000000..2655380
--- /dev/null
+++ b/cap/names.go
@@ -0,0 +1,440 @@
+package cap
+
+/* ** DO NOT EDIT THIS FILE. IT WAS AUTO-GENERATED BY LIBCAP'S GO BUILDER (mknames.go) ** */
+
+// NamedCount holds the number of capability values, with official
+// names, known at the time this libcap/cap version was released. The
+// "../libcap/cap" package is fully able to manipulate higher numbered
+// capability values by numerical value. However, if you find
+// cap.NamedCount < cap.MaxBits(), it is probably time to upgrade this
+// package on your system.
+//
+// FWIW the userspace tool '/sbin/capsh' also contains a runtime check
+// for the condition that libcap is behind the running kernel in this
+// way.
+const NamedCount = 41
+
+// CHOWN etc., are the named capability values of the Linux
+// kernel. The canonical source for each name is the
+// "uapi/linux/capabilities.h" file.  Some values may not be available
+// (yet) where the kernel is older.  The actual number of capabilities
+// supported by the running kernel can be obtained using the
+// cap.MaxBits() function.
+const (
+	// CHOWN allows a process to arbitrarily change the user and
+	// group ownership of a file.
+	CHOWN Value = iota
+
+	// DAC_OVERRIDE allows a process to override all Discretionary
+	// Access Control (DAC) access, including ACL execute
+	// access. That is read, write or execute files that the
+	// process would otherwise not have access to. This
+	// excludes DAC access covered by cap.LINUX_IMMUTABLE.
+	DAC_OVERRIDE
+
+	// DAC_READ_SEARCH allows a process to override all DAC restrictions
+	// limiting the read and search of files and
+	// directories. This excludes DAC access covered by
+	// cap.LINUX_IMMUTABLE.
+	DAC_READ_SEARCH
+
+	// FOWNER allows a process to perform operations on files, even
+	// where file owner ID should otherwise need be equal to
+	// the UID, except where cap.FSETID is applicable. It
+	// doesn't override MAC and DAC restrictions.
+	//
+	// This capability permits the deletion of a file owned
+	// by another UID in a directory protected by the sticky
+	// (t) bit.
+	FOWNER
+
+	// FSETID allows a process to set the S_ISUID and S_ISGID bits of
+	// the file permissions, even when the process' effective
+	// UID or GID/supplementary GIDs do not match that of the
+	// file.
+	FSETID
+
+	// KILL allows a process to send a kill(2) signal to any other
+	// process - overriding the limitation that there be a
+	// [E]UID match between source and target process.
+	KILL
+
+	// SETGID allows a process to freely manipulate its own GIDs:
+	//   - arbitrarily set the GID, EGID, REGID, RESGID values
+	//   - arbitrarily set the supplementary GIDs
+	//   - allows the forging of GID credentials passed over a
+	//     socket
+	SETGID
+
+	// SETUID allows a process to freely manipulate its own UIDs:
+	//   - arbitrarily set the UID, EUID, REUID and RESUID
+	//     values
+	//   - allows the forging of UID credentials passed over a
+	//     socket
+	SETUID
+
+	// SETPCAP allows a process to freely manipulate its inheritable
+	// capabilities.
+	//
+	// Linux supports the POSIX.1e Inheritable set, the POSIX.1e (X
+	// vector) known in Linux as the Bounding vector, as well as
+	// the Linux extension Ambient vector.
+	//
+	// This capability permits dropping bits from the Bounding
+	// vector (ie. raising B bits in the libcap IAB
+	// representation). It also permits the process to raise
+	// Ambient vector bits that are both raised in the Permitted
+	// and Inheritable sets of the process. This capability cannot
+	// be used to raise Permitted bits, Effective bits beyond those
+	// already present in the process' permitted set, or
+	// Inheritable bits beyond those present in the Bounding
+	// vector.
+	//
+	// [Historical note: prior to the advent of file capabilities
+	// (2008), this capability was suppressed by default, as its
+	// unsuppressed behavior was not auditable: it could
+	// asynchronously grant its own Permitted capabilities to and
+	// remove capabilities from other processes arbitrarily. The
+	// former leads to undefined behavior, and the latter is better
+	// served by the kill system call.]
+	SETPCAP
+
+	// LINUX_IMMUTABLE allows a process to modify the S_IMMUTABLE and
+	// S_APPEND file attributes.
+	LINUX_IMMUTABLE
+
+	// NET_BIND_SERVICE allows a process to bind to privileged ports:
+	//   - TCP/UDP sockets below 1024
+	//   - ATM VCIs below 32
+	NET_BIND_SERVICE
+
+	// NET_BROADCAST allows a process to broadcast to the network and to
+	// listen to multicast.
+	NET_BROADCAST
+
+	// NET_ADMIN allows a process to perform network configuration
+	// operations:
+	//   - interface configuration
+	//   - administration of IP firewall, masquerading and
+	//     accounting
+	//   - setting debug options on sockets
+	//   - modification of routing tables
+	//   - setting arbitrary process, and process group
+	//     ownership on sockets
+	//   - binding to any address for transparent proxying
+	//     (this is also allowed via cap.NET_RAW)
+	//   - setting TOS (Type of service)
+	//   - setting promiscuous mode
+	//   - clearing driver statistics
+	//   - multicasting
+	//   - read/write of device-specific registers
+	//   - activation of ATM control sockets
+	NET_ADMIN
+
+	// NET_RAW allows a process to use raw networking:
+	//   - RAW sockets
+	//   - PACKET sockets
+	//   - binding to any address for transparent proxying
+	//     (also permitted via cap.NET_ADMIN)
+	NET_RAW
+
+	// IPC_LOCK allows a process to lock shared memory segments for IPC
+	// purposes.  Also enables mlock and mlockall system
+	// calls.
+	IPC_LOCK
+
+	// IPC_OWNER allows a process to override IPC ownership checks.
+	IPC_OWNER
+
+	// SYS_MODULE allows a process to initiate the loading and unloading
+	// of kernel modules. This capability can effectively
+	// modify kernel without limit.
+	SYS_MODULE
+
+	// SYS_RAWIO allows a process to perform raw IO:
+	//   - permit ioperm/iopl access
+	//   - permit sending USB messages to any device via
+	//     /dev/bus/usb
+	SYS_RAWIO
+
+	// SYS_CHROOT allows a process to perform a chroot syscall to change
+	// the effective root of the process' file system:
+	// redirect the directory "/" to some other location.
+	SYS_CHROOT
+
+	// SYS_PTRACE allows a process to perform a ptrace() of any other
+	// process.
+	SYS_PTRACE
+
+	// SYS_PACCT allows a process to configure process accounting.
+	SYS_PACCT
+
+	// SYS_ADMIN allows a process to perform a somewhat arbitrary
+	// grab-bag of privileged operations. Over time, this
+	// capability should weaken as specific capabilities are
+	// created for subsets of cap.SYS_ADMIN's functionality:
+	//   - configuration of the secure attention key
+	//   - administration of the random device
+	//   - examination and configuration of disk quotas
+	//   - setting the domainname
+	//   - setting the hostname
+	//   - calling bdflush()
+	//   - mount() and umount(), setting up new SMB connection
+	//   - some autofs root ioctls
+	//   - nfsservctl
+	//   - VM86_REQUEST_IRQ
+	//   - to read/write pci config on alpha
+	//   - irix_prctl on mips (setstacksize)
+	//   - flushing all cache on m68k (sys_cacheflush)
+	//   - removing semaphores
+	//   - Used instead of cap.CHOWN to "chown" IPC message
+	//     queues, semaphores and shared memory
+	//   - locking/unlocking of shared memory segment
+	//   - turning swap on/off
+	//   - forged pids on socket credentials passing
+	//   - setting readahead and flushing buffers on block
+	//     devices
+	//   - setting geometry in floppy driver
+	//   - turning DMA on/off in xd driver
+	//   - administration of md devices (mostly the above, but
+	//     some extra ioctls)
+	//   - tuning the ide driver
+	//   - access to the nvram device
+	//   - administration of apm_bios, serial and bttv (TV)
+	//     device
+	//   - manufacturer commands in isdn CAPI support driver
+	//   - reading non-standardized portions of PCI
+	//     configuration space
+	//   - DDI debug ioctl on sbpcd driver
+	//   - setting up serial ports
+	//   - sending raw qic-117 commands
+	//   - enabling/disabling tagged queuing on SCSI
+	//     controllers and sending arbitrary SCSI commands
+	//   - setting encryption key on loopback filesystem
+	//   - setting zone reclaim policy
+	SYS_ADMIN
+
+	// SYS_BOOT allows a process to initiate a reboot of the system.
+	SYS_BOOT
+
+	// SYS_NICE allows a process to manipulate the execution priorities
+	// of arbitrary processes:
+	//   - those involving different UIDs
+	//   - setting their CPU affinity
+	//   - alter the FIFO vs. round-robin (realtime)
+	//     scheduling for itself and other processes.
+	SYS_NICE
+
+	// SYS_RESOURCE allows a process to adjust resource related parameters
+	// of processes and the system:
+	//   - set and override resource limits
+	//   - override quota limits
+	//   - override the reserved space on ext2 filesystem
+	//     (this can also be achieved via cap.FSETID)
+	//   - modify the data journaling mode on ext3 filesystem,
+	//     which uses journaling resources
+	//   - override size restrictions on IPC message queues
+	//   - configure more than 64Hz interrupts from the
+	//     real-time clock
+	//   - override the maximum number of consoles for console
+	//     allocation
+	//   - override the maximum number of keymaps
+	SYS_RESOURCE
+
+	// SYS_TIME allows a process to perform time manipulation of clocks:
+	//   - alter the system clock
+	//   - enable irix_stime on MIPS
+	//   - set the real-time clock
+	SYS_TIME
+
+	// SYS_TTY_CONFIG allows a process to manipulate tty devices:
+	//   - configure tty devices
+	//   - perform vhangup() of a tty
+	SYS_TTY_CONFIG
+
+	// MKNOD allows a process to perform privileged operations with
+	// the mknod() system call.
+	MKNOD
+
+	// LEASE allows a process to take leases on files.
+	LEASE
+
+	// AUDIT_WRITE allows a process to write to the audit log via a
+	// unicast netlink socket.
+	AUDIT_WRITE
+
+	// AUDIT_CONTROL allows a process to configure audit logging via a
+	// unicast netlink socket.
+	AUDIT_CONTROL
+
+	// SETFCAP allows a process to set capabilities on files.
+	// Permits a process to uid_map the uid=0 of the
+	// parent user namespace into that of the child
+	// namespace. Also, permits a process to override
+	// securebits locks through user namespace
+	// creation.
+	SETFCAP
+
+	// MAC_OVERRIDE allows a process to override Mandatory Access Control
+	// (MAC) access. Not all kernels are configured with a MAC
+	// mechanism, but this is the capability reserved for
+	// overriding them.
+	MAC_OVERRIDE
+
+	// MAC_ADMIN allows a process to configure the Mandatory Access
+	// Control (MAC) policy. Not all kernels are configured
+	// with a MAC enabled, but if they are this capability is
+	// reserved for code to perform administration tasks.
+	MAC_ADMIN
+
+	// SYSLOG allows a process to configure the kernel's syslog
+	// (printk) behavior.
+	SYSLOG
+
+	// WAKE_ALARM allows a process to trigger something that can wake the
+	// system up.
+	WAKE_ALARM
+
+	// BLOCK_SUSPEND allows a process to block system suspends - prevent the
+	// system from entering a lower power state.
+	BLOCK_SUSPEND
+
+	// AUDIT_READ allows a process to read the audit log via a multicast
+	// netlink socket.
+	AUDIT_READ
+
+	// PERFMON allows a process to enable observability of privileged
+	// operations related to performance. The mechanisms
+	// include perf_events, i915_perf and other kernel
+	// subsystems.
+	PERFMON
+
+	// BPF allows a process to manipulate aspects of the kernel
+	// enhanced Berkeley Packet Filter (BPF) system. This is
+	// an execution subsystem of the kernel, that manages BPF
+	// programs. cap.BPF permits a process to:
+	//   - create all types of BPF maps
+	//   - advanced verifier features:
+	//     - indirect variable access
+	//     - bounded loops
+	//     - BPF to BPF function calls
+	//     - scalar precision tracking
+	//     - larger complexity limits
+	//     - dead code elimination
+	//     - potentially other features
+	//
+	// Other capabilities can be used together with cap.BPF to
+	// further manipulate the BPF system:
+	//   - cap.PERFMON relaxes the verifier checks as follows:
+	//     - BPF programs can use pointer-to-integer
+	//       conversions
+	//     - speculation attack hardening measures can be
+	//       bypassed
+	//     - bpf_probe_read to read arbitrary kernel memory is
+	//       permitted
+	//     - bpf_trace_printk to print the content of kernel
+	//       memory
+	//   - cap.SYS_ADMIN permits the following:
+	//     - use of bpf_probe_write_user
+	//     - iteration over the system-wide loaded programs,
+	//       maps, links, BTFs and convert their IDs to file
+	//       descriptors.
+	//   - cap.PERFMON is required to load tracing programs.
+	//   - cap.NET_ADMIN is required to load networking
+	//     programs.
+	BPF
+
+	// CHECKPOINT_RESTORE allows a process to perform checkpoint
+	// and restore operations. Also permits
+	// explicit PID control via clone3() and
+	// also writing to ns_last_pid.
+	CHECKPOINT_RESTORE
+)
+
+var names = map[Value]string{
+	CHOWN:              "cap_chown",
+	DAC_OVERRIDE:       "cap_dac_override",
+	DAC_READ_SEARCH:    "cap_dac_read_search",
+	FOWNER:             "cap_fowner",
+	FSETID:             "cap_fsetid",
+	KILL:               "cap_kill",
+	SETGID:             "cap_setgid",
+	SETUID:             "cap_setuid",
+	SETPCAP:            "cap_setpcap",
+	LINUX_IMMUTABLE:    "cap_linux_immutable",
+	NET_BIND_SERVICE:   "cap_net_bind_service",
+	NET_BROADCAST:      "cap_net_broadcast",
+	NET_ADMIN:          "cap_net_admin",
+	NET_RAW:            "cap_net_raw",
+	IPC_LOCK:           "cap_ipc_lock",
+	IPC_OWNER:          "cap_ipc_owner",
+	SYS_MODULE:         "cap_sys_module",
+	SYS_RAWIO:          "cap_sys_rawio",
+	SYS_CHROOT:         "cap_sys_chroot",
+	SYS_PTRACE:         "cap_sys_ptrace",
+	SYS_PACCT:          "cap_sys_pacct",
+	SYS_ADMIN:          "cap_sys_admin",
+	SYS_BOOT:           "cap_sys_boot",
+	SYS_NICE:           "cap_sys_nice",
+	SYS_RESOURCE:       "cap_sys_resource",
+	SYS_TIME:           "cap_sys_time",
+	SYS_TTY_CONFIG:     "cap_sys_tty_config",
+	MKNOD:              "cap_mknod",
+	LEASE:              "cap_lease",
+	AUDIT_WRITE:        "cap_audit_write",
+	AUDIT_CONTROL:      "cap_audit_control",
+	SETFCAP:            "cap_setfcap",
+	MAC_OVERRIDE:       "cap_mac_override",
+	MAC_ADMIN:          "cap_mac_admin",
+	SYSLOG:             "cap_syslog",
+	WAKE_ALARM:         "cap_wake_alarm",
+	BLOCK_SUSPEND:      "cap_block_suspend",
+	AUDIT_READ:         "cap_audit_read",
+	PERFMON:            "cap_perfmon",
+	BPF:                "cap_bpf",
+	CHECKPOINT_RESTORE: "cap_checkpoint_restore",
+}
+
+var bits = map[string]Value{
+	"cap_chown":              CHOWN,
+	"cap_dac_override":       DAC_OVERRIDE,
+	"cap_dac_read_search":    DAC_READ_SEARCH,
+	"cap_fowner":             FOWNER,
+	"cap_fsetid":             FSETID,
+	"cap_kill":               KILL,
+	"cap_setgid":             SETGID,
+	"cap_setuid":             SETUID,
+	"cap_setpcap":            SETPCAP,
+	"cap_linux_immutable":    LINUX_IMMUTABLE,
+	"cap_net_bind_service":   NET_BIND_SERVICE,
+	"cap_net_broadcast":      NET_BROADCAST,
+	"cap_net_admin":          NET_ADMIN,
+	"cap_net_raw":            NET_RAW,
+	"cap_ipc_lock":           IPC_LOCK,
+	"cap_ipc_owner":          IPC_OWNER,
+	"cap_sys_module":         SYS_MODULE,
+	"cap_sys_rawio":          SYS_RAWIO,
+	"cap_sys_chroot":         SYS_CHROOT,
+	"cap_sys_ptrace":         SYS_PTRACE,
+	"cap_sys_pacct":          SYS_PACCT,
+	"cap_sys_admin":          SYS_ADMIN,
+	"cap_sys_boot":           SYS_BOOT,
+	"cap_sys_nice":           SYS_NICE,
+	"cap_sys_resource":       SYS_RESOURCE,
+	"cap_sys_time":           SYS_TIME,
+	"cap_sys_tty_config":     SYS_TTY_CONFIG,
+	"cap_mknod":              MKNOD,
+	"cap_lease":              LEASE,
+	"cap_audit_write":        AUDIT_WRITE,
+	"cap_audit_control":      AUDIT_CONTROL,
+	"cap_setfcap":            SETFCAP,
+	"cap_mac_override":       MAC_OVERRIDE,
+	"cap_mac_admin":          MAC_ADMIN,
+	"cap_syslog":             SYSLOG,
+	"cap_wake_alarm":         WAKE_ALARM,
+	"cap_block_suspend":      BLOCK_SUSPEND,
+	"cap_audit_read":         AUDIT_READ,
+	"cap_perfmon":            PERFMON,
+	"cap_bpf":                BPF,
+	"cap_checkpoint_restore": CHECKPOINT_RESTORE,
+}
diff --git a/cap/oslockluster.go b/cap/oslockluster.go
new file mode 100644
index 0000000..0b2cf2e
--- /dev/null
+++ b/cap/oslockluster.go
@@ -0,0 +1,33 @@
+// +build !go1.10
+
+package cap
+
+import "syscall"
+
+// LaunchSupported indicates that it is safe to return from a locked OS
+// Thread and have that OS Thread be terminated by the runtime. The
+// Launch functionality really needs to rely on the fact that an
+// excess of runtime.LockOSThread() vs. runtime.UnlockOSThread() calls
+// in a returning go routine will cause the underlying locked OSThread
+// to terminate. That feature was added to the Go runtime in version
+// 1.10.
+//
+// See these bugs for the discussion and feature assumed by the code
+// in this Launch() functionality:
+//
+//   https://github.com/golang/go/issues/20395
+//   https://github.com/golang/go/issues/20458
+//
+// A value of false for this constant causes the Launch functionality
+// to fail with an error: cap.ErrNoLaunch. If this value is false you
+// have two choices with respect to the Launch functionality:
+//
+//   1) don't use cap.(*Launcher).Launch()
+//   2) upgrade your Go toolchain to 1.10+ (ie., do this one).
+const LaunchSupported = false
+
+// validatePA confirms that the pa.Sys entry is not incompatible with
+// Launch.
+func validatePA(pa *syscall.ProcAttr, chroot string) (bool, error) {
+	return false, ErrNoLaunch
+}
diff --git a/cap/oslocks.go b/cap/oslocks.go
new file mode 100644
index 0000000..9754020
--- /dev/null
+++ b/cap/oslocks.go
@@ -0,0 +1,51 @@
+// +build go1.10
+
+package cap
+
+import "syscall"
+
+// LaunchSupported indicates that it is safe to return from a locked
+// OS Thread and have that OS Thread be terminated by the runtime. The
+// Launch functionality really needs to rely on the fact that an
+// excess of runtime.LockOSThread() vs. runtime.UnlockOSThread() calls
+// in a returning go routine will cause the underlying locked OSThread
+// to terminate. That feature was added to the Go runtime in version
+// 1.10.
+//
+// See these bugs for the discussion and feature assumed by the code
+// in this Launch() functionality:
+//
+//   https://github.com/golang/go/issues/20395
+//   https://github.com/golang/go/issues/20458
+//
+// A value of false for this constant causes the Launch functionality
+// to fail with an error: cap.ErrNoLaunch. If this value is false you
+// have two choices with respect to the Launch functionality:
+//
+//   1) don't use cap.(*Launcher).Launch()
+//   2) upgrade your Go toolchain to 1.10+ (ie., do this one).
+const LaunchSupported = true
+
+// validatePA confirms that the pa.Sys entry is not incompatible with
+// Launch and loads up the chroot value.
+func validatePA(pa *syscall.ProcAttr, chroot string) (bool, error) {
+	s := pa.Sys
+	if s == nil {
+		if chroot == "" {
+			return false, nil
+		}
+		s = &syscall.SysProcAttr{
+			Chroot: chroot,
+		}
+		pa.Sys = s
+	} else if s.Chroot != "" {
+		return false, ErrAmbiguousChroot
+	}
+	if s.Credential != nil {
+		return false, ErrAmbiguousIDs
+	}
+	if len(s.AmbientCaps) != 0 {
+		return false, ErrAmbiguousAmbient
+	}
+	return s != nil && s.Chroot != "", nil
+}
diff --git a/cap/syscalls.go b/cap/syscalls.go
new file mode 100644
index 0000000..37121e0
--- /dev/null
+++ b/cap/syscalls.go
@@ -0,0 +1,121 @@
+package cap
+
+import (
+	"runtime"
+	"sync"
+	"syscall"
+
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+// multisc provides syscalls overridable for testing purposes that
+// support a single kernel security state for all OS threads.
+// We use this version when we are cgo compiling because
+// we need to manage the native C pthreads too.
+var multisc = &syscaller{
+	w3: psx.Syscall3,
+	w6: psx.Syscall6,
+	r3: syscall.RawSyscall,
+	r6: syscall.RawSyscall6,
+}
+
+// singlesc provides a single threaded implementation. Users should
+// take care to ensure the thread is locked and marked nogc.
+var singlesc = &syscaller{
+	w3: syscall.RawSyscall,
+	w6: syscall.RawSyscall6,
+	r3: syscall.RawSyscall,
+	r6: syscall.RawSyscall6,
+}
+
+// launchState is used to track which variant of the write syscalls
+// should execute.
+type launchState int
+
+// these states are used to understand when a launch is in progress.
+const (
+	launchIdle launchState = iota
+	launchActive
+	launchBlocked
+)
+
+// scwMu is used to fully serialize the write system calls. Note, this
+// would generally not be necessary, but in the case of Launch we get
+// into a situation where the launching thread is temporarily allowed
+// to deviate from the kernel state of the rest of the runtime and
+// allowing other threads to perform w* syscalls will potentially
+// interfere with the launching process. In pure Go binaries, this
+// will lead inevitably to a panic when the AllThreadsSyscall
+// discovers inconsistent thread state.
+//
+// scwMu protects scwTIDs and scwState
+var scwMu sync.Mutex
+
+// scwTIDs holds the thread IDs of the threads that are executing a
+// launch; it is empty when no launches are occurring.
+var scwTIDs = make(map[int]bool)
+
+// scwState captures whether a launch is in progress or not.
+var scwState = launchIdle
+
+// scwCond is used to announce when scwState changes to other
+// goroutines waiting for it to change.
+var scwCond = sync.NewCond(&scwMu)
+
+// scwSetState blocks until a launch state change between states from
+// and to occurs. We use this for more context specific syscaller
+// use. In the case that the caller is requesting a launchActive ->
+// launchIdle transition they are declaring that tid is no longer
+// launching. If another thread is also launching the call will
+// complete, but the launchState will remain launchActive.
+func scwSetState(from, to launchState, tid int) {
+	scwMu.Lock()
+	for scwState != from {
+		if scwState == launchActive && from == launchIdle && to == launchActive {
+			break // This "transition" is also allowed.
+		}
+		scwCond.Wait()
+	}
+	if from == launchIdle && to == launchActive {
+		scwTIDs[tid] = true
+	} else if from == launchActive && to == launchIdle {
+		delete(scwTIDs, tid)
+		if len(scwTIDs) != 0 {
+			to = from // not actually idle
+		}
+	}
+	scwState = to
+	scwCond.Broadcast()
+	scwMu.Unlock()
+}
+
+// scwStateSC blocks until the current syscaller is available for
+// writes, and then marks launchBlocked. Use scwSetState to perform
+// the reverse transition (blocked->returned state value).
+func scwStateSC() (launchState, *syscaller) {
+	sc := multisc
+	scwMu.Lock()
+	for {
+		if scwState == launchIdle {
+			break
+		}
+		runtime.LockOSThread()
+		if scwState == launchActive && scwTIDs[syscall.Gettid()] {
+			sc = singlesc
+			// note, we don't runtime.UnlockOSThread()
+			// here because we have no reason to ever
+			// allow this thread to return to normal use -
+			// we need it dead before we can return to the
+			// launchIdle state.
+			break
+		}
+		runtime.UnlockOSThread()
+		scwCond.Wait()
+	}
+	old := scwState
+	scwState = launchBlocked
+	scwCond.Broadcast()
+	scwMu.Unlock()
+
+	return old, sc
+}
diff --git a/cap/text.go b/cap/text.go
new file mode 100644
index 0000000..42cb7fc
--- /dev/null
+++ b/cap/text.go
@@ -0,0 +1,326 @@
+package cap
+
+import (
+	"bufio"
+	"errors"
+	"strconv"
+	"strings"
+)
+
+// String converts a capability Value into its canonical text
+// representation.
+func (v Value) String() string {
+	name, ok := names[v]
+	if ok {
+		return name
+	}
+	// Un-named capabilities are referred to numerically (in decimal).
+	return strconv.Itoa(int(v))
+}
+
+// FromName converts a named capability Value to its binary
+// representation.
+func FromName(name string) (Value, error) {
+	startUp.Do(multisc.cInit)
+	v, ok := bits[name]
+	if ok {
+		if v >= Value(words*32) {
+			return 0, ErrBadValue
+		}
+		return v, nil
+	}
+	i, err := strconv.Atoi(name)
+	if err != nil {
+		return 0, err
+	}
+	if i >= 0 && i < int(words*32) {
+		return Value(i), nil
+	}
+	return 0, ErrBadValue
+}
+
+const (
+	eBin uint = (1 << Effective)
+	pBin      = (1 << Permitted)
+	iBin      = (1 << Inheritable)
+)
+
+var combos = []string{"", "e", "p", "ep", "i", "ei", "ip", "eip"}
+
+// histo generates a histogram of flag state combinations.
+// Note: c is locked by or private to the caller.
+func (c *Set) histo(bins []int, patterns []uint, from, limit Value) uint {
+	for v := from; v < limit; v++ {
+		b := uint(v & 31)
+		u, bit, err := bitOf(0, v)
+		if err != nil {
+			break
+		}
+		x := uint((c.flat[u][Effective]&bit)>>b) * eBin
+		x |= uint((c.flat[u][Permitted]&bit)>>b) * pBin
+		x |= uint((c.flat[u][Inheritable]&bit)>>b) * iBin
+		bins[x]++
+		patterns[uint(v)] = x
+	}
+	// Note, in the loop, we use >= to pick the smallest value for
+	// m with the highest bin value. That is ties break towards
+	// m=0.
+	m := uint(7)
+	for t := m; t > 0; {
+		t--
+		if bins[t] >= bins[m] {
+			m = t
+		}
+	}
+	return m
+}
+
+// String converts a full capability Set into a single short readable
+// string representation (which may contain spaces). See the
+// cap.FromText() function for an explanation of its return values.
+//
+// Note (*cap.Set).String() may evolve to generate more compact
+// strings representing a given Set over time, but it should
+// maintain compatibility with the libcap:cap_to_text() function for
+// any given release. Further, it will always be an inverse of
+// cap.FromText().
+func (c *Set) String() string {
+	if err := c.good(); err != nil {
+		return "<invalid>"
+	}
+	bins := make([]int, 8)
+	patterns := make([]uint, maxValues)
+
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	// Note, in order to have a *Set pointer, startUp.Do(cInit)
+	// must have been called which sets maxValues.
+	m := c.histo(bins, patterns, 0, Value(maxValues))
+
+	// Background state is the most popular of the named bits.
+	vs := []string{"=" + combos[m]}
+	for i := uint(8); i > 0; {
+		i--
+		if i == m || bins[i] == 0 {
+			continue
+		}
+		var list []string
+		for j, p := range patterns {
+			if p != i {
+				continue
+			}
+			list = append(list, Value(j).String())
+		}
+		x := strings.Join(list, ",")
+		var y, z string
+		if cf := i & ^m; cf != 0 {
+			op := "+"
+			if len(vs) == 1 && vs[0] == "=" {
+				// Special case "= foo+..." == "foo=...".
+				// Prefer the latter because it is more compact.
+				vs = nil
+				op = "="
+			}
+			y = op + combos[cf]
+		}
+		if cf := m & ^i; cf != 0 {
+			z = "-" + combos[cf]
+		}
+		vs = append(vs, x+y+z)
+	}
+
+	// The unnamed bits can only add to the above named ones since
+	// unnamed ones are always defaulted to lowered.
+	uBins := make([]int, 8)
+	uPatterns := make([]uint, 32*words)
+	c.histo(uBins, uPatterns, Value(maxValues), 32*Value(words))
+	for i := uint(7); i > 0; i-- {
+		if uBins[i] == 0 {
+			continue
+		}
+		var list []string
+		for j, p := range uPatterns {
+			if p != i {
+				continue
+			}
+			list = append(list, Value(j).String())
+		}
+		vs = append(vs, strings.Join(list, ",")+"+"+combos[i])
+	}
+
+	return strings.Join(vs, " ")
+}
+
+// ErrBadText is returned if the text for a capability set cannot be parsed.
+var ErrBadText = errors.New("bad text")
+
+// FromText converts the canonical text representation for a Set into
+// a freshly allocated Set.
+//
+// The format follows the following pattern: a set of space separated
+// sequences. Each sequence applies over the previous sequence to
+// build up a Set. The format of a sequence is:
+//
+//   [comma list of cap_values][[ops][flags]]*
+//
+// Examples:
+//
+//   "all=ep"
+//   "cap_chown,cap_setuid=ip cap_setuid+e"
+//   "=p cap_setpcap-p+i"
+//
+// Here "all" refers to all named capabilities known to the hosting
+// kernel, and "all" is assumed if no capabilities are listed before
+// an "=".
+//
+// The ops values, "=", "+" and "-" imply "reset and raise", "raise"
+// and "lower" respectively. The "e", "i" and "p" characters
+// correspond to the capabilities of the corresponding Flag: "e"
+// (Effective); "i" (Inheritable); "p" (Permitted).
+//
+// This syntax is overspecified and there are many ways of building
+// the same final Set state. Any sequence that includes a '=' resets
+// the accumulated state of all Flags ignoring earlier sequences. On
+// each of the following lines we give three or more examples of ways
+// to specify a common Set. The last entry on each line is the one
+// generated by (*cap.Set).String() from that Set.
+//
+//    "=p all+ei"  "all=pie"   "=pi all+e"   "=eip"
+//
+//    "cap_setuid=p cap_chown=i"  "cap_chown=ip-p"   "cap_chown=i"
+//
+//    "cap_chown=-p"   "all="   "cap_setuid=pie-pie"   "="
+//
+// Note: FromText() is tested at release time to completely match the
+// import ability of the libcap:cap_from_text() function.
+func FromText(text string) (*Set, error) {
+	c := NewSet()
+	scanner := bufio.NewScanner(strings.NewReader(text))
+	scanner.Split(bufio.ScanWords)
+	chunks := 0
+	for scanner.Scan() {
+		chunks++
+
+		// Parsing for xxx([-+=][eip]+)+
+		t := scanner.Text()
+		i := strings.IndexAny(t, "=+-")
+		if i < 0 {
+			return nil, ErrBadText
+		}
+		var vs []Value
+		sep := t[i]
+		if vals := t[:i]; vals == "all" {
+			for v := Value(0); v < Value(maxValues); v++ {
+				vs = append(vs, v)
+			}
+		} else if vals != "" {
+			for _, name := range strings.Split(vals, ",") {
+				v, err := FromName(name)
+				if err != nil {
+					return nil, ErrBadText
+				}
+				vs = append(vs, v)
+			}
+		} else if sep != '=' {
+			if vals == "" {
+				// Only "=" supports ""=="all".
+				return nil, ErrBadText
+			}
+		} else if j := i + 1; j+1 < len(t) {
+			switch t[j] {
+			case '+':
+				sep = 'P'
+				i++
+			case '-':
+				sep = 'M'
+				i++
+			}
+		}
+		i++
+
+		// There are 5 ways to set: =, =+, =-, +, -. We call
+		// the 2nd and 3rd of these 'P' and 'M'.
+
+		for {
+			// read [eip]+ setting flags.
+			var fE, fP, fI bool
+			for ok := true; ok && i < len(t); i++ {
+				switch t[i] {
+				case 'e':
+					fE = true
+				case 'i':
+					fI = true
+				case 'p':
+					fP = true
+				default:
+					ok = false
+				}
+				if !ok {
+					break
+				}
+			}
+
+			if !(fE || fI || fP) {
+				if sep != '=' {
+					return nil, ErrBadText
+				}
+			}
+
+			switch sep {
+			case '=', 'P', 'M', '+':
+				if sep != '+' {
+					c.Clear()
+					if sep == 'M' {
+						break
+					}
+				}
+				if keep := len(vs) == 0; keep {
+					if sep != '=' {
+						return nil, ErrBadText
+					}
+					c.forceFlag(Effective, fE)
+					c.forceFlag(Permitted, fP)
+					c.forceFlag(Inheritable, fI)
+					break
+				}
+				// =, + and P for specific values are left.
+				if fE {
+					c.SetFlag(Effective, true, vs...)
+				}
+				if fP {
+					c.SetFlag(Permitted, true, vs...)
+				}
+				if fI {
+					c.SetFlag(Inheritable, true, vs...)
+				}
+			case '-':
+				if fE {
+					c.SetFlag(Effective, false, vs...)
+				}
+				if fP {
+					c.SetFlag(Permitted, false, vs...)
+				}
+				if fI {
+					c.SetFlag(Inheritable, false, vs...)
+				}
+			}
+
+			if i == len(t) {
+				break
+			}
+
+			switch t[i] {
+			case '+', '-':
+				sep = t[i]
+				i++
+			default:
+				return nil, ErrBadText
+			}
+		}
+	}
+	if chunks == 0 {
+		return nil, ErrBadText
+	}
+	return c, nil
+}
diff --git a/contrib/Makefile b/contrib/Makefile
index 4749630..a4b5008 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -1,3 +1,3 @@
 .PHONY: all clean
 all clean:
-	for x in bug* ; do make -C $$x $@ || exit 1 ; done
+	for x in bug* ; do $(MAKE) -C $$x $@ || exit 1 ; done
diff --git a/contrib/bug216610/.gitignore b/contrib/bug216610/.gitignore
new file mode 100644
index 0000000..1478d58
--- /dev/null
+++ b/contrib/bug216610/.gitignore
@@ -0,0 +1,3 @@
+*~
+arms
+Dockerfile
diff --git a/contrib/bug216610/Dockerfile b/contrib/bug216610/Dockerfile
new file mode 100644
index 0000000..5502b71
--- /dev/null
+++ b/contrib/bug216610/Dockerfile
@@ -0,0 +1,13 @@
+FROM debian:latest
+
+# A directory to share files via.
+RUN mkdir /shared
+
+RUN apt-get update
+RUN apt-get install -y gcc-arm-linux-gnueabi binutils-arm-linux-gnueabi
+RUN apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
+
+# create a builder user
+RUN echo "builder:x:1000:1000:,,,:/home/builder:/bin/bash" >> /etc/passwd
+RUN echo "builder:*:19289:0:99999:7:::" >> /etc/shadow
+RUN mkdir -p /home/builder && chown builder.bin /home/builder
diff --git a/contrib/bug216610/Makefile b/contrib/bug216610/Makefile
new file mode 100644
index 0000000..ce96fb3
--- /dev/null
+++ b/contrib/bug216610/Makefile
@@ -0,0 +1,30 @@
+topdir=$(shell pwd)/../..
+include ../../Make.Rules
+
+GOTARGET=$(shell eval $$(go env) ; echo $${GOHOSTOS}_$${GOARCH})
+
+all: go/fib
+
+go/fib: go/main.go go/fibber/fib.go go/fibber/linkage.go go/fibber/fibs_$(GOTARGET).s go/fibber/fib_$(GOTARGET).syso
+	cd go && CGO_ENABLED=0 go build
+
+# Build the host native version.
+go/fibber/fib_$(GOTARGET).syso go/fibber/linkage.go: c/fib.c ./c/gcc.sh ./package_fns.sh
+	GCC=gcc ./c/gcc.sh -O3 c/fib.c -c -o go/fibber/fib_$(GOTARGET).syso
+	./package_fns.sh fibber go/fibber/fib_$(GOTARGET).syso > go/fibber/linkage.go
+
+Dockerfile: Makefile ./mkdocker.sh
+	./mkdocker.sh > $@
+
+# Use this build target (make arms) to extend support to include arm
+# and arm64 GOARCH values.
+arms: Dockerfile Makefile ./c/gcc.sh ./c/build.sh ./c/fib.c
+	docker run --rm -v $$PWD/c:/shared:z -h debian -u $$(id -u) -it expt shared/build.sh
+	mv c/*.syso go/fibber/
+	touch arms
+
+clean:
+	rm -f *~ arms
+	rm -f c/*.o c/*~
+	rm -f go/fib go/*~
+	rm -f go/fibber/*.syso go/fibber/*~ go/fibber/linkage.go
diff --git a/contrib/bug216610/README.md b/contrib/bug216610/README.md
new file mode 100644
index 0000000..4425715
--- /dev/null
+++ b/contrib/bug216610/README.md
@@ -0,0 +1,139 @@
+# Linking psx and C code without cgo
+
+## Overview
+
+In some embedded situations, there is a desire to compile Go binaries
+to include some C code, but not `libc` etc. For a long time, I had
+assumed this was not possible, since using `cgo` *requires* `libc` and
+`libpthread` linkage.
+
+This _embedded compilation_ need was referenced in a [bug
+filed](https://bugzilla.kernel.org/show_bug.cgi?id=216610) against the
+[`"psx"`](https://pkg.go.dev/kernel.org/pub/linux/libs/security/libcap/psx)
+package. The bug-filer was seeking an alternative to `CGO_ENABLED=1`
+compilation _requiring_ the `cgo` variant of `psx` build. However, the
+go `"runtime"` package will always
+[`panic()`](https://cs.opensource.google/go/go/+/refs/tags/go1.19.2:src/runtime/os_linux.go;l=717-720)
+if you try this because it needs `libpthread` and `[g]libc` to work.
+
+In researching that bug report, however, I have learned there is a
+trick to combining a non-CGO built binary with compiled C code. I
+learned about it from a brief reference in the [Go Programming
+Language
+Wiki](https://zchee.github.io/golang-wiki/GcToolchainTricks/).
+
+This present directory evolved from my attempt to understand and
+hopefully resolve what was going on as reported in that bug into an
+example of this _trick_. I was unable to resolve the problem as
+reported because of the aforementioned `panic()` in the Go
+runtime. However, I was able to demonstrate embedding C code in a Go
+binary _without_ use of cgo. In such a binary, the Go-native version
+of `"psx"` is thus achievable. This is what the example in this
+present directory demonstrates.
+
+*Caveat Emptor*: this example is very fragile. The Go team only
+supports `cgo` linking against C. That being said, I'd certainly like
+to receive bug fixes, etc for this directory if you find you need to
+evolve it to make it work for your use case.
+
+## Content
+
+In this example we have:
+
+- Some C code for the functions `fib_init()` and `fib_next()` that
+combine to implement a _compute engine_ to determine [Fibonacci
+Numbers](https://en.wikipedia.org/wiki/Fibonacci_number). The source
+for this is in the sub directory `c/fib.c`.
+
+- Some Go code, in the directory `go/fibber` that uses this C compiled
+compute kernel.
+
+- `c/gcc.sh` which is a wrapper for `gcc` that adjusts the compilation
+to be digestible by Go's (internal) linker (the one that gets invoked
+when compiling `CGO_ENABLED=0`). Using `gcc` directly instead of this
+wrapper generates an incomplete binary - which miscomputes the
+expected answers. See the discussion below for what seems to be going
+on.
+
+- A top level `Makefile` to build it all.
+
+## Building and running the built binary
+
+Set things up with:
+```
+$ git clone git://git.kernel.org/pub/scm/libs/libcap/libcap.git
+$ cd libcap
+$ make all
+$ cd contrib/bug216610
+$ make clean all
+```
+When you run `./go/fib` it should generate the following output:
+```
+$ ./go/fib
+psx syscall result: PID=<nnnnn>
+fib: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, ...
+$
+```
+Where `<nnnnn>` is the PID of the program at runtime and will be
+different each time the program is invoked.
+
+## Discussion
+
+The Fibonacci detail of what is going on is mostly uninteresting. The
+reason for developing this example was to explore the build issues in
+the reported [Bug
+216610](https://bugzilla.kernel.org/show_bug.cgi?id=216610). Ultimately,
+this example offers an alternative path to building a `nocgo` program
+that links to a compute kernel of C code.
+
+The reason we have added the `c/gcc.sh` wrapper for `gcc` is that
+we've found the Go linker has a hard time digesting the
+cross-sectional `%rip` based data addressing that various optimization
+modes of gcc like to use. Specifically, in the x86_64/amd64
+architecture, if a `R_X86_64_PC32` relocation entry made in a `.text`
+section refers to an `.rodata.cst8` section in a generated `.syso`
+file, the Go linker seems to [replace this reference with a `0` offset
+to
+`(%rip)`](https://github.com/golang/go/issues/24321#issuecomment-1296084103). What
+our wrapper script does is rewrite the generated assembly to store
+these data references to the `.text` section. The Go linker has no
+problem with this _same section_ relative addressing and is able to
+link the resulting objects without problems.
+
+If you want to cross compile, we have support for 32-bit arm
+compilation: what is needed for the Raspberry Pi. To get this support,
+try:
+```
+$ make clean all arms
+$ cd go
+$ GOARCH=arm CGO_ENABLED=0 go build
+```
+The generated `fib` binary runs on a 32-bit Raspberry Pi.
+
+## Future thoughts
+
+At present, this example only works on Linux with `x86_64` and `arm`
+build architectures. (In go-speak that is `linux_amd64` and
+`linux_arm`). This is because I have only provided some bridging
+assembly for Go to C calling conventions for those architecture
+targets: `./go/fibber/fibs_linux_amd64.s` and
+`./go/fibber/fibs_linux_arm.s`. The non-native, `make arms`, cross
+compilation requires the `docker` command to be available.
+
+I intend to implement an `arm64` build, when I have a system on which
+to test it.
+
+**Note** The Fedora system on which I've been developing this has some
+  SELINUX impediment to naively using the `docker -v ...` bind mount
+  option. I need the `:z` suffix for bind mounting. I don't know how
+  common an issue this is. On Fedora, building the arm variants of the
+  .syso file can be performed as follows:
+```
+$ docker run --rm -v $PWD/c:/shared:z -h debian -u $(id -u) -it expt shared/build.sh
+```
+
+## Reporting bugs
+
+Please report issues or offer improvements to this example via the
+[Fully Capable `libcap`](https://sites.google.com/site/fullycapable/)
+website.
diff --git a/contrib/bug216610/c/build.sh b/contrib/bug216610/c/build.sh
new file mode 100755
index 0000000..7458fb1
--- /dev/null
+++ b/contrib/bug216610/c/build.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+# Builds the following .syso files to the directory containing this script:
+#
+#   fib_linux_arm.syso
+#   fib_linux_arm64.syso
+
+# Work from the directory that holds this script, so the relative
+# paths below resolve. dirname also copes with the script being named
+# without any '/' in its path.
+cd "$(dirname "$0")" || exit 1
+
+# Stop at the first failed build, so the exit status of this script
+# cannot report success while a .syso file is missing.
+GCC=arm-linux-gnueabi-gcc ./gcc.sh -O3 fib.c -c -o fib_linux_arm.syso || exit 1
+GCC=aarch64-linux-gnu-gcc ./gcc.sh -O3 fib.c -c -o fib_linux_arm64.syso || exit 1
diff --git a/contrib/bug216610/c/fib.c b/contrib/bug216610/c/fib.c
new file mode 100644
index 0000000..bd665c7
--- /dev/null
+++ b/contrib/bug216610/c/fib.c
@@ -0,0 +1,20 @@
+#include <inttypes.h>
+
+/*
+ * Generator state: b is the most recently computed Fibonacci number
+ * and a is the one before it.
+ */
+struct state {
+    uint32_t b, a;
+};
+
+/* fib_init seeds s with the first two numbers of the sequence. */
+void fib_init(struct state *s);
+void fib_init(struct state *s)
+{
+    s->b = 1;
+    s->a = 0;
+}
+
+/*
+ * fib_next steps s forward by one term: a takes the old value of b,
+ * and b becomes the sum of the old a and b (uint32_t arithmetic, so
+ * the sum wraps modulo 2^32).
+ */
+void fib_next(struct state *s);
+void fib_next(struct state *s)
+{
+    const uint32_t prev_b = s->b;
+
+    s->b = prev_b + s->a;
+    s->a = prev_b;
+}
diff --git a/contrib/bug216610/c/gcc.sh b/contrib/bug216610/c/gcc.sh
new file mode 100755
index 0000000..33655d6
--- /dev/null
+++ b/contrib/bug216610/c/gcc.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# The Go linker does not seem to know what to do with relative
+# addressing of rodata.* offset from %rip. GCC likes to use this
+# addressing mode on this architecture, so we quickly run into
+# mis-computation when the relative addressing used in a .syso file of
+# symbol located data is resolved to completely the wrong place by the
+# Go (internal) linker.
+#
+# As a workaround for this, we can modify the assembly source code
+# generated by GCC to not point at problematic '.rodata.*' sections,
+# and place this data in the good old '.text' section where Go's
+# linker can make sense of it.
+#
+# This script exists to generate a '.syso' file from some '*.c' files.
+# It works by recognizing the '*.c' command line arguments and
+# converting them into fixed-up '*.s' files. It then performs the
+# compilation for the collection of the '*.s' files. Upon success, it
+# purges the intermediate '*.s' files.
+#
+# The fragile aspect of this present script is which compiler
+# arguments should be used for the compilation from '.c' -> '.s'
+# files. What we do is accumulate arguments until we encounter our
+# first '*.c' file and use those to perform the '.c' -> '.s'
+# compilation. We build up a complete command line for gcc
+# substituting '.s' files for '.c' files in the original command
+# line. Then with the new command line assembled we invoke gcc with
+# those. If that works, we remove all of the intermediate '.s' files.
+
+# GCC names the compiler to run; it defaults to plain gcc.
+GCC="${GCC:=gcc}"
+# setup becomes 1 once the first '*.c' argument has been seen.
+setup=0
+# args:  arguments seen before the first '*.c' file ('.c' -> '.s' step).
+# final: the full command line with '.s' substituted for '.c'.
+# ses:   the intermediate '.s' files to remove upon success.
+args=()
+final=()
+ses=()
+
+for arg in "$@"; do
+    if [[ "${arg##*.}" = "c" ]]; then
+	setup=1
+	s="${arg%.*}.s"
+	# Give up as soon as the '.c' -> '.s' step fails. Intermediate
+	# '.s' files survive a failed run, so carrying on could
+	# silently build from a stale one.
+	if ! "${GCC}" "${args[@]}" -S -o "${s}" "${arg}"; then
+	    echo "failed to compile ${arg}" >&2
+	    exit 1
+	fi
+	# Replace every line that mentions a '.rodata.*' section with
+	# a switch to the '.text' section.
+	if ! sed -i -e 's/.*\.rodata\..*/\t.text/' "${s}"; then
+	    echo "failed to fix up ${s}" >&2
+	    exit 1
+	fi
+	final+=("${s}")
+	ses+=("${s}")
+    else
+	if [[ $setup -eq 0 ]]; then
+	    args+=("${arg}")
+	fi
+	final+=("${arg}")
+    fi
+done
+
+#echo final: "${final[@]}"
+#echo args: "${args[@]}"
+#echo ses: "${ses[@]}"
+
+if ! "${GCC}" "${final[@]}"; then
+    echo "failed to compile" >&2
+    exit 1
+fi
+rm -f "${ses[@]}"
diff --git a/contrib/bug216610/go/.gitignore b/contrib/bug216610/go/.gitignore
new file mode 100644
index 0000000..ae14305
--- /dev/null
+++ b/contrib/bug216610/go/.gitignore
@@ -0,0 +1,5 @@
+fib
+*.syso
+main
+go.sum
+linkage.go
diff --git a/contrib/bug216610/go/fibber/fib.go b/contrib/bug216610/go/fibber/fib.go
new file mode 100644
index 0000000..49757cd
--- /dev/null
+++ b/contrib/bug216610/go/fibber/fib.go
@@ -0,0 +1,32 @@
+// Package fibber implements a Fibonacci sequence generator using a C
+// coded compute kernel (a .syso file).
+package fibber
+
+import (
+	"unsafe"
+)
+
+// State holds the two most recent numbers of the sequence. It is the
+// Go rendering of the C "struct state" that the .syso code operates
+// on.
+type State struct {
+	B, A uint32
+}
+
+// cPtr returns the address of the first field of s as an untyped
+// pointer, the form expected by the call method of a sysoCaller.
+func (s *State) cPtr() unsafe.Pointer {
+	first := &s.B
+	return unsafe.Pointer(first)
+}
+
+// NewState allocates a State and has the C routine fib_init seed it.
+// Upon return, A=0 and B=1 are the first two numbers in the sequence.
+func NewState() *State {
+	st := new(State)
+	syso__fib_init.call(st.cPtr())
+	return st
+}
+
+// Next has the C routine fib_next advance s by one term. Upon return,
+// s.B is the newest number in the sequence.
+func (s *State) Next() {
+	ptr := s.cPtr()
+	syso__fib_next.call(ptr)
+}
diff --git a/contrib/bug216610/go/fibber/fibs_linux_amd64.s b/contrib/bug216610/go/fibber/fibs_linux_amd64.s
new file mode 100644
index 0000000..5992d09
--- /dev/null
+++ b/contrib/bug216610/go/fibber/fibs_linux_amd64.s
@@ -0,0 +1,21 @@
+// To transition from a Go call to a C function call, we are skating
+// on really thin ice... Caveat Emptor!
+//
+// Ref:
+//   https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
+//
+// This is not strictly needed, but it makes gdb debugging less
+// confusing because spacer ends up being an alias for the TEXT
+// section start.
+TEXT ·spacer(SB),$0
+	RET
+
+// RINDEX(n) is the byte offset of the n-th 8-byte argument slot.
+#define RINDEX(n) (8*n)
+
+// Header to this function wrapper is the last time we can voluntarily
+// yield to some other goroutine.
+//
+// syso(cFn, state) declares no local frame and 16 bytes of arguments
+// ($0-16). It loads cFn into SI and state into DI, then calls through
+// SI, so the C function finds state in DI, the register the x86-64
+// psABI referenced above uses for the first integer argument.
+TEXT ·syso(SB),$0-16
+	MOVQ cFn+RINDEX(0)(FP), SI
+	MOVQ state+RINDEX(1)(FP), DI
+	CALL *SI
+	RET
diff --git a/contrib/bug216610/go/fibber/fibs_linux_arm.s b/contrib/bug216610/go/fibber/fibs_linux_arm.s
new file mode 100644
index 0000000..39640a5
--- /dev/null
+++ b/contrib/bug216610/go/fibber/fibs_linux_arm.s
@@ -0,0 +1,23 @@
+// To transition from a Go call to a C function call, we are skating
+// on really thin ice... Caveat Emptor!
+//
+// Ref:
+//   https://stackoverflow.com/questions/261419/what-registers-to-save-in-the-arm-c-calling-convention
+//
+// This is not strictly needed, but it makes gdb debugging less
+// confusing because spacer ends up being an alias for the TEXT
+// section start.
+TEXT ·spacer(SB),$0
+	RET
+
+// NOTE(review): FINDEX is not referenced by the code in this file;
+// the argument offsets below are written out literally (0 and 4).
+#define FINDEX(n) (8*n)
+
+// Header to this function wrapper is the last time we can voluntarily
+// yield to some other goroutine.
+//
+// Conventions: PC == R15, SP == R13, LR == R14, IP (scratch) = R12
+//
+// syso(cFn, state) declares no local frame and 8 bytes of arguments
+// ($0-8). It loads cFn into R14 and state into R0, then branches with
+// link through R14, so the C function receives state in R0.
+TEXT ·syso(SB),$0-8
+	MOVW	cFn+0(FP), R14
+	MOVW    state+4(FP), R0
+	BL	(R14)
+	RET
diff --git a/contrib/bug216610/go/go.mod b/contrib/bug216610/go/go.mod
new file mode 100644
index 0000000..8531994
--- /dev/null
+++ b/contrib/bug216610/go/go.mod
@@ -0,0 +1,5 @@
+module fib
+
+go 1.18
+
+require kernel.org/pub/linux/libs/security/libcap/psx v1.2.69
diff --git a/contrib/bug216610/go/main.go b/contrib/bug216610/go/main.go
new file mode 100644
index 0000000..65121f6
--- /dev/null
+++ b/contrib/bug216610/go/main.go
@@ -0,0 +1,29 @@
+// Program fib uses the psx package once, and then prints the first
+// ten Fibonacci numbers.
+package main
+
+import (
+	"fmt"
+	"log"
+	"syscall"
+
+	"fib/fibber"
+
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+// main makes one system call through psx and prints the PID it
+// returns, then prints the two seed values of the Fibonacci sequence
+// followed by the next eight numbers.
+func main() {
+	pid, _, errno := psx.Syscall3(syscall.SYS_GETPID, 0, 0, 0)
+	if errno != 0 {
+		log.Fatalf("failed to get PID via psx: %v", errno)
+	}
+	fmt.Print("psx syscall result: PID=", pid, "\n")
+
+	seq := fibber.NewState()
+	fmt.Print("fib: ", seq.A, ", ", seq.B)
+	for left := 8; left > 0; left-- {
+		seq.Next()
+		fmt.Print(", ", seq.B)
+	}
+	fmt.Println(", ...")
+}
diff --git a/contrib/bug216610/mkdocker.sh b/contrib/bug216610/mkdocker.sh
new file mode 100755
index 0000000..860c198
--- /dev/null
+++ b/contrib/bug216610/mkdocker.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# This script generates a Dockerfile to be used for cross-compilation
+#
+# The heredoc delimiter is deliberately unquoted: $(id -u) and
+# $(id -g) are expanded when this script runs, so the builder account
+# written into the image gets the uid and gid of the invoking user.
+cat <<EOF
+FROM debian:latest
+
+# A directory to share files via.
+RUN mkdir /shared
+
+RUN apt-get update
+RUN apt-get install -y gcc-arm-linux-gnueabi binutils-arm-linux-gnueabi
+RUN apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu
+
+# create a builder user
+RUN echo "builder:x:$(id -u):$(id -g):,,,:/home/builder:/bin/bash" >> /etc/passwd
+RUN echo "builder:*:19289:0:99999:7:::" >> /etc/shadow
+RUN mkdir -p /home/builder && chown builder:bin /home/builder
+EOF
diff --git a/contrib/bug216610/package_fns.sh b/contrib/bug216610/package_fns.sh
new file mode 100755
index 0000000..0f4b91c
--- /dev/null
+++ b/contrib/bug216610/package_fns.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Generate some Go code to make calling into the C code of the .syso
+# file easier.
+
+# $1 is the name for the generated Go package; $2 is the .syso file
+# whose functions are to be made callable.
+package="${1}"
+syso="${2}"
+
+if [[ -z "${syso}" ]]; then
+    echo "usage: $0 <package> <.....syso>" >&2
+    exit 1
+fi
+
+if [[ "${syso%.syso}" == "${syso}" ]]; then
+    echo "2nd argument should be a .syso file" >&2
+    exit 1
+fi
+
+# Fixed preamble of the generated Go file. The heredoc delimiter is
+# unquoted so that ${package} is substituted; the text must not hold
+# any other '$' reference, since the shell would expand that too.
+cat<<EOF
+package ${package}
+
+import (
+	"unsafe"
+)
+
+// syso is how we call, indirectly, into the C-code.
+func syso(cFn, state unsafe.Pointer)
+
+type sysoCaller struct {
+	ptr unsafe.Pointer
+}
+
+// call calls the syso linked C-function that s was created for.
+func (s *sysoCaller) call(data unsafe.Pointer) {
+	syso(s.ptr, data)
+}
+EOF
+
+# For each symbol that objdump flags as a global function ('g     F'),
+# the name being the 6th field of the line, emit a go:linkname'd byte
+# variable placed at the symbol and a sysoCaller that points at it.
+for sym in $(objdump -x "${syso}" | grep -F 'g     F' | awk '{print $6}'); do
+    cat<<EOF
+
+//go:linkname _${sym} ${sym}
+var _${sym} byte
+var syso__${sym} = &sysoCaller{ptr: unsafe.Pointer(&_${sym})}
+
+EOF
+done
diff --git a/contrib/bug400591/Makefile b/contrib/bug400591/Makefile
index 320610c..bb2e59d 100644
--- a/contrib/bug400591/Makefile
+++ b/contrib/bug400591/Makefile
@@ -1,8 +1,8 @@
 all: bug
 
 bug: bug.c ../../libcap Makefile
-	make -C ../../libcap
-	cc -g -I../../libcap/include --static -o $@ $< -L../../libcap -lcap
+	$(MAKE) -C ../../libcap
+	$(CC) $(CFLAGS) $(CPPFLAGS) -g -I../../libcap/include --static -o $@ $< -L../../libcap -lcap
 	./bug
 
 clean:
diff --git a/contrib/capso/.gitignore b/contrib/capso/.gitignore
new file mode 100644
index 0000000..222d35d
--- /dev/null
+++ b/contrib/capso/.gitignore
@@ -0,0 +1,2 @@
+capso.so
+bind
diff --git a/contrib/capso/Makefile b/contrib/capso/Makefile
new file mode 100644
index 0000000..70af7f9
--- /dev/null
+++ b/contrib/capso/Makefile
@@ -0,0 +1,23 @@
+topdir=$(shell pwd)/../..
+include ../../Make.Rules
+
+# Always build sources this way:
+CFLAGS += -fPIC $(CAPSO_DEBUG)
+
+all: bind
+
+bind: bind.c capso.so
+	$(CC) $(CFLAGS) $(CPPFLAGS) -o $@ bind.c capso.so -L../../libcap -lcap
+
+../../libcap/loader.txt:
+	$(MAKE) -C ../../libcap loader.txt
+
+capso.o: capso.c capso.h ../../libcap/execable.h ../../libcap/loader.txt
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DLIBCAP_VERSION=\"libcap-$(VERSION).$(MINOR)\" -DSHARED_LOADER=\"$(shell cat ../../libcap/loader.txt)\" -c capso.c -o $@
+
+capso.so: capso.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBCAPLIB) -ldl -Wl,-e,__so_start
+	sudo setcap cap_net_bind_service=p $@
+
+clean:
+	rm -f bind capso.o capso.so *~
diff --git a/contrib/capso/README.md b/contrib/capso/README.md
new file mode 100644
index 0000000..df2e878
--- /dev/null
+++ b/contrib/capso/README.md
@@ -0,0 +1,21 @@
+# Leveraging file capabilities on shared libraries
+
+This directory contains an example of a shared library (`capso.so`)
+that can be installed with file capabilities. When the library is
+linked against an unprivileged program, it includes internal support
+for re-invoking itself as a child subprocess to execute a privileged
+operation on behalf of the parent.
+
+The idea for doing this evolved from the way `pam_unix.so` is able to
+leverage a separate program, and from `libcap`'s recently added
+support for binary execution of all the `.so` files built by the
+package.
+
+The actual program example `./bind` leverages the
+`"cap_net_bind_service=p"` enabled `./capso.so` file to bind to the
+privileged port 80.
+
+A writeup of how to build and explore the behavior of this example is
+provided on the `libcap` distribution website:
+
+https://sites.google.com/site/fullycapable/capable-shared-objects
diff --git a/contrib/capso/bind.c b/contrib/capso/bind.c
new file mode 100644
index 0000000..609e4e4
--- /dev/null
+++ b/contrib/capso/bind.c
@@ -0,0 +1,29 @@
+/*
+ * Unprivileged program that binds to port 80. It does this by
+ * leveraging a file capable shared library.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "capso.h"
+
+/* fail reports msg, qualified by errno, and exits with status 1. */
+static void fail(const char *msg)
+{
+    perror(msg);
+    exit(1);
+}
+
+/*
+ * main obtains a socket bound to port 80 of 127.0.0.1 from bind80(),
+ * listens on it, and then idles for 60 seconds so that the bound port
+ * can be observed from outside the program.
+ */
+int main(int argc, char **argv) {
+    const int sock = bind80("127.0.0.1");
+
+    if (sock < 0) {
+	fail("unable to bind to port 80");
+    }
+    if (listen(sock, 10) == -1) {
+	fail("unable to listen to port 80");
+    }
+
+    printf("Webserver code to use filedes = %d goes here.\n"
+	   "(Sleeping for 60s... Try 'netstat -tlnp|grep :80')\n", sock);
+    fflush(stdout);
+    sleep(60);
+    close(sock);
+    printf("Done.\n");
+}
diff --git a/contrib/capso/capso.c b/contrib/capso/capso.c
new file mode 100644
index 0000000..7ca3427
--- /dev/null
+++ b/contrib/capso/capso.c
@@ -0,0 +1,368 @@
+/*
+ * Worked example for a shared object with a file capability on it
+ * leveraging itself for preprogrammed functionality.
+ *
+ * This example implements a shared library that can bind to
+ * the privileged port. ":80".
+ *
+ * The shared library needs to be installed with
+ * cap_net_bind_service=p. As a shared library, it provides the
+ * function bind80().
+ */
+
+#define _GNU_SOURCE
+
+#include <dlfcn.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "capso.h"
+
+extern char **environ;
+
+/*
+ * fake_exploit is some dedicated code to simulate a shell escape type
+ * exploit. This is obviously not something serious to include in code
+ * that has actually been audited for security, but we use it to
+ * demonstrate an aspect of file capabilities vs. setuid root for
+ * granting privilege.
+ *
+ * Unless compiled with ALLOW_EXPLOIT defined, this function does
+ * nothing. Otherwise, when the TRIGGER_EXPLOIT environment variable
+ * is set, its value is split at each space or tab into an argument
+ * vector that is handed to execv(). A leading '%' first raises
+ * CAP_NET_BIND_SERVICE in the inheritable set of the process; a
+ * leading '^' does that and then also raises it as an ambient
+ * capability. On any failure the process exits with status 1.
+ */
+static void fake_exploit(void) {
+#ifdef ALLOW_EXPLOIT
+    const char *exploit = getenv("TRIGGER_EXPLOIT");
+    if (exploit == NULL) {
+	return;
+    }
+
+    if (*exploit == '^' || *exploit == '%') {
+	const int want_ambient = (*exploit == '^');
+	cap_value_t caps = CAP_NET_BIND_SERVICE;
+	cap_t c = cap_get_proc();
+
+	/* The prefix character is not part of the command line. */
+	exploit++;
+	cap_set_flag(c, CAP_INHERITABLE, 1, &caps, CAP_SET);
+	if (cap_set_proc(c)) {
+	    perror("Failed to raise inheritable capability");
+	    exit(1);
+	}
+	/* The working copy is finished with on both prefix paths. */
+	cap_free(c);
+	if (want_ambient && cap_set_ambient(caps, CAP_SET) != 0) {
+	    perror("Unable to raise ambient capability");
+	    exit(1);
+	}
+    }
+
+    char *ts = strdup(exploit);
+    if (ts == NULL) {
+	perror("Failed to duplicate exploit string");
+	exit(1);
+    }
+
+    /* Cut ts into n '\0' terminated words, in place. */
+    int i, j, n = 1;
+    for (i = 0; ts[i]; i++) {
+	if (ts[i] == ' ' || ts[i] == '\t') {
+	    n++;
+	    ts[i] = '\0';
+	}
+    }
+
+    /* One extra, zeroed, slot: execv() needs a NULL terminated argv. */
+    char **argv = calloc(n + 1, sizeof(char *));
+    if (argv == NULL) {
+	perror("Failed to allocate exploit argv");
+	exit(1);
+    }
+    for (i = 0, j = 0; j < n; j++) {
+	char *s = ts+i;
+	argv[j] = s;
+	i += 1 + strlen(s);
+	printf("execv argv[%d] = \"%s\"\n", j, s);
+    }
+
+    execv(argv[0], argv);
+    perror("Execv failed");
+    exit(1);
+#endif /* def ALLOW_EXPLOIT */
+}
+
+/*
+ * where_am_i asks dladdr() which file this very function was loaded
+ * from, that is, the path of the shared library containing it. The
+ * result is a strdup()d copy of that path, which the caller should
+ * free(). NULL is returned if the lookup (or the copy) fails.
+ */
+static char *where_am_i(void)
+{
+    Dl_info self;
+
+    return dladdr(where_am_i, &self) ? strdup(self.dli_fname) : NULL;
+}
+
+/*
+ * try_bind80 resolves hostname for the "80" service and tries to bind
+ * a SO_REUSEADDR stream socket to the first address returned. The
+ * result is the bound file descriptor, or -1 if any step fails.
+ */
+static int try_bind80(const char *hostname)
+{
+    struct addrinfo *hints, *found = NULL;
+    int enable = 1;
+    int sock = -1;
+
+    hints = calloc(1, sizeof(*hints));
+    if (hints == NULL) {
+      return -1;
+    }
+
+    hints->ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
+    hints->ai_family = PF_UNSPEC;
+    hints->ai_socktype = SOCK_STREAM;
+    hints->ai_protocol = 0;
+
+    if (getaddrinfo(hostname, "80", hints, &found) == 0) {
+	sock = socket(found->ai_family, found->ai_socktype,
+		      found->ai_protocol);
+	/* A socket that cannot be configured and bound is discarded. */
+	if (sock != -1 &&
+	    (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+			&enable, sizeof(enable)) ||
+	     bind(sock, found->ai_addr, found->ai_addrlen))) {
+	    close(sock);
+	    sock = -1;
+	}
+    }
+
+    if (found != NULL) {
+	freeaddrinfo(found);
+    }
+    free(hints);
+
+    return sock;
+}
+
+/*
+ * set_fd3 forces file descriptor 3 to be associated with a unix
+ * socket that can be used to send a file descriptor back to the
+ * parent program.
+ *
+ * detail points at the int[2] socket pair: sp[0] is closed, and sp[1]
+ * is what ends up as file descriptor 3. Returns 0 on success and -1
+ * if dup2() fails.
+ */
+static int set_fd3(void *detail)
+{
+    int *sp = detail;
+
+    close(sp[0]);
+    if (dup2(sp[1], 3) != 3) {
+	return -1;
+    }
+    /*
+     * When sp[1] is already 3, dup2() changes nothing and closing
+     * sp[1] would close the very descriptor being set up.
+     */
+    if (sp[1] != 3) {
+	close(sp[1]);
+    }
+
+    return 0;
+}
+
+/*
+ * bind80 returns a socket filedescriptor that is bound to port 80 of
+ * the provided service address.
+ *
+ * Example:
+ *
+ *   int fd = bind80("localhost");
+ *
+ * fd < 0 in the case of error.
+ *
+ * A direct bind is attempted first. If that fails, this shared
+ * library is launched as a program (see where_am_i()) with hostname
+ * as its argument, and the bound descriptor is expected back as an
+ * SCM_RIGHTS control message over a unix socket pair.
+ */
+int bind80(const char *hostname)
+{
+    cap_launch_t helper;
+    pid_t child;
+    char const *args[3];
+    char *path;
+    int fd, ignored;
+    int sp[2];
+    char junk[1];
+    /*
+     * Room for a control message that carries one file descriptor.
+     * The union gives the bytes the alignment of a struct cmsghdr.
+     */
+    union {
+	char buf[CMSG_SPACE(sizeof(int))];
+	struct cmsghdr align;
+    } rec_buf;
+    struct iovec *iov;
+    struct msghdr *msg;
+    struct cmsghdr *ctrl;
+
+    fd = try_bind80(hostname);
+    if (fd >= 0) {
+	return fd;
+    }
+
+#ifdef CAPSO_DEBUG
+    printf("application bind80(%s) attempt failed\n", hostname);
+    sleep(30);
+#endif
+
+    iov = calloc(1, sizeof(struct iovec));
+    if (iov == NULL) {
+      return -1;
+    }
+    msg = calloc(1, sizeof(struct msghdr));
+    if (msg == NULL) {
+      free(iov);
+      return -1;
+    }
+
+    /*
+     * Initial attempt didn't work, so try launching the shared
+     * library as an executable and getting it to yield a bound
+     * filedescriptor for us via a unix socket pair.
+     */
+    path = where_am_i();
+    if (path == NULL) {
+	perror("Unable to find self");
+	goto drop_alloc;
+    }
+
+    args[0] = "bind80-helper";
+    args[1] = hostname;
+    args[2] = NULL;
+
+    helper = cap_new_launcher(path, args, (void *) environ);
+    if (helper == NULL) {
+	goto drop_path;
+    }
+
+    if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sp)) {
+	goto drop_helper;
+    }
+
+    /* The child keeps sp[1], as its fd 3; this process reads sp[0]. */
+    cap_launcher_callback(helper, set_fd3);
+    child = cap_launch(helper, sp);
+    close(sp[1]);
+
+    if (child <= 0) {
+	goto drop_sp;
+    }
+
+    iov[0].iov_base = junk;
+    iov[0].iov_len = 1;
+
+    msg->msg_name = NULL;
+    msg->msg_namelen = 0;
+    msg->msg_iov = iov;
+    msg->msg_iovlen = 1;
+    msg->msg_control = rec_buf.buf;
+    msg->msg_controllen = sizeof(rec_buf.buf);
+
+    if (recvmsg(sp[0], msg, 0) != -1) {
+	/*
+	 * A message can arrive without the expected control data, in
+	 * which case CMSG_FIRSTHDR() yields NULL. Only accept a
+	 * descriptor from a well formed SCM_RIGHTS message; otherwise
+	 * fd keeps the negative value of the failed direct attempt.
+	 */
+	ctrl = CMSG_FIRSTHDR(msg);
+	if (ctrl != NULL && ctrl->cmsg_level == SOL_SOCKET &&
+	    ctrl->cmsg_type == SCM_RIGHTS &&
+	    ctrl->cmsg_len == CMSG_LEN(sizeof(int))) {
+	    memcpy(&fd, CMSG_DATA(ctrl), sizeof(fd));
+	}
+    }
+    waitpid(child, &ignored, 0);
+
+ drop_sp:
+    close(sp[0]);
+
+ drop_helper:
+    cap_free(helper);
+
+ drop_path:
+    free(path);
+
+ drop_alloc:
+    free(msg);
+    free(iov);
+
+    return fd;
+}
+
+#include "../../libcap/execable.h"
+//#define SO_MAIN int main
+
+/*
+ * Entry point used when this shared object is run as a program. It
+ * expects one argument, a hostname. It raises CAP_NET_BIND_SERVICE
+ * in the effective set, attempts to bind port 80 of that host, and
+ * sends the resulting file descriptor as an SCM_RIGHTS control
+ * message over file descriptor 3. It always finishes with exit().
+ */
+SO_MAIN(int argc, char **argv)
+{
+    const char *cmd = "<capso.so>";
+    const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
+    cap_t working;
+    int fd;
+    struct msghdr msg;
+    struct cmsghdr *ctrl;
+    struct iovec payload;
+    char data[CMSG_SPACE(sizeof(fd))];
+    char junk[1];
+
+    /* Settle on a printable name before anything tries to use it. */
+    if (argv != NULL && argv[0] != NULL) {
+	cmd = argv[0];
+    }
+
+#ifdef CAPSO_DEBUG
+    printf("invoking %s standalone\n", cmd);
+    sleep(30);
+#endif
+
+    if (argc != 2 || argv[1] == NULL || !strcmp(argv[1], "--help")) {
+	fprintf(stderr, "usage: %s <hostname>\n", cmd);
+	exit(1);
+    }
+
+    working = cap_get_proc();
+    if (working == NULL) {
+	perror("Unable to read capabilities");
+	exit(1);
+    }
+
+    if (cap_set_flag(working, CAP_EFFECTIVE, 1,
+		     &cap_net_bind_service, CAP_SET) != 0) {
+	perror("Unable to raise CAP_NET_BIND_SERVICE");
+	exit(1);
+    }
+
+    if (cap_set_proc(working) != 0) {
+	perror("Problem with cap_set_proc");
+	fprintf(stderr, "Try: sudo setcap cap_net_bind_service=p %s\n",
+		cmd);
+	exit(1);
+    }
+
+    fd = try_bind80(argv[1]);
+
+    /* Start from all-zero structures so that no field is left unset. */
+    memset(data, 0, sizeof(data));
+    memset(&payload, 0, sizeof(payload));
+    memset(&msg, 0, sizeof(msg));
+
+    payload.iov_base = junk;
+    payload.iov_len = 1;
+
+    msg.msg_name = NULL;
+    msg.msg_namelen = 0;
+    msg.msg_iov = &payload;
+    msg.msg_iovlen = 1;
+    msg.msg_control = data;
+    msg.msg_controllen = sizeof(data);
+
+    ctrl = CMSG_FIRSTHDR(&msg);
+    ctrl->cmsg_level = SOL_SOCKET;
+    ctrl->cmsg_type = SCM_RIGHTS;
+    ctrl->cmsg_len = CMSG_LEN(sizeof(fd));
+
+    *((int *) CMSG_DATA(ctrl)) = fd;
+
+    if (sendmsg(3, &msg, 0) < 0) {
+	perror("Failed to write fd");
+    }
+
+    fake_exploit();
+
+#ifdef CAPSO_DEBUG
+    printf("exiting standalone %s\n", cmd);
+    sleep(30);
+#endif
+
+    exit(0);
+}
diff --git a/contrib/capso/capso.h b/contrib/capso/capso.h
new file mode 100644
index 0000000..ae18f3a
--- /dev/null
+++ b/contrib/capso/capso.h
@@ -0,0 +1,16 @@
+#ifndef CAPSO_H
+#define CAPSO_H
+
+/*
+ * bind80 returns a socket filedescriptor that is bound to port 80 of
+ * the provided service address.
+ *
+ * Example:
+ *
+ *   int fd = bind80("localhost");
+ *
+ * fd < 0 in the case of error.
+ */
+extern int bind80(const char *hostname);
+
+#endif /* ndef CAPSO_H */
diff --git a/contrib/pcaps4convenience b/contrib/pcaps4convenience
index c46735d..b78a25b 100644
--- a/contrib/pcaps4convenience
+++ b/contrib/pcaps4convenience
@@ -63,22 +63,22 @@ p4c_test(){
     # are we sane?
     WICH=`which which 2>/dev/null`
     if [ $WICH == "" ]; then
-        # thats bad
+        # that's bad
         echo "Sorry, I haven't found which"
         exit
     fi
 
-    # we needt his apps
+    # we need this app
     SETCAP=`which setcap 2>/dev/null`
     if [ "$SETCAP" == "" ]; then
-        echo "Sorry, I'm missing setcap !"
+        echo "Sorry, I'm missing setcap!"
         exit
     fi
 
-    # checking setcap for SET_SETFCAP PCap ?
+    # checking setcap for SET_SETFCAP PCap?
     # for now we stick to root
     if [ "$( id -u )" != "0" ]; then
-        echo "Sorry, you must be root !"
+        echo "Sorry, you must be root!"
         exit 1
     fi
 }
@@ -113,7 +113,7 @@ p4c_app_convert(){
 
 
 p4c_app_revert(){
-    # revert a singel app
+    # revert a single app
     # $1 is app name
     APP=`which -a $1 2>/dev/null`
     if [ "$APP" != "" ]; then
@@ -136,7 +136,7 @@ p4c_app_revert(){
 
 
 p4c_convert(){
-    # we go throug the APPSARRAY and call s2p_app_convert to do the job
+    # we go through the APPSARRAY and call s2p_app_convert to do the job
     COUNTER=0
     let UPPER=${#APPSARRAY[*]}-1
     until [ $COUNTER == $UPPER ]; do
@@ -170,9 +170,9 @@ p4c_usage(){
     echo "through the PAM module pam_cap.so."
     echo "A user who has not the needed PCaps in his Inheritance Set CAN NOT execute"
     echo "these binaries successful."
-    echo "(well, still per sudo or su -c - but thats not the point here)"
+    echo "(well, still per sudo or su -c - but that's not the point here)"
     echo
-    echo "You need and I will check fot the utilities which and setcap."
+    echo "You need and I will check for the utilities which and setcap."
     echo
     echo "Your Filesystem has to support extended attributes and your kernel must have"
     echo "support for POSIX File Capabilities (CONFIG_SECURITY_FILE_CAPABILITIES)."
diff --git a/contrib/pcaps4server b/contrib/pcaps4server
index af6f9ca..f72a4d3 100644
--- a/contrib/pcaps4server
+++ b/contrib/pcaps4server
@@ -8,7 +8,7 @@
 # changelog:
 # 1 - initial release pcaps4convenience
 # 1 - 2007.02.15 - initial release
-# 2 - 2007.11.02 - changed to new setfcaps api; each app is now callable; supressed error of id
+# 2 - 2007.11.02 - changed to new setfcaps api; each app is now callable; suppressed error of id
 # 3 - 2007.12.28 - changed to libcap2 package setcap/getcap
 # 4 - renamed to pcaps4server
 #      removed suid0 and convenience files,
diff --git a/contrib/pcaps4suid0 b/contrib/pcaps4suid0
index 799df28..2cbdcee 100644
--- a/contrib/pcaps4suid0
+++ b/contrib/pcaps4suid0
@@ -77,23 +77,23 @@ p4s_test(){
     # are we sane?
     WICH=`which which 2>/dev/null`
     if [ $WICH == "" ]; then
-        # thats bad
+        # that's bad
         echo "Sorry, I haven't found which"
         exit
     fi
 
-    # we needt his apps
+    # we need these apps
     CHMOD=`which chmod 2>/dev/null`
     SETCAP=`which setcap 2>/dev/null`
     if [ "$CHMOD" == "" -o "$SETCAP" == "" ]; then
-        echo "Sorry, I'm missing chmod or setcap !"
+        echo "Sorry, I'm missing chmod or setcap!"
         exit
     fi
 
-    # checking setcap for SET_SETFCAP PCap ?
+    # checking setcap for SET_SETFCAP PCap?
     # for now we stick to root
     if [ "$( id -u )" != "0" ]; then
-        echo "Sorry, you must be root !"
+        echo "Sorry, you must be root!"
         exit 1
     fi
 }
@@ -129,7 +129,7 @@ p4s_app_convert(){
 
 
 p4s_app_revert(){
-    # revert a singel app
+    # revert a single app
     # $1 is app name
     APP=`which -a $1 2>/dev/null`
     if [ "$APP" != "" ]; then
@@ -153,7 +153,7 @@ p4s_app_revert(){
 
 
 p4s_convert(){
-    # we go throug the APPSARRAY and call s2p_app_convert to do the job
+    # we go through the APPSARRAY and call s2p_app_convert to do the job
     COUNTER=0
     let UPPER=${#APPSARRAY[*]}-1
     until [ $COUNTER == $UPPER ]; do
@@ -190,7 +190,7 @@ p4s_usage(){
     echo "If you are using pam_cap.so, you might want to change the set into the"
     echo "Inherited and Effective set (check for the SET var)."
     echo
-    echo "You need and I will check fot the utilities which, chmod and setcap."
+    echo "You need and I will check for the utilities which, chmod and setcap."
     echo
     echo "Your Filesystem has to support extended attributes and your kernel must have"
     echo "support for POSIX File Capabilities (CONFIG_SECURITY_FILE_CAPABILITIES)."
diff --git a/contrib/seccomp/explore.go b/contrib/seccomp/explore.go
new file mode 100644
index 0000000..8203d4f
--- /dev/null
+++ b/contrib/seccomp/explore.go
@@ -0,0 +1,277 @@
+// Program explore is evolved from the code discussed in more depth
+// here:
+//
+//   https://github.com/golang/go/issues/3405
+//
+// The code here demonstrates that while PR_SET_NO_NEW_PRIVS only
+// applies to the calling thread, since
+// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03
+// the seccomp filter application forces the setting to be mirrored on
+// all the threads of a process.
+//
+// Based on the command line options, we can manipulate the program to
+// behave in various ways. Example command lines:
+//
+//   sudo ./explore
+//   sudo ./explore --kill=false
+//   sudo ./explore --kill=false --errno=0
+//
+// Supported Go toolchains are after go1.10. Those prior to go1.15
+// require this environment variable to be set to build successfully:
+//
+//   export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
+//
+// Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too,
+// demonstrating native nocgo support for seccomp features.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"runtime"
+	"syscall"
+	"time"
+	"unsafe"
+
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+// Command line flags. Example invocations are listed in the package
+// comment at the top of this file.
+var (
+	withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall")
+	delays  = flag.Bool("delays", false, "use this to pause the program at various places")
+	kill    = flag.Bool("kill", true, "kill the process if setuid attempted")
+	errno   = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno")
+)
+
+const (
+	prSetNoNewPrivs = 38 // PR_SET_NO_NEW_PRIVS option for prctl(2)
+
+	sysSeccomp             = 317        // x86_64 syscall number
+	seccompSetModeFilter   = 1          // uses user-supplied filter.
+	seccompFilterFlagTsync = (1 << 0)   // mirror filtering on all threads.
+	seccompRetErrno        = 0x00050000 // returns an errno
+	seccompRetData         = 0x0000ffff // mask for RET data payload (ex. errno)
+	seccompRetKillProcess  = 0x80000000 // kill the whole process immediately
+	seccompRetTrap         = 0x00030000 // disallow and force a SIGSYS
+	seccompRetAllow        = 0x7fff0000 // permit the system call
+
+	// Classic BPF instruction classes.
+	bpfLd  = 0x00
+	bpfJmp = 0x05
+	bpfRet = 0x06
+
+	// Operand size: 32-bit word.
+	bpfW = 0x00
+
+	// Addressing mode (absolute offset) and jump condition (equal).
+	bpfAbs = 0x20
+	bpfJeq = 0x10
+
+	// Operand source: the constant K field of the instruction.
+	bpfK = 0x00
+
+	// 0xC000003E is the kernel's AUDIT_ARCH_X86_64, that is
+	// EM_X86_64 (62) | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE.
+	auditArchX86_64 = 3221225534
+	archNr          = auditArchX86_64
+
+	// syscallNr is the byte offset of the nr field in the kernel's
+	// struct seccomp_data.
+	syscallNr = 0
+)
+
+// SockFilter is a single filter block.
+type SockFilter struct {
+	// Code is the filter code instruction.
+	Code uint16
+	// Jt is the target for a true result from the code execution.
+	Jt uint8
+	// Jf is the target for a false result from the code execution.
+	Jf uint8
+	// K is a generic multiuse field
+	K uint32
+}
+
+// SockFProg is a filter program descriptor: a count of SockFilter
+// blocks together with the address of the first one. A pointer to
+// this structure is what SeccompSetModeFilter hands to the seccomp
+// system call.
+type SockFProg struct {
+	// Len is the number of contiguous SockFilter blocks that can
+	// be found at *Filter.
+	Len uint16
+	// Filter is the address of the first SockFilter block of a
+	// program sequence.
+	Filter *SockFilter
+}
+
+// SockFilterSlice is a subprogram filter, a sequence of SockFilter
+// blocks. It is not referenced elsewhere in this file; the helper
+// functions below return plain []SockFilter values.
+type SockFilterSlice []SockFilter
+
+// bpfStmt builds a non-branching filter instruction: code with
+// operand k and both jump targets set to zero.
+func bpfStmt(code uint16, k uint32) SockFilter {
+	return SockFilter{code, 0, 0, k}
+}
+
+// bpfJump builds a branching filter instruction that tests against k;
+// jt and jf are the jump targets for a true and a false result
+// respectively. Note the argument order (k before jt, jf) differs
+// from the field order of SockFilter.
+func bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter {
+	return SockFilter{code, jt, jf, k}
+}
+
+// validateArchitecture returns the filter prologue that kills the
+// process unless the architecture recorded for the system call
+// equals archNr.
+func validateArchitecture() []SockFilter {
+	return []SockFilter{
+		// 4 is the byte offset of the arch field in the kernel's
+		// struct seccomp_data (it follows the 32-bit nr field).
+		bpfStmt(bpfLd+bpfW+bpfAbs, 4),
+		// On a match skip over the kill instruction that follows.
+		bpfJump(bpfJmp+bpfJeq+bpfK, archNr, 1, 0),
+		bpfStmt(bpfRet+bpfK, seccompRetKillProcess),
+	}
+}
+
+// examineSyscall returns the instruction that loads the 32-bit word
+// at offset syscallNr, the system call number, so that the following
+// comparison instructions can test it.
+func examineSyscall() []SockFilter {
+	return []SockFilter{
+		bpfStmt(bpfLd+bpfW+bpfAbs, syscallNr),
+	}
+}
+
+// allowSyscall returns a two instruction fragment: if the loaded
+// value equals syscallNum the system call is allowed, otherwise the
+// return instruction is skipped and evaluation continues with
+// whatever follows the fragment.
+func allowSyscall(syscallNum uint32) []SockFilter {
+	return []SockFilter{
+		bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
+		bpfStmt(bpfRet+bpfK, seccompRetAllow),
+	}
+}
+
+// disallowSyscall returns a two instruction fragment: if the loaded
+// value equals syscallNum the system call is refused with errno
+// (masked to the seccompRetData bits), otherwise the return
+// instruction is skipped.
+func disallowSyscall(syscallNum, errno uint32) []SockFilter {
+	return []SockFilter{
+		bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
+		bpfStmt(bpfRet+bpfK, seccompRetErrno|(errno&seccompRetData)),
+	}
+}
+
+// killProcess returns an unconditional "kill the whole process"
+// return instruction. It is not used by main in this file.
+func killProcess() []SockFilter {
+	return []SockFilter{
+		bpfStmt(bpfRet+bpfK, seccompRetKillProcess),
+	}
+}
+
+// notifyProcessAndDie returns an unconditional trap return
+// instruction (seccompRetTrap: disallow and force a SIGSYS). It is
+// not used by main in this file.
+func notifyProcessAndDie() []SockFilter {
+	return []SockFilter{
+		bpfStmt(bpfRet+bpfK, seccompRetTrap),
+	}
+}
+
+// trapOnSyscall returns a two instruction fragment: if the loaded
+// value equals syscallNum the system call is disallowed and a SIGSYS
+// is forced, otherwise the return instruction is skipped.
+func trapOnSyscall(syscallNum uint32) []SockFilter {
+	return []SockFilter{
+		bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1),
+		bpfStmt(bpfRet+bpfK, seccompRetTrap),
+	}
+}
+
+// allGood returns the unconditional "allow" return instruction used
+// to terminate the filter program.
+func allGood() []SockFilter {
+	return []SockFilter{
+		bpfStmt(bpfRet+bpfK, seccompRetAllow),
+	}
+}
+
+// prctl executes the prctl - unless the --psx commandline argument is
+// used, this is on a single thread. A non-zero errno from the system
+// call is returned as the error. When --delays is set, a successful
+// call is followed by a one minute pause.
+//go:uintptrescapes
+func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error {
+	var e syscall.Errno
+	if *withPSX {
+		_, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
+	} else {
+		_, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5)
+	}
+	if e != 0 {
+		return e
+	}
+	if *delays {
+		fmt.Println("prctl'd - check now")
+		time.Sleep(1 * time.Minute)
+	}
+	return nil
+}
+
+// SeccompSetModeFilter is our wrapper for performing our seccomp system call.
+// It installs prog in filter mode with the TSYNC flag, so the filter
+// is mirrored on all threads, and returns the errno of a failed call
+// as the error.
+//go:uintptrescapes
+func SeccompSetModeFilter(prog *SockFProg) error {
+	if _, _, e := syscall.RawSyscall(sysSeccomp, seccompSetModeFilter, seccompFilterFlagTsync, uintptr(unsafe.Pointer(prog))); e != 0 {
+		return e
+	}
+	return nil
+}
+
+var empty func()
+
+// lockProcessThread locks the calling goroutine to an OS thread and
+// loops until that thread's TID compares to the process PID as
+// requested: equal when pick is true, different when pick is false.
+// On return the goroutine is still locked to the selected thread.
+func lockProcessThread(pick bool) {
+	// Make sure we are running on a thread whose (TID == PID)
+	// status matches pick; otherwise release the thread and retry.
+	pid := uintptr(syscall.Getpid())
+	runtime.LockOSThread()
+	for {
+		tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0)
+		if (tid == pid) == pick {
+			fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick)
+			break
+		}
+		runtime.UnlockOSThread()
+		// NOTE(review): the short-lived goroutine and the Gosched()
+		// presumably encourage the scheduler to resume this
+		// goroutine on a different thread - confirm.
+		go func() {
+			time.Sleep(1 * time.Microsecond)
+		}()
+		runtime.Gosched()
+		runtime.LockOSThread()
+	}
+}
+
+// applyPolicy uploads the program sequence. It first sets
+// PR_SET_NO_NEW_PRIVS and then installs prog as the seccomp filter;
+// a failure of either step terminates the program via log.Fatalf.
+func applyPolicy(prog *SockFProg) {
+	// Without PSX we can't guarantee the thread we execute the
+	// seccomp call on will be the same one that we disabled new
+	// privs on. With PSX, the disabling of new privs is mirrored
+	// on all threads.
+	if !*withPSX {
+		// Deliberately pick a thread whose TID differs from the PID.
+		lockProcessThread(false)
+		defer runtime.UnlockOSThread()
+	}
+
+	// This is required to load a filter without privilege.
+	if err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0); err != nil {
+		log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err)
+	}
+
+	fmt.Println("Applying syscall policy...")
+	if err := SeccompSetModeFilter(prog); err != nil {
+		log.Fatalf("seccomp_set_mode_filter: %v", err)
+	}
+	fmt.Println("...Policy applied")
+}
+
+// main builds a seccomp filter that singles out setuid, applies it,
+// and then attempts a setuid(1) from the thread whose TID equals the
+// PID to demonstrate the effect of the filter.
+func main() {
+	flag.Parse()
+
+	if *delays {
+		fmt.Println("check first", syscall.Getpid())
+		time.Sleep(60 * time.Second)
+	}
+
+	var filter []SockFilter
+	filter = append(filter, validateArchitecture()...)
+
+	// Grab the system call number.
+	filter = append(filter, examineSyscall()...)
+
+	// List disallowed syscalls.
+	for _, x := range []uint32{
+		syscall.SYS_SETUID,
+	} {
+		if *kill {
+			// --kill is implemented with a trap (forced SIGSYS),
+			// not with seccompRetKillProcess.
+			filter = append(filter, trapOnSyscall(x)...)
+		} else {
+			filter = append(filter, disallowSyscall(x, uint32(*errno))...)
+		}
+	}
+
+	// Everything not matched above is permitted.
+	filter = append(filter, allGood()...)
+
+	prog := &SockFProg{
+		Len:    uint16(len(filter)),
+		Filter: &filter[0],
+	}
+
+	applyPolicy(prog)
+
+	// Ensure we are running on the TID=PID.
+	lockProcessThread(true)
+
+	log.Print("Now it is time to try to run something privileged...")
+	if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 {
+		log.Fatalf("setuid failed with an error: %v", e)
+	}
+	// Reached only when the system call reported success (for
+	// example with --kill=false --errno=0).
+	log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1")
+}
diff --git a/contrib/seccomp/go.mod b/contrib/seccomp/go.mod
new file mode 100644
index 0000000..ecf18d9
--- /dev/null
+++ b/contrib/seccomp/go.mod
@@ -0,0 +1,5 @@
+module explore
+
+go 1.14
+
+require kernel.org/pub/linux/libs/security/libcap/psx v1.2.69
diff --git a/contrib/sucap/Makefile b/contrib/sucap/Makefile
new file mode 100644
index 0000000..df61ed5
--- /dev/null
+++ b/contrib/sucap/Makefile
@@ -0,0 +1,18 @@
+topdir=$(shell pwd)/../..
+include ../../Make.Rules
+
+# This line is here to link against the in-tree copy of libcap.so
+LINKEXTRA=-Wl,-rpath,$(topdir)/libcap
+DEPS=../../libcap/libcap.so
+
+all: su
+
+su: su.c $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DPAM_APP_NAME=\"sucap\" $< -o $@ $(LINKEXTRA) -lpam -lpam_misc $(LIBCAPLIB)
+	# to permit all ambient capabilities, this needs all permitted.
+	#   sudo setcap =p ./su
+	# to permit all inheritable, as CAP_PURE1E needs, we don't need as much
+	sudo setcap cap_chown,cap_setgid,cap_setuid,cap_dac_read_search,cap_setpcap=p ./su
+
+clean:
+	rm -f su su.o *~
diff --git a/contrib/sucap/README.md b/contrib/sucap/README.md
new file mode 100644
index 0000000..5cc0dcc
--- /dev/null
+++ b/contrib/sucap/README.md
@@ -0,0 +1,40 @@
+# A fully capable version of `su`
+
+This directory contains a port of the `SimplePAMApps` version of `su`
+to one that can work in a `PURE1E` `libcap`-_mode_ environment.
+
+The point of developing this is to better test the full `libcap`
+implementation, and to also provide a non-setuid-root worked example
+for testing PAM interaction with `libcap` and `pam_cap.so`. The
+required expectations for `pam_unix.so` are that it include this
+commit:
+
+https://github.com/linux-pam/linux-pam/pull/373/commits/bf9b1d8ad909634000a7356af2d865a79d3f86f3
+
+The original sources for this version of `su` were found here:
+
+https://kernel.org/pub/linux/libs/pam/pre/applications/SimplePAMApps-0.60.tar.gz
+
+The `SimplePAMApps` contain the same License as `libcap` (they were
+originally started by the same authors!). The credited Authors in the
+above tarball were:
+
+-  Andrew [G.] Morgan
+-  Andrey V. Savochkin
+-  Alexei V. Galatenko
+
+The code in this present directory is freely adapted from the above
+tar ball and is thus a derived work from that.
+
+**NOTE** As of the time of writing, this adaptation is likely rife
+  with bugs.
+
+Finally, Andrew would like to apologize to Andrey for removing all of
+the config support he worked to add all those decades ago..! I just
+wanted to make a quick tester for a potential workaround for this
+`pam_cap.so` issue:
+
+-  https://bugzilla.kernel.org/show_bug.cgi?id=212945
+
+Andrew G. Morgan <morgan@kernel.org>
+2021-06-30
diff --git a/contrib/sucap/su.c b/contrib/sucap/su.c
new file mode 100644
index 0000000..e3dfe70
--- /dev/null
+++ b/contrib/sucap/su.c
@@ -0,0 +1,1638 @@
+/*
+ * Originally based on an implementation of `su' by
+ *
+ *     Peter Orbaek  <poe@daimi.aau.dk>
+ *
+ * obtained circa 1997 from ftp://ftp.daimi.aau.dk/pub/linux/poe/
+ *
+ * Rewritten for Linux-PAM by Andrew G. Morgan <morgan@linux.kernel.org>
+ * Modified by Andrey V. Savochkin <saw@msu.ru>
+ * Modified for use with libcap by Andrew G. Morgan <morgan@kernel.org>
+ */
+
+/* #define PAM_DEBUG */
+
+#include <sys/prctl.h>
+
+/* non-root user of convenience to block signals */
+#define TEMP_UID                  1
+
+#ifndef PAM_APP_NAME
+#define PAM_APP_NAME              "su"
+#endif /* ndef PAM_APP_NAME */
+
+#define DEFAULT_HOME              "/"
+#define DEFAULT_SHELL             "/bin/bash"
+#define SLEEP_TO_KILL_CHILDREN    3  /* seconds to wait after SIGTERM before
+					SIGKILL */
+#define SU_FAIL_DELAY     2000000    /* usec on authentication failure */
+
+#define RHOST_UNKNOWN_NAME        ""     /* perhaps "[from.where?]" */
+#define DEVICE_FILE_PREFIX        "/dev/"
+#define WTMP_LOCK_TIMEOUT         3      /* in seconds */
+
+#ifndef UT_IDSIZE
+#define UT_IDSIZE 4            /* XXX - this is sizeof(struct utmp.ut_id) */
+#endif
+
+#include <stdlib.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <grp.h>
+#include <string.h>
+#include <syslog.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <termios.h>
+#include <sys/wait.h>
+#include <utmp.h>
+#include <ctype.h>
+#include <stdarg.h>
+#include <netdb.h>
+#include <unistd.h>
+
+#include <security/pam_appl.h>
+#include <security/pam_misc.h>
+#include <sys/capability.h>
+
+#include <security/_pam_macros.h>
+
+/* -------------------------------------------- */
+/* ------ declarations ------------------------ */
+/* -------------------------------------------- */
+
+extern char **environ;
+static pam_handle_t *pamh = NULL;
+
+/*
+ * wait_for_child_caught is written from a signal handler
+ * (wait_for_child_catch_sig) and polled by the main flow, so it has
+ * to be a volatile sig_atomic_t rather than a plain int.
+ */
+static volatile sig_atomic_t wait_for_child_caught=0;
+static int need_job_control=0;
+static int is_terminal = 0;
+static struct termios stored_mode;        /* initial terminal mode settings */
+static uid_t terminal_uid = (uid_t) -1;   /* original tty owner, -1: unchanged */
+static uid_t invoked_uid = (uid_t) -1;    /* real uid su was started with */
+
+/* -------------------------------------------- */
+/* ------ some local (static) functions ------- */
+/* -------------------------------------------- */
+
+/*
+ * We will attempt to transcribe the following env variables
+ * independent of whether we keep the whole environment. Others will
+ * be set elsewhere: either in modules; or after the identity of the
+ * user is known.
+ */
+
+static const char *posix_env[] = {
+    "LANG",
+    "LC_COLLATE",
+    "LC_CTYPE",
+    "LC_MONETARY",
+    "LC_NUMERIC",
+    "TZ",
+    NULL
+};
+
+/*
+ * make_environment transcribes a selection of environment variables
+ * from the invoking user into the PAM environment held by pamh.
+ *
+ * If keep_env is non-zero the whole of environ is pasted in.
+ * Otherwise only TERM (defaulting to "dumb"), a fixed PATH and those
+ * of the posix_env variables that are set are carried over.
+ *
+ * Returns PAM_SUCCESS, or the return code of the first pam_misc_*()
+ * call that failed.
+ */
+static int make_environment(int keep_env)
+{
+    const char *tmpe;
+    int i;
+    int retval;
+
+    if (keep_env) {
+	/* preserve the original environment */
+	return pam_misc_paste_env(pamh, (const char * const *)environ);
+    }
+
+    /* we always transcribe some variables anyway */
+    tmpe = getenv("TERM");
+    if (tmpe == NULL) {
+	tmpe = "dumb";
+    }
+    retval = pam_misc_setenv(pamh, "TERM", tmpe, 0);
+    if (retval == PAM_SUCCESS) {
+	retval = pam_misc_setenv(pamh, "PATH", "/bin:/usr/bin", 0);
+    }
+    if (retval != PAM_SUCCESS) {
+	tmpe = NULL;
+	D(("error setting environment variables"));
+	return retval;
+    }
+
+    /* also propagate the POSIX specific ones */
+    for (i=0; retval == PAM_SUCCESS && posix_env[i]; ++i) {
+	tmpe = getenv(posix_env[i]);
+	if (tmpe != NULL) {
+	    retval = pam_misc_setenv(pamh, posix_env[i], tmpe, 0);
+	}
+    }
+    tmpe = NULL;
+
+    return retval;
+}
+
+/*
+ * redirect_to_devnull opens /dev/null for writing and makes it
+ * available as file descriptor "target".
+ *
+ * Returns 0 on success, -1 (with errno set by the failing call) on
+ * error.
+ */
+static int redirect_to_devnull(int target)
+{
+    int fd = open("/dev/null", O_WRONLY);
+
+    if (fd == -1) {
+	return -1;
+    }
+    if (fd == target) {
+	/* open() already handed us the descriptor we wanted */
+	return 0;
+    }
+    if (dup2(fd, target) == -1) {
+	return -1;
+    }
+    return close(fd);
+}
+
+/*
+ * checkfds makes sure that the stdout (1) and stderr (2) file
+ * descriptors exist. A descriptor that fstat() rejects is pointed at
+ * /dev/null; if that is not possible the program exits with status 1.
+ */
+static void checkfds(void)
+{
+    struct stat st;
+    int target;
+
+    for (target = 1; target <= 2; ++target) {
+	if (fstat(target, &st) != -1) {
+	    continue;                  /* descriptor is already defined */
+	}
+	if (redirect_to_devnull(target) == -1) {
+	    perror("bad filedes");
+	    exit(1);
+	}
+    }
+}
+
+/*
+ * store_terminal_modes captures the current state of the input
+ * terminal. Calling this at the start of the program, we ensure we
+ * can restore these default settings when su exits.
+ *
+ * The program exits with status 1 if STDIN is not a terminal, or if
+ * its settings cannot be read.
+ */
+static void store_terminal_modes(void)
+{
+    if (isatty(STDIN_FILENO)) {
+	is_terminal = 1;
+	if (tcgetattr(STDIN_FILENO, &stored_mode) != 0) {
+	    /* terminate the line like every other diagnostic does */
+	    fprintf(stderr, PAM_APP_NAME ": couldn't copy terminal mode\n");
+	    exit(1);
+	}
+	return;
+    }
+    fprintf(stderr, PAM_APP_NAME ": must be run from a terminal\n");
+    exit(1);
+}
+
+/*
+ * restore_terminal_modes puts the terminal settings captured by
+ * store_terminal_modes back in place. Nothing is done when STDIN was
+ * not recorded as a terminal.
+ *
+ * Returns:
+ *   0     ok (or nothing to restore)
+ *   1     error (a message has been printed to stderr)
+ */
+static int restore_terminal_modes(void)
+{
+    if (!is_terminal) {
+	return 0;
+    }
+    if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &stored_mode) == 0) {
+	return 0;
+    }
+
+    fprintf(stderr, PAM_APP_NAME ": cannot restore terminal mode: %s\n",
+	    strerror(errno));
+    return 1;
+}
+
+/* ------ unexpected signals ------------------ */
+
+struct sigaction old_int_act, old_quit_act, old_tstp_act, old_pipe_act;
+
+/*
+ * disable_terminal_signals attempts to make the process resistant to
+ * being stopped - it helps ensure that the PAM stack can complete
+ * session and auth failure logging etc.
+ *
+ * SIGINT, SIGQUIT, SIGTSTP and SIGPIPE are set to be ignored; the
+ * previous dispositions are saved in old_int_act, old_quit_act,
+ * old_tstp_act and old_pipe_act for enable_terminal_signals.
+ */
+static void disable_terminal_signals(void)
+{
+    /*
+     * Protect the process from dangerous terminal signals.
+     * The protection is implemented via sigaction() because
+     * the signals are sent regardless of the process' uid.
+     */
+    struct sigaction act;
+
+    act.sa_handler = SIG_IGN;  /* ignore the signal */
+    sigemptyset(&act.sa_mask); /* no signal blocking on handler
+				  call needed */
+    act.sa_flags = SA_RESTART; /* do not reset after first signal
+				  arriving, restart interrupted
+				  system calls if possible */
+    sigaction(SIGINT, &act, &old_int_act);
+    sigaction(SIGQUIT, &act, &old_quit_act);
+    /*
+     * Ignore SIGTSTP signals. Why? attacker could otherwise stop
+     * a process and a. kill it, or b. wait for the system to
+     * shutdown - either way, nothing appears in syslogs.
+     */
+    sigaction(SIGTSTP, &act, &old_tstp_act);
+    /*
+     * Ignore SIGPIPE. The parent `su' process may print something
+     * on stderr. Killing of the process would be undesired.
+     */
+    sigaction(SIGPIPE, &act, &old_pipe_act);
+}
+
+/*
+ * enable_terminal_signals reinstates the SIGINT, SIGQUIT, SIGTSTP and
+ * SIGPIPE dispositions that disable_terminal_signals saved. It is
+ * only meaningful after disable_terminal_signals has been called.
+ */
+static void enable_terminal_signals(void)
+{
+    sigaction(SIGINT, &old_int_act, NULL);
+    sigaction(SIGQUIT, &old_quit_act, NULL);
+    sigaction(SIGTSTP, &old_tstp_act, NULL);
+    sigaction(SIGPIPE, &old_pipe_act, NULL);
+}
+
+/* ------ terminal ownership ------------------ */
+
+/*
+ * change_terminal_owner changes the ownership of STDIN if needed,
+ * that is when STDIN is a terminal and a login shell was requested.
+ * The effective CAP_CHOWN capability is raised only for the duration
+ * of the fchown() and is dropped again whether or not it succeeds.
+ * On success the previous owner is remembered in terminal_uid so
+ * that restore_terminal_owner can undo the change.
+ *
+ * Returns:
+ *   0     ok,
+ *  -1     fatal error (continuing is impossible),
+ *   1     non-fatal error.
+ * In the case of an error "err_descr" is set to the error message
+ * and "callname" to the name of the failed call.
+ */
+static int change_terminal_owner(uid_t uid, int is_login,
+				 const char **callname, const char **err_descr)
+{
+    /* determine who owns the terminal line */
+    if (is_terminal && is_login) {
+	struct stat stat_buf;
+	cap_t current, working;
+	int status;
+	int saved_errno;      /* errno of the call that actually failed */
+	cap_value_t cchown = CAP_CHOWN;
+
+	if (fstat(STDIN_FILENO, &stat_buf) != 0) {
+            *callname = "fstat to STDIN";
+	    *err_descr = strerror(errno);
+	    return -1;
+	}
+
+	current = cap_get_proc();
+	working = cap_dup(current);
+	cap_set_flag(working, CAP_EFFECTIVE, 1, &cchown, CAP_SET);
+	status = cap_set_proc(working);
+	saved_errno = errno;
+	cap_free(working);
+
+	if (status != 0) {
+	    *callname = "capset CHOWN";
+	} else {
+	    status = fchown(STDIN_FILENO, uid, -1);
+	    saved_errno = errno;
+	    if (status != 0) {
+		*callname = "fchown of STDIN";
+	    }
+	    /* drop the raised capability on failure as well as success */
+	    cap_set_proc(current);
+	}
+	cap_free(current);
+
+	if (status != 0) {
+	    *err_descr = strerror(saved_errno);
+	    return 1;
+	}
+
+	terminal_uid = stat_buf.st_uid;
+    }
+    return 0;
+}
+
+/*
+ * restore_terminal_owner changes the terminal owner back to the value
+ * it had when su was started. It does nothing unless
+ * change_terminal_owner recorded an owner in terminal_uid. A failure
+ * is reported through syslog; terminal_uid is reset in either case.
+ */
+static void restore_terminal_owner(void)
+{
+    if (terminal_uid != (uid_t) -1) {
+	cap_t current, working;
+	int status;
+	int saved_errno;      /* errno of the call that actually failed */
+	cap_value_t cchown = CAP_CHOWN;
+
+	current = cap_get_proc();
+	working = cap_dup(current);
+	cap_set_flag(working, CAP_EFFECTIVE, 1, &cchown, CAP_SET);
+	status = cap_set_proc(working);
+	saved_errno = errno;
+	cap_free(working);
+
+	if (status == 0) {
+	    status = fchown(STDIN_FILENO, terminal_uid, -1);
+	    saved_errno = errno;
+	    cap_set_proc(current);
+	}
+	cap_free(current);
+
+        if (status != 0) {
+            openlog(PAM_APP_NAME, LOG_CONS|LOG_PERROR|LOG_PID, LOG_AUTHPRIV);
+	    syslog(LOG_ALERT, "Terminal owner hasn\'t been restored: %s",
+		   strerror(saved_errno));
+	    closelog();
+        }
+        terminal_uid = (uid_t) -1;
+    }
+}
+
+/*
+ * make_process_unkillable changes the uid of the process. TEMP_UID is
+ * used for this temporary state. The uid the program was invoked
+ * with is recorded in invoked_uid so that make_process_killable can
+ * switch back to it.
+ *
+ * Returns:
+ *   0     ok,
+ *  -1     fatal error (continue of the work is impossible),
+ *   1     non-fatal error.
+ * In the case of an error "err_descr" is set to the error message
+ * and "callname" to the name of the failed call.
+ */
+static int make_process_unkillable(const char **callname,
+				   const char **err_descr)
+{
+    invoked_uid = getuid();
+    if (invoked_uid == TEMP_UID) {
+	/* no change needed */
+	return 0;
+    }
+
+    if (cap_setuid(TEMP_UID) != 0) {
+        *callname = "setuid";
+	*err_descr = strerror(errno);
+	return -1;
+    }
+    return 0;
+}
+
+/*
+ * make_process_killable restores the invoking uid to the current
+ * process, using the value make_process_unkillable stored in
+ * invoked_uid. The result of cap_setuid() is deliberately ignored.
+ */
+static void make_process_killable(void)
+{
+    (void) cap_setuid(invoked_uid);
+}
+
+/* ------ command line parser ----------------- */
+
+/*
+ * usage prints the command line synopsis to stderr and terminates
+ * the program with exit status exit_val; it does not return.
+ */
+static void usage(int exit_val)
+{
+    fprintf(stderr,"usage: su [-] [-h] [-c \"command\"] [username]\n");
+    exit(exit_val);
+}
+
+/*
+ * parse_command_line extracts the options from the command line
+ * arguments.
+ *
+ *   *is_login  set to 1 if a lone "-" argument was given, else 0
+ *   *user      set to the (mandatory) username argument
+ *   *command   set to the argument of -c, or NULL if -c was not given
+ *
+ * Both "-ccommand" and "-c command" are accepted. Any repeated or
+ * unknown option, or a missing username, ends the program via
+ * usage(). The returned strings point into argv.
+ */
+static void parse_command_line(int argc, char *argv[], int *is_login,
+			       const char **user, const char **command)
+{
+    int username_present, command_present;
+
+    *is_login = 0;
+    *user = NULL;
+    *command = NULL;
+    username_present = command_present = 0;
+
+    while ( --argc > 0 ) {
+	const char *token;
+
+	token = *++argv;
+	if (*token == '-') {
+	    switch (*++token) {
+	    case '\0':             /* su as a login shell for the user */
+		if (*is_login)
+		    usage(1);
+		*is_login = 1;
+		break;
+	    case 'c':
+		if (command_present) {
+		    usage(1);
+		} else {               /* indicate we are running commands */
+		    if (*++token != '\0') {
+			/* command is attached to the option: -ccommand */
+			command_present = 1;
+			*command = token;
+		    } else if (--argc > 0) {
+			/* command is the next argument: -c command */
+			command_present = 1;
+			*command = *++argv;
+		    } else
+			usage(1);
+		}
+		break;
+	    case 'h':
+		usage(0);              /* usage() exits: no fall through */
+	    default:
+		usage(1);
+	    }
+	} else {                       /* must be username */
+	    if (username_present) {
+		usage(1);
+	    }
+	    username_present = 1;
+	    *user = *argv;
+	}
+    }
+
+    if (!username_present) {
+	fprintf(stderr, PAM_APP_NAME ": requires a username\n");
+	usage(1);
+    }
+}
+
+/*
+ * The following code waits for a child process to die. It also
+ * intercepts a couple of signals, in response to which it kindly
+ * passes a SIGTERM on to the child ;^) and then waits again for the
+ * child to exit. If the child resists dying, it will SIGKILL it!
+ */
+
+/*
+ * wait_for_child_catch_sig is the signal handler installed by
+ * wait_for_child; all it does is record in wait_for_child_caught
+ * that a signal arrived. The signal number is not used.
+ */
+static void wait_for_child_catch_sig(int ignore)
+{
+    wait_for_child_caught = 1;
+}
+
+/*
+ * prepare_for_job_control blocks all signals for the process and
+ * records in need_job_control whether wait_for_child should later
+ * re-enable the job control signals. If the signals cannot be
+ * blocked, wait_for_child_caught is set and need_job_control is left
+ * unchanged.
+ */
+static void prepare_for_job_control(int need_it)
+{
+    sigset_t ourset;
+
+    (void) sigfillset(&ourset);
+    if (sigprocmask(SIG_BLOCK, &ourset, NULL) != 0) {
+	fprintf(stderr,"[trouble blocking signals]\n");
+	wait_for_child_caught = 1;
+	return;
+    }
+    need_job_control = need_it;
+}
+
+/*
+ * wait_for_child waits for the process "child" to terminate and
+ * returns its exit code.
+ *
+ * SIGTERM and SIGHUP are caught while waiting; if one arrives (or
+ * waiting fails) the child is sent SIGTERM and, after
+ * SLEEP_TO_KILL_CHILDREN seconds, SIGKILL.
+ *
+ * Returns:
+ *   -1        child is -1 (there is nothing to wait for)
+ *   0..255    the exit status of a child that called exit()
+ *   1         the child was ended by a signal, or could not be reaped
+ */
+static int wait_for_child(pid_t child)
+{
+    int retval, status, exit_code;
+    sigset_t ourset;
+
+    exit_code = -1; /* no exit code yet, exit codes could be from 0 to 255 */
+    if (child == -1) {
+	return exit_code;
+    }
+
+    /*
+     * set up signal handling
+     */
+
+    if (!wait_for_child_caught) {
+	struct sigaction action, defaction;
+
+	action.sa_handler = wait_for_child_catch_sig;
+	sigemptyset(&action.sa_mask);
+	action.sa_flags = 0;
+
+	defaction.sa_handler = SIG_DFL;
+	sigemptyset(&defaction.sa_mask);
+	defaction.sa_flags = 0;
+
+	sigemptyset(&ourset);
+
+	if (   sigaddset(&ourset, SIGTERM)
+	    || sigaction(SIGTERM, &action, NULL)
+	    || sigaddset(&ourset, SIGHUP)
+	    || sigaction(SIGHUP, &action, NULL)
+	    || sigaddset(&ourset, SIGALRM)          /* required by sleep(3) */
+            || (need_job_control && sigaddset(&ourset, SIGTSTP))
+            || (need_job_control && sigaction(SIGTSTP, &defaction, NULL))
+            || (need_job_control && sigaddset(&ourset, SIGTTIN))
+            || (need_job_control && sigaction(SIGTTIN, &defaction, NULL))
+            || (need_job_control && sigaddset(&ourset, SIGTTOU))
+            || (need_job_control && sigaction(SIGTTOU, &defaction, NULL))
+	    || (need_job_control && sigaddset(&ourset, SIGCONT))
+            || (need_job_control && sigaction(SIGCONT, &defaction, NULL))
+	    || sigprocmask(SIG_UNBLOCK, &ourset, NULL)
+	    ) {
+	    fprintf(stderr,"[trouble setting signal intercept]\n");
+	    wait_for_child_caught = 1;
+	}
+
+	/* application should be ready for receiving a SIGTERM/HUP now */
+    }
+
+    /*
+     * This code waits for the process to actually die. If it stops,
+     * then the parent attempts to mimic the behavior of the
+     * child.. There is a slight bug in the code when the 'su'd user
+     * attempts to restart the child independently of the parent --
+     * the child dies.
+     */
+    while (!wait_for_child_caught) {
+        /* parent waits for child */
+	if ((retval = waitpid(child, &status, 0)) <= 0) {
+            if (errno == EINTR) {
+                continue;             /* recovering from a 'fg' */
+	    }
+            fprintf(stderr, "[error waiting child: %s]\n", strerror(errno));
+            /*
+             * Break the loop keeping exit_code undefined.
+             * Do we have a chance for a successful wait() call
+             * after kill()? (SAW)
+             */
+            wait_for_child_caught = 1;
+            break;
+        } else {
+	    /* the child is terminated via exit() or a fatal signal */
+	    if (WIFEXITED(status)) {
+		exit_code = WEXITSTATUS(status);
+	    } else {
+		exit_code = 1;
+	    }
+	    break;
+	}
+    }
+
+    if (wait_for_child_caught) {
+	fprintf(stderr,"\nKilling shell...");
+	kill(child, SIGTERM);
+    }
+
+    /*
+     * do we need to wait for the child to catch up?
+     */
+    if (wait_for_child_caught) {
+	sleep(SLEEP_TO_KILL_CHILDREN);
+	kill(child, SIGKILL);
+	fprintf(stderr, "killed\n");
+    }
+
+    /*
+     * collect the zombie if the shell was killed by ourselves
+     */
+    if (exit_code == -1) {
+	do {
+	    retval = waitpid(child, &status, 0);
+	} while (retval == -1 && errno == EINTR);
+	if (retval == -1) {
+	    fprintf(stderr, PAM_APP_NAME ": the final wait failed: %s\n",
+		    strerror(errno));
+	    /* status was never filled in, so it must not be examined */
+	    exit_code = 1;
+	} else if (WIFEXITED(status)) {
+	    exit_code = WEXITSTATUS(status);
+	} else {
+	    exit_code = 1;
+	}
+    }
+
+    return exit_code;
+}
+
+
+/*
+ * build_shell_args constructs the argument vector used to start the
+ * user's shell.
+ *
+ *   pw_shell   the shell entry of the target user (may be NULL or
+ *              empty); it may contain space separated arguments
+ *   login      non-zero if a login shell is wanted; "command" is then
+ *              ignored
+ *   command    command to run through "-c", or NULL
+ *
+ * The returned, NULL terminated, array holds the path of the program
+ * in element 0 and the argv[] of the new program from element 1 on
+ * (for a login shell argv[0] is the path prefixed with '-').
+ *
+ * pw_shell is only used if it starts with '/' and, when not logging
+ * in, consists of a single word; otherwise DEFAULT_SHELL is used for
+ * the non-login case.
+ *
+ * Returns NULL if memory cannot be allocated, or if a login shell
+ * was requested and pw_shell is not usable. The caller owns the
+ * returned array.
+ */
+
+static const char * const *build_shell_args(const char *pw_shell, int login,
+					    const char *command)
+{
+    int use_default = 1;  /* flag to signal we should use the default shell */
+    const char **args=NULL;             /* array of PATH+ARGS+NULL pointers */
+
+    D(("called."));
+    if (login) {
+        command = NULL;                 /* command always ignored for login */
+    }
+
+    if (pw_shell && *pw_shell != '\0') {
+        char *line;
+        const char *tmp, *tmpb=NULL;
+        int arg_no=0,i;
+
+        /* first find the number of arguments */
+        D(("non-null shell"));
+        for (tmp=pw_shell; *tmp; ++arg_no) {
+
+            /*
+             * skip leading spaces - the <ctype.h> functions are only
+             * defined for unsigned char values (and EOF), so a plain
+             * char must be converted before it is passed to them.
+             */
+            while (isspace((unsigned char) *tmp))
+                ++tmp;
+
+            if (tmpb == NULL)               /* mark beginning token */
+                tmpb = tmp;
+            if (*tmp == '\0')               /* end of line with no token */
+                break;
+
+            /* skip token */
+            while (*tmp && !isspace((unsigned char) *tmp))
+                ++tmp;
+        }
+
+        /*
+         * We disallow shells:
+         *    - without a full specified path;
+         *    - when we are not logging in and the #args != 1
+         *                                         (unlikely a simple shell)
+         */
+
+        D(("shell so far = %s, arg_no = %d", tmpb, arg_no));
+        if (tmpb != NULL && tmpb[0] == '/'    /* something (full path) */
+            && ( login || arg_no == 1 )       /* login, or single arg shells */
+            ) {
+
+            use_default = 0;                  /* we will use this shell */
+            D(("committed to using user's shell"));
+            if (command) {
+                arg_no += 2;                  /* will append "-c" "command" */
+            }
+
+            /* allocate an array of pointers long enough */
+
+            D(("building array of size %d", 2+arg_no));
+            args = (const char **) calloc(2+arg_no, sizeof(const char *));
+            if (args == NULL)
+                return NULL;
+            /* get a string long enough for all the arguments */
+
+            D(("an array of size %d chars", 2+strlen(tmpb)
+                                   + ( command ? 4:0 )));
+            line = (char *) malloc(2+strlen(tmpb)
+                                   + ( command ? 4:0 ));
+            if (line == NULL) {
+                free(args);
+                return NULL;
+            }
+
+            /* fill array - tmpb points to start of first non-space char */
+
+            line[0] = '-';
+            strcpy(line+1, tmpb);
+
+            /* append " -c" to line? */
+            if (command) {
+                strcat(line, " -c");
+            }
+
+            D(("complete command: %s [+] %s", line, command));
+
+            /* split line in place; the tokens are referenced by args[] */
+            tmp = strtok(line, " \t");
+            D(("command path=%s", line+1));
+            args[0] = line+1;
+
+            if (login) {               /* standard procedure for login shell */
+                D(("argv[0]=%s", line));
+                args[i=1] = line;
+            } else {                 /* not a login shell -- for use with su */
+                D(("argv[0]=%s", line+1));
+                args[i=1] = line+1;
+            }
+
+            while ((tmp = strtok(NULL, " \t"))) {
+                D(("adding argument %d: %s",i,tmp));
+                args[++i] = tmp;
+            }
+            if (command) {
+                D(("appending command [%s]", command));
+                args[++i] = command;
+            }
+            D(("terminating args with NULL"));
+            args[++i] = NULL;
+            D(("list completed."));
+        }
+    }
+
+    /* should we use the default shell instead of specific one? */
+
+    if (use_default && !login) {
+        int last_arg;
+
+        D(("selecting default shell"));
+        last_arg = command ? 5:3;
+
+        args = (const char **) calloc(last_arg--, sizeof(const char *));
+        if (args == NULL) {
+            return NULL;
+        }
+        args[1] = DEFAULT_SHELL;      /* mapped to argv[0] (NOT login shell) */
+        args[0] = args[1];            /* path to program */
+        if (command) {
+            args[2] = "-c";           /* should perform command and exit */
+            args[3] = command;        /* the desired command */
+        }
+        args[last_arg] = NULL;        /* terminate list of args */
+    }
+
+    D(("returning arg list"));
+    return (const char * const *) args;
+}
+
+
+/* ------ abnormal termination ---------------- */
+
+/*
+ * Print a printf-style message on stderr, close the PAM transaction if
+ * one is open, restore the terminal modes and leave with exit_code.
+ * This function does not return.
+ */
+static void exit_now(int exit_code, const char *format, ...)
+{
+    va_list ap;
+
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+
+    if (pamh != NULL) {
+	int pam_status = PAM_SUCCESS;
+
+	if (exit_code) {
+	    pam_status = PAM_ABORT;
+	}
+	pam_end(pamh, pam_status);
+    }
+
+    /* the USER's shell may have left the terminal settings completely
+       broken: put back the sane(?) initial conditions */
+    restore_terminal_modes();
+
+    exit(exit_code);
+}
+
+/* ------ PAM setup --------------------------- */
+
+/* conversation structure handed to pam_start(); no application data */
+static struct pam_conv conv = {
+    .conv = misc_conv,           /* defined in <pam_misc/libmisc.h> */
+    .appdata_ptr = NULL
+};
+
+/*
+ * Start the PAM transaction for user and fill in the basic items:
+ * the environment, PAM_TTY and PAM_RUSER (only when is_terminal is
+ * set), PAM_RHOST and the failure delay. Any failure ends the program.
+ */
+static void do_pam_init(const char *user, int is_login)
+{
+    int rc = pam_start(PAM_APP_NAME, user, &conv, &pamh);
+
+    if (rc != PAM_SUCCESS) {
+	/*
+	 * From my point of view failing of pam_start() means that
+	 * pamh isn't a valid handler. Without a handler
+	 * we couldn't call pam_strerror :-(   1998/03/29 (SAW)
+	 */
+	fprintf(stderr, PAM_APP_NAME ": pam_start failed with code %d\n",
+		rc);
+	exit(1);
+    }
+
+    /* fill in some blanks */
+    rc = make_environment(!is_login);
+    D(("made_environment returned: %s", pam_strerror(pamh, rc)));
+
+    if (is_terminal) {
+	if (rc == PAM_SUCCESS) {
+	    const char *tty = ttyname(STDIN_FILENO);
+
+	    /* no tty name: how did we get here? */
+	    rc = tty ? pam_set_item(pamh, PAM_TTY, (const void *)tty)
+		     : PAM_PERM_DENIED;
+	}
+	if (rc == PAM_SUCCESS) {
+	    /* who is running this program? they must be known to system */
+	    const char *invoker = getlogin();
+
+	    rc = invoker
+		? pam_set_item(pamh, PAM_RUSER, (const void *)invoker)
+		: PAM_PERM_DENIED;
+	}
+    }
+
+    if (rc == PAM_SUCCESS) {
+	rc = pam_set_item(pamh, PAM_RHOST, (const void *)"localhost");
+    }
+    if (rc != PAM_SUCCESS) {
+	exit_now(1, PAM_APP_NAME ": problem establishing environment\n");
+    }
+
+    /* have to pause on failure. At least this long (doubles..) */
+    if (pam_fail_delay(pamh, SU_FAIL_DELAY) != PAM_SUCCESS) {
+	exit_now(1, PAM_APP_NAME ": problem initializing failure delay\n");
+    }
+}
+
+/*
+ * authenticate_user raises the "all" capability set and then runs the
+ * PAM authentication stack. Returns 0 when authentication succeeded
+ * and 1 otherwise; *place names the step reached and *retval holds the
+ * PAM result (PAM_SUCCESS when it was the capability change that
+ * failed, in which case *err_descr is set instead).
+ */
+static int authenticate_user(cap_t all, int *retval, const char **place,
+			     const char **err_descr)
+{
+    *place = "pre-auth cap_set_proc";
+    if (cap_set_proc(all) == 0) {
+	D(("attempt to authenticate user"));
+	*place = "pam_authenticate";
+	*retval = pam_authenticate(pamh, 0);
+	return (*retval == PAM_SUCCESS) ? 0 : 1;
+    }
+
+    D(("failed to raise all capabilities"));
+    *err_descr = "cap_set_proc() failed";
+    *retval = PAM_SUCCESS;
+    return 1;
+}
+
+/*
+ * user_accounting confirms an authenticated user is permitted service
+ * by running the PAM account management stack with the "all"
+ * capability set raised. Returns 0 when access is granted, 1 otherwise.
+ * *retval is only written when pam_acct_mgmt() was actually called.
+ */
+static int user_accounting(cap_t all, int *retval, const char **place,
+			   const char **err_descr)
+{
+    *place = "user_accounting";
+    if (cap_set_proc(all) == 0) {
+	*place = "pam_acct_mgmt";
+	*retval = pam_acct_mgmt(pamh, 0);
+	return (*retval == PAM_SUCCESS) ? 0 : 1;
+    }
+
+    D(("failed to raise all capabilities"));
+    *err_descr = "cap_set_proc() failed";
+    return 1;
+}
+
+/*
+ * Find entry for this terminal (if there is one).
+ * Utmp file should have been opened and rewound for the call.
+ *
+ * Only INIT_PROCESS, LOGIN_PROCESS, USER_PROCESS and DEAD_PROCESS
+ * records are considered; both ut_id and ut_line have to match.
+ * Returns the matching record, or NULL when the file is exhausted.
+ *
+ * XXX: the search should be more or less compatible with libc one.
+ * The caller expects that pututline with the same arguments
+ * will replace the found entry.
+ */
+static const struct utmp *find_utmp_entry(const char *ut_line,
+					  const char *ut_id)
+{
+    struct utmp *entry;
+
+    for (entry = getutent(); entry != NULL; entry = getutent()) {
+	switch (entry->ut_type) {
+	case INIT_PROCESS:
+	case LOGIN_PROCESS:
+	case USER_PROCESS:
+	case DEAD_PROCESS:
+	    break;
+	default:
+	    continue;                     /* not a session style record */
+	}
+	if (strncmp(entry->ut_id, ut_id, UT_IDSIZE) != 0) {
+	    continue;
+	}
+	if (strncmp(entry->ut_line, ut_line, UT_LINESIZE) != 0) {
+	    continue;
+	}
+	return entry;
+    }
+
+    return NULL;
+}
+
+/*
+ * Identify the terminal name and the abbreviation we will use.
+ *
+ * ut_line (UT_LINESIZE bytes) and ut_id (UT_IDSIZE bytes) are fixed
+ * width utmp fields: they are zero filled first and then loaded with
+ * strncpy(), so a value that fills the field is deliberately left
+ * without a terminating NUL.
+ *
+ *  - "/..." device path: ut_line drops a leading DEVICE_FILE_PREFIX,
+ *    ut_id is what follows "/dev/tty", or else the last UT_IDSIZE
+ *    characters of the path;
+ *  - name containing ':' (X session): ut_id starts at the last ':';
+ *  - anything else: ut_id is '?' followed by the start of the name.
+ */
+static void set_terminal_name(const char *terminal, char *ut_line, char *ut_id)
+{
+    memset(ut_line, 0, UT_LINESIZE);
+    memset(ut_id, 0, UT_IDSIZE);
+
+    /* set the terminal entry */
+    if ( *terminal == '/' ) {     /* now deal with filenames */
+	size_t o1, o2;
+
+	o1 = strncmp(DEVICE_FILE_PREFIX, terminal, 5) ? 0 : 5;
+	if (!strncmp("/dev/tty", terminal, 8)) {
+	    o2 = 8;
+	} else {
+	    /*
+	     * Keep the trailing UT_IDSIZE characters. The width is the
+	     * constant itself (not sizeof of it) and the comparison is
+	     * made before subtracting so the unsigned value cannot wrap.
+	     */
+	    size_t len = strlen(terminal);
+
+	    o2 = (len > (size_t) UT_IDSIZE) ? len - (size_t) UT_IDSIZE : 0;
+	}
+
+	strncpy(ut_line, terminal + o1, UT_LINESIZE);
+	strncpy(ut_id, terminal + o2, UT_IDSIZE);
+    } else if (strchr(terminal, ':')) {  /* deal with X-based session */
+	const char *suffix;
+
+	suffix = strrchr(terminal,':');
+	strncpy(ut_line, terminal, UT_LINESIZE);
+	strncpy(ut_id, suffix, UT_IDSIZE);
+    } else {	                         /* finally deal with weird terminals */
+	strncpy(ut_line, terminal, UT_LINESIZE);
+	ut_id[0] = '?';
+	strncpy(ut_id + 1, terminal, UT_IDSIZE - 1);
+    }
+}
+
+/*
+ * Append the record *u_tmp_p to the wtmp file (_PATH_WTMP) while
+ * holding a write lock on it.
+ *
+ * Returns 0 when the whole record was written and 1 otherwise. On every
+ * failure *callname names the step that failed and *err_descr describes
+ * the error, so the caller can always print both.
+ *
+ * Be careful: the function uses alarm() and temporarily replaces the
+ * SIGALRM disposition (it is put back before returning).
+ */
+
+#define WWTMP_STATE_BEGINNING     0
+#define WWTMP_STATE_FILE_OPENED   1
+#define WWTMP_STATE_SIGACTION_SET 2
+#define WWTMP_STATE_LOCK_TAKEN    3
+
+static int write_wtmp(struct utmp *u_tmp_p, const char **callname,
+		      const char **err_descr)
+{
+    int w_tmp_fd;
+    struct flock w_lock;
+    struct sigaction act1, act2;
+    ssize_t written;
+    int saved_errno;
+    int state;
+    int retval;
+
+    state = WWTMP_STATE_BEGINNING;
+    retval = 1;
+
+    do {
+        D(("writing to wtmp"));
+        w_tmp_fd = open(_PATH_WTMP, O_APPEND|O_WRONLY);
+        if (w_tmp_fd == -1) {
+            *callname = "wtmp open";
+            *err_descr = strerror(errno);
+            break;
+        }
+        state = WWTMP_STATE_FILE_OPENED;
+
+        /* prepare for blocking operation... */
+        act1.sa_handler = SIG_DFL;
+        sigemptyset(&act1.sa_mask);
+        act1.sa_flags = 0;
+        if (sigaction(SIGALRM, &act1, &act2) == -1) {
+            *callname = "sigaction";
+            *err_descr = strerror(errno);
+            break;
+        }
+        alarm(WTMP_LOCK_TIMEOUT);
+        state = WWTMP_STATE_SIGACTION_SET;
+
+        /* now we try to lock this file-record exclusively; non-blocking */
+        memset(&w_lock, 0, sizeof(w_lock));
+        w_lock.l_type = F_WRLCK;
+        w_lock.l_whence = SEEK_END;
+        if (fcntl(w_tmp_fd, F_SETLK, &w_lock) < 0) {
+            /* take errno before any other call gets a chance to change it */
+            saved_errno = errno;
+            D(("locking %s failed.", _PATH_WTMP));
+            *callname = "fcntl(F_SETLK)";
+            *err_descr = strerror(saved_errno);
+            break;
+        }
+        alarm(0);
+        sigaction(SIGALRM, &act2, NULL);
+        state = WWTMP_STATE_LOCK_TAKEN;
+
+        /* only a complete record counts as success */
+        written = write(w_tmp_fd, u_tmp_p, sizeof(struct utmp));
+        if (written == (ssize_t) sizeof(struct utmp)) {
+            retval = 0;
+        } else {
+            *callname = "wtmp write";
+            *err_descr = (written == -1) ? strerror(errno) : "short write";
+        }
+    } while(0); /* it's not a loop! */
+
+    if (state >= WWTMP_STATE_LOCK_TAKEN) {
+        w_lock.l_type = F_UNLCK;               /* unlock wtmp file */
+        fcntl(w_tmp_fd, F_SETLK, &w_lock);
+    }else if (state >= WWTMP_STATE_SIGACTION_SET) {
+        /* lock attempt failed: the alarm and handler are still ours */
+        alarm(0);
+        sigaction(SIGALRM, &act2, NULL);
+    }
+
+    if (state >= WWTMP_STATE_FILE_OPENED) {
+        close(w_tmp_fd);                       /* close wtmp file */
+        D(("wtmp written"));
+    }
+
+    return retval;
+}
+
+/*
+ * Copy of the utmp entry that was found for the terminal when the
+ * session was opened (NULL when none has been saved). It is filled in
+ * by utmp_do_open_session() and written back to utmp/wtmp by
+ * utmp_do_close_session().
+ *
+ * XXX - if this gets turned into a module, make this a
+ * pam_data item. You should put the pid in the name so we can
+ * "probably" nest calls more safely...
+ */
+struct utmp *login_stored_utmp=NULL;
+
+/*
+ * Record the start of a session for user on terminal in utmp and wtmp.
+ * Any existing utmp entry for the terminal is first saved in
+ * login_stored_utmp. rhost may be NULL, in which case
+ * RHOST_UNKNOWN_NAME is recorded and no address is looked up.
+ *
+ * Returns:
+ *   0     ok,
+ *   1     non-fatal error (the wtmp record could not be written)
+ *  -1     fatal error (out of memory)
+ *  place and err_descr are set when the return value is not 0
+ * Be careful: the function indirectly uses alarm().
+ */
+static int utmp_do_open_session(const char *user, const char *terminal,
+				const char *rhost, pid_t pid,
+				const char **place, const char **err_descr)
+{
+    struct utmp u_tmp;
+    const struct utmp *u_tmp_p;
+    char ut_line[UT_LINESIZE], ut_id[UT_IDSIZE];
+    int retval;
+
+    set_terminal_name(terminal, ut_line, ut_id);
+
+    utmpname(_PATH_UTMP);
+    setutent();                                           /* rewind file */
+    u_tmp_p = find_utmp_entry(ut_line, ut_id);
+
+    memset(&u_tmp, 0, sizeof(u_tmp));                     /* reset new entry */
+    if (u_tmp_p == NULL) {
+	D(("[NEW utmp]"));
+    } else {
+	D(("[OLD utmp]"));
+
+	/*
+	 * here, we make a record of the former entry. If the
+	 * utmp_close_session code is attached to the same process,
+	 * the wtmp will be replaced, otherwise we leave init to pick
+	 * up the pieces.
+	 */
+	if (login_stored_utmp == NULL) {
+	    login_stored_utmp = malloc(sizeof(struct utmp));
+            if (login_stored_utmp == NULL) {
+                *place = "malloc";
+                *err_descr = "fail";
+                endutent();
+                return -1;
+            }
+	}
+        memcpy(login_stored_utmp, u_tmp_p, sizeof(struct utmp));
+    }
+
+    /* we adjust the entry to reflect the current session */
+    {
+	/* utmp fields are fixed width; strncpy() fills/pads them */
+	strncpy(u_tmp.ut_line, ut_line, UT_LINESIZE);
+	memset(ut_line, 0, UT_LINESIZE);
+	strncpy(u_tmp.ut_id, ut_id, UT_IDSIZE);
+	memset(ut_id, 0, UT_IDSIZE);
+	strncpy(u_tmp.ut_user, user
+		, sizeof(u_tmp.ut_user));
+	strncpy(u_tmp.ut_host, rhost ? rhost : RHOST_UNKNOWN_NAME
+		, sizeof(u_tmp.ut_host));
+
+	/* try to fill the host address entry */
+	if (rhost != NULL) {
+	    struct hostent *hptr;
+
+	    /* XXX: it isn't good to do DNS lookup here...  1998/05/29  SAW */
+            hptr = gethostbyname(rhost);
+	    /*
+	     * Only copy when there really is a first address and it is
+	     * long enough to fill ut_addr; otherwise leave it zeroed.
+	     */
+	    if (hptr != NULL && hptr->h_addr_list != NULL
+		&& hptr->h_addr_list[0] != NULL
+		&& hptr->h_length >= (int) sizeof(u_tmp.ut_addr)) {
+		memcpy(&u_tmp.ut_addr, hptr->h_addr_list[0]
+		       , sizeof(u_tmp.ut_addr));
+	    }
+	}
+
+	/* we fill in the remaining info */
+	u_tmp.ut_type = USER_PROCESS;          /* a user process starting */
+	u_tmp.ut_pid = pid;                    /* session identifier */
+	u_tmp.ut_time = time(NULL);
+    }
+
+    setutent();                                /* rewind file (replace old) */
+    pututline(&u_tmp);                         /* write it to utmp */
+    endutent();                                /* close the file */
+
+    retval = write_wtmp(&u_tmp, place, err_descr); /* write to wtmp file */
+    memset(&u_tmp, 0, sizeof(u_tmp));          /* reset entry */
+
+    return retval;
+}
+
+/*
+ * Record the end of the session on terminal in utmp and wtmp.
+ *
+ * If utmp_do_open_session() saved a former entry in login_stored_utmp
+ * that entry is put back (with a fresh timestamp in wtmp) and the saved
+ * copy is released; otherwise an existing entry for the terminal is
+ * marked DEAD_PROCESS.
+ *
+ * Always returns 0: a write_wtmp() failure only shows up through
+ * *place and *err_descr.
+ * Be careful: the function indirectly uses alarm().
+ */
+static int utmp_do_close_session(const char *terminal,
+				 const char **place, const char **err_descr)
+{
+    struct utmp u_tmp;
+    const struct utmp *u_tmp_p;
+    char ut_line[UT_LINESIZE], ut_id[UT_IDSIZE];
+
+    set_terminal_name(terminal, ut_line, ut_id);
+
+    utmpname(_PATH_UTMP);
+    setutent();                                              /* rewind file */
+
+    /*
+     * if there was a stored entry, return it to the utmp file, else
+     * if there is a session to close, we close that
+     */
+    if (login_stored_utmp) {
+	pututline(login_stored_utmp);
+
+	memcpy(&u_tmp, login_stored_utmp, sizeof(u_tmp));
+	u_tmp.ut_time = time(NULL);            /* a new time to restart */
+
+        write_wtmp(&u_tmp, place, err_descr);
+
+	memset(login_stored_utmp, 0, sizeof(u_tmp)); /* reset entry */
+	free(login_stored_utmp);
+	/* forget the freed copy so a later open/close cannot reuse it */
+	login_stored_utmp = NULL;
+    } else {
+        u_tmp_p = find_utmp_entry(ut_line, ut_id);
+        if (u_tmp_p != NULL) {
+            memset(&u_tmp, 0, sizeof(u_tmp));
+            strncpy(u_tmp.ut_line, ut_line, UT_LINESIZE);
+            strncpy(u_tmp.ut_id, ut_id, UT_IDSIZE);
+            memset(&u_tmp.ut_user, 0, sizeof(u_tmp.ut_user));
+            memset(&u_tmp.ut_host, 0, sizeof(u_tmp.ut_host));
+            u_tmp.ut_addr = 0;
+            u_tmp.ut_type = DEAD_PROCESS;      /* `old' login process */
+            u_tmp.ut_pid = 0;
+            u_tmp.ut_time = time(NULL);
+            setutent();                        /* rewind file (replace old) */
+            pututline(&u_tmp);                 /* mark as dead */
+
+            write_wtmp(&u_tmp, place, err_descr);
+        }
+    }
+
+    /* clean up */
+    memset(ut_line, 0, UT_LINESIZE);
+    memset(ut_id, 0, UT_IDSIZE);
+
+    endutent();                                /* close utmp file */
+    memset(&u_tmp, 0, sizeof(u_tmp));          /* reset entry */
+
+    return 0;
+}
+
+/*
+ * Look up PAM_USER, PAM_TTY and PAM_RHOST and record the start of the
+ * session in utmp/wtmp on behalf of process pid.
+ *
+ * Returns:
+ *   0     ok,
+ *   1     non-fatal error
+ *  -1     fatal error
+ * place and err_descr will be set
+ * *retval holds the PAM status of the item lookups: a failure to read
+ * PAM_USER or PAM_TTY is fatal, a failure to read PAM_RHOST is ignored
+ * (no remote host is recorded and *retval is cleared again).
+ * Be careful: the function indirectly uses alarm().
+ */
+static int utmp_open_session(pid_t pid, int *retval,
+			     const char **place, const char **err_descr)
+{
+    const char *user, *terminal, *rhost;
+
+    *retval = pam_get_item(pamh, PAM_USER, (const void **)&user);
+    if (*retval != PAM_SUCCESS) {
+        *place = "pam_get_item(PAM_USER)";
+        *err_descr = pam_strerror(pamh, *retval);
+        return -1;
+    }
+    /* test the status, not the address it is stored at */
+    *retval = pam_get_item(pamh, PAM_TTY, (const void **)&terminal);
+    if (*retval != PAM_SUCCESS) {
+        *place = "pam_get_item(PAM_TTY)";
+        *err_descr = pam_strerror(pamh, *retval);
+        return -1;
+    }
+    /*
+     * An item can be read successfully and still be unset; the utmp
+     * code dereferences both strings, so refuse to go on without them.
+     */
+    if (user == NULL || terminal == NULL) {
+        *place = "utmp_open_session";
+        *err_descr = "no user or terminal to record the session for";
+        return -1;
+    }
+    *retval = pam_get_item(pamh, PAM_RHOST, (const void **)&rhost);
+    if (*retval != PAM_SUCCESS) {
+        rhost = NULL;
+        *retval = PAM_SUCCESS;    /* deliberately ignored: not an error */
+    }
+
+    return utmp_do_open_session(user, terminal, rhost, pid, place, err_descr);
+}
+
+/*
+ * Fetch PAM_TTY and close the utmp/wtmp record of that terminal.
+ * Returns -1 (with *place and *err_descr set) when the item cannot be
+ * read, otherwise whatever utmp_do_close_session() returns.
+ */
+static int utmp_close_session(const char **place, const char **err_descr)
+{
+    const char *tty;
+    int rc = pam_get_item(pamh, PAM_TTY, (const void **)&tty);
+
+    if (rc == PAM_SUCCESS) {
+        return utmp_do_close_session(tty, place, err_descr);
+    }
+
+    *place = "pam_get_item(PAM_TTY)";
+    *err_descr = pam_strerror(pamh, rc);
+    return -1;
+}
+
+/*
+ * set_credentials raises the process and PAM credentials.
+ *
+ * The PAM_USER is looked up in the password database (uid 0 is
+ * refused), LOGNAME (login only), HOME and USER are placed in the PAM
+ * environment, the groups and gid are set, the uid is changed with
+ * cap_setuid() and finally pam_setcred(PAM_ESTABLISH_CRED) is called
+ * with the "all" capability set raised.
+ *
+ * On return *user_p is the PAM_USER string, *uid_p the user's uid and
+ * *pw_shell a heap copy of the user's shell which the caller then
+ * owns. These outputs are written as soon as they are known, so some
+ * of them may be set even when the function fails.
+ *
+ * Returns 0 on success and 1 on failure, with *retval holding a PAM
+ * status and/or *err_descr a description of the failure.
+ */
+static int set_credentials(cap_t all, int login,
+			   const char **user_p, uid_t *uid_p,
+			   const char **pw_shell, int *retval,
+			   const char **place, const char **err_descr)
+{
+    const char *user;
+    char *name = NULL;          /* private copy of pw_name, freed on exit */
+    char *shell = NULL;         /* ours until handed over via *pw_shell */
+    cap_value_t csetgid = CAP_SETGID;
+    cap_t current;
+    int status;
+    int failed = 1;
+    struct passwd *pw;
+    uid_t uid;
+
+    D(("get user from pam"));
+    *place = "set_credentials";
+    *retval = pam_get_item(pamh, PAM_USER, (const void **)&user);
+    if (*retval != PAM_SUCCESS || user == NULL || *user == '\0') {
+	D(("error identifying user from PAM."));
+	*retval = PAM_USER_UNKNOWN;
+	return 1;
+    }
+    *user_p = user;
+
+    /*
+     * Add the LOGNAME and HOME environment variables.
+     */
+
+    pw = getpwnam(user);
+    if (pw == NULL || (name = x_strdup(pw->pw_name)) == NULL) {
+	D(("failed to identify user"));
+	*retval = PAM_USER_UNKNOWN;
+	return 1;
+    }
+
+    uid = pw->pw_uid;
+    if (uid == 0) {
+	D(("user is superuser: %s", name));
+	*retval = PAM_CRED_ERR;
+	goto out;
+    }
+    *uid_p = uid;
+
+    shell = x_strdup(pw->pw_shell);
+    if (shell == NULL) {
+	D(("user %s has no shell", name));
+	*retval = PAM_CRED_ERR;
+	goto out;
+    }
+
+    if (login) {
+	/* set LOGNAME, HOME */
+	if (pam_misc_setenv(pamh, "LOGNAME", name, 0) != PAM_SUCCESS) {
+	    D(("failed to set LOGNAME"));
+	    *retval = PAM_CRED_ERR;
+	    goto out;
+	}
+    }
+
+    /* bash requires these be set to the target user values */
+    if (pam_misc_setenv(pamh, "HOME", pw->pw_dir, 0) != PAM_SUCCESS) {
+	D(("failed to set HOME"));
+	*retval = PAM_CRED_ERR;
+	goto out;
+    }
+    if (pam_misc_setenv(pamh, "USER", name, 0) != PAM_SUCCESS) {
+	D(("failed to set USER"));
+	*retval = PAM_CRED_ERR;
+	goto out;
+    }
+
+    current = cap_get_proc();
+    cap_set_flag(current, CAP_EFFECTIVE, 1, &csetgid, CAP_SET);
+    status = cap_set_proc(current);
+    cap_free(current);
+    if (status != 0) {
+	*err_descr = "unable to raise CAP_SETGID";
+	goto out;
+    }
+
+    /* initialize groups */
+    if (initgroups(pw->pw_name, pw->pw_gid) != 0 || setgid(pw->pw_gid) != 0) {
+	D(("failed to setgid etc"));
+	*retval = PAM_PERM_DENIED;
+	goto out;
+    }
+    *pw_shell = shell;
+    shell = NULL;                        /* the caller owns the copy now */
+
+    pw = NULL;                                                  /* be tidy */
+
+    D(("desired uid=%d", (int) uid));
+
+    /* assume user's identity - but preserve the permitted set */
+    if (cap_setuid(uid) != 0) {
+	D(("failed to setuid: %s", strerror(errno)));
+	*retval = PAM_PERM_DENIED;
+	goto out;
+    }
+
+    /*
+     * Next, we call the PAM framework to add/enhance the credentials
+     * of this user [it may change the user's home directory in the
+     * pam_env, and add supplemental group memberships...].
+     */
+    D(("setting credentials"));
+    if (cap_set_proc(all)) {
+	D(("failed to raise all capabilities"));
+	*retval = PAM_PERM_DENIED;
+	goto out;
+    }
+
+    D(("calling pam_setcred to establish credentials"));
+    *retval = pam_setcred(pamh, PAM_ESTABLISH_CRED);
+    failed = (*retval != PAM_SUCCESS);
+
+out:
+    free(shell);                 /* NULL once ownership has moved on */
+    free(name);
+    return failed;
+}
+
+/*
+ * open_session invokes the open session PAM stack with the supplied
+ * capability set raised. Returns 0 when the session is open and 1
+ * otherwise; a session that was opened must be closed again later.
+ */
+static int open_session(cap_t all, int *retval, const char **place,
+			const char **err_descr)
+{
+    /* Open the su-session */
+    *place = "pam_open_session";
+    if (cap_set_proc(all) != 0) {
+	D(("failed to raise t_caps capabilities"));
+	*err_descr = "capability setting failed";
+	return 1;
+    }
+
+    *retval = pam_open_session(pamh, 0);     /* Must take care to close */
+    return (*retval == PAM_SUCCESS) ? 0 : 1;
+}
+
+/* ------ shell invoker ----------------------- */
+
+/*
+ * Callback registered with cap_launcher_callback(): ends the PAM
+ * transaction whose handle is passed as h and turns the terminal
+ * signals back on. Returns 0 on success and -1 when pam_end() fails.
+ */
+static int launch_callback_fn(void *h)
+{
+    pam_handle_t *my_pamh = h;
+    int rc;
+
+    D(("pam_end"));
+    rc = pam_end(my_pamh, PAM_SUCCESS | PAM_DATA_SILENT);
+    pamh = NULL;
+    if (rc != PAM_SUCCESS) {
+	return -1;
+    }
+
+    /*
+     * Restore a signal status: information if the signal is ignored
+     * is inherited across exec() call.  (SAW)
+     */
+    enable_terminal_signals();
+
+#ifdef PAM_DEBUG
+    {
+	cap_iab_t iab = cap_iab_get_proc();
+	char *iab_text = cap_iab_to_text(iab);
+
+	D(("iab = %s", iab_text));
+	cap_free(iab_text);
+	cap_free(iab);
+    }
+    {
+	cap_t caps = cap_get_proc();
+	char *caps_text = cap_to_text(caps, NULL);
+
+	D(("cap = %s", caps_text));
+	cap_free(caps_text);
+	cap_free(caps);
+    }
+#endif
+
+    D(("about to launch"));
+    return 0;
+}
+
+/*
+ * Launch the user's shell (optionally running command) as a child
+ * process and wait for it to finish.
+ *
+ * Returns PAM_SYSTEM_ERR when the launch could not be prepared or
+ * performed, otherwise the value reported by wait_for_child() for the
+ * shell. After a successful launch this process raises the "all"
+ * capability set again and tries to switch its own uid to TEMP_UID
+ * while it waits.
+ */
+static int perform_launch_and_cleanup(cap_t all, int is_login, const char *user,
+				      const char *shell, const char *command)
+{
+    int status;
+    const char *home;
+    const char * const * shell_args;
+    char * const * shell_env;
+    cap_launch_t launcher;
+    pid_t child;
+    cap_iab_t iab;
+
+    /*
+     * Break up the shell command into a command and arguments
+     */
+    shell_args = build_shell_args(shell, is_login, command);
+    if (shell_args == NULL) {
+	D(("failed to compute shell arguments"));
+	return PAM_SYSTEM_ERR;
+    }
+
+    home = pam_getenv(pamh, "HOME");
+    if ( !home || home[0] == '\0' ) {
+	fprintf(stderr, "setting home directory for %s to %s\n",
+		user, DEFAULT_HOME);
+	home = DEFAULT_HOME;
+	if (pam_misc_setenv(pamh, "HOME", home, 0) != PAM_SUCCESS) {
+	    D(("unable to set $HOME"));
+	    fprintf(stderr,
+		    "Warning: unable to set HOME environment variable\n");
+	}
+    }
+    if (is_login) {
+	if (chdir(home) && chdir(DEFAULT_HOME)) {
+	    D(("failed to change directory"));
+	    return PAM_SYSTEM_ERR;
+	}
+    }
+
+    shell_env = pam_getenvlist(pamh);
+    if (shell_env == NULL) {
+	D(("failed to obtain environment for child"));
+	return PAM_SYSTEM_ERR;
+    }
+
+    /*
+     * The IAB value is only read to confirm it is available; it is not
+     * handed to the launcher, so release it straight away.
+     */
+    iab = cap_iab_get_proc();
+    if (iab == NULL) {
+	D(("failed to read IAB value of process"));
+	return PAM_SYSTEM_ERR;
+    }
+    cap_free(iab);
+
+    launcher = cap_new_launcher(shell_args[0],
+				(const char * const *) &shell_args[1],
+				(const char * const *) shell_env);
+    if (launcher == NULL) {
+	D(("failed to initialize launcher"));
+	return PAM_SYSTEM_ERR;
+    }
+    cap_launcher_callback(launcher, launch_callback_fn);
+
+    child = cap_launch(launcher, pamh);
+    cap_free(launcher);
+
+    if (cap_set_proc(all) != 0) {
+	D(("failed to restore process capabilities"));
+	return PAM_SYSTEM_ERR;
+    }
+
+    /* no child was started: there is nothing to wait for */
+    if (child < 0) {
+	D(("failed to launch shell"));
+	return PAM_SYSTEM_ERR;
+    }
+
+    /* job control is off for login sessions */
+    prepare_for_job_control(!is_login && command != NULL);
+
+    if (cap_setuid(TEMP_UID) != 0) {
+	fprintf(stderr, "[failed to change monitor UID=%d]\n", TEMP_UID);
+    }
+
+    /* wait for child to terminate */
+    status = wait_for_child(child);
+    if (status != 0) {
+	D(("shell returned %d", status));
+    }
+    return status;
+}
+
+/*
+ * Close the PAM session with the "all" capability set raised.
+ * Problems are reported as warnings on stderr; nothing is returned.
+ */
+static void close_session(cap_t all)
+{
+    int rc;
+
+    D(("session %p closing", pamh));
+    if (cap_set_proc(all) != 0) {
+	fprintf(stderr, "WARNING: could not raise all caps\n");
+    }
+
+    rc = pam_close_session(pamh, 0);
+    if (rc == PAM_SUCCESS) {
+	return;
+    }
+    fprintf(stderr, "WARNING: could not close session\n\t%s\n",
+	    pam_strerror(pamh,rc));
+}
+
+/* -------------------------------------------- */
+/* ------ the application itself -------------- */
+/* -------------------------------------------- */
+
+/*
+ * Program entry point.
+ *
+ * Sequence: parse the command line and start PAM, authenticate the
+ * user, check the account, set credentials, change the terminal owner,
+ * (for a login) write [uw]tmp entries, open the PAM session, launch the
+ * shell and wait for it. Failures jump into the chain of labels at the
+ * bottom (utmp_closer, delete_cred, auth_exit, su_exit) so that only
+ * the steps that were completed get undone.
+ *
+ * The process exits with "status": 1 unless the shell was launched, in
+ * which case it is what perform_launch_and_cleanup() returned.
+ */
+int main(int argc, char *argv[])
+{
+    int retcode, is_login, status;
+    int retval, final_retval; /* PAM_xxx return values */
+    const char *command, *shell;
+    uid_t uid;
+    const char *place = NULL, *err_descr = NULL;
+    cap_t all, t_caps;
+    const char *user;
+
+    /* "all": current capabilities with Effective := Permitted and an
+       empty Inheritable flag */
+    all = cap_get_proc();
+    cap_fill(all, CAP_EFFECTIVE, CAP_PERMITTED);
+    cap_clear_flag(all, CAP_INHERITABLE);
+
+    checkfds();
+
+    /*
+     * Check whether stdin is a terminal and store terminal modes for later.
+     */
+    store_terminal_modes();
+
+    /* ---------- parse the argument list and --------- */
+    /* ------ initialize the Linux-PAM interface ------ */
+    {
+	parse_command_line(argc, argv, &is_login, &user, &command);
+	place = "do_pam_init";
+	do_pam_init(user, is_login);   /* call pam_start and set PAM items */
+	user = NULL;                   /* transient until PAM_USER defined */
+    }
+
+    /*
+     * Turn off terminal signals - this is to be sure that su gets a
+     * chance to call pam_end() and restore the terminal modes in
+     * spite of the frustrated user pressing Ctrl-C.
+     */
+    disable_terminal_signals();
+
+    /*
+     * Random exits from here are strictly prohibited :-) (SAW) AGM
+     * achieves this with goto's and a single exit at the end of main.
+     */
+    status = 1;                       /* fake exit status of a child */
+    err_descr = NULL;                 /* errors haven't happened */
+
+    /* this failure path skips all PAM clean up, including pam_end() */
+    if (make_process_unkillable(&place, &err_descr) != 0) {
+	goto su_exit;
+    }
+
+    if (authenticate_user(all, &retval, &place, &err_descr) != 0) {
+	goto auth_exit;
+    }
+
+    /*
+     * The user is valid, but should they have access at this
+     * time?
+     */
+    if (user_accounting(all, &retval, &place, &err_descr) != 0) {
+	goto auth_exit;
+    }
+
+    D(("su attempt is confirmed as authorized"));
+
+    if (set_credentials(all, is_login, &user, &uid, &shell,
+			&retval, &place, &err_descr) != 0) {
+	D(("failed to set credentials"));
+	goto auth_exit;
+    }
+
+    /*
+     * ... setup terminal, ...
+     */
+    retcode = change_terminal_owner(uid, is_login, &place, &err_descr);
+    if (retcode > 0) {
+	fprintf(stderr, PAM_APP_NAME ": %s: %s\n", place, err_descr);
+	err_descr = NULL; /* forget about the problem */
+    } else if (retcode < 0) {
+	D(("terminal owner to uid=%d change failed", uid));
+	goto auth_exit;
+    }
+
+    /*
+     * Here the IAB value is fixed and may differ from all's
+     * Inheritable value. So synthesize what we need to proceed in the
+     * child, for now, in this current process.
+     */
+    place = "preserving inheritable parts";
+    t_caps = cap_get_proc();
+    if (t_caps == NULL) {
+	D(("failed to read capabilities"));
+	err_descr = "capability read failed";
+	goto delete_cred;
+    }
+    if (cap_fill(t_caps, CAP_EFFECTIVE, CAP_PERMITTED)) {
+	D(("failed to fill effective bits"));
+	err_descr = "capability fill failed";
+	goto delete_cred;
+    }
+
+    /*
+     * ... make [uw]tmp entries.
+     */
+    if (is_login) {
+	/*
+	 * Note: we use the parent pid as a session identifier for
+	 * the logging.
+	 */
+	retcode = utmp_open_session(getpid(), &retval, &place, &err_descr);
+	if (retcode > 0) {
+	    fprintf(stderr, PAM_APP_NAME ": %s: %s\n", place, err_descr);
+	    err_descr = NULL; /* forget about this non-critical problem */
+	} else if (retcode < 0) {
+	    goto delete_cred;
+	}
+    }
+
+#ifdef PAM_DEBUG
+    cap_iab_t iab = cap_iab_get_proc();
+    char *text = cap_iab_to_text(iab);
+    D(("pre-session open iab = %s", text));
+    cap_free(text);
+    cap_free(iab);
+#endif
+
+    /* the session is opened with t_caps raised rather than all */
+    if (open_session(t_caps, &retval, &place, &err_descr) != 0) {
+	goto utmp_closer;
+    }
+
+    /* from here on status is the result of the launch, not the fake 1 */
+    status = perform_launch_and_cleanup(all, is_login, user, shell, command);
+    close_session(all);
+
+utmp_closer:
+    if (is_login) {
+	/* do [uw]tmp cleanup */
+	retcode = utmp_close_session(&place, &err_descr);
+	if (retcode) {
+	    fprintf(stderr, PAM_APP_NAME ": %s: %s\n", place, err_descr);
+	}
+    }
+
+delete_cred:
+    D(("delete credentials"));
+    if (cap_set_proc(all)) {
+	D(("failed to raise all capabilities"));
+    }
+    retcode = pam_setcred(pamh, PAM_DELETE_CRED);
+    if (retcode != PAM_SUCCESS) {
+	fprintf(stderr, "WARNING: could not delete credentials\n\t%s\n",
+		pam_strerror(pamh, retcode));
+    }
+
+    D(("return terminal to local control"));
+    restore_terminal_owner();
+
+auth_exit:
+    D(("for clean up we restore the launching user"));
+    make_process_killable();
+
+    /* report the last PAM failure if there was one, else any system
+       error that is still recorded in err_descr */
+    D(("all done - closing down pam"));
+    if (retval != PAM_SUCCESS) {      /* PAM has failed */
+	fprintf(stderr, PAM_APP_NAME ": %s\n", pam_strerror(pamh, retval));
+	final_retval = PAM_ABORT;
+    } else if (err_descr != NULL) {   /* a system error has happened */
+	fprintf(stderr, PAM_APP_NAME ": %s: %s\n", place, err_descr);
+	final_retval = PAM_ABORT;
+    } else {
+	final_retval = PAM_SUCCESS;
+    }
+    (void) pam_end(pamh, final_retval);
+    pamh = NULL;
+
+    /* a terminal that could not be restored turns success into failure */
+    if (restore_terminal_modes() != 0 && !status) {
+	status = 1;
+    }
+
+su_exit:
+    if (status != 0) {
+	perror(PAM_APP_NAME " failed");
+    }
+    exit(status);                 /* transparent exit */
+}
diff --git a/contrib/sucap/sucap.pamconfig b/contrib/sucap/sucap.pamconfig
new file mode 100644
index 0000000..02b70f2
--- /dev/null
+++ b/contrib/sucap/sucap.pamconfig
@@ -0,0 +1,6 @@
+#%PAM-1.0
+auth            required        pam_cap.so config=/etc/security/capability.conf 
+auth		required        pam_unix.so
+account		required	pam_unix.so
+password	required	pam_unix.so
+session		required        pam_unix.so
diff --git a/distcheck.sh b/distcheck.sh
new file mode 100755
index 0000000..3360e31
--- /dev/null
+++ b/distcheck.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+actual=$(wget -o/dev/null -O/dev/stdout https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/include/uapi/linux/capability.h | grep "#define.CAP_LAST_CAP"|awk '{print $3}')
+working=$(grep "#define.CAP_LAST_CAP" libcap/include/uapi/linux/capability.h|awk '{print $3}')
+
+if [[ ${actual} = ${working} ]]; then
+    echo "up to date with officially named caps"
+    exit 0
+fi
+
+echo "want: ${actual}"
+echo "have: ${working}"
+exit 1
diff --git a/doc/Makefile b/doc/Makefile
index 7d1f347..6919488 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -8,33 +8,57 @@ include $(topdir)/Make.Rules
 MAN1S = capsh.1
 MAN3S = cap_init.3 cap_free.3 cap_dup.3 \
 	cap_clear.3 cap_clear_flag.3 cap_get_flag.3 cap_set_flag.3 \
+	cap_fill.3 cap_fill_flag.3 cap_max_bits.3 \
 	cap_compare.3 cap_get_proc.3 cap_get_pid.3 cap_set_proc.3 \
 	cap_get_file.3 cap_get_fd.3 cap_set_file.3 cap_set_fd.3 \
-	cap_copy_ext.3 cap_size.3 cap_copy_int.3 \
+	cap_set_nsowner.3 cap_get_nsowner.3 \
+	cap_copy_ext.3 cap_size.3 cap_copy_int.3 cap_mode.3 \
+	cap_copy_int_check.3 cap_set_syscall.3 \
 	cap_from_text.3 cap_to_text.3 cap_from_name.3 cap_to_name.3 \
 	capsetp.3 capgetp.3 libcap.3 \
-	cap_get_bound.3 cap_drop_bound.3
-MAN8S = getcap.8 setcap.8
+	cap_get_bound.3 cap_drop_bound.3 \
+	cap_get_mode.3 cap_set_mode.3 cap_mode_name.3 \
+	cap_get_secbits.3 cap_set_secbits.3 \
+	cap_setuid.3 cap_setgroups.3 \
+	cap_launch.3 cap_func_launcher.3 cap_launcher_callback.3 \
+	cap_launcher_set_chroot.3 cap_launcher_set_mode.3 \
+	cap_launcher_setgroups.3 cap_launcher_setuid.3 \
+	cap_launcher_set_iab.3 cap_new_launcher.3 \
+	cap_iab.3 cap_iab_init.3 cap_iab_dup.3 cap_iab_compare.3 \
+	cap_iab_get_proc.3 cap_iab_get_pid.3 cap_iab_set_proc.3 \
+	cap_iab_to_text.3 cap_iab_from_text.3 cap_iab_get_vector.3 \
+	cap_iab_set_vector.3 cap_iab_fill.3 cap_proc_root.3 \
+	cap_prctl.3 cap_prctlw.3 \
+	psx_syscall.3 psx_syscall3.3 psx_syscall6.3 psx_set_sensitivity.3 \
+	psx_load_syscalls.3 __psx_syscall.3 \
+	libpsx.3
+MAN8S = getcap.8 setcap.8 getpcaps.8 captree.8
 
 MANS = $(MAN1S) $(MAN3S) $(MAN8S)
 
 all: $(MANS)
 
+test:
+	@echo no doc tests available
+
+sudotest:
+	@echo no doc sudotests available
+
 .PHONY: html
 html:
 	mkdir -p html
 	for man in $(MANS) ; \
 	do \
-		egrep '^\.so man' $$man > /dev/null || \
+		$(BUILD_EGREP) '^\.so man' $$man > /dev/null || \
 		groff -man -Thtml $$man > html/$$man.html ; \
 	done
 
 install:
-	mkdir -p -m 755 $(MANDIR)/man1 $(MANDIR)/man3 $(MANDIR)/man8
+	mkdir -p -m 755 $(FAKEROOT)$(MANDIR)/man1 $(FAKEROOT)$(MANDIR)/man3 $(FAKEROOT)$(MANDIR)/man8
 	for man in \
-		$(MANDIR)/man1 $(MAN1S) \
-		$(MANDIR)/man3 $(MAN3S) \
-		$(MANDIR)/man8 $(MAN8S) \
+		$(FAKEROOT)$(MANDIR)/man1 $(MAN1S) \
+		$(FAKEROOT)$(MANDIR)/man3 $(MAN3S) \
+		$(FAKEROOT)$(MANDIR)/man8 $(MAN8S) \
 		; \
 	do \
 		case $$man in \
@@ -46,5 +70,3 @@ install:
 clean:
 	$(LOCALCLEAN)
 	rm -rf html
-
-
diff --git a/doc/__psx_syscall.3 b/doc/__psx_syscall.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/__psx_syscall.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/cap_clear.3 b/doc/cap_clear.3
index 48f5cc0..b8dbc30 100644
--- a/doc/cap_clear.3
+++ b/doc/cap_clear.3
@@ -1,30 +1,29 @@
-.TH CAP_CLEAR 3 "2008-05-11" "" "Linux Programmer's Manual"
+.TH CAP_CLEAR 3 "2022-10-16" "" "Linux Programmer's Manual"
 .SH NAME
-cap_clear, cap_clear_flag, cap_get_flag, cap_set_flag, cap_compare \- capability data object manipulation
+cap_clear, cap_clear_flag, cap_get_flag, cap_set_flag, cap_fill_flag, cap_fill, cap_compare, cap_max_bits \- capability data object manipulation
 .SH SYNOPSIS
 .nf
-.B #include <sys/capability.h>
-.sp
-.BI "int cap_clear(cap_t " cap_p );
-.sp
-.BI "int cap_clear_flag(cap_t " cap_p ", cap_flag_t " flag ");"
-.sp
-.BI "int cap_get_flag(cap_t " cap_p ", cap_value_t " cap ,
-.BI "                 cap_flag_t " flag ", cap_flag_value_t *" value_p ");"
-.sp
-.BI "int cap_set_flag(cap_t " cap_p ", cap_flag_t " flag ", int " ncap ,
-.BI "                 const cap_value_t *" caps \
-", cap_flag_value_t " value ");"
-.sp
-.BI "int cap_compare(cap_t " cap_a ", cap_t " cap_b ");"
-.sp
-Link with \fI-lcap\fP.
+#include <sys/capability.h>
+
+int cap_clear(cap_t cap_p);
+int cap_clear_flag(cap_t cap_p, cap_flag_t flag);
+int cap_get_flag(cap_t cap_p, cap_value_t cap,
+                 cap_flag_t flag, cap_flag_value_t *value_p);
+int cap_set_flag(cap_t cap_p, cap_flag_t flag, int ncap,
+                 const cap_value_t *caps, cap_flag_value_t value);
+int cap_fill_flag(cap_t cap_p, cap_flag_t to,
+                  const cap_t ref, cap_flag_t from);
+int cap_fill(cap_t cap_p, cap_flag_t to, cap_flag_t from);
+int cap_compare(cap_t cap_a, cap_t cap_b);
+cap_value_t cap_max_bits();
 .fi
+.sp
+Link with \fI\-lcap\fP.
 .SH DESCRIPTION
 These functions work on a capability state held in working storage.
 A
 .I cap_t
-holds information about the capabilities in each of the three sets,
+holds information about the capabilities in each of the three flags,
 Permitted, Inheritable, and Effective.
 Each capability in a set may be clear (disabled, 0) or set (enabled, 1).
 .PP
@@ -36,7 +35,7 @@ identifies a capability, such as
 .TP
 .I cap_flag_t
 identifies one of the three flags associated with a capability
-(i.e., it identifies one of the three capability sets).
+(i.e., it identifies one of the three capability dimensions).
 Valid values for this type are
 .BR CAP_EFFECTIVE ,
 .B CAP_INHERITABLE
@@ -85,22 +84,39 @@ The argument,
 is used to specify the number of capabilities in the array,
 .IR caps .
 .PP
+.BR cap_fill_flag ()
+fills the to flag of one capability set, with the values in the from
+flag of a reference capability set.
+.PP
+.BR cap_fill ()
+fills the to flag values by copying all of the from flag values.
+.PP
 .BR cap_compare ()
 compares two full capability sets and, in the spirit of
 .BR memcmp (),
 returns zero if the two capability sets are identical. A positive
-return value,
-.BR status ,
-indicates there is a difference between them. The
-returned value carries further information about which of three sets,
-.I cap_flag_t
-.BR flag ,
-differ. Specifically, the macro
+return
+.I value
+indicates there is a difference between them. The returned
+.I value
+carries further information about the
+.BI "cap_flag_t " flag
+differences. Specifically, the macro
 .B CAP_DIFFERS
-.RI ( status ", " flag )
-evaluates to non-zero if the returned status differs in its
+.RI ( value ", " flag )
+evaluates to non-zero if the returned
+.I value
+differs in its
 .I flag
 components.
+.PP
+.BR cap_max_bits ()
+returns the number of capability values known to the running
+kernel. This may differ from libcap's list known at compilation
+time. Unnamed, at compilation time, capabilities can be referred to
+numerically and libcap will handle them appropriately. Note, the
+running kernel wins and it gets to define what "all" capabilities
+means.
 .SH "RETURN VALUE"
 .BR cap_clear (),
 .BR cap_clear_flag (),
@@ -110,7 +126,12 @@ and
 .BR cap_compare ()
 return zero on success, and \-1 on failure. Other return values for
 .BR cap_compare ()
-are described above.
+are described above. The function
+.BR cap_max_bits ()
+returns a numeric value of type
+.B cap_value_t
+that is one larger than the largest actual value known to the running
+kernel.
 .PP
 On failure,
 .I errno
@@ -118,11 +139,14 @@ is set to
 .BR EINVAL ,
 indicating that one of the arguments is invalid.
 .SH "CONFORMING TO"
-These functions are as per the withdrawn POSIX.1e draft specification.
-.BR cap_clear_flag ()
-and
+These functions are mostly as specified in the withdrawn POSIX.1e
+draft specification.  The following are Linux extensions:
+.BR cap_fill (),
+.BR cap_fill_flag (),
+.BR cap_clear_flag (),
 .BR cap_compare ()
-are Linux extensions.
+and
+.BR cap_max_bits ().
 .SH "SEE ALSO"
 .BR libcap (3),
 .BR cap_copy_ext (3),
diff --git a/doc/cap_copy_ext.3 b/doc/cap_copy_ext.3
index 61d9381..b863442 100644
--- a/doc/cap_copy_ext.3
+++ b/doc/cap_copy_ext.3
@@ -1,17 +1,18 @@
-.TH CAP_COPY_EXT 3 "2008-05-11" "" "Linux Programmer's Manual"
+.TH CAP_COPY_EXT 3 "2021-03-06" "" "Linux Programmer's Manual"
 .SH NAME
 cap_copy_ext, cap_size, cap_copy_int \- capability state
 external representation translation
 .SH SYNOPSIS
-.B #include <sys/capability.h>
+.nf
+#include <sys/capability.h>
+
+ssize_t cap_size(cap_t cap_p);
+ssize_t cap_copy_ext(void *ext_p, cap_t cap_p, ssize_t size);
+cap_t cap_copy_int(const void *ext_p);
+cap_t cap_copy_int_check(const void *cap_ext, ssize_t length);
+.fi
 .sp
-.BI "ssize_t cap_size(cap_t " cap_p );
-.sp
-.BI "ssize_t cap_copy_ext(void *" ext_p ", cap_t " cap_p ", ssize_t " size );
-.sp
-.BI "cap_t cap_copy_int(const void *" ext_p );
-.sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
 .SH DESCRIPTION
 These functions translate between internal and external
 representations of a capability state.  The external representation is
@@ -34,7 +35,7 @@ function in order to hold the capability data record created from
 .BR cap_copy_ext ()
 copies a capability state in working storage, identified by
 .IR cap_p ,
-from system managed space to user-managed space (pointed to by
+from system-managed space to user-managed space (pointed to by
 .IR ext_p )
 and returns the length of the resulting data record.  The size parameter
 represents the maximum size, in bytes, of the resulting data record.  The
@@ -56,9 +57,9 @@ state.  The function initializes the capability state and then copies
 the capability state from the record pointed to by
 .I ext_p
 into the capability state, converting, if necessary, the data from a
-contiguous, persistent format to an undefined, internal format.  Once
-copied into internal format, the object can be manipulated by the capability
-state manipulation functions (see
+contiguous, persistent format to an opaque, internal format.  Once
+copied into internal format, the object can be manipulated by the
+capability state manipulation functions (see
 .BR cap_clear (3)).
 Note that the record pointed to by
 .I ext_p
@@ -71,19 +72,27 @@ longer required, by calling
 with the
 .I cap_t
 as an argument.
+.PP
+.BR cap_copy_int_check ()
+performs the same operation as
+.BR cap_copy_int ()
+but additionally checks that the provided external data's size is not
+larger than the noted length.
 .SH "RETURN VALUE"
 .BR cap_size ()
 returns the length required to hold a capability data record on success,
-and -1 on failure.
+and \-1 on failure.
 .PP
 .BR cap_copy_ext ()
 returns the number of bytes placed in the user managed space pointed to by
 .I ext_p 
-on success, and -1 on failure.
+on success, and \-1 on failure.
 .PP
 .BR cap_copy_int ()
-returns a pointer to the newly created capability state in working storage
-on success, and NULL on failure.
+and
+.BR cap_copy_int_check ()
+return a pointer to the newly created capability state in working
+storage on success, and NULL on failure.
 .PP
 On failure,
 .BR errno
diff --git a/doc/cap_copy_int_check.3 b/doc/cap_copy_int_check.3
new file mode 100644
index 0000000..2e6e89c
--- /dev/null
+++ b/doc/cap_copy_int_check.3
@@ -0,0 +1 @@
+.so man3/cap_copy_ext.3
diff --git a/doc/cap_fill.3 b/doc/cap_fill.3
new file mode 100644
index 0000000..db506c6
--- /dev/null
+++ b/doc/cap_fill.3
@@ -0,0 +1 @@
+.so man3/cap_clear.3
diff --git a/doc/cap_fill_flag.3 b/doc/cap_fill_flag.3
new file mode 100644
index 0000000..db506c6
--- /dev/null
+++ b/doc/cap_fill_flag.3
@@ -0,0 +1 @@
+.so man3/cap_clear.3
diff --git a/doc/cap_from_text.3 b/doc/cap_from_text.3
index ccf7d95..1a01c7c 100644
--- a/doc/cap_from_text.3
+++ b/doc/cap_from_text.3
@@ -1,22 +1,21 @@
 .\"
 .\" written by Andrew Main <zefram@dcs.warwick.ac.uk>
 .\"
-.TH CAP_FROM_TEXT 3 "2008-05-10" "" "Linux Programmer's Manual"
+.TH CAP_FROM_TEXT 3 "2022-09-22" "" "Linux Programmer's Manual"
 .SH NAME
 cap_from_text, cap_to_text, cap_to_name, cap_from_name \- capability
 state textual representation translation
 .SH SYNOPSIS
-.B #include <sys/capability.h>
-.sp
-.BI "cap_t cap_from_text(const char *" buf_p );
-.sp
-.BI "char *cap_to_text(cap_t " caps ", ssize_t *" length_p );
-.sp
-.BI "int cap_from_name(const char *" name ", cap_value_t *" cap_p );
-.sp
-.BI "char *cap_to_name(cap_value_t " cap );
+.nf
+#include <sys/capability.h>
+
+cap_t cap_from_text(const char *buf_p);
+char *cap_to_text(cap_t caps, ssize_t *len_p);
+int cap_from_name(const char *name, cap_value_t *cap_p);
+char *cap_to_name(cap_value_t cap);
+.fi
 .sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
 .SH DESCRIPTION
 These functions translate a capability state between
 an internal representation and a textual one.
@@ -47,7 +46,7 @@ is both set and cleared within a single clause.
 .PP
 .BR cap_to_text ()
 converts the capability state in working storage identified by
-.I cap_p
+.I caps
 into a nul-terminated human-readable string.  This function allocates
 any memory necessary to contain the string, and returns a pointer to
 the string.  If the pointer
@@ -57,7 +56,7 @@ the function shall also return the full length of the string (not including
 the nul terminator) in the location pointed to by
 .IR len_p .
 The capability state in working storage, identified by
-.IR cap_p ,
+.IR caps ,
 is completely represented in the character string.
 When the capability state in working storage is no longer required,
 the caller should free any releasable memory by calling
@@ -100,7 +99,7 @@ followed by an
 An action-list consists of a sequence of
 .I operator flag
 pairs.  Legal operators are:
-.RB ` = "', '" + "', and `" - "'."
+.RB ` = "', '" + "', and `" \- "'."
 Legal flags are:
 .RB ` e "', `" i "', and `" p "'."
 These flags are case-sensitive and specify the Effective, Inheritable
@@ -136,15 +135,22 @@ refer to `all' capabilities.  For example, the following three
 clauses are equivalent to each other (and indicate a completely empty
 capability set): "all="; "="; "cap_chown,<every-other-capability>=".
 .PP
-The operators, `+' and `-' both require an explicit preceding
+The operators, `+' and `\-' both require an explicit preceding
 capability list and one or more explicit trailing flags.  The `+'
 operator will raise all of the listed capabilities in the flagged
-capability sets.  The `-' operator will lower all of the listed
+capability sets.  The `\-' operator will lower all of the listed
 capabilities in the flagged capability sets.  For example:
-"all+p" will raise all of the Permitted capabilities; "cap_fowner+p-i"
-will raise the override-file-ownership capability in the Permitted
-capability set and lower this Inheritable capability;
-"cap_fowner+pe-i" and "cap_fowner=+pe" are equivalent.
+"all+p" will raise all of the Permitted capabilities and
+"cap_fowner\-i" will lower the override-file-ownership in the Inheritable set.
+.PP
+The action list can consist of multiple
+.I operator flag
+pairs; the actions are performed in left-to-right order.
+Thus, for example,
+"cap_fowner+p\-i"
+is equivalent to "cap_fowner+p cap_fowner\-i".
+As another example,
+"cap_fowner+pe\-i" and "cap_fowner=+pe" are equivalent.
 .SH "RETURN VALUE"
 .BR cap_from_text (),
 .BR cap_to_text ()
@@ -152,7 +158,7 @@ and
 .BR cap_to_name ()
 return a non-NULL value on success, and NULL on failure.
 .BR cap_from_name ()
-returns 0 for success, and -1 on failure (unknown capability).
+returns 0 for success, and \-1 on failure (unknown capability).
 .PP
 On failure,
 .I errno
@@ -174,17 +180,15 @@ The example program below demonstrates the use of
 .BR cap_from_text ()
 and
 .BR cap_to_text ().
-The following shell session shows a some example runs:
-.in +4n
+The following shell session shows some example runs:
 .nf
 
 $ ./a.out "cap_chown=p cap_chown+e"
-caps_to_text() returned "= cap_chown+ep"
-$ ./a.out "all=pe cap_chown-e cap_kill-pe"
-caps_to_text() returned "=ep cap_chown-e cap_kill-ep"
+caps_to_text() returned "cap_chown=ep"
+$ ./a.out "all=pe cap_chown\-e cap_kill\-pe"
+caps_to_text() returned "=ep cap_chown\-e cap_kill\-ep"
 
 .fi
-.in
 The source code of the program is as follows:
 .nf
 
@@ -225,7 +229,6 @@ main(int argc, char *argv[])
 .SH "SEE ALSO"
 .BR libcap (3),
 .BR cap_clear (3),
-.BR cap_compare (3),
 .BR cap_copy_ext (3),
 .BR cap_get_file (3),
 .BR cap_get_proc (3),
diff --git a/doc/cap_func_launcher.3 b/doc/cap_func_launcher.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_func_launcher.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_get_ambient.3 b/doc/cap_get_ambient.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_get_ambient.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_get_file.3 b/doc/cap_get_file.3
index 107b6d1..985236c 100644
--- a/doc/cap_get_file.3
+++ b/doc/cap_get_file.3
@@ -1,24 +1,23 @@
 .\"
 .\" written by Andrew Main <zefram@dcs.warwick.ac.uk>
 .\"
-.TH CAP_GET_FILE 3 "2008-05-11" "" "Linux Programmer's Manual"
+.TH CAP_GET_FILE 3 "2022-10-16" "" "Linux Programmer's Manual"
 .SH NAME
-cap_get_file, cap_set_file, cap_get_fd, cap_set_fd \- capability
-manipulation on files
+cap_get_file, cap_set_file, cap_get_fd, cap_set_fd, cap_get_nsowner, \
+cap_set_nsowner \- capability manipulation on files
 .SH SYNOPSIS
-.B
-.sp
-.B #include <sys/capability.h>
-.sp
-.BI "cap_t cap_get_file(const char *" path_p );
-.sp
-.BI "int cap_set_file(const char *" path_p ", cap_t " cap_p );
-.sp
-.BI "cap_t cap_get_fd(int " fd );
-.sp
-.BI "int cap_set_fd(int " fd ", cap_t " caps );
+.nf
+#include <sys/capability.h>
+
+cap_t cap_get_file(const char *path_p);
+int cap_set_file(const char *path_p, cap_t cap_p);
+cap_t cap_get_fd(int fd);
+int cap_set_fd(int fd, cap_t caps);
+uid_t cap_get_nsowner(cap_t caps);
+int cap_set_nsowner(cap_t caps, uid_t rootuid);
+.fi
 .sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
 .SH DESCRIPTION
 .BR cap_get_file ()
 and
@@ -55,14 +54,25 @@ A NULL value for
 .IR cap_p
 is used to indicate that capabilities for the file should be deleted.
 For these functions to succeed, the calling process must have the
-effective capability,
-.BR CAP_SETFCAP ,
-enabled and either the effective user ID of the process must match the
+.BR CAP_SETFCAP
+capability in its effective set
+and either the effective user ID of the process must match the
 file owner or the calling process must have the
 .B CAP_FOWNER
-flag in its effective capability set.  The effects of writing the
+capability in its effective capability set.  The effects of writing the
 capability state to any file type other than a regular file are
 undefined.
+.PP
+A capability set held in memory can be associated with the root user ID in
+use in a specific user namespace. It is possible to get and set this value
+(in the memory copy) with
+.BR cap_get_nsowner ()
+and
+.BR cap_set_nsowner ()
+respectively. The root user ID is ignored by the libcap library in all cases
+other than when the capability is written to a file. Only if the value
+is non-zero will the library attempt to include it in the written file
+capability set.
 .SH "RETURN VALUE"
 .BR cap_get_file ()
 and
@@ -121,4 +131,5 @@ Permitted or Inheritable flag enabled.
 .BR cap_from_text (3),
 .BR cap_get_proc (3),
 .BR cap_init (3),
-.BR capabilities (7)
+.BR capabilities (7),
+.BR user_namespaces (7)
diff --git a/doc/cap_get_mode.3 b/doc/cap_get_mode.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_get_mode.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_get_nsowner.3 b/doc/cap_get_nsowner.3
new file mode 100644
index 0000000..3970c34
--- /dev/null
+++ b/doc/cap_get_nsowner.3
@@ -0,0 +1 @@
+.so man3/cap_get_file.3
diff --git a/doc/cap_get_proc.3 b/doc/cap_get_proc.3
index 123ab3d..91fb705 100644
--- a/doc/cap_get_proc.3
+++ b/doc/cap_get_proc.3
@@ -1,28 +1,44 @@
-.\"
-.\" $Id: cap_get_proc.3,v 1.1.1.1 1999/04/17 22:16:31 morgan Exp $
-.\"
-.TH CAP_GET_PROC 3 "2008-05-11" "" "Linux Programmer's Manual"
+.TH CAP_GET_PROC 3 "2022-04-28" "" "Linux Programmer's Manual"
 .SH NAME
-cap_get_proc, cap_set_proc, capgetp, cap_get_bound, cap_drop_bound \-
-capability manipulation on processes
+cap_get_proc, cap_set_proc, capgetp, cap_get_bound, cap_drop_bound, \
+cap_get_ambient, cap_set_ambient, cap_reset_ambient, \
+cap_get_secbits, cap_set_secbits, cap_get_mode, cap_set_mode, \
+cap_mode_name, cap_get_pid, cap_setuid, cap_prctl, cap_prctlw, cap_setgroups \
+\- capability manipulation on processes
 .SH SYNOPSIS
-.B #include <sys/capability.h>
-.sp
-.B "cap_t cap_get_proc(void);"
-.sp
-.BI "int cap_set_proc(cap_t " cap_p );
-.sp
-.BI "int cap_get_bound(cap_value_t " cap );
-.sp
-.BI "CAP_IS_SUPPORTED(cap_value_t " cap );
-.sp
-.BI "int cap_drop_bound(cap_value_t " cap );
-.sp
-.B #include <sys/types.h>
-.sp
-.BI "cap_t cap_get_pid(pid_t " pid );
+.nf
+#include <sys/capability.h>
+
+cap_t cap_get_proc(void);
+int cap_set_proc(cap_t cap_p);
+
+int cap_get_bound(cap_value_t cap);
+CAP_IS_SUPPORTED(cap_value_t cap);
+
+int cap_drop_bound(cap_value_t cap);
+int cap_get_ambient(cap_value_t cap);
+int cap_set_ambient(cap_value_t cap, cap_flag_value_t value);
+int cap_reset_ambient(void);
+CAP_AMBIENT_SUPPORTED();
+
+unsigned cap_get_secbits(void);
+int cap_set_secbits(unsigned bits);
+cap_mode_t cap_get_mode(void);
+const char *cap_mode_name(cap_mode_t mode);
+int cap_prctl(long int pr_cmd, long int arg1, long int arg2,
+	      long int arg3, long int arg4, long int arg5);
+int cap_prctlw(long int pr_cmd, long int arg1, long int arg2,
+	       long int arg3, long int arg4, long int arg5);
+int cap_set_mode(cap_mode_t mode);
+
+#include <sys/types.h>
+
+cap_t cap_get_pid(pid_t pid);
+int cap_setuid(uid_t uid);
+int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[]);
+.fi
 .sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
 .SH DESCRIPTION
 .BR cap_get_proc ()
 allocates a capability state in working storage, sets its state to
@@ -49,23 +65,48 @@ the function will fail, and the capability state of the process will remain
 unchanged.
 .PP
 .BR cap_get_pid ()
-returns
-.IR cap_d ,
+returns a
+.IR cap_t ,
 see 
 .BR cap_init (3),
-with the process capabilities of the process indicated by
+with the process capabilities of the process known to the caller as
 .IR pid .
+If
+.I pid
+is 0, then the calling process's capabilities are returned.
 This information can also be obtained from the
 .I /proc/<pid>/status
-file.
+file. (The entries in that file can be translated with the
+.BI "capsh \-\-decode=" XXX
+command line.) When the caller is operating within a
+.RB ( CLONE_NEWPID )
+namespace, the numerical
+.I pid
+argument is interpreted in the range of that namespace. As such, the
+caller's idea of the target
+.I pid
+may differ from that of the target process when they are operating in
+different pid namespaces. See
+.BR pid_namespaces (7)
+for details.
+Further, the returned
+.I cap_t
+value holds the capabilities that the target
+.I pid
+thinks it has. If the target is operating in a
+.RB ( CLONE_NEWUSER )
+namespace, the system-wide privilege of those user namespace
+capabilities may be substantially reduced. See
+.BR user_namespaces (7)
+for details.
 .PP
 .BR cap_get_bound ()
 with a
 .I  cap
 as an argument returns the current value of this bounding set
-capability flag in effect for the current process. This operation is
-unpriveged. Note, a macro function
-.BI "CAP_IS_SUPPORTED(cap_value_t " cap )
+capability flag in effect for the calling process. This operation is
+unprivileged. Note, a macro function
+.BR "CAP_IS_SUPPORTED(cap_value_t " cap )
 is provided that evaluates to true (1) if the system supports the
 specified capability,
 .IR cap .
@@ -75,11 +116,104 @@ If the system does not support the capability, this function returns
 .PP
 .BR cap_drop_bound ()
 can be used to lower the specified bounding set capability,
-.BR cap ,
+.BR cap .
 To complete successfully, the prevailing
 .I effective
 capability set must have a raised
 .BR CAP_SETPCAP .
+.PP
+.BR cap_get_ambient ()
+returns the prevailing value of the specified ambient capability, or
+\-1 if the capability is not supported by the running kernel.  A macro
+.BR CAP_AMBIENT_SUPPORTED ()
+uses this function to determine if ambient capabilities are supported
+by the kernel.
+.PP
+.BR cap_set_ambient ()
+sets the specified ambient capability to a specific value. To complete
+successfully, the prevailing
+.I effective
+capability set must have a raised
+.BR CAP_SETPCAP .
+Further, to raise a specific ambient capability the
+.IR inheritable " and " permitted
+sets of the calling process must contain the specified capability, and
+raised ambient bits will only be retained as long as this remains true.
+.PP
+.BR cap_reset_ambient ()
+resets all of the ambient capabilities for the calling process to
+their lowered value. Note, the ambient set is intended to operate in a
+legacy environment where the application has limited awareness of
+capabilities in general. Executing a file, with associated filesystem
+capabilities, the kernel will implicitly reset the ambient set of the
+process. Further, changes to the inheritable set by the program code
+without explicitly fixing up the ambient set can also drop ambient
+bits.
+.PP
+.BR cap_get_secbits ()
+returns the securebits of the calling process. These bits affect the
+way in which the calling process implements things like setuid-root
+fixup and ambient capabilities.
+.PP
+.BR cap_set_secbits ()
+attempts to modify the securebits of the calling process. Note
+.B CAP_SETPCAP
+must be in the effective capability set for this to be effective. Some
+settings lock the sub-states of the securebits, so attempts to set values
+may be denied by the kernel even when the
+.B CAP_SETPCAP
+capability is raised.
+.PP
+To help manage the complexity of the securebits, libcap provides a
+combined securebit and capability set concept called a libcap mode.
+.BR cap_get_mode ()
+attempts to summarize the prevailing security environment in the form
+of a numerical
+.B cap_mode_t
+value. A text representation of the mode can be obtained via the
+.BR cap_mode_name ()
+function. The vast majority of combinations of these values are not well
+defined in terms of a libcap mode, and for these states
+.BR cap_get_mode ()
+returns
+.RB ( cap_mode_t )0
+which
+.BR cap_mode_name ()
+identifies as
+.RI `` UNCERTAIN ''.
+Supported modes are:
+.BR CAP_MODE_NOPRIV ", " CAP_MODE_PURE1E_INIT " and " CAP_MODE_PURE1E .
+.PP
+.BR cap_prctl ()
+can be used to read state via the \fBprctl\fP(2) system call.
+.PP
+.BR cap_prctlw ()
+can be used to write state via the \fBprctl\fP(2) system call.
+.PP
+.BR cap_set_mode ()
+can be used to set the desired mode. The permitted capability
+.B CAP_SETPCAP
+is required for this function to succeed.
+.PP
+.BR cap_setuid ()
+is a convenience function for the
+.BR setuid (2)
+system call.
+.BR cap_setuid ()
+arranges for the right effective capability to be raised in order to
+perform the system call, and also arranges to preserve the
+availability of permitted capabilities after the uid has
+changed. Following this call all effective capabilities are lowered.
+.PP
+.BR cap_setgroups ()
+is a convenience function for performing both
+.BR setgid (2)
+and
+.BR setgroups (2)
+calls in one call. The
+.BR cap_setgroups ()
+call raises the right effective capability for the duration of the
+call, and empties the effective capability set before returning.
 .SH "RETURN VALUE"
 The functions
 .BR cap_get_proc ()
@@ -89,11 +223,11 @@ return a non-NULL value on success, and NULL on failure.
 .PP
 The function
 .BR cap_get_bound ()
-returns -1 if the requested capability is unknown, otherwise the
+returns \-1 if the requested capability is unknown, otherwise the
 return value reflects the current state of that capability in the
 prevailing bounding set. Note, a macro function,
 .PP
-The functions
+All of the setting functions such as
 .BR cap_set_proc ()
 and
 .BR cap_drop_bound ()
@@ -103,7 +237,7 @@ On failure,
 .I errno
 is set to
 .BR EINVAL ,
-.BR EPERM,
+.BR EPERM ,
 or
 .BR ENOMEM .
 .SH "CONFORMING TO"
@@ -114,6 +248,31 @@ are specified in the withdrawn POSIX.1e draft specification.
 .BR cap_get_pid ()
 is a Linux extension.
 .SH "NOTES"
+Neither glibc, nor the Linux kernel honors POSIX semantics for setting
+capabilities and securebits in the presence of pthreads. That is,
+changing capability sets, by default, only affects the running
+thread. To be meaningfully secure, however, the capability sets should
+be mirrored by all threads within a common program because threads are
+not memory isolated. As a workaround for this,
+.B libcap
+is packaged with a separate POSIX semantics system call library:
+.BR libpsx .
+If your program uses POSIX threads, to achieve meaningful POSIX
+semantics capability manipulation, you should link your program with:
+.sp
+.B ld ... \-lcap \-lpsx \-lpthread \-\-wrap=pthread_create
+.sp
+or,
+.sp
+.B gcc ... \-lcap \-lpsx \-lpthread \-Wl,\-wrap,pthread_create
+.sp
+When linked this way, due to linker magic, libcap uses
+.BR psx_syscall "(3) and " psx_syscall6 (3)
+to perform state setting system calls. Notably, this also ensures that
+.BR cap_prctlw ()
+can be used to ensure process control bits are shared over all threads
+of a single process.
+.SS capgetp() and capsetp()
 The library also supports the deprecated functions:
 .PP
 .BI "int capgetp(pid_t " pid ", cap_t " cap_d );
@@ -123,47 +282,62 @@ The library also supports the deprecated functions:
 .BR capgetp ()
 attempts to obtain the capabilities of some other process; storing the
 capabilities in a pre-allocated
-.IR cap_d . See
+.IR cap_d .
+See
 .BR cap_init ()
-for information on allocating an empty capability set. This function,
-.BR capgetp (),
-is deprecated, you should use
+for information on allocating an empty capability set. This function
+is deprecated; you should use
 .BR cap_get_pid ().
 .PP
 .BR capsetp ()
-attempts to set the capabilities of some other process(es),
-.IR pid . 
+attempts to set the capabilities of the calling process or of
+some other process(es),
+.IR pid .
+Note that setting capabilities of another process is only possible on older
+kernels that do not provide VFS support for setting file capabilities.
+See
+.BR capset (2)
+for information on which kernels provide such support.
+.PP
 If
 .I pid
 is positive it refers to a specific process;  if it is zero, it refers
-to the current process; -1 refers to all processes other than the
-current process and process '1' (typically 
+to the calling process; \-1 refers to all processes other than the
+calling process and process '1' (typically 
 .BR init (8));
 other negative values refer to the
-.I -pid
-process group.  In order to use this function, the kernel must support
-it and the current process must have
+.I \-pid
+process group.
+.PP
+In order to use this function, the kernel must support
+it and the calling process must have
 .B CAP_SETPCAP
 raised in its Effective capability set. The capabilities set in the
 target process(es) are those contained in
 .IR cap_d .
+.PP
 Kernels that support filesystem capabilities redefine the semantics of
 .B CAP_SETPCAP
-and on such systems this function will always fail for any target not
-equal to the current process.
+and on such systems,
+.BR capsetp ()
+will always fail for any target not
+equal to the calling process.
 .BR capsetp ()
 returns zero for success, and \-1 on failure.
-
-Where supported by the kernel, the function
+.PP
+On kernels where it is (was) supported,
 .BR capsetp ()
 should be used with care.  It existed, primarily, to overcome an early
 lack of support for capabilities in the filesystems supported by
-Linux.  Note that, by default, the only processes that have
+Linux.  Note that on older kernels where
+.BR capsetp ()
+could be used to set the capabilities of another process,
+the only processes that had
 .B CAP_SETPCAP
-available to them are processes started as a kernel thread.
+available to them by default were processes started as kernel threads.
 (Typically this includes
 .BR init (8),
-kflushd and kswapd). You will need to recompile the kernel to modify
+kflushd and kswapd.) A kernel recompilation was needed to modify
 this default.
 .SH EXAMPLE
 The code segment below raises the
@@ -173,8 +347,9 @@ and
 effective capabilities for the caller:
 .nf
 
+    ...
     cap_t caps;
-    cap_value_t cap_list[2];
+    const cap_value_t cap_list[2] = {CAP_FOWNER, CAP_SETFCAP};
 
     if (!CAP_IS_SUPPORTED(CAP_SETFCAP))
         /* handle error */
@@ -183,22 +358,61 @@ effective capabilities for the caller:
     if (caps == NULL)
         /* handle error */;
 
-    cap_list[0] = CAP_FOWNER;
-    cap_list[1] = CAP_SETFCAP;
-    if (cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_list, CAP_SET) == -1)
+    if (cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_list, CAP_SET) == \-1)
         /* handle error */;
 
-    if (cap_set_proc(caps) == -1)
+    if (cap_set_proc(caps) == \-1)
         /* handle error */;
 
-    if (cap_free(caps) == -1)
+    if (cap_free(caps) == \-1)
         /* handle error */;
+    ...
+
 .fi
+Alternatively, to completely drop privilege in a program launched
+setuid-root but wanting to run as a specific user ID etc. in such a
+way that neither it, nor any of its children can acquire privilege
+again:
+.nf
+
+    ...
+    uid_t nobody = 65534;
+    const gid_t groups[] = {65534};
+
+    if (cap_setgroups(groups[0], 1, groups) != 0)
+        /* handle error */;
+    if (cap_setuid(nobody) != 0)
+        /* handle error */;
+
+    /*
+     * privilege is still available here
+     */
+
+    if (cap_set_mode(CAP_MODE_NOPRIV) != 0)
+        /* handle error */;
+    ...
+
+.fi
+Note, the above sequence can be performed by the
+.B capsh
+tool as follows:
+.sp
+.B sudo capsh \-\-user=nobody \-\-mode=NOPRIV \-\-print
+.sp
+where
+.B \-\-print
+displays the resulting privilege state.
 .SH "SEE ALSO"
 .BR libcap (3),
+.BR libpsx (3),
+.BR capsh (1),
 .BR cap_clear (3),
 .BR cap_copy_ext (3),
 .BR cap_from_text (3),
 .BR cap_get_file (3),
 .BR cap_init (3),
-.BR capabilities (7)
+.BR namespaces (7),
+.BR pid_namespaces (7),
+.BR user_namespaces (7),
+.BR psx_syscall (3),
+.BR capabilities (7).
diff --git a/doc/cap_get_secbits.3 b/doc/cap_get_secbits.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_get_secbits.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_iab.3 b/doc/cap_iab.3
new file mode 100644
index 0000000..3e6282d
--- /dev/null
+++ b/doc/cap_iab.3
@@ -0,0 +1,202 @@
+.TH CAP_IAB 3 "2022-10-16" "" "Linux Programmer's Manual"
+.SH NAME
+cap_iab_init, cap_iab_dup, cap_iab_get_proc, cap_iab_get_pid, \
+cap_iab_set_proc, cap_iab_to_text, cap_iab_from_text, \
+cap_iab_get_vector, cap_iab_compare, cap_iab_set_vector, \
+cap_iab_fill, cap_proc_root \- inheritable IAB tuple support functions
+.SH SYNOPSIS
+.nf
+#include <sys/capability.h>
+
+cap_iab_t cap_iab_init(void);
+cap_iab_t cap_iab_dup(cap_iab_t iab);
+cap_iab_t cap_iab_get_proc(void);
+cap_iab_t cap_iab_get_pid(pid_t pid);
+int cap_iab_set_proc(cap_iab_t iab);
+char *cap_iab_to_text(cap_iab_t iab);
+cap_iab_t cap_iab_from_text(const char *text);
+cap_flag_value_t cap_iab_get_vector(cap_iab_t iab, cap_iab_vector_t vec,
+    cap_value_t val);
+int cap_iab_compare(cap_iab_t a, cap_iab_t b);
+int cap_iab_set_vector(cap_iab_t iab, cap_iab_vector_t vec, cap_value_t val,
+    cap_flag_value_t enable);
+int cap_iab_fill(cap_iab_t iab, cap_iab_vector_t vec,
+    cap_t set, cap_flag_t flag);
+char *cap_proc_root(const char *root);
+.fi
+.sp
+Link with \fI\-lcap\fP.
+.SH "DESCRIPTION"
+The functions defined in this man page concern the three naively
+inheritable process capability vectors: Inh, Amb and Bound. This
+\fIIAB\fP 3-tuple of capability vectors, captured in type
+\fIcap_iab_t\fP combine to pass capabilities from one process to
+another through
+.BR execve (2)
+system calls. The convolution rules using the IAB tuple are a fail over
+set of rules when the executed file has no configured
+\fIfile-capabilities\fP.
+.PP
+There are some constraints enforced by the kernel with respect to the
+three components of an IAB tuple and the Permitted process capability
+flag. They are: the Inh vector is entirely equal to the process
+Inheritable flag at all times; the Amb vector contains no more
+capability values than the intersection of the Inh vector and the
+Permitted flag for the process; and the Bound (or \fIblocked\fP)
+vector is the bitwise complement of the process bounding vector.
+.PP
+In some environments, it is considered desirable to \fInaively\fP
+inherit capabilities. That is pass capabilities, independent of the
+status of the executed binary, from parent to child through
+\fBexec*\fP system calls. The surviving capabilities become the
+Permitted flag for the post-exec process. This method of inheritance
+differs significantly from the handshake inheritance between a
+pre-exec* process and a file-capability bestowed executable of the
+traditional (POSIX.1e) capability mechanism.
+.PP
+The convolution rules for IAB style inheritance are: I'=I; A'=A&I;
+P'=A&I&P. Where P etc are the pre-exec values and P' etc are the
+post-exec values.
+.PP
+With an understanding of these convolution rules, we can explain how
+.BR libcap (3)
+support for the IAB tuple is managed: the IAB API.
+.PP
+.BR cap_iab_init ()
+returns an empty IAB value. That is a \fImostly-harmless\fP tuple. It
+will not block any Permitted file capabilities through exec, but it
+won't bestow any either. The returned \fIcap_iab_t\fP should be freed
+with
+.BR cap_free (3).
+.sp
+.BR cap_iab_dup ()
+returns a copy of the specified IAB value.  The returned cap_iab_t
+should be freed with
+.BR cap_free (3).
+.sp
+.BR cap_iab_get_proc ()
+returns a copy of the IAB value for the current process.  The returned
+cap_iab_t should be freed with
+.BR cap_free (3).
+.sp
+.BR cap_iab_get_pid ()
+returns a copy of the IAB value for the specified process.  The returned
+cap_iab_t should be freed with
+.BR cap_free (3).
+This function defaults to searching
+.BR /proc/ <PID> /status
+for the IAB information, but that location can be overridden using the
+.BR cap_proc_root ()
+function.
+.sp
+.BR cap_iab_set_proc ()
+can be used to set the IAB value carried by the current process. Such
+a setting will fail if the process is insufficiently capable. The
+process requires CAP_SETPCAP raised in the E flag and a superset of P
+and I values over those in the A vectors.
+.sp
+.BR cap_iab_to_text ()
+will convert an IAB tuple to a canonical text representation. The
+representation is slightly redundant but libcap will try to generate
+as short a representation as it is able.
+.sp
+.BR cap_iab_from_text ()
+generates an IAB tuple from a text string (likely generated by the
+previous function). The returned IAB tuple should be freed with
+.BR cap_free (3).
+.sp
+The text format accepted by
+.BR cap_iab_from_text ()
+is a comma separated list of capability values. Each capability is
+prefixed by nothing (or %) (Inh); ! (Bound, but think Blocked); ^
+(Amb). Or, some combination thereof.  Since the Amb vector is
+constrained to be no greater than the Inh vector, ^ is equivalent to
+%^. Further, unless B is non-zero, % can be omitted. The following are
+legal text representations: "!%cap_chown" (Bound but Inh),
+"!cap_chown,^cap_chown" (Bound, Inh+Amb). "cap_setuid,!cap_chown"
+(Inh, Bound). As noted below, this text representation is the syntax
+for the \fIpam_cap.so\fP config file.
+.sp
+.BR cap_iab_get_vector ()
+can be used to determine the specific capability value of an IAB
+vector.
+.sp
+.BR cap_iab_compare ()
+can be used to compare two cap_iab_t tuples. When the return value is
+non-zero, the macro \fBCAP_IAB_DIFFERS\fR(\fIstatus\fR, \fIvector\fR)
+evaluates to non-zero if the returned status differs in its
+.I vector
+components.
+.sp
+.BR cap_iab_set_vector ()
+can be used to set a specific vector value to the enable setting.
+.sp
+.BR cap_iab_fill ()
+can be used to wholesale copy a cap_t flag value into the vec vector
+of the IAB tuple. Copying into Amb in this way may implicitly raise Inh
+values in the IAB tuple. Similarly copying into the Inh vector may
+implicitly lower Amb values that are not present in the resulting Inh
+vector.
+.sp
+.BR cap_proc_root ()
+can be used to determine the current location queried by
+.BR cap_iab_get_pid ().
+Returned values should be released with
+.BR cap_free (3).
+If the argument to
+.BR cap_proc_root ()
+is not \fBNULL\fP, a copy of it will become the replacement for
+.BR /proc .
+Note, this function is \fInot\fP thread safe with respect to
+concurrent calls to
+.BR cap_iab_get_pid ().
+.SH "ERRORS"
+The functions returning \fIcap_iab_t\fP values or allocated memory in
+the form of a string return NULL on error.
+
+Integer return values are -1 on error and 0 on success.
+
+In the case of error consult \fIerrno\fP.
+.SH "NOTES"
+.PP
+Unlike the traditional \fIcap_t\fP capability set, the
+IAB tuple, taken together, is incompatible with filesystem capabilities
+created via tools like
+.BR setcap (8).
+That is, the Amb vector of the IAB tuple is rendered moot when an
+executable with a file capability is executed.
+.PP
+Further, there are libcap
+.BR cap_mode (3)s
+that render the Amb vector and its method of process inheritance
+disabled.
+
+.SH "HISTORY"
+The IAB format for inheritable variants of capabilities was first
+developed as the configuration syntax for the \fIpam_cap.so\fP
+Linux-PAM module in libcap-2.29. It was introduced to extend the
+\fIsimple\fP comma separated list of process Inheritable capabilities,
+that the module could bestow on an authenticated process tree, to
+include enforced limits on the Bounding vector and introduce support
+for the Ambient vector of capability bits.
+
+While the Inheritable and Bounding vectors were anticipated by the
+POSIX.1e draft that introduced capabilities, the Ambient vector is a
+Linux invention, and incompatible with the POSIX.1e file capability
+model. As such, it was felt that trying to meld together all of the 5
+capability vectors into one text representation was not going to
+work. Instead the \fIpam_cap.so\fP config syntax was generalized into
+a whole set of libcap functions for bundling together all three
+naively inheritable capabilities: the IAB tuple. The support for this
+debuted in libcap-2.33.
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
+.SH "SEE ALSO"
+.BR libcap (3),
+.BR cap_launch (3),
+.BR cap_init (3),
+.BR capabilities (7)
+and
+.BR errno (3).
diff --git a/doc/cap_iab_compare.3 b/doc/cap_iab_compare.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_compare.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_dup.3 b/doc/cap_iab_dup.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_dup.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_fill.3 b/doc/cap_iab_fill.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_fill.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_from_text.3 b/doc/cap_iab_from_text.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_from_text.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_get_pid.3 b/doc/cap_iab_get_pid.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_get_pid.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_get_proc.3 b/doc/cap_iab_get_proc.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_get_proc.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_get_vector.3 b/doc/cap_iab_get_vector.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_get_vector.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_init.3 b/doc/cap_iab_init.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_init.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_set_proc.3 b/doc/cap_iab_set_proc.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_set_proc.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_set_vector.3 b/doc/cap_iab_set_vector.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_set_vector.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_iab_to_text.3 b/doc/cap_iab_to_text.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_iab_to_text.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_init.3 b/doc/cap_init.3
index f198f63..125b529 100644
--- a/doc/cap_init.3
+++ b/doc/cap_init.3
@@ -1,19 +1,19 @@
 .\"
 .\" written by Andrew Main <zefram@dcs.warwick.ac.uk>
 .\"
-.TH CAP_INIT 3 "2008-05-11" "" "Linux Programmer's Manual"
+.TH CAP_INIT 3 "2021-03-06" "" "Linux Programmer's Manual"
 .SH NAME
 cap_init, cap_free, cap_dup \- capability data object storage management
 .SH SYNOPSIS
-.B #include <sys/capability.h>
+.nf
+#include <sys/capability.h>
+
+cap_t cap_init(void);
+int cap_free(void *obj_d);
+cap_t cap_dup(cap_t cap_p);
+.fi
 .sp
-.B cap_t cap_init(void);
-.sp
-.BI "int cap_free(void *" obj_d );
-.sp
-.BI "cap_t cap_dup(cap_t " cap_p );
-.sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
 .SH DESCRIPTION
 The capabilities associated with a file or process are never edited
 directly.  Instead, working storage is allocated to contain a
@@ -41,7 +41,7 @@ The
 argument may identify either a
 .I cap_t
 entity, or a
-.I char *
+.I "char\ *"
 entity allocated by the
 .BR cap_to_text ()
 function.
diff --git a/doc/cap_launch.3 b/doc/cap_launch.3
new file mode 100644
index 0000000..2d186eb
--- /dev/null
+++ b/doc/cap_launch.3
@@ -0,0 +1,191 @@
+.TH CAP_LAUNCH 3 "2021-08-01" "" "Linux Programmer's Manual"
+.SH NAME
+cap_new_launcher, cap_func_launcher, cap_launcher_callback, \
+cap_launcher_set_mode, cap_launcher_set_iab, cap_launcher_set_chroot, \
+cap_launch, cap_launcher_setuid, cap_launcher_setgroups \
+\- libcap launch functionality
+.SH SYNOPSIS
+.nf
+#include <sys/capability.h>
+
+cap_launch_t cap_new_launcher(const char *arg0, const char *const *argv,
+    const char *const *envp);
+
+cap_launch_t cap_func_launcher(int (callback_fn)(void *detail));
+
+int cap_launcher_callback(cap_launch_t attr,
+    int (callback_fn)(void *detail));
+int cap_launcher_set_mode(cap_launch_t attr, cap_mode_t flavor);
+cap_iab_t cap_launcher_set_iab(cap_launch_t attr, cap_iab_t iab);
+int cap_launcher_set_chroot(cap_launch_t attr, const char *chroot);
+
+#include <sys/types.h>
+
+pid_t cap_launch(cap_launch_t attr, void *detail);
+int cap_launcher_setuid(cap_launch_t attr, uid_t uid);
+int cap_launcher_setgroups(cap_launch_t attr, gid_t gid,
+    int ngroups, const gid_t *groups);
+.fi
+.sp
+Link with \fI\-lcap\fP.
+.SH DESCRIPTION
+A launcher provides a mechanism for code to execute a callback
+function and/or a program executable in an environment with a modified
+security context. Essentially it provides a mechanism for a program to
+.BR fork (2)
+a new context from that of the main program and manipulate capability and other privileged state in that
+.BR fork (2)d
+process before (optionally)
+.BR execve (2)ing
+a new program. When the application links to \fI\-lpsx\fP this support
+is needed to robustly execute the launched code without modifying the
+privilege of the whole (POSIX semantics honoring) main application.
+.PP
+A launcher is defined by one of these two functions:
+.BR cap_new_launcher ()
+or
+.BR cap_func_launcher ().
+The return value is of opaque type
+.BR cap_launch_t ;
+a return value of NULL implies an error has occurred.
+.PP
+Once defined, a
+.B cap_launch_t
+value can be used with
+.BR cap_launch ()
+to execute that \fIlauncher\fP. In such cases, a non-negative return
+value indicates success: zero meaning success without a program being
+invoked; non-zero being equal to the process ID
+.RB ( pid_t )
+of the newly launched program.
+.PP
+A
+.B cap_launch_t
+occupies allocated memory and should be freed with
+.BR cap_free (3).
+Before being
+.BR cap_free (3)d
+a
+.B cap_launch_t
+value may be reused for multiple independent launches. The
+.I detail
+argument to
+.BR cap_launch (),
+in conjunction with the launcher's callback function, can be used to
+customize the invocation of the launch. Care must be taken to leverage
+custom shared memory (see
+.BR mmap (2))
+or some other IPC to return values to the main program via
+.I detail
+since the callback and any subsequent program execution will occur
+outside the main process of the calling application. An example of
+this would be to allocate detail as follows:
+.nf
+
+   const char *args[] = { "echo", "hello", NULL };
+   cap_launch_t cmd = cap_new_launcher("/usr/bin/echo", args, NULL);
+   int *detail = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
+                      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+   cap_launcher_callback(cmd, &answer_detail_fn);
+   *detail = 41;
+   pid_t pid = cap_launch(cmd, detail);
+   printf("launcher callback set detail to %d\\n", *detail);
+   munmap(detail, sizeof(int));
+
+.fi
+.PP
+Unless modified by the callback function, the launched code will
+execute with the capability and other security context of the
+application.
+
+If the callback function returns anything other than zero, a
+.BR cap_launch ()
+will be aborted. If detail of the failure is important to the caller,
+it should be communicated via the
+.I detail
+argument.
+
+The following functions can be used to instruct the launcher to modify
+the security state of the invoked program without altering the state
+of the calling program. Such modifications must be performed prior to
+calling \fBcap_launch\fP() if they are to have the desired
+effect. Further, they are only invoked after any installed callback
+has completed. For example, one can drop or modify capabilities,
+\fIjust\fP for executing a file.
+.PP
+The following functions instruct the launcher to do some common tasks
+of this sort (note some require permitted capability bits to succeed):
+.sp
+.BR cap_launcher_callback ()
+can be used to install or replace the currently installed callback
+function of the launcher.
+.sp
+.BR cap_launcher_set_mode ()
+can be used to ensure that the libcap-mode of the launched program is
+the desired one.
+.sp
+.BR cap_launcher_set_iab ()
+This function returns the \fBcap_iab_t\fP previously associated with
+the launcher. Calling this function with an IAB value of NULL will
+configure the launcher to not set an IAB value (the default).  See
+\fBcap_iab\fP(3) for details on the IAB set. Note, the launcher is
+associated directly with the supplied \fIiab\fP value, and does not
+make a copy of it. This iab value is locked to the launcher and cannot
+be modified while associated with the launcher. Set with NULL to
+regain control over the memory associated with that IAB value,
+otherwise the IAB value will be \fBcap_free\fP()'d when the
+launcher is.
+.sp
+.BR cap_launcher_set_chroot ()
+This function causes the launched program executable to be invoked
+inside a chroot \fIroot\fP directory.
+.sp
+.BR cap_launcher_setuid ()
+This function causes the launched program executable to be invoked
+with the specified user identifier (\fIuid_t\fP).
+.sp
+.BR cap_launcher_setgroups ()
+This function causes the launched program executable to be invoked
+with the specified primary and supplementary group IDs.
+.sp
+.PP
+Note, if any of the launcher enhancements made by the above functions
+should fail to take effect (typically for a lack of sufficient
+privilege), the launch will fail and return -1.
+
+.SH "ERRORS"
+A return of NULL for a
+.B cap_launch_t
+should be considered an error.
+.PP
+.BR cap_launch ()
+returns -1 in the case of an error.
+.PP
+In all such cases a return value of 0 implies success. In other cases,
+consult
+.BR errno (3)
+for further details.
+.SH "HISTORY"
+The \fBcap_launch\fP() family of functions was introduced in libcap
+2.33. It primarily addresses a complexity with \fI\-lpsx\fP linked
+pthreads(7) applications that use capabilities but also honor POSIX
+semantics.
+
+Using \fI\-lcap\fP and \fI\-lpthread\fP together without the POSIX
+semantics support from \fI\-lpsx\fP introduces a subtle class of
+exposure to privilege escalation. (See
+https://sites.google.com/site/fullycapable/who-ordered-libpsx for an
+explanation.)
+.SH "SEE ALSO"
+.BR libpsx (3),
+.BR psx_syscall (3),
+.BR libcap (3),
+.BR cap_mode (3),
+.BR cap_iab (3),
+.BR capabilities (7),
+.BR errno (3),
+.BR fork (2),
+.BR mmap (2),
+.BR chroot (2),
+and
+.BR munmap (2).
diff --git a/doc/cap_launcher_callback.3 b/doc/cap_launcher_callback.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_launcher_callback.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_launcher_set_chroot.3 b/doc/cap_launcher_set_chroot.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_launcher_set_chroot.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_launcher_set_iab.3 b/doc/cap_launcher_set_iab.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_launcher_set_iab.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_launcher_set_mode.3 b/doc/cap_launcher_set_mode.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_launcher_set_mode.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_launcher_setgroups.3 b/doc/cap_launcher_setgroups.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_launcher_setgroups.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_launcher_setuid.3 b/doc/cap_launcher_setuid.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_launcher_setuid.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_max_bits.3 b/doc/cap_max_bits.3
new file mode 100644
index 0000000..db506c6
--- /dev/null
+++ b/doc/cap_max_bits.3
@@ -0,0 +1 @@
+.so man3/cap_clear.3
diff --git a/doc/cap_mode.3 b/doc/cap_mode.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_mode.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_mode_name.3 b/doc/cap_mode_name.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_mode_name.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_new_launcher.3 b/doc/cap_new_launcher.3
new file mode 100644
index 0000000..4f072fc
--- /dev/null
+++ b/doc/cap_new_launcher.3
@@ -0,0 +1 @@
+.so man3/cap_launch.3
diff --git a/doc/cap_prctl.3 b/doc/cap_prctl.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_prctl.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_prctlw.3 b/doc/cap_prctlw.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_prctlw.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_proc_root.3 b/doc/cap_proc_root.3
new file mode 100644
index 0000000..3e730b1
--- /dev/null
+++ b/doc/cap_proc_root.3
@@ -0,0 +1 @@
+.so man3/cap_iab.3
diff --git a/doc/cap_reset_ambient.3 b/doc/cap_reset_ambient.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_reset_ambient.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_ambient.3 b/doc/cap_set_ambient.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_set_ambient.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_mode.3 b/doc/cap_set_mode.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_set_mode.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_nsowner.3 b/doc/cap_set_nsowner.3
new file mode 100644
index 0000000..3970c34
--- /dev/null
+++ b/doc/cap_set_nsowner.3
@@ -0,0 +1 @@
+.so man3/cap_get_file.3
diff --git a/doc/cap_set_secbits.3 b/doc/cap_set_secbits.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_set_secbits.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_set_syscall.3 b/doc/cap_set_syscall.3
new file mode 100644
index 0000000..48a44fa
--- /dev/null
+++ b/doc/cap_set_syscall.3
@@ -0,0 +1 @@
+.so man3/libcap.3
diff --git a/doc/cap_setgroups.3 b/doc/cap_setgroups.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_setgroups.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/cap_setuid.3 b/doc/cap_setuid.3
new file mode 100644
index 0000000..65ea3e4
--- /dev/null
+++ b/doc/cap_setuid.3
@@ -0,0 +1 @@
+.so man3/cap_get_proc.3
diff --git a/doc/capability.notes b/doc/capability.md
index b1e5245..cfad4c0 100644
--- a/doc/capability.notes
+++ b/doc/capability.md
@@ -1,5 +1,12 @@
-Overview
---------
+# Notes concerning wider use of capabilities
+
+## Overview
+
+**NOTE** These notes were added to the libcap package in
+libcap-1.03. They pre-date file capability support, but fully
+anticipate it. They are some thoughts on how to restructure a system
+to better leverage capability support. I've updated them to render as
+an `.md` formatted file.
 
 As of Linux 2.2.0, the power of the superuser has been partitioned
 into a set of discrete capabilities (in other places, these
@@ -11,48 +18,46 @@ can be protected (with wrappers) or rewritten to take advantage of
 this fine grained approach to constraining the danger to your system
 from programs running as 'root'.
 
-Notes on securing your system
------------------------------
+## Notes on securing your system
 
-Adopting a role approach to system security:
+### Adopting a role approach to system security
 
-changing all of the system binaries and directories to be owned by
+Changing all of the system binaries and directories to be owned by
 some user that cannot log on. You might like to create a user with
 the name 'system' who's account is locked with a '*' password. This
 user can be made the owner of all of the system directories on your
 system and critical system binaries too.
 
-Why is this a good idea? In a simple case, the CAP_FUSER capabilty is
-required for the superuser to delete files owned by a non-root user in
-a 'sticky-bit' protected non-root owned directory. Thus, the sticky
-bit can help you protect the /lib/ directory from an compromized
+Why is this a good idea? In a simple case, the `CAP_FOWNER` capability
+is required for the superuser to delete files owned by a non-root user
+in a _sticky-bit_ protected non-root owned directory. Thus, the sticky
+bit can help you protect the `/lib/` directory from a compromised
 daemon where the directory and the files it contains are owned by the
-system user. It can be protected by using a wrapper like execcap to
-ensure that the daemon is not running with the CAP_FUSER capability...
+system user. It can be protected to ensure that the daemon is not
+running with the `CAP_FOWNER` capability...
 
-
-Limiting the damage:
+### Limiting the damage
 
 If your daemon only needs to be setuid-root in order to bind to a low
 numbered port. You should restrict it to only having access to the
-CAP_NET_BIND_SERVICE capability. Coupled with not having any files on
-the system owned by root, it becomes significantly harder for such a
-daemon to damage your system.
+`CAP_NET_BIND_SERVICE` capability. Coupled with not having any files
+on the system owned by root, it becomes significantly harder for such
+a daemon to damage your system.
 
 Note, you should think of this kind of trick as making things harder
 for a potential attacker to exploit a hole in a daemon of this
 type. Being able to bind to any privileged port is still a formidable
-privilege and can lead to difficult but 'interesting' man in the
-middle attacks -- hijack the telnet port for example and masquerade as
-the login program... Collecting passwords for another day.
-
+privilege and can lead to difficult but _interesting_
+man-in-the-middle attacks -- hijack the telnet port for example and
+masquerade as the login program... Collecting passwords for another
+day.
 
-The /proc/ filesystem:
+### The /proc/ filesystem
 
 This Linux-specific directory tree holds most of the state of the
 system in a form that can sometimes be manipulated by file
 read/writes.  Take care to ensure that the filesystem is not mounted
 with uid=0, since root (with no capabilities) would still be able to
-read sensitive files in the /proc/ tree - kcore for example.
+read sensitive files in the `/proc/` tree - `kcore` for example.
 
 [Patch is available for 2.2.1 - I just wrote it!]
diff --git a/doc/capsh.1 b/doc/capsh.1
index e68df2c..4f3aaae 100644
--- a/doc/capsh.1
+++ b/doc/capsh.1
@@ -1,7 +1,4 @@
-.\"
-.\" capsh.1 Man page added 2009-12-23 Andrew G. Morgan <morgan@kernel.org>
-.\"
-.TH CAPSH 1 "2011-04-24" "libcap 2" "User Commands"
+.TH CAPSH 1 "2021-10-22" "libcap" "User Commands"
 .SH NAME
 capsh \- capability shell wrapper
 .SH SYNOPSIS
@@ -13,27 +10,54 @@ this tool. This tool provides a handy wrapper for certain types
 of capability testing and environment creation. It also provides some
 debugging features useful for summarizing capability state.
 .SH OPTIONS
-The tool takes a number of optional arguments, acting on them in the
+.B capsh
+takes a number of optional arguments, acting on them in the
 order they are provided. They are as follows:
-.TP 22
-.B --print
+.TP
+.B \-\-help
+Display the list of commands supported by
+.BR capsh .
+.TP
+.B \-\-print
 Display prevailing capability and related state.
 .TP
-.BI -- " [args]"
+.B \-\-current
+Display prevailing capability state, 1e capabilities and IAB vector.
+.TP
+.BI \-\- " [args]"
 Execute
 .B /bin/bash
 with trailing arguments. Note, you can use
-.B -c 'command to execute'
+.B \-c 'command to execute'
 for specific commands.
 .TP
-.B ==
+.BI \-\+ " [args]"
+Uses \fBcap_launch\fP(3) to fork a child to execute the shell. When
+the child exits, \fBcapsh\fP exits with the status of the child or 1
+in the case that the child was terminated by a signal.
+.TP
+.BI == " [args]"
 Execute
 .B capsh
-again with remaining arguments. Useful for testing
+again with the remaining arguments. Useful for testing
 .BR exec ()
-behavior.
+behavior. Note, PATH is searched when the running
+.B capsh
+was found via the shell's PATH searching. If the
+.B exec
+occurs after a
+.BI \-\-chroot= /some/path
+argument the PATH located binary may not resolve to the same binary
+as that running initially. This behavior is an intended feature as it
+can complete the chroot transition.
+.TP
+.BI =\+ " [args]"
+Uses \fBcap_launch\fP(3) to fork a child to re-execute
+\fBcapsh\fP. When this child exits, \fBcapsh\fP exits with the status
+of the child or 1 in the case that the child was terminated by a
+signal.
 .TP
-.BI --caps= cap-set
+.BI \-\-caps= cap-set
 Set the prevailing process capabilities to those specified by
 .IR cap-set .
 Where
@@ -41,70 +65,120 @@ Where
 is a text-representation of capability state as per
 .BR cap_from_text (3).
 .TP
-.BI --drop= cap-list
+.BI \-\-drop= cap-list
 Remove the listed capabilities from the prevailing bounding set. The
-capabilites are a comma separated list of capabilities as recognized
+capabilities are a comma-separated list of capabilities as recognized
 by the
 .BR cap_from_name (3)
-function. Use of this feature requires that the capsh program is
-operating with
+function. Use of this feature requires that
+.B capsh
+is operating with
 .B CAP_SETPCAP
 in its effective set.
 .TP
-.BI --inh= cap-list
+.BI \-\-inh= cap-list
 Set the inheritable set of capabilities for the current process to
 equal those provided in the comma separated list. For this action to
 succeed, the prevailing process should already have each of these
 capabilities in the union of the current inheritable and permitted
-capability sets, or the capsh program is operating with
+capability sets, or
+.B capsh
+should be operating with
 .B CAP_SETPCAP
 in its effective set.
 .TP
-.BI --user= username
+.BI \-\-user= username
 Assume the identity of the named user. That is, look up the user's
-.IR uid " and " gid
+UID and GID
 with
 .BR getpwuid (3)
 and their group memberships with
 .BR getgrouplist (3)
-and set them all.
+and set them all using
+.BR cap_setuid (3)
+and
+.BR cap_setgroups (3).
+Following this command, the effective capabilities will be cleared,
+but the permitted set will not be, so the running program is still
+privileged.
 .TP
-.BI --uid= id
+.B \-\-mode
+Display the prevailing libcap mode as guessed by the
+.BR cap_get_mode (3)
+function.
+.TP
+.BR \-\-mode= <mode>
+Force the program into a
+.BR cap_set_mode (3)
+security mode. This is a set of securebits and prevailing capability
+arrangement recommended for its pre-determined security stance.
+.TP
+.B \-\-modes
+Lists all of the libcap modes supported by
+.BR \-\-mode= <mode>.
+.TP
+.BR \-\-inmode= <mode>
+Confirm that the prevailing mode is that specified in
+.IR <mode> ,
+or exit with a status 1.
+.TP
+.BI \-\-uid= id
 Force all
-.B uid
+UID
 values to equal
 .I id
 using the
 .BR setuid (2)
-system call.
+system call. This argument may require explicit preparation of the
+effective set.
 .TP
-.BI --gid= <id>
+.BR \-\-cap\-uid= <uid>
+Use the
+.BR cap_setuid (3)
+function to set the UID of the current process. This performs all
+preparations for setting the UID without dropping capabilities in the
+process. Following this command the prevailing effective capabilities
+will be lowered.
+.TP
+.BI \-\-is\-uid= <id>
+Exit with status 1 unless the current
+UID equals
+.IR <id> .
+.TP
+.BI \-\-gid= <id>
 Force all
-.B gid
+GID
 values to equal
 .I id
 using the
 .BR setgid (2)
 system call.
 .TP
-.BI --groups= <id-list>
+.BI \-\-is\-gid= <id>
+Exit with status 1 unless the current
+GID equals
+.IR <id> .
+.TP
+.BI \-\-groups= <gid-list>
 Set the supplementary groups to the numerical list provided. The
 groups are set with the
 .BR setgroups (2)
-system call.
+system call. See
+.B \-\-user
+for a more convenient way of doing this.
 .TP
-.BI --keep= <0|1>
+.BI \-\-keep= <0|1>
 In a non-pure capability mode, the kernel provides liberal privilege
 to the super-user. However, it is normally the case that when the
 super-user changes
-.I uid
+UID
 to some lesser user, then capabilities are dropped. For these
 situations, the kernel can permit the process to retain its
 capabilities after a
 .BR setuid (2)
 system call. This feature is known as
 .I keep-caps
-support. The way to activate it using this script is with this
+support. The way to activate it using this program is with this
 argument. Setting the value to 1 will cause
 .I keep-caps
 to be active. Setting it to 0 will cause keep-caps to deactivate for
@@ -113,13 +187,27 @@ the current process. In all cases,
 is deactivated when an
 .BR exec ()
 is performed. See
-.B --secbits
+.BR \-\-secbits " and " \-\-mode
 for ways to disable this feature.
 .TP
-.BI --secbits= N
-XXX - need to document this feature.
+.BI \-\-secbits= N
+Set the security-bits for the program.
+This is done using the
+.BR prctl (2)
+.B PR_SET_SECUREBITS
+operation.  The list of supported bits and their meaning can be found
+in the
+.B <sys/secbits.h>
+header file. The program will list these bits via the
+.B \-\-print
+command.  The argument is expressed as a numeric bitmask, in any of
+the formats permitted by
+.BR strtoul (3).
+An alternative to this bit-twiddling is embedded in the
+.B \-\-mode*
+commandline arguments.
 .TP
-.BI --chroot= path
+.BI \-\-chroot= /some/path
 Execute the
 .BR chroot (2)
 system call with the new root-directory (/) equal to
@@ -128,46 +216,150 @@ This operation requires
 .B CAP_SYS_CHROOT
 to be in effect.
 .TP
-.BI --forkfor= sec
+.BI \-\-forkfor= sec
+This command causes the program to fork a child process for so many
+seconds. The child will sleep that long and then exit with status
+0. The purpose of this command is to support exploring the way
+processes are killable in the face of capability changes. See the
+.B \-\-killit
+command. Only one fork can be active at a time.
+.TP
+.BI \-\-killit= sig
+This command causes a
+.B \-\-forkfor
+child to be
+.BR kill (2)d
+with the specified signal. The command then waits for the child to exit.
+If the exit status does not match the signal being used to kill it, the
+.B capsh
+program exits with status 1.
+.TP
+.BI \-\-explain= cap_xxx
+Give a brief textual description of what privileges the specified
+capability makes available to a running program. Note, instead of
+\fIcap_xxx\fP, one can provide a decimal number and \fBcapsh\fP will
+look up the corresponding capability's description.
+.TP
+.BI \-\-shell =/full/path
+This option changes the shell that is invoked when the argument
+\fB==\fP is encountered.
 .TP
-.BI --killit= sig
+.BI \-\-strict
+This option toggles the suppression of subsequent attempts to fixup
+\fB\-\-caps=\fP and \fB\-\-inh=\fP arguments. That is, when the
+prevailing Effective flag does not contain \fBCAP_SETPCAP\fP the to be
+raised Inheritable Flag values (in strict mode) are limited to those
+in the Permitted set. The strict mode defaults to off. Supplying this
+argument an even number of times restores this default behavior.
 .TP
-.BI --decode= N
+.BI \-\-suggest= phrase
+Scan each of the textual descriptions of capabilities, known to
+\fBcapsh\fP, and display all descriptions that include \fIphrase\fP.
+.TP
+.BI \-\-decode= N
 This is a convenience feature. If you look at
 .B /proc/1/status
 there are some capability related fields of the following form:
+.nf
 
- CapInh:	0000000000000000
- CapPrm:	ffffffffffffffff
- CapEff:	fffffffffffffeff
- CapBnd:	ffffffffffffffff
+CapInh:	0000000000000000
+CapPrm:	0000003fffffffff
+CapEff:	0000003fffffffff
+CapBnd:	0000003fffffffff
+CapAmb:	0000000000000000
 
+.fi
 This option provides a quick way to decode a capability vector
-represented in this form. For example, the missing capability from
-this effective set is 0x0100. By running:
-
- capsh --decode=0x0100
-
-we observe that the missing capability is:
-.BR cap_setpcap .
+represented in this hexadecimal form.
+Here's an example that decodes the two lowest capability bits:
+.IP
+.nf
+$ \fBcapsh \-\-decode=3\fP
+0x0000000000000003=cap_chown,cap_dac_override
+.fi
 .TP
-.BI --supports= xxx
+.BI \-\-supports= xxx
 As the kernel evolves, more capabilities are added. This option can be used
 to verify the existence of a capability on the system. For example,
-.BI --supports= cap_syslog
-will cause capsh to promptly exit with a status of 1 when run on
+.BI \-\-supports= cap_syslog
+will cause
+.B capsh
+to promptly exit with a status of 1 when run on
 kernel 2.6.27.  However, when run on kernel 2.6.38 it will silently
 succeed.
 .TP
+.BI \-\-has\-p= xxx
+Exit with status 1 unless the
+.I permitted
+vector has capability
+.B xxx
+raised.
+.TP
+.B \-\-has\-ambient
+Performs a check to see if the running kernel supports ambient
+capabilities. If not,
+.B capsh
+exits with status 1.
+.TP
+.BI \-\-has\-a= xxx
+Exit with status 1 unless the
+.I ambient
+vector has capability
+.B xxx
+raised.
+.TP
+.BI \-\-has\-b= xxx
+Exit with status 1 unless the
+.I bounding
+vector has capability
+.B xxx
+in its (default) non-blocked state.
+.TP
+.BI \-\-iab= xxx
+Attempts to set the IAB tuple of inheritable capability vectors.
+The text conventions used for \fIxxx\fP are those of
+.BR cap_iab_from_text (3).
+.TP
+.BI \-\-addamb= xxx
+Adds the specified ambient capability to the running process.
+.TP
+.BI \-\-delamb= xxx
+Removes the specified ambient capability from the running process.
+.TP
+.B \-\-noamb
+Drops all ambient capabilities from the running process.
+.TP
+.B \-\-noenv
+Suppresses overriding of the HOME and USER environment variables when
+a subsequent
+.B \-\-user
+argument is processed.
+.TP
+.B \-\-quiet
+This argument is ignored unless it is the first one. If present, it
+suppresses the capsh runtime check to confirm the running libcap is
+recent enough that it can name all of the kernel supported capability
+values.
 .SH "EXIT STATUS"
-Following successful execution the tool exits with status 0. Following
-an error, the tool immediately exits with status 1.
+Following successful execution,
+.B capsh
+exits with status 0. Following
+an error,
+.B capsh
+immediately exits with status 1.
 .SH AUTHOR
 Written by Andrew G. Morgan <morgan@kernel.org>.
 .SH "REPORTING BUGS"
-Please report bugs to the author.
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
 .SH "SEE ALSO"
 .BR libcap (3),
-.BR getcap (8), setcap (8)
+.BR cap_from_text (3),
+.BR cap_iab (3)
+.BR capabilities (7),
+.BR captree (8),
+.BR getcap (8),
+.BR getpcaps (8),
 and
-.BR capabilities (7).
+.BR setcap (8).
diff --git a/doc/captree.8 b/doc/captree.8
new file mode 100644
index 0000000..86a7de3
--- /dev/null
+++ b/doc/captree.8
@@ -0,0 +1,75 @@
+.\"                                      Hey, EMACS: -*- nroff -*-
+.TH CAPTREE 8 "2022-04-11"
+.\" Please adjust this date whenever revising the manpage.
+.SH NAME
+captree \- display tree of process capabilities
+.SH SYNOPSIS
+.BR captree " [OPTIONS] "
+.RI [( pid | glob-name ") ...]"
+.SH DESCRIPTION
+.B captree
+displays the capabilities on the mentioned processes indicated by
+.IR pid " or " glob-name
+value(s) given on the command line. If no
+.I pid
+etc values are supplied,
+.IR pid =1
+is implied. A
+.I pid
+value of 0 displays all the processes known to the kernel.
+.PP
+The POSIX.1e capabilities are displayed in double quotes in the
+.BR cap_from_text (3)
+format. The IAB tuple of capabilities is displayed between square
+brackets in the text format described in
+.BR cap_iab (3).
+Note, the IAB tuple text is omitted if it contains empty A and B
+components. This is because the regular POSIX.1e text contains
+information about the Inheritable flag already. This behavior can be
+overridden with the
+.B \-\-verbose
+command line argument.
+.PP
+Optional arguments (which must precede the list of pid|glob-name
+values):
+.TP
+.B \-\-help
+Displays usage information and exits. Note, modern Go runtimes exit
+with status 0 in this case, but older runtimes exit with status 2.
+.TP
+.BR \-\-verbose
+Displays capability sets and IAB tuples even when they are empty, or
+redundant.
+.TP
+.BI \-\-depth =n
+Displays the process tree to a depth of
+.IR n .
+Note, the default value for this parameter is 0, which implies
+infinite depth.
+.TP
+.BI \-\-colo[u]r =false
+Colo[u]rs the targeted PIDs, if stdout is a TTY, in red. This option
+defaults to true when running via a TTY. The \fB\-\-color\fI=false\fR
+argument will suppress this color. Piping the output into some other
+program will also suppress the use of colo[u]r.
+.SH EXIT STATUS
+If the supplied target cannot be found the exit status is 1. Should an
+unrecognized option be provided, the exit status is 2. Otherwise,
+.B captree
+exits with status 0.
+.SH REPORTING BUGS
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
+.SH SEE ALSO
+.BR cap_from_text (3),
+.BR capabilities (7),
+and
+.BR cap_iab (3).
+
+There is a longer article about \fBcaptree\fP, which includes some
+examples, here:
+
+   https://sites.google.com/site/fullycapable/captree
+.SH AUTHOR
+Andrew G. Morgan <morgan@kernel.org>
diff --git a/doc/crosslink.sh b/doc/crosslink.sh
new file mode 100755
index 0000000..d701522
--- /dev/null
+++ b/doc/crosslink.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# So many cross links to maintain. Here is a script that I've used to
+# validate things at least conform to some structure:
+#
+for x in *.? ; do
+    y=$(grep -F '.so m' ${x} | awk '{print $2}' | sed -e 's/man..//')
+    if [ -z "${y}" ]; then
+	continue
+    fi
+    echo
+    echo "###########"
+    echo "${x} => ${y}"
+    grep -F "${x%.*}" "${y}"
+done
diff --git a/doc/getcap.8 b/doc/getcap.8
index 1d5ac0f..8b6d201 100644
--- a/doc/getcap.8
+++ b/doc/getcap.8
@@ -1,29 +1,41 @@
-.\"
-.\" $Id: getcap.8,v 1.1.1.1 1999/04/17 22:16:31 morgan Exp $
-.\" written by Andrew Main <zefram@dcs.warwick.ac.uk>
-.\"
-.TH GETCAP 8 "12 Nov 2007"
+.\" originally written by Andrew Main <zefram@dcs.warwick.ac.uk>
+.TH GETCAP 8 "2021-08-29"
 .SH NAME
 getcap \- examine file capabilities
 .SH SYNOPSIS
-\fBgetcap\fP [-v] [-r] [-h] \fIfilename\fP [ ... ]
+\fBgetcap\fP [\-v] [\-n] [\-r] [\-h] \fIfilename\fP [ ... ]
 .SH DESCRIPTION
 .B getcap
-displays the name and capabilities of each specified
+displays the name and capabilities of each specified file.
 .SH OPTIONS
 .TP 4
-.B -r
-enables recursive search.
+.B \-h
+prints quick usage.
 .TP 4
-.B -v
-enables to display all searched entries, even if it has no file-capabilities.
+.B \-n
+prints any non-zero user namespace root user ID value
+found to be associated with
+a file's capabilities.
 .TP 4
-.B -h
-prints quick usage.
+.B \-r
+enables recursive search.
+.TP 4
+.B \-v
+display all searched entries, even if they have no file-capabilities.
 .TP 4
 .IR filename
 One file per line.
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
 .SH "SEE ALSO"
+.BR capsh (1),
 .BR cap_get_file (3),
 .BR cap_to_text (3),
-.BR setcap (8)
+.BR capabilities (7),
+.BR user_namespaces (7),
+.BR captree (8),
+.BR getpcaps (8)
+and
+.BR setcap (8).
diff --git a/doc/getpcaps.8 b/doc/getpcaps.8
new file mode 100644
index 0000000..1c59ddc
--- /dev/null
+++ b/doc/getpcaps.8
@@ -0,0 +1,59 @@
+.\"                                      Hey, EMACS: -*- nroff -*-
+.TH GETPCAPS 8 "2020-08-29"
+.\" Please adjust this date whenever revising the manpage.
+.SH NAME
+getpcaps \- display process capabilities
+.SH SYNOPSIS
+.BR getpcaps " [optional args]"
+.IR pid ...
+.SH DESCRIPTION
+.B getpcaps
+displays the capabilities on the processes indicated by the
+.I pid
+value(s) given on the command line.
+A
+.I pid
+of 0 displays the capabilities of the process that is running
+.B getpcaps
+itself.
+.PP
+The capabilities are displayed in
+the
+.BR cap_from_text (3)
+format.
+.PP
+Optional arguments:
+.TP
+.BR \-\-help " or " \-\-usage
+Displays usage information and exits.
+.TP
+.BR \-\-ugly " or " \-\-legacy
+Displays output in a somewhat ugly legacy format.
+.TP
+.B \-\-verbose
+Displays output in a legacy-like format but not quite so ugly in modern
+default terminal fonts.
+.TP
+.B \-\-iab
+Displays IAB tuple capabilities from the process. The output format
+here is the text format described in \fBcap_iab\fR(3). Double
+quotes encase the regular process capabilities and square brackets
+encase the IAB tuple. This format is also used by \fBcaptree\fR(8).
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
+.SH SEE ALSO
+.BR capsh (1),
+.BR cap_from_text (3),
+.BR cap_iab (3),
+.BR capabilities (7),
+.BR captree (8),
+.BR getcap (8),
+and
+.BR setcap (8).
+.SH AUTHOR
+This manual page was originally written by Robert Bihlmeyer
+<robbe@debian.org>, for the Debian GNU/Linux system (but may be used
+by others).
+
diff --git a/doc/libcap.3 b/doc/libcap.3
index 0e76b4e..a91cf7e 100644
--- a/doc/libcap.3
+++ b/doc/libcap.3
@@ -1,70 +1,83 @@
-.TH LIBCAP 3 "2008-07-29" "" "Linux Programmer's Manual"
+.TH LIBCAP 3 "2022-10-16" "" "Linux Programmer's Manual"
 .SH NAME
 cap_clear, cap_clear_flag, cap_compare, cap_copy_ext, cap_copy_int, \
-cap_free, cap_from_name, cap_from_text, cap_get_fd, cap_get_file, \
-cap_get_flag, cap_get_pid, cap_get_proc, cap_set_fd, cap_set_file, \
-cap_set_flag, cap_set_proc, cap_size, cap_to_name, cap_to_text, \
-cap_get_pid, cap_dup \- capability data object manipulation
+cap_drop_bound, cap_dup, cap_fill, cap_fill_flag, cap_free, cap_from_name, \
+cap_from_text, cap_get_ambient, cap_get_bound, cap_get_fd, \
+cap_get_file, cap_get_flag, cap_get_mode, cap_get_nsowner, cap_get_pid, \
+cap_get_proc, cap_get_secbits, cap_init, cap_max_bits, \
+cap_prctl, cap_prctlw, cap_proc_root, cap_reset_ambient, \
+cap_set_ambient, cap_set_fd, cap_set_file, cap_set_flag, cap_setgroups, \
+cap_set_mode, cap_set_nsowner, cap_set_proc, cap_set_secbits, \
+cap_setuid, cap_size, cap_to_name, cap_to_text \- capability data object manipulation
 .SH SYNOPSIS
 .nf
-.B #include <sys/capability.h>
-.sp
-.BI "int cap_clear(cap_t " cap_p );
-.sp
-.BI "int cap_clear_flag(cap_t " cap_p ", cap_flag_t " flag ");"
-.sp
-.BI "int cap_compare(cap_t " cap_a ", cap_t " cap_b ");"
-.sp
-.BI "ssize_t cap_copy_ext(void *" ext_p ", cap_t " cap_p ", ssize_t " size );
-.sp
-.BI "cap_t cap_copy_int(const void *" ext_p );
-.sp
-.BI "int cap_free(void *" obj_d );
-.sp
-.BI "int cap_from_name(const char *" name ", cap_value_t *" cap_p );
-.sp
-.BI "cap_t cap_from_text(const char *" buf_p );
-.sp
-.BI "cap_t cap_get_fd(int " fd );
-.sp
-.BI "cap_t cap_get_file(const char *" path_p );
-.sp
-.BI "int cap_get_flag(cap_t " cap_p ", cap_value_t " cap ,
-.BI "                 cap_flag_t " flag ", cap_flag_value_t *" value_p ");"
-.sp
-.B #include <sys/types.h>
-.BI "cap_t cap_get_pid(pid_t " pid );
-.sp
-.B "cap_t cap_get_proc(void);"
-.sp
-.BI "int cap_set_fd(int " fd ", cap_t " caps );
-.sp
-.BI "int cap_set_file(const char *" path_p ", cap_t " cap_p );
-.sp
-.sp
-.BI "int cap_set_flag(cap_t " cap_p ", cap_flag_t " flag ", int " ncap ,
-.BI "                 const cap_value_t *" caps ", cap_flag_value_t " value ");"
-.BI "int cap_set_proc(cap_t " cap_p );
-.sp
-.BI "ssize_t cap_size(cap_t " cap_p );
-.sp
-.BI "char *cap_to_name(cap_value_t " cap );
-.sp
-.BI "char *cap_to_text(cap_t " caps ", ssize_t *" length_p );
-.sp
-.BI "cap_t cap_get_pid(pid_t " pid );
-.sp
-.BI "cap_t cap_dup(cap_t " cap_p );
+#include <sys/capability.h>
+
+int cap_clear(cap_t cap_p);
+int cap_fill(cap_t cap_p, cap_flag_t to, cap_flag_t from);
+int cap_fill_flag(cap_t cap_p, cap_flag_t to, const cap_t ref, cap_flag_t from);
+int cap_clear_flag(cap_t cap_p, cap_flag_t flag);
+int cap_compare(cap_t cap_a, cap_t cap_b);
+ssize_t cap_copy_ext(void *ext_p, cap_t cap_p, ssize_t size);
+cap_t cap_copy_int(const void *ext_p);
+int cap_free(void *obj_d);
+int cap_from_name(const char *name, cap_value_t *cap_p);
+cap_t cap_from_text(const char *buf_p);
+cap_t cap_get_fd(int fd);
+cap_t cap_get_file(const char *path_p);
+int cap_get_flag(cap_t cap_p, cap_value_t cap ,
+                 cap_flag_t flag, cap_flag_value_t *value_p);
+cap_value_t cap_max_bits();
+
+#include <sys/types.h>
+
+cap_t cap_get_pid(pid_t pid);
+cap_t cap_get_proc(void);
+int cap_set_fd(int fd, cap_t caps);
+int cap_set_file(const char *path_p, cap_t cap_p);
+int cap_set_flag(cap_t cap_p, cap_flag_t flag, int ncap ,
+                 const cap_value_t *caps, cap_flag_value_t value);
+int cap_set_proc(cap_t cap_p);
+ssize_t cap_size(cap_t cap_p);
+char *cap_to_name(cap_value_t cap);
+char *cap_to_text(cap_t caps, ssize_t *length_p);
+cap_t cap_get_pid(pid_t pid);
+cap_t cap_init();
+cap_t cap_dup(cap_t cap_p);
+
+char *cap_proc_root(const char *root);
+int cap_get_nsowner(cap_t cap_p);
+int cap_set_nsowner(cap_t cap_p, uid_t rootuid);
+int cap_get_bound(cap_value_t cap);
+int cap_drop_bound(cap_value_t cap);
+int cap_get_ambient(cap_value_t cap);
+int cap_set_ambient(cap_value_t cap, cap_flag_value_t value);
+int cap_reset_ambient(void);
+int cap_set_mode(cap_mode_t flavor);
+cap_mode_t cap_get_mode(void);
+const char *cap_mode_name(cap_mode_t flavor);
+unsigned cap_get_secbits();
+int cap_set_secbits(unsigned bits);
+int cap_prctl(long int pr_cmd, long int arg1, long int arg2, long int arg3,
+              long int arg4, long int arg5);
+int cap_prctlw(long int pr_cmd, long int arg1, long int arg2, long int arg3,
+               long int arg4, long int arg5);
+int cap_setuid(uid_t uid);
+int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[]);
+.fi
 .sp
-Link with \fI-lcap\fP.
+Link with \fI\-lcap\fP.
 .fi
 .SH DESCRIPTION
-These functions work on a capability state held in working storage.
+These primary functions work on a capability state held in working
+storage and attempt to complete the POSIX.1e (draft) user space API
+for Capability based privilege.
+.PP
 A
 .I cap_t
 holds information about the capabilities in each of the three sets,
-Permitted, Inheritable, and Effective.
-Each capability in a set may be clear (disabled, 0) or set (enabled, 1).
+Permitted, Inheritable, and Effective.  Each capability in a set may
+be clear (disabled, 0) or set (enabled, 1).
 .PP
 These functions work with the following data types:
 .TP 18
@@ -97,18 +110,89 @@ is set appropriately.
 These functions are as per the withdrawn POSIX.1e draft specification.
 The following functions are Linux extensions:
 .BR cap_clear_flag (),
-.BR cap_compare (),
+.BR cap_drop_bound (),
+.BR cap_fill (),
+.BR cap_fill_flag (),
 .BR cap_from_name (),
-.BR cap_to_name (),
+.BR cap_get_ambient (),
+.BR cap_get_bound (),
+.BR cap_get_mode (),
+.BR cap_get_nsowner (),
+.BR cap_get_secbits (),
+.BR cap_mode_name (),
+.BR cap_proc_root (),
+.BR cap_prctl (),
+.BR cap_prctlw (),
+.BR cap_reset_ambient (),
+.BR cap_setgroups (),
+.BR cap_setuid (),
+.BR cap_set_ambient (),
+.BR cap_set_mode (),
+.BR cap_set_nsowner (),
+.BR cap_set_secbits (),
+.BR cap_to_name ()
 and
 .BR cap_compare ().
+.PP
+A Linux extension, the \fIIAB\fP tuple of Inheritable, Ambient and Bounding
+capability vectors, is also supported by \fBlibcap\fP. Those
+functions are described in a companion man page:
+.BR cap_iab (3).
+Further, for managing the complexity of launching a sub-process,
+\fBlibcap\fP supports the abstraction:
+.BR cap_launch (3).
+.PP
+In addition to the \fBcap_\fP prefixed \fBlibcap\fP API, the library
+also provides prototypes for the Linux system calls that provide the
+native API for process capabilities. These prototypes are:
+.sp
+.nf
+int capget(cap_user_header_t header, cap_user_data_t data);
+int capset(cap_user_header_t header, const cap_user_data_t data);
+.fi
+.sp
+Further, \fBlibcap\fP provides a set-up function,
+.sp
+.nf
+void cap_set_syscall(
+        long int (*new_syscall)(long int, long int, long int, long int),
+        long int (*new_syscall6)(long int,
+                                 long int, long int, long int,
+                                 long int, long int, long int));
+.fi
+.sp
+which can be used to redirect its use of the
+.BR capset ()
+and other system calls that write kernel managed state. This is
+especially useful when supporting POSIX semantics for security
+state. When a program is linked against
+.BR libpsx (3)
+as described in that man page, this function is used to connect
+\fBlibcap\fP to POSIX semantics system calls.
+.SH "REPORTING BUGS"
+The
+.B libcap
+library is distributed from
+https://sites.google.com/site/fullycapable/ where the release notes
+may already cover recent issues.  Please report newly discovered bugs
+via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
 .SH "SEE ALSO"
 .BR cap_clear (3),
 .BR cap_copy_ext (3),
 .BR cap_from_text (3),
 .BR cap_get_file (3),
 .BR cap_get_proc (3),
+.BR cap_iab (3),
 .BR cap_init (3),
+.BR cap_launch (3),
 .BR capabilities (7),
-.BR getpid (2)
-.BR capsh (1)
+.BR getpid (2),
+.BR capsh (1),
+.BR captree (8),
+.BR getcap (8),
+.BR getpcaps (8),
+.BR setcap (8)
+and
+.BR libpsx (3).
diff --git a/doc/libpsx.3 b/doc/libpsx.3
new file mode 100644
index 0000000..4a0b5b6
--- /dev/null
+++ b/doc/libpsx.3
@@ -0,0 +1,133 @@
+.TH LIBPSX 3 "2021-12-12" "" "Linux Programmer's Manual"
+.SH NAME
+psx_syscall3, psx_syscall6, psx_set_sensitivity \- POSIX semantics for system calls
+.SH SYNOPSIS
+.nf
+#include <sys/psx_syscall.h>
+
+long int psx_syscall3(long int syscall_nr,
+                      long int arg1, long int arg2, long int arg3);
+long int psx_syscall6(long int syscall_nr,
+                      long int arg1, long int arg2, long int arg3,
+                      long int arg4, long int arg5, long int arg6);
+int psx_set_sensitivity(psx_sensitivity_t sensitivity);
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+                                    long int, long int, long int),
+                       long int (**syscall6_fn)(long int,
+                                    long int, long int, long int,
+                                    long int, long int, long int));
+.fi
+.sp
+Link with one of these:
+.sp
+.I   ld ... \-lpsx \-lpthread \-\-wrap=pthread_create
+.sp
+.I   gcc ... \-lpsx \-lpthread \-Wl,\-wrap,pthread_create
+.SH DESCRIPTION
+The
+.B libpsx
+library attempts to fill a gap left by the
+.BR pthreads (7)
+implementation on Linux. To be compliant with POSIX threads, via the
+.BR nptl "(7) " setxid
+mechanism, glibc maintains consistent UID and GID credentials amongst
+all of the threads associated with the current process. However, other
+credential state is not supported by this abstraction. To support
+these extended kernel managed security attributes,
+.B libpsx
+provides a more generic pair of wrapping system call functions:
+.BR psx_syscall3 "() and " psx_syscall6 ().
+Like the
+.B setxid
+mechanism, the coordination of thread state is mediated by a realtime
+signal. Whereas the
+.B nptl:setxid
+mechanism uses signo=33 (which is hidden by glibc below a redefined
+.BR SIGRTMIN "), " libpsx
+inserts itself in the
+.B SIGSYS
+handler stack. It goes to great length to be the first such handler
+but acts as a pass-through for other
+.B SIGSYS
+uses.
+.PP
+A linker trick of
+.I wrapping
+the
+.BR pthread_create ()
+call with a psx thread registration function is used to ensure
+.B libpsx
+can keep track of all pthreads.
+.PP
+An inefficient macrology trick supports the
+.BR psx_syscall ()
+pseudo function which takes 1 to 7 arguments, depending on the needs
+of the caller. The macrology (which ultimately invokes
+.BR __psx_syscall ())
+pads out the call to actually use
+.BR psx_syscall3 ()
+or
+.BR psx_syscall6 ()
+with zeros filling the missing arguments. While using this in source
+code will make it appear clean, the actual code footprint is
+larger. You are encouraged to use the more explicit
+.BR psx_syscall3 ()
+and
+.BR psx_syscall6 ()
+functions as needed.
+.PP
+.BR psx_set_sensitivity ()
+changes the behavior of the mirrored system calls:
+.B PSX_IGNORE
+ensures that differences are ignored (the default behavior);
+.B PSX_WARNING
+prints a stderr notification about how the results differ; and
+.B PSX_ERROR
+prints the error details and generates a
+.B SIGSYS
+signal.
+.PP
+.BR psx_load_syscalls ()
+can be used to set caller defined function pointers for invoking 3 and
+6 argument syscalls. This function can be used to configure a library,
+or program to change behavior when linked against
+.BR libpsx .
+Indeed,
+.B libcap
+uses this function from
+.B libpsx
+to override its thread scoped default system call based API. When
+linked with
+.BR libpsx ", " libcap
+can operate on all the threads of a multithreaded program to operate
+with POSIX semantics.
+.SH RETURN VALUE
+The return value for system call functions is generally the value
+returned by the kernel, or \-1 in the case of an error. In such cases
+.BR errno (3)
+is set to the detailed error value. The
+.BR psx_syscall3 "() and " psx_syscall6 ()
+functions attempt a single threaded system call and return immediately
+in the case of an error. Should this call succeed, then the same
+system calls are executed from a signal handler on each of the other
+threads of the process.
+.SH CONFORMING TO
+The needs of
+.BR libcap (3)
+for POSIX semantics of capability manipulation. You can read more
+about why this is needed here:
+.TP
+https://sites.google.com/site/fullycapable/who-ordered-libpsx
+.SH "REPORTING BUGS"
+The
+.B libpsx
+library is distributed from
+https://sites.google.com/site/fullycapable/ where the release notes
+may already cover recent issues.  Please report newly discovered bugs
+via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
+.SH SEE ALSO
+.BR libcap (3),
+.BR pthreads "(7) and"
+.BR nptl (7).
diff --git a/doc/md2html.lua b/doc/md2html.lua
new file mode 100644
index 0000000..c2677d8
--- /dev/null
+++ b/doc/md2html.lua
@@ -0,0 +1,6 @@
+-- This is the links-to-html.lua example from stackoverflow:
+-- https://stackoverflow.com/questions/40993488/convert-markdown-links-to-html-with-pandoc
+function Link(el)
+  el.target = string.gsub(el.target, "%.md", ".html")
+  return el
+end
diff --git a/doc/mkmd.sh b/doc/mkmd.sh
new file mode 100755
index 0000000..ce8baa2
--- /dev/null
+++ b/doc/mkmd.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+#
+# Handy script to rebuild the markdown version of the man pages.
+# This uses pandoc if it is installed.
+#
+# For rendering the md, we can use a different command:
+#
+#   cd md; for x in *.md ; do pandoc -s $x --metadata pagetitle="${x%.md}" -o ${x%.md}.html --lua-filter=../md2html.lua ; done
+
+if [[ -z "$(which pandoc)" ]]; then
+    echo "pandoc not found - skipping conversion"
+    exit 0
+fi
+
+outdir="$1"
+if [[ -z "${outdir}" ]]; then
+    echo "usage $0 <outdir>"
+    exit 1
+fi
+
+mkdir -p "${outdir}"
+if [[ $? -ne 0 ]]; then
+    echo "failed to make output directory: ${outdir}"
+    exit 1
+fi
+
+index="${outdir}/index.md"
+
+function do_page () {
+    m="$1"
+    base="${m%.*}"
+    sect="${m#*.}"
+    output="${base}-${sect}.md"
+
+    echo "converting ${m}" 1>&2
+
+    redir="$(grep '^.so man' "${m}")"
+    if [[ $? -eq 0 ]]; then
+	r="${redir#*/}"
+	rbase="${r%.*}"
+	rsect="${r#*.}"
+	echo "* [${base}(${sect})](${rbase}-${rsect}.md)" >> "${index}"
+	return
+    fi
+
+    pandoc -f man -t markdown < "${m}" | sed 's/\*\*\([^*]\+\)\*\*(\([138]\+\))/[\1(\2)](\1-\2.md)/g' > "${outdir}/${base}-${sect}.md"
+    echo "* [${base}(${sect})](${base}-${sect}.md)" >> "${index}"
+}
+
+cat > "${index}" <<EOF
+# Manpages for libcap and libpsx
+
+EOF
+
+if [[ -f "local-md.preamble" ]]; then
+    cat "local-md.preamble" >> "${index}"
+fi
+
+cat >> "${index}" <<EOF
+
+## Individual reference pages
+EOF
+
+# Assumes the m's are listed alphabetically.
+for n in 1 3 8 ; do
+	cat >> "${index}" <<EOF
+
+### Section ${n}
+
+EOF
+    for m in *.${n}; do
+	do_page "${m}"
+    done
+done
+
+cat >> "${index}" <<EOF
+
+## More information
+
+EOF
+
+if [[ -f "local-md.postscript" ]]; then
+    cat "local-md.postscript" >> "${index}"
+fi
+
+cat >> "${index}" <<EOF
+
+For further information, see the
+[FullyCapable](https://sites.google.com/site/fullycapable/) homepage
+for libcap.
+
+## MD page generation
+
+These official man pages for libcap and libpsx were converted to
+markdown using [pandoc](https://pandoc.org).
+
+EOF
diff --git a/doc/old/_setfilecap.2 b/doc/old/_setfilecap.2
index 6a0538c..3c9e374 100644
--- a/doc/old/_setfilecap.2
+++ b/doc/old/_setfilecap.2
@@ -93,7 +93,7 @@ Search permission is denied on a component of the path prefix.
 .TP
 .SB ELOOP
 .I filename
-containes a circular reference (via symlinks).
+contains a circular reference (via symlinks).
 .TP
 .SB EBADF
 .I fd
diff --git a/doc/psx_load_syscalls.3 b/doc/psx_load_syscalls.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_load_syscalls.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/psx_set_sensitivity.3 b/doc/psx_set_sensitivity.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_set_sensitivity.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/psx_syscall.3 b/doc/psx_syscall.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_syscall.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/psx_syscall3.3 b/doc/psx_syscall3.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_syscall3.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/psx_syscall6.3 b/doc/psx_syscall6.3
new file mode 100644
index 0000000..663420c
--- /dev/null
+++ b/doc/psx_syscall6.3
@@ -0,0 +1 @@
+.so man3/libpsx.3
diff --git a/doc/setcap.8 b/doc/setcap.8
index 9344ba5..d652076 100644
--- a/doc/setcap.8
+++ b/doc/setcap.8
@@ -1,49 +1,67 @@
-.\"
-.\" $Id: setcap.8,v 1.1.1.1 1999/04/17 22:16:31 morgan Exp $
-.\"
-.TH SETCAP 8 "24th October 2008"
+.TH SETCAP 8 "2020-08-29"
 .SH NAME
 setcap \- set file capabilities
 .SH SYNOPSIS
-\fBsetcap\fP [-q] [-v] (\fIcapabilities|-|-r) filename\fP [ ... \fIcapabilitiesN\fP \fIfileN\fP ]
+\fBsetcap\fP [\-q] [\-n <rootuid>] [\-v] {\fIcapabilities|\-|\-r} filename\fP [ ... \fIcapabilitiesN\fP \fIfileN\fP ]
 .SH DESCRIPTION
 In the absence of the
-.B -v
+.B \-v
 (verify) option
 .B setcap
 sets the capabilities of each specified
 .I filename
 to the
 .I capabilities
-specified.  The
-.B -v
+specified.  The optional
+.B \-n <rootuid>
+argument can be used to set the file capability for use only in a
+user namespace with this root user ID owner. The
+.B \-v
 option is used to verify that the specified capabilities are currently
-associated with the file.
+associated with the file. If \-v and \-n are supplied, the
+.B \-n <rootuid>
+argument is also verified.
 .PP
 The
 .I capabilities
 are specified in the form described in
-.IR cap_from_text (3).
+.BR cap_from_text (3).
 .PP
 The special capability string,
-.BR '-' ,
+.BR '\-' ,
 can be used to indicate that capabilities are read from the standard
 input. In such cases, the capability set is terminated with a blank
 line.
 .PP
 The special capability string,
-.BR '-r' ,
-is used to remove a capability set from a file.
+.BR '\-r' ,
+is used to remove a capability set from a file. Note, setting an empty
+capability set is
+.B not the same
+as removing it. An empty set can be used to guarantee a file is not
+executed with privilege in spite of the fact that the prevailing
+ambient+inheritable sets would otherwise bestow capabilities on
+executed binaries.
 .PP
 The
-.B -q
+.B \-q
 flag is used to make the program less verbose in its output.
 .SH "EXIT CODE"
 The
 .B setcap
 program will exit with a 0 exit code if successful. On failure, the
 exit code is 1.
+.SH "REPORTING BUGS"
+Please report bugs via:
+.TP
+https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1090757
 .SH "SEE ALSO"
+.BR capsh (1),
 .BR cap_from_text (3),
-.BR cap_set_file (3),
+.BR cap_get_file (3),
+.BR capabilities (7),
+.BR user_namespaces (7),
+.BR captree (8),
 .BR getcap (8)
+and
+.BR getpcaps (8).
diff --git a/doc/values/0.txt b/doc/values/0.txt
new file mode 100644
index 0000000..dd2f360
--- /dev/null
+++ b/doc/values/0.txt
@@ -0,0 +1,2 @@
+Allows a process to arbitrarily change the user and
+group ownership of a file.
diff --git a/doc/values/1.txt b/doc/values/1.txt
new file mode 100644
index 0000000..a0e7f72
--- /dev/null
+++ b/doc/values/1.txt
@@ -0,0 +1,5 @@
+Allows a process to override all Discretionary
+Access Control (DAC) access, including ACL execute
+access. That is read, write or execute files that the
+process would otherwise not have access to. This
+excludes DAC access covered by CAP_LINUX_IMMUTABLE.
diff --git a/doc/values/10.txt b/doc/values/10.txt
new file mode 100644
index 0000000..8335a6b
--- /dev/null
+++ b/doc/values/10.txt
@@ -0,0 +1,3 @@
+Allows a process to bind to privileged ports:
+  - TCP/UDP sockets below 1024
+  - ATM VCIs below 32
diff --git a/doc/values/11.txt b/doc/values/11.txt
new file mode 100644
index 0000000..6f63994
--- /dev/null
+++ b/doc/values/11.txt
@@ -0,0 +1,2 @@
+Allows a process to broadcast to the network and to
+listen to multicast.
diff --git a/doc/values/12.txt b/doc/values/12.txt
new file mode 100644
index 0000000..f4dc172
--- /dev/null
+++ b/doc/values/12.txt
@@ -0,0 +1,17 @@
+Allows a process to perform network configuration
+operations:
+  - interface configuration
+  - administration of IP firewall, masquerading and
+    accounting
+  - setting debug options on sockets
+  - modification of routing tables
+  - setting arbitrary process, and process group
+    ownership on sockets
+  - binding to any address for transparent proxying
+    (this is also allowed via CAP_NET_RAW)
+  - setting TOS (Type of service)
+  - setting promiscuous mode
+  - clearing driver statistics
+  - multicasting
+  - read/write of device-specific registers
+  - activation of ATM control sockets
diff --git a/doc/values/13.txt b/doc/values/13.txt
new file mode 100644
index 0000000..7a1faf7
--- /dev/null
+++ b/doc/values/13.txt
@@ -0,0 +1,5 @@
+Allows a process to use raw networking:
+  - RAW sockets
+  - PACKET sockets
+  - binding to any address for transparent proxying
+    (also permitted via CAP_NET_ADMIN)
diff --git a/doc/values/14.txt b/doc/values/14.txt
new file mode 100644
index 0000000..1f248d6
--- /dev/null
+++ b/doc/values/14.txt
@@ -0,0 +1,3 @@
+Allows a process to lock shared memory segments for IPC
+purposes.  Also enables mlock and mlockall system
+calls.
diff --git a/doc/values/15.txt b/doc/values/15.txt
new file mode 100644
index 0000000..0f5e13c
--- /dev/null
+++ b/doc/values/15.txt
@@ -0,0 +1 @@
+Allows a process to override IPC ownership checks.
diff --git a/doc/values/16.txt b/doc/values/16.txt
new file mode 100644
index 0000000..03373b0
--- /dev/null
+++ b/doc/values/16.txt
@@ -0,0 +1,3 @@
+Allows a process to initiate the loading and unloading
+of kernel modules. This capability can effectively
+modify kernel without limit.
diff --git a/doc/values/17.txt b/doc/values/17.txt
new file mode 100644
index 0000000..79474af
--- /dev/null
+++ b/doc/values/17.txt
@@ -0,0 +1,4 @@
+Allows a process to perform raw IO:
+  - permit ioperm/iopl access
+  - permit sending USB messages to any device via
+    /dev/bus/usb
diff --git a/doc/values/18.txt b/doc/values/18.txt
new file mode 100644
index 0000000..2ee0e2a
--- /dev/null
+++ b/doc/values/18.txt
@@ -0,0 +1,3 @@
+Allows a process to perform a chroot syscall to change
+the effective root of the process' file system:
+redirect directory "/" to some other location.
diff --git a/doc/values/19.txt b/doc/values/19.txt
new file mode 100644
index 0000000..2861571
--- /dev/null
+++ b/doc/values/19.txt
@@ -0,0 +1,2 @@
+Allows a process to perform a ptrace() of any other
+process.
diff --git a/doc/values/2.txt b/doc/values/2.txt
new file mode 100644
index 0000000..99f0031
--- /dev/null
+++ b/doc/values/2.txt
@@ -0,0 +1,4 @@
+Allows a process to override all DAC restrictions
+limiting the read and search of files and
+directories. This excludes DAC access covered by
+CAP_LINUX_IMMUTABLE.
diff --git a/doc/values/20.txt b/doc/values/20.txt
new file mode 100644
index 0000000..3f5796f
--- /dev/null
+++ b/doc/values/20.txt
@@ -0,0 +1 @@
+Allows a process to configure process accounting.
diff --git a/doc/values/21.txt b/doc/values/21.txt
new file mode 100644
index 0000000..4cff57d
--- /dev/null
+++ b/doc/values/21.txt
@@ -0,0 +1,43 @@
+Allows a process to perform a somewhat arbitrary
+grab-bag of privileged operations. Over time, this
+capability should weaken as specific capabilities are
+created for subsets of CAP_SYS_ADMIN's functionality:
+  - configuration of the secure attention key
+  - administration of the random device
+  - examination and configuration of disk quotas
+  - setting the domainname
+  - setting the hostname
+  - calling bdflush()
+  - mount() and umount(), setting up new SMB connection
+  - some autofs root ioctls
+  - nfsservctl
+  - VM86_REQUEST_IRQ
+  - to read/write pci config on alpha
+  - irix_prctl on mips (setstacksize)
+  - flushing all cache on m68k (sys_cacheflush)
+  - removing semaphores
+  - Used instead of CAP_CHOWN to "chown" IPC message
+    queues, semaphores and shared memory
+  - locking/unlocking of shared memory segment
+  - turning swap on/off
+  - forged pids on socket credentials passing
+  - setting readahead and flushing buffers on block
+    devices
+  - setting geometry in floppy driver
+  - turning DMA on/off in xd driver
+  - administration of md devices (mostly the above, but
+    some extra ioctls)
+  - tuning the ide driver
+  - access to the nvram device
+  - administration of apm_bios, serial and bttv (TV)
+    device
+  - manufacturer commands in isdn CAPI support driver
+  - reading non-standardized portions of PCI
+    configuration space
+  - DDI debug ioctl on sbpcd driver
+  - setting up serial ports
+  - sending raw qic-117 commands
+  - enabling/disabling tagged queuing on SCSI
+    controllers and sending arbitrary SCSI commands
+  - setting encryption key on loopback filesystem
+  - setting zone reclaim policy
diff --git a/doc/values/22.txt b/doc/values/22.txt
new file mode 100644
index 0000000..9380ceb
--- /dev/null
+++ b/doc/values/22.txt
@@ -0,0 +1 @@
+Allows a process to initiate a reboot of the system.
diff --git a/doc/values/23.txt b/doc/values/23.txt
new file mode 100644
index 0000000..c5a0360
--- /dev/null
+++ b/doc/values/23.txt
@@ -0,0 +1,6 @@
+Allows a process to manipulate the execution priorities
+of arbitrary processes:
+  - those involving different UIDs
+  - setting their CPU affinity
+  - alter the FIFO vs. round-robin (realtime)
+    scheduling for itself and other processes.
diff --git a/doc/values/24.txt b/doc/values/24.txt
new file mode 100644
index 0000000..4911e50
--- /dev/null
+++ b/doc/values/24.txt
@@ -0,0 +1,14 @@
+Allows a process to adjust resource related parameters
+of processes and the system:
+  - set and override resource limits
+  - override quota limits
+  - override the reserved space on ext2 filesystem
+    (this can also be achieved via CAP_FSETID)
+  - modify the data journaling mode on ext3 filesystem,
+    which uses journaling resources
+  - override size restrictions on IPC message queues
+  - configure more than 64Hz interrupts from the
+    real-time clock
+  - override the maximum number of consoles for console
+    allocation
+  - override the maximum number of keymaps
diff --git a/doc/values/25.txt b/doc/values/25.txt
new file mode 100644
index 0000000..95fd513
--- /dev/null
+++ b/doc/values/25.txt
@@ -0,0 +1,4 @@
+Allows a process to perform time manipulation of clocks:
+  - alter the system clock
+  - enable irix_stime on MIPS
+  - set the real-time clock
diff --git a/doc/values/26.txt b/doc/values/26.txt
new file mode 100644
index 0000000..ee446ba
--- /dev/null
+++ b/doc/values/26.txt
@@ -0,0 +1,3 @@
+Allows a process to manipulate tty devices:
+  - configure tty devices
+  - perform vhangup() of a tty
diff --git a/doc/values/27.txt b/doc/values/27.txt
new file mode 100644
index 0000000..0894164
--- /dev/null
+++ b/doc/values/27.txt
@@ -0,0 +1,2 @@
+Allows a process to perform privileged operations with
+the mknod() system call.
diff --git a/doc/values/28.txt b/doc/values/28.txt
new file mode 100644
index 0000000..fd0b6b9
--- /dev/null
+++ b/doc/values/28.txt
@@ -0,0 +1 @@
+Allows a process to take leases on files.
diff --git a/doc/values/29.txt b/doc/values/29.txt
new file mode 100644
index 0000000..ca1fdb8
--- /dev/null
+++ b/doc/values/29.txt
@@ -0,0 +1,2 @@
+Allows a process to write to the audit log via a
+unicast netlink socket.
diff --git a/doc/values/3.txt b/doc/values/3.txt
new file mode 100644
index 0000000..2d68efd
--- /dev/null
+++ b/doc/values/3.txt
@@ -0,0 +1,8 @@
+Allows a process to perform operations on files, even
+where file owner ID should otherwise need be equal to
+the UID, except where CAP_FSETID is applicable. It
+doesn't override MAC and DAC restrictions.
+
+This capability permits the deletion of a file owned
+by another UID in a directory protected by the sticky
+(t) bit.
diff --git a/doc/values/30.txt b/doc/values/30.txt
new file mode 100644
index 0000000..d1ef942
--- /dev/null
+++ b/doc/values/30.txt
@@ -0,0 +1,2 @@
+Allows a process to configure audit logging via a
+unicast netlink socket.
diff --git a/doc/values/31.txt b/doc/values/31.txt
new file mode 100644
index 0000000..ae97df2
--- /dev/null
+++ b/doc/values/31.txt
@@ -0,0 +1,6 @@
+Allows a process to set capabilities on files.
+Permits a process to uid_map the uid=0 of the
+parent user namespace into that of the child
+namespace. Also, permits a process to override
+securebits locks through user namespace
+creation.
diff --git a/doc/values/32.txt b/doc/values/32.txt
new file mode 100644
index 0000000..9c261d8
--- /dev/null
+++ b/doc/values/32.txt
@@ -0,0 +1,4 @@
+Allows a process to override Mandatory Access Control
+(MAC) access. Not all kernels are configured with a MAC
+mechanism, but this is the capability reserved for
+overriding them.
diff --git a/doc/values/33.txt b/doc/values/33.txt
new file mode 100644
index 0000000..a4e441e
--- /dev/null
+++ b/doc/values/33.txt
@@ -0,0 +1,4 @@
+Allows a process to configure the Mandatory Access
+Control (MAC) policy. Not all kernels are configured
+with a MAC enabled, but if they are this capability is
+reserved for code to perform administration tasks.
diff --git a/doc/values/34.txt b/doc/values/34.txt
new file mode 100644
index 0000000..9728790
--- /dev/null
+++ b/doc/values/34.txt
@@ -0,0 +1,2 @@
+Allows a process to configure the kernel's syslog
+(printk) behavior.
diff --git a/doc/values/35.txt b/doc/values/35.txt
new file mode 100644
index 0000000..8ce5a17
--- /dev/null
+++ b/doc/values/35.txt
@@ -0,0 +1,2 @@
+Allows a process to trigger something that can wake the
+system up.
diff --git a/doc/values/36.txt b/doc/values/36.txt
new file mode 100644
index 0000000..7088ba6
--- /dev/null
+++ b/doc/values/36.txt
@@ -0,0 +1,2 @@
+Allows a process to block system suspends - prevent the
+system from entering a lower power state.
diff --git a/doc/values/37.txt b/doc/values/37.txt
new file mode 100644
index 0000000..fff9f60
--- /dev/null
+++ b/doc/values/37.txt
@@ -0,0 +1,2 @@
+Allows a process to read the audit log via a multicast
+netlink socket.
diff --git a/doc/values/38.txt b/doc/values/38.txt
new file mode 100644
index 0000000..f75db74
--- /dev/null
+++ b/doc/values/38.txt
@@ -0,0 +1,4 @@
+Allows a process to enable observability of privileged
+operations related to performance. The mechanisms
+include perf_events, i915_perf and other kernel
+subsystems.
diff --git a/doc/values/39.txt b/doc/values/39.txt
new file mode 100644
index 0000000..d05a5c6
--- /dev/null
+++ b/doc/values/39.txt
@@ -0,0 +1,33 @@
+Allows a process to manipulate aspects of the kernel
+enhanced Berkeley Packet Filter (BPF) system. This is
+an execution subsystem of the kernel, that manages BPF
+programs. CAP_BPF permits a process to:
+  - create all types of BPF maps
+  - advanced verifier features:
+    - indirect variable access
+    - bounded loops
+    - BPF to BPF function calls
+    - scalar precision tracking
+    - larger complexity limits
+    - dead code elimination
+    - potentially other features
+
+Other capabilities can be used together with CAP_BPF to
+further manipulate the BPF system:
+  - CAP_PERFMON relaxes the verifier checks as follows:
+    - BPF programs can use pointer-to-integer
+      conversions
+    - speculation attack hardening measures can be
+      bypassed
+    - bpf_probe_read to read arbitrary kernel memory is
+      permitted
+    - bpf_trace_printk to print the content of kernel
+      memory
+  - CAP_SYS_ADMIN permits the following:
+    - use of bpf_probe_write_user
+    - iteration over the system-wide loaded programs,
+      maps, links, BTFs and conversion of their IDs to file
+      descriptors.
+  - CAP_PERFMON is required to load tracing programs.
+  - CAP_NET_ADMIN is required to load networking
+    programs.
diff --git a/doc/values/4.txt b/doc/values/4.txt
new file mode 100644
index 0000000..5797cf8
--- /dev/null
+++ b/doc/values/4.txt
@@ -0,0 +1,4 @@
+Allows a process to set the S_ISUID and S_ISGID bits of
+the file permissions, even when the process' effective
+UID or GID/supplementary GIDs do not match that of the
+file.
diff --git a/doc/values/40.txt b/doc/values/40.txt
new file mode 100644
index 0000000..c5993cf
--- /dev/null
+++ b/doc/values/40.txt
@@ -0,0 +1,4 @@
+Allows a process to perform checkpoint
+and restore operations. Also permits
+explicit PID control via clone3() and
+also writing to ns_last_pid.
diff --git a/doc/values/5.txt b/doc/values/5.txt
new file mode 100644
index 0000000..c4ded8e
--- /dev/null
+++ b/doc/values/5.txt
@@ -0,0 +1,3 @@
+Allows a process to send a kill(2) signal to any other
+process - overriding the limitation that there be a
+[E]UID match between source and target process.
diff --git a/doc/values/6.txt b/doc/values/6.txt
new file mode 100644
index 0000000..4ccc78b
--- /dev/null
+++ b/doc/values/6.txt
@@ -0,0 +1,5 @@
+Allows a process to freely manipulate its own GIDs:
+  - arbitrarily set the GID, EGID, REGID, RESGID values
+  - arbitrarily set the supplementary GIDs
+  - allows the forging of GID credentials passed over a
+    socket
diff --git a/doc/values/7.txt b/doc/values/7.txt
new file mode 100644
index 0000000..fbc1240
--- /dev/null
+++ b/doc/values/7.txt
@@ -0,0 +1,5 @@
+Allows a process to freely manipulate its own UIDs:
+  - arbitrarily set the UID, EUID, REUID and RESUID
+    values
+  - allows the forging of UID credentials passed over a
+    socket
diff --git a/doc/values/8.txt b/doc/values/8.txt
new file mode 100644
index 0000000..de0b47c
--- /dev/null
+++ b/doc/values/8.txt
@@ -0,0 +1,24 @@
+Allows a process to freely manipulate its inheritable
+capabilities.
+
+Linux supports the POSIX.1e Inheritable set, the POSIX.1e (X
+vector) known in Linux as the Bounding vector, as well as
+the Linux extension Ambient vector.
+
+This capability permits dropping bits from the Bounding
+vector (ie. raising B bits in the libcap IAB
+representation). It also permits the process to raise
+Ambient vector bits that are both raised in the Permitted
+and Inheritable sets of the process. This capability cannot
+be used to raise Permitted bits, Effective bits beyond those
+already present in the process' permitted set, or
+Inheritable bits beyond those present in the Bounding
+vector.
+
+[Historical note: prior to the advent of file capabilities
+(2008), this capability was suppressed by default, as its
+unsuppressed behavior was not auditable: it could
+asynchronously grant its own Permitted capabilities to and
+remove capabilities from other processes arbitrarily. The
+former leads to undefined behavior, and the latter is better
+served by the kill system call.]
diff --git a/doc/values/9.txt b/doc/values/9.txt
new file mode 100644
index 0000000..651e1a0
--- /dev/null
+++ b/doc/values/9.txt
@@ -0,0 +1,2 @@
+Allows a process to modify the S_IMMUTABLE and
+S_APPEND file attributes.
diff --git a/go/.gitignore b/go/.gitignore
new file mode 100644
index 0000000..96fe4c0
--- /dev/null
+++ b/go/.gitignore
@@ -0,0 +1,24 @@
+good-names.go
+compare-cap
+try-launching
+try-launching-cgo
+psx-fd
+psx-fd-cgo
+psx-signals
+psx-signals-cgo
+b210613
+b215283
+b215283-cgo
+mismatch
+mismatch-cgo
+mknames
+web
+setid
+gowns
+captree
+captrace
+ok
+vendor
+go.sum
+PSXGOPACKAGE
+CAPGOPACKAGE
diff --git a/go/Makefile b/go/Makefile
new file mode 100644
index 0000000..38c1cf3
--- /dev/null
+++ b/go/Makefile
@@ -0,0 +1,193 @@
+# Building the libcap/{cap,psx} Go packages, and examples.
+#
+# Note, we use symlinks to construct a go.mod build friendly tree. The
+# packages themselves are intended to be (ultimately) found via proxy
+# as "kernel.org/pub/linux/libs/security/libcap/cap" and
+# "kernel.org/pub/linux/libs/security/libcap/psx". However, to
+# validate their use on these paths, we fake such a structure in the
+# build tree with symlinks and a vendor directory.
+
+topdir=$(realpath ..)
+include $(topdir)/Make.Rules
+
+IMPORTDIR=kernel.org/pub/linux/libs/security/libcap
+PKGDIR=pkg/$(GOOSARCH)/$(IMPORTDIR)
+
+DEPS=../libcap/libcap.a ../libcap/libpsx.a
+TESTS=compare-cap try-launching psx-signals mismatch
+
+all: PSXGOPACKAGE CAPGOPACKAGE web setid gowns captree captrace
+
+$(DEPS):
+	$(MAKE) -C ../libcap all
+
+../progs/tcapsh-static:
+	$(MAKE) -C ../progs tcapsh-static
+
+vendor/$(IMPORTDIR):
+	mkdir -p "vendor/$(IMPORTDIR)"
+
+vendor/modules.txt: vendor/$(IMPORTDIR)
+	echo "# $(IMPORTDIR)/psx v$(GOMAJOR).$(VERSION).$(MINOR)" > vendor/modules.txt
+	echo "$(IMPORTDIR)/psx" >> vendor/modules.txt
+	echo "# $(IMPORTDIR)/cap v$(GOMAJOR).$(VERSION).$(MINOR)" >> vendor/modules.txt
+	echo "$(IMPORTDIR)/cap" >> vendor/modules.txt
+
+vendor/$(IMPORTDIR)/psx: vendor/modules.txt
+	ln -sf $(topdir)/psx vendor/$(IMPORTDIR)
+	touch ../psx
+
+vendor/$(IMPORTDIR)/cap: vendor/modules.txt
+	ln -sf $(topdir)/cap vendor/$(IMPORTDIR)
+	touch ../cap
+
+$(topdir)/libcap/cap_names.h:
+	$(MAKE) -C $(topdir)/libcap cap_names.h
+
+good-names.go: $(topdir)/libcap/cap_names.h vendor/$(IMPORTDIR)/cap mknames.go
+	CC="$(CC)" $(GO) run -mod=vendor mknames.go --header=$< --textdir=$(topdir)/doc/values | gofmt > $@ || rm -f $@
+	diff -u ../cap/names.go $@
+
+PSXGOPACKAGE: vendor/$(IMPORTDIR)/psx ../psx/*.go $(DEPS)
+	touch $@
+
+CAPGOPACKAGE: vendor/$(IMPORTDIR)/cap ../cap/*.go good-names.go PSXGOPACKAGE # stamp file; ordered after the PSXGOPACKAGE stamp
+	touch $@
+
+# Compiles something with this package to compare it to libcap. This
+# tests more when run under sudotest (see ../progs/quicktest.sh for that).
+compare-cap: compare-cap.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="1" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor $<
+
+web: ../goapps/web/web.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+ifeq ($(RAISE_GO_FILECAP),yes)
+	$(MAKE) -C ../progs setcap
+	$(SUDO) ../progs/setcap cap_setpcap,cap_net_bind_service=p web
+	@echo "NOTE: RAISED cap_setpcap,cap_net_bind_service ON web binary"
+endif
+
+setid: ../goapps/setid/setid.go CAPGOPACKAGE PSXGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+
+gowns: ../goapps/gowns/gowns.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+
+captree: ../goapps/captree/captree.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+
+captrace: ../goapps/captrace/captrace.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+
+ok: ok.go vendor/modules.txt
+	CC="$(CC)" CGO_ENABLED="0" $(GO) build $(GO_BUILD_FLAGS)  -mod=vendor $<
+
+try-launching: try-launching.go CAPGOPACKAGE ok
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor $<
+ifeq ($(CGO_REQUIRED),0)
+	CC="$(CC)" CGO_ENABLED="1" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@-cgo $<
+endif
+
+# This is a test case developed from the deadlock investigation,
+# https://github.com/golang/go/issues/50113 . Note the psx-fd.go code
+# works when compiled CGO_ENABLED=1, but deadlocks when compiled
+# CGO_ENABLED=0. This is true for go1.16 and go1.17. The go1.18
+# release fixed this by rewriting the AllThreadsSyscall support, but
+# the large change was not backported. (See noted bug for a much
+# smaller patch for this issue on those older releases.)
+psx-fd: psx-fd.go PSXGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+
+ifeq ($(CGO_REQUIRED),0)
+psx-fd-cgo: psx-fd.go PSXGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="1" $(CGO_LDFLAGS_ALLOW) $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+endif
+
+psx-signals: psx-signals.go PSXGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor $<
+
+ifeq ($(CGO_REQUIRED),0)
+psx-signals-cgo: psx-signals.go PSXGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="1" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+endif
+
+b210613: b210613.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor $<
+
+b215283: b215283.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor $<
+
+ifeq ($(CGO_REQUIRED),0)
+b215283-cgo: b215283.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="1" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+endif
+
+mismatch: mismatch.go PSXGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor $<
+
+ifeq ($(CGO_REQUIRED),0)
+mismatch-cgo: mismatch.go CAPGOPACKAGE
+	CC="$(CC)" CGO_ENABLED="1" $(CGO_LDFLAGS_ALLOW) CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build $(GO_BUILD_FLAGS) -mod=vendor -o $@ $<
+endif
+
+test: setid gowns captree psx-fd $(TESTS)
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) test -mod=vendor $(IMPORTDIR)/psx
+	CC="$(CC)" CGO_ENABLED="$(CGO_REQUIRED)" $(CGO_LDFLAGS_ALLOW) $(GO) test -mod=vendor $(IMPORTDIR)/cap
+	LD_LIBRARY_PATH=../libcap ./compare-cap
+	./psx-signals
+	./mismatch || exit 0 ; exit 1
+	timeout 5 ./psx-fd || echo "this is a known Go bug"
+ifeq ($(CGO_REQUIRED),0)
+	$(MAKE) psx-signals-cgo mismatch-cgo psx-fd-cgo
+	./psx-signals-cgo
+	./mismatch-cgo || exit 0 ; exit 1
+	./psx-fd-cgo
+endif
+	./setid --caps=false
+	./gowns -- -c "echo gowns runs"
+	./captree 0
+
+# Note, the user namespace doesn't require sudo, but I wanted to avoid
+# requiring that the hosting kernel supports user namespaces for the
+# regular test case.
+sudotest: test ../progs/tcapsh-static b210613 b215283
+	../progs/tcapsh-static --has-b=cap_sys_admin || exit 0 && ./gowns --ns -- -c "echo gowns runs with user namespace"
+	./try-launching
+ifeq ($(CGO_REQUIRED),0)
+	./try-launching-cgo
+endif
+	$(SUDO) ./try-launching
+ifeq ($(CGO_REQUIRED),0)
+	$(SUDO) ./try-launching-cgo
+endif
+	$(SUDO) ../progs/tcapsh-static --cap-uid=$$(id -u) --caps="cap_setpcap=ep" --iab="^cap_setpcap" -- -c ./b210613
+	$(SUDO) ./b215283
+ifeq ($(CGO_REQUIRED),0)
+	$(MAKE) b215283-cgo
+	$(SUDO) ./b215283-cgo
+endif
+
+
+# As of libcap-2.55 we stopped installing the cap and psx packages as
+# part of the install.  Most distribution's packagers skip the Go
+# builds, so it was not well used any way. The new hotness is to just
+# use Go modules and download the packages from a tagged release in
+# the git repository. For an example of how to do this from scratch:
+#
+#   https://sites.google.com/site/fullycapable/getting-started-with-go/building-go-programs-that-manipulate-capabilities
+#
+# For those brave souls that do include the Go build (testing) as part
+# of their packaging, we reward them with a copy of the captree
+# utility!
+install: all
+	mkdir -p -m 0755 $(FAKEROOT)$(SBINDIR)
+	install -m 0755 captree $(FAKEROOT)$(SBINDIR)
+
+clean:
+	rm -f *.o *.so *~ mknames ok good-names.go
+	rm -f web setid gowns captree captrace
+	rm -f compare-cap try-launching try-launching-cgo
+	rm -f $(topdir)/cap/*~ $(topdir)/psx/*~
+	rm -f b210613 b215283 b215283-cgo psx-signals psx-signals-cgo
+	rm -f mismatch mismatch-cgo psx-fd psx-fd-cgo
+	rm -fr vendor CAPGOPACKAGE PSXGOPACKAGE go.sum
diff --git a/go/b210613.go b/go/b210613.go
new file mode 100644
index 0000000..2bced06
--- /dev/null
+++ b/go/b210613.go
@@ -0,0 +1,21 @@
+// Program b210613 reproduces the code reported in:
+//
+//  https://bugzilla.kernel.org/show_bug.cgi?id=210613
+//
+// This file is evolved directly from the reproducer attached to that
+// bug report originally authored by Lorenz Bauer.
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+func main() {
+	if err := cap.ModeNoPriv.Set(); err != nil { // any error from setting ModeNoPriv is fatal
+		log.Fatalf("error dropping privilege: %v", err)
+	}
+	fmt.Println("b210613: PASSED") // reached only when Set() returned nil
+}
diff --git a/go/b215283.go b/go/b215283.go
new file mode 100644
index 0000000..26596b6
--- /dev/null
+++ b/go/b215283.go
@@ -0,0 +1,47 @@
+// Program b215283 requires privilege to execute and is a minimally adapted
+// version of a test case provided by Lorenz Bauer as a reproducer for a
+// problem he found and reported in:
+//
+//    https://bugzilla.kernel.org/show_bug.cgi?id=215283
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+func main() {
+	const secbits = cap.SecbitNoRoot | cap.SecbitNoSetUIDFixup
+	// Preconditions: cap_setpcap must be permitted and secbits must read 0.
+	if v, err := cap.GetProc().GetFlag(cap.Permitted, cap.SETPCAP); err != nil {
+		fmt.Printf("failed to get flag value: %v\n", err)
+		os.Exit(1)
+	} else if !v {
+		fmt.Printf("test requires cap_setpcap: found %q\n", cap.GetProc())
+		os.Exit(1)
+	}
+	if bits := cap.GetSecbits(); bits != 0 {
+		fmt.Printf("test expects secbits=0 to run; found: 0%o\n", bits)
+		os.Exit(1)
+	}
+
+	fmt.Println("secbits:", cap.GetSecbits(), " caps:", cap.GetProc())
+
+	l := cap.FuncLauncher(func(interface{}) error {
+		return cap.NewSet().SetProc()
+	})
+	// The launched function applies a freshly created cap.NewSet() via SetProc().
+	if _, err := l.Launch(nil); err != nil {
+		fmt.Printf("launch failed: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Println("secbits:", cap.GetSecbits(), " caps:", cap.GetProc())
+	// Finally, try to set the secbits; a failure here fails the test with exit status 1.
+	if err := secbits.Set(); err != nil {
+		fmt.Printf("set securebits: %v\n", err.Error())
+		os.Exit(1)
+	}
+}
diff --git a/go/cgo-required.sh b/go/cgo-required.sh
new file mode 100755
index 0000000..f9afa52
--- /dev/null
+++ b/go/cgo-required.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Reports whether syscall.AllThreadsSyscall is available to the
+# working go runtime: prints "0" when "go doc syscall" mentions it
+# and "1" when it does not. In the latter case we always have to use
+# libcap/psx to get POSIX semantics for syscalls that change
+# security state.
+#
+# The optional first argument names the go command to run; when it
+# is absent or empty, "go" is used.
+export GO="${1:-go}"
+
+if ${GO} doc syscall 2>/dev/null | grep -q AllThreadsSyscall; then
+    echo "0"
+else
+    echo "1"
+fi
diff --git a/go/compare-cap.go b/go/compare-cap.go
new file mode 100644
index 0000000..064d5fa
--- /dev/null
+++ b/go/compare-cap.go
@@ -0,0 +1,386 @@
+// Program compare-cap is a sanity check that Go's cap package is
+// inter-operable with the C libcap.
+package main
+
+import (
+	"log"
+	"os"
+	"syscall"
+	"unsafe"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// #include <stdlib.h>
+// #include <sys/capability.h>
+// #cgo CFLAGS: -I../libcap/include
+// #cgo LDFLAGS: -L../libcap -lcap
+import "C"
+
+// tryFileCaps attempts to use the cap package to manipulate file
+// capabilities. No reference to libcap in this function.
+func tryFileCaps() {
+	saved := cap.GetProc()
+
+	// Capabilities we will place on a file.
+	want := cap.NewSet()
+	if err := want.SetFlag(cap.Permitted, true, cap.SETFCAP, cap.DAC_OVERRIDE); err != nil {
+		log.Fatalf("failed to explore desired file capability: %v", err)
+	}
+	if err := want.SetFlag(cap.Effective, true, cap.SETFCAP, cap.DAC_OVERRIDE); err != nil {
+		log.Fatalf("failed to raise the effective bits: %v", err)
+	}
+
+	if perm, err := saved.GetFlag(cap.Permitted, cap.SETFCAP); err != nil {
+		log.Fatalf("failed to read capability: %v", err)
+	} else if !perm {
+		log.Printf("skipping file cap tests - insufficient privilege")
+		return
+	}
+
+	if err := saved.ClearFlag(cap.Effective); err != nil {
+		log.Fatalf("failed to drop effective: %v", err)
+	}
+	if err := saved.SetProc(); err != nil {
+		log.Fatalf("failed to limit capabilities: %v", err)
+	}
+
+	// Failing attempt to remove capabilities.
+	var empty *cap.Set
+	if err := empty.SetFile(os.Args[0]); err != syscall.EPERM {
+		log.Fatalf("failed to be blocked from removing filecaps: %v", err)
+	}
+
+	// The privilege we want (in the case we are root, we need the
+	// DAC_OVERRIDE too).
+	working, err := saved.Dup()
+	if err != nil {
+		log.Fatalf("failed to duplicate (%v): %v", saved, err)
+	}
+	if err := working.SetFlag(cap.Effective, true, cap.DAC_OVERRIDE, cap.SETFCAP); err != nil {
+		log.Fatalf("failed to raise effective: %v", err)
+	}
+
+	// Critical (privilege using) section:
+	if err := working.SetProc(); err != nil {
+		log.Fatalf("failed to enable first effective privilege: %v", err)
+	}
+	// Delete capability
+	if err := empty.SetFile(os.Args[0]); err != nil && err != syscall.ENODATA {
+		log.Fatalf("blocked from removing filecaps: %v", err)
+	}
+	if got, err := cap.GetFile(os.Args[0]); err == nil {
+		log.Fatalf("read deleted file caps: %v", got)
+	}
+	// Create file caps (this use employs the effective bit).
+	if err := want.SetFile(os.Args[0]); err != nil {
+		log.Fatalf("failed to set file capability: %v", err)
+	}
+	if err := saved.SetProc(); err != nil {
+		log.Fatalf("failed to lower effective capability: %v", err)
+	}
+	// End of critical section.
+
+	if got, err := cap.GetFile(os.Args[0]); err != nil {
+		log.Fatalf("failed to read caps: %v", err)
+	} else if is, was := got.String(), want.String(); is != was {
+		log.Fatalf("read file caps do not match desired: got=%q want=%q", is, was)
+	}
+
+	// Now, do it all again but this time on an open file.
+	f, err := os.Open(os.Args[0])
+	if err != nil {
+		log.Fatalf("failed to open %q: %v", os.Args[0], err)
+	}
+	defer f.Close()
+
+	// Failing attempt to remove capabilities.
+	if err := empty.SetFd(f); err != syscall.EPERM {
+		log.Fatalf("failed to be blocked from fremoving filecaps: %v", err)
+	}
+
+	// For the next section, we won't set the effective bit on the file.
+	want.ClearFlag(cap.Effective)
+
+	// Critical (privilege using) section:
+	if err := working.SetProc(); err != nil {
+		log.Fatalf("failed to enable effective privilege: %v", err)
+	}
+	if err := empty.SetFd(f); err != nil && err != syscall.ENODATA {
+		log.Fatalf("blocked from fremoving filecaps: %v", err)
+	}
+	if got, err := cap.GetFd(f); err == nil {
+		log.Fatalf("read fdeleted file caps: %v", got)
+	}
+	// This one does not set the effective bit.
+	if err := want.SetFd(f); err != nil {
+		log.Fatalf("failed to fset file capability: %v", err)
+	}
+	if got, err := cap.GetFd(f); err != nil {
+		log.Fatalf("failed to fread caps: %v", err)
+	} else if is, was := got.String(), want.String(); is != was {
+		log.Fatalf("fread file caps do not match desired: got=%q want=%q", is, was)
+	}
+	if err := empty.SetFd(f); err != nil && err != syscall.ENODATA {
+		log.Fatalf("blocked from cleanup fremoving filecaps: %v", err)
+	}
+	if err := saved.SetProc(); err != nil {
+		log.Fatalf("failed to lower effective capability: %v", err)
+	}
+	// End of critical section.
+}
+
+// tryProcCaps performs a set of convenience functions and compares
+// the results with those seen by libcap. It returns early (with a log
+// line) if cap_setpcap is not permitted. Otherwise, on return the
+// running process has no privilege left, so exiting is the only option.
+func tryProcCaps() {
+	c := cap.GetProc()
+	if v, err := c.GetFlag(cap.Permitted, cap.SETPCAP); err != nil {
+		log.Fatalf("failed to read permitted setpcap: %v", err)
+	} else if !v {
+		log.Printf("skipping proc cap tests - insufficient privilege")
+		return
+	}
+	if err := cap.SetUID(99); err != nil {
+		log.Fatalf("failed to set uid=99: %v", err)
+	}
+	if u := syscall.Getuid(); u != 99 {
+		log.Fatalf("uid=99 did not take: got=%d", u)
+	}
+	if err := cap.SetGroups(98, 100, 101); err != nil {
+		log.Fatalf("failed to set groups=98 [100, 101]: %v", err)
+	}
+	if g := syscall.Getgid(); g != 98 {
+		log.Fatalf("gid=98 did not take: got=%d", g)
+	}
+	if gs, err := syscall.Getgroups(); err != nil {
+		log.Fatalf("error getting groups: %v", err)
+	} else if len(gs) != 2 || gs[0] != 100 || gs[1] != 101 {
+		log.Fatalf("wrong set of groups: got=%v want=[100 101]", gs)
+	}
+
+	if mode := cap.GetMode(); mode != cap.ModeHybrid {
+		log.Fatalf("initial mode should be 4 (HYBRID), got: %d (%v)", mode, mode)
+	}
+
+	// To distinguish PURE1E and PURE1E_INIT we need an inheritable capability set.
+	working := cap.GetProc()
+	if err := working.SetFlag(cap.Inheritable, true, cap.SETPCAP); err != nil {
+		log.Fatalf("unable to raise inheritable bit: %v", err)
+	}
+	if err := working.SetProc(); err != nil {
+		log.Fatalf("failed to add inheritable bit: %v", err)
+	}
+
+	for i, mode := range []cap.Mode{cap.ModePure1E, cap.ModePure1EInit, cap.ModeNoPriv} {
+		if err := mode.Set(); err != nil {
+			log.Fatalf("[%d] in mode=%v and failed to set mode to %d (%v): %v", i, cap.GetMode(), mode, mode, err)
+		}
+		if got := cap.GetMode(); got != mode {
+			log.Fatalf("[%d] unable to recognise mode %d (%v), got: %d (%v)", i, mode, mode, got, got)
+		}
+		cM := C.cap_get_mode() // cross-check the Go view against C libcap
+		if mode != cap.Mode(cM) {
+			log.Fatalf("[%d] C and Go disagree on mode: %d vs %d", i, cM, mode)
+		}
+	}
+
+	// The current process is now without any access to privilege.
+}
+
+func main() {
+	// Use the C libcap to obtain a non-trivial capability in text form (from init).
+	cC := C.cap_get_pid(1)
+	if cC == nil {
+		log.Fatal("basic c caps from init function failure")
+	}
+	defer C.cap_free(unsafe.Pointer(cC))
+	var tCLen C.ssize_t
+	tC := C.cap_to_text(cC, &tCLen)
+	if tC == nil {
+		log.Fatal("basic c init caps -> text failure")
+	}
+	defer C.cap_free(unsafe.Pointer(tC))
+
+	importT := C.GoString(tC)
+	if got, want := len(importT), int(tCLen); got != want {
+		log.Fatalf("C string import failed: got=%d [%q] want=%d", got, importT, want)
+	}
+
+	// Validate that it can be decoded in Go.
+	cGo, err := cap.FromText(importT)
+	if err != nil {
+		log.Fatalf("go parsing of c text import failed: %v", err)
+	}
+
+	// Validate that it matches the one directly loaded in Go.
+	c, err := cap.GetPID(1)
+	if err != nil {
+		log.Fatalf("...failed to read init's capabilities: %v", err)
+	}
+	tGo := c.String()
+	if got, want := tGo, cGo.String(); got != want {
+		log.Fatalf("go text rep does not match c: got=%q, want=%q", got, want)
+	}
+
+	// Export it in text form again from Go.
+	tForC := C.CString(tGo)
+	defer C.free(unsafe.Pointer(tForC))
+
+	// Validate it can be encoded in C.
+	cC2 := C.cap_from_text(tForC)
+	if cC2 == nil {
+		log.Fatal("go text rep not parsable by c")
+	}
+	defer C.cap_free(unsafe.Pointer(cC2))
+
+	// Validate that it can be exported in binary form in C
+	const enoughForAnyone = 1000
+	eC := make([]byte, enoughForAnyone)
+	eCLen := C.cap_copy_ext(unsafe.Pointer(&eC[0]), cC2, C.ssize_t(len(eC)))
+	if eCLen < 5 {
+		log.Fatalf("c export yielded bad length: %d", eCLen)
+	}
+
+	// Validate that it can be imported from binary in Go
+	iGo, err := cap.Import(eC[:eCLen])
+	if err != nil {
+		log.Fatalf("go import of c binary failed: %v", err)
+	}
+	if got, want := iGo.String(), importT; got != want {
+		log.Fatalf("go import of c binary miscompare: got=%q want=%q", got, want)
+	}
+
+	// Validate that it can be exported in binary in Go
+	iE, err := iGo.Export()
+	if err != nil {
+		log.Fatalf("go failed to export binary: %v", err)
+	}
+
+	// Validate that it can be imported in binary in C
+	iC := C.cap_copy_int_check(unsafe.Pointer(&iE[0]), C.ssize_t(len(iE)))
+	if iC == nil {
+		log.Fatal("c failed to import go binary")
+	}
+	defer C.cap_free(unsafe.Pointer(iC))
+	fC := C.cap_to_text(iC, &tCLen)
+	if fC == nil {
+		log.Fatal("basic c init caps -> text failure")
+	}
+	defer C.cap_free(unsafe.Pointer(fC))
+	if got, want := C.GoString(fC), importT; got != want {
+		log.Fatalf("c import from go yielded bad caps: got=%q want=%q", got, want)
+	}
+
+	// Validate that everyone agrees what all is:
+	want := "=ep"
+	all, err := cap.FromText("all=ep")
+	if err != nil {
+		log.Fatalf("unable to parse all=ep: %v", err)
+	}
+	if got := all.String(); got != want {
+		log.Fatalf("all decode failed in Go: got=%q, want=%q", got, want)
+	}
+
+	// Validate some random values stringify consistently between
+	// libcap.cap_to_text() and (*cap.Set).String().
+	mb := cap.MaxBits()
+	sample := cap.NewSet()
+	for c := cap.Value(0); c < 7*mb; c += 3 {
+		n := int(c)
+		raise, f := c%mb, cap.Flag(c/mb)%3
+		sample.SetFlag(f, true, raise)
+		if v, err := cap.FromText(sample.String()); err != nil {
+			log.Fatalf("[%d] cap to text for %q not reversible: %v", n, sample, err)
+		} else if cf, err := v.Compare(sample); err != nil {
+			log.Fatalf("[%d] FromText generated bad capability from %q: %v", n, sample, err)
+		} else if cf != 0 {
+			log.Fatalf("[%d] text import got=%q want=%q", n, v, sample)
+		}
+		e, err := sample.Export()
+		if err != nil {
+			log.Fatalf("[%d] failed to export %q: %v", n, sample, err)
+		}
+		i, err := cap.Import(e)
+		if err != nil {
+			log.Fatalf("[%d] failed to import %q: %v", n, sample, err)
+		}
+		if cf, err := i.Compare(sample); err != nil {
+			log.Fatalf("[%d] failed to compare %q vs original:%q: %v", n, i, sample, err)
+		} else if cf != 0 {
+			log.Fatalf("[%d] import got=%q want=%q", n, i, sample)
+		}
+		// Confirm that importing this portable binary
+		// representation in libcap and converting to text,
+		// generates the same text as Go generates. This was
+		// broken prior to v0.2.41.
+		cCap := C.cap_copy_int(unsafe.Pointer(&e[0]))
+		if cCap == nil {
+			log.Fatalf("[%d] C import failed for %q export", n, sample)
+		}
+		var tCLen C.ssize_t
+		tC := C.cap_to_text(cCap, &tCLen)
+		if tC == nil {
+			log.Fatalf("[%d] basic c init caps -> text failure", n)
+		}
+		C.cap_free(unsafe.Pointer(cCap))
+		importT := C.GoString(tC)
+		C.cap_free(unsafe.Pointer(tC))
+		if got, want := len(importT), int(tCLen); got != want {
+			log.Fatalf("[%d] C text generated wrong length: Go=%d, C=%d", n, got, want)
+		}
+		if got, want := importT, sample.String(); got != want {
+			log.Fatalf("[%d] C and Go text rep disparity: C=%q Go=%q", n, got, want)
+		}
+	}
+
+	iab, err := cap.IABFromText("cap_chown,!cap_setuid,^cap_setgid")
+	if err != nil {
+		log.Fatalf("failed to initialize iab from text: %v", err)
+	}
+	cIAB := C.cap_iab_init()
+	defer C.cap_free(unsafe.Pointer(cIAB))
+	for c := cap.MaxBits(); c > 0; {
+		c--
+		if en, err := iab.GetVector(cap.Inh, c); err != nil {
+			log.Fatalf("failed to read iab.i[%v]: %v", c, err)
+		} else if en {
+			if C.cap_iab_set_vector(cIAB, C.CAP_IAB_INH, C.cap_value_t(int(c)), C.CAP_SET) != 0 {
+				log.Fatalf("failed to set C's IAB.I %v", c)
+			}
+		}
+		if en, err := iab.GetVector(cap.Amb, c); err != nil {
+			log.Fatalf("failed to read iab.a[%v]: %v", c, err)
+		} else if en {
+			if C.cap_iab_set_vector(cIAB, C.CAP_IAB_AMB, C.cap_value_t(int(c)), C.CAP_SET) != 0 {
+				log.Fatalf("failed to set C's IAB.A %v", c)
+			}
+		}
+		if en, err := iab.GetVector(cap.Bound, c); err != nil {
+			log.Fatalf("failed to read iab.b[%v]: %v", c, err)
+		} else if en {
+			if C.cap_iab_set_vector(cIAB, C.CAP_IAB_BOUND, C.cap_value_t(int(c)), C.CAP_SET) != 0 {
+				log.Fatalf("failed to set C's IAB.B %v", c)
+			}
+		}
+	}
+	iabC := C.cap_iab_to_text(cIAB)
+	if iabC == nil {
+		log.Fatalf("failed to get text from C for %q", iab)
+	}
+	defer C.cap_free(unsafe.Pointer(iabC))
+	if got, want := C.GoString(iabC), iab.String(); got != want {
+		log.Fatalf("IAB for Go and C differ: got=%q, want=%q", got, want)
+	}
+
+	// Next, we attempt to manipulate some file capabilities on
+	// the running program.  These are optional, based on whether
+	// the current program is capable enough and do not involve
+	// any cgo calls to libcap.
+	tryFileCaps()
+
+	// Nothing left to do but exit after this one.
+	tryProcCaps()
+	log.Printf("compare-cap success!")
+}
diff --git a/go/go-mod-index.html b/go/go-mod-index.html
new file mode 100644
index 0000000..9cfe13f
--- /dev/null
+++ b/go/go-mod-index.html
@@ -0,0 +1,17 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<meta name="go-import" content="kernel.org/pub/linux/libs/security/libcap git https://git.kernel.org/pub/scm/libs/libcap/libcap.git">
+<meta http-equiv="refresh" content="10; url=https://sites.google.com/site/fullycapable">
+</head>
+<body>
+  Redirecting in 10 seconds to
+  the <a href="https://sites.google.com/site/fullycapable">Fully
+    Capable</a> project page, the home of these Go packages:
+  <ul>
+    <li><tt>"kernel.org/pub/linux/libs/security/libcap/psx"</tt></li>
+    <li><tt>"kernel.org/pub/linux/libs/security/libcap/cap"</tt></li>
+  </ul>
+</body>
+</html>
diff --git a/go/go.mod b/go/go.mod
new file mode 100644
index 0000000..378b218
--- /dev/null
+++ b/go/go.mod
@@ -0,0 +1,8 @@
+module main
+
+go 1.11
+
+require (
+	kernel.org/pub/linux/libs/security/libcap/cap v1.2.69
+	kernel.org/pub/linux/libs/security/libcap/psx v1.2.69
+)
diff --git a/go/mismatch.go b/go/mismatch.go
new file mode 100644
index 0000000..bbcf6eb
--- /dev/null
+++ b/go/mismatch.go
@@ -0,0 +1,15 @@
+// Program mismatch should panic because the syscall being requested
+// never returns consistent results.
+package main
+
+import (
+	"fmt"
+	"syscall"
+
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+func main() {
+	tid, _, err := psx.Syscall3(syscall.SYS_GETTID, 0, 0, 0)
+	fmt.Printf("gettid() -> %d: %v\n", tid, err)
+}
diff --git a/go/mknames.go b/go/mknames.go
new file mode 100644
index 0000000..ef348ae
--- /dev/null
+++ b/go/mknames.go
@@ -0,0 +1,123 @@
+// Program mknames parses the cap_names.h file and creates an
+// equivalent names.go file including comments on each cap.Value from
+// the documentation directory.
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"strings"
+)
+
+var (
+	header = flag.String("header", "", "name of header file")
+	text   = flag.String("textdir", "", "directory name for value txt files")
+)
+
+func main() {
+	flag.Parse()
+
+	if *header == "" {
+		log.Fatal("usage: mknames --header=.../cap_names.h")
+	}
+	d, err := ioutil.ReadFile(*header)
+	if err != nil {
+		log.Fatalf("reading %q: %v", *header, err)
+	}
+
+	b := bytes.NewBuffer(d)
+
+	var list []string
+	for {
+		line, err := b.ReadString('\n')
+		if err == io.EOF {
+			break
+		}
+		// Keep the text between the first pair of double
+		// quotes. Lines without a matched pair are skipped
+		// (slicing on a missing closing quote would panic).
+		parts := strings.SplitN(line, `"`, 3)
+		if len(parts) < 3 {
+			continue
+		}
+		list = append(list, parts[1])
+	}
+
+	// generate package file names.go
+	fmt.Print(`package cap
+
+/* ** DO NOT EDIT THIS FILE. IT WAS AUTO-GENERATED BY LIBCAP'S GO BUILDER (mknames.go) ** */
+
+// NamedCount holds the number of capability values, with official
+// names, known at the time this libcap/cap version was released. The
+// "../libcap/cap" package is fully able to manipulate higher numbered
+// capability values by numerical value. However, if you find
+// cap.NamedCount < cap.MaxBits(), it is probably time to upgrade this
+// package on your system.
+//
+// FWIW the userspace tool '/sbin/capsh' also contains a runtime check
+// for the condition that libcap is behind the running kernel in this
+// way.
+const NamedCount = `, len(list), `
+
+// CHOWN etc., are the named capability values of the Linux
+// kernel. The canonical source for each name is the
+// "uapi/linux/capabilities.h" file.  Some values may not be available
+// (yet) where the kernel is older.  The actual number of capabities
+// supported by the running kernel can be obtained using the
+// cap.MaxBits() function.
+const (
+`)
+	bits := make(map[string]string)
+	for i, name := range list {
+		doc := fmt.Sprintf("%s/%d.txt", *text, i)
+		content, err := ioutil.ReadFile(doc)
+		if err != nil {
+			log.Fatalf("failed to read %q: %v", doc, err)
+		}
+		detail := strings.Split(strings.Replace(string(content), "CAP_", "cap.", -1), "\n")
+		if i != 0 {
+			fmt.Println()
+		}
+		v := strings.ToUpper(strings.TrimPrefix(name, "cap_"))
+		for j, line := range detail {
+			preamble := ""
+			offset := 0
+			if j == 0 {
+				if !strings.HasPrefix(line, "Allows ") {
+					log.Fatalf("line should begin \"Allows \": got %s:%d:%q", doc, j, line)
+				}
+				preamble = fmt.Sprint(v, " a") // "Allows ..." becomes "<NAME> allows ..."
+				offset = 1
+			}
+			if len(line) != 0 || j != len(detail)-1 {
+				fmt.Printf(" // %s%s\n", preamble, line[offset:])
+			}
+		}
+		bits[name] = v
+		if i == 0 {
+			fmt.Println(v, " Value =  iota")
+		} else {
+			fmt.Println(v)
+		}
+	}
+	fmt.Print(`)
+
+var names = map[Value]string{
+`)
+	for _, name := range list {
+		fmt.Printf("%s: %q,\n", bits[name], name)
+	}
+	fmt.Print(`}
+
+var bits = map[string]Value {
+`)
+	for _, name := range list {
+		fmt.Printf("%q: %s,\n", name, bits[name])
+	}
+	fmt.Println(`}`)
+}
diff --git a/go/ok.go b/go/ok.go
new file mode 100644
index 0000000..509638e
--- /dev/null
+++ b/go/ok.go
@@ -0,0 +1,9 @@
+// Program ok exits with status zero. We use it as a chroot test.
+// To avoid any confusion, it needs to be linked statically.
+package main
+
+import "os"
+
+func main() {
+	os.Exit(0)
+}
diff --git a/go/psx-fd.go b/go/psx-fd.go
new file mode 100644
index 0000000..7aa3a76
--- /dev/null
+++ b/go/psx-fd.go
@@ -0,0 +1,25 @@
+package main
+
+import (
+	"log"
+	"os"
+	"syscall"
+	"time"
+
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+const prSetKeepCaps = 8
+
+func main() {
+	r, w, err := os.Pipe()
+	if err != nil {
+		log.Fatalf("failed to obtain pipe: %v", err)
+	}
+	data := make([]byte, 2+r.Fd())
+	go r.Read(data)
+	time.Sleep(500 * time.Millisecond)
+	psx.Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, 1, 0)
+	w.Close()
+	r.Close()
+}
diff --git a/go/psx-signals.go b/go/psx-signals.go
new file mode 100644
index 0000000..486f284
--- /dev/null
+++ b/go/psx-signals.go
@@ -0,0 +1,46 @@
+// Program psx-signals validates that the psx mechanism can coexist
+// with Go use of signals. This is an unprivileged program derived
+// from the sample code provided in this bug report:
+//
+//   https://bugzilla.kernel.org/show_bug.cgi?id=210533
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+const maxSig = 10
+const prSetKeepCaps = 8
+
+func main() {
+	sig := make(chan os.Signal, maxSig)
+	signal.Notify(sig, os.Interrupt)
+
+	fmt.Print("Toggling KEEP_CAPS ")
+	for i := 0; i < maxSig; i++ {
+		fmt.Print(".")
+		_, _, err := psx.Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, uintptr(i&1), 0)
+		if err != 0 {
+			log.Fatalf("[%d] attempt to set KEEPCAPS (to %d) failed: %v", i, i%2, err)
+		}
+	}
+
+	fmt.Println(" done")
+	fmt.Print("Wait 1 second to see if unwanted signals arrive...")
+	// Confirm no signals are delivered.
+	select {
+	case <-time.After(1 * time.Second):
+		break
+	case info := <-sig:
+		log.Fatalf("signal received: %v", info)
+	}
+	fmt.Println(" none arrived")
+	fmt.Println("PASSED")
+}
diff --git a/go/try-launching.go b/go/try-launching.go
new file mode 100644
index 0000000..b09b254
--- /dev/null
+++ b/go/try-launching.go
@@ -0,0 +1,116 @@
+// Program try-launching validates the cap.Launch feature.
+package main
+
+import (
+	"fmt"
+	"log"
+	"strings"
+	"syscall"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// tryLaunching attempts to launch a bunch of programs in parallel. It
+// first tries some unprivileged launches, and then (if privileged)
+// tries some more ambitious ones.
+func tryLaunching() {
+	cwd, err := syscall.Getwd()
+	if err != nil {
+		log.Fatalf("no working directory: %v", err)
+	}
+	root := cwd[:strings.LastIndex(cwd, "/")]
+
+	hasSysAdmin, _ := cap.GetBound(cap.SYS_ADMIN)
+
+	vs := []struct {
+		args       []string
+		fail       bool
+		callbackFn func(*syscall.ProcAttr, interface{}) error
+		chroot     string
+		iab        string
+		uid        int
+		gid        int
+		mode       cap.Mode
+		groups     []int
+	}{
+		{args: []string{root + "/go/ok"}},
+		{
+			args:   []string{root + "/progs/tcapsh-static", "--dropped=cap_chown", "--is-uid=123", "--is-gid=456", "--has-a=cap_setuid"},
+			iab:    "!cap_chown,^cap_setuid,cap_sys_admin",
+			uid:    123,
+			gid:    456,
+			groups: []int{1, 2, 3},
+			fail:   syscall.Getuid() != 0 || !hasSysAdmin,
+		},
+		{
+			args:   []string{"/ok"},
+			chroot: root + "/go",
+			fail:   syscall.Getuid() != 0,
+		},
+		{
+			args: []string{root + "/progs/tcapsh-static", "--inmode=NOPRIV", "--has-no-new-privs"},
+			mode: cap.ModeNoPriv,
+			fail: syscall.Getuid() != 0,
+		},
+	}
+
+	ps := make([]int, len(vs))
+	ws := make([]syscall.WaitStatus, len(vs))
+
+	for i, v := range vs {
+		e := cap.NewLauncher(v.args[0], v.args, nil)
+		e.Callback(v.callbackFn)
+		if v.chroot != "" {
+			e.SetChroot(v.chroot)
+		}
+		if v.uid != 0 {
+			e.SetUID(v.uid)
+		}
+		if v.gid != 0 {
+			e.SetGroups(v.gid, v.groups)
+		}
+		if v.mode != 0 {
+			e.SetMode(v.mode)
+		}
+		if v.iab != "" {
+			if iab, err := cap.IABFromText(v.iab); err != nil {
+				log.Fatalf("failed to parse iab=%q: %v", v.iab, err)
+			} else {
+				e.SetIAB(iab)
+			}
+		}
+		log.Printf("[%d] trying: %q\n", i, v.args)
+		if ps[i], err = e.Launch(nil); err != nil {
+			if v.fail {
+				continue
+			}
+			log.Fatalf("[%d] launch %q failed: %v", i, v.args, err)
+		}
+	}
+
+	for i, p := range ps {
+		if p == -1 {
+			continue
+		}
+		if pr, err := syscall.Wait4(p, &ws[i], 0, nil); err != nil {
+			log.Fatalf("wait4 <%d> failed: %v", p, err)
+		} else if p != pr {
+			log.Fatalf("wait4 <%d> returned <%d> instead", p, pr)
+		} else if ws[i] != 0 {
+			if vs[i].fail {
+				continue
+			}
+			log.Fatalf("wait4 <%d> status was %d", p, ws[i])
+		}
+	}
+}
+
+func main() {
+	if cap.LaunchSupported {
+		// The Go runtime had some OS threading bugs that
+		// prevented Launch from working. Specifically, the
+		// launch OS thread would get reused.
+		tryLaunching()
+	}
+	fmt.Println("PASSED")
+}
diff --git a/goapps/captrace/captrace.go b/goapps/captrace/captrace.go
new file mode 100644
index 0000000..1ef1ace
--- /dev/null
+++ b/goapps/captrace/captrace.go
@@ -0,0 +1,230 @@
+// Program captrace traces processes and notices when they attempt
+// kernel actions that require Effective capabilities.
+//
+// The reference material for developing this tool was the book
+// "Linux Observability with BPF" by David Calavera and Lorenzo
+// Fontana.
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+var (
+	bpftrace = flag.String("bpftrace", "bpftrace", "command to launch bpftrace")
+	debug    = flag.Bool("debug", false, "more output")
+	pid      = flag.Int("pid", -1, "PID of target process to trace (-1 = trace all)")
+)
+
+type thread struct {
+	PPID, Datum int
+	Value       cap.Value
+	Token       string
+}
+
+// mu protects these two maps.
+var mu sync.Mutex
+
+// tids tracks which PIDs we are following.
+var tids = make(map[int]int)
+
+// cache tracks in-flight cap_capable invocations.
+var cache = make(map[int]*thread)
+
+// event adds or resolves a capability event.
+func event(add bool, tid int, th *thread) {
+	mu.Lock()
+	defer mu.Unlock()
+
+	if len(tids) != 0 {
+		if _, ok := tids[th.PPID]; !ok {
+			if *debug {
+				log.Printf("dropped %d %d %v event", th.PPID, tid, *th)
+			}
+			return
+		}
+		tids[tid] = th.PPID
+		tids[th.PPID] = th.PPID
+	}
+
+	if add {
+		cache[tid] = th
+	} else {
+		if b, ok := cache[tid]; ok {
+			detail := ""
+			if th.Datum < 0 {
+				detail = fmt.Sprintf(" (%v)", syscall.Errno(-th.Datum))
+			}
+			task := ""
+			if th.PPID != tid {
+				task = fmt.Sprintf("+{%d}", tid)
+			}
+			log.Printf("%-16s %d%s opt=%d %q -> %d%s", b.Token, b.PPID, task, b.Datum, b.Value, th.Datum, detail)
+		}
+		delete(cache, tid)
+	}
+}
+
+// tailTrace scans the bpftrace output, passing CB/CE records to event. It
+// unlocks mu on the first line of 4+ fields and exits if none ever arrives.
+func tailTrace(cmd *exec.Cmd, out io.Reader) {
+	launched := false
+	sc := bufio.NewScanner(out)
+	for sc.Scan() {
+		fields := strings.Split(sc.Text(), " ")
+		if len(fields) < 4 {
+			continue // ignore
+		}
+		if !launched {
+			launched = true
+			mu.Unlock()
+		}
+		switch fields[0] {
+		case "CB":
+			if len(fields) < 6 {
+				continue
+			}
+			pid, err := strconv.Atoi(fields[1])
+			if err != nil {
+				continue
+			}
+			th := &thread{
+				PPID: pid,
+			}
+			tid, err := strconv.Atoi(fields[2])
+			if err != nil {
+				continue
+			}
+			c, err := strconv.Atoi(fields[3])
+			if err != nil {
+				continue
+			}
+			th.Value = cap.Value(c)
+			aud, err := strconv.Atoi(fields[4])
+			if err != nil {
+				continue
+			}
+			th.Datum = aud
+			th.Token = strings.Join(fields[5:], " ")
+			event(true, tid, th)
+		case "CE": // the len(fields) >= 4 needed here is guaranteed by the check above
+			pid, err := strconv.Atoi(fields[1])
+			if err != nil {
+				continue
+			}
+			th := &thread{
+				PPID: pid,
+			}
+			tid, err := strconv.Atoi(fields[2])
+			if err != nil {
+				continue
+			}
+			aud, err := strconv.Atoi(fields[3])
+			if err != nil {
+				continue
+			}
+			th.Datum = aud
+			event(false, tid, th)
+		default:
+			if *debug {
+				fmt.Println("unparsable:", fields)
+			}
+		}
+	}
+	if err := sc.Err(); err != nil {
+		log.Fatalf("scanning failed: %v", err)
+	}
+	if !launched {
+		log.Fatalf("%q ended its output before producing any trace events", *bpftrace)
+	}
+}
+
+// tracer invokes bpftrace; it returns an error if the invocation fails.
+func tracer() (*exec.Cmd, error) {
+	cmd := exec.Command(*bpftrace, "-e", `kprobe:cap_capable {
+    printf("CB %d %d %d %d %s\n", pid, tid, arg2, arg3, comm);
+}
+kretprobe:cap_capable {
+    printf("CE %d %d %d\n", pid, tid, retval);
+}`)
+	out, err := cmd.StdoutPipe()
+	cmd.Stderr = os.Stderr
+	if err != nil {
+		return nil, fmt.Errorf("unable to create stdout for %q: %v", *bpftrace, err)
+	}
+	mu.Lock() // Unlocked by tailTrace on the first output from the tracer.
+	if err := cmd.Start(); err != nil {
+		return nil, fmt.Errorf("failed to start %q: %v", *bpftrace, err)
+	}
+	go tailTrace(cmd, out)
+	return cmd, nil
+}
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(flag.CommandLine.Output(), `Usage: %s [options] [command ...]
+
+This tool monitors cap_capable() kernel execution to summarize when
+Effective Flag capabilities are checked in a running process{thread}.
+The monitoring is performed indirectly using the bpftrace tool.
+
+Each line logged has a timestamp at which the tracing program is able to
+summarize the return value of the check. A return value of " -> 0" implies
+the check succeeded and confirms the process{thread} does have the
+specified Effective capability.
+
+The listed "opt=" value indicates some auditing context for why the
+kernel needed to check the capability was Effective.
+
+Options:
+`, os.Args[0])
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	tr, err := tracer()
+	if err != nil {
+		log.Fatalf("failed to start tracer: %v", err)
+	}
+
+	mu.Lock()
+
+	if *pid != -1 {
+		tids[*pid] = *pid
+	} else if len(flag.Args()) != 0 {
+		args := flag.Args()
+		cmd := exec.Command(args[0], args[1:]...)
+		cmd.Stdin = os.Stdin
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		if err := cmd.Start(); err != nil {
+			log.Fatalf("failed to start %v: %v", flag.Args(), err)
+		}
+		tids[cmd.Process.Pid] = cmd.Process.Pid
+
+		// waiting for the trace to complete is racy, so we sleep
+		// to obtain the last events then kill the tracer and wait
+		// for it to exit. Defers are in reverse order.
+		defer tr.Wait()
+		defer tr.Process.Kill()
+		defer time.Sleep(1 * time.Second)
+
+		tr = cmd
+	}
+
+	mu.Unlock()
+	tr.Wait()
+}
diff --git a/goapps/captrace/go.mod b/goapps/captrace/go.mod
new file mode 100644
index 0000000..9817252
--- /dev/null
+++ b/goapps/captrace/go.mod
@@ -0,0 +1,5 @@
+module captrace
+
+go 1.16
+
+require kernel.org/pub/linux/libs/security/libcap/cap v1.2.69
diff --git a/goapps/captree/captree.go b/goapps/captree/captree.go
new file mode 100644
index 0000000..7768b11
--- /dev/null
+++ b/goapps/captree/captree.go
@@ -0,0 +1,468 @@
+// Program captree explores a process tree rooted in the supplied
+// argument(s) and displays a process tree indicating the capabilities
+// of all the dependent PID values.
+//
+// This was inspired by the pstree utility. The key idea here, however,
+// is to explore a process tree for capability state.
+//
+// Each line of output is intended to capture a brief representation
+// of the capability state of a process (both *Set and *IAB) and
+// for its related threads.
+//
+// Ex:
+//
+//   $ bash -c 'exec captree $$'
+//   --captree(9758+{9759,9760,9761,9762})
+//
+// In the normal case, such as the above, where the targeted process
+// is not privileged, no distracting capability strings are displayed.
+// Where a process is thread group leader to a set of other thread
+// ids, they are listed as `+{...}`.
+//
+// For privileged binaries, we have:
+//
+//   $ captree 551
+//   --polkitd(551) "=ep"
+//     :>-gmain{552} "=ep"
+//     :>-gdbus{555} "=ep"
+//
+// That is, the text representation of the process capability state is
+// displayed in double quotes "..." as a suffix to the process/thread.
+// If the name of any thread of this process, or its own capability
+// state, is in some way different from the primary process then it is
+// displayed on a subsequent line prefixed with ":>-" and threads
+// sharing name and capability state are listed on that line. Here we
+// have two sub-threads with the same capability state, but unique
+// names.
+//
+// Sometimes members of a process group have different capabilities:
+//
+//   $ captree 1368
+//   --dnsmasq(1368) "cap_net_bind_service,cap_net_admin,cap_net_raw=ep"
+//     +-dnsmasq(1369) "=ep"
+//
+// Where the A and B components of the IAB tuple are non-default, the
+// output also includes these:
+//
+//   $ captree 925
+//   --dbus-broker-lau(925) [!cap_sys_rawio,!cap_mknod]
+//     +-dbus-broker(965) "cap_audit_write=eip" [!cap_sys_rawio,!cap_mknod,cap_audit_write]
+//
+// That is, the `[...]` appendage captures the IAB text representation
+// of that tuple. Note, if only the I part of that tuple is
+// non-default, it is already captured in the quoted process
+// capability state, so the IAB tuple is omitted.
+//
+// To view the complete system process map, rooted at the kernel, try
+// this:
+//
+//   $ captree 0
+//
+// To view a specific binary (as named in /proc/<PID>/status as 'Name:
+// ...'), matched by a glob, try this:
+//
+//   $ captree 'cap*ree'
+//
+// The quotes might be needed to avoid the '*' confusing your shell.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+var (
+	proc    = flag.String("proc", "/proc", "root of proc filesystem")
+	depth   = flag.Int("depth", 0, "how many processes deep (0=all)")
+	verbose = flag.Bool("verbose", false, "display empty capabilities")
+	color   = flag.Bool("color", true, "color targeted PIDs on tty in red")
+	colour  = flag.Bool("colour", true, "colour targeted PIDs on tty in red")
+)
+
+type task struct {
+	mu       sync.Mutex
+	viewed   bool
+	depth    int
+	pid      string
+	cmd      string
+	cap      *cap.Set
+	iab      *cap.IAB
+	parent   string
+	threads  []*task
+	children []string
+}
+
+func (ts *task) String() string {
+	return fmt.Sprintf("%s %q [%v] %s %v %v", ts.cmd, ts.cap, ts.iab, ts.parent, ts.threads, ts.children)
+}
+
+var (
+	wg      sync.WaitGroup
+	mu      sync.Mutex
+	colored bool
+)
+
+func isATTY() bool {
+	info, err := os.Stdout.Stat()
+	if err != nil {
+		return false // stdout cannot be inspected, so assume no tty.
+	}
+	return info.Mode()&os.ModeCharDevice != 0
+}
+
+func highlight(text string) string {
+	if !colored {
+		return text // coloring is disabled: pass the text through as is.
+	}
+	return "\033[31m" + text + "\033[0m"
+}
+
+func (ts *task) fill(pid string, n int, thread bool) {
+	defer wg.Done()
+	wg.Add(1) // accounts for the capability fetching goroutine below.
+	go func() {
+		defer wg.Done()
+		c, _ := cap.GetPID(n)
+		iab, _ := cap.IABGetPID(n)
+		ts.mu.Lock()
+		defer ts.mu.Unlock()
+		ts.pid = pid
+		ts.cap = c
+		ts.iab = iab
+	}()
+
+	d, err := ioutil.ReadFile(fmt.Sprintf("%s/%s/status", *proc, pid))
+	if err != nil {
+		ts.mu.Lock()
+		defer ts.mu.Unlock()
+		ts.cmd = "<zombie>"
+		ts.parent = "1"
+		return
+	}
+	for _, line := range strings.Split(string(d), "\n") {
+		if strings.HasPrefix(line, "Name:\t") {
+			ts.mu.Lock()
+			ts.cmd = line[6:]
+			ts.mu.Unlock()
+			continue
+		}
+		if strings.HasPrefix(line, "PPid:\t") {
+			ppid := line[6:]
+			if ppid == pid {
+				continue
+			}
+			ts.mu.Lock()
+			ts.parent = ppid
+			ts.mu.Unlock()
+		}
+	}
+	if thread {
+		return // threads are not explored for further threads.
+	}
+
+	threads, err := ioutil.ReadDir(fmt.Sprintf("%s/%s/task", *proc, pid))
+	if err != nil {
+		return
+	}
+	var ths []*task
+	for _, t := range threads {
+		tid := t.Name()
+		if tid == pid {
+			continue
+		}
+		n, err := strconv.ParseInt(tid, 10, 64) // query the thread itself, not its leader.
+		if err != nil {
+			continue
+		}
+		thread := &task{}
+		wg.Add(1)
+		go thread.fill(tid, int(n), true)
+		ths = append(ths, thread)
+	}
+	ts.mu.Lock()
+	defer ts.mu.Unlock()
+	ts.threads = ths
+}
+
+var empty = cap.NewSet()
+var noiab = cap.IABInit()
+
+// rDump prints out the tree of processes rooted at pid.
+func rDump(pids map[string]*task, requested map[string]bool, pid, stub, lstub, estub string, depth int) {
+	info, ok := pids[pid]
+	if !ok {
+		panic("programming error")
+		return
+	}
+	if info.viewed {
+		// This process (tree) has already been viewed so skip
+		// repeating it.
+		return
+	}
+	info.viewed = true
+
+	c := ""
+	set := info.cap
+	if set != nil {
+		if val, _ := set.Cf(empty); val != 0 || *verbose {
+			c = fmt.Sprintf(" %q", set)
+		}
+	}
+	iab := ""
+	tup := info.iab
+	if tup != nil {
+		if val, _ := tup.Cf(noiab); val.Has(cap.Bound) || val.Has(cap.Amb) || *verbose {
+			iab = fmt.Sprintf(" [%s]", tup)
+		}
+	}
+	var misc []*task
+	var same []string
+	for _, t := range info.threads {
+		if val, _ := t.cap.Cf(set); val != 0 {
+			misc = append(misc, t)
+			continue
+		}
+		if val, _ := t.iab.Cf(tup); val != 0 {
+			misc = append(misc, t)
+			continue
+		}
+		if t.cmd != info.cmd {
+			misc = append(misc, t)
+			continue
+		}
+		same = append(same, t.pid)
+	}
+	tids := ""
+	if len(same) != 0 {
+		tids = fmt.Sprintf("+{%s}", strings.Join(same, ","))
+	}
+	hPID := pid
+	if requested[pid] {
+		hPID = highlight(pid)
+		requested[pid] = false
+	}
+	fmt.Printf("%s%s%s(%s%s)%s%s\n", stub, lstub, info.cmd, hPID, tids, c, iab)
+	// loop over any threads that differ in capability state.
+	for len(misc) != 0 {
+		this := misc[0]
+		var nmisc []*task
+		var hPID = this.pid
+		if requested[this.pid] {
+			hPID = highlight(this.pid)
+			requested[this.pid] = false
+		}
+		same := []string{hPID}
+		for _, t := range misc[1:] {
+			if val, _ := this.cap.Cf(t.cap); val != 0 {
+				nmisc = append(nmisc, t)
+				continue
+			}
+			if val, _ := this.iab.Cf(t.iab); val != 0 {
+				nmisc = append(nmisc, t)
+				continue
+			}
+			if this.cmd != t.cmd {
+				nmisc = append(nmisc, t)
+				continue
+			}
+			hPID = t.pid
+			if requested[t.pid] {
+				hPID = highlight(t.pid)
+				requested[t.pid] = false
+			}
+			same = append(same, hPID)
+		}
+		c := ""
+		set := this.cap
+		if set != nil {
+			if val, _ := set.Cf(empty); val != 0 || *verbose {
+				c = fmt.Sprintf(" %q", set)
+			}
+		}
+		iab := ""
+		tup := this.iab
+		if tup != nil {
+			if val, _ := tup.Cf(noiab); val.Has(cap.Bound) || val.Has(cap.Amb) || *verbose {
+				iab = fmt.Sprintf(" [%s]", tup)
+			}
+		}
+		fmt.Printf("%s%s:>-%s{%s}%s%s\n", stub, estub, this.cmd, strings.Join(same, ","), c, iab)
+		misc = nmisc
+	}
+	if depth == 1 {
+		return
+	}
+	if depth > 1 {
+		depth--
+	}
+	x := info.children
+	sort.Slice(x, func(i, j int) bool {
+		a, _ := strconv.Atoi(x[i])
+		b, _ := strconv.Atoi(x[j])
+		return a < b
+	})
+	stub = fmt.Sprintf("%s%s", stub, estub)
+	lstub = "+-"
+	for i, cid := range x {
+		estub := "| "
+		if i+1 == len(x) {
+			estub = "  "
+		}
+		rDump(pids, requested, cid, stub, lstub, estub, depth)
+	}
+}
+
+func findPIDs(list []string, pids map[string]*task, glob string) <-chan string {
+	finds := make(chan string)
+	go func() {
+		defer close(finds)
+		found := false
+		// search for PIDs, if found exit.
+		for _, pid := range list {
+			match, _ := filepath.Match(glob, pids[pid].cmd)
+			if !match {
+				continue
+			}
+			found = true
+			finds <- pid
+		}
+		if found {
+			return
+		}
+		fmt.Printf("no process matched %q\n", glob)
+		os.Exit(1)
+	}()
+	return finds
+}
+
+func setDepth(pids map[string]*task, pid string) int {
+	x, ok := pids[pid]
+	if !ok || pid == "0" {
+		return 0 // the kernel, or a parent that is absent from the map.
+	}
+	if x.depth == 0 {
+		x.depth = setDepth(pids, x.parent) + 1
+	}
+	return x.depth
+}
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] [pid|glob] ...\nOptions:\n", os.Args[0])
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	// Honor the command line request if possible.
+	colored = *color && *colour && isATTY()
+
+	// Just in case the user wants to override this, we set the
+	// cap package up to find it.
+	cap.ProcRoot(*proc)
+
+	pids := make(map[string]*task)
+	pids["0"] = &task{
+		cmd: "<kernel>",
+	}
+
+	// Ingest the entire process tree
+	fs, err := ioutil.ReadDir(*proc)
+	if err != nil {
+		log.Fatalf("unable to open %q: %v", *proc, err)
+	}
+	for _, f := range fs {
+		pid := f.Name()
+		n, err := strconv.ParseInt(pid, 10, 64)
+		if err != nil {
+			continue
+		}
+		ts := &task{}
+		mu.Lock()
+		pids[pid] = ts
+		mu.Unlock()
+		wg.Add(1)
+		go ts.fill(pid, int(n), false)
+	}
+	wg.Wait()
+
+	var list []string
+	for pid, ts := range pids {
+		setDepth(pids, pid)
+		list = append(list, pid)
+		if pid == "0" {
+			continue
+		}
+		if pts, ok := pids[ts.parent]; ok {
+			pts.children = append(pts.children, pid)
+		}
+	}
+
+	// Sort the process tree by tree depth - shallowest first,
+	// with numerical order breaking ties.
+	sort.Slice(list, func(i, j int) bool {
+		x, y := pids[list[i]], pids[list[j]]
+		if x.depth == y.depth {
+			a, _ := strconv.Atoi(x.pid)
+			b, _ := strconv.Atoi(y.pid)
+			return a < b
+		}
+		return x.depth < y.depth
+	})
+
+	args := flag.Args()
+	if len(args) == 0 {
+		args = []string{"1"}
+	}
+
+	wanted := make(map[string]int)
+	requested := make(map[string]bool)
+	for _, pid := range args {
+		if _, err := strconv.ParseUint(pid, 10, 64); err == nil {
+			requested[pid] = true
+			if info, ok := pids[pid]; ok {
+				wanted[pid] = info.depth
+				continue
+			}
+			if requested[pid] {
+				continue
+			}
+			requested[pid] = true
+			continue
+		}
+		for pid := range findPIDs(list, pids, pid) {
+			requested[pid] = true
+			if info, ok := pids[pid]; ok {
+				wanted[pid] = info.depth
+			}
+		}
+	}
+
+	var noted []string
+	for pid := range wanted {
+		noted = append(noted, pid)
+	}
+	sort.Slice(noted, func(i, j int) bool {
+		return wanted[noted[i]] < wanted[noted[j]]
+	})
+
+	// We've boiled down the processes to a unique set of targets.
+	for _, pid := range noted {
+		rDump(pids, requested, pid, "", "--", "  ", *depth)
+	}
+
+	for pid, missed := range requested {
+		if missed {
+			fmt.Println("[PID", pid, "not found]")
+		}
+	}
+}
diff --git a/goapps/captree/go.mod b/goapps/captree/go.mod
new file mode 100644
index 0000000..09e579c
--- /dev/null
+++ b/goapps/captree/go.mod
@@ -0,0 +1,5 @@
+module captree
+
+go 1.16
+
+require kernel.org/pub/linux/libs/security/libcap/cap v1.2.69
diff --git a/goapps/gowns/go.mod b/goapps/gowns/go.mod
new file mode 100644
index 0000000..0e867ed
--- /dev/null
+++ b/goapps/gowns/go.mod
@@ -0,0 +1,5 @@
+module gowns
+
+go 1.15
+
+require kernel.org/pub/linux/libs/security/libcap/cap v1.2.69
diff --git a/goapps/gowns/gowns.go b/goapps/gowns/gowns.go
new file mode 100644
index 0000000..3d26c34
--- /dev/null
+++ b/goapps/gowns/gowns.go
@@ -0,0 +1,253 @@
+// Program gowns is a small program to explore and demonstrate using
+// Go to Wrap a child in a NameSpace under Linux.
+//
+// Note, this program is under active development and should not be
+// considered stable. That is, it is more a worked example and may
+// change command line arguments and behavior from release to release.
+// Should it become stable, I'll remove this comment.
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+	"syscall"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+// nsDetail is how we summarize the type of namespace we want to
+// enter.
+type nsDetail struct {
+	// uid holds the uid for the base user in this namespace (defaults to getuid).
+	uid int
+
+	// uidMap holds the namespace mapping of uid values.
+	uidMap []syscall.SysProcIDMap
+
+	// gid holds the gid for the base user in this namespace (defaults to getgid).
+	gid int
+
+	// gidMap holds the namespace mapping of gid values.
+	gidMap []syscall.SysProcIDMap
+}
+
+var (
+	baseID = flag.Int("base", -1, "base id for uids and gids (-1 = invoker's uid)")
+	uid    = flag.Int("uid", -1, "uid of the hosting user")
+	gid    = flag.Int("gid", -1, "gid of the hosting user")
+	iab    = flag.String("iab", "", "IAB string for inheritable capabilities")
+	mode   = flag.String("mode", "", "force a libcap mode (capsh --modes for list)")
+
+	ns   = flag.Bool("ns", false, "enable user namespace features")
+	uids = flag.String("uids", "", "comma separated UID ranges to map contiguously (req. CAP_SETUID)")
+	gids = flag.String("gids", "", "comma separated GID ranges to map contiguously (req. CAP_SETGID)")
+
+	shell = flag.String("shell", "/bin/bash", "shell to be launched")
+	debug = flag.Bool("verbose", false, "more verbose output")
+)
+
+// r holds a base and count for a contiguous range.
+type r struct {
+	base, count int
+}
+
+// ranges unpacks numerical ranges.
+func ranges(s string) []r {
+	if s == "" {
+		return nil
+	}
+	var rs []r
+	for _, n := range strings.Split(s, ",") {
+		var base, upper int
+		if _, err := fmt.Sscanf(n, "%d-%d", &base, &upper); err == nil {
+			if upper < base {
+				log.Fatalf("invalid range: [%d-%d]", base, upper)
+			}
+			rs = append(rs, r{
+				base:  base,
+				count: 1 + upper - base,
+			})
+		} else if _, err := fmt.Sscanf(n, "%d", &base); err == nil {
+			rs = append(rs, r{
+				base:  base,
+				count: 1,
+			})
+		} else {
+			log.Fatalf("unable to parse range [%s]", n)
+		}
+	}
+	return rs
+}
+
+// restart launches the program again with the remaining arguments.
+func restart() {
+	log.Fatalf("failed to restart: flags: %q %q", os.Args[0], flag.Args()[1:])
+}
+
+// errUnableToSetup is how nsSetup fails.
+var errUnableToSetup = errors.New("data was not in supported format")
+
+// nsSetup is the callback used to enter the namespace for the user
+// via callback in the cap.Launcher mechanism.
+func nsSetup(pa *syscall.ProcAttr, data interface{}) error {
+	nsD, ok := data.(nsDetail)
+	if !ok {
+		return errUnableToSetup
+	}
+
+	if pa.Sys == nil {
+		pa.Sys = &syscall.SysProcAttr{}
+	}
+	pa.Sys.Cloneflags |= syscall.CLONE_NEWUSER
+	pa.Sys.UidMappings = nsD.uidMap
+	pa.Sys.GidMappings = nsD.gidMap
+	return nil
+}
+
+func parseRanges(detail *nsDetail, ids string, id int) []syscall.SysProcIDMap {
+	base := *baseID
+	if base < 0 {
+		base = detail.uid
+	}
+
+	list := []syscall.SysProcIDMap{
+		syscall.SysProcIDMap{
+			ContainerID: base,
+			HostID:      id,
+			Size:        1,
+		},
+	}
+
+	base++
+	for _, next := range ranges(ids) {
+		list = append(list,
+			syscall.SysProcIDMap{
+				ContainerID: base,
+				HostID:      next.base,
+				Size:        next.count,
+			})
+		base += next.count
+	}
+	return list
+}
+
+func main() {
+	flag.Parse()
+
+	detail := nsDetail{
+		gid: syscall.Getgid(),
+	}
+
+	thisUID := syscall.Getuid()
+	switch *uid {
+	case -1:
+		detail.uid = thisUID
+	default:
+		detail.uid = *uid
+	}
+	detail.uidMap = parseRanges(&detail, *uids, detail.uid)
+
+	thisGID := syscall.Getgid()
+	switch *gid {
+	case -1:
+		detail.gid = thisGID
+	default:
+		detail.gid = *gid
+	}
+	detail.gidMap = parseRanges(&detail, *gids, detail.gid)
+
+	unparsed := flag.Args()
+
+	arg0 := *shell
+	skip := 0
+	var w *cap.Launcher
+	if len(unparsed) > 0 {
+		switch unparsed[0] {
+		case "==":
+			arg0 = os.Args[0]
+			skip++
+		}
+	}
+
+	w = cap.NewLauncher(arg0, append([]string{arg0}, unparsed[skip:]...), nil)
+	if *ns {
+		// Include the namespace setup callback with the launcher.
+		w.Callback(nsSetup)
+	}
+
+	if thisUID != detail.uid {
+		w.SetUID(detail.uid)
+	}
+
+	if thisGID != detail.gid {
+		w.SetGroups(detail.gid, nil)
+	}
+
+	if *iab != "" {
+		ins, err := cap.IABFromText(*iab)
+		if err != nil {
+			log.Fatalf("--iab=%q parsing issue: %v", *iab, err)
+		}
+		w.SetIAB(ins)
+	}
+
+	if *mode != "" {
+		for m := cap.Mode(1); ; m++ {
+			if s := m.String(); s == "UNKNOWN" {
+				log.Fatalf("mode %q is unknown", *mode)
+			} else if s == *mode {
+				w.SetMode(m)
+				break
+			}
+		}
+	}
+
+	// The launcher can enable more functionality if invoked with
+	// effective capabilities.
+	have := cap.GetProc()
+	for _, c := range []cap.Value{cap.SETUID, cap.SETGID} {
+		if canDo, err := have.GetFlag(cap.Permitted, c); err != nil {
+			log.Fatalf("failed to explore process capabilities, %q for %q", have, c)
+		} else if canDo {
+			if err := have.SetFlag(cap.Effective, true, c); err != nil {
+				log.Fatalf("failed to raise effective capability: \"%v e+%v\"", have, c)
+			}
+		}
+	}
+	if err := have.SetProc(); err != nil {
+		log.Fatalf("privilege assertion %q failed: %v", have, err)
+	}
+
+	if *debug {
+		if *ns {
+			fmt.Println("launching namespace")
+		} else {
+			fmt.Println("launching without namespace")
+		}
+	}
+
+	pid, err := w.Launch(detail)
+	if err != nil {
+		log.Fatalf("launch failed: %v", err)
+	}
+	if err := cap.NewSet().SetProc(); err != nil {
+		log.Fatalf("gowns could not drop privilege: %v", err)
+	}
+
+	p, err := os.FindProcess(pid)
+	if err != nil {
+		log.Fatalf("cannot find process: %v", err)
+	}
+	state, err := p.Wait()
+	if err != nil {
+		log.Fatalf("waiting failed: %v", err)
+	}
+
+	if *debug {
+		fmt.Println("process exited:", state)
+	}
+}
diff --git a/goapps/setid/go.mod b/goapps/setid/go.mod
new file mode 100644
index 0000000..09595b4
--- /dev/null
+++ b/goapps/setid/go.mod
@@ -0,0 +1,8 @@
+module setid
+
+go 1.11
+
+require (
+	kernel.org/pub/linux/libs/security/libcap/cap v1.2.69
+	kernel.org/pub/linux/libs/security/libcap/psx v1.2.69
+)
diff --git a/goapps/setid/setid.go b/goapps/setid/setid.go
new file mode 100644
index 0000000..2bbe5b0
--- /dev/null
+++ b/goapps/setid/setid.go
@@ -0,0 +1,151 @@
+// Program setid demonstrates how to use the cap and/or psx packages to
+// change the uid, gids of a program.
+//
+// A long writeup explaining how to use it in various different ways
+// is available:
+//
+//   https://sites.google.com/site/fullycapable/Home/using-go-to-set-uid-and-gids
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"strconv"
+	"strings"
+	"syscall"
+	"unsafe"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+	"kernel.org/pub/linux/libs/security/libcap/psx"
+)
+
+var (
+	uid      = flag.Int("uid", -1, "specify a uid with a value other than (euid)")
+	gid      = flag.Int("gid", -1, "specify a gid with a value other than (egid)")
+	drop     = flag.Bool("drop", true, "drop privilege once IDs have been changed")
+	suppl    = flag.String("suppl", "", "comma separated list of groups")
+	withCaps = flag.Bool("caps", true, "raise capabilities to setuid/setgid")
+)
+
+// setIDsWithCaps uses the cap.SetGroups and cap.SetUID functions.
+func setIDsWithCaps(setUID, setGID int, gids []int) {
+	if err := cap.SetGroups(setGID, gids...); err != nil {
+		log.Fatalf("group setting failed: %v", err)
+	}
+	if err := cap.SetUID(setUID); err != nil {
+		log.Fatalf("user setting failed: %v", err)
+	}
+}
+
+func main() {
+	flag.Parse()
+
+	showIDs("before", false, syscall.Getuid(), syscall.Getgid())
+
+	gids := splitToInts()
+	setGID := *gid
+	if *gid == -1 {
+		setGID = syscall.Getegid()
+	}
+	setUID := *uid
+	if *uid == -1 {
+		setUID = syscall.Getuid()
+	}
+
+	if *withCaps {
+		setIDsWithCaps(setUID, setGID, gids)
+	} else {
+		if _, _, err := psx.Syscall3(syscall.SYS_SETGID, uintptr(setGID), 0, 0); err != 0 {
+			log.Fatalf("failed to setgid(%d): %v", setGID, err)
+		}
+		if len(gids) != 0 {
+			gids32 := []int32{int32(setGID)}
+			for _, g := range gids {
+				gids32 = append(gids32, int32(g))
+			}
+			if _, _, err := psx.Syscall3(syscall.SYS_SETGROUPS, uintptr(len(gids32)), uintptr(unsafe.Pointer(&gids32[0])), 0); err != 0 { // setgroups(size, list)
+				log.Fatalf("failed to setgroups(%d, %v): %v", setGID, gids32, err)
+			}
+		}
+		if _, _, err := psx.Syscall3(syscall.SYS_SETUID, uintptr(setUID), 0, 0); err != 0 {
+			log.Fatalf("failed to setuid(%d): %v", setUID, err)
+		}
+	}
+
+	if *drop {
+		if err := cap.NewSet().SetProc(); err != nil {
+			log.Fatalf("unable to drop privilege: %v", err)
+		}
+	}
+
+	showIDs("after", true, setUID, setGID)
+}
+
+// splitToInts parses a comma separated string to a slice of integers.
+func splitToInts() (ret []int) {
+	if *suppl == "" {
+		return
+	}
+	a := strings.Split(*suppl, ",")
+	for _, s := range a {
+		n, err := strconv.Atoi(s)
+		if err != nil {
+			log.Fatalf("bad supplementary group [%q]: %v", s, err)
+		}
+		ret = append(ret, n)
+	}
+	return
+}
+
+// dumpStatus explores the current process /proc/task/* status files
+// for matching values.
+func dumpStatus(testCase string, validate bool, filter, expect string) bool {
+	fmt.Printf("%s:\n", testCase)
+	var failed bool
+	pid := syscall.Getpid()
+	fs, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid))
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, f := range fs {
+		tf := fmt.Sprintf("/proc/%s/status", f.Name())
+		d, err := ioutil.ReadFile(tf)
+		if err != nil {
+			fmt.Println(tf, err)
+			failed = true
+			continue
+		}
+		lines := strings.Split(string(d), "\n")
+		for _, line := range lines {
+			if strings.HasPrefix(line, filter) {
+				fails := line != expect
+				failure := ""
+				if fails && validate {
+					failed = fails
+					failure = " (bad)"
+				}
+				fmt.Printf("%s %s%s\n", tf, line, failure)
+				break
+			}
+		}
+	}
+	return failed
+}
+
+// showIDs dumps the thread map out of the /proc/<proc>/tasks
+// filesystem to confirm that all of the threads associated with the
+// process have the same uid/gid values. Note, the code does not
+// attempt to validate the supplementary groups at present.
+func showIDs(test string, validate bool, wantUID, wantGID int) {
+	fmt.Printf("%s capability state: %q\n", test, cap.GetProc())
+
+	failed := dumpStatus(test+" gid", validate, "Gid:", fmt.Sprintf("Gid:\t%d\t%d\t%d\t%d", wantGID, wantGID, wantGID, wantGID))
+
+	failed = dumpStatus(test+" uid", validate, "Uid:", fmt.Sprintf("Uid:\t%d\t%d\t%d\t%d", wantUID, wantUID, wantUID, wantUID)) || failed
+
+	if validate && failed {
+		log.Fatal("did not observe desired *id state")
+	}
+}
diff --git a/goapps/web/README.md b/goapps/web/README.md
new file mode 100644
index 0000000..970d10e
--- /dev/null
+++ b/goapps/web/README.md
@@ -0,0 +1,28 @@
+# Web serving with/without privilege
+
+## Building
+
+This sample program needs to be built as follows (when built with Go
+prior to 1.15):
+```
+   export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
+   go mod tidy
+   go build web.go
+```
+go1.15+ does not require the `CGO_LDFLAGS_ALLOW` environment variable
+and can build this code with:
+```
+   go mod tidy
+   go build web.go
+```
+
+## Further discussion
+
+A more complete walk through of what this code does is provided on the
+[Fully Capable
+website](https://sites.google.com/site/fullycapable/getting-started-with-go/building-go-programs-that-manipulate-capabilities).
+
+## Reporting bugs
+
+Go compilers prior to go1.11.13 are not expected to work. Report more
+recent issues to the [`libcap` bug tracker](https://bugzilla.kernel.org/buglist.cgi?component=libcap&list_id=1065141&product=Tools&resolution=---).
diff --git a/goapps/web/go.mod b/goapps/web/go.mod
new file mode 100644
index 0000000..054357b
--- /dev/null
+++ b/goapps/web/go.mod
@@ -0,0 +1,5 @@
+module web
+
+go 1.11
+
+require kernel.org/pub/linux/libs/security/libcap/cap v1.2.69
diff --git a/goapps/web/web.go b/goapps/web/web.go
new file mode 100644
index 0000000..5f9c5cb
--- /dev/null
+++ b/goapps/web/web.go
@@ -0,0 +1,132 @@
+// Program web provides an example of a webserver using capabilities to
+// bind to a privileged port, and then drop all capabilities before
+// handling the first web request.
+//
+// This program can be compiled CGO_ENABLED=0 with the go1.16+
+// toolchain.
+//
+// Go versions prior to 1.16 use some cgo support provided by the
+// "kernel.org/pub/linux/libs/security/libcap/psx" package.
+//
+// To set this up, compile and empower this binary as follows (the
+// README contains a pointer to a full writeup for building this
+// package - go versions prior to 1.15 need some environment variable
+// workarounds):
+//
+//   go mod init web
+//   go mod tidy
+//   go build web.go
+//   sudo setcap cap_setpcap,cap_net_bind_service=p web
+//   ./web --port=80
+//
+// Make requests using wget and observe the log of web:
+//
+//   wget -o/dev/null -O/dev/stdout localhost:80
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"net"
+	"net/http"
+	"runtime"
+	"syscall"
+
+	"kernel.org/pub/linux/libs/security/libcap/cap"
+)
+
+var (
+	port     = flag.Int("port", 0, "port to listen on")
+	skipPriv = flag.Bool("skip", false, "skip raising the effective capability - will fail for low ports")
+)
+
+// ensureNotEUID aborts the program if it is running setuid something,
+// or being invoked by root.  That is, the preparer isn't setting up
+// the program correctly.
+func ensureNotEUID() {
+	euid := syscall.Geteuid()
+	uid := syscall.Getuid()
+	egid := syscall.Getegid()
+	gid := syscall.Getgid()
+	if uid != euid || gid != egid {
+		log.Fatalf("go runtime is setuid uids:(%d vs %d), gids(%d vs %d)", uid, euid, gid, egid)
+	}
+	if uid == 0 {
+		log.Fatalf("go runtime is running as root - cheating")
+	}
+}
+
+// listen creates a listener by raising effective privilege only to
+// bind to address and then lowering that effective privilege.
+func listen(network, address string) (net.Listener, error) {
+	if *skipPriv {
+		return net.Listen(network, address)
+	}
+
+	orig := cap.GetProc()
+	defer orig.SetProc() // restore original caps on exit.
+
+	c, err := orig.Dup()
+	if err != nil {
+		return nil, fmt.Errorf("failed to dup caps: %v", err)
+	}
+
+	if on, _ := c.GetFlag(cap.Permitted, cap.NET_BIND_SERVICE); !on {
+		return nil, fmt.Errorf("insufficient privilege to bind to low ports - want %q, have %q", cap.NET_BIND_SERVICE, c)
+	}
+
+	if err := c.SetFlag(cap.Effective, true, cap.NET_BIND_SERVICE); err != nil {
+		return nil, fmt.Errorf("unable to set capability: %v", err)
+	}
+
+	if err := c.SetProc(); err != nil {
+		return nil, fmt.Errorf("unable to raise capabilities %q: %v", c, err)
+	}
+	return net.Listen(network, address)
+}
+
+// Handler is used to abstract the ServeHTTP function.
+type Handler struct{}
+
+// ServeHTTP says hello from a single Go hardware thread and reveals
+// its capabilities.
+func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	runtime.LockOSThread()
+	// Get some numbers consistent to the current execution, so
+	// the returned web page demonstrates that the code execution
+	// is bouncing around on different kernel thread ids.
+	p := syscall.Getpid()
+	t := syscall.Gettid()
+	c := cap.GetProc()
+	runtime.UnlockOSThread()
+
+	log.Printf("Saying hello from proc: %d->%d, caps=%q", p, t, c)
+	fmt.Fprintf(w, "Hello from proc: %d->%d, caps=%q\n", p, t, c)
+}
+
+func main() {
+	flag.Parse()
+
+	if *port == 0 {
+		log.Fatal("please supply --port value")
+	}
+
+	ensureNotEUID()
+
+	ls, err := listen("tcp", fmt.Sprintf(":%d", *port))
+	if err != nil {
+		log.Fatalf("aborting: %v", err)
+	}
+	defer ls.Close()
+
+	if !*skipPriv {
+		if err := cap.ModeNoPriv.Set(); err != nil {
+			log.Fatalf("unable to drop all privilege: %v", err)
+		}
+	}
+
+	if err := http.Serve(ls, &Handler{}); err != nil {
+		log.Fatalf("server failed: %v", err)
+	}
+}
diff --git a/gomods.sh b/gomods.sh
new file mode 100755
index 0000000..890cccd
--- /dev/null
+++ b/gomods.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+version="${1}"
+if [[ -z "${version}" ]]; then
+    echo "usage: supply a cap/psx module version to target"
+    exit 1
+fi
+
+for x in $(find . -name 'go.mod'); do
+    sed -i -e 's@kernel.org/\([^ ]*\) v.*$@kernel.org/\1 '"${version}@" "${x}"
+done
diff --git a/kdebug/Makefile b/kdebug/Makefile
new file mode 100644
index 0000000..35a16d0
--- /dev/null
+++ b/kdebug/Makefile
@@ -0,0 +1,22 @@
+topdir=$(shell pwd)/..
+include ../Make.Rules
+
+test: exit
+	rm -f interactive
+	./test-kernel.sh
+
+shell: exit
+	touch interactive
+	./test-kernel.sh
+
+exit: exit.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $< -o $@ --static
+
+all:
+	@echo cd to kdebug to test a kernel build
+
+install:
+
+clean:
+	$(LOCALCLEAN)
+	rm -f fs.conf initramfs.img exit interactive
diff --git a/kdebug/exit.c b/kdebug/exit.c
new file mode 100644
index 0000000..a83232d
--- /dev/null
+++ b/kdebug/exit.c
@@ -0,0 +1,36 @@
+/*
+ * See https://stackoverflow.com/questions/42208228/how-to-automatically-close-the-execution-of-the-qemu-after-end-of-process
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/io.h>
+#include <unistd.h>
+
+#define SHUTDOWN_PORT 0x604
+#define EXIT_PORT     0x501
+
+static void clean_exit(void) {
+    ioperm(SHUTDOWN_PORT, 16, 1);
+    outw(0x2000, SHUTDOWN_PORT);
+}
+
+int main(int argc, char **argv) {
+    int status;
+    if (argc != 2) {
+	clean_exit();
+	return 1; /* only reached if the shutdown request was ignored */
+    }
+    status = atoi(argv[1]);
+    printf("exiting with status %d (in three seconds)\n", status);
+    sleep(3);
+    if (!status) {
+	clean_exit();
+    }
+    ioperm(EXIT_PORT, 8, 1);
+    /* the status returned by qemu is 1+(2*value), where value is
+     * what gets written to the exit port below. */
+    outb(status-1, EXIT_PORT);
+    printf("didn't exit.. did you include '-device isa-debug-exit'"
+	   " in qemu command?\n");
+    exit(1);
+}
diff --git a/kdebug/test-bash.sh b/kdebug/test-bash.sh
new file mode 100644
index 0000000..2777b21
--- /dev/null
+++ b/kdebug/test-bash.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# bash is used in various headers so we need a wrapper to invoke sh
+# instead.
+exec sh "$@"
diff --git a/kdebug/test-init.sh b/kdebug/test-init.sh
new file mode 100644
index 0000000..849d9c7
--- /dev/null
+++ b/kdebug/test-init.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+PATH=/bin
+
+echo -n "Mounting filesystems ... "
+mount -t proc proc /proc
+mount -t devtmpfs dev /dev
+mount -t sysfs sys /sys
+mount -t devpts pts /dev/pts
+echo done
+
+echo Hello, World
+cd /root
+if [ -f ./interactive ]; then
+    ./quicktest.sh
+    sh -i
+else
+    ./quicktest.sh || ./exit 1
+fi
+./exit
diff --git a/kdebug/test-kernel.sh b/kdebug/test-kernel.sh
new file mode 100755
index 0000000..391bb8a
--- /dev/null
+++ b/kdebug/test-kernel.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# The following is a synthesis of info in:
+#
+#  http://vmsplice.net/~stefan/stefanha-kernel-recipes-2015.pdf
+#  http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/README
+#
+KBASE=../../linux
+#APPEND="console=ttyS0"
+
+function die {  # print the arguments as one message, then end the script with status 1
+    echo "$*"
+    exit 1
+}
+
+pushd ..  # build from the parent directory (the top of the libcap tree)
+make LIBCSTATIC=yes all test || die "failed to make all test of libcap tree"
+make LIBCSTATIC=yes -C progs tcapsh-static || die "failed to make progs/tcapsh-static"
+make -C tests uns_test || die "failed to make tests/uns_test"
+popd
+
+# Assumes desired make *config (eg. make defconfig) is already done.
+pushd "$KBASE" || die "no kernel tree at $KBASE"
+pwd
+make V=1 all || die "failed to build kernel: $0"
+popd
+
+HERE=$(/bin/pwd)  # absolute path of the current directory, used for the file sources below
+# fs.conf is the file list handed to gen_init_cpio further down; more lines are appended later.
+cat > fs.conf <<EOF
+file /init test-init.sh 0755 0 0
+dir /etc 0755 0 0
+file /etc/passwd test-passwd 0444 0 0
+dir /lib 0755 0 0
+dir /proc 0755 0 0
+dir /dev 0755 0 0
+dir /sys 0755 0 0
+dir /sbin 0755 0 0
+file /sbin/busybox /usr/sbin/busybox 0755 0 0
+dir /bin 0755 0 0
+file /bin/myprompt test-prompt.sh 0755 0 0
+file /bin/bash test-bash.sh 0755 0 0
+dir /usr 0755 0 0
+dir /usr/bin 0755 0 0
+dir /root 0755 0 0
+file /root/quicktest.sh $HERE/../progs/quicktest.sh 0755 0 0
+file /root/setcap $HERE/../progs/setcap 0755 0 0
+file /root/getcap $HERE/../progs/getcap 0755 0 0
+file /root/capsh $HERE/../progs/capsh 0755 0 0
+file /root/getpcaps $HERE/../progs/getpcaps 0755 0 0
+file /root/tcapsh-static $HERE/../progs/tcapsh-static 0755 0 0
+file /root/exit $HERE/exit 0755 0 0
+file /root/uns_test $HERE/../tests/uns_test 0755 0 0
+EOF
+
+# convenience for some local experiments: an optional extras.sh is sourced into this script
+if [ -f "$HERE/extras.sh" ]; then
+    echo "local, uncommitted enhancements to kernel test"
+    . "$HERE/extras.sh"
+fi
+
+if [ -f "$HERE/interactive" ]; then  # test-init.sh looks for /root/interactive to start "sh -i"
+    echo "file /root/interactive $HERE/interactive 0755 0 0" >> fs.conf
+fi
+
+COMMANDS="awk cat chmod cp dmesg grep id less ln ls mkdir mount pwd rm rmdir sh sort umount uniq vi"
+for f in $COMMANDS; do  # each command becomes a /bin symlink to /sbin/busybox
+    echo slink /bin/$f /sbin/busybox 0755 0 0 >> fs.conf
+done
+
+UCOMMANDS="id cut"
+for f in $UCOMMANDS; do  # each command becomes a /usr/bin symlink to /sbin/busybox
+    echo slink /usr/bin/$f /sbin/busybox 0755 0 0 >> fs.conf
+done
+
+$KBASE/usr/gen_init_cpio fs.conf | gzip -9 > initramfs.img  # build the gzip-compressed image from the fs.conf list
+
+KERNEL=$KBASE/arch/$(uname -m)/boot/bzImage  # kernel image path for the host's machine type
+
+qemu-system-$(uname -m) -m 1024 \
+		   -kernel $KERNEL \
+		   -initrd initramfs.img \
+		   -append "$APPEND" \
+		   -smp sockets=2,dies=1,cores=4 \
+		   -device isa-debug-exit
diff --git a/kdebug/test-passwd b/kdebug/test-passwd
new file mode 100644
index 0000000..0ff71df
--- /dev/null
+++ b/kdebug/test-passwd
@@ -0,0 +1,3 @@
+root:x:0:0:root:/root:/bin/bash
+luser:x:1:1:Luser:/:/bin/bash
+nobody:x:99:99:Nobody:/:/sbin/nologin
diff --git a/kdebug/test-prompt.sh b/kdebug/test-prompt.sh
new file mode 100644
index 0000000..1c19c16
--- /dev/null
+++ b/kdebug/test-prompt.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+echo -n "$(pwd)# "
diff --git a/libcap/.gitignore b/libcap/.gitignore
index 34cc5d6..a0771d4 100644
--- a/libcap/.gitignore
+++ b/libcap/.gitignore
@@ -3,4 +3,13 @@ cap_names.list.h
 _caps_output.gperf
 libcap.a
 libcap.so*
+libpsx.a
+libpsx.so*
 _makenames
+cap_test
+libcap.pc
+libpsx.pc
+empty
+loader.txt
+cap_magic.o
+psx_magic.o
diff --git a/libcap/Makefile b/libcap/Makefile
index 20ab00f..f5dde3e 100644
--- a/libcap/Makefile
+++ b/libcap/Makefile
@@ -6,86 +6,230 @@ include ../Make.Rules
 #
 # Library version
 #
-LIBNAME=$(LIBTITLE).so
-STALIBNAME=$(LIBTITLE).a
+CAPLIBNAME=$(LIBTITLE).so
+STACAPLIBNAME=$(LIBTITLE).a
 #
+PSXTITLE=libpsx
+PSXLIBNAME=$(PSXTITLE).so
+STAPSXLIBNAME=$(PSXTITLE).a
 
-FILES=cap_alloc cap_proc cap_extint cap_flag cap_text
+CAPFILES=cap_alloc cap_proc cap_extint cap_flag cap_text cap_file
+CAPMAGICOBJ=cap_magic.o
+PSXFILES=../psx/psx
+PSXMAGICOBJ=psx_magic.o
 
-# make including file support something you can override (no libattr
-# no support).
-ifeq ($(LIBATTR),yes)
-FILES += cap_file
-LDFLAGS += -lattr
-DEPS = -lattr
-endif
+# Always build libcap sources this way:
+CFLAGS += -fPIC
+
+# The linker magic needed to build a dynamic library as independently
+# executable
+MAGIC=-Wl,-e,__so_start
 
 INCLS=libcap.h cap_names.h $(INCS)
-OBJS=$(addsuffix .o, $(FILES))
-MAJLIBNAME=$(LIBNAME).$(VERSION)
-MINLIBNAME=$(MAJLIBNAME).$(MINOR)
 GPERF_OUTPUT = _caps_output.gperf
 
-all: $(MINLIBNAME) $(STALIBNAME) libcap.pc
+CAPOBJS=$(addsuffix .o, $(CAPFILES))
+MAJCAPLIBNAME=$(CAPLIBNAME).$(VERSION)
+MINCAPLIBNAME=$(MAJCAPLIBNAME).$(MINOR)
+
+PSXOBJS=$(addsuffix .o, $(PSXFILES))
+MAJPSXLIBNAME=$(PSXLIBNAME).$(VERSION)
+MINPSXLIBNAME=$(MAJPSXLIBNAME).$(MINOR)
+
+all: pcs $(STACAPLIBNAME)
+ifeq ($(SHARED),yes)
+	$(MAKE) $(CAPLIBNAME)
+endif
+ifeq ($(PTHREADS),yes)
+	$(MAKE) $(STAPSXLIBNAME)
+ifeq ($(SHARED),yes)
+	$(MAKE) $(PSXLIBNAME)
+endif
+endif
 
-ifeq ($(shell gperf --version > /dev/null 2>&1 && echo yes),yes)
+pcs: $(LIBTITLE).pc
+ifeq ($(PTHREADS),yes)
+	$(MAKE) $(PSXTITLE).pc
+endif
+
+ifeq ($(USE_GPERF),yes)
 USE_GPERF_OUTPUT = $(GPERF_OUTPUT)
-INCLUDE_GPERF_OUTPUT = -include $(GPERF_OUTPUT)
+INCLUDE_GPERF_OUTPUT = -DINCLUDE_GPERF_OUTPUT='"$(GPERF_OUTPUT)"'
 endif
 
-libcap.pc: libcap.pc.in
-	sed -e 's,@prefix@,$(prefix),' \
+$(LIBTITLE).pc: $(LIBTITLE).pc.in
+	$(BUILD_SED) -e 's,@prefix@,$(prefix),' \
 		-e 's,@exec_prefix@,$(exec_prefix),' \
-		-e 's,@libdir@,$(lib_prefix)/$(lib),' \
+		-e 's,@libdir@,$(LIBDIR),' \
+		-e 's,@includedir@,$(inc_prefix)/include,' \
+		-e 's,@VERSION@,$(VERSION).$(MINOR),' \
+		-e 's,@deps@,$(DEPS),' \
+		$< >$@
+
+$(PSXTITLE).pc: $(PSXTITLE).pc.in
+	$(BUILD_SED) -e 's,@prefix@,$(prefix),' \
+		-e 's,@exec_prefix@,$(exec_prefix),' \
+		-e 's,@libdir@,$(LIBDIR),' \
 		-e 's,@includedir@,$(inc_prefix)/include,' \
 		-e 's,@VERSION@,$(VERSION).$(MINOR),' \
 		-e 's,@deps@,$(DEPS),' \
 		$< >$@
 
 _makenames: _makenames.c cap_names.list.h
-	$(BUILD_CC) $(BUILD_CFLAGS) $< -o $@
+	$(BUILD_CC) $(BUILD_CFLAGS) $(BUILD_CPPFLAGS) $< -o $@
 
 cap_names.h: _makenames
 	./_makenames > cap_names.h
 
-$(GPERF_OUTPUT): cap_names.list.h
-	perl -e 'print "struct __cap_token_s { const char *name; int index; };\n%{\nconst struct __cap_token_s *__cap_lookup_name(const char *, unsigned int);\n%}\n%%\n"; while ($$l = <>) { $$l =~ s/[\{\"]//g; $$l =~ s/\}.*// ; print $$l; }' < $< | gperf --ignore-case --language=ANSI-C --readonly --null-strings --global-table --hash-function-name=__cap_hash_name --lookup-function-name="__cap_lookup_name" -c -t -m20 $(INDENT) > $@
-
-cap_names.list.h: Makefile $(KERNEL_HEADERS)/linux/capability.h
-	@echo "=> making $@ from $(KERNEL_HEADERS)/linux/capability.h"
-	perl -e 'while ($$l=<>) { if ($$l =~ /^\#define[ \t](CAP[_A-Z]+)[ \t]+([0-9]+)\s+$$/) { $$tok=$$1; $$val=$$2; $$tok =~ tr/A-Z/a-z/; print "{\"$$tok\",$$val},\n"; } }' $(KERNEL_HEADERS)/linux/capability.h | fgrep -v 0x > $@
+$(GPERF_OUTPUT): cap_names.list.h cap_names.header Makefile
+	(cat cap_names.header ; $(BUILD_SED) -e 's/[\{\}"]//g' -e 's/,$$//' cap_names.list.h) | gperf --ignore-case --language=ANSI-C --readonly --null-strings --global-table --hash-function-name=__cap_hash_name --lookup-function-name="__cap_lookup_name" -c -t -m20 $(INDENT) > $@
+	$(BUILD_SED) -e 's/unsigned int len/size_t len/' -i $@
 
-$(STALIBNAME): $(OBJS)
+# Intention is that libcap keeps up with torvalds' tree, as reflected
+# by this maintained version of the kernel header. libcap dynamically
+# trims the meaning of "all" capabilities down to that of the running
+# kernel as of 2.30. That is, all production kernels should be equal
+# to or behind libcap. Note "./libcap.so --summary" should explain how
+# the built libcap.so compares to the running kernel.
+# The rule turns each '#define CAP_<NAME> <number>' line of the header
+# into a lower-cased '{"cap_<name>",<number>},' entry.
+UAPI_HEADER := $(topdir)/libcap/include/uapi/linux/capability.h
+cap_names.list.h: Makefile $(UAPI_HEADER)
+	@echo "=> making $@ from $(UAPI_HEADER)"
+	$(BUILD_EGREP) '^#define\s+CAP_([^\s]+)\s+[0-9]+\s*$$' $(UAPI_HEADER) | $(BUILD_SED) -e 's/^#define\s\+/{"/' -e 's/\s*$$/},/' -e 's/\s\+/",/' -e 'y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/' > $@
+
+$(STACAPLIBNAME): $(CAPOBJS)
 	$(AR) rcs $@ $^
 	$(RANLIB) $@
 
-$(MINLIBNAME): $(OBJS)
-	$(LD) $(CFLAGS) $(LDFLAGS) -Wl,-soname,$(MAJLIBNAME) -o $@ $^
-	ln -sf $(MINLIBNAME) $(MAJLIBNAME)
-	ln -sf $(MAJLIBNAME) $(LIBNAME)
+$(STAPSXLIBNAME): $(PSXOBJS) include/sys/psx_syscall.h
+	$(AR) rcs $@ $(PSXOBJS)
+	$(RANLIB) $@
+
+ifeq ($(SHARED),yes)
+
+empty: empty.c
+	$(CC) -o $@ $<
+
+loader.txt: empty
+	$(OBJCOPY) --dump-section .interp=$@ $< /dev/null
+
+cap_magic.o: execable.h execable.c loader.txt libcap.h
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DLIBRARY_VERSION=\"$(LIBTITLE)-$(VERSION).$(MINOR)\" -DSHARED_LOADER=\"$(shell cat loader.txt)\" -include ./libcap.h -c execable.c -o $@
+
+$(CAPLIBNAME) $(MAJCAPLIBNAME) $(MINCAPLIBNAME): $(CAPOBJS) $(CAPMAGICOBJ)
+	$(LD) $(CFLAGS) $(LDFLAGS) -Wl,-soname,$(MAJCAPLIBNAME) -o $(MINCAPLIBNAME) $^ $(MAGIC)
+	ln -sf $(MINCAPLIBNAME) $(MAJCAPLIBNAME)
+	ln -sf $(MAJCAPLIBNAME) $(CAPLIBNAME)
+
+psx_magic.o: execable.h psx_exec.c loader.txt
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DLIBRARY_VERSION=\"$(PSXTITLE)-$(VERSION).$(MINOR)\" -DSHARED_LOADER=\"$(shell cat loader.txt)\" -c psx_exec.c -o $@
+
+$(PSXLIBNAME) $(MAJPSXLIBNAME) $(MINPSXLIBNAME): $(PSXOBJS) include/sys/psx_syscall.h $(PSXMAGICOBJ)
+	$(LD) $(CFLAGS) $(LDFLAGS) -Wl,-soname,$(MAJPSXLIBNAME) -o $(MINPSXLIBNAME) $(PSXOBJS) $(PSXMAGICOBJ) $(MAGIC) $(PSXLINKFLAGS)
+	ln -sf $(MINPSXLIBNAME) $(MAJPSXLIBNAME)
+	ln -sf $(MAJPSXLIBNAME) $(PSXLIBNAME)
+endif
 
 %.o: %.c $(INCLS)
-	$(CC) $(CFLAGS) $(IPATH) -c $< -o $@
+	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
 
 cap_text.o: cap_text.c $(USE_GPERF_OUTPUT) $(INCLS)
-	$(CC) $(CFLAGS) $(IPATH) $(INCLUDE_GPERF_OUTPUT) -c $< -o $@
-
-install: all
-	mkdir -p -m 0755 $(INCDIR)/sys
-	install -m 0644 include/sys/capability.h $(INCDIR)/sys
-	mkdir -p -m 0755 $(LIBDIR)
-	install -m 0644 $(STALIBNAME) $(LIBDIR)/$(STALIBNAME)
-	install -m 0644 $(MINLIBNAME) $(LIBDIR)/$(MINLIBNAME)
-	ln -sf $(MINLIBNAME) $(LIBDIR)/$(MAJLIBNAME)
-	ln -sf $(MAJLIBNAME) $(LIBDIR)/$(LIBNAME)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(INCLUDE_GPERF_OUTPUT) -c $< -o $@
+
+cap_test: cap_test.c $(INCLS) $(CAPOBJS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(CAPOBJS) -o $@
+
+libcapsotest: $(CAPLIBNAME)
+	./$(CAPLIBNAME)
+	./$(CAPLIBNAME) --usage
+	./$(CAPLIBNAME) --help
+	./$(CAPLIBNAME) --summary
+
+libpsxsotest: $(PSXLIBNAME)
+	./$(PSXLIBNAME)
+
+test: cap_test
+	./cap_test
+ifeq ($(SHARED),yes)
+	$(MAKE) libcapsotest
+ifeq ($(PTHREADS),yes)
+	$(MAKE) libpsxsotest
+endif
+endif
+
+sudotest:
+	@echo no sudotests for libcap
+
+install: install-static
+ifeq ($(SHARED),yes)
+	$(MAKE) install-shared
+endif
+
+install-static: install-static-cap
+ifeq ($(PTHREADS),yes)
+	$(MAKE) install-static-psx
+endif
+
+install-shared: install-shared-cap
+ifeq ($(PTHREADS),yes)
+	$(MAKE) install-shared-psx
+endif
+
+install-cap: install-static-cap
+ifeq ($(SHARED),yes)
+	$(MAKE) install-shared-cap
+endif
+
+install-psx: install-static-psx
+ifeq ($(SHARED),yes)
+	$(MAKE) install-shared-psx
+endif
+
+install-static-cap: install-common-cap $(STACAPLIBNAME)
+	install -m 0644 $(STACAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(STACAPLIBNAME)
+
+install-shared-cap: install-common-cap $(MINCAPLIBNAME)
+	install -m 0755 $(MINCAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MINCAPLIBNAME)
+	ln -sf $(MINCAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MAJCAPLIBNAME)
+	ln -sf $(MAJCAPLIBNAME) $(FAKEROOT)$(LIBDIR)/$(CAPLIBNAME)
+ifeq ($(FAKEROOT),)
+	-/sbin/ldconfig
+endif
+
+install-static-psx: install-common-psx $(STAPSXLIBNAME)
+	install -m 0644 $(STAPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(STAPSXLIBNAME)
+
+install-shared-psx: install-common-psx $(MINPSXLIBNAME)
+	install -m 0755 $(MINPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MINPSXLIBNAME)
+	ln -sf $(MINPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(MAJPSXLIBNAME)
+	ln -sf $(MAJPSXLIBNAME) $(FAKEROOT)$(LIBDIR)/$(PSXLIBNAME)
 ifeq ($(FAKEROOT),)
 	-/sbin/ldconfig
 endif
-	mkdir -p -m 0755 $(PKGCONFIGDIR)
-	install -m 0644 libcap.pc $(PKGCONFIGDIR)/libcap.pc
+
+install-common-cap: install-common $(LIBTITLE).pc
+	install -m 0644 include/sys/capability.h $(FAKEROOT)$(INCDIR)/sys
+	install -m 0644 $(LIBTITLE).pc $(FAKEROOT)$(PKGCONFIGDIR)/$(LIBTITLE).pc
+
+include/sys/psx_syscall.h: ../psx/psx_syscall.h
+	rm -f $@
+	ln -s ../../../psx/psx_syscall.h $@
+
+install-common-psx: install-common $(PSXTITLE).pc include/sys/psx_syscall.h
+	install -m 0644 include/sys/psx_syscall.h $(FAKEROOT)$(INCDIR)/sys
+	install -m 0644 $(PSXTITLE).pc $(FAKEROOT)$(PKGCONFIGDIR)/$(PSXTITLE).pc
+
+install-common:
+	mkdir -p -m 0755 $(FAKEROOT)$(INCDIR)/sys
+	mkdir -p -m 0755 $(FAKEROOT)$(PKGCONFIGDIR)
+	mkdir -p -m 0755 $(FAKEROOT)$(LIBDIR)
 
 clean:
 	$(LOCALCLEAN)
-	rm -f $(OBJS) $(LIBNAME)* $(STALIBNAME) libcap.pc
-	rm -f cap_names.h cap_names.list.h _makenames $(GPERF_OUTPUT)
+	rm -f $(CAPOBJS) $(CAPLIBNAME)* $(STACAPLIBNAME) $(LIBTITLE).pc
+	rm -f $(PSXOBJS) $(PSXLIBNAME)* $(STAPSXLIBNAME) $(PSXTITLE).pc
+	rm -f cap_names.h cap_names.list.h _makenames $(GPERF_OUTPUT) cap_test
+	rm -f include/sys/psx_syscall.h
+	rm -f $(CAPMAGICOBJ) $(PSXMAGICOBJ) empty loader.txt
 	cd include/sys && $(LOCALCLEAN)
diff --git a/libcap/_makenames.c b/libcap/_makenames.c
index 8cc819b..30eb080 100644
--- a/libcap/_makenames.c
+++ b/libcap/_makenames.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-8 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2020 Andrew G. Morgan <morgan@kernel.org>
  *
  * This is a file to make the capability <-> string mappings for
  * libcap.
@@ -7,7 +7,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <sys/capability.h>
+#include <string.h>
 
 /*
  * #include 'sed' generated array
@@ -21,41 +21,70 @@ struct {
     {NULL, -1}
 };
 
-/* this should be more than big enough (factor of three at least) */
-const char *pointers[8*sizeof(struct __user_cap_data_struct)];
+/*
+ * recalloc uses realloc to resize p from was bytes to is bytes and
+ * zeroes the bytes from offset was onwards. It exits if realloc fails.
+ */
+static void *recalloc(void *p, int was, int is) {
+    char *n = realloc(p, is);
+    if (!n) {
+	fputs("out of memory", stderr);
+	exit(1); /* no error return: callers never see NULL */
+    }
+    memset(n+was, 0, is-was); /* clear only the part beyond the old size */
+    return n;
+}
 
 int main(void)
 {
-    int i, maxcaps=0;
+    int i, maxcaps=0, maxlength=0;
+    const char **pointers = NULL;
+    int pointers_avail = 0;
 
     for ( i=0; list[i].index >= 0 && list[i].name; ++i ) {
 	if (maxcaps <= list[i].index) {
 	    maxcaps = list[i].index + 1;
 	}
+        if (pointers == NULL || list[i].index >= pointers_avail) {
+	    int was = pointers_avail * sizeof(char *);
+	    pointers_avail = 2 * list[i].index + 1;
+	    pointers = recalloc(pointers, was, pointers_avail * sizeof(char *));
+	    if (pointers == NULL) {
+		perror("unable to continue");
+		exit(1);
+	    }
+        }
 	pointers[list[i].index] = list[i].name;
+	int n = strlen(list[i].name);
+	if (n > maxlength) {
+	    maxlength = n;
+	}
     }
 
     printf("/*\n"
 	   " * DO NOT EDIT: this file is generated automatically from\n"
 	   " *\n"
-	   " *     <linux/capability.h>\n"
-	   " */\n"
-	   "#define __CAP_BITS   %d\n"
+	   " *     <uapi/linux/capability.h>\n"
+	   " */\n\n"
+	   "#define __CAP_BITS       %d\n"
+	   "#define __CAP_NAME_SIZE  %d\n"
 	   "\n"
 	   "#ifdef LIBCAP_PLEASE_INCLUDE_ARRAY\n"
-	   "  char const *_cap_names[__CAP_BITS] = {\n", maxcaps);
+	   "#define LIBCAP_CAP_NAMES { \\\n", maxcaps, maxlength+1);
 
     for (i=0; i<maxcaps; ++i) {
-	if (pointers[i])
-	    printf("      /* %d */\t\"%s\",\n", i, pointers[i]);
-	else
-	    printf("      /* %d */\tNULL,\t\t/* - presently unused */\n", i);
+	if (pointers[i]) {
+	    printf("      /* %d */\t\"%s\", \\\n", i, pointers[i]);
+	} else {
+	    printf("      /* %d */\tNULL,\t\t/* - presently unused */ \\\n", i);
+	}
     }
 
-    printf("  };\n"
+    printf("  }\n"
 	   "#endif /* LIBCAP_PLEASE_INCLUDE_ARRAY */\n"
 	   "\n"
 	   "/* END OF FILE */\n");
 
+    free(pointers);
     exit(0);
 }
diff --git a/libcap/cap_alloc.c b/libcap/cap_alloc.c
index 525ea90..504abd2 100644
--- a/libcap/cap_alloc.c
+++ b/libcap/cap_alloc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-8 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2019,2021 Andrew G Morgan <morgan@kernel.org>
  *
  * This file deals with allocation and deallocation of internal
  * capability sets as specified by POSIX.1e (formerlly, POSIX 6).
@@ -8,26 +8,65 @@
 #include "libcap.h"
 
 /*
- * Obtain a blank set of capabilities
+ * Make start up atomic.
  */
+static __u8 __libcap_mutex;
+
+/*
+ * This gets set by the constructor function below, which runs before main().
+ */
+static cap_value_t _cap_max_bits;
+
+__attribute__((visibility ("hidden"))) /* Constructor: fills in _cap_max_bits once, under __libcap_mutex. */
+__attribute__((constructor (300))) void _libcap_initialize(void)
+{
+    int errno_saved = errno; /* put back before returning, so errno is left as it was found */
+    _cap_mu_lock(&__libcap_mutex);
+    if (!_cap_max_bits) { /* still zero: nothing has been recorded yet */
+	cap_set_syscall(NULL, NULL);
+	_binary_search(_cap_max_bits, cap_get_bound, 0, __CAP_MAXBITS,
+		       __CAP_BITS); /* NOTE(review): presumably probes cap_get_bound() to find the value - confirm against the macro */
+    }
+    _cap_mu_unlock(&__libcap_mutex);
+    errno = errno_saved;
+}
 
+cap_value_t cap_max_bits(void) /* Return the value stored in _cap_max_bits by _libcap_initialize(). */
+{
+    return _cap_max_bits;
+}
+
+/*
+ * capability allocation is all done in terms of this structure.
+ */
+struct _cap_alloc_s {
+    __u32 magic; /* CAP_T_MAGIC, CAP_S_MAGIC, CAP_IAB_MAGIC or CAP_LAUNCH_MAGIC; cap_free() switches on it */
+    __u32 size;  /* allocated size in bytes; cap_free() zeroes this many bytes before free() */
+    union {
+	struct _cap_struct set; /* returned by cap_init() */
+	struct cap_iab_s iab; /* returned by cap_iab_init() */
+	struct cap_launch_s launcher; /* returned by cap_new_launcher() and cap_func_launcher() */
+    } u;
+};
+
+/*
+ * Obtain a blank set of capabilities
+ */
 cap_t cap_init(void)
 {
-    __u32 *raw_data;
+    struct _cap_alloc_s *raw_data;
     cap_t result;
 
-    raw_data = malloc( sizeof(__u32) + sizeof(*result) );
-
+    raw_data = calloc(1, sizeof(struct _cap_alloc_s));
     if (raw_data == NULL) {
 	_cap_debug("out of memory");
 	errno = ENOMEM;
 	return NULL;
     }
+    raw_data->magic = CAP_T_MAGIC;
+    raw_data->size = sizeof(struct _cap_alloc_s);
 
-    *raw_data = CAP_T_MAGIC;
-    result = (cap_t) (raw_data + 1);
-    memset(result, 0, sizeof(*result));
-
+    result = &raw_data->u.set;
     result->head.version = _LIBCAP_CAPABILITY_VERSION;
     capget(&result->head, NULL);      /* load the kernel-capability version */
 
@@ -57,34 +96,47 @@ cap_t cap_init(void)
  * This is an internal library function to duplicate a string and
  * tag the result as something cap_free can handle.
  */
-
-char *_libcap_strdup(const char *old)
+__attribute__((visibility ("hidden"))) char *_libcap_strdup(const char *old)
 {
-    __u32 *raw_data;
+    struct _cap_alloc_s *header;
+    char *raw_data;
+    size_t len;
 
     if (old == NULL) {
 	errno = EINVAL;
 	return NULL;
     }
 
-    raw_data = malloc( sizeof(__u32) + strlen(old) + 1 );
+    len = strlen(old);
+    if ((len & 0x3fffffff) != len) {
+	_cap_debug("len is too long for libcap to manage");
+	errno = EINVAL;
+	return NULL;
+    }
+    len += 1 + 2*sizeof(__u32);
+    if (len < sizeof(struct _cap_alloc_s)) {
+	len = sizeof(struct _cap_alloc_s);
+    }
+
+    raw_data = calloc(1, len);
     if (raw_data == NULL) {
 	errno = ENOMEM;
 	return NULL;
     }
+    header = (void *) raw_data;
+    header->magic = CAP_S_MAGIC;
+    header->size = (__u32) len;
 
-    *(raw_data++) = CAP_S_MAGIC;
-    strcpy((char *) raw_data, old);
-
-    return ((char *) raw_data);
+    raw_data += 2*sizeof(__u32);
+    strcpy(raw_data, old);
+    return raw_data;
 }
 
 /*
  * This function duplicates an internal capability set with
- * malloc()'d memory. It is the responsibility of the user to call
+ * calloc()'d memory. It is the responsibility of the user to call
  * cap_free() to liberate it.
  */
-
 cap_t cap_dup(cap_t cap_d)
 {
     cap_t result;
@@ -101,39 +153,155 @@ cap_t cap_dup(cap_t cap_d)
 	return NULL;
     }
 
+    _cap_mu_lock(&cap_d->mutex);
     memcpy(result, cap_d, sizeof(*cap_d));
+    _cap_mu_unlock(&cap_d->mutex);
+    _cap_mu_unlock(&result->mutex);
+
+    return result;
+}
+
+cap_iab_t cap_iab_init(void) /* Allocate a zero-filled IAB object; returns NULL if calloc() fails. */
+{
+    struct _cap_alloc_s *base = calloc(1, sizeof(struct _cap_alloc_s));
+    if (base == NULL) {
+	_cap_debug("out of memory");
+	return NULL;
+    }
+    base->magic = CAP_IAB_MAGIC; /* the tag cap_free() recognizes */
+    base->size = sizeof(struct _cap_alloc_s);
+    return &base->u.iab; /* the caller gets the payload, which follows the magic/size header */
+}
+
+/*
+ * This function duplicates an internal iab tuple with calloc()'d
+ * memory. It is the responsibility of the user to call cap_free() to
+ * liberate it. Returns NULL on a bad argument (errno EINVAL) or no memory.
+ */
+cap_iab_t cap_iab_dup(cap_iab_t iab)
+{
+    cap_iab_t result;
+
+    if (!good_cap_iab_t(iab)) {
+	_cap_debug("bad argument");
+	errno = EINVAL;
+	return NULL;
+    }
+
+    result = cap_iab_init();
+    if (result == NULL) {
+	_cap_debug("out of memory");
+	return NULL;
+    }
+
+    _cap_mu_lock(&iab->mutex); /* hold the source while it is copied */
+    memcpy(result, iab, sizeof(*iab));
+    _cap_mu_unlock(&iab->mutex);
+    _cap_mu_unlock(&result->mutex); /* the copy was made while iab's mutex was held, so release the copy's too */
+ 
+     return result;
+ }
 
+/*
+ * cap_new_launcher allocates some memory for a launcher and
+ * initializes it.  To actually launch a program with this launcher,
+ * use cap_launch(). By default, the launcher is a no-op from a
+ * security perspective and will act just as fork()/execve()
+ * would. Use cap_launcher_setuid() etc to override this.
+ */
+cap_launch_t cap_new_launcher(const char *arg0, const char * const *argv,
+			      const char * const *envp)
+{
+    struct _cap_alloc_s *data = calloc(1, sizeof(struct _cap_alloc_s));
+    if (data == NULL) {
+	_cap_debug("out of memory");
+	return NULL; /* allocation failed */
+    }
+    data->magic = CAP_LAUNCH_MAGIC; /* the tag cap_free() recognizes */
+    data->size = sizeof(struct _cap_alloc_s);
+
+    struct cap_launch_s *attr = &data->u.launcher;
+    attr->arg0 = arg0; /* the three pointers are stored as given, not copied */
+    attr->argv = argv;
+    attr->envp = envp;
+    return attr;
+}
 
 /*
- * Scrub and then liberate an internal capability set.
+ * cap_func_launcher allocates some memory for a launcher and
+ * initializes it. The purpose of this launcher, unlike one created
+ * with cap_new_launcher(), is to execute some function code from a
+ * forked copy of the program. The forked process will exit when the
+ * callback function, callback_fn, returns.
  */
+cap_launch_t cap_func_launcher(int (callback_fn)(void *detail)) /* Allocate a launcher that records callback_fn; NULL if calloc() fails. */
+{
+    struct _cap_alloc_s *data = calloc(1, sizeof(struct _cap_alloc_s));
+    if (data == NULL) {
+	_cap_debug("out of memory");
+	return NULL;
+    }
+    data->magic = CAP_LAUNCH_MAGIC; /* the tag cap_free() recognizes */
+    data->size = sizeof(struct _cap_alloc_s);
+
+    struct cap_launch_s *attr = &data->u.launcher;
+    attr->custom_setup_fn = callback_fn; /* every other field stays zero from calloc() */
+    return attr;
+}
 
+/*
+ * Scrub and then liberate the recognized allocated object.
+ */
 int cap_free(void *data_p)
 {
-    if ( !data_p )
+    if (!data_p) {
 	return 0;
+    }
 
-    if ( good_cap_t(data_p) ) {
-	data_p = -1 + (__u32 *) data_p;
-	memset(data_p, 0, sizeof(__u32) + sizeof(struct _cap_struct));
-	free(data_p);
-	data_p = NULL;
-	return 0;
+    /* confirm alignment */
+    if ((sizeof(uintptr_t)-1) & (uintptr_t) data_p) {
+	_cap_debug("whatever we're cap_free()ing it isn't aligned right: %p",
+		   data_p);
+	errno = EINVAL;
+	return -1;
     }
 
-    if ( good_cap_string(data_p) ) {
-	size_t length = strlen(data_p) + sizeof(__u32);
-     	data_p = -1 + (__u32 *) data_p;
-     	memset(data_p, 0, length);
-     	free(data_p);
-     	data_p = NULL;
-     	return 0;
+    void *base = (void *) (-2 + (__u32 *) data_p);
+    struct _cap_alloc_s *data = base;
+    switch (data->magic) {
+    case CAP_T_MAGIC:
+	_cap_mu_lock(&data->u.set.mutex);
+	break;
+    case CAP_S_MAGIC:
+    case CAP_IAB_MAGIC:
+	break;
+    case CAP_LAUNCH_MAGIC:
+	if (data->u.launcher.iab != NULL) {
+	    _cap_mu_unlock(&data->u.launcher.iab->mutex);
+	    if (cap_free(data->u.launcher.iab) != 0) {
+		return -1;
+	    }
+	}
+	data->u.launcher.iab = NULL;
+	if (cap_free(data->u.launcher.chroot) != 0) {
+	    return -1;
+	}
+	data->u.launcher.chroot = NULL;
+	break;
+    default:
+	_cap_debug("don't recognize what we're supposed to liberate");
+	errno = EINVAL;
+	return -1;
     }
 
-    _cap_debug("don't recognize what we're supposed to liberate");
-    errno = EINVAL;
-    return -1;
+    /*
+     * operate here with respect to base, to avoid tangling with the
+     * automated buffer overflow detection.
+     */
+    memset(base, 0, data->size);
+    free(base);
+    data_p = NULL;
+    data = NULL;
+    base = NULL;
+    return 0;
 }
diff --git a/libcap/cap_extint.c b/libcap/cap_extint.c
index 7d6e7ad..462cc65 100644
--- a/libcap/cap_extint.c
+++ b/libcap/cap_extint.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-8 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2021 Andrew G. Morgan <morgan@kernel.org>
  *
  * This file deals with exchanging internal and external
  * representations of capability sets.
@@ -15,6 +15,11 @@
 #define CAP_EXT_MAGIC_SIZE 4
 const static __u8 external_magic[CAP_EXT_MAGIC_SIZE+1] = CAP_EXT_MAGIC;
 
+/*
+ * This is the largest size libcap can currently export.
+ * cap_size() may return something smaller depending on the
+ * content of its argument cap_t.
+ */
 struct cap_ext_struct {
     __u8 magic[CAP_EXT_MAGIC_SIZE];
     __u8 length_of_capset;
@@ -26,12 +31,54 @@ struct cap_ext_struct {
 };
 
 /*
- * return size of external capability set
+ * minimum exported flag size: libcap2 has always exported with flags
+ * this size.
  */
+static size_t _libcap_min_ext_flag_size = CAP_SET_SIZE < 8 ? CAP_SET_SIZE : 8;
 
-ssize_t cap_size(cap_t caps)
+static ssize_t _cap_size_locked(cap_t cap_d)
 {
-    return ssizeof(struct cap_ext_struct);
+    size_t j, used;
+    for (j=used=0; j<CAP_SET_SIZE; j+=sizeof(__u32)) {
+	int i;
+	__u32 val = 0;
+	for (i=0; i<NUMBER_OF_CAP_SETS; ++i) {
+	    val |= cap_d->u[j/sizeof(__u32)].flat[i];
+	}
+	if (val == 0) {
+	    continue;
+	}
+	if (val > 0x0000ffff) {
+	    if (val > 0x00ffffff) {
+		used = j+4;
+	    } else {
+		used = j+3;
+	    }
+	} else if (val > 0x000000ff) {
+	    used = j+2;
+	} else {
+	    used = j+1;
+	}
+    }
+    if (used < _libcap_min_ext_flag_size) {
+	used = _libcap_min_ext_flag_size;
+    }
+    return (ssize_t)(CAP_EXT_MAGIC_SIZE + 1+ NUMBER_OF_CAP_SETS * used);
+}
+
+/*
+ * return size of external capability set
+ */
+ssize_t cap_size(cap_t cap_d)
+{
+    size_t used;
+    if (!good_cap_t(cap_d)) {
+	return ssizeof(struct cap_ext_struct); /* not a valid cap_t: report the size of the full external struct */
+    }
+    _cap_mu_lock(&cap_d->mutex); /* _cap_size_locked() is called with the mutex held */
+    used = _cap_size_locked(cap_d);
+    _cap_mu_unlock(&cap_d->mutex);
+    return used;
+}
 
 /*
@@ -43,42 +90,58 @@ ssize_t cap_size(cap_t caps)
 ssize_t cap_copy_ext(void *cap_ext, cap_t cap_d, ssize_t length)
 {
     struct cap_ext_struct *result = (struct cap_ext_struct *) cap_ext;
+    ssize_t csz, len_set;
     int i;
 
     /* valid arguments? */
-    if (!good_cap_t(cap_d) || length < ssizeof(struct cap_ext_struct)
-	|| cap_ext == NULL) {
+    if (!good_cap_t(cap_d) || cap_ext == NULL) {
 	errno = EINVAL;
 	return -1;
     }
 
+    _cap_mu_lock(&cap_d->mutex);
+    csz = _cap_size_locked(cap_d);
+    if (csz > length) {
+	errno = EINVAL;
+	_cap_mu_unlock_return(&cap_d->mutex, -1);
+    }
+    len_set = (csz - (CAP_EXT_MAGIC_SIZE+1))/NUMBER_OF_CAP_SETS;
+
     /* fill external capability set */
     memcpy(&result->magic, external_magic, CAP_EXT_MAGIC_SIZE);
-    result->length_of_capset = CAP_SET_SIZE;
+    result->length_of_capset = len_set;
 
     for (i=0; i<NUMBER_OF_CAP_SETS; ++i) {
 	size_t j;
-	for (j=0; j<CAP_SET_SIZE; ) {
+	for (j=0; j<len_set; ) {
 	    __u32 val;
 
 	    val = cap_d->u[j/sizeof(__u32)].flat[i];
 
-	    result->bytes[j++][i] =  val        & 0xFF;
-	    result->bytes[j++][i] = (val >>= 8) & 0xFF;
-	    result->bytes[j++][i] = (val >>= 8) & 0xFF;
-	    result->bytes[j++][i] = (val >> 8)  & 0xFF;
+	    result->bytes[j++][i] =      val        & 0xFF;
+	    if (j < len_set) {
+		result->bytes[j++][i] = (val >>= 8) & 0xFF;
+	    }
+	    if (j < len_set) {
+		result->bytes[j++][i] = (val >>= 8) & 0xFF;
+	    }
+	    if (j < len_set) {
+		result->bytes[j++][i] = (val >> 8)  & 0xFF;
+	    }
 	}
     }
 
     /* All done: return length of external representation */
-    return (ssizeof(struct cap_ext_struct));
+    _cap_mu_unlock_return(&cap_d->mutex, csz);
 }
 
 /*
  * Import an external representation to produce an internal rep.
  * the internal rep should be liberated with cap_free().
+ *
+ * Note, this function assumes that cap_ext has a valid length. That
+ * is, feeding garbage to this function will likely crash the program.
  */
-
 cap_t cap_copy_int(const void *cap_ext)
 {
     const struct cap_ext_struct *export =
@@ -121,3 +184,24 @@ cap_t cap_copy_int(const void *cap_ext)
     return cap_d;
 }
 
+/*
+ * This function is the same as cap_copy_int() although it requires an
+ * extra argument that is the length of the cap_ext data. Before
+ * running cap_copy_int() the function validates that length is
+ * consistent with the stated length. It returns NULL on error.
+ */
+cap_t cap_copy_int_check(const void *cap_ext, ssize_t length)
+{
+    const struct cap_ext_struct *export =
+	(const struct cap_ext_struct *) cap_ext;
+
+    if (length < 1+CAP_EXT_MAGIC_SIZE) { /* too short to hold the magic and the length byte */
+	errno = EINVAL;
+	return NULL;
+    }
+    if (length < 1+CAP_EXT_MAGIC_SIZE + export->length_of_capset * NUMBER_OF_CAP_SETS) { /* shorter than the header says */
+	errno = EINVAL;
+	return NULL;
+    }
+    return cap_copy_int(cap_ext); /* length is plausible: defer to the unchecked importer */
+}
diff --git a/libcap/cap_file.c b/libcap/cap_file.c
index d3dc1d0..0bc07f7 100644
--- a/libcap/cap_file.c
+++ b/libcap/cap_file.c
@@ -1,16 +1,46 @@
 /*
- * Copyright (c) 1997,2007 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997,2007,2016 Andrew G Morgan <morgan@kernel.org>
  *
- * This file deals with setting capabilities on files.
+ * This file deals with getting/setting capabilities from/on files.
  */
 
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
 #include <sys/types.h>
-#include <sys/xattr.h>
 #include <byteswap.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
+/*
+ * We hardcode the prototypes for the Linux system calls here since
+ * there are no libcap library APIs that expose the user to these
+ * details, and that way we don't need to force clients to link any
+ * other libraries to access them.
+ */
+extern ssize_t getxattr(const char *, const char *, void *, size_t);
+extern ssize_t fgetxattr(int, const char *, void *, size_t);
+extern int setxattr(const char *, const char *, const void *, size_t, int);
+extern int fsetxattr(int, const char *, const void *, size_t, int);
+extern int removexattr(const char *, const char *);
+extern int fremovexattr(int, const char *);
+
+/*
+ * This public API was moved to include/uapi/linux/xattr.h . For just
+ * these definitions, it isn't really worth managing this in our build
+ * system with yet another copy of a header file. We just provide
+ * fallback definitions here.
+ */
+#ifndef XATTR_CAPS_SUFFIX
+#define XATTR_CAPS_SUFFIX "capability"
+#endif
+#ifndef XATTR_SECURITY_PREFIX
 #define XATTR_SECURITY_PREFIX "security."
+#endif
+#ifndef XATTR_NAME_CAPS
+#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+#endif
 
 #include "libcap.h"
 
@@ -26,7 +56,7 @@
 #define FIXUP_32BITS(x) (x)
 #endif
 
-static cap_t _fcaps_load(struct vfs_cap_data *rawvfscap, cap_t result,
+static cap_t _fcaps_load(struct vfs_ns_cap_data *rawvfscap, cap_t result,
 			 int bytes)
 {
     __u32 magic_etc;
@@ -34,19 +64,21 @@ static cap_t _fcaps_load(struct vfs_cap_data *rawvfscap, cap_t result,
 
     magic_etc = FIXUP_32BITS(rawvfscap->magic_etc);
     switch (magic_etc & VFS_CAP_REVISION_MASK) {
-#ifdef VFS_CAP_REVISION_1
     case VFS_CAP_REVISION_1:
 	tocopy = VFS_CAP_U32_1;
 	bytes -= XATTR_CAPS_SZ_1;
 	break;
-#endif
 
-#ifdef VFS_CAP_REVISION_2
     case VFS_CAP_REVISION_2:
 	tocopy = VFS_CAP_U32_2;
 	bytes -= XATTR_CAPS_SZ_2;
 	break;
-#endif
+
+    case VFS_CAP_REVISION_3:
+	tocopy = VFS_CAP_U32_3;
+	bytes -= XATTR_CAPS_SZ_3;
+	result->rootid = FIXUP_32BITS(rawvfscap->rootid);
+	break;
 
     default:
 	cap_free(result);
@@ -84,7 +116,7 @@ static cap_t _fcaps_load(struct vfs_cap_data *rawvfscap, cap_t result,
     return result;
 }
 
-static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
+static int _fcaps_save(struct vfs_ns_cap_data *rawvfscap, cap_t cap_d,
 		       int *bytes_p)
 {
     __u32 eff_not_zero, magic;
@@ -94,35 +126,37 @@ static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
 	errno = EINVAL;
 	return -1;
     }
+    _cap_mu_lock(&cap_d->mutex);
 
     switch (cap_d->head.version) {
-#ifdef _LINUX_CAPABILITY_VERSION_1
     case _LINUX_CAPABILITY_VERSION_1:
 	magic = VFS_CAP_REVISION_1;
 	tocopy = VFS_CAP_U32_1;
 	*bytes_p = XATTR_CAPS_SZ_1;
 	break;
-#endif
 
-#ifdef _LINUX_CAPABILITY_VERSION_2
     case _LINUX_CAPABILITY_VERSION_2:
-	magic = VFS_CAP_REVISION_2;
-	tocopy = VFS_CAP_U32_2;
-	*bytes_p = XATTR_CAPS_SZ_2;
-	break;
-#endif
-
-#ifdef _LINUX_CAPABILITY_VERSION_3
     case _LINUX_CAPABILITY_VERSION_3:
 	magic = VFS_CAP_REVISION_2;
 	tocopy = VFS_CAP_U32_2;
 	*bytes_p = XATTR_CAPS_SZ_2;
 	break;
-#endif
 
     default:
 	errno = EINVAL;
-	return -1;
+	_cap_mu_unlock_return(&cap_d->mutex, -1);
+    }
+
+    if (cap_d->rootid != 0) {
+	if (cap_d->head.version < _LINUX_CAPABILITY_VERSION_3) {
+	    _cap_debug("namespaces with non-0 rootid unsupported by kernel");
+	    errno = EINVAL;
+	    _cap_mu_unlock_return(&cap_d->mutex, -1);
+	}
+	magic = VFS_CAP_REVISION_3;
+	tocopy = VFS_CAP_U32_3;
+	*bytes_p = XATTR_CAPS_SZ_3;
+	rawvfscap->rootid = FIXUP_32BITS(cap_d->rootid);
     }
 
     _cap_debug("setting named file capabilities");
@@ -138,7 +172,7 @@ static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
 	     * System does not support these capabilities
 	     */
 	    errno = EINVAL;
-	    return -1;
+	    _cap_mu_unlock_return(&cap_d->mutex, -1);
 	}
 	i++;
     }
@@ -154,7 +188,7 @@ static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
 		& (cap_d->u[i].flat[CAP_PERMITTED]
 		   | cap_d->u[i].flat[CAP_INHERITABLE]))) {
 	    errno = EINVAL;
-	    return -1;
+	    _cap_mu_unlock_return(&cap_d->mutex, -1);
 	}
     }
 
@@ -164,7 +198,7 @@ static int _fcaps_save(struct vfs_cap_data *rawvfscap, cap_t cap_d,
 	rawvfscap->magic_etc = FIXUP_32BITS(magic|VFS_CAP_FLAGS_EFFECTIVE);
     }
 
-    return 0;      /* success */
+    _cap_mu_unlock_return(&cap_d->mutex, 0);    /* success */
 }
 
 /*
@@ -179,7 +213,7 @@ cap_t cap_get_fd(int fildes)
     /* allocate a new capability set */
     result = cap_init();
     if (result) {
-	struct vfs_cap_data rawvfscap;
+	struct vfs_ns_cap_data rawvfscap;
 	int sizeofcaps;
 
 	_cap_debug("getting fildes capabilities");
@@ -209,7 +243,7 @@ cap_t cap_get_file(const char *filename)
     /* allocate a new capability set */
     result = cap_init();
     if (result) {
-	struct vfs_cap_data rawvfscap;
+	struct vfs_ns_cap_data rawvfscap;
 	int sizeofcaps;
 
 	_cap_debug("getting filename capabilities");
@@ -229,13 +263,31 @@ cap_t cap_get_file(const char *filename)
 }
 
 /*
+ * Get rootid as seen in the current user namespace for the file capability
+ * sets.
+ */
+
+uid_t cap_get_nsowner(cap_t cap_d)
+{
+    uid_t nsowner;
+    if (!good_cap_t(cap_d)) {
+	errno = EINVAL;
+	return -1;
+    }
+    _cap_mu_lock(&cap_d->mutex);
+    nsowner = cap_d->rootid;
+    _cap_mu_unlock(&cap_d->mutex);
+    return nsowner;
+}
+
+/*
  * Set the capabilities of an open file, as specified by its file
  * descriptor.
  */
 
 int cap_set_fd(int fildes, cap_t cap_d)
 {
-    struct vfs_cap_data rawvfscap;
+    struct vfs_ns_cap_data rawvfscap;
     int sizeofcaps;
     struct stat buf;
 
@@ -267,7 +319,7 @@ int cap_set_fd(int fildes, cap_t cap_d)
 
 int cap_set_file(const char *filename, cap_t cap_d)
 {
-    struct vfs_cap_data rawvfscap;
+    struct vfs_ns_cap_data rawvfscap;
     int sizeofcaps;
     struct stat buf;
 
@@ -292,6 +344,20 @@ int cap_set_file(const char *filename, cap_t cap_d)
     return setxattr(filename, XATTR_NAME_CAPS, &rawvfscap, sizeofcaps, 0);
 }
 
+/*
+ * Set nsowner for the file capability set.
+ */
+int cap_set_nsowner(cap_t cap_d, uid_t rootuid)
+{
+    if (!good_cap_t(cap_d)) {
+	errno = EINVAL;
+	return -1;
+    }
+    _cap_mu_lock(&cap_d->mutex);
+    cap_d->rootid = rootuid;
+    _cap_mu_unlock_return(&cap_d->mutex, 0);
+}
+
 #else /* ie. ndef VFS_CAP_U32 */
 
 cap_t cap_get_fd(int fildes)
@@ -306,6 +372,12 @@ cap_t cap_get_file(const char *filename)
     return NULL;
 }
 
+uid_t cap_get_nsowner(cap_t cap_d)
+{
+    errno = EINVAL;
+    return -1;
+}
+
 int cap_set_fd(int fildes, cap_t cap_d)
 {
     errno = EINVAL;
@@ -318,4 +390,10 @@ int cap_set_file(const char *filename, cap_t cap_d)
     return -1;
 }
 
+int cap_set_nsowner(cap_t cap_d, uid_t rootuid)
+{
+    errno = EINVAL;
+    return -1;
+}
+
 #endif /* def VFS_CAP_U32 */
diff --git a/libcap/cap_flag.c b/libcap/cap_flag.c
index 52ec3b3..94afd1e 100644
--- a/libcap/cap_flag.c
+++ b/libcap/cap_flag.c
@@ -1,8 +1,10 @@
 /*
- * Copyright (c) 1997-8,2008 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2008,20-21 Andrew G. Morgan <morgan@kernel.org>
  *
  * This file deals with flipping of capabilities on internal
  * capability sets as specified by POSIX.1e (formerlly, POSIX 6).
+ *
+ * It also contains similar code for bit flipping cap_iab_t values.
  */
 
 #include "libcap.h"
@@ -12,7 +14,6 @@
  * returned as the contents of *raised.  The capability is from one of
  * the sets stored in cap_d as specified by set and value
  */
-
 int cap_get_flag(cap_t cap_d, cap_value_t value, cap_flag_t set,
 		 cap_flag_value_t *raised)
 {
@@ -21,9 +22,11 @@ int cap_get_flag(cap_t cap_d, cap_value_t value, cap_flag_t set,
      * Is it a known capability?
      */
 
-    if (raised && good_cap_t(cap_d) && value >= 0 && value < __CAP_BITS
+    if (raised && good_cap_t(cap_d) && value >= 0 && value < __CAP_MAXBITS
 	&& set >= 0 && set < NUMBER_OF_CAP_SETS) {
+	_cap_mu_lock(&cap_d->mutex);
 	*raised = isset_cap(cap_d,value,set) ? CAP_SET:CAP_CLEAR;
+	_cap_mu_unlock(&cap_d->mutex);
 	return 0;
     } else {
 	_cap_debug("invalid arguments");
@@ -45,12 +48,13 @@ int cap_set_flag(cap_t cap_d, cap_flag_t set,
      * Is it a known capability?
      */
 
-    if (good_cap_t(cap_d) && no_values > 0 && no_values <= __CAP_BITS
+    if (good_cap_t(cap_d) && no_values > 0 && no_values < __CAP_MAXBITS
 	&& (set >= 0) && (set < NUMBER_OF_CAP_SETS)
 	&& (raise == CAP_SET || raise == CAP_CLEAR) ) {
 	int i;
+	_cap_mu_lock(&cap_d->mutex);
 	for (i=0; i<no_values; ++i) {
-	    if (array_values[i] < 0 || array_values[i] >= __CAP_BITS) {
+	    if (array_values[i] < 0 || array_values[i] >= __CAP_MAXBITS) {
 		_cap_debug("weird capability (%d) - skipped", array_values[i]);
 	    } else {
 		int value = array_values[i];
@@ -62,14 +66,12 @@ int cap_set_flag(cap_t cap_d, cap_flag_t set,
 		}
 	    }
 	}
+	_cap_mu_unlock(&cap_d->mutex);
 	return 0;
-
     } else {
-
 	_cap_debug("invalid arguments");
 	errno = EINVAL;
 	return -1;
-
     }
 }
 
@@ -80,16 +82,14 @@ int cap_set_flag(cap_t cap_d, cap_flag_t set,
 int cap_clear(cap_t cap_d)
 {
     if (good_cap_t(cap_d)) {
-
+	_cap_mu_lock(&cap_d->mutex);
 	memset(&(cap_d->u), 0, sizeof(cap_d->u));
+	_cap_mu_unlock(&cap_d->mutex);
 	return 0;
-
     } else {
-
 	_cap_debug("invalid pointer");
 	errno = EINVAL;
 	return -1;
-
     }
 }
 
@@ -106,9 +106,11 @@ int cap_clear_flag(cap_t cap_d, cap_flag_t flag)
 	if (good_cap_t(cap_d)) {
 	    unsigned i;
 
+	    _cap_mu_lock(&cap_d->mutex);
 	    for (i=0; i<_LIBCAP_CAPABILITY_U32S; i++) {
 		cap_d->u[i].flat[flag] = 0;
 	    }
+	    _cap_mu_unlock(&cap_d->mutex);
 	    return 0;
 	}
 	/*
@@ -125,7 +127,6 @@ int cap_clear_flag(cap_t cap_d, cap_flag_t flag)
 /*
  * Compare two capability sets
  */
-
 int cap_compare(cap_t a, cap_t b)
 {
     unsigned i;
@@ -137,6 +138,15 @@ int cap_compare(cap_t a, cap_t b)
 	return -1;
     }
 
+    /*
+     * To avoid a deadlock corner case, we operate on an unlocked
+     * private copy of b
+     */
+    b = cap_dup(b);
+    if (b == NULL) {
+	return -1;
+    }
+    _cap_mu_lock(&a->mutex);
     for (i=0, result=0; i<_LIBCAP_CAPABILITY_U32S; i++) {
 	result |=
 	    ((a->u[i].flat[CAP_EFFECTIVE] != b->u[i].flat[CAP_EFFECTIVE])
@@ -146,5 +156,214 @@ int cap_compare(cap_t a, cap_t b)
 	    | ((a->u[i].flat[CAP_PERMITTED] != b->u[i].flat[CAP_PERMITTED])
 	       ? LIBCAP_PER : 0);
     }
+    _cap_mu_unlock(&a->mutex);
+    cap_free(b);
+    return result;
+}
+
+/*
+ * cap_fill_flag copies a bit-vector of capability state in one cap_t from one
+ * flag to another flag of another cap_t.
+ */
+int cap_fill_flag(cap_t cap_d, cap_flag_t to, cap_t ref, cap_flag_t from)
+{
+    int i;
+    cap_t orig;
+
+    if (!good_cap_t(cap_d) || !good_cap_t(ref)) {
+	errno = EINVAL;
+	return -1;
+    }
+
+    if (to < CAP_EFFECTIVE || to > CAP_INHERITABLE ||
+	from < CAP_EFFECTIVE || from > CAP_INHERITABLE) {
+	errno = EINVAL;
+	return -1;
+    }
+
+    orig = cap_dup(ref);
+    if (orig == NULL) {
+	return -1;
+    }
+
+    _cap_mu_lock(&cap_d->mutex);
+    for (i = 0; i < _LIBCAP_CAPABILITY_U32S; i++) {
+	cap_d->u[i].flat[to] = orig->u[i].flat[from];
+    }
+    _cap_mu_unlock(&cap_d->mutex);
+
+    cap_free(orig);
+    return 0;
+}
+
+/*
+ * cap_fill copies a bit-vector of capability state in a cap_t from
+ * one flag to another.
+ */
+int cap_fill(cap_t cap_d, cap_flag_t to, cap_flag_t from)
+{
+    return cap_fill_flag(cap_d, to, cap_d, from);
+}
+
+/*
+ * cap_iab_get_vector reads one bit of an IAB vector; 0 for invalid arguments.
+ */
+cap_flag_value_t cap_iab_get_vector(cap_iab_t iab, cap_iab_vector_t vec,
+				    cap_value_t bit)
+{
+    if (!good_cap_iab_t(iab) || bit < 0 || bit >= cap_max_bits()) {
+	return 0;
+    }
+    /* bit >= 0 here, so (bit >> 5) cannot wrap to a huge unsigned index */
+    unsigned o = (bit >> 5);
+    __u32 mask = 1u << (bit & 31);
+    cap_flag_value_t ret;
+
+    _cap_mu_lock(&iab->mutex);
+    switch (vec) {
+    case CAP_IAB_INH:
+	ret = !!(iab->i[o] & mask);
+	break;
+    case CAP_IAB_AMB:
+	ret = !!(iab->a[o] & mask);
+	break;
+    case CAP_IAB_BOUND:
+	ret = !!(iab->nb[o] & mask);
+	break;
+    default:
+	ret = 0;
+    }
+    _cap_mu_unlock(&iab->mutex);
+
+    return ret;
+}
+
+/*
+ * cap_iab_set_vector sets one bit of an IAB vector to the value raised.
+ * Raising A also raises I and lowering I also lowers A; B is independent.
+ * Returns 0, or -1 with errno = EINVAL for bad arguments (e.g. bit < 0).
+ */
+int cap_iab_set_vector(cap_iab_t iab, cap_iab_vector_t vec, cap_value_t bit,
+		       cap_flag_value_t raised)
+{
+    if (!good_cap_iab_t(iab) || (raised >> 1) ||
+	bit < 0 || bit >= cap_max_bits()) {
+	errno = EINVAL;
+	return -1;
+    }
+    unsigned o = (bit >> 5);
+    __u32 on = 1u << (bit & 31);
+    __u32 mask = ~on;
+
+    _cap_mu_lock(&iab->mutex);
+    switch (vec) {
+    case CAP_IAB_INH:
+	iab->i[o] = (iab->i[o] & mask) | (raised ? on : 0);
+	iab->a[o] &= iab->i[o];
+	break;
+    case CAP_IAB_AMB:
+	iab->a[o] = (iab->a[o] & mask) | (raised ? on : 0);
+	iab->i[o] |= iab->a[o];
+	break;
+    case CAP_IAB_BOUND:
+	iab->nb[o] = (iab->nb[o] & mask) | (raised ? on : 0);
+	break;
+    default:
+	errno = EINVAL;
+	_cap_mu_unlock_return(&iab->mutex, -1);
+    }
+
+    _cap_mu_unlock(&iab->mutex);
+    return 0;
+}
+
+/*
+ * cap_iab_fill copies a bit-vector of capability state from a cap_t
+ * to a cap_iab_t. Note, because the bounding bits in an iab are to be
+ * dropped when applied, the copying process, when to a CAP_IAB_BOUND
+ * vector involves inverting the bits. Also, adjusting I will mask
+ * bits in A, and adjusting A may implicitly raise bits in I.
+ */
+int cap_iab_fill(cap_iab_t iab, cap_iab_vector_t vec,
+		 cap_t cap_d, cap_flag_t flag)
+{
+    int i, ret = 0;
+
+    if (!good_cap_t(cap_d) || !good_cap_iab_t(iab)) {
+	errno = EINVAL;
+	return -1;
+    }
+
+    switch (flag) {
+    case CAP_EFFECTIVE:
+    case CAP_INHERITABLE:
+    case CAP_PERMITTED:
+	break;
+    default:
+	errno = EINVAL;
+	return -1;
+    }
+
+    /*
+     * Make a private copy so we don't need to hold two locks at once
+     * avoiding a recipe for a deadlock.
+     */
+    cap_d = cap_dup(cap_d);
+    if (cap_d == NULL) {
+	return -1;
+    }
+
+    _cap_mu_lock(&iab->mutex);
+    for (i = 0; !ret && i < _LIBCAP_CAPABILITY_U32S; i++) {
+	switch (vec) {
+	case CAP_IAB_INH:
+	    iab->i[i] = cap_d->u[i].flat[flag];
+	    iab->a[i] &= iab->i[i];
+	    break;
+	case CAP_IAB_AMB:
+	    iab->a[i] = cap_d->u[i].flat[flag];
+	    iab->i[i] |= cap_d->u[i].flat[flag];
+	    break;
+	case CAP_IAB_BOUND:
+	    iab->nb[i] = ~cap_d->u[i].flat[flag];
+	    break;
+	default:
+	    errno = EINVAL;
+	    ret = -1;
+	    break;
+	}
+    }
+    _cap_mu_unlock(&iab->mutex);
+
+    cap_free(cap_d);
+    return ret;
+}
+
+/*
+ * cap_iab_compare compares two iab tuples.
+ */
+int cap_iab_compare(cap_iab_t a, cap_iab_t b)
+{
+    int j, result;
+    if (!(good_cap_iab_t(a) && good_cap_iab_t(b))) {
+	_cap_debug("invalid arguments");
+	errno = EINVAL;
+	return -1;
+    }
+    b = cap_iab_dup(b);
+    if (b == NULL) {
+	return -1;
+    }
+
+    _cap_mu_lock(&a->mutex);
+    for (j=0, result=0; j<_LIBCAP_CAPABILITY_U32S; j++) {
+	result |=
+	    (a->i[j] == b->i[j] ? 0 : (1 << CAP_IAB_INH)) |
+	    (a->a[j] == b->a[j] ? 0 : (1 << CAP_IAB_AMB)) |
+	    (a->nb[j] == b->nb[j] ? 0 : (1 << CAP_IAB_BOUND));
+    }
+    _cap_mu_unlock(&a->mutex);
+    cap_free(b);
+
     return result;
 }
diff --git a/libcap/cap_names.header b/libcap/cap_names.header
new file mode 100644
index 0000000..8d64f64
--- /dev/null
+++ b/libcap/cap_names.header
@@ -0,0 +1,5 @@
+struct __cap_token_s { const char *name; int index; };
+%{
+const struct __cap_token_s *__cap_lookup_name(const char *, size_t);
+%}
+%%
diff --git a/libcap/cap_proc.c b/libcap/cap_proc.c
index 8ecb57a..24bc274 100644
--- a/libcap/cap_proc.c
+++ b/libcap/cap_proc.c
@@ -1,13 +1,170 @@
 /*
- * Copyright (c) 1997-8,2007,2011 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2007,11,19-21 Andrew G Morgan <morgan@kernel.org>
  *
  * This file deals with getting and setting capabilities on processes.
  */
 
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <fcntl.h>              /* Obtain O_* constant definitions */
+#include <grp.h>
 #include <sys/prctl.h>
+#include <sys/securebits.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 
 #include "libcap.h"
 
+/*
+ * libcap uses this abstraction for all system calls that change
+ * kernel managed capability state. This permits the user to redirect
+ * it for testing and also to better implement posix semantics when
+ * using pthreads.
+ */
+
+static long int _cap_syscall3(long int syscall_nr,
+			      long int arg1, long int arg2, long int arg3)
+{
+    return syscall(syscall_nr, arg1, arg2, arg3);
+}
+
+static long int _cap_syscall6(long int syscall_nr,
+			      long int arg1, long int arg2, long int arg3,
+			      long int arg4, long int arg5, long int arg6)
+{
+    return syscall(syscall_nr, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+/*
+ * to keep the structure of the code conceptually similar in C and Go
+ * implementations, we introduce this abstraction for invoking state
+ * writing system calls. In psx+pthreaded code, the fork
+ * implementation provided by nptl ensures that we can consistently
+ * use the multithreaded syscalls even in the child after a fork().
+ */
+struct syscaller_s {
+    long int (*three)(long int syscall_nr,
+		      long int arg1, long int arg2, long int arg3);
+    long int (*six)(long int syscall_nr,
+		    long int arg1, long int arg2, long int arg3,
+		    long int arg4, long int arg5, long int arg6);
+};
+
+/* use this syscaller for multi-threaded code */
+static struct syscaller_s multithread = {
+    .three = _cap_syscall3,
+    .six = _cap_syscall6
+};
+
+/* use this syscaller for single-threaded code */
+static struct syscaller_s singlethread = {
+    .three = _cap_syscall3,
+    .six = _cap_syscall6
+};
+
+/*
+ * This gets reset to 0 if we are *not* linked with libpsx.
+ */
+static int _libcap_overrode_syscalls = 1;
+
+/*
+ * psx_load_syscalls() is weakly defined so we can have it overridden
+ * by libpsx if that library is linked. Specifically, when libcap
+ * calls psx_load_syscalls() it is prepared to override the default
+ * values for the syscalls that libcap uses to change security state.
+ * As can be seen here this present function is mostly a
+ * no-op. However, if libpsx is linked, the one present in that
+ * library (not being weak) will replace this one and the
+ * _libcap_overrode_syscalls value isn't forced to zero.
+ *
+ * Note: we hardcode the prototype for the psx_load_syscalls()
+ * function here so the compiler isn't worried. If we force the build
+ * to include the header, we are close to requiring the optional
+ * libpsx to be linked.
+ */
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+					      long int, long int, long int),
+		       long int (**syscall6_fn)(long int,
+						long int, long int, long int,
+						long int, long int, long int));
+
+__attribute__((weak))
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+					       long int, long int, long int),
+		       long int (**syscall6_fn)(long int,
+						long int, long int, long int,
+						long int, long int, long int))
+{
+    _libcap_overrode_syscalls = 0;
+}
+
+/*
+ * cap_set_syscall overrides the state setting syscalls that libcap does.
+ * Generally, you don't need to call this manually: libcap tries hard to
+ * set things up appropriately.
+ */
+void cap_set_syscall(long int (*new_syscall)(long int,
+					     long int, long int, long int),
+			    long int (*new_syscall6)(long int, long int,
+						     long int, long int,
+						     long int, long int,
+						     long int)) {
+    if (new_syscall == NULL) {
+	psx_load_syscalls(&multithread.three, &multithread.six);
+    } else {
+	multithread.three = new_syscall;
+	multithread.six = new_syscall6;
+    }
+}
+
+static int _libcap_capset(struct syscaller_s *sc,
+			  cap_user_header_t header, const cap_user_data_t data)
+{
+    if (_libcap_overrode_syscalls) {
+	return sc->three(SYS_capset, (long int) header, (long int) data, 0);
+    }
+    return capset(header, data);
+}
+
+static int _libcap_wprctl3(struct syscaller_s *sc,
+			   long int pr_cmd, long int arg1, long int arg2)
+{
+    if (_libcap_overrode_syscalls) {
+	int result;
+	result = sc->three(SYS_prctl, pr_cmd, arg1, arg2);
+	if (result >= 0) {
+	    return result;
+	}
+	errno = -result;
+	return -1;
+    }
+    return prctl(pr_cmd, arg1, arg2, 0, 0, 0);
+}
+
+static int _libcap_wprctl6(struct syscaller_s *sc,
+			   long int pr_cmd, long int arg1, long int arg2,
+			   long int arg3, long int arg4, long int arg5)
+{
+    if (_libcap_overrode_syscalls) {
+	int result;
+	result = sc->six(SYS_prctl, pr_cmd, arg1, arg2, arg3, arg4, arg5);
+	if (result >= 0) {
+	    return result;
+	}
+	errno = -result;
+	return -1;
+    }
+    return prctl(pr_cmd, arg1, arg2, arg3, arg4, arg5);
+}
+
+/*
+ * cap_get_proc obtains the capability set for the current process.
+ */
 cap_t cap_get_proc(void)
 {
     cap_t result;
@@ -27,8 +184,7 @@ cap_t cap_get_proc(void)
     return result;
 }
 
-int cap_set_proc(cap_t cap_d)
-{
+static int _cap_set_proc(struct syscaller_s *sc, cap_t cap_d) {
     int retval;
 
     if (!good_cap_t(cap_d)) {
@@ -37,11 +193,18 @@ int cap_set_proc(cap_t cap_d)
     }
 
     _cap_debug("setting process capabilities");
-    retval = capset(&cap_d->head, &cap_d->u[0].set);
+    _cap_mu_lock(&cap_d->mutex);
+    retval = _libcap_capset(sc, &cap_d->head, &cap_d->u[0].set);
+    _cap_mu_unlock(&cap_d->mutex);
 
     return retval;
 }
 
+int cap_set_proc(cap_t cap_d)
+{
+    return _cap_set_proc(&multithread, cap_d);
+}
+
 /* the following two functions are not required by POSIX */
 
 /* read the caps on a specific process */
@@ -57,9 +220,11 @@ int capgetp(pid_t pid, cap_t cap_d)
 
     _cap_debug("getting process capabilities for proc %d", pid);
 
+    _cap_mu_lock(&cap_d->mutex);
     cap_d->head.pid = pid;
     error = capget(&cap_d->head, &cap_d->u[0].set);
     cap_d->head.pid = 0;
+    _cap_mu_unlock(&cap_d->mutex);
 
     return error;
 }
@@ -85,7 +250,11 @@ cap_t cap_get_pid(pid_t pid)
     return result;
 }
 
-/* set the caps on a specific process/pg etc.. */
+/*
+ * set the caps on a specific process/pg etc.. The kernel has long
+ * since deprecated this asynchronous interface. DON'T EXPECT THIS TO
+ * EVER WORK AGAIN.
+ */
 
 int capsetp(pid_t pid, cap_t cap_d)
 {
@@ -97,30 +266,857 @@ int capsetp(pid_t pid, cap_t cap_d)
     }
 
     _cap_debug("setting process capabilities for proc %d", pid);
+    _cap_mu_lock(&cap_d->mutex);
     cap_d->head.pid = pid;
     error = capset(&cap_d->head, &cap_d->u[0].set);
     cap_d->head.version = _LIBCAP_CAPABILITY_VERSION;
     cap_d->head.pid = 0;
+    _cap_mu_unlock(&cap_d->mutex);
 
     return error;
 }
 
+/* the kernel api requires unsigned long arguments */
+#define pr_arg(x) ((unsigned long) x)
+
 /* get a capability from the bounding set */
 
 int cap_get_bound(cap_value_t cap)
 {
-    int result;
+    return prctl(PR_CAPBSET_READ, pr_arg(cap), pr_arg(0));
+}
 
-    result = prctl(PR_CAPBSET_READ, cap);
-    return result;
+static int _cap_drop_bound(struct syscaller_s *sc, cap_value_t cap)
+{
+    return _libcap_wprctl3(sc, PR_CAPBSET_DROP, pr_arg(cap), pr_arg(0));
 }
 
 /* drop a capability from the bounding set */
 
-int cap_drop_bound(cap_value_t cap)
+int cap_drop_bound(cap_value_t cap) {
+    return _cap_drop_bound(&multithread, cap);
+}
+
+/* get a capability from the ambient set; -1 with errno set on failure */
+
+int cap_get_ambient(cap_value_t cap)
 {
     int result;
-
-    result = prctl(PR_CAPBSET_DROP, cap);
+    result = prctl(PR_CAP_AMBIENT, pr_arg(PR_CAP_AMBIENT_IS_SET),
+		   pr_arg(cap), pr_arg(0), pr_arg(0));
+    if (result < 0) {
+	/* prctl() returns -1 and has already set errno: keep its value */
+	return -1;
+    }
     return result;
 }
+
+static int _cap_set_ambient(struct syscaller_s *sc,
+			    cap_value_t cap, cap_flag_value_t set)
+{
+    int val;
+    switch (set) {
+    case CAP_SET:
+	val = PR_CAP_AMBIENT_RAISE;
+	break;
+    case CAP_CLEAR:
+	val = PR_CAP_AMBIENT_LOWER;
+	break;
+    default:
+	errno = EINVAL;
+	return -1;
+    }
+    return _libcap_wprctl6(sc, PR_CAP_AMBIENT, pr_arg(val), pr_arg(cap),
+			   pr_arg(0), pr_arg(0), pr_arg(0));
+}
+
+/*
+ * cap_set_ambient modifies a single ambient capability value.
+ */
+int cap_set_ambient(cap_value_t cap, cap_flag_value_t set)
+{
+    return _cap_set_ambient(&multithread, cap, set);
+}
+
+static int _cap_reset_ambient(struct syscaller_s *sc)
+{
+    int olderrno = errno;
+    cap_value_t c;
+    int result = 0;
+
+    for (c = 0; !result; c++) {
+	result = cap_get_ambient(c);
+	if (result == -1) {
+	    errno = olderrno;
+	    return 0;
+	}
+    }
+
+    return _libcap_wprctl6(sc, PR_CAP_AMBIENT,
+			   pr_arg(PR_CAP_AMBIENT_CLEAR_ALL),
+			   pr_arg(0), pr_arg(0), pr_arg(0), pr_arg(0));
+}
+
+/*
+ * cap_reset_ambient erases all ambient capabilities - this reads the
+ * ambient caps before performing the erase to workaround the corner
+ * case where the set is empty already but the ambient cap API is
+ * locked.
+ */
+int cap_reset_ambient(void)
+{
+    return _cap_reset_ambient(&multithread);
+}
+
+/*
+ * Read the security mode of the current process.
+ */
+unsigned cap_get_secbits(void)
+{
+    return (unsigned) prctl(PR_GET_SECUREBITS, pr_arg(0), pr_arg(0));
+}
+
+static int _cap_set_secbits(struct syscaller_s *sc, unsigned bits)
+{
+    return _libcap_wprctl3(sc, PR_SET_SECUREBITS, bits, 0);
+}
+
+/*
+ * Set the secbits of the current process.
+ */
+int cap_set_secbits(unsigned bits)
+{
+    return _cap_set_secbits(&multithread, bits);
+}
+
+/*
+ * Attempt to raise the no new privs prctl value.
+ */
+static void _cap_set_no_new_privs(struct syscaller_s *sc)
+{
+    (void) _libcap_wprctl6(sc, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0);
+}
+
+/*
+ * cap_prctl performs a prctl() 6 argument call on the current
+ * thread. Use cap_prctlw() if you want to perform a POSIX semantics
+ * prctl() system call.
+ */
+int cap_prctl(long int pr_cmd, long int arg1, long int arg2,
+	      long int arg3, long int arg4, long int arg5)
+{
+    return prctl(pr_cmd, arg1, arg2, arg3, arg4, arg5);
+}
+
+/*
+ * cap_prctlw performs a POSIX semantics prctl() call. That is a 6 arg
+ * prctl() call that executes on all available threads when libpsx is
+ * linked. The suffix 'w' refers to the fact one only ever needs to
+ * invoke this if the call will write some kernel state.
+ */
+int cap_prctlw(long int pr_cmd, long int arg1, long int arg2,
+	       long int arg3, long int arg4, long int arg5)
+{
+    return _libcap_wprctl6(&multithread, pr_cmd, arg1, arg2, arg3, arg4, arg5);
+}
+
+/*
+ * Some predefined constants
+ */
+#define CAP_SECURED_BITS_BASIC                                 \
+    (SECBIT_NOROOT | SECBIT_NOROOT_LOCKED |                    \
+     SECBIT_NO_SETUID_FIXUP | SECBIT_NO_SETUID_FIXUP_LOCKED |  \
+     SECBIT_KEEP_CAPS_LOCKED)
+
+#define CAP_SECURED_BITS_AMBIENT  (CAP_SECURED_BITS_BASIC |    \
+     SECBIT_NO_CAP_AMBIENT_RAISE | SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED)
+
+static cap_value_t raise_cap_setpcap[] = {CAP_SETPCAP};
+
+static int _cap_set_mode(struct syscaller_s *sc, cap_mode_t flavor)
+{
+    int ret;
+    unsigned secbits = CAP_SECURED_BITS_AMBIENT;
+    cap_t working = cap_get_proc();
+
+    if (working == NULL) {
+	_cap_debug("getting current process' capabilities failed");
+	return -1;
+    }
+
+    ret = cap_set_flag(working, CAP_EFFECTIVE, 1, raise_cap_setpcap, CAP_SET) |
+	_cap_set_proc(sc, working);
+    if (ret == 0) {
+	cap_flag_t c;
+
+	switch (flavor) {
+	case CAP_MODE_NOPRIV:
+	    /* fall through */
+	case CAP_MODE_PURE1E_INIT:
+	    (void) cap_clear_flag(working, CAP_INHERITABLE);
+	    /* fall through */
+	case CAP_MODE_PURE1E:
+	    if (!CAP_AMBIENT_SUPPORTED()) {
+		secbits = CAP_SECURED_BITS_BASIC;
+	    } else {
+		ret = _cap_reset_ambient(sc);
+		if (ret) {
+		    break; /* ambient dropping failed */
+		}
+	    }
+	    ret = _cap_set_secbits(sc, secbits);
+	    if (flavor != CAP_MODE_NOPRIV) {
+		break;
+	    }
+
+	    /* just for "case CAP_MODE_NOPRIV:" */
+
+	    for (c = 0; cap_get_bound(c) >= 0; c++) {
+		(void) _cap_drop_bound(sc, c);
+	    }
+	    (void) cap_clear_flag(working, CAP_PERMITTED);
+
+	    /* for good measure */
+	    _cap_set_no_new_privs(sc);
+	    break;
+	case CAP_MODE_HYBRID:
+	    ret = _cap_set_secbits(sc, 0);
+	    break;
+	default:
+	    errno = EINVAL;
+	    ret = -1;
+	    break;
+	}
+    }
+
+    (void) cap_clear_flag(working, CAP_EFFECTIVE);
+    ret = _cap_set_proc(sc, working) | ret;
+    (void) cap_free(working);
+    return ret;
+}
+
+/*
+ * cap_set_mode locks the overarching capability framework of the
+ * present process and thus its children to a predefined flavor. Once
+ * set, these modes cannot be undone by the affected process tree and
+ * can only be done by "cap_setpcap" permitted processes. Note, a side
+ * effect of this function, whether it succeeds or fails, is to clear
+ * at least the CAP_EFFECTIVE flags for the current process.
+ */
+int cap_set_mode(cap_mode_t flavor)
+{
+    return _cap_set_mode(&multithread, flavor);
+}
+
+/*
+ * cap_get_mode attempts to determine what the current capability mode
+ * is. If it can find no match in the libcap pre-defined modes, it
+ * returns CAP_MODE_UNCERTAIN.
+ */
+cap_mode_t cap_get_mode(void)
+{
+    unsigned secbits = cap_get_secbits();
+
+    if (secbits == 0) {
+	return CAP_MODE_HYBRID;
+    }
+    if ((secbits & CAP_SECURED_BITS_BASIC) != CAP_SECURED_BITS_BASIC) {
+	return CAP_MODE_UNCERTAIN;
+    }
+
+    /* validate ambient is not set */
+    int olderrno = errno;
+    int ret = 0, cf;
+    cap_value_t c;
+    for (c = 0; !ret; c++) {
+	ret = cap_get_ambient(c);
+	if (ret == -1) {
+	    errno = olderrno;
+	    if (c && secbits != CAP_SECURED_BITS_AMBIENT) {
+		return CAP_MODE_UNCERTAIN;
+	    }
+	    ret = 0;
+	    break;
+	}
+	if (ret) {
+	    return CAP_MODE_UNCERTAIN;
+	}
+    }
+
+    /*
+     * Explore how capabilities differ from empty.
+     */
+    cap_t working = cap_get_proc();
+    cap_t empty = cap_init();
+    if (working == NULL || empty == NULL) {
+	_cap_debug("working=%p, empty=%p - need both non-NULL", working, empty);
+	ret = -1;
+    } else {
+	cf = cap_compare(empty, working);
+    }
+    cap_free(empty);
+    cap_free(working);
+    if (ret != 0) {
+	return CAP_MODE_UNCERTAIN;
+    }
+
+    if (CAP_DIFFERS(cf, CAP_INHERITABLE)) {
+	return CAP_MODE_PURE1E;
+    }
+    if (CAP_DIFFERS(cf, CAP_PERMITTED) || CAP_DIFFERS(cf, CAP_EFFECTIVE)) {
+	return CAP_MODE_PURE1E_INIT;
+    }
+
+    for (c = 0; ; c++) {
+	int v = cap_get_bound(c);
+	if (v == -1) {
+	    break;
+	}
+	if (v) {
+	    return CAP_MODE_PURE1E_INIT;
+	}
+    }
+
+    return CAP_MODE_NOPRIV;
+}
+
+static int _cap_setuid(struct syscaller_s *sc, uid_t uid)
+{
+    const cap_value_t raise_cap_setuid[] = {CAP_SETUID};
+    cap_t working = cap_get_proc();
+    if (working == NULL) {
+	return -1;
+    }
+
+    (void) cap_set_flag(working, CAP_EFFECTIVE,
+			1, raise_cap_setuid, CAP_SET);
+    /*
+     * Note, we are cognizant of not using glibc's setuid in the case
+     * that we've modified the way libcap is doing setting
+     * syscalls. This is because prctl needs to be working in a POSIX
+     * compliant way for the code below to work, so we are either
+     * all-broken or not-broken and don't allow for "sort of working".
+     */
+    (void) _libcap_wprctl3(sc, PR_SET_KEEPCAPS, 1, 0);
+    int ret = _cap_set_proc(sc, working);
+    if (ret == 0) {
+	if (_libcap_overrode_syscalls) {
+	    ret = sc->three(SYS_setuid, (long int) uid, 0, 0);
+	    if (ret < 0) {
+		errno = -ret;
+		ret = -1;
+	    }
+	} else {
+	    ret = setuid(uid);
+	}
+    }
+    int olderrno = errno;
+    (void) _libcap_wprctl3(sc, PR_SET_KEEPCAPS, 0, 0);
+    (void) cap_clear_flag(working, CAP_EFFECTIVE);
+    (void) _cap_set_proc(sc, working);
+    (void) cap_free(working);
+
+    errno = olderrno;
+    return ret;
+}
+
+/*
+ * cap_setuid attempts to set the uid of the process without dropping
+ * any permitted capabilities in the process. A side effect of a call
+ * to this function is that the effective set will be cleared by the
+ * time the function returns.
+ */
+int cap_setuid(uid_t uid)
+{
+    return _cap_setuid(&multithread, uid);
+}
+
+#if defined(__arm__) || defined(__i386__) || \
+    defined(__i486__) || defined(__i586__) || defined(__i686__)
+#define sys_setgroups_variant  SYS_setgroups32
+#else
+#define sys_setgroups_variant  SYS_setgroups
+#endif
+
+static int _cap_setgroups(struct syscaller_s *sc,
+			  gid_t gid, size_t ngroups, const gid_t groups[])
+{
+    const cap_value_t raise_cap_setgid[] = {CAP_SETGID};
+    cap_t working = cap_get_proc();
+    if (working == NULL) {
+	return -1;
+    }
+
+    (void) cap_set_flag(working, CAP_EFFECTIVE,
+			1, raise_cap_setgid, CAP_SET);
+    /*
+     * Note, we are cognizant of not using glibc's setgid etc in the
+     * case that we've modified the way libcap is doing setting
+     * syscalls. This is because prctl needs to be working in a POSIX
+     * compliant way for the other functions of this file so we are
+     * all-broken or not-broken and don't allow for "sort of working".
+     */
+    int ret = _cap_set_proc(sc, working);
+    if (_libcap_overrode_syscalls) {
+	if (ret == 0) {
+	    ret = sc->three(SYS_setgid, (long int) gid, 0, 0);
+	}
+	if (ret == 0) {
+	    ret = sc->three(sys_setgroups_variant, (long int) ngroups,
+			    (long int) groups, 0);
+	}
+	if (ret < 0) {
+	    errno = -ret;
+	    ret = -1;
+	}
+    } else {
+	if (ret == 0) {
+	    ret = setgid(gid);
+	}
+	if (ret == 0) {
+	    ret = setgroups(ngroups, groups);
+	}
+    }
+    int olderrno = errno;
+
+    (void) cap_clear_flag(working, CAP_EFFECTIVE);
+    (void) _cap_set_proc(sc, working);
+    (void) cap_free(working);
+
+    errno = olderrno;
+    return ret;
+}
+
+/*
+ * cap_setgroups combines setting the gid with changing the set of
+ * supplemental groups for a user into one call that raises the needed
+ * capabilities to do it for the duration of the call. A side effect
+ * of a call to this function is that the effective set will be
+ * cleared by the time the function returns.
+ *
+ * gid is the group id to adopt; groups[] holds ngroups supplemental
+ * group ids. The return value is that of _cap_setgroups() run with
+ * the multithread syscaller: 0 on success, non-zero (with errno set)
+ * otherwise.
+ */
+int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[])
+{
+    return _cap_setgroups(&multithread, gid, ngroups, groups);
+}
+
+/*
+ * cap_iab_get_proc builds a fresh cap_iab_t describing the calling
+ * process: the inheritable vector is copied from cap_get_proc(), a
+ * bit is recorded in nb[] for every capability that cap_get_bound()
+ * reports as 0, and a bit is recorded in a[] for every capability
+ * that cap_get_ambient() reports as 1. NULL is returned if either
+ * allocation fails.
+ */
+cap_iab_t cap_iab_get_proc(void)
+{
+    cap_iab_t result = cap_iab_init();
+    if (result == NULL) {
+	_cap_debug("no memory for IAB tuple");
+	return NULL;
+    }
+
+    cap_t proc_caps = cap_get_proc();
+    if (proc_caps == NULL) {
+	_cap_debug("no memory for cap_t");
+	cap_free(result);
+	return NULL;
+    }
+
+    cap_iab_fill(result, CAP_IAB_INH, proc_caps, CAP_INHERITABLE);
+    cap_free(proc_caps);
+
+    /* walk the capability values from the highest down to 0 */
+    cap_value_t bit = cap_max_bits();
+    while (bit) {
+	--bit;
+	const int word = bit >> 5;
+	const __u32 flag = 1U << (bit & 31);
+	if (cap_get_bound(bit) == 0) {
+	    result->nb[word] |= flag;
+	}
+	if (cap_get_ambient(bit) == 1) {
+	    result->a[word] |= flag;
+	}
+    }
+    return result;
+}
+
+/*
+ * _cap_iab_set_proc sets the iab collection using the requested syscaller.
+ * The iab value is locked by the caller.
+ *
+ * The inheritable vector is installed first (with CAP_SETPCAP raised
+ * in the effective set when something is being raised, or any ambient
+ * or bounding change is requested), the ambient set is then reset and
+ * rebuilt, and the requested bounding bits are dropped. Once the
+ * process state has been touched, the effective set is put back to
+ * its original value with the new inheritable vector via the same
+ * syscaller. Returns 0 on success; otherwise non-zero with errno
+ * describing the first failure (cleanup does not overwrite it).
+ */
+static int _cap_iab_set_proc(struct syscaller_s *sc, cap_iab_t iab)
+{
+    int ret, i, raising = 0;
+    int olderrno;
+    cap_value_t c;
+    cap_t working, temp = cap_get_proc();
+
+    if (temp == NULL) {
+	return -1;
+    }
+
+    for (i = 0; i < _LIBCAP_CAPABILITY_U32S; i++) {
+	__u32 newI = iab->i[i];
+	__u32 oldIP = temp->u[i].flat[CAP_INHERITABLE] |
+	    temp->u[i].flat[CAP_PERMITTED];
+	/* non-zero if I gains a bit outside old I|P, or any A/NB is set */
+	raising |= (newI & ~oldIP) | iab->a[i] | iab->nb[i];
+	temp->u[i].flat[CAP_INHERITABLE] = newI;
+
+    }
+
+    working = cap_dup(temp);
+    if (working == NULL) {
+	ret = -1;
+	goto defer;
+    }
+    if (raising) {
+	ret = cap_set_flag(working, CAP_EFFECTIVE,
+			   1, raise_cap_setpcap, CAP_SET);
+	if (ret) {
+	    goto defer;
+	}
+    }
+    if ((ret = _cap_set_proc(sc, working))) {
+	goto defer;
+    }
+    if ((ret = _cap_reset_ambient(sc))) {
+	goto done;
+    }
+
+    for (c = cap_max_bits(); c-- != 0; ) {
+	unsigned offset = c >> 5;
+	__u32 mask = 1U << (c & 31);
+	if (iab->a[offset] & mask) {
+	    ret = _cap_set_ambient(sc, c, CAP_SET);
+	    if (ret) {
+		goto done;
+	    }
+	}
+	if (iab->nb[offset] & mask) {
+	    /* drop the bounding bit */
+	    ret = _cap_drop_bound(sc, c);
+	    if (ret) {
+		goto done;
+	    }
+	}
+    }
+
+done:
+    /*
+     * Restore the pre-call effective set (temp carries the new
+     * inheritable vector). Use the caller's syscaller, like every
+     * other step here, and keep the errno of any earlier failure.
+     */
+    olderrno = errno;
+    (void) _cap_set_proc(sc, temp);
+    errno = olderrno;
+
+defer:
+    olderrno = errno;
+    cap_free(working);
+    cap_free(temp);
+    errno = olderrno;
+
+    return ret;
+}
+
+/*
+ * cap_iab_set_proc applies the iab capability vectors to the current
+ * process. The iab is held locked while _cap_iab_set_proc() runs with
+ * the multithread syscaller. An invalid iab yields -1 with errno set
+ * to EINVAL.
+ */
+int cap_iab_set_proc(cap_iab_t iab)
+{
+    if (good_cap_iab_t(iab)) {
+	int status;
+
+	_cap_mu_lock(&iab->mutex);
+	status = _cap_iab_set_proc(&multithread, iab);
+	_cap_mu_unlock(&iab->mutex);
+	return status;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+/*
+ * cap_launcher_callback installs callback_fn as the launcher's custom
+ * setup function. The launched child invokes it right after the
+ * fork(), ahead of every privilege change and ahead of the execve(),
+ * so it can be used to adjust the child's state from within
+ * cap_launch(). Passing a NULL callback_fn removes any callback
+ * previously associated with the launcher.
+ *
+ * A callback that returns a non-zero value is treated as a failure:
+ * the launch is aborted and the child's errno is sent back to the
+ * parent.
+ *
+ * Returns 0, or -1 with errno set to EINVAL when attr is not a valid
+ * launcher.
+ */
+int cap_launcher_callback(cap_launch_t attr, int (callback_fn)(void *detail))
+{
+    if (good_cap_launch_t(attr)) {
+	_cap_mu_lock(&attr->mutex);
+	attr->custom_setup_fn = callback_fn;
+	_cap_mu_unlock(&attr->mutex);
+	return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+/*
+ * cap_launcher_setuid records uid in the launcher and flags that the
+ * launched child should attempt to change to it. Returns 0, or -1
+ * with errno set to EINVAL when attr is not a valid launcher.
+ */
+int cap_launcher_setuid(cap_launch_t attr, uid_t uid)
+{
+    if (good_cap_launch_t(attr)) {
+	_cap_mu_lock(&attr->mutex);
+	attr->uid = uid;
+	attr->change_uids = 1;
+	_cap_mu_unlock(&attr->mutex);
+	return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+/*
+ * cap_launcher_setgroups primes the launcher to attempt a change of
+ * gid and groups.
+ *
+ * gid is the group id for the child and groups[] lists ngroups
+ * supplemental group ids. The array is not copied - only the pointer
+ * is stored - so it has to remain valid until the launch has been
+ * performed.
+ *
+ * Returns 0 on success. Returns -1 with errno set to EINVAL when attr
+ * is not a valid launcher, when ngroups is negative (it would
+ * otherwise turn into an enormous count once converted to size_t at
+ * launch time), or when ngroups is positive but groups is NULL.
+ */
+int cap_launcher_setgroups(cap_launch_t attr, gid_t gid,
+			   int ngroups, const gid_t *groups)
+{
+    if (!good_cap_launch_t(attr)) {
+	errno = EINVAL;
+	return -1;
+    }
+    if (ngroups < 0 || (ngroups > 0 && groups == NULL)) {
+	errno = EINVAL;
+	return -1;
+    }
+    _cap_mu_lock(&attr->mutex);
+    attr->gid = gid;
+    attr->ngroups = ngroups;
+    attr->groups = groups;
+    attr->change_gids = 1;
+    _cap_mu_unlock(&attr->mutex);
+    return 0;
+}
+
+/*
+ * cap_launcher_set_mode records flavor in the launcher and flags that
+ * the launched child should attempt to switch to that mode. Returns
+ * 0, or -1 with errno set to EINVAL when attr is not a valid
+ * launcher.
+ */
+int cap_launcher_set_mode(cap_launch_t attr, cap_mode_t flavor)
+{
+    if (good_cap_launch_t(attr)) {
+	_cap_mu_lock(&attr->mutex);
+	attr->mode = flavor;
+	attr->change_mode = 1;
+	_cap_mu_unlock(&attr->mutex);
+	return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+/*
+ * cap_launcher_set_iab primes the launcher to attempt to change the
+ * IAB values of the launched child. The launcher locks iab while it
+ * is owned by the launcher: this prevents the user from
+ * asynchronously changing its value while it is associated with the
+ * launcher.
+ *
+ * The return value is the iab previously held by the launcher (now
+ * unlocked), or NULL if there was none. NULL is also returned, with
+ * errno set to EINVAL, when attr is not a valid launcher. Passing a
+ * NULL iab detaches the current one without installing a replacement.
+ */
+cap_iab_t cap_launcher_set_iab(cap_launch_t attr, cap_iab_t iab)
+{
+    if (!good_cap_launch_t(attr)) {
+	errno = EINVAL;
+	return NULL;
+    }
+    _cap_mu_lock(&attr->mutex);
+    cap_iab_t old = attr->iab;
+    attr->iab = iab;
+    /* release the outgoing iab before taking the lock on the new one */
+    if (old != NULL) {
+	_cap_mu_unlock(&old->mutex);
+    }
+    if (iab != NULL) {
+	_cap_mu_lock(&iab->mutex);
+    }
+    _cap_mu_unlock(&attr->mutex);
+    return old;
+}
+
+/*
+ * cap_launcher_set_chroot sets the intended chroot for the launched
+ * child.
+ *
+ * The launcher keeps its own copy of chroot; any copy stored by an
+ * earlier call is released so repeated calls do not leak. A NULL
+ * chroot clears the setting.
+ *
+ * Returns 0 on success. Returns -1 with errno set to EINVAL when attr
+ * is not a valid launcher, and -1 when the string cannot be
+ * duplicated (errno is then left as _libcap_strdup() set it); in the
+ * latter case the previously stored chroot is left untouched.
+ */
+int cap_launcher_set_chroot(cap_launch_t attr, const char *chroot)
+{
+    char *copy = NULL;
+
+    if (!good_cap_launch_t(attr)) {
+	errno = EINVAL;
+	return -1;
+    }
+    if (chroot != NULL) {
+	copy = _libcap_strdup(chroot);
+	if (copy == NULL) {
+	    return -1;
+	}
+    }
+    _cap_mu_lock(&attr->mutex);
+    /* cap_free(NULL) is harmless, so no guard is needed here */
+    (void) cap_free(attr->chroot);
+    attr->chroot = copy;
+    _cap_mu_unlock(&attr->mutex);
+    return 0;
+}
+
+/*
+ * _cap_chroot changes the root directory of the caller to root and
+ * then changes the working directory to "/". CAP_SYS_CHROOT is raised
+ * in the effective set for the attempt and the whole effective set
+ * is cleared before returning. Returns 0 on success; otherwise
+ * non-zero with errno holding the value captured before the cleanup
+ * steps ran.
+ */
+static int _cap_chroot(struct syscaller_s *sc, const char *root)
+{
+    const cap_value_t raise_cap_sys_chroot[] = {CAP_SYS_CHROOT};
+    cap_t working = cap_get_proc();
+    if (working == NULL) {
+	return -1;
+    }
+
+    /* best effort: a failure to raise the bit shows up in chroot below */
+    (void) cap_set_flag(working, CAP_EFFECTIVE,
+			1, raise_cap_sys_chroot, CAP_SET);
+    int ret = _cap_set_proc(sc, working);
+    if (ret == 0) {
+	if (_libcap_overrode_syscalls) {
+	    ret = sc->three(SYS_chroot, (long int) root, 0, 0);
+	    /* a negative result is treated as a negated errno value */
+	    if (ret < 0) {
+		errno = -ret;
+		ret = -1;
+	    }
+	} else {
+	    ret = chroot(root);
+	}
+	if (ret == 0) {
+	    ret = chdir("/");
+	}
+    }
+    /* remember the errno of the attempt: the cleanup may overwrite it */
+    int olderrno = errno;
+    (void) cap_clear_flag(working, CAP_EFFECTIVE);
+    (void) _cap_set_proc(sc, working);
+    (void) cap_free(working);
+
+    errno = olderrno;
+    return ret;
+}
+
+/*
+ * _cap_launch is invoked in the forked child, it cannot return but is
+ * required to exit, if the execve fails. It will write the errno
+ * value for any failure over the filedescriptor, fd, and exit with
+ * status 1.
+ *
+ * Steps, in order: the optional custom setup callback (passed
+ * detail), then - only when attr->arg0 is set - the requested uid,
+ * gid/groups, mode, IAB and chroot changes, followed by execve(). A
+ * launcher without an arg0 exits with status 0 once its callback has
+ * succeeded.
+ */
+__attribute__ ((noreturn))
+static void _cap_launch(int fd, cap_launch_t attr, void *detail) {
+    /* all privilege changes in the child go through the singlethread syscaller */
+    struct syscaller_s *sc = &singlethread;
+    int my_errno;
+
+    if (attr->custom_setup_fn && attr->custom_setup_fn(detail)) {
+	goto defer;
+    }
+    if (attr->arg0 == NULL) {
+	/* handle the successful cap_func_launcher completion */
+	exit(0);
+    }
+
+    if (attr->change_uids && _cap_setuid(sc, attr->uid)) {
+	goto defer;
+    }
+    if (attr->change_gids &&
+	_cap_setgroups(sc, attr->gid, attr->ngroups, attr->groups)) {
+	goto defer;
+    }
+    if (attr->change_mode && _cap_set_mode(sc, attr->mode)) {
+	goto defer;
+    }
+    if (attr->iab && _cap_iab_set_proc(sc, attr->iab)) {
+	goto defer;
+    }
+    if (attr->chroot != NULL && _cap_chroot(sc, attr->chroot)) {
+	goto defer;
+    }
+
+    /*
+     * Some type wrangling to work around what the kernel API really
+     * means: not "const char **".
+     */
+    const void *temp_args = attr->argv;
+    const void *temp_envp = attr->envp;
+
+    execve(attr->arg0, temp_args, temp_envp);
+    /* if the exec worked, execution will not reach here */
+
+defer:
+    /*
+     * getting here means an error has occurred and errno is
+     * communicated to the parent
+     */
+    my_errno = errno;
+    for (;;) {
+	int n = write(fd, &my_errno, sizeof(my_errno));
+	/* only EAGAIN is retried; any other outcome ends the attempt */
+	if (n < 0 && errno == EAGAIN) {
+	    continue;
+	}
+	break;
+    }
+    close(fd);
+    exit(1);
+}
+
+/*
+ * cap_launch performs a wrapped fork+(callback and/or exec) that
+ * works in both an unthreaded environment and also where libcap is
+ * linked with psx+pthreads. The function supports dropping privilege
+ * in the forked thread, but retaining privilege in the parent
+ * thread(s).
+ *
+ * When applying the IAB vector inside the fork, since the ambient set
+ * is fragile with respect to changes in I or P, the function
+ * carefully orders setting of these inheritable characteristics, to
+ * make sure they stick.
+ *
+ * On success the pid of the child is returned. This function will
+ * return an error of -1 setting errno if the launch failed: EINVAL
+ * for an invalid or purposeless launcher, the pipe2()/fork() errno
+ * when those fail, and - when the child reports a setup failure -
+ * the errno value the child sent back. ECHILD is used when the child
+ * failed but no usable errno value could be read from it.
+ */
+pid_t cap_launch(cap_launch_t attr, void *detail) {
+    int my_errno;
+    int ps[2];
+    pid_t child;
+
+    if (!good_cap_launch_t(attr)) {
+	errno = EINVAL;
+	return -1;
+    }
+    _cap_mu_lock(&attr->mutex);
+
+    /* The launch must have a purpose */
+    if (attr->custom_setup_fn == NULL &&
+	(attr->arg0 == NULL || attr->argv == NULL)) {
+	errno = EINVAL;
+	_cap_mu_unlock_return(&attr->mutex, -1);
+    }
+
+    /* O_CLOEXEC: a successful execve() closes the child's write end */
+    if (pipe2(ps, O_CLOEXEC) != 0) {
+	_cap_mu_unlock_return(&attr->mutex, -1);
+    }
+
+    child = fork();
+    my_errno = errno;
+
+    if (!child) {
+	close(ps[0]);
+	prctl(PR_SET_NAME, "cap-launcher", 0, 0, 0);
+	_cap_launch(ps[1], attr, detail);
+	/* no return from above function */
+    }
+
+    /* child has its own copy, and parent no longer needs it locked. */
+    _cap_mu_unlock(&attr->mutex);
+    close(ps[1]);
+    if (child < 0) {
+	goto defer;
+    }
+
+    /*
+     * Extend this function's return codes to include setup failures
+     * in the child.
+     */
+    for (;;) {
+	int ignored;
+	int n = read(ps[0], &my_errno, sizeof(my_errno));
+	if (n == 0) {
+	    /* pipe closed with nothing written: no failure reported */
+	    goto defer;
+	}
+	if (n < 0 && (errno == EAGAIN || errno == EINTR)) {
+	    /* an interrupted read says nothing about the child: retry */
+	    continue;
+	}
+	waitpid(child, &ignored, 0);
+	child = -1;
+	if (n != (int) sizeof(my_errno) || my_errno == 0) {
+	    /* nothing trustworthy was received from the child */
+	    my_errno = ECHILD;
+	}
+	break;
+    }
+
+defer:
+    close(ps[0]);
+    errno = my_errno;
+    return child;
+}
diff --git a/libcap/cap_test.c b/libcap/cap_test.c
new file mode 100644
index 0000000..68b6a13
--- /dev/null
+++ b/libcap/cap_test.c
@@ -0,0 +1,299 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+
+#include "libcap.h"
+
+static cap_value_t top;
+
+/*
+ * cf is the probe function handed to _binary_search() by
+ * test_cap_bits(). It returns top - x - 1, i.e. a non-negative value
+ * exactly when x < top.
+ * NOTE(review): how _binary_search() interprets the sign of this
+ * value is defined with that macro - confirm there.
+ */
+static int cf(cap_value_t x)
+{
+    return top - x - 1;
+}
+
+/*
+ * test_cap_bits runs _binary_search() over [0, __CAP_MAXBITS] for a
+ * list of pretend "top" values and checks that the search recovers
+ * each of them. A result of -1 is tolerated when top is 0 or lies
+ * beyond __CAP_MAXBITS. Returns 0 if every case passes, -1 otherwise.
+ */
+static int test_cap_bits(void)
+{
+    static cap_value_t vs[] = {
+	5, 6, 11, 12, 15, 16, 17, 38, 41, 63, 64, __CAP_MAXBITS+3, 0, -1
+    };
+    int failed = 0;
+    cap_value_t i;
+
+    /* the trailing -1 terminates the list */
+    for (i = 0; vs[i] >= 0; i++) {
+	cap_value_t ans;
+
+	top = vs[i];
+	_binary_search(ans, cf, 0, __CAP_MAXBITS, -1);
+	if (ans == top) {
+	    continue;
+	}
+	if (ans == -1 && (top == 0 || top > __CAP_MAXBITS)) {
+	    /* nothing to find, or the target is outside the range */
+	    continue;
+	}
+	printf("test_cap_bits miscompared [%d] top=%d - got=%d\n",
+	       i, top, ans);
+	failed = -1;
+    }
+    return failed;
+}
+
+/*
+ * test_cap_flags exercises cap_compare(), cap_set_flag(), cap_fill(),
+ * cap_clear_flag() and cap_fill_flag() on locally allocated sets.
+ * Returns 0 if every check passes and -1 otherwise. Every exit path
+ * after the first allocation releases what was allocated.
+ */
+static int test_cap_flags(void)
+{
+    cap_t c, d;
+    cap_flag_t f = CAP_INHERITABLE, t;
+    cap_value_t v;
+    int retval = 0;
+
+    c = cap_init();
+    if (c == NULL) {
+	printf("test_flags failed to allocate a set\n");
+	return -1;
+    }
+    if (cap_compare(c, NULL) != -1) {
+	printf("compare to NULL should give invalid\n");
+	retval = -1;
+	goto drop_c;
+    }
+    if (cap_compare(NULL, c) != -1) {
+	printf("compare with NULL should give invalid\n");
+	retval = -1;
+	goto drop_c;
+    }
+
+    /* raise every third inheritable bit */
+    for (v = 0; v < __CAP_MAXBITS; v += 3) {
+	if (cap_set_flag(c, CAP_INHERITABLE, 1, &v, CAP_SET)) {
+	    printf("unable to set inheritable bit %d\n", v);
+	    retval = -1;
+	    goto drop_c;
+	}
+    }
+
+    d = cap_dup(c);
+    if (d == NULL) {
+	perror("failed to duplicate c");
+	retval = -1;
+	goto drop_c;
+    }
+    /* shuffle the pattern through each flag and back to inheritable */
+    for (t = CAP_EFFECTIVE; t <= CAP_INHERITABLE; t++) {
+	if (cap_fill(c, t, f)) {
+	    printf("cap_fill failed %d -> %d\n", f, t);
+	    retval = -1;
+	    goto drop_d;
+	}
+	if (cap_clear_flag(c, f)) {
+	    printf("cap_fill unable to clear flag %d\n", f);
+	    retval = -1;
+	    goto drop_d;
+	}
+	f = t;
+    }
+    if (cap_compare(c, d)) {
+	printf("permuted cap_fill()ing failed to perform net no-op\n");
+	retval = -1;
+    }
+    if (cap_fill_flag(NULL, CAP_EFFECTIVE, c, CAP_INHERITABLE) == 0) {
+	printf("filling NULL flag should fail\n");
+	retval = -1;
+    }
+    if (cap_fill_flag(d, CAP_PERMITTED, c, CAP_INHERITABLE) != 0) {
+	perror("filling PERMITTED flag should work");
+	retval = -1;
+    }
+    if (cap_fill_flag(c, CAP_PERMITTED, d, CAP_PERMITTED) != 0) {
+	perror("filling PERMITTED flag from another cap_t should work");
+	retval = -1;
+    }
+    if (cap_compare(c, d)) {
+	printf("permuted cap_fill()ing failed to perform net no-op\n");
+	retval = -1;
+    }
+
+drop_d:
+    if (cap_free(d) != 0) {
+	perror("failed to free d");
+	retval = -1;
+    }
+drop_c:
+    if (cap_free(c) != 0) {
+	perror("failed to free c");
+	retval = -1;
+    }
+    return retval;
+}
+
+/*
+ * test_short_bits checks that the decimal rendering of __CAP_MAXBITS
+ * is no longer than __CAP_NAME_SIZE characters. Returns 0 when it
+ * fits, and -1 when it does not or when the rendering could not be
+ * produced.
+ */
+static int test_short_bits(void)
+{
+    char *digits;
+
+    if (asprintf(&digits, "%d", __CAP_MAXBITS) <= 0) {
+	return -1;
+    }
+
+    int verdict = 0;
+    if (strlen(digits) > __CAP_NAME_SIZE) {
+	printf("cap_to_text buffer size reservation needs fixing (%ld > %d)\n",
+	       (long int)strlen(digits), __CAP_NAME_SIZE);
+	verdict = -1;
+    }
+    free(digits);
+    return verdict;
+}
+
+/*
+ * noop is a placeholder callback that ignores its argument and always
+ * returns -1. test_alloc() passes it to cap_func_launcher() only to
+ * obtain a launcher object to allocate and free.
+ */
+static int noop(void *data)
+{
+    return -1;
+}
+
+/*
+ * test_alloc allocates one object of each libcap kind (cap_t,
+ * cap_iab_t, launcher), swaps IAB values in and out of the launcher,
+ * exercises cap_proc_root(), and then frees everything. It also
+ * checks that cap_free() rejects pointers offset into the middle of
+ * an allocation. Returns 0 if every check passes, -1 otherwise.
+ */
+static int test_alloc(void)
+{
+    int retval = 0;
+    cap_t c;
+    cap_iab_t iab;
+    cap_launch_t launcher;
+    char *old_root;
+
+    printf("test_alloc\n");
+    fflush(stdout);
+
+    c = cap_init();
+    if (c == NULL) {
+	perror("failed to allocate a cap_t");
+	fflush(stderr);
+	return -1;
+    }
+
+    iab = cap_iab_init();
+    if (iab == NULL) {
+	perror("failed to allocate a cap_iab_t");
+	fflush(stderr);
+	retval = -1;
+	goto drop_c;
+    }
+
+    launcher = cap_func_launcher(noop);
+    if (launcher == NULL) {
+	perror("failed to allocate a launcher");
+	fflush(stderr);
+	retval = -1;
+	goto drop_iab;
+    }
+
+    cap_launcher_set_chroot(launcher, "/tmp");
+    /*
+     * NOTE(review): on this failure path the launcher is not freed;
+     * who owns iab at that point is not clear from here - confirm
+     * before changing the jump target.
+     */
+    if (cap_launcher_set_iab(launcher, iab) != NULL) {
+	printf("unable to replace iab in launcher\n");
+	fflush(stdout);
+	retval = -1;
+	goto drop_iab;
+    }
+
+    /* swap in a fresh iab and get the original one back */
+    iab = cap_launcher_set_iab(launcher, cap_iab_init());
+    if (iab == NULL) {
+	printf("unable to recover iab in launcher\n");
+	fflush(stdout);
+	retval = -1;
+	goto drop_launcher;
+    }
+
+    old_root = cap_proc_root("blah");
+    if (old_root != NULL) {
+	printf("bad initial proc_root [%s]\n", old_root);
+	fflush(stdout);
+	retval = -1;
+    }
+    if (cap_free(old_root)) {
+	perror("unable to free old proc root");
+	fflush(stderr);
+	retval = -1;
+    }
+    if (retval) {
+	goto drop_launcher;
+    }
+    old_root = cap_proc_root("/proc");
+    /* a NULL here must be reported, not handed to strcmp() or %s */
+    if (old_root == NULL || strcmp(old_root, "blah") != 0) {
+	printf("bad proc_root value [%s]\n",
+	       old_root != NULL ? old_root : "(null)");
+	fflush(stdout);
+	retval = -1;
+    }
+    if (cap_free(old_root)) {
+	perror("unable to free replacement proc root");
+	fflush(stderr);
+	retval = -1;
+    }
+
+drop_launcher:
+    printf("test_alloc: drop_launcher\n");
+    fflush(stdout);
+    if (cap_free(launcher)) {
+	perror("failed to free launcher");
+	fflush(stderr);
+	retval = -1;
+    }
+
+drop_iab:
+    printf("test_alloc: drop_iab\n");
+    fflush(stdout);
+    if (iab == NULL) {
+	/* nothing was recovered from the launcher: no iab to probe or free */
+	goto drop_c;
+    }
+    if (!cap_free(2+(__u32 *) iab)) {
+	printf("unable to recognize bad cap_iab_t pointer\n");
+	fflush(stdout);
+	retval = -1;
+    }
+    if (cap_free(iab)) {
+	perror("failed to free iab");
+	fflush(stderr);
+	retval = -1;
+    }
+
+drop_c:
+    printf("test_alloc: drop_cap\n");
+    fflush(stdout);
+    if (!cap_free(1+(__u32 *) c)) {
+	printf("unable to recognize bad cap_t pointer\n");
+	fflush(stdout);
+	retval = -1;
+    }
+    if (cap_free(c)) {
+	perror("failed to free c");
+	fflush(stderr);
+	retval = -1;
+    }
+    return retval;
+}
+
+/*
+ * test_prctl confirms that cap_get_bound() rejects the capability
+ * value -1: it has to return -1 and leave errno set to EINVAL.
+ * Returns 0 when it does and -1 otherwise.
+ */
+static int test_prctl(void)
+{
+    int got;
+
+    errno = 0;
+    got = cap_get_bound((cap_value_t) -1);
+    if (got != -1) {
+	printf("cap_get_bound(-1) did not return error: %d\n", got);
+	return -1;
+    }
+    if (errno != EINVAL) {
+	perror("cap_get_bound(-1) errno != EINVAL");
+	return -1;
+    }
+    return 0;
+}
+
+/*
+ * main runs every test in turn, announcing each one first, and exits
+ * with status 0 if all of them returned 0 and status 1 otherwise.
+ */
+int main(int argc, char **argv) {
+    static const struct {
+	const char *banner;
+	int (*run)(void);
+    } tests[] = {
+	{ "test_cap_bits: being called\n", test_cap_bits },
+	{ "test_cap_flags: being called\n", test_cap_flags },
+	{ "test_short_bits: being called\n", test_short_bits },
+	{ "test_alloc: being called\n", test_alloc },
+	{ "test_prctl: being called\n", test_prctl },
+    };
+    int result = 0;
+    unsigned k;
+
+    for (k = 0; k < sizeof(tests)/sizeof(tests[0]); k++) {
+	printf("%s", tests[k].banner);
+	fflush(stdout);
+	result |= tests[k].run();
+    }
+    printf("tested\n");
+    fflush(stdout);
+
+    if (result) {
+	printf("cap_test FAILED\n");
+	exit(1);
+    }
+    printf("cap_test PASS\n");
+    exit(0);
+}
diff --git a/libcap/cap_text.c b/libcap/cap_text.c
index 42fb685..7566bd8 100644
--- a/libcap/cap_text.c
+++ b/libcap/cap_text.c
@@ -1,22 +1,32 @@
 /*
- * Copyright (c) 1997-8,2007-8 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997-8,2007-8,2019,2021 Andrew G Morgan <morgan@kernel.org>
  * Copyright (c) 1997 Andrew Main <zefram@dcs.warwick.ac.uk>
  *
  * This file deals with exchanging internal and textual
  * representations of capability sets.
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
+
 #include <stdio.h>
 
 #define LIBCAP_PLEASE_INCLUDE_ARRAY
 #include "libcap.h"
 
+static char const *_cap_names[__CAP_BITS] = LIBCAP_CAP_NAMES;
+
 #include <ctype.h>
 #include <limits.h>
 
-/* Maximum output text length (16 per cap) */
-#define CAP_TEXT_SIZE    (16*__CAP_MAXBITS)
+#ifdef INCLUDE_GPERF_OUTPUT
+/* we need to include it after #define _GNU_SOURCE is set */
+#include INCLUDE_GPERF_OUTPUT
+#endif
+
+/* Maximum output text length */
+#define CAP_TEXT_SIZE    (__CAP_NAME_SIZE * __CAP_MAXBITS)
 
 /*
  * Parse a textual representation of capabilities, returning an internal
@@ -51,11 +61,25 @@ static char const *namcmp(char const *str, char const *nam)
     return str;
 }
 
+/*
+ * forceall forces all of the kernel named capabilities to be assigned
+ * the masked value, and zeroed otherwise. Note, if the kernel is ahead
+ * of libcap, the upper bits will be referred to by number.
+ */
 static void forceall(__u32 *flat, __u32 value, unsigned blks)
 {
     unsigned n;
-
-    for (n = blks; n--; flat[n] = value);
+    cap_value_t cmb = cap_max_bits();
+    for (n = blks; n--; ) {
+	unsigned base = 32*n;
+	__u32 mask = 0;
+	if (cmb >= base + 32) {
+	    mask = ~0;
+	} else if (cmb > base) {
+	    mask = (unsigned) ((1ULL << (cmb % 32)) - 1);
+	}
+	flat[n] = value & mask;
+    }
 
     return;
 }
@@ -76,7 +100,7 @@ static int lookupname(char const **strp)
 	return n;
     } else {
 	int c;
-	unsigned len;
+	size_t len;
 
 	for (len=0; (c = str.constp[len]); ++len) {
 	    if (!(isalpha(c) || (c == '_'))) {
@@ -94,13 +118,16 @@ static int lookupname(char const **strp)
 	}
 #else /* ie., ndef GPERF_DOWNCASE */
 	char const *s;
-	unsigned n;
-
-	for (n = __CAP_BITS; n--; )
+	unsigned n = cap_max_bits();
+	if (n > __CAP_BITS) {
+	    n = __CAP_BITS;
+	}
+	while (n--) {
 	    if (_cap_names[n] && (s = namcmp(str.constp, _cap_names[n]))) {
 		*strp = s;
 		return n;
 	    }
+	}
 #endif /* def GPERF_DOWNCASE */
 
 	return -1;   	/* No definition available */
@@ -133,10 +160,11 @@ cap_t cap_from_text(const char *str)
 	cap_blks = _LINUX_CAPABILITY_U32S_3;
 	break;
     default:
+	cap_free(res);
 	errno = EINVAL;
 	return NULL;
     }
-    
+
     _cap_debug("%s", str);
 
     for (;;) {
@@ -144,7 +172,7 @@ cap_t cap_from_text(const char *str)
 	char op;
 	int flags = 0, listed=0;
 
-	forceall(list, 0, __CAP_BLKS);
+	memset(list, 0, sizeof(__u32)*__CAP_BLKS);
 
 	/* skip leading spaces */
 	while (isspace((unsigned char)*str))
@@ -192,7 +220,7 @@ cap_t cap_from_text(const char *str)
 
 	/* cycle through list of actions */
 	do {
-	    _cap_debug("next char = `%c'", *str);
+	    _cap_debug("next char = '%c'", *str);
 	    if (*str && !isspace(*str)) {
 		switch (*str++) {    /* Effective, Inheritable, Permitted */
 		case 'e':
@@ -282,20 +310,19 @@ int cap_from_name(const char *name, cap_value_t *value_p)
  */
 char *cap_to_name(cap_value_t cap)
 {
-    if ((cap < 0) || (cap >= __CAP_BITS)) {
-#if UINT_MAX != 4294967295U
-# error Recompile with correctly sized numeric array
-#endif
-	char *tmp, *result;
-
-	asprintf(&tmp, "%u", cap);
-	result = _libcap_strdup(tmp);
-	free(tmp);
+    char *tmp, *result;
 
-	return result;
-    } else {
+    if ((cap >= 0) && (cap < __CAP_BITS)) {
 	return _libcap_strdup(_cap_names[cap]);
     }
+    if (asprintf(&tmp, "%u", cap) <= 0) {
+	_cap_debug("asprintf filed");
+	return NULL;
+    }
+
+    result = _libcap_strdup(tmp);
+    free(tmp);
+    return result;
 }
 
 /*
@@ -321,16 +348,21 @@ static int getstateflags(cap_t caps, int capno)
     return f;
 }
 
+/*
+ * This code assumes that the longest named capability is longer than
+ * the decimal text representation of __CAP_MAXBITS. This is very true
+ * at the time of writing and likely to remain so. However, we have
+ * a test in cap_test (test_short_bits) to validate it at build time.
+ */
 #define CAP_TEXT_BUFFER_ZONE 100
 
 char *cap_to_text(cap_t caps, ssize_t *length_p)
 {
     char buf[CAP_TEXT_SIZE+CAP_TEXT_BUFFER_ZONE];
-    char *p;
+    char *p, *base;
     int histo[8];
     int m, t;
     unsigned n;
-    unsigned cap_maxbits, cap_blks;
 
     /* Check arguments */
     if (!good_cap_t(caps)) {
@@ -338,31 +370,15 @@ char *cap_to_text(cap_t caps, ssize_t *length_p)
 	return NULL;
     }
 
-    switch (caps->head.version) {
-    case _LINUX_CAPABILITY_VERSION_1:
-	cap_blks = _LINUX_CAPABILITY_U32S_1;
-	break;
-    case _LINUX_CAPABILITY_VERSION_2:
-	cap_blks = _LINUX_CAPABILITY_U32S_2;
-	break;
-    case _LINUX_CAPABILITY_VERSION_3:
-	cap_blks = _LINUX_CAPABILITY_U32S_3;
-	break;
-    default:
-	errno = EINVAL;
-	return NULL;
-    }
-
-    cap_maxbits = 32 * cap_blks;
-
     _cap_debugcap("e = ", *caps, CAP_EFFECTIVE);
     _cap_debugcap("i = ", *caps, CAP_INHERITABLE);
     _cap_debugcap("p = ", *caps, CAP_PERMITTED);
 
     memset(histo, 0, sizeof(histo));
 
-    /* default prevailing state to the upper - unnamed bits */
-    for (n = cap_maxbits-1; n > __CAP_BITS; n--)
+    /* default prevailing state to the named bits */
+    cap_value_t cmb = cap_max_bits();
+    for (n = 0; n < cmb; n++)
 	histo[getstateflags(caps, n)]++;
 
     /* find which combination of capability sets shares the most bits
@@ -373,57 +389,382 @@ char *cap_to_text(cap_t caps, ssize_t *length_p)
 	if (histo[t] >= histo[m])
 	    m = t;
 
-    /* capture remaining bits - selecting m from only the unnamed bits,
-       we maximize the likelihood that we won't see numeric capability
-       values in the text output. */
-    while (n--)
-	histo[getstateflags(caps, n)]++;
-
     /* blank is not a valid capability set */
+    base = buf;
     p = sprintf(buf, "=%s%s%s",
 		(m & LIBCAP_EFF) ? "e" : "",
 		(m & LIBCAP_INH) ? "i" : "",
 		(m & LIBCAP_PER) ? "p" : "" ) + buf;
 
-    for (t = 8; t--; )
-	if (t != m && histo[t]) {
-	    *p++ = ' ';
-	    for (n = 0; n < cap_maxbits; n++)
-		if (getstateflags(caps, n) == t) {
-		    char *this_cap_name;
-
-		    this_cap_name = cap_to_name(n);
-		    if ((strlen(this_cap_name) + (p - buf)) > CAP_TEXT_SIZE) {
-			cap_free(this_cap_name);
-			errno = ERANGE;
-			return NULL;
-		    }
-		    p += sprintf(p, "%s,", this_cap_name);
+    for (t = 8; t--; ) {
+	if (t == m || !histo[t]) {
+	    continue;
+	}
+	*p++ = ' ';
+	for (n = 0; n < cmb; n++) {
+	    if (getstateflags(caps, n) == t) {
+	        char *this_cap_name = cap_to_name(n);
+		if (this_cap_name == NULL) {
+		    return NULL;
+		}
+	        if ((strlen(this_cap_name) + (p - buf)) > CAP_TEXT_SIZE) {
 		    cap_free(this_cap_name);
+		    errno = ERANGE;
+		    return NULL;
+	        }
+	        p += sprintf(p, "%s,", this_cap_name);
+	        cap_free(this_cap_name);
+	    }
+	}
+	p--;
+	n = t & ~m;
+	if (n) {
+	    char op = '+';
+	    if (base[0] == '=' && base[1] == ' ') {
+		/*
+		 * Special case all lowered default "= foo,...+eip
+		 * ..." as "foo,...=eip ...". (Equivalent but shorter.)
+		 */
+		base += 2;
+		op = '=';
+	    }
+	    p += sprintf(p, "%c%s%s%s", op,
+			 (n & LIBCAP_EFF) ? "e" : "",
+			 (n & LIBCAP_INH) ? "i" : "",
+			 (n & LIBCAP_PER) ? "p" : "");
+	}
+	n = ~t & m;
+	if (n) {
+	    p += sprintf(p, "-%s%s%s",
+			 (n & LIBCAP_EFF) ? "e" : "",
+			 (n & LIBCAP_INH) ? "i" : "",
+			 (n & LIBCAP_PER) ? "p" : "");
+	}
+	if (p - buf > CAP_TEXT_SIZE) {
+	    errno = ERANGE;
+	    return NULL;
+	}
+    }
+
+    /* capture remaining unnamed bits - which must all be +. */
+    memset(histo, 0, sizeof(histo));
+    for (n = cmb; n < __CAP_MAXBITS; n++)
+	histo[getstateflags(caps, n)]++;
+
+    for (t = 8; t-- > 1; ) {
+	if (!histo[t]) {
+	    continue;
+	}
+	*p++ = ' ';
+	for (n = cmb; n < __CAP_MAXBITS; n++) {
+	    if (getstateflags(caps, n) == t) {
+		char *this_cap_name = cap_to_name(n);
+		if (this_cap_name == NULL) {
+		    return NULL;
 		}
-	    p--;
-	    n = t & ~m;
-	    if (n)
-		p += sprintf(p, "+%s%s%s",
-			     (n & LIBCAP_EFF) ? "e" : "",
-			     (n & LIBCAP_INH) ? "i" : "",
-			     (n & LIBCAP_PER) ? "p" : "");
-	    n = ~t & m;
-	    if (n)
-		p += sprintf(p, "-%s%s%s",
-			     (n & LIBCAP_EFF) ? "e" : "",
-			     (n & LIBCAP_INH) ? "i" : "",
-			     (n & LIBCAP_PER) ? "p" : "");
-	    if (p - buf > CAP_TEXT_SIZE) {
-		errno = ERANGE;
-		return NULL;
+	        if ((strlen(this_cap_name) + (p - buf)) > CAP_TEXT_SIZE) {
+		    cap_free(this_cap_name);
+		    errno = ERANGE;
+		    return NULL;
+	        }
+		p += sprintf(p, "%s,", this_cap_name);
+		cap_free(this_cap_name);
 	    }
 	}
+	p--;
+	p += sprintf(p, "+%s%s%s",
+		     (t & LIBCAP_EFF) ? "e" : "",
+		     (t & LIBCAP_INH) ? "i" : "",
+		     (t & LIBCAP_PER) ? "p" : "");
+	if (p - buf > CAP_TEXT_SIZE) {
+	    errno = ERANGE;
+	    return NULL;
+	}
+    }
 
-    _cap_debug("%s", buf);
+    _cap_debug("%s", base);
     if (length_p) {
-	*length_p = p - buf;
+	*length_p = p - base;
     }
 
-    return (_libcap_strdup(buf));
+    return (_libcap_strdup(base));
+}
+
+/*
+ * cap_mode_name maps a cap_mode_t value to a constant text token.
+ * Values it does not recognize are named "UNKNOWN". The returned
+ * string is a literal and is not to be freed.
+ */
+const char *cap_mode_name(cap_mode_t flavor) {
+    if (flavor == CAP_MODE_NOPRIV) {
+	return "NOPRIV";
+    }
+    if (flavor == CAP_MODE_PURE1E_INIT) {
+	return "PURE1E_INIT";
+    }
+    if (flavor == CAP_MODE_PURE1E) {
+	return "PURE1E";
+    }
+    if (flavor == CAP_MODE_UNCERTAIN) {
+	return "UNCERTAIN";
+    }
+    if (flavor == CAP_MODE_HYBRID) {
+	return "HYBRID";
+    }
+    return "UNKNOWN";
+}
+
+/*
+ * cap_iab_to_text serializes an iab into a canonical text
+ * representation.
+ *
+ * Capabilities below cap_max_bits() that have a bit set in any of the
+ * three vectors are listed in increasing numeric order, separated by
+ * commas. Each entry is the capability name (or its decimal value
+ * when it is not below __CAP_BITS) preceded by: '!' when its nb bit
+ * is set, then '^' when its a bit is set, or otherwise '%' when both
+ * its nb and i bits are set. An entry with only the i bit set has no
+ * prefix.
+ *
+ * The result is a _libcap_strdup() copy of the text. An iab that
+ * fails good_cap_iab_t() produces a copy of the empty string.
+ * NOTE(review): writes into buf are not bounds checked here - confirm
+ * that CAP_TEXT_BUFFER_ZONE covers the prefix and comma overhead.
+ */
+char *cap_iab_to_text(cap_iab_t iab)
+{
+    char buf[CAP_TEXT_SIZE+CAP_TEXT_BUFFER_ZONE];
+    char *p = buf;
+    cap_value_t c, cmb = cap_max_bits();
+    int first = 1;
+
+    if (good_cap_iab_t(iab)) {
+	_cap_mu_lock(&iab->mutex);
+	for (c = 0; c < cmb; c++) {
+	    int keep = 0;
+	    int o = c >> 5;
+	    __u32 bit = 1U << (c & 31);
+	    __u32 ib = iab->i[o] & bit;
+	    __u32 ab = iab->a[o] & bit;
+	    __u32 nbb = iab->nb[o] & bit;
+	    /* nothing recorded for this capability: no entry */
+	    if (!(nbb | ab | ib)) {
+		continue;
+	    }
+	    if (!first) {
+		*p++ = ',';
+	    }
+	    if (nbb) {
+		*p++ = '!';
+		keep = 1;
+	    }
+	    if (ab) {
+		*p++ = '^';
+		keep = 1;
+	    } else if (nbb && ib) {
+		*p++ = '%';
+	    }
+	    if (keep || ib) {
+		if (c < __CAP_BITS) {
+		    strcpy(p, _cap_names[c]);
+		} else {
+		    sprintf(p, "%u", c);
+		}
+		p += strlen(p);
+		first = 0;
+	    }
+	}
+	_cap_mu_unlock(&iab->mutex);
+    }
+    *p = '\0';
+    return _libcap_strdup(buf);
+}
+
+/*
+ * cap_iab_from_text parses a comma separated list of capabilities
+ * into a newly allocated cap_iab_t. Each entry is a capability, as
+ * recognized by lookupname(), optionally preceded by any mix of the
+ * prefixes '!' (adds LIBCAP_IAB_NB_FLAG), '^' (adds
+ * LIBCAP_IAB_IA_FLAG) and '%' (adds LIBCAP_IAB_I_FLAG). An entry
+ * without a prefix is treated as LIBCAP_IAB_I_FLAG. The i, a and nb
+ * bits of the capability are then set according to which of the I, A
+ * and NB flags are present.
+ * NOTE(review): '^' is expected to set both the i and a bits, which
+ * relies on LIBCAP_IAB_IA_FLAG including the I and A flags - confirm
+ * against the flag definitions.
+ *
+ * A NULL text yields an empty iab. NULL is returned if the iab cannot
+ * be allocated; NULL with errno set to EINVAL is returned when a name
+ * is not recognized or an entry is followed by anything other than a
+ * comma or the end of the string.
+ */
+cap_iab_t cap_iab_from_text(const char *text)
+{
+    cap_iab_t iab = cap_iab_init();
+    if (iab == NULL) {
+	return iab;
+    }
+    if (text != NULL) {
+	unsigned flags;
+	for (flags = 0; *text; text++) {
+	    /* consume prefixes */
+	    switch (*text) {
+	    case '!':
+		flags |= LIBCAP_IAB_NB_FLAG;
+		continue;
+	    case '^':
+		flags |= LIBCAP_IAB_IA_FLAG;
+		continue;
+	    case '%':
+		flags |= LIBCAP_IAB_I_FLAG;
+		continue;
+	    default:
+		break;
+	    }
+	    if (!flags) {
+		flags = LIBCAP_IAB_I_FLAG;
+	    }
+
+	    /* consume cap name */
+	    cap_value_t c = lookupname(&text);
+	    if (c == -1) {
+		goto cleanup;
+	    }
+	    unsigned o = c >> 5;
+	    __u32 mask = 1U << (c & 31);
+	    if (flags & LIBCAP_IAB_I_FLAG) {
+		iab->i[o] |= mask;
+	    }
+	    if (flags & LIBCAP_IAB_A_FLAG) {
+		iab->a[o] |= mask;
+	    }
+	    if (flags & LIBCAP_IAB_NB_FLAG) {
+		iab->nb[o] |= mask;
+	    }
+
+	    /* rest should be end or comma */
+	    if (*text == '\0') {
+		break;
+	    }
+	    if (*text != ',') {
+		goto cleanup;
+	    }
+	    /* the loop's text++ steps over the comma; start a new entry */
+	    flags = 0;
+	}
+    }
+    return iab;
+
+cleanup:
+    cap_free(iab);
+    errno = EINVAL;
+    return NULL;
+}
+
+/*
+ * _parse_hex32 converts the first 8 characters of c, which must all
+ * be hexadecimal digits (either case), into a 32-bit value. If any
+ * of them is not a hex digit - including an early end of string - the
+ * result is 0.
+ */
+static __u32 _parse_hex32(const char *c)
+{
+    __u32 value = 0;
+    int k;
+
+    for (k = 0; k < 8; k++) {
+	const char ch = c[k];
+	__u32 nibble;
+
+	if (ch >= '0' && ch <= '9') {
+	    nibble = ch - '0';
+	} else if (ch >= 'a' && ch <= 'f') {
+	    nibble = ch + 10 - 'a';
+	} else if (ch >= 'A' && ch <= 'F') {
+	    nibble = ch + 10 - 'A';
+	} else {
+	    return 0;
+	}
+	value = (value << 4) + nibble;
+    }
+    return value;
+}
+
+/*
+ * _parse_vec_string decodes a hex dump, of the kind found on the Cap*
+ * lines of /proc/<pid>/status, into an array of u32s. The text is
+ * taken 8 hex digits at a time with the last complete group landing
+ * in vals[0]. Each word is limited to the forceall() mask, and when
+ * invert is non-zero the parsed word is complemented first. Returns
+ * ~0 on success and 0 when the text holds more words than
+ * _LIBCAP_CAPABILITY_U32S.
+ */
+static __u32 _parse_vec_string(__u32 *vals, const char *c, int invert)
+{
+    int w;
+    const int count = strlen(c)/8;
+
+    if (count > _LIBCAP_CAPABILITY_U32S) {
+	return 0;
+    }
+    forceall(vals, ~0, count);
+    for (w = 0; w < count; w++) {
+	const char *digits = c + 8*(count-1-w);
+	__u32 parsed = _parse_hex32(digits);
+
+	vals[w] &= invert ? ~parsed : parsed;
+    }
+    return ~0;
+}
+
+/*
+ * libcap believes this is the root of the mounted "/proc"
+ * filesystem. (NULL == "/proc".)
+ */
+static char *_cap_proc_dir;
+
+/*
+ * Destructor counterpart to the constructor (see cap_alloc.c): frees
+ * the replacement "/proc" root, if one was ever installed, and resets
+ * it to the default.
+ */
+__attribute__((destructor (300))) static void _cleanup_libcap(void)
+{
+    if (_cap_proc_dir != NULL) {
+	cap_free(_cap_proc_dir);
+	_cap_proc_dir = NULL;
+    }
+}
+
+/*
+ * cap_proc_root reports where libcap believes the "/proc" filesystem
+ * is mounted and, when root is not NULL, replaces that location with
+ * a private copy of root. The value is global and writing it is not
+ * considered thread safe, so callers changing it need to take
+ * suitable care.
+ *
+ * The return value is the location in effect before the call, with
+ * NULL standing for the default "/proc". When the value has been
+ * changed, the returned memory was allocated by libcap and the caller
+ * should cap_free() it when done with it.
+ */
+char *cap_proc_root(const char *root)
+{
+    char *previous = _cap_proc_dir;
+
+    if (root == NULL) {
+	/* query only */
+	return previous;
+    }
+    _cap_proc_dir = _libcap_strdup(root);
+    return previous;
+}
+
+#define PROC_LINE_MAX (8 + 8*_LIBCAP_CAPABILITY_U32S + 100)
+/*
+ * cap_iab_get_pid fills an IAB tuple from the content of
+ * /proc/<pid>/status. Linux doesn't support syscall access to the
+ * needed information, so we parse it out of that file.
+ *
+ * The "CapInh:", "CapBnd:" and "CapAmb:" lines supply the i, nb and a
+ * vectors respectively; the CapBnd value is complemented, so nb
+ * records the capabilities that are absent from the bounding set.
+ * The directory searched is the one set with cap_proc_root(),
+ * defaulting to "/proc".
+ *
+ * Returns a newly allocated cap_iab_t, or NULL if the status file
+ * cannot be opened, memory cannot be allocated, or any of the three
+ * lines was missing or could not be parsed.
+ */
+cap_iab_t cap_iab_get_pid(pid_t pid)
+{
+    cap_iab_t iab;
+    char *path;
+    FILE *file;
+    char line[PROC_LINE_MAX];
+    const char *proc_root = _cap_proc_dir;
+
+    if (proc_root == NULL) {
+	proc_root = "/proc";
+    }
+    if (asprintf(&path, "%s/%d/status", proc_root, pid) <= 0) {
+	return NULL;
+    }
+    file = fopen(path, "r");
+    free(path);
+    if (file == NULL) {
+	return NULL;
+    }
+
+    iab = cap_iab_init();
+    /* accumulates one flag per vector successfully parsed */
+    unsigned int ok = 0;
+    if (iab != NULL) {
+	while (fgets(line, PROC_LINE_MAX-1, file) != NULL) {
+	    if (strncmp("Cap", line, 3) != 0) {
+		continue;
+	    }
+	    if (strncmp("Inh:\t", line+3, 5) == 0) {
+		ok = (_parse_vec_string(iab->i, line+8, 0) &
+		    LIBCAP_IAB_I_FLAG) | ok;
+		continue;
+	    }
+	    if (strncmp("Bnd:\t", line+3, 5) == 0) {
+		ok = (_parse_vec_string(iab->nb, line+8, 1) &
+		      LIBCAP_IAB_NB_FLAG) | ok;
+		continue;
+	    }
+	    if (strncmp("Amb:\t", line+3, 5) == 0) {
+		ok = (_parse_vec_string(iab->a, line+8, 0) &
+		      LIBCAP_IAB_A_FLAG) | ok;
+		continue;
+	    }
+	}
+    }
+    /* anything short of all three vectors is a failure */
+    if (ok != (LIBCAP_IAB_IA_FLAG | LIBCAP_IAB_NB_FLAG)) {
+	cap_free(iab);
+	iab = NULL;
+    }
+    fclose(file);
+    return iab;
+}
diff --git a/libcap/empty.c b/libcap/empty.c
new file mode 100644
index 0000000..0314ff1
--- /dev/null
+++ b/libcap/empty.c
@@ -0,0 +1 @@
+int main(int argc, char **argv) { return 0; } /* no work: always exits 0 */
diff --git a/libcap/execable.c b/libcap/execable.c
new file mode 100644
index 0000000..9f7062e
--- /dev/null
+++ b/libcap/execable.c
@@ -0,0 +1,64 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+
+#include "execable.h"
+
+static void usage(int status)
+{
+    fputs("\nusage: libcap.so [--help|--usage|--summary]\n", stdout);
+    exit(status);	/* does not return to the caller */
+}
+
+static void summary(void)
+{
+    const cap_value_t known = CAP_LAST_CAP+1, bits = cap_max_bits();
+    cap_value_t c;
+
+    printf("\nCurrent mode: %s\n", cap_mode_name(cap_get_mode()));
+    printf("Number of cap values known to: this libcap=%d, running kernel=%d\n",
+	   known, bits);
+    if (bits == known) {
+	return;		/* library and kernel agree: nothing to add */
+    }
+    if (bits > known) {
+	printf("=> Consider upgrading libcap to name:");
+	for (c = known; c < bits; c++) {
+	    printf(" %d", c);
+	}
+    } else {
+	printf("=> Newer kernels also provide support for:");
+	for (c = bits; c < known; c++) {
+	    char *name = cap_to_name(c);
+	    printf(" %s", name);
+	    cap_free(name);
+	}
+    }
+    printf("\n");
+}
+
+SO_MAIN(int argc, char **argv)
+{
+    int i;
+    /* Report under the invoked name when argv[0] is available. */
+    const char *cmd = (argv && argv[0]) ? argv[0] : "This library";
+
+    printf("%s is the shared library version: " LIBRARY_VERSION ".\n"
+	   "See the License file for distribution information.\n"
+	   "More information on this library is available from:\n"
+	   "\n"
+	   "    https://sites.google.com/site/fullycapable/\n", cmd);
+
+    /* usage() exits the process, so only --summary keeps the loop going. */
+    for (i = 1; i < argc; i++) {
+	const char *opt = argv[i];
+	if (!strcmp(opt, "--summary")) {
+	    summary();
+	    continue;
+	}
+	if (!strcmp(opt, "--usage") || !strcmp(opt, "--help")) {
+	    usage(0);
+	}
+	usage(1);
+    }
+}
diff --git a/libcap/execable.h b/libcap/execable.h
new file mode 100644
index 0000000..7a2d247
--- /dev/null
+++ b/libcap/execable.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Andrew G. Morgan <morgan@kernel.org>
+ *
+ * Some header magic to help make a shared object run-able as a stand
+ * alone executable binary.
+ *
+ * This is a slightly more sophisticated implementation than the
+ * answer I posted here:
+ *
+ *    https://stackoverflow.com/a/68339111/14760867
+ *
+ * Compile your shared library with:
+ *
+ *   -DSHARED_LOADER="\"ld-linux...\"" (loader for your target system)
+ *   ...
+ *   --entry=__so_start
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __EXECABLE_H
+#error "only include execable.h once"
+#endif
+#define __EXECABLE_H
+
+const char __execable_dl_loader[] __attribute((section(".interp"))) =
+    SHARED_LOADER ;
+
+static void __execable_parse_args(int *argc_p, char ***argv_p)
+{
+    int argc = 0;		/* stays 0, with argv NULL, if fopen fails */
+    char **argv = NULL;
+    FILE *f = fopen("/proc/self/cmdline", "rb");
+    if (f != NULL) {
+	char *mem = NULL;
+	size_t size = 32, offset;
+	for (offset=0; ; size *= 2) {	/* grow until a short read */
+	    char *new_mem = realloc(mem, size+1);
+	    if (new_mem == NULL) {
+		perror("unable to parse arguments");
+		free(mem);	/* free(NULL) is a no-op */
+		exit(1);
+	    }
+	    mem = new_mem;
+	    offset += fread(mem+offset, 1, size-offset, f);
+	    if (offset < size) {
+		size = offset;
+		mem[size] = '\0';	/* bounds the strlen() calls below */
+		break;
+	    }
+	}
+	fclose(f);
+	/* One arg per '\0' separator, the final byte excluded. Indexing */
+	/* from the front stays inside mem even when size < 2. */
+	for (argc=1, offset=0; offset+1 < size; offset++) {
+	    argc += (mem[offset] == '\0');
+	}
+	argv = calloc(argc+1, sizeof(char *));
+	if (argv == NULL) {
+	    perror("failed to allocate memory for argv");
+	    free(mem);
+	    exit(1);
+	}
+	for (argc=0, offset=0; offset < size; argc++) {
+	    argv[argc] = mem+offset;	/* argv[0] is mem itself */
+	    offset += strlen(mem+offset)+1;
+	}
+    }
+    *argc_p = argc;
+    *argv_p = argv;
+}
+
+/*
+ * Linux x86 ABI requires the stack be 16 byte aligned. Keep things
+ * simple and just force it.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define __SO_FORCE_ARG_ALIGNMENT  __attribute__((force_align_arg_pointer))
+#else
+#define __SO_FORCE_ARG_ALIGNMENT
+#endif /* def some x86 */
+
+/*
+ * Permit the compiler to override this one.
+ */
+#ifndef EXECABLE_INITIALIZE
+#define EXECABLE_INITIALIZE do { } while(0)
+#endif /* ndef EXECABLE_INITIALIZE */
+
+/*
+ * Note, to avoid any runtime confusion, SO_MAIN is a void static
+ * function.
+ */
+#define SO_MAIN							\
+static void __execable_main(int, char**);			\
+__attribute__((visibility ("hidden")))                          \
+void __so_start(void);					        \
+__SO_FORCE_ARG_ALIGNMENT					\
+void __so_start(void)						\
+{								\
+    int argc;							\
+    char **argv;						\
+    __execable_parse_args(&argc, &argv); /* from /proc/self/cmdline */ \
+    EXECABLE_INITIALIZE;                                        \
+    __execable_main(argc, argv);				\
+    if (argc != 0) {						\
+	free(argv[0]); /* one buffer holds all argument text */	\
+	free(argv);						\
+    }								\
+    exit(0); /* always terminates the process here */		\
+}								\
+static void __execable_main
diff --git a/libcap/include/sys/.gitignore b/libcap/include/sys/.gitignore
new file mode 100644
index 0000000..595fc39
--- /dev/null
+++ b/libcap/include/sys/.gitignore
@@ -0,0 +1 @@
+psx_syscall.h
diff --git a/libcap/include/sys/capability.h b/libcap/include/sys/capability.h
index 64ac50e..2db9972 100644
--- a/libcap/include/sys/capability.h
+++ b/libcap/include/sys/capability.h
@@ -2,7 +2,7 @@
  * <sys/capability.h>
  *
  * Copyright (C) 1997   Aleph One
- * Copyright (C) 1997-8,2008 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (C) 1997,8, 2008,19-22 Andrew G. Morgan <morgan@kernel.org>
  *
  * defunct POSIX.1e Standard: 25.2 Capabilities           <sys/capability.h>
  */
@@ -15,19 +15,23 @@ extern "C" {
 #endif
 
 /*
+ * Provide a programmatic way to #ifdef around features.
+ */
+#define LIBCAP_MAJOR 2
+#define LIBCAP_MINOR 69
+
+/*
  * This file complements the kernel file by providing prototype
  * information for the user library.
  */
 
 #include <sys/types.h>
 #include <stdint.h>
-#include <linux/types.h>
 
 #ifndef __user
 #define __user
 #endif
 #include <linux/capability.h>
-#include <linux/xattr.h>
 
 /*
  * POSIX capability types
@@ -48,14 +52,65 @@ typedef struct _cap_struct *cap_t;
 typedef int cap_value_t;
 
 /*
+ * libcap initialized first unnamed capability of the running kernel.
+ * capsh includes a runtime test to flag when this is larger than
+ * what is known to libcap... Time for a new libcap release!
+ */
+extern cap_value_t cap_max_bits(void);
+
+/*
+ * cap_proc_root reads and (optionally: when root != NULL) changes
+ * libcap's notion of where the "/proc" filesystem is mounted. When
+ * the return value is NULL, it should be interpreted as the
+ * value "/proc".
+ *
+ * Note, this is a global value and not considered thread safe to
+ * write - so the client should take suitable care when changing
+ * it.
+ *
+ * Further, libcap will allocate a memory copy for storing the
+ * replacement root, and it is this kind of memory that is returned.
+ * So, when changing the value, the caller should
+ * cap_free(the-return-value) else cause a memory leak.
+ *
+ * Note, the library uses a destructor to clean up the live allocated
+ * value of the working setting.
+ */
+extern char *cap_proc_root(const char *root);
+
+/*
  * Set identifiers
  */
 typedef enum {
-    CAP_EFFECTIVE=0,                        /* Specifies the effective flag */
-    CAP_PERMITTED=1,                        /* Specifies the permitted flag */
-    CAP_INHERITABLE=2                     /* Specifies the inheritable flag */
+    CAP_EFFECTIVE = 0,                 /* Specifies the effective flag */
+    CAP_PERMITTED = 1,                 /* Specifies the permitted flag */
+    CAP_INHERITABLE = 2                /* Specifies the inheritable flag */
 } cap_flag_t;
 
+typedef enum {
+    CAP_IAB_INH = 2,
+    CAP_IAB_AMB = 3,
+    CAP_IAB_BOUND = 4
+} cap_iab_vector_t;
+
+/*
+ * An opaque generalization of the inheritable bits that includes both
+ * what ambient bits to raise and what bounding bits to *lower* (aka
+ * drop).  None of these bits once set, using cap_iab_set(), affect
+ * the running process but are consulted, through the execve() system
+ * call, by the kernel. Note, the ambient bits ('A') of the running
+ * process are fragile with respect to other aspects of the "posix"
+ * (cap_t) operations: most importantly, 'A' cannot ever hold bits not
+ * present in the intersection of 'pI' and 'pP'. The kernel
+ * immediately drops all ambient caps whenever such a situation
+ * arises. Typically, the ambient bits are used to support a naive
+ * capability inheritance model - at odds with the POSIX (sic) model
+ * of inheritance where inherited (pI) capabilities need to also be
+ * wanted by the executed binary (fI) in order to become raised
+ * through exec.
+ */
+typedef struct cap_iab_s *cap_iab_t;
+
 /*
  * These are the states available to each capability
  */
@@ -67,11 +122,19 @@ typedef enum {
 /*
  * User-space capability manipulation routines
  */
+typedef unsigned cap_mode_t;
+#define CAP_MODE_UNCERTAIN    ((cap_mode_t) 0)
+#define CAP_MODE_NOPRIV       ((cap_mode_t) 1)
+#define CAP_MODE_PURE1E_INIT  ((cap_mode_t) 2)
+#define CAP_MODE_PURE1E       ((cap_mode_t) 3)
+#define CAP_MODE_HYBRID       ((cap_mode_t) 4)
 
 /* libcap/cap_alloc.c */
-extern cap_t   cap_dup(cap_t);
-extern int     cap_free(void *);
-extern cap_t   cap_init(void);
+extern cap_t      cap_dup(cap_t);
+extern int        cap_free(void *);
+extern cap_t      cap_init(void);
+extern cap_iab_t  cap_iab_dup(cap_iab_t);
+extern cap_iab_t  cap_iab_init(void);
 
 /* libcap/cap_flag.c */
 extern int     cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
@@ -79,12 +142,28 @@ extern int     cap_set_flag(cap_t, cap_flag_t, int, const cap_value_t *,
 			    cap_flag_value_t);
 extern int     cap_clear(cap_t);
 extern int     cap_clear_flag(cap_t, cap_flag_t);
+extern int     cap_fill_flag(cap_t cap_d, cap_flag_t to,
+                             cap_t ref, cap_flag_t from);
+extern int     cap_fill(cap_t, cap_flag_t, cap_flag_t);
+
+#define CAP_DIFFERS(result, flag)  (((result) & (1 << (flag))) != 0)
+extern int     cap_compare(cap_t, cap_t);
+#define CAP_IAB_DIFFERS(result, vector)  (((result) & (1 << (vector))) != 0)
+extern int     cap_iab_compare(cap_iab_t, cap_iab_t);
+
+extern cap_flag_value_t cap_iab_get_vector(cap_iab_t, cap_iab_vector_t,
+					 cap_value_t);
+extern int     cap_iab_set_vector(cap_iab_t, cap_iab_vector_t, cap_value_t,
+				cap_flag_value_t);
+extern int     cap_iab_fill(cap_iab_t, cap_iab_vector_t, cap_t, cap_flag_t);
 
 /* libcap/cap_file.c */
 extern cap_t   cap_get_fd(int);
 extern cap_t   cap_get_file(const char *);
+extern uid_t   cap_get_nsowner(cap_t);
 extern int     cap_set_fd(int, cap_t);
 extern int     cap_set_file(const char *, cap_t);
+extern int     cap_set_nsowner(cap_t, uid_t);
 
 /* libcap/cap_proc.c */
 extern cap_t   cap_get_proc(void);
@@ -93,13 +172,18 @@ extern int     cap_set_proc(cap_t);
 
 extern int     cap_get_bound(cap_value_t);
 extern int     cap_drop_bound(cap_value_t);
-
 #define CAP_IS_SUPPORTED(cap)  (cap_get_bound(cap) >= 0)
 
+extern int     cap_get_ambient(cap_value_t);
+extern int     cap_set_ambient(cap_value_t, cap_flag_value_t);
+extern int     cap_reset_ambient(void);
+#define CAP_AMBIENT_SUPPORTED() (cap_get_ambient(CAP_CHOWN) >= 0)
+
 /* libcap/cap_extint.c */
-extern ssize_t cap_size(cap_t);
-extern ssize_t cap_copy_ext(void *, cap_t, ssize_t);
-extern cap_t   cap_copy_int(const void *);
+extern ssize_t cap_size(cap_t cap_d);
+extern ssize_t cap_copy_ext(void *cap_ext, cap_t cap_d, ssize_t length);
+extern cap_t   cap_copy_int(const void *cap_ext);
+extern cap_t   cap_copy_int_check(const void *cap_ext, ssize_t length);
 
 /* libcap/cap_text.c */
 extern cap_t   cap_from_text(const char *);
@@ -107,12 +191,56 @@ extern char *  cap_to_text(cap_t, ssize_t *);
 extern int     cap_from_name(const char *, cap_value_t *);
 extern char *  cap_to_name(cap_value_t);
 
-#define CAP_DIFFERS(result, flag)  (((result) & (1 << (flag))) != 0)
-extern int     cap_compare(cap_t, cap_t);
+extern char *     cap_iab_to_text(cap_iab_t iab);
+extern cap_iab_t  cap_iab_from_text(const char *text);
 
-/* system calls - look to libc for function to system call mapping */
-extern int capset(cap_user_header_t header, cap_user_data_t data);
-extern int capget(cap_user_header_t header, const cap_user_data_t data);
+/* libcap/cap_proc.c */
+extern void cap_set_syscall(long int (*new_syscall)(long int,
+				long int, long int, long int),
+			    long int (*new_syscall6)(long int,
+				long int, long int, long int,
+				long int, long int, long int));
+
+extern int cap_set_mode(cap_mode_t flavor);
+extern cap_mode_t cap_get_mode(void);
+extern const char *cap_mode_name(cap_mode_t flavor);
+
+extern unsigned cap_get_secbits(void);
+extern int cap_set_secbits(unsigned bits);
+
+extern int cap_prctl(long int pr_cmd, long int arg1, long int arg2,
+		     long int arg3, long int arg4, long int arg5);
+extern int cap_prctlw(long int pr_cmd, long int arg1, long int arg2,
+		      long int arg3, long int arg4, long int arg5);
+extern int cap_setuid(uid_t uid);
+extern int cap_setgroups(gid_t gid, size_t ngroups, const gid_t groups[]);
+
+extern cap_iab_t cap_iab_get_proc(void);
+extern cap_iab_t cap_iab_get_pid(pid_t);
+extern int cap_iab_set_proc(cap_iab_t iab);
+
+typedef struct cap_launch_s *cap_launch_t;
+
+extern cap_launch_t cap_new_launcher(const char *arg0, const char * const *argv,
+				     const char * const *envp);
+extern cap_launch_t cap_func_launcher(int (callback_fn)(void *detail));
+extern int cap_launcher_callback(cap_launch_t attr,
+				 int (callback_fn)(void *detail));
+extern int cap_launcher_setuid(cap_launch_t attr, uid_t uid);
+extern int cap_launcher_setgroups(cap_launch_t attr, gid_t gid,
+				  int ngroups, const gid_t *groups);
+extern int cap_launcher_set_mode(cap_launch_t attr, cap_mode_t flavor);
+extern cap_iab_t cap_launcher_set_iab(cap_launch_t attr, cap_iab_t iab);
+extern int cap_launcher_set_chroot(cap_launch_t attr, const char *chroot);
+extern pid_t cap_launch(cap_launch_t attr, void *detail);
+
+/*
+ * system calls - look to libc for function to system call
+ * mapping. Note, libcap does not use capset directly, but permits the
+ * cap_set_syscall() to redirect the system call function.
+ */
+extern int capget(cap_user_header_t header, cap_user_data_t data);
+extern int capset(cap_user_header_t header, const cap_user_data_t data);
 
 /* deprecated - use cap_get_pid() */
 extern int capgetp(pid_t pid, cap_t cap_d);
diff --git a/libcap/include/uapi/linux/capability.h b/libcap/include/uapi/linux/capability.h
index a4b907f..56c9180 100644
--- a/libcap/include/uapi/linux/capability.h
+++ b/libcap/include/uapi/linux/capability.h
@@ -7,15 +7,16 @@
  *
  * See here for the libcap library ("POSIX draft" compliance):
  *
+ * https://git.kernel.org/pub/scm/libs/libcap/libcap.git/refs/
  * http://www.kernel.org/pub/linux/libs/security/linux-privs/
  */
 
 #ifndef _UAPI_LINUX_CAPABILITY_H
 #define _UAPI_LINUX_CAPABILITY_H
 
-#include <linux/types.h>
-
-struct task_struct;
+#include <stdint.h>
+#define __u32 uint32_t
+#define __le32 __u32
 
 /* User-level do most of the mapping between kernel and user
    capabilities based on the version tag given by the kernel. The
@@ -40,13 +41,13 @@ struct task_struct;
 typedef struct __user_cap_header_struct {
 	__u32 version;
 	int pid;
-} __user *cap_user_header_t;
+} *cap_user_header_t;
 
 typedef struct __user_cap_data_struct {
         __u32 effective;
         __u32 permitted;
         __u32 inheritable;
-} __user *cap_user_data_t;
+} *cap_user_data_t;
 
 
 #define VFS_CAP_REVISION_MASK	0xFF000000
@@ -62,16 +63,32 @@ typedef struct __user_cap_data_struct {
 #define VFS_CAP_U32_2           2
 #define XATTR_CAPS_SZ_2         (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))
 
+#define VFS_CAP_REVISION_3	0x03000000
+#define VFS_CAP_U32_3           VFS_CAP_U32_2
+#define XATTR_CAPS_SZ_3         (sizeof(__le32)+XATTR_CAPS_SZ_2)
+
+/*
+ * Kernel capabilities default to v2. The v3 VFS caps are only used,
+ * at present, for namespace specific filesystem capabilities.
+ */
 #define XATTR_CAPS_SZ           XATTR_CAPS_SZ_2
 #define VFS_CAP_U32             VFS_CAP_U32_2
 #define VFS_CAP_REVISION	VFS_CAP_REVISION_2
 
+#define _VFS_CAP_DATA_HEAD \
+	__le32 magic_etc;            /* Little endian */ \
+	struct {                                         \
+		__le32 permitted;    /* Little endian */ \
+		__le32 inheritable;  /* Little endian */ \
+	} data[VFS_CAP_U32]
+
 struct vfs_cap_data {
-	__le32 magic_etc;            /* Little endian */
-	struct {
-		__le32 permitted;    /* Little endian */
-		__le32 inheritable;  /* Little endian */
-	} data[VFS_CAP_U32];
+	_VFS_CAP_DATA_HEAD;
+};
+
+struct vfs_ns_cap_data {
+	_VFS_CAP_DATA_HEAD;
+	__le32 rootid;
 };
 
 #ifndef __KERNEL__
@@ -207,7 +224,7 @@ struct vfs_cap_data {
 #define CAP_SYS_MODULE       16
 
 /* Allow ioperm/iopl access */
-/* Allow sending USB messages to any device via /proc/bus/usb */
+/* Allow sending USB messages to any device via /dev/bus/usb */
 
 #define CAP_SYS_RAWIO        17
 
@@ -308,10 +325,16 @@ struct vfs_cap_data {
 
 #define CAP_LEASE            28
 
+/* Allow writing the audit log via unicast netlink socket */
+
 #define CAP_AUDIT_WRITE      29
 
+/* Allow configuration of audit via unicast netlink socket */
+
 #define CAP_AUDIT_CONTROL    30
 
+/* Set capabilities on files. */
+
 #define CAP_SETFCAP	     31
 
 /* Override MAC access.
@@ -343,7 +366,54 @@ struct vfs_cap_data {
 
 #define CAP_BLOCK_SUSPEND    36
 
-#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND
+/* Allow reading the audit log via multicast netlink socket */
+
+#define CAP_AUDIT_READ       37
+
+/* Allow system performance and observability privileged operations using
+ * perf_events, i915_perf and other kernel subsystems. */
+
+#define CAP_PERFMON	     38
+
+/*
+ * CAP_BPF allows the following BPF operations:
+ * - Creating all types of BPF maps
+ * - Advanced verifier features
+ *   - Indirect variable access
+ *   - Bounded loops
+ *   - BPF to BPF function calls
+ *   - Scalar precision tracking
+ *   - Larger complexity limits
+ *   - Dead code elimination
+ *   - And potentially other features
+ * - Loading BPF Type Format (BTF) data
+ * - Retrieve xlated and JITed code of BPF programs
+ * - Use bpf_spin_lock() helper
+ *
+ * CAP_PERFMON relaxes the verifier checks further:
+ * - BPF progs can make use of pointer-to-integer conversions
+ * - speculation attack hardening measures are bypassed
+ * - bpf_probe_read to read arbitrary kernel memory is allowed
+ * - bpf_trace_printk to print kernel memory is allowed
+ *
+ * CAP_SYS_ADMIN is required to use bpf_probe_write_user.
+ *
+ * CAP_SYS_ADMIN is required to iterate system wide loaded
+ * programs, maps, links, BTFs and convert their IDs to file descriptors.
+ *
+ * CAP_PERFMON and CAP_BPF are required to load tracing programs.
+ * CAP_NET_ADMIN and CAP_BPF are required to load networking programs.
+ */
+
+#define CAP_BPF		     39
+
+/* Allow checkpoint/restore related operations */
+/* Allow PID selection during clone3() */
+/* Allow writing to ns_last_pid */
+
+#define CAP_CHECKPOINT_RESTORE 40
+
+#define CAP_LAST_CAP         CAP_CHECKPOINT_RESTORE
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
@@ -352,7 +422,6 @@ struct vfs_cap_data {
  */
 
 #define CAP_TO_INDEX(x)     ((x) >> 5)        /* 1 << 5 == bits in __u32 */
-#define CAP_TO_MASK(x)      (1 << ((x) & 31)) /* mask for indexed __u32 */
-
+#define CAP_TO_MASK(x)      (1u << ((x) & 31)) /* mask for indexed __u32 */
 
 #endif /* _UAPI_LINUX_CAPABILITY_H */
diff --git a/libcap/include/uapi/linux/prctl.h b/libcap/include/uapi/linux/prctl.h
index 289760f..1b6a009 100644
--- a/libcap/include/uapi/linux/prctl.h
+++ b/libcap/include/uapi/linux/prctl.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_PRCTL_H
 #define _LINUX_PRCTL_H
 
+#include <linux/types.h>
+
 /* Values to pass as first argument to prctl() */
 
 #define PR_SET_PDEATHSIG  1  /* Second arg is a signal */
@@ -119,6 +121,31 @@
 # define PR_SET_MM_ENV_END		11
 # define PR_SET_MM_AUXV			12
 # define PR_SET_MM_EXE_FILE		13
+# define PR_SET_MM_MAP			14
+# define PR_SET_MM_MAP_SIZE		15
+
+/*
+ * This structure provides new memory descriptor
+ * map which mostly modifies /proc/pid/stat[m]
+ * output for a task. This is mostly done for
+ * the sake of checkpoint/restore functionality.
+ */
+struct prctl_mm_map {
+	__u64	start_code;		/* code section bounds */
+	__u64	end_code;
+	__u64	start_data;		/* data section bounds */
+	__u64	end_data;
+	__u64	start_brk;		/* heap for brk() syscall */
+	__u64	brk;
+	__u64	start_stack;		/* stack starts at */
+	__u64	arg_start;		/* command line arguments bounds */
+	__u64	arg_end;
+	__u64	env_start;		/* environment variables bounds */
+	__u64	env_end;
+	__u64	*auxv;			/* auxiliary vector */
+	__u32	auxv_size;		/* vector size */
+	__u32	exe_fd;			/* /proc/$pid/exe link file */
+};
 
 /*
  * Set specific pid that is allowed to ptrace the current task.
@@ -149,4 +176,25 @@
 
 #define PR_GET_TID_ADDRESS	40
 
+#define PR_SET_THP_DISABLE	41
+#define PR_GET_THP_DISABLE	42
+
+/*
+ * Tell the kernel to start/stop helping userspace manage bounds tables.
+ */
+#define PR_MPX_ENABLE_MANAGEMENT  43
+#define PR_MPX_DISABLE_MANAGEMENT 44
+
+#define PR_SET_FP_MODE		45
+#define PR_GET_FP_MODE		46
+# define PR_FP_MODE_FR		(1u << 0)	/* 64b FP registers */
+# define PR_FP_MODE_FRE		(1u << 1)	/* 32b compatibility */
+
+/* Control the ambient capability set */
+#define PR_CAP_AMBIENT			47
+# define PR_CAP_AMBIENT_IS_SET		1
+# define PR_CAP_AMBIENT_RAISE		2
+# define PR_CAP_AMBIENT_LOWER		3
+# define PR_CAP_AMBIENT_CLEAR_ALL	4
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/libcap/include/uapi/linux/securebits.h b/libcap/include/uapi/linux/securebits.h
index 985aac9..e9b1309 100644
--- a/libcap/include/uapi/linux/securebits.h
+++ b/libcap/include/uapi/linux/securebits.h
@@ -5,7 +5,7 @@
    whether the setting is on or off. The other bit specify whether the
    setting is locked or not. A setting which is locked cannot be
    changed from user-level. */
-#define issecure_mask(X)	(1 << (X))
+#define issecure_mask(X)	(1u << (X))
 
 #define SECUREBITS_DEFAULT 0x00000000
 
@@ -22,7 +22,7 @@
 #define SECBIT_NOROOT_LOCKED	(issecure_mask(SECURE_NOROOT_LOCKED))
 
 /* When set, setuid to/from uid 0 does not trigger capability-"fixup".
-   When unset, to provide compatiblility with old programs relying on
+   When unset, to provide compatibility with old programs relying on
    set*uid to gain/lose privilege, transitions to/from uid 0 cause
    capabilities to be gained/lost. */
 #define SECURE_NO_SETUID_FIXUP		2
@@ -43,9 +43,18 @@
 #define SECBIT_KEEP_CAPS	(issecure_mask(SECURE_KEEP_CAPS))
 #define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED))
 
+/* When set, a process cannot add new capabilities to its ambient set. */
+#define SECURE_NO_CAP_AMBIENT_RAISE		6
+#define SECURE_NO_CAP_AMBIENT_RAISE_LOCKED	7  /* make bit-6 immutable */
+
+#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
+#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \
+			(issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED))
+
 #define SECURE_ALL_BITS		(issecure_mask(SECURE_NOROOT) | \
 				 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
-				 issecure_mask(SECURE_KEEP_CAPS))
+				 issecure_mask(SECURE_KEEP_CAPS) | \
+				 issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE))
 #define SECURE_ALL_LOCKS	(SECURE_ALL_BITS << 1)
 
 #endif /* _UAPI_LINUX_SECUREBITS_H */
diff --git a/libcap/libcap.h b/libcap/libcap.h
index 2596c11..f4a72fe 100644
--- a/libcap/libcap.h
+++ b/libcap/libcap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997 Andrew G Morgan <morgan@kernel.org>
+ * Copyright (c) 1997,2020 Andrew G Morgan <morgan@kernel.org>
  *
  * This file contains internal definitions for the various functions in
  * this small capability library.
@@ -9,6 +9,7 @@
 #define LIBCAP_H
 
 #include <errno.h>
+#include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -28,7 +29,7 @@
 
 #ifndef _LINUX_CAPABILITY_U32S_1
 # define _LINUX_CAPABILITY_U32S_1          1
-#endif /* ndef _LINUX_CAPABILITY_U32S */
+#endif /* ndef _LINUX_CAPABILITY_U32S_1 */
 
 /*
  * Do we match the local kernel?
@@ -113,34 +114,57 @@ struct _cap_vfs_cap_data {
 
 #define CAP_T_MAGIC 0xCA90D0
 struct _cap_struct {
+    __u8 mutex;
     struct __user_cap_header_struct head;
     union {
 	struct __user_cap_data_struct set;
 	__u32 flat[NUMBER_OF_CAP_SETS];
     } u[_LIBCAP_CAPABILITY_U32S];
+    uid_t rootid;
 };
 
+/*
+ * Elementary exclusive locking primitives for situations where
+ * linking with pthreads needs it, but such linking is not common.
+ *
+ *  _cap_mu_blocked(x) attempts to lock x but if already locked, returns true
+ *  _cap_mu_lock(x)    attempts to lock and waits until the lock is granted
+ *  _cap_mu_unlock(x)  unconditionally unlocks the lock
+ *  _cap_mu_unlock_return(x, y) unlock lock x and return value y
+ */
+#define _cap_mu_blocked(x)          \
+    __atomic_test_and_set((void *)(x), __ATOMIC_SEQ_CST)
+#define _cap_mu_lock(x)             \
+    while (_cap_mu_blocked(x)) sched_yield()
+#define _cap_mu_unlock(x)           \
+    __atomic_clear((void *) (x), __ATOMIC_SEQ_CST)
+#define _cap_mu_unlock_return(x, y) \
+    do { _cap_mu_unlock(x); return (y); } while (0)
+
 /* the maximum bits supportable */
 #define __CAP_MAXBITS (__CAP_BLKS * 32)
 
 /* string magic for cap_free */
 #define CAP_S_MAGIC 0xCA95D0
 
-/*
- * kernel API cap set abstraction
- */
+/* iab set magic for cap_free */
+#define CAP_IAB_MAGIC 0xCA91AB
+
+/* launcher magic for cap_free */
+#define CAP_LAUNCH_MAGIC 0xCA91AC
 
-#define raise_cap(x,set)   u[(x)>>5].flat[set]       |=  (1<<((x)&31))
-#define lower_cap(x,set)   u[(x)>>5].flat[set]       &= ~(1<<((x)&31))
-#define isset_cap(y,x,set) ((y)->u[(x)>>5].flat[set] &   (1<<((x)&31)))
+#define magic_of(x)           ((x) ? *(-2 + (const __u32 *) (x)) : 0) /* NULL => 0 */
+#define good_cap_t(x)         (CAP_T_MAGIC   == magic_of(x))
+#define good_cap_iab_t(x)     (CAP_IAB_MAGIC == magic_of(x))
+#define good_cap_launch_t(x)  (CAP_LAUNCH_MAGIC == magic_of(x))
 
 /*
- * Private definitions for internal use by the library.
+ * kernel API cap set abstraction
  */
 
-#define __libcap_check_magic(c,magic) ((c) && *(-1+(__u32 *)(c)) == (magic))
-#define good_cap_t(c)        __libcap_check_magic(c, CAP_T_MAGIC)
-#define good_cap_string(c)   __libcap_check_magic(c, CAP_S_MAGIC)
+#define raise_cap(x, set)    u[(x) >> 5].flat[set]       |=  (1u << ((x)&31))
+#define lower_cap(x, set)    u[(x) >> 5].flat[set]       &= ~(1u << ((x)&31))
+#define isset_cap(y, x, set) ((y)->u[(x) >> 5].flat[set] &   (1u << ((x)&31)))
 
 /*
  * These match CAP_DIFFERS() expectations
@@ -178,6 +202,9 @@ struct _cap_struct {
 #endif /* DEBUG */
 
 extern char *_libcap_strdup(const char *text);
+extern void _libcap_initialize(void);
+
+#define EXECABLE_INITIALIZE _libcap_initialize()
 
 /*
  * These are semi-public prototypes, they will only be defined in
@@ -185,8 +212,7 @@ extern char *_libcap_strdup(const char *text);
  * place them here too.
  */
 
-extern int capset(cap_user_header_t header, cap_user_data_t data);
-extern int capget(cap_user_header_t header, const cap_user_data_t data);
+extern int capget(cap_user_header_t header, cap_user_data_t data);
 extern int capgetp(pid_t pid, cap_t cap_d);
 extern int capsetp(pid_t pid, cap_t cap_d);
 
@@ -206,4 +232,88 @@ extern int capsetp(pid_t pid, cap_t cap_d);
  */
 #define ssizeof(x) ((ssize_t) sizeof(x))
 
+/*
+ * Put this here as a macro so we can unit test it.
+ */
+#define _binary_search(val, fn, low, high, fallback) do {	\
+	cap_value_t min = (low), max = (high);			\
+	while (min <= max) {					\
+	    cap_value_t mid = (min+max) / 2;			\
+	    if (fn(mid) < 0) {	/* mid rejected: look lower */	\
+		max = mid - 1;					\
+	    } else {		/* mid accepted: look higher */	\
+		min = mid + 1;					\
+	    }							\
+	}							\
+	(val) = min ? (min <= (high) ? min : (fallback)) : (fallback); \
+    } while(0)
+
+/*
+ * cap_iab_s holds a collection of inheritable capability bits. The i
+ * bits are inheritable (these are the same as those in cap_t), the a
+ * bits are ambient bits (which cannot be a superset of i&p), and nb
+ * are the bits that will be dropped from the bounding set when
+ * applied.
+ */
+struct cap_iab_s {
+    __u8 mutex;	/* presumably the lock byte for _cap_mu_*() - confirm */
+    __u32 i[_LIBCAP_CAPABILITY_U32S];	/* inheritable bits */
+    __u32 a[_LIBCAP_CAPABILITY_U32S];	/* ambient bits */
+    __u32 nb[_LIBCAP_CAPABILITY_U32S];	/* bits to drop from the bounding set */
+};
+
+#define LIBCAP_IAB_I_FLAG (1U << CAP_IAB_INH)
+#define LIBCAP_IAB_A_FLAG (1U << CAP_IAB_AMB)
+#define LIBCAP_IAB_IA_FLAG (LIBCAP_IAB_I_FLAG | LIBCAP_IAB_A_FLAG)
+#define LIBCAP_IAB_NB_FLAG (1U << CAP_IAB_BOUND)
+
+/*
+ * The following support launching another process without destroying
+ * the state of the current process. This is especially useful for
+ * multithreaded applications.
+ */
+struct cap_launch_s {
+    __u8 mutex;
+    /*
+     * Once forked but before active privilege is changed, this
+     * function (if non-NULL) is called.
+     */
+    int (*custom_setup_fn)(void *detail);
+
+    /*
+     * user and groups to be used by the forked child.
+     */
+    int change_uids;
+    uid_t uid;
+
+    int change_gids;
+    gid_t gid;
+    int ngroups;
+    const gid_t *groups;
+
+    /*
+     * mode holds the preferred capability mode. Any non-uncertain
+     * setting here will require an empty ambient set.
+     */
+    int change_mode;
+    cap_mode_t mode;
+
+    /*
+     * i,a,[n]b caps. These bitmaps hold all of the capability sets that
+     * cap_launch will affect. nb holds values to be lowered in the bounding
+     * set.
+     */
+    struct cap_iab_s *iab;
+
+    /* chroot holds a preferred chroot for the launched child. */
+    char *chroot;
+
+    /*
+     * execve style arguments
+     */
+    const char *arg0;
+    const char *const *argv;
+    const char *const *envp;
+};
+
 #endif /* LIBCAP_H */
diff --git a/libcap/libcap.pc.in b/libcap/libcap.pc.in
index a28e3e4..69cd231 100644
--- a/libcap/libcap.pc.in
+++ b/libcap/libcap.pc.in
@@ -4,7 +4,7 @@ libdir=@libdir@
 includedir=@includedir@
 
 Name: libcap
-Description: libcap
+Description: libcap - linux capabilities library
 Version: @VERSION@
 Libs: -L${libdir} -lcap
 Libs.private: @deps@
diff --git a/libcap/libpsx.pc.in b/libcap/libpsx.pc.in
new file mode 100644
index 0000000..d032b9f
--- /dev/null
+++ b/libcap/libpsx.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libpsx
+Description: libpsx - linux posix syscall API for pthreads
+Version: @VERSION@
+Libs: -L${libdir} -lpsx -lpthread -Wl,-wrap,pthread_create
+Libs.private: @deps@
+Cflags: -I${includedir}
diff --git a/libcap/psx_exec.c b/libcap/psx_exec.c
new file mode 100644
index 0000000..5e7a88f
--- /dev/null
+++ b/libcap/psx_exec.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include "execable.h"
+
+SO_MAIN(int argc, char **argv)
+{
+    /* Report under the invoked name when argv[0] is available, */
+    /* otherwise fall back to a generic label. */
+    const char *self = (argv && argv[0]) ? argv[0] : "This library";
+
+    printf("%s is the shared library version: " LIBRARY_VERSION ".\n"
+	   "See the License file for distribution information.\n"
+	   "More information on this library is available from:\n"
+	   "\n"
+	   "    https://sites.google.com/site/fullycapable/\n", self);
+}
diff --git a/pam_cap/.gitignore b/pam_cap/.gitignore
index 11806f5..dac617b 100644
--- a/pam_cap/.gitignore
+++ b/pam_cap/.gitignore
@@ -1,2 +1,7 @@
 pam_cap.so
-testcompile
+testlink
+test_pam_cap
+lazylink.so
+pam_cap_linkopts
+LIBCAP
+incapable.conf
diff --git a/pam_cap/License b/pam_cap/License
index e88aa3f..6c20dc0 100644
--- a/pam_cap/License
+++ b/pam_cap/License
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR LGPL-2.0-or-later */
+
 Unless otherwise *explicitly* stated the following text describes the
 licensed conditions under which the contents of this module release
 may be distributed:
diff --git a/pam_cap/Makefile b/pam_cap/Makefile
index 9ca5bef..258e519 100644
--- a/pam_cap/Makefile
+++ b/pam_cap/Makefile
@@ -3,27 +3,91 @@
 topdir=$(shell pwd)/..
 include ../Make.Rules
 
-# Note (as the author of much of the Linux-PAM library, I am confident
-# that this next line does *not* require -lpam on it.) If you think it
-# does, *verify that it does*, and if you observe that it fails as
-# written (and you know why it fails), email me and explain why. Thanks!
-LDLIBS += -L../libcap -lcap
+# Always build pam_cap sources this way:
+CFLAGS += -fPIC
 
 all: pam_cap.so
-	$(MAKE) testcompile
+	$(MAKE) testlink
 
 install: all
-	mkdir -p -m 0755 $(LIBDIR)/security
-	install -m 0755 pam_cap.so $(LIBDIR)/security
+	mkdir -p -m 0755 $(FAKEROOT)$(LIBDIR)/security
+	install -m 0755 pam_cap.so $(FAKEROOT)$(LIBDIR)/security
 
-pam_cap.so: pam_cap.o
-	$(LD) $(LDFLAGS) -o pam_cap.so $< $(LDLIBS)
+../libcap/loader.txt:
+	$(MAKE) -C ../libcap loader.txt
 
-pam_cap.o: pam_cap.c
-	$(CC) $(CFLAGS) $(IPATH) -c $< -o $@
+execable.o: execable.c ../libcap/execable.h ../libcap/loader.txt
+	$(CC) $(CFLAGS) $(CPPFLAGS) -DLIBCAP_VERSION=\"libcap-$(VERSION).$(MINOR)\" -DSHARED_LOADER=\"$(shell cat ../libcap/loader.txt)\" -c execable.c -o $@
 
-testcompile: test.c pam_cap.o
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $+ -lpam -ldl $(LDLIBS)
+LIBCAP:
+	$(MAKE) -C ../libcap all
+	touch $@
+
+pam_cap.so: pam_cap.o execable.o pam_cap_linkopts LIBCAP
+	cat pam_cap_linkopts | xargs -e $(LD) $(LDFLAGS) -o $@ pam_cap.o execable.o $(LIBCAPLIB)
+
+# Some distributions force link everything at compile time, and don't
+# take advantage of libpam's dlopen runtime options to resolve ill
+# defined symbols from its own linkage as needed. (As the original
+# author of that part of libpam, I consider this force linking
+# premature optimization.) We debugged its consequences to pam_cap.so
+# as part of:
+#
+#   https://bugzilla.kernel.org/show_bug.cgi?id=214023
+#
+# If the current build environment is one of those, or we can't
+# reliably prove it isn't, extend the link options for pam_cap.so to
+# force linkage against libpam and the gazillion other things libpam
+# is linked against...
+#
+# If you want to force this behavior one way or the other, use the
+# make FORCELINKPAM=yes or FORCELINKPAM=no override.
+ifeq ($(FORCELINKPAM),yes)
+pam_cap_linkopts: Makefile
+	echo "-Wl,-e,__so_start -lpam" > $@
+else
+ifeq ($(FORCELINKPAM),no)
+pam_cap_linkopts: Makefile
+	echo "-Wl,-e,__so_start" > $@
+else
+pam_cap_linkopts: lazylink.so
+	echo "-Wl,-e,__so_start" > $@
+	./lazylink.so || echo "-lpam" >> $@
+
+lazylink.so: lazylink.c ../libcap/execable.h ../libcap/loader.txt
+	$(LD) -o $@ $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) lazylink.c -DSHARED_LOADER=\"$(shell cat ../libcap/loader.txt)\" -Wl,-e,__so_start
+endif
+endif
+
+../libcap/libcap.a:
+	$(MAKE) -C ../libcap libcap.a
+
+# Avoid $(LDFLAGS) here to avoid conflicts with --static for an in-tree
+# test binary.
+test_pam_cap: test_pam_cap.c pam_cap.c ../libcap/libcap.a
+	$(CC) $(CFLAGS) $(CPPFLAGS) -o $@ test_pam_cap.c $(LIBCAPLIB) --static
+
+testlink: test.o pam_cap.o
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $+ -lpam -ldl $(LIBCAPLIB)
+
+incapable.conf:
+	echo "^cap_setuid  alpha" > $@ && chmod o+w $@
+
+test: testlink test_pam_cap pam_cap.so incapable.conf
+	./test_pam_cap
+	LD_LIBRARY_PATH=../libcap ./pam_cap.so
+	LD_LIBRARY_PATH=../libcap ./pam_cap.so --help
+	@echo "module can be run as an executable!"
+
+sudotest: test_pam_cap incapable.conf
+	$(SUDO) ./test_pam_cap root 0x0 0x0 0x0 config=./capability.conf
+	$(SUDO) ./test_pam_cap root 0x0 0x0 0x0 config=./sudotest.conf
+	$(SUDO) ./test_pam_cap alpha 0x0 0x0 0x0 config=./capability.conf
+	$(SUDO) ./test_pam_cap alpha 0x0 0x1 0x80 config=./sudotest.conf
+	$(SUDO) ./test_pam_cap beta 0x0 0x1 0x0 config=./sudotest.conf
+	$(SUDO) ./test_pam_cap gamma 0x0 0x0 0x81 config=./sudotest.conf
+	$(SUDO) ./test_pam_cap delta 0x41 0x80 0x41 config=./sudotest.conf
 
 clean:
-	rm -f *.o *.so testcompile *~
+	rm -f *.o *.so testlink lazylink.so test_pam_cap pam_cap_linkopts *~
+	rm -f LIBCAP incapable.conf
diff --git a/pam_cap/capability.conf b/pam_cap/capability.conf
index 09517f8..08c01e1 100644
--- a/pam_cap/capability.conf
+++ b/pam_cap/capability.conf
@@ -6,14 +6,26 @@
 #
 # In order to use this module, it must have been linked with libcap
 # and thus you'll know about Linux's capability support.
-# [If you don't know about libcap, the sources for it are here:
+# [If you don't know about libcap, read more about it here:
 #
-#   http://www.kernel.org/pub/linux/libs/security/linux-privs/
+#   https://sites.google.com/site/fullycapable/
+#
+# There is a page devoted to pam_cap.so here:
+#
+#   https://sites.google.com/site/fullycapable/pam_cap-so
 #
 # .]
 #
 # Here are some sample lines (remove the preceding '#' if you want to
-# use them
+# use them).
+#
+# The pam_cap.so module accepts the following arguments:
+#
+#   debug         - log more verbosely (unused by pam_cap for now)
+#   config=<file> - override the default config for the module with file
+#   keepcaps      - workaround for applications that setuid without this
+#   autoauth      - if you want pam_cap.so to always succeed for the auth phase
+#   default=<iab> - provide a fallback IAB value if there is no '*' rule
 
 ## user 'morgan' gets the CAP_SETFCAP inheritable capability (commented out!)
 #cap_setfcap		morgan
@@ -24,20 +36,23 @@
 ## 'everyone else' gets no inheritable capabilities (restrictive config)
 none  *
 
-## if there is no '*' entry, all users not explicitly mentioned will
-## get all available capabilities. This is a permissive default, and
-## possibly not what you want... On first reading, you might think this
-## is a security problem waiting to happen, but it defaults to not being
-## so in this sample file! Further, by 'get', we mean 'get in their inheritable
-## set'. That is, if you look at a random process, even one run by root,
-## you will see it has no inheritable capabilities (by default):
+## if there is no '*' entry, and no "default=<iab>" pam_cap.so module
+## argument to fall back on, all users not explicitly mentioned will
+## get all currently available inheritable capabilities. This is a
+## permissive default, and possibly not what you want... On first
+## reading, you might think this is a security problem waiting to
+## happen, but it defaults to not being so in this sample file!
+## Further, by 'get', we mean 'get in their IAB sets'. That is, if you
+## look at a random process, even one run by root, you will see it has
+## no IAB capabilities (by default):
 ##
 ##   $ /sbin/capsh --decode=$(grep CapInh /proc/1/status|awk '{print $2}')
 ##   0000000000000000=
 ##
-## The pam_cap module simply alters the value of this capability
-## set. Including the 'none *' forces use of this module with an
-## unspecified user to have their inheritable set forced to zero.
+## The pam_cap module simply alters the value of the inheritable
+## capability vectors (IAB). Including the 'none *' forces use of this
+## module with an unspecified user to have their inheritable set
+## forced to zero.
 ##
 ## Omitting the line will cause the inheritable set to be unmodified
 ## from what the parent process had (which is generally 0 unless the
diff --git a/pam_cap/execable.c b/pam_cap/execable.c
new file mode 100644
index 0000000..17276b4
--- /dev/null
+++ b/pam_cap/execable.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Andrew G. Morgan <morgan@kernel.org>
+ *
+ * The purpose of this file is to provide an executable mode for the
+ * pam_cap.so binary. If you run it directly, all it does is print
+ * version information.
+ *
+ * It accepts the optional --help argument which causes the executable
+ * to display a summary of all the supported, pam stacked, module
+ * arguments.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../libcap/execable.h"
+
+SO_MAIN(int argc, char **argv)
+{
+    const char *cmd = "<pam_cap.so>";
+    /* Keep the default name unless argv[0] is usable (as psx_exec.c does). */
+    if (argv != NULL && argv[0] != NULL) {
+	cmd = argv[0];
+    }
+    printf(
+	"%s (version " LIBCAP_VERSION ") is a PAM module to specify\n"
+	"inheritable (IAB) capabilities via the libpam authentication\n"
+	"abstraction. See the pam_cap License file for licensing information.\n"
+	"\n"
+	"Release notes and feature documentation for libcap and pam_cap.so\n"
+	"can be found at:\n"
+	"\n"
+	"    https://sites.google.com/site/fullycapable/\n", cmd);
+    if (argc <= 1 || argv == NULL) {
+	return;
+    }
+    /* Any argument other than a single --help is a usage error. */
+    if (argc > 2 || argv[1] == NULL || strcmp(argv[1], "--help")) {
+	printf("\n%s only supports the optional argument --help\n", cmd);
+	exit(1);
+    }
+    /* --help: list the module arguments pam_cap.so accepts. */
+    printf("\n"
+	   "%s supports the following module arguments:\n"
+	   "\n"
+	   "debug         - verbose logging (ignored for now)\n"
+	   "config=<file> - override the default config with file\n"
+	   "keepcaps      - workaround for apps that setuid without this\n"
+	   "autoauth      - pam_cap.so to always succeed for the 'auth' phase\n"
+	   "default=<iab> - fallback IAB value if there is no '*' rule\n"
+	   "defer         - apply IAB value at pam_exit (not via setcred)\n",
+	cmd);
+}
diff --git a/pam_cap/lazylink.c b/pam_cap/lazylink.c
new file mode 100644
index 0000000..969c92d
--- /dev/null
+++ b/pam_cap/lazylink.c
@@ -0,0 +1,20 @@
+/*
+ * Test if the provided LDFLAGS support lazy linking
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../libcap/execable.h"
+
+extern int nothing_sets_this(void);
+extern void nothing_uses_this(void);
+
+void nothing_uses_this(void)
+{
+    nothing_sets_this();
+}
+
+SO_MAIN(int argc, char **argv)
+{
+    exit(0);
+}
diff --git a/pam_cap/pam_cap.c b/pam_cap/pam_cap.c
index e6ebbe9..b9419cb 100644
--- a/pam_cap/pam_cap.c
+++ b/pam_cap/pam_cap.c
@@ -1,20 +1,31 @@
 /*
- * Copyright (c) 1999,2007 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 1999,2007,2019-21 Andrew G. Morgan <morgan@kernel.org>
  *
- * The purpose of this module is to enforce inheritable capability sets
- * for a specified user.
+ * The purpose of this module is to enforce inheritable, bounding and
+ * ambient capability sets for a specified user.
  */
 
-/* #define DEBUG */
+/* #define PAM_DEBUG */
+
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
 
-#include <stdio.h>
-#include <string.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
 #include <stdarg.h>
 #include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
 #include <syslog.h>
-
 #include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
 
 #include <security/pam_modules.h>
 #include <security/_pam_macros.h>
@@ -22,34 +33,110 @@
 #define USER_CAP_FILE           "/etc/security/capability.conf"
 #define CAP_FILE_BUFFER_SIZE    4096
 #define CAP_FILE_DELIMITERS     " \t\n"
-#define CAP_COMBINED_FORMAT     "%s all-i %s+i"
-#define CAP_DROP_ALL            "%s all-i"
 
+/*
+ * pam_cap_s is used to summarize argument values in a parsed form.
+ */
 struct pam_cap_s {
     int debug;
+    int keepcaps;
+    int autoauth;
+    int defer;
     const char *user;
     const char *conf_filename;
+    const char *fallback;
+    pam_handle_t *pamh;
 };
 
-/* obtain the inheritable capabilities for the current user */
+/*
+ * load_groups obtains the list of all of the groups associated with
+ * the requested user: gid & supplemental groups.
+ */
+static int load_groups(const char *user, char ***groups, int *groups_n) {
+    struct passwd *pwd;
+    gid_t grps[NGROUPS_MAX];
+    int ngrps = NGROUPS_MAX;
+
+    *groups = NULL;
+    *groups_n = 0;
+
+    pwd = getpwnam(user);
+    if (pwd == NULL) {
+	return -1;
+    }
+
+    /* must include at least pwd->pw_gid, hence < 1 test. */
+    if (getgrouplist(user, pwd->pw_gid, grps, &ngrps) < 1) {
+	return -1;
+    }
+
+    *groups = calloc(ngrps, sizeof(char *));
+    if (*groups == NULL) {
+	return -1;
+    }
+    int g_n = 0, i;
+    for (i = 0; i < ngrps; i++) {
+	const struct group *g = getgrgid(grps[i]);
+	if (g == NULL) {
+	    continue;
+	}
+	D(("noting [%s] is a member of [%s]", user, g->gr_name));
+	(*groups)[g_n++] = strdup(g->gr_name);
+    }
+
+    *groups_n = g_n;
+    return 0;
+}
+
+/* look up the IAB text for user in source; NULL if none, caller frees it */
 
 static char *read_capabilities_for_user(const char *user, const char *source)
 {
     char *cap_string = NULL;
     char buffer[CAP_FILE_BUFFER_SIZE], *line;
+    char **groups;
+    int groups_n;
     FILE *cap_file;
 
+    if (load_groups(user, &groups, &groups_n)) {
+	D(("unknown user [%s]", user));
+	return NULL;
+    }
+
     cap_file = fopen(source, "r");
     if (cap_file == NULL) {
 	D(("failed to open capability file"));
-	return NULL;
+	goto defer;
+    }
+    /*
+     * In all cases other than "/dev/null", the config file should not
+     * be world writable. We do not check for ownership limitations or
+     * group write restrictions as these represent legitimate local
+     * administration choices. Especially in a system operating in
+     * CAP_MODE_PURE1E.
+     */
+    if (strcmp(source, "/dev/null") != 0) {
+	struct stat sb;
+	D(("validate filehandle [for opened %s] does not point to a world"
+	   " writable file", source));
+	if (fstat(fileno(cap_file), &sb) != 0) {
+	    D(("unable to fstat config file: %d", errno));
+	    goto close_out_file;
+	}
+	if ((sb.st_mode & S_IWOTH) != 0) {
+	    D(("open failed [%s] is world writable test: security hole",
+	       source));
+	    goto close_out_file;
+	}
     }
 
-    while ((line = fgets(buffer, CAP_FILE_BUFFER_SIZE, cap_file))) {
-	int found_one = 0;
+    int found_one = 0;
+    while (!found_one &&
+	   (line = fgets(buffer, CAP_FILE_BUFFER_SIZE, cap_file))) {
 	const char *cap_text;
 
-	cap_text = strtok(line, CAP_FILE_DELIMITERS);
+	char *next = NULL;
+	cap_text = strtok_r(line, CAP_FILE_DELIMITERS, &next);
 
 	if (cap_text == NULL) {
 	    D(("empty line"));
@@ -60,55 +147,125 @@ static char *read_capabilities_for_user(const char *user, const char *source)
 	    continue;
 	}
 
-	while ((line = strtok(NULL, CAP_FILE_DELIMITERS))) {
-
+	/*
+	 * Explore whether any of the ids match the current user: "*",
+	 * the user name, or "@group" for one of the user's groups.
+	 */
+	while ((line = strtok_r(next, CAP_FILE_DELIMITERS, &next))) {
 	    if (strcmp("*", line) == 0) {
 		D(("wildcard matched"));
 		found_one = 1;
-		cap_string = strdup(cap_text);
 		break;
 	    }
 
 	    if (strcmp(user, line) == 0) {
 		D(("exact match for user"));
 		found_one = 1;
-		cap_string = strdup(cap_text);
 		break;
 	    }
 
-	    D(("user is not [%s] - skipping", line));
-	}
+	    if (line[0] != '@') {
+		D(("user [%s] is not [%s] - skipping", user, line));
+	    }
 
-	cap_text = NULL;
-	line = NULL;
+	    int i; /* only "@name" tokens may be compared with group names */
+	    for (i=0; i < groups_n; i++) {
+		if (line[0] == '@' && !strcmp(groups[i], line+1)) {
+		    D(("user group matched [%s]", line));
+		    found_one = 1;
+		    break;
+		}
+	    }
+	    if (found_one) {
+		break;
+	    }
+	}
 
 	if (found_one) {
+	    cap_string = strdup(cap_text);
 	    D(("user [%s] matched - caps are [%s]", user, cap_string));
-	    break;
 	}
+
+	cap_text = NULL;
+	line = NULL;
     }
 
+close_out_file:
     fclose(cap_file);
 
+defer:
     memset(buffer, 0, CAP_FILE_BUFFER_SIZE);
 
+    int i;
+    for (i = 0; i < groups_n; i++) {
+	char *g = groups[i];
+	_pam_overwrite(g);
+	_pam_drop(g);
+    }
+    if (groups != NULL) {
+	memset(groups, 0, groups_n * sizeof(char *));
+	_pam_drop(groups);
+    }
+
     return cap_string;
 }
 
 /*
+ * This is the "defer" cleanup function that actually applies the IAB
+ * tuple. This happens really late in the PAM session, hopefully after
+ * the application has performed its setuid() function.
+ */
+static void iab_apply(pam_handle_t *pamh, void *data, int error_status)
+{
+    cap_iab_t iab = data;
+    int retval = error_status & ~(PAM_DATA_REPLACE|PAM_DATA_SILENT);
+
+#ifdef PAM_DEBUG
+    {
+	cap_t c = cap_get_proc();
+	cap_iab_t tu = cap_iab_get_proc();
+	char *tc, *ttu;
+	tc = cap_to_text(c, NULL);
+	ttu = cap_iab_to_text(tu);
+
+	D(("iab_apply with uid=%d,euid=%d and error_status=0x%08x \"%s\", [%s]",
+	   getuid(), geteuid(), error_status, tc, ttu));
+
+	cap_free(ttu);
+	cap_free(tc);
+	cap_free(tu);
+	cap_free(c);
+    }
+#endif
+
+    data = NULL;
+    if (error_status & PAM_DATA_REPLACE) {
+	goto done;
+    }
+
+    if (retval != PAM_SUCCESS || !(error_status & PAM_DATA_SILENT)) {
+	goto done;
+    }
+
+    if (cap_iab_set_proc(iab) != 0) {
+	D(("IAB setting failed"));
+    }
+
+done:
+    cap_free(iab);
+}
+
+/*
  * Set capabilities for current process to match the current
  * permitted+executable sets combined with the configured inheritable
  * set.
  */
-
 static int set_capabilities(struct pam_cap_s *cs)
 {
     cap_t cap_s;
-    ssize_t length = 0;
-    char *conf_icaps;
-    char *proc_epcaps;
-    char *combined_caps;
+    char *conf_caps;
     int ok = 0;
+    cap_iab_t iab;
 
     cap_s = cap_get_proc();
     if (cap_s == NULL) {
@@ -117,80 +274,80 @@ static int set_capabilities(struct pam_cap_s *cs)
 	return 0;
     }
 
-    conf_icaps =
-	read_capabilities_for_user(cs->user,
-				   cs->conf_filename
-				   ? cs->conf_filename:USER_CAP_FILE );
-    if (conf_icaps == NULL) {
+    conf_caps =	read_capabilities_for_user(cs->user,
+					   cs->conf_filename
+					   ? cs->conf_filename:USER_CAP_FILE );
+    if (conf_caps == NULL) {
 	D(("no capabilities found for user [%s]", cs->user));
-	goto cleanup_cap_s;
-    }
-
-    proc_epcaps = cap_to_text(cap_s, &length);
-    if (proc_epcaps == NULL) {
-	D(("unable to convert process capabilities to text"));
-	goto cleanup_icaps;
+	if (cs->fallback == NULL) {
+	    goto cleanup_cap_s;
+	}
+	conf_caps = strdup(cs->fallback);
+	D(("user [%s] received fallback caps [%s]", cs->user, conf_caps));
     }
 
-    /*
-     * This is a pretty inefficient way to combine
-     * capabilities. However, it seems to be the most straightforward
-     * one, given the limitations of the POSIX.1e draft spec. The spec
-     * is optimized for applications that know the capabilities they
-     * want to manipulate at compile time.
-     */
-
-    combined_caps = malloc(1+strlen(CAP_COMBINED_FORMAT)
-			   +strlen(proc_epcaps)+strlen(conf_icaps));
-    if (combined_caps == NULL) {
-	D(("unable to combine capabilities into one string - no memory"));
-	goto cleanup_epcaps;
+    ssize_t conf_caps_length = strlen(conf_caps);
+    if (!strcmp(conf_caps, "all")) {
+	/*
+	 * all here is interpreted as no change/pass through, which is
+	 * likely to be the same as none for sensible system defaults.
+	 */
+	ok = 1;
+	goto cleanup_conf;
     }
 
-    if (!strcmp(conf_icaps, "none")) {
-	sprintf(combined_caps, CAP_DROP_ALL, proc_epcaps);
-    } else if (!strcmp(conf_icaps, "all")) {
-	/* no change */
-	sprintf(combined_caps, "%s", proc_epcaps);
-    } else {
-	sprintf(combined_caps, CAP_COMBINED_FORMAT, proc_epcaps, conf_icaps);
+    if (!strcmp(conf_caps, "none")) {
+	/* clearing CAP_INHERITABLE will also clear the ambient caps,
+	 * but for legacy reasons we do not alter the bounding set. */
+	cap_clear_flag(cap_s, CAP_INHERITABLE);
+	if (!cap_set_proc(cap_s)) {
+	    ok = 1;
+	}
+	goto cleanup_conf;
     }
-    D(("combined_caps=[%s]", combined_caps));
-
-    cap_free(cap_s);
-    cap_s = cap_from_text(combined_caps);
-    _pam_overwrite(combined_caps);
-    _pam_drop(combined_caps);
 
-#ifdef DEBUG
-    {
-        char *temp = cap_to_text(cap_s, NULL);
-	D(("abbreviated caps for process will be [%s]", temp));
-	cap_free(temp);
+    iab = cap_iab_from_text(conf_caps);
+    if (iab == NULL) {
+	D(("unable to parse the IAB [%s] value", conf_caps));
+	goto cleanup_conf;
     }
-#endif /* DEBUG */
 
-    if (cap_s == NULL) {
-	D(("no capabilies to set"));
-    } else if (cap_set_proc(cap_s) == 0) {
-	D(("capabilities were set correctly"));
+    if (cs->defer) {
+	D(("configured to delay applying IAB"));
+	int ret = pam_set_data(cs->pamh, "pam_cap_iab", iab, iab_apply);
+	if (ret != PAM_SUCCESS) {
+	    D(("unable to cache capabilities for delayed setting: %d", ret));
+	    /* since ok=0, the module will return PAM_IGNORE */
+	    cap_free(iab);
+	}
+	iab = NULL;
+    } else if (!cap_iab_set_proc(iab)) {
+	D(("able to set the IAB [%s] value", conf_caps));
 	ok = 1;
-    } else {
-	D(("failed to set specified capabilities: %s", strerror(errno)));
+    }
+    cap_free(iab);
+
+    if (cs->keepcaps) {
+	/*
+	 * Best effort to set keep caps - this may help work around
+	 * situations where applications are using a capabilities
+	 * unaware setuid() call.
+	 *
+	 * It isn't needed unless you want to support Ambient vector
+	 * values in the IAB. In this case, it will likely also
+	 * require you use the "defer" module argument.
+	 */
+	D(("setting keepcaps"));
+	(void) cap_prctlw(PR_SET_KEEPCAPS, 1, 0, 0, 0, 0);
     }
 
-cleanup_epcaps:
-    cap_free(proc_epcaps);
-
-cleanup_icaps:
-    _pam_overwrite(conf_icaps);
-    _pam_drop(conf_icaps);
+cleanup_conf:
+    memset(conf_caps, 0, conf_caps_length);
+    _pam_drop(conf_caps);
 
 cleanup_cap_s:
-    if (cap_s) {
-	cap_free(cap_s);
-	cap_s = NULL;
-    }
+    cap_free(cap_s);
+    cap_s = NULL;
 
     return ok;
 }
@@ -210,101 +367,117 @@ static void _pam_log(int err, const char *format, ...)
 
 static void parse_args(int argc, const char **argv, struct pam_cap_s *pcs)
 {
-    int ctrl=0;
+    D(("parsing %d module arg(s)", argc));
 
-    /* step through arguments */
-    for (ctrl=0; argc-- > 0; ++argv) {
+    memset(pcs, 0, sizeof(*pcs));
 
+    /* step through arguments */
+    for (; argc-- > 0; ++argv) {
 	if (!strcmp(*argv, "debug")) {
 	    pcs->debug = 1;
-	} else if (!memcmp(*argv, "config=", 7)) {
+	} else if (!strncmp(*argv, "config=", 7)) {
 	    pcs->conf_filename = 7 + *argv;
+	} else if (!strcmp(*argv, "keepcaps")) {
+	    pcs->keepcaps = 1;
+	} else if (!strcmp(*argv, "autoauth")) {
+	    pcs->autoauth = 1;
+	} else if (!strncmp(*argv, "default=", 8)) {
+	    pcs->fallback = 8 + *argv;
+	} else if (!strcmp(*argv, "defer")) {
+	    pcs->defer = 1;
 	} else {
 	    _pam_log(LOG_ERR, "unknown option; %s", *argv);
 	}
-
     }
 }
 
+/*
+ * pam_sm_authenticate parses the config file with respect to the user
+ * being authenticated and determines if they are covered by any
+ * capability inheritance rules.
+ */
 int pam_sm_authenticate(pam_handle_t *pamh, int flags,
 			int argc, const char **argv)
 {
     int retval;
     struct pam_cap_s pcs;
-    char *conf_icaps;
-
-    memset(&pcs, 0, sizeof(pcs));
+    char *conf_caps;
 
     parse_args(argc, argv, &pcs);
 
     retval = pam_get_user(pamh, &pcs.user, NULL);
-
     if (retval == PAM_CONV_AGAIN) {
 	D(("user conversation is not available yet"));
 	memset(&pcs, 0, sizeof(pcs));
 	return PAM_INCOMPLETE;
     }
 
+    if (pcs.autoauth) {
+	D(("pam_sm_authenticate autoauth = success"));
+	memset(&pcs, 0, sizeof(pcs));
+	return PAM_SUCCESS;
+    }
+
     if (retval != PAM_SUCCESS) {
-	D(("pam_get_user failed: %s", pam_strerror(pamh, retval)));
+	D(("pam_get_user failed: pam error=%d", retval));
 	memset(&pcs, 0, sizeof(pcs));
 	return PAM_AUTH_ERR;
     }
 
-    conf_icaps =
-	read_capabilities_for_user(pcs.user,
-				   pcs.conf_filename
-				   ? pcs.conf_filename:USER_CAP_FILE );
-
+    conf_caps =	read_capabilities_for_user(pcs.user,
+					   pcs.conf_filename
+					   ? pcs.conf_filename:USER_CAP_FILE );
     memset(&pcs, 0, sizeof(pcs));
 
-    if (conf_icaps) {
+    if (conf_caps) {
 	D(("it appears that there are capabilities for this user [%s]",
-	   conf_icaps));
+	   conf_caps));
 
 	/* We could also store this as a pam_[gs]et_data item for use
-	   by the setcred call to follow. As it is, there is a small
-	   race associated with a redundant read. Oh well, if you
-	   care, send me a patch.. */
+	   by the setcred call to follow. However, this precludes
+	   using pam_cap as just a cred module, and requires that the
+	   'auth' component be called first.  As it is, there is a
+	   small race associated with a redundant read of the
+	   config. */
 
-	_pam_overwrite(conf_icaps);
-	_pam_drop(conf_icaps);
+	_pam_overwrite(conf_caps);
+	_pam_drop(conf_caps);
 
 	return PAM_SUCCESS;
-
-    } else {
-
-	D(("there are no capabilities restrctions on this user"));
-	return PAM_IGNORE;
-
     }
+
+    D(("there are no capabilities restrictions on this user"));
+    return PAM_IGNORE;
 }
 
+/*
+ * pam_sm_setcred optionally applies inheritable capabilities loaded
+ * by the pam_sm_authenticate pass for the user. If it doesn't apply
+ * them directly (because of the "defer" module argument), it caches
+ * the cap_iab_t value for later use during the pam_end() call.
+ */
 int pam_sm_setcred(pam_handle_t *pamh, int flags,
 		   int argc, const char **argv)
 {
-    int retval;
+    int retval = 0;
     struct pam_cap_s pcs;
 
-    if (!(flags & PAM_ESTABLISH_CRED)) {
+    if (!(flags & (PAM_ESTABLISH_CRED | PAM_REINITIALIZE_CRED))) {
 	D(("we don't handle much in the way of credentials"));
 	return PAM_IGNORE;
     }
 
-    memset(&pcs, 0, sizeof(pcs));
-
     parse_args(argc, argv, &pcs);
 
     retval = pam_get_item(pamh, PAM_USER, (const void **)&pcs.user);
     if ((retval != PAM_SUCCESS) || (pcs.user == NULL) || !(pcs.user[0])) {
-
 	D(("user's name is not set"));
 	return PAM_AUTH_ERR;
     }
 
+    pcs.pamh = pamh;
     retval = set_capabilities(&pcs);
-
     memset(&pcs, 0, sizeof(pcs));
 
-    return (retval ? PAM_SUCCESS:PAM_IGNORE );
+    return (retval ? PAM_SUCCESS:PAM_IGNORE);
 }
diff --git a/pam_cap/sudotest.conf b/pam_cap/sudotest.conf
new file mode 100644
index 0000000..ff528ce
--- /dev/null
+++ b/pam_cap/sudotest.conf
@@ -0,0 +1,23 @@
+# only root
+all                                root
+
+# this should fire for beta only
+!cap_chown                         beta
+
+# the next one should snag gamma since beta done
+cap_setuid,cap_chown               @three
+
+# neither of these should fire
+cap_chown                          beta gamma
+
+# just alpha
+!cap_chown,cap_setuid              @one
+
+# not this one
+^cap_setuid                        alpha
+
+# this should fire
+^cap_chown,^cap_setgid,!cap_setuid delta
+
+# not this one
+cap_setuid                         @four
diff --git a/pam_cap/test_pam_cap.c b/pam_cap/test_pam_cap.c
new file mode 100644
index 0000000..4bcf236
--- /dev/null
+++ b/pam_cap/test_pam_cap.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2019 Andrew G. Morgan <morgan@kernel.org>
+ *
+ * This test inlines the pam_cap module and runs test vectors against
+ * it.
+ */
+
+#define _DEFAULT_SOURCE
+
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "./pam_cap.c"
+
+const char *test_groups[] = {
+    "root", "one", "two", "three", "four", "five", "six", "seven"
+};
+#define n_groups sizeof(test_groups)/sizeof(*test_groups)
+
+const char *test_users[] = {
+    "root", "alpha", "beta", "gamma", "delta"
+};
+#define n_users sizeof(test_users)/sizeof(*test_users)
+
+/* Note about memberships:
+ *
+ *  user gid   suppl groups
+ *  root  root
+ *  alpha one   two
+ *  beta  two   three four
+ *  gamma three four five six
+ *  delta four  five six seven [eight]
+ */
+
+static char *test_user;
+
+int pam_get_user(pam_handle_t *pamh, const char **user, const char *prompt) {
+    *user = test_user;
+    if (*user == NULL) {
+	return PAM_CONV_AGAIN;
+    }
+    return PAM_SUCCESS;
+}
+
+int pam_get_item(const pam_handle_t *pamh, int item_type, const void **item) {
+    if (item_type != PAM_USER) {
+	errno = EINVAL;
+	return -1;
+    }
+    *item = test_user;
+    return 0;
+}
+
+int pam_set_data(pam_handle_t *pamh, const char *module_data_name, void *data,
+		 void (*cleanup)(pam_handle_t *pamh, void *data,
+				 int error_status)) {
+    if (cleanup != iab_apply) {
+	errno = EINVAL;
+	return -1;
+    }
+    cap_free(data);
+    return -1;
+}
+
+int getgrouplist(const char *user, gid_t group, gid_t *groups, int *ngroups) {
+    int i,j;
+    for (i = 0; i < n_users; i++) {
+	if (strcmp(user, test_users[i]) == 0) {
+	    *ngroups = i+1;
+	    break;
+	}
+    }
+    if (i == n_users) {
+	return -1;
+    }
+    groups[0] = i;
+    for (j = 1; j < *ngroups; j++) {
+	groups[j] = i+j;
+    }
+    return *ngroups;
+}
+
+static struct group gr;
+struct group *getgrgid(gid_t gid) {
+    if (gid >= n_groups) {
+	errno = EINVAL;
+	return NULL;
+    }
+    gr.gr_name = strdup(test_groups[gid]);
+    return &gr;
+}
+
+static struct passwd pw;
+struct passwd *getpwnam(const char *name) {
+    int i;
+    for (i = 0; i < n_users; i++) {
+	if (strcmp(name, test_users[i]) == 0) {
+	    pw.pw_gid = i;
+	    return &pw;
+	}
+    }
+    return NULL;
+}
+
+/* we'll use these to keep track of the three vectors - only use
+   lowest 64 bits */
+
+#define A 0
+#define B 1
+#define I 2
+
+/*
+ * load_vectors caches a copy of the lowest 64 bits of the inheritable
+ * cap vectors
+ */
+static void load_vectors(unsigned long int bits[3]) {
+    memset(bits, 0, 3*sizeof(unsigned long int));
+    cap_t prev = cap_get_proc();
+    int i;
+    for (i = 0; i < 64; i++) {
+	unsigned long int mask = (1ULL << i);
+	int v = cap_get_bound(i);
+	if (v < 0) {
+	    break;
+	}
+	bits[B] |= v ? mask : 0;
+	cap_flag_value_t u;
+	if (cap_get_flag(prev, i, CAP_INHERITABLE, &u) != 0) {
+	    break;
+	}
+	bits[I] |= u ? mask : 0;
+	v = cap_get_ambient(i);
+	if (v > 0) {
+	    bits[A] |= mask;
+	}
+    }
+    cap_free(prev);
+}
+
+struct vargs {
+    struct pam_cap_s cs;
+    const char *args[5];
+};
+
+static int test_arg_parsing(void) {
+    static struct vargs vs[] = {
+	{
+	    { 1, 0, 0, 0, NULL, NULL, NULL },
+	    { "debug", NULL }
+	},
+	{
+	    { 0, 1, 0, 0, NULL, NULL, NULL },
+	    { "keepcaps", NULL }
+	},
+	{
+	    { 0, 0, 1, 0, NULL, NULL, NULL },
+	    { "autoauth", NULL }
+	},
+	{
+	    { 1, 0, 1, 0, NULL, NULL, NULL },
+	    { "autoauth", "debug", NULL }
+	},
+	{
+	    { 0, 0, 0, 0, NULL, "/over/there", NULL },
+	    { "config=/over/there", NULL }
+	},
+	{
+	    { 0, 0, 0, 0, NULL, NULL, "^cap_setfcap" },
+	    { "default=^cap_setfcap", NULL }
+	},
+	{
+	    { 0, 0, 0, 1, NULL, NULL, NULL },
+	    { "defer", NULL }
+	},
+	{
+	    { 0, 0, 0, 0, NULL, NULL, NULL },
+	    { NULL }
+	}
+    };
+    int i;
+
+    for (i=0; ; i++) {
+	int argc;
+	const char **argv;
+	struct vargs *v;
+
+	v = &vs[i];
+	argv = v->args;
+
+	for (argc = 0; argv[argc] != NULL; argc++);
+
+	struct pam_cap_s cs;
+	parse_args(argc, argv, &cs);
+
+	if (cs.debug != v->cs.debug) {
+	    printf("test_arg_parsing[%d]: debug=%d, wanted debug=%d\n",
+		   i, cs.debug, v->cs.debug);
+	    return 1;
+	}
+	if (cs.keepcaps != v->cs.keepcaps) {
+	    printf("test_arg_parsing[%d]: keepcaps=%d, wanted keepcaps=%d\n",
+		   i, cs.keepcaps, v->cs.keepcaps);
+	    return 1;
+	}
+	if (cs.autoauth != v->cs.autoauth) {
+	    printf("test_arg_parsing[%d]: autoauth=%d, wanted autoauth=%d\n",
+		   i, cs.autoauth, v->cs.autoauth);
+	    return 1;
+	}
+	if (cs.conf_filename != v->cs.conf_filename &&
+	    strcmp(cs.conf_filename, v->cs.conf_filename)) {
+	    printf("test_arg_parsing[%d]: conf_filename=[%s], wanted=[%s]\n",
+		   i, cs.conf_filename, v->cs.conf_filename);
+	    return 1;
+	}
+	if (cs.fallback != v->cs.fallback &&
+	    strcmp(cs.fallback, v->cs.fallback)) {
+	    printf("test_arg_parsing[%d]: fallback=[%s], wanted=[%s]\n",
+		   i, cs.fallback, v->cs.fallback);
+	    return 1;
+	}
+
+	if (argc == 0) {
+	    break;
+	}
+    }
+    return 0;
+}
+
+/*
+ * args: user a b i config-args...
+ */
+int main(int argc, char *argv[]) {
+    unsigned long int before[3], change[3], after[3];
+
+    if (test_arg_parsing()) {
+	printf("failed to parse arguments\n");
+	exit(1);
+    }
+    if (read_capabilities_for_user("alpha", "/dev/null") != NULL) {
+	printf("/dev/null should return no capabilities\n");
+	exit(1);
+    }
+    if (read_capabilities_for_user("unknown", "capability.conf") != NULL) {
+	printf("capability.conf should return no capabilities for unknown\n");
+	exit(1);
+    }
+    char *iab_text = read_capabilities_for_user("alpha", "./incapable.conf");
+    if (iab_text != NULL) {
+	printf("./incapable.conf should grant no capabilities: got=%s\n",
+	       iab_text);
+	free(iab_text);
+	exit(1);
+    }
+
+    /*
+     * Start out with a cleared inheritable set.
+     */
+    cap_t orig = cap_get_proc();
+    cap_clear_flag(orig, CAP_INHERITABLE);
+    cap_set_proc(orig);
+
+    if (getuid() != 0) {
+	cap_free(orig);
+	printf("test_pam_cap: OK! (Skipping privileged tests (uid!=0))\n");
+	exit(0);
+    }
+    if (argc == 1) {
+	printf("test_pam_cap: OK (kick the tires test)\n");
+	exit(0);
+    }
+
+    change[A] = strtoul(argv[2], NULL, 0);
+    change[B] = strtoul(argv[3], NULL, 0);
+    change[I] = strtoul(argv[4], NULL, 0);
+
+    void* args_for_pam = argv+4;
+
+    int status = pam_sm_authenticate(NULL, 0, argc-4,
+				     (const char **) args_for_pam);
+    if (status != PAM_INCOMPLETE) {
+	printf("failed to recognize no username\n");
+	exit(1);
+    }
+
+    test_user = argv[1];
+
+    status = pam_sm_authenticate(NULL, 0, argc-4, (const char **) args_for_pam);
+    if (status == PAM_IGNORE) {
+	if (strcmp(test_user, "root") == 0) {
+	    exit(0);
+	}
+	printf("unconfigured non-root user: %s\n", test_user);
+	exit(1);
+    }
+    if (status != PAM_SUCCESS) {
+	printf("failed to recognize username\n");
+	exit(1);
+    }
+
+    /* Now it is time to execute the credential setting */
+    load_vectors(before);
+
+    status = pam_sm_setcred(NULL, PAM_ESTABLISH_CRED, argc-4,
+			    (const char **) args_for_pam);
+
+    load_vectors(after);
+
+    printf("before: A=0x%016lx B=0x%016lx I=0x%016lx\n",
+	   before[A], before[B], before[I]);
+
+    long unsigned int dA = before[A] ^ after[A];
+    long unsigned int dB = before[B] ^ after[B];
+    long unsigned int dI = before[I] ^ after[I];
+
+    printf("diff  : A=0x%016lx B=0x%016lx I=0x%016lx\n", dA, dB, dI);
+    printf("after : A=0x%016lx B=0x%016lx I=0x%016lx\n",
+	   after[A], after[B], after[I]);
+
+    int failure = 0;
+    if (after[A] != change[A]) {
+	printf("Ambient set error: got=0x%016lx, want=0x%016lx\n",
+	       after[A], change[A]);
+	failure = 1;
+    }
+    if (dB != change[B]) {
+	printf("Bounding set error: got=0x%016lx, want=0x%016lx\n",
+	       after[B], before[B] ^ change[B]);
+	failure = 1;
+    }
+    if (after[I] != change[I]) {
+	printf("Inheritable set error: got=0x%016lx, want=0x%016lx\n",
+	       after[I], change[I]);
+	failure = 1;
+    }
+
+    exit(failure);
+}
diff --git a/pgp.keys.asc b/pgp.keys.asc
index b39f76a..b03643f 100644
--- a/pgp.keys.asc
+++ b/pgp.keys.asc
@@ -3,7 +3,6 @@ morgan@kernel.org upload/signature key.
 pub  4096R/E2CCF3F4 2011-10-07 Andrew G. Morgan (Work Address) <agm@google.com>
 uid                            Andrew G. Morgan <morgan@kernel.org>
 -----BEGIN PGP PUBLIC KEY BLOCK-----
-Version: GnuPG v1.4.15 (GNU/Linux)
 
 mQINBE6OiBIBEADpdtUxC8Fmhn5UK6UCZdU7mFgZwN8U9cabFUPfUIkMqXULhCD0
 hG2/amuiiUoLollPjOopNqk4cc8LcZfszOdBFAYj7MeWzNySVw4KkWrVCEH/bZ0Q
@@ -16,97 +15,144 @@ VPGAa4K+dnI2oy4wukzl/unAKrlMCBRsRoW2qjy3TDSXqwJhd34ilHzrdAdchrh/
 acBfbBtRzVlcDTnGltDNMuRTXzujaY9C3B0L2E+Jfrds8WcM8ASO4mHwJUTMrBwM
 b5sFSG+/X9Ufg/c2G086HQ7xMERUA5oz66P5ReHCph8WHQN2L5vtZwL7//hZB9hn
 G0K1210YEDXpFPijpis/54MKUSkWEFOLjUbiSPbwEfb79A00CcHojQQinwARAQAB
-tDBBbmRyZXcgRy4gTW9yZ2FuIChXb3JrIEFkZHJlc3MpIDxhZ21AZ29vZ2xlLmNv
-bT6JAjgEEwECACIFAk6VD4ICGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJ
-ECnuhIrizPP0zNoQAMDjx3iovvf0rpAYFvvAoPbzhEXcJ41/T+paxWOJm8SEg7fX
-nUHgXeTwW3RJPIp7PguctPogvKQV+7GcU5Dcg13DZO4nMrSsvInsLQkfeDVU/zl2
-MuHFOtBMpDp6iGcUwjS0bYbvl03fPj7ZXIML+I7OSyNeoZ/n2ztI9UiIBHovsHqZ
-qYm4d7VOi4nVj1Y/Gak99sw3cLvUwq9f3i8ioNzynqBT7jA+GWFaeVJuGrOCBBBg
-uIu0Ekg42NAZ2AR32wQP5eEtlSAq8Il9RZzewa1v74loDNJOl+kW5/jQK6tGj2A9
-vlTqVzHUDmPZ9n6Ds7h3wo2g3gzYX1cuM3spW9UsA8XUDNY2yNFYDC9IsAI09u18
-N7f89isG/yYh5MZpJz2fx7cecHtwSVukTGHDsaoHTXMlfjQmVU5efORZJa6Bx0Tk
-aSCwecem3q+3OcdgW8XwPWik/5Wv8B3dJopMH1Mw3pRhirtTd6/88xNyLkJStptB
-DZvbqvB2nMmSiqgh0mPeslnwubxJ5/4FbP9zlLN7zp49RZHKDl/8EMSXGCjmG6UT
-xW6I3YpKdc4+yEd19/UUtxqQOfbgFvlcbesQ5ILvLOzZidkS7y0v4i9rZBe/HEy3
-eG8z4s5dloBrpSBvKySwqWuuSDn3tMqw4Bz2Be3FgtYA4TnNy7shcFR2BMFotCRB
-bmRyZXcgRy4gTW9yZ2FuIDxtb3JnYW5Aa2VybmVsLm9yZz6JAjsEEwECACUCGwMG
-CwkIBwMCBhUIAgkKCwQWAgMBAh4BAheABQJOmRGPAhkBAAoJECnuhIrizPP0wK0P
-/RMvjmzeXbgoa36cBDvDKReAiC56Au4qGXkNah3984tNPT1hVUKCiwiUmULoNJbE
-I4qFJTtwsMi5QzE+daCA7t+ALJiC+PKiKFG1LDz7mxfhmBeS3XcYuqZdjyKrATUF
-r0SHbsJxtRCslawGD2gKczLknFeBXL0997TfJS9ipLibqCtmvyryHn4EbZfoJqcp
-j/RBN/izVGHNYI8BsZpO5F6z7vXoncDL0dKh65ndGaIbhVDUPsDBvzg3i+EzhB51
-hYTTNKK0QpWbmsXfJBnvztinfLUsnO9HV8aRaygOI/DAKAtT7YPXORA1oFYtx69b
-zulqC+TXUmeV8YW8bETH4xHM9mQb0oNLPibR2nK2FSDiLp0/eEM5vgzfPVUX7WzB
-JUPsf0ah/e1yrXqudGUUZ0R+3VMOdxMryZBKLymkzyvu6a5DcLarqAt8y9ciRH67
-HKNnE1gvHf5K2Q37gwSecwmXCjpMlbVJnIarLKBcVRcYKtxgPxCv6483I8heSKF7
-PB/IFBmzT1cX7lhln9+62Ks/0Gs0pA0iNLaD+POPiqWrAwZsFvKjD9PDaCBDFRWj
-FqZLyJMsMi1qmP8jWsdQqPdUskQC0ftvw3Z6SiyyrriSAzglCjmmAcfdt+w4b/EO
-4SzSZUnd/ApkHkZx1Lbta15WKxGi7S8/5zNdaK721nUdiEYEEBECAAYFAk6Oi/kA
-CgkQQheEq9QabfJhdwCdEhWd2WbjrypMC2jEqWUswmf7fsQAn3LwZyeVJK5LApOF
-7NimHkCQV9z7iQIcBBABAgAGBQJOl+CHAAoJEO2/8mhZLMbY4ywP/2qX0+QrilRC
-eqk8cOmljLB+sxiA2Jc5YINAXipg6PSQzF7IlMnSNSW69ARLPW5iyDTljXTtD85W
-/yWhm3vsouWldBa1Wb6xVb8iA8H8fUUKCY7ngCSjHJxPa1KRsTrMKCkLHR2MP7Qi
-ar0dvquomtlx5chkhXmY+0cxcA/cMB/A/fbfDvvbYD5HYiB90AylPmLbM9XiLF0F
-RSJt7iokGidS1W80ZCg5p1R02dQV5H7/111Xx1QIggPcNPWGwCK61Q3tPV0xc0oQ
-dZpQk2hnPVHF7BMmCyB/iNRofF9mpC/QZGFRQkb3XgdIdK/O23VQntSGctrtnL1M
-rcrgQUIrMaU3LKFbIE7DBwMUzUaTO/t14ZQQUZJTAKLSVCfvGvgh6/dqaXpssQxL
-D2S5J1sWs1ZVInOhjo2OZnVl3SEmQT9h6NB93QRoGfbfy+AJgReRcfCep5zDMrud
-5HPym9itvMLVVzw267Yn0ATBhrESAY8LqBBRbigM/TL+jNPfsQzhEzHXFsQL/dKh
-V4N8IURnpCqHzY2BSnTX1K8ipl+iRGpMVfkYQnM660AIJhAReT2rwzuhGRKHbOXz
-UrzoEg1PEw/+69ZmcGUZH1VtSrOw0r6eub+rg7Q0R4r6c8kF2vS2XSQn/MZ2Wqjk
-hW4fWCqqogIvCkqk1Jt3OCRIWbVC0bKKiQIcBBABAgAGBQJOmJxYAAoJECDQTlpx
-NmCnTvEP/38M2bsQGnKVhNsAcr7sDO4YmDrc8V/bUrGjADWmLcW/K2MDOWLZIwmg
-Z1qMifHXuy/NhyX3/xp8VacNAlpuQ8o/T77P1QCLwuPu+fuXLOmFkCISFeTW5g/d
-pShZ4tsTXAaJs7bQdQnsY3prZl0CMJtItOhwW34PDZL95Vp2ZRx84Dn355KHUeeq
-yQjqu+cEz2T5sfVj/O2w1tgeWcMxrOI3ARD/Ks+CeWoFZPezq2K4ctka7Q+muH9/
-1WCatdpryf5SJoBMDaC7GXzGegesKQr35sfNM9XRP1TphmCqQz4VOb+stIEJv1Dq
-c9Lc4EScOwmESt5mzPwrZ3OJ+stFKW1QJgErUb55TNQ4C957rodxCerNa9ptpdUk
-U9Pb2vpSurNRgETA/urZkBO/vPQ8MEgdJSbVgh0Rj/zPFnj3akQFc98U5Km0TIHJ
-7r6S+qj73itUM79jMVKJgewPEA8cys0ACLoM5uRNYq35mY4OeP/Edm6NLiKfD0us
-MfEQ+02B8RqXuHBAJAa/+f+U3zGkw268f3/16kZv/PTMfdOEy1cjKlQ3LFwIHfny
-Brb/3vHAVTAyEbBPWmULEjopdevEPKmKyW2EXFphBmjOHSghmIRDxO2WmSuI8bIU
-sH4oq6MwqAJpE5rzreBNLNh5ZY4yzw3nAJb6Bb59m0kt2fHKIq+AiQI4BBMBAgAi
-BQJOjogSAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRAp7oSK4szz9HRi
-D/4sMdw5WrUtmagrXWVyj83YLJW2GBxH6s5UR1/fyl5uDYjBAccf3jDuIwVZzpCJ
-ZpQ8RwvRV699Pag5L5uwDEvkiIMROPNescaXGROuNoCFfqIOTVZfGya2w06dB0Kh
-C0l++iO6YVy1eJkyc/XooiOOtEfv5UpBZSWn9hMYaNSc9tiQcyPxzEnEQYUmIoXG
-kHXUNRDBQfJLRZP4e9YjN/hH0ZW7/rHXXMxeBREfbCekKy0qDgJ/Sf3Eh6dwUkOR
-/vCrdZM2Q8TTX0LJdflJdqMEuYHqm1j9RrnoXIjhx0wFopEOHPSr2qxOu2gOkyxB
-JE7Ur3IKpMRaoCR0xHMb5MOgnMmwRW2G6KcZTCdr2jmxp2hK3BxRcUt3qh74jhZL
-Dbv5dxTqVn/VK1CGhHbrcW2adkyi2sK7vVARdlSmHYWIWhLqv77p7tkSAX76Qig8
-X75WGF+W3YSAS4f3I6QXRnXxzG8TbMIa4CfeN5IZ2Z5TisC2YyuG8VdM/m6i6W18
-cLa7ZNGE3w04eVQvtigG+9p9gCs5Kg6PVVxwJsjGDDqHkCslfFF8Wl1ZdqXqtUB2
-RKTWb4XNU5XxO0xIGFtLUNnCKcJAOUCu/oRJ/WWHW+BKDdG1VbgYVFTXHc6YZpet
-2D+sAs7cWV8GDJ9nChHWcQ5C/bPV1PVnheZhwGvHLsWrILkCDQROjogSARAAtLny
-8nlyr8fyYGAocQz0S47a99n/X0Vmgwo1trJsCXWbOrpztznY8IFRK/dRnRHiMwBx
-WQ4CvdUk2p0MweUiOjpEN7bUm92jeFXMr0hpQKf+O4DMExHS4hxLwArnKFuAk2ej
-RQGXBcEoMv11LiUwuzFbWdXqMsA1TbuA+WvEBnFUYM/6xNiJeRIUIiGydhG1yaw8
-HrNWLHnhhcOfT6z5AO69hZZiJacp9pU/+jnep/M42p4J17x81+ESpJeladwR0Qxc
-0qxOyWidN7oO5hSiBEwU6lYQjdQ23pa7tN1o90P9jyN2nFBEdBu2D/mi4DV/+VXU
-YHNEy3uNhmmLGwMoPVWiZveRmG74+ne7MVyxwb9EIF3IenS4T65ee1dlZvaoMxUl
-Ue8htEK0ChrQZOfITs9MyjUwoTiLUVo3kQeMli9HJEQXPRjHqkkZ7W65LhkEVnHS
-PHWtttRSDkuZYtze+he142GzDSQA3dF2zy/tLpBb5CA29ITcQTspgV7AuV8YQqDZ
-4XWHsR9Am5334N83EXk2oouqxl7mKUB0Vg6tujNCBSRn6A3CUaA29w/MyTg4z6Yw
-6HD3il1J8PcWEoOzqlUoPd8tA5pcZCcKngkXndpXgsZCgoCgvx9WNU+LUrHBfhC3
-TLLsI7iGO1JvLghkesKTARF3O2hS3xAhfGZxn8MAEQEAAYkCHwQYAQIACQUCTo6I
-EgIbDAAKCRAp7oSK4szz9HSYD/9hmEsJuSgAGwx/OPweYuDGkA25ajDAu59LpzTb
-jB/yOU1rDVUu3cMH+UEyaEGlhbneGvHF2DsEC9il/8fVL4eaE9EWpopIonYndBE9
-1+YiGHPToiyKcdp0KuQMwm2ENAiEf/qErrB2NLna4wfZUx5lzvEOEk3cNPmNz2ER
-yMPXIeeiQ9VKp3MzopWhvBItAyIzzuydKKvJAKzDoTOEL4w60slAphj8rVCsW45k
-2AurWUH7VFM8ezXunieLeygCGb+YJZAet6yVXD3UwnNcWCGQ+xKSPuyKrn4xKG0N
-5gzxnGIh/S/7IOjRaNR5X+pfWd6YzN9qURUfiXmuLSPRHK4Flfam4gMMHul9wL6X
-BayFo2NUPBaxg4U9ACAgSJxgCTNPCKwnovecOsRmIESKtT1F3hbZRRgRGj/TDepJ
-QNfHSyk/ZQfuoJggBMQLJKzGII42rb0W90QLMk0SyCzeb3LO3yyNiKpluNpJsl2I
-qdBJE5t1LxhKDnju6JlFyPcGJnP/doTuDTjjL0V+guPAGVbuq0g2hku+ZlJwjMSt
-NwHPWxeifuDJbQVIp0xZbI5djdHC8hVJX+d09J5eq0PlgMEidc4F+Vv+mmGJl0Gi
-NfhmTaACSRzbI25/bhvj2xhx8A2LEOuU/+nzYgQzPcFpawiUP1wBnTqi+maxKx5/
-9ifyrw==
-=Ibs8
+tCRBbmRyZXcgRy4gTW9yZ2FuIDxtb3JnYW5Aa2VybmVsLm9yZz6JAjsEEwECACUC
+GwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheABQJOmRGPAhkBAAoJECnuhIrizPP0
+wK0P/RMvjmzeXbgoa36cBDvDKReAiC56Au4qGXkNah3984tNPT1hVUKCiwiUmULo
+NJbEI4qFJTtwsMi5QzE+daCA7t+ALJiC+PKiKFG1LDz7mxfhmBeS3XcYuqZdjyKr
+ATUFr0SHbsJxtRCslawGD2gKczLknFeBXL0997TfJS9ipLibqCtmvyryHn4EbZfo
+Jqcpj/RBN/izVGHNYI8BsZpO5F6z7vXoncDL0dKh65ndGaIbhVDUPsDBvzg3i+Ez
+hB51hYTTNKK0QpWbmsXfJBnvztinfLUsnO9HV8aRaygOI/DAKAtT7YPXORA1oFYt
+x69bzulqC+TXUmeV8YW8bETH4xHM9mQb0oNLPibR2nK2FSDiLp0/eEM5vgzfPVUX
+7WzBJUPsf0ah/e1yrXqudGUUZ0R+3VMOdxMryZBKLymkzyvu6a5DcLarqAt8y9ci
+RH67HKNnE1gvHf5K2Q37gwSecwmXCjpMlbVJnIarLKBcVRcYKtxgPxCv6483I8he
+SKF7PB/IFBmzT1cX7lhln9+62Ks/0Gs0pA0iNLaD+POPiqWrAwZsFvKjD9PDaCBD
+FRWjFqZLyJMsMi1qmP8jWsdQqPdUskQC0ftvw3Z6SiyyrriSAzglCjmmAcfdt+w4
+b/EO4SzSZUnd/ApkHkZx1Lbta15WKxGi7S8/5zNdaK721nUdiEYEEBECAAYFAk6O
+i/kACgkQQheEq9QabfJhdwCdEhWd2WbjrypMC2jEqWUswmf7fsQAn3LwZyeVJK5L
+ApOF7NimHkCQV9z7iQIcBBABAgAGBQJOl+CHAAoJEO2/8mhZLMbY4ywP/2qX0+Qr
+ilRCeqk8cOmljLB+sxiA2Jc5YINAXipg6PSQzF7IlMnSNSW69ARLPW5iyDTljXTt
+D85W/yWhm3vsouWldBa1Wb6xVb8iA8H8fUUKCY7ngCSjHJxPa1KRsTrMKCkLHR2M
+P7Qiar0dvquomtlx5chkhXmY+0cxcA/cMB/A/fbfDvvbYD5HYiB90AylPmLbM9Xi
+LF0FRSJt7iokGidS1W80ZCg5p1R02dQV5H7/111Xx1QIggPcNPWGwCK61Q3tPV0x
+c0oQdZpQk2hnPVHF7BMmCyB/iNRofF9mpC/QZGFRQkb3XgdIdK/O23VQntSGctrt
+nL1MrcrgQUIrMaU3LKFbIE7DBwMUzUaTO/t14ZQQUZJTAKLSVCfvGvgh6/dqaXps
+sQxLD2S5J1sWs1ZVInOhjo2OZnVl3SEmQT9h6NB93QRoGfbfy+AJgReRcfCep5zD
+Mrud5HPym9itvMLVVzw267Yn0ATBhrESAY8LqBBRbigM/TL+jNPfsQzhEzHXFsQL
+/dKhV4N8IURnpCqHzY2BSnTX1K8ipl+iRGpMVfkYQnM660AIJhAReT2rwzuhGRKH
+bOXzUrzoEg1PEw/+69ZmcGUZH1VtSrOw0r6eub+rg7Q0R4r6c8kF2vS2XSQn/MZ2
+WqjkhW4fWCqqogIvCkqk1Jt3OCRIWbVC0bKKiQIcBBABAgAGBQJOmJxYAAoJECDQ
+TlpxNmCnTvEP/38M2bsQGnKVhNsAcr7sDO4YmDrc8V/bUrGjADWmLcW/K2MDOWLZ
+IwmgZ1qMifHXuy/NhyX3/xp8VacNAlpuQ8o/T77P1QCLwuPu+fuXLOmFkCISFeTW
+5g/dpShZ4tsTXAaJs7bQdQnsY3prZl0CMJtItOhwW34PDZL95Vp2ZRx84Dn355KH
+UeeqyQjqu+cEz2T5sfVj/O2w1tgeWcMxrOI3ARD/Ks+CeWoFZPezq2K4ctka7Q+m
+uH9/1WCatdpryf5SJoBMDaC7GXzGegesKQr35sfNM9XRP1TphmCqQz4VOb+stIEJ
+v1Dqc9Lc4EScOwmESt5mzPwrZ3OJ+stFKW1QJgErUb55TNQ4C957rodxCerNa9pt
+pdUkU9Pb2vpSurNRgETA/urZkBO/vPQ8MEgdJSbVgh0Rj/zPFnj3akQFc98U5Km0
+TIHJ7r6S+qj73itUM79jMVKJgewPEA8cys0ACLoM5uRNYq35mY4OeP/Edm6NLiKf
+D0usMfEQ+02B8RqXuHBAJAa/+f+U3zGkw268f3/16kZv/PTMfdOEy1cjKlQ3LFwI
+HfnyBrb/3vHAVTAyEbBPWmULEjopdevEPKmKyW2EXFphBmjOHSghmIRDxO2WmSuI
+8bIUsH4oq6MwqAJpE5rzreBNLNh5ZY4yzw3nAJb6Bb59m0kt2fHKIq+AiQI4BBMB
+AgAiBQJOjogSAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAAKCRAp7oSK4szz
+9HRiD/4sMdw5WrUtmagrXWVyj83YLJW2GBxH6s5UR1/fyl5uDYjBAccf3jDuIwVZ
+zpCJZpQ8RwvRV699Pag5L5uwDEvkiIMROPNescaXGROuNoCFfqIOTVZfGya2w06d
+B0KhC0l++iO6YVy1eJkyc/XooiOOtEfv5UpBZSWn9hMYaNSc9tiQcyPxzEnEQYUm
+IoXGkHXUNRDBQfJLRZP4e9YjN/hH0ZW7/rHXXMxeBREfbCekKy0qDgJ/Sf3Eh6dw
+UkOR/vCrdZM2Q8TTX0LJdflJdqMEuYHqm1j9RrnoXIjhx0wFopEOHPSr2qxOu2gO
+kyxBJE7Ur3IKpMRaoCR0xHMb5MOgnMmwRW2G6KcZTCdr2jmxp2hK3BxRcUt3qh74
+jhZLDbv5dxTqVn/VK1CGhHbrcW2adkyi2sK7vVARdlSmHYWIWhLqv77p7tkSAX76
+Qig8X75WGF+W3YSAS4f3I6QXRnXxzG8TbMIa4CfeN5IZ2Z5TisC2YyuG8VdM/m6i
+6W18cLa7ZNGE3w04eVQvtigG+9p9gCs5Kg6PVVxwJsjGDDqHkCslfFF8Wl1ZdqXq
+tUB2RKTWb4XNU5XxO0xIGFtLUNnCKcJAOUCu/oRJ/WWHW+BKDdG1VbgYVFTXHc6Y
+Zpet2D+sAs7cWV8GDJ9nChHWcQ5C/bPV1PVnheZhwGvHLsWrIIkCMwQQAQgAHRYh
+BCB5yApF/r2boJUa8ssjEuHs9z9XBQJdDwwPAAoJEMsjEuHs9z9Xh14P/3HmG8NE
+Rr75KQZ+Nbdg9jhA56MMgKoHawchaLm+Jmpt5cYymaG+hZyFXKdGmOk0zltD3qNG
+TIoC4dg5BKRdCWa7U7uTptrWaZ/tendnPU4UThGrExEaiAJ1BzJqV7qUWQ0nRyU0
+g/h3ZQUrzvVXtr7SL/xfQFvKSEKmW8t6yXhcLGkJfRlLsE+rXsgRuRmVRSw7zTNq
++uJqExr8bGXThM/9ikmTTDjRCBVmCT2wbWimY5TSXm4fomMDnWumKhsJsgabnbTL
+ch1E+YFwSGmjHyluNAscU3rDrle2FoOdmZCXvLbFEZyFIuecAmi5zTPm7Ko0SA2e
+bQLwqkwo/k+YYOriW5ljoQh1uFTKdzyhIJmozEmVQH83Foq8gE/8aJed9asHq/ty
+0UUttaoWt5XwCTWg8uq0xwang18qO6HgUMKBSCRQOcPSGg1UX1l4ipx47JmtAhIN
+SV15y9Xm9gc4fKiJ1EBIJvdgf8NrSYDQwzTyA0A5W1oEMNuFs8eUBjn6+AiTN1jI
+UnVBJ/I8shY/hpgnwgLUUxD4RiH+KKeq2Xx+UPDtFCfOuYiL7ITTS6FrZFcdjE1O
+I7l7M+uulavhxc8iMNsT3mm1SClcxdufr2000UCIaIblPRVd+iKlzhhsGAJZ7vP9
+jRQ1m1CIWgmgdDdgHlR/JEHDvS9DhBwdDCcviQIzBBABCgAdFiEEiRirsLX+whtp
+fs5a+9SYGk9gV3wFAmLHkdcACgkQ+9SYGk9gV3yNew/7BlNc4Gv+FYIg/37+SmDf
+b/WVvLvJ1Rw1x7rnRhivQFMPwFw6R0dahK7taxFgC94b7nlGxRfgf3D63P6IXTtR
+YirLeICun7WrGr4vPbjb+qXLMZLvq4MnVRmxnor+z+wCTn3csZgEvRAdw4kppG6F
+o4wBB5mphgAxxF7FTFKU7Y1az/YZdGt/TvG5CLLCZYKmCC2DplDI2hABcIlIN+XB
+z8sZzZBI/6YIuRuABeXgo35hlU9RrKAYU3qxp09YAIbD1eR56l7spcdbTJnE4qs1
+o4XsUujv+JGoUc1UI7eF0TtA5T2NSs42fNwfsXcr2f37fx6rDWyypD2bDgfQL20+
+jtgud7Jzf41M/o7Mfhy6iOAeCBhs/+JbAgNmvJb+zOl+DlaCwQnvZFouunLkO09B
+QwgOG37TaS9WuZik1c1Wi9qzlB1/tZJABf28LgBXrQaF/8EBSmfMESByR3NLMuQE
+2C53l+WoFE0y6VNplpGDcU+hoZpgU2lNJyA7J4MODjVcQzhSBIFTMw3hcqp+c+QY
+a+hxYqcjEP56BymKrVDk6pHjCGagA+GrrKd3J50p4WjbNmIj6NThgYqFEWOKg/eL
+cADnWsDoam69PsxIZIttY3MtkdY+xMVpXZCLu6Kl8hTFkALHrpbCD+vrFt3wJ7Iy
+EZnKqqLTws9GwuQnD4l8FGm0MEFuZHJldyBHLiBNb3JnYW4gKFdvcmsgQWRkcmVz
+cykgPGFnbUBnb29nbGUuY29tPokCOAQTAQIAIgUCTpUPggIbAwYLCQgHAwIGFQgC
+CQoLBBYCAwECHgECF4AACgkQKe6EiuLM8/TM2hAAwOPHeKi+9/SukBgW+8Cg9vOE
+RdwnjX9P6lrFY4mbxISDt9edQeBd5PBbdEk8ins+C5y0+iC8pBX7sZxTkNyDXcNk
+7icytKy8iewtCR94NVT/OXYy4cU60EykOnqIZxTCNLRthu+XTd8+Ptlcgwv4js5L
+I16hn+fbO0j1SIgEei+wepmpibh3tU6LidWPVj8ZqT32zDdwu9TCr1/eLyKg3PKe
+oFPuMD4ZYVp5Um4as4IEEGC4i7QSSDjY0BnYBHfbBA/l4S2VICrwiX1FnN7BrW/v
+iWgM0k6X6Rbn+NArq0aPYD2+VOpXMdQOY9n2foOzuHfCjaDeDNhfVy4zeylb1SwD
+xdQM1jbI0VgML0iwAjT27Xw3t/z2Kwb/JiHkxmknPZ/Htx5we3BJW6RMYcOxqgdN
+cyV+NCZVTl585FklroHHRORpILB5x6ber7c5x2BbxfA9aKT/la/wHd0mikwfUzDe
+lGGKu1N3r/zzE3IuQlK2m0ENm9uq8HacyZKKqCHSY96yWfC5vEnn/gVs/3OUs3vO
+nj1FkcoOX/wQxJcYKOYbpRPFbojdikp1zj7IR3X39RS3GpA59uAW+Vxt6xDkgu8s
+7NmJ2RLvLS/iL2tkF78cTLd4bzPizl2WgGulIG8rJLCpa65IOfe0yrDgHPYF7cWC
+1gDhOc3LuyFwVHYEwWiIRgQQEQIABgUCW52DcgAKCRBCF4Sr1Bpt8tboAJ4uDyZQ
+PHCoV04tDbBKHIynok1dfACeMNckl8DppIgV3kgFJsHG2vVHKjmJAjMEEAEIAB0W
+IQQgecgKRf69m6CVGvLLIxLh7Pc/VwUCXQ8MGwAKCRDLIxLh7Pc/V0F9EACHKNqF
+l5xXDHe/0nlZ+J/OFRNIE8ObZAxQLaPfK3gRkFn/SbKQzkzB84X2il7A/W221Lzi
+me5eTFPhTX3RxUcoSQdrtCCov5gCeuiUbhuJ28zuJxslxLE8bhnmNfpLmFFGtbMI
+kXq+y0uqc08Yj8frPXKgx7KvOoovpm0X/igiAkiuKLhbq8xIwaIN0NL4slFlx+ZP
+Ed0KA6qOvlLr0T/lLVptAeMrzfi2gqY1utSqE5IVrbtU6Kptw3zfURsGFFIaKjIr
+hzu25Cdpg/NxYGqo2GqD0lZ+OeWSy0WI5sxCSDqr0to9lvsJGv2Nc06ixIjH7vG2
+Hc/cC0QyHdBM6GwaLmUH9hrcSCLR5kxTzAW0Cf6lrAZUL36Ivl5l+zoLdJqSgZLY
+YXqMdQf75Y5TRFzry5pWRef3ba4/sgui89W11Uccdq/pGe4OKo0I/vq3bv35/3cZ
+aMGjj3x6v67kk8GWbKg6CPBnzb1dY7VDA5RWOt2lPZr4omUNFwRpxAfZADUz2Q4S
+tMQVE018SSH1i6G9EB8KVQEBeD4qgaWs1z9sqA7K5wlBzGarTa2RspH0GMmYwxBY
+hXtYpKm/47Dkg8j3N01VVwky0XGPFHCVgFbeXGknL1O3thOGs5XPO05jtBcbYI1u
+vvK+h/CNn1yuTG13BSG4pgRF1Sy6CFLHme0d/okCMwQQAQoAHRYhBIkYq7C1/sIb
+aX7OWvvUmBpPYFd8BQJix5HeAAoJEPvUmBpPYFd8viUP/0p2jAtPGX4rQ22IVBHt
+JkfsXe9Jj0L3qtHUiH3Y9A6pPWhfr17PNEy6oQ57PgmPA7MS7rfJ2Dzr61g5ItgP
+5MMX1DY+6tcKahgzutAV2eLCEwkS3Nfv2z8t+DSQHCU+MzBFr7pP/Z9egr/jCCjv
+FoB3nLZ0luAxV4RXtAaGnXrMJrJjGSt2iUEwaYKM1hYP9DcQ+ur3d5i6GTVnLSRa
+3pNuLnvqfH2emRN2XFugGaa6DEwHvQOJn8NcFwHfohSSOEIQSGMES15/ww+CUmPt
+b9NgfbCAuLdZEzkKoatgo0Lp/yZqMMw8m3uJ/Kt1FoJojm/k7hCy97WaHBhBfKMo
+BdFwT6/7cmnQZJDXiLUSRe8UpmrgvJ2wuFp2LV8xLr8FYweuUSCfqs8EzYf+A/12
+64DLrlmhFa/WA4HIYs8F9a+QOIZ9dMcqy17RqPjTwVHnB/m+uQcbYiOBk/26WVNy
+MT8isZzyIX5eN5xTj4nMARi2MjVcYsGouo1smppygWdHZ5PHICrJ9wcSyY/2wo6e
+JW0foNFKkffmdQ2jW0OxI6iPD/khbaJ+qtAq7L8RpjALjcr6kALr5EvuH7p9le1Y
+eKjN7jXRQ0CcBJ3zAFbWEtWXtsemXz8f7o9Jlji9rzRUCxR3atIjQTkccv8b6FEe
+dVOWefKBtO4jl17UQng/DeVAuQINBE6OiBIBEAC0ufLyeXKvx/JgYChxDPRLjtr3
+2f9fRWaDCjW2smwJdZs6unO3OdjwgVEr91GdEeIzAHFZDgK91STanQzB5SI6OkQ3
+ttSb3aN4VcyvSGlAp/47gMwTEdLiHEvACucoW4CTZ6NFAZcFwSgy/XUuJTC7MVtZ
+1eoywDVNu4D5a8QGcVRgz/rE2Il5EhQiIbJ2EbXJrDwes1YseeGFw59PrPkA7r2F
+lmIlpyn2lT/6Od6n8zjangnXvHzX4RKkl6Vp3BHRDFzSrE7JaJ03ug7mFKIETBTq
+VhCN1Dbelru03Wj3Q/2PI3acUER0G7YP+aLgNX/5VdRgc0TLe42GaYsbAyg9VaJm
+95GYbvj6d7sxXLHBv0QgXch6dLhPrl57V2Vm9qgzFSVR7yG0QrQKGtBk58hOz0zK
+NTChOItRWjeRB4yWL0ckRBc9GMeqSRntbrkuGQRWcdI8da221FIOS5li3N76F7Xj
+YbMNJADd0XbPL+0ukFvkIDb0hNxBOymBXsC5XxhCoNnhdYexH0Cbnffg3zcReTai
+i6rGXuYpQHRWDq26M0IFJGfoDcJRoDb3D8zJODjPpjDocPeKXUnw9xYSg7OqVSg9
+3y0DmlxkJwqeCRed2leCxkKCgKC/H1Y1T4tSscF+ELdMsuwjuIY7Um8uCGR6wpMB
+EXc7aFLfECF8ZnGfwwARAQABiQIfBBgBAgAJBQJOjogSAhsMAAoJECnuhIrizPP0
+dJgP/2GYSwm5KAAbDH84/B5i4MaQDblqMMC7n0unNNuMH/I5TWsNVS7dwwf5QTJo
+QaWFud4a8cXYOwQL2KX/x9Uvh5oT0Ramikiidid0ET3X5iIYc9OiLIpx2nQq5AzC
+bYQ0CIR/+oSusHY0udrjB9lTHmXO8Q4STdw0+Y3PYRHIw9ch56JD1UqnczOilaG8
+Ei0DIjPO7J0oq8kArMOhM4QvjDrSyUCmGPytUKxbjmTYC6tZQftUUzx7Ne6eJ4t7
+KAIZv5glkB63rJVcPdTCc1xYIZD7EpI+7IqufjEobQ3mDPGcYiH9L/sg6NFo1Hlf
+6l9Z3pjM32pRFR+Jea4tI9EcrgWV9qbiAwwe6X3AvpcFrIWjY1Q8FrGDhT0AICBI
+nGAJM08IrCei95w6xGYgRIq1PUXeFtlFGBEaP9MN6klA18dLKT9lB+6gmCAExAsk
+rMYgjjatvRb3RAsyTRLILN5vcs7fLI2IqmW42kmyXYip0EkTm3UvGEoOeO7omUXI
+9wYmc/92hO4NOOMvRX6C48AZVu6rSDaGS75mUnCMxK03Ac9bF6J+4MltBUinTFls
+jl2N0cLyFUlf53T0nl6rQ+WAwSJ1zgX5W/6aYYmXQaI1+GZNoAJJHNsjbn9uG+Pb
+GHHwDYsQ65T/6fNiBDM9wWlrCJQ/XAGdOqL6ZrErHn/2J/Kv
+=0HgQ
 -----END PGP PUBLIC KEY BLOCK-----
 
 pub  1024D/D41A6DF2 2002-09-23 Andrew G. Morgan <morgan@kernel.org>
 -----BEGIN PGP PUBLIC KEY BLOCK-----
-Version: GnuPG v1.4.15 (GNU/Linux)
 
 mQGiBD2PVCcRBADmR2dfKJIaGj120v0EjrGbnYic8nKCrDLUHmtiZyIlMeTNqnw/
 /Q2m057SIyFC5K5W7XV8LIsOcpEBAdIS5QLClwec/wqVj1FU5TLHNifR9fBq+DaI
@@ -140,8 +186,33 @@ PskEkzaX+mzf3Tfn6k7+d/dPYRHPrX0STdsNMqrZkqjt5tiozuLYJUE/PDKafzdI
 Q7ya5ps2AdIKNixiSRPC+6cNB85NBorLXs9yg+JkQlPeUI7/DZb2iz1iZXTnyCZv
 SvKA7JMYpCOzZ6fWshanZ/91hxQvsYHjYC+zQVTErSYQlBqz8fDLPfyYQiqQFn4c
 T/i1WsxYMEaZtexvLuQe9LeaDqyY19DyBaJIIiN6EcGZ4sXRa7M6QUD1HKjEWt62
-U4shHkPGIMgQWLcRZDepovlpGVXLmXEf
-=oXom
+U4shHkPGIMgQWLcRZDepovlpGVXLmXEfiQIcBBABAgAGBQJbnYNFAAoJECnuhIri
+zPP0M+4QAI9UJZJL7X7wPkLr3srTZBDbMhJkEQVHQolirrnb3Ojkp1NAQ8vsujqm
+tyPM9OPvQC3gylat/tQoMPO9C0/2ztfqMY0ZlxPuiys582wn23/Iqrwn3a3UicU4
+iiCwOS0iBVPeNFr5QuZRZJ58S9af1w53PoPr7ZdS0+bg4hcuO7sq/3VMzqmvkL5O
+ptXNd7LEiHzxZDMNrdsgnLwC+umOvzV4/wSY0N5BCwCGYpoTipH9X08KF19uSKGV
+xG80eEErYKo1o20OMAoDEUHlc6wIPArqdsXg9tt4qclyx0Y83LFWTyc4f6lesG3c
+dZKFZXFSjIFEn5NVgsX/NhzOkUag+rRqpJR6hxz2MunHGNGmmT+O6B/z84PEHxqp
+VbMFOIhQJO6MMwJ1sv2RTk4yNeZS7IcW4q/ApKTWRaNoDl3s6csIKGD1myHTGs8e
+/yAodElwIHXQL3TroY7kP0dtsDj3Am1+UTbM6O8ro4vjhOVm9Ce+f5nek+es9JiX
+4xCWC3ngjnqrPMGKMEWark9mJZRk+QOqBb1H9ZzQJnvbiq6MQJot83hMBxsirlZp
+AdI1p1Qzmai7TToQnzyrZqKc1fl9dJrbxxIbPbJNClU19LTcXraPh0GLVpgdkt1q
+27PKbyeWnKK7FBKnnFJeOXgO4HSP7XkpMiUSGYZgu/QpJ5SkipW8iFcEExECABcF
+Aj2PVCcFCwcKAwQDFQMCAxYCAQIXgAAKCRBCF4Sr1Bpt8u+VAJ9BWCevOxPoNaWm
+O81Sz1HIPuII+QCeNaquwRuYR19cjrT6C0HWwmd4v8eJAjMEEAEKAB0WIQSJGKuw
+tf7CG2l+zlr71JgaT2BXfAUCYsdB1QAKCRD71JgaT2BXfIiFEACqo6nZhMVjldEF
+Pat89Hka/OTSMtG/m4U5Sf5JgZD+DvqbVojk06+Ca8viLQgz0EJHthyLwP7T7L0y
+o0ZBTcaJAjTgjtEb9ZeAFuq+lgK9XCAf0+rElVqJ93cAhS6+hucsLjLhhakDr60K
+TUwJzdWMnzlizOQjEKm8TdYbukG3xYE/DVCYLHWW39aYGFcOo1N9+UszN4YKSjTa
+zJ1+yR9/VncIIH8KWN1EMW8bOAJ5SYjkdA5cQc1oP0aNqMM3mgTtCnCS0EFtoBxl
+HRc+tg9oJoCAnmj3sPvZfaFhLoqz8+Xp1QHRFSkxaunXr33L1Y/KxNWWCQBpe7J9
+/FSReMmKiqZaUoNRrkh7RfSGXtSIXpAB/cr5iTdty5TYLYCuGY7ITe0DzGjhn3GJ
+87i2uJgvVKETJV0lO6lcugqXN/FGXGlJNtgsWCmRlrkqix+S91ZCI6FNTfY9d78y
+fxNpWxElT9rs35CvggIDigQ8YJKVb9JSciZhXFSE1U/2/xW3DTUXrHQDmvB1Ch+I
+L5N75Q/n56eQKN5Mb2oUchXluJBg7Bu0oR63/vEDA5ALaK/6OX7u0pbTDhugFjym
+ssYorVQQgFarAZU13JmzNX3PWDf2VPTf/rxETHmBiVYnCh9Ois9ZYEpxTrWy0AY7
+RF7BYQDbZJbxCppcYIIDh1S6EYFvrw==
+=w+Ya
 -----END PGP PUBLIC KEY BLOCK-----
 
 The following is my DSA key 'D41A6DF2' signed with my old '2A398175'
diff --git a/progs/.gitignore b/progs/.gitignore
index f42095f..eed1982 100644
--- a/progs/.gitignore
+++ b/progs/.gitignore
@@ -1,5 +1,8 @@
 capsh
+tcapsh-static
 getcap
 getpcaps
 setcap
 verify-caps
+compare-cap
+uns_test
diff --git a/progs/Makefile b/progs/Makefile
index ef51dc6..80f890a 100644
--- a/progs/Makefile
+++ b/progs/Makefile
@@ -4,35 +4,74 @@ include $(topdir)/Make.Rules
 #
 # Programs: all of the examples that we will compile
 #
-PROGS=getpcaps capsh
-ifeq ($(LIBATTR),yes)
-PROGS += getcap setcap
-endif
+PROGS=getpcaps getcap setcap
 
 BUILD=$(PROGS)
 
-ifneq ($(DYNAMIC),yes)
-LDFLAGS += --static
+all: $(BUILD) capsh
+
+ifeq ($(DYNAMIC),yes)
+LDPATH = LD_LIBRARY_PATH=../libcap
+DEPS = ../libcap/libcap.so
+else
+ifeq ($(LIBCSTATIC),yes)
+LDFLAGS = --static
+DEPS = ../libcap/libcap.a
+else
+# For this build variant override the LDFLAGS to link statically from
+# libraries within the build tree. If you never want this, use make
+# DYNAMIC=yes . Note, we can't reliably link statically against glibc
+# because of https://sourceware.org/bugzilla/show_bug.cgi?id=12491 .
+LDFLAGS = -Wl,-Bstatic
+LDFLAGS_SUFFIX = -Wl,-Bdynamic
+DEPS = ../libcap/libcap.a
+endif
 endif
-LDLIBS += -L../libcap -lcap
 
-all: $(BUILD)
+../libcap/libcap.a:
+	$(MAKE) -C ../libcap libcap.a
 
-$(BUILD): %: %.o
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS)
+../libcap/libcap.so:
+	$(MAKE) -C ../libcap libcap.so
+
+$(BUILD): %: %.o $(DEPS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $< $(LIBCAPLIB) $(LDFLAGS_SUFFIX) -o $@
 
 %.o: %.c $(INCS)
-	$(CC) $(IPATH) $(CFLAGS) -c $< -o $@
+	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
 
 install: all
-	mkdir -p -m 0755 $(SBINDIR)
-	for p in $(PROGS) ; do \
-		install -m 0755 $$p $(SBINDIR) ; \
+	mkdir -p -m 0755 $(FAKEROOT)$(SBINDIR)
+	for p in $(PROGS) capsh ; do \
+		install -m 0755 $$p $(FAKEROOT)$(SBINDIR) ; \
 	done
 ifeq ($(RAISE_SETFCAP),yes)
-	$(SBINDIR)/setcap cap_setfcap=i $(SBINDIR)/setcap
+	$(FAKEROOT)$(SBINDIR)/setcap cap_setfcap=i $(FAKEROOT)$(SBINDIR)/setcap
 endif
 
+test:
+	@echo "no program tests without privilege, try 'make sudotest'"
+
+capshdoc.c.cf: capshdoc.c ./mkcapshdoc.sh
+	./mkcapshdoc.sh > $@
+	diff -u capshdoc.c $@ || (rm $@ ; exit 1)
+
+capsh: capsh.c capshdoc.c.cf capshdoc.h $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(CAPSH_SHELL) $(LDFLAGS) $< capshdoc.c $(LIBCAPLIB) $(LDFLAGS_SUFFIX) -o $@
+
+# Statically linked with minimal linkage flags to enable running in a
+# chroot and in other in-tree testing contexts.
+tcapsh-static: capsh.c capshdoc.c.cf capshdoc.h $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(CAPSH_SHELL) -o $@ $< capshdoc.c $(LIBCAPLIB) --static
+
+uns_test: ../tests/uns_test.c
+	$(MAKE) -C ../tests uns_test
+	cp ../tests/uns_test .
+
+sudotest: tcapsh-static uns_test capsh setcap getcap getpcaps tcapsh-static
+	$(SUDO) $(LDPATH) ./quicktest.sh
+
 clean:
 	$(LOCALCLEAN)
-	rm -f *.o $(BUILD) tcapsh ping hack.sh
+	rm -f *.o $(BUILD) privileged ping hack.sh compare-cap uns_test
+	rm -f capsh tcapsh* capshdoc.*.cf
diff --git a/progs/capsh.c b/progs/capsh.c
index 3ceadcd..f753291 100644
--- a/progs/capsh.c
+++ b/progs/capsh.c
@@ -1,33 +1,73 @@
 /*
- * Copyright (c) 2008-11 Andrew G. Morgan <morgan@kernel.org>
+ * Copyright (c) 2008-11,16,19,2020 Andrew G. Morgan <morgan@kernel.org>
  *
- * This is a simple 'bash' wrapper program that can be used to
- * raise and lower both the bset and pI capabilities before invoking
- * /bin/bash (hardcoded right now).
+ * This is a multifunction shell wrapper tool that can be used to
+ * launch capable files in various ways with a variety of settings. It
+ * also supports some testing modes, which are used extensively as
+ * part of the libcap build system.
  *
  * The --print option can be used as a quick test whether various
  * capability manipulations work as expected (or not).
  */
 
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
-#include <sys/prctl.h>
 #include <sys/types.h>
-#include <unistd.h>
 #include <pwd.h>
 #include <grp.h>
 #include <errno.h>
 #include <ctype.h>
 #include <sys/capability.h>
+#include <sys/prctl.h>
 #include <sys/securebits.h>
 #include <sys/wait.h>
-#include <sys/prctl.h>
+#include <unistd.h>
+
+#ifndef SHELL
+#define SHELL "/bin/bash"
+#endif /* ndef SHELL */
+
+#include "./capshdoc.h"
 
 #define MAX_GROUPS       100   /* max number of supplementary groups for user */
 
-static const cap_value_t raise_setpcap[1] = { CAP_SETPCAP };
-static const cap_value_t raise_chroot[1] = { CAP_SYS_CHROOT };
+/* parse a non-negative integer with some error handling */
+static unsigned long nonneg_uint(const char *text, const char *prefix, int *ok)
+{
+    char *remains;
+    unsigned long value;
+    ssize_t len = strlen(text);
+
+    if (len == 0 || *text == '-') {
+	goto fail;
+    }
+    value = strtoul(text, &remains, 0);
+    if (*remains) {
+	goto fail;
+    }
+    if (ok != NULL) {
+	*ok = 1;
+    }
+    return value;
+
+fail:
+    if (ok == NULL) {
+	fprintf(stderr, "%s: want non-negative integer, got \"%s\"\n",
+		prefix, text);
+	exit(1);
+    }
+    *ok = 0;
+    return 0;
+}
 
 static char *binary(unsigned long value)
 {
@@ -43,108 +83,493 @@ static char *binary(unsigned long value)
     return string + i;
 }
 
-int main(int argc, char *argv[], char *envp[])
+static void display_prctl_set(const char *name, int (*fn)(cap_value_t))
 {
-    pid_t child;
-    unsigned i;
+    unsigned cap;
+    const char *sep;
+    int set;
 
-    child = 0;
+    printf("%s set =", name);
+    for (sep = "", cap=0; (set = fn(cap)) >= 0; cap++) {
+	char *ptr;
+	if (!set) {
+	    continue;
+	}
 
-    for (i=1; i<argc; ++i) {
-	if (!memcmp("--drop=", argv[i], 4)) {
-	    char *ptr;
-	    cap_t orig, raised_for_setpcap;
+	ptr = cap_to_name(cap);
+	if (ptr == NULL) {
+	    printf("%s%u", sep, cap);
+	} else {
+	    printf("%s%s", sep, ptr);
+	    cap_free(ptr);
+	}
+	sep = ",";
+    }
+    if (!cap) {
+	printf(" <unsupported>\n");
+    } else {
+	printf("\n");
+    }
+}
 
-	    /*
-	     * We need to do this here because --inh=XXX may have reset
-	     * orig and it isn't until we are within the --drop code that
-	     * we know what the prevailing (orig) pI value is.
-	     */
-	    orig = cap_get_proc();
-	    if (orig == NULL) {
-		perror("Capabilities not available");
+static void display_current(void)
+{
+    cap_t all;
+    char *text;
+
+    all = cap_get_proc();
+    if (all == NULL) {
+	perror("failed to get process capabilities");
+	exit(1);
+    }
+    text = cap_to_text(all, NULL);
+    printf("Current: %s\n", text);
+    cap_free(text);
+    cap_free(all);
+}
+
+static void display_current_iab(void)
+{
+    cap_iab_t iab;
+    char *text;
+
+    iab = cap_iab_get_proc();
+    if (iab == NULL) {
+	perror("failed to get IAB for process");
+	exit(1);
+    }
+    text = cap_iab_to_text(iab);
+    if (text == NULL) {
+	perror("failed to obtain text for IAB");
+	cap_free(iab);
+	exit(1);
+    }
+    printf("Current IAB: %s\n", text);
+    cap_free(text);
+    cap_free(iab);
+}
+
+/* arg_print displays the current capability state of the process */
+static void arg_print(void)
+{
+    long set;
+    int status, j;
+    const char *sep;
+    struct group *g;
+    gid_t groups[MAX_GROUPS], gid;
+    uid_t uid, euid;
+    struct passwd *u, *eu;
+
+    display_current();
+    display_prctl_set("Bounding", cap_get_bound);
+    display_prctl_set("Ambient", cap_get_ambient);
+    display_current_iab();
+
+    set = cap_get_secbits();
+    if (set >= 0) {
+	const char *b = binary(set);  /* verilog convention for binary string */
+	printf("Securebits: 0%lo/0x%lx/%u'b%s (no-new-privs=%d)\n", set, set,
+	       (unsigned) strlen(b), b,
+	       prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0, 0));
+	printf(" secure-noroot: %s (%s)\n",
+	       (set & SECBIT_NOROOT) ? "yes":"no",
+	       (set & SECBIT_NOROOT_LOCKED) ? "locked":"unlocked");
+	printf(" secure-no-suid-fixup: %s (%s)\n",
+	       (set & SECBIT_NO_SETUID_FIXUP) ? "yes":"no",
+	       (set & SECBIT_NO_SETUID_FIXUP_LOCKED) ? "locked":"unlocked");
+	printf(" secure-keep-caps: %s (%s)\n",
+	       (set & SECBIT_KEEP_CAPS) ? "yes":"no",
+	       (set & SECBIT_KEEP_CAPS_LOCKED) ? "locked":"unlocked");
+	if (CAP_AMBIENT_SUPPORTED()) {
+	    printf(" secure-no-ambient-raise: %s (%s)\n",
+		   (set & SECBIT_NO_CAP_AMBIENT_RAISE) ? "yes":"no",
+		   (set & SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED) ?
+		   "locked":"unlocked");
+	}
+    } else {
+	printf("[Securebits ABI not supported]\n");
+	set = prctl(PR_GET_KEEPCAPS);
+	if (set >= 0) {
+	    printf(" prctl-keep-caps: %s (locking not supported)\n",
+		   set ? "yes":"no");
+	} else {
+	    printf("[Keepcaps ABI not supported]\n");
+	}
+    }
+    uid = getuid();
+    u = getpwuid(uid);
+    euid = geteuid();
+    eu = getpwuid(euid);
+    printf("uid=%u(%s) euid=%u(%s)\n", uid, u ? u->pw_name : "???", euid, eu ? eu->pw_name : "???");
+    gid = getgid();
+    g = getgrgid(gid);
+    printf("gid=%u(%s)\n", gid, g ? g->gr_name : "???");
+    printf("groups=");
+    status = getgroups(MAX_GROUPS, groups);
+    sep = "";
+    for (j=0; j < status; j++) {
+	g = getgrgid(groups[j]);
+	printf("%s%u(%s)", sep, groups[j], g ? g->gr_name : "???");
+	sep = ",";
+    }
+    printf("\n");
+    cap_mode_t mode = cap_get_mode();
+    printf("Guessed mode: %s (%d)\n", cap_mode_name(mode), mode);
+}
+
+static const cap_value_t raise_setpcap[1] = { CAP_SETPCAP };
+static const cap_value_t raise_chroot[1] = { CAP_SYS_CHROOT };
+
+static cap_t will_need_setpcap(int strict)
+{
+    cap_flag_value_t enabled;
+    cap_t raised = NULL;
+
+    if (strict) {
+	return NULL;
+    }
+
+    raised = cap_get_proc();
+    if (raised == NULL) {
+	perror("Capabilities not available");
+	exit(1);
+    }
+    if (cap_get_flag(raised, CAP_SETPCAP, CAP_EFFECTIVE, &enabled) != 0) {
+	perror("Unable to check CAP_EFFECTIVE CAP_SETPCAP value");
+	exit(1);
+    }
+    if (enabled != CAP_SET) {
+	cap_set_flag(raised, CAP_EFFECTIVE, 1, raise_setpcap, CAP_SET);
+    } else {
+	/* no need to raise - since already raised */
+	cap_free(raised);
+	raised = NULL;
+    }
+    return raised;
+}
+
+static void push_pcap(int strict, cap_t *orig_p, cap_t *raised_for_setpcap_p)
+{
+    *orig_p = cap_get_proc();
+    if (NULL == *orig_p) {
+	perror("Capabilities not available");
+	exit(1);
+    }
+    *raised_for_setpcap_p = will_need_setpcap(strict);
+}
+
+static void pop_pcap(cap_t orig, cap_t raised_for_setpcap)
+{
+    cap_free(raised_for_setpcap);
+    cap_free(orig);
+}
+
+static void arg_drop(int strict, const char *arg_names)
+{
+    char *ptr;
+    cap_t orig, raised_for_setpcap;
+    char *names;
+
+    push_pcap(strict, &orig, &raised_for_setpcap);
+    if (strcmp("all", arg_names) == 0) {
+	unsigned j = 0;
+	while (CAP_IS_SUPPORTED(j)) {
+	    int status;
+	    if (raised_for_setpcap != NULL &&
+		cap_set_proc(raised_for_setpcap) != 0) {
+		perror("unable to raise CAP_SETPCAP for BSET changes");
 		exit(1);
 	    }
+	    status = cap_drop_bound(j);
+	    if (raised_for_setpcap != NULL && cap_set_proc(orig) != 0) {
+		perror("unable to lower CAP_SETPCAP post BSET change");
+		exit(1);
+	    }
+	    if (status != 0) {
+		char *name_ptr;
 
-	    raised_for_setpcap = cap_dup(orig);
-	    if (raised_for_setpcap == NULL) {
-		fprintf(stderr, "BSET modification requires CAP_SETPCAP\n");
+		name_ptr = cap_to_name(j);
+		fprintf(stderr, "Unable to drop bounding capability [%s]\n",
+			name_ptr);
+		cap_free(name_ptr);
 		exit(1);
 	    }
+	    j++;
+	}
+	pop_pcap(orig, raised_for_setpcap);
+	return;
+    }
+
+    names = strdup(arg_names);
+    if (NULL == names) {
+	fprintf(stderr, "failed to allocate names\n");
+	exit(1);
+    }
+    for (ptr = names; (ptr = strtok(ptr, ",")); ptr = NULL) {
+	/* find name for token */
+	cap_value_t cap;
+	int status;
+
+	if (cap_from_name(ptr, &cap) != 0) {
+	    fprintf(stderr, "capability [%s] is unknown to libcap\n", ptr);
+	    exit(1);
+	}
+	if (raised_for_setpcap != NULL &&
+	    cap_set_proc(raised_for_setpcap) != 0) {
+	    perror("unable to raise CAP_SETPCAP for BSET changes");
+	    exit(1);
+	}
+	status = cap_drop_bound(cap);
+	if (raised_for_setpcap != NULL && cap_set_proc(orig) != 0) {
+	    perror("unable to lower CAP_SETPCAP post BSET change");
+	    exit(1);
+	}
+	if (status != 0) {
+	    fprintf(stderr, "failed to drop [%s=%u]\n", ptr, cap);
+	    exit(1);
+	}
+    }
+    pop_pcap(orig, raised_for_setpcap);
+    free(names);
+}
+
+static void arg_change_amb(const char *arg_names, cap_flag_value_t set)
+{
+    char *ptr;
+    cap_t orig;
+    char *names;
+
+    orig = cap_get_proc();
+    if (strcmp("all", arg_names) == 0) {
+	unsigned j = 0;
+	while (CAP_IS_SUPPORTED(j)) {
+	    int status;
+	    status = cap_set_ambient(j, set);
+	    if (status != 0) {
+		char *name_ptr;
 
-	    if (cap_set_flag(raised_for_setpcap, CAP_EFFECTIVE, 1,
-			     raise_setpcap, CAP_SET) != 0) {
-		perror("unable to select CAP_SETPCAP");
+		name_ptr = cap_to_name(j);
+		fprintf(stderr, "Unable to %s ambient capability [%s]\n",
+			set == CAP_CLEAR ? "clear":"raise", name_ptr);
+		cap_free(name_ptr);
 		exit(1);
 	    }
+	    j++;
+	}
+	cap_free(orig);
+	return;
+    }
 
-	    if (strcmp("all", argv[i]+7) == 0) {
-		unsigned j = 0;
-		while (CAP_IS_SUPPORTED(j)) {
-		    if (cap_drop_bound(j) != 0) {
-			char *name_ptr;
+    names = strdup(arg_names);
+    if (NULL == names) {
+	fprintf(stderr, "failed to allocate names\n");
+	exit(1);
+    }
+    for (ptr = names; (ptr = strtok(ptr, ",")); ptr = NULL) {
+	/* find name for token */
+	cap_value_t cap;
+	int status;
 
-			name_ptr = cap_to_name(j);
-			fprintf(stderr,
-				"Unable to drop bounding capability [%s]\n",
-				name_ptr);
-			cap_free(name_ptr);
-			exit(1);
-		    }
-		    j++;
-		}
-	    } else {
-		for (ptr = argv[i]+7; (ptr = strtok(ptr, ",")); ptr = NULL) {
-		    /* find name for token */
-		    cap_value_t cap;
-		    int status;
-
-		    if (cap_from_name(ptr, &cap) != 0) {
-			fprintf(stderr,
-				"capability [%s] is unknown to libcap\n",
-				ptr);
-			exit(1);
-		    }
-		    if (cap_set_proc(raised_for_setpcap) != 0) {
-			perror("unable to raise CAP_SETPCAP for BSET changes");
-			exit(1);
-		    }
-		    status = prctl(PR_CAPBSET_DROP, cap);
-		    if (cap_set_proc(orig) != 0) {
-			perror("unable to lower CAP_SETPCAP post BSET change");
-			exit(1);
-		    }
-		    if (status) {
-			fprintf(stderr, "failed to drop [%s=%u]\n", ptr, cap);
-			exit(1);
-		    }
-		}
+	if (cap_from_name(ptr, &cap) != 0) {
+	    fprintf(stderr, "capability [%s] is unknown to libcap\n", ptr);
+	    exit(1);
+	}
+	status = cap_set_ambient(cap, set);
+	if (status != 0) {
+	    fprintf(stderr, "failed to %s ambient [%s=%u]\n",
+		    set == CAP_CLEAR ? "clear":"raise", ptr, cap);
+	    exit(1);
+	}
+    }
+    cap_free(orig);
+    free(names);
+}
+
+/*
+ * find_self locates and returns the full pathname of the named binary
+ * that is running. Importantly, it looks in the context of the
+ * prevailing CHROOT. Further, it does not fail over to invoking a
+ * shell if the target binary looks like something other than an
+ * executable. If an executable is not found, the function terminates
+ * the program with an error.
+ */
+static char *find_self(const char *arg0)
+{
+    int i, status=1;
+    char *p = NULL, *parts, *dir, *scratch;
+    const char *path;
+
+    for (i = strlen(arg0)-1; i >= 0 && arg0[i] != '/'; i--);
+    if (i >= 0) {
+        return strdup(arg0);
+    }
+
+    path = getenv("PATH");
+    if (path == NULL) {
+        fprintf(stderr, "no PATH environment variable found for re-execing\n");
+	exit(1);
+    }
+
+    parts = strdup(path);
+    if (parts == NULL) {
+        fprintf(stderr, "insufficient memory for parts of path\n");
+	exit(1);
+    }
+
+    scratch = malloc(2+strlen(path)+strlen(arg0));
+    if (scratch == NULL) {
+        fprintf(stderr, "insufficient memory for path building\n");
+	goto free_parts;
+    }
+
+    for (p = parts; (dir = strtok(p, ":")); p = NULL) {
+        sprintf(scratch, "%s/%s", dir, arg0);
+	if (access(scratch, X_OK) == 0) {
+	    status = 0;
+	    break;
+	}
+    }
+    if (status) {
+	fprintf(stderr, "unable to find executable '%s' in PATH\n", arg0);
+	free(scratch);
+    }
+
+free_parts:
+    free(parts);
+    if (status) {
+	exit(status);
+    }
+    return scratch;
+}
+
+static long safe_sysconf(int name)
+{
+    long ans = sysconf(name);
+    if (ans <= 0) {
+	fprintf(stderr, "sysconf(%d) returned a non-positive number: %ld\n", name, ans);
+	exit(1);
+    }
+    return ans;
+}
+
+static void describe(cap_value_t cap) {
+    int j;
+    const char **lines = explanations[cap];
+    char *name = cap_to_name(cap);
+    if (cap < cap_max_bits()) {
+	printf("%s (%d)", name, cap);
+    } else {
+	printf("<reserved for> %s (%d)", name, cap);
+    }
+    cap_free(name);
+    printf(" [/proc/self/status:CapXXX: 0x%016llx]\n\n", 1ULL<<cap);
+    for (j=0; lines[j]; j++) {
+	printf("    %s\n", lines[j]);
+    }
+}
+
+__attribute__ ((noreturn))
+static void do_launch(char *args[], char *envp[])
+{
+    cap_launch_t lau;
+    pid_t child;
+    int ret, result;
+
+    lau = cap_new_launcher(args[0], (void *) args, (void *) envp);
+    if (lau == NULL) {
+	perror("failed to create launcher");
+	exit(1);
+    }
+    child = cap_launch(lau, NULL);
+    if (child <= 0) {
+	perror("child failed to start");
+	exit(1);
+    }
+    cap_free(lau);
+    ret = waitpid(child, &result, 0);
+    if (ret != child) {
+	fprintf(stderr, "failed to wait for PID=%d, result=%x: ",
+		child, result);
+	perror("");
+	exit(1);
+    }
+    if (WIFEXITED(result)) {
+	exit(WEXITSTATUS(result));
+    }
+    if (WIFSIGNALED(result)) {
+	fprintf(stderr, "child PID=%d terminated by signo=%d\n",
+		child, WTERMSIG(result));
+	exit(1);
+    }
+    fprintf(stderr, "child PID=%d generated result=%0x\n", child, result);
+    exit(1);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+    pid_t child = 0;
+    unsigned i;
+    int strict = 0, quiet_start = 0, dont_set_env = 0;
+    const char *shell = SHELL;
+
+    for (i=1; i<argc; ++i) {
+	if (!strcmp("--quiet", argv[i])) {
+	    quiet_start = 1;
+	    continue;
+	}
+	if (i == 1) {
+	    char *temp_name = cap_to_name(cap_max_bits() - 1);
+	    if (temp_name == NULL) {
+		perror("obtaining highest capability name");
+		exit(1);
 	    }
-	    cap_free(raised_for_setpcap);
-	    cap_free(orig);
-	} else if (!memcmp("--inh=", argv[i], 6)) {
+	    if (temp_name[0] != 'c') {
+		printf("WARNING: libcap needs an update"
+		       " (cap=%d should have a name).\n",
+		       cap_max_bits() - 1);
+	    }
+	    cap_free(temp_name);
+	}
+	if (!strncmp("--drop=", argv[i], 7)) {
+	    arg_drop(strict, argv[i]+7);
+	} else if (!strncmp("--dropped=", argv[i], 10)) {
+	    cap_value_t cap;
+	    if (cap_from_name(argv[i]+10, &cap) < 0) {
+		fprintf(stderr, "cap[%s] not recognized by library\n",
+			argv[i] + 10);
+		exit(1);
+	    }
+	    if (cap_get_bound(cap) > 0) {
+		fprintf(stderr, "cap[%s] raised in bounding vector\n",
+			argv[i]+10);
+		exit(1);
+	    }
+	} else if (!strcmp("--has-ambient", argv[i])) {
+	    if (!CAP_AMBIENT_SUPPORTED()) {
+		perror("ambient set not supported");
+		exit(1);
+	    }
+	} else if (!strncmp("--addamb=", argv[i], 9)) {
+	    arg_change_amb(argv[i]+9, CAP_SET);
+	} else if (!strncmp("--delamb=", argv[i], 9)) {
+	    arg_change_amb(argv[i]+9, CAP_CLEAR);
+	} else if (!strncmp("--noamb", argv[i], 7)) {
+	    if (cap_reset_ambient() != 0) {
+		perror("failed to reset ambient set");
+		exit(1);
+	    }
+	} else if (!strcmp("--noenv", argv[i])) {
+	    dont_set_env = 1;
+	} else if (!strncmp("--inh=", argv[i], 6)) {
 	    cap_t all, raised_for_setpcap;
 	    char *text;
 	    char *ptr;
 
-	    all = cap_get_proc();
-	    if (all == NULL) {
-		perror("Capabilities not available");
-		exit(1);
-	    }
+	    push_pcap(strict, &all, &raised_for_setpcap);
 	    if (cap_clear_flag(all, CAP_INHERITABLE) != 0) {
 		perror("libcap:cap_clear_flag() internal error");
 		exit(1);
 	    }
-
-	    raised_for_setpcap = cap_dup(all);
-	    if ((raised_for_setpcap != NULL)
-		&& (cap_set_flag(raised_for_setpcap, CAP_EFFECTIVE, 1,
-				 raise_setpcap, CAP_SET) != 0)) {
-		cap_free(raised_for_setpcap);
-		raised_for_setpcap = NULL;
-	    }
-
 	    text = cap_to_text(all, NULL);
 	    cap_free(all);
 	    if (text == NULL) {
@@ -161,13 +586,13 @@ int main(int argc, char *argv[], char *envp[])
 	    } else {
 		strcpy(ptr, text);
 	    }
+	    cap_free(text);
 
 	    all = cap_from_text(ptr);
 	    if (all == NULL) {
 		perror("Fatal error internalizing capabilities");
 		exit(1);
 	    }
-	    cap_free(text);
 	    free(ptr);
 
 	    if (raised_for_setpcap != NULL) {
@@ -185,45 +610,29 @@ int main(int argc, char *argv[], char *envp[])
 		perror("Unable to set inheritable capabilities");
 		exit(1);
 	    }
-	    /*
-	     * Since status is based on orig, we don't want to restore
-	     * the previous value of 'all' again here!
-	     */
-
 	    cap_free(all);
-	} else if (!memcmp("--caps=", argv[i], 7)) {
+	} else if (!strcmp("--strict", argv[i])) {
+	    strict = !strict;
+	} else if (!strncmp("--caps=", argv[i], 7)) {
 	    cap_t all, raised_for_setpcap;
 
-	    raised_for_setpcap = cap_get_proc();
-	    if (raised_for_setpcap == NULL) {
-		perror("Capabilities not available");
-		exit(1);
-	    }
-
-	    if ((raised_for_setpcap != NULL)
-		&& (cap_set_flag(raised_for_setpcap, CAP_EFFECTIVE, 1,
-				 raise_setpcap, CAP_SET) != 0)) {
-		cap_free(raised_for_setpcap);
-		raised_for_setpcap = NULL;
-	    }
-
+	    raised_for_setpcap = will_need_setpcap(strict);
 	    all = cap_from_text(argv[i]+7);
 	    if (all == NULL) {
 		fprintf(stderr, "unable to interpret [%s]\n", argv[i]);
 		exit(1);
 	    }
-
 	    if (raised_for_setpcap != NULL) {
 		/*
-		 * This is only for the case that pP does not contain
-		 * the requested change to pI.. Failing here is not
-		 * indicative of the cap_set_proc(all) failing (always).
+		 * This is actually only for the case that pP does not
+		 * contain the requested change to pI.. Failing here
+		 * is not always indicative of the cap_set_proc(all)
+		 * failing.
 		 */
 		(void) cap_set_proc(raised_for_setpcap);
 		cap_free(raised_for_setpcap);
 		raised_for_setpcap = NULL;
 	    }
-
 	    if (cap_set_proc(all) != 0) {
 		fprintf(stderr, "Unable to set capabilities [%s]\n", argv[i]);
 		exit(1);
@@ -232,20 +641,71 @@ int main(int argc, char *argv[], char *envp[])
 	     * Since status is based on orig, we don't want to restore
 	     * the previous value of 'all' again here!
 	     */
-
 	    cap_free(all);
-	} else if (!memcmp("--keep=", argv[i], 7)) {
+	} else if (!strcmp("--modes", argv[i])) {
+	    cap_mode_t c;
+	    printf("Supported modes:");
+	    for (c = 1; ; c++) {
+		const char *m = cap_mode_name(c);
+		if (strcmp("UNKNOWN", m) == 0) {
+		    break;
+		}
+		printf(" %s", m);
+	    }
+	    printf("\n");
+	} else if (!strncmp("--mode", argv[i], 6)) {
+	    if (argv[i][6] == '=') {
+		const char *target = argv[i]+7;
+		cap_mode_t c;
+		int found = 0;
+		for (c = 1; ; c++) {
+		    const char *m = cap_mode_name(c);
+		    if (!strcmp("UNKNOWN", m)) {
+			found = 0;
+			break;
+		    }
+		    if (!strcmp(m, target)) {
+			found = 1;
+			break;
+		    }
+		}
+		if (!found) {
+		    printf("unsupported mode: %s\n", target);
+		    exit(1);
+		}
+		int ret = cap_set_mode(c);
+		if (ret != 0) {
+		    printf("failed to set mode [%s]: %s\n",
+			   target, strerror(errno));
+		    exit(1);
+		}
+	    } else if (argv[i][6]) {
+		printf("unrecognized command [%s]\n", argv[i]);
+		goto usage;
+	    } else {
+		cap_mode_t m = cap_get_mode();
+		printf("Mode: %s\n", cap_mode_name(m));
+	    }
+	} else if (!strncmp("--inmode=", argv[i], 9)) {
+	    const char *target = argv[i]+9;
+	    cap_mode_t c = cap_get_mode();
+	    const char *m = cap_mode_name(c);
+	    if (strcmp(m, target)) {
+		printf("mismatched mode got=%s want=%s\n", m, target);
+		exit(1);
+	    }
+	} else if (!strncmp("--keep=", argv[i], 7)) {
 	    unsigned value;
 	    int set;
 
-	    value = strtoul(argv[i]+7, NULL, 0);
+	    value = nonneg_uint(argv[i]+7, "invalid --keep value", NULL);
 	    set = prctl(PR_SET_KEEPCAPS, value);
 	    if (set < 0) {
 		fprintf(stderr, "prctl(PR_SET_KEEPCAPS, %u) failed: %s\n",
 			value, strerror(errno));
 		exit(1);
 	    }
-	} else if (!memcmp("--chroot=", argv[i], 9)) {
+	} else if (!strncmp("--chroot=", argv[i], 9)) {
 	    int status;
 	    cap_t orig, raised_for_chroot;
 
@@ -282,7 +742,9 @@ int main(int argc, char *argv[], char *envp[])
 	     * Given we are now in a new directory tree, its good practice
 	     * to start off in a sane location
 	     */
-	    status = chdir("/");
+	    if (status == 0) {
+		status = chdir("/");
+	    }
 
 	    cap_free(orig);
 
@@ -290,22 +752,25 @@ int main(int argc, char *argv[], char *envp[])
 		fprintf(stderr, "Unable to chroot/chdir to [%s]", argv[i]+9);
 		exit(1);
 	    }
-	} else if (!memcmp("--secbits=", argv[i], 10)) {
+	} else if (!strncmp("--secbits=", argv[i], 10)) {
 	    unsigned value;
 	    int status;
-
-	    value = strtoul(argv[i]+10, NULL, 0);
-	    status = prctl(PR_SET_SECUREBITS, value);
+	    value = nonneg_uint(argv[i]+10, "invalid --secbits value", NULL);
+	    status = cap_set_secbits(value);
 	    if (status < 0) {
 		fprintf(stderr, "failed to set securebits to 0%o/0x%x\n",
 			value, value);
 		exit(1);
 	    }
-	} else if (!memcmp("--forkfor=", argv[i], 10)) {
+	} else if (!strncmp("--forkfor=", argv[i], 10)) {
 	    unsigned value;
-
-	    value = strtoul(argv[i]+10, NULL, 0);
+	    if (child != 0) {
+		fprintf(stderr, "already forked\n");
+		exit(1);
+	    }
+	    value = nonneg_uint(argv[i]+10, "invalid --forkfor value", NULL);
 	    if (value == 0) {
+		fprintf(stderr, "require non-zero --forkfor value\n");
 		goto usage;
 	    }
 	    child = fork();
@@ -315,12 +780,13 @@ int main(int argc, char *argv[], char *envp[])
 		sleep(value);
 		exit(0);
 	    }
-	} else if (!memcmp("--killit=", argv[i], 9)) {
+	} else if (!strncmp("--killit=", argv[i], 9)) {
 	    int retval, status;
 	    pid_t result;
 	    unsigned value;
 
-	    value = strtoul(argv[i]+9, NULL, 0);
+	    value = nonneg_uint(argv[i]+9, "invalid --killit signo value",
+				NULL);
 	    if (!child) {
 		fprintf(stderr, "no forked process to kill\n");
 		exit(1);
@@ -341,42 +807,54 @@ int main(int argc, char *argv[], char *envp[])
 			, value, WTERMSIG(status));
 		exit(1);
 	    }
-	} else if (!memcmp("--uid=", argv[i], 6)) {
+	    child = 0;
+	} else if (!strncmp("--uid=", argv[i], 6)) {
 	    unsigned value;
 	    int status;
 
-	    value = strtoul(argv[i]+6, NULL, 0);
+	    value = nonneg_uint(argv[i]+6, "invalid --uid value", NULL);
 	    status = setuid(value);
 	    if (status < 0) {
 		fprintf(stderr, "Failed to set uid=%u: %s\n",
 			value, strerror(errno));
 		exit(1);
 	    }
-	} else if (!memcmp("--gid=", argv[i], 6)) {
+	} else if (!strncmp("--cap-uid=", argv[i], 10)) {
+	    unsigned value;
+	    int status;
+
+	    value = nonneg_uint(argv[i]+10, "invalid --cap-uid value", NULL);
+	    status = cap_setuid(value);
+	    if (status < 0) {
+		fprintf(stderr, "Failed to cap_setuid(%u): %s\n",
+			value, strerror(errno));
+		exit(1);
+	    }
+	} else if (!strncmp("--gid=", argv[i], 6)) {
 	    unsigned value;
 	    int status;
 
-	    value = strtoul(argv[i]+6, NULL, 0);
+	    value = nonneg_uint(argv[i]+6, "invalid --gid value", NULL);
 	    status = setgid(value);
 	    if (status < 0) {
 		fprintf(stderr, "Failed to set gid=%u: %s\n",
 			value, strerror(errno));
 		exit(1);
 	    }
-        } else if (!memcmp("--groups=", argv[i], 9)) {
+        } else if (!strncmp("--groups=", argv[i], 9)) {
 	  char *ptr, *buf;
 	  long length, max_groups;
 	  gid_t *group_list;
 	  int g_count;
 
-	  length = sysconf(_SC_GETGR_R_SIZE_MAX);
+	  length = safe_sysconf(_SC_GETGR_R_SIZE_MAX);
 	  buf = calloc(1, length);
 	  if (NULL == buf) {
 	    fprintf(stderr, "No memory for [%s] operation\n", argv[i]);
 	    exit(1);
 	  }
 
-	  max_groups = sysconf(_SC_NGROUPS_MAX);
+	  max_groups = safe_sysconf(_SC_NGROUPS_MAX);
 	  group_list = calloc(max_groups, sizeof(gid_t));
 	  if (NULL == group_list) {
 	    fprintf(stderr, "No memory for gid list\n");
@@ -392,8 +870,7 @@ int main(int argc, char *argv[], char *envp[])
 	    }
 	    if (!isdigit(*ptr)) {
 	      struct group *g, grp;
-	      getgrnam_r(ptr, &grp, buf, length, &g);
-	      if (NULL == g) {
+	      if (getgrnam_r(ptr, &grp, buf, length, &g) || NULL == g) {
 		fprintf(stderr, "Failed to identify gid for group [%s]\n", ptr);
 		exit(1);
 	      }
@@ -408,7 +885,7 @@ int main(int argc, char *argv[], char *envp[])
 	    exit(1);
 	  }
 	  free(group_list);
-	} else if (!memcmp("--user=", argv[i], 7)) {
+	} else if (!strncmp("--user=", argv[i], 7)) {
 	    struct passwd *pwd;
 	    const char *user;
 	    gid_t groups[MAX_GROUPS];
@@ -426,24 +903,32 @@ int main(int argc, char *argv[], char *envp[])
 	      perror("Unable to get group list for user");
 	      exit(1);
 	    }
-	    status = setgroups(ngroups, groups);
+	    status = cap_setgroups(pwd->pw_gid, ngroups, groups);
 	    if (status != 0) {
-	      perror("Unable to set group list for user");
-	      exit(1);
-	    }
-	    status = setgid(pwd->pw_gid);
-	    if (status < 0) {
-		fprintf(stderr, "Failed to set gid=%u(user=%s): %s\n",
-			pwd->pw_gid, user, strerror(errno));
+		perror("Unable to set group list for user");
 		exit(1);
 	    }
-	    status = setuid(pwd->pw_uid);
+	    status = cap_setuid(pwd->pw_uid);
 	    if (status < 0) {
 		fprintf(stderr, "Failed to set uid=%u(user=%s): %s\n",
 			pwd->pw_uid, user, strerror(errno));
 		exit(1);
 	    }
-	} else if (!memcmp("--decode=", argv[i], 9)) {
+	    if (!dont_set_env) {
+		/*
+		 * not setting this confuses bash at start up, but use
+		 * --noenv to preserve the HOME etc values instead.
+		 */
+		if (setenv("HOME", pwd->pw_dir, 1) != 0) {
+		    perror("unable to set HOME");
+		    exit(1);
+		}
+		if (setenv("USER", user, 1) != 0) {
+		    perror("unable to set USER");
+		    exit(1);
+		}
+	    }
+	} else if (!strncmp("--decode=", argv[i], 9)) {
 	    unsigned long long value;
 	    unsigned cap;
 	    const char *sep = "";
@@ -468,7 +953,7 @@ int main(int argc, char *argv[], char *envp[])
 		}
 	    }
 	    printf("\n");
-        } else if (!memcmp("--supports=", argv[i], 11)) {
+        } else if (!strncmp("--supports=", argv[i], 11)) {
 	    cap_value_t cap;
 
 	    if (cap_from_name(argv[i] + 11, &cap) < 0) {
@@ -482,111 +967,245 @@ int main(int argc, char *argv[], char *envp[])
 		exit(1);
 	    }
 	} else if (!strcmp("--print", argv[i])) {
-	    unsigned cap;
-	    int set, status, j;
-	    cap_t all;
-	    char *text;
-	    const char *sep;
-	    struct group *g;
-	    gid_t groups[MAX_GROUPS], gid;
-	    uid_t uid;
-	    struct passwd *u;
-
-	    all = cap_get_proc();
-	    text = cap_to_text(all, NULL);
-	    printf("Current: %s\n", text);
-	    cap_free(text);
-	    cap_free(all);
-
-	    printf("Bounding set =");
- 	    sep = "";
-	    for (cap=0; (set = cap_get_bound(cap)) >= 0; cap++) {
-		char *ptr;
-		if (!set) {
-		    continue;
+	    arg_print();
+	} else if ((!strcmp("--", argv[i])) || (!strcmp("==", argv[i]))
+		   || (!strcmp("-+", argv[i])) ||  (!strcmp("=+", argv[i]))) {
+	    int launch = argv[i][1] == '+';
+	    if (argv[i][0] == '=') {
+		if (quiet_start) {
+		    argv[i--] = strdup("--quiet");
 		}
+	        argv[i] = find_self(argv[0]);
+	    } else {
+	        argv[i] = strdup(shell);
+	    }
+	    argv[argc] = NULL;
+	    /* Two ways to chain load - use cap_launch() or execve() */
+	    if (launch) {
+		do_launch(argv+i, envp);
+	    }
+	    execve(argv[i], argv+i, envp);
+	    fprintf(stderr, "execve '%s' failed!\n", argv[i]);
+	    free(argv[i]);
+	    exit(1);
+	} else if (!strncmp("--shell=", argv[i], 8)) {
+	    shell = argv[i]+8;
+	} else if (!strncmp("--has-p=", argv[i], 8)) {
+	    cap_value_t cap;
+	    cap_flag_value_t enabled;
+	    cap_t orig;
 
-		ptr = cap_to_name(cap);
-		if (ptr == NULL) {
-		    printf("%s%u", sep, cap);
-		} else {
-		    printf("%s%s", sep, ptr);
-		    cap_free(ptr);
-		}
-		sep = ",";
+	    if (cap_from_name(argv[i]+8, &cap) < 0) {
+		fprintf(stderr, "cap[%s] not recognized by library\n",
+			argv[i] + 8);
+		exit(1);
 	    }
-	    printf("\n");
-	    set = prctl(PR_GET_SECUREBITS);
-	    if (set >= 0) {
-		const char *b;
-		b = binary(set);  /* use verilog convention for binary string */
-		printf("Securebits: 0%o/0x%x/%u'b%s\n", set, set,
-		       (unsigned) strlen(b), b);
-		printf(" secure-noroot: %s (%s)\n",
-		       (set & 1) ? "yes":"no",
-		       (set & 2) ? "locked":"unlocked");
-		printf(" secure-no-suid-fixup: %s (%s)\n",
-		       (set & 4) ? "yes":"no",
-		       (set & 8) ? "locked":"unlocked");
-		printf(" secure-keep-caps: %s (%s)\n",
-		       (set & 16) ? "yes":"no",
-		       (set & 32) ? "locked":"unlocked");
-	    } else {
-		printf("[Securebits ABI not supported]\n");
-		set = prctl(PR_GET_KEEPCAPS);
-		if (set >= 0) {
-		    printf(" prctl-keep-caps: %s (locking not supported)\n",
-			   set ? "yes":"no");
-		} else {
-		    printf("[Keepcaps ABI not supported]\n");
-		}
+	    orig = cap_get_proc();
+	    if (orig == NULL) {
+		perror("failed to get process capabilities");
+		exit(1);
+	    }
+	    if (cap_get_flag(orig, cap, CAP_PERMITTED, &enabled) || !enabled) {
+		fprintf(stderr, "cap[%s] not permitted\n", argv[i]+8);
+		exit(1);
+	    }
+	    cap_free(orig);
+	} else if (!strncmp("--has-i=", argv[i], 8)) {
+	    cap_value_t cap;
+	    cap_flag_value_t enabled;
+	    cap_t orig;
+
+	    if (cap_from_name(argv[i]+8, &cap) < 0) {
+		fprintf(stderr, "cap[%s] not recognized by library\n",
+			argv[i] + 8);
+		exit(1);
 	    }
+	    orig = cap_get_proc();
+	    if (orig == NULL) {
+		perror("failed to get process capabilities");
+		exit(1);
+	    }
+	    if (cap_get_flag(orig, cap, CAP_INHERITABLE, &enabled)
+		|| !enabled) {
+		fprintf(stderr, "cap[%s] not inheritable\n", argv[i]+8);
+		exit(1);
+	    }
+	    cap_free(orig);
+	} else if (!strncmp("--has-a=", argv[i], 8)) {
+	    cap_value_t cap;
+	    if (cap_from_name(argv[i]+8, &cap) < 0) {
+		fprintf(stderr, "cap[%s] not recognized by library\n",
+			argv[i] + 8);
+		exit(1);
+	    }
+	    if (!cap_get_ambient(cap)) {
+		fprintf(stderr, "cap[%s] not in ambient vector\n", argv[i]+8);
+		exit(1);
+	    }
+	} else if (!strncmp("--has-b=", argv[i], 8)) {
+	    cap_value_t cap;
+	    if (cap_from_name(argv[i]+8, &cap) < 0) {
+		fprintf(stderr, "cap[%s] not recognized by library\n",
+			argv[i] + 8);
+		exit(1);
+	    }
+	    if (!cap_get_bound(cap)) {
+		fprintf(stderr, "cap[%s] not in bounding vector\n", argv[i]+8);
+		exit(1);
+	    }
+	} else if (!strncmp("--is-uid=", argv[i], 9)) {
+	    unsigned value;
+	    uid_t uid;
+	    value = nonneg_uint(argv[i]+9, "invalid --is-uid value", NULL);
 	    uid = getuid();
-	    u = getpwuid(uid);
-	    printf("uid=%u(%s)\n", getuid(), u ? u->pw_name : "???");
+	    if (uid != value) {
+		fprintf(stderr, "uid: got=%d, want=%d\n", uid, value);
+		exit(1);
+	    }
+	} else if (!strncmp("--is-gid=", argv[i], 9)) {
+	    unsigned value;
+	    gid_t gid;
+	    value = nonneg_uint(argv[i]+9, "invalid --is-gid value", NULL);
 	    gid = getgid();
-	    g = getgrgid(gid);
-	    printf("gid=%u(%s)\n", gid, g ? g->gr_name : "???");
-	    printf("groups=");
-	    status = getgroups(MAX_GROUPS, groups);
-	    sep = "";
-	    for (j=0; j < status; j++) {
-		g = getgrgid(groups[j]);
-		printf("%s%u(%s)", sep, groups[j], g ? g->gr_name : "???");
-		sep = ",";
+	    if (gid != value) {
+		fprintf(stderr, "gid: got=%d, want=%d\n", gid, value);
+		exit(1);
 	    }
-	    printf("\n");
-	} else if ((!strcmp("--", argv[i])) || (!strcmp("==", argv[i]))) {
-	    argv[i] = strdup(argv[i][0] == '-' ? "/bin/bash" : argv[0]);
-	    argv[argc] = NULL;
-	    execve(argv[i], argv+i, envp);
-	    fprintf(stderr, "execve /bin/bash failed!\n");
-	    exit(1);
+	} else if (!strncmp("--iab=", argv[i], 6)) {
+	    cap_iab_t iab = cap_iab_from_text(argv[i]+6);
+	    if (iab == NULL) {
+		fprintf(stderr, "iab: '%s' malformed\n", argv[i]+6);
+		exit(1);
+	    }
+	    if (cap_iab_set_proc(iab)) {
+		perror("unable to set IAB tuple");
+		exit(1);
+	    }
+	    cap_free(iab);
+	} else if (!strcmp("--no-new-privs", argv[i])) {
+	    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0) != 0) {
+		perror("unable to set no-new-privs");
+		exit(1);
+	    }
+	} else if (!strcmp("--has-no-new-privs", argv[i])) {
+	    if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0, 0) != 1) {
+		fprintf(stderr, "no-new-privs not set\n");
+		exit(1);
+	    }
+	} else if (!strcmp("--license", argv[i])) {
+	    printf(
+		"%s see License file for details.\n"
+		"Copyright (c) 2008-11,16,19-21 Andrew G. Morgan"
+		" <morgan@kernel.org>\n", argv[0]);
+	    exit(0);
+	} else if (!strncmp("--explain=", argv[i], 10)) {
+	    cap_value_t cap;
+	    if (cap_from_name(argv[i]+10, &cap) != 0) {
+		fprintf(stderr, "unrecognised value '%s'\n", argv[i]+10);
+		exit(1);
+	    }
+	    if (cap < 0) {
+		fprintf(stderr, "negative capability (%d) invalid\n", cap);
+		exit(1);
+	    }
+	    if (cap < capsh_doc_limit) {
+		describe(cap);
+		continue;
+	    }
+	    if (cap < cap_max_bits()) {
+		printf("<unnamed in libcap> (%d)", cap);
+	    } else {
+		printf("<unsupported> (%d)", cap);
+	    }
+	    printf(" [/proc/self/status:CapXXX: 0x%016llx]\n", 1ULL<<cap);
+	} else if (!strncmp("--suggest=", argv[i], 10)) {
+	    cap_value_t cap;
+	    int hits = 0;
+	    for (cap=0; cap < capsh_doc_limit; cap++) {
+		const char **lines = explanations[cap];
+		int j;
+		char *name = cap_to_name(cap);
+		if (name == NULL) {
+		    perror("invalid named cap");
+		    exit(1);
+		}
+		char *match = strcasestr(name, argv[i]+10);
+		cap_free(name);
+		if (match != NULL) {
+		    if (hits++) {
+			printf("\n");
+		    }
+		    describe(cap);
+		    continue;
+		}
+		for (j=0; lines[j]; j++) {
+		    if (strcasestr(lines[j], argv[i]+10) != NULL) {
+			if (hits++) {
+			    printf("\n");
+			}
+			describe(cap);
+			break;
+		    }
+		}
+	    }
+	} else if (strcmp("--current", argv[i]) == 0) {
+	    display_current();
+	    display_current_iab();
 	} else {
 	usage:
 	    printf("usage: %s [args ...]\n"
-		   "  --help         this message (or try 'man capsh')\n"
-		   "  --print        display capability relevant state\n"
-		   "  --decode=xxx   decode a hex string to a list of caps\n"
-		   "  --supports=xxx exit 1 if capability xxx unsupported\n"
-		   "  --drop=xxx     remove xxx,.. capabilities from bset\n"
+		   "  --addamb=xxx   add xxx,... capabilities to ambient set\n"
+		   "  --cap-uid=<n>  use libcap cap_setuid() to change uid\n"
 		   "  --caps=xxx     set caps as per cap_from_text()\n"
-		   "  --inh=xxx      set xxx,.. inheritiable set\n"
-		   "  --secbits=<n>  write a new value for securebits\n"
-		   "  --keep=<n>     set keep-capabability bit to <n>\n"
-		   "  --uid=<n>      set uid to <n> (hint: id <username>)\n"
+		   "  --chroot=path  chroot(2) to this path\n"
+		   "  --current      show current caps and IAB vectors\n"
+		   "  --decode=xxx   decode a hex string to a list of caps\n"
+		   "  --delamb=xxx   remove xxx,... capabilities from ambient\n"
+		   "  --drop=xxx     drop xxx,... caps from bounding set\n"
+		   "  --explain=xxx  explain what capability xxx permits\n"
+		   "  --forkfor=<n>  fork and make child sleep for <n> sec\n"
 		   "  --gid=<n>      set gid to <n> (hint: id <username>)\n"
 		   "  --groups=g,... set the supplemental groups\n"
-                   "  --user=<name>  set uid,gid and groups to that of user\n"
-		   "  --chroot=path  chroot(2) to this path\n"
+		   "  --has-a=xxx    exit 1 if capability xxx not ambient\n"
+		   "  --has-b=xxx    exit 1 if capability xxx not bounding\n"
+		   "  --has-ambient  exit 1 unless ambient vector supported\n"
+		   "  --has-i=xxx    exit 1 if capability xxx not inheritable\n"
+		   "  --has-p=xxx    exit 1 if capability xxx not permitted\n"
+		   "  --has-no-new-privs  exit 1 if privs not limited\n"
+		   "  --help, -h     this message (or try 'man capsh')\n"
+		   "  --iab=...      use cap_iab_from_text() to set iab\n"
+		   "  --inh=xxx      set xxx,.. inheritable set\n"
+		   "  --inmode=<xxx> exit 1 if current mode is not <xxx>\n"
+		   "  --is-uid=<n>   exit 1 if uid != <n>\n"
+		   "  --is-gid=<n>   exit 1 if gid != <n>\n"
+		   "  --keep=<n>     set keep-capability bit to <n>\n"
 		   "  --killit=<n>   send signal(n) to child\n"
-		   "  --forkfor=<n>  fork and make child sleep for <n> sec\n"
+		   "  --license      display license info\n"
+		   "  --mode         display current libcap mode\n"
+		   "  --mode=<xxx>   set libcap mode to <xxx>\n"
+		   "  --modes        list libcap named modes\n"
+		   "  --no-new-privs set sticky process privilege limiter\n"
+		   "  --noamb        reset (drop) all ambient capabilities\n"
+		   "  --noenv        no fixup of env vars (for --user)\n"
+		   "  --print        display capability relevant state\n"
+		   "  --quiet        if first argument skip max cap check\n"
+		   "  --secbits=<n>  write a new value for securebits\n"
+		   "  --shell=/xx/yy use /xx/yy instead of " SHELL " for --\n"
+		   "  --strict       toggle --caps, --drop and --inh fixups\n"
+		   "  --suggest=text search cap descriptions for text\n"
+		   "  --supports=xxx exit 1 if capability xxx unsupported\n"
+		   "  --uid=<n>      set uid to <n> (hint: id <username>)\n"
+                   "  --user=<name>  set uid,gid and groups to that of user\n"
 		   "  ==             re-exec(capsh) with args as for --\n"
-		   "  --             remaing arguments are for /bin/bash\n"
+		   "  =+             cap_launch capsh with args as for -+\n"
+		   "  --             remaining arguments are for " SHELL "\n"
+		   "  -+             cap_launch " SHELL " with remaining args\n"
 		   "                 (without -- [%s] will simply exit(0))\n",
 		   argv[0], argv[0]);
-
-	    exit(strcmp("--help", argv[i]) != 0);
+	    if (strcmp("--help", argv[1]) && strcmp("-h", argv[1])) {
+		exit(1);
+	    }
+	    exit(0);
 	}
     }
 
diff --git a/progs/capshdoc.c b/progs/capshdoc.c
new file mode 100644
index 0000000..5560ef9
--- /dev/null
+++ b/progs/capshdoc.c
@@ -0,0 +1,422 @@
+#include <stdio.h>
+
+#include "./capshdoc.h"
+
+/*
+ * A line by line explanation of each named capability value
+ */
+static const char *explanation0[] = {  /* cap_chown = 0 */
+    "Allows a process to arbitrarily change the user and",
+    "group ownership of a file.",
+    NULL
+};
+static const char *explanation1[] = {  /* cap_dac_override = 1 */
+    "Allows a process to override of all Discretionary",
+    "Access Control (DAC) access, including ACL execute",
+    "access. That is read, write or execute files that the",
+    "process would otherwise not have access to. This",
+    "excludes DAC access covered by CAP_LINUX_IMMUTABLE.",
+    NULL
+};
+static const char *explanation2[] = {  /* cap_dac_read_search = 2 */
+    "Allows a process to override all DAC restrictions",
+    "limiting the read and search of files and",
+    "directories. This excludes DAC access covered by",
+    "CAP_LINUX_IMMUTABLE.",
+    NULL
+};
+static const char *explanation3[] = {  /* cap_fowner = 3 */
+    "Allows a process to perform operations on files, even",
+    "where file owner ID should otherwise need be equal to",
+    "the UID, except where CAP_FSETID is applicable. It",
+    "doesn't override MAC and DAC restrictions.",
+    "",
+    "This capability permits the deletion of a file owned",
+    "by another UID in a directory protected by the sticky",
+    "(t) bit.",
+    NULL
+};
+static const char *explanation4[] = {  /* cap_fsetid = 4 */
+    "Allows a process to set the S_ISUID and S_ISUID bits of",
+    "the file permissions, even when the process' effective",
+    "UID or GID/supplementary GIDs do not match that of the",
+    "file.",
+    NULL
+};
+static const char *explanation5[] = {  /* cap_kill = 5 */
+    "Allows a process to send a kill(2) signal to any other",
+    "process - overriding the limitation that there be a",
+    "[E]UID match between source and target process.",
+    NULL
+};
+static const char *explanation6[] = {  /* cap_setgid = 6 */
+    "Allows a process to freely manipulate its own GIDs:",
+    "  - arbitrarily set the GID, EGID, REGID, RESGID values",
+    "  - arbitrarily set the supplementary GIDs",
+    "  - allows the forging of GID credentials passed over a",
+    "    socket",
+    NULL
+};
+static const char *explanation7[] = {  /* cap_setuid = 7 */
+    "Allows a process to freely manipulate its own UIDs:",
+    "  - arbitrarily set the UID, EUID, REUID and RESUID",
+    "    values",
+    "  - allows the forging of UID credentials passed over a",
+    "    socket",
+    NULL
+};
+static const char *explanation8[] = {  /* cap_setpcap = 8 */
+    "Allows a process to freely manipulate its inheritable",
+    "capabilities.",
+    "",
+    "Linux supports the POSIX.1e Inheritable set, the POXIX.1e (X",
+    "vector) known in Linux as the Bounding vector, as well as",
+    "the Linux extension Ambient vector.",
+    "",
+    "This capability permits dropping bits from the Bounding",
+    "vector (ie. raising B bits in the libcap IAB",
+    "representation). It also permits the process to raise",
+    "Ambient vector bits that are both raised in the Permitted",
+    "and Inheritable sets of the process. This capability cannot",
+    "be used to raise Permitted bits, Effective bits beyond those",
+    "already present in the process' permitted set, or",
+    "Inheritable bits beyond those present in the Bounding",
+    "vector.",
+    "",
+    "[Historical note: prior to the advent of file capabilities",
+    "(2008), this capability was suppressed by default, as its",
+    "unsuppressed behavior was not auditable: it could",
+    "asynchronously grant its own Permitted capabilities to and",
+    "remove capabilities from other processes arbitrarily. The",
+    "former leads to undefined behavior, and the latter is better",
+    "served by the kill system call.]",
+    NULL
+};
+static const char *explanation9[] = {  /* cap_linux_immutable = 9 */
+    "Allows a process to modify the S_IMMUTABLE and",
+    "S_APPEND file attributes.",
+    NULL
+};
+static const char *explanation10[] = {  /* cap_net_bind_service = 10 */
+    "Allows a process to bind to privileged ports:",
+    "  - TCP/UDP sockets below 1024",
+    "  - ATM VCIs below 32",
+    NULL
+};
+static const char *explanation11[] = {  /* cap_net_broadcast = 11 */
+    "Allows a process to broadcast to the network and to",
+    "listen to multicast.",
+    NULL
+};
+static const char *explanation12[] = {  /* cap_net_admin = 12 */
+    "Allows a process to perform network configuration",
+    "operations:",
+    "  - interface configuration",
+    "  - administration of IP firewall, masquerading and",
+    "    accounting",
+    "  - setting debug options on sockets",
+    "  - modification of routing tables",
+    "  - setting arbitrary process, and process group",
+    "    ownership on sockets",
+    "  - binding to any address for transparent proxying",
+    "    (this is also allowed via CAP_NET_RAW)",
+    "  - setting TOS (Type of service)",
+    "  - setting promiscuous mode",
+    "  - clearing driver statistics",
+    "  - multicasing",
+    "  - read/write of device-specific registers",
+    "  - activation of ATM control sockets",
+    NULL
+};
+static const char *explanation13[] = {  /* cap_net_raw = 13 */
+    "Allows a process to use raw networking:",
+    "  - RAW sockets",
+    "  - PACKET sockets",
+    "  - binding to any address for transparent proxying",
+    "    (also permitted via CAP_NET_ADMIN)",
+    NULL
+};
+static const char *explanation14[] = {  /* cap_ipc_lock = 14 */
+    "Allows a process to lock shared memory segments for IPC",
+    "purposes.  Also enables mlock and mlockall system",
+    "calls.",
+    NULL
+};
+static const char *explanation15[] = {  /* cap_ipc_owner = 15 */
+    "Allows a process to override IPC ownership checks.",
+    NULL
+};
+static const char *explanation16[] = {  /* cap_sys_module = 16 */
+    "Allows a process to initiate the loading and unloading",
+    "of kernel modules. This capability can effectively",
+    "modify kernel without limit.",
+    NULL
+};
+static const char *explanation17[] = {  /* cap_sys_rawio = 17 */
+    "Allows a process to perform raw IO:",
+    "  - permit ioper/iopl access",
+    "  - permit sending USB messages to any device via",
+    "    /dev/bus/usb",
+    NULL
+};
+static const char *explanation18[] = {  /* cap_sys_chroot = 18 */
+    "Allows a process to perform a chroot syscall to change",
+    "the effective root of the process' file system:",
+    "redirect to directory \"/\" to some other location.",
+    NULL
+};
+static const char *explanation19[] = {  /* cap_sys_ptrace = 19 */
+    "Allows a process to perform a ptrace() of any other",
+    "process.",
+    NULL
+};
+static const char *explanation20[] = {  /* cap_sys_pacct = 20 */
+    "Allows a process to configure process accounting.",
+    NULL
+};
+static const char *explanation21[] = {  /* cap_sys_admin = 21 */
+    "Allows a process to perform a somewhat arbitrary",
+    "grab-bag of privileged operations. Over time, this",
+    "capability should weaken as specific capabilities are",
+    "created for subsets of CAP_SYS_ADMINs functionality:",
+    "  - configuration of the secure attention key",
+    "  - administration of the random device",
+    "  - examination and configuration of disk quotas",
+    "  - setting the domainname",
+    "  - setting the hostname",
+    "  - calling bdflush()",
+    "  - mount() and umount(), setting up new SMB connection",
+    "  - some autofs root ioctls",
+    "  - nfsservctl",
+    "  - VM86_REQUEST_IRQ",
+    "  - to read/write pci config on alpha",
+    "  - irix_prctl on mips (setstacksize)",
+    "  - flushing all cache on m68k (sys_cacheflush)",
+    "  - removing semaphores",
+    "  - Used instead of CAP_CHOWN to \"chown\" IPC message",
+    "    queues, semaphores and shared memory",
+    "  - locking/unlocking of shared memory segment",
+    "  - turning swap on/off",
+    "  - forged pids on socket credentials passing",
+    "  - setting readahead and flushing buffers on block",
+    "    devices",
+    "  - setting geometry in floppy driver",
+    "  - turning DMA on/off in xd driver",
+    "  - administration of md devices (mostly the above, but",
+    "    some extra ioctls)",
+    "  - tuning the ide driver",
+    "  - access to the nvram device",
+    "  - administration of apm_bios, serial and bttv (TV)",
+    "    device",
+    "  - manufacturer commands in isdn CAPI support driver",
+    "  - reading non-standardized portions of PCI",
+    "    configuration space",
+    "  - DDI debug ioctl on sbpcd driver",
+    "  - setting up serial ports",
+    "  - sending raw qic-117 commands",
+    "  - enabling/disabling tagged queuing on SCSI",
+    "    controllers and sending arbitrary SCSI commands",
+    "  - setting encryption key on loopback filesystem",
+    "  - setting zone reclaim policy",
+    NULL
+};
+static const char *explanation22[] = {  /* cap_sys_boot = 22 */
+    "Allows a process to initiate a reboot of the system.",
+    NULL
+};
+static const char *explanation23[] = {  /* cap_sys_nice = 23 */
+    "Allows a process to maipulate the execution priorities",
+    "of arbitrary processes:",
+    "  - those involving different UIDs",
+    "  - setting their CPU affinity",
+    "  - alter the FIFO vs. round-robin (realtime)",
+    "    scheduling for itself and other processes.",
+    NULL
+};
+static const char *explanation24[] = {  /* cap_sys_resource = 24 */
+    "Allows a process to adjust resource related parameters",
+    "of processes and the system:",
+    "  - set and override resource limits",
+    "  - override quota limits",
+    "  - override the reserved space on ext2 filesystem",
+    "    (this can also be achieved via CAP_FSETID)",
+    "  - modify the data journaling mode on ext3 filesystem,",
+    "    which uses journaling resources",
+    "  - override size restrictions on IPC message queues",
+    "  - configure more than 64Hz interrupts from the",
+    "    real-time clock",
+    "  - override the maximum number of consoles for console",
+    "    allocation",
+    "  - override the maximum number of keymaps",
+    NULL
+};
+static const char *explanation25[] = {  /* cap_sys_time = 25 */
+    "Allows a process to perform time manipulation of clocks:",
+    "  - alter the system clock",
+    "  - enable irix_stime on MIPS",
+    "  - set the real-time clock",
+    NULL
+};
+static const char *explanation26[] = {  /* cap_sys_tty_config = 26 */
+    "Allows a process to manipulate tty devices:",
+    "  - configure tty devices",
+    "  - perform vhangup() of a tty",
+    NULL
+};
+static const char *explanation27[] = {  /* cap_mknod = 27 */
+    "Allows a process to perform privileged operations with",
+    "the mknod() system call.",
+    NULL
+};
+static const char *explanation28[] = {  /* cap_lease = 28 */
+    "Allows a process to take leases on files.",
+    NULL
+};
+static const char *explanation29[] = {  /* cap_audit_write = 29 */
+    "Allows a process to write to the audit log via a",
+    "unicast netlink socket.",
+    NULL
+};
+static const char *explanation30[] = {  /* cap_audit_control = 30 */
+    "Allows a process to configure audit logging via a",
+    "unicast netlink socket.",
+    NULL
+};
+static const char *explanation31[] = {  /* cap_setfcap = 31 */
+    "Allows a process to set capabilities on files.",
+    "Permits a process to uid_map the uid=0 of the",
+    "parent user namespace into that of the child",
+    "namespace. Also, permits a process to override",
+    "securebits locks through user namespace",
+    "creation.",
+    NULL
+};
+static const char *explanation32[] = {  /* cap_mac_override = 32 */
+    "Allows a process to override Manditory Access Control",
+    "(MAC) access. Not all kernels are configured with a MAC",
+    "mechanism, but this is the capability reserved for",
+    "overriding them.",
+    NULL
+};
+static const char *explanation33[] = {  /* cap_mac_admin = 33 */
+    "Allows a process to configure the Mandatory Access",
+    "Control (MAC) policy. Not all kernels are configured",
+    "with a MAC enabled, but if they are this capability is",
+    "reserved for code to perform administration tasks.",
+    NULL
+};
+static const char *explanation34[] = {  /* cap_syslog = 34 */
+    "Allows a process to configure the kernel's syslog",
+    "(printk) behavior.",
+    NULL
+};
+static const char *explanation35[] = {  /* cap_wake_alarm = 35 */
+    "Allows a process to trigger something that can wake the",
+    "system up.",
+    NULL
+};
+static const char *explanation36[] = {  /* cap_block_suspend = 36 */
+    "Allows a process to block system suspends - prevent the",
+    "system from entering a lower power state.",
+    NULL
+};
+static const char *explanation37[] = {  /* cap_audit_read = 37 */
+    "Allows a process to read the audit log via a multicast",
+    "netlink socket.",
+    NULL
+};
+static const char *explanation38[] = {  /* cap_perfmon = 38 */
+    "Allows a process to enable observability of privileged",
+    "operations related to performance. The mechanisms",
+    "include perf_events, i915_perf and other kernel",
+    "subsystems.",
+    NULL
+};
+static const char *explanation39[] = {  /* cap_bpf = 39 */
+    "Allows a process to manipulate aspects of the kernel",
+    "enhanced Berkeley Packet Filter (BPF) system. This is",
+    "an execution subsystem of the kernel, that manages BPF",
+    "programs. CAP_BPF permits a process to:",
+    "  - create all types of BPF maps",
+    "  - advanced verifier features:",
+    "    - indirect variable access",
+    "    - bounded loops",
+    "    - BPF to BPF function calls",
+    "    - scalar precision tracking",
+    "    - larger complexity limits",
+    "    - dead code elimination",
+    "    - potentially other features",
+    "",
+    "Other capabilities can be used together with CAP_BFP to",
+    "further manipulate the BPF system:",
+    "  - CAP_PERFMON relaxes the verifier checks as follows:",
+    "    - BPF programs can use pointer-to-integer",
+    "      conversions",
+    "    - speculation attack hardening measures can be",
+    "      bypassed",
+    "    - bpf_probe_read to read arbitrary kernel memory is",
+    "      permitted",
+    "    - bpf_trace_printk to print the content of kernel",
+    "      memory",
+    "  - CAP_SYS_ADMIN permits the following:",
+    "    - use of bpf_probe_write_user",
+    "    - iteration over the system-wide loaded programs,",
+    "      maps, links BTFs and convert their IDs to file",
+    "      descriptors.",
+    "  - CAP_PERFMON is required to load tracing programs.",
+    "  - CAP_NET_ADMIN is required to load networking",
+    "    programs.",
+    NULL
+};
+static const char *explanation40[] = {  /* cap_checkpoint_restore = 40 */
+    "Allows a process to perform checkpoint",
+    "and restore operations. Also permits",
+    "explicit PID control via clone3() and",
+    "also writing to ns_last_pid.",
+    NULL
+};
+const char **explanations[] = {
+    explanation0,
+    explanation1,
+    explanation2,
+    explanation3,
+    explanation4,
+    explanation5,
+    explanation6,
+    explanation7,
+    explanation8,
+    explanation9,
+    explanation10,
+    explanation11,
+    explanation12,
+    explanation13,
+    explanation14,
+    explanation15,
+    explanation16,
+    explanation17,
+    explanation18,
+    explanation19,
+    explanation20,
+    explanation21,
+    explanation22,
+    explanation23,
+    explanation24,
+    explanation25,
+    explanation26,
+    explanation27,
+    explanation28,
+    explanation29,
+    explanation30,
+    explanation31,
+    explanation32,
+    explanation33,
+    explanation34,
+    explanation35,
+    explanation36,
+    explanation37,
+    explanation38,
+    explanation39,
+    explanation40,
+};
+
+const int capsh_doc_limit = 41;
diff --git a/progs/capshdoc.h b/progs/capshdoc.h
new file mode 100644
index 0000000..d9cbab9
--- /dev/null
+++ b/progs/capshdoc.h
@@ -0,0 +1,7 @@
+#ifdef CAPSHDOC
+#error "don't include this twice"
+#endif
+#define CAPSHDOC
+
+extern const char **explanations[];
+extern const int capsh_doc_limit;
diff --git a/progs/getcap.c b/progs/getcap.c
index f6debc0..780943d 100644
--- a/progs/getcap.c
+++ b/progs/getcap.c
@@ -1,9 +1,10 @@
 /*
- * Copyright (c) 1997,2007 Andrew G. Morgan  <morgan@kernel.org>
+ * Copyright (c) 1997,2007 Andrew G. Morgan <morgan@kernel.org>
  *
  * This displays the capabilities of a given file.
  */
 
+#undef _XOPEN_SOURCE
 #define _XOPEN_SOURCE 500
 
 #include <errno.h>
@@ -20,15 +21,16 @@
 
 static int verbose = 0;
 static int recursive = 0;
+static int namespace = 0;
 
-static void usage(void)
+static void usage(int code)
 {
     fprintf(stderr,
-	    "usage: getcap [-v] [-r] [-h] <filename> [<filename> ...]\n"
-	    "\n"
-	    "\tdisplays the capabilities on the queried file(s).\n"
+    "usage: getcap [-h] [-l] [-n] [-r] [-v] <filename> [<filename> ...]\n"
+    "\n"
+    "\tdisplays the capabilities on the queried file(s).\n"
 	);
-    exit(1);
+    exit(code);
 }
 
 static int do_getcap(const char *fname, const struct stat *stbuf,
@@ -36,6 +38,7 @@ static int do_getcap(const char *fname, const struct stat *stbuf,
 {
     cap_t cap_d;
     char *result;
+    uid_t rootid;
 
     if (tflag != FTW_F) {
 	if (verbose) {
@@ -46,8 +49,8 @@ static int do_getcap(const char *fname, const struct stat *stbuf,
 
     cap_d = cap_get_file(fname);
     if (cap_d == NULL) {
-	if (errno != ENODATA) {
-	    fprintf(stderr, "Failed to get capabilities of file `%s' (%s)\n",
+	if (errno != ENODATA && errno != ENOTSUP) {
+	    fprintf(stderr, "Failed to get capabilities of file '%s' (%s)\n",
 		    fname, strerror(errno));
 	} else if (verbose) {
 	    printf("%s\n", fname);
@@ -58,12 +61,17 @@ static int do_getcap(const char *fname, const struct stat *stbuf,
     result = cap_to_text(cap_d, NULL);
     if (!result) {
 	fprintf(stderr,
-		"Failed to get capabilities of human readable format at `%s' (%s)\n",
+		"Failed to get capabilities of human readable format at '%s' (%s)\n",
 		fname, strerror(errno));
 	cap_free(cap_d);
 	return 0;
     }
-    printf("%s %s\n", fname, result);
+    rootid = cap_get_nsowner(cap_d);
+    if (namespace && (rootid+1 > 1)) {
+	printf("%s %s [rootid=%d]\n", fname, result, rootid);
+    } else {
+	printf("%s %s\n", fname, result);
+    }
     cap_free(cap_d);
     cap_free(result);
 
@@ -74,7 +82,7 @@ int main(int argc, char **argv)
 {
     int i, c;
 
-    while ((c = getopt(argc, argv, "rvh")) > 0) {
+    while ((c = getopt(argc, argv, "rvhnl")) > 0) {
 	switch(c) {
 	case 'r':
 	    recursive = 1;
@@ -82,21 +90,31 @@ int main(int argc, char **argv)
 	case 'v':
 	    verbose = 1;
 	    break;
+	case 'n':
+	    namespace = 1;
+	    break;
+	case 'h':
+	    usage(0);
+	case 'l':
+	    printf("%s see LICENSE file for details.\n"
+		"Copyright (c) 1997,2007,2021 Andrew G. Morgan"
+		" <morgan@kernel.org>\n", argv[0]);
+	    exit(0);
 	default:
-	    usage();
+	    usage(1);
 	}
     }
 
     if (!argv[optind])
-	usage();
+	usage(1);
 
     for (i=optind; argv[i] != NULL; i++) {
 	struct stat stbuf;
-
-	if (lstat(argv[i], &stbuf) != 0) {
-	    fprintf(stderr, "%s (%s)\n", argv[i], strerror(errno));
+	char *arg = argv[i];
+	if (lstat(arg, &stbuf) != 0) {
+	    fprintf(stderr, "%s (%s)\n", arg, strerror(errno));
 	} else if (recursive) {
-	    nftw(argv[i], do_getcap, 20, FTW_PHYS);
+	    nftw(arg, do_getcap, 20, FTW_PHYS);
 	} else {
 	    int tflag = S_ISREG(stbuf.st_mode) ? FTW_F :
 		(S_ISLNK(stbuf.st_mode) ? FTW_SL : FTW_NS);
diff --git a/progs/getpcaps.c b/progs/getpcaps.c
index e405a92..7e14c36 100644
--- a/progs/getpcaps.c
+++ b/progs/getpcaps.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997,2008 Andrew G. Morgan  <morgan@kernel.org>
+ * Copyright (c) 1997-8,2007-8,19,21-22 Andrew G. Morgan  <morgan@kernel.org>
  *
  * This displays the capabilities of given target process(es).
  */
@@ -11,45 +11,120 @@
 #include <stdlib.h>
 #include <sys/capability.h>
 
-static void usage(void)
+static void usage(int code)
 {
     fprintf(stderr,
-"usage: getcaps <pid> [<pid> ...]\n\n"
+"usage: getcaps [opts] <pid> [<pid> ...]\n\n"
 "  This program displays the capabilities on the queried process(es).\n"
-"  The capabilities are displayed in the cap_from_text(3) format.\n\n"
-"[Copyright (c) 1997-8,2007 Andrew G. Morgan  <morgan@kernel.org>]\n"
-	);
-    exit(1);
+	    "  The capabilities are displayed in the cap_from_text(3) format.\n"
+	    "\n"
+	    "  Optional arguments:\n"
+	    "     --help, -h or --usage display this message.\n"
+	    "     --verbose             use a more verbose output format.\n"
+	    "     --ugly or --legacy    use the archaic legacy output format.\n"
+	    "     --iab                 show IAB of process too.\n"
+	    "     --license             display license info\n");
+    exit(code);
 }
 
 int main(int argc, char **argv)
 {
     int retval = 0;
+    int verbose = 0;
+    int iab = 0;
+    cap_iab_t noiab = cap_iab_init();
 
     if (argc < 2) {
-	usage();
+	usage(1);
     }
 
-    for ( ++argv; --argc > 0; ++argv ) {
-	ssize_t length;
+    for (++argv; --argc > 0; ++argv) {
+	long lpid;
 	int pid;
+	char *endarg;
 	cap_t cap_d;
+	const char *arg = *argv;
 
-	pid = atoi(argv[0]);
+	if (!strcmp(arg, "--help") || !strcmp(arg, "--usage") ||
+	    !strcmp(arg, "-h")) {
+	    usage(0);
+	} else if (!strcmp(arg, "--license")) {
+	    printf("%s see LICENSE file for details.\n"
+		   "[Copyright (c) 1997-8,2007-8,19,21-22"
+		   " Andrew G. Morgan <morgan@kernel.org>]\n",
+		   arg);
+	    exit(0);
+	} else if (!strcmp(arg, "--verbose")) {
+	    verbose = 1;
+	    continue;
+	} else if (!strcmp(arg, "--ugly") || !strcmp(arg, "--legacy")) {
+	    verbose = 2;
+	    continue;
+	} else if (!strcmp(arg, "--iab")) {
+	    iab = 1;
+	    continue;
+	}
+
+	errno = 0;
+	lpid = strtol(arg, &endarg, 10);
+	if (errno == 0) {
+	    if (*endarg != '\0') {
+		errno = EINVAL;
+	    } else if (lpid < 0 || lpid != (pid_t) lpid) {
+		errno = EOVERFLOW;
+	    }
+	}
+	if (errno != 0) {
+	    fprintf(stderr, "Cannot parse pid %s: (%s)\n", arg, strerror(errno));
+	    retval = 1;
+	    continue;
+	}
+	pid = lpid;
 
 	cap_d = cap_get_pid(pid);
 	if (cap_d == NULL) {
-		fprintf(stderr, "Failed to get cap's for proccess %d:"
+		fprintf(stderr, "Failed to get cap's for process %d:"
 			" (%s)\n", pid, strerror(errno));
 		retval = 1;
 		continue;
+	}
+
+	char *result = cap_to_text(cap_d, NULL);
+	if (iab) {
+	    printf("%s:", arg);
+	    if (verbose || strcmp("=", result) != 0) {
+		printf(" \"%s\"", result);
+	    }
+	    cap_iab_t iab_val = cap_iab_get_pid(pid);
+	    if (iab_val == NULL) {
+		fprintf(stderr, " no IAB value for %d\n", pid);
+		exit(1);
+	    }
+	    int cf = cap_iab_compare(noiab, iab_val);
+	    if (verbose ||
+		CAP_IAB_DIFFERS(cf, CAP_IAB_AMB) ||
+		CAP_IAB_DIFFERS(cf, CAP_IAB_BOUND)) {
+		char *iab_text = cap_iab_to_text(iab_val);
+		if (iab_text == NULL) {
+		    perror(" no text for IAB");
+		    exit(1);
+		}
+		printf(" [%s]", iab_text);
+		cap_free(iab_text);
+	    }
+	    cap_free(iab_val);
+	    printf("\n");
+	} else if (verbose == 1) {
+	    printf("Capabilities for '%s': %s\n", arg, result);
+	} else if (verbose == 2) {
+	    fprintf(stderr, "Capabilities for `%s': %s\n", arg, result);
 	} else {
-	    char *result = cap_to_text(cap_d, &length);
-	    fprintf(stderr, "Capabilities for `%s': %s\n", *argv, result);
-	    cap_free(result);
-	    result = NULL;
-	    cap_free(cap_d);
+	    printf("%s: %s\n", arg, result);
 	}
+
+	cap_free(result);
+	result = NULL;
+	cap_free(cap_d);
     }
 
     return retval;
diff --git a/progs/mkcapshdoc.sh b/progs/mkcapshdoc.sh
new file mode 100755
index 0000000..d2ee4bd
--- /dev/null
+++ b/progs/mkcapshdoc.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# This script generates some C code for inclusion in the capsh binary.
+# The Makefile generally only generates the .c code and compares it
+# with the checked in code in the progs directory.
+
+cat<<EOF
+#include <stdio.h>
+
+#include "./capshdoc.h"
+
+/*
+ * A line by line explanation of each named capability value
+ */
+EOF
+
+let x=0
+while [ -f "../doc/values/${x}.txt" ]; do
+    name=$(grep -F ",${x}}" ../libcap/cap_names.list.h|sed -e 's/{"//' -e 's/",/ = /' -e 's/},//')
+    echo "static const char *explanation${x}[] = {  /* ${name} */"
+    sed -e 's/"/\\"/g' -e 's/^/    "/' -e 's/$/",/' "../doc/values/${x}.txt"
+    let x=1+${x}
+    echo "    NULL"
+    echo "};"
+done
+
+cat<<EOF
+const char **explanations[] = {
+EOF
+let y=0
+while [ "${y}" -lt "${x}" ]; do
+    echo "    explanation${y},"
+    let y=1+${y}
+done
+cat<<EOF
+};
+
+const int capsh_doc_limit = ${x};
+EOF
diff --git a/progs/quicktest.sh b/progs/quicktest.sh
index ca6bf1e..59e16b0 100755
--- a/progs/quicktest.sh
+++ b/progs/quicktest.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Run through a series of tests to try out the various capability
-# manipulations posible through exec.
+# manipulations possible through exec.
 #
 # [Run this as root in a root-enabled process tree.]
 
@@ -43,10 +43,17 @@ pass_capsh () {
 }
 
 pass_capsh --print
+pass_capsh --current
 
+# Validate that PATH expansion works
+PATH=$(/bin/pwd)/junk:$(/bin/pwd) capsh == == == --modes
+if [ $? -ne 0 ]; then
+    echo "Failed to execute capsh consecutively for capability manipulation"
+    exit 1
+fi
 
 # Make a local non-setuid-0 version of capsh and call it privileged
-cp ./capsh ./privileged && chmod -s ./privileged
+cp ./tcapsh-static ./privileged && /bin/chmod -s ./privileged
 if [ $? -ne 0 ]; then
     echo "Failed to copy capsh for capability manipulation"
     exit 1
@@ -64,21 +71,37 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
+# validate libcap modes:
+pass_capsh --inh=cap_chown --mode=PURE1E --print --inmode=PURE1E
+pass_capsh --mode=NOPRIV --print --inmode=NOPRIV
+pass_capsh --mode=PURE1E --print --mode=NOPRIV --inmode=NOPRIV
+fail_capsh --mode=NOPRIV --print --mode=PURE1E
+fail_capsh --user=nobody --mode=NOPRIV --print -- ./privileged
+
+# simple IAB setting (no ambient) in pure1e mode.
+pass_capsh --mode=PURE1E --iab='!%cap_chown,cap_setuid'
+
 # Explore keep_caps support
 pass_capsh --keep=0 --keep=1 --keep=0 --keep=1 --print
 
-rm -f tcapsh
-cp capsh tcapsh
-chown root.root tcapsh
-chmod u+s tcapsh
-ls -l tcapsh
+/bin/rm -f tcapsh
+/bin/cp tcapsh-static tcapsh
+/bin/chown root.root tcapsh
+/bin/chmod u+s tcapsh
+/bin/ls -l tcapsh
 
-# leverage keep caps maintain capabilities accross a change of uid
+# leverage keep caps to maintain capabilities across a change of euid
 # from setuid root to capable luser (as per wireshark/dumpcap 0.99.7)
-pass_capsh --uid=500 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_admin=ip\" --uid=500 --caps=\"cap_net_raw,cap_net_admin=pie\" --print"
+# This test is subtle. It is testing that a change to self, dropping
+# euid=0 back to that of the luser keeps capabilities.
+pass_capsh --uid=1 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_bind_service=ip\" --print --uid=1 --print --caps=\"cap_net_raw,cap_net_bind_service=pie\" --print"
+
+# This test is a change of user to a new user. Note that we need to raise
+# the cap_setuid capability (libcap has a function for that) in this case.
+pass_capsh --uid=1 -- -c "./tcapsh --caps=\"cap_net_raw,cap_net_bind_service=ip cap_setuid=p\" --print --cap-uid=2 --print --caps=\"cap_net_raw,cap_net_bind_service=pie\" --print"
 
 # This fails, on 2.6.24, but shouldn't
-pass_capsh --uid=500 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_admin=ip\" --uid=500 --forkfor=10 --caps= --print --killit=9 --print"
+pass_capsh --uid=1 -- -c "./tcapsh --keep=1 --caps=\"cap_net_raw,cap_net_bind_service=ip\" --uid=1 --forkfor=10 --caps= --print --killit=9 --print"
 
 # only continue with these if --secbits is supported
 ./capsh --secbits=0x2f > /dev/null 2>&1
@@ -89,21 +112,40 @@ if [ $? -ne 0 ]; then
     exit 0
 fi
 
+# Resolve nobody's uid here: static compilation of the capsh binary can
+# disable pwd info discovery, so tests below pass --uid=$nouid instead.
+nouid=$(/usr/bin/id nobody -u)
+
 pass_capsh --secbits=42 --print
 fail_capsh --secbits=32 --keep=1 --keep=0 --print
 pass_capsh --secbits=10 --keep=0 --keep=1 --print
-fail_capsh --secbits=47 -- -c "./tcapsh --user=nobody"
+fail_capsh --secbits=47 -- -c "./tcapsh --uid=$nouid"
 
-rm -f tcapsh
+/bin/rm -f tcapsh
 
 # Suppress uid=0 privilege
-fail_capsh --secbits=47 --print -- -c "./capsh --user=nobody"
+fail_capsh --secbits=47 --print -- -c "./capsh --uid=$nouid"
 
 # suppress uid=0 privilege and test this privileged
-pass_capsh --secbits=0x2f --print -- -c "./privileged --user=nobody"
+pass_capsh --secbits=0x2f --print -- -c "./privileged --uid=$nouid"
 
 # observe that the bounding set can be used to suppress this forced capability
-fail_capsh --drop=cap_setuid --secbits=0x2f --print -- -c "./privileged --user=nobody"
+fail_capsh --drop=cap_setuid --secbits=0x2f --print -- \
+	   -c "./privileged --uid=$nouid"
+
+# observe that effective cap_setpcap is required to drop bset
+fail_capsh --caps="=ep cap_setpcap-ep" --drop=cap_setuid --current
+pass_capsh --strict --caps="cap_setpcap=ep" --drop=cap_setuid --current
+fail_capsh --strict --caps="cap_setpcap=p" --drop=cap_setuid --current
+fail_capsh --strict --caps="=ep cap_setpcap-e" --drop=cap_setuid --current
+
+# observe that effective cap_setpcap is required to raise non-p bits
+fail_capsh --strict --caps="cap_setpcap=p" --inh=cap_chown --current
+# non-strict mode and capsh figures it out
+pass_capsh --caps="cap_setpcap=p" --inh=cap_chown --current
+
+# permitted bits can be raised in inheritable flag without being effective.
+pass_capsh --strict --caps="cap_chown=p" --inh=cap_chown --current
 
 # change the way the capability is obtained (make it inheritable)
 ./setcap cap_setuid,cap_setgid=ei ./privileged
@@ -111,15 +153,14 @@ fail_capsh --drop=cap_setuid --secbits=0x2f --print -- -c "./privileged --user=n
 # Note, the bounding set (edited with --drop) only limits p
 # capabilities, not i's.
 pass_capsh --secbits=47 --inh=cap_setuid,cap_setgid --drop=cap_setuid \
-    --uid=500 --print -- -c "./privileged --user=nobody"
-
-rm -f ./privileged
+    --uid=1 --print -- -c "./privileged --uid=$nouid"
 
 # test that we do not support capabilities on setuid shell-scripts
-cat > hack.sh <<EOF
+/bin/cat > hack.sh <<EOF
 #!/bin/bash
+/usr/bin/id
 mypid=\$\$
-caps=\$(./getpcaps \$mypid 2>&1 | cut -d: -f2)
+caps=\$(./getpcaps \$mypid 2>&1 | /usr/bin/cut -d: -f2)
 if [ "\$caps" != " =" ]; then
   echo "Shell script got [\$caps] - you should upgrade your kernel"
   exit 1
@@ -129,20 +170,132 @@ else
 fi
 exit 0
 EOF
-chmod +xs hack.sh
-./capsh --uid=500 --inh=none --print -- ./hack.sh
+/bin/chmod +xs hack.sh
+./capsh --uid=1 --inh=none --print -- ./hack.sh
 status=$?
-rm -f ./hack.sh
+/bin/rm -f ./hack.sh
 if [ $status -ne 0 ]; then
     echo "shell scripts can have capabilities (bug)"
     exit 1
 fi
 
-# Max lockdown
-pass_capsh --keep=1 --user=nobody --caps=cap_setpcap=ep \
-    --drop=all --secbits=0x2f --caps= --print
+# Max lockdown (i.e., pure capability model as POSIX.1e intended).
+secbits=0x2f
+if ./capsh --has-ambient ; then
+    secbits="0xef --noamb"
+fi
+pass_capsh --keep=1 --uid=$nouid --caps=cap_setpcap=ep \
+	   --drop=all --secbits=$secbits --caps= --print
 
 # Verify we can chroot
 pass_capsh --chroot=$(/bin/pwd)
-pass_capsh --chroot=$(/bin/pwd) ==
+pass_capsh -- -c "./tcapsh-static --chroot=$(/bin/pwd) =="
 fail_capsh --chroot=$(/bin/pwd) -- -c "echo oops"
+
+./capsh --has-ambient
+if [ $? -eq 0 ]; then
+    echo "test ambient capabilities"
+
+    # Ambient capabilities (any file can inherit capabilities)
+    pass_capsh --noamb
+
+    # test that shell scripts can inherit through ambient capabilities
+    /bin/cat > hack.sh <<EOF
+#!/bin/bash
+/usr/bin/id
+mypid=\$\$
+caps=\$(./getpcaps \$mypid 2>&1 | /usr/bin/cut -d: -f2)
+if [ "\$caps" != " = cap_setuid+i" ]; then
+  echo "Shell script got [\$caps]"
+  exit 0
+fi
+ls -l \$0
+echo "no capabilities [\$caps] for this shell script"
+exit 1
+EOF
+    /bin/chmod +x hack.sh
+    pass_capsh --keep=1 --uid=$nouid --inh=cap_setuid --addamb=cap_setuid -- \
+	       ./hack.sh
+
+    /bin/rm -f hack.sh
+
+    # Next force the privileged binary to have an empty capability set.
+    # This is sort of the opposite of privileged - it should ensure that
+    # the file can never acquire privilege by the ambient method.
+    ./setcap = ./privileged
+    fail_capsh --keep=1 --uid=$nouid --inh=cap_setuid --addamb=cap_setuid -- \
+	       -c "./privileged --print --uid=1"
+
+    pass_capsh --keep=1 --uid=$nouid --strict \
+	       --caps="cap_setuid=p cap_setpcap=ep" \
+	       --inh=cap_setuid --addamb=cap_setuid --current
+
+    # No effective capabilities are needed to raise or lower ambient values.
+    pass_capsh --keep=1 --uid=$nouid --strict --caps="cap_setuid=p" \
+	       --inh=cap_setuid --addamb=cap_setuid --current
+    pass_capsh --keep=1 --uid=$nouid --strict --iab="!^cap_setuid" \
+	       --caps="cap_setuid=pi" --current --delamb=cap_setuid --current
+
+
+    # finally remove the capability from the privileged binary and try again.
+    ./setcap -r ./privileged
+    pass_capsh --keep=1 --uid=$nouid --inh=cap_setuid --addamb=cap_setuid -- \
+	       -c "./privileged --print --uid=1"
+
+    # validate IAB setting with an ambient capability
+    pass_capsh --iab='!%cap_chown,^cap_setpcap,cap_setuid'
+    fail_capsh --mode=PURE1E --iab='!%cap_chown,^cap_setuid'
+fi
+/bin/rm -f ./privileged
+
+echo "testing namespaced file caps"
+
+# nsprivileged capsh will have an ns rootid value (this is
+# the same setup as an earlier test but with a ns file cap).
+rm -f nsprivileged
+cp ./tcapsh-static ./nsprivileged && /bin/chmod -s ./nsprivileged
+./setcap -n 1 all=ep ./nsprivileged
+if [ $? -eq 0 ]; then
+    ./getcap -n ./nsprivileged | grep -F "[rootid=1]"
+    if [ $? -ne 0 ]; then
+	echo "FAILED setting ns rootid on file"
+	exit 1
+    fi
+    # since this is a ns file cap and not a regular one, it should not
+    # lead to a privilege escalation outside of the namespace it
+    # refers to. We suppress uid=0 privilege and confirm this
+    # nsprivileged binary does not have the power to change uid.
+    fail_capsh --secbits=$secbits --print -- -c "./nsprivileged --uid=$nouid"
+else
+    echo "ns file caps not supported - skipping test"
+fi
+rm -f nsprivileged
+
+# If the build tree compiled the Go cap package.
+if [ -f ../go/compare-cap ]; then
+    cp ../go/compare-cap .
+    LD_LIBRARY_PATH=../libcap ./compare-cap
+    if [ $? -ne 0 ]; then
+	echo "FAILED to execute go binary"
+	exit 1
+    fi
+    LD_LIBRARY_PATH=../libcap ./compare-cap 2>&1 | \
+	grep "skipping file cap tests"
+    if [ $? -eq 0 ]; then
+	echo "FAILED not engaging file cap tests"
+	exit 1
+    fi
+    echo "PASSED"
+else
+    echo "no Go support compiled, so skipping Go tests"
+fi
+rm -f compare-cap
+
+echo "attempt to exploit kernel bug"
+./uns_test
+if [ $? -ne 0 ]; then
+    echo "upgrade your kernel"
+    exit 1
+fi
+
+echo "ALL TESTS PASSED!"
diff --git a/progs/setcap.c b/progs/setcap.c
index 83090ae..737efcc 100644
--- a/progs/setcap.c
+++ b/progs/setcap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997,2007-8 Andrew G. Morgan  <morgan@kernel.org>
+ * Copyright (c) 1997,2007-8,2020,21 Andrew G. Morgan <morgan@kernel.org>
  *
  * This sets/verifies the capabilities of a given file.
  */
@@ -11,18 +11,57 @@
 #include <sys/capability.h>
 #include <unistd.h>
 
-static void usage(void)
+static void usage(int status)
 {
     fprintf(stderr,
-	    "usage: setcap [-q] [-v] (-r|-|<caps>) <filename> "
+	    "usage: setcap [-h] [-q] [-v] [-n <rootid>] (-r|-|<caps>) <filename> "
 	    "[ ... (-r|-|<capsN>) <filenameN> ]\n"
 	    "\n"
 	    " Note <filename> must be a regular (non-symlink) file.\n"
+	    " -r          remove capability from file\n"
+	    " -           read capability text from stdin\n"
+	    " <capsN>     cap_from_text(3) formatted file capability\n"
+	    " [ Note: capsh --suggest=\"something...\" might help you pick. ]"
+	    "\n"
+	    " -h          this message and exit status 0\n"
+	    " -q          quietly\n"
+	    " -v          validate supplied capability matches file\n"
+	    " -n <rootid> write a user namespace (!= 0) limited capability\n"
+	    " --license   display the license info\n"
 	);
-    exit(1);
+    exit(status);
+}
+
+/* parse a positive integer with some error handling */
+static unsigned long pos_uint(const char *text, const char *prefix, int *ok)
+{
+    char *remains;
+    unsigned long value;
+    ssize_t len = strlen(text);
+
+    if (len == 0 || *text == '-') {
+	goto fail;
+    }
+    value = strtoul(text, &remains, 0);
+    if (*remains || value == 0) {
+	goto fail;
+    }
+    if (ok != NULL) {
+	*ok = 1;
+    }
+    return value;
+
+fail:
+    if (ok == NULL) {
+	fprintf(stderr, "%s: want positive integer, got \"%s\"\n",
+		prefix, text);
+	exit(1);
+    }
+    *ok = 0;
+    return 0;
 }
 
-#define MAXCAP  2048
+#define MAXCAP 2048
 
 static int read_caps(int quiet, const char *filename, char *buffer)
 {
@@ -60,12 +99,13 @@ int main(int argc, char **argv)
 {
     int tried_to_cap_setfcap = 0;
     char buffer[MAXCAP+1];
-    int retval, quiet=0, verify=0;
+    int retval, quiet = 0, verify = 0;
     cap_t mycaps;
     cap_value_t capflag;
+    uid_t rootid = 0, f_rootid;
 
-    if (argc < 3) {
-	usage();
+    if (argc < 2) {
+	usage(1);
     }
 
     mycaps = cap_get_proc();
@@ -74,26 +114,50 @@ int main(int argc, char **argv)
 		" (old libcap?)\n");
     }
 
+    cap_t cap_d = NULL;
     while (--argc > 0) {
 	const char *text;
-	cap_t cap_d;
+
+	cap_free(cap_d);
+	cap_d = NULL;
 
 	if (!strcmp(*++argv, "-q")) {
 	    quiet = 1;
 	    continue;
 	}
+	if (!strcmp("--license", *argv)) {
+	    printf(
+		"%s see LICENSE file for details.\n"
+		"Copyright (c) 1997,2007-8,2020-21 Andrew G. Morgan"
+		" <morgan@kernel.org>\n", argv[0]);
+	    exit(0);
+	}
+	if (!strcmp(*argv, "-h")) {
+	    usage(0);
+	}
 	if (!strcmp(*argv, "-v")) {
 	    verify = 1;
 	    continue;
 	}
+	if (!strcmp(*argv, "-n")) {
+	    if (argc < 2) {
+		fprintf(stderr,
+			"usage: .. -n <rootid> .. - rootid!=0 file caps");
+		exit(1);
+	    }
+	    --argc;
+	    rootid = (uid_t) pos_uint(*++argv, "bad ns rootid", NULL);
+	    continue;
+	}
 
 	if (!strcmp(*argv, "-r")) {
+	    cap_free(cap_d);
 	    cap_d = NULL;
 	} else {
 	    if (!strcmp(*argv,"-")) {
 		retval = read_caps(quiet, *argv, buffer);
 		if (retval)
-		    usage();
+		    usage(1);
 		text = buffer;
 	    } else {
 		text = *argv;
@@ -102,21 +166,23 @@ int main(int argc, char **argv)
 	    cap_d = cap_from_text(text);
 	    if (cap_d == NULL) {
 		perror("fatal error");
-		usage();
+		usage(1);
+	    }
+	    if (cap_set_nsowner(cap_d, rootid)) {
+		perror("unable to set nsowner");
+		exit(1);
 	    }
 #ifdef DEBUG
 	    {
-		ssize_t length;
-		const char *result;
-
-		result = cap_to_text(cap_d, &length);
+		char *result = cap_to_text(cap_d, NULL);
 		fprintf(stderr, "caps set to: [%s]\n", result);
+		cap_free(result);
 	    }
 #endif
 	}
 
 	if (--argc <= 0)
-	    usage();
+	    usage(1);
 	/*
 	 * Set the filesystem capability for this file.
 	 */
@@ -125,20 +191,31 @@ int main(int argc, char **argv)
 	    int cmp;
 
 	    if (cap_d == NULL) {
-		cap_d = cap_from_text("=");
+		cap_d = cap_init();
+		if (cap_d == NULL) {
+		    perror("unable to obtain empty capability");
+		    exit(1);
+		}
 	    }
 
 	    cap_on_file = cap_get_file(*++argv);
-
 	    if (cap_on_file == NULL) {
-		cap_on_file = cap_from_text("=");
+		cap_on_file = cap_init();
+		if (cap_on_file == NULL) {
+		    perror("unable to use missing capability");
+		    exit(1);
+		}
 	    }
 
 	    cmp = cap_compare(cap_on_file, cap_d);
+	    f_rootid = cap_get_nsowner(cap_on_file);
 	    cap_free(cap_on_file);
 
-	    if (cmp != 0) {
+	    if (cmp != 0 || rootid != f_rootid) {
 		if (!quiet) {
+		    if (rootid != f_rootid) {
+			printf("nsowner[got=%d, want=%d],", f_rootid, rootid);
+		    }
 		    printf("%s differs in [%s%s%s]\n", *argv,
 			   CAP_DIFFERS(cmp, CAP_PERMITTED) ? "p" : "",
 			   CAP_DIFFERS(cmp, CAP_INHERITABLE) ? "i" : "",
@@ -171,6 +248,8 @@ int main(int argc, char **argv)
 	    retval = cap_set_file(*++argv, cap_d);
 	    if (retval != 0) {
 		int explained = 0;
+		int oerrno = errno;
+		int somebits = 0;
 #ifdef linux
 		cap_value_t cap;
 		cap_flag_value_t per_state;
@@ -178,30 +257,48 @@ int main(int argc, char **argv)
 		for (cap = 0;
 		     cap_get_flag(cap_d, cap, CAP_PERMITTED, &per_state) != -1;
 		     cap++) {
-		    cap_flag_value_t inh_state, eff_state;
+		    cap_flag_value_t inh_state, eff_state, combined;
 
 		    cap_get_flag(cap_d, cap, CAP_INHERITABLE, &inh_state);
 		    cap_get_flag(cap_d, cap, CAP_EFFECTIVE, &eff_state);
-		    if ((inh_state | per_state) != eff_state) {
-			fprintf(stderr, "NOTE: Under Linux, effective file capabilities must either be empty, or\n"
-				"      exactly match the union of selected permitted and inheritable bits.\n");
+		    combined = (inh_state | per_state);
+		    somebits |= !!eff_state;
+		    if (combined != eff_state) {
 			explained = 1;
 			break;
 		    }
 		}
+		if (somebits && explained) {
+		    fprintf(stderr, "NOTE: Under Linux, effective file capabilities must either be empty, or\n"
+			    "      exactly match the union of selected permitted and inheritable bits.\n");
+		}
 #endif /* def linux */
-		
-		fprintf(stderr,
-			"Failed to set capabilities on file `%s' (%s)\n",
-			argv[0], strerror(errno));
-		if (!explained) {
-		    usage();
+
+		switch (oerrno) {
+		case EINVAL:
+		    fprintf(stderr,
+			    "Invalid file '%s' for capability operation\n",
+			    argv[0]);
+		    exit(1);
+		case ENODATA:
+		    if (cap_d == NULL) {
+			fprintf(stderr,
+				"File '%s' has no capablity to remove\n",
+				argv[0]);
+			exit(1);
+		    }
+		    /* FALLTHROUGH */
+		default:
+		    fprintf(stderr,
+			    "Failed to set capabilities on file '%s': %s\n",
+			    argv[0], strerror(oerrno));
+		    exit(1);
 		}
 	    }
 	}
-	if (cap_d) {
-	    cap_free(cap_d);
-	}
+    }
+    if (cap_d) {
+	cap_free(cap_d);
     }
 
     exit(0);
diff --git a/psx/License b/psx/License
new file mode 100644
index 0000000..39108c2
--- /dev/null
+++ b/psx/License
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only */
+
+Unless otherwise *explicitly* stated, the following text describes the
+licensed conditions under which the contents of this libcap/psx release
+may be used and distributed.
+
+The licensed conditions are one or the other of these two Licenses:
+
+  - BSD 3-clause
+  - GPL v2.0
+
+-------------------------------------------------------------------------
+BSD 3-clause:
+-------------
+
+Redistribution and use in source and binary forms of libcap/psx, with
+or without modification, are permitted provided that the following
+conditions are met:
+
+1. Redistributions of source code must retain any existing copyright
+   notice, and this entire permission notice in its entirety,
+   including the disclaimer of warranties.
+
+2. Redistributions in binary form must reproduce all prior and current
+   copyright notices, this list of conditions, and the following
+   disclaimer in the documentation and/or other materials provided
+   with the distribution.
+
+3. The name of any author may not be used to endorse or promote
+   products derived from this software without their specific prior
+   written permission.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
+-------------------------------------------------------------------------
+GPL v2.0:
+---------
+
+ALTERNATIVELY, this product may be distributed under the terms of the
+GNU General Public License (v2.0 - see below), in which case the
+provisions of the GNU GPL are required INSTEAD OF the above
+restrictions.  (This clause is necessary due to a potential conflict
+between the GNU GPL and the restrictions contained in a BSD-style
+copyright.)
+
+-------------------------
+Full text of gpl-2.0.txt:
+-------------------------
+
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/psx/README b/psx/README
new file mode 100644
index 0000000..e4f9001
--- /dev/null
+++ b/psx/README
@@ -0,0 +1,28 @@
+Package "psx" provides an API for invoking system calls in a way that
+each system call is mirrored on all OS threads of the combined Go/CGo
+runtime. Since the Go runtime treats OS threads as interchangeable, a
+feature like this is needed to meaningfully change process privilege
+(including dropping privilege) in a Go program running on Linux. This
+package is required by:
+
+   "kernel.org/pub/linux/libs/security/libcap/cap"
+
+When compiled CGO_ENABLED=0, the functionality requires go1.16+ to
+build. That release of Go introduced syscall.AllThreadsSyscall*()
+APIs.  When compiled this way, the "psx" package functions
+psx.Syscall3() and psx.Syscall6() are aliased to
+syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6()
+respectively.
+
+When compiled CGO_ENABLED=1, the functionality is implemented by C
+code, [lib]psx, which is distributed with libcap.
+
+The official release announcement site for libcap and libpsx is:
+
+   https://sites.google.com/site/fullycapable/
+
+Like libcap/libpsx itself, the "psx" package is distributed with a
+"you choose" License. Specifically: BSD three clause, or GPL2. See the
+License file.
+
+Andrew G. Morgan <morgan@kernel.org>
diff --git a/psx/doc.go b/psx/doc.go
new file mode 100644
index 0000000..c4ba829
--- /dev/null
+++ b/psx/doc.go
@@ -0,0 +1,60 @@
+// Package psx provides support for system calls that are run
+// simultaneously on all threads under Linux.
+//
+// This property can be used to work around a historical lack of
+// native Go support for such a feature. Something that is the subject
+// of:
+//
+//   https://github.com/golang/go/issues/1435
+//
+// The package works differently depending on whether or not
+// CGO_ENABLED is 0 or 1.
+//
+// In the former case, psx is a low overhead wrapper for the two
+// native go calls: syscall.AllThreadsSyscall() and
+// syscall.AllThreadsSyscall6() introduced in go1.16. We provide this
+// wrapping to minimize client source code changes when compiling with
+// or without CGo enabled.
+//
+// In the latter case, and toolchains prior to go1.16, it works via
+// CGo wrappers for system call functions that call the C [lib]psx
+// functions of these names. This ensures that the system calls
+// execute simultaneously on all the pthreads of the Go (and CGo)
+// combined runtime.
+//
+// With CGo, the psx support works in the following way: the pthread
+// that is first asked to execute the syscall does so, and determines
+// if it succeeds or fails. If it fails, it returns immediately
+// without attempting the syscall on other pthreads. If the initial
+// attempt succeeds, however, then the runtime is stopped in order for
+// the same system call to be performed on all the remaining pthreads
+// of the runtime. Once all pthreads have completed the syscall, the
+// return codes are those obtained by the first pthread's invocation
+// of the syscall.
+//
+// Note, there is no need to use this variant of syscall where the
+// syscalls only read state from the kernel. However, since Go's
+// runtime freely migrates code execution between pthreads, support of
+// this type is required for any successful attempt to fully drop or
+// modify the privilege of a running Go program under Linux.
+//
+// More info on how Linux privilege works and examples of using this
+// package can be found here:
+//
+//    https://sites.google.com/site/fullycapable
+//
+// WARNING: For older go toolchains (prior to go1.15), correct
+// compilation of this package may require an extra workaround step:
+//
+// The workaround is to build with the following CGO_LDFLAGS_ALLOW in
+// effect (here the syntax is that of bash for defining an environment
+// variable):
+//
+//    export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*"
+//
+//
+// Copyright (c) 2019,20 Andrew G. Morgan <morgan@kernel.org>
+//
+// The psx package is licensed with a (you choose) BSD 3-clause or
+// GPL2. See the License file for details.
+package psx // import "kernel.org/pub/linux/libs/security/libcap/psx"
diff --git a/psx/go.mod b/psx/go.mod
new file mode 100644
index 0000000..c5f34b6
--- /dev/null
+++ b/psx/go.mod
@@ -0,0 +1,3 @@
+module kernel.org/pub/linux/libs/security/libcap/psx
+
+go 1.11
diff --git a/psx/psx.c b/psx/psx.c
new file mode 100644
index 0000000..65eb2aa
--- /dev/null
+++ b/psx/psx.c
@@ -0,0 +1,765 @@
+/*
+ * Copyright (c) 2019-21 Andrew G Morgan <morgan@kernel.org>
+ *
+ * This file contains a collection of routines that perform thread
+ * synchronization to ensure that a whole process is running as a
+ * single privilege entity - independent of the number of pthreads.
+ *
+ * The whole file would be unnecessary if glibc exported an explicit
+ * psx_syscall()-like function that leveraged the nptl:setxid
+ * mechanism to synchronize thread state over the whole process.
+ */
+#undef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE 199309L
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "psx_syscall.h"
+
+#ifdef _PSX_DEBUG_MEMORY
+
+static void *_psx_calloc(const char *file, const int line,
+			 size_t nmemb, size_t size) {
+    void *ptr = calloc(nmemb, size);
+    fprintf(stderr, "psx:%d:%s:%d: calloc(%ld, %ld) -> %p\n", gettid(),
+	    file, line, (long int)nmemb, (long int)size, ptr);
+    return ptr;
+}
+
+static void _psx_free(const char *file, const int line, void *ptr) {
+    fprintf(stderr, "psx:%d:%s:%d: free(%p)\n", gettid(), file, line, ptr);
+    free(ptr); /* free() is void: "return free(ptr);" is not valid ISO C */
+}
+
+#define calloc(a, b)  _psx_calloc(__FILE__, __LINE__, a, b)
+#define free(a)       _psx_free(__FILE__, __LINE__, a)
+
+#endif /* def _PSX_DEBUG_MEMORY */
+
+/*
+ * psx_load_syscalls() can be weakly defined in dependent libraries to
+ * provide a mechanism for a library to optionally leverage this psx
+ * mechanism. Specifically, when libcap calls psx_load_syscalls() it
+ * provides a weakly declared default that maps its system calls to
+ * the regular system call functions. However, when linked with psx,
+ * this function here overrides the syscalls to be the psx ones.
+ */
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+					      long int, long int, long int),
+		       long int (**syscall6_fn)(long int,
+					       long int, long int, long int,
+					       long int, long int, long int))
+{
+    *syscall_fn = psx_syscall3;
+    *syscall6_fn = psx_syscall6;
+}
+
+/*
+ * type to keep track of registered threads.
+ */
+typedef struct registered_thread_s {
+    struct registered_thread_s *next, *prev;
+    pthread_t thread;
+    pthread_mutex_t mu;
+    int pending;
+    int gone;
+    long int retval;
+    pid_t tid;
+} registered_thread_t;
+
+static pthread_once_t psx_tracker_initialized = PTHREAD_ONCE_INIT;
+
+typedef enum {
+    _PSX_IDLE = 0,
+    _PSX_SETUP = 1,
+    _PSX_SYSCALL = 2,
+    _PSX_CREATE = 3,
+    _PSX_INFORK = 4,
+    _PSX_EXITING = 5,
+} psx_tracker_state_t;
+
+/*
+ * This global structure holds the global coordination state for
+ * libcap's psx_posix_syscall() support.
+ */
+static struct psx_tracker_s {
+    int has_forked;
+
+    pthread_mutex_t state_mu;
+    pthread_cond_t cond; /* this is only used to wait on 'state' changes */
+    psx_tracker_state_t state;
+    int initialized;
+    int psx_sig;
+    psx_sensitivity_t sensitivity;
+
+    struct {
+	long syscall_nr;
+	long arg1, arg2, arg3, arg4, arg5, arg6;
+	int six;
+	int active;
+    } cmd;
+
+    struct sigaction sig_action;
+    struct sigaction chained_action;
+    registered_thread_t *root;
+} psx_tracker;
+
+/*
+ * psx_action_key is used for thread local storage of the thread's
+ * registration.
+ */
+pthread_key_t psx_action_key;
+
+/*
+ * psx_do_registration is called locked and creates a tracker entry
+ * for the current thread with a TLS specific key pointing at the
+ * thread's specific tracker.
+ */
+static void *psx_do_registration(void) {
+    registered_thread_t *node = calloc(1, sizeof(registered_thread_t));
+    if (node == NULL) {
+	perror("unable to register psx handler");
+	_exit(1);
+    }
+    pthread_mutex_init(&node->mu, NULL);
+    node->thread = pthread_self();
+    pthread_setspecific(psx_action_key, node);
+    node->next = psx_tracker.root;
+    if (node->next) {
+	node->next->prev = node;
+    }
+    psx_tracker.root = node;
+    return node;
+}
+
+/*
+ * psx_posix_syscall_actor handles psx_sig: it performs the system call
+ * on the targeted thread (preserving its errno) and clears pending.
+ */
+static void psx_posix_syscall_actor(int signum, siginfo_t *info, void *ignore) {
+    /* not a psx request: chain, but never "call" SIG_DFL or SIG_IGN */
+    if (signum != psx_tracker.psx_sig || !psx_tracker.cmd.active ||
+	info == NULL || info->si_code != SI_TKILL || info->si_pid != getpid()) {
+	const struct sigaction *chained = &psx_tracker.chained_action;
+	if (chained->sa_flags & SA_SIGINFO) {
+	    if (chained->sa_sigaction != NULL) {
+		chained->sa_sigaction(signum, info, ignore);
+	    }
+	} else if (chained->sa_handler != SIG_DFL &&
+		   chained->sa_handler != SIG_IGN) {
+	    chained->sa_handler(signum);
+	}
+	return;
+    }
+
+    /* a failing syscall must not clobber the interrupted code's errno */
+    const int saved_errno = errno;
+    long int retval;
+    if (!psx_tracker.cmd.six) {
+	retval = syscall(psx_tracker.cmd.syscall_nr, psx_tracker.cmd.arg1,
+			 psx_tracker.cmd.arg2, psx_tracker.cmd.arg3);
+    } else {
+	retval = syscall(psx_tracker.cmd.syscall_nr, psx_tracker.cmd.arg1,
+			 psx_tracker.cmd.arg2, psx_tracker.cmd.arg3,
+			 psx_tracker.cmd.arg4, psx_tracker.cmd.arg5,
+			 psx_tracker.cmd.arg6);
+    }
+
+    /*
+     * Only registered threads (TLS set at start-up) should get here,
+     * but a dying thread may already have lost its psx_action_key.
+     */
+    registered_thread_t *ref = pthread_getspecific(psx_action_key);
+    if (ref) {
+	pthread_mutex_lock(&ref->mu);
+	ref->pending = 0;
+	ref->retval = retval;
+	ref->tid = syscall(SYS_gettid);
+	pthread_mutex_unlock(&ref->mu);
+    }
+    errno = saved_errno;
+}
+
+/*
+ * Some forward declarations for the initialization
+ * psx_syscall_start() routine.
+ */
+static void _psx_cleanup(void);
+static void _psx_prepare_fork(void);
+static void _psx_fork_completed(void);
+static void _psx_forked_child(void);
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+			  void *(*start_routine) (void *), void *arg);
+
+/*
+ * psx requires this function to be provided by the linkage wrapping.
+ */
+extern int __real_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+				 void *(*start_routine) (void *), void *arg);
+
+/*
+ * psx_confirm_sigaction reconfirms that the psx handler is the first
+ * handler to respond to the psx signal. It assumes that
+ * psx_tracker.psx_sig has been set.
+ */
+static void psx_confirm_sigaction(void) {
+    sigset_t mask, orig;
+    struct sigaction existing_sa;
+
+    /*
+     * Block psx_sig here; sigprocmask() is unspecified with threads.
+     */
+    sigemptyset(&mask);
+    sigaddset(&mask, psx_tracker.psx_sig);
+    pthread_sigmask(SIG_BLOCK, &mask, &orig);
+
+    sigaction(psx_tracker.psx_sig, NULL, &existing_sa);
+    if (existing_sa.sa_sigaction != psx_posix_syscall_actor) {
+	memcpy(&psx_tracker.chained_action, &existing_sa, sizeof(struct sigaction));
+	psx_tracker.sig_action.sa_sigaction = psx_posix_syscall_actor;
+	sigemptyset(&psx_tracker.sig_action.sa_mask);
+	psx_tracker.sig_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
+	sigaction(psx_tracker.psx_sig, &psx_tracker.sig_action, NULL);
+    }
+
+    pthread_sigmask(SIG_SETMASK, &orig, NULL);
+}
+
+/*
+ * psx_syscall_start initializes the subsystem including initializing
+ * the mutex.
+ */
+static void psx_syscall_start(void) {
+    pthread_mutex_init(&psx_tracker.state_mu, NULL);
+    pthread_cond_init(&psx_tracker.cond, NULL);
+    pthread_key_create(&psx_action_key, NULL);
+    pthread_atfork(_psx_prepare_fork, _psx_fork_completed, _psx_forked_child);
+
+    /*
+     * All sorts of things are assumed by Linux and glibc and/or musl
+     * about signal handlers and which can be blocked. Go has its own
+     * idiosyncrasies too. We tried SIGRTMAX until
+     *
+     *   https://bugzilla.kernel.org/show_bug.cgi?id=210533
+     *
+     * Our current strategy is to aggressively intercept SIGSYS.
+     */
+    psx_tracker.psx_sig = SIGSYS;
+
+    psx_confirm_sigaction();
+    psx_do_registration(); /* register the main thread. */
+    atexit(_psx_cleanup);
+
+    psx_tracker.initialized = 1;
+}
+
+/*
+ * This is the only way this library globally locks. Note, this is not
+ * to be confused with psx_sig (interrupt) blocking - which is
+ * performed around thread creation and when the signal handler is
+ * being confirmed.
+ */
+static void psx_lock(void)
+{
+    pthread_once(&psx_tracker_initialized, psx_syscall_start);
+    pthread_mutex_lock(&psx_tracker.state_mu);
+}
+
+/*
+ * This is the only way this library unlocks.
+ */
+static void psx_unlock(void)
+{
+    pthread_mutex_unlock(&psx_tracker.state_mu);
+}
+
+/*
+ * under lock perform a state transition. Changing state is generally
+ * done via this function. However, there is a single exception in
+ * _psx_cleanup().
+ */
+static void psx_new_state(psx_tracker_state_t was, psx_tracker_state_t is)
+{
+    psx_lock();
+    while (psx_tracker.state != was) {
+	pthread_cond_wait(&psx_tracker.cond, &psx_tracker.state_mu);
+    }
+    psx_tracker.state = is;
+    if (is == _PSX_IDLE) {
+	/* only announce newly idle states since that is all we wait for */
+	pthread_cond_signal(&psx_tracker.cond);
+    }
+    psx_unlock();
+}
+
+long int psx_syscall3(long int syscall_nr,
+		      long int arg1, long int arg2, long int arg3) {
+    return psx_syscall(syscall_nr, arg1, arg2, arg3);
+}
+
+long int psx_syscall6(long int syscall_nr,
+		      long int arg1, long int arg2, long int arg3,
+		      long int arg4, long int arg5, long int arg6) {
+    return psx_syscall(syscall_nr, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+static void _psx_prepare_fork(void) {
+    /*
+     * obtain global lock - we don't want any syscalls while the fork
+     * is occurring since it may interfere with the preparation for
+     * the fork.
+     */
+    psx_new_state(_PSX_IDLE, _PSX_INFORK);
+}
+
+static void _psx_fork_completed(void) {
+    /*
+     * The only way we can get here is if state is _PSX_INFORK and was
+     * previously _PSX_IDLE. Now that the fork has completed, the
+     * parent can continue as if it hadn't happened - the forked child
+     * does not tie its security state to that of the parent process
+     * and threads.
+     *
+     * We don't strictly need to change the psx_tracker.state since we
+     * hold the mutex over the fork, but we do to make deadlock
+     * debugging easier.
+     */
+    psx_new_state(_PSX_INFORK, _PSX_IDLE);
+}
+
+static void _psx_forked_child(void) {
+    /*
+     * The only way we can get here is if state is _PSX_INFORK and was
+     * previously _PSX_IDLE. However, none of the registered threads
+     * exist in this newly minted child process, so we have to reset
+     * the tracking structure to avoid any confusion. We also scuttle
+     * any chance of the PSX API working on more than one thread in
+     * the child by leaving the state as _PSX_INFORK. We do support
+     * all psx_syscall()s by reverting to them being direct in the
+     * fork()ed child.
+     *
+     * We do this because the glibc man page for fork() suggests that
+     * only a subset of things will work post fork(). Specifically,
+     * only "async-signal-safe functions (see signal-safety(7))
+     * until such time as it calls execve(2)" can be relied upon. That
+     * man page suggests that you can't expect mutexes to work: "not
+     * async-signal-safe because it uses pthread_mutex_lock(3)
+     * internally.".
+     */
+    registered_thread_t *next, *old_root;
+    old_root = psx_tracker.root;
+    psx_tracker.root = NULL;
+
+    psx_tracker.has_forked = 1;
+
+    for (; old_root; old_root = next) {
+	next = old_root->next;
+	memset(old_root, 0, sizeof(*old_root));
+	free(old_root);
+    }
+}
+
+/*
+ * called locked to unregister a node from the tracker.
+ */
+static void psx_do_unregister(registered_thread_t *node) {
+    if (psx_tracker.root == node) {
+	psx_tracker.root = node->next;
+    }
+    if (node->next) {
+	node->next->prev = node->prev;
+    }
+    if (node->prev) {
+	node->prev->next = node->next;
+    }
+    pthread_mutex_destroy(&node->mu);
+    memset(node, 0, sizeof(*node));
+    free(node);
+}
+
+typedef struct {
+    void *(*fn)(void *);
+    void *arg;
+    sigset_t sigbits;
+} psx_starter_t;
+
+/*
+ * _psx_exiting is used to cleanup the node for the thread on its exit
+ * path. This is needed for musl libc:
+ *
+ *    https://bugzilla.kernel.org/show_bug.cgi?id=208477
+ *
+ * and likewise for glibc too:
+ *
+ *    https://sourceware.org/bugzilla/show_bug.cgi?id=12889
+ */
+static void _psx_exiting(void *node) {
+    /*
+     * Until we are in the _PSX_EXITING state, we must not block the
+     * psx_sig interrupt for this dying thread. That is, until this
+     * exiting thread can set ref->gone to 1, this dying thread is
+     * still participating in the psx syscall distribution.
+     *
+     * See https://github.com/golang/go/issues/42494 for a situation
+     * where this code is called with psx_tracker.psx_sig blocked.
+     */
+    sigset_t sigbit, orig_sigbits;
+    sigemptyset(&sigbit);
+    pthread_sigmask(SIG_UNBLOCK, &sigbit, &orig_sigbits);
+    sigaddset(&sigbit, psx_tracker.psx_sig);
+    pthread_sigmask(SIG_UNBLOCK, &sigbit, NULL);
+
+    /*
+     * With psx_tracker.psx_sig unblocked we can wait until this
+     * thread can enter the _PSX_EXITING state.
+     */
+    psx_new_state(_PSX_IDLE, _PSX_EXITING);
+
+    /*
+     * We now indicate that this thread is no longer participating in
+     * the psx mechanism.
+     */
+    registered_thread_t *ref = node;
+    pthread_mutex_lock(&ref->mu);
+    ref->gone = 1;
+    pthread_mutex_unlock(&ref->mu);
+
+    /*
+     * At this point, we can restore the calling sigmask to whatever
+     * the caller thought was appropriate for a dying thread to have.
+     */
+    pthread_sigmask(SIG_SETMASK, &orig_sigbits, NULL);
+
+    /*
+     * Allow the rest of the psx system to carry on as per normal.
+     */
+    psx_new_state(_PSX_EXITING, _PSX_IDLE);
+}
+
+/*
+ * _psx_start_fn is a trampoline for the intended start function, it
+ * is called blocked (_PSX_CREATE), but releases the block before
+ * calling starter->fn. Before releasing the block, the TLS specific
+ * attributes are initialized for use by the interrupt handler under
+ * the psx mutex, so it doesn't race with an interrupt received by
+ * this thread and the interrupt handler does not need to poll for
+ * that specific attribute to be present (which is problematic during
+ * thread shutdown).
+ */
+static void *_psx_start_fn(void *data) {
+    void *node = psx_do_registration();
+
+    psx_new_state(_PSX_CREATE, _PSX_IDLE);
+
+    psx_starter_t *starter = data;
+    pthread_sigmask(SIG_SETMASK, &starter->sigbits, NULL);
+    void *(*fn)(void *) = starter->fn;
+    void *arg = starter->arg;
+
+    memset(data, 0, sizeof(*starter));
+    free(data);
+
+    void *ret;
+
+    pthread_cleanup_push(_psx_exiting, node);
+    ret = fn(arg);
+    pthread_cleanup_pop(1);
+
+    return ret;
+}
+
+/*
+ * __wrap_pthread_create is the wrapped destination of all regular
+ * pthread_create calls.
+ */
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+			  void *(*start_routine) (void *), void *arg) {
+    psx_starter_t *starter = calloc(1, sizeof(psx_starter_t));
+    if (starter == NULL) {
+	perror("failed at thread creation");
+	exit(1);
+    }
+    starter->fn = start_routine;
+    starter->arg = arg;
+    /*
+     * Until we are in the _PSX_IDLE state and locked, we must not
+     * block the psx_sig interrupt for this parent thread. Arrange
+     * that parent thread and newly created one can restore signal
+     * mask.
+     */
+    sigset_t sigbit, orig_sigbits;
+    sigemptyset(&sigbit);
+    pthread_sigmask(SIG_UNBLOCK, &sigbit, &starter->sigbits);
+    sigaddset(&sigbit, psx_tracker.psx_sig);
+    pthread_sigmask(SIG_UNBLOCK, &sigbit, &orig_sigbits);
+
+    psx_new_state(_PSX_IDLE, _PSX_CREATE);
+
+    /*
+     * until the child thread has been blessed with its own TLS
+     * specific attribute(s) we prevent either the parent thread or
+     * the new one from experiencing a PSX interrupt.
+     */
+    pthread_sigmask(SIG_BLOCK, &sigbit, NULL);
+
+    int ret = __real_pthread_create(thread, attr, _psx_start_fn, starter);
+    if (ret > 0) {
+	psx_new_state(_PSX_CREATE, _PSX_IDLE);
+	memset(starter, 0, sizeof(*starter));
+	free(starter);
+    } /* else unlock happens in _psx_start_fn */
+
+    /* the parent can once again receive psx interrupt signals */
+    pthread_sigmask(SIG_SETMASK, &orig_sigbits, NULL);
+
+    return ret;
+}
+
+/*
+ * __psx_immediate_syscall does one syscall using the current
+ * process.
+ */
+static long int __psx_immediate_syscall(long int syscall_nr,
+					int count, long int *arg) {
+    psx_tracker.cmd.syscall_nr = syscall_nr;
+    psx_tracker.cmd.arg1 = count > 0 ? arg[0] : 0;
+    psx_tracker.cmd.arg2 = count > 1 ? arg[1] : 0;
+    psx_tracker.cmd.arg3 = count > 2 ? arg[2] : 0;
+
+    if (count > 3) {
+	psx_tracker.cmd.six = 1;
+	psx_tracker.cmd.arg4 = arg[3];
+	psx_tracker.cmd.arg5 = count > 4 ? arg[4] : 0;
+	psx_tracker.cmd.arg6 = count > 5 ? arg[5] : 0;
+	return syscall(syscall_nr,
+		      psx_tracker.cmd.arg1,
+		      psx_tracker.cmd.arg2,
+		      psx_tracker.cmd.arg3,
+		      psx_tracker.cmd.arg4,
+		      psx_tracker.cmd.arg5,
+		      psx_tracker.cmd.arg6);
+    }
+
+    psx_tracker.cmd.six = 0;
+    return syscall(syscall_nr, psx_tracker.cmd.arg1,
+		   psx_tracker.cmd.arg2, psx_tracker.cmd.arg3);
+}
+
+/*
+ * __psx_syscall performs the syscall on the current thread and if no
+ * error is detected it ensures that the syscall is also performed on
+ * all (other) registered threads. The return code is the value for
+ * the first invocation. It uses a trick to figure out how many
+ * arguments the user has supplied. The other half of the trick is
+ * provided by the macro psx_syscall() in the <sys/psx_syscall.h>
+ * file. The trick is the 7th optional argument (8th over all) to
+ * __psx_syscall is the count of arguments supplied to psx_syscall.
+ *
+ * User:
+ *                       psx_syscall(nr, a, b);
+ * Expanded by macro to:
+ *                       __psx_syscall(nr, a, b, 6, 5, 4, 3, 2, 1, 0);
+ * The eighth arg is now ------------------------------------^
+ */
+long int __psx_syscall(long int syscall_nr, ...) {
+    long int arg[7];
+    int i;
+
+    va_list aptr;
+    va_start(aptr, syscall_nr);
+    for (i = 0; i < 7; i++) {
+	arg[i] = va_arg(aptr, long int);
+    }
+    va_end(aptr);
+
+    int count = arg[6];
+    if (count < 0 || count > 6) {
+	errno = EINVAL;
+	return -1;
+    }
+
+    if (psx_tracker.has_forked) {
+	return __psx_immediate_syscall(syscall_nr, count, arg);
+    }
+
+    psx_new_state(_PSX_IDLE, _PSX_SETUP);
+    psx_confirm_sigaction();
+
+    long int ret;
+
+    ret = __psx_immediate_syscall(syscall_nr, count, arg);
+    if (ret == -1 || !psx_tracker.initialized) {
+	psx_new_state(_PSX_SETUP, _PSX_IDLE);
+	goto defer;
+    }
+
+    int restore_errno = errno;
+
+    psx_new_state(_PSX_SETUP, _PSX_SYSCALL);
+    psx_tracker.cmd.active = 1;
+
+    pthread_t self = pthread_self();
+    registered_thread_t *next = NULL, *ref;
+
+    psx_lock();
+    for (ref = psx_tracker.root; ref; ref = next) {
+	next = ref->next;
+	if (ref->thread == self) {
+	    continue;
+	}
+	pthread_mutex_lock(&ref->mu);
+	ref->pending = 1;
+	int gone = ref->gone;
+	if (!gone) {
+	    gone = pthread_kill(ref->thread, psx_tracker.psx_sig) != 0;
+	}
+	pthread_mutex_unlock(&ref->mu);
+	if (!gone) {
+	    continue;
+	}
+	/*
+	 * need to remove invalid thread id from linked list
+	 */
+	psx_do_unregister(ref);
+    }
+    psx_unlock();
+
+    int mismatch = 0;
+    for (;;) {
+	int waiting = 0;
+	psx_lock();
+	for (ref = psx_tracker.root; ref; ref = next) {
+	    next = ref->next;
+	    if (ref->thread == self) {
+		continue;
+	    }
+
+	    pthread_mutex_lock(&ref->mu);
+	    int pending = ref->pending;
+	    int gone = ref->gone;
+	    if (!gone) {
+		if (pending) {
+		    gone = (pthread_kill(ref->thread, 0) != 0);
+		} else {
+		    mismatch |= (ref->retval != ret);
+		}
+	    }
+	    pthread_mutex_unlock(&ref->mu);
+	    if (!gone) {
+		waiting += pending;
+		continue;
+	    }
+	    /*
+	     * need to remove invalid thread id from linked list
+	     */
+	    psx_do_unregister(ref);
+	}
+	psx_unlock();
+	if (!waiting) {
+	    break;
+	}
+	sched_yield();
+    }
+
+    psx_tracker.cmd.active = 0;
+    if (mismatch) {
+	psx_lock();
+	switch (psx_tracker.sensitivity) {
+	case PSX_IGNORE:
+	    break;
+	default:
+	    fprintf(stderr, "psx_syscall result differs.\n");
+	    if (psx_tracker.cmd.six) {
+		fprintf(stderr, "trap:%ld a123456=[%ld,%ld,%ld,%ld,%ld,%ld]\n",
+			psx_tracker.cmd.syscall_nr,
+			psx_tracker.cmd.arg1,
+			psx_tracker.cmd.arg2,
+			psx_tracker.cmd.arg3,
+			psx_tracker.cmd.arg4,
+			psx_tracker.cmd.arg5,
+			psx_tracker.cmd.arg6);
+	    } else {
+		fprintf(stderr, "trap:%ld a123=[%ld,%ld,%ld]\n",
+			psx_tracker.cmd.syscall_nr,
+			psx_tracker.cmd.arg1,
+			psx_tracker.cmd.arg2,
+			psx_tracker.cmd.arg3);
+	    }
+	    fprintf(stderr, "results:");
+	    for (ref = psx_tracker.root; ref; ref = next) {
+		next = ref->next;
+		if (ref->thread == self) {
+		    continue;
+		}
+		if (ret != ref->retval) {
+		    fprintf(stderr, " %d={%ld}", ref->tid, ref->retval);
+		}
+	    }
+	    fprintf(stderr, " wanted={%ld}\n", ret);
+	    if (psx_tracker.sensitivity == PSX_WARNING) {
+		break;
+	    }
+	    pthread_kill(self, SIGSYS);
+	}
+	psx_unlock();
+    }
+    errno = restore_errno;
+    psx_new_state(_PSX_SYSCALL, _PSX_IDLE);
+
+defer:
+    return ret;
+}
+
+/*
+ * _psx_cleanup is called when the program exits. It is used to free
+ * any memory used by the thread tracker.
+ */
+static void _psx_cleanup(void) {
+    registered_thread_t *ref, *next;
+
+    /*
+     * We enter the exiting state. Unlike exiting a single thread we
+     * never leave this state since this cleanup is only done at
+     * program exit.
+     */
+    psx_lock();
+    while (psx_tracker.state != _PSX_IDLE && psx_tracker.state != _PSX_INFORK) {
+	pthread_cond_wait(&psx_tracker.cond, &psx_tracker.state_mu);
+    }
+    psx_tracker.state = _PSX_EXITING;
+    psx_unlock();
+
+    for (ref = psx_tracker.root; ref; ref = next) {
+	next = ref->next;
+	psx_do_unregister(ref);
+    }
+}
+
+/*
+ * Change the PSX sensitivity level. If the threads appear to have
+ * diverged in behavior, this can cause the library to notify the
+ * user.
+ */
+int psx_set_sensitivity(psx_sensitivity_t level) {
+    if (level < PSX_IGNORE || level > PSX_ERROR) {
+	errno = EINVAL;
+	return -1;
+    }
+    psx_lock();
+    psx_tracker.sensitivity = level;
+    psx_unlock();
+    return 0;
+}
diff --git a/psx/psx.go b/psx/psx.go
new file mode 100644
index 0000000..130f0cb
--- /dev/null
+++ b/psx/psx.go
@@ -0,0 +1,35 @@
+// +build linux,!cgo
+// +build go1.16
+
+package psx // import "kernel.org/pub/linux/libs/security/libcap/psx"
+
+import "syscall"
+
+// Documentation for these functions is provided in the psx_cgo.go
+// file.
+
+//go:uintptrescapes
+
+// Syscall3 performs a 3 argument syscall.  Syscall3 differs from
+// syscall.[Raw]Syscall() insofar as it is simultaneously executed on
+// every thread of the combined Go and CGo runtimes. It works
+// differently depending on whether CGO_ENABLED is 1 or 0 at compile
+// time.
+//
+// If CGO_ENABLED=1 it uses the libpsx function C.psx_syscall3().
+//
+// If CGO_ENABLED=0 it redirects to the go1.16+
+// syscall.AllThreadsSyscall() function.
+func Syscall3(syscallnr, arg1, arg2, arg3 uintptr) (uintptr, uintptr, syscall.Errno) {
+	return syscall.AllThreadsSyscall(syscallnr, arg1, arg2, arg3)
+}
+
+//go:uintptrescapes
+
+// Syscall6 performs a 6 argument syscall on every thread of the
+// combined Go and CGo runtimes. Other than the number of syscall
+// arguments, its behavior is identical to that of Syscall3() - see
+// above for the full documentation.
+func Syscall6(syscallnr, arg1, arg2, arg3, arg4, arg5, arg6 uintptr) (uintptr, uintptr, syscall.Errno) {
+	return syscall.AllThreadsSyscall6(syscallnr, arg1, arg2, arg3, arg4, arg5, arg6)
+}
diff --git a/psx/psx_cgo.go b/psx/psx_cgo.go
new file mode 100644
index 0000000..1f75137
--- /dev/null
+++ b/psx/psx_cgo.go
@@ -0,0 +1,91 @@
+// +build linux,cgo
+
+package psx // import "kernel.org/pub/linux/libs/security/libcap/psx"
+
+import (
+	"runtime"
+	"sync"
+	"syscall"
+)
+
+// #cgo LDFLAGS: -lpthread -Wl,-wrap,pthread_create
+//
+// #include <errno.h>
+// #include "psx_syscall.h"
+//
+// long __errno_too(long set_errno) {
+//     long v = errno;
+//     if (set_errno >= 0) {
+//       errno = set_errno;
+//     }
+//     return v;
+// }
+import "C"
+
+// setErrno returns the current C.errno value and, if v >= 0, sets the
+// CGo errno for a random pthread to value v. If you want some
+// consistency, this needs to be called from runtime.LockOSThread()
+// code. This function is only defined for testing purposes. The psx.c
+// code should properly handle the case that a non-zero errno is saved
+// and restored independently of what these Syscall[36]() functions
+// observe.
+func setErrno(v int) int {
+	return int(C.__errno_too(C.long(v)))
+}
+
+var makeFatal sync.Once
+
+// forceFatal configures the psx_syscall mechanism to PSX_ERROR.
+func forceFatal() {
+	makeFatal.Do(func() {
+		C.psx_set_sensitivity(C.PSX_ERROR)
+	})
+}
+
+//go:uintptrescapes
+
+// Syscall3 performs a 3 argument syscall. Syscall3 differs from
+// syscall.[Raw]Syscall() insofar as it is simultaneously executed on
+// every thread of the combined Go and CGo runtimes. It works
+// differently depending on whether CGO_ENABLED is 1 or 0 at compile
+// time.
+//
+// If CGO_ENABLED=1 it uses the libpsx function C.psx_syscall3().
+//
+// If CGO_ENABLED=0 it redirects to the go1.16+
+// syscall.AllThreadsSyscall() function.
+func Syscall3(syscallnr, arg1, arg2, arg3 uintptr) (uintptr, uintptr, syscall.Errno) {
+	forceFatal()
+	// We lock to the OSThread here because we may need errno to
+	// be the one for this thread.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	v := C.psx_syscall3(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3))
+	var errno syscall.Errno
+	if v < 0 {
+		errno = syscall.Errno(C.__errno_too(-1))
+	}
+	return uintptr(v), uintptr(v), errno
+}
+
+//go:uintptrescapes
+
+// Syscall6 performs a 6 argument syscall on every thread of the
+// combined Go and CGo runtimes. Other than the number of syscall
+// arguments, its behavior is identical to that of Syscall3() - see
+// above for the full documentation.
+func Syscall6(syscallnr, arg1, arg2, arg3, arg4, arg5, arg6 uintptr) (uintptr, uintptr, syscall.Errno) {
+	forceFatal()
+	// We lock to the OSThread here because we may need errno to
+	// be the one for this thread.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	v := C.psx_syscall6(C.long(syscallnr), C.long(arg1), C.long(arg2), C.long(arg3), C.long(arg4), C.long(arg5), C.long(arg6))
+	var errno syscall.Errno
+	if v < 0 {
+		errno = syscall.Errno(C.__errno_too(-1))
+	}
+	return uintptr(v), uintptr(v), errno
+}
diff --git a/psx/psx_cgo_test.go b/psx/psx_cgo_test.go
new file mode 100644
index 0000000..090a96a
--- /dev/null
+++ b/psx/psx_cgo_test.go
@@ -0,0 +1,40 @@
+// +build cgo
+
+package psx
+
+import (
+	"runtime"
+	"syscall"
+	"testing"
+)
+
+// The man page for errno indicates that it is never set to zero, so
+// validate that it retains its value over a successful Syscall[36]()
+// and is overwritten on a failing syscall.
+func TestErrno(t *testing.T) {
+	// This testing is much easier if we don't have to guess which
+	// thread is running this Go code.
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	// Start from a known bad state and clean up afterwards.
+	setErrno(int(syscall.EPERM))
+	defer setErrno(0)
+
+	v3, _, errno := Syscall3(syscall.SYS_GETUID, 0, 0, 0)
+	if errno != 0 {
+		t.Fatalf("psx getuid failed: %v", errno)
+	}
+	v6, _, errno := Syscall6(syscall.SYS_GETUID, 0, 0, 0, 0, 0, 0)
+	if errno != 0 {
+		t.Fatalf("psx getuid failed: %v", errno)
+	}
+
+	if v3 != v6 {
+		t.Errorf("psx getuid failed to match v3=%d, v6=%d", v3, v6)
+	}
+
+	if v := setErrno(-1); v != int(syscall.EPERM) {
+		t.Errorf("psx changes prevailing errno got=%v(%d) want=%v", syscall.Errno(v), v, syscall.EPERM)
+	}
+}
diff --git a/psx/psx_syscall.h b/psx/psx_syscall.h
new file mode 100644
index 0000000..7a8c9a1
--- /dev/null
+++ b/psx/psx_syscall.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2019 Andrew G. Morgan <morgan@kernel.org>
+ *
+ * This header, and the -lpsx library, provide a number of things to
+ * support POSIX semantics for syscalls associated with the pthread
+ * library. Linking this code is tricky and is done as follows:
+ *
+ *     ld ... -lpsx -lpthread --wrap=pthread_create
+ * or, gcc ... -lpsx -lpthread -Wl,-wrap,pthread_create
+ *
+ * glibc provides a subset of this functionality natively through the
+ * nptl:setxid mechanism and could implement psx_syscall() directly
+ * using that style of functionality but, as of 2019-11-30, the setxid
+ * mechanism is limited to 9 specific set*() syscalls that do not
+ * support the syscall6 API (needed for prctl functions and the ambient
+ * capabilities set for example).
+ */
+
+#ifndef _SYS_PSX_SYSCALL_H
+#define _SYS_PSX_SYSCALL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pthread.h>
+
+/*
+ * psx_syscall performs the specified syscall on all psx registered
+ * threads. The mechanism by which this occurs is much less efficient
+ * than a standard system call on Linux, so it should only be used
+ * when POSIX semantics are required to change process relevant
+ * security state.
+ *
+ * Glibc has native support for POSIX semantics on setgroups() and the
+ * 8 set*[gu]id() functions. So, there is no need to use psx_syscall()
+ * for these calls. This call exists for all the other system calls
+ * that need to maintain parity on all pthreads of a program.
+ *
+ * Some macrology is used to allow the caller to provide only as many
+ * arguments as needed, thus psx_syscall() cannot be used as a
+ * function pointer. For those situations, we define psx_syscall3()
+ * and psx_syscall6().
+ */
+#define psx_syscall(syscall_nr, ...) \
+    __psx_syscall(syscall_nr, __VA_ARGS__, (long int) 6, (long int) 5, \
+		  (long int) 4, (long int) 3, (long int) 2, \
+		  (long int) 1, (long int) 0)
+long int __psx_syscall(long int syscall_nr, ...);
+long int psx_syscall3(long int syscall_nr,
+		      long int arg1, long int arg2, long int arg3);
+long int psx_syscall6(long int syscall_nr,
+		      long int arg1, long int arg2, long int arg3,
+		      long int arg4, long int arg5, long int arg6);
+
+/*
+ * This function should be used by systems to obtain pointers to the
+ * two syscall functions provided by the PSX library. A linkage trick
+ * is to define this function as weak in a library that can optionally
+ * use libpsx and then, should the caller link -lpsx, that library can
+ * implicitly use these POSIX semantics syscalls. See libcap for an
+ * example of this usage.
+ */
+void psx_load_syscalls(long int (**syscall_fn)(long int,
+					       long int, long int, long int),
+		       long int (**syscall6_fn)(long int,
+						long int, long int, long int,
+						long int, long int, long int));
+
+/*
+ * psx_sensitivity_t holds the level of paranoia for non-POSIX syscall
+ * behavior. The default is PSX_IGNORE: which is best effort - no
+ * enforcement; PSX_WARNING will dump to stderr a warning when a
+ * syscall's results differ; PSX_ERROR will dump info as per
+ * PSX_WARNING and generate a SIGSYS. The current mode can be set with
+ * psx_set_sensitivity().
+ */
+typedef enum {
+    PSX_IGNORE = 0,   /* best effort, no enforcement (the default) */
+    PSX_WARNING = 1,  /* warn on stderr when a syscall's results differ */
+    PSX_ERROR = 2,    /* as PSX_WARNING, and also generate a SIGSYS */
+} psx_sensitivity_t;
+
+/*
+ * psx_set_sensitivity sets the current sensitivity of the PSX
+ * mechanism.  The function returns 0 on success and -1 if the
+ * requested level is invalid.
+ */
+int psx_set_sensitivity(psx_sensitivity_t level);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PSX_SYSCALL_H */
diff --git a/psx/psx_test.go b/psx/psx_test.go
new file mode 100644
index 0000000..40a543f
--- /dev/null
+++ b/psx/psx_test.go
@@ -0,0 +1,125 @@
+package psx
+
+import (
+	"runtime"
+	"sync"
+	"syscall"
+	"testing"
+)
+
+func TestSyscall3(t *testing.T) {
+	pid := syscall.Getpid() // reference value from the standard library
+	if r1, _, errno := Syscall3(syscall.SYS_GETPID, 0, 0, 0); errno != 0 {
+		t.Errorf("failed to get PID via libpsx: %v", errno)
+	} else if pid != int(r1) {
+		t.Errorf("pid mismatch: got=%d want=%d", r1, pid)
+	}
+	if r1, _, errno := Syscall3(syscall.SYS_CAPGET, 0, 0, 0); errno != syscall.EFAULT {
+		t.Errorf("malformed capget returned %d: %v (want 14: %v)", errno, errno, syscall.EFAULT)
+	} else if r1 != ^uintptr(0) {
+		t.Errorf("malformed capget did not return -1, got=%d", r1)
+	}
+}
+
+func TestSyscall6(t *testing.T) {
+	pid := syscall.Getpid() // reference value from the standard library
+	if r1, _, errno := Syscall6(syscall.SYS_GETPID, 0, 0, 0, 0, 0, 0); errno != 0 {
+		t.Errorf("failed to get PID via libpsx: %v", errno)
+	} else if pid != int(r1) {
+		t.Errorf("pid mismatch: got=%d want=%d", r1, pid)
+	}
+	if r1, _, errno := Syscall6(syscall.SYS_CAPGET, 0, 0, 0, 0, 0, 0); errno != syscall.EFAULT {
+		t.Errorf("malformed capget errno %d: %v (want 14: %v)", errno, errno, syscall.EFAULT)
+	} else if r1 != ^uintptr(0) {
+		t.Errorf("malformed capget did not return -1, got=%d", r1)
+	}
+}
+
+// killAThread locks the goroutine to a thread and exits. This has the
+// effect of making the go runtime terminate the thread.
+func killAThread(c <-chan struct{}) {
+	runtime.LockOSThread() // deliberately never unlocked
+	<-c                    // wait for a send on, or close of, c and then return while still locked
+}
+
+// TestShared checks that keep-caps state set via Syscall3 is mirrored on every tracker thread.
+func TestShared(t *testing.T) {
+	const prGetKeepCaps = 7
+	const prSetKeepCaps = 8
+
+	var wg sync.WaitGroup
+
+	newTracker := func() chan<- uintptr { // starts a goroutine locked to its own OS thread
+		ch := make(chan uintptr)
+		go func() {
+			runtime.LockOSThread()
+			defer wg.Done()
+			tid := syscall.Gettid()
+			for {
+				if _, ok := <-ch; !ok { // start-of-round marker
+					break
+				}
+				val, ok := <-ch // value the keep-caps flag should now hold
+				if !ok {
+					break
+				}
+				got, _, e := Syscall3(syscall.SYS_PRCTL, prGetKeepCaps, 0, 0)
+				// Errorf only: Fatalf/FailNow may not be called off the test goroutine.
+				if e != 0 {
+					t.Errorf("[%d] psx:prctl(GET_KEEPCAPS) ?= %d failed: %v", tid, val, syscall.Errno(e))
+				} else if got != val {
+					t.Errorf("[%d] bad keepcaps value: got=%d, want=%d", tid, got, val)
+				}
+				if _, ok := <-ch; !ok { // end-of-round marker, always consumed so the sender never blocks
+					break
+				}
+			}
+		}()
+		return ch
+	}
+
+	var tracked []chan<- uintptr
+	for i := 0; i <= 10; i++ {
+		val := uintptr(i & 1)
+		if _, _, e := Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, val, 0); e != 0 {
+			t.Fatalf("[%d] psx:prctl(SET_KEEPCAPS, %d) failed: %v", i, i&1, syscall.Errno(e))
+		}
+		wg.Add(1)
+		tracked = append(tracked, newTracker())
+		for _, ch := range tracked {
+			ch <- 2   // start serialization.
+			ch <- val // definitely written after change.
+			ch <- 3   // end serialization.
+		}
+	}
+	for _, ch := range tracked {
+		close(ch)
+	}
+	wg.Wait()
+}
+
+// TestThreadChurn guards against a regression of:
+//
+//	https://github.com/golang/go/issues/42494
+func TestThreadChurn(t *testing.T) {
+	const prSetKeepCaps = 8
+
+	for round := 0; round < 4; round++ {
+		withKill := round&1 == 1
+		withSyscall := round&2 == 2
+		t.Logf("[%d] testing kill=%v, sysc=%v", round, withKill, withSyscall)
+		for n := 50; n > 0; n-- {
+			if withKill {
+				done := make(chan struct{})
+				go killAThread(done)
+				close(done)
+			}
+			if withSyscall {
+				if _, _, errno := Syscall3(syscall.SYS_PRCTL, prSetKeepCaps, uintptr(n&1), 0); errno != 0 {
+					t.Fatalf("[%d] psx:prctl(SET_KEEPCAPS, %d) failed: %v", n, n&1, syscall.Errno(errno))
+				}
+			}
+		}
+		t.Logf("[%d] PASSED kill=%v, sysc=%v", round, withKill, withSyscall)
+	}
+}
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..d0b3f15
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,8 @@
+noop
+psx_test
+libcap_psx_test
+libcap_launch_test
+libcap_psx_launch_test
+exploit
+noexploit
+uns_test
diff --git a/tests/Makefile b/tests/Makefile
new file mode 100644
index 0000000..ecb7d1b
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,121 @@
+#
+# NOTE the built tests are all designed to be run from this
+# working directory when built DYNAMIC=yes. That is, they
+# link to the shared libraries in ../libcap/ .
+#
+topdir=$(shell pwd)/..
+include ../Make.Rules
+#
+
+all:
+	@echo leave test building to test target
+
+install:
+	@echo nothing to install from tests
+
+ifeq ($(DYNAMIC),yes)
+LINKEXTRA=-Wl,-rpath,../libcap
+DEPS=../libcap/libcap.so
+ifeq ($(PTHREADS),yes)
+DEPS += ../libcap/libpsx.so
+endif
+else
+# For this build variant override the LDFLAGS to link statically from
+# libraries within the build tree. If you never want this, use
+# make DYNAMIC=yes ...
+LDFLAGS = --static
+DEPS=../libcap/libcap.a
+ifeq ($(PTHREADS),yes)
+DEPS +=  ../libcap/libpsx.a
+endif
+endif
+
+../libcap/libcap.so:
+	$(MAKE) -C ../libcap libcap.so
+
+../libcap/libcap.a:
+	$(MAKE) -C ../libcap libcap.a
+
+ifeq ($(PTHREADS),yes)
+../libcap/libpsx.so:
+	$(MAKE) -C ../libcap libpsx.so
+
+../libcap/libpsx.a:
+	$(MAKE) -C ../libcap libpsx.a
+endif
+
+../progs/tcapsh-static:
+	$(MAKE) -C ../progs tcapsh-static
+
+test:
+ifeq ($(PTHREADS),yes)
+	$(MAKE) run_psx_test run_libcap_psx_test
+endif
+
+sudotest: test
+	$(MAKE) run_uns_test
+	$(MAKE) run_libcap_launch_test
+ifeq ($(PTHREADS),yes)
+	$(MAKE) run_libcap_psx_launch_test run_exploit_test
+endif
+
+# unprivileged
+run_psx_test: psx_test
+	./psx_test
+
+psx_test: psx_test.c $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< -o $@ $(LINKEXTRA) $(LIBPSXLIB)
+
+run_libcap_psx_test: libcap_psx_test
+	./libcap_psx_test
+
+libcap_psx_test: libcap_psx_test.c $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) $(LIBPSXLIB)
+
+# privileged
+uns_test: uns_test.c $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB)
+
+run_uns_test: uns_test
+	echo exit | $(SUDO) ./uns_test
+
+run_libcap_launch_test: libcap_launch_test noop ../progs/tcapsh-static
+	$(SUDO) ./libcap_launch_test
+
+run_libcap_psx_launch_test: libcap_psx_launch_test ../progs/tcapsh-static
+	$(SUDO) ./libcap_psx_launch_test
+
+libcap_launch_test: libcap_launch_test.c $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB)
+
+# This varies only slightly from the above insofar as it currently
+# only links in the pthreads fork support. TODO() we need to change
+# the source to do something interesting with pthreads.
+libcap_psx_launch_test: libcap_launch_test.c $(DEPS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DWITH_PTHREADS $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) $(LIBPSXLIB)
+
+
+# This test demonstrates that libpsx is needed to secure multithreaded programs
+# that link against libcap: each binary exits non-zero if the exploit succeeded.
+run_exploit_test: exploit noexploit
+	@echo exploit should succeed
+	$(SUDO) ./exploit ; if [ $$? -ne 0 ]; then exit 0; else exit 1 ; fi
+	@echo exploit should fail
+	$(SUDO) ./noexploit ; if [ $$? -eq 0 ]; then exit 0; else exit 1 ; fi
+
+exploit: exploit.o $(DEPS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LINKEXTRA) $(LIBCAPLIB) -lpthread
+
+# Note, for some reason, the order of libraries is important to avoid
+# the exploit working for dynamic linking.
+noexploit: exploit.o $(DEPS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(LINKEXTRA) $(LIBPSXLIB) $(LIBCAPLIB)
+
+# This one runs in a chroot with no shared library files.
+noop: noop.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $< -o $@ --static
+
+clean:
+	rm -f psx_test libcap_psx_test libcap_launch_test uns_test *~
+	rm -f libcap_launch_test libcap_psx_launch_test core noop
+	rm -f exploit noexploit exploit.o
diff --git a/tests/exploit.c b/tests/exploit.c
new file mode 100644
index 0000000..814337c
--- /dev/null
+++ b/tests/exploit.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Andrew G Morgan <morgan@kernel.org>
+ *
+ * This program exploit demonstrates why libcap alone in a
+ * multithreaded C/C++ program is inherently vulnerable to privilege
+ * escalation.
+ *
+ * The code also serves as a demonstration of how linking with libpsx
+ * can eliminate this vulnerability by maintaining a process wide
+ * common security state.
+ *
+ * The basic idea (which is well known and why POSIX stipulates "posix
+ * semantics" for security relevant state at the abstraction of a
+ * process) is that, because of shared memory, if a single thread alone
+ * is vulnerable to code injection, then it can cause any other thread
+ * to execute arbitrary code. As such, if all but one thread drops
+ * privilege, privilege escalation is somewhat trivial.
+ */
+
+/* as per "man sigaction" */
+#define _POSIX_C_SOURCE 200809L
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+
+/* thread coordination: hits is guarded by mu and announced via cond */
+pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+int hits;
+
+/* evidence of highest privilege attained: longest capability text seen */
+ssize_t greatest_len;
+char *text;
+
+/*
+ * interrupt handler - potentially watching for an opportunity to
+ * perform an exploit when invoked as a privileged thread. It keeps, in
+ * text/greatest_len, the longest capability text observed so far.
+ */
+static void handler(int signum, siginfo_t *info, void *ignore) {
+    ssize_t length = 0;
+    pthread_mutex_lock(&mu);
+
+    cap_t caps = cap_get_proc();
+    char *working = cap_to_text(caps, &length);
+    if (working != NULL && length > greatest_len) {
+	/* This is where the exploit code might go. */
+	cap_free(text);
+	text = working;
+	greatest_len = length;
+    } else {
+	/* not an improvement: release the copy instead of leaking it */
+	cap_free(working);
+    }
+    cap_free(caps);
+    hits++;
+
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&mu);
+}
+
+/*
+ * privileged thread code (imagine it doing whatever needs privilege).
+ */
+static void *victim(void *args) {
+    pthread_mutex_lock(&mu);
+    hits = 1;                       /* main() waits for hits to reach 1 */
+    printf("started privileged thread\n");
+    pthread_cond_signal(&cond);
+    pthread_mutex_unlock(&mu);
+
+    pthread_mutex_lock(&mu);
+    while (hits < 2) {              /* handler() increments hits each time it runs */
+	pthread_cond_wait(&cond, &mu);
+    }
+    pthread_mutex_unlock(&mu);
+
+    return NULL;
+}
+
+int main(int argc, char **argv) {
+    pthread_t peer;
+    cap_t caps = cap_init();
+    struct sigaction sig_action;
+    /* exit status: 1 if the exploit succeeded or setup failed, else 0 */
+    printf("program starting\n");
+    if (pthread_create(&peer, NULL, victim, NULL)) {
+	perror("unable to start the victim thread");
+	exit(1);
+    }
+
+    /*
+     * Wait until the peer thread is fully up.
+     */
+    pthread_mutex_lock(&mu);
+    while (hits < 1) {
+	pthread_cond_wait(&cond, &mu);
+    }
+    pthread_mutex_unlock(&mu);
+
+    printf("dropping privilege from main process thread\n");
+
+    if (cap_set_proc(caps)) {
+	perror("unable to drop capabilities from main process thread");
+	exit(1);
+    }
+    cap_free(caps);
+
+    /* confirm the low privilege of the process' main thread */
+
+    caps = cap_get_proc();
+    text = cap_to_text(caps, &greatest_len);
+    cap_free(caps);
+
+    printf("no privilege in main process thread: len:%zd, caps:\"%s\"\n",
+	   greatest_len, text);
+    if (greatest_len != 1) {
+	printf("failed to lower privilege as expected\n");
+	exit(1);
+    }
+
+    /*
+     * So, we have confirmed that this running thread has no
+     * privilege. From this thread we setup an interrupt handler and
+     * then trigger it on the privileged peer thread.
+     */
+
+    sig_action.sa_sigaction = &handler;
+    sigemptyset(&sig_action.sa_mask);
+    sig_action.sa_flags = SA_SIGINFO | SA_RESTART;
+    sigaction(SIGRTMIN, &sig_action, NULL);
+
+    pthread_kill(peer, SIGRTMIN);
+
+    /*
+     * Wait for the thread to exit.
+     */
+    pthread_join(peer, NULL);
+
+    /*
+     * Let's see how we did with the exploit.
+     */
+
+    printf("greatest privilege in main process thread: len:%zd, caps:\"%s\"\n",
+	   greatest_len, text);
+
+    cap_free(text);
+    if (greatest_len != 1) {
+	printf("exploit succeeded\n");
+	exit(1);
+    }
+
+    printf("exploit failed\n");
+    exit(0);
+}
diff --git a/tests/libcap_launch_test.c b/tests/libcap_launch_test.c
new file mode 100644
index 0000000..b982573
--- /dev/null
+++ b/tests/libcap_launch_test.c
@@ -0,0 +1,239 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * tests for cap_launch.
+ */
+
+#define MORE_THAN_ENOUGH 20
+#define NO_MORE 1
+
+struct test_case_s {
+    int pass_on;                          /* NO_MORE marks the end-of-table sentinel */
+    const char *chroot;                   /* if non-NULL, given to cap_launcher_set_chroot() */
+    uid_t uid;                            /* if non-zero, given to cap_launcher_setuid() */
+    gid_t gid;                            /* if non-zero, gid/ngroups/groups go to cap_launcher_setgroups() */
+    int ngroups;
+    const gid_t groups[MORE_THAN_ENOUGH];
+    const char *args[MORE_THAN_ENOUGH];   /* argv; a NULL args[0] selects cap_func_launcher() */
+    const char **envp;                    /* environment handed to cap_new_launcher() */
+    const char *iab;                      /* if non-NULL, text decoded with cap_iab_from_text() */
+    cap_mode_t mode;                      /* if non-zero, given to cap_launcher_set_mode() */
+    int launch_abort;                     /* non-zero: cap_launch() itself is expected to fail */
+    int result;                           /* expected status as stored by waitpid() */
+    int (*callback_fn)(void *detail);     /* launcher callback, or the function for cap_func_launcher() */
+};
+
+#ifdef WITH_PTHREADS
+#include <pthread.h>
+#else /* WITH_PTHREADS */
+#endif /* WITH_PTHREADS */
+
+/*
+ * clean_out lowers all process capabilities, or _exit(1)s on failure.
+ */
+static int clean_out(void *data) {
+    cap_t none = cap_init();
+    int failed = cap_set_proc(none);
+    if (failed) {
+	_exit(1);
+    }
+    cap_free(none);
+    return 0;
+}
+
+int main(int argc, char **argv) { /* runs every table entry through cap_launch(); exits 0 only if all behave as expected */
+    static struct test_case_s vs[] = {
+	{
+	    .args = { "../progs/tcapsh-static", "--", "-c", "echo hello" },
+	    .result = 0
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--", "-c", "echo hello" },
+	    .callback_fn = &clean_out,
+	    .result = 0
+	},
+	{
+	    .callback_fn = &clean_out,
+	    .result = 0
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--is-uid=123" },
+	    .result = 256
+	},
+	{
+	    .args = { "/", "won't", "work" },
+	    .launch_abort = 1,
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--is-uid=123" },
+	    .uid = 123,
+	    .result = 0,
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--is-uid=123" },
+	    .callback_fn = &clean_out,
+	    .uid = 123,
+	    .launch_abort = 1,
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--is-gid=123" },
+	    .result = 0,
+	    .gid = 123,
+	    .ngroups = 1,
+	    .groups = { 456 },
+	    .iab = "",
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--dropped=cap_chown",
+		      "--has-i=cap_chown" },
+	    .result = 0,
+	    .iab = "!%cap_chown"
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--dropped=cap_chown",
+		      "--has-i=cap_chown", "--is-uid=234",
+		      "--has-a=cap_chown", "--has-p=cap_chown" },
+	    .uid = 234,
+	    .result = 0,
+	    .iab = "!^cap_chown"
+	},
+	{
+	    .args = { "../progs/tcapsh-static", "--inmode=NOPRIV",
+		      "--has-no-new-privs" },
+	    .result = 0,
+	    .mode = CAP_MODE_NOPRIV
+	},
+	{
+	    .args = { "/noop" },
+	    .result = 0,
+	    .chroot = ".",
+	},
+	{
+	    .pass_on = NO_MORE
+	},
+    };
+
+    if (errno != 0) {
+	perror("unexpected initial value for errno");
+	exit(1);
+    }
+    cap_t orig = cap_get_proc();
+    if (orig == NULL) {
+	perror("failed to get process capabilities");
+	exit(1);
+    }
+
+    int success = 1, i;
+    for (i=0; vs[i].pass_on != NO_MORE; i++) {
+	cap_launch_t attr = NULL;
+	const struct test_case_s *v = &vs[i];
+	if (cap_launch(attr, NULL) != -1) {
+	    perror("NULL launch didn't fail");
+	    exit(1);
+	}
+	printf("[%d] test should %s\n", i,
+	       v->result || v->launch_abort ? "generate error" : "work");
+	if (v->args[0] != NULL) {
+	    attr = cap_new_launcher(v->args[0], v->args, v->envp);
+	    if (attr == NULL) {
+		perror("failed to obtain launcher");
+		exit(1);
+	    }
+	    if (v->callback_fn != NULL) {
+		cap_launcher_callback(attr, v->callback_fn);
+	    }
+	} else {
+	    attr = cap_func_launcher(v->callback_fn);
+	}
+	if (v->chroot) {
+	    cap_launcher_set_chroot(attr, v->chroot);
+	}
+	if (v->uid) {
+	    cap_launcher_setuid(attr, v->uid);
+	}
+	if (v->gid) {
+	    cap_launcher_setgroups(attr, v->gid, v->ngroups, v->groups);
+	}
+	if (v->iab) {
+	    cap_iab_t iab = cap_iab_from_text(v->iab);
+	    if (iab == NULL) {
+		fprintf(stderr, "[%d] failed to decode iab [%s]", i, v->iab);
+		perror(":");
+		cap_free(attr); /* do not leak the launcher when skipping this case */
+		success = 0;
+		continue;
+	    }
+	    cap_iab_t old = cap_launcher_set_iab(attr, iab);
+	    if (cap_free(old)) {
+		fprintf(stderr, "[%d] failed to free prior iab [%s]", i, v->iab);
+		perror(":");
+		cap_free(attr); /* attr now holds iab, so this is the only free needed */
+		success = 0;
+		continue;
+	    }
+	}
+	if (v->mode) {
+	    cap_launcher_set_mode(attr, v->mode);
+	}
+	pid_t child = cap_launch(attr, NULL);
+	if (child <= 0) {
+	    fprintf(stderr, "[%d] failed to launch: ", i);
+	    perror("");
+	    if (!v->launch_abort) {
+		success = 0;
+	    }
+	    cap_free(attr); /* freed after perror() so errno is reported intact */
+	    continue;
+	}
+	if (cap_free(attr)) {
+	    fprintf(stderr, "[%d] failed to free launcher: ", i);
+	    perror("");
+	    success = 0;
+	}
+	int result;
+	pid_t ret = waitpid(child, &result, 0);
+	if (ret != child) {
+	    fprintf(stderr, "[%d] failed to wait: ", i);
+	    perror("");
+	    success = 0;
+	    continue;
+	}
+	if (result != v->result) {
+	    fprintf(stderr, "[%d] bad result: got=%d want=%d: ", i, result,
+		    v->result);
+	    perror("");
+	    success = 0;
+	    continue;
+	}
+    }
+
+    cap_t final = cap_get_proc();
+    if (final == NULL) {
+	perror("unable to get final capabilities");
+	exit(1);
+    }
+    if (cap_compare(orig, final)) {
+	char *was = cap_to_text(orig, NULL);
+	char *is = cap_to_text(final, NULL);
+	printf("cap_launch_test: orig:'%s' != final:'%s'\n", was, is);
+	cap_free(is);
+	cap_free(was);
+	success = 0;
+    }
+    cap_free(final);
+    cap_free(orig);
+
+    if (!success) {
+	printf("cap_launch_test: FAILED\n");
+	exit(1);
+    }
+    printf("cap_launch_test: PASSED\n");
+    exit(0);
+}
diff --git a/tests/libcap_psx_test.c b/tests/libcap_psx_test.c
new file mode 100644
index 0000000..9ef8cac
--- /dev/null
+++ b/tests/libcap_psx_test.c
@@ -0,0 +1,65 @@
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/psx_syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static void *thread_fork_exit(void *data) { /* thread body: fork, set caps in parent and child, reap the child */
+    usleep(1234);
+    pid_t pid = fork();
+    cap_t start = (pid == -1) ? NULL : cap_get_proc(); /* failed fork: bail out below with its errno */
+    if (start == NULL) {
+	perror("FAILED: unable to start");
+	exit(1);
+    }
+    if (pid == 0) {
+	if (cap_set_proc(start)) {
+	    perror("setting empty caps failed");
+	    exit(1);
+	}
+	exit(0);
+    }
+    int res = 0; /* defined even if waitpid() fails, since it is printed below */
+    if (waitpid(pid, &res, 0) != pid || res != 0) {
+	printf("FAILED: pid=%d wait returned %d and/or error: %d\n",
+	       pid, res, errno);
+	exit(1);
+    }
+    cap_set_proc(start);
+    cap_free(start);
+    return NULL;
+}
+
+int main(int argc, char **argv) {
+    pthread_t workers[10];
+    int n = 0;
+    printf("hello libcap and libpsx ");
+    fflush(stdout);
+    cap_t start = cap_get_proc();
+    if (!start) {
+	perror("FAILED: to actually start");
+	exit(1);
+    }
+    while (n < 10) {
+	pthread_create(&workers[n++], NULL, thread_fork_exit, NULL);
+    }
+    for (n = 10; n > 0; n--) {
+	printf(".");     /* because of fork, this may print double */
+	fflush(stdout);  /* try to limit the above effect */
+	if (cap_set_proc(start) != 0) {
+	    perror("failed to set proc");
+	    exit(1);
+	}
+	usleep(1000);
+    }
+    printf(" PASSED\n");
+    exit(0);
+}
diff --git a/tests/noop.c b/tests/noop.c
new file mode 100644
index 0000000..ad51c89
--- /dev/null
+++ b/tests/noop.c
@@ -0,0 +1,5 @@
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+    exit(EXIT_SUCCESS);  /* do nothing, successfully */
+}
diff --git a/tests/psx_test.c b/tests/psx_test.c
new file mode 100644
index 0000000..7f16175
--- /dev/null
+++ b/tests/psx_test.c
@@ -0,0 +1,150 @@
+#ifndef _DEFAULT_SOURCE
+#define _DEFAULT_SOURCE
+#endif
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/psx_syscall.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+typedef union tp {
+    long long unsigned raw;  /* integer view of pt, printed with %llx */
+    pthread_t pt;            /* filled from pthread_self() */
+} thread_ptr;
+
+static void say_hello_expecting(const char *title, int n, int kept) { /* print this thread's keepcaps; exit(1) unless it equals kept */
+    int keeper = prctl(PR_GET_KEEPCAPS);
+    thread_ptr tp = { .raw = 0 };  /* start zeroed: pthread_t may be narrower than raw */
+    tp.pt = pthread_self();
+
+    printf("hello [%d], %s<%d> %llx (keepcaps=%d vs. want=%d)\n",
+	   getpid(), title, n, tp.raw, keeper, kept);
+    if (keeper != kept) {
+	printf("--> FAILURE %s thread=%llx has wrong keepcaps: got=%d want=%d\n",
+	       title, tp.raw, keeper, kept);
+	exit(1);
+    }
+}
+
+pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
+int global_kept = 0;  /* keepcaps value main() most recently requested */
+int step = 0;         /* advanced by main(); threads wait on it */
+int replies = 0;      /* threads that have reported in the current iteration */
+int launched = 0;     /* threads main() currently expects replies from */
+int started = 0;      /* threads that have begun running */
+
+static void *say_hello(void *args) { /* thread body: report keepcaps for three consecutive steps */
+    int count = 0;
+
+    pthread_mutex_lock(&mu);
+    started++;                      /* main() waits for started to reach launched */
+    int this_step = step+1;
+    pthread_cond_broadcast(&cond);
+    pthread_mutex_unlock(&mu);
+
+    pthread_mutex_lock(&mu);
+    do {
+	while (this_step > step) {      /* wait until main() has advanced step this far */
+	    pthread_cond_wait(&cond, &mu);
+	}
+	say_hello_expecting("thread", count, global_kept);
+
+	replies++;                      /* main() waits for replies to reach launched */
+	pthread_cond_broadcast(&cond);
+	pthread_mutex_unlock(&mu);
+
+	this_step++;
+	pthread_mutex_lock(&mu);
+    } while (++count != 3);         /* each thread reports exactly three times */
+    pthread_mutex_unlock(&mu);
+
+    return NULL;
+}
+
+int main(int argc, char **argv) { /* toggles keepcaps via psx_syscall() and checks every thread observes it */
+    pthread_t tid[3];
+    int i;
+    pid_t child = 0;
+    char * const stop_argv[3] = { argv[0], strdup("stop"), NULL };
+
+    if (argc != 1) {
+	printf("child %d starting\n", getpid());
+	usleep(2000);
+	printf("child %d exiting\n", getpid());
+	exit(0);
+    }
+
+    for (i = 0; i<10; i++) {
+	printf("iteration [%d]: %d\n", getpid(), i);
+
+	pthread_mutex_lock(&mu);
+	global_kept = !global_kept;
+	replies = 0;
+	step = i;
+	pthread_mutex_unlock(&mu);
+
+	psx_syscall(SYS_prctl, PR_SET_KEEPCAPS, global_kept);
+
+	pthread_mutex_lock(&mu);
+	step++;
+	pthread_cond_broadcast(&cond);
+	pthread_mutex_unlock(&mu);
+
+	say_hello_expecting("main", i, global_kept);
+
+	pthread_mutex_lock(&mu);
+	while (replies < launched) {
+	    pthread_cond_wait(&cond, &mu);
+	}
+	pthread_mutex_unlock(&mu);
+
+	if (i < 3) {
+	    if (!child) {
+		child = fork();
+		if (!child) {
+		    usleep(2000);
+		    execve(argv[0], stop_argv, NULL);
+		    perror("failed to exec");
+		    exit(1);
+		} else {
+		    printf("pid=%d forked -> %d\n", getpid(), child);
+		}
+	    }
+	    launched++;
+	    pthread_create(&tid[i], NULL, say_hello, NULL);
+	    /* Confirm that the thread is started. */
+	    pthread_mutex_lock(&mu);
+	    while (started < launched) {
+		printf("[%d] started=%d vs %d\n", getpid(), started, launched);
+		pthread_cond_wait(&cond, &mu);
+	    }
+	    printf("[%d] started=%d vs %d\n", getpid(), started, launched);
+	    pthread_cond_broadcast(&cond);
+	    pthread_mutex_unlock(&mu);
+	} else if (i < 6) {
+	    /* Confirm one thread has finished. */
+	    pthread_join(tid[i-3], NULL);
+	    launched--;
+	}
+    }
+
+    if (child) {
+	int status = 0;
+	pid_t reaped = waitpid(child, &status, 0); /* a failed fork or wait also counts as failure */
+	if (reaped < 0 || reaped != child || status) {
+	    printf("child %d FAILED: %d\n", child, status);
+	    exit(1);
+	}
+    }
+    printf("%s PASSED\n", argv[0]);
+    exit(0);
+}
diff --git a/tests/uns_test.c b/tests/uns_test.c
new file mode 100644
index 0000000..3fe73af
--- /dev/null
+++ b/tests/uns_test.c
@@ -0,0 +1,171 @@
+/*
+ * Try unsharing where we remap the root user by rotating uids (0,1,2)
+ * and the corresponding gids too.
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define STACK_RESERVED (10*1024)  /* bytes mapped for the clone()d child's stack */
+
+struct my_pipe {
+    int to[2];    /* pipe() pair: parent writes to[1], child reads to[0] */
+    int from[2];  /* pipe() pair: child writes from[1], parent reads from[0] */
+};
+
+static int child(void *data) {
+    static const char * const shell_argv[] = {"bash", NULL};
+    struct my_pipe *pipes = data;
+    char verdict;
+
+    /* keep only our ends: we write on from[1] and read on to[0] */
+    close(pipes->to[1]);
+    close(pipes->from[0]);
+    if (write(pipes->from[1], "1", 1) != 1) {
+	fprintf(stderr, "failed to confirm setuid(1)\n");
+	exit(1);
+    }
+    close(pipes->from[1]);
+
+    /* block until the parent reports how its side went */
+    if (read(pipes->to[0], &verdict, 1) != 1) {
+	fprintf(stderr, "failed to wait for parent\n");
+	exit(1);
+    }
+    close(pipes->to[0]);
+    if (verdict == '!') {
+	exit(0); /* parent failed */
+    }
+
+    setsid();
+    execv("/bin/bash", (const void *) shell_argv);
+    perror("execv failed");
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    static const char *file_formats[] = {
+	"/proc/%d/uid_map",
+	"/proc/%d/gid_map"
+    };
+    static const char id_map[] = "0 1 1\n1 2 1\n2 0 1\n3 3 49999997\n";
+    cap_value_t fscap = CAP_SETFCAP;
+    cap_t orig = cap_get_proc();
+    cap_flag_value_t present;
+
+    if (cap_get_flag(orig, CAP_SYS_ADMIN, CAP_EFFECTIVE, &present) != 0) {
+	perror("failed to read a capability flag");
+	exit(1);
+    }
+    if (present != CAP_SET) {
+	fprintf(stderr,
+		"environment missing cap_sys_admin - exploit not testable\n");
+	exit(0);
+    }
+
+    /* Run with this one lowered */
+    cap_set_flag(orig, CAP_EFFECTIVE, 1, &fscap, CAP_CLEAR);
+
+    struct my_pipe fds;
+    if (pipe(&fds.from[0]) || pipe(&fds.to[0])) {
+	perror("no pipes");
+	exit(1);
+    }
+
+    char *stack = mmap(NULL, STACK_RESERVED, PROT_READ|PROT_WRITE,
+		       MAP_ANONYMOUS|MAP_PRIVATE|MAP_STACK, -1, 0);
+    if (stack == MAP_FAILED) {
+	perror("no map for stack");
+	exit(1);
+    }
+
+    if (cap_setuid(1)) {
+	perror("failed to cap_setuid(1)");
+	exit(1);
+    }
+
+    if (cap_set_proc(orig)) {
+	perror("failed to raise caps again");
+	exit(1);
+    }
+
+    pid_t pid = clone(&child, stack+STACK_RESERVED, CLONE_NEWUSER|SIGCHLD, &fds);
+    if (pid == -1) {
+	perror("clone failed");
+	exit(1);
+    }
+
+    close(fds.from[1]);
+    close(fds.to[0]);
+
+    if (cap_setuid(0)) {
+	perror("failed to cap_setuid(0)");
+	exit(1);
+    }
+
+    if (cap_set_proc(orig)) {
+	perror("failed to raise caps again");
+	exit(1);
+    }
+
+    char datum[1];
+    if (read(fds.from[0], datum, 1) != 1 || datum[0] != '1') {
+	fprintf(stderr, "failed to read child status\n");
+	exit(1);
+    }
+    close(fds.from[0]);
+
+    int i;
+    for (i=0; i<2; i++) {
+	char *map_file;
+	if (asprintf(&map_file, file_formats[i], pid) < 0) {
+	    perror("allocate string");
+	    exit(1);
+	}
+
+	FILE *f = fopen(map_file, "w");
+	free(map_file);
+	if (f == NULL) {
+	    perror("fopen failed");
+	    exit(1);
+	}
+	int len = fwrite(id_map, 1, strlen(id_map), f);
+	if (len != strlen(id_map)) {
+	    goto bailok;
+	}
+	if (fclose(f)) {
+	    goto bailok;
+	}
+    }
+
+    if (write(fds.to[1], ".", 1) != 1) {
+	perror("failed to write '.'");
+	exit(1);
+    }
+    close(fds.to[1]);
+
+    fprintf(stderr, "user namespace launched exploit worked - upgrade kernel\n");
+    if (wait(NULL) == pid) {
+	exit(1);
+    }
+    perror("launch failed");
+    exit(1);
+
+bailok:
+    fprintf(stderr, "exploit attempt failed\n");
+    if (write(fds.to[1], "!", 1) != 1) {
+	perror("failed to inform child [ignored]");
+    }
+    exit(0);
+}
author	Dariusz Michaluk <d.michaluk@samsung.com>	2024-02-14 13:15:33 +0100
committer	Dariusz Michaluk <d.michaluk@samsung.com>	2024-02-14 13:15:33 +0100
commit	46a71608a1c1f277922adf2a82c4ab1e4d7ad320 (patch)
tree	13e7d9070ff0f463f1ebb644cf5afa3d90277043
parent	bcca19a055cbdb0d67985ac08fef491f6d4bb42b (diff)
download	libcap-upstream.tar.gz libcap-upstream.tar.bz2 libcap-upstream.zip