[PATCH] An attempt to have an unlimitedly extendable sys_clone

Serge E. Hallyn serue at us.ibm.com
Tue Jan 15 13:40:18 PST 2008


Quoting Cedric Le Goater (clg at fr.ibm.com):
> Cedric Le Goater wrote:
> > Pavel Emelyanov wrote:
> >> We have one bit in the clone_flags left, so we won't be
> >> able to create more namespaces after we make it busy.
> >> Besides, for checkpoint/restart jobs we might want to
> >> create tasks with pre-defined pids (virtual of course).
> >> What else might be required from clone() - nobody knows.
> >>
> >> This is an attempt to create a extendable API for clone.
> >>
> >> I use the last bit in the clone_flags for CLONE_NEWCLONE.
> >> When set it will denote that the child_tidptr is not a
> >> pointer on the tid storage, but the pointer on the struct
> >> long_clone_struct which currently looks like this:
> >>
> >> struct long_clone_arg {
> >> 	int size;
> >> };
> >>
> >> When we want to add a new argument for clone we just put
> >> it *at the end of this structure* and adjust the size.
> >> The binary compatibility with older long_clone_arg-s is
> >> facilitated with the clone_arg_has() macro.
> > 
> > hmm, I wonder how lkml@ will react to this. do we have 
> > similar apis in the kernel ?
> > 
> >> Sure, we lose the ability to clone tasks with extended
> >> argument and the CLONE_CHILD_SETTID/CLEARTID, but do we
> >> really need this?
> > 
> > not in the extended clone flag version. I think.
> > 
> >> The same thing is about to be done for unshare - we can
> >> add the second argument for it and iff the CLONE_NEWCLONE
> >> is specified - try to use it. Binary compatibility with
> >> the old ushare will be kept.
> >>
> >> The new argument is pulled up to the create_new_namespaces
> >> so that later we can easily use it w/o sending additional
> >> patches.
> >>
> >> This is a final, but a pre-review patch for sys_clone()
> >> that I plan to send to Andrew before we go on developing
> >> new namespaces.
> >>
> >> Made against 2.6.24-rc5-mm1.
> > 
> > The patch looks good and I compiled it and booted on x64 and 
> > x86_64. 
> > 
> > I think we should add the unshare support before sending to
> > andrew and also add an extended flag array to show how it will 
> > be used. I have a mq_namespace patchset pending we could use
> > for that and send all together ?
> 
> Here's the unshare part if you want to fold that with your patch.
> 
> Thanks,
> 
> C.
> 
> Signed-off-by: Cedric Le Goater <clg at fr.ibm.com>
> ---
>  include/linux/nsproxy.h  |    2 +-
>  include/linux/syscalls.h |    2 +-
>  kernel/fork.c            |   19 +++++++++++++++----
>  kernel/nsproxy.c         |    7 ++++---
>  4 files changed, 21 insertions(+), 9 deletions(-)
> 
> Index: 2.6.24-rc5-mm1/include/linux/nsproxy.h
> ===================================================================
> --- 2.6.24-rc5-mm1.orig/include/linux/nsproxy.h
> +++ 2.6.24-rc5-mm1/include/linux/nsproxy.h
> @@ -68,7 +68,7 @@ void exit_task_namespaces(struct task_st
>  void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
>  void free_nsproxy(struct nsproxy *ns);
>  int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
> -       struct fs_struct *);
> +               struct fs_struct *, struct long_clone_arg *carg);
> 
>  static inline void put_nsproxy(struct nsproxy *ns)
>  {
> Index: 2.6.24-rc5-mm1/include/linux/syscalls.h
> ===================================================================
> --- 2.6.24-rc5-mm1.orig/include/linux/syscalls.h
> +++ 2.6.24-rc5-mm1/include/linux/syscalls.h
> @@ -585,7 +585,7 @@ asmlinkage long compat_sys_newfstatat(un
>                                       int flag);
>  asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
>                                    int flags, int mode);
> -asmlinkage long sys_unshare(unsigned long unshare_flags);
> +asmlinkage long sys_unshare(unsigned long unshare_flags, int __user *flagptr);

Hmm, why not properly call this a struct long_clone_arg __user *flagptr
here?

> 
>  asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
>                            int fd_out, loff_t __user *off_out,
> Index: 2.6.24-rc5-mm1/kernel/fork.c
> ===================================================================
> --- 2.6.24-rc5-mm1.orig/kernel/fork.c
> +++ 2.6.24-rc5-mm1/kernel/fork.c
> @@ -1700,7 +1700,7 @@ static int unshare_semundo(unsigned long
>   * constructed. Here we are modifying the current, active,
>   * task_struct.
>   */
> -asmlinkage long sys_unshare(unsigned long unshare_flags)
> +asmlinkage long sys_unshare(unsigned long unshare_flags, int __user *flagptr)
>  {
>         int err = 0;
>         struct fs_struct *fs, *new_fs = NULL;
> @@ -1709,6 +1709,7 @@ asmlinkage long sys_unshare(unsigned lon
>         struct files_struct *fd, *new_fd = NULL;
>         struct sem_undo_list *new_ulist = NULL;
>         struct nsproxy *new_nsproxy = NULL;
> +       struct long_clone_arg *carg = NULL;
> 
>         check_unshare_flags(&unshare_flags);
> 
> @@ -1717,11 +1718,19 @@ asmlinkage long sys_unshare(unsigned lon
>         if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
>                                 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
>                                 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
> -                               CLONE_NEWNET))
> +                               CLONE_NEWNET|CLONE_NEWCLONE))
>                 goto bad_unshare_out;
> 
> +       if (unshare_flags & CLONE_NEWCLONE) {
> +               carg = get_long_clone_arg(flagptr);
> +               if (IS_ERR(carg)) {
> +                       err = PTR_ERR(carg);
> +                       goto bad_unshare_out;
> +               }
> +       }
> +
>         if ((err = unshare_thread(unshare_flags)))
> -               goto bad_unshare_out;
> +               goto bad_unshare_cleanup_carg;
>         if ((err = unshare_fs(unshare_flags, &new_fs)))
>                 goto bad_unshare_cleanup_thread;
>         if ((err = unshare_sighand(unshare_flags, &new_sigh)))
> @@ -1733,7 +1742,7 @@ asmlinkage long sys_unshare(unsigned lon
>         if ((err = unshare_semundo(unshare_flags, &new_ulist)))
>                 goto bad_unshare_cleanup_fd;
>         if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
> -                       new_fs)))
> +                       new_fs, carg)))
>                 goto bad_unshare_cleanup_semundo;
> 
>         if (new_fs ||  new_mm || new_fd || new_ulist || new_nsproxy) {
> @@ -1791,6 +1800,8 @@ bad_unshare_cleanup_fs:
>                 put_fs_struct(new_fs);
> 
>  bad_unshare_cleanup_thread:
> +bad_unshare_cleanup_carg:
> +       kfree(carg);
>  bad_unshare_out:
>         return err;
>  }
> Index: 2.6.24-rc5-mm1/kernel/nsproxy.c
> ===================================================================
> --- 2.6.24-rc5-mm1.orig/kernel/nsproxy.c
> +++ 2.6.24-rc5-mm1/kernel/nsproxy.c
> @@ -182,19 +182,20 @@ void free_nsproxy(struct nsproxy *ns)
>   * On success, returns the new nsproxy.
>   */
>  int unshare_nsproxy_namespaces(unsigned long unshare_flags,
> -               struct nsproxy **new_nsp, struct fs_struct *new_fs)
> +               struct nsproxy **new_nsp, struct fs_struct *new_fs,
> +               struct long_clone_arg *carg)
>  {
>         int err = 0;
> 
>         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
> -                              CLONE_NEWUSER | CLONE_NEWNET)))
> +                              CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWCLONE)))
>                 return 0;
> 
>         if (!capable(CAP_SYS_ADMIN))
>                 return -EPERM;
> 
>         *new_nsp = create_new_namespaces(unshare_flags, current,
> -                               new_fs ? new_fs : current->fs, NULL);
> +                               new_fs ? new_fs : current->fs, carg);
>         if (IS_ERR(*new_nsp)) {
>                 err = PTR_ERR(*new_nsp);
>                 goto out;


More information about the Containers mailing list