summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'openvz-sources/026.009/0100_patch-026test009-core.patch')
-rw-r--r-- openvz-sources/026.009/0100_patch-026test009-core.patch 76411
1 files changed, 0 insertions, 76411 deletions
diff --git a/openvz-sources/026.009/0100_patch-026test009-core.patch b/openvz-sources/026.009/0100_patch-026test009-core.patch
deleted file mode 100644
index de10b67..0000000
--- a/openvz-sources/026.009/0100_patch-026test009-core.patch
+++ /dev/null
@@ -1,76411 +0,0 @@
-diff -upr linux-2.6.16.orig/COPYING.SWsoft linux-2.6.16-026test009/COPYING.SWsoft
---- linux-2.6.16.orig/COPYING.SWsoft 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/COPYING.SWsoft 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,350 @@
-+
-+Nothing in this license should be construed as a grant by SWsoft of any rights
-+beyond the rights specified in the GNU General Public License, and nothing in
-+this license should be construed as a waiver by SWsoft of its patent, copyright
-+and/or trademark rights, beyond the waiver required by the GNU General Public
-+License. This license is expressly inapplicable to any product that is not
-+within the scope of the GNU General Public License
-+
-+----------------------------------------
-+
-+ GNU GENERAL PUBLIC LICENSE
-+ Version 2, June 1991
-+
-+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ Everyone is permitted to copy and distribute verbatim copies
-+ of this license document, but changing it is not allowed.
-+
-+ Preamble
-+
-+ The licenses for most software are designed to take away your
-+freedom to share and change it. By contrast, the GNU General Public
-+License is intended to guarantee your freedom to share and change free
-+software--to make sure the software is free for all its users. This
-+General Public License applies to most of the Free Software
-+Foundation's software and to any other program whose authors commit to
-+using it. (Some other Free Software Foundation software is covered by
-+the GNU Library General Public License instead.) You can apply it to
-+your programs, too.
-+
-+ When we speak of free software, we are referring to freedom, not
-+price. Our General Public Licenses are designed to make sure that you
-+have the freedom to distribute copies of free software (and charge for
-+this service if you wish), that you receive source code or can get it
-+if you want it, that you can change the software or use pieces of it
-+in new free programs; and that you know you can do these things.
-+
-+ To protect your rights, we need to make restrictions that forbid
-+anyone to deny you these rights or to ask you to surrender the rights.
-+These restrictions translate to certain responsibilities for you if you
-+distribute copies of the software, or if you modify it.
-+
-+ For example, if you distribute copies of such a program, whether
-+gratis or for a fee, you must give the recipients all the rights that
-+you have. You must make sure that they, too, receive or can get the
-+source code. And you must show them these terms so they know their
-+rights.
-+
-+ We protect your rights with two steps: (1) copyright the software, and
-+(2) offer you this license which gives you legal permission to copy,
-+distribute and/or modify the software.
-+
-+ Also, for each author's protection and ours, we want to make certain
-+that everyone understands that there is no warranty for this free
-+software. If the software is modified by someone else and passed on, we
-+want its recipients to know that what they have is not the original, so
-+that any problems introduced by others will not reflect on the original
-+authors' reputations.
-+
-+ Finally, any free program is threatened constantly by software
-+patents. We wish to avoid the danger that redistributors of a free
-+program will individually obtain patent licenses, in effect making the
-+program proprietary. To prevent this, we have made it clear that any
-+patent must be licensed for everyone's free use or not licensed at all.
-+
-+ The precise terms and conditions for copying, distribution and
-+modification follow.
-+
-+ GNU GENERAL PUBLIC LICENSE
-+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-+
-+ 0. This License applies to any program or other work which contains
-+a notice placed by the copyright holder saying it may be distributed
-+under the terms of this General Public License. The "Program", below,
-+refers to any such program or work, and a "work based on the Program"
-+means either the Program or any derivative work under copyright law:
-+that is to say, a work containing the Program or a portion of it,
-+either verbatim or with modifications and/or translated into another
-+language. (Hereinafter, translation is included without limitation in
-+the term "modification".) Each licensee is addressed as "you".
-+
-+Activities other than copying, distribution and modification are not
-+covered by this License; they are outside its scope. The act of
-+running the Program is not restricted, and the output from the Program
-+is covered only if its contents constitute a work based on the
-+Program (independent of having been made by running the Program).
-+Whether that is true depends on what the Program does.
-+
-+ 1. You may copy and distribute verbatim copies of the Program's
-+source code as you receive it, in any medium, provided that you
-+conspicuously and appropriately publish on each copy an appropriate
-+copyright notice and disclaimer of warranty; keep intact all the
-+notices that refer to this License and to the absence of any warranty;
-+and give any other recipients of the Program a copy of this License
-+along with the Program.
-+
-+You may charge a fee for the physical act of transferring a copy, and
-+you may at your option offer warranty protection in exchange for a fee.
-+
-+ 2. You may modify your copy or copies of the Program or any portion
-+of it, thus forming a work based on the Program, and copy and
-+distribute such modifications or work under the terms of Section 1
-+above, provided that you also meet all of these conditions:
-+
-+ a) You must cause the modified files to carry prominent notices
-+ stating that you changed the files and the date of any change.
-+
-+ b) You must cause any work that you distribute or publish, that in
-+ whole or in part contains or is derived from the Program or any
-+ part thereof, to be licensed as a whole at no charge to all third
-+ parties under the terms of this License.
-+
-+ c) If the modified program normally reads commands interactively
-+ when run, you must cause it, when started running for such
-+ interactive use in the most ordinary way, to print or display an
-+ announcement including an appropriate copyright notice and a
-+ notice that there is no warranty (or else, saying that you provide
-+ a warranty) and that users may redistribute the program under
-+ these conditions, and telling the user how to view a copy of this
-+ License. (Exception: if the Program itself is interactive but
-+ does not normally print such an announcement, your work based on
-+ the Program is not required to print an announcement.)
-+
-+These requirements apply to the modified work as a whole. If
-+identifiable sections of that work are not derived from the Program,
-+and can be reasonably considered independent and separate works in
-+themselves, then this License, and its terms, do not apply to those
-+sections when you distribute them as separate works. But when you
-+distribute the same sections as part of a whole which is a work based
-+on the Program, the distribution of the whole must be on the terms of
-+this License, whose permissions for other licensees extend to the
-+entire whole, and thus to each and every part regardless of who wrote it.
-+
-+Thus, it is not the intent of this section to claim rights or contest
-+your rights to work written entirely by you; rather, the intent is to
-+exercise the right to control the distribution of derivative or
-+collective works based on the Program.
-+
-+In addition, mere aggregation of another work not based on the Program
-+with the Program (or with a work based on the Program) on a volume of
-+a storage or distribution medium does not bring the other work under
-+the scope of this License.
-+
-+ 3. You may copy and distribute the Program (or a work based on it,
-+under Section 2) in object code or executable form under the terms of
-+Sections 1 and 2 above provided that you also do one of the following:
-+
-+ a) Accompany it with the complete corresponding machine-readable
-+ source code, which must be distributed under the terms of Sections
-+ 1 and 2 above on a medium customarily used for software interchange; or,
-+
-+ b) Accompany it with a written offer, valid for at least three
-+ years, to give any third party, for a charge no more than your
-+ cost of physically performing source distribution, a complete
-+ machine-readable copy of the corresponding source code, to be
-+ distributed under the terms of Sections 1 and 2 above on a medium
-+ customarily used for software interchange; or,
-+
-+ c) Accompany it with the information you received as to the offer
-+ to distribute corresponding source code. (This alternative is
-+ allowed only for noncommercial distribution and only if you
-+ received the program in object code or executable form with such
-+ an offer, in accord with Subsection b above.)
-+
-+The source code for a work means the preferred form of the work for
-+making modifications to it. For an executable work, complete source
-+code means all the source code for all modules it contains, plus any
-+associated interface definition files, plus the scripts used to
-+control compilation and installation of the executable. However, as a
-+special exception, the source code distributed need not include
-+anything that is normally distributed (in either source or binary
-+form) with the major components (compiler, kernel, and so on) of the
-+operating system on which the executable runs, unless that component
-+itself accompanies the executable.
-+
-+If distribution of executable or object code is made by offering
-+access to copy from a designated place, then offering equivalent
-+access to copy the source code from the same place counts as
-+distribution of the source code, even though third parties are not
-+compelled to copy the source along with the object code.
-+
-+ 4. You may not copy, modify, sublicense, or distribute the Program
-+except as expressly provided under this License. Any attempt
-+otherwise to copy, modify, sublicense or distribute the Program is
-+void, and will automatically terminate your rights under this License.
-+However, parties who have received copies, or rights, from you under
-+this License will not have their licenses terminated so long as such
-+parties remain in full compliance.
-+
-+ 5. You are not required to accept this License, since you have not
-+signed it. However, nothing else grants you permission to modify or
-+distribute the Program or its derivative works. These actions are
-+prohibited by law if you do not accept this License. Therefore, by
-+modifying or distributing the Program (or any work based on the
-+Program), you indicate your acceptance of this License to do so, and
-+all its terms and conditions for copying, distributing or modifying
-+the Program or works based on it.
-+
-+ 6. Each time you redistribute the Program (or any work based on the
-+Program), the recipient automatically receives a license from the
-+original licensor to copy, distribute or modify the Program subject to
-+these terms and conditions. You may not impose any further
-+restrictions on the recipients' exercise of the rights granted herein.
-+You are not responsible for enforcing compliance by third parties to
-+this License.
-+
-+ 7. If, as a consequence of a court judgment or allegation of patent
-+infringement or for any other reason (not limited to patent issues),
-+conditions are imposed on you (whether by court order, agreement or
-+otherwise) that contradict the conditions of this License, they do not
-+excuse you from the conditions of this License. If you cannot
-+distribute so as to satisfy simultaneously your obligations under this
-+License and any other pertinent obligations, then as a consequence you
-+may not distribute the Program at all. For example, if a patent
-+license would not permit royalty-free redistribution of the Program by
-+all those who receive copies directly or indirectly through you, then
-+the only way you could satisfy both it and this License would be to
-+refrain entirely from distribution of the Program.
-+
-+If any portion of this section is held invalid or unenforceable under
-+any particular circumstance, the balance of the section is intended to
-+apply and the section as a whole is intended to apply in other
-+circumstances.
-+
-+It is not the purpose of this section to induce you to infringe any
-+patents or other property right claims or to contest validity of any
-+such claims; this section has the sole purpose of protecting the
-+integrity of the free software distribution system, which is
-+implemented by public license practices. Many people have made
-+generous contributions to the wide range of software distributed
-+through that system in reliance on consistent application of that
-+system; it is up to the author/donor to decide if he or she is willing
-+to distribute software through any other system and a licensee cannot
-+impose that choice.
-+
-+This section is intended to make thoroughly clear what is believed to
-+be a consequence of the rest of this License.
-+
-+ 8. If the distribution and/or use of the Program is restricted in
-+certain countries either by patents or by copyrighted interfaces, the
-+original copyright holder who places the Program under this License
-+may add an explicit geographical distribution limitation excluding
-+those countries, so that distribution is permitted only in or among
-+countries not thus excluded. In such case, this License incorporates
-+the limitation as if written in the body of this License.
-+
-+ 9. The Free Software Foundation may publish revised and/or new versions
-+of the General Public License from time to time. Such new versions will
-+be similar in spirit to the present version, but may differ in detail to
-+address new problems or concerns.
-+
-+Each version is given a distinguishing version number. If the Program
-+specifies a version number of this License which applies to it and "any
-+later version", you have the option of following the terms and conditions
-+either of that version or of any later version published by the Free
-+Software Foundation. If the Program does not specify a version number of
-+this License, you may choose any version ever published by the Free Software
-+Foundation.
-+
-+ 10. If you wish to incorporate parts of the Program into other free
-+programs whose distribution conditions are different, write to the author
-+to ask for permission. For software which is copyrighted by the Free
-+Software Foundation, write to the Free Software Foundation; we sometimes
-+make exceptions for this. Our decision will be guided by the two goals
-+of preserving the free status of all derivatives of our free software and
-+of promoting the sharing and reuse of software generally.
-+
-+ NO WARRANTY
-+
-+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-+REPAIR OR CORRECTION.
-+
-+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-+POSSIBILITY OF SUCH DAMAGES.
-+
-+ END OF TERMS AND CONDITIONS
-+
-+ How to Apply These Terms to Your New Programs
-+
-+ If you develop a new program, and you want it to be of the greatest
-+possible use to the public, the best way to achieve this is to make it
-+free software which everyone can redistribute and change under these terms.
-+
-+ To do so, attach the following notices to the program. It is safest
-+to attach them to the start of each source file to most effectively
-+convey the exclusion of warranty; and each file should have at least
-+the "copyright" line and a pointer to where the full notice is found.
-+
-+ <one line to give the program's name and a brief idea of what it does.>
-+ Copyright (C) <year> <name of author>
-+
-+ This program is free software; you can redistribute it and/or modify
-+ it under the terms of the GNU General Public License as published by
-+ the Free Software Foundation; either version 2 of the License, or
-+ (at your option) any later version.
-+
-+ This program is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ GNU General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with this program; if not, write to the Free Software
-+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+
-+
-+Also add information on how to contact you by electronic and paper mail.
-+
-+If the program is interactive, make it output a short notice like this
-+when it starts in an interactive mode:
-+
-+ Gnomovision version 69, Copyright (C) year name of author
-+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-+ This is free software, and you are welcome to redistribute it
-+ under certain conditions; type `show c' for details.
-+
-+The hypothetical commands `show w' and `show c' should show the appropriate
-+parts of the General Public License. Of course, the commands you use may
-+be called something other than `show w' and `show c'; they could even be
-+mouse-clicks or menu items--whatever suits your program.
-+
-+You should also get your employer (if you work as a programmer) or your
-+school, if any, to sign a "copyright disclaimer" for the program, if
-+necessary. Here is a sample; alter the names:
-+
-+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
-+
-+ <signature of Ty Coon>, 1 April 1989
-+ Ty Coon, President of Vice
-+
-+This General Public License does not permit incorporating your program into
-+proprietary programs. If your program is a subroutine library, you may
-+consider it more useful to permit linking proprietary applications with the
-+library. If this is what you want to do, use the GNU Library General
-+Public License instead of this License.
-diff -upr linux-2.6.16.orig/Makefile linux-2.6.16-026test009/Makefile
---- linux-2.6.16.orig/Makefile 2006-04-19 15:02:13.000000000 +0400
-+++ linux-2.6.16-026test009/Makefile 2006-04-19 15:02:13.000000000 +0400
-@@ -1,7 +1,7 @@
- VERSION = 2
- PATCHLEVEL = 6
- SUBLEVEL = 16
--EXTRAVERSION =
-+EXTRAVERSION = -026test009
- NAME=Sliding Snow Leopard
-
- # *DOCUMENTATION*
-diff -upr linux-2.6.16.orig/arch/alpha/kernel/setup.c linux-2.6.16-026test009/arch/alpha/kernel/setup.c
---- linux-2.6.16.orig/arch/alpha/kernel/setup.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/alpha/kernel/setup.c 2006-04-19 15:02:11.000000000 +0400
-@@ -24,6 +24,7 @@
- #include <linux/config.h> /* CONFIG_ALPHA_LCA etc */
- #include <linux/mc146818rtc.h>
- #include <linux/console.h>
-+#include <linux/cpu.h>
- #include <linux/errno.h>
- #include <linux/init.h>
- #include <linux/string.h>
-@@ -477,6 +478,22 @@ page_is_ram(unsigned long pfn)
- #undef PFN_PHYS
- #undef PFN_MAX
-
-+static int __init
-+register_cpus(void)
-+{
-+ int i;
-+
-+ for_each_possible_cpu(i) {
-+ struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL);
-+ if (!p)
-+ return -ENOMEM;
-+ register_cpu(p, i, NULL);
-+ }
-+ return 0;
-+}
-+
-+arch_initcall(register_cpus);
-+
- void __init
- setup_arch(char **cmdline_p)
- {
-diff -upr linux-2.6.16.orig/arch/alpha/kernel/smp.c linux-2.6.16-026test009/arch/alpha/kernel/smp.c
---- linux-2.6.16.orig/arch/alpha/kernel/smp.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/alpha/kernel/smp.c 2006-04-19 15:02:11.000000000 +0400
-@@ -439,7 +439,7 @@ setup_smp(void)
- if ((cpu->flags & 0x1cc) == 0x1cc) {
- smp_num_probed++;
- /* Assume here that "whami" == index */
-- cpu_set(i, cpu_possible_map);
-+ cpu_set(i, cpu_present_mask);
- cpu->pal_revision = boot_cpu_palrev;
- }
-
-@@ -450,9 +450,8 @@ setup_smp(void)
- }
- } else {
- smp_num_probed = 1;
-- cpu_set(boot_cpuid, cpu_possible_map);
-+ cpu_set(boot_cpuid, cpu_present_mask);
- }
-- cpu_present_mask = cpumask_of_cpu(boot_cpuid);
-
- printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n",
- smp_num_probed, cpu_possible_map.bits[0]);
-@@ -488,9 +487,8 @@ void __devinit
- smp_prepare_boot_cpu(void)
- {
- /*
-- * Mark the boot cpu (current cpu) as both present and online
-+ * Mark the boot cpu (current cpu) as online
- */
-- cpu_set(smp_processor_id(), cpu_present_mask);
- cpu_set(smp_processor_id(), cpu_online_map);
- }
-
-diff -upr linux-2.6.16.orig/arch/arm/kernel/smp.c linux-2.6.16-026test009/arch/arm/kernel/smp.c
---- linux-2.6.16.orig/arch/arm/kernel/smp.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/arm/kernel/smp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -197,7 +197,7 @@ int __cpuexit __cpu_disable(void)
- local_flush_tlb_all();
-
- read_lock(&tasklist_lock);
-- for_each_process(p) {
-+ for_each_process_all(p) {
- if (p->mm)
- cpu_clear(cpu, p->mm->cpu_vm_mask);
- }
-diff -upr linux-2.6.16.orig/arch/frv/mm/mmu-context.c linux-2.6.16-026test009/arch/frv/mm/mmu-context.c
---- linux-2.6.16.orig/arch/frv/mm/mmu-context.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/frv/mm/mmu-context.c 2006-04-19 15:02:12.000000000 +0400
-@@ -181,7 +181,7 @@ int cxn_pin_by_pid(pid_t pid)
-
- /* get a handle on the mm_struct */
- read_lock(&tasklist_lock);
-- tsk = find_task_by_pid(pid);
-+ tsk = find_task_by_pid_ve(pid);
- if (tsk) {
- ret = -EINVAL;
-
-diff -upr linux-2.6.16.orig/arch/i386/Kconfig linux-2.6.16-026test009/arch/i386/Kconfig
---- linux-2.6.16.orig/arch/i386/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -1071,12 +1071,16 @@ endmenu
-
- source "arch/i386/Kconfig.debug"
-
-+source "kernel/Kconfig.openvz"
-+
- source "security/Kconfig"
-
- source "crypto/Kconfig"
-
- source "lib/Kconfig"
-
-+source "kernel/ub/Kconfig"
-+
- #
- # Use the generic interrupt handling code in kernel/irq/:
- #
-diff -upr linux-2.6.16.orig/arch/i386/kernel/apic.c linux-2.6.16-026test009/arch/i386/kernel/apic.c
---- linux-2.6.16.orig/arch/i386/kernel/apic.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/apic.c 2006-04-19 15:02:12.000000000 +0400
-@@ -1177,6 +1177,7 @@ inline void smp_local_timer_interrupt(st
- fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
- {
- int cpu = smp_processor_id();
-+ struct ve_struct *ve;
-
- /*
- * the NMI deadlock-detector uses this.
-@@ -1193,9 +1194,11 @@ fastcall void smp_apic_timer_interrupt(s
- * Besides, if we don't timer interrupts ignore the global
- * interrupt lock, which is the WrongThing (tm) to do.
- */
-+ ve = set_exec_env(get_ve0());
- irq_enter();
- smp_local_timer_interrupt(regs);
- irq_exit();
-+ (void)set_exec_env(ve);
- }
-
- #ifndef CONFIG_SMP
-diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/Kconfig linux-2.6.16-026test009/arch/i386/kernel/cpu/cpufreq/Kconfig
---- linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/cpu/cpufreq/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -203,6 +203,7 @@ config X86_LONGRUN
- config X86_LONGHAUL
- tristate "VIA Cyrix III Longhaul"
- select CPU_FREQ_TABLE
-+ depends on BROKEN
- help
- This adds the CPUFreq driver for VIA Samuel/CyrixIII,
- VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
-diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c linux-2.6.16-026test009/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
---- linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2006-04-19 15:02:11.000000000 +0400
-@@ -244,7 +244,7 @@ static int cpufreq_p4_cpu_init(struct cp
- for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
- if ((i<2) && (has_N44_O17_errata[policy->cpu]))
- p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-- else if (has_N60_errata[policy->cpu] && p4clockmod_table[i].frequency < 2000000)
-+ else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000)
- p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
- else
- p4clockmod_table[i].frequency = (stock_freq * i)/8;
-diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c linux-2.6.16-026test009/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c
---- linux-2.6.16.orig/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c 2006-04-19 15:02:11.000000000 +0400
-@@ -75,7 +75,9 @@ static int speedstep_smi_ownership (void
- __asm__ __volatile__(
- "out %%al, (%%dx)\n"
- : "=D" (result)
-- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic)
-+ : "a" (command), "b" (function), "c" (0), "d" (smi_port),
-+ "D" (0), "S" (magic)
-+ : "memory"
- );
-
- dprintk("result is %x\n", result);
-diff -upr linux-2.6.16.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.16-026test009/arch/i386/kernel/cpu/mtrr/if.c
---- linux-2.6.16.orig/arch/i386/kernel/cpu/mtrr/if.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/cpu/mtrr/if.c 2006-04-19 15:02:12.000000000 +0400
-@@ -392,7 +392,7 @@ static int __init mtrr_if_init(void)
- return -ENODEV;
-
- proc_root_mtrr =
-- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
-+ create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL);
- if (proc_root_mtrr) {
- proc_root_mtrr->owner = THIS_MODULE;
- proc_root_mtrr->proc_fops = &mtrr_fops;
-diff -upr linux-2.6.16.orig/arch/i386/kernel/dmi_scan.c linux-2.6.16-026test009/arch/i386/kernel/dmi_scan.c
---- linux-2.6.16.orig/arch/i386/kernel/dmi_scan.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/dmi_scan.c 2006-04-19 15:02:11.000000000 +0400
-@@ -106,7 +106,7 @@ static void __init dmi_save_devices(stru
- struct dmi_device *dev;
-
- for (i = 0; i < count; i++) {
-- char *d = ((char *) dm) + (i * 2);
-+ char *d = (char *)(dm + 1) + (i * 2);
-
- /* Skip disabled device */
- if ((*d & 0x80) == 0)
-diff -upr linux-2.6.16.orig/arch/i386/kernel/irq.c linux-2.6.16-026test009/arch/i386/kernel/irq.c
---- linux-2.6.16.orig/arch/i386/kernel/irq.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/irq.c 2006-04-19 15:02:12.000000000 +0400
-@@ -59,7 +59,9 @@ fastcall unsigned int do_IRQ(struct pt_r
- union irq_ctx *curctx, *irqctx;
- u32 *isp;
- #endif
-+ struct ve_struct *ve;
-
-+ ve = set_exec_env(get_ve0());
- irq_enter();
- #ifdef CONFIG_DEBUG_STACKOVERFLOW
- /* Debugging check for stack overflow: is there less than 1KB free? */
-@@ -108,6 +110,7 @@ fastcall unsigned int do_IRQ(struct pt_r
- __do_IRQ(irq, regs);
-
- irq_exit();
-+ (void)set_exec_env(ve);
-
- return 1;
- }
-diff -upr linux-2.6.16.orig/arch/i386/kernel/ldt.c linux-2.6.16-026test009/arch/i386/kernel/ldt.c
---- linux-2.6.16.orig/arch/i386/kernel/ldt.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/ldt.c 2006-04-19 15:02:12.000000000 +0400
-@@ -13,6 +13,7 @@
- #include <linux/smp_lock.h>
- #include <linux/vmalloc.h>
- #include <linux/slab.h>
-+#include <linux/module.h>
-
- #include <asm/uaccess.h>
- #include <asm/system.h>
-@@ -20,6 +21,8 @@
- #include <asm/desc.h>
- #include <asm/mmu_context.h>
-
-+#include <ub/ub_mem.h>
-+
- #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
- static void flush_ldt(void *null)
- {
-@@ -39,9 +42,9 @@ static int alloc_ldt(mm_context_t *pc, i
- oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
-+ newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
- else
-- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-+ newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-@@ -105,6 +108,7 @@ int init_new_context(struct task_struct
- }
- return retval;
- }
-+EXPORT_SYMBOL_GPL(init_new_context);
-
- /*
- * No need to lock the MM as we are the last user
-@@ -251,3 +255,5 @@ asmlinkage int sys_modify_ldt(int func,
- }
- return ret;
- }
-+
-+EXPORT_SYMBOL_GPL(default_ldt);
-diff -upr linux-2.6.16.orig/arch/i386/kernel/nmi.c linux-2.6.16-026test009/arch/i386/kernel/nmi.c
---- linux-2.6.16.orig/arch/i386/kernel/nmi.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/nmi.c 2006-04-19 15:02:11.000000000 +0400
-@@ -521,7 +521,22 @@ void touch_nmi_watchdog (void)
-
- extern void die_nmi(struct pt_regs *, const char *msg);
-
--void nmi_watchdog_tick (struct pt_regs * regs)
-+void smp_show_regs(struct pt_regs *regs, void *info)
-+{
-+ static DEFINE_SPINLOCK(show_regs_lock);
-+
-+ if (regs == NULL)
-+ return;
-+
-+ bust_spinlocks(1);
-+ spin_lock(&show_regs_lock);
-+ printk("----------- IPI show regs -----------");
-+ show_regs(regs);
-+ spin_unlock(&show_regs_lock);
-+ bust_spinlocks(0);
-+}
-+
-+void nmi_watchdog_tick(struct pt_regs *regs)
- {
-
- /*
-diff -upr linux-2.6.16.orig/arch/i386/kernel/process.c linux-2.6.16-026test009/arch/i386/kernel/process.c
---- linux-2.6.16.orig/arch/i386/kernel/process.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -59,6 +59,7 @@
- #include <asm/cpu.h>
-
- asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-+EXPORT_SYMBOL_GPL(ret_from_fork);
-
- static int hlt_counter;
-
-@@ -289,11 +290,14 @@ __setup("idle=", idle_setup);
- void show_regs(struct pt_regs * regs)
- {
- unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-+ extern int die_counter;
-
- printk("\n");
-- printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-+ printk("Pid: %d, comm: %20s, oopses: %d\n",
-+ current->pid, current->comm, die_counter);
- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
-- print_symbol("EIP is at %s\n", regs->eip);
-+ if (decode_call_traces)
-+ print_symbol("EIP is at %s\n", regs->eip);
-
- if (user_mode(regs))
- printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-@@ -314,6 +318,8 @@ void show_regs(struct pt_regs * regs)
- cr4 = read_cr4_safe();
- printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
- show_trace(NULL, &regs->esp);
-+ if (!decode_call_traces)
-+ printk(" EIP: [<%08lx>]\n",regs->eip);
- }
-
- /*
-@@ -339,6 +345,13 @@ int kernel_thread(int (*fn)(void *), voi
- {
- struct pt_regs regs;
-
-+ /* Don't allow kernel_thread() inside VE */
-+ if (!ve_is_super(get_exec_env())) {
-+ printk("kernel_thread call inside VE\n");
-+ dump_stack();
-+ return -EPERM;
-+ }
-+
- memset(&regs, 0, sizeof(regs));
-
- regs.ebx = (unsigned long) fn;
-diff -upr linux-2.6.16.orig/arch/i386/kernel/ptrace.c linux-2.6.16-026test009/arch/i386/kernel/ptrace.c
---- linux-2.6.16.orig/arch/i386/kernel/ptrace.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/ptrace.c 2006-04-19 15:02:12.000000000 +0400
-@@ -706,7 +706,9 @@ int do_syscall_trace(struct pt_regs *reg
- /* the 0x80 provides a way for the tracing parent to distinguish
- between a syscall stop and SIGTRAP delivery */
- /* Note that the debugger could change the result of test_thread_flag!*/
-+ set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY);
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0));
-+ clear_pn_state(current);
-
- /*
- * this isn't the same as continuing with a signal, but it will do
-diff -upr linux-2.6.16.orig/arch/i386/kernel/signal.c linux-2.6.16-026test009/arch/i386/kernel/signal.c
---- linux-2.6.16.orig/arch/i386/kernel/signal.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/signal.c 2006-04-19 15:02:11.000000000 +0400
-@@ -582,7 +582,7 @@ static void fastcall do_signal(struct pt
- if (!user_mode(regs))
- return;
-
-- if (try_to_freeze())
-+ if (try_to_freeze() && !signal_pending(current))
- goto no_signal;
-
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
-diff -upr linux-2.6.16.orig/arch/i386/kernel/smp.c linux-2.6.16-026test009/arch/i386/kernel/smp.c
---- linux-2.6.16.orig/arch/i386/kernel/smp.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/smp.c 2006-04-19 15:02:11.000000000 +0400
-@@ -21,6 +21,7 @@
- #include <linux/cpu.h>
- #include <linux/module.h>
-
-+#include <asm/nmi.h>
- #include <asm/mtrr.h>
- #include <asm/tlbflush.h>
- #include <mach_apic.h>
-@@ -566,6 +567,89 @@ int smp_call_function (void (*func) (voi
- }
- EXPORT_SYMBOL(smp_call_function);
-
-+static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED;
-+static struct nmi_call_data_struct {
-+ smp_nmi_function func;
-+ void *info;
-+ atomic_t started;
-+ atomic_t finished;
-+ cpumask_t cpus_called;
-+ int wait;
-+} *nmi_call_data;
-+
-+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
-+{
-+ smp_nmi_function func;
-+ void *info;
-+ int wait;
-+
-+ func = nmi_call_data->func;
-+ info = nmi_call_data->info;
-+ wait = nmi_call_data->wait;
-+ ack_APIC_irq();
-+ /* make sure func() runs at most once on this cpu per request */
-+ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
-+ return 0;
-+ /*
-+ * notify initiating CPU that I've grabbed the data and am
-+ * about to execute the function
-+ */
-+ mb();
-+ atomic_inc(&nmi_call_data->started);
-+ /* nmi_call_data may go out of scope here unless wait is set */
-+ irq_enter();
-+ func(regs, info);
-+ irq_exit();
-+ if (wait)
-+ atomic_inc(&nmi_call_data->finished);
-+
-+ return 0;
-+}
-+
-+/*
-+ * This function tries to call func(regs, info) on every other online cpu.
-+ * Func must be fast and non-blocking.
-+ * May be called with disabled interrupts and from any context.
-+ */
-+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
-+{
-+ struct nmi_call_data_struct data;
-+ int cpus;
-+
-+ cpus = num_online_cpus() - 1;
-+ if (!cpus)
-+ return 0;
-+
-+ data.func = func;
-+ data.info = info;
-+ data.wait = wait;
-+ atomic_set(&data.started, 0);
-+ atomic_set(&data.finished, 0);
-+ cpus_clear(data.cpus_called);
-+ /* prevent this cpu from calling func if NMI happens */
-+ cpu_set(smp_processor_id(), data.cpus_called);
-+
-+ if (!spin_trylock(&nmi_call_lock))
-+ return -1;
-+
-+ nmi_call_data = &data;
-+ set_nmi_ipi_callback(smp_nmi_callback);
-+ mb();
-+
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_allbutself(APIC_DM_NMI);
-+ while (atomic_read(&data.started) != cpus)
-+ barrier();
-+
-+ unset_nmi_ipi_callback();
-+ if (wait)
-+ while (atomic_read(&data.finished) != cpus)
-+ barrier();
-+ spin_unlock(&nmi_call_lock);
-+
-+ return 0;
-+}
-+
- static void stop_this_cpu (void * dummy)
- {
- /*
-diff -upr linux-2.6.16.orig/arch/i386/kernel/smpboot.c linux-2.6.16-026test009/arch/i386/kernel/smpboot.c
---- linux-2.6.16.orig/arch/i386/kernel/smpboot.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/smpboot.c 2006-04-19 15:02:12.000000000 +0400
-@@ -317,6 +317,10 @@ static void __init synchronize_tsc_bp (v
- }
- if (!buggy)
- printk("passed.\n");
-+#ifdef CONFIG_VE
-+ /* TSC reset. kill whatever might rely on old values */
-+ VE_TASK_INFO(current)->wakeup_stamp = 0;
-+#endif
- }
-
- static void __init synchronize_tsc_ap (void)
-@@ -342,6 +346,10 @@ static void __init synchronize_tsc_ap (v
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
- }
-+#ifdef CONFIG_VE
-+ /* TSC reset. kill whatever might rely on old values */
-+ VE_TASK_INFO(current)->wakeup_stamp = 0;
-+#endif
- }
- #undef NR_LOOPS
-
-@@ -908,6 +916,13 @@ static int __devinit do_boot_cpu(int api
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
- idle->thread.eip = (unsigned long) start_secondary;
-+
-+#ifdef CONFIG_VE
-+ /* Cosmetic: sleep_time won't be changed afterwards for the idle
-+ * thread; keep it 0 rather than -cycles. */
-+ VE_TASK_INFO(idle)->sleep_time = 0;
-+#endif
-+
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
-
-diff -upr linux-2.6.16.orig/arch/i386/kernel/sys_i386.c linux-2.6.16-026test009/arch/i386/kernel/sys_i386.c
---- linux-2.6.16.orig/arch/i386/kernel/sys_i386.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/sys_i386.c 2006-04-19 15:02:12.000000000 +0400
-@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn
- if (!name)
- return -EFAULT;
- down_read(&uts_sem);
-- err=copy_to_user(name, &system_utsname, sizeof (*name));
-+ err=copy_to_user(name, &ve_utsname, sizeof (*name));
- up_read(&uts_sem);
- return err?-EFAULT:0;
- }
-@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol
-
- down_read(&uts_sem);
-
-- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-+ error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN);
- error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-+ error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN);
- error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-+ error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN);
- error |= __put_user(0,name->release+__OLD_UTS_LEN);
-- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-+ error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN);
- error |= __put_user(0,name->version+__OLD_UTS_LEN);
-- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-+ error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN);
- error |= __put_user(0,name->machine+__OLD_UTS_LEN);
-
- up_read(&uts_sem);
-diff -upr linux-2.6.16.orig/arch/i386/kernel/syscall_table.S linux-2.6.16-026test009/arch/i386/kernel/syscall_table.S
---- linux-2.6.16.orig/arch/i386/kernel/syscall_table.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/syscall_table.S 2006-04-19 15:02:11.000000000 +0400
-@@ -310,3 +310,12 @@ ENTRY(sys_call_table)
- .long sys_pselect6
- .long sys_ppoll
- .long sys_unshare /* 310 */
-+
-+ .rept 510-(.-sys_call_table)/4
-+ .long sys_ni_syscall
-+ .endr
-+
-+ .long sys_getluid /* 510 */
-+ .long sys_setluid
-+ .long sys_setublimit
-+ .long sys_ubstat
-diff -upr linux-2.6.16.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.16-026test009/arch/i386/kernel/timers/timer_tsc.c
---- linux-2.6.16.orig/arch/i386/kernel/timers/timer_tsc.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/timers/timer_tsc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -94,7 +94,7 @@ static int count2; /* counter for mark_o
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
--static unsigned long fast_gettimeoffset_quotient;
-+unsigned long fast_gettimeoffset_quotient;
-
- static unsigned long get_offset_tsc(void)
- {
-diff -upr linux-2.6.16.orig/arch/i386/kernel/traps.c linux-2.6.16-026test009/arch/i386/kernel/traps.c
---- linux-2.6.16.orig/arch/i386/kernel/traps.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/kernel/traps.c 2006-04-19 15:02:12.000000000 +0400
-@@ -116,8 +116,10 @@ static void print_addr_and_symbol(unsign
- {
- printk(log_lvl);
- printk(" [<%08lx>] ", addr);
-- print_symbol("%s", addr);
-- printk("\n");
-+ if (decode_call_traces) {
-+ print_symbol("%s", addr);
-+ printk("\n");
-+ }
- }
-
- static inline unsigned long print_context_stack(struct thread_info *tinfo,
-@@ -167,7 +169,10 @@ static void show_trace_log_lvl(struct ta
- if (!stack)
- break;
- printk(log_lvl);
-- printk(" =======================\n");
-+ if (decode_call_traces)
-+ printk(" =======================\n");
-+ else
-+ printk(" =<ctx>= ");
- }
- }
-
-@@ -203,8 +208,13 @@ static void show_stack_log_lvl(struct ta
- }
- printk("\n");
- printk(log_lvl);
-- printk("Call Trace:\n");
-+ if (decode_call_traces)
-+ printk("Call Trace:\n");
-+ else
-+ printk("Call Trace: ");
- show_trace_log_lvl(task, esp, log_lvl);
-+ if (!decode_call_traces)
-+ printk("\n");
- }
-
- void show_stack(struct task_struct *task, unsigned long *esp)
-@@ -220,6 +230,8 @@ void dump_stack(void)
- unsigned long stack;
-
- show_trace(current, &stack);
-+ if (!decode_call_traces)
-+ printk("\n");
- }
-
- EXPORT_SYMBOL(dump_stack);
-@@ -252,8 +264,11 @@ void show_registers(struct pt_regs *regs
- regs->esi, regs->edi, regs->ebp, esp);
- printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
-- printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)",
-- current->comm, current->pid, current_thread_info(), current);
-+ printk(KERN_EMERG "Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)",
-+ current->comm, current->pid,
-+ VEID(VE_TASK_INFO(current)->owner_env),
-+ current_thread_info(), current);
-+
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
-@@ -299,9 +314,9 @@ static void handle_BUG(struct pt_regs *r
- goto no_bug;
- if (ud2 != 0x0b0f)
- goto no_bug;
-- if (__get_user(line, (unsigned short __user *)(eip + 2)))
-+ if (__get_user(line, (unsigned short __user *)(eip + 4)))
- goto bug;
-- if (__get_user(file, (char * __user *)(eip + 4)) ||
-+ if (__get_user(file, (char * __user *)(eip + 7)) ||
- (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
- file = "<bad filename>";
-
-@@ -316,6 +331,15 @@ bug:
- printk(KERN_EMERG "Kernel BUG\n");
- }
-
-+int die_counter = 0;
-+
-+static void inline check_kernel_csum_bug(void)
-+{
-+ if (kernel_text_csum_broken)
-+ printk("Kernel code checksum mismatch detected %d times\n",
-+ kernel_text_csum_broken);
-+}
-+
- /* This is gone through when something in the kernel
- * has done something bad and is about to be terminated.
- */
-@@ -330,7 +354,6 @@ void die(const char * str, struct pt_reg
- .lock_owner = -1,
- .lock_owner_depth = 0
- };
-- static int die_counter;
- unsigned long flags;
-
- if (die.lock_owner != raw_smp_processor_id()) {
-@@ -370,6 +393,7 @@ void die(const char * str, struct pt_reg
- } else
- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
-
-+ check_kernel_csum_bug();
- bust_spinlocks(0);
- die.lock_owner = -1;
- spin_unlock_irqrestore(&die.lock, flags);
-@@ -597,12 +621,27 @@ static void unknown_nmi_error(unsigned c
- printk("Do you have a strange power saving mode enabled?\n");
- }
-
--static DEFINE_SPINLOCK(nmi_print_lock);
-+/*
-+ * Voyager doesn't implement these
-+ */
-+void __attribute__((weak)) smp_show_regs(struct pt_regs *regs, void *info)
-+{
-+}
-+
-+#ifdef CONFIG_SMP
-+int __attribute__((weak))
-+smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
-+{
-+ return 0;
-+}
-+#endif
-
- void die_nmi (struct pt_regs *regs, const char *msg)
- {
-+ static DEFINE_SPINLOCK(nmi_print_lock);
-+
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
-- NOTIFY_STOP)
-+ NOTIFY_STOP)
- return;
-
- spin_lock(&nmi_print_lock);
-@@ -615,6 +654,11 @@ void die_nmi (struct pt_regs *regs, cons
- printk(" on CPU%d, eip %08lx, registers:\n",
- smp_processor_id(), regs->eip);
- show_registers(regs);
-+ smp_nmi_call_function(smp_show_regs, NULL, 1);
-+ bust_spinlocks(1);
-+ /* the current CPU's messages should go at the bottom */
-+ if (!decode_call_traces)
-+ smp_show_regs(regs, NULL);
- printk(KERN_EMERG "console shuts up ...\n");
- console_silent();
- spin_unlock(&nmi_print_lock);
-@@ -631,6 +675,14 @@ void die_nmi (struct pt_regs *regs, cons
- do_exit(SIGSEGV);
- }
-
-+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-+{
-+ return 0;
-+}
-+
-+static nmi_callback_t nmi_callback = dummy_nmi_callback;
-+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
-+
- static void default_do_nmi(struct pt_regs * regs)
- {
- unsigned char reason = 0;
-@@ -653,6 +705,9 @@ static void default_do_nmi(struct pt_reg
- return;
- }
- #endif
-+ if (nmi_ipi_callback != dummy_nmi_callback)
-+ return;
-+
- unknown_nmi_error(reason, regs);
- return;
- }
-@@ -669,13 +724,6 @@ static void default_do_nmi(struct pt_reg
- reassert_nmi();
- }
-
--static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
--{
-- return 0;
--}
--
--static nmi_callback_t nmi_callback = dummy_nmi_callback;
--
- fastcall void do_nmi(struct pt_regs * regs, long error_code)
- {
- int cpu;
-@@ -689,9 +737,20 @@ fastcall void do_nmi(struct pt_regs * re
- if (!rcu_dereference(nmi_callback)(regs, cpu))
- default_do_nmi(regs);
-
-+ nmi_ipi_callback(regs, cpu);
- nmi_exit();
- }
-
-+void set_nmi_ipi_callback(nmi_callback_t callback)
-+{
-+ nmi_ipi_callback = callback;
-+}
-+
-+void unset_nmi_ipi_callback(void)
-+{
-+ nmi_ipi_callback = dummy_nmi_callback;
-+}
-+
- void set_nmi_callback(nmi_callback_t callback)
- {
- rcu_assign_pointer(nmi_callback, callback);
-diff -upr linux-2.6.16.orig/arch/i386/mm/fault.c linux-2.6.16-026test009/arch/i386/mm/fault.c
---- linux-2.6.16.orig/arch/i386/mm/fault.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/mm/fault.c 2006-04-19 15:02:12.000000000 +0400
-@@ -31,32 +31,6 @@
- extern void die(const char *,struct pt_regs *,long);
-
- /*
-- * Unlock any spinlocks which will prevent us from getting the
-- * message out
-- */
--void bust_spinlocks(int yes)
--{
-- int loglevel_save = console_loglevel;
--
-- if (yes) {
-- oops_in_progress = 1;
-- return;
-- }
--#ifdef CONFIG_VT
-- unblank_screen();
--#endif
-- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk()
-- * without oops_in_progress set so that printk will give klogd
-- * a poke. Hold onto your hats...
-- */
-- console_loglevel = 15; /* NMI oopser may have shut the console up */
-- printk(" ");
-- console_loglevel = loglevel_save;
--}
--
--/*
- * Return EIP plus the CS segment base. The segment limit is also
- * adjusted, clamped to the kernel/user address space (whichever is
- * appropriate), and returned in *eip_limit.
-@@ -347,7 +321,6 @@ good_area:
- goto bad_area;
- }
-
-- survive:
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
-@@ -485,14 +458,14 @@ no_context:
- */
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (tsk->pid == 1) {
-- yield();
-- down_read(&mm->mmap_sem);
-- goto survive;
-+ if (error_code & 4) {
-+ /*
-+ * A 0-order allocation always succeeds unless something
-+ * really fatal happens: beancounter overdraft or OOM.
-+ */
-+ force_sig(SIGKILL, tsk);
-+ return;
- }
-- printk("VM: killing process %s\n", tsk->comm);
-- if (error_code & 4)
-- do_exit(SIGKILL);
- goto no_context;
-
- do_sigbus:
-diff -upr linux-2.6.16.orig/arch/i386/mm/init.c linux-2.6.16-026test009/arch/i386/mm/init.c
---- linux-2.6.16.orig/arch/i386/mm/init.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/mm/init.c 2006-04-19 15:02:11.000000000 +0400
-@@ -677,7 +677,7 @@ void __init pgtable_cache_init(void)
- pmd_cache = kmem_cache_create("pmd",
- PTRS_PER_PMD*sizeof(pmd_t),
- PTRS_PER_PMD*sizeof(pmd_t),
-- 0,
-+ SLAB_UBC,
- pmd_ctor,
- NULL);
- if (!pmd_cache)
-@@ -686,7 +686,7 @@ void __init pgtable_cache_init(void)
- pgd_cache = kmem_cache_create("pgd",
- PTRS_PER_PGD*sizeof(pgd_t),
- PTRS_PER_PGD*sizeof(pgd_t),
-- 0,
-+ SLAB_UBC,
- pgd_ctor,
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
- if (!pgd_cache)
-diff -upr linux-2.6.16.orig/arch/i386/mm/pgtable.c linux-2.6.16-026test009/arch/i386/mm/pgtable.c
---- linux-2.6.16.orig/arch/i386/mm/pgtable.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/i386/mm/pgtable.c 2006-04-19 15:02:12.000000000 +0400
-@@ -5,8 +5,10 @@
- #include <linux/config.h>
- #include <linux/sched.h>
- #include <linux/kernel.h>
-+#include <linux/module.h>
- #include <linux/errno.h>
- #include <linux/mm.h>
-+#include <linux/vmalloc.h>
- #include <linux/swap.h>
- #include <linux/smp.h>
- #include <linux/highmem.h>
-@@ -64,7 +66,9 @@ void show_mem(void)
- printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
- printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
- printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
-+ vprintstat();
- }
-+EXPORT_SYMBOL(show_mem);
-
- /*
- * Associate a virtual page frame with a given physical page frame
-@@ -159,9 +163,11 @@ struct page *pte_alloc_one(struct mm_str
- struct page *pte;
-
- #ifdef CONFIG_HIGHPTE
-- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-+ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_HIGHMEM|
-+ __GFP_REPEAT|__GFP_ZERO, 0);
- #else
-- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-+ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|
-+ __GFP_REPEAT|__GFP_ZERO, 0);
- #endif
- return pte;
- }
-diff -upr linux-2.6.16.orig/arch/ia64/Kconfig linux-2.6.16-026test009/arch/ia64/Kconfig
---- linux-2.6.16.orig/arch/ia64/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -464,6 +464,10 @@ endmenu
-
- source "arch/ia64/Kconfig.debug"
-
-+source "kernel/Kconfig.openvz"
-+
- source "security/Kconfig"
-
- source "crypto/Kconfig"
-+
-+source "kernel/ub/Kconfig"
-diff -upr linux-2.6.16.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.16-026test009/arch/ia64/ia32/binfmt_elf32.c
---- linux-2.6.16.orig/arch/ia64/ia32/binfmt_elf32.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/ia32/binfmt_elf32.c 2006-04-19 15:02:11.000000000 +0400
-@@ -136,6 +136,12 @@ ia64_elf32_init (struct pt_regs *regs)
- up_write(&current->mm->mmap_sem);
- }
-
-+ if (ub_memory_charge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
-+ IA32_LDT_ENTRY_SIZE),
-+ VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE,
-+ NULL, UB_SOFT))
-+ goto skip;
-+
- /*
- * Install LDT as anonymous memory. This gives us all-zero segment descriptors
- * until a task modifies them via modify_ldt().
-@@ -157,7 +163,12 @@ ia64_elf32_init (struct pt_regs *regs)
- }
- }
- up_write(&current->mm->mmap_sem);
-- }
-+ } else
-+ ub_memory_uncharge(current->mm, PAGE_ALIGN(IA32_LDT_ENTRIES *
-+ IA32_LDT_ENTRY_SIZE),
-+ VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE, NULL);
-+
-+skip:
-
- ia64_psr(regs)->ac = 0; /* turn off alignment checking */
- regs->loadrs = 0;
-@@ -212,9 +223,15 @@ ia32_setup_arg_pages (struct linux_binpr
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
-+ ret = -ENOMEM;
-+ if (ub_memory_charge(mm, IA32_STACK_TOP -
-+ (PAGE_MASK & (unsigned long)bprm->p),
-+ VM_STACK_FLAGS, NULL, UB_SOFT))
-+ goto err_charge;
-+
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
-- return -ENOMEM;
-+ goto err_alloc;
-
- memset(mpnt, 0, sizeof(*mpnt));
-
-@@ -231,11 +248,8 @@ ia32_setup_arg_pages (struct linux_binpr
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
- PAGE_COPY_EXEC: PAGE_COPY;
-- if ((ret = insert_vm_struct(current->mm, mpnt))) {
-- up_write(&current->mm->mmap_sem);
-- kmem_cache_free(vm_area_cachep, mpnt);
-- return ret;
-- }
-+ if ((ret = insert_vm_struct(current->mm, mpnt)))
-+ goto err_insert;
- current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
- }
-
-@@ -254,6 +268,16 @@ ia32_setup_arg_pages (struct linux_binpr
- current->thread.ppl = ia32_init_pp_list();
-
- return 0;
-+
-+err_insert:
-+ up_write(&current->mm->mmap_sem);
-+ kmem_cache_free(vm_area_cachep, mpnt);
-+err_alloc:
-+ ub_memory_uncharge(mm, IA32_STACK_TOP -
-+ (PAGE_MASK & (unsigned long)bprm->p),
-+ VM_STACK_FLAGS, NULL);
-+err_charge:
-+ return ret;
- }
-
- static void
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/asm-offsets.c linux-2.6.16-026test009/arch/ia64/kernel/asm-offsets.c
---- linux-2.6.16.orig/arch/ia64/kernel/asm-offsets.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/asm-offsets.c 2006-04-19 15:02:12.000000000 +0400
-@@ -44,11 +44,21 @@ void foo(void)
- DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
- DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
- DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
-+#ifdef CONFIG_VE
-+ DEFINE(IA64_TASK_PID_OFFSET, offsetof
-+ (struct task_struct, pids[PIDTYPE_PID].vnr));
-+#else
- DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
-+#endif
- DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
- DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
- DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
-+#ifdef CONFIG_VE
-+ DEFINE(IA64_TASK_TGID_OFFSET, offsetof
-+ (struct task_struct, pids[PIDTYPE_TGID].vnr));
-+#else
- DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
-+#endif
- DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
- DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
-
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/entry.S linux-2.6.16-026test009/arch/ia64/kernel/entry.S
---- linux-2.6.16.orig/arch/ia64/kernel/entry.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/entry.S 2006-04-19 15:02:11.000000000 +0400
-@@ -1620,4 +1620,12 @@ sys_call_table:
- data8 sys_ni_syscall // 1295 reserved for ppoll
- data8 sys_unshare
-
-+.rept 1505-1297
-+ data8 sys_ni_syscall
-+.endr
-+ data8 sys_getluid // 1505
-+ data8 sys_setluid
-+ data8 sys_setublimit
-+ data8 sys_ubstat
-+
- .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/fsys.S linux-2.6.16-026test009/arch/ia64/kernel/fsys.S
---- linux-2.6.16.orig/arch/ia64/kernel/fsys.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/fsys.S 2006-04-19 15:02:12.000000000 +0400
-@@ -72,6 +72,7 @@ ENTRY(fsys_getpid)
- FSYS_RETURN
- END(fsys_getpid)
-
-+#ifndef CONFIG_VE
- ENTRY(fsys_getppid)
- .prologue
- .altrp b6
-@@ -118,6 +119,7 @@ ENTRY(fsys_getppid)
- #endif
- FSYS_RETURN
- END(fsys_getppid)
-+#endif
-
- ENTRY(fsys_set_tid_address)
- .prologue
-@@ -665,7 +667,11 @@ fsyscall_table:
- data8 0 // chown
- data8 0 // lseek // 1040
- data8 fsys_getpid // getpid
-+#ifdef CONFIG_VE
-+ data8 0
-+#else
- data8 fsys_getppid // getppid
-+#endif
- data8 0 // mount
- data8 0 // umount
- data8 0 // setuid // 1045
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/irq.c linux-2.6.16-026test009/arch/ia64/kernel/irq.c
---- linux-2.6.16.orig/arch/ia64/kernel/irq.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/irq.c 2006-04-19 15:02:12.000000000 +0400
-@@ -163,7 +163,9 @@ void fixup_irqs(void)
- {
- unsigned int irq;
- extern void ia64_process_pending_intr(void);
-+ struct ve_struct *ve;
-
-+ ve = set_exec_env(get_ve0());
- ia64_set_itv(1<<16);
- /*
- * Phase 1: Locate irq's bound to this cpu and
-@@ -197,5 +199,6 @@ void fixup_irqs(void)
- */
- max_xtp();
- local_irq_disable();
-+ (void)set_exec_env(ve);
- }
- #endif
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/irq_ia64.c linux-2.6.16-026test009/arch/ia64/kernel/irq_ia64.c
---- linux-2.6.16.orig/arch/ia64/kernel/irq_ia64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/irq_ia64.c 2006-04-19 15:02:12.000000000 +0400
-@@ -103,6 +103,7 @@ void
- ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
- {
- unsigned long saved_tpr;
-+ struct ve_struct *ve;
-
- #if IRQ_DEBUG
- {
-@@ -139,6 +140,7 @@ ia64_handle_irq (ia64_vector vector, str
- * 16 (without this, it would be ~240, which could easily lead
- * to kernel stack overflows).
- */
-+ ve = set_exec_env(get_ve0());
- irq_enter();
- saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
- ia64_srlz_d();
-@@ -164,6 +166,7 @@ ia64_handle_irq (ia64_vector vector, str
- * come through until ia64_eoi() has been done.
- */
- irq_exit();
-+ (void)set_exec_env(ve); /* restore the exec env saved on entry */
- }
-
- #ifdef CONFIG_HOTPLUG_CPU
-@@ -176,9 +179,11 @@ void ia64_process_pending_intr(void)
- ia64_vector vector;
- unsigned long saved_tpr;
- extern unsigned int vectors_in_migration[NR_IRQS];
-+ struct ve_struct *ve;
-
- vector = ia64_get_ivr();
-
-+ ve = set_exec_env(get_ve0());
- irq_enter();
- saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
- ia64_srlz_d();
-@@ -210,6 +215,7 @@ void ia64_process_pending_intr(void)
- vector = ia64_get_ivr();
- }
- irq_exit();
-+ (void)set_exec_env(ve);
- }
- #endif
-
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/mca.c linux-2.6.16-026test009/arch/ia64/kernel/mca.c
---- linux-2.6.16.orig/arch/ia64/kernel/mca.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/mca.c 2006-04-19 15:02:12.000000000 +0400
-@@ -1241,10 +1241,10 @@ default_monarch_init_process(struct noti
- }
- printk("\n\n");
- if (read_trylock(&tasklist_lock)) {
-- do_each_thread (g, t) {
-+ do_each_thread_all (g, t) {
- printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
- show_stack(t, NULL);
-- } while_each_thread (g, t);
-+ } while_each_thread_all (g, t);
- read_unlock(&tasklist_lock);
- }
- return NOTIFY_DONE;
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/perfmon.c linux-2.6.16-026test009/arch/ia64/kernel/perfmon.c
---- linux-2.6.16.orig/arch/ia64/kernel/perfmon.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/perfmon.c 2006-04-19 15:02:12.000000000 +0400
-@@ -2624,7 +2624,7 @@ pfm_get_task(pfm_context_t *ctx, pid_t p
-
- read_lock(&tasklist_lock);
-
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
-
- /* make sure task cannot go away while we operate on it */
- if (p) get_task_struct(p);
-@@ -4188,12 +4188,12 @@ pfm_check_task_exist(pfm_context_t *ctx)
-
- read_lock(&tasklist_lock);
-
-- do_each_thread (g, t) {
-+ do_each_thread_ve (g, t) {
- if (t->thread.pfm_context == ctx) {
- ret = 0;
- break;
- }
-- } while_each_thread (g, t);
-+ } while_each_thread_ve (g, t);
-
- read_unlock(&tasklist_lock);
-
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/process.c linux-2.6.16-026test009/arch/ia64/kernel/process.c
---- linux-2.6.16.orig/arch/ia64/kernel/process.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -681,6 +681,13 @@ kernel_thread (int (*fn)(void *), void *
- struct pt_regs pt;
- } regs;
-
-+ /* Don't allow kernel_thread() inside VE */
-+ if (!ve_is_super(get_exec_env())) {
-+ printk("kernel_thread call inside VE\n");
-+ dump_stack();
-+ return -EPERM;
-+ }
-+
- memset(&regs, 0, sizeof(regs));
- regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
- regs.pt.r1 = helper_fptr[1]; /* set GP */
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/ptrace.c linux-2.6.16-026test009/arch/ia64/kernel/ptrace.c
---- linux-2.6.16.orig/arch/ia64/kernel/ptrace.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/ptrace.c 2006-04-19 15:02:12.000000000 +0400
-@@ -1433,7 +1433,7 @@ sys_ptrace (long request, pid_t pid, uns
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- {
-- child = find_task_by_pid(pid);
-+ child = find_task_by_pid_ve(pid);
- if (child) {
- if (peek_or_poke)
- child = find_thread_for_addr(child, addr);
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/signal.c linux-2.6.16-026test009/arch/ia64/kernel/signal.c
---- linux-2.6.16.orig/arch/ia64/kernel/signal.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/signal.c 2006-04-19 15:02:12.000000000 +0400
-@@ -270,7 +270,7 @@ ia64_rt_sigreturn (struct sigscratch *sc
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- si.si_code = SI_KERNEL;
-- si.si_pid = current->pid;
-+ si.si_pid = virt_pid(current);
- si.si_uid = current->uid;
- si.si_addr = sc;
- force_sig_info(SIGSEGV, &si, current);
-@@ -375,7 +375,7 @@ force_sigsegv_info (int sig, void __user
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- si.si_code = SI_KERNEL;
-- si.si_pid = current->pid;
-+ si.si_pid = virt_pid(current);
- si.si_uid = current->uid;
- si.si_addr = addr;
- force_sig_info(SIGSEGV, &si, current);
-@@ -641,7 +641,7 @@ set_sigdelayed(pid_t pid, int signo, int
- for (i = 1; i <= 3; ++i) {
- switch (i) {
- case 1:
-- t = find_task_by_pid(pid);
-+ t = find_task_by_pid_ve(pid);
- if (t)
- start_time = start_time_ul(t);
- break;
-@@ -682,7 +682,7 @@ do_sigdelayed(void)
- siginfo.si_code = current_thread_info()->sigdelayed.code;
- siginfo.si_addr = current_thread_info()->sigdelayed.addr;
- pid = current_thread_info()->sigdelayed.pid;
-- t = find_task_by_pid(pid);
-+ t = find_task_by_pid_ve(pid);
- if (!t)
- return;
- if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/traps.c linux-2.6.16-026test009/arch/ia64/kernel/traps.c
---- linux-2.6.16.orig/arch/ia64/kernel/traps.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/traps.c 2006-04-19 15:02:11.000000000 +0400
-@@ -54,34 +54,6 @@ trap_init (void)
- fpswa_interface = __va(ia64_boot_param->fpswa);
- }
-
--/*
-- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
-- * is acquired through the console unblank code)
-- */
--void
--bust_spinlocks (int yes)
--{
-- int loglevel_save = console_loglevel;
--
-- if (yes) {
-- oops_in_progress = 1;
-- return;
-- }
--
--#ifdef CONFIG_VT
-- unblank_screen();
--#endif
-- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk() without
-- * oops_in_progress set so that printk will give klogd a poke. Hold onto
-- * your hats...
-- */
-- console_loglevel = 15; /* NMI oopser may have shut the console up */
-- printk(" ");
-- console_loglevel = loglevel_save;
--}
--
- void
- die (const char *str, struct pt_regs *regs, long err)
- {
-diff -upr linux-2.6.16.orig/arch/ia64/kernel/unaligned.c linux-2.6.16-026test009/arch/ia64/kernel/unaligned.c
---- linux-2.6.16.orig/arch/ia64/kernel/unaligned.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/kernel/unaligned.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1290,7 +1290,7 @@ within_logging_rate_limit (void)
- {
- static unsigned long count, last_time;
-
-- if (jiffies - last_time > 5*HZ)
-+ if (jiffies - last_time > 60 * HZ)
- count = 0;
- if (count < 5) {
- last_time = jiffies;
-diff -upr linux-2.6.16.orig/arch/ia64/mm/contig.c linux-2.6.16-026test009/arch/ia64/mm/contig.c
---- linux-2.6.16.orig/arch/ia64/mm/contig.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/mm/contig.c 2006-04-19 15:02:12.000000000 +0400
-@@ -64,6 +64,7 @@ show_mem (void)
- printk("%ld pages in page table cache\n",
- pgtable_quicklist_total_size());
- }
-+EXPORT_SYMBOL(show_mem);
-
- /* physical address where the bootmem map is located */
- unsigned long bootmap_start;
-diff -upr linux-2.6.16.orig/arch/ia64/mm/discontig.c linux-2.6.16-026test009/arch/ia64/mm/discontig.c
---- linux-2.6.16.orig/arch/ia64/mm/discontig.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/mm/discontig.c 2006-04-19 15:02:12.000000000 +0400
-@@ -594,6 +594,7 @@ void show_mem(void)
- pgtable_quicklist_total_size());
- printk("%d free buffer pages\n", nr_free_buffer_pages());
- }
-+EXPORT_SYMBOL(show_mem);
-
- /**
- * call_pernode_memory - use SRAT to call callback functions with node info
-diff -upr linux-2.6.16.orig/arch/ia64/mm/fault.c linux-2.6.16-026test009/arch/ia64/mm/fault.c
---- linux-2.6.16.orig/arch/ia64/mm/fault.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/mm/fault.c 2006-04-19 15:02:12.000000000 +0400
-@@ -116,7 +116,6 @@ ia64_do_page_fault (unsigned long addres
- if ((vma->vm_flags & mask) != mask)
- goto bad_area;
-
-- survive:
- /*
- * If for any reason at all we couldn't handle the fault, make
- * sure we exit gracefully rather than endlessly redo the
-@@ -241,13 +240,13 @@ ia64_do_page_fault (unsigned long addres
-
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (current->pid == 1) {
-- yield();
-- down_read(&mm->mmap_sem);
-- goto survive;
-- }
-- printk(KERN_CRIT "VM: killing process %s\n", current->comm);
-- if (user_mode(regs))
-- do_exit(SIGKILL);
-+ if (user_mode(regs)) {
-+ /*
-+ * A 0-order allocation always succeeds unless something
-+ * really fatal has happened: beancounter overdraft or OOM.
-+ */
-+ force_sig(SIGKILL, current);
-+ return;
-+ }
- goto no_context;
- }
-diff -upr linux-2.6.16.orig/arch/ia64/mm/init.c linux-2.6.16-026test009/arch/ia64/mm/init.c
---- linux-2.6.16.orig/arch/ia64/mm/init.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ia64/mm/init.c 2006-04-19 15:02:11.000000000 +0400
-@@ -37,6 +37,8 @@
- #include <asm/unistd.h>
- #include <asm/mca.h>
-
-+#include <ub/ub_vmpages.h>
-+
- DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
- DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
-@@ -96,7 +98,7 @@ check_pgt_cache(void)
- preempt_disable();
- while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
- while (pages_to_free--) {
-- free_page((unsigned long)pgtable_quicklist_alloc());
-+ free_page((unsigned long)pgtable_quicklist_alloc(0));
- }
- preempt_enable();
- preempt_disable();
-@@ -146,6 +148,10 @@ ia64_init_addr_space (void)
-
- ia64_set_rbs_bot();
-
-+ if (ub_memory_charge(current->mm, PAGE_SIZE, VM_DATA_DEFAULT_FLAGS,
-+ NULL, UB_SOFT))
-+ goto skip;
-+
- /*
- * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
- * the problem. When the process attempts to write to the register backing store
-@@ -166,8 +172,11 @@ ia64_init_addr_space (void)
- return;
- }
- up_write(&current->mm->mmap_sem);
-- }
-+ } else
-+ ub_memory_uncharge(current->mm, PAGE_SIZE,
-+ VM_DATA_DEFAULT_FLAGS, NULL);
-
-+skip:
- /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
- if (!(current->personality & MMAP_PAGE_ZERO)) {
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-diff -upr linux-2.6.16.orig/arch/m32r/kernel/m32r_ksyms.c linux-2.6.16-026test009/arch/m32r/kernel/m32r_ksyms.c
---- linux-2.6.16.orig/arch/m32r/kernel/m32r_ksyms.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/m32r/kernel/m32r_ksyms.c 2006-04-19 15:02:11.000000000 +0400
-@@ -38,10 +38,6 @@ EXPORT_SYMBOL(__udelay);
- EXPORT_SYMBOL(__delay);
- EXPORT_SYMBOL(__const_udelay);
-
--EXPORT_SYMBOL(__get_user_1);
--EXPORT_SYMBOL(__get_user_2);
--EXPORT_SYMBOL(__get_user_4);
--
- EXPORT_SYMBOL(strpbrk);
- EXPORT_SYMBOL(strstr);
-
-diff -upr linux-2.6.16.orig/arch/m32r/kernel/setup.c linux-2.6.16-026test009/arch/m32r/kernel/setup.c
---- linux-2.6.16.orig/arch/m32r/kernel/setup.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/m32r/kernel/setup.c 2006-04-19 15:02:11.000000000 +0400
-@@ -9,6 +9,7 @@
-
- #include <linux/config.h>
- #include <linux/init.h>
-+#include <linux/kernel.h>
- #include <linux/stddef.h>
- #include <linux/fs.h>
- #include <linux/sched.h>
-@@ -218,8 +219,6 @@ static unsigned long __init setup_memory
- extern unsigned long setup_memory(void);
- #endif /* CONFIG_DISCONTIGMEM */
-
--#define M32R_PCC_PCATCR 0x00ef7014 /* will move to m32r.h */
--
- void __init setup_arch(char **cmdline_p)
- {
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
-@@ -268,15 +267,14 @@ void __init setup_arch(char **cmdline_p)
- paging_init();
- }
-
--static struct cpu cpu[NR_CPUS];
-+static struct cpu cpu_devices[NR_CPUS];
-
- static int __init topology_init(void)
- {
-- int cpu_id;
-+ int i;
-
-- for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++)
-- if (cpu_possible(cpu_id))
-- register_cpu(&cpu[cpu_id], cpu_id, NULL);
-+ for_each_present_cpu(i)
-+ register_cpu(&cpu_devices[i], i, NULL);
-
- return 0;
- }
-diff -upr linux-2.6.16.orig/arch/m32r/kernel/smpboot.c linux-2.6.16-026test009/arch/m32r/kernel/smpboot.c
---- linux-2.6.16.orig/arch/m32r/kernel/smpboot.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/m32r/kernel/smpboot.c 2006-04-19 15:02:11.000000000 +0400
-@@ -39,8 +39,10 @@
- * Martin J. Bligh : Added support for multi-quad systems
- */
-
-+#include <linux/module.h>
- #include <linux/config.h>
- #include <linux/init.h>
-+#include <linux/kernel.h>
- #include <linux/mm.h>
- #include <linux/smp_lock.h>
- #include <linux/irq.h>
-@@ -72,11 +74,15 @@ physid_mask_t phys_cpu_present_map;
-
- /* Bitmask of currently online CPUs */
- cpumask_t cpu_online_map;
-+EXPORT_SYMBOL(cpu_online_map);
-
- cpumask_t cpu_bootout_map;
- cpumask_t cpu_bootin_map;
--cpumask_t cpu_callout_map;
- static cpumask_t cpu_callin_map;
-+cpumask_t cpu_callout_map;
-+EXPORT_SYMBOL(cpu_callout_map);
-+cpumask_t cpu_possible_map = CPU_MASK_ALL;
-+EXPORT_SYMBOL(cpu_possible_map);
-
- /* Per CPU bogomips and other parameters */
- struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned;
-@@ -110,7 +116,6 @@ static unsigned int calibration_result;
-
- void smp_prepare_boot_cpu(void);
- void smp_prepare_cpus(unsigned int);
--static void smp_tune_scheduling(void);
- static void init_ipi_lock(void);
- static void do_boot_cpu(int);
- int __cpu_up(unsigned int);
-@@ -177,6 +182,9 @@ void __init smp_prepare_cpus(unsigned in
- }
- for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++)
- physid_set(phys_id, phys_cpu_present_map);
-+#ifndef CONFIG_HOTPLUG_CPU
-+ cpu_present_map = cpu_possible_map;
-+#endif
-
- show_mp_info(nr_cpu);
-
-@@ -186,7 +194,6 @@ void __init smp_prepare_cpus(unsigned in
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
-- smp_tune_scheduling();
-
- /*
- * If SMP should be disabled, then really disable it!
-@@ -230,11 +237,6 @@ smp_done:
- Dprintk("Boot done.\n");
- }
-
--static void __init smp_tune_scheduling(void)
--{
-- /* Nothing to do. */
--}
--
- /*
- * init_ipi_lock : Initialize IPI locks.
- */
-@@ -629,4 +631,3 @@ static void __init unmap_cpu_to_physid(i
- physid_2_cpu[phys_id] = -1;
- cpu_2_physid[cpu_id] = -1;
- }
--
-diff -upr linux-2.6.16.orig/arch/m32r/lib/Makefile linux-2.6.16-026test009/arch/m32r/lib/Makefile
---- linux-2.6.16.orig/arch/m32r/lib/Makefile 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/m32r/lib/Makefile 2006-04-19 15:02:11.000000000 +0400
-@@ -2,6 +2,6 @@
- # Makefile for M32R-specific library files..
- #
-
--lib-y := checksum.o ashxdi3.o memset.o memcpy.o getuser.o \
-- putuser.o delay.o strlen.o usercopy.o csum_partial_copy.o
-+lib-y := checksum.o ashxdi3.o memset.o memcpy.o \
-+ delay.o strlen.o usercopy.o csum_partial_copy.o
-
-diff -upr linux-2.6.16.orig/arch/mips/kernel/irixelf.c linux-2.6.16-026test009/arch/mips/kernel/irixelf.c
---- linux-2.6.16.orig/arch/mips/kernel/irixelf.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/mips/kernel/irixelf.c 2006-04-19 15:02:11.000000000 +0400
-@@ -432,7 +432,7 @@ static inline int look_for_irix_interpre
- if (retval < 0)
- goto out;
-
-- file = open_exec(*name);
-+ file = open_exec(*name, bprm);
- if (IS_ERR(file)) {
- retval = PTR_ERR(file);
- goto out;
-diff -upr linux-2.6.16.orig/arch/mips/kernel/sysirix.c linux-2.6.16-026test009/arch/mips/kernel/sysirix.c
---- linux-2.6.16.orig/arch/mips/kernel/sysirix.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/mips/kernel/sysirix.c 2006-04-19 15:02:12.000000000 +0400
-@@ -110,7 +110,7 @@ asmlinkage int irix_prctl(unsigned optio
- printk("irix_prctl[%s:%d]: Wants PR_ISBLOCKED\n",
- current->comm, current->pid);
- read_lock(&tasklist_lock);
-- task = find_task_by_pid(va_arg(args, pid_t));
-+ task = find_task_by_pid_ve(va_arg(args, pid_t));
- error = -ESRCH;
- if (error)
- error = (task->run_list.next != NULL);
-diff -upr linux-2.6.16.orig/arch/powerpc/Kconfig linux-2.6.16-026test009/arch/powerpc/Kconfig
---- linux-2.6.16.orig/arch/powerpc/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -956,6 +956,8 @@ source "arch/powerpc/platforms/iseries/K
-
- source "lib/Kconfig"
-
-+source "kernel/ub/Kconfig"
-+
- menu "Instrumentation Support"
- depends on EXPERIMENTAL
-
-@@ -974,6 +976,8 @@ endmenu
-
- source "arch/powerpc/Kconfig.debug"
-
-+source "kernel/Kconfig.openvz"
-+
- source "security/Kconfig"
-
- config KEYS_COMPAT
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/irq.c linux-2.6.16-026test009/arch/powerpc/kernel/irq.c
---- linux-2.6.16.orig/arch/powerpc/kernel/irq.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/irq.c 2006-04-19 15:02:12.000000000 +0400
-@@ -50,6 +50,8 @@
- #include <linux/profile.h>
- #include <linux/bitops.h>
-
-+#include <ub/beancounter.h>
-+
- #include <asm/uaccess.h>
- #include <asm/system.h>
- #include <asm/io.h>
-@@ -189,7 +191,11 @@ void do_IRQ(struct pt_regs *regs)
- #ifdef CONFIG_IRQSTACKS
- struct thread_info *curtp, *irqtp;
- #endif
-+ struct ve_struct *ve;
-+ struct user_beancounter *ub;
-
-+ ve = set_exec_env(get_ve0());
-+ ub = set_exec_ub(get_ub0());
- irq_enter();
-
- #ifdef CONFIG_DEBUG_STACKOVERFLOW
-@@ -236,6 +242,8 @@ void do_IRQ(struct pt_regs *regs)
- ppc_spurious_interrupts++;
-
- irq_exit();
-+ (void)set_exec_ub(ub);
-+ (void)set_exec_env(ve);
-
- #ifdef CONFIG_PPC_ISERIES
- if (get_lppaca()->int_dword.fields.decr_int) {
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/misc_32.S linux-2.6.16-026test009/arch/powerpc/kernel/misc_32.S
---- linux-2.6.16.orig/arch/powerpc/kernel/misc_32.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/misc_32.S 2006-04-19 15:02:12.000000000 +0400
-@@ -973,7 +973,7 @@ _GLOBAL(_get_SP)
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
- */
--_GLOBAL(kernel_thread)
-+_GLOBAL(ppc_kernel_thread)
- stwu r1,-16(r1)
- stw r30,8(r1)
- stw r31,12(r1)
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/misc_64.S linux-2.6.16-026test009/arch/powerpc/kernel/misc_64.S
---- linux-2.6.16.orig/arch/powerpc/kernel/misc_64.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/misc_64.S 2006-04-19 15:02:12.000000000 +0400
-@@ -677,7 +677,7 @@ _GLOBAL(scom970_write)
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
- */
--_GLOBAL(kernel_thread)
-+_GLOBAL(ppc_kernel_thread)
- std r29,-24(r1)
- std r30,-16(r1)
- stdu r1,-STACK_FRAME_OVERHEAD(r1)
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/pci_64.c linux-2.6.16-026test009/arch/powerpc/kernel/pci_64.c
---- linux-2.6.16.orig/arch/powerpc/kernel/pci_64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/pci_64.c 2006-04-19 15:02:11.000000000 +0400
-@@ -78,6 +78,7 @@ int global_phb_number; /* Global phb co
-
- /* Cached ISA bridge dev. */
- struct pci_dev *ppc64_isabridge_dev = NULL;
-+EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
-
- static void fixup_broken_pcnet32(struct pci_dev* dev)
- {
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/process.c linux-2.6.16-026test009/arch/powerpc/kernel/process.c
---- linux-2.6.16.orig/arch/powerpc/kernel/process.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -889,6 +889,20 @@ void dump_stack(void)
- }
- EXPORT_SYMBOL(dump_stack);
-
-+long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-+{
-+ extern long ppc_kernel_thread(int (*fn)(void *), void *arg,
-+ unsigned long flags);
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ printk("kernel_thread call inside VE\n");
-+ dump_stack();
-+ return -EPERM;
-+ }
-+
-+ return ppc_kernel_thread(fn, arg, flags);
-+}
-+
- #ifdef CONFIG_PPC64
- void ppc64_runlatch_on(void)
- {
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/setup_64.c linux-2.6.16-026test009/arch/powerpc/kernel/setup_64.c
---- linux-2.6.16.orig/arch/powerpc/kernel/setup_64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/setup_64.c 2006-04-19 15:02:11.000000000 +0400
-@@ -256,12 +256,10 @@ void __init early_setup(unsigned long dt
- /*
- * Initialize stab / SLB management except on iSeries
- */
-- if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
-- if (cpu_has_feature(CPU_FTR_SLB))
-- slb_initialize();
-- else
-- stab_initialize(lpaca->stab_real);
-- }
-+ if (cpu_has_feature(CPU_FTR_SLB))
-+ slb_initialize();
-+ else if (!firmware_has_feature(FW_FEATURE_ISERIES))
-+ stab_initialize(lpaca->stab_real);
-
- DBG(" <- early_setup()\n");
- }
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/signal_64.c linux-2.6.16-026test009/arch/powerpc/kernel/signal_64.c
---- linux-2.6.16.orig/arch/powerpc/kernel/signal_64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/signal_64.c 2006-04-19 15:02:11.000000000 +0400
-@@ -213,7 +213,7 @@ static inline void __user * get_sigframe
- /* Default to using normal stack */
- newsp = regs->gpr[1];
-
-- if (ka->sa.sa_flags & SA_ONSTACK) {
-+ if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) {
- if (! on_sig_stack(regs->gpr[1]))
- newsp = (current->sas_ss_sp + current->sas_ss_size);
- }
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/syscalls.c linux-2.6.16-026test009/arch/powerpc/kernel/syscalls.c
---- linux-2.6.16.orig/arch/powerpc/kernel/syscalls.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/syscalls.c 2006-04-19 15:02:12.000000000 +0400
-@@ -259,7 +259,7 @@ long ppc_newuname(struct new_utsname __u
- int err = 0;
-
- down_read(&uts_sem);
-- if (copy_to_user(name, &system_utsname, sizeof(*name)))
-+ if (copy_to_user(name, &ve_utsname, sizeof(*name)))
- err = -EFAULT;
- up_read(&uts_sem);
- if (!err)
-@@ -272,7 +272,7 @@ int sys_uname(struct old_utsname __user
- int err = 0;
-
- down_read(&uts_sem);
-- if (copy_to_user(name, &system_utsname, sizeof(*name)))
-+ if (copy_to_user(name, &ve_utsname, sizeof(*name)))
- err = -EFAULT;
- up_read(&uts_sem);
- if (!err)
-@@ -288,19 +288,19 @@ int sys_olduname(struct oldold_utsname _
- return -EFAULT;
-
- down_read(&uts_sem);
-- error = __copy_to_user(&name->sysname, &system_utsname.sysname,
-+ error = __copy_to_user(&name->sysname, &ve_utsname.sysname,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-- error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
-+ error |= __copy_to_user(&name->nodename, &ve_utsname.nodename,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-- error |= __copy_to_user(&name->release, &system_utsname.release,
-+ error |= __copy_to_user(&name->release, &ve_utsname.release,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->release + __OLD_UTS_LEN);
-- error |= __copy_to_user(&name->version, &system_utsname.version,
-+ error |= __copy_to_user(&name->version, &ve_utsname.version,
- __OLD_UTS_LEN);
- error |= __put_user(0, name->version + __OLD_UTS_LEN);
-- error |= __copy_to_user(&name->machine, &system_utsname.machine,
-+ error |= __copy_to_user(&name->machine, &ve_utsname.machine,
- __OLD_UTS_LEN);
- error |= override_machine(name->machine);
- up_read(&uts_sem);
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/systbl.S linux-2.6.16-026test009/arch/powerpc/kernel/systbl.S
---- linux-2.6.16.orig/arch/powerpc/kernel/systbl.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/systbl.S 2006-04-19 15:02:11.000000000 +0400
-@@ -322,3 +322,12 @@ SYSCALL(spu_create)
- COMPAT_SYS(pselect6)
- COMPAT_SYS(ppoll)
- SYSCALL(unshare)
-+
-+.rept 410 - (. - sys_call_table)/8
-+SYSX(sys_ni_syscall, sys_ni_syscall, sys_ni_syscall)
-+.endr
-+
-+SYSX(sys_getluid, sys_ni_syscall, sys_getluid)
-+SYSX(sys_setluid, sys_ni_syscall, sys_setluid)
-+SYSX(sys_setublimit, sys_ni_syscall, sys_setublimit)
-+SYSX(sys_ubstat, sys_ni_syscall, sys_ubstat)
-diff -upr linux-2.6.16.orig/arch/powerpc/kernel/time.c linux-2.6.16-026test009/arch/powerpc/kernel/time.c
---- linux-2.6.16.orig/arch/powerpc/kernel/time.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/kernel/time.c 2006-04-19 15:02:12.000000000 +0400
-@@ -431,12 +431,14 @@ void timer_interrupt(struct pt_regs * re
- int next_dec;
- int cpu = smp_processor_id();
- unsigned long ticks;
-+ struct ve_struct *ve;
-
- #ifdef CONFIG_PPC32
- if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
- #endif
-
-+ ve = set_exec_env(get_ve0());
- irq_enter();
-
- profile_tick(CPU_PROFILING, regs);
-@@ -496,6 +498,7 @@ void timer_interrupt(struct pt_regs * re
- #endif
-
- irq_exit();
-+ (void)set_exec_env(ve);
- }
-
- void wakeup_decrementer(void)
-diff -upr linux-2.6.16.orig/arch/powerpc/mm/fault.c linux-2.6.16-026test009/arch/powerpc/mm/fault.c
---- linux-2.6.16.orig/arch/powerpc/mm/fault.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/mm/fault.c 2006-04-19 15:02:12.000000000 +0400
-@@ -307,7 +307,6 @@ good_area:
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
-- survive:
- switch (handle_mm_fault(mm, vma, address, is_write)) {
-
- case VM_FAULT_MINOR:
-@@ -351,14 +350,12 @@ bad_area_nosemaphore:
- */
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (current->pid == 1) {
-- yield();
-- down_read(&mm->mmap_sem);
-- goto survive;
-- }
-- printk("VM: killing process %s\n", current->comm);
- if (user_mode(regs))
-- do_exit(SIGKILL);
-+ /*
-+ * A 0-order allocation always succeeds unless something really
-+ * fatal has happened: beancounter overdraft or OOM. Den
-+ */
-+ force_sig(SIGKILL, current);
- return SIGKILL;
-
- do_sigbus:
-diff -upr linux-2.6.16.orig/arch/powerpc/mm/init_64.c linux-2.6.16-026test009/arch/powerpc/mm/init_64.c
---- linux-2.6.16.orig/arch/powerpc/mm/init_64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/mm/init_64.c 2006-04-19 15:02:11.000000000 +0400
-@@ -225,7 +225,8 @@ void pgtable_cache_init(void)
- pgtable_cache[i] = kmem_cache_create(name,
- size, size,
- SLAB_HWCACHE_ALIGN |
-- SLAB_MUST_HWCACHE_ALIGN,
-+ SLAB_MUST_HWCACHE_ALIGN |
-+ SLAB_UBC | SLAB_NO_CHARGE,
- zero_ctor,
- NULL);
- if (! pgtable_cache[i])
-diff -upr linux-2.6.16.orig/arch/powerpc/mm/mem.c linux-2.6.16-026test009/arch/powerpc/mm/mem.c
---- linux-2.6.16.orig/arch/powerpc/mm/mem.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/mm/mem.c 2006-04-19 15:02:12.000000000 +0400
-@@ -222,6 +222,7 @@ void show_mem(void)
- printk("%ld pages shared\n", shared);
- printk("%ld pages swap cached\n", cached);
- }
-+EXPORT_SYMBOL(show_mem);
-
- /*
- * Initialize the bootmem system and give it all the memory we
-diff -upr linux-2.6.16.orig/arch/powerpc/mm/pgtable_32.c linux-2.6.16-026test009/arch/powerpc/mm/pgtable_32.c
---- linux-2.6.16.orig/arch/powerpc/mm/pgtable_32.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/powerpc/mm/pgtable_32.c 2006-04-19 15:02:11.000000000 +0400
-@@ -85,7 +85,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
- {
- pgd_t *ret;
-
-- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
-+ ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
-+ __GFP_ZERO, PGDIR_ORDER);
- return ret;
- }
-
-@@ -119,6 +120,7 @@ struct page *pte_alloc_one(struct mm_str
- #else
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
- #endif
-+ flags |= (__GFP_UBC | __GFP_SOFT_UBC);
-
- ptepage = alloc_pages(flags, 0);
- if (ptepage)
-diff -upr linux-2.6.16.orig/arch/ppc/Kconfig linux-2.6.16-026test009/arch/ppc/Kconfig
---- linux-2.6.16.orig/arch/ppc/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ppc/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -1394,6 +1394,10 @@ source "arch/powerpc/oprofile/Kconfig"
-
- source "arch/ppc/Kconfig.debug"
-
-+source "kernel/Kconfig.openvz"
-+
- source "security/Kconfig"
-
-+source "kernel/ub/Kconfig"
-+
- source "crypto/Kconfig"
-diff -upr linux-2.6.16.orig/arch/ppc/kernel/misc.S linux-2.6.16-026test009/arch/ppc/kernel/misc.S
---- linux-2.6.16.orig/arch/ppc/kernel/misc.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ppc/kernel/misc.S 2006-04-19 15:02:12.000000000 +0400
-@@ -1004,7 +1004,7 @@ _GLOBAL(_get_SP)
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
- */
--_GLOBAL(kernel_thread)
-+_GLOBAL(ppc_kernel_thread)
- stwu r1,-16(r1)
- stw r30,8(r1)
- stw r31,12(r1)
-diff -upr linux-2.6.16.orig/arch/ppc/kernel/time.c linux-2.6.16-026test009/arch/ppc/kernel/time.c
---- linux-2.6.16.orig/arch/ppc/kernel/time.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ppc/kernel/time.c 2006-04-19 15:02:12.000000000 +0400
-@@ -58,6 +58,8 @@
- #include <linux/init.h>
- #include <linux/profile.h>
-
-+#include <ub/beancounter.h>
-+
- #include <asm/io.h>
- #include <asm/nvram.h>
- #include <asm/cache.h>
-@@ -136,10 +138,14 @@ void timer_interrupt(struct pt_regs * re
- unsigned long cpu = smp_processor_id();
- unsigned jiffy_stamp = last_jiffy_stamp(cpu);
- extern void do_IRQ(struct pt_regs *);
-+ struct ve_struct *ve;
-+ struct user_beancounter *ub;
-
- if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
-
-+ ve = set_exec_env(get_ve0());
-+ ub = set_exec_ub(get_ub0());
- irq_enter();
-
- while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) {
-@@ -192,6 +198,8 @@ void timer_interrupt(struct pt_regs * re
- ppc_md.heartbeat();
-
- irq_exit();
-+ (void)set_exec_ub(ub);
-+ (void)set_exec_env(ve);
- }
-
- /*
-diff -upr linux-2.6.16.orig/arch/ppc/mm/fault.c linux-2.6.16-026test009/arch/ppc/mm/fault.c
---- linux-2.6.16.orig/arch/ppc/mm/fault.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ppc/mm/fault.c 2006-04-19 15:02:12.000000000 +0400
-@@ -247,7 +247,6 @@ good_area:
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
-- survive:
- switch (handle_mm_fault(mm, vma, address, is_write)) {
- case VM_FAULT_MINOR:
- current->min_flt++;
-@@ -290,14 +289,12 @@ bad_area:
- */
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (current->pid == 1) {
-- yield();
-- down_read(&mm->mmap_sem);
-- goto survive;
-- }
-- printk("VM: killing process %s\n", current->comm);
- if (user_mode(regs))
-- do_exit(SIGKILL);
-+ /*
-+ * A 0-order allocation always succeeds unless something really
-+ * fatal has happened: beancounter overdraft or OOM. Den
-+ */
-+ force_sig(SIGKILL, current);
- return SIGKILL;
-
- do_sigbus:
-diff -upr linux-2.6.16.orig/arch/ppc/mm/init.c linux-2.6.16-026test009/arch/ppc/mm/init.c
---- linux-2.6.16.orig/arch/ppc/mm/init.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ppc/mm/init.c 2006-04-19 15:02:12.000000000 +0400
-@@ -132,6 +132,7 @@ void show_mem(void)
- printk("%d pages shared\n",shared);
- printk("%d pages swap cached\n",cached);
- }
-+EXPORT_SYMBOL(show_mem);
-
- /* Free up now-unused memory */
- static void free_sec(unsigned long start, unsigned long end, const char *name)
-diff -upr linux-2.6.16.orig/arch/ppc/mm/pgtable.c linux-2.6.16-026test009/arch/ppc/mm/pgtable.c
---- linux-2.6.16.orig/arch/ppc/mm/pgtable.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/ppc/mm/pgtable.c 2006-04-19 15:02:11.000000000 +0400
-@@ -84,7 +84,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
- {
- pgd_t *ret;
-
-- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
-+ ret = (pgd_t *)__get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC |
-+ __GFP_ZERO, PGDIR_ORDER);
- return ret;
- }
-
-@@ -118,6 +119,7 @@ struct page *pte_alloc_one(struct mm_str
- #else
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
- #endif
-+ flags |= (__GFP_UBC | __GFP_SOFT_UBC);
-
- ptepage = alloc_pages(flags, 0);
- if (ptepage)
-diff -upr linux-2.6.16.orig/arch/s390/Kconfig linux-2.6.16-026test009/arch/s390/Kconfig
---- linux-2.6.16.orig/arch/s390/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -472,8 +472,12 @@ source "arch/s390/oprofile/Kconfig"
-
- source "arch/s390/Kconfig.debug"
-
-+source "kernel/Kconfig.openvz"
-+
- source "security/Kconfig"
-
- source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "kernel/ub/Kconfig"
-diff -upr linux-2.6.16.orig/arch/s390/kernel/process.c linux-2.6.16-026test009/arch/s390/kernel/process.c
---- linux-2.6.16.orig/arch/s390/kernel/process.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/kernel/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -164,9 +164,10 @@ void show_regs(struct pt_regs *regs)
- struct task_struct *tsk = current;
-
- printk("CPU: %d %s\n", task_thread_info(tsk)->cpu, print_tainted());
-- printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-- current->comm, current->pid, (void *) tsk,
-- (void *) tsk->thread.ksp);
-+ printk("Process %s (pid: %d, veid: %d, task: %p, ksp: %p)\n",
-+ current->comm, current->pid,
-+ VEID(VE_TASK_INFO(current)->owner_env),
-+ (void *) tsk, (void *) tsk->thread.ksp);
-
- show_registers(regs);
- /* Show stack backtrace if pt_regs is from kernel mode */
-@@ -187,6 +188,13 @@ int kernel_thread(int (*fn)(void *), voi
- {
- struct pt_regs regs;
-
-+ if (!ve_is_super(get_exec_env())) {
-+ /* Don't allow kernel_thread() inside VE */
-+ printk("kernel_thread call inside VE\n");
-+ dump_stack();
-+ return -EPERM;
-+ }
-+
- memset(&regs, 0, sizeof(regs));
- regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT;
- regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
-diff -upr linux-2.6.16.orig/arch/s390/kernel/s390_ext.c linux-2.6.16-026test009/arch/s390/kernel/s390_ext.c
---- linux-2.6.16.orig/arch/s390/kernel/s390_ext.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/kernel/s390_ext.c 2006-04-19 15:02:12.000000000 +0400
-@@ -114,7 +114,9 @@ void do_extint(struct pt_regs *regs, uns
- {
- ext_int_info_t *p;
- int index;
-+ struct ve_struct *envid;
-
-+ envid = set_exec_env(get_ve0());
- irq_enter();
- asm volatile ("mc 0,0");
- if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
-@@ -132,6 +134,7 @@ void do_extint(struct pt_regs *regs, uns
- }
- }
- irq_exit();
-+ (void)set_exec_env(envid);
- }
-
- EXPORT_SYMBOL(register_external_interrupt);
-diff -upr linux-2.6.16.orig/arch/s390/kernel/smp.c linux-2.6.16-026test009/arch/s390/kernel/smp.c
---- linux-2.6.16.orig/arch/s390/kernel/smp.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/kernel/smp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -526,6 +526,17 @@ int __devinit start_secondary(void *cpuv
- {
- /* Setup the cpu */
- cpu_init();
-+
-+#ifdef CONFIG_VE
-+ /* TSC reset. kill whatever might rely on old values */
-+ VE_TASK_INFO(current)->wakeup_stamp = 0;
-+ /*
-+ * Cosmetic: sleep_time won't be changed afterwards for the idle
-+ * thread; keep it 0 rather than -cycles.
-+ */
-+ VE_TASK_INFO(idle)->sleep_time = 0;
-+#endif
-+
- preempt_disable();
- /* init per CPU timer */
- init_cpu_timer();
-@@ -834,6 +845,11 @@ void __init smp_prepare_cpus(unsigned in
- for_each_cpu(cpu)
- if (cpu != smp_processor_id())
- smp_create_idle(cpu);
-+
-+#ifdef CONFIG_VE
-+ /* TSC reset. kill whatever might rely on old values */
-+ VE_TASK_INFO(current)->wakeup_stamp = 0;
-+#endif
- }
-
- void __devinit smp_prepare_boot_cpu(void)
-diff -upr linux-2.6.16.orig/arch/s390/kernel/syscalls.S linux-2.6.16-026test009/arch/s390/kernel/syscalls.S
---- linux-2.6.16.orig/arch/s390/kernel/syscalls.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/kernel/syscalls.S 2006-04-19 15:02:11.000000000 +0400
-@@ -312,3 +312,12 @@ SYSCALL(sys_faccessat,sys_faccessat,sys_
- SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper)
- SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper)
- SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper)
-+
-+.rept 410-(.-sys_call_table)/4
-+ NI_SYSCALL
-+.endr
-+
-+SYSCALL(sys_getluid, sys_getluid, sys_ni_syscall) /* 410 */
-+SYSCALL(sys_setluid, sys_setluid, sys_ni_syscall)
-+SYSCALL(sys_setublimit, sys_setublimit, sys_ni_syscall)
-+SYSCALL(sys_ubstat, sys_ubstat, sys_ni_syscall)
-diff -upr linux-2.6.16.orig/arch/s390/mm/fault.c linux-2.6.16-026test009/arch/s390/mm/fault.c
---- linux-2.6.16.orig/arch/s390/mm/fault.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/mm/fault.c 2006-04-19 15:02:11.000000000 +0400
-@@ -61,17 +61,9 @@ void bust_spinlocks(int yes)
- if (yes) {
- oops_in_progress = 1;
- } else {
-- int loglevel_save = console_loglevel;
- console_unblank();
- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk()
-- * without oops_in_progress set so that printk will give klogd
-- * a poke. Hold onto your hats...
-- */
-- console_loglevel = 15;
-- printk(" ");
-- console_loglevel = loglevel_save;
-+ wake_up_klogd();
- }
- }
-
-diff -upr linux-2.6.16.orig/arch/s390/mm/init.c linux-2.6.16-026test009/arch/s390/mm/init.c
---- linux-2.6.16.orig/arch/s390/mm/init.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/s390/mm/init.c 2006-04-19 15:02:12.000000000 +0400
-@@ -89,6 +89,7 @@ void show_mem(void)
- printk("%d pages shared\n",shared);
- printk("%d pages swap cached\n",cached);
- }
-+EXPORT_SYMBOL(show_mem);
-
- /* References to section boundaries */
-
-diff -upr linux-2.6.16.orig/arch/sh/kernel/kgdb_stub.c linux-2.6.16-026test009/arch/sh/kernel/kgdb_stub.c
---- linux-2.6.16.orig/arch/sh/kernel/kgdb_stub.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/sh/kernel/kgdb_stub.c 2006-04-19 15:02:12.000000000 +0400
-@@ -412,7 +412,7 @@ static struct task_struct *get_thread(in
- if (pid == PID_MAX) pid = 0;
-
- /* First check via PID */
-- thread = find_task_by_pid(pid);
-+ thread = find_task_by_pid_all(pid);
-
- if (thread)
- return thread;
-diff -upr linux-2.6.16.orig/arch/sh64/kernel/process.c linux-2.6.16-026test009/arch/sh64/kernel/process.c
---- linux-2.6.16.orig/arch/sh64/kernel/process.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/sh64/kernel/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -906,7 +906,7 @@ asids_proc_info(char *buf, char **start,
- int len=0;
- struct task_struct *p;
- read_lock(&tasklist_lock);
-- for_each_process(p) {
-+ for_each_process_ve(p) {
- int pid = p->pid;
- struct mm_struct *mm;
- if (!pid) continue;
-diff -upr linux-2.6.16.orig/arch/sparc64/kernel/setup.c linux-2.6.16-026test009/arch/sparc64/kernel/setup.c
---- linux-2.6.16.orig/arch/sparc64/kernel/setup.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/sparc64/kernel/setup.c 2006-04-19 15:02:12.000000000 +0400
-@@ -156,7 +156,7 @@ int prom_callback(long *args)
- pte_t *ptep;
- pte_t pte;
-
-- for_each_process(p) {
-+ for_each_process_all(p) {
- mm = p->mm;
- if (CTX_NRBITS(mm->context) == ctx)
- break;
-diff -upr linux-2.6.16.orig/arch/um/drivers/mconsole_kern.c linux-2.6.16-026test009/arch/um/drivers/mconsole_kern.c
---- linux-2.6.16.orig/arch/um/drivers/mconsole_kern.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/um/drivers/mconsole_kern.c 2006-04-19 15:02:12.000000000 +0400
-@@ -600,7 +600,7 @@ static void do_stack_trace(struct mc_req
-
- from = current;
-
-- to = find_task_by_pid(pid_requested);
-+ to = find_task_by_pid_all(pid_requested);
- if((to == NULL) || (pid_requested == 0)) {
- mconsole_reply(req, "Couldn't find that pid", 1, 0);
- return;
-diff -upr linux-2.6.16.orig/arch/um/kernel/skas/process_kern.c linux-2.6.16-026test009/arch/um/kernel/skas/process_kern.c
---- linux-2.6.16.orig/arch/um/kernel/skas/process_kern.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/um/kernel/skas/process_kern.c 2006-04-19 15:02:12.000000000 +0400
-@@ -197,7 +197,7 @@ void kill_off_processes_skas(void)
- int pid, me;
-
- me = os_getpid();
-- for_each_process(p){
-+ for_each_process_all(p){
- if(p->mm == NULL)
- continue;
-
-diff -upr linux-2.6.16.orig/arch/um/kernel/tt/process_kern.c linux-2.6.16-026test009/arch/um/kernel/tt/process_kern.c
---- linux-2.6.16.orig/arch/um/kernel/tt/process_kern.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/um/kernel/tt/process_kern.c 2006-04-19 15:02:12.000000000 +0400
-@@ -301,7 +301,7 @@ void kill_off_processes_tt(void)
- int me;
-
- me = os_getpid();
-- for_each_process(p){
-+ for_each_process_all(p){
- if(p->thread.mode.tt.extern_pid != me)
- os_kill_process(p->thread.mode.tt.extern_pid, 0);
- }
-@@ -444,7 +444,7 @@ int is_valid_pid(int pid)
- struct task_struct *task;
-
- read_lock(&tasklist_lock);
-- for_each_process(task){
-+ for_each_process_all(task){
- if(task->thread.mode.tt.extern_pid == pid){
- read_unlock(&tasklist_lock);
- return(1);
-diff -upr linux-2.6.16.orig/arch/x86_64/Kconfig linux-2.6.16-026test009/arch/x86_64/Kconfig
---- linux-2.6.16.orig/arch/x86_64/Kconfig 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -588,8 +588,12 @@ endmenu
-
- source "arch/x86_64/Kconfig.debug"
-
-+source "kernel/Kconfig.openvz"
-+
- source "security/Kconfig"
-
- source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "kernel/ub/Kconfig"
-diff -upr linux-2.6.16.orig/arch/x86_64/boot/compressed/head.S linux-2.6.16-026test009/arch/x86_64/boot/compressed/head.S
---- linux-2.6.16.orig/arch/x86_64/boot/compressed/head.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/boot/compressed/head.S 2006-04-19 15:02:12.000000000 +0400
-@@ -34,7 +34,7 @@
- startup_32:
- cld
- cli
-- movl $(__KERNEL_DS),%eax
-+ movl $(__BOOT_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- movl %eax,%fs
-@@ -76,7 +76,7 @@ startup_32:
- jnz 3f
- addl $8,%esp
- xorl %ebx,%ebx
-- ljmp $(__KERNEL_CS), $__PHYSICAL_START
-+ ljmp $(__BOOT_CS), $__PHYSICAL_START
-
- /*
- * We come here, if we were loaded high.
-@@ -104,7 +104,7 @@ startup_32:
- popl %eax # hcount
- movl $__PHYSICAL_START,%edi
- cli # make sure we don't get interrupted
-- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
-+ ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
-
- /*
- * Routine (template) for moving the decompressed kernel in place,
-@@ -127,7 +127,7 @@ move_routine_start:
- movsl
- movl %ebx,%esi # Restore setup pointer
- xorl %ebx,%ebx
-- ljmp $(__KERNEL_CS), $__PHYSICAL_START
-+ ljmp $(__BOOT_CS), $__PHYSICAL_START
- move_routine_end:
-
-
-@@ -137,5 +137,5 @@ user_stack:
- .fill 4096,4,0
- stack_start:
- .long user_stack+4096
-- .word __KERNEL_DS
-+ .word __BOOT_DS
-
-diff -upr linux-2.6.16.orig/arch/x86_64/boot/setup.S linux-2.6.16-026test009/arch/x86_64/boot/setup.S
---- linux-2.6.16.orig/arch/x86_64/boot/setup.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/boot/setup.S 2006-04-19 15:02:12.000000000 +0400
-@@ -729,7 +729,7 @@ flush_instr:
- subw $DELTA_INITSEG, %si
- shll $4, %esi # Convert to 32-bit pointer
- # NOTE: For high loaded big kernels we need a
--# jmpi 0x100000,__KERNEL_CS
-+# jmpi 0x100000,__BOOT_CS
- #
- # but we yet haven't reloaded the CS register, so the default size
- # of the target offset still is 16 bit.
-@@ -740,7 +740,7 @@ flush_instr:
- .byte 0x66, 0xea # prefix + jmpi-opcode
- code32: .long 0x1000 # will be set to 0x100000
- # for big kernels
-- .word __KERNEL_CS
-+ .word __BOOT_CS
-
- # Here's a bunch of information about your current kernel..
- kernel_version: .ascii UTS_RELEASE
-diff -upr linux-2.6.16.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.16-026test009/arch/x86_64/ia32/ia32_aout.c
---- linux-2.6.16.orig/arch/x86_64/ia32/ia32_aout.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/ia32/ia32_aout.c 2006-04-19 15:02:12.000000000 +0400
-@@ -347,14 +347,14 @@ static int load_aout_binary(struct linux
- if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
- {
-- printk(KERN_NOTICE "executable not page aligned\n");
-+ ve_printk(VE_LOG, KERN_NOTICE "executable not page aligned\n");
- error_time2 = jiffies;
- }
-
- if ((fd_offset & ~PAGE_MASK) != 0 &&
- (jiffies-error_time) > 5*HZ)
- {
-- printk(KERN_WARNING
-+ ve_printk(VE_LOG, KERN_WARNING
- "fd_offset is not page aligned. Please convert program: %s\n",
- bprm->file->f_dentry->d_name.name);
- error_time = jiffies;
-@@ -467,7 +467,7 @@ static int load_aout_library(struct file
- static unsigned long error_time;
- if ((jiffies-error_time) > 5*HZ)
- {
-- printk(KERN_WARNING
-+ ve_printk(VE_LOG, KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %s\n",
- file->f_dentry->d_name.name);
- error_time = jiffies;
-diff -upr linux-2.6.16.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.16-026test009/arch/x86_64/ia32/ia32_binfmt.c
---- linux-2.6.16.orig/arch/x86_64/ia32/ia32_binfmt.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/ia32/ia32_binfmt.c 2006-04-19 15:02:12.000000000 +0400
-@@ -27,12 +27,14 @@
- #include <asm/ia32.h>
- #include <asm/vsyscall32.h>
-
-+#include <ub/ub_vmpages.h>
-+
- #define ELF_NAME "elf/i386"
-
- #define AT_SYSINFO 32
- #define AT_SYSINFO_EHDR 33
-
--int sysctl_vsyscall32 = 1;
-+int sysctl_vsyscall32 = 0;
-
- #define ARCH_DLINFO do { \
- if (sysctl_vsyscall32) { \
-@@ -347,9 +349,15 @@ int ia32_setup_arg_pages(struct linux_bi
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
-+ ret = -ENOMEM;
-+ if (ub_memory_charge(mm, IA32_STACK_TOP -
-+ (PAGE_MASK & (unsigned long)bprm->p),
-+ VM_STACK_FLAGS, NULL, UB_SOFT))
-+ goto err_charge;
-+
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
-- return -ENOMEM;
-+ goto err_alloc;
-
- memset(mpnt, 0, sizeof(*mpnt));
-
-@@ -366,11 +374,8 @@ int ia32_setup_arg_pages(struct linux_bi
- mpnt->vm_flags = VM_STACK_FLAGS;
- mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
- PAGE_COPY_EXEC : PAGE_COPY;
-- if ((ret = insert_vm_struct(mm, mpnt))) {
-- up_write(&mm->mmap_sem);
-- kmem_cache_free(vm_area_cachep, mpnt);
-- return ret;
-- }
-+ if ((ret = insert_vm_struct(mm, mpnt)))
-+ goto err_insert;
- mm->stack_vm = mm->total_vm = vma_pages(mpnt);
- }
-
-@@ -385,6 +390,16 @@ int ia32_setup_arg_pages(struct linux_bi
- up_write(&mm->mmap_sem);
-
- return 0;
-+
-+err_insert:
-+ up_write(&mm->mmap_sem);
-+ kmem_cache_free(vm_area_cachep, mpnt);
-+err_alloc:
-+ ub_memory_uncharge(mm, IA32_STACK_TOP -
-+ (PAGE_MASK & (unsigned long)bprm->p),
-+ VM_STACK_FLAGS, NULL);
-+err_charge:
-+ return ret;
- }
- EXPORT_SYMBOL(ia32_setup_arg_pages);
-
-diff -upr linux-2.6.16.orig/arch/x86_64/ia32/ia32_signal.c linux-2.6.16-026test009/arch/x86_64/ia32/ia32_signal.c
---- linux-2.6.16.orig/arch/x86_64/ia32/ia32_signal.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/ia32/ia32_signal.c 2006-04-19 15:02:12.000000000 +0400
-@@ -39,7 +39,6 @@
-
- #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
--asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
- void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
- int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
-@@ -118,22 +117,17 @@ asmlinkage long
- sys32_sigsuspend(int history0, int history1, old_sigset_t mask,
- struct pt_regs *regs)
- {
-- sigset_t saveset;
--
- mask &= _BLOCKABLE;
- spin_lock_irq(&current->sighand->siglock);
-- saveset = current->blocked;
-+ current->saved_sigmask = current->blocked;
- siginitset(&current->blocked, mask);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
-- regs->rax = -EINTR;
-- while (1) {
-- current->state = TASK_INTERRUPTIBLE;
-- schedule();
-- if (do_signal(regs, &saveset))
-- return -EINTR;
-- }
-+ current->state = TASK_INTERRUPTIBLE;
-+ schedule();
-+ set_thread_flag(TIF_RESTORE_SIGMASK);
-+ return -ERESTARTNOHAND;
- }
-
- asmlinkage long
-@@ -510,11 +504,11 @@ int ia32_setup_frame(int sig, struct k_s
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
- #endif
-
-- return 1;
-+ return 0;
-
- give_sigsegv:
- force_sigsegv(sig, current);
-- return 0;
-+ return -EFAULT;
- }
-
- int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-@@ -606,9 +600,9 @@ int ia32_setup_rt_frame(int sig, struct
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
- #endif
-
-- return 1;
-+ return 0;
-
- give_sigsegv:
- force_sigsegv(sig, current);
-- return 0;
-+ return -EFAULT;
- }
-diff -upr linux-2.6.16.orig/arch/x86_64/ia32/sys_ia32.c linux-2.6.16-026test009/arch/x86_64/ia32/sys_ia32.c
---- linux-2.6.16.orig/arch/x86_64/ia32/sys_ia32.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/ia32/sys_ia32.c 2006-04-19 15:02:12.000000000 +0400
-@@ -527,7 +527,7 @@ int sys32_ni_syscall(int call)
- static char lastcomm[sizeof(me->comm)];
-
- if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-- printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
-+ ve_printk(VE_LOG, KERN_INFO "IA32 syscall %d from %s not implemented\n",
- call, me->comm);
- strncpy(lastcomm, me->comm, sizeof(lastcomm));
- }
-@@ -890,13 +890,13 @@ asmlinkage long sys32_olduname(struct ol
-
- down_read(&uts_sem);
-
-- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-+ error = __copy_to_user(&name->sysname,&ve_utsname.sysname,__OLD_UTS_LEN);
- __put_user(0,name->sysname+__OLD_UTS_LEN);
-- __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-+ __copy_to_user(&name->nodename,&ve_utsname.nodename,__OLD_UTS_LEN);
- __put_user(0,name->nodename+__OLD_UTS_LEN);
-- __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-+ __copy_to_user(&name->release,&ve_utsname.release,__OLD_UTS_LEN);
- __put_user(0,name->release+__OLD_UTS_LEN);
-- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-+ __copy_to_user(&name->version,&ve_utsname.version,__OLD_UTS_LEN);
- __put_user(0,name->version+__OLD_UTS_LEN);
- {
- char *arch = "x86_64";
-@@ -919,7 +919,7 @@ long sys32_uname(struct old_utsname __us
- if (!name)
- return -EFAULT;
- down_read(&uts_sem);
-- err=copy_to_user(name, &system_utsname, sizeof (*name));
-+ err=copy_to_user(name, &ve_utsname, sizeof (*name));
- up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
-@@ -1005,7 +1005,7 @@ long sys32_vm86_warning(void)
- struct task_struct *me = current;
- static char lastcomm[sizeof(me->comm)];
- if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-- printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
-+ ve_printk(VE_LOG, KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
- me->comm);
- strncpy(lastcomm, me->comm, sizeof(lastcomm));
- }
-diff -upr linux-2.6.16.orig/arch/x86_64/ia32/syscall32.c linux-2.6.16-026test009/arch/x86_64/ia32/syscall32.c
---- linux-2.6.16.orig/arch/x86_64/ia32/syscall32.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/ia32/syscall32.c 2006-04-19 15:02:11.000000000 +0400
-@@ -14,6 +14,8 @@
- #include <asm/tlbflush.h>
- #include <asm/ia32_unistd.h>
-
-+#include <ub/ub_vmpages.h>
-+
- extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
- extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
- extern int sysctl_vsyscall32;
-@@ -47,32 +49,45 @@ int syscall32_setup_pages(struct linux_b
- int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
- struct vm_area_struct *vma;
- struct mm_struct *mm = current->mm;
-+ unsigned long flags;
- int ret;
-
-+ flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC | VM_MAYWRITE |
-+ mm->def_flags;
-+
-+ ret = -ENOMEM;
-+ if (ub_memory_charge(mm, VSYSCALL32_END - VSYSCALL32_BASE,
-+ flags, NULL, UB_SOFT))
-+ goto err_charge;
-+
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma)
-- return -ENOMEM;
-+ goto err_alloc;
-
- memset(vma, 0, sizeof(struct vm_area_struct));
- /* Could randomize here */
- vma->vm_start = VSYSCALL32_BASE;
- vma->vm_end = VSYSCALL32_END;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
-- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
-- vma->vm_flags |= mm->def_flags;
-+ vma->vm_flags = flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall32_vm_ops;
- vma->vm_mm = mm;
-
- down_write(&mm->mmap_sem);
-- if ((ret = insert_vm_struct(mm, vma))) {
-- up_write(&mm->mmap_sem);
-- kmem_cache_free(vm_area_cachep, vma);
-- return ret;
-- }
-+ if ((ret = insert_vm_struct(mm, vma)))
-+ goto err_ins;
- mm->total_vm += npages;
- up_write(&mm->mmap_sem);
- return 0;
-+
-+err_ins:
-+ up_write(&mm->mmap_sem);
-+ kmem_cache_free(vm_area_cachep, vma);
-+err_alloc:
-+ ub_memory_uncharge(mm, VSYSCALL32_END - VSYSCALL32_BASE, flags, NULL);
-+err_charge:
-+ return ret;
- }
-
- static int __init init_syscall32(void)
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/acpi/wakeup.S linux-2.6.16-026test009/arch/x86_64/kernel/acpi/wakeup.S
---- linux-2.6.16.orig/arch/x86_64/kernel/acpi/wakeup.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/acpi/wakeup.S 2006-04-19 15:02:12.000000000 +0400
-@@ -77,7 +77,7 @@ wakeup_code:
-
- .byte 0x66, 0xea # prefix + jmpi-opcode
- .long wakeup_32 - __START_KERNEL_map
-- .word __KERNEL_CS
-+ .word __BOOT_CS
-
- .code32
- wakeup_32:
-@@ -96,13 +96,13 @@ wakeup_32:
- jnc bogus_cpu
- movl %edx,%edi
-
-- movw $__KERNEL_DS, %ax
-+ movw $__BOOT_DS, %ax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %fs
- movw %ax, %gs
-
-- movw $__KERNEL_DS, %ax
-+ movw $__BOOT_DS, %ax
- movw %ax, %ss
-
- mov $(wakeup_stack - __START_KERNEL_map), %esp
-@@ -187,7 +187,7 @@ reach_compatibility_mode:
-
- wakeup_jumpvector:
- .long wakeup_long64 - __START_KERNEL_map
-- .word __KERNEL_CS
-+ .word __BOOT_CS
-
- .code64
-
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/apic.c linux-2.6.16-026test009/arch/x86_64/kernel/apic.c
---- linux-2.6.16.orig/arch/x86_64/kernel/apic.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/apic.c 2006-04-19 15:02:12.000000000 +0400
-@@ -941,6 +941,7 @@ void smp_local_timer_interrupt(struct pt
- */
- void smp_apic_timer_interrupt(struct pt_regs *regs)
- {
-+ struct ve_struct *ve;
- /*
- * the NMI deadlock-detector uses this.
- */
-@@ -957,9 +958,11 @@ void smp_apic_timer_interrupt(struct pt_
- * interrupt lock, which is the WrongThing (tm) to do.
- */
- exit_idle();
-+ ve = set_exec_env(get_ve0());
- irq_enter();
- smp_local_timer_interrupt(regs);
- irq_exit();
-+ (void)set_exec_env(ve);
- }
-
- /*
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/entry.S linux-2.6.16-026test009/arch/x86_64/kernel/entry.S
---- linux-2.6.16.orig/arch/x86_64/kernel/entry.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/entry.S 2006-04-19 15:02:12.000000000 +0400
-@@ -180,6 +180,10 @@ rff_trace:
- *
- * XXX if we had a free scratch register we could save the RSP into the stack frame
- * and report it properly in ps. Unfortunately we haven't.
-+ *
-+ * When user can change the frames always force IRET. That is because
-+ * it deals with uncanonical addresses better. SYSRET has trouble
-+ * with them due to bugs in both AMD and Intel CPUs.
- */
-
- ENTRY(system_call)
-@@ -244,7 +248,7 @@ sysret_careful:
- /* Handle a signal */
- sysret_signal:
- sti
-- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
-+ testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
- jz 1f
-
- /* Really a signal */
-@@ -254,7 +258,10 @@ sysret_signal:
- xorl %esi,%esi # oldset -> arg2
- call ptregscall_common
- 1: movl $_TIF_NEED_RESCHED,%edi
-- jmp sysret_check
-+ /* Use IRET because user could have changed frame. This
-+ works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-+ cli
-+ jmp int_with_check
-
- badsys:
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-@@ -280,7 +287,8 @@ tracesys:
- call syscall_trace_leave
- RESTORE_TOP_OF_STACK %rbx
- RESTORE_REST
-- jmp ret_from_sys_call
-+ /* Use IRET because user could have changed frame */
-+ jmp int_ret_from_sys_call
- CFI_ENDPROC
-
- /*
-@@ -350,7 +358,7 @@ int_very_careful:
- jmp int_restore_rest
-
- int_signal:
-- testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
-+ testl $(_TIF_NOTIFY_RESUME|_TIF_RESTORE_SIGMASK|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
- jz 1f
- movq %rsp,%rdi # &ptregs -> arg1
- xorl %esi,%esi # oldset -> arg2
-@@ -408,25 +416,9 @@ ENTRY(stub_execve)
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
-- movq %r11, %r15
-- CFI_REGISTER rip, r15
- FIXUP_TOP_OF_STACK %r11
- call sys_execve
-- GET_THREAD_INFO(%rcx)
-- bt $TIF_IA32,threadinfo_flags(%rcx)
-- CFI_REMEMBER_STATE
-- jc exec_32bit
- RESTORE_TOP_OF_STACK %r11
-- movq %r15, %r11
-- CFI_REGISTER rip, r11
-- RESTORE_REST
-- pushq %r11
-- CFI_ADJUST_CFA_OFFSET 8
-- CFI_REL_OFFSET rip, 0
-- ret
--
--exec_32bit:
-- CFI_RESTORE_STATE
- movq %rax,RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
-@@ -574,7 +566,7 @@ retint_careful:
- jmp retint_check
-
- retint_signal:
-- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
-+ testl $(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
- jz retint_swapgs
- sti
- SAVE_REST
-@@ -845,7 +837,7 @@ ENTRY(kernel_thread)
- xorl %r9d,%r9d
-
- # clone now
-- call do_fork
-+ call do_fork_kthread
- movq %rax,RAX(%rsp)
- xorl %edi,%edi
-
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/head.S linux-2.6.16-026test009/arch/x86_64/kernel/head.S
---- linux-2.6.16.orig/arch/x86_64/kernel/head.S 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/head.S 2006-04-19 15:02:12.000000000 +0400
-@@ -40,7 +40,7 @@ startup_32:
- */
-
- /* Initialize the %ds segment register */
-- movl $__KERNEL_DS,%eax
-+ movl $__BOOT_DS,%eax
- movl %eax,%ds
-
- /* Load new GDT with the 64bit segments using 32bit descriptor */
-@@ -183,7 +183,14 @@ startup_64:
- /* esi is pointer to real mode structure with interesting info.
- pass it to C */
- movl %esi, %edi
--
-+
-+ /* Switch to __KERNEL_CS. The segment is the same, but selector
-+ * is different. */
-+ pushq $__KERNEL_CS
-+ pushq $switch_cs
-+ lretq
-+switch_cs:
-+
- /* Finally jump to run C code and to be on real kernel address
- * Since we are running on identity-mapped space we have to jump
- * to the full 64bit address , this is only possible as indirect
-@@ -243,7 +250,7 @@ pGDT32:
- .org 0xf10
- ljumpvector:
- .long startup_64-__START_KERNEL_map
-- .word __KERNEL_CS
-+ .word __BOOT_CS
-
- ENTRY(stext)
- ENTRY(_stext)
-@@ -355,21 +362,30 @@ gdt:
- .align PAGE_SIZE
-
- /* The TLS descriptors are currently at a different place compared to i386.
-- Hopefully nobody expects them at a fixed place (Wine?) */
-+ Hopefully nobody expects them at a fixed place (Wine?)
-+ Descriptors rearranged to place 32bit and TLS selectors in the same
-+ places, because it is really necessary. sysret/exit mandates order
-+ of kernel/user cs/ds, so we have to extend gdt.
-+*/
-
- ENTRY(cpu_gdt_table)
-- .quad 0x0000000000000000 /* NULL descriptor */
-- .quad 0x0 /* unused */
-- .quad 0x00af9a000000ffff /* __KERNEL_CS */
-- .quad 0x00cf92000000ffff /* __KERNEL_DS */
-- .quad 0x00cffa000000ffff /* __USER32_CS */
-- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
-- .quad 0x00affa000000ffff /* __USER_CS */
-- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
-- .quad 0,0 /* TSS */
-- .quad 0,0 /* LDT */
-- .quad 0,0,0 /* three TLS descriptors */
-- .quad 0 /* unused */
-+ .quad 0x0000000000000000 /* 0 NULL descriptor */
-+ .quad 0x0 /* 1 unused */
-+ .quad 0x00af9a000000ffff /* 2 __BOOT_CS */
-+ .quad 0x00cf92000000ffff /* 3 __BOOT_DS */
-+ .quad 0,0 /* 4,5 TSS */
-+ .quad 0,0,0 /* 6-8 three TLS descriptors */
-+ .quad 0,0 /* 9,10 LDT */
-+ .quad 0x00cf9a000000ffff /* 11 __KERNEL32_CS */
-+ .quad 0x00af9a000000ffff /* 12 __KERNEL_CS */
-+ .quad 0x00cf92000000ffff /* 13 __KERNEL_DS */
-+ .quad 0x00cffa000000ffff /* 14 __USER32_CS */
-+ .quad 0x00cff2000000ffff /* 15 __USER_DS, __USER32_DS */
-+ .quad 0x00affa000000ffff /* 16 __USER_CS */
-+ .quad 0x0 /* 17 unused */
-+ .quad 0,0,0,0,0,0
-+ .quad 0,0,0,0,0,0,0,0
-+
- gdt_end:
- /* asm/segment.h:GDT_ENTRIES must match this */
- /* This should be a multiple of the cache line size */
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/irq.c linux-2.6.16-026test009/arch/x86_64/kernel/irq.c
---- linux-2.6.16.orig/arch/x86_64/kernel/irq.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/irq.c 2006-04-19 15:02:12.000000000 +0400
-@@ -98,12 +98,15 @@ asmlinkage unsigned int do_IRQ(struct pt
- {
- /* high bits used in ret_from_ code */
- unsigned irq = regs->orig_rax & 0xff;
-+ struct ve_struct *ve;
-
- exit_idle();
-+ ve = set_exec_env(get_ve0());
- irq_enter();
-
- __do_IRQ(irq, regs);
- irq_exit();
-+ (void)set_exec_env(ve);
-
- return 1;
- }
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/ldt.c linux-2.6.16-026test009/arch/x86_64/kernel/ldt.c
---- linux-2.6.16.orig/arch/x86_64/kernel/ldt.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/ldt.c 2006-04-19 15:02:12.000000000 +0400
-@@ -16,6 +16,7 @@
- #include <linux/smp_lock.h>
- #include <linux/vmalloc.h>
- #include <linux/slab.h>
-+#include <linux/module.h>
-
- #include <asm/uaccess.h>
- #include <asm/system.h>
-@@ -23,6 +24,8 @@
- #include <asm/desc.h>
- #include <asm/proto.h>
-
-+#include <ub/ub_mem.h>
-+
- #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
- static void flush_ldt(void *null)
- {
-@@ -42,9 +45,9 @@ static int alloc_ldt(mm_context_t *pc, u
- oldsize = pc->size;
- mincount = (mincount+511)&(~511);
- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
-- newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
-+ newldt = ub_vmalloc(mincount*LDT_ENTRY_SIZE);
- else
-- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-+ newldt = ub_kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
-
- if (!newldt)
- return -ENOMEM;
-@@ -109,6 +112,7 @@ int init_new_context(struct task_struct
- }
- return retval;
- }
-+EXPORT_SYMBOL_GPL(init_new_context);
-
- /*
- *
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/nmi.c linux-2.6.16-026test009/arch/x86_64/kernel/nmi.c
---- linux-2.6.16.orig/arch/x86_64/kernel/nmi.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/nmi.c 2006-04-19 15:02:11.000000000 +0400
-@@ -522,6 +522,7 @@ static __kprobes int dummy_nmi_callback(
- }
-
- static nmi_callback_t nmi_callback = dummy_nmi_callback;
-+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
-
- asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
- {
-@@ -531,9 +532,21 @@ asmlinkage __kprobes void do_nmi(struct
- add_pda(__nmi_count,1);
- if (!rcu_dereference(nmi_callback)(regs, cpu))
- default_do_nmi(regs);
-+
-+ nmi_ipi_callback(regs, cpu);
- nmi_exit();
- }
-
-+void set_nmi_ipi_callback(nmi_callback_t callback)
-+{
-+ nmi_ipi_callback = callback;
-+}
-+
-+void unset_nmi_ipi_callback(void)
-+{
-+ nmi_ipi_callback = dummy_nmi_callback;
-+}
-+
- void set_nmi_callback(nmi_callback_t callback)
- {
- rcu_assign_pointer(nmi_callback, callback);
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/process.c linux-2.6.16-026test009/arch/x86_64/kernel/process.c
---- linux-2.6.16.orig/arch/x86_64/kernel/process.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -54,6 +54,11 @@
- #include <asm/idle.h>
-
- asmlinkage extern void ret_from_fork(void);
-+asmlinkage extern void int_ret_from_sys_call(void);
-+asmlinkage extern void execve(void);
-+EXPORT_SYMBOL_GPL(ret_from_fork);
-+EXPORT_SYMBOL_GPL(int_ret_from_sys_call);
-+EXPORT_SYMBOL_GPL(execve);
-
- unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-
-@@ -303,7 +308,8 @@ void __show_regs(struct pt_regs * regs)
- (int)strcspn(system_utsname.version, " "),
- system_utsname.version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
-- printk_address(regs->rip);
-+ if (decode_call_traces)
-+ printk_address(regs->rip);
- printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
- regs->eflags);
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
-@@ -345,6 +351,21 @@ void show_regs(struct pt_regs *regs)
- show_trace(&regs->rsp);
- }
-
-+void smp_show_regs(struct pt_regs *regs, void *data)
-+{
-+ static DEFINE_SPINLOCK(show_regs_lock);
-+
-+ if (regs == NULL)
-+ return;
-+
-+ bust_spinlocks(1);
-+ spin_lock(&show_regs_lock);
-+ printk("----------- IPI show regs -----------\n");
-+ show_regs(regs);
-+ spin_unlock(&show_regs_lock);
-+ bust_spinlocks(0);
-+}
-+
- /*
- * Free current thread data structures etc..
- */
-@@ -841,3 +862,20 @@ unsigned long arch_align_stack(unsigned
- sp -= get_random_int() % 8192;
- return sp & ~0xf;
- }
-+
-+long do_fork_kthread(unsigned long clone_flags,
-+ unsigned long stack_start,
-+ struct pt_regs *regs,
-+ unsigned long stack_size,
-+ int __user *parent_tidptr,
-+ int __user *child_tidptr)
-+{
-+ if (ve_is_super(get_exec_env()))
-+ return do_fork(clone_flags, stack_start, regs, stack_size,
-+ parent_tidptr, child_tidptr);
-+
-+ /* Don't allow kernel_thread() inside VE */
-+ printk("kernel_thread call inside VE\n");
-+ dump_stack();
-+ return -EPERM;
-+}
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/ptrace.c linux-2.6.16-026test009/arch/x86_64/kernel/ptrace.c
---- linux-2.6.16.orig/arch/x86_64/kernel/ptrace.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/ptrace.c 2006-04-19 15:02:12.000000000 +0400
-@@ -300,6 +300,15 @@ static unsigned long getreg(struct task_
- return child->thread.fs;
- case offsetof(struct user_regs_struct, gs_base):
- return child->thread.gs;
-+ case offsetof(struct user_regs_struct, cs):
-+ if (test_tsk_thread_flag(child, TIF_SYSCALL_TRACE)) {
-+ val = get_stack_long(child, regno - sizeof(struct pt_regs));
-+ if (val == __USER_CS)
-+ return 0x33;
-+ if (val == __USER32_CS)
-+ return 0x23;
-+ }
-+ /* fall through */
- default:
- regno = regno - sizeof(struct pt_regs);
- val = get_stack_long(child, regno);
-@@ -581,8 +590,10 @@ static void syscall_trace(struct pt_regs
- current_thread_info()->flags, current->ptrace);
- #endif
-
-+ set_pn_state(current, (regs->rax != -ENOSYS) ? PN_STOP_LEAVE : PN_STOP_ENTRY);
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
-+ clear_pn_state(current);
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/setup64.c linux-2.6.16-026test009/arch/x86_64/kernel/setup64.c
---- linux-2.6.16.orig/arch/x86_64/kernel/setup64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/setup64.c 2006-04-19 15:02:12.000000000 +0400
-@@ -290,3 +290,5 @@ void __cpuinit cpu_init (void)
-
- fpu_init();
- }
-+
-+EXPORT_SYMBOL_GPL(cpu_gdt_descr);
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/signal.c linux-2.6.16-026test009/arch/x86_64/kernel/signal.c
---- linux-2.6.16.orig/arch/x86_64/kernel/signal.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/signal.c 2006-04-19 15:02:12.000000000 +0400
-@@ -40,37 +40,6 @@ int ia32_setup_frame(int sig, struct k_s
- sigset_t *set, struct pt_regs * regs);
-
- asmlinkage long
--sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
--{
-- sigset_t saveset, newset;
--
-- /* XXX: Don't preclude handling different sized sigset_t's. */
-- if (sigsetsize != sizeof(sigset_t))
-- return -EINVAL;
--
-- if (copy_from_user(&newset, unewset, sizeof(newset)))
-- return -EFAULT;
-- sigdelsetmask(&newset, ~_BLOCKABLE);
--
-- spin_lock_irq(&current->sighand->siglock);
-- saveset = current->blocked;
-- current->blocked = newset;
-- recalc_sigpending();
-- spin_unlock_irq(&current->sighand->siglock);
--#ifdef DEBUG_SIG
-- printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
-- saveset, newset, regs, regs->rip);
--#endif
-- regs->rax = -EINTR;
-- while (1) {
-- current->state = TASK_INTERRUPTIBLE;
-- schedule();
-- if (do_signal(regs, &saveset))
-- return -EINTR;
-- }
--}
--
--asmlinkage long
- sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
- struct pt_regs *regs)
- {
-@@ -344,11 +313,11 @@ static int setup_rt_frame(int sig, struc
- current->comm, current->pid, frame, regs->rip, frame->pretcode);
- #endif
-
-- return 1;
-+ return 0;
-
- give_sigsegv:
- force_sigsegv(sig, current);
-- return 0;
-+ return -EFAULT;
- }
-
- /*
-@@ -411,7 +380,7 @@ handle_signal(unsigned long sig, siginfo
- #endif
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
-
-- if (ret) {
-+ if (ret == 0) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
-@@ -428,9 +397,10 @@ handle_signal(unsigned long sig, siginfo
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
--int do_signal(struct pt_regs *regs, sigset_t *oldset)
-+static void do_signal(struct pt_regs *regs)
- {
- struct k_sigaction ka;
-+ sigset_t *oldset;
- siginfo_t info;
- int signr;
-
-@@ -441,12 +411,14 @@ int do_signal(struct pt_regs *regs, sigs
- * if so.
- */
- if (!user_mode(regs))
-- return 1;
-+ return;
-
-- if (try_to_freeze())
-+ if (try_to_freeze() && !signal_pending(current))
- goto no_signal;
-
-- if (!oldset)
-+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
-+ oldset = &current->saved_sigmask;
-+ else
- oldset = &current->blocked;
-
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-@@ -460,7 +432,15 @@ int do_signal(struct pt_regs *regs, sigs
- set_debugreg(current->thread.debugreg7, 7);
-
- /* Whee! Actually deliver the signal. */
-- return handle_signal(signr, &info, &ka, oldset, regs);
-+ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
-+ /* a signal was successfully delivered; the saved
-+ * sigmask will have been stored in the signal frame,
-+ * and will be restored by sigreturn, so we can simply
-+ * clear the TIF_RESTORE_SIGMASK flag */
-+ if (test_thread_flag(TIF_RESTORE_SIGMASK))
-+ clear_thread_flag(TIF_RESTORE_SIGMASK);
-+ }
-+ return;
- }
-
- no_signal:
-@@ -481,10 +461,16 @@ int do_signal(struct pt_regs *regs, sigs
- regs->rip -= 2;
- }
- }
-- return 0;
-+
-+ /* if there's no signal to deliver, we just put the saved sigmask
-+ * back */
-+ if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-+ clear_thread_flag(TIF_RESTORE_SIGMASK);
-+ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-+ }
- }
-
--void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
-+void do_notify_resume(struct pt_regs *regs, sigset_t *unused, __u32 thread_info_flags)
- {
- #ifdef DEBUG_SIG
- printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
-@@ -498,8 +484,8 @@ void do_notify_resume(struct pt_regs *re
- }
-
- /* deal with pending signal delivery */
-- if (thread_info_flags & _TIF_SIGPENDING)
-- do_signal(regs,oldset);
-+ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
-+ do_signal(regs);
- }
-
- void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/smp.c linux-2.6.16-026test009/arch/x86_64/kernel/smp.c
---- linux-2.6.16.orig/arch/x86_64/kernel/smp.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/smp.c 2006-04-19 15:02:11.000000000 +0400
-@@ -28,6 +28,7 @@
- #include <asm/proto.h>
- #include <asm/apicdef.h>
- #include <asm/idle.h>
-+#include <asm/nmi.h>
-
- /*
- * Smarter SMP flushing macros.
-@@ -444,6 +445,84 @@ int smp_call_function (void (*func) (voi
- return 0;
- }
-
-+static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED; /* held for the duration of one smp_nmi_call_function() request */
-+static struct nmi_call_data_struct {
-+ smp_nmi_function func; /* invoked as func(regs, info) on each responding CPU */
-+ void *info; /* opaque argument handed to func */
-+ atomic_t started; /* incremented by each CPU just before it runs func */
-+ atomic_t finished; /* incremented after func returns, only when wait is set */
-+ cpumask_t cpus_called; /* CPUs that must not run func (again) for this request */
-+ int wait; /* non-zero: the initiator also spins until finished reaches the CPU count */
-+} *nmi_call_data; /* current request; points at the initiator's on-stack data */
-+/* NMI IPI callback: run the function published in nmi_call_data, at most once per CPU. */
-+static int smp_nmi_callback(struct pt_regs * regs, int cpu)
-+{
-+ smp_nmi_function func;
-+ void *info;
-+ int wait;
-+
-+ func = nmi_call_data->func; /* copy the request to locals before signalling "started" */
-+ info = nmi_call_data->info;
-+ wait = nmi_call_data->wait;
-+ ack_APIC_irq();
-+ /* make sure func() runs only once on this CPU for the request */
-+ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
-+ return 0;
-+ /*
-+ * notify the initiating CPU that I've grabbed the data and am
-+ * about to execute the function
-+ */
-+ mb();
-+ atomic_inc(&nmi_call_data->started);
-+ /* unless wait is set, the initiator may return now and its on-stack data may be gone */
-+ irq_enter();
-+ func(regs, info);
-+ irq_exit();
-+ if (wait)
-+ atomic_inc(&nmi_call_data->finished); /* initiator is still spinning on this counter */
-+
-+ return 0;
-+}
-+/* Run func on all other online CPUs via an NMI IPI; returns -1 if nmi_call_lock is busy, else 0. */
-+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
-+{
-+ struct nmi_call_data_struct data;
-+ int cpus;
-+
-+ cpus = num_online_cpus() - 1; /* number of other CPUs expected to respond */
-+ if (!cpus)
-+ return 0; /* no other CPU online: nothing to do */
-+
-+ data.func = func;
-+ data.info = info;
-+ data.wait = wait;
-+ atomic_set(&data.started, 0);
-+ atomic_set(&data.finished, 0);
-+ cpus_clear(data.cpus_called);
-+ /* mark this cpu as already called so it does not run func if an NMI hits it */
-+ cpu_set(smp_processor_id(), data.cpus_called);
-+
-+ if (!spin_trylock(&nmi_call_lock))
-+ return -1; /* lock is held: fail instead of waiting for it */
-+
-+ nmi_call_data = &data; /* publish the on-stack request to smp_nmi_callback() */
-+ set_nmi_ipi_callback(smp_nmi_callback);
-+ mb(); /* full barrier between publishing the request and sending the IPI */
-+
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ send_IPI_allbutself(APIC_DM_NMI);
-+ while (atomic_read(&data.started) != cpus)
-+ barrier();
-+
-+ unset_nmi_ipi_callback(); /* every expected CPU has incremented started */
-+ if (wait) /* optionally also wait until func has returned on every CPU */
-+ while (atomic_read(&data.finished) != cpus)
-+ barrier();
-+ spin_unlock(&nmi_call_lock);
-+
-+ return 0;
-+}
-+
- void smp_stop_cpu(void)
- {
- unsigned long flags;
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/sys_x86_64.c linux-2.6.16-026test009/arch/x86_64/kernel/sys_x86_64.c
---- linux-2.6.16.orig/arch/x86_64/kernel/sys_x86_64.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/sys_x86_64.c 2006-04-19 15:02:12.000000000 +0400
-@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_uts
- {
- int err;
- down_read(&uts_sem);
-- err = copy_to_user(name, &system_utsname, sizeof (*name));
-+ err = copy_to_user(name, &ve_utsname, sizeof (*name));
- up_read(&uts_sem);
- if (personality(current->personality) == PER_LINUX32)
- err |= copy_to_user(&name->machine, "i686", 5);
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/time.c linux-2.6.16-026test009/arch/x86_64/kernel/time.c
---- linux-2.6.16.orig/arch/x86_64/kernel/time.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/time.c 2006-04-19 15:02:12.000000000 +0400
-@@ -66,6 +66,8 @@ unsigned long vxtime_hz = PIT_TICK_RATE;
- int report_lost_ticks; /* command line option */
- unsigned long long monotonic_base;
-
-+EXPORT_SYMBOL(cpu_khz);
-+
- struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
-
- volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-diff -upr linux-2.6.16.orig/arch/x86_64/kernel/traps.c linux-2.6.16-026test009/arch/x86_64/kernel/traps.c
---- linux-2.6.16.orig/arch/x86_64/kernel/traps.c 2006-04-19 15:02:00.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/kernel/traps.c 2006-04-19 15:02:12.000000000 +0400
-@@ -116,6 +116,9 @@ int printk_address(unsigned long address
- char *delim = ":";
- char namebuf[128];
-
-+ if (!decode_call_traces)
-+ return printk("[<%016lx>]", address);
-+
- symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf);
- if (!symname)
- return printk("[<%016lx>]", address);
-@@ -208,7 +211,7 @@ void show_trace(unsigned long *stack)
- do while (cond) { \
- unsigned long addr = *stack++; \
- if (kernel_text_address(addr)) { \
-- if (i > 50) { \
-+ if (i > 50 && decode_call_traces) { \
- printk("\n "); \
- i = 0; \
- } \
-@@ -319,10 +322,12 @@ void show_registers(struct pt_regs *regs
-
- rsp = regs->rsp;
-
-- printk("CPU %d ", cpu);
-+ printk("CPU: %d ", cpu);
- __show_regs(regs);
-- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
-- cur->comm, cur->pid, task_thread_info(cur), cur);
-+ printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n",
-+ cur->comm, cur->pid,
-+ VEID(VE_TASK_INFO(current)->owner_env),
-+ task_thread_info(cur), cur);
-
- /*
- * When in-kernel, we also print out the stack and code at the
-@@ -458,6 +463,7 @@ void __kprobes die_nmi(char *str, struct
- show_registers(regs);
- if (panic_on_timeout || panic_on_oops)
- panic("nmi watchdog");
-+ smp_nmi_call_function(smp_show_regs, NULL, 1);
- printk("console shuts up ...\n");
- oops_end(flags);
- do_exit(SIGSEGV);
-diff -upr linux-2.6.16.orig/arch/x86_64/mm/fault.c linux-2.6.16-026test009/arch/x86_64/mm/fault.c
---- linux-2.6.16.orig/arch/x86_64/mm/fault.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/mm/fault.c 2006-04-19 15:02:12.000000000 +0400
-@@ -41,27 +41,6 @@
- #define PF_RSVD (1<<3)
- #define PF_INSTR (1<<4)
-
--void bust_spinlocks(int yes)
--{
-- int loglevel_save = console_loglevel;
-- if (yes) {
-- oops_in_progress = 1;
-- } else {
--#ifdef CONFIG_VT
-- unblank_screen();
--#endif
-- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk()
-- * without oops_in_progress set so that printk will give klogd
-- * a poke. Hold onto your hats...
-- */
-- console_loglevel = 15; /* NMI oopser may have shut the console up */
-- printk(" ");
-- console_loglevel = loglevel_save;
-- }
--}
--
- /* Sometimes the CPU reports invalid exceptions on prefetch.
- Check that here and ignore.
- Opcode checker based on code by Richard Brunner */
-@@ -293,7 +272,7 @@ static int vmalloc_fault(unsigned long a
- }
-
- int page_fault_trace = 0;
--int exception_trace = 1;
-+int exception_trace = 0;
-
- /*
- * This routine handles page faults. It determines the address,
-@@ -322,7 +301,7 @@ asmlinkage void __kprobes do_page_fault(
- local_irq_enable();
-
- if (unlikely(page_fault_trace))
-- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
-+ ve_printk(VE_LOG, "pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
- tsk = current;
-@@ -372,7 +351,6 @@ asmlinkage void __kprobes do_page_fault(
- if (unlikely(in_atomic() || !mm))
- goto bad_area_nosemaphore;
-
-- again:
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunatly, in the case of an
-@@ -476,7 +454,7 @@ bad_area_nosemaphore:
- return;
-
- if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
-- printk(
-+ ve_printk(VE_LOG,
- "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, tsk->pid, address, regs->rip,
-@@ -544,13 +522,14 @@ no_context:
- */
- out_of_memory:
- up_read(&mm->mmap_sem);
-- if (current->pid == 1) {
-- yield();
-- goto again;
-+ if (error_code & 4) {
-+ /*
-+ * A 0-order allocation always succeeds unless something really
-+ * fatal has happened: beancounter overdraft or OOM.
-+ */
-+ force_sig(SIGKILL, tsk);
-+ return;
- }
-- printk("VM: killing process %s\n", tsk->comm);
-- if (error_code & 4)
-- do_exit(SIGKILL);
- goto no_context;
-
- do_sigbus:
-diff -upr linux-2.6.16.orig/arch/x86_64/mm/init.c linux-2.6.16-026test009/arch/x86_64/mm/init.c
---- linux-2.6.16.orig/arch/x86_64/mm/init.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/arch/x86_64/mm/init.c 2006-04-19 15:02:12.000000000 +0400
-@@ -89,6 +89,7 @@ void show_mem(void)
- printk(KERN_INFO "%lu pages shared\n",shared);
- printk(KERN_INFO "%lu pages swap cached\n",cached);
- }
-+EXPORT_SYMBOL(show_mem);
-
- /* References to section boundaries */
-
-diff -upr linux-2.6.16.orig/block/elevator.c linux-2.6.16-026test009/block/elevator.c
---- linux-2.6.16.orig/block/elevator.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/block/elevator.c 2006-04-19 15:02:12.000000000 +0400
-@@ -676,7 +676,7 @@ void elv_unregister(struct elevator_type
- * Iterate every thread in the process to remove the io contexts.
- */
- read_lock(&tasklist_lock);
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- struct io_context *ioc = p->io_context;
- if (ioc && ioc->cic) {
- ioc->cic->exit(ioc->cic);
-@@ -688,7 +688,7 @@ void elv_unregister(struct elevator_type
- ioc->aic->dtor(ioc->aic);
- ioc->aic = NULL;
- }
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
- read_unlock(&tasklist_lock);
-
- spin_lock_irq(&elv_list_lock);
-diff -upr linux-2.6.16.orig/block/genhd.c linux-2.6.16-026test009/block/genhd.c
---- linux-2.6.16.orig/block/genhd.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/block/genhd.c 2006-04-19 15:02:12.000000000 +0400
-@@ -18,7 +18,8 @@
-
- #define MAX_PROBE_HASH 255 /* random */
-
--static struct subsystem block_subsys;
-+struct subsystem block_subsys;
-+EXPORT_SYMBOL(block_subsys);
-
- static DECLARE_MUTEX(block_subsys_sem);
-
-@@ -592,7 +593,7 @@ static struct kset_uevent_ops block_ueve
- };
-
- /* declare block_subsys. */
--static decl_subsys(block, &ktype_block, &block_uevent_ops);
-+decl_subsys(block, &ktype_block, &block_uevent_ops);
-
-
- /*
-diff -upr linux-2.6.16.orig/drivers/base/class.c linux-2.6.16-026test009/drivers/base/class.c
---- linux-2.6.16.orig/drivers/base/class.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/base/class.c 2006-04-19 15:02:12.000000000 +0400
-@@ -72,8 +72,13 @@ static struct kobj_type ktype_class = {
- };
-
- /* Hotplug events for classes go to the class_obj subsys */
--static decl_subsys(class, &ktype_class, NULL);
-+decl_subsys(class, &ktype_class, NULL);
-
-+#ifndef CONFIG_VE /* no VE support: always the global class_subsys */
-+#define visible_class_subsys class_subsys
-+#else /* CONFIG_VE: the subsystem pointed to by get_exec_env() */
-+#define visible_class_subsys (*get_exec_env()->class_subsys)
-+#endif /* CONFIG_VE */
-
- int class_create_file(struct class * cls, const struct class_attribute * attr)
- {
-@@ -148,7 +153,7 @@ int class_register(struct class * cls)
- if (error)
- return error;
-
-- subsys_set_kset(cls, class_subsys);
-+ subsys_set_kset(cls, visible_class_subsys);
-
- error = subsystem_register(&cls->subsys);
- if (!error) {
-@@ -420,8 +425,13 @@ static struct kset_uevent_ops class_ueve
- .uevent = class_uevent,
- };
-
--static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
-+decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
-
-+#ifndef CONFIG_VE /* no VE support: always the global class_obj_subsys */
-+#define visible_class_obj_subsys class_obj_subsys
-+#else /* CONFIG_VE: the subsystem pointed to by get_exec_env() */
-+#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys)
-+#endif /* CONFIG_VE */
-
- static int class_device_add_attrs(struct class_device * cd)
- {
-@@ -470,7 +480,7 @@ static ssize_t store_uevent(struct class
-
- void class_device_initialize(struct class_device *class_dev)
- {
-- kobj_set_kset_s(class_dev, class_obj_subsys);
-+ kobj_set_kset_s(class_dev, visible_class_obj_subsys);
- kobject_init(&class_dev->kobj);
- INIT_LIST_HEAD(&class_dev->node);
- }
-@@ -805,12 +815,19 @@ void class_interface_unregister(struct c
- class_put(parent);
- }
-
--
-+void prepare_sysfs_classes(void) /* with CONFIG_VE, record the global class subsystems in VE0; else a no-op */
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->class_subsys = &class_subsys; /* read back through visible_class_subsys */
-+ get_ve0()->class_obj_subsys = &class_obj_subsys; /* read back through visible_class_obj_subsys */
-+#endif /* CONFIG_VE */
-+}
-
- int __init classes_init(void)
- {
- int retval;
-
-+ prepare_sysfs_classes();
- retval = subsystem_register(&class_subsys);
- if (retval)
- return retval;
-@@ -848,3 +865,6 @@ EXPORT_SYMBOL_GPL(class_device_remove_bi
-
- EXPORT_SYMBOL_GPL(class_interface_register);
- EXPORT_SYMBOL_GPL(class_interface_unregister);
-+
-+EXPORT_SYMBOL(class_subsys);
-+EXPORT_SYMBOL(class_obj_subsys);
-diff -upr linux-2.6.16.orig/drivers/base/cpu.c linux-2.6.16-026test009/drivers/base/cpu.c
---- linux-2.6.16.orig/drivers/base/cpu.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/base/cpu.c 2006-04-19 15:02:11.000000000 +0400
-@@ -141,7 +141,7 @@ int __devinit register_cpu(struct cpu *c
- return error;
- }
-
--struct sys_device *get_cpu_sysdev(int cpu)
-+struct sys_device *get_cpu_sysdev(unsigned cpu)
- {
- if (cpu < NR_CPUS)
- return cpu_sys_devices[cpu];
-diff -upr linux-2.6.16.orig/drivers/base/firmware_class.c linux-2.6.16-026test009/drivers/base/firmware_class.c
---- linux-2.6.16.orig/drivers/base/firmware_class.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/base/firmware_class.c 2006-04-19 15:02:11.000000000 +0400
-@@ -211,18 +211,20 @@ static int
- fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
- {
- u8 *new_data;
-+ int new_size = fw_priv->alloc_size;
-
- if (min_size <= fw_priv->alloc_size)
- return 0;
-
-- new_data = vmalloc(fw_priv->alloc_size + PAGE_SIZE);
-+ new_size = ALIGN(min_size, PAGE_SIZE);
-+ new_data = vmalloc(new_size);
- if (!new_data) {
- printk(KERN_ERR "%s: unable to alloc buffer\n", __FUNCTION__);
- /* Make sure that we don't keep incomplete data */
- fw_load_abort(fw_priv);
- return -ENOMEM;
- }
-- fw_priv->alloc_size += PAGE_SIZE;
-+ fw_priv->alloc_size = new_size;
- if (fw_priv->fw->data) {
- memcpy(new_data, fw_priv->fw->data, fw_priv->fw->size);
- vfree(fw_priv->fw->data);
-diff -upr linux-2.6.16.orig/drivers/base/node.c linux-2.6.16-026test009/drivers/base/node.c
---- linux-2.6.16.orig/drivers/base/node.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/base/node.c 2006-04-19 15:02:11.000000000 +0400
-@@ -106,7 +106,7 @@ static ssize_t node_read_numastat(struct
- other_node = 0;
- for (i = 0; i < MAX_NR_ZONES; i++) {
- struct zone *z = &pg->node_zones[i];
-- for (cpu = 0; cpu < NR_CPUS; cpu++) {
-+ for_each_online_cpu(cpu) {
- struct per_cpu_pageset *ps = zone_pcp(z,cpu);
- numa_hit += ps->numa_hit;
- numa_miss += ps->numa_miss;
-diff -upr linux-2.6.16.orig/drivers/block/cciss.c linux-2.6.16-026test009/drivers/block/cciss.c
---- linux-2.6.16.orig/drivers/block/cciss.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/block/cciss.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1181,6 +1181,53 @@ static int revalidate_allvol(ctlr_info_t
- return 0;
- }
-
-+static inline void complete_buffers(struct bio *bio, int status) /* end every bio on the bi_next chain */
-+{
-+ while (bio) {
-+ struct bio *xbh = bio->bi_next; /* remember the successor before unlinking */
-+ int nr_sectors = bio_sectors(bio);
-+
-+ bio->bi_next = NULL;
-+ blk_finished_io(len); /* NOTE(review): 'len' is not declared in this function; presumably the macro drops its argument - confirm */
-+ bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO); /* bytes = sectors * 512; zero status is reported as -EIO */
-+ bio = xbh;
-+ }
-+
-+}
-+/* Softirq-done handler: unmap the command's DMA pages, end its bios, finish the request, free the command. */
-+static void cciss_softirq_done(struct request *rq)
-+{
-+ CommandList_struct *cmd = rq->completion_data;
-+ ctlr_info_t *h = hba[cmd->ctlr];
-+ unsigned long flags;
-+ u64bit temp64;
-+ int i, ddir;
-+
-+ if (cmd->Request.Type.Direction == XFER_READ) /* pick the DMA direction to unmap with */
-+ ddir = PCI_DMA_FROMDEVICE;
-+ else
-+ ddir = PCI_DMA_TODEVICE;
-+
-+ /* command did not need to be retried */
-+ /* unmap the DMA mapping for all the scatter gather elements */
-+ for(i=0; i<cmd->Header.SGList; i++) {
-+ temp64.val32.lower = cmd->SG[i].Addr.lower; /* rebuild the 64-bit address from its two 32-bit halves */
-+ temp64.val32.upper = cmd->SG[i].Addr.upper;
-+ pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
-+ }
-+
-+ complete_buffers(rq->bio, rq->errors);
-+
-+#ifdef CCISS_DEBUG
-+ printk("Done with %p\n", rq);
-+#endif /* CCISS_DEBUG */
-+
-+ spin_lock_irqsave(&h->lock, flags); /* request completion and cmd_free() run under the controller lock */
-+ end_that_request_last(rq, rq->errors);
-+ cmd_free(h, cmd,1);
-+ spin_unlock_irqrestore(&h->lock, flags);
-+}
-+
- /* This function will check the usage_count of the drive to be updated/added.
- * If the usage_count is zero then the drive information will be updated and
- * the disk will be re-registered with the kernel. If not then it will be
-@@ -1249,6 +1296,8 @@ static void cciss_update_drive_info(int
-
- blk_queue_max_sectors(disk->queue, 512);
-
-+ blk_queue_softirq_done(disk->queue, cciss_softirq_done);
-+
- disk->queue->queuedata = hba[ctlr];
-
- blk_queue_hardsect_size(disk->queue,
-@@ -2148,20 +2197,6 @@ static void start_io( ctlr_info_t *h)
- addQ (&(h->cmpQ), c);
- }
- }
--
--static inline void complete_buffers(struct bio *bio, int status)
--{
-- while (bio) {
-- struct bio *xbh = bio->bi_next;
-- int nr_sectors = bio_sectors(bio);
--
-- bio->bi_next = NULL;
-- blk_finished_io(len);
-- bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
-- bio = xbh;
-- }
--
--}
- /* Assumes that CCISS_LOCK(h->ctlr) is held. */
- /* Zeros out the error record and then resends the command back */
- /* to the controller */
-@@ -2179,39 +2214,6 @@ static inline void resend_cciss_cmd( ctl
- start_io(h);
- }
-
--static void cciss_softirq_done(struct request *rq)
--{
-- CommandList_struct *cmd = rq->completion_data;
-- ctlr_info_t *h = hba[cmd->ctlr];
-- unsigned long flags;
-- u64bit temp64;
-- int i, ddir;
--
-- if (cmd->Request.Type.Direction == XFER_READ)
-- ddir = PCI_DMA_FROMDEVICE;
-- else
-- ddir = PCI_DMA_TODEVICE;
--
-- /* command did not need to be retried */
-- /* unmap the DMA mapping for all the scatter gather elements */
-- for(i=0; i<cmd->Header.SGList; i++) {
-- temp64.val32.lower = cmd->SG[i].Addr.lower;
-- temp64.val32.upper = cmd->SG[i].Addr.upper;
-- pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
-- }
--
-- complete_buffers(rq->bio, rq->errors);
--
--#ifdef CCISS_DEBUG
-- printk("Done with %p\n", rq);
--#endif /* CCISS_DEBUG */
--
-- spin_lock_irqsave(&h->lock, flags);
-- end_that_request_last(rq, rq->errors);
-- cmd_free(h, cmd,1);
-- spin_unlock_irqrestore(&h->lock, flags);
--}
--
- /* checks the status of the job and calls complete buffers to mark all
- * buffers for the completed job. Note that this function does not need
- * to hold the hba/queue lock.
-@@ -3269,8 +3271,8 @@ clean2:
- unregister_blkdev(hba[i]->major, hba[i]->devname);
- clean1:
- release_io_mem(hba[i]);
-- free_hba(i);
- hba[i]->busy_initializing = 0;
-+ free_hba(i);
- return(-1);
- }
-
-diff -upr linux-2.6.16.orig/drivers/char/Kconfig linux-2.6.16-026test009/drivers/char/Kconfig
---- linux-2.6.16.orig/drivers/char/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/char/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -187,6 +187,7 @@ config MOXA_SMARTIO
- config ISI
- tristate "Multi-Tech multiport card support (EXPERIMENTAL)"
- depends on SERIAL_NONSTANDARD
-+ select FW_LOADER
- help
- This is a driver for the Multi-Tech cards which provide several
- serial ports. The driver is experimental and can currently only be
-diff -upr linux-2.6.16.orig/drivers/char/pty.c linux-2.6.16-026test009/drivers/char/pty.c
---- linux-2.6.16.orig/drivers/char/pty.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/char/pty.c 2006-04-19 15:02:12.000000000 +0400
-@@ -32,16 +32,30 @@
- #include <linux/bitops.h>
- #include <linux/devpts_fs.h>
-
-+#include <ub/ub_misc.h>
-+
- /* These are global because they are accessed in tty_io.c */
- #ifdef CONFIG_UNIX98_PTYS
- struct tty_driver *ptm_driver;
--static struct tty_driver *pts_driver;
-+struct tty_driver *pts_driver;
-+EXPORT_SYMBOL(ptm_driver);
-+EXPORT_SYMBOL(pts_driver);
-+/* With CONFIG_VE, record the global ptm_driver in VE0; otherwise a no-op. */
-+void prepare_pty(void)
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->ptm_driver = ptm_driver;
-+ /* do not clear the global ptm_driver and co. here, they are used in vecalls.c */
-+#endif /* CONFIG_VE */
-+}
- #endif
-
- static void pty_close(struct tty_struct * tty, struct file * filp)
- {
- if (!tty)
- return;
-+
-+ ub_pty_uncharge(tty);
- if (tty->driver->subtype == PTY_TYPE_MASTER) {
- if (tty->count > 1)
- printk("master pty_close: count = %d!!\n", tty->count);
-@@ -61,8 +75,12 @@ static void pty_close(struct tty_struct
- if (tty->driver->subtype == PTY_TYPE_MASTER) {
- set_bit(TTY_OTHER_CLOSED, &tty->flags);
- #ifdef CONFIG_UNIX98_PTYS
-- if (tty->driver == ptm_driver)
-+ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-+ struct ve_struct *old_env;
-+ old_env = set_exec_env(VE_OWNER_TTY(tty));
- devpts_pty_kill(tty->index);
-+ (void)set_exec_env(old_env);
-+ }
- #endif
- tty_vhangup(tty->link);
- }
-@@ -212,6 +230,10 @@ static int pty_open(struct tty_struct *t
- if (tty->link->count != 1)
- goto out;
-
-+ retval = -ENODEV;
-+ if (ub_pty_charge(tty))
-+ goto out;
-+
- clear_bit(TTY_OTHER_CLOSED, &tty->link->flags);
- set_bit(TTY_THROTTLED, &tty->flags);
- set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
-@@ -239,7 +261,9 @@ static struct tty_operations pty_ops = {
-
- /* Traditional BSD devices */
- #ifdef CONFIG_LEGACY_PTYS
--static struct tty_driver *pty_driver, *pty_slave_driver;
-+struct tty_driver *pty_driver, *pty_slave_driver;
-+EXPORT_SYMBOL(pty_driver);
-+EXPORT_SYMBOL(pty_slave_driver);
-
- static int pty_bsd_ioctl(struct tty_struct *tty, struct file *file,
- unsigned int cmd, unsigned long arg)
-@@ -397,6 +421,7 @@ static void __init unix98_pty_init(void)
- panic("Couldn't register Unix98 pts driver");
-
- pty_table[1].data = &ptm_driver->refcount;
-+ prepare_pty();
- }
- #else
- static inline void unix98_pty_init(void) { }
-diff -upr linux-2.6.16.orig/drivers/char/snsc_event.c linux-2.6.16-026test009/drivers/char/snsc_event.c
---- linux-2.6.16.orig/drivers/char/snsc_event.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/char/snsc_event.c 2006-04-19 15:02:12.000000000 +0400
-@@ -206,7 +206,7 @@ scdrv_dispatch_event(char *event, int le
-
- /* first find init's task */
- read_lock(&tasklist_lock);
-- for_each_process(p) {
-+ for_each_process_all(p) {
- if (p->pid == 1)
- break;
- }
-diff -upr linux-2.6.16.orig/drivers/char/sysrq.c linux-2.6.16-026test009/drivers/char/sysrq.c
---- linux-2.6.16.orig/drivers/char/sysrq.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/char/sysrq.c 2006-04-19 15:02:12.000000000 +0400
-@@ -174,8 +174,13 @@ static struct sysrq_key_op sysrq_showloc
- static void sysrq_handle_showregs(int key, struct pt_regs *pt_regs,
- struct tty_struct *tty)
- {
-+ bust_spinlocks(1);
- if (pt_regs)
- show_regs(pt_regs);
-+ bust_spinlocks(0);
-+#if defined(__i386__) || defined(__x86_64__)
-+ smp_nmi_call_function(smp_show_regs, NULL, 0);
-+#endif
- }
- static struct sysrq_key_op sysrq_showregs_op = {
- .handler = sysrq_handle_showregs,
-@@ -221,7 +226,7 @@ static void send_sig_all(int sig)
- {
- struct task_struct *p;
-
-- for_each_process(p) {
-+ for_each_process_all(p) {
- if (p->mm && p->pid != 1)
- /* Not swapper, init nor kernel thread */
- force_sig(sig, p);
-diff -upr linux-2.6.16.orig/drivers/char/tlclk.c linux-2.6.16-026test009/drivers/char/tlclk.c
---- linux-2.6.16.orig/drivers/char/tlclk.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/char/tlclk.c 2006-04-19 15:02:11.000000000 +0400
-@@ -327,7 +327,7 @@ static ssize_t store_received_ref_clk3a(
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(received_ref_clk3a, S_IWUGO, NULL,
-+static DEVICE_ATTR(received_ref_clk3a, (S_IWUSR|S_IWGRP), NULL,
- store_received_ref_clk3a);
-
-
-@@ -349,7 +349,7 @@ static ssize_t store_received_ref_clk3b(
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(received_ref_clk3b, S_IWUGO, NULL,
-+static DEVICE_ATTR(received_ref_clk3b, (S_IWUSR|S_IWGRP), NULL,
- store_received_ref_clk3b);
-
-
-@@ -371,7 +371,7 @@ static ssize_t store_enable_clk3b_output
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(enable_clk3b_output, S_IWUGO, NULL,
-+static DEVICE_ATTR(enable_clk3b_output, (S_IWUSR|S_IWGRP), NULL,
- store_enable_clk3b_output);
-
- static ssize_t store_enable_clk3a_output(struct device *d,
-@@ -392,7 +392,7 @@ static ssize_t store_enable_clk3a_output
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(enable_clk3a_output, S_IWUGO, NULL,
-+static DEVICE_ATTR(enable_clk3a_output, (S_IWUSR|S_IWGRP), NULL,
- store_enable_clk3a_output);
-
- static ssize_t store_enable_clkb1_output(struct device *d,
-@@ -413,7 +413,7 @@ static ssize_t store_enable_clkb1_output
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(enable_clkb1_output, S_IWUGO, NULL,
-+static DEVICE_ATTR(enable_clkb1_output, (S_IWUSR|S_IWGRP), NULL,
- store_enable_clkb1_output);
-
-
-@@ -435,7 +435,7 @@ static ssize_t store_enable_clka1_output
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(enable_clka1_output, S_IWUGO, NULL,
-+static DEVICE_ATTR(enable_clka1_output, (S_IWUSR|S_IWGRP), NULL,
- store_enable_clka1_output);
-
- static ssize_t store_enable_clkb0_output(struct device *d,
-@@ -456,7 +456,7 @@ static ssize_t store_enable_clkb0_output
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(enable_clkb0_output, S_IWUGO, NULL,
-+static DEVICE_ATTR(enable_clkb0_output, (S_IWUSR|S_IWGRP), NULL,
- store_enable_clkb0_output);
-
- static ssize_t store_enable_clka0_output(struct device *d,
-@@ -477,7 +477,7 @@ static ssize_t store_enable_clka0_output
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(enable_clka0_output, S_IWUGO, NULL,
-+static DEVICE_ATTR(enable_clka0_output, (S_IWUSR|S_IWGRP), NULL,
- store_enable_clka0_output);
-
- static ssize_t store_select_amcb2_transmit_clock(struct device *d,
-@@ -519,7 +519,7 @@ static ssize_t store_select_amcb2_transm
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(select_amcb2_transmit_clock, S_IWUGO, NULL,
-+static DEVICE_ATTR(select_amcb2_transmit_clock, (S_IWUSR|S_IWGRP), NULL,
- store_select_amcb2_transmit_clock);
-
- static ssize_t store_select_amcb1_transmit_clock(struct device *d,
-@@ -560,7 +560,7 @@ static ssize_t store_select_amcb1_transm
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(select_amcb1_transmit_clock, S_IWUGO, NULL,
-+static DEVICE_ATTR(select_amcb1_transmit_clock, (S_IWUSR|S_IWGRP), NULL,
- store_select_amcb1_transmit_clock);
-
- static ssize_t store_select_redundant_clock(struct device *d,
-@@ -581,7 +581,7 @@ static ssize_t store_select_redundant_cl
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(select_redundant_clock, S_IWUGO, NULL,
-+static DEVICE_ATTR(select_redundant_clock, (S_IWUSR|S_IWGRP), NULL,
- store_select_redundant_clock);
-
- static ssize_t store_select_ref_frequency(struct device *d,
-@@ -602,7 +602,7 @@ static ssize_t store_select_ref_frequenc
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(select_ref_frequency, S_IWUGO, NULL,
-+static DEVICE_ATTR(select_ref_frequency, (S_IWUSR|S_IWGRP), NULL,
- store_select_ref_frequency);
-
- static ssize_t store_filter_select(struct device *d,
-@@ -623,7 +623,7 @@ static ssize_t store_filter_select(struc
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(filter_select, S_IWUGO, NULL, store_filter_select);
-+static DEVICE_ATTR(filter_select, (S_IWUSR|S_IWGRP), NULL, store_filter_select);
-
- static ssize_t store_hardware_switching_mode(struct device *d,
- struct device_attribute *attr, const char *buf, size_t count)
-@@ -643,7 +643,7 @@ static ssize_t store_hardware_switching_
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(hardware_switching_mode, S_IWUGO, NULL,
-+static DEVICE_ATTR(hardware_switching_mode, (S_IWUSR|S_IWGRP), NULL,
- store_hardware_switching_mode);
-
- static ssize_t store_hardware_switching(struct device *d,
-@@ -664,7 +664,7 @@ static ssize_t store_hardware_switching(
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(hardware_switching, S_IWUGO, NULL,
-+static DEVICE_ATTR(hardware_switching, (S_IWUSR|S_IWGRP), NULL,
- store_hardware_switching);
-
- static ssize_t store_refalign (struct device *d,
-@@ -684,7 +684,7 @@ static ssize_t store_refalign (struct de
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(refalign, S_IWUGO, NULL, store_refalign);
-+static DEVICE_ATTR(refalign, (S_IWUSR|S_IWGRP), NULL, store_refalign);
-
- static ssize_t store_mode_select (struct device *d,
- struct device_attribute *attr, const char *buf, size_t count)
-@@ -704,7 +704,7 @@ static ssize_t store_mode_select (struct
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(mode_select, S_IWUGO, NULL, store_mode_select);
-+static DEVICE_ATTR(mode_select, (S_IWUSR|S_IWGRP), NULL, store_mode_select);
-
- static ssize_t store_reset (struct device *d,
- struct device_attribute *attr, const char *buf, size_t count)
-@@ -724,7 +724,7 @@ static ssize_t store_reset (struct devic
- return strnlen(buf, count);
- }
-
--static DEVICE_ATTR(reset, S_IWUGO, NULL, store_reset);
-+static DEVICE_ATTR(reset, (S_IWUSR|S_IWGRP), NULL, store_reset);
-
- static struct attribute *tlclk_sysfs_entries[] = {
- &dev_attr_current_ref.attr,
-@@ -767,6 +767,7 @@ static int __init tlclk_init(void)
- printk(KERN_ERR "tlclk: can't get major %d.\n", tlclk_major);
- return ret;
- }
-+ tlclk_major = ret;
- alarm_events = kzalloc( sizeof(struct tlclk_alarms), GFP_KERNEL);
- if (!alarm_events)
- goto out1;
-diff -upr linux-2.6.16.orig/drivers/char/tty_io.c linux-2.6.16-026test009/drivers/char/tty_io.c
---- linux-2.6.16.orig/drivers/char/tty_io.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/char/tty_io.c 2006-04-19 15:02:12.000000000 +0400
-@@ -86,6 +86,7 @@
- #include <linux/string.h>
- #include <linux/slab.h>
- #include <linux/poll.h>
-+#include <linux/ve_owner.h>
- #include <linux/proc_fs.h>
- #include <linux/init.h>
- #include <linux/module.h>
-@@ -105,6 +106,7 @@
- #include <linux/devfs_fs_kernel.h>
-
- #include <linux/kmod.h>
-+#include <ub/ub_mem.h>
-
- #undef TTY_DEBUG_HANGUP
-
-@@ -122,11 +124,16 @@ struct termios tty_std_termios = { /* fo
-
- EXPORT_SYMBOL(tty_std_termios);
-
-+/* this lock protects the tty_drivers list, whose users do no locking of their own */
-+rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED;
-+EXPORT_SYMBOL(tty_driver_guard);
-+
- /* This list gets poked at by procfs and various bits of boot up code. This
- could do with some rationalisation such as pulling the tty proc function
- into this file */
-
- LIST_HEAD(tty_drivers); /* linked list of tty drivers */
-+EXPORT_SYMBOL(tty_drivers);
-
- /* Semaphore to protect creating and releasing a tty. This is shared with
- vt.c for deeply disgusting hack reasons */
-@@ -136,6 +143,15 @@ DECLARE_MUTEX(tty_sem);
- extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */
- extern int pty_limit; /* Config limit on Unix98 ptys */
- static DEFINE_IDR(allocated_ptys);
-+#ifdef CONFIG_VE /* per-VE views of the pty IDR and the ptm driver */
-+#define __ve_allocated_ptys(ve) (*((ve)->allocated_ptys)) /* IDR that the given ve points at */
-+#define ve_allocated_ptys __ve_allocated_ptys(get_exec_env()) /* IDR of the VE returned by get_exec_env() */
-+#define ve_ptm_driver (get_exec_env()->ptm_driver)
-+#else /* !CONFIG_VE: everything maps to the single global instance */
-+#define __ve_allocated_ptys(ve) allocated_ptys
-+#define ve_allocated_ptys allocated_ptys
-+#define ve_ptm_driver ptm_driver
-+#endif /* CONFIG_VE */
- static DECLARE_MUTEX(allocated_ptys_lock);
- static int ptmx_open(struct inode *, struct file *);
- #endif
-@@ -156,11 +172,25 @@ static int tty_fasync(int fd, struct fil
- static void release_mem(struct tty_struct *tty, int idx);
-
-
-+DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env)
-+DCL_VE_OWNER(TTY, struct tty_struct, owner_env)
-+/* With CONFIG_VE, point VE0's allocated_ptys at the global IDR; otherwise a no-op. */
-+void prepare_tty(void)
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->allocated_ptys = &allocated_ptys;
-+ /*
-+ * in this case, tty_register_driver() sets up
-+ * owner_env correctly right from bootup
-+ */
-+#endif /* CONFIG_VE */
-+}
-+
- static struct tty_struct *alloc_tty_struct(void)
- {
- struct tty_struct *tty;
-
-- tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
-+ tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
- if (tty)
- memset(tty, 0, sizeof(struct tty_struct));
- return tty;
-@@ -857,14 +887,37 @@ static struct tty_driver *get_tty_driver
- {
- struct tty_driver *p;
-
-+ read_lock(&tty_driver_guard);
- list_for_each_entry(p, &tty_drivers, tty_drivers) {
- dev_t base = MKDEV(p->major, p->minor_start);
- if (device < base || device >= base + p->num)
- continue;
- *index = device - base;
-- return p;
-+#ifdef CONFIG_VE
-+ if (in_interrupt())
-+ goto found;
-+ if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR
-+#ifdef CONFIG_UNIX98_PTYS
-+ && (p->major<UNIX98_PTY_MASTER_MAJOR ||
-+ p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) &&
-+ (p->major<UNIX98_PTY_SLAVE_MAJOR ||
-+ p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1)
-+#endif
-+ ) goto found;
-+ if (ve_is_super(VE_OWNER_TTYDRV(p)) &&
-+ ve_is_super(get_exec_env()))
-+ goto found;
-+ if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env()))
-+ continue;
-+#endif
-+ goto found;
- }
-+ read_unlock(&tty_driver_guard);
- return NULL;
-+
-+found:
-+ read_unlock(&tty_driver_guard);
-+ return p;
- }
-
- /*
-@@ -1092,7 +1145,7 @@ static void do_tty_hangup(void *data)
-
- read_lock(&tasklist_lock);
- if (tty->session > 0) {
-- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-+ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
- if (p->signal->tty == tty)
- p->signal->tty = NULL;
- if (!p->signal->leader)
-@@ -1101,7 +1154,7 @@ static void do_tty_hangup(void *data)
- send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
- if (tty->pgrp > 0)
- p->signal->tty_old_pgrp = tty->pgrp;
-- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
-+ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
- }
- read_unlock(&tasklist_lock);
-
-@@ -1218,9 +1271,9 @@ void disassociate_ctty(int on_exit)
-
- /* Now clear signal->tty under the lock */
- read_lock(&tasklist_lock);
-- do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
-+ do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
-- } while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
-+ } while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p);
- read_unlock(&tasklist_lock);
- up(&tty_sem);
- unlock_kernel();
-@@ -1446,21 +1499,28 @@ static inline void tty_line_name(struct
- * really quite straightforward. The semaphore locking can probably be
- * relaxed for the (most common) case of reopening a tty.
- */
--static int init_dev(struct tty_driver *driver, int idx,
-- struct tty_struct **ret_tty)
-+static int init_dev(struct tty_driver *driver, int idx,
-+ struct tty_struct *i_tty, struct tty_struct **ret_tty)
- {
- struct tty_struct *tty, *o_tty;
- struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
- struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
-+ struct ve_struct * owner;
- int retval=0;
-
-- /* check whether we're reopening an existing tty */
-- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-- tty = devpts_get_tty(idx);
-- if (tty && driver->subtype == PTY_TYPE_MASTER)
-- tty = tty->link;
-- } else {
-- tty = driver->ttys[idx];
-+ owner = VE_OWNER_TTYDRV(driver);
-+
-+ if (i_tty)
-+ tty = i_tty;
-+ else {
-+ /* check whether we're reopening an existing tty */
-+ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
-+ tty = devpts_get_tty(idx);
-+ if (tty && driver->subtype == PTY_TYPE_MASTER)
-+ tty = tty->link;
-+ } else {
-+ tty = driver->ttys[idx];
-+ }
- }
- if (tty) goto fast_track;
-
-@@ -1488,6 +1548,7 @@ static int init_dev(struct tty_driver *d
- tty->driver = driver;
- tty->index = idx;
- tty_line_name(driver, idx, tty->name);
-+ SET_VE_OWNER_TTY(tty, owner);
-
- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
- tp_loc = &tty->termios;
-@@ -1498,7 +1559,7 @@ static int init_dev(struct tty_driver *d
- }
-
- if (!*tp_loc) {
-- tp = (struct termios *) kmalloc(sizeof(struct termios),
-+ tp = (struct termios *) ub_kmalloc(sizeof(struct termios),
- GFP_KERNEL);
- if (!tp)
- goto free_mem_out;
-@@ -1506,7 +1567,7 @@ static int init_dev(struct tty_driver *d
- }
-
- if (!*ltp_loc) {
-- ltp = (struct termios *) kmalloc(sizeof(struct termios),
-+ ltp = (struct termios *) ub_kmalloc(sizeof(struct termios),
- GFP_KERNEL);
- if (!ltp)
- goto free_mem_out;
-@@ -1521,6 +1582,7 @@ static int init_dev(struct tty_driver *d
- o_tty->driver = driver->other;
- o_tty->index = idx;
- tty_line_name(driver->other, idx, o_tty->name);
-+ SET_VE_OWNER_TTY(o_tty, owner);
-
- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) {
- o_tp_loc = &o_tty->termios;
-@@ -1532,7 +1594,7 @@ static int init_dev(struct tty_driver *d
-
- if (!*o_tp_loc) {
- o_tp = (struct termios *)
-- kmalloc(sizeof(struct termios), GFP_KERNEL);
-+ ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
- if (!o_tp)
- goto free_mem_out;
- *o_tp = driver->other->init_termios;
-@@ -1540,7 +1602,7 @@ static int init_dev(struct tty_driver *d
-
- if (!*o_ltp_loc) {
- o_ltp = (struct termios *)
-- kmalloc(sizeof(struct termios), GFP_KERNEL);
-+ ub_kmalloc(sizeof(struct termios), GFP_KERNEL);
- if (!o_ltp)
- goto free_mem_out;
- memset(o_ltp, 0, sizeof(struct termios));
-@@ -1558,6 +1620,10 @@ static int init_dev(struct tty_driver *d
- *o_ltp_loc = o_ltp;
- o_tty->termios = *o_tp_loc;
- o_tty->termios_locked = *o_ltp_loc;
-+#ifdef CONFIG_VE
-+ if (driver->other->refcount == 0)
-+ (void)get_ve(owner);
-+#endif
- driver->other->refcount++;
- if (driver->subtype == PTY_TYPE_MASTER)
- o_tty->count++;
-@@ -1582,6 +1648,10 @@ static int init_dev(struct tty_driver *d
- *ltp_loc = ltp;
- tty->termios = *tp_loc;
- tty->termios_locked = *ltp_loc;
-+#ifdef CONFIG_VE
-+ if (driver->refcount == 0)
-+ (void)get_ve(owner);
-+#endif
- driver->refcount++;
- tty->count++;
-
-@@ -1692,6 +1762,10 @@ static void release_mem(struct tty_struc
- }
- o_tty->magic = 0;
- o_tty->driver->refcount--;
-+#ifdef CONFIG_VE
-+ if (o_tty->driver->refcount == 0)
-+ put_ve(VE_OWNER_TTY(o_tty));
-+#endif
- file_list_lock();
- list_del_init(&o_tty->tty_files);
- file_list_unlock();
-@@ -1714,6 +1788,10 @@ static void release_mem(struct tty_struc
-
- tty->magic = 0;
- tty->driver->refcount--;
-+#ifdef CONFIG_VE
-+ if (tty->driver->refcount == 0)
-+ put_ve(VE_OWNER_TTY(tty));
-+#endif
- file_list_lock();
- list_del_init(&tty->tty_files);
- file_list_unlock();
-@@ -1737,7 +1815,10 @@ static void release_dev(struct file * fi
- int idx;
- char buf[64];
- unsigned long flags;
--
-+#ifdef CONFIG_UNIX98_PTYS
-+ struct idr *idr_alloced;
-+#endif
-+
- tty = (struct tty_struct *)filp->private_data;
- if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev"))
- return;
-@@ -1752,6 +1833,9 @@ static void release_dev(struct file * fi
- devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
- devpts_master = pty_master && devpts;
- o_tty = tty->link;
-+#ifdef CONFIG_UNIX98_PTYS
-+ idr_alloced = &__ve_allocated_ptys(tty->owner_env);
-+#endif
-
- #ifdef TTY_PARANOIA_CHECK
- if (idx < 0 || idx >= tty->driver->num) {
-@@ -1924,13 +2008,13 @@ static void release_dev(struct file * fi
- struct task_struct *p;
-
- read_lock(&tasklist_lock);
-- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-+ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
-- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
-+ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
- if (o_tty)
-- do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
-+ do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
-- } while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
-+ } while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p);
- read_unlock(&tasklist_lock);
- }
-
-@@ -2005,7 +2089,7 @@ static void release_dev(struct file * fi
- /* Make this pty number available for reallocation */
- if (devpts) {
- down(&allocated_ptys_lock);
-- idr_remove(&allocated_ptys, idx);
-+ idr_remove(idr_alloced, idx);
- up(&allocated_ptys_lock);
- }
- #endif
-@@ -2026,7 +2110,7 @@ static void release_dev(struct file * fi
- */
- static int tty_open(struct inode * inode, struct file * filp)
- {
-- struct tty_struct *tty;
-+ struct tty_struct *tty, *c_tty;
- int noctty, retval;
- struct tty_driver *driver;
- int index;
-@@ -2039,6 +2123,7 @@ retry_open:
- noctty = filp->f_flags & O_NOCTTY;
- index = -1;
- retval = 0;
-+ c_tty = NULL;
-
- down(&tty_sem);
-
-@@ -2049,6 +2134,7 @@ retry_open:
- }
- driver = current->signal->tty->driver;
- index = current->signal->tty->index;
-+ c_tty = current->signal->tty;
- filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
- /* noctty = 1; */
- goto got_driver;
-@@ -2056,6 +2142,12 @@ retry_open:
- #ifdef CONFIG_VT
- if (device == MKDEV(TTY_MAJOR,0)) {
- extern struct tty_driver *console_driver;
-+#ifdef CONFIG_VE
-+ if (!ve_is_super(get_exec_env())) {
-+ up(&tty_sem);
-+ return -ENODEV;
-+ }
-+#endif
- driver = console_driver;
- index = fg_console;
- noctty = 1;
-@@ -2063,6 +2155,12 @@ retry_open:
- }
- #endif
- if (device == MKDEV(TTYAUX_MAJOR,1)) {
-+#ifdef CONFIG_VE
-+ if (!ve_is_super(get_exec_env())) {
-+ up(&tty_sem);
-+ return -ENODEV;
-+ }
-+#endif
- driver = console_device(&index);
- if (driver) {
- /* Don't let /dev/console block */
-@@ -2080,7 +2178,7 @@ retry_open:
- return -ENODEV;
- }
- got_driver:
-- retval = init_dev(driver, index, &tty);
-+ retval = init_dev(driver, index, c_tty, &tty);
- up(&tty_sem);
- if (retval)
- return retval;
-@@ -2149,11 +2247,11 @@ static int ptmx_open(struct inode * inod
-
- /* find a device that is not in use. */
- down(&allocated_ptys_lock);
-- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
-+ if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) {
- up(&allocated_ptys_lock);
- return -ENOMEM;
- }
-- idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
-+ idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index);
- if (idr_ret < 0) {
- up(&allocated_ptys_lock);
- if (idr_ret == -EAGAIN)
-@@ -2161,14 +2259,14 @@ static int ptmx_open(struct inode * inod
- return -EIO;
- }
- if (index >= pty_limit) {
-- idr_remove(&allocated_ptys, index);
-+ idr_remove(&ve_allocated_ptys, index);
- up(&allocated_ptys_lock);
- return -EIO;
- }
- up(&allocated_ptys_lock);
-
- down(&tty_sem);
-- retval = init_dev(ptm_driver, index, &tty);
-+ retval = init_dev(ve_ptm_driver, index, NULL, &tty);
- up(&tty_sem);
-
- if (retval)
-@@ -2183,14 +2281,14 @@ static int ptmx_open(struct inode * inod
- goto out1;
-
- check_tty_count(tty, "tty_open");
-- retval = ptm_driver->open(tty, filp);
-+ retval = ve_ptm_driver->open(tty, filp);
- if (!retval)
- return 0;
- out1:
- release_dev(filp);
- out:
- down(&allocated_ptys_lock);
-- idr_remove(&allocated_ptys, index);
-+ idr_remove(&ve_allocated_ptys, index);
- up(&allocated_ptys_lock);
- return retval;
- }
-@@ -2303,6 +2401,8 @@ static int tioccons(struct file *file)
- {
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-+ if (!ve_is_super(get_exec_env()))
-+ return -EACCES;
- if (file->f_op->write == redirected_tty_write) {
- struct file *f;
- spin_lock(&redirect_lock);
-@@ -2363,9 +2463,9 @@ static int tiocsctty(struct tty_struct *
- */
-
- read_lock(&tasklist_lock);
-- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-+ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
-- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
-+ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p);
- read_unlock(&tasklist_lock);
- } else
- return -EPERM;
-@@ -2387,7 +2487,7 @@ static int tiocgpgrp(struct tty_struct *
- */
- if (tty == real_tty && current->signal->tty != real_tty)
- return -ENOTTY;
-- return put_user(real_tty->pgrp, p);
-+ return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p);
- }
-
- static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
-@@ -2407,6 +2507,9 @@ static int tiocspgrp(struct tty_struct *
- return -EFAULT;
- if (pgrp < 0)
- return -EINVAL;
-+ pgrp = vpid_to_pid(pgrp);
-+ if (pgrp < 0)
-+ return -EPERM;
- if (session_of_pgrp(pgrp) != current->signal->session)
- return -EPERM;
- real_tty->pgrp = pgrp;
-@@ -2423,7 +2526,7 @@ static int tiocgsid(struct tty_struct *t
- return -ENOTTY;
- if (real_tty->session <= 0)
- return -ENOTTY;
-- return put_user(real_tty->session, p);
-+ return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p);
- }
-
- static int tiocsetd(struct tty_struct *tty, int __user *p)
-@@ -2696,7 +2799,7 @@ static void __do_SAK(void *arg)
- tty->driver->flush_buffer(tty);
-
- read_lock(&tasklist_lock);
-- do_each_task_pid(session, PIDTYPE_SID, p) {
-+ do_each_task_pid_all(session, PIDTYPE_SID, p) {
- if (p->signal->tty == tty || session > 0) {
- printk(KERN_NOTICE "SAK: killed process %d"
- " (%s): p->signal->session==tty->session\n",
-@@ -2724,7 +2827,7 @@ static void __do_SAK(void *arg)
- rcu_read_unlock();
- }
- task_unlock(p);
-- } while_each_task_pid(session, PIDTYPE_SID, p);
-+ } while_each_task_pid_all(session, PIDTYPE_SID, p);
- read_unlock(&tasklist_lock);
- #endif
- }
-@@ -3095,8 +3198,11 @@ int tty_register_driver(struct tty_drive
-
- if (!driver->put_char)
- driver->put_char = tty_default_put_char;
--
-+
-+ SET_VE_OWNER_TTYDRV(driver, get_exec_env());
-+ write_lock_irq(&tty_driver_guard);
- list_add(&driver->tty_drivers, &tty_drivers);
-+ write_unlock_irq(&tty_driver_guard);
-
- if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
- for(i = 0; i < driver->num; i++)
-@@ -3123,7 +3229,9 @@ int tty_unregister_driver(struct tty_dri
- unregister_chrdev_region(MKDEV(driver->major, driver->minor_start),
- driver->num);
-
-+ write_lock_irq(&tty_driver_guard);
- list_del(&driver->tty_drivers);
-+ write_unlock_irq(&tty_driver_guard);
-
- /*
- * Free the termios and termios_locked structures because
-@@ -3246,6 +3354,7 @@ static int __init tty_init(void)
-
- vty_init();
- #endif
-+ prepare_tty();
- return 0;
- }
- module_init(tty_init);
-diff -upr linux-2.6.16.orig/drivers/edac/Kconfig linux-2.6.16-026test009/drivers/edac/Kconfig
---- linux-2.6.16.orig/drivers/edac/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/edac/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -71,7 +71,7 @@ config EDAC_E7XXX
-
- config EDAC_E752X
- tristate "Intel e752x (e7520, e7525, e7320)"
-- depends on EDAC_MM_EDAC && PCI
-+ depends on EDAC_MM_EDAC && PCI && HOTPLUG
- help
- Support for error detection and correction on the Intel
- E7520, E7525, E7320 server chipsets.
-diff -upr linux-2.6.16.orig/drivers/ieee1394/sbp2.c linux-2.6.16-026test009/drivers/ieee1394/sbp2.c
---- linux-2.6.16.orig/drivers/ieee1394/sbp2.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/ieee1394/sbp2.c 2006-04-19 15:02:11.000000000 +0400
-@@ -495,22 +495,17 @@ static struct sbp2_command_info *sbp2uti
- /*
- * This function finds the sbp2_command for a given outstanding SCpnt.
- * Only looks at the inuse list.
-+ * Must be called with scsi_id->sbp2_command_orb_lock held.
- */
--static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(struct scsi_id_instance_data *scsi_id, void *SCpnt)
-+static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(
-+ struct scsi_id_instance_data *scsi_id, void *SCpnt)
- {
- struct sbp2_command_info *command;
-- unsigned long flags;
-
-- spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
-- if (!list_empty(&scsi_id->sbp2_command_orb_inuse)) {
-- list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list) {
-- if (command->Current_SCpnt == SCpnt) {
-- spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
-+ if (!list_empty(&scsi_id->sbp2_command_orb_inuse))
-+ list_for_each_entry(command, &scsi_id->sbp2_command_orb_inuse, list)
-+ if (command->Current_SCpnt == SCpnt)
- return command;
-- }
-- }
-- }
-- spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
- return NULL;
- }
-
-@@ -579,17 +574,15 @@ static void sbp2util_free_command_dma(st
-
- /*
- * This function moves a command to the completed orb list.
-+ * Must be called with scsi_id->sbp2_command_orb_lock held.
- */
--static void sbp2util_mark_command_completed(struct scsi_id_instance_data *scsi_id,
-- struct sbp2_command_info *command)
-+static void sbp2util_mark_command_completed(
-+ struct scsi_id_instance_data *scsi_id,
-+ struct sbp2_command_info *command)
- {
-- unsigned long flags;
--
-- spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
- list_del(&command->list);
- sbp2util_free_command_dma(command);
- list_add_tail(&command->list, &scsi_id->sbp2_command_orb_completed);
-- spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
- }
-
- /*
-@@ -2177,7 +2170,9 @@ static int sbp2_handle_status_write(stru
- * Matched status with command, now grab scsi command pointers and check status
- */
- SCpnt = command->Current_SCpnt;
-+ spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
- sbp2util_mark_command_completed(scsi_id, command);
-+ spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
-
- if (SCpnt) {
-
-@@ -2513,6 +2508,7 @@ static int sbp2scsi_abort(struct scsi_cm
- (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0];
- struct sbp2scsi_host_info *hi = scsi_id->hi;
- struct sbp2_command_info *command;
-+ unsigned long flags;
-
- SBP2_ERR("aborting sbp2 command");
- scsi_print_command(SCpnt);
-@@ -2523,6 +2519,7 @@ static int sbp2scsi_abort(struct scsi_cm
- * Right now, just return any matching command structures
- * to the free pool.
- */
-+ spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
- command = sbp2util_find_command_for_SCpnt(scsi_id, SCpnt);
- if (command) {
- SBP2_DEBUG("Found command to abort");
-@@ -2540,6 +2537,7 @@ static int sbp2scsi_abort(struct scsi_cm
- command->Current_done(command->Current_SCpnt);
- }
- }
-+ spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
-
- /*
- * Initiate a fetch agent reset.
-diff -upr linux-2.6.16.orig/drivers/md/dm.c linux-2.6.16-026test009/drivers/md/dm.c
---- linux-2.6.16.orig/drivers/md/dm.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/md/dm.c 2006-04-19 15:02:11.000000000 +0400
-@@ -533,30 +533,35 @@ static void __clone_and_map(struct clone
-
- } else {
- /*
-- * Create two copy bios to deal with io that has
-- * been split across a target.
-+ * Handle a bvec that must be split between two or more targets.
- */
- struct bio_vec *bv = bio->bi_io_vec + ci->idx;
-+ sector_t remaining = to_sector(bv->bv_len);
-+ unsigned int offset = 0;
-
-- clone = split_bvec(bio, ci->sector, ci->idx,
-- bv->bv_offset, max);
-- __map_bio(ti, clone, tio);
--
-- ci->sector += max;
-- ci->sector_count -= max;
-- ti = dm_table_find_target(ci->map, ci->sector);
--
-- len = to_sector(bv->bv_len) - max;
-- clone = split_bvec(bio, ci->sector, ci->idx,
-- bv->bv_offset + to_bytes(max), len);
-- tio = alloc_tio(ci->md);
-- tio->io = ci->io;
-- tio->ti = ti;
-- memset(&tio->info, 0, sizeof(tio->info));
-- __map_bio(ti, clone, tio);
-+ do {
-+ if (offset) {
-+ ti = dm_table_find_target(ci->map, ci->sector);
-+ max = max_io_len(ci->md, ci->sector, ti);
-+
-+ tio = alloc_tio(ci->md);
-+ tio->io = ci->io;
-+ tio->ti = ti;
-+ memset(&tio->info, 0, sizeof(tio->info));
-+ }
-+
-+ len = min(remaining, max);
-+
-+ clone = split_bvec(bio, ci->sector, ci->idx,
-+ bv->bv_offset + offset, len);
-+
-+ __map_bio(ti, clone, tio);
-+
-+ ci->sector += len;
-+ ci->sector_count -= len;
-+ offset += to_bytes(len);
-+ } while (remaining -= len);
-
-- ci->sector += len;
-- ci->sector_count -= len;
- ci->idx++;
- }
- }
-diff -upr linux-2.6.16.orig/drivers/media/video/Kconfig linux-2.6.16-026test009/drivers/media/video/Kconfig
---- linux-2.6.16.orig/drivers/media/video/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/media/video/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -349,6 +349,7 @@ config VIDEO_AUDIO_DECODER
- config VIDEO_DECODER
- tristate "Add support for additional video chipsets"
- depends on VIDEO_DEV && I2C && EXPERIMENTAL
-+ select FW_LOADER
- ---help---
- Say Y here to compile drivers for SAA7115, SAA7127 and CX25840
- video decoders.
-diff -upr linux-2.6.16.orig/drivers/media/video/tuner-types.c linux-2.6.16-026test009/drivers/media/video/tuner-types.c
---- linux-2.6.16.orig/drivers/media/video/tuner-types.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/media/video/tuner-types.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1087,8 +1087,8 @@ static struct tuner_params tuner_tnf_533
- /* ------------ TUNER_SAMSUNG_TCPN_2121P30A - Samsung NTSC ------------ */
-
- static struct tuner_range tuner_samsung_tcpn_2121p30a_ntsc_ranges[] = {
-- { 16 * 175.75 /*MHz*/, 0x01, },
-- { 16 * 410.25 /*MHz*/, 0x02, },
-+ { 16 * 130.00 /*MHz*/, 0x01, },
-+ { 16 * 364.50 /*MHz*/, 0x02, },
- { 16 * 999.99 , 0x08, },
- };
-
-diff -upr linux-2.6.16.orig/drivers/net/Makefile linux-2.6.16-026test009/drivers/net/Makefile
---- linux-2.6.16.orig/drivers/net/Makefile 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/Makefile 2006-04-19 15:02:12.000000000 +0400
-@@ -18,6 +18,9 @@ gianfar_driver-objs := gianfar.o \
- gianfar_mii.o \
- gianfar_sysfs.o
-
-+obj-$(CONFIG_VE_NETDEV) += vznetdev.o
-+vznetdev-objs := open_vznet.o venet_core.o
-+
- #
- # link order important here
- #
-diff -upr linux-2.6.16.orig/drivers/net/irda/irda-usb.c linux-2.6.16-026test009/drivers/net/irda/irda-usb.c
---- linux-2.6.16.orig/drivers/net/irda/irda-usb.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/irda/irda-usb.c 2006-04-19 15:02:11.000000000 +0400
-@@ -740,7 +740,7 @@ static void irda_usb_receive(struct urb
- struct sk_buff *newskb;
- struct sk_buff *dataskb;
- struct urb *next_urb;
-- int docopy;
-+ unsigned int len, docopy;
-
- IRDA_DEBUG(2, "%s(), len=%d\n", __FUNCTION__, urb->actual_length);
-
-@@ -851,10 +851,11 @@ static void irda_usb_receive(struct urb
- dataskb->dev = self->netdev;
- dataskb->mac.raw = dataskb->data;
- dataskb->protocol = htons(ETH_P_IRDA);
-+ len = dataskb->len;
- netif_rx(dataskb);
-
- /* Keep stats up to date */
-- self->stats.rx_bytes += dataskb->len;
-+ self->stats.rx_bytes += len;
- self->stats.rx_packets++;
- self->netdev->last_rx = jiffies;
-
-diff -upr linux-2.6.16.orig/drivers/net/loopback.c linux-2.6.16-026test009/drivers/net/loopback.c
---- linux-2.6.16.orig/drivers/net/loopback.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/loopback.c 2006-04-19 15:02:12.000000000 +0400
-@@ -130,6 +130,11 @@ static int loopback_xmit(struct sk_buff
- {
- struct net_device_stats *lb_stats;
-
-+ if (unlikely(get_exec_env()->disable_net)) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
-+
- skb_orphan(skb);
-
- skb->protocol = eth_type_trans(skb,dev);
-@@ -198,6 +203,34 @@ static struct ethtool_ops loopback_ethto
- .set_tso = ethtool_op_set_tso,
- };
-
-+static void loopback_destructor(struct net_device *dev)
-+{
-+ kfree(dev->priv);
-+ dev->priv = NULL;
-+}
-+
-+struct net_device templ_loopback_dev = {
-+ .name = "lo",
-+ .mtu = (16 * 1024) + 20 + 20 + 12,
-+ .hard_start_xmit = loopback_xmit,
-+ .hard_header = eth_header,
-+ .hard_header_cache = eth_header_cache,
-+ .header_cache_update = eth_header_cache_update,
-+ .hard_header_len = ETH_HLEN, /* 14 */
-+ .addr_len = ETH_ALEN, /* 6 */
-+ .tx_queue_len = 0,
-+ .type = ARPHRD_LOOPBACK, /* 0x0001*/
-+ .rebuild_header = eth_rebuild_header,
-+ .flags = IFF_LOOPBACK,
-+ .features = NETIF_F_SG|NETIF_F_FRAGLIST
-+ |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA
-+ |NETIF_F_LLTX|NETIF_F_VIRTUAL,
-+};
-+
-+#ifdef loopback_dev
-+#undef loopback_dev
-+#endif
-+
- struct net_device loopback_dev = {
- .name = "lo",
- .mtu = (16 * 1024) + 20 + 20 + 12,
-@@ -231,9 +264,13 @@ int __init loopback_init(void)
- memset(stats, 0, sizeof(struct net_device_stats));
- loopback_dev.priv = stats;
- loopback_dev.get_stats = &get_stats;
-+ loopback_dev.destructor = &loopback_destructor;
- }
--
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ get_ve0()->_loopback_dev = &loopback_dev;
-+#endif
- return register_netdev(&loopback_dev);
- };
-
- EXPORT_SYMBOL(loopback_dev);
-+EXPORT_SYMBOL(templ_loopback_dev);
-diff -upr linux-2.6.16.orig/drivers/net/open_vznet.c linux-2.6.16-026test009/drivers/net/open_vznet.c
---- linux-2.6.16.orig/drivers/net/open_vznet.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/open_vznet.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,190 @@
-+/*
-+ * open_vznet.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+/*
-+ * Virtual Networking device used to change VE ownership on packets
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/seq_file.h>
-+
-+#include <linux/inet.h>
-+#include <net/ip.h>
-+#include <linux/skbuff.h>
-+#include <linux/venet.h>
-+
-+void veip_stop(struct ve_struct *ve)
-+{
-+ struct list_head *p, *tmp;
-+
-+ write_lock_irq(&veip_hash_lock);
-+ if (ve->veip == NULL)
-+ goto unlock;
-+ list_for_each_safe(p, tmp, &ve->veip->ip_lh) {
-+ struct ip_entry_struct *ptr;
-+ ptr = list_entry(p, struct ip_entry_struct, ve_list);
-+ ptr->active_env = NULL;
-+ list_del(&ptr->ve_list);
-+ list_del(&ptr->ip_hash);
-+ kfree(ptr);
-+ }
-+ veip_put(ve->veip);
-+ ve->veip = NULL;
-+unlock:
-+ write_unlock_irq(&veip_hash_lock);
-+}
-+
-+int veip_start(struct ve_struct *ve)
-+{
-+ int err;
-+
-+ err = 0;
-+ write_lock_irq(&veip_hash_lock);
-+ ve->veip = veip_findcreate(ve->veid);
-+ if (ve->veip == NULL)
-+ err = -ENOMEM;
-+ write_unlock_irq(&veip_hash_lock);
-+ return err;
-+}
-+
-+int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr)
-+{
-+ struct ip_entry_struct *entry, *found;
-+ int err;
-+
-+ entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL);
-+ if (entry == NULL)
-+ return -ENOMEM;
-+
-+ memset(entry, 0, sizeof(struct ip_entry_struct));
-+ entry->ip = addr->sin_addr.s_addr;
-+
-+ write_lock_irq(&veip_hash_lock);
-+ err = -EADDRINUSE;
-+ found = ip_entry_lookup(entry->ip);
-+ if (found != NULL)
-+ goto out_unlock;
-+ else {
-+ ip_entry_hash(entry, ve->veip);
-+ found = entry;
-+ entry = NULL;
-+ }
-+ err = 0;
-+ found->active_env = ve;
-+out_unlock:
-+ write_unlock_irq(&veip_hash_lock);
-+ if (entry != NULL)
-+ kfree(entry);
-+ return err;
-+}
-+
-+int veip_entry_del(envid_t veid, struct sockaddr_in *addr)
-+{
-+ struct ip_entry_struct *found;
-+ int err;
-+
-+ err = -EADDRNOTAVAIL;
-+ write_lock_irq(&veip_hash_lock);
-+ found = ip_entry_lookup(addr->sin_addr.s_addr);
-+ if (found == NULL)
-+ goto out;
-+ if (found->active_env->veid != veid)
-+ goto out;
-+
-+ err = 0;
-+ found->active_env = NULL;
-+
-+ list_del(&found->ip_hash);
-+ list_del(&found->ve_list);
-+ kfree(found);
-+out:
-+ write_unlock_irq(&veip_hash_lock);
-+ return err;
-+}
-+
-+static struct ve_struct *venet_find_ve(__u32 ip)
-+{
-+ struct ip_entry_struct *entry;
-+
-+ entry = ip_entry_lookup(ip);
-+ if (entry == NULL)
-+ return NULL;
-+
-+ return entry->active_env;
-+}
-+
-+int venet_change_skb_owner(struct sk_buff *skb)
-+{
-+ struct ve_struct *ve, *ve_old;
-+ struct iphdr *iph;
-+
-+ ve_old = skb->owner_env;
-+ iph = skb->nh.iph;
-+
-+ read_lock(&veip_hash_lock);
-+ if (!ve_is_super(ve_old)) {
-+ /* from VE to host */
-+ ve = venet_find_ve(iph->saddr);
-+ if (ve == NULL)
-+ goto out_drop;
-+ if (!ve_accessible_strict(ve, ve_old))
-+ goto out_source;
-+ skb->owner_env = get_ve0();
-+ } else {
-+ /* from host to VE */
-+ ve = venet_find_ve(iph->daddr);
-+ if (ve == NULL)
-+ goto out_drop;
-+ skb->owner_env = ve;
-+ }
-+ read_unlock(&veip_hash_lock);
-+
-+ return 0;
-+
-+out_drop:
-+ read_unlock(&veip_hash_lock);
-+ return -ESRCH;
-+
-+out_source:
-+ read_unlock(&veip_hash_lock);
-+ if (net_ratelimit()) {
-+ printk(KERN_WARNING "Dropped packet, source wrong "
-+ "veid=%u src-IP=%u.%u.%u.%u "
-+ "dst-IP=%u.%u.%u.%u\n",
-+ skb->owner_env->veid,
-+ NIPQUAD(skb->nh.iph->saddr),
-+ NIPQUAD(skb->nh.iph->daddr));
-+ }
-+ return -EACCES;
-+}
-+
-+#ifdef CONFIG_PROC_FS
-+int veip_seq_show(struct seq_file *m, void *v)
-+{
-+ struct list_head *p;
-+ struct ip_entry_struct *entry;
-+ char s[16];
-+
-+ p = (struct list_head *)v;
-+ if (p == ip_entry_hash_table) {
-+ seq_puts(m, "Version: 2.5\n");
-+ return 0;
-+ }
-+ entry = list_entry(p, struct ip_entry_struct, ip_hash);
-+ sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->ip));
-+ seq_printf(m, "%15s %10u\n", s, 0);
-+ return 0;
-+}
-+#endif
-+
-+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
-+MODULE_DESCRIPTION("Virtuozzo Virtual Network Device");
-+MODULE_LICENSE("GPL v2");
-diff -upr linux-2.6.16.orig/drivers/net/sky2.c linux-2.6.16-026test009/drivers/net/sky2.c
---- linux-2.6.16.orig/drivers/net/sky2.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/sky2.c 2006-04-19 15:02:11.000000000 +0400
-@@ -579,8 +579,8 @@ static void sky2_mac_init(struct sky2_hw
- reg = gma_read16(hw, port, GM_PHY_ADDR);
- gma_write16(hw, port, GM_PHY_ADDR, reg | GM_PAR_MIB_CLR);
-
-- for (i = 0; i < GM_MIB_CNT_SIZE; i++)
-- gma_read16(hw, port, GM_MIB_CNT_BASE + 8 * i);
-+ for (i = GM_MIB_CNT_BASE; i <= GM_MIB_CNT_END; i += 4)
-+ gma_read16(hw, port, i);
- gma_write16(hw, port, GM_PHY_ADDR, reg);
-
- /* transmit control */
-diff -upr linux-2.6.16.orig/drivers/net/sky2.h linux-2.6.16-026test009/drivers/net/sky2.h
---- linux-2.6.16.orig/drivers/net/sky2.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/sky2.h 2006-04-19 15:02:11.000000000 +0400
-@@ -1380,6 +1380,7 @@ enum {
- /* MIB Counters */
- #define GM_MIB_CNT_BASE 0x0100 /* Base Address of MIB Counters */
- #define GM_MIB_CNT_SIZE 44 /* Number of MIB Counters */
-+#define GM_MIB_CNT_END 0x025C /* Last MIB counter */
-
- /*
- * MIB Counters base address definitions (low word) -
-diff -upr linux-2.6.16.orig/drivers/net/tun.c linux-2.6.16-026test009/drivers/net/tun.c
---- linux-2.6.16.orig/drivers/net/tun.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/tun.c 2006-04-19 15:02:12.000000000 +0400
-@@ -62,6 +62,7 @@
-
- #include <asm/system.h>
- #include <asm/uaccess.h>
-+#include <ub/beancounter.h>
-
- #ifdef TUN_DEBUG
- static int debug;
-@@ -90,6 +91,7 @@ static int tun_net_close(struct net_devi
- static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- struct tun_struct *tun = netdev_priv(dev);
-+ struct user_beancounter *ub;
-
- DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
-
-@@ -114,6 +116,18 @@ static int tun_net_xmit(struct sk_buff *
- }
- }
-
-+ ub = netdev_bc(dev)->exec_ub;
-+ if (ub && (skb_bc(skb)->charged == 0)) {
-+ unsigned long charge;
-+ charge = skb_charge_fullsize(skb);
-+ if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1))
-+ goto drop;
-+ get_beancounter(ub);
-+ skb_bc(skb)->ub = ub;
-+ skb_bc(skb)->charged = charge;
-+ skb_bc(skb)->resource = UB_OTHERSOCKBUF;
-+ }
-+
- /* Queue packet */
- skb_queue_tail(&tun->readq, skb);
- dev->trans_start = jiffies;
-@@ -410,12 +424,14 @@ static ssize_t tun_chr_readv(struct file
- tun->dev->name, addr[0], addr[1], addr[2],
- addr[3], addr[4], addr[5]);
- ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
-+ /* skb will be uncharged in kfree_skb() */
- kfree_skb(skb);
- break;
- } else {
- DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
- tun->dev->name, addr[0], addr[1], addr[2],
- addr[3], addr[4], addr[5]);
-+ /* skb will be uncharged in kfree_skb() */
- kfree_skb(skb);
- continue;
- }
-@@ -451,6 +467,7 @@ static void tun_setup(struct net_device
- dev->get_stats = tun_net_stats;
- dev->ethtool_ops = &tun_ethtool_ops;
- dev->destructor = free_netdev;
-+ dev->features |= NETIF_F_VIRTUAL;
- }
-
- static struct tun_struct *tun_get_by_name(const char *name)
-@@ -459,8 +476,9 @@ static struct tun_struct *tun_get_by_nam
-
- ASSERT_RTNL();
- list_for_each_entry(tun, &tun_dev_list, list) {
-- if (!strncmp(tun->dev->name, name, IFNAMSIZ))
-- return tun;
-+ if (ve_accessible_strict(tun->dev->owner_env, get_exec_env()) &&
-+ !strncmp(tun->dev->name, name, IFNAMSIZ))
-+ return tun;
- }
-
- return NULL;
-@@ -479,7 +497,8 @@ static int tun_set_iff(struct file *file
-
- /* Check permissions */
- if (tun->owner != -1 &&
-- current->euid != tun->owner && !capable(CAP_NET_ADMIN))
-+ current->euid != tun->owner &&
-+ !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN))
- return -EPERM;
- }
- else if (__dev_get_by_name(ifr->ifr_name))
-diff -upr linux-2.6.16.orig/drivers/net/venet_core.c linux-2.6.16-026test009/drivers/net/venet_core.c
---- linux-2.6.16.orig/drivers/net/venet_core.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/venet_core.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,626 @@
-+/*
-+ * venet_core.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+/*
-+ * Common part for Virtuozzo virtual network devices
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/interrupt.h>
-+#include <linux/fs.h>
-+#include <linux/types.h>
-+#include <linux/string.h>
-+#include <linux/socket.h>
-+#include <linux/errno.h>
-+#include <linux/fcntl.h>
-+#include <linux/in.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/tcp.h>
-+#include <linux/proc_fs.h>
-+#include <linux/seq_file.h>
-+
-+#include <asm/system.h>
-+#include <asm/uaccess.h>
-+#include <asm/io.h>
-+#include <asm/unistd.h>
-+
-+#include <linux/inet.h>
-+#include <linux/netdevice.h>
-+#include <linux/etherdevice.h>
-+#include <net/ip.h>
-+#include <linux/skbuff.h>
-+#include <net/sock.h>
-+#include <linux/if_ether.h> /* For the statistics structure. */
-+#include <linux/if_arp.h> /* For ARPHRD_ETHER */
-+#include <linux/venet.h>
-+#include <linux/ve_proto.h>
-+#include <linux/vzctl.h>
-+#include <linux/vzctl_venet.h>
-+
-+struct list_head ip_entry_hash_table[VEIP_HASH_SZ];
-+rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED;
-+LIST_HEAD(veip_lh);
-+
-+#define ip_entry_hash_function(ip) (ntohl(ip) & (VEIP_HASH_SZ - 1))
-+
-+/*
-+ * Link @entry into the global address hash (the bucket is chosen from
-+ * entry->ip) and into the address list of @veip.
-+ * NOTE(review): no locking is taken here; presumably callers hold
-+ * veip_hash_lock - confirm against the callers.
-+ */
-+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip)
-+{
-+	struct list_head *bucket;
-+
-+	bucket = &ip_entry_hash_table[ip_entry_hash_function(entry->ip)];
-+	list_add(&entry->ip_hash, bucket);
-+	list_add(&entry->ve_list, &veip->ip_lh);
-+}
-+
-+/*
-+ * Release @veip once it is unused: it is unlinked from veip_lh and freed
-+ * only when its ip, src and dst lists are all empty; otherwise nothing
-+ * happens.
-+ */
-+void veip_put(struct veip_struct *veip)
-+{
-+	int in_use;
-+
-+	in_use = !list_empty(&veip->ip_lh) ||
-+		 !list_empty(&veip->src_lh) ||
-+		 !list_empty(&veip->dst_lh);
-+	if (in_use)
-+		return;
-+
-+	list_del(&veip->list);
-+	kfree(veip);
-+}
-+
-+/*
-+ * Find the hash entry whose ip equals @addr.
-+ * Returns the entry, or NULL when the address is not hashed.
-+ */
-+struct ip_entry_struct *ip_entry_lookup(u32 addr)
-+{
-+	struct ip_entry_struct *pos;
-+	struct list_head *bucket;
-+
-+	bucket = ip_entry_hash_table + ip_entry_hash_function(addr);
-+	list_for_each_entry(pos, bucket, ip_hash) {
-+		if (pos->ip == addr)
-+			return pos;
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * Walk veip_lh looking for the veip_struct that belongs to @veid.
-+ * Returns it, or NULL when there is none.
-+ */
-+struct veip_struct *veip_find(envid_t veid)
-+{
-+	struct veip_struct *cur;
-+
-+	list_for_each_entry(cur, &veip_lh, list) {
-+		if (cur->veid == veid)
-+			return cur;
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * Return the veip_struct of @veid, creating a zeroed one with empty
-+ * ip/src/dst lists and adding it to veip_lh if none exists yet.
-+ * Returns NULL only when the (atomic) allocation fails.
-+ */
-+struct veip_struct *veip_findcreate(envid_t veid)
-+{
-+	struct veip_struct *veip;
-+
-+	veip = veip_find(veid);
-+	if (veip == NULL) {
-+		veip = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC);
-+		if (veip != NULL) {
-+			memset(veip, 0, sizeof(struct veip_struct));
-+			INIT_LIST_HEAD(&veip->ip_lh);
-+			INIT_LIST_HEAD(&veip->src_lh);
-+			INIT_LIST_HEAD(&veip->dst_lh);
-+			list_add(&veip->list, &veip_lh);
-+			veip->veid = veid;
-+		}
-+	}
-+	return veip;
-+}
-+
-+/*
-+ * Device functions
-+ */
-+
-+/*
-+ * dev->open hook: take a module reference for as long as the device is
-+ * open (dropped again in venet_close()).  Fails with -EBUSY when the
-+ * reference cannot be taken.
-+ */
-+static int venet_open(struct net_device *dev)
-+{
-+	return try_module_get(THIS_MODULE) ? 0 : -EBUSY;
-+}
-+
-+/* dev->stop hook: drop the module reference taken in venet_open(). */
-+static int venet_close(struct net_device *master)
-+{
-+	module_put(THIS_MODULE);
-+
-+	return 0;
-+}
-+
-+/*
-+ * dev->destructor hook: free the statistics block that venet_init_dev()
-+ * stored in dev->priv and clear the pointer.
-+ */
-+static void venet_destructor(struct net_device *dev)
-+{
-+	void *stats = dev->priv;
-+
-+	dev->priv = NULL;
-+	kfree(stats);
-+}
-+
-+/*
-+ * Transmit hook of the venet device: pass a unicast IPv4 packet straight
-+ * to the venet device of the VE that owns the packet after
-+ * venet_change_skb_owner().
-+ *
-+ * The higher levels take care of making this non-reentrant (it's
-+ * called with bh's disabled).
-+ *
-+ * Always returns 0; the skb is consumed on every path.  Every packet that
-+ * is not delivered is freed and counted in tx_dropped, including the one
-+ * lost when cloning a shared buffer fails.
-+ */
-+static int venet_xmit(struct sk_buff *skb, struct net_device *dev)
-+{
-+	struct net_device_stats *stats = (struct net_device_stats *)dev->priv;
-+	struct net_device_stats *rcv_stats;
-+	struct net_device *rcv;
-+	struct iphdr *iph;
-+	int length;
-+
-+	if (unlikely(get_exec_env()->disable_net))
-+		goto outf;
-+
-+	/*
-+	 * A buffer with more than one user is not modified in place: we
-+	 * continue with a clone and drop our reference to the shared one.
-+	 * A buffer we own alone is orphaned and lobbed from tx to rx as is.
-+	 */
-+	if (atomic_read(&skb->users) != 1) {
-+		struct sk_buff *shared = skb;
-+
-+		skb = skb_clone(shared, GFP_ATOMIC);
-+		kfree_skb(shared);
-+		if (skb == NULL) {
-+			/* the packet is lost, so account it as dropped */
-+			++stats->tx_dropped;
-+			return 0;
-+		}
-+	} else
-+		skb_orphan(skb);
-+
-+	/* only unicast IPv4 is forwarded */
-+	if (skb->protocol != __constant_htons(ETH_P_IP))
-+		goto outf;
-+
-+	iph = skb->nh.iph;
-+	if (MULTICAST(iph->daddr))
-+		goto outf;
-+
-+	if (venet_change_skb_owner(skb) < 0)
-+		goto outf;
-+
-+	if (unlikely(VE_OWNER_SKB(skb)->disable_net))
-+		goto outf;
-+
-+	rcv = VE_OWNER_SKB(skb)->_venet_dev;
-+	if (!rcv)
-+		/* VE going down */
-+		goto outf;
-+
-+	dev_hold(rcv);
-+
-+	if (!(rcv->flags & IFF_UP)) {
-+		/* Target VE does not want to receive packets */
-+		dev_put(rcv);
-+		goto outf;
-+	}
-+
-+	skb->pkt_type = PACKET_HOST;
-+	skb->dev = rcv;
-+
-+	/* present an all-zero link-level header in front of the payload */
-+	skb->mac.raw = skb->data;
-+	memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len);
-+
-+	/* drop routing and conntrack state attached on the sending side */
-+	dst_release(skb->dst);
-+	skb->dst = NULL;
-+#ifdef CONFIG_NETFILTER
-+	nf_conntrack_put(skb->nfct);
-+	skb->nfct = NULL;
-+#ifdef CONFIG_NETFILTER_DEBUG
-+	skb->nf_debug = 0;
-+#endif
-+#endif
-+	/* skb must not be touched after netif_rx(), so save the length now */
-+	length = skb->len;
-+
-+	netif_rx(skb);
-+
-+	stats->tx_bytes += length;
-+	stats->tx_packets++;
-+
-+	rcv_stats = (struct net_device_stats *)rcv->priv;
-+	rcv_stats->rx_bytes += length;
-+	rcv_stats->rx_packets++;
-+	dev_put(rcv);
-+
-+	return 0;
-+
-+outf:
-+	kfree_skb(skb);
-+	++stats->tx_dropped;
-+	return 0;
-+}
-+
-+/* dev->get_stats hook: the counters live directly in dev->priv. */
-+static struct net_device_stats *get_stats(struct net_device *dev)
-+{
-+	struct net_device_stats *stats;
-+
-+	stats = (struct net_device_stats *)dev->priv;
-+	return stats;
-+}
-+
-+/*
-+ * dev->init hook of a venet device: install the device methods, allocate
-+ * the zeroed statistics block kept in dev->priv and fill in the generic
-+ * net_device fields.
-+ * Returns 0, or -ENOMEM when the statistics block cannot be allocated.
-+ */
-+int venet_init_dev(struct net_device *dev)
-+{
-+	struct net_device_stats *stats;
-+
-+	dev->hard_start_xmit = venet_xmit;
-+
-+	stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
-+	dev->priv = stats;
-+	if (stats == NULL)
-+		return -ENOMEM;
-+	memset(stats, 0, sizeof(struct net_device_stats));
-+
-+	dev->get_stats = get_stats;
-+	dev->open = venet_open;
-+	dev->stop = venet_close;
-+	dev->destructor = venet_destructor;
-+
-+	/*
-+	 * Fill in the generic fields of the device structure.
-+	 */
-+	dev->type = ARPHRD_VOID;
-+	dev->hard_header_len = ETH_HLEN;
-+	dev->mtu = 1500; /* eth_mtu */
-+	dev->tx_queue_len = 0;
-+
-+	memset(dev->broadcast, 0xFF, ETH_ALEN);
-+
-+	/* New-style flags. */
-+	dev->flags = IFF_BROADCAST | IFF_NOARP | IFF_POINTOPOINT;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Setup callback passed to alloc_netdev(): defer the real initialisation
-+ * to venet_init_dev() and mark the device as a virtual venet device.
-+ */
-+static void venet_setup(struct net_device *dev)
-+{
-+	/*
-+	 * No other features are advertised: checksumming is required and
-+	 * nobody else will do that job for us.
-+	 */
-+	dev->features |= NETIF_F_VENET | NETIF_F_VIRTUAL;
-+	dev->init = venet_init_dev;
-+}
-+
-+#ifdef CONFIG_PROC_FS
-+/*
-+ * seq_file show callback for the veinfo file: print one line per VE with
-+ * its id, class id and pcounter, followed by every address on the VE's
-+ * ip list whose active_env is set.  The address list is walked under the
-+ * read side of veip_hash_lock.
-+ */
-+static int veinfo_seq_show(struct seq_file *m, void *v)
-+{
-+	struct ve_struct *ve = v;
-+	struct ip_entry_struct *entry;
-+	char ip[16];	/* "255.255.255.255" plus the terminating NUL */
-+
-+	seq_printf(m, "%10u %5u %5u", ve->veid,
-+		   ve->class_id, atomic_read(&ve->pcounter));
-+
-+	read_lock(&veip_hash_lock);
-+	if (ve->veip != NULL) {
-+		list_for_each_entry(entry, &ve->veip->ip_lh, ve_list) {
-+			if (entry->active_env == NULL)
-+				continue;
-+
-+			sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->ip));
-+			seq_printf(m, " %15s", ip);
-+		}
-+	}
-+	read_unlock(&veip_hash_lock);
-+
-+	seq_putc(m, '\n');
-+	return 0;
-+}
-+
-+/*
-+ * seq_file start callback: take ve_list_guard for reading (released in
-+ * ve_seq_stop()) and return the VE at position *pos.  A caller outside
-+ * the super VE sees exactly one record, its own VE, at position 0.
-+ */
-+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
-+{
-+	struct ve_struct *curve = get_exec_env();
-+	struct ve_struct *ve;
-+	loff_t skip;
-+
-+	read_lock(&ve_list_guard);
-+	if (!ve_is_super(curve))
-+		return (*pos == 0) ? curve : NULL;
-+
-+	/* skip *pos elements of the global VE list */
-+	ve = ve_list_head;
-+	skip = *pos;
-+	while (ve != NULL && skip > 0) {
-+		ve = ve->next;
-+		skip--;
-+	}
-+	return ve;
-+}
-+
-+/*
-+ * seq_file next callback: advance to the following VE in the list.
-+ * Outside the super VE the iteration ends after the single own record.
-+ */
-+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
-+{
-+	struct ve_struct *ve = v;
-+
-+	if (ve_is_super(get_exec_env())) {
-+		(*pos)++;
-+		return ve->next;
-+	}
-+	return NULL;
-+}
-+
-+/* seq_file stop callback: release the lock taken in ve_seq_start(). */
-+static void ve_seq_stop(struct seq_file *m, void *v)
-+{
-+	read_unlock(&ve_list_guard);
-+}
-+
-+
-+/* Iterator over the VE list used by the veinfo proc file. */
-+static struct seq_operations veinfo_seq_op = {
-+	.start	= ve_seq_start,
-+	.next	= ve_seq_next,
-+	.stop	= ve_seq_stop,
-+	.show	= veinfo_seq_show,
-+};
-+
-+/* open() of the veinfo proc file: attach the veinfo seq_file iterator. */
-+static int veinfo_open(struct inode *inode, struct file *file)
-+{
-+	int err;
-+
-+	err = seq_open(file, &veinfo_seq_op);
-+	return err;
-+}
-+
-+/* File operations of the veinfo proc file (plain seq_file). */
-+static struct file_operations proc_veinfo_operations = {
-+	.open		= veinfo_open,
-+	.read		= seq_read,
-+	.llseek		= seq_lseek,
-+	.release	= seq_release,
-+};
-+
-+/*
-+ * seq_file start callback for the veip file.  Takes veip_hash_lock
-+ * (write side, interrupts off) until veip_seq_stop().
-+ *
-+ * Position 0 is represented by the hash table itself; position N > 0 is
-+ * the N-th entry found when walking the buckets in order.
-+ * NOTE(review): only reads are visible here, yet the write lock is taken;
-+ * confirm whether the read side would be sufficient.
-+ */
-+static void *veip_seq_start(struct seq_file *m, loff_t *pos)
-+{
-+	loff_t remaining = *pos;
-+	struct list_head *node;
-+	int bucket;
-+
-+	write_lock_irq(&veip_hash_lock);
-+	if (remaining == 0)
-+		return ip_entry_hash_table;
-+
-+	for (bucket = 0; bucket < VEIP_HASH_SZ; bucket++) {
-+		list_for_each(node, &ip_entry_hash_table[bucket]) {
-+			if (--remaining == 0)
-+				return node;
-+		}
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * seq_file next callback for the veip file: return the list node of the
-+ * next hashed entry after @v, or NULL when the last bucket is exhausted.
-+ * @v is either an entry's list node or, for position 0, the first bucket
-+ * head (see veip_seq_start()).
-+ */
-+static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos)
-+{
-+ struct list_head *p;
-+
-+ p = (struct list_head *)v;
-+ while (1) {
-+ p = p->next;
-+ /*
-+ * A pointer outside the ip_entry_hash_table array is a real
-+ * entry; one inside it is a bucket head, i.e. the current
-+ * bucket has been walked to its end (or is empty).
-+ */
-+ if (p < ip_entry_hash_table ||
-+ p >= ip_entry_hash_table + VEIP_HASH_SZ) {
-+ (*pos)++;
-+ return p;
-+ }
-+ /* step to the next bucket head; stop after the last bucket */
-+ if (++p >= ip_entry_hash_table + VEIP_HASH_SZ)
-+ return NULL;
-+ }
-+ return NULL;
-+}
-+
-+/* seq_file stop callback: release the lock taken in veip_seq_start(). */
-+static void veip_seq_stop(struct seq_file *m, void *v)
-+{
-+	write_unlock_irq(&veip_hash_lock);
-+}
-+
-+/* Iterator over the address hash used by the veip proc file. */
-+static struct seq_operations veip_seq_op = {
-+	.start	= veip_seq_start,
-+	.next	= veip_seq_next,
-+	.stop	= veip_seq_stop,
-+	.show	= veip_seq_show,
-+};
-+
-+/* open() of the veip proc file: attach the veip seq_file iterator. */
-+static int veip_open(struct inode *inode, struct file *file)
-+{
-+	int err;
-+
-+	err = seq_open(file, &veip_seq_op);
-+	return err;
-+}
-+
-+/* File operations of the veip proc file (plain seq_file). */
-+static struct file_operations proc_veip_operations = {
-+	.open		= veip_open,
-+	.read		= seq_read,
-+	.llseek		= seq_lseek,
-+	.release	= seq_release,
-+};
-+#endif
-+
-+/*
-+ * Add an IPv4 address to, or delete it from, the VE identified by @veid.
-+ *
-+ * @op is VE_IP_ADD or VE_IP_DEL; @uservaddr/@addrlen describe a user-space
-+ * struct sockaddr_in.
-+ *
-+ * Returns -EPERM without CAP_SETVEID, -EINVAL for a wrong @addrlen or an
-+ * unknown @op, the error of move_addr_to_kernel() if the copy fails,
-+ * -ESRCH when adding to a VE that does not exist or is not running, and
-+ * otherwise the result of veip_entry_add()/veip_entry_del().
-+ */
-+int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen)
-+{
-+	struct sockaddr_in addr;
-+	struct ve_struct *ve;
-+	int err;
-+
-+	if (!capable(CAP_SETVEID))
-+		return -EPERM;
-+
-+	if (addrlen != sizeof(struct sockaddr_in))
-+		return -EINVAL;
-+
-+	err = move_addr_to_kernel(uservaddr, addrlen, &addr);
-+	if (err < 0)
-+		return err;
-+
-+	if (op == VE_IP_DEL)
-+		return veip_entry_del(veid, &addr);
-+	if (op != VE_IP_ADD)
-+		return -EINVAL;
-+
-+	ve = get_ve_by_id(veid);
-+	if (!ve)
-+		return -ESRCH;
-+
-+	/* the address is only added while the VE is running */
-+	err = -ESRCH;
-+	down_read(&ve->op_sem);
-+	if (ve->is_running)
-+		err = veip_entry_add(ve, &addr);
-+	up_read(&ve->op_sem);
-+	put_ve(ve);
-+
-+	return err;
-+}
-+
-+/*
-+ * ioctl entry point of the venet control interface.  Only
-+ * VENETCTL_VE_IP_MAP is handled: its argument is copied from user space
-+ * and passed to real_ve_ip_map().
-+ * Returns -ENOTTY for any other command and -EFAULT when the argument
-+ * cannot be copied.
-+ */
-+int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
-+		unsigned long arg)
-+{
-+	struct vzctl_ve_ip_map s;
-+
-+	if (cmd != VENETCTL_VE_IP_MAP)
-+		return -ENOTTY;
-+
-+	if (copy_from_user(&s, (void *)arg, sizeof(s)))
-+		return -EFAULT;
-+
-+	return real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen);
-+}
-+
-+/* Registration record that routes VENETCTLTYPE ioctls to venet_ioctl(). */
-+static struct vzioctlinfo venetcalls = {
-+	.type	= VENETCTLTYPE,
-+	.func	= venet_ioctl,
-+	.owner	= THIS_MODULE,
-+};
-+
-+/*
-+ * Allocate and register the "venet%d" device of @env and remember it in
-+ * env->_venet_dev.
-+ * Returns 0 on success, -ENOMEM when the device cannot be allocated, or
-+ * the error of dev_alloc_name()/register_netdev(); on failure the device
-+ * and its statistics block are released.
-+ */
-+int venet_dev_start(struct ve_struct *env)
-+{
-+	struct net_device *dev_venet;
-+	int err;
-+
-+	dev_venet = alloc_netdev(0, "venet%d", venet_setup);
-+	if (!dev_venet)
-+		return -ENOMEM;
-+
-+	err = dev_alloc_name(dev_venet, dev_venet->name);
-+	if (err < 0)
-+		goto err;
-+
-+	err = register_netdev(dev_venet);
-+	if (err != 0)
-+		goto err;
-+
-+	env->_venet_dev = dev_venet;
-+	return 0;
-+
-+err:
-+	/*
-+	 * Registration can fail after venet_init_dev() has already
-+	 * allocated dev->priv; free_netdev() does not release it, so do it
-+	 * here to avoid a leak.
-+	 * NOTE(review): relies on dev->priv being NULL when the init hook
-+	 * never ran (alloc_netdev() called with a private size of 0).
-+	 */
-+	venet_destructor(dev_venet);
-+	free_netdev(dev_venet);
-+	printk(KERN_ERR "VENET initialization error err=%d\n", err);
-+	return err;
-+}
-+
-+/*
-+ * VE init hook (@data is the struct ve_struct being started): set up the
-+ * VE's veip state and its venet device.  For every VE except the super VE
-+ * a module reference is held while the VE uses venet.
-+ *
-+ * Returns -EEXIST when the VE already has veip state, otherwise 0 or the
-+ * error of veip_start()/venet_dev_start().
-+ * NOTE(review): when try_module_get() fails this returns 0 without setting
-+ * anything up, while venet_stop() still calls module_put() - confirm that
-+ * this is intended.
-+ */
-+static int venet_start(unsigned int hooknum, void *data)
-+{
-+	struct ve_struct *env = data;
-+	int err;
-+
-+	if (env->veip)
-+		return -EEXIST;
-+	if (!ve_is_super(env) && !try_module_get(THIS_MODULE))
-+		return 0;
-+
-+	err = veip_start(env);
-+	if (!err) {
-+		err = venet_dev_start(env);
-+		if (!err)
-+			return 0;
-+		/* undo veip_start() */
-+		veip_stop(env);
-+	}
-+
-+	if (!ve_is_super(env))
-+		module_put(THIS_MODULE);
-+	return err;
-+}
-+
-+/*
-+ * VE fini hook and undo of venet_start() (@data is the struct ve_struct):
-+ * stop the VE's veip state and, for every VE except the super VE, drop
-+ * the module reference.  Always returns 0.
-+ */
-+static int venet_stop(unsigned int hooknum, void *data)
-+{
-+	struct ve_struct *env = data;
-+
-+	veip_stop(env);
-+	if (ve_is_super(env))
-+		return 0;
-+
-+	module_put(THIS_MODULE);
-+	return 0;
-+}
-+
-+#define VE_HOOK_PRI_NET 0
-+
-+/* VE_HOOK_INIT hook: venet_start() with venet_stop() as its undo. */
-+static struct ve_hook venet_ve_hook_init = {
-+	.hook		= venet_start,
-+	.undo		= venet_stop,
-+	.hooknum	= VE_HOOK_INIT,
-+	.priority	= VE_HOOK_PRI_NET,
-+};
-+
-+/* VE_HOOK_FINI hook: venet_stop(). */
-+static struct ve_hook venet_ve_hook_fini = {
-+	.hook		= venet_stop,
-+	.hooknum	= VE_HOOK_FINI,
-+	.priority	= VE_HOOK_PRI_NET,
-+};
-+
-+/*
-+ * Module init: initialise the address hash, start venet for the super VE,
-+ * create the vz/veinfo and vz/veip proc files (a failure there is only
-+ * warned about) and register the VE hooks and the ioctl handler.
-+ * Returns -EEXIST when the super VE already has a venet device, or the
-+ * error of venet_start().
-+ */
-+__init int venet_init(void)
-+{
-+#ifdef CONFIG_PROC_FS
-+	struct proc_dir_entry *de;
-+#endif
-+	int i, err;
-+
-+	if (get_ve0()->_venet_dev != NULL)
-+		return -EEXIST;
-+
-+	for (i = 0; i < VEIP_HASH_SZ; i++)
-+		INIT_LIST_HEAD(&ip_entry_hash_table[i]);
-+
-+	err = venet_start(VE_HOOK_INIT, (void *)get_ve0());
-+	if (err)
-+		return err;
-+
-+#ifdef CONFIG_PROC_FS
-+	de = create_proc_glob_entry("vz/veinfo",
-+			S_IFREG|S_IRUSR, NULL);
-+	if (!de)
-+		printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
-+	else
-+		de->proc_fops = &proc_veinfo_operations;
-+
-+	de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL);
-+	if (!de)
-+		printk(KERN_WARNING "venet: can't make veip proc entry\n");
-+	else
-+		de->proc_fops = &proc_veip_operations;
-+#endif
-+
-+	ve_hook_register(&venet_ve_hook_init);
-+	ve_hook_register(&venet_ve_hook_fini);
-+	vzioctl_register(&venetcalls);
-+	return 0;
-+}
-+
-+/*
-+ * Module exit: undo venet_init() in reverse order - unregister the ioctl
-+ * handler and the VE hooks, remove the proc files, then unregister and
-+ * free the super VE's venet device and stop its veip state.
-+ */
-+__exit void venet_exit(void)
-+{
-+	struct net_device *dev_venet;
-+
-+	vzioctl_unregister(&venetcalls);
-+	ve_hook_unregister(&venet_ve_hook_fini);
-+	ve_hook_unregister(&venet_ve_hook_init);
-+#ifdef CONFIG_PROC_FS
-+	remove_proc_entry("vz/veip", NULL);
-+	remove_proc_entry("vz/veinfo", NULL);
-+#endif
-+
-+	dev_venet = get_ve0()->_venet_dev;
-+	if (dev_venet) {
-+		/* clear the pointer first so venet_xmit() sees "VE going down" */
-+		get_ve0()->_venet_dev = NULL;
-+		unregister_netdev(dev_venet);
-+		free_netdev(dev_venet);
-+	}
-+
-+	veip_stop(get_ve0());
-+}
-+
-+module_init(venet_init);
-+module_exit(venet_exit);
-diff -upr linux-2.6.16.orig/drivers/net/wireless/Kconfig linux-2.6.16-026test009/drivers/net/wireless/Kconfig
---- linux-2.6.16.orig/drivers/net/wireless/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/wireless/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -239,7 +239,8 @@ config IPW2200_DEBUG
-
- config AIRO
- tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards"
-- depends on NET_RADIO && ISA_DMA_API && CRYPTO && (PCI || BROKEN)
-+ depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN)
-+ select CRYPTO
- ---help---
- This is the standard Linux driver to support Cisco/Aironet ISA and
- PCI 802.11 wireless cards.
-@@ -374,6 +375,7 @@ config PCMCIA_HERMES
- config PCMCIA_SPECTRUM
- tristate "Symbol Spectrum24 Trilogy PCMCIA card support"
- depends on NET_RADIO && PCMCIA && HERMES
-+ select FW_LOADER
- ---help---
-
- This is a driver for 802.11b cards using RAM-loadable Symbol
-@@ -387,6 +389,7 @@ config PCMCIA_SPECTRUM
- config AIRO_CS
- tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards"
- depends on NET_RADIO && PCMCIA && (BROKEN || !M32R)
-+ select CRYPTO
- ---help---
- This is the standard Linux driver to support Cisco/Aironet PCMCIA
- 802.11 wireless cards. This driver is the same as the Aironet
-diff -upr linux-2.6.16.orig/drivers/net/wireless/hostap/hostap_80211_tx.c linux-2.6.16-026test009/drivers/net/wireless/hostap/hostap_80211_tx.c
---- linux-2.6.16.orig/drivers/net/wireless/hostap/hostap_80211_tx.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/wireless/hostap/hostap_80211_tx.c 2006-04-19 15:02:11.000000000 +0400
-@@ -469,7 +469,7 @@ int hostap_master_start_xmit(struct sk_b
- }
-
- if (local->ieee_802_1x && meta->ethertype == ETH_P_PAE && tx.crypt &&
-- !(fc & IEEE80211_FCTL_VERS)) {
-+ !(fc & IEEE80211_FCTL_PROTECTED)) {
- no_encrypt = 1;
- PDEBUG(DEBUG_EXTRA2, "%s: TX: IEEE 802.1X - passing "
- "unencrypted EAPOL frame\n", dev->name);
-diff -upr linux-2.6.16.orig/drivers/net/wireless/ipw2200.c linux-2.6.16-026test009/drivers/net/wireless/ipw2200.c
---- linux-2.6.16.orig/drivers/net/wireless/ipw2200.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/net/wireless/ipw2200.c 2006-04-19 15:02:11.000000000 +0400
-@@ -9956,9 +9956,8 @@ static int ipw_ethtool_set_eeprom(struct
- return -EINVAL;
- down(&p->sem);
- memcpy(&p->eeprom[eeprom->offset], bytes, eeprom->len);
-- for (i = IPW_EEPROM_DATA;
-- i < IPW_EEPROM_DATA + IPW_EEPROM_IMAGE_SIZE; i++)
-- ipw_write8(p, i, p->eeprom[i]);
-+ for (i = 0; i < IPW_EEPROM_IMAGE_SIZE; i++)
-+ ipw_write8(p, i + IPW_EEPROM_DATA, p->eeprom[i]);
- up(&p->sem);
- return 0;
- }
-diff -upr linux-2.6.16.orig/drivers/pci/probe.c linux-2.6.16-026test009/drivers/pci/probe.c
---- linux-2.6.16.orig/drivers/pci/probe.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/pci/probe.c 2006-04-19 15:02:12.000000000 +0400
-@@ -21,6 +21,7 @@ LIST_HEAD(pci_root_buses);
- EXPORT_SYMBOL(pci_root_buses);
-
- LIST_HEAD(pci_devices);
-+EXPORT_SYMBOL(pci_devices);
-
- #ifdef HAVE_PCI_LEGACY
- /**
-diff -upr linux-2.6.16.orig/drivers/pcmcia/ds.c linux-2.6.16-026test009/drivers/pcmcia/ds.c
---- linux-2.6.16.orig/drivers/pcmcia/ds.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/pcmcia/ds.c 2006-04-19 15:02:11.000000000 +0400
-@@ -546,7 +546,7 @@ static int pcmcia_device_query(struct pc
- tmp = vers1->str + vers1->ofs[i];
-
- length = strlen(tmp) + 1;
-- if ((length < 3) || (length > 255))
-+ if ((length < 2) || (length > 255))
- continue;
-
- p_dev->prod_id[i] = kmalloc(sizeof(char) * length,
-diff -upr linux-2.6.16.orig/drivers/s390/cio/cio.c linux-2.6.16-026test009/drivers/s390/cio/cio.c
---- linux-2.6.16.orig/drivers/s390/cio/cio.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/s390/cio/cio.c 2006-04-19 15:02:12.000000000 +0400
-@@ -610,7 +610,11 @@ do_IRQ (struct pt_regs *regs)
- struct tpi_info *tpi_info;
- struct subchannel *sch;
- struct irb *irb;
-+ struct ve_struct *ve;
-+ struct user_beancounter *ub;
-
-+ ve = set_exec_env(get_ve0());
-+ ub = set_exec_ub(get_ub0());
- irq_enter ();
- asm volatile ("mc 0,0");
- if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
-@@ -657,6 +661,8 @@ do_IRQ (struct pt_regs *regs)
- */
- } while (!MACHINE_IS_VM && tpi (NULL) != 0);
- irq_exit ();
-+ (void)set_exec_ub(ub);
-+ (void)set_exec_env(ve);
- }
-
- #ifdef CONFIG_CCW_CONSOLE
-diff -upr linux-2.6.16.orig/drivers/scsi/sata_mv.c linux-2.6.16-026test009/drivers/scsi/sata_mv.c
---- linux-2.6.16.orig/drivers/scsi/sata_mv.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/scsi/sata_mv.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1102,6 +1102,7 @@ static u8 mv_get_crpb_status(struct ata_
- void __iomem *port_mmio = mv_ap_base(ap);
- struct mv_port_priv *pp = ap->private_data;
- u32 out_ptr;
-+ u8 ata_status;
-
- out_ptr = readl(port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
-
-@@ -1109,6 +1110,8 @@ static u8 mv_get_crpb_status(struct ata_
- assert(((out_ptr >> EDMA_RSP_Q_PTR_SHIFT) & MV_MAX_Q_DEPTH_MASK) ==
- pp->rsp_consumer);
-
-+ ata_status = pp->crpb[pp->rsp_consumer].flags >> CRPB_FLAG_STATUS_SHIFT;
-+
- /* increment our consumer index... */
- pp->rsp_consumer = mv_inc_q_index(&pp->rsp_consumer);
-
-@@ -1123,7 +1126,7 @@ static u8 mv_get_crpb_status(struct ata_
- writelfl(out_ptr, port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
-
- /* Return ATA status register for completed CRPB */
-- return (pp->crpb[pp->rsp_consumer].flags >> CRPB_FLAG_STATUS_SHIFT);
-+ return ata_status;
- }
-
- /**
-@@ -1192,7 +1195,6 @@ static void mv_host_intr(struct ata_host
- u32 hc_irq_cause;
- int shift, port, port0, hard_port, handled;
- unsigned int err_mask;
-- u8 ata_status = 0;
-
- if (hc == 0) {
- port0 = 0;
-@@ -1210,6 +1212,7 @@ static void mv_host_intr(struct ata_host
- hc,relevant,hc_irq_cause);
-
- for (port = port0; port < port0 + MV_PORTS_PER_HC; port++) {
-+ u8 ata_status = 0;
- ap = host_set->ports[port];
- hard_port = port & MV_PORT_MASK; /* range 0-3 */
- handled = 0; /* ensure ata_status is set if handled++ */
-diff -upr linux-2.6.16.orig/drivers/usb/core/message.c linux-2.6.16-026test009/drivers/usb/core/message.c
---- linux-2.6.16.orig/drivers/usb/core/message.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/usb/core/message.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1388,11 +1388,13 @@ free_interfaces:
- if (dev->state != USB_STATE_ADDRESS)
- usb_disable_device (dev, 1); // Skip ep0
-
-- i = dev->bus_mA - cp->desc.bMaxPower * 2;
-- if (i < 0)
-- dev_warn(&dev->dev, "new config #%d exceeds power "
-- "limit by %dmA\n",
-- configuration, -i);
-+ if (cp) {
-+ i = dev->bus_mA - cp->desc.bMaxPower * 2;
-+ if (i < 0)
-+ dev_warn(&dev->dev, "new config #%d exceeds power "
-+ "limit by %dmA\n",
-+ configuration, -i);
-+ }
-
- if ((ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
- USB_REQ_SET_CONFIGURATION, 0, configuration, 0,
-diff -upr linux-2.6.16.orig/drivers/usb/host/ehci-sched.c linux-2.6.16-026test009/drivers/usb/host/ehci-sched.c
---- linux-2.6.16.orig/drivers/usb/host/ehci-sched.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/usb/host/ehci-sched.c 2006-04-19 15:02:11.000000000 +0400
-@@ -707,6 +707,7 @@ iso_stream_init (
- } else {
- u32 addr;
- int think_time;
-+ int hs_transfers;
-
- addr = dev->ttport << 24;
- if (!ehci_is_TDI(ehci)
-@@ -719,6 +720,7 @@ iso_stream_init (
- think_time = dev->tt ? dev->tt->think_time : 0;
- stream->tt_usecs = NS_TO_US (think_time + usb_calc_bus_time (
- dev->speed, is_input, 1, maxp));
-+ hs_transfers = max (1u, (maxp + 187) / 188);
- if (is_input) {
- u32 tmp;
-
-@@ -727,12 +729,11 @@ iso_stream_init (
- stream->usecs = HS_USECS_ISO (1);
- stream->raw_mask = 1;
-
-- /* pessimistic c-mask */
-- tmp = usb_calc_bus_time (USB_SPEED_FULL, 1, 0, maxp)
-- / (125 * 1000);
-- stream->raw_mask |= 3 << (tmp + 9);
-+ /* c-mask as specified in USB 2.0 11.18.4 3.c */
-+ tmp = (1 << (hs_transfers + 2)) - 1;
-+ stream->raw_mask |= tmp << (8 + 2);
- } else
-- stream->raw_mask = smask_out [maxp / 188];
-+ stream->raw_mask = smask_out [hs_transfers - 1];
- bandwidth = stream->usecs + stream->c_usecs;
- bandwidth /= 1 << (interval + 2);
-
-diff -upr linux-2.6.16.orig/drivers/usb/serial/console.c linux-2.6.16-026test009/drivers/usb/serial/console.c
---- linux-2.6.16.orig/drivers/usb/serial/console.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/usb/serial/console.c 2006-04-19 15:02:11.000000000 +0400
-@@ -54,7 +54,7 @@ static struct console usbcons;
- * serial.c code, except that the specifier is "ttyUSB" instead
- * of "ttyS".
- */
--static int __init usb_console_setup(struct console *co, char *options)
-+static int usb_console_setup(struct console *co, char *options)
- {
- struct usbcons_info *info = &usbcons_info;
- int baud = 9600;
-diff -upr linux-2.6.16.orig/drivers/usb/storage/Kconfig linux-2.6.16-026test009/drivers/usb/storage/Kconfig
---- linux-2.6.16.orig/drivers/usb/storage/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/usb/storage/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -48,7 +48,8 @@ config USB_STORAGE_FREECOM
-
- config USB_STORAGE_ISD200
- bool "ISD-200 USB/ATA Bridge support"
-- depends on USB_STORAGE && BLK_DEV_IDE
-+ depends on USB_STORAGE
-+ depends on BLK_DEV_IDE=y || BLK_DEV_IDE=USB_STORAGE
- ---help---
- Say Y here if you want to use USB Mass Store devices based
- on the In-Systems Design ISD-200 USB/ATA bridge.
-diff -upr linux-2.6.16.orig/drivers/video/cfbimgblt.c linux-2.6.16-026test009/drivers/video/cfbimgblt.c
---- linux-2.6.16.orig/drivers/video/cfbimgblt.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/video/cfbimgblt.c 2006-04-19 15:02:11.000000000 +0400
-@@ -169,7 +169,7 @@ static inline void slow_imageblit(const
-
- while (j--) {
- l--;
-- color = (*s & 1 << (FB_BIT_NR(l))) ? fgcolor : bgcolor;
-+ color = (*s & (1 << l)) ? fgcolor : bgcolor;
- val |= FB_SHIFT_HIGH(color, shift);
-
- /* Did the bitshift spill bits to the next long? */
-diff -upr linux-2.6.16.orig/drivers/video/i810/i810_main.c linux-2.6.16-026test009/drivers/video/i810/i810_main.c
---- linux-2.6.16.orig/drivers/video/i810/i810_main.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/drivers/video/i810/i810_main.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info
- int size = ((cursor->image.width + 7) >> 3) *
- cursor->image.height;
- int i;
-- u8 *data = kmalloc(64 * 8, GFP_KERNEL);
-+ u8 *data = kmalloc(64 * 8, GFP_ATOMIC);
-
- if (data == NULL)
- return -ENOMEM;
-diff -upr linux-2.6.16.orig/fs/9p/vfs_inode.c linux-2.6.16-026test009/fs/9p/vfs_inode.c
---- linux-2.6.16.orig/fs/9p/vfs_inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/9p/vfs_inode.c 2006-04-19 15:02:11.000000000 +0400
-@@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(st
-
- sb = dir->i_sb;
- v9ses = v9fs_inode2v9ses(dir);
-+ dentry->d_op = &v9fs_dentry_operations;
- dirfid = v9fs_fid_lookup(dentry->d_parent);
-
- if (!dirfid) {
-@@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(st
- goto FreeFcall;
-
- fid->qid = fcall->params.rstat.stat.qid;
--
-- dentry->d_op = &v9fs_dentry_operations;
- v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
-
- d_add(dentry, inode);
-diff -upr linux-2.6.16.orig/fs/Kconfig linux-2.6.16-026test009/fs/Kconfig
---- linux-2.6.16.orig/fs/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/Kconfig 2006-04-19 15:02:12.000000000 +0400
-@@ -433,6 +433,38 @@ config QFMT_V2
- This quota format allows using quotas with 32-bit UIDs/GIDs. If you
- need this functionality say Y here.
-
-+config SIM_FS
-+ tristate "VPS filesystem"
-+ depends on VZ_QUOTA
-+ default m
-+ help
-+ This file system is a part of Virtuozzo. It introduces a fake
-+ superblock and blockdev to VE to hide real device and show
-+ statfs results taken from quota.
-+
-+config VZ_QUOTA
-+ tristate "Virtuozzo Disk Quota support"
-+ depends on QUOTA
-+ default m
-+ help
-+ Virtuozzo Disk Quota imposes disk quota on directories with their
-+ files and subdirectories in total. Such disk quota is used to
-+ account and limit disk usage by Virtuozzo VPS, but also may be used
-+ separately.
-+
-+config VZ_QUOTA_UNLOAD
-+ bool "Unloadable Virtuozzo Disk Quota module"
-+ depends on VZ_QUOTA=m
-+ default n
-+ help
-+ Make Virtuozzo Disk Quota module unloadable.
-+ Doesn't work reliably now.
-+
-+config VZ_QUOTA_UGID
-+ bool "Per-user and per-group quota in Virtuozzo quota partitions"
-+ depends on VZ_QUOTA!=n
-+ default y
-+
- config QUOTACTL
- bool
- depends on XFS_QUOTA || QUOTA
-diff -upr linux-2.6.16.orig/fs/Makefile linux-2.6.16-026test009/fs/Makefile
---- linux-2.6.16.orig/fs/Makefile 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/Makefile 2006-04-19 15:02:12.000000000 +0400
-@@ -39,9 +39,15 @@ obj-$(CONFIG_QUOTA) += dquot.o
- obj-$(CONFIG_QFMT_V1) += quota_v1.o
- obj-$(CONFIG_QFMT_V2) += quota_v2.o
- obj-$(CONFIG_QUOTACTL) += quota.o
-+obj-$(CONFIG_VZ_QUOTA) += vzdquota.o
-+vzdquota-y += vzdquot.o vzdq_mgmt.o vzdq_ops.o vzdq_tree.o
-+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_ugid.o
-+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
-
- obj-$(CONFIG_DNOTIFY) += dnotify.o
-
-+obj-$(CONFIG_SIM_FS) += simfs.o
-+
- obj-$(CONFIG_PROC_FS) += proc/
- obj-y += partitions/
- obj-$(CONFIG_SYSFS) += sysfs/
-diff -upr linux-2.6.16.orig/fs/aio.c linux-2.6.16-026test009/fs/aio.c
---- linux-2.6.16.orig/fs/aio.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/aio.c 2006-04-19 15:02:12.000000000 +0400
-@@ -41,13 +41,16 @@
- #endif
-
- /*------ sysctl variables----*/
--static DEFINE_SPINLOCK(aio_nr_lock);
-+DEFINE_SPINLOCK(aio_nr_lock);
- unsigned long aio_nr; /* current system wide number of aio requests */
- unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
-+EXPORT_SYMBOL_GPL(aio_nr_lock);
-+EXPORT_SYMBOL_GPL(aio_nr);
- /*----end sysctl variables---*/
-
- static kmem_cache_t *kiocb_cachep;
--static kmem_cache_t *kioctx_cachep;
-+kmem_cache_t *kioctx_cachep;
-+EXPORT_SYMBOL_GPL(kioctx_cachep);
-
- static struct workqueue_struct *aio_wq;
-
-@@ -58,7 +61,7 @@ static DECLARE_WORK(fput_work, aio_fput_
- static DEFINE_SPINLOCK(fput_lock);
- static LIST_HEAD(fput_head);
-
--static void aio_kick_handler(void *);
-+void aio_kick_handler(void *);
- static void aio_queue_work(struct kioctx *);
-
- /* aio_setup
-@@ -293,7 +296,7 @@ static void aio_cancel_all(struct kioctx
- spin_unlock_irq(&ctx->ctx_lock);
- }
-
--static void wait_for_all_aios(struct kioctx *ctx)
-+void wait_for_all_aios(struct kioctx *ctx)
- {
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-@@ -310,6 +313,7 @@ static void wait_for_all_aios(struct kio
- __set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(&ctx->wait, &wait);
- }
-+EXPORT_SYMBOL_GPL(wait_for_all_aios);
-
- /* wait_on_sync_kiocb:
- * Waits on the given sync kiocb to complete.
-@@ -856,7 +860,7 @@ static inline void aio_run_all_iocbs(str
- * space.
- * Run on aiod's context.
- */
--static void aio_kick_handler(void *data)
-+void aio_kick_handler(void *data)
- {
- struct kioctx *ctx = data;
- mm_segment_t oldfs = get_fs();
-@@ -875,6 +879,7 @@ static void aio_kick_handler(void *data)
- if (requeue)
- queue_work(aio_wq, &ctx->wq);
- }
-+EXPORT_SYMBOL_GPL(aio_kick_handler);
-
-
- /*
-diff -upr linux-2.6.16.orig/fs/autofs/autofs_i.h linux-2.6.16-026test009/fs/autofs/autofs_i.h
---- linux-2.6.16.orig/fs/autofs/autofs_i.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs/autofs_i.h 2006-04-19 15:02:12.000000000 +0400
-@@ -124,7 +124,7 @@ static inline struct autofs_sb_info *aut
- filesystem without "magic".) */
-
- static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
-- return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
-+ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
- }
-
- /* Hash operations */
-diff -upr linux-2.6.16.orig/fs/autofs/init.c linux-2.6.16-026test009/fs/autofs/init.c
---- linux-2.6.16.orig/fs/autofs/init.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs/init.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
- .name = "autofs",
- .get_sb = autofs_get_sb,
- .kill_sb = kill_anon_super,
-+ .fs_flags = FS_VIRTUALIZED,
- };
-
- static int __init init_autofs_fs(void)
-diff -upr linux-2.6.16.orig/fs/autofs/inode.c linux-2.6.16-026test009/fs/autofs/inode.c
---- linux-2.6.16.orig/fs/autofs/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -66,7 +66,7 @@ static int parse_options(char *options,
-
- *uid = current->uid;
- *gid = current->gid;
-- *pgrp = process_group(current);
-+ *pgrp = virt_pgid(current);
-
- *minproto = *maxproto = AUTOFS_PROTO_VERSION;
-
-@@ -138,7 +138,7 @@ int autofs_fill_super(struct super_block
- sbi->magic = AUTOFS_SBI_MAGIC;
- sbi->catatonic = 0;
- sbi->exp_timeout = 0;
-- sbi->oz_pgrp = process_group(current);
-+ sbi->oz_pgrp = virt_pgid(current);
- autofs_initialize_hash(&sbi->dirhash);
- sbi->queues = NULL;
- memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
-diff -upr linux-2.6.16.orig/fs/autofs/root.c linux-2.6.16-026test009/fs/autofs/root.c
---- linux-2.6.16.orig/fs/autofs/root.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs/root.c 2006-04-19 15:02:12.000000000 +0400
-@@ -354,7 +354,7 @@ static int autofs_root_unlink(struct ino
-
- /* This allows root to remove symlinks */
- lock_kernel();
-- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) {
-+ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) ) {
- unlock_kernel();
- return -EACCES;
- }
-@@ -541,7 +541,7 @@ static int autofs_root_ioctl(struct inod
- _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
- return -ENOTTY;
-
-- if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
-+ if ( !autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
- return -EPERM;
-
- switch(cmd) {
-diff -upr linux-2.6.16.orig/fs/autofs4/autofs_i.h linux-2.6.16-026test009/fs/autofs4/autofs_i.h
---- linux-2.6.16.orig/fs/autofs4/autofs_i.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs4/autofs_i.h 2006-04-19 15:02:12.000000000 +0400
-@@ -122,7 +122,7 @@ static inline struct autofs_info *autofs
- filesystem without "magic".) */
-
- static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-- return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
-+ return sbi->catatonic || virt_pgid(current) == sbi->oz_pgrp;
- }
-
- /* Does a dentry have some pending activity? */
-diff -upr linux-2.6.16.orig/fs/autofs4/init.c linux-2.6.16-026test009/fs/autofs4/init.c
---- linux-2.6.16.orig/fs/autofs4/init.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs4/init.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,6 +25,7 @@ static struct file_system_type autofs_fs
- .name = "autofs",
- .get_sb = autofs_get_sb,
- .kill_sb = kill_anon_super,
-+ .fs_flags = FS_VIRTUALIZED,
- };
-
- static int __init init_autofs4_fs(void)
-diff -upr linux-2.6.16.orig/fs/autofs4/inode.c linux-2.6.16-026test009/fs/autofs4/inode.c
---- linux-2.6.16.orig/fs/autofs4/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs4/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -179,7 +179,7 @@ static int parse_options(char *options,
-
- *uid = current->uid;
- *gid = current->gid;
-- *pgrp = process_group(current);
-+ *pgrp = virt_pgid(current);
-
- *minproto = AUTOFS_MIN_PROTO_VERSION;
- *maxproto = AUTOFS_MAX_PROTO_VERSION;
-@@ -265,7 +265,7 @@ int autofs4_fill_super(struct super_bloc
- sbi->root = NULL;
- sbi->catatonic = 0;
- sbi->exp_timeout = 0;
-- sbi->oz_pgrp = process_group(current);
-+ sbi->oz_pgrp = virt_pgid(current);
- sbi->sb = s;
- sbi->version = 0;
- sbi->sub_version = 0;
-diff -upr linux-2.6.16.orig/fs/autofs4/root.c linux-2.6.16-026test009/fs/autofs4/root.c
---- linux-2.6.16.orig/fs/autofs4/root.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/autofs4/root.c 2006-04-19 15:02:12.000000000 +0400
-@@ -592,7 +592,7 @@ static int autofs4_dir_unlink(struct ino
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
-
- /* This allows root to remove symlinks */
-- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
-+ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
- return -EACCES;
-
- dput(ino->dentry);
-@@ -784,7 +784,7 @@ static int autofs4_root_ioctl(struct ino
- _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT )
- return -ENOTTY;
-
-- if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) )
-+ if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN) )
- return -EPERM;
-
- switch(cmd) {
-diff -upr linux-2.6.16.orig/fs/binfmt_aout.c linux-2.6.16-026test009/fs/binfmt_aout.c
---- linux-2.6.16.orig/fs/binfmt_aout.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_aout.c 2006-04-19 15:02:12.000000000 +0400
-@@ -446,9 +446,11 @@ beyond_if:
- #endif
- start_thread(regs, ex.a_entry, current->mm->start_stack);
- if (unlikely(current->ptrace & PT_PTRACED)) {
-- if (current->ptrace & PT_TRACE_EXEC)
-+ if (current->ptrace & PT_TRACE_EXEC) {
-+ set_pn_state(current, PN_STOP_EXEC);
- ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-- else
-+ clear_pn_state(current);
-+ } else
- send_sig(SIGTRAP, current, 0);
- }
- return 0;
-diff -upr linux-2.6.16.orig/fs/binfmt_elf.c linux-2.6.16-026test009/fs/binfmt_elf.c
---- linux-2.6.16.orig/fs/binfmt_elf.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_elf.c 2006-04-19 15:02:12.000000000 +0400
-@@ -361,7 +361,7 @@ static unsigned long load_elf_interp(str
- eppnt = elf_phdata;
- for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
- if (eppnt->p_type == PT_LOAD) {
-- int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
-+ int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO;
- int elf_prot = 0;
- unsigned long vaddr = 0;
- unsigned long k, map_addr;
-@@ -669,7 +669,7 @@ static int load_elf_binary(struct linux_
- */
- SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
-
-- interpreter = open_exec(elf_interpreter);
-+ interpreter = open_exec(elf_interpreter, NULL);
- retval = PTR_ERR(interpreter);
- if (IS_ERR(interpreter))
- goto out_free_interp;
-@@ -834,7 +834,7 @@ static int load_elf_binary(struct linux_
- if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
- if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
-
-- elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
-+ elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE|MAP_EXECPRIO;
-
- vaddr = elf_ppnt->p_vaddr;
- if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
-@@ -1000,9 +1000,11 @@ static int load_elf_binary(struct linux_
-
- start_thread(regs, elf_entry, bprm->p);
- if (unlikely(current->ptrace & PT_PTRACED)) {
-- if (current->ptrace & PT_TRACE_EXEC)
-+ if (current->ptrace & PT_TRACE_EXEC) {
-+ set_pn_state(current, PN_STOP_EXEC);
- ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-- else
-+ clear_pn_state(current);
-+ } else
- send_sig(SIGTRAP, current, 0);
- }
- retval = 0;
-@@ -1022,8 +1024,13 @@ out_free_file:
- sys_close(elf_exec_fileno);
- out_free_fh:
- if (files) {
-- put_files_struct(current->files);
-+ struct files_struct *old;
-+
-+ old = current->files;
-+ task_lock(current);
- current->files = files;
-+ task_unlock(current);
-+ put_files_struct(old);
- }
- out_free_ph:
- kfree(elf_phdata);
-@@ -1281,10 +1288,10 @@ static void fill_prstatus(struct elf_prs
- prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
- prstatus->pr_sigpend = p->pending.signal.sig[0];
- prstatus->pr_sighold = p->blocked.sig[0];
-- prstatus->pr_pid = p->pid;
-- prstatus->pr_ppid = p->parent->pid;
-- prstatus->pr_pgrp = process_group(p);
-- prstatus->pr_sid = p->signal->session;
-+ prstatus->pr_pid = virt_pid(p);
-+ prstatus->pr_ppid = virt_pid(p->parent);
-+ prstatus->pr_pgrp = virt_pgid(p);
-+ prstatus->pr_sid = virt_sid(p);
- if (thread_group_leader(p)) {
- /*
- * This is the record for the group leader. Add in the
-@@ -1327,10 +1334,10 @@ static int fill_psinfo(struct elf_prpsin
- psinfo->pr_psargs[i] = ' ';
- psinfo->pr_psargs[len] = 0;
-
-- psinfo->pr_pid = p->pid;
-- psinfo->pr_ppid = p->parent->pid;
-- psinfo->pr_pgrp = process_group(p);
-- psinfo->pr_sid = p->signal->session;
-+ psinfo->pr_pid = virt_pid(p);
-+ psinfo->pr_ppid = virt_pid(p->parent);
-+ psinfo->pr_pgrp = virt_pgid(p);
-+ psinfo->pr_sid = virt_sid(p);
-
- i = p->state ? ffz(~p->state) + 1 : 0;
- psinfo->pr_state = i;
-@@ -1463,7 +1470,7 @@ static int elf_core_dump(long signr, str
- if (signr) {
- struct elf_thread_status *tmp;
- read_lock(&tasklist_lock);
-- do_each_thread(g,p)
-+ do_each_thread_ve(g,p)
- if (current->mm == p->mm && current != p) {
- tmp = kmalloc(sizeof(*tmp), GFP_ATOMIC);
- if (!tmp) {
-@@ -1475,7 +1482,7 @@ static int elf_core_dump(long signr, str
- tmp->thread = p;
- list_add(&tmp->list, &thread_list);
- }
-- while_each_thread(g,p);
-+ while_each_thread_ve(g,p);
- read_unlock(&tasklist_lock);
- list_for_each(t, &thread_list) {
- struct elf_thread_status *tmp;
-diff -upr linux-2.6.16.orig/fs/binfmt_elf_fdpic.c linux-2.6.16-026test009/fs/binfmt_elf_fdpic.c
---- linux-2.6.16.orig/fs/binfmt_elf_fdpic.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_elf_fdpic.c 2006-04-19 15:02:11.000000000 +0400
-@@ -205,7 +205,7 @@ static int load_elf_fdpic_binary(struct
- kdebug("Using ELF interpreter %s", interpreter_name);
-
- /* replace the program with the interpreter */
-- interpreter = open_exec(interpreter_name);
-+ interpreter = open_exec(interpreter_name, bprm);
- retval = PTR_ERR(interpreter);
- if (IS_ERR(interpreter)) {
- interpreter = NULL;
-diff -upr linux-2.6.16.orig/fs/binfmt_em86.c linux-2.6.16-026test009/fs/binfmt_em86.c
---- linux-2.6.16.orig/fs/binfmt_em86.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_em86.c 2006-04-19 15:02:11.000000000 +0400
-@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm
- * Note that we use open_exec() as the name is now in kernel
- * space, and we don't need to copy it.
- */
-- file = open_exec(interp);
-+ file = open_exec(interp, bprm);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
-diff -upr linux-2.6.16.orig/fs/binfmt_flat.c linux-2.6.16-026test009/fs/binfmt_flat.c
---- linux-2.6.16.orig/fs/binfmt_flat.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_flat.c 2006-04-19 15:02:11.000000000 +0400
-@@ -774,7 +774,7 @@ static int load_flat_shared_library(int
-
- /* Open the file up */
- bprm.filename = buf;
-- bprm.file = open_exec(bprm.filename);
-+ bprm.file = open_exec(bprm.filename, &bprm); /* bprm is a struct object in this function (see bprm.filename), so pass its address */
- res = PTR_ERR(bprm.file);
- if (IS_ERR(bprm.file))
- return res;
-diff -upr linux-2.6.16.orig/fs/binfmt_misc.c linux-2.6.16-026test009/fs/binfmt_misc.c
---- linux-2.6.16.orig/fs/binfmt_misc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_misc.c 2006-04-19 15:02:11.000000000 +0400
-@@ -179,7 +179,7 @@ static int load_misc_binary(struct linux
-
- bprm->interp = iname; /* for binfmt_script */
-
-- interp_file = open_exec (iname);
-+ interp_file = open_exec (iname, bprm);
- retval = PTR_ERR (interp_file);
- if (IS_ERR (interp_file))
- goto _error;
-@@ -216,8 +216,13 @@ _error:
- bprm->interp_data = 0;
- _unshare:
- if (files) {
-- put_files_struct(current->files);
-+ struct files_struct *old;
-+
-+ old = current->files;
-+ task_lock(current);
- current->files = files;
-+ task_unlock(current);
-+ put_files_struct(old);
- }
- goto _ret;
- }
-diff -upr linux-2.6.16.orig/fs/binfmt_script.c linux-2.6.16-026test009/fs/binfmt_script.c
---- linux-2.6.16.orig/fs/binfmt_script.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/binfmt_script.c 2006-04-19 15:02:11.000000000 +0400
-@@ -85,7 +85,7 @@ static int load_script(struct linux_binp
- /*
- * OK, now restart the process with the interpreter's dentry.
- */
-- file = open_exec(interp);
-+ file = open_exec(interp, bprm);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
-diff -upr linux-2.6.16.orig/fs/block_dev.c linux-2.6.16-026test009/fs/block_dev.c
---- linux-2.6.16.orig/fs/block_dev.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/block_dev.c 2006-04-19 15:02:12.000000000 +0400
-@@ -561,9 +561,16 @@ static int do_open(struct block_device *
- {
- struct module *owner = NULL;
- struct gendisk *disk;
-- int ret = -ENXIO;
-+ int ret;
- int part;
-
-+#ifdef CONFIG_VE
-+ ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev,
-+ file->f_mode&(FMODE_READ|FMODE_WRITE));
-+ if (ret)
-+ return ret;
-+#endif
-+ ret = -ENXIO;
- file->f_mapping = bdev->bd_inode->i_mapping;
- lock_kernel();
- disk = get_gendisk(bdev->bd_dev, &part);
-@@ -832,7 +839,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
- * namespace if possible and return it. Return ERR_PTR(error)
- * otherwise.
- */
--struct block_device *lookup_bdev(const char *path)
-+struct block_device *lookup_bdev(const char *path, int mode)
- {
- struct block_device *bdev;
- struct inode *inode;
-@@ -850,6 +857,11 @@ struct block_device *lookup_bdev(const c
- error = -ENOTBLK;
- if (!S_ISBLK(inode->i_mode))
- goto fail;
-+#ifdef CONFIG_VE
-+ error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode);
-+ if (error)
-+ goto fail;
-+#endif
- error = -EACCES;
- if (nd.mnt->mnt_flags & MNT_NODEV)
- goto fail;
-@@ -881,12 +893,13 @@ struct block_device *open_bdev_excl(cons
- mode_t mode = FMODE_READ;
- int error = 0;
-
-- bdev = lookup_bdev(path);
-+ if (!(flags & MS_RDONLY))
-+ mode |= FMODE_WRITE;
-+
-+ bdev = lookup_bdev(path, mode);
- if (IS_ERR(bdev))
- return bdev;
-
-- if (!(flags & MS_RDONLY))
-- mode |= FMODE_WRITE;
- error = blkdev_get(bdev, mode, 0);
- if (error)
- return ERR_PTR(error);
-diff -upr linux-2.6.16.orig/fs/char_dev.c linux-2.6.16-026test009/fs/char_dev.c
---- linux-2.6.16.orig/fs/char_dev.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/char_dev.c 2006-04-19 15:02:12.000000000 +0400
-@@ -342,6 +342,13 @@ int chrdev_open(struct inode * inode, st
- struct cdev *new = NULL;
- int ret = 0;
-
-+#ifdef CONFIG_VE
-+ ret = get_device_perms_ve(S_IFCHR, inode->i_rdev,
-+ filp->f_mode&(FMODE_READ|FMODE_WRITE));
-+ if (ret)
-+ return ret;
-+#endif
-+
- spin_lock(&cdev_lock);
- p = inode->i_cdev;
- if (!p) {
-diff -upr linux-2.6.16.orig/fs/cifs/cifsencrypt.c linux-2.6.16-026test009/fs/cifs/cifsencrypt.c
---- linux-2.6.16.orig/fs/cifs/cifsencrypt.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/cifs/cifsencrypt.c 2006-04-19 15:02:11.000000000 +0400
-@@ -56,9 +56,6 @@ int cifs_sign_smb(struct smb_hdr * cifs_
- int rc = 0;
- char smb_signature[20];
-
-- /* BB remember to initialize sequence number elsewhere and initialize mac_signing key elsewhere BB */
-- /* BB remember to add code to save expected sequence number in midQ entry BB */
--
- if((cifs_pdu == NULL) || (server == NULL))
- return -EINVAL;
-
-@@ -85,20 +82,33 @@ int cifs_sign_smb(struct smb_hdr * cifs_
- static int cifs_calc_signature2(const struct kvec * iov, int n_vec,
- const char * key, char * signature)
- {
-- struct MD5Context context;
--
-- if((iov == NULL) || (signature == NULL))
-- return -EINVAL;
-+ struct MD5Context context;
-+ int i;
-
-- MD5Init(&context);
-- MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
-+ if((iov == NULL) || (signature == NULL))
-+ return -EINVAL;
-
--/* MD5Update(&context,cifs_pdu->Protocol,cifs_pdu->smb_buf_length); */ /* BB FIXME BB */
-+ MD5Init(&context);
-+ MD5Update(&context,key,CIFS_SESSION_KEY_SIZE+16);
-+ for(i=0;i<n_vec;i++) {
-+ if(iov[i].iov_base == NULL) {
-+ cERROR(1,("null iovec entry"));
-+ return -EIO;
-+ } else if(iov[i].iov_len == 0)
-+ break; /* bail out if we are sent nothing to sign */
-+ /* The first entry includes a length field (which does not get
-+ signed that occupies the first 4 bytes before the header */
-+ if(i==0) {
-+ if (iov[0].iov_len <= 8 ) /* cmd field at offset 9 */
-+ break; /* nothing to sign or corrupt header */
-+ MD5Update(&context,iov[0].iov_base+4, iov[0].iov_len-4);
-+ } else
-+ MD5Update(&context,iov[i].iov_base, iov[i].iov_len);
-+ }
-
-- MD5Final(signature,&context);
-+ MD5Final(signature,&context);
-
-- return -EOPNOTSUPP;
--/* return 0; */
-+ return 0;
- }
-
-
-diff -upr linux-2.6.16.orig/fs/cifs/cifsfs.c linux-2.6.16-026test009/fs/cifs/cifsfs.c
---- linux-2.6.16.orig/fs/cifs/cifsfs.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/cifs/cifsfs.c 2006-04-19 15:02:11.000000000 +0400
-@@ -220,7 +220,8 @@ cifs_statfs(struct super_block *sb, stru
- longer available? */
- }
-
--static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
-+static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- struct cifs_sb_info *cifs_sb;
-
-@@ -232,7 +233,7 @@ static int cifs_permission(struct inode
- on the client (above and beyond ACL on servers) for
- servers which do not support setting and viewing mode bits,
- so allowing client to check permissions is useful */
-- return generic_permission(inode, mask, NULL);
-+ return generic_permission(inode, mask, NULL, perm);
- }
-
- static kmem_cache_t *cifs_inode_cachep;
-diff -upr linux-2.6.16.orig/fs/coda/dir.c linux-2.6.16-026test009/fs/coda/dir.c
---- linux-2.6.16.orig/fs/coda/dir.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/coda/dir.c 2006-04-19 15:02:11.000000000 +0400
-@@ -151,7 +151,8 @@ exit:
- }
-
-
--int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
-+int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- int error = 0;
-
-diff -upr linux-2.6.16.orig/fs/coda/pioctl.c linux-2.6.16-026test009/fs/coda/pioctl.c
---- linux-2.6.16.orig/fs/coda/pioctl.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/coda/pioctl.c 2006-04-19 15:02:11.000000000 +0400
-@@ -25,7 +25,7 @@
-
- /* pioctl ops */
- static int coda_ioctl_permission(struct inode *inode, int mask,
-- struct nameidata *nd);
-+ struct nameidata *nd, struct exec_perm *perm);
- static int coda_pioctl(struct inode * inode, struct file * filp,
- unsigned int cmd, unsigned long user_data);
-
-@@ -43,7 +43,7 @@ struct file_operations coda_ioctl_operat
-
- /* the coda pioctl inode ops */
- static int coda_ioctl_permission(struct inode *inode, int mask,
-- struct nameidata *nd)
-+ struct nameidata *nd, struct exec_perm *perm)
- {
- return 0;
- }
-diff -upr linux-2.6.16.orig/fs/compat.c linux-2.6.16-026test009/fs/compat.c
---- linux-2.6.16.orig/fs/compat.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/compat.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1479,7 +1479,7 @@ int compat_do_execve(char * filename,
- goto out_ret;
- memset(bprm, 0, sizeof(*bprm));
-
-- file = open_exec(filename);
-+ file = open_exec(filename, bprm);
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- goto out_kfree;
-diff -upr linux-2.6.16.orig/fs/dcache.c linux-2.6.16-026test009/fs/dcache.c
---- linux-2.6.16.orig/fs/dcache.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/dcache.c 2006-04-19 15:02:12.000000000 +0400
-@@ -28,11 +28,16 @@
- #include <linux/module.h>
- #include <linux/mount.h>
- #include <linux/file.h>
-+#include <linux/namei.h>
- #include <asm/uaccess.h>
- #include <linux/security.h>
- #include <linux/seqlock.h>
- #include <linux/swap.h>
- #include <linux/bootmem.h>
-+#include <linux/kernel_stat.h>
-+#include <net/inet_sock.h>
-+
-+#include <ub/ub_dcache.h>
-
- /* #define DCACHE_DEBUG 1 */
-
-@@ -44,7 +49,7 @@ static seqlock_t rename_lock __cacheline
-
- EXPORT_SYMBOL(dcache_lock);
-
--static kmem_cache_t *dentry_cache;
-+kmem_cache_t *dentry_cache;
-
- #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-
-@@ -114,6 +119,75 @@ static void dentry_iput(struct dentry *
- }
- }
-
-+struct dcache_shrinker { /* marker queued while a child of ->dentry is being freed */
-+ struct list_head list; /* link in the superblock's s_dshrinkers list */
-+ struct dentry *dentry; /* the parent dentry recorded by dcache_shrinker_add() */
-+};
-+
-+DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq);
-+
-+/* called under dcache_lock: records @parent in @ds and queues @ds on the parent's superblock */
-+static void dcache_shrinker_add(struct dcache_shrinker *ds,
-+ struct dentry *parent, struct dentry *dentry)
-+{
-+ struct super_block *sb;
-+
-+ sb = parent->d_sb;
-+ ds->dentry = parent; /* NOTE(review): @dentry is not referenced in this body; only the parent is tracked */
-+ list_add(&ds->list, &sb->s_dshrinkers);
-+}
-+
-+/* called under dcache_lock: unqueues @ds if it is still queued and wakes every waiter */
-+static void dcache_shrinker_del(struct dcache_shrinker *ds)
-+{
-+ if (ds == NULL || list_empty(&ds->list)) /* NULL or already unqueued: nothing to do */
-+ return;
-+
-+ list_del_init(&ds->list); /* re-initialised, so a repeated call takes the early return above */
-+ wake_up_all(&dcache_shrinker_wq);
-+}
-+
-+/* called under dcache_lock, drops inside: returns with dcache_lock released */
-+static void dcache_shrinker_wait(struct super_block *sb) /* NOTE(review): @sb is not referenced in the body */
-+{
-+ DECLARE_WAITQUEUE(wq, current);
-+
-+ __set_current_state(TASK_UNINTERRUPTIBLE); /* state and queueing are set up while the lock is still held */
-+ add_wait_queue(&dcache_shrinker_wq, &wq);
-+ spin_unlock(&dcache_lock);
-+
-+ schedule(); /* sleeps until woken, e.g. by wake_up_all() in dcache_shrinker_del() */
-+ remove_wait_queue(&dcache_shrinker_wq, &wq);
-+ __set_current_state(TASK_RUNNING);
-+}
-+
-+void dcache_shrinker_wait_sb(struct super_block *sb) /* wait until sb->s_dshrinkers is empty */
-+{
-+ /* the root dentry can be held in dput_recursive */
-+ spin_lock(&dcache_lock);
-+ while (!list_empty(&sb->s_dshrinkers)) {
-+ dcache_shrinker_wait(sb); /* returns with dcache_lock dropped */
-+ spin_lock(&dcache_lock); /* retake it before re-testing the list */
-+ }
-+ spin_unlock(&dcache_lock);
-+}
-+
-+/* dcache_lock protects shrinker's list; sets *racecheck to 1 if a queued shrinker records @parent */
-+static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck)
-+{
-+ struct super_block *sb;
-+ struct dcache_shrinker *ds;
-+
-+ sb = parent->d_sb;
-+ list_for_each_entry(ds, &sb->s_dshrinkers, list) {
-+ /* is one of dcache shrinkers working on the dentry? */
-+ if (ds->dentry == parent) {
-+ *racecheck = 1; /* only ever set here, never cleared; *racecheck is left untouched when no entry matches */
-+ break;
-+ }
-+ }
-+}
-+
- /*
- * This is dput
- *
-@@ -132,8 +206,9 @@ static void dentry_iput(struct dentry *
- */
-
- /*
-- * dput - release a dentry
-- * @dentry: dentry to release
-+ * dput_recursive - go upward through the dentry tree and release dentries
-+ * @dentry: starting dentry
-+ * @ds: shrinker to be added to active list (see shrink_dcache_parent)
- *
- * Release a dentry. This will drop the usage count and if appropriate
- * call the dentry unlink method as well as removing it from the queues and
-@@ -142,18 +217,15 @@ static void dentry_iput(struct dentry *
- *
- * no dcache lock, please.
- */
--
--void dput(struct dentry *dentry)
-+static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds)
- {
-- if (!dentry)
-- return;
--
--repeat:
- if (atomic_read(&dentry->d_count) == 1)
- might_sleep();
- if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
- return;
-+ dcache_shrinker_del(ds);
-
-+repeat:
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
- spin_unlock(&dentry->d_lock);
-@@ -185,6 +257,7 @@ unhash_it:
-
- kill_it: {
- struct dentry *parent;
-+ struct dcache_shrinker lds;
-
- /* If dentry was on d_lru list
- * delete it from there
-@@ -194,18 +267,50 @@ kill_it: {
- dentry_stat.nr_unused--;
- }
- list_del(&dentry->d_u.d_child);
-+ parent = dentry->d_parent;
-+ dcache_shrinker_add(&lds, parent, dentry);
- dentry_stat.nr_dentry--; /* For d_free, below */
- /*drops the locks, at that point nobody can reach this dentry */
- dentry_iput(dentry);
-- parent = dentry->d_parent;
- d_free(dentry);
-- if (dentry == parent)
-+ if (unlikely(dentry == parent)) {
-+ spin_lock(&dcache_lock);
-+ dcache_shrinker_del(&lds);
-+ spin_unlock(&dcache_lock);
- return;
-+ }
- dentry = parent;
-- goto repeat;
-+ spin_lock(&dcache_lock);
-+ dcache_shrinker_del(&lds);
-+ if (atomic_dec_and_test(&dentry->d_count))
-+ goto repeat;
-+ spin_unlock(&dcache_lock);
- }
- }
-
-+/*
-+ * dput - release a dentry
-+ * @dentry: dentry to release (NULL is accepted and ignored)
-+ *
-+ * Release a dentry. This will drop the usage count and if appropriate
-+ * call the dentry unlink method as well as removing it from the queues and
-+ * releasing its resources. If the parent dentries were scheduled for release
-+ * they too may now get deleted.
-+ *
-+ * no dcache lock, please: this function takes dcache_lock itself.
-+ */
-+
-+void dput(struct dentry *dentry)
-+{
-+ if (!dentry)
-+ return;
-+
-+ spin_lock(&dcache_lock);
-+ ub_dentry_uncharge(dentry); /* invoked with dcache_lock held */
-+ spin_unlock(&dcache_lock);
-+ dput_recursive(dentry, NULL); /* NULL: this caller has no dcache_shrinker entry to remove */
-+}
-+
- /**
- * d_invalidate - invalidate a dentry
- * @dentry: dentry to invalidate
-@@ -272,6 +377,8 @@ static inline struct dentry * __dget_loc
- dentry_stat.nr_unused--;
- list_del_init(&dentry->d_lru);
- }
-+
-+ ub_dentry_charge_nofail(dentry);
- return dentry;
- }
-
-@@ -362,19 +469,27 @@ restart:
- * removed.
- * Called with dcache_lock, drops it and then regains.
- */
--static inline void prune_one_dentry(struct dentry * dentry)
-+static void prune_one_dentry(struct dentry * dentry)
- {
- struct dentry * parent;
-+ struct dcache_shrinker ds;
-
- __d_drop(dentry);
- list_del(&dentry->d_u.d_child);
-+ parent = dentry->d_parent;
-+ dcache_shrinker_add(&ds, parent, dentry);
- dentry_stat.nr_dentry--; /* For d_free, below */
- dentry_iput(dentry);
- parent = dentry->d_parent;
- d_free(dentry);
- if (parent != dentry)
-- dput(parent);
-+ /*
-+ * dentry is not in use, only child (not outside)
-+ * references change, so parent->d_inuse does not change
-+ */
-+ dput_recursive(parent, &ds);
- spin_lock(&dcache_lock);
-+ dcache_shrinker_del(&ds);
- }
-
- /**
-@@ -486,6 +601,7 @@ repeat:
- continue;
- }
- prune_one_dentry(dentry);
-+ cond_resched_lock(&dcache_lock);
- goto repeat;
- }
- spin_unlock(&dcache_lock);
-@@ -557,13 +673,12 @@ positive:
- * drop the lock and return early due to latency
- * constraints.
- */
--static int select_parent(struct dentry * parent)
-+static int select_parent(struct dentry * parent, int * racecheck)
- {
- struct dentry *this_parent = parent;
- struct list_head *next;
- int found = 0;
-
-- spin_lock(&dcache_lock);
- repeat:
- next = this_parent->d_subdirs.next;
- resume:
-@@ -605,6 +720,9 @@ dentry->d_parent->d_name.name, dentry->d
- #endif
- goto repeat;
- }
-+
-+ if (!found && racecheck != NULL)
-+ shrink_dcache_racecheck(dentry, racecheck);
- }
- /*
- * All done at this level ... ascend and resume the search.
-@@ -619,7 +737,6 @@ this_parent->d_parent->d_name.name, this
- goto resume;
- }
- out:
-- spin_unlock(&dcache_lock);
- return found;
- }
-
-@@ -632,10 +749,66 @@ out:
-
- void shrink_dcache_parent(struct dentry * parent)
- {
-- int found;
-+ int found, r;
-+
-+ while (1) {
-+ spin_lock(&dcache_lock);
-+ found = select_parent(parent, NULL);
-+ if (found)
-+ goto found;
-
-- while ((found = select_parent(parent)) != 0)
-+ /*
-+ * try again with a dput_recursive() race check.
-+ * it returns quickly if everything was really shrinked
-+ */
-+ r = 0;
-+ found = select_parent(parent, &r);
-+ if (found)
-+ goto found;
-+ if (!r)
-+ break;
-+
-+ /* drops the lock inside */
-+ dcache_shrinker_wait(parent->d_sb);
-+ continue;
-+
-+found:
-+ spin_unlock(&dcache_lock);
- prune_dcache(found);
-+ }
-+ spin_unlock(&dcache_lock);
-+}
-+
-+/*
-+ * Move any unused anon dentries to the end of the unused list.
-+ * called under dcache_lock; returns the number of dentries moved
-+ */
-+static int select_anon(struct hlist_head *head, int *racecheck)
-+{
-+ struct hlist_node *lp;
-+ int found = 0;
-+
-+ hlist_for_each(lp, head) {
-+ struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
-+ if (!list_empty(&this->d_lru)) { /* take it off the LRU first, whatever its refcount */
-+ dentry_stat.nr_unused--;
-+ list_del_init(&this->d_lru);
-+ }
-+
-+ /*
-+ * move only zero ref count dentries to the end
-+ * of the unused list for prune_dcache
-+ */
-+ if (!atomic_read(&this->d_count)) {
-+ list_add_tail(&this->d_lru, &dentry_unused);
-+ dentry_stat.nr_unused++;
-+ found++;
-+ }
-+
-+ if (!found && racecheck != NULL) /* found is cumulative: checked only while nothing has been selected yet */
-+ shrink_dcache_racecheck(this, racecheck);
-+ }
-+ return found;
- }
-
- /**
-@@ -648,33 +821,36 @@ void shrink_dcache_parent(struct dentry
- * done under dcache_lock.
- *
- */
--void shrink_dcache_anon(struct hlist_head *head)
-+void shrink_dcache_anon(struct super_block *sb)
- {
-- struct hlist_node *lp;
-- int found;
-- do {
-- found = 0;
-+ int found, r;
-+
-+ while (1) {
- spin_lock(&dcache_lock);
-- hlist_for_each(lp, head) {
-- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
-- if (!list_empty(&this->d_lru)) {
-- dentry_stat.nr_unused--;
-- list_del_init(&this->d_lru);
-- }
-+ found = select_anon(&sb->s_anon, NULL);
-+ if (found)
-+ goto found;
-
-- /*
-- * move only zero ref count dentries to the end
-- * of the unused list for prune_dcache
-- */
-- if (!atomic_read(&this->d_count)) {
-- list_add_tail(&this->d_lru, &dentry_unused);
-- dentry_stat.nr_unused++;
-- found++;
-- }
-- }
-+ /*
-+ * try again with a dput_recursive() race check.
-+ * it returns quickly if everything was really shrinked
-+ */
-+ r = 0;
-+ found = select_anon(&sb->s_anon, &r);
-+ if (found)
-+ goto found;
-+ if (!r)
-+ break;
-+
-+ /* drops the lock inside */
-+ dcache_shrinker_wait(sb);
-+ continue;
-+
-+found:
- spin_unlock(&dcache_lock);
- prune_dcache(found);
-- } while(found);
-+ }
-+ spin_unlock(&dcache_lock);
- }
-
- /*
-@@ -691,12 +867,18 @@ void shrink_dcache_anon(struct hlist_hea
- */
- static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
- {
-+ int res = -1;
-+
-+ KSTAT_PERF_ENTER(shrink_dcache)
- if (nr) {
- if (!(gfp_mask & __GFP_FS))
-- return -1;
-+ goto out;
- prune_dcache(nr);
- }
-- return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-+ res = (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-+out:
-+ KSTAT_PERF_LEAVE(shrink_dcache)
-+ return res;
- }
-
- /**
-@@ -716,19 +898,20 @@ struct dentry *d_alloc(struct dentry * p
-
- dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
- if (!dentry)
-- return NULL;
-+ goto err_alloc;
-
- if (name->len > DNAME_INLINE_LEN-1) {
- dname = kmalloc(name->len + 1, GFP_KERNEL);
-- if (!dname) {
-- kmem_cache_free(dentry_cache, dentry);
-- return NULL;
-- }
-+ if (!dname)
-+ goto err_name;
- } else {
- dname = dentry->d_iname;
- }
- dentry->d_name.name = dname;
-
-+ if (ub_dentry_alloc(dentry))
-+ goto err_charge;
-+
- dentry->d_name.len = name->len;
- dentry->d_name.hash = name->hash;
- memcpy(dname, name->name, name->len);
-@@ -759,12 +942,23 @@ struct dentry *d_alloc(struct dentry * p
- }
-
- spin_lock(&dcache_lock);
-- if (parent)
-+ if (parent) {
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
-+ if (parent->d_flags & DCACHE_VIRTUAL)
-+ dentry->d_flags |= DCACHE_VIRTUAL;
-+ }
- dentry_stat.nr_dentry++;
- spin_unlock(&dcache_lock);
-
- return dentry;
-+
-+err_charge:
-+ if (name->len > DNAME_INLINE_LEN - 1)
-+ kfree(dname);
-+err_name:
-+ kmem_cache_free(dentry_cache, dentry);
-+err_alloc:
-+ return NULL;
- }
-
- struct dentry *d_alloc_name(struct dentry *parent, const char *name)
-@@ -1048,7 +1242,6 @@ struct dentry * __d_lookup(struct dentry
- unsigned int hash = name->hash;
- const unsigned char *str = name->name;
- struct hlist_head *head = d_hash(parent,hash);
-- struct dentry *found = NULL;
- struct hlist_node *node;
- struct dentry *dentry;
-
-@@ -1089,7 +1282,7 @@ struct dentry * __d_lookup(struct dentry
-
- if (!d_unhashed(dentry)) {
- atomic_inc(&dentry->d_count);
-- found = dentry;
-+ goto found;
- }
- spin_unlock(&dentry->d_lock);
- break;
-@@ -1098,7 +1291,18 @@ next:
- }
- rcu_read_unlock();
-
-- return found;
-+ return NULL;
-+
-+found:
-+ /*
-+ * d_lock and rcu_read_lock
-+ * are dropped in ub_dentry_charge()
-+ */
-+ if (ub_dentry_charge(dentry)) {
-+ dput(dentry);
-+ dentry = NULL;
-+ }
-+ return dentry;
- }
-
- /**
-@@ -1345,6 +1549,32 @@ already_unhashed:
- }
-
- /**
-+ * __d_path_add_deleted - prepend "(deleted) " text
-+ * @end: a pointer to the character after free space at the beginning of the
-+ * buffer
-+ * @buflen: remaining free space; ERR_PTR(-ENAMETOOLONG) is returned if it is below 10
-+ */
-+static inline char * __d_path_add_deleted(char * end, int buflen)
-+{
-+ buflen -= 10; /* 10 is the length of the prefix copied below, without a NUL */
-+ if (buflen < 0)
-+ return ERR_PTR(-ENAMETOOLONG);
-+ end -= 10;
-+ memcpy(end, "(deleted) ", 10);
-+ return end; /* new start of the text, now beginning with the prefix */
-+}
-+
-+/**
-+ * d_root_check - checks if dentry is accessible from current's fs root
-+ * @dentry: dentry to be verified
-+ * @vfsmnt: vfsmnt to which the dentry belongs
-+ */
-+int d_root_check(struct dentry *dentry, struct vfsmount *vfsmnt)
-+{
-+ return PTR_ERR(d_path(dentry, vfsmnt, NULL, 0)); /* NULL buffer, zero length: d_path()'s pointer result is turned into an int; presumably 0 means reachable - confirm */
-+}
-+
-+/**
- * d_path - return the path of a dentry
- * @dentry: dentry to report
- * @vfsmnt: vfsmnt to which the dentry belongs
-@@ -1365,36 +1595,35 @@ static char * __d_path( struct dentry *d
- char *buffer, int buflen)
- {
- char * end = buffer+buflen;
-- char * retval;
-+ char * retval = NULL;
- int namelen;
-+ int deleted;
-+ struct vfsmount *oldvfsmnt;
-
-- *--end = '\0';
-- buflen--;
-- if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-- buflen -= 10;
-- end -= 10;
-- if (buflen < 0)
-+ oldvfsmnt = vfsmnt;
-+ deleted = (!IS_ROOT(dentry) && d_unhashed(dentry));
-+ if (buffer != NULL) {
-+ *--end = '\0';
-+ buflen--;
-+
-+ if (buflen < 1)
- goto Elong;
-- memcpy(end, " (deleted)", 10);
-+ /* Get '/' right */
-+ retval = end-1;
-+ *retval = '/';
- }
-
-- if (buflen < 1)
-- goto Elong;
-- /* Get '/' right */
-- retval = end-1;
-- *retval = '/';
--
- for (;;) {
- struct dentry * parent;
-
- if (dentry == root && vfsmnt == rootmnt)
- break;
- if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-- /* Global root? */
-+ /* root of a tree? */
- spin_lock(&vfsmount_lock);
- if (vfsmnt->mnt_parent == vfsmnt) {
- spin_unlock(&vfsmount_lock);
-- goto global_root;
-+ goto other_root;
- }
- dentry = vfsmnt->mnt_mountpoint;
- vfsmnt = vfsmnt->mnt_parent;
-@@ -1403,27 +1632,51 @@ static char * __d_path( struct dentry *d
- }
- parent = dentry->d_parent;
- prefetch(parent);
-+ if (buffer != NULL) {
-+ namelen = dentry->d_name.len;
-+ buflen -= namelen + 1;
-+ if (buflen < 0)
-+ goto Elong;
-+ end -= namelen;
-+ memcpy(end, dentry->d_name.name, namelen);
-+ *--end = '/';
-+ retval = end;
-+ }
-+ dentry = parent;
-+ }
-+ /* the given root point is reached */
-+finish:
-+ if (buffer != NULL && deleted)
-+ retval = __d_path_add_deleted(end, buflen);
-+ return retval;
-+
-+other_root:
-+ /*
-+ * We traversed the tree upward and reached a root, but the given
-+ * lookup terminal point wasn't encountered. It means either that the
-+ * dentry is out of our scope or belongs to an abstract space like
-+ * sock_mnt or pipe_mnt. Check for it.
-+ *
-+ * There are different options to check it.
-+ * We may assume that any dentry tree is unreachable unless it's
-+ * connected to `root' (defined as fs root of init aka child reaper)
-+ * and expose all paths that are not connected to it.
-+ * The other option is to allow exposing of known abstract spaces
-+ * explicitly and hide the path information for other cases.
-+ * This approach is safer, so let's take it. 2001/04/22 SAW
-+ */
-+ if (!(oldvfsmnt->mnt_sb->s_flags & MS_NOUSER))
-+ return ERR_PTR(-EINVAL);
-+ if (buffer != NULL) {
- namelen = dentry->d_name.len;
-- buflen -= namelen + 1;
-+ buflen -= namelen;
- if (buflen < 0)
- goto Elong;
-- end -= namelen;
-- memcpy(end, dentry->d_name.name, namelen);
-- *--end = '/';
-- retval = end;
-- dentry = parent;
-+ retval -= namelen-1; /* hit the slash */
-+ memcpy(retval, dentry->d_name.name, namelen);
- }
-+ goto finish;
-
-- return retval;
--
--global_root:
-- namelen = dentry->d_name.len;
-- buflen -= namelen;
-- if (buflen < 0)
-- goto Elong;
-- retval -= namelen-1; /* hit the slash */
-- memcpy(retval, dentry->d_name.name, namelen);
-- return retval;
- Elong:
- return ERR_PTR(-ENAMETOOLONG);
- }
-@@ -1448,6 +1701,228 @@ char * d_path(struct dentry *dentry, str
- return res;
- }
-
-+#ifdef CONFIG_VE
-+#include <net/sock.h>
-+#include <linux/ip.h>
-+#include <linux/file.h>
-+#include <linux/namespace.h>
-+#include <linux/vzratelimit.h>
-+/* Set DCACHE_VIRTUAL on @d and on every dentry below it (no recursion). */
-+static void mark_sub_tree_virtual(struct dentry *d)
-+{
-+ struct dentry *orig_root;
-+ /* called by mark_tree_virtual() with dcache_lock and vfsmount_lock held */
-+ orig_root = d;
-+ while (1) {
-+ spin_lock(&d->d_lock);
-+ d->d_flags |= DCACHE_VIRTUAL;
-+ spin_unlock(&d->d_lock);
-+ /* descend to the first child, if there is one */
-+ if (!list_empty(&d->d_subdirs)) {
-+ d = list_entry(d->d_subdirs.next,
-+ struct dentry, d_u.d_child);
-+ continue;
-+ }
-+ if (d == orig_root) /* the starting dentry has no children */
-+ break;
-+ while (d == list_entry(d->d_parent->d_subdirs.prev,
-+ struct dentry, d_u.d_child)) { /* d is the last child: climb */
-+ d = d->d_parent;
-+ if (d == orig_root) /* climbed back to the start: done */
-+ goto out;
-+ }
-+ d = list_entry(d->d_u.d_child.next,
-+ struct dentry, d_u.d_child); /* step to the next sibling */
-+ }
-+out:
-+ return;
-+}
-+/* Mark @d's subtree and the root subtree of every mount below @m virtual. */
-+void mark_tree_virtual(struct vfsmount *m, struct dentry *d)
-+{
-+ struct vfsmount *orig_rootmnt;
-+ /* both locks are held across the whole walk */
-+ spin_lock(&dcache_lock);
-+ spin_lock(&vfsmount_lock);
-+ orig_rootmnt = m;
-+ while (1) {
-+ mark_sub_tree_virtual(d);
-+ if (!list_empty(&m->mnt_mounts)) { /* descend to the first child mount */
-+ m = list_entry(m->mnt_mounts.next,
-+ struct vfsmount, mnt_child);
-+ d = m->mnt_root;
-+ continue;
-+ }
-+ if (m == orig_rootmnt) /* the starting mount has no child mounts */
-+ break;
-+ while (m == list_entry(m->mnt_parent->mnt_mounts.prev,
-+ struct vfsmount, mnt_child)) { /* m is the last child: climb */
-+ m = m->mnt_parent;
-+ if (m == orig_rootmnt) /* climbed back to the start: done */
-+ goto out;
-+ }
-+ m = list_entry(m->mnt_child.next,
-+ struct vfsmount, mnt_child); /* step to the next sibling mount */
-+ d = m->mnt_root;
-+ }
-+out:
-+ spin_unlock(&vfsmount_lock);
-+ spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(mark_tree_virtual);
-+/* Bits of ve_area_access_check; VE0_* are used when ve_is_super(get_exec_env()). */
-+static struct vz_rate_info area_ri = { 20, 10*HZ }; /* limits check_alert(); presumably burst/interval - confirm */
-+#define VE_AREA_ACC_CHECK 0x0001 /* access: call check_alert() */
-+#define VE_AREA_ACC_DENY 0x0002 /* access: return -EACCES */
-+#define VE_AREA_EXEC_CHECK 0x0010 /* exec: call check_alert() */
-+#define VE_AREA_EXEC_DENY 0x0020 /* exec: return -EACCES */
-+#define VE0_AREA_ACC_CHECK 0x0100 /* super env, access: call check_alert() */
-+#define VE0_AREA_ACC_DENY 0x0200 /* super env, access: return -EACCES */
-+#define VE0_AREA_EXEC_CHECK 0x1000 /* super env, exec: call check_alert() */
-+#define VE0_AREA_EXEC_DENY 0x2000 /* super env, exec: return -EACCES */
-+int ve_area_access_check = 0; /* no bit set: nothing is reported or denied */
-+/* Log every PF_INET/SOCK_STREAM socket found in @tsk's open file table. */
-+static void print_connection_info(struct task_struct *tsk)
-+{
-+ struct files_struct *files;
-+ struct fdtable *fdt;
-+ int fd;
-+ /* paired with put_files_struct() at the end of the function */
-+ files = get_files_struct(tsk);
-+ if (!files)
-+ return;
-+
-+ spin_lock(&files->file_lock); /* the fd array is walked under file_lock */
-+ fdt = files_fdtable(files);
-+ for (fd = 0; fd < fdt->max_fds; fd++) {
-+ struct file *file;
-+ struct inode *inode;
-+ struct socket *socket;
-+ struct sock *sk;
-+ struct inet_sock *inet;
-+
-+ file = fdt->fd[fd];
-+ if (file == NULL) /* unused descriptor slot */
-+ continue;
-+
-+ inode = file->f_dentry->d_inode;
-+ if (!S_ISSOCK(inode->i_mode)) /* only sockets are of interest */
-+ continue;
-+
-+ socket = SOCKET_I(inode);
-+ if (socket == NULL)
-+ continue;
-+
-+ sk = socket->sk; /* NOTE(review): dereferenced below without a NULL check */
-+ if (sk->sk_family != PF_INET || sk->sk_type != SOCK_STREAM)
-+ continue;
-+
-+ inet = inet_sk(sk);
-+ printk(KERN_ALERT "connection from %u.%u.%u.%u:%u to port %u\n",
-+ NIPQUAD(inet->daddr), ntohs(inet->dport),
-+ inet->num); /* num is printed as-is; presumably host byte order - confirm */
-+ }
-+ spin_unlock(&files->file_lock);
-+ put_files_struct(files);
-+}
-+/* Rate-limited KERN_ALERT report about @dentry, current and its parent. */
-+static void check_alert(struct vfsmount *vfsmnt, struct dentry *dentry,
-+ char *str)
-+{
-+ struct task_struct *tsk;
-+ unsigned long page;
-+ struct super_block *sb;
-+ char *p;
-+ /* @str prefixes the message; callers in this file pass "Access" or "Exec" */
-+ if (!vz_ratelimit(&area_ri))
-+ return;
-+
-+ tsk = current;
-+ p = ERR_PTR(-ENOMEM); /* stays an error if no page can be allocated */
-+ page = __get_free_page(GFP_KERNEL);
-+ if (page) {
-+ spin_lock(&dcache_lock);
-+ p = __d_path(dentry, vfsmnt, tsk->fs->root, tsk->fs->rootmnt,
-+ (char *)page, PAGE_SIZE); /* path relative to current's fs root */
-+ spin_unlock(&dcache_lock);
-+ }
-+ if (IS_ERR(p))
-+ p = "(undefined)"; /* no path available: print a placeholder */
-+
-+ sb = dentry->d_sb;
-+ printk(KERN_ALERT "%s check alert! file:[%s] from %d/%s, dev%x\n"
-+ "Task %d/%d[%s] from VE%d, execenv %d\n",
-+ str, p, VE_OWNER_FSTYPE(sb->s_type)->veid,
-+ sb->s_type->name, sb->s_dev,
-+ tsk->pid, virt_pid(tsk), tsk->comm,
-+ VE_TASK_INFO(tsk)->owner_env->veid,
-+ get_exec_env()->veid);
-+
-+ free_page(page); /* NOTE(review): also reached when page == 0 */
-+
-+ print_connection_info(tsk);
-+
-+ read_lock(&tasklist_lock);
-+ tsk = tsk->real_parent;
-+ get_task_struct(tsk); /* pin the parent before tasklist_lock is dropped */
-+ read_unlock(&tasklist_lock);
-+
-+ printk(KERN_ALERT "Parent %d/%d[%s] from VE%d\n",
-+ tsk->pid, virt_pid(tsk), tsk->comm,
-+ VE_TASK_INFO(tsk)->owner_env->veid);
-+
-+ print_connection_info(tsk);
-+ put_task_struct(tsk); /* drops the reference taken above */
-+ dump_stack();
-+}
-+#endif
-+/* Access check on @dentry: -EACCES if ve_area_access_check denies it, else 0. */
-+int check_area_access_ve(struct dentry *dentry, struct vfsmount *mnt)
-+{
-+#ifdef CONFIG_VE
-+ int check, alert, deny;
-+ /* super env: DCACHE_VIRTUAL dentries alert; other envs: unmarked ones do */
-+ if (ve_is_super(get_exec_env())) {
-+ check = ve_area_access_check & VE0_AREA_ACC_CHECK;
-+ alert = dentry->d_flags & DCACHE_VIRTUAL;
-+ deny = ve_area_access_check & VE0_AREA_ACC_DENY;
-+ } else {
-+ check = ve_area_access_check & VE_AREA_ACC_CHECK;
-+ alert = !(dentry->d_flags & DCACHE_VIRTUAL);
-+ deny = ve_area_access_check & VE_AREA_ACC_DENY;
-+ }
-+ /* reporting and denying are controlled by independent bits */
-+ if (check && alert)
-+ check_alert(mnt, dentry, "Access");
-+ if (deny && alert)
-+ return -EACCES;
-+#endif
-+ return 0; /* always 0 when CONFIG_VE is not set */
-+}
-+/* Exec check on @dentry: -EACCES if ve_area_access_check denies it, else 0. */
-+int check_area_execute_ve(struct dentry *dentry, struct vfsmount *mnt)
-+{
-+#ifdef CONFIG_VE
-+ int check, alert, deny;
-+ /* super env: DCACHE_VIRTUAL dentries alert; other envs: unmarked ones do */
-+ if (ve_is_super(get_exec_env())) {
-+ check = ve_area_access_check & VE0_AREA_EXEC_CHECK;
-+ alert = dentry->d_flags & DCACHE_VIRTUAL;
-+ deny = ve_area_access_check & VE0_AREA_EXEC_DENY;
-+ } else {
-+ check = ve_area_access_check & VE_AREA_EXEC_CHECK;
-+ alert = !(dentry->d_flags & DCACHE_VIRTUAL);
-+ deny = ve_area_access_check & VE_AREA_EXEC_DENY;
-+ }
-+ /* reporting and denying are controlled by independent bits */
-+ if (check && alert)
-+ check_alert(mnt, dentry, "Exec");
-+ if (deny && alert)
-+ return -EACCES;
-+#endif
-+ return 0; /* always 0 when CONFIG_VE is not set */
-+}
-+
- /*
- * NOTE! The user-level library version returns a
- * character pointer. The kernel system call just
-@@ -1584,10 +2059,12 @@ resume:
- goto repeat;
- }
- atomic_dec(&dentry->d_count);
-+ ub_dentry_uncharge(dentry);
- }
- if (this_parent != root) {
- next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
-+ ub_dentry_uncharge(this_parent);
- this_parent = this_parent->d_parent;
- goto resume;
- }
-@@ -1736,7 +2213,8 @@ void __init vfs_caches_init(unsigned lon
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-
- filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
-+ NULL, NULL);
-
- dcache_init(mempages);
- inode_init(mempages);
-diff -upr linux-2.6.16.orig/fs/devpts/inode.c linux-2.6.16-026test009/fs/devpts/inode.c
---- linux-2.6.16.orig/fs/devpts/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/devpts/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -12,6 +12,7 @@
-
- #include <linux/module.h>
- #include <linux/init.h>
-+#include <linux/ve.h>
- #include <linux/fs.h>
- #include <linux/sched.h>
- #include <linux/namei.h>
-@@ -21,16 +22,17 @@
-
- #define DEVPTS_SUPER_MAGIC 0x1cd1
-
-+struct devpts_config devpts_config = {.mode = 0600};
-+
-+#ifndef CONFIG_VE
- static struct vfsmount *devpts_mnt;
- static struct dentry *devpts_root;
--
--static struct {
-- int setuid;
-- int setgid;
-- uid_t uid;
-- gid_t gid;
-- umode_t mode;
--} config = {.mode = 0600};
-+#define config devpts_config
-+#else
-+#define devpts_mnt (get_exec_env()->devpts_mnt)
-+#define devpts_root (get_exec_env()->devpts_root)
-+#define config (*(get_exec_env()->devpts_config))
-+#endif
-
- static int devpts_remount(struct super_block *sb, int *flags, char *data)
- {
-@@ -56,7 +58,8 @@ static int devpts_remount(struct super_b
- } else if (sscanf(this_char, "mode=%o%c", &n, &dummy) == 1)
- mode = n & ~S_IFMT;
- else {
-- printk("devpts: called with bogus options\n");
-+ ve_printk(VE_LOG,
-+ "devpts: called with bogus options\n");
- return -EINVAL;
- }
- }
-@@ -114,13 +117,15 @@ static struct super_block *devpts_get_sb
- return get_sb_single(fs_type, flags, data, devpts_fill_super);
- }
-
--static struct file_system_type devpts_fs_type = {
-+struct file_system_type devpts_fs_type = {
- .owner = THIS_MODULE,
- .name = "devpts",
- .get_sb = devpts_get_sb,
- .kill_sb = kill_anon_super,
- };
-
-+EXPORT_SYMBOL(devpts_fs_type);
-+
- /*
- * The normal naming convention is simply /dev/pts/<number>; this conforms
- * to the System V naming convention
-@@ -212,6 +217,7 @@ static int __init init_devpts_fs(void)
-
- static void __exit exit_devpts_fs(void)
- {
-+ /* the code is never called, the argument is irrelevant */
- unregister_filesystem(&devpts_fs_type);
- mntput(devpts_mnt);
- }
-diff -upr linux-2.6.16.orig/fs/eventpoll.c linux-2.6.16-026test009/fs/eventpoll.c
---- linux-2.6.16.orig/fs/eventpoll.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/eventpoll.c 2006-04-19 15:02:12.000000000 +0400
-@@ -105,11 +105,6 @@
- #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
-
-
--struct epoll_filefd {
-- struct file *file;
-- int fd;
--};
--
- /*
- * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
- * It is used to keep track on all tasks that are currently inside the wake_up() code
-@@ -132,36 +127,6 @@ struct poll_safewake {
- spinlock_t lock;
- };
-
--/*
-- * This structure is stored inside the "private_data" member of the file
-- * structure and rapresent the main data sructure for the eventpoll
-- * interface.
-- */
--struct eventpoll {
-- /* Protect the this structure access */
-- rwlock_t lock;
--
-- /*
-- * This semaphore is used to ensure that files are not removed
-- * while epoll is using them. This is read-held during the event
-- * collection loop and it is write-held during the file cleanup
-- * path, the epoll file exit code and the ctl operations.
-- */
-- struct rw_semaphore sem;
--
-- /* Wait queue used by sys_epoll_wait() */
-- wait_queue_head_t wq;
--
-- /* Wait queue used by file->poll() */
-- wait_queue_head_t poll_wait;
--
-- /* List of ready file descriptors */
-- struct list_head rdllist;
--
-- /* RB-Tree root used to store monitored fd structs */
-- struct rb_root rbr;
--};
--
- /* Wait structure used by the poll hooks */
- struct eppoll_entry {
- /* List header used to link this structure to the "struct epitem" */
-@@ -180,51 +145,6 @@ struct eppoll_entry {
- wait_queue_head_t *whead;
- };
-
--/*
-- * Each file descriptor added to the eventpoll interface will
-- * have an entry of this type linked to the hash.
-- */
--struct epitem {
-- /* RB-Tree node used to link this structure to the eventpoll rb-tree */
-- struct rb_node rbn;
--
-- /* List header used to link this structure to the eventpoll ready list */
-- struct list_head rdllink;
--
-- /* The file descriptor information this item refers to */
-- struct epoll_filefd ffd;
--
-- /* Number of active wait queue attached to poll operations */
-- int nwait;
--
-- /* List containing poll wait queues */
-- struct list_head pwqlist;
--
-- /* The "container" of this item */
-- struct eventpoll *ep;
--
-- /* The structure that describe the interested events and the source fd */
-- struct epoll_event event;
--
-- /*
-- * Used to keep track of the usage count of the structure. This avoids
-- * that the structure will desappear from underneath our processing.
-- */
-- atomic_t usecnt;
--
-- /* List header used to link this item to the "struct file" items list */
-- struct list_head fllink;
--
-- /* List header used to link the item to the transfer list */
-- struct list_head txlink;
--
-- /*
-- * This is used during the collection/transfer of events to userspace
-- * to pin items empty events set.
-- */
-- unsigned int revents;
--};
--
- /* Wrapper struct used by poll queueing */
- struct ep_pqueue {
- poll_table pt;
-@@ -239,14 +159,10 @@ static int ep_getfd(int *efd, struct ino
- struct eventpoll *ep);
- static int ep_alloc(struct eventpoll **pep);
- static void ep_free(struct eventpoll *ep);
--static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
- static void ep_use_epitem(struct epitem *epi);
--static void ep_release_epitem(struct epitem *epi);
- static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
- poll_table *pt);
- static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi);
--static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-- struct file *tfile, int fd);
- static int ep_modify(struct eventpoll *ep, struct epitem *epi,
- struct epoll_event *event);
- static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi);
-@@ -274,7 +190,8 @@ static struct super_block *eventpollfs_g
- /*
- * This semaphore is used to serialize ep_free() and eventpoll_release_file().
- */
--static struct semaphore epsem;
-+struct semaphore epsem;
-+EXPORT_SYMBOL_GPL(epsem);
-
- /* Safe wake up implementation */
- static struct poll_safewake psw;
-@@ -293,6 +210,7 @@ static struct file_operations eventpoll_
- .release = ep_eventpoll_close,
- .poll = ep_eventpoll_poll
- };
-+EXPORT_SYMBOL_GPL(eventpoll_fops);
-
- /*
- * This is used to register the virtual file system from where
-@@ -542,7 +460,7 @@ eexit_1:
- current, size, error));
- return error;
- }
--
-+EXPORT_SYMBOL_GPL(sys_epoll_create);
-
- /*
- * The following function implements the controller interface for
-@@ -852,7 +770,7 @@ static void ep_free(struct eventpoll *ep
- * the returned item, so the caller must call ep_release_epitem()
- * after finished using the "struct epitem".
- */
--static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
-+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
- {
- int kcmp;
- unsigned long flags;
-@@ -882,6 +800,7 @@ static struct epitem *ep_find(struct eve
-
- return epir;
- }
-+EXPORT_SYMBOL_GPL(ep_find);
-
-
- /*
-@@ -900,13 +819,13 @@ static void ep_use_epitem(struct epitem
- * has finished using the structure. It might lead to freeing the
- * structure itself if the count goes to zero.
- */
--static void ep_release_epitem(struct epitem *epi)
-+void ep_release_epitem(struct epitem *epi)
- {
-
- if (atomic_dec_and_test(&epi->usecnt))
- kmem_cache_free(epi_cache, epi);
- }
--
-+EXPORT_SYMBOL_GPL(ep_release_epitem);
-
- /*
- * This is the callback that is used to add our wait queue to the
-@@ -952,7 +871,7 @@ static void ep_rbtree_insert(struct even
- }
-
-
--static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- struct file *tfile, int fd)
- {
- int error, revents, pwake = 0;
-@@ -1044,6 +963,7 @@ eexit_2:
- eexit_1:
- return error;
- }
-+EXPORT_SYMBOL_GPL(ep_insert);
-
-
- /*
-diff -upr linux-2.6.16.orig/fs/exec.c linux-2.6.16-026test009/fs/exec.c
---- linux-2.6.16.orig/fs/exec.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/exec.c 2006-04-19 15:02:12.000000000 +0400
-@@ -53,6 +53,8 @@
- #include <asm/uaccess.h>
- #include <asm/mmu_context.h>
-
-+#include <ub/ub_vmpages.h>
-+
- #ifdef CONFIG_KMOD
- #include <linux/kmod.h>
- #endif
-@@ -64,6 +66,8 @@ int suid_dumpable = 0;
- EXPORT_SYMBOL(suid_dumpable);
- /* The maximal length of core_pattern is also specified in sysctl.c */
-
-+int sysctl_at_vsyscall;
-+
- static struct linux_binfmt *formats;
- static DEFINE_RWLOCK(binfmt_lock);
-
-@@ -135,7 +139,7 @@ asmlinkage long sys_uselib(const char __
- if (!S_ISREG(nd.dentry->d_inode->i_mode))
- goto exit;
-
-- error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
-+ error = vfs_permission(&nd, MAY_READ | MAY_EXEC, NULL);
- if (error)
- goto exit;
-
-@@ -308,6 +312,10 @@ void install_arg_page(struct vm_area_str
- struct mm_struct *mm = vma->vm_mm;
- pte_t * pte;
- spinlock_t *ptl;
-+ struct page_beancounter *pb;
-+
-+ if (unlikely(pb_alloc(&pb)))
-+ goto out_nopb;
-
- if (unlikely(anon_vma_prepare(vma)))
- goto out;
-@@ -321,15 +329,21 @@ void install_arg_page(struct vm_area_str
- goto out;
- }
- inc_mm_counter(mm, anon_rss);
-+ inc_vma_rss(vma);
- lru_cache_add_active(page);
- set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
- page, vma->vm_page_prot))));
-+ pb_add_ref(page, mm, &pb);
-+ ub_unused_privvm_dec(mm, vma);
-+ pb_free(&pb);
- page_add_new_anon_rmap(page, vma, address);
- pte_unmap_unlock(pte, ptl);
-
- /* no need for flush_tlb */
- return;
- out:
-+ pb_free(&pb);
-+out_nopb:
- __free_page(page);
- force_sig(SIGKILL, current);
- }
-@@ -404,9 +418,13 @@ int setup_arg_pages(struct linux_binprm
- bprm->loader += stack_base;
- bprm->exec += stack_base;
-
-- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-+ if (ub_memory_charge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags,
-+ NULL, UB_SOFT))
-+ goto fail_charge;
-+
-+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL | __GFP_SOFT_UBC);
- if (!mpnt)
-- return -ENOMEM;
-+ goto fail_alloc;
-
- memset(mpnt, 0, sizeof(*mpnt));
-
-@@ -450,6 +468,11 @@ int setup_arg_pages(struct linux_binprm
- up_write(&mm->mmap_sem);
-
- return 0;
-+
-+fail_alloc:
-+ ub_memory_uncharge(mm, arg_size, VM_STACK_FLAGS | mm->def_flags, NULL);
-+fail_charge:
-+ return -ENOMEM;
- }
-
- EXPORT_SYMBOL(setup_arg_pages);
-@@ -471,7 +494,7 @@ static inline void free_arg_pages(struct
-
- #endif /* CONFIG_MMU */
-
--struct file *open_exec(const char *name)
-+struct file *open_exec(const char *name, struct linux_binprm *bprm)
- {
- struct nameidata nd;
- int err;
-@@ -485,9 +508,16 @@ struct file *open_exec(const char *name)
- file = ERR_PTR(-EACCES);
- if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
- S_ISREG(inode->i_mode)) {
-- int err = vfs_permission(&nd, MAY_EXEC);
-- if (!err && !(inode->i_mode & 0111))
-- err = -EACCES;
-+ int err;
-+ struct exec_perm *perm;
-+
-+ if (bprm != NULL) {
-+ perm = &bprm->perm;
-+ perm->set = 0;
-+ } else
-+ perm = NULL;
-+
-+ err = vfs_permission(&nd, MAY_EXEC, perm);
- file = ERR_PTR(err);
- if (!err) {
- file = nameidata_to_filp(&nd, O_RDONLY);
-@@ -657,7 +687,7 @@ static int de_thread(struct task_struct
- */
- if (!thread_group_leader(current)) {
- struct task_struct *parent;
-- struct dentry *proc_dentry1, *proc_dentry2;
-+ struct dentry *proc_dentry1[2], *proc_dentry2[2];
- unsigned long ptrace;
-
- /*
-@@ -671,8 +701,8 @@ static int de_thread(struct task_struct
-
- spin_lock(&leader->proc_lock);
- spin_lock(&current->proc_lock);
-- proc_dentry1 = proc_pid_unhash(current);
-- proc_dentry2 = proc_pid_unhash(leader);
-+ proc_pid_unhash(current, proc_dentry1);
-+ proc_pid_unhash(leader, proc_dentry2);
- write_lock_irq(&tasklist_lock);
-
- BUG_ON(leader->tgid != current->tgid);
-@@ -829,7 +859,7 @@ int flush_old_exec(struct linux_binprm *
- {
- char * name;
- int i, ch, retval;
-- struct files_struct *files;
-+ struct files_struct *files, *old;
- char tcomm[sizeof(current->comm)];
-
- /*
-@@ -897,6 +927,7 @@ int flush_old_exec(struct linux_binprm *
- suid_keys(current);
- current->mm->dumpable = suid_dumpable;
- }
-+ current->mm->vps_dumpable = 1;
-
- /* An exec changes our domain. We are no longer part of the thread
- group */
-@@ -909,8 +940,11 @@ int flush_old_exec(struct linux_binprm *
- return 0;
-
- mmap_failed:
-- put_files_struct(current->files);
-+ old = current->files;
-+ task_lock(current);
- current->files = files;
-+ task_unlock(current);
-+ put_files_struct(old);
- out:
- return retval;
- }
-@@ -927,13 +961,6 @@ int prepare_binprm(struct linux_binprm *
- struct inode * inode = bprm->file->f_dentry->d_inode;
- int retval;
-
-- mode = inode->i_mode;
-- /*
-- * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-- * generic_permission lets a non-executable through
-- */
-- if (!(mode & 0111)) /* with at least _one_ execute bit set */
-- return -EACCES;
- if (bprm->file->f_op == NULL)
- return -EACCES;
-
-@@ -941,10 +968,24 @@ int prepare_binprm(struct linux_binprm *
- bprm->e_gid = current->egid;
-
- if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
-+ if (!bprm->perm.set) {
-+ /*
-+ * Compatibility fallback for filesystems that did not
-+ * fill in bprm->perm during the MAY_EXEC permission
-+ * check. Reading the inode here creates a time window
-+ * between that check and the setuid/setgid operations
-+ * and may be considered a security hole.
-+ */
-+ bprm->perm.mode = inode->i_mode;
-+ bprm->perm.uid = inode->i_uid;
-+ bprm->perm.gid = inode->i_gid;
-+ }
-+ mode = bprm->perm.mode;
-+
- /* Set-uid? */
- if (mode & S_ISUID) {
- current->personality &= ~PER_CLEAR_ON_SETID;
-- bprm->e_uid = inode->i_uid;
-+ bprm->e_uid = bprm->perm.uid;
- }
-
- /* Set-gid? */
-@@ -955,7 +996,7 @@ int prepare_binprm(struct linux_binprm *
- */
- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- current->personality &= ~PER_CLEAR_ON_SETID;
-- bprm->e_gid = inode->i_gid;
-+ bprm->e_gid = bprm->perm.gid;
- }
- }
-
-@@ -1054,7 +1095,7 @@ int search_binary_handler(struct linux_b
-
- loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
-
-- file = open_exec("/sbin/loader");
-+ file = open_exec("/sbin/loader", bprm);
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- return retval;
-@@ -1148,7 +1189,7 @@ int do_execve(char * filename,
- goto out_ret;
- memset(bprm, 0, sizeof(*bprm));
-
-- file = open_exec(filename);
-+ file = open_exec(filename, bprm);
- retval = PTR_ERR(file);
- if (IS_ERR(file))
- goto out_kfree;
-@@ -1288,7 +1329,7 @@ static void format_corename(char *corena
- case 'p':
- pid_in_pattern = 1;
- rc = snprintf(out_ptr, out_end - out_ptr,
-- "%d", current->tgid);
-+ "%d", virt_tgid(current));
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
-@@ -1332,7 +1373,7 @@ static void format_corename(char *corena
- case 'h':
- down_read(&uts_sem);
- rc = snprintf(out_ptr, out_end - out_ptr,
-- "%s", system_utsname.nodename);
-+ "%s", ve_utsname.nodename);
- up_read(&uts_sem);
- if (rc > out_end - out_ptr)
- goto out;
-@@ -1360,7 +1401,7 @@ static void format_corename(char *corena
- if (!pid_in_pattern
- && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
- rc = snprintf(out_ptr, out_end - out_ptr,
-- ".%d", current->tgid);
-+ ".%d", virt_tgid(current));
- if (rc > out_end - out_ptr)
- goto out;
- out_ptr += rc;
-@@ -1386,7 +1427,7 @@ static void zap_threads (struct mm_struc
- }
-
- read_lock(&tasklist_lock);
-- do_each_thread(g,p)
-+ do_each_thread_ve(g,p)
- if (mm == p->mm && p != tsk) {
- force_sig_specific(SIGKILL, p);
- mm->core_waiters++;
-@@ -1394,7 +1435,7 @@ static void zap_threads (struct mm_struc
- unlikely(p->parent->mm == mm))
- traced = 1;
- }
-- while_each_thread(g,p);
-+ while_each_thread_ve(g,p);
-
- read_unlock(&tasklist_lock);
-
-@@ -1406,12 +1447,12 @@ static void zap_threads (struct mm_struc
- * coredump to finish. Detach them so they can both die.
- */
- write_lock_irq(&tasklist_lock);
-- do_each_thread(g,p) {
-+ do_each_thread_ve(g,p) {
- if (mm == p->mm && p != tsk &&
- p->ptrace && p->parent->mm == mm) {
- __ptrace_detach(p, 0);
- }
-- } while_each_thread(g,p);
-+ } while_each_thread_ve(g,p);
- write_unlock_irq(&tasklist_lock);
- }
- }
-@@ -1447,7 +1488,8 @@ int do_coredump(long signr, int exit_cod
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- down_write(&mm->mmap_sem);
-- if (!mm->dumpable) {
-+ if (!mm->dumpable ||
-+ (!mm->vps_dumpable && !ve_is_super(get_exec_env()))) {
- up_write(&mm->mmap_sem);
- goto fail;
- }
-diff -upr linux-2.6.16.orig/fs/ext2/acl.c linux-2.6.16-026test009/fs/ext2/acl.c
---- linux-2.6.16.orig/fs/ext2/acl.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext2/acl.c 2006-04-19 15:02:11.000000000 +0400
-@@ -294,9 +294,10 @@ ext2_check_acl(struct inode *inode, int
- }
-
- int
--ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
-+ext2_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
-- return generic_permission(inode, mask, ext2_check_acl);
-+ return generic_permission(inode, mask, ext2_check_acl, perm);
- }
-
- /*
-diff -upr linux-2.6.16.orig/fs/ext2/acl.h linux-2.6.16-026test009/fs/ext2/acl.h
---- linux-2.6.16.orig/fs/ext2/acl.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext2/acl.h 2006-04-19 15:02:11.000000000 +0400
-@@ -58,7 +58,8 @@ static inline int ext2_acl_count(size_t
- #define EXT2_ACL_NOT_CACHED ((void *)-1)
-
- /* acl.c */
--extern int ext2_permission (struct inode *, int, struct nameidata *);
-+extern int ext2_permission (struct inode *, int, struct nameidata *,
-+ struct exec_perm *);
- extern int ext2_acl_chmod (struct inode *);
- extern int ext2_init_acl (struct inode *, struct inode *);
-
-diff -upr linux-2.6.16.orig/fs/ext2/namei.c linux-2.6.16-026test009/fs/ext2/namei.c
---- linux-2.6.16.orig/fs/ext2/namei.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext2/namei.c 2006-04-19 15:02:12.000000000 +0400
-@@ -31,6 +31,7 @@
- */
-
- #include <linux/pagemap.h>
-+#include <linux/quotaops.h>
- #include "ext2.h"
- #include "xattr.h"
- #include "acl.h"
-@@ -273,6 +274,8 @@ static int ext2_unlink(struct inode * di
- struct page * page;
- int err = -ENOENT;
-
-+ DQUOT_INIT(inode);
-+
- de = ext2_find_entry (dir, dentry, &page);
- if (!de)
- goto out;
-@@ -315,6 +318,9 @@ static int ext2_rename (struct inode * o
- struct ext2_dir_entry_2 * old_de;
- int err = -ENOENT;
-
-+ if (new_inode)
-+ DQUOT_INIT(new_inode);
-+
- old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
- if (!old_de)
- goto out;
-diff -upr linux-2.6.16.orig/fs/ext2/super.c linux-2.6.16-026test009/fs/ext2/super.c
---- linux-2.6.16.orig/fs/ext2/super.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext2/super.c 2006-04-19 15:02:12.000000000 +0400
-@@ -996,7 +996,7 @@ static int ext2_remount (struct super_bl
- es = sbi->s_es;
- if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
- (old_mount_opt & EXT2_MOUNT_XIP)) &&
-- invalidate_inodes(sb))
-+ invalidate_inodes(sb, 0))
- ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
- "xip remain in cache (no functional problem)");
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
-@@ -1205,7 +1205,7 @@ static struct file_system_type ext2_fs_t
- .name = "ext2",
- .get_sb = ext2_get_sb,
- .kill_sb = kill_block_super,
-- .fs_flags = FS_REQUIRES_DEV,
-+ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED,
- };
-
- static int __init init_ext2_fs(void)
-diff -upr linux-2.6.16.orig/fs/ext3/acl.c linux-2.6.16-026test009/fs/ext3/acl.c
---- linux-2.6.16.orig/fs/ext3/acl.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext3/acl.c 2006-04-19 15:02:11.000000000 +0400
-@@ -299,9 +299,10 @@ ext3_check_acl(struct inode *inode, int
- }
-
- int
--ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
-+ext3_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
-- return generic_permission(inode, mask, ext3_check_acl);
-+ return generic_permission(inode, mask, ext3_check_acl, perm);
- }
-
- /*
-diff -upr linux-2.6.16.orig/fs/ext3/acl.h linux-2.6.16-026test009/fs/ext3/acl.h
---- linux-2.6.16.orig/fs/ext3/acl.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext3/acl.h 2006-04-19 15:02:11.000000000 +0400
-@@ -58,7 +58,8 @@ static inline int ext3_acl_count(size_t
- #define EXT3_ACL_NOT_CACHED ((void *)-1)
-
- /* acl.c */
--extern int ext3_permission (struct inode *, int, struct nameidata *);
-+extern int ext3_permission (struct inode *, int, struct nameidata *,
-+ struct exec_perm *);
- extern int ext3_acl_chmod (struct inode *);
- extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
-
-diff -upr linux-2.6.16.orig/fs/ext3/resize.c linux-2.6.16-026test009/fs/ext3/resize.c
---- linux-2.6.16.orig/fs/ext3/resize.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext3/resize.c 2006-04-19 15:02:11.000000000 +0400
-@@ -974,6 +974,7 @@ int ext3_group_extend(struct super_block
- if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
- ext3_warning(sb, __FUNCTION__,
- "multiple resizers run on filesystem!");
-+ unlock_super(sb);
- err = -EBUSY;
- goto exit_put;
- }
-diff -upr linux-2.6.16.orig/fs/ext3/super.c linux-2.6.16-026test009/fs/ext3/super.c
---- linux-2.6.16.orig/fs/ext3/super.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ext3/super.c 2006-04-19 15:02:12.000000000 +0400
-@@ -2661,7 +2661,7 @@ static struct file_system_type ext3_fs_t
- .name = "ext3",
- .get_sb = ext3_get_sb,
- .kill_sb = kill_block_super,
-- .fs_flags = FS_REQUIRES_DEV,
-+ .fs_flags = FS_REQUIRES_DEV | FS_VIRTUALIZED,
- };
-
- static int __init init_ext3_fs(void)
-diff -upr linux-2.6.16.orig/fs/fcntl.c linux-2.6.16-026test009/fs/fcntl.c
---- linux-2.6.16.orig/fs/fcntl.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/fcntl.c 2006-04-19 15:02:12.000000000 +0400
-@@ -18,6 +18,7 @@
- #include <linux/ptrace.h>
- #include <linux/signal.h>
- #include <linux/rcupdate.h>
-+#include <linux/ve_owner.h>
-
- #include <asm/poll.h>
- #include <asm/siginfo.h>
-@@ -190,6 +191,7 @@ out_fput:
- fput(file);
- goto out;
- }
-+EXPORT_SYMBOL_GPL(sys_dup2);
-
- asmlinkage long sys_dup(unsigned int fildes)
- {
-@@ -254,6 +256,7 @@ static int setfl(int fd, struct file * f
- static void f_modown(struct file *filp, unsigned long pid,
- uid_t uid, uid_t euid, int force)
- {
-+ pid = comb_vpid_to_pid(pid);
- write_lock_irq(&filp->f_owner.lock);
- if (force || !filp->f_owner.pid) {
- filp->f_owner.pid = pid;
-@@ -320,7 +323,7 @@ static long do_fcntl(int fd, unsigned in
- * current syscall conventions, the only way
- * to fix this will be in libc.
- */
-- err = filp->f_owner.pid;
-+ err = comb_pid_to_vpid(filp->f_owner.pid);
- force_successful_syscall_return();
- break;
- case F_SETOWN:
-@@ -472,23 +475,29 @@ static void send_sigio_to_task(struct ta
- void send_sigio(struct fown_struct *fown, int fd, int band)
- {
- struct task_struct *p;
-+ struct file *f;
-+ struct ve_struct *ve;
- int pid;
-
- read_lock(&fown->lock);
- pid = fown->pid;
- if (!pid)
- goto out_unlock_fown;
-+
-+ /* hack: fown's are always embedded in struct file */
-+ f = container_of(fown, struct file, f_owner);
-+ ve = VE_OWNER_FILP(f);
-
- read_lock(&tasklist_lock);
- if (pid > 0) {
-- p = find_task_by_pid(pid);
-- if (p) {
-+ p = find_task_by_pid_all(pid);
-+ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
- send_sigio_to_task(p, fown, fd, band);
- }
- } else {
-- do_each_task_pid(-pid, PIDTYPE_PGID, p) {
-+ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
- send_sigio_to_task(p, fown, fd, band);
-- } while_each_task_pid(-pid, PIDTYPE_PGID, p);
-+ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
- }
- read_unlock(&tasklist_lock);
- out_unlock_fown:
-@@ -505,6 +514,8 @@ static void send_sigurg_to_task(struct t
- int send_sigurg(struct fown_struct *fown)
- {
- struct task_struct *p;
-+ struct file *f;
-+ struct ve_struct *ve;
- int pid, ret = 0;
-
- read_lock(&fown->lock);
-@@ -513,17 +524,19 @@ int send_sigurg(struct fown_struct *fown
- goto out_unlock_fown;
-
- ret = 1;
-+ f = container_of(fown, struct file, f_owner);
-+ ve = VE_OWNER_FILP(f);
-
- read_lock(&tasklist_lock);
- if (pid > 0) {
-- p = find_task_by_pid(pid);
-- if (p) {
-+ p = find_task_by_pid_all(pid);
-+ if (p && ve_accessible(VE_TASK_INFO(p)->owner_env, ve)) {
- send_sigurg_to_task(p, fown);
- }
- } else {
-- do_each_task_pid(-pid, PIDTYPE_PGID, p) {
-+ __do_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve) {
- send_sigurg_to_task(p, fown);
-- } while_each_task_pid(-pid, PIDTYPE_PGID, p);
-+ } __while_each_task_pid_ve(-pid, PIDTYPE_PGID, p, ve);
- }
- read_unlock(&tasklist_lock);
- out_unlock_fown:
-diff -upr linux-2.6.16.orig/fs/file.c linux-2.6.16-026test009/fs/file.c
---- linux-2.6.16.orig/fs/file.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/file.c 2006-04-19 15:02:12.000000000 +0400
-@@ -8,6 +8,7 @@
-
- #include <linux/fs.h>
- #include <linux/mm.h>
-+#include <linux/module.h>
- #include <linux/time.h>
- #include <linux/slab.h>
- #include <linux/vmalloc.h>
-@@ -18,6 +19,8 @@
- #include <linux/rcupdate.h>
- #include <linux/workqueue.h>
-
-+#include <ub/ub_mem.h>
-+
- struct fdtable_defer {
- spinlock_t lock;
- struct work_struct wq;
-@@ -44,9 +47,9 @@ struct file ** alloc_fd_array(int num)
- int size = num * sizeof(struct file *);
-
- if (size <= PAGE_SIZE)
-- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
-+ new_fds = (struct file **) ub_kmalloc(size, GFP_KERNEL);
- else
-- new_fds = (struct file **) vmalloc(size);
-+ new_fds = (struct file **) ub_vmalloc(size);
- return new_fds;
- }
-
-@@ -212,9 +215,9 @@ fd_set * alloc_fdset(int num)
- int size = num / 8;
-
- if (size <= PAGE_SIZE)
-- new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
-+ new_fdset = (fd_set *) ub_kmalloc(size, GFP_KERNEL);
- else
-- new_fdset = (fd_set *) vmalloc(size);
-+ new_fdset = (fd_set *) ub_vmalloc(size);
- return new_fdset;
- }
-
-@@ -302,7 +305,7 @@ out:
- * both fd array and fdset. It is expected to be called with the
- * files_lock held.
- */
--static int expand_fdtable(struct files_struct *files, int nr)
-+int expand_fdtable(struct files_struct *files, int nr)
- __releases(files->file_lock)
- __acquires(files->file_lock)
- {
-@@ -338,6 +341,7 @@ static int expand_fdtable(struct files_s
- out:
- return error;
- }
-+EXPORT_SYMBOL_GPL(expand_fdtable);
-
- /*
- * Expand files.
-diff -upr linux-2.6.16.orig/fs/file_table.c linux-2.6.16-026test009/fs/file_table.c
---- linux-2.6.16.orig/fs/file_table.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/file_table.c 2006-04-19 15:02:12.000000000 +0400
-@@ -9,6 +9,7 @@
- #include <linux/string.h>
- #include <linux/slab.h>
- #include <linux/file.h>
-+#include <linux/ve_owner.h>
- #include <linux/init.h>
- #include <linux/module.h>
- #include <linux/smp_lock.h>
-@@ -25,6 +26,8 @@
-
- #include <asm/atomic.h>
-
-+#include <ub/ub_misc.h>
-+
- /* sysctl tunables... */
- struct files_stat_struct files_stat = {
- .max_files = NR_FILE
-@@ -38,6 +41,8 @@ static struct percpu_counter nr_files __
- static inline void file_free_rcu(struct rcu_head *head)
- {
- struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
-+ ub_file_uncharge(f);
-+ put_ve(VE_OWNER_FILP(f));
- kmem_cache_free(filp_cachep, f);
- }
-
-@@ -109,6 +114,12 @@ struct file *get_empty_filp(void)
-
- percpu_counter_inc(&nr_files);
- memset(f, 0, sizeof(*f));
-+
-+ if (ub_file_charge(f))
-+ goto fail_ch;
-+
-+ SET_VE_OWNER_FILP(f, get_ve(get_exec_env()));
-+
- if (security_file_alloc(f))
- goto fail_sec;
-
-@@ -134,6 +145,10 @@ fail_sec:
- file_free(f);
- fail:
- return NULL;
-+
-+fail_ch:
-+ kmem_cache_free(filp_cachep, f);
-+ return NULL;
- }
-
- EXPORT_SYMBOL(get_empty_filp);
-diff -upr linux-2.6.16.orig/fs/filesystems.c linux-2.6.16-026test009/fs/filesystems.c
---- linux-2.6.16.orig/fs/filesystems.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/filesystems.c 2006-04-19 15:02:12.000000000 +0400
-@@ -13,6 +13,7 @@
- #include <linux/init.h>
- #include <linux/module.h>
- #include <linux/sched.h> /* for 'current' */
-+#include <linux/ve_owner.h>
- #include <asm/uaccess.h>
-
- /*
-@@ -22,8 +23,8 @@
- * During the unload module must call unregister_filesystem().
- * We can access the fields of list element if:
- * 1) spinlock is held or
-- * 2) we hold the reference to the module.
-- * The latter can be guaranteed by call of try_module_get(); if it
-+ * 2) we hold the reference to the element.
-+ * The latter can be guaranteed by call of try_filesystem(); if it
- * returned 0 we must skip the element, otherwise we got the reference.
- * Once the reference is obtained we can drop the spinlock.
- */
-@@ -31,23 +32,51 @@
- static struct file_system_type *file_systems;
- static DEFINE_RWLOCK(file_systems_lock);
-
-+int try_get_filesystem(struct file_system_type *fs)
-+{
-+ if (try_module_get(fs->owner)) {
-+#ifdef CONFIG_VE
-+ get_ve(VE_OWNER_FSTYPE(fs));
-+#endif
-+ return 1;
-+ }
-+ return 0;
-+}
-+
- /* WARNING: This can be used only if we _already_ own a reference */
- void get_filesystem(struct file_system_type *fs)
- {
-+#ifdef CONFIG_VE
-+ get_ve(VE_OWNER_FSTYPE(fs));
-+#endif
- __module_get(fs->owner);
- }
-
- void put_filesystem(struct file_system_type *fs)
- {
- module_put(fs->owner);
-+#ifdef CONFIG_VE
-+ put_ve(VE_OWNER_FSTYPE(fs));
-+#endif
-+}
-+
-+static inline int check_ve_fstype(struct file_system_type *p,
-+ struct ve_struct *env)
-+{
-+ return ((p->fs_flags & FS_VIRTUALIZED) ||
-+ ve_accessible_strict(VE_OWNER_FSTYPE(p), env));
- }
-
--static struct file_system_type **find_filesystem(const char *name)
-+static struct file_system_type **find_filesystem(const char *name,
-+ struct ve_struct *env)
- {
- struct file_system_type **p;
-- for (p=&file_systems; *p; p=&(*p)->next)
-+ for (p=&file_systems; *p; p=&(*p)->next) {
-+ if (!check_ve_fstype(*p, env))
-+ continue;
- if (strcmp((*p)->name,name) == 0)
- break;
-+ }
- return p;
- }
-
-@@ -74,8 +103,10 @@ int register_filesystem(struct file_syst
- if (fs->next)
- return -EBUSY;
- INIT_LIST_HEAD(&fs->fs_supers);
-+ if (VE_OWNER_FSTYPE(fs) == NULL)
-+ SET_VE_OWNER_FSTYPE(fs, get_ve0());
- write_lock(&file_systems_lock);
-- p = find_filesystem(fs->name);
-+ p = find_filesystem(fs->name, VE_OWNER_FSTYPE(fs));
- if (*p)
- res = -EBUSY;
- else
-@@ -132,11 +163,14 @@ static int fs_index(const char __user *
-
- err = -EINVAL;
- read_lock(&file_systems_lock);
-- for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
-+ for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next) {
-+ if (!check_ve_fstype(tmp, get_exec_env()))
-+ continue;
- if (strcmp(tmp->name,name) == 0) {
- err = index;
- break;
- }
-+ index++;
- }
- read_unlock(&file_systems_lock);
- putname(name);
-@@ -149,9 +183,15 @@ static int fs_name(unsigned int index, c
- int len, res;
-
- read_lock(&file_systems_lock);
-- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
-- if (index <= 0 && try_module_get(tmp->owner))
-- break;
-+ for (tmp = file_systems; tmp; tmp = tmp->next) {
-+ if (!check_ve_fstype(tmp, get_exec_env()))
-+ continue;
-+ if (!index) {
-+ if (try_get_filesystem(tmp))
-+ break;
-+ } else
-+ index--;
-+ }
- read_unlock(&file_systems_lock);
- if (!tmp)
- return -EINVAL;
-@@ -169,8 +209,9 @@ static int fs_maxindex(void)
- int index;
-
- read_lock(&file_systems_lock);
-- for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
-- ;
-+ for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next)
-+ if (check_ve_fstype(tmp, get_exec_env()))
-+ index++;
- read_unlock(&file_systems_lock);
- return index;
- }
-@@ -206,9 +247,10 @@ int get_filesystem_list(char * buf)
- read_lock(&file_systems_lock);
- tmp = file_systems;
- while (tmp && len < PAGE_SIZE - 80) {
-- len += sprintf(buf+len, "%s\t%s\n",
-- (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-- tmp->name);
-+ if (check_ve_fstype(tmp, get_exec_env()))
-+ len += sprintf(buf+len, "%s\t%s\n",
-+ (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-+ tmp->name);
- tmp = tmp->next;
- }
- read_unlock(&file_systems_lock);
-@@ -220,14 +262,14 @@ struct file_system_type *get_fs_type(con
- struct file_system_type *fs;
-
- read_lock(&file_systems_lock);
-- fs = *(find_filesystem(name));
-- if (fs && !try_module_get(fs->owner))
-+ fs = *(find_filesystem(name, get_exec_env()));
-+ if (fs && !try_get_filesystem(fs))
- fs = NULL;
- read_unlock(&file_systems_lock);
- if (!fs && (request_module("%s", name) == 0)) {
- read_lock(&file_systems_lock);
-- fs = *(find_filesystem(name));
-- if (fs && !try_module_get(fs->owner))
-+ fs = *(find_filesystem(name, get_exec_env()));
-+ if (fs && !try_get_filesystem(fs))
- fs = NULL;
- read_unlock(&file_systems_lock);
- }
-@@ -235,3 +277,5 @@ struct file_system_type *get_fs_type(con
- }
-
- EXPORT_SYMBOL(get_fs_type);
-+EXPORT_SYMBOL(get_filesystem);
-+EXPORT_SYMBOL(put_filesystem);
-diff -upr linux-2.6.16.orig/fs/fuse/dir.c linux-2.6.16-026test009/fs/fuse/dir.c
---- linux-2.6.16.orig/fs/fuse/dir.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/fuse/dir.c 2006-04-19 15:02:11.000000000 +0400
-@@ -708,14 +708,15 @@ static int fuse_access(struct inode *ino
- * access request is sent. Execute permission is still checked
- * locally based on file mode.
- */
--static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
-+static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- struct fuse_conn *fc = get_fuse_conn(inode);
-
- if (!fuse_allow_task(fc, current))
- return -EACCES;
- else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
-- int err = generic_permission(inode, mask, NULL);
-+ int err = generic_permission(inode, mask, NULL, perm);
-
- /* If permission is denied, try to refresh file
- attributes. This is also needed, because the root
-@@ -723,7 +724,7 @@ static int fuse_permission(struct inode
- if (err == -EACCES) {
- err = fuse_do_getattr(inode);
- if (!err)
-- err = generic_permission(inode, mask, NULL);
-+ err = generic_permission(inode, mask, NULL, perm);
- }
-
- /* Note: the opposite of the above test does not
-diff -upr linux-2.6.16.orig/fs/fuse/file.c linux-2.6.16-026test009/fs/fuse/file.c
---- linux-2.6.16.orig/fs/fuse/file.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/fuse/file.c 2006-04-19 15:02:11.000000000 +0400
-@@ -397,8 +397,12 @@ static int fuse_readpages(struct file *f
- return -EINTR;
-
- err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
-- if (!err)
-- fuse_send_readpages(data.req, file, inode);
-+ if (!err) {
-+ if (data.req->num_pages)
-+ fuse_send_readpages(data.req, file, inode);
-+ else
-+ fuse_put_request(fc, data.req);
-+ }
- return err;
- }
-
-diff -upr linux-2.6.16.orig/fs/hfs/inode.c linux-2.6.16-026test009/fs/hfs/inode.c
---- linux-2.6.16.orig/fs/hfs/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/hfs/inode.c 2006-04-19 15:02:11.000000000 +0400
-@@ -520,11 +520,11 @@ void hfs_clear_inode(struct inode *inode
- }
-
- static int hfs_permission(struct inode *inode, int mask,
-- struct nameidata *nd)
-+ struct nameidata *nd, struct exec_perm *perm)
- {
- if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
- return 0;
-- return generic_permission(inode, mask, NULL);
-+ return generic_permission(inode, mask, NULL, perm);
- }
-
- static int hfs_file_open(struct inode *inode, struct file *file)
-diff -upr linux-2.6.16.orig/fs/hfsplus/inode.c linux-2.6.16-026test009/fs/hfsplus/inode.c
---- linux-2.6.16.orig/fs/hfsplus/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/hfsplus/inode.c 2006-04-19 15:02:11.000000000 +0400
-@@ -237,7 +237,8 @@ static void hfsplus_set_perms(struct ino
- perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
- }
-
--static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
-+static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
- * open_exec has the same test, so it's still not executable, if a x bit
-@@ -245,7 +246,7 @@ static int hfsplus_permission(struct ino
- */
- if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111))
- return 0;
-- return generic_permission(inode, mask, NULL);
-+ return generic_permission(inode, mask, NULL, perm);
- }
-
-
-diff -upr linux-2.6.16.orig/fs/hostfs/hostfs_kern.c linux-2.6.16-026test009/fs/hostfs/hostfs_kern.c
---- linux-2.6.16.orig/fs/hostfs/hostfs_kern.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/hostfs/hostfs_kern.c 2006-04-19 15:02:11.000000000 +0400
-@@ -796,7 +796,8 @@ int hostfs_rename(struct inode *from_ino
- return(err);
- }
-
--int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
-+int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- char *name;
- int r = 0, w = 0, x = 0, err;
-@@ -814,7 +815,7 @@ int hostfs_permission(struct inode *ino,
- err = access_file(name, r, w, x);
- kfree(name);
- if(!err)
-- err = generic_permission(ino, desired, NULL);
-+ err = generic_permission(ino, desired, NULL, perm);
- return err;
- }
-
-diff -upr linux-2.6.16.orig/fs/hpfs/namei.c linux-2.6.16-026test009/fs/hpfs/namei.c
---- linux-2.6.16.orig/fs/hpfs/namei.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/hpfs/namei.c 2006-04-19 15:02:11.000000000 +0400
-@@ -415,7 +415,7 @@ again:
- d_drop(dentry);
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1 ||
-- permission(inode, MAY_WRITE, NULL) ||
-+ permission(inode, MAY_WRITE, NULL, NULL) ||
- !S_ISREG(inode->i_mode) ||
- get_write_access(inode)) {
- spin_unlock(&dentry->d_lock);
-diff -upr linux-2.6.16.orig/fs/hugetlbfs/inode.c linux-2.6.16-026test009/fs/hugetlbfs/inode.c
---- linux-2.6.16.orig/fs/hugetlbfs/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/hugetlbfs/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -800,7 +800,7 @@ struct file *hugetlb_zero_setup(size_t s
- struct inode *inode;
- struct dentry *dentry, *root;
- struct qstr quick_string;
-- char buf[16];
-+ char buf[64];
-
- if (!can_do_hugetlb_shm())
- return ERR_PTR(-EPERM);
-@@ -812,7 +812,8 @@ struct file *hugetlb_zero_setup(size_t s
- return ERR_PTR(-ENOMEM);
-
- root = hugetlbfs_vfsmount->mnt_root;
-- snprintf(buf, 16, "%lu", hugetlbfs_counter());
-+ snprintf(buf, sizeof(buf), "VE%d-%lu",
-+ get_exec_env()->veid, hugetlbfs_counter());
- quick_string.name = buf;
- quick_string.len = strlen(quick_string.name);
- quick_string.hash = 0;
-diff -upr linux-2.6.16.orig/fs/inode.c linux-2.6.16-026test009/fs/inode.c
---- linux-2.6.16.orig/fs/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -9,6 +9,7 @@
- #include <linux/mm.h>
- #include <linux/dcache.h>
- #include <linux/init.h>
-+#include <linux/kernel_stat.h>
- #include <linux/quotaops.h>
- #include <linux/slab.h>
- #include <linux/writeback.h>
-@@ -98,13 +99,15 @@ DECLARE_MUTEX(iprune_sem);
- */
- struct inodes_stat_t inodes_stat;
-
--static kmem_cache_t * inode_cachep;
-+kmem_cache_t *inode_cachep;
-+
-+static struct address_space_operations vfs_empty_aops;
-+struct inode_operations vfs_empty_iops;
-+static struct file_operations vfs_empty_fops;
-+EXPORT_SYMBOL(vfs_empty_iops);
-
- static struct inode *alloc_inode(struct super_block *sb)
- {
-- static struct address_space_operations empty_aops;
-- static struct inode_operations empty_iops;
-- static struct file_operations empty_fops;
- struct inode *inode;
-
- if (sb->s_op->alloc_inode)
-@@ -119,8 +122,8 @@ static struct inode *alloc_inode(struct
- inode->i_blkbits = sb->s_blocksize_bits;
- inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
-- inode->i_op = &empty_iops;
-- inode->i_fop = &empty_fops;
-+ inode->i_op = &vfs_empty_iops;
-+ inode->i_fop = &vfs_empty_fops;
- inode->i_nlink = 1;
- atomic_set(&inode->i_writecount, 0);
- inode->i_size = 0;
-@@ -144,7 +147,7 @@ static struct inode *alloc_inode(struct
- return NULL;
- }
-
-- mapping->a_ops = &empty_aops;
-+ mapping->a_ops = &vfs_empty_aops;
- mapping->host = inode;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
-@@ -303,13 +306,57 @@ static void dispose_list(struct list_hea
- spin_unlock(&inode_lock);
- }
-
-+static void show_header(struct inode *inode)
-+{
-+ struct super_block *sb = inode->i_sb;
-+
-+ printk("VFS: Busy inodes after unmount. "
-+ "sb = %p, fs type = %s, sb count = %d, "
-+ "sb->s_root = %s\n", sb,
-+ (sb->s_type != NULL) ? sb->s_type->name : "",
-+ sb->s_count,
-+ (sb->s_root != NULL) ?
-+ (char *)sb->s_root->d_name.name : "");
-+}
-+
-+static void show_inode(struct list_head *tmp, struct inode *inode)
-+{
-+ struct dentry *d;
-+ int i;
-+
-+ printk("inode = %p, inode->i_count = %d, "
-+ "inode->i_nlink = %d, "
-+ "inode->i_mode = %d, "
-+ "inode->i_state = %ld, "
-+ "inode->i_flags = %d, "
-+ "inode->i_devices.next = %p, "
-+ "inode->i_devices.prev = %p, "
-+ "inode->i_ino = %ld\n",
-+ tmp,
-+ atomic_read(&inode->i_count),
-+ inode->i_nlink,
-+ inode->i_mode,
-+ inode->i_state,
-+ inode->i_flags,
-+ inode->i_devices.next,
-+ inode->i_devices.prev,
-+ inode->i_ino);
-+ printk("inode dump: ");
-+ for (i = 0; i < sizeof(*tmp); i++)
-+ printk("%2.2x ", *((u_char *)tmp + i));
-+ printk("\n");
-+ list_for_each_entry(d, &inode->i_dentry, d_alias)
-+ printk(" d_alias %s\n",
-+ d->d_name.name);
-+}
-+
- /*
- * Invalidate all inodes for a device.
- */
--static int invalidate_list(struct list_head *head, struct list_head *dispose)
-+static int invalidate_list(struct list_head *head, struct list_head *dispose, int check)
- {
- struct list_head *next;
-- int busy = 0, count = 0;
-+ int busy = 0, count = 0, once = 0;
-
- next = head->next;
- for (;;) {
-@@ -336,6 +383,14 @@ static int invalidate_list(struct list_h
- continue;
- }
- busy = 1;
-+
-+ if (check) {
-+ if (once) {
-+ once = 0;
-+ show_header(inode);
-+ }
-+ show_inode(tmp, inode);
-+ }
- }
- /* only unused inodes may be cached with i_count zero */
- inodes_stat.nr_unused -= count;
-@@ -350,7 +405,7 @@ static int invalidate_list(struct list_h
- * fails because there are busy inodes then a non zero value is returned.
- * If the discard is successful all the inodes have been discarded.
- */
--int invalidate_inodes(struct super_block * sb)
-+int invalidate_inodes(struct super_block * sb, int check)
- {
- int busy;
- LIST_HEAD(throw_away);
-@@ -358,7 +413,7 @@ int invalidate_inodes(struct super_block
- down(&iprune_sem);
- spin_lock(&inode_lock);
- inotify_unmount_inodes(&sb->s_inodes);
-- busy = invalidate_list(&sb->s_inodes, &throw_away);
-+ busy = invalidate_list(&sb->s_inodes, &throw_away, check);
- spin_unlock(&inode_lock);
-
- dispose_list(&throw_away);
-@@ -382,7 +437,7 @@ int __invalidate_device(struct block_dev
- * hold).
- */
- shrink_dcache_sb(sb);
-- res = invalidate_inodes(sb);
-+ res = invalidate_inodes(sb, 0);
- drop_super(sb);
- }
- invalidate_bdev(bdev, 0);
-@@ -478,6 +533,7 @@ static void prune_icache(int nr_to_scan)
- */
- static int shrink_icache_memory(int nr, gfp_t gfp_mask)
- {
-+ KSTAT_PERF_ENTER(shrink_icache)
- if (nr) {
- /*
- * Nasty deadlock avoidance. We may hold various FS locks,
-@@ -488,6 +544,7 @@ static int shrink_icache_memory(int nr,
- return -1;
- prune_icache(nr);
- }
-+ KSTAT_PERF_LEAVE(shrink_icache)
- return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
- }
-
-@@ -737,7 +794,7 @@ EXPORT_SYMBOL(iunique);
- struct inode *igrab(struct inode *inode)
- {
- spin_lock(&inode_lock);
-- if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
-+ if (inode && !(inode->i_state & (I_FREEING|I_WILL_FREE)))
- __iget(inode);
- else
- /*
-diff -upr linux-2.6.16.orig/fs/inotify.c linux-2.6.16-026test009/fs/inotify.c
---- linux-2.6.16.orig/fs/inotify.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/inotify.c 2006-04-19 15:02:11.000000000 +0400
-@@ -374,7 +374,7 @@ static int find_inode(const char __user
- if (error)
- return error;
- /* you can only watch an inode if you have read permissions on it */
-- error = vfs_permission(nd, MAY_READ);
-+ error = vfs_permission(nd, MAY_READ, NULL);
- if (error)
- path_release(nd);
- return error;
-diff -upr linux-2.6.16.orig/fs/ioprio.c linux-2.6.16-026test009/fs/ioprio.c
---- linux-2.6.16.orig/fs/ioprio.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ioprio.c 2006-04-19 15:02:12.000000000 +0400
-@@ -53,6 +53,9 @@ asmlinkage long sys_ioprio_set(int which
- struct user_struct *user;
- int ret;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return -EPERM;
-+
- switch (class) {
- case IOPRIO_CLASS_RT:
- if (!capable(CAP_SYS_ADMIN))
-@@ -78,18 +81,18 @@ asmlinkage long sys_ioprio_set(int which
- if (!who)
- p = current;
- else
-- p = find_task_by_pid(who);
-+ p = find_task_by_pid_all(who);
- if (p)
- ret = set_task_ioprio(p, ioprio);
- break;
- case IOPRIO_WHO_PGRP:
- if (!who)
- who = process_group(current);
-- do_each_task_pid(who, PIDTYPE_PGID, p) {
-+ do_each_task_pid_all(who, PIDTYPE_PGID, p) {
- ret = set_task_ioprio(p, ioprio);
- if (ret)
- break;
-- } while_each_task_pid(who, PIDTYPE_PGID, p);
-+ } while_each_task_pid_all(who, PIDTYPE_PGID, p);
- break;
- case IOPRIO_WHO_USER:
- if (!who)
-@@ -100,13 +103,13 @@ asmlinkage long sys_ioprio_set(int which
- if (!user)
- break;
-
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- if (p->uid != who)
- continue;
- ret = set_task_ioprio(p, ioprio);
- if (ret)
- break;
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
-
- if (who)
- free_uid(user);
-@@ -131,19 +134,19 @@ asmlinkage long sys_ioprio_get(int which
- if (!who)
- p = current;
- else
-- p = find_task_by_pid(who);
-+ p = find_task_by_pid_ve(who);
- if (p)
- ret = p->ioprio;
- break;
- case IOPRIO_WHO_PGRP:
- if (!who)
- who = process_group(current);
-- do_each_task_pid(who, PIDTYPE_PGID, p) {
-+ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
- if (ret == -ESRCH)
- ret = p->ioprio;
- else
- ret = ioprio_best(ret, p->ioprio);
-- } while_each_task_pid(who, PIDTYPE_PGID, p);
-+ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
- break;
- case IOPRIO_WHO_USER:
- if (!who)
-@@ -154,14 +157,14 @@ asmlinkage long sys_ioprio_get(int which
- if (!user)
- break;
-
-- do_each_thread(g, p) {
-+ do_each_thread_ve(g, p) {
- if (p->uid != user->uid)
- continue;
- if (ret == -ESRCH)
- ret = p->ioprio;
- else
- ret = ioprio_best(ret, p->ioprio);
-- } while_each_thread(g, p);
-+ } while_each_thread_ve(g, p);
-
- if (who)
- free_uid(user);
-diff -upr linux-2.6.16.orig/fs/jfs/acl.c linux-2.6.16-026test009/fs/jfs/acl.c
---- linux-2.6.16.orig/fs/jfs/acl.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/jfs/acl.c 2006-04-19 15:02:11.000000000 +0400
-@@ -140,9 +140,10 @@ static int jfs_check_acl(struct inode *i
- return -EAGAIN;
- }
-
--int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-+int jfs_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
-- return generic_permission(inode, mask, jfs_check_acl);
-+ return generic_permission(inode, mask, jfs_check_acl, perm);
- }
-
- int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
-diff -upr linux-2.6.16.orig/fs/jfs/jfs_acl.h linux-2.6.16-026test009/fs/jfs/jfs_acl.h
---- linux-2.6.16.orig/fs/jfs/jfs_acl.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/jfs/jfs_acl.h 2006-04-19 15:02:11.000000000 +0400
-@@ -20,7 +20,7 @@
-
- #ifdef CONFIG_JFS_POSIX_ACL
-
--int jfs_permission(struct inode *, int, struct nameidata *);
-+int jfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *);
- int jfs_init_acl(tid_t, struct inode *, struct inode *);
- int jfs_setattr(struct dentry *, struct iattr *);
-
-diff -upr linux-2.6.16.orig/fs/lockd/clntproc.c linux-2.6.16-026test009/fs/lockd/clntproc.c
---- linux-2.6.16.orig/fs/lockd/clntproc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/lockd/clntproc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -130,10 +130,10 @@ static void nlmclnt_setlockargs(struct n
- nlmclnt_next_cookie(&argp->cookie);
- argp->state = nsm_local_state;
- memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-- lock->caller = system_utsname.nodename;
-+ lock->caller = ve_utsname.nodename;
- lock->oh.data = req->a_owner;
- lock->oh.len = sprintf(req->a_owner, "%d@%s",
-- current->pid, system_utsname.nodename);
-+ current->pid, ve_utsname.nodename);
- locks_copy_lock(&lock->fl, fl);
- }
-
-@@ -154,7 +154,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
- {
- locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
- memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-- call->a_args.lock.caller = system_utsname.nodename;
-+ call->a_args.lock.caller = ve_utsname.nodename;
- call->a_args.lock.oh.len = lock->oh.len;
-
- /* set default data area */
-diff -upr linux-2.6.16.orig/fs/lockd/mon.c linux-2.6.16-026test009/fs/lockd/mon.c
---- linux-2.6.16.orig/fs/lockd/mon.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/lockd/mon.c 2006-04-19 15:02:12.000000000 +0400
-@@ -147,7 +147,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
- */
- sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
- if (!(p = xdr_encode_string(p, buffer))
-- || !(p = xdr_encode_string(p, system_utsname.nodename)))
-+ || !(p = xdr_encode_string(p, ve_utsname.nodename)))
- return ERR_PTR(-EIO);
- *p++ = htonl(argp->prog);
- *p++ = htonl(argp->vers);
-diff -upr linux-2.6.16.orig/fs/locks.c linux-2.6.16-026test009/fs/locks.c
---- linux-2.6.16.orig/fs/locks.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/locks.c 2006-04-19 15:02:12.000000000 +0400
-@@ -129,6 +129,8 @@
- #include <asm/semaphore.h>
- #include <asm/uaccess.h>
-
-+#include <ub/ub_misc.h>
-+
- #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
- #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
- #define IS_LEASE(fl) (fl->fl_flags & FL_LEASE)
-@@ -148,11 +150,28 @@ static LIST_HEAD(blocked_list);
- static kmem_cache_t *filelock_cache;
-
- /* Allocate an empty lock structure. */
--static struct file_lock *locks_alloc_lock(void)
-+static struct file_lock *locks_alloc_lock(int charge)
- {
-- return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
-+ struct file_lock *fl;
-+
-+ fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
-+#ifdef CONFIG_USER_RESOURCE
-+ if (fl == NULL)
-+ goto out;
-+ fl->fl_charged = 0;
-+ if (!charge)
-+ goto out;
-+ if (!ub_flock_charge(fl, 1))
-+ goto out;
-+
-+ kmem_cache_free(filelock_cache, fl);
-+ fl = NULL;
-+out:
-+#endif
-+ return fl;
- }
-
-+
- /* Free a lock which is not in use. */
- static void locks_free_lock(struct file_lock *fl)
- {
-@@ -181,6 +200,7 @@ static void locks_free_lock(struct file_
- fl->fl_lmops = NULL;
- }
-
-+ ub_flock_uncharge(fl);
- kmem_cache_free(filelock_cache, fl);
- }
-
-@@ -263,7 +283,7 @@ static int flock_make_lock(struct file *
- if (type < 0)
- return type;
-
-- fl = locks_alloc_lock();
-+ fl = locks_alloc_lock(type != F_UNLCK);
- if (fl == NULL)
- return -ENOMEM;
-
-@@ -451,7 +471,7 @@ static int lease_init(struct file *filp,
- /* Allocate a file_lock initialised to this type of lease */
- static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
- {
-- struct file_lock *fl = locks_alloc_lock();
-+ struct file_lock *fl = locks_alloc_lock(1);
- int error;
-
- if (fl == NULL)
-@@ -784,8 +804,11 @@ static int __posix_lock_file(struct inod
- * We may need two file_lock structures for this operation,
- * so we get them in advance to avoid races.
- */
-- new_fl = locks_alloc_lock();
-- new_fl2 = locks_alloc_lock();
-+ if (request->fl_type != F_UNLCK)
-+ new_fl = locks_alloc_lock(1);
-+ else
-+ new_fl = NULL;
-+ new_fl2 = locks_alloc_lock(0);
-
- lock_kernel();
- if (request->fl_type != F_UNLCK) {
-@@ -813,7 +836,7 @@ static int __posix_lock_file(struct inod
- goto out;
-
- error = -ENOLCK; /* "no luck" */
-- if (!(new_fl && new_fl2))
-+ if (!((request->fl_type == F_UNLCK || new_fl) && new_fl2))
- goto out;
-
- /*
-@@ -919,19 +942,30 @@ static int __posix_lock_file(struct inod
- if (!added) {
- if (request->fl_type == F_UNLCK)
- goto out;
-+ error = -ENOLCK;
-+ if (right && (left == right) && ub_flock_charge(new_fl, 1))
-+ goto out;
- locks_copy_lock(new_fl, request);
- locks_insert_lock(before, new_fl);
- new_fl = NULL;
-+ error = 0;
- }
- if (right) {
- if (left == right) {
- /* The new lock breaks the old one in two pieces,
- * so we have to use the second new lock.
- */
-+ error = -ENOLCK;
-+ if (added && ub_flock_charge(new_fl2,
-+ request->fl_type != F_UNLCK))
-+ goto out;
-+ /* FIXME move all fl_charged manipulations in ub code */
-+ set_flock_charged(new_fl2);
- left = new_fl2;
- new_fl2 = NULL;
- locks_copy_lock(left, right);
- locks_insert_lock(before, left);
-+ error = 0;
- }
- right->fl_start = request->fl_end + 1;
- locks_wake_up_blocks(right);
-@@ -1538,6 +1572,7 @@ asmlinkage long sys_flock(unsigned int f
- out:
- return error;
- }
-+EXPORT_SYMBOL_GPL(sys_flock);
-
- /* Report the first existing lock that would conflict with l.
- * This implements the F_GETLK command of fcntl().
-@@ -1573,7 +1608,7 @@ int fcntl_getlk(struct file *filp, struc
-
- flock.l_type = F_UNLCK;
- if (fl != NULL) {
-- flock.l_pid = fl->fl_pid;
-+ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
- #if BITS_PER_LONG == 32
- /*
- * Make sure we can represent the posix lock via
-@@ -1605,7 +1640,7 @@ out:
- int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
- struct flock __user *l)
- {
-- struct file_lock *file_lock = locks_alloc_lock();
-+ struct file_lock *file_lock = locks_alloc_lock(0);
- struct flock flock;
- struct inode *inode;
- int error;
-@@ -1727,7 +1762,7 @@ int fcntl_getlk64(struct file *filp, str
-
- flock.l_type = F_UNLCK;
- if (fl != NULL) {
-- flock.l_pid = fl->fl_pid;
-+ flock.l_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
- flock.l_start = fl->fl_start;
- flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
- fl->fl_end - fl->fl_start + 1;
-@@ -1748,7 +1783,7 @@ out:
- int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
- struct flock64 __user *l)
- {
-- struct file_lock *file_lock = locks_alloc_lock();
-+ struct file_lock *file_lock = locks_alloc_lock(0);
- struct flock64 flock;
- struct inode *inode;
- int error;
-@@ -1976,7 +2011,9 @@ EXPORT_SYMBOL(posix_unblock_lock);
- static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
- {
- struct inode *inode = NULL;
-+ unsigned int fl_pid;
-
-+ fl_pid = pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
- if (fl->fl_file != NULL)
- inode = fl->fl_file->f_dentry->d_inode;
-
-@@ -2018,16 +2055,16 @@ static void lock_get_status(char* out, s
- }
- if (inode) {
- #ifdef WE_CAN_BREAK_LSLK_NOW
-- out += sprintf(out, "%d %s:%ld ", fl->fl_pid,
-+ out += sprintf(out, "%d %s:%ld ", fl_pid,
- inode->i_sb->s_id, inode->i_ino);
- #else
- /* userspace relies on this representation of dev_t ;-( */
-- out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid,
-+ out += sprintf(out, "%d %02x:%02x:%ld ", fl_pid,
- MAJOR(inode->i_sb->s_dev),
- MINOR(inode->i_sb->s_dev), inode->i_ino);
- #endif
- } else {
-- out += sprintf(out, "%d <none>:0 ", fl->fl_pid);
-+ out += sprintf(out, "%d <none>:0 ", fl_pid);
- }
- if (IS_POSIX(fl)) {
- if (fl->fl_end == OFFSET_MAX)
-@@ -2076,11 +2113,17 @@ int get_locks_status(char *buffer, char
- char *q = buffer;
- off_t pos = 0;
- int i = 0;
-+ struct ve_struct *env;
-
- lock_kernel();
-+ env = get_exec_env();
- list_for_each(tmp, &file_lock_list) {
- struct list_head *btmp;
- struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
-+
-+ if (!ve_accessible(VE_OWNER_FILP(fl->fl_file), env))
-+ continue;
-+
- lock_get_status(q, fl, ++i, "");
- move_lock_status(&q, &pos, offset);
-
-@@ -2238,7 +2281,7 @@ EXPORT_SYMBOL(steal_locks);
- static int __init filelock_init(void)
- {
- filelock_cache = kmem_cache_create("file_lock_cache",
-- sizeof(struct file_lock), 0, SLAB_PANIC,
-+ sizeof(struct file_lock), 0, SLAB_PANIC | SLAB_UBC,
- init_once, NULL);
- return 0;
- }
-diff -upr linux-2.6.16.orig/fs/namei.c linux-2.6.16-026test009/fs/namei.c
---- linux-2.6.16.orig/fs/namei.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/namei.c 2006-04-19 15:02:12.000000000 +0400
-@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
- * for filesystem access without changing the "normal" uids which
- * are used for other things..
- */
--int generic_permission(struct inode *inode, int mask,
-+static int __generic_permission(struct inode *inode, int mask,
- int (*check_acl)(struct inode *inode, int mask))
- {
- umode_t mode = inode->i_mode;
-@@ -225,7 +225,26 @@ int generic_permission(struct inode *ino
- return -EACCES;
- }
-
--int permission(struct inode *inode, int mask, struct nameidata *nd)
-+int generic_permission(struct inode *inode, int mask,
-+ int (*check_acl)(struct inode *inode, int mask),
-+ struct exec_perm *perm)
-+{
-+ int ret;
-+
-+ if (perm == NULL)
-+ return __generic_permission(inode, mask, check_acl);
-+
-+ mutex_lock(&inode->i_mutex);
-+ ret = __generic_permission(inode, mask, check_acl);
-+ if (!ret)
-+ set_exec_perm(perm, inode);
-+ mutex_unlock(&inode->i_mutex);
-+ return ret;
-+}
-+
-+
-+int permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- int retval, submask;
-
-@@ -250,9 +269,9 @@ int permission(struct inode *inode, int
- /* Ordinary permission routines do not understand MAY_APPEND. */
- submask = mask & ~MAY_APPEND;
- if (inode->i_op && inode->i_op->permission)
-- retval = inode->i_op->permission(inode, submask, nd);
-+ retval = inode->i_op->permission(inode, submask, nd, perm);
- else
-- retval = generic_permission(inode, submask, NULL);
-+ retval = generic_permission(inode, submask, NULL, perm);
- if (retval)
- return retval;
-
-@@ -269,9 +288,9 @@ int permission(struct inode *inode, int
- * for filesystem access without changing the "normal" uids which
- * are used for other things.
- */
--int vfs_permission(struct nameidata *nd, int mask)
-+int vfs_permission(struct nameidata *nd, int mask, struct exec_perm *perm)
- {
-- return permission(nd->dentry->d_inode, mask, nd);
-+ return permission(nd->dentry->d_inode, mask, nd, perm);
- }
-
- /**
-@@ -288,7 +307,7 @@ int vfs_permission(struct nameidata *nd,
- */
- int file_permission(struct file *file, int mask)
- {
-- return permission(file->f_dentry->d_inode, mask, NULL);
-+ return permission(file->f_dentry->d_inode, mask, NULL, NULL);
- }
-
- /*
-@@ -704,7 +723,14 @@ static __always_inline void follow_dotdo
- read_unlock(&current->fs->lock);
- break;
- }
-- read_unlock(&current->fs->lock);
-+#ifdef CONFIG_VE
-+ if (nd->dentry == get_exec_env()->fs_root &&
-+ nd->mnt == get_exec_env()->fs_rootmnt) {
-+ read_unlock(&current->fs->lock);
-+ break;
-+ }
-+#endif
-+ read_unlock(&current->fs->lock);
- spin_lock(&dcache_lock);
- if (nd->dentry != nd->mnt->mnt_root) {
- nd->dentry = dget(nd->dentry->d_parent);
-@@ -745,6 +771,10 @@ static int do_lookup(struct nameidata *n
- if (dentry->d_op && dentry->d_op->d_revalidate)
- goto need_revalidate;
- done:
-+ if ((nd->flags & LOOKUP_STRICT) && d_mountpoint(dentry)) {
-+ dput(dentry);
-+ return -ENOENT;
-+ }
- path->mnt = mnt;
- path->dentry = dentry;
- __follow_mount(path);
-@@ -801,7 +831,7 @@ static fastcall int __link_path_walk(con
- nd->flags |= LOOKUP_CONTINUE;
- err = exec_permission_lite(inode, nd);
- if (err == -EAGAIN)
-- err = vfs_permission(nd, MAY_EXEC);
-+ err = vfs_permission(nd, MAY_EXEC, NULL);
- if (err)
- break;
-
-@@ -864,6 +894,9 @@ static fastcall int __link_path_walk(con
- goto out_dput;
-
- if (inode->i_op->follow_link) {
-+ err = -ENOENT;
-+ if (lookup_flags & LOOKUP_STRICT)
-+ goto out_dput;
- err = do_follow_link(&next, nd);
- if (err)
- goto return_err;
-@@ -911,6 +944,7 @@ last_component:
- break;
- inode = next.dentry->d_inode;
- if ((lookup_flags & LOOKUP_FOLLOW)
-+ && !(lookup_flags & LOOKUP_STRICT)
- && inode && inode->i_op && inode->i_op->follow_link) {
- err = do_follow_link(&next, nd);
- if (err)
-@@ -951,6 +985,11 @@ return_reval:
- break;
- }
- return_base:
-+ if (!(nd->flags & LOOKUP_NOAREACHECK)) {
-+ err = check_area_access_ve(nd->dentry, nd->mnt);
-+ if (err)
-+ break;
-+ }
- return 0;
- out_dput:
- dput_path(&next, nd);
-@@ -1219,7 +1258,7 @@ static struct dentry * __lookup_hash(str
- int err;
-
- inode = base->d_inode;
-- err = permission(inode, MAY_EXEC, nd);
-+ err = permission(inode, MAY_EXEC, nd, NULL);
- dentry = ERR_PTR(err);
- if (err)
- goto out;
-@@ -1354,7 +1393,7 @@ static int may_delete(struct inode *dir,
-
- BUG_ON(victim->d_parent->d_inode != dir);
-
-- error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
-+ error = permission(dir,MAY_WRITE | MAY_EXEC, NULL, NULL);
- if (error)
- return error;
- if (IS_APPEND(dir))
-@@ -1391,7 +1430,7 @@ static inline int may_create(struct inod
- return -EEXIST;
- if (IS_DEADDIR(dir))
- return -ENOENT;
-- return permission(dir,MAY_WRITE | MAY_EXEC, nd);
-+ return permission(dir,MAY_WRITE | MAY_EXEC, nd, NULL);
- }
-
- /*
-@@ -1491,7 +1530,7 @@ int may_open(struct nameidata *nd, int a
- if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
- return -EISDIR;
-
-- error = vfs_permission(nd, acc_mode);
-+ error = vfs_permission(nd, acc_mode, NULL);
- if (error)
- return error;
-
-@@ -1851,6 +1890,7 @@ asmlinkage long sys_mknod(const char __u
- {
- return sys_mknodat(AT_FDCWD, filename, mode, dev);
- }
-+EXPORT_SYMBOL_GPL(sys_mknod);
-
- int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
- {
-@@ -1909,6 +1949,7 @@ asmlinkage long sys_mkdir(const char __u
- {
- return sys_mkdirat(AT_FDCWD, pathname, mode);
- }
-+EXPORT_SYMBOL_GPL(sys_mkdir);
-
- /*
- * We try to drop the dentry early: we should have
-@@ -2016,6 +2057,7 @@ asmlinkage long sys_rmdir(const char __u
- {
- return do_rmdir(AT_FDCWD, pathname);
- }
-+EXPORT_SYMBOL_GPL(sys_rmdir);
-
- int vfs_unlink(struct inode *dir, struct dentry *dentry)
- {
-@@ -2115,6 +2157,7 @@ asmlinkage long sys_unlink(const char __
- {
- return do_unlinkat(AT_FDCWD, pathname);
- }
-+EXPORT_SYMBOL_GPL(sys_unlink);
-
- int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
- {
-@@ -2313,7 +2356,7 @@ static int vfs_rename_dir(struct inode *
- * we'll need to flip '..'.
- */
- if (new_dir != old_dir) {
-- error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
-+ error = permission(old_dentry->d_inode, MAY_WRITE, NULL, NULL);
- if (error)
- return error;
- }
-@@ -2380,6 +2423,9 @@ int vfs_rename(struct inode *old_dir, st
- int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
- const char *old_name;
-
-+ if (DQUOT_RENAME(old_dentry->d_inode, old_dir, new_dir))
-+ return -EXDEV;
-+
- if (old_dentry->d_inode == new_dentry->d_inode)
- return 0;
-
-diff -upr linux-2.6.16.orig/fs/namespace.c linux-2.6.16-026test009/fs/namespace.c
---- linux-2.6.16.orig/fs/namespace.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/namespace.c 2006-04-19 15:02:12.000000000 +0400
-@@ -40,13 +40,15 @@ static inline int sysfs_init(void)
-
- /* spinlock for vfsmount related operations, inplace of dcache_lock */
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
-+EXPORT_SYMBOL(vfsmount_lock);
-
- static int event;
-
- static struct list_head *mount_hashtable;
- static int hash_mask __read_mostly, hash_bits __read_mostly;
- static kmem_cache_t *mnt_cache;
--static struct rw_semaphore namespace_sem;
-+struct rw_semaphore namespace_sem;
-+EXPORT_SYMBOL(namespace_sem);
-
- /* /sys/fs */
- decl_subsys(fs, NULL, NULL);
-@@ -371,10 +373,32 @@ static int show_vfsmnt(struct seq_file *
- { 0, NULL }
- };
- struct proc_fs_info *fs_infop;
-+ char *path_buf, *path;
-
-- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
-+ /* skip FS_NOMOUNT mounts (rootfs) */
-+ if (mnt->mnt_sb->s_flags & MS_NOUSER)
-+ return 0;
-+
-+ path_buf = (char *) __get_free_page(GFP_KERNEL);
-+ if (!path_buf)
-+ return -ENOMEM;
-+ path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
-+ if (IS_ERR(path)) {
-+ free_page((unsigned long) path_buf);
-+ /*
-+ * This means that the file position will be incremented, i.e.
-+ * the total number of "invisible" vfsmnt will leak.
-+ */
-+ return 0;
-+ }
-+
-+ if (ve_is_super(get_exec_env()))
-+ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
-+ else
-+ mangle(m, mnt->mnt_sb->s_type->name);
- seq_putc(m, ' ');
-- seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
-+ mangle(m, path);
-+ free_page((unsigned long) path_buf);
- seq_putc(m, ' ');
- mangle(m, mnt->mnt_sb->s_type->name);
- seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
-@@ -474,6 +498,7 @@ void release_mounts(struct list_head *he
- mntput(mnt);
- }
- }
-+EXPORT_SYMBOL(release_mounts);
-
- void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
- {
-@@ -498,6 +523,7 @@ void umount_tree(struct vfsmount *mnt, i
- change_mnt_propagation(p, MS_PRIVATE);
- }
- }
-+EXPORT_SYMBOL(umount_tree);
-
- static int do_umount(struct vfsmount *mnt, int flags)
- {
-@@ -608,7 +634,7 @@ asmlinkage long sys_umount(char __user *
- goto dput_and_out;
-
- retval = -EPERM;
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- goto dput_and_out;
-
- retval = do_umount(nd.mnt, flags);
-@@ -632,7 +658,7 @@ asmlinkage long sys_oldumount(char __use
-
- static int mount_is_safe(struct nameidata *nd)
- {
-- if (capable(CAP_SYS_ADMIN))
-+ if (capable(CAP_VE_SYS_ADMIN))
- return 0;
- return -EPERM;
- #ifdef notyet
-@@ -642,7 +668,7 @@ static int mount_is_safe(struct nameidat
- if (current->uid != nd->dentry->d_inode->i_uid)
- return -EPERM;
- }
-- if (vfs_permission(nd, MAY_WRITE))
-+ if (vfs_permission(nd, MAY_WRITE, NULL))
- return -EPERM;
- return 0;
- #endif
-@@ -917,7 +943,7 @@ static int do_remount(struct nameidata *
- int err;
- struct super_block *sb = nd->mnt->mnt_sb;
-
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
-
- if (!check_mnt(nd->mnt))
-@@ -951,7 +977,7 @@ static int do_move_mount(struct nameidat
- struct nameidata old_nd, parent_nd;
- struct vfsmount *p;
- int err = 0;
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- if (!old_name || !*old_name)
- return -EINVAL;
-@@ -1031,7 +1057,7 @@ static int do_new_mount(struct nameidata
- return -EINVAL;
-
- /* we need capabilities... */
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
-
- mnt = do_kern_mount(type, flags, name, data);
-@@ -1072,6 +1098,10 @@ int do_add_mount(struct vfsmount *newmnt
- if ((err = graft_tree(newmnt, nd)))
- goto unlock;
-
-+ if (newmnt->mnt_mountpoint->d_flags & DCACHE_VIRTUAL)
-+ /* unaccessible yet - no lock */
-+ newmnt->mnt_root->d_flags |= DCACHE_VIRTUAL;
-+
- if (fslist) {
- /* add to the specified expiration list */
- spin_lock(&vfsmount_lock);
-@@ -1469,6 +1499,7 @@ out1:
- free_page(type_page);
- return retval;
- }
-+EXPORT_SYMBOL_GPL(sys_mount);
-
- /*
- * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
-@@ -1520,7 +1551,7 @@ static void chroot_fs_refs(struct nameid
- struct fs_struct *fs;
-
- read_lock(&tasklist_lock);
-- do_each_thread(g, p) {
-+ do_each_thread_ve(g, p) {
- task_lock(p);
- fs = p->fs;
- if (fs) {
-@@ -1535,7 +1566,7 @@ static void chroot_fs_refs(struct nameid
- put_fs_struct(fs);
- } else
- task_unlock(p);
-- } while_each_thread(g, p);
-+ } while_each_thread_ve(g, p);
- read_unlock(&tasklist_lock);
- }
-
-@@ -1688,10 +1719,10 @@ static void __init init_mount_tree(void)
-
- init_task.namespace = namespace;
- read_lock(&tasklist_lock);
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- get_namespace(namespace);
- p->namespace = namespace;
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
- read_unlock(&tasklist_lock);
-
- set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
-@@ -1707,7 +1738,8 @@ void __init mnt_init(unsigned long mempa
- init_rwsem(&namespace_sem);
-
- mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
-- 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
-+ 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_UBC,
-+ NULL, NULL);
-
- mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
-
-diff -upr linux-2.6.16.orig/fs/nfs/dir.c linux-2.6.16-026test009/fs/nfs/dir.c
---- linux-2.6.16.orig/fs/nfs/dir.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfs/dir.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1635,7 +1635,8 @@ out:
- return -EACCES;
- }
-
--int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-+int nfs_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- struct rpc_cred *cred;
- int res = 0;
-@@ -1683,7 +1684,7 @@ out:
- out_notsup:
- res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (res == 0)
-- res = generic_permission(inode, mask, NULL);
-+ res = generic_permission(inode, mask, NULL, perm);
- unlock_kernel();
- return res;
- }
-diff -upr linux-2.6.16.orig/fs/nfs/nfsroot.c linux-2.6.16-026test009/fs/nfs/nfsroot.c
---- linux-2.6.16.orig/fs/nfs/nfsroot.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfs/nfsroot.c 2006-04-19 15:02:12.000000000 +0400
-@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *na
- /* Override them by options set on kernel command-line */
- root_nfs_parse(name, buf);
-
-- cp = system_utsname.nodename;
-+ cp = ve_utsname.nodename;
- if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
- printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
- return -1;
-diff -upr linux-2.6.16.orig/fs/nfsd/nfs3proc.c linux-2.6.16-026test009/fs/nfsd/nfs3proc.c
---- linux-2.6.16.orig/fs/nfsd/nfs3proc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfsd/nfs3proc.c 2006-04-19 15:02:11.000000000 +0400
-@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_proced
- PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
- PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1),
- PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
-- PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE),
-+ PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
- PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4),
- PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
- PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
-diff -upr linux-2.6.16.orig/fs/nfsd/nfs4proc.c linux-2.6.16-026test009/fs/nfsd/nfs4proc.c
---- linux-2.6.16.orig/fs/nfsd/nfs4proc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfsd/nfs4proc.c 2006-04-19 15:02:11.000000000 +0400
-@@ -975,7 +975,7 @@ struct nfsd4_voidargs { int dummy; };
- */
- static struct svc_procedure nfsd_procedures4[2] = {
- PROC(null, void, void, void, RC_NOCACHE, 1),
-- PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE)
-+ PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4)
- };
-
- struct svc_version nfsd_version4 = {
-diff -upr linux-2.6.16.orig/fs/nfsd/nfsfh.c linux-2.6.16-026test009/fs/nfsd/nfsfh.c
---- linux-2.6.16.orig/fs/nfsd/nfsfh.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfsd/nfsfh.c 2006-04-19 15:02:11.000000000 +0400
-@@ -56,7 +56,7 @@ static int nfsd_acceptable(void *expv, s
- /* make sure parents give x permission to user */
- int err;
- parent = dget_parent(tdentry);
-- err = permission(parent->d_inode, MAY_EXEC, NULL);
-+ err = permission(parent->d_inode, MAY_EXEC, NULL, NULL);
- if (err < 0) {
- dput(parent);
- break;
-diff -upr linux-2.6.16.orig/fs/nfsd/nfsproc.c linux-2.6.16-026test009/fs/nfsd/nfsproc.c
---- linux-2.6.16.orig/fs/nfsd/nfsproc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfsd/nfsproc.c 2006-04-19 15:02:11.000000000 +0400
-@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_proced
- PROC(none, void, void, none, RC_NOCACHE, ST),
- PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
- PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
-- PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE),
-+ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
- PROC(none, void, void, none, RC_NOCACHE, ST),
- PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
- PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
-diff -upr linux-2.6.16.orig/fs/nfsd/vfs.c linux-2.6.16-026test009/fs/nfsd/vfs.c
---- linux-2.6.16.orig/fs/nfsd/vfs.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/nfsd/vfs.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1817,12 +1817,13 @@ nfsd_permission(struct svc_export *exp,
- inode->i_uid == current->fsuid)
- return 0;
-
-- err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
-+ err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC),
-+ NULL, NULL);
-
- /* Allow read access to binaries even when mode 111 */
- if (err == -EACCES && S_ISREG(inode->i_mode) &&
- acc == (MAY_READ | MAY_OWNER_OVERRIDE))
-- err = permission(inode, MAY_EXEC, NULL);
-+ err = permission(inode, MAY_EXEC, NULL, NULL);
-
- return err? nfserrno(err) : 0;
- }
-diff -upr linux-2.6.16.orig/fs/ntfs/super.c linux-2.6.16-026test009/fs/ntfs/super.c
---- linux-2.6.16.orig/fs/ntfs/super.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/ntfs/super.c 2006-04-19 15:02:11.000000000 +0400
-@@ -3033,7 +3033,7 @@ iput_tmp_ino_err_out_now:
- * method again... FIXME: Do we need to do this twice now because of
- * attribute inodes? I think not, so leave as is for now... (AIA)
- */
-- if (invalidate_inodes(sb)) {
-+ if (invalidate_inodes(sb, 0)) {
- ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
- "driver bug.");
- /* Copied from fs/super.c. I just love this message. (-; */
-diff -upr linux-2.6.16.orig/fs/open.c linux-2.6.16-026test009/fs/open.c
---- linux-2.6.16.orig/fs/open.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/open.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,6 +25,7 @@
- #include <linux/fs.h>
- #include <linux/personality.h>
- #include <linux/pagemap.h>
-+#include <linux/faudit.h>
- #include <linux/syscalls.h>
- #include <linux/rcupdate.h>
-
-@@ -121,6 +122,34 @@ static int vfs_statfs64(struct super_blo
- return 0;
- }
-
-+static int faudit_statfs(struct vfsmount *mnt, struct dentry *dentry,
-+ struct statfs *buf)
-+{
-+ struct faudit_stat_arg arg;
-+
-+ arg.mnt = mnt;
-+ arg.dentry = dentry;
-+ arg.stat = buf;
-+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS, &arg)
-+ != NOTIFY_DONE)
-+ return arg.err;
-+ return 0;
-+}
-+
-+static int faudit_statfs64(struct vfsmount *mnt, struct dentry *dentry,
-+ struct statfs64 *buf)
-+{
-+ struct faudit_stat_arg arg;
-+
-+ arg.mnt = mnt;
-+ arg.dentry = dentry;
-+ arg.stat = buf;
-+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STATFS64,
-+ &arg) != NOTIFY_DONE)
-+ return arg.err;
-+ return 0;
-+}
-+
- asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
- {
- struct nameidata nd;
-@@ -130,6 +159,8 @@ asmlinkage long sys_statfs(const char __
- if (!error) {
- struct statfs tmp;
- error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
-+ if (!error)
-+ error = faudit_statfs(nd.mnt, nd.dentry, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_release(&nd);
-@@ -149,6 +180,8 @@ asmlinkage long sys_statfs64(const char
- if (!error) {
- struct statfs64 tmp;
- error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
-+ if (!error)
-+ error = faudit_statfs64(nd.mnt, nd.dentry, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_release(&nd);
-@@ -168,6 +201,8 @@ asmlinkage long sys_fstatfs(unsigned int
- if (!file)
- goto out;
- error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
-+ if (!error)
-+ error = faudit_statfs(file->f_vfsmnt, file->f_dentry, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-@@ -189,6 +224,8 @@ asmlinkage long sys_fstatfs64(unsigned i
- if (!file)
- goto out;
- error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
-+ if (!error)
-+ error = faudit_statfs64(file->f_vfsmnt, file->f_dentry, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-@@ -243,7 +280,7 @@ static long do_sys_truncate(const char _
- if (!S_ISREG(inode->i_mode))
- goto dput_and_out;
-
-- error = vfs_permission(&nd, MAY_WRITE);
-+ error = vfs_permission(&nd, MAY_WRITE, NULL);
- if (error)
- goto dput_and_out;
-
-@@ -397,7 +434,7 @@ asmlinkage long sys_utime(char __user *
- goto dput_and_out;
-
- if (current->fsuid != inode->i_uid &&
-- (error = vfs_permission(&nd, MAY_WRITE)) != 0)
-+ (error = vfs_permission(&nd, MAY_WRITE, NULL)) != 0)
- goto dput_and_out;
- }
- mutex_lock(&inode->i_mutex);
-@@ -450,7 +487,7 @@ long do_utimes(int dfd, char __user *fil
- goto dput_and_out;
-
- if (current->fsuid != inode->i_uid &&
-- (error = vfs_permission(&nd, MAY_WRITE)) != 0)
-+ (error = vfs_permission(&nd, MAY_WRITE, NULL)) != 0)
- goto dput_and_out;
- }
- mutex_lock(&inode->i_mutex);
-@@ -514,7 +551,7 @@ asmlinkage long sys_faccessat(int dfd, c
-
- res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
- if (!res) {
-- res = vfs_permission(&nd, mode);
-+ res = vfs_permission(&nd, mode, NULL);
- /* SuS v2 requires we report a read only fs too */
- if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
- && !special_file(nd.dentry->d_inode->i_mode))
-@@ -543,7 +580,7 @@ asmlinkage long sys_chdir(const char __u
- if (error)
- goto out;
-
-- error = vfs_permission(&nd, MAY_EXEC);
-+ error = vfs_permission(&nd, MAY_EXEC, NULL);
- if (error)
- goto dput_and_out;
-
-@@ -594,7 +631,7 @@ asmlinkage long sys_chroot(const char __
- if (error)
- goto out;
-
-- error = vfs_permission(&nd, MAY_EXEC);
-+ error = vfs_permission(&nd, MAY_EXEC, NULL);
- if (error)
- goto dput_and_out;
-
-@@ -733,6 +770,7 @@ asmlinkage long sys_chown(const char __u
- }
- return error;
- }
-+EXPORT_SYMBOL_GPL(sys_chown);
-
- asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
- gid_t group, int flag)
-diff -upr linux-2.6.16.orig/fs/partitions/check.c linux-2.6.16-026test009/fs/partitions/check.c
---- linux-2.6.16.orig/fs/partitions/check.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/partitions/check.c 2006-04-19 15:02:12.000000000 +0400
-@@ -128,6 +128,7 @@ char *disk_name(struct gendisk *hd, int
-
- return buf;
- }
-+EXPORT_SYMBOL(disk_name);
-
- const char *bdevname(struct block_device *bdev, char *buf)
- {
-@@ -345,6 +346,7 @@ static char *make_block_name(struct gend
- char *name;
- static char *block_str = "block:";
- int size;
-+ char *s;
-
- size = strlen(block_str) + strlen(disk->disk_name) + 1;
- name = kmalloc(size, GFP_KERNEL);
-@@ -352,6 +354,10 @@ static char *make_block_name(struct gend
- return NULL;
- strcpy(name, block_str);
- strcat(name, disk->disk_name);
-+ /* ewww... some of these buggers have / in name... */
-+ s = strchr(name, '/');
-+ if (s)
-+ *s = '!';
- return name;
- }
-
-diff -upr linux-2.6.16.orig/fs/pipe.c linux-2.6.16-026test009/fs/pipe.c
---- linux-2.6.16.orig/fs/pipe.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/pipe.c 2006-04-19 15:02:12.000000000 +0400
-@@ -797,6 +797,7 @@ close_f1:
- no_files:
- return error;
- }
-+EXPORT_SYMBOL_GPL(do_pipe);
-
- /*
- * pipefs should _never_ be mounted by userland - too much of security hassle,
-diff -upr linux-2.6.16.orig/fs/proc/array.c linux-2.6.16-026test009/fs/proc/array.c
---- linux-2.6.16.orig/fs/proc/array.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/array.c 2006-04-19 15:02:12.000000000 +0400
-@@ -76,6 +76,8 @@
- #include <linux/cpuset.h>
- #include <linux/rcupdate.h>
-
-+#include <ub/beancounter.h>
-+
- #include <asm/uaccess.h>
- #include <asm/pgtable.h>
- #include <asm/io.h>
-@@ -161,8 +163,13 @@ static inline char * task_state(struct t
- struct group_info *group_info;
- int g;
- struct fdtable *fdt = NULL;
-+ pid_t pid, ppid, tgid;
-+
-+ pid = get_task_pid(p);
-+ tgid = get_task_tgid(p);
-
- read_lock(&tasklist_lock);
-+ ppid = get_task_ppid(p);
- buffer += sprintf(buffer,
- "State:\t%s\n"
- "SleepAVG:\t%lu%%\n"
-@@ -174,9 +181,9 @@ static inline char * task_state(struct t
- "Gid:\t%d\t%d\t%d\t%d\n",
- get_task_state(p),
- (p->sleep_avg/1024)*100/(1020000000/1024),
-- p->tgid,
-- p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
-- pid_alive(p) && p->ptrace ? p->parent->pid : 0,
-+ tgid,
-+ pid, ppid,
-+ pid_alive(p) && p->ptrace ? get_task_pid(p->parent) : 0,
- p->uid, p->euid, p->suid, p->fsuid,
- p->gid, p->egid, p->sgid, p->fsgid);
- read_unlock(&tasklist_lock);
-@@ -199,6 +206,18 @@ static inline char * task_state(struct t
- put_group_info(group_info);
-
- buffer += sprintf(buffer, "\n");
-+
-+#ifdef CONFIG_VE
-+ buffer += sprintf(buffer,
-+ "envID:\t%d\n"
-+ "VPid:\t%d\n"
-+ "PNState:\t%u\n"
-+ "StopState:\t%u\n",
-+ VE_TASK_INFO(p)->owner_env->veid,
-+ virt_pid(p),
-+ p->pn_state,
-+ p->stopped_state);
-+#endif
- return buffer;
- }
-
-@@ -244,7 +263,7 @@ static void collect_sigign_sigcatch(stru
-
- static inline char * task_sig(struct task_struct *p, char *buffer)
- {
-- sigset_t pending, shpending, blocked, ignored, caught;
-+ sigset_t pending, shpending, blocked, ignored, caught, saved;
- int num_threads = 0;
- unsigned long qsize = 0;
- unsigned long qlim = 0;
-@@ -254,6 +273,7 @@ static inline char * task_sig(struct tas
- sigemptyset(&blocked);
- sigemptyset(&ignored);
- sigemptyset(&caught);
-+ sigemptyset(&saved);
-
- /* Gather all the data with the appropriate locks held */
- read_lock(&tasklist_lock);
-@@ -262,6 +282,7 @@ static inline char * task_sig(struct tas
- pending = p->pending.signal;
- shpending = p->signal->shared_pending.signal;
- blocked = p->blocked;
-+ saved = p->saved_sigmask;
- collect_sigign_sigcatch(p, &ignored, &caught);
- num_threads = atomic_read(&p->signal->count);
- qsize = atomic_read(&p->user->sigpending);
-@@ -279,6 +300,7 @@ static inline char * task_sig(struct tas
- buffer = render_sigset_t("SigBlk:\t", &blocked, buffer);
- buffer = render_sigset_t("SigIgn:\t", &ignored, buffer);
- buffer = render_sigset_t("SigCgt:\t", &caught, buffer);
-+ buffer = render_sigset_t("SigSvd:\t", &saved, buffer);
-
- return buffer;
- }
-@@ -293,10 +315,27 @@ static inline char *task_cap(struct task
- cap_t(p->cap_effective));
- }
-
-+#ifdef CONFIG_USER_RESOURCE
-+static inline void ub_dump_task_info(struct task_struct *tsk,
-+ char *stsk, int ltsk, char *smm, int lmm)
-+{
-+ print_ub_uid(tsk->task_bc.task_ub, stsk, ltsk);
-+ task_lock(tsk);
-+ if (tsk->mm)
-+ print_ub_uid(tsk->mm->mm_ub, smm, lmm);
-+ else
-+ strncpy(smm, "N/A", lmm);
-+ task_unlock(tsk);
-+}
-+#endif
-+
- int proc_pid_status(struct task_struct *task, char * buffer)
- {
- char * orig = buffer;
- struct mm_struct *mm = get_task_mm(task);
-+#ifdef CONFIG_USER_RESOURCE
-+ char tsk_ub_info[64], mm_ub_info[64];
-+#endif
-
- buffer = task_name(task, buffer);
- buffer = task_state(task, buffer);
-@@ -311,6 +350,14 @@ int proc_pid_status(struct task_struct *
- #if defined(CONFIG_S390)
- buffer = task_show_regs(task, buffer);
- #endif
-+#ifdef CONFIG_USER_RESOURCE
-+ ub_dump_task_info(task,
-+ tsk_ub_info, sizeof(tsk_ub_info),
-+ mm_ub_info, sizeof(mm_ub_info));
-+
-+ buffer += sprintf(buffer, "TaskUB:\t%s\n", tsk_ub_info);
-+ buffer += sprintf(buffer, "MMUB:\t%s\n", mm_ub_info);
-+#endif
- return buffer - orig;
- }
-
-@@ -333,6 +380,10 @@ static int do_task_stat(struct task_stru
- DEFINE_KTIME(it_real_value);
- struct task_struct *t;
- char tcomm[sizeof(task->comm)];
-+#ifdef CONFIG_USER_RESOURCE
-+ char ub_task_info[64];
-+ char ub_mm_info[64];
-+#endif
-
- state = *get_task_state(task);
- vsize = eip = esp = 0;
-@@ -370,11 +421,12 @@ static int do_task_stat(struct task_stru
- }
- if (task->signal) {
- if (task->signal->tty) {
-- tty_pgrp = task->signal->tty->pgrp;
-+ tty_pgrp = pid_type_to_vpid(PIDTYPE_PGID,
-+ task->signal->tty->pgrp);
- tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
- }
-- pgid = process_group(task);
-- sid = task->signal->session;
-+ pgid = get_task_pgid(task);
-+ sid = get_task_sid(task);
- cmin_flt = task->signal->cmin_flt;
- cmaj_flt = task->signal->cmaj_flt;
- cutime = task->signal->cutime;
-@@ -388,7 +440,7 @@ static int do_task_stat(struct task_stru
- }
- it_real_value = task->signal->real_timer.expires;
- }
-- ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
-+ ppid = get_task_ppid(task);
- read_unlock(&tasklist_lock);
-
- if (!whole || num_threads<2)
-@@ -407,14 +459,34 @@ static int do_task_stat(struct task_stru
-
- /* Temporary variable needed for gcc-2.96 */
- /* convert timespec -> nsec*/
-+#ifndef CONFIG_VE
- start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
- + task->start_time.tv_nsec;
-+#else
-+ start_time = (unsigned long long)(task->start_time.tv_sec -
-+ get_exec_env()->init_entry->start_time.tv_sec) *
-+ NSEC_PER_SEC + task->start_time.tv_nsec -
-+ get_exec_env()->init_entry->start_time.tv_nsec;
-+#endif
- /* convert nsec -> ticks */
- start_time = nsec_to_clock_t(start_time);
-
-+#ifdef CONFIG_USER_RESOURCE
-+ ub_dump_task_info(task,
-+ ub_task_info, sizeof(ub_task_info),
-+ ub_mm_info, sizeof(ub_mm_info));
-+#endif
-+
- res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
- %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
--%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
-+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu"
-+#ifdef CONFIG_VE
-+"0 0 0 0 0 0 0 0 %d %u"
-+#endif
-+#ifdef CONFIG_USER_RESOURCE
-+ " %s %s"
-+#endif
-+ "\n",
- task->pid,
- tcomm,
- state,
-@@ -459,7 +531,16 @@ static int do_task_stat(struct task_stru
- task->exit_signal,
- task_cpu(task),
- task->rt_priority,
-- task->policy);
-+ task->policy
-+#ifdef CONFIG_VE
-+ , virt_pid(task),
-+ VEID(VE_TASK_INFO(task)->owner_env)
-+#endif
-+#ifdef CONFIG_USER_RESOURCE
-+ , ub_task_info,
-+ ub_mm_info
-+#endif
-+ );
- if(mm)
- mmput(mm);
- return res;
-diff -upr linux-2.6.16.orig/fs/proc/base.c linux-2.6.16-026test009/fs/proc/base.c
---- linux-2.6.16.orig/fs/proc/base.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/base.c 2006-04-19 15:02:12.000000000 +0400
-@@ -291,22 +291,25 @@ static int proc_fd_link(struct inode *in
- struct files_struct *files;
- struct file *file;
- int fd = proc_type(inode) - PROC_TID_FD_DIR;
-+ int err = -ENOENT;
-
- files = get_files_struct(task);
- if (files) {
- rcu_read_lock();
- file = fcheck_files(files, fd);
- if (file) {
-- *mnt = mntget(file->f_vfsmnt);
-- *dentry = dget(file->f_dentry);
-- rcu_read_unlock();
-- put_files_struct(files);
-- return 0;
-+ if (d_root_check(file->f_dentry, file->f_vfsmnt)) {
-+ err = -EACCES;
-+ } else {
-+ *mnt = mntget(file->f_vfsmnt);
-+ *dentry = dget(file->f_dentry);
-+ err = 0;
-+ }
- }
- rcu_read_unlock();
- put_files_struct(files);
- }
-- return -ENOENT;
-+ return err;
- }
-
- static struct fs_struct *get_fs_struct(struct task_struct *task)
-@@ -326,10 +329,12 @@ static int proc_cwd_link(struct inode *i
- int result = -ENOENT;
- if (fs) {
- read_lock(&fs->lock);
-- *mnt = mntget(fs->pwdmnt);
-- *dentry = dget(fs->pwd);
-+ result = d_root_check(fs->pwd, fs->pwdmnt);
-+ if (!result) {
-+ *mnt = mntget(fs->pwdmnt);
-+ *dentry = dget(fs->pwd);
-+ }
- read_unlock(&fs->lock);
-- result = 0;
- put_fs_struct(fs);
- }
- return result;
-@@ -579,19 +584,21 @@ static int proc_check_root(struct inode
- return proc_check_chroot(root, vfsmnt);
- }
-
--static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
-+static int proc_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
-- if (generic_permission(inode, mask, NULL) != 0)
-+ if (generic_permission(inode, mask, NULL, perm) != 0)
- return -EACCES;
- return proc_check_root(inode);
- }
-
--static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
-+static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- struct dentry *root;
- struct vfsmount *vfsmnt;
-
-- if (generic_permission(inode, mask, NULL) != 0)
-+ if (generic_permission(inode, mask, NULL, perm) != 0)
- return -EACCES;
-
- if (proc_task_root_link(inode, &root, &vfsmnt))
-@@ -1303,6 +1310,10 @@ static struct inode *proc_pid_make_inode
- struct inode * inode;
- struct proc_inode *ei;
-
-+ if (!ve_accessible(VE_TASK_INFO(task)->owner_env,
-+ VE_OWNER_FSTYPE(sb->s_type)))
-+ return NULL;
-+
- /* We need a new inode */
-
- inode = new_inode(sb);
-@@ -1406,6 +1417,10 @@ static void pid_base_iput(struct dentry
- spin_lock(&task->proc_lock);
- if (task->proc_dentry == dentry)
- task->proc_dentry = NULL;
-+#ifdef CONFIG_VE
-+ if (VE_TASK_INFO(task)->glob_proc_dentry == dentry)
-+ VE_TASK_INFO(task)->glob_proc_dentry = NULL;
-+#endif
- spin_unlock(&task->proc_lock);
- iput(inode);
- }
-@@ -1879,14 +1894,14 @@ static int proc_self_readlink(struct den
- int buflen)
- {
- char tmp[30];
-- sprintf(tmp, "%d", current->tgid);
-+ sprintf(tmp, "%d", get_task_tgid(current));
- return vfs_readlink(dentry,buffer,buflen,tmp);
- }
-
- static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
- {
- char tmp[30];
-- sprintf(tmp, "%d", current->tgid);
-+ sprintf(tmp, "%d", get_task_tgid(current));
- return ERR_PTR(vfs_follow_link(nd,tmp));
- }
-
-@@ -1911,11 +1926,8 @@ static struct inode_operations proc_self
- * of PIDTYPE_PID.
- */
-
--struct dentry *proc_pid_unhash(struct task_struct *p)
-+struct dentry *__proc_pid_unhash(struct task_struct *p, struct dentry *proc_dentry)
- {
-- struct dentry *proc_dentry;
--
-- proc_dentry = p->proc_dentry;
- if (proc_dentry != NULL) {
-
- spin_lock(&dcache_lock);
-@@ -1933,6 +1945,14 @@ struct dentry *proc_pid_unhash(struct ta
- return proc_dentry;
- }
-
-+void proc_pid_unhash(struct task_struct *p, struct dentry *pd[2])
-+{
-+ pd[0] = __proc_pid_unhash(p, p->proc_dentry);
-+#ifdef CONFIG_VE
-+ pd[1] = __proc_pid_unhash(p, VE_TASK_INFO(p)->glob_proc_dentry);
-+#endif
-+}
-+
- /**
- * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
- * @proc_dentry: directoy to prune.
-@@ -1940,7 +1960,7 @@ struct dentry *proc_pid_unhash(struct ta
- * Shrink the /proc directory that was used by the just killed thread.
- */
-
--void proc_pid_flush(struct dentry *proc_dentry)
-+void __proc_pid_flush(struct dentry *proc_dentry)
- {
- might_sleep();
- if(proc_dentry != NULL) {
-@@ -1949,12 +1969,21 @@ void proc_pid_flush(struct dentry *proc_
- }
- }
-
-+void proc_pid_flush(struct dentry *proc_dentry[2])
-+{
-+ __proc_pid_flush(proc_dentry[0]);
-+#ifdef CONFIG_VE
-+ __proc_pid_flush(proc_dentry[1]);
-+#endif
-+}
-+
- /* SMP-safe */
- struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
- {
- struct task_struct *task;
- struct inode *inode;
- struct proc_inode *ei;
-+ struct dentry *pd[2];
- unsigned tgid;
- int died;
-
-@@ -1978,7 +2007,19 @@ struct dentry *proc_pid_lookup(struct in
- goto out;
-
- read_lock(&tasklist_lock);
-- task = find_task_by_pid(tgid);
-+ task = find_task_by_pid_ve(tgid);
-+ /* In theory we are allowed to look up both /proc/VIRT_PID and
-+ * /proc/GLOBAL_PID inside a VE. However, the current /proc
-+ * implementation cannot maintain two references to one task, so we
-+ * have to prohibit /proc/GLOBAL_PID.
-+ */
-+ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tgid)) {
-+ /* However, VE_ENTERed tasks are an exception: they use global
-+ * pids.
-+ */
-+ if (virt_pid(task) != tgid)
-+ task = NULL;
-+ }
- if (task)
- get_task_struct(task);
- read_unlock(&tasklist_lock);
-@@ -2007,16 +2048,23 @@ struct dentry *proc_pid_lookup(struct in
- died = 0;
- d_add(dentry, inode);
- spin_lock(&task->proc_lock);
-+#ifdef CONFIG_VE
-+ if (ve_is_super(VE_OWNER_FSTYPE(inode->i_sb->s_type)))
-+ VE_TASK_INFO(task)->glob_proc_dentry = dentry;
-+ else
-+ task->proc_dentry = dentry;
-+#else
- task->proc_dentry = dentry;
-+#endif
- if (!pid_alive(task)) {
-- dentry = proc_pid_unhash(task);
-+ proc_pid_unhash(task, pd);
- died = 1;
- }
- spin_unlock(&task->proc_lock);
-
- put_task_struct(task);
- if (died) {
-- proc_pid_flush(dentry);
-+ proc_pid_flush(pd);
- goto out;
- }
- return NULL;
-@@ -2037,7 +2085,12 @@ static struct dentry *proc_task_lookup(s
- goto out;
-
- read_lock(&tasklist_lock);
-- task = find_task_by_pid(tid);
-+ task = find_task_by_pid_ve(tid);
-+ /* See comment above in similar place. */
-+ if (task && !ve_is_super(get_exec_env()) && !is_virtual_pid(tid)) {
-+ if (virt_pid(task) != tid)
-+ task = NULL;
-+ }
- if (task)
- get_task_struct(task);
- read_unlock(&tasklist_lock);
-@@ -2081,7 +2134,8 @@ out:
- * tasklist lock while doing this, and we must release it before
- * we actually do the filldir itself, so we use a temp buffer..
- */
--static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
-+static int get_tgid_list(int index, unsigned long version, unsigned int *tgids,
-+ struct ve_struct *ve)
- {
- struct task_struct *p;
- int nr_tgids = 0;
-@@ -2090,7 +2144,11 @@ static int get_tgid_list(int index, unsi
- read_lock(&tasklist_lock);
- p = NULL;
- if (version) {
-- p = find_task_by_pid(version);
-+ struct ve_struct *oldve;
-+
-+ oldve = set_exec_env(ve);
-+ p = find_task_by_pid_ve(version);
-+ (void)set_exec_env(oldve);
- if (p && !thread_group_leader(p))
- p = NULL;
- }
-@@ -2098,10 +2156,10 @@ static int get_tgid_list(int index, unsi
- if (p)
- index = 0;
- else
-- p = next_task(&init_task);
-+ p = __first_task_ve(ve);
-
-- for ( ; p != &init_task; p = next_task(p)) {
-- int tgid = p->pid;
-+ for ( ; p != NULL; p = __next_task_ve(ve, p)) {
-+ int tgid = get_task_pid_ve(p, ve);
- if (!pid_alive(p))
- continue;
- if (--index >= 0)
-@@ -2134,7 +2192,7 @@ static int get_tid_list(int index, unsig
- * via next_thread().
- */
- if (pid_alive(task)) do {
-- int tid = task->pid;
-+ int tid = get_task_pid(task);
-
- if (--index >= 0)
- continue;
-@@ -2171,7 +2229,8 @@ int proc_pid_readdir(struct file * filp,
- next_tgid = filp->f_version;
- filp->f_version = 0;
- for (;;) {
-- nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
-+ nr_tgids = get_tgid_list(nr, next_tgid, tgid_array,
-+ filp->f_dentry->d_sb->s_type->owner_env);
- if (!nr_tgids) {
- /* no more entries ! */
- break;
-diff -upr linux-2.6.16.orig/fs/proc/generic.c linux-2.6.16-026test009/fs/proc/generic.c
---- linux-2.6.16.orig/fs/proc/generic.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/generic.c 2006-04-19 15:02:12.000000000 +0400
-@@ -10,7 +10,9 @@
-
- #include <linux/errno.h>
- #include <linux/time.h>
-+#include <linux/fs.h>
- #include <linux/proc_fs.h>
-+#include <linux/ve_owner.h>
- #include <linux/stat.h>
- #include <linux/module.h>
- #include <linux/mount.h>
-@@ -29,6 +31,8 @@ static ssize_t proc_file_write(struct fi
- size_t count, loff_t *ppos);
- static loff_t proc_file_lseek(struct file *, loff_t, int);
-
-+static DEFINE_RWLOCK(proc_tree_lock);
-+
- int proc_match(int len, const char *name, struct proc_dir_entry *de)
- {
- if (de->namelen != len)
-@@ -229,6 +233,7 @@ proc_file_lseek(struct file *file, loff_
- return retval;
- }
-
-+#ifndef CONFIG_VE
- static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
- {
- struct inode *inode = dentry->d_inode;
-@@ -261,9 +266,12 @@ static int proc_getattr(struct vfsmount
- generic_fillattr(inode, stat);
- return 0;
- }
-+#endif
-
- static struct inode_operations proc_file_inode_operations = {
-+#ifndef CONFIG_VE
- .setattr = proc_notify_change,
-+#endif
- };
-
- /*
-@@ -271,14 +279,20 @@ static struct inode_operations proc_file
- * returns the struct proc_dir_entry for "/proc/tty/driver", and
- * returns "serial" in residual.
- */
--static int xlate_proc_name(const char *name,
-+static int __xlate_proc_name(struct proc_dir_entry *root, const char *name,
- struct proc_dir_entry **ret, const char **residual)
- {
- const char *cp = name, *next;
- struct proc_dir_entry *de;
- int len;
-
-- de = &proc_root;
-+ if (*ret) {
-+ de_get(*ret);
-+ return 0;
-+ }
-+
-+ read_lock(&proc_tree_lock);
-+ de = root;
- while (1) {
- next = strchr(cp, '/');
- if (!next)
-@@ -289,15 +303,35 @@ static int xlate_proc_name(const char *n
- if (proc_match(len, cp, de))
- break;
- }
-- if (!de)
-+ if (!de) {
-+ read_unlock(&proc_tree_lock);
- return -ENOENT;
-+ }
- cp += len + 1;
- }
- *residual = cp;
-- *ret = de;
-+ *ret = de_get(de);
-+ read_unlock(&proc_tree_lock);
- return 0;
- }
-
-+#ifndef CONFIG_VE
-+#define xlate_proc_loc_name xlate_proc_name
-+#else
-+static int xlate_proc_loc_name(const char *name,
-+ struct proc_dir_entry **ret, const char **residual)
-+{
-+ return __xlate_proc_name(get_exec_env()->proc_root,
-+ name, ret, residual);
-+}
-+#endif
-+
-+static int xlate_proc_name(const char *name,
-+ struct proc_dir_entry **ret, const char **residual)
-+{
-+ return __xlate_proc_name(&proc_root, name, ret, residual);
-+}
-+
- static DEFINE_IDR(proc_inum_idr);
- static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
-
-@@ -369,6 +403,20 @@ static struct dentry_operations proc_den
- .d_delete = proc_delete_dentry,
- };
-
-+static struct proc_dir_entry *__proc_lookup(struct proc_dir_entry *dir,
-+ struct dentry *d)
-+{
-+ struct proc_dir_entry *de;
-+
-+ for (de = dir->subdir; de; de = de->next) {
-+ if (de->namelen != d->d_name.len)
-+ continue;
-+ if (!memcmp(d->d_name.name, de->name, de->namelen))
-+ break;
-+ }
-+ return de_get(de);
-+}
-+
- /*
- * Don't create negative dentries here, return -ENOENT by hand
- * instead.
-@@ -376,34 +424,147 @@ static struct dentry_operations proc_den
- struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
- {
- struct inode *inode = NULL;
-- struct proc_dir_entry * de;
-+ struct proc_dir_entry *lde, *gde;
- int error = -ENOENT;
-
- lock_kernel();
-- de = PDE(dir);
-- if (de) {
-- for (de = de->subdir; de ; de = de->next) {
-- if (de->namelen != dentry->d_name.len)
-- continue;
-- if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-- unsigned int ino = de->low_ino;
-+ lde = LPDE(dir);
-
-- error = -EINVAL;
-- inode = proc_get_inode(dir->i_sb, ino, de);
-- break;
-- }
-- }
-- }
-+ if (!lde)
-+ goto out;
-+
-+ read_lock(&proc_tree_lock);
-+ lde = __proc_lookup(lde, dentry);
-+#ifdef CONFIG_VE
-+ gde = GPDE(dir);
-+ if (gde)
-+ gde = __proc_lookup(gde, dentry);
-+#else
-+ gde = NULL;
-+#endif
-+ read_unlock(&proc_tree_lock);
-+
-+ /*
-+ * There are the following possible cases after lookup:
-+ *
-+ * lde gde
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ * NULL NULL ENOENT
-+ * loc NULL found in local tree
-+ * loc glob found in both trees
-+ * NULL glob found in global tree
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ *
-+ * The inode is initialized as follows after lookup:
-+ *
-+ * inode->lde inode->gde
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ * loc NULL in local tree
-+ * loc glob both trees
-+ * glob glob global tree
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ * i.e. inode->lde is always initialized
-+ */
-+
-+ if (lde == NULL && gde == NULL)
-+ goto out;
-+
-+ if (lde != NULL)
-+ inode = proc_get_inode(dir->i_sb, lde->low_ino, lde);
-+ else
-+ inode = proc_get_inode(dir->i_sb, gde->low_ino, gde);
-+
-+ /*
-+ * We can sleep in proc_get_inode(), but since i_sem is held,
-+ * no one else can set up GPDE/LPDE on this inode.
-+ */
-+ if (!inode)
-+ goto out_put;
-+
-+#ifdef CONFIG_VE
-+ GPDE(inode) = de_get(gde);
-+ if (gde)
-+ __module_get(gde->owner);
-+
-+ /* If the dentry is found in both trees and it is a directory,
-+ * then the inode's nlink count must be altered, because the local
-+ * and global subtrees may differ.
-+ * On the other hand, they may intersect, so the actual nlink
-+ * value is difficult to calculate - an upper estimate is used
-+ * instead.
-+ * A dentry found only in the global tree must not be writable
-+ * in a non-super VE.
-+ */
-+ if (lde && gde && lde != gde && gde->nlink > 1)
-+ inode->i_nlink += gde->nlink - 2;
-+ if (lde == NULL && !ve_is_super(
-+ VE_OWNER_FSTYPE(dir->i_sb->s_type)))
-+ inode->i_mode &= ~S_IWUGO;
-+#endif
- unlock_kernel();
-+ dentry->d_op = &proc_dentry_operations;
-+ d_add(dentry, inode);
-+ de_put(lde);
-+ de_put(gde);
-+ return NULL;
-
-- if (inode) {
-- dentry->d_op = &proc_dentry_operations;
-- d_add(dentry, inode);
-- return NULL;
-- }
-+out_put:
-+ de_put(lde);
-+ de_put(gde);
-+out:
-+ unlock_kernel();
- return ERR_PTR(error);
- }
-
-+struct proc_dir_reader {
-+ struct list_head list;
-+ struct proc_dir_entry *next;
-+};
-+
-+static LIST_HEAD(proc_dir_readers);
-+static DEFINE_SPINLOCK(proc_dir_readers_lock);
-+
-+static inline void add_reader(struct proc_dir_reader *r,
-+ struct proc_dir_entry *cur)
-+{
-+ r->next = cur->next;
-+ spin_lock(&proc_dir_readers_lock);
-+ list_add(&r->list, &proc_dir_readers);
-+ spin_unlock(&proc_dir_readers_lock);
-+}
-+
-+static inline struct proc_dir_entry *del_reader(struct proc_dir_reader *r)
-+{
-+ spin_lock(&proc_dir_readers_lock);
-+ list_del(&r->list);
-+ spin_unlock(&proc_dir_readers_lock);
-+ return r->next;
-+}
-+
-+static void notify_readers(struct proc_dir_entry *de)
-+{
-+ struct proc_dir_reader *r;
-+
-+ /* lockless since proc_tree_lock is taken for writing */
-+ list_for_each_entry(r, &proc_dir_readers, list)
-+ if (r->next == de)
-+ r->next = de->next;
-+}
-+
-+static inline int in_tree(struct proc_dir_entry *de, struct proc_dir_entry *dir)
-+{
-+ struct proc_dir_entry *gde;
-+
-+ for (gde = dir->subdir; gde; gde = gde->next) {
-+ if (de->namelen != gde->namelen)
-+ continue;
-+ if (memcmp(de->name, gde->name, gde->namelen))
-+ continue;
-+ return 1;
-+ }
-+ return 0;
-+}
-+
- /*
- * This returns non-zero if at EOF, so that the /proc
- * root directory can use this and check if it should
-@@ -421,6 +582,7 @@ int proc_readdir(struct file * filp,
- int i;
- struct inode *inode = filp->f_dentry->d_inode;
- int ret = 0;
-+ struct proc_dir_reader this;
-
- lock_kernel();
-
-@@ -447,13 +609,12 @@ int proc_readdir(struct file * filp,
- filp->f_pos++;
- /* fall through */
- default:
-+ read_lock(&proc_tree_lock);
- de = de->subdir;
- i -= 2;
- for (;;) {
-- if (!de) {
-- ret = 1;
-- goto out;
-- }
-+ if (!de)
-+ goto chk_global;
- if (!i)
- break;
- de = de->next;
-@@ -461,12 +622,60 @@ int proc_readdir(struct file * filp,
- }
-
- do {
-- if (filldir(dirent, de->name, de->namelen, filp->f_pos,
-- de->low_ino, de->mode >> 12) < 0)
-+ de_get(de);
-+ add_reader(&this, de);
-+ read_unlock(&proc_tree_lock);
-+ ret = filldir(dirent, de->name, de->namelen,
-+ filp->f_pos, de->low_ino,
-+ de->mode >> 12);
-+ read_lock(&proc_tree_lock);
-+ de_put(de);
-+ de = del_reader(&this);
-+ if (ret < 0) {
-+ read_unlock(&proc_tree_lock);
-+ ret = 0;
- goto out;
-+ }
- filp->f_pos++;
-- de = de->next;
- } while (de);
-+chk_global:
-+#ifdef CONFIG_VE
-+ de = GPDE(inode);
-+ if (de == NULL)
-+ goto done;
-+
-+ de = de->subdir;
-+ while (de) {
-+ if (in_tree(de, LPDE(inode))) {
-+ de = de->next;
-+ continue;
-+ }
-+
-+ if (i > 0) {
-+ i--;
-+ de = de->next;
-+ continue;
-+ }
-+
-+ de_get(de);
-+ add_reader(&this, de);
-+ read_unlock(&proc_tree_lock);
-+ ret = filldir(dirent, de->name, de->namelen,
-+ filp->f_pos, de->low_ino,
-+ de->mode >> 12);
-+ read_lock(&proc_tree_lock);
-+ de_put(de);
-+ de = del_reader(&this);
-+ if (ret < 0) {
-+ read_unlock(&proc_tree_lock);
-+ ret = 0;
-+ goto out;
-+ }
-+ filp->f_pos++;
-+ }
-+done:
-+#endif
-+ read_unlock(&proc_tree_lock);
- }
- ret = 1;
- out: unlock_kernel();
-@@ -488,8 +697,10 @@ static struct file_operations proc_dir_o
- */
- static struct inode_operations proc_dir_inode_operations = {
- .lookup = proc_lookup,
-+#ifndef CONFIG_VE
- .getattr = proc_getattr,
- .setattr = proc_notify_change,
-+#endif
- };
-
- static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
-@@ -499,10 +710,20 @@ static int proc_register(struct proc_dir
- i = get_inode_number();
- if (i == 0)
- return -EAGAIN;
-+
-+ write_lock(&proc_tree_lock);
-+ if (dir->deleted) {
-+ write_unlock(&proc_tree_lock);
-+ release_inode_number(i);
-+ return -ENOENT;
-+ }
-+
- dp->low_ino = i;
- dp->next = dir->subdir;
-- dp->parent = dir;
-+ dp->parent = de_get(dir);
- dir->subdir = dp;
-+ write_unlock(&proc_tree_lock);
-+
- if (S_ISDIR(dp->mode)) {
- if (dp->proc_iops == NULL) {
- dp->proc_fops = &proc_dir_operations;
-@@ -556,24 +777,26 @@ static struct proc_dir_entry *proc_creat
- mode_t mode,
- nlink_t nlink)
- {
-- struct proc_dir_entry *ent = NULL;
-+ struct proc_dir_entry *ent;
- const char *fn = name;
- int len;
-
- /* make sure name is valid */
-- if (!name || !strlen(name)) goto out;
-+ if (!name || !strlen(name))
-+ goto out;
-
-- if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
-+ if (xlate_proc_loc_name(name, parent, &fn) != 0)
- goto out;
-
- /* At this point there must not be any '/' characters beyond *fn */
- if (strchr(fn, '/'))
-- goto out;
-+ goto out_put;
-
- len = strlen(fn);
-
- ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
-- if (!ent) goto out;
-+ if (!ent)
-+ goto out_put;
-
- memset(ent, 0, sizeof(struct proc_dir_entry));
- memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
-@@ -581,8 +804,13 @@ static struct proc_dir_entry *proc_creat
- ent->namelen = len;
- ent->mode = mode;
- ent->nlink = nlink;
-- out:
-+ atomic_set(&ent->count, 1);
- return ent;
-+
-+out_put:
-+ de_put(*parent);
-+out:
-+ return NULL;
- }
-
- struct proc_dir_entry *proc_symlink(const char *name,
-@@ -606,6 +834,7 @@ struct proc_dir_entry *proc_symlink(cons
- kfree(ent);
- ent = NULL;
- }
-+ de_put(parent);
- }
- return ent;
- }
-@@ -624,6 +853,7 @@ struct proc_dir_entry *proc_mkdir_mode(c
- kfree(ent);
- ent = NULL;
- }
-+ de_put(parent);
- }
- return ent;
- }
-@@ -662,9 +892,28 @@ struct proc_dir_entry *create_proc_entry
- kfree(ent);
- ent = NULL;
- }
-+ de_put(parent);
- }
- return ent;
- }
-+EXPORT_SYMBOL(remove_proc_glob_entry);
-+
-+struct proc_dir_entry *create_proc_glob_entry(const char *name, mode_t mode,
-+ struct proc_dir_entry *parent)
-+{
-+ const char *path;
-+ struct proc_dir_entry *ent;
-+
-+ path = name;
-+ if (xlate_proc_name(path, &parent, &name) != 0)
-+ return NULL;
-+
-+ ent = create_proc_entry(name, mode, parent);
-+ de_put(parent);
-+ return ent;
-+}
-+
-+EXPORT_SYMBOL(create_proc_glob_entry);
-
- void free_proc_entry(struct proc_dir_entry *de)
- {
-@@ -684,20 +933,21 @@ void free_proc_entry(struct proc_dir_ent
- * Remove a /proc entry and free it if it's not currently in use.
- * If it is in use, we set the 'deleted' flag.
- */
--void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
-+static void __remove_proc_entry(const char *name, struct proc_dir_entry *parent)
- {
- struct proc_dir_entry **p;
- struct proc_dir_entry *de;
- const char *fn = name;
- int len;
-
-- if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
-- goto out;
- len = strlen(fn);
-+ write_lock(&proc_tree_lock);
- for (p = &parent->subdir; *p; p=&(*p)->next ) {
- if (!proc_match(len, fn, *p))
- continue;
-+
- de = *p;
-+ notify_readers(de);
- *p = de->next;
- de->next = NULL;
- if (S_ISDIR(de->mode))
-@@ -705,15 +955,43 @@ void remove_proc_entry(const char *name,
- proc_kill_inodes(de);
- de->nlink = 0;
- WARN_ON(de->subdir);
-- if (!atomic_read(&de->count))
-- free_proc_entry(de);
-- else {
-- de->deleted = 1;
-- printk("remove_proc_entry: %s/%s busy, count=%d\n",
-- parent->name, de->name, atomic_read(&de->count));
-- }
-+ de->deleted = 1;
-+ de_put(de);
-+ de_put(parent);
- break;
- }
--out:
-- return;
-+ write_unlock(&proc_tree_lock);
-+}
-+
-+void remove_proc_loc_entry(const char *name, struct proc_dir_entry *parent)
-+{
-+ const char *path;
-+
-+ path = name;
-+ if (xlate_proc_loc_name(path, &parent, &name) != 0)
-+ return;
-+
-+ __remove_proc_entry(name, parent);
-+ de_put(parent);
-+}
-+
-+void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent)
-+{
-+ const char *path;
-+
-+ path = name;
-+ if (xlate_proc_name(path, &parent, &name) != 0)
-+ return;
-+
-+ __remove_proc_entry(name, parent);
-+ de_put(parent);
-+}
-+
-+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
-+{
-+ remove_proc_loc_entry(name, parent);
-+#ifdef CONFIG_VE
-+ if (ve_is_super(get_exec_env()))
-+ remove_proc_glob_entry(name, parent);
-+#endif
- }
-diff -upr linux-2.6.16.orig/fs/proc/inode.c linux-2.6.16-026test009/fs/proc/inode.c
---- linux-2.6.16.orig/fs/proc/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -8,6 +8,7 @@
- #include <linux/proc_fs.h>
- #include <linux/kernel.h>
- #include <linux/mm.h>
-+#include <linux/ve_owner.h>
- #include <linux/string.h>
- #include <linux/stat.h>
- #include <linux/file.h>
-@@ -21,34 +22,25 @@
-
- #include "internal.h"
-
--static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
--{
-- if (de)
-- atomic_inc(&de->count);
-- return de;
--}
--
- /*
- * Decrements the use count and checks for deferred deletion.
- */
--static void de_put(struct proc_dir_entry *de)
-+void de_put(struct proc_dir_entry *de)
- {
- if (de) {
-- lock_kernel();
- if (!atomic_read(&de->count)) {
- printk("de_put: entry %s already free!\n", de->name);
-- unlock_kernel();
- return;
- }
-
- if (atomic_dec_and_test(&de->count)) {
-- if (de->deleted) {
-- printk("de_put: deferred delete of %s\n",
-+ if (unlikely(!de->deleted)) {
-+ printk("de_put: early delete of %s\n",
- de->name);
-- free_proc_entry(de);
-+ return;
- }
-+ free_proc_entry(de);
- }
-- unlock_kernel();
- }
- }
-
-@@ -68,12 +60,19 @@ static void proc_delete_inode(struct ino
- put_task_struct(tsk);
-
- /* Let go of any associated proc directory entry */
-- de = PROC_I(inode)->pde;
-+ de = LPDE(inode);
- if (de) {
- if (de->owner)
- module_put(de->owner);
- de_put(de);
- }
-+#ifdef CONFIG_VE
-+ de = GPDE(inode);
-+ if (de) {
-+ module_put(de->owner);
-+ de_put(de);
-+ }
-+#endif
- clear_inode(inode);
- }
-
-@@ -100,6 +99,9 @@ static struct inode *proc_alloc_inode(st
- ei->pde = NULL;
- inode = &ei->vfs_inode;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-+#ifdef CONFIG_VE
-+ GPDE(inode) = NULL;
-+#endif
- return inode;
- }
-
-@@ -209,6 +211,12 @@ int proc_fill_super(struct super_block *
- s->s_root = d_alloc_root(root_inode);
- if (!s->s_root)
- goto out_no_root;
-+#ifdef CONFIG_VE
-+ LPDE(root_inode) = de_get(get_exec_env()->proc_root);
-+ GPDE(root_inode) = &proc_root;
-+#else
-+ LPDE(root_inode) = &proc_root;
-+#endif
- return 0;
-
- out_no_root:
-diff -upr linux-2.6.16.orig/fs/proc/kmsg.c linux-2.6.16-026test009/fs/proc/kmsg.c
---- linux-2.6.16.orig/fs/proc/kmsg.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/kmsg.c 2006-04-19 15:02:12.000000000 +0400
-@@ -11,6 +11,7 @@
- #include <linux/kernel.h>
- #include <linux/poll.h>
- #include <linux/fs.h>
-+#include <linux/veprintk.h>
-
- #include <asm/uaccess.h>
- #include <asm/io.h>
-@@ -40,7 +41,7 @@ static ssize_t kmsg_read(struct file *fi
-
- static unsigned int kmsg_poll(struct file *file, poll_table *wait)
- {
-- poll_wait(file, &log_wait, wait);
-+ poll_wait(file, &ve_log_wait, wait);
- if (do_syslog(9, NULL, 0))
- return POLLIN | POLLRDNORM;
- return 0;
-diff -upr linux-2.6.16.orig/fs/proc/proc_misc.c linux-2.6.16-026test009/fs/proc/proc_misc.c
---- linux-2.6.16.orig/fs/proc/proc_misc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/proc_misc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -32,6 +32,7 @@
- #include <linux/pagemap.h>
- #include <linux/swap.h>
- #include <linux/slab.h>
-+#include <linux/virtinfo.h>
- #include <linux/smp.h>
- #include <linux/signal.h>
- #include <linux/module.h>
-@@ -45,6 +46,8 @@
- #include <linux/jiffies.h>
- #include <linux/sysrq.h>
- #include <linux/vmalloc.h>
-+#include <linux/version.h>
-+#include <linux/compile.h>
- #include <linux/crash_dump.h>
- #include <asm/uaccess.h>
- #include <asm/pgtable.h>
-@@ -53,8 +56,10 @@
- #include <asm/div64.h>
- #include "internal.h"
-
--#define LOAD_INT(x) ((x) >> FSHIFT)
--#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-+#ifdef CONFIG_FAIRSCHED
-+#include <linux/fairsched.h>
-+#endif
-+
- /*
- * Warning: stuff below (imported functions) assumes that its output will fit
- * into one page. For some of those functions it may be wrong. Moreover, we
-@@ -84,15 +89,33 @@ static int loadavg_read_proc(char *page,
- {
- int a, b, c;
- int len;
--
-- a = avenrun[0] + (FIXED_1/200);
-- b = avenrun[1] + (FIXED_1/200);
-- c = avenrun[2] + (FIXED_1/200);
-+ unsigned long __nr_running;
-+ int __nr_threads;
-+ unsigned long *__avenrun;
-+ struct ve_struct *ve;
-+
-+ ve = get_exec_env();
-+
-+ if (ve_is_super(ve)) {
-+ __avenrun = &avenrun[0];
-+ __nr_running = nr_running();
-+ __nr_threads = nr_threads;
-+ }
-+#ifdef CONFIG_VE
-+ else {
-+ __avenrun = &ve->avenrun[0];
-+ __nr_running = nr_running_ve(ve);
-+ __nr_threads = atomic_read(&ve->pcounter);
-+ }
-+#endif
-+ a = __avenrun[0] + (FIXED_1/200);
-+ b = __avenrun[1] + (FIXED_1/200);
-+ c = __avenrun[2] + (FIXED_1/200);
- len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
- LOAD_INT(a), LOAD_FRAC(a),
- LOAD_INT(b), LOAD_FRAC(b),
- LOAD_INT(c), LOAD_FRAC(c),
-- nr_running(), nr_threads, last_pid);
-+ __nr_running, __nr_threads, last_pid);
- return proc_calc_metrics(page, start, off, count, eof, len);
- }
-
-@@ -105,6 +128,13 @@ static int uptime_read_proc(char *page,
- cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
-
- do_posix_clock_monotonic_gettime(&uptime);
-+#ifdef CONFIG_VE
-+ if (!ve_is_super(get_exec_env())) {
-+ set_normalized_timespec(&uptime,
-+ uptime.tv_sec - get_exec_env()->start_timespec.tv_sec,
-+ uptime.tv_nsec - get_exec_env()->start_timespec.tv_nsec);
-+ }
-+#endif
- cputime_to_timespec(idletime, &idle);
- len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
- (unsigned long) uptime.tv_sec,
-@@ -118,35 +148,37 @@ static int uptime_read_proc(char *page,
- static int meminfo_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
- {
-- struct sysinfo i;
-+ struct meminfo mi;
- int len;
-- struct page_state ps;
-- unsigned long inactive;
-- unsigned long active;
-- unsigned long free;
-- unsigned long committed;
-- unsigned long allowed;
-+ unsigned long dummy;
- struct vmalloc_info vmi;
-- long cached;
-
-- get_page_state(&ps);
-- get_zone_counts(&active, &inactive, &free);
-+ get_page_state(&mi.ps);
-+ get_zone_counts(&mi.active, &mi.inactive, &dummy);
-
- /*
- * display in kilobytes.
- */
- #define K(x) ((x) << (PAGE_SHIFT - 10))
-- si_meminfo(&i);
-- si_swapinfo(&i);
-- committed = atomic_read(&vm_committed_space);
-- allowed = ((totalram_pages - hugetlb_total_pages())
-- * sysctl_overcommit_ratio / 100) + total_swap_pages;
-+ si_meminfo(&mi.si);
-+ si_swapinfo(&mi.si);
-+ mi.committed_space = atomic_read(&vm_committed_space);
-+ mi.swapcache = total_swapcache_pages;
-+ mi.cache = get_page_cache_size() - mi.swapcache - mi.si.bufferram;
-+ if (mi.cache < 0)
-+ mi.cache = 0;
-
-- cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
-- if (cached < 0)
-- cached = 0;
-+ mi.vmalloc_total = (VMALLOC_END - VMALLOC_START) >> PAGE_SHIFT;
-+ mi.allowed = ((totalram_pages - hugetlb_total_pages())
-+ * sysctl_overcommit_ratio / 100) + total_swap_pages;
-
- get_vmalloc_info(&vmi);
-+ mi.vmalloc_used = vmi.used >> PAGE_SHIFT;
-+ mi.vmalloc_largest = vmi.largest_chunk >> PAGE_SHIFT;
-+
-+ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
-+ & NOTIFY_FAIL)
-+ return -ENOMSG;
-
- /*
- * Tagged format, for easy grepping and expansion.
-@@ -175,29 +207,29 @@ static int meminfo_read_proc(char *page,
- "VmallocTotal: %8lu kB\n"
- "VmallocUsed: %8lu kB\n"
- "VmallocChunk: %8lu kB\n",
-- K(i.totalram),
-- K(i.freeram),
-- K(i.bufferram),
-- K(cached),
-- K(total_swapcache_pages),
-- K(active),
-- K(inactive),
-- K(i.totalhigh),
-- K(i.freehigh),
-- K(i.totalram-i.totalhigh),
-- K(i.freeram-i.freehigh),
-- K(i.totalswap),
-- K(i.freeswap),
-- K(ps.nr_dirty),
-- K(ps.nr_writeback),
-- K(ps.nr_mapped),
-- K(ps.nr_slab),
-- K(allowed),
-- K(committed),
-- K(ps.nr_page_table_pages),
-- (unsigned long)VMALLOC_TOTAL >> 10,
-- vmi.used >> 10,
-- vmi.largest_chunk >> 10
-+ K(mi.si.totalram),
-+ K(mi.si.freeram),
-+ K(mi.si.bufferram),
-+ K(mi.cache),
-+ K(mi.swapcache),
-+ K(mi.active),
-+ K(mi.inactive),
-+ K(mi.si.totalhigh),
-+ K(mi.si.freehigh),
-+ K(mi.si.totalram-mi.si.totalhigh),
-+ K(mi.si.freeram-mi.si.freehigh),
-+ K(mi.si.totalswap),
-+ K(mi.si.freeswap),
-+ K(mi.ps.nr_dirty),
-+ K(mi.ps.nr_writeback),
-+ K(mi.ps.nr_mapped),
-+ K(mi.ps.nr_slab),
-+ K(mi.allowed),
-+ K(mi.committed_space),
-+ K(mi.ps.nr_page_table_pages),
-+ K(mi.vmalloc_total),
-+ K(mi.vmalloc_used),
-+ K(mi.vmalloc_largest)
- );
-
- len += hugetlb_report_meminfo(page + len);
-@@ -237,8 +269,15 @@ static int version_read_proc(char *page,
- int count, int *eof, void *data)
- {
- int len;
-+ struct new_utsname *utsname = &ve_utsname;
-
-- strcpy(page, linux_banner);
-+ if (ve_is_super(get_exec_env()))
-+ strcpy(page, linux_banner);
-+ else
-+ sprintf(page, "Linux version %s ("
-+ LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") ("
-+ LINUX_COMPILER ") %s\n",
-+ utsname->release, utsname->version);
- len = strlen(page);
- return proc_calc_metrics(page, start, off, count, eof, len);
- }
-@@ -312,7 +351,7 @@ static void *devinfo_next(struct seq_fil
- case BLK_HDR:
- info->state = BLK_LIST;
- (*pos)++;
-- break;
-+ /*fallthrough*/
- case BLK_LIST:
- if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) {
- /*
-@@ -487,18 +526,15 @@ static struct file_operations proc_slabi
- };
- #endif
-
--static int show_stat(struct seq_file *p, void *v)
-+static void show_stat_ve0(struct seq_file *p)
- {
- int i;
-- unsigned long jif;
-+ struct page_state page_state;
- cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
- u64 sum = 0;
-
- user = nice = system = idle = iowait =
- irq = softirq = steal = cputime64_zero;
-- jif = - wall_to_monotonic.tv_sec;
-- if (wall_to_monotonic.tv_nsec)
-- --jif;
-
- for_each_cpu(i) {
- int j;
-@@ -552,9 +588,84 @@ static int show_stat(struct seq_file *p,
- for (i = 0; i < NR_IRQS; i++)
- seq_printf(p, " %u", kstat_irqs(i));
- #endif
-+ get_full_page_state(&page_state);
-+ seq_printf(p, "\nswap %lu %lu\n", page_state.pswpin, page_state.pswpout);
-+}
-+
-+#ifdef CONFIG_VE
-+/*
-+ * Print the CPU statistics of one VE: an aggregate "cpu" line followed by
-+ * one "cpuN" line for each CPU in the VE's online map, then fixed
-+ * "intr"/"swap" lines.
-+ *
-+ * @p:   seq_file receiving the text.
-+ * @env: VE whose per-CPU counters are reported.
-+ */
-+static void show_stat_ve(struct seq_file *p, struct ve_struct *env)
-+{
-+	cpumask_t online;
-+	u64 usr, nic, sys;
-+	cycles_t idl, iow;
-+	int cpu;
-+
-+	ve_cpu_online_map(env, &online);
-+
-+	/* First pass: sum every counter over the VE's CPUs. */
-+	usr = nic = sys = idl = iow = 0;
-+	for_each_cpu_mask(cpu, online) {
-+		usr += VE_CPU_STATS(env, cpu)->user;
-+		nic += VE_CPU_STATS(env, cpu)->nice;
-+		sys += VE_CPU_STATS(env, cpu)->system;
-+		idl += ve_sched_get_idle_time(env, cpu);
-+		iow += ve_sched_get_iowait_time(env, cpu);
-+	}
-+
-+	/* irq, softirq and steal are always reported as 0 for a VE. */
-+	seq_printf(p, "cpu %llu %llu %llu %llu %llu 0 0 0\n",
-+		(unsigned long long)cputime64_to_clock_t(usr),
-+		(unsigned long long)cputime64_to_clock_t(nic),
-+		(unsigned long long)cputime64_to_clock_t(sys),
-+		(unsigned long long)cycles_to_clocks(idl),
-+		(unsigned long long)cycles_to_clocks(iow));
-+
-+	/* Second pass: the same counters, one line per CPU. */
-+	for_each_cpu_mask(cpu, online) {
-+		usr = VE_CPU_STATS(env, cpu)->user;
-+		nic = VE_CPU_STATS(env, cpu)->nice;
-+		sys = VE_CPU_STATS(env, cpu)->system;
-+		idl = ve_sched_get_idle_time(env, cpu);
-+		iow = ve_sched_get_iowait_time(env, cpu);
-+		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu 0 0 0\n",
-+			cpu,
-+			(unsigned long long)cputime64_to_clock_t(usr),
-+			(unsigned long long)cputime64_to_clock_t(nic),
-+			(unsigned long long)cputime64_to_clock_t(sys),
-+			(unsigned long long)cycles_to_clocks(idl),
-+			(unsigned long long)cycles_to_clocks(iow));
-+	}
-+	seq_printf(p, "intr 0\nswap 0 0\n");
-+}
-+#endif
-+
-+int show_stat(struct seq_file *p, void *v)
-+{
-+ extern unsigned long total_forks;
-+ unsigned long seq, jif;
-+ struct ve_struct *env;
-+ unsigned long __nr_running, __nr_iowait;
-+
-+ do {
-+ seq = read_seqbegin(&xtime_lock);
-+ jif = - wall_to_monotonic.tv_sec;
-+ if (wall_to_monotonic.tv_nsec)
-+ --jif;
-+ } while (read_seqretry(&xtime_lock, seq));
-+
-+ env = get_exec_env();
-+ if (ve_is_super(env)) {
-+ show_stat_ve0(p);
-+ __nr_running = nr_running();
-+ __nr_iowait = nr_iowait();
-+ }
-+#ifdef CONFIG_VE
-+ else {
-+ show_stat_ve(p, env);
-+ __nr_running = nr_running_ve(env);
-+ __nr_iowait = nr_iowait_ve(env);
-+ }
-+#endif
-
- seq_printf(p,
-- "\nctxt %llu\n"
-+ "ctxt %llu\n"
- "btime %lu\n"
- "processes %lu\n"
- "procs_running %lu\n"
-@@ -562,8 +673,8 @@ static int show_stat(struct seq_file *p,
- nr_context_switches(),
- (unsigned long)jif,
- total_forks,
-- nr_running(),
-- nr_iowait());
-+ __nr_running,
-+ __nr_iowait);
-
- return 0;
- }
-@@ -652,7 +763,8 @@ static int cmdline_read_proc(char *page,
- {
- int len;
-
-- len = sprintf(page, "%s\n", saved_command_line);
-+ len = sprintf(page, "%s\n",
-+ ve_is_super(get_exec_env()) ? saved_command_line : "");
- return proc_calc_metrics(page, start, off, count, eof, len);
- }
-
-diff -upr linux-2.6.16.orig/fs/proc/proc_tty.c linux-2.6.16-026test009/fs/proc/proc_tty.c
---- linux-2.6.16.orig/fs/proc/proc_tty.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/proc_tty.c 2006-04-19 15:02:12.000000000 +0400
-@@ -6,6 +6,7 @@
-
- #include <asm/uaccess.h>
-
-+#include <linux/ve_owner.h>
- #include <linux/init.h>
- #include <linux/errno.h>
- #include <linux/time.h>
-@@ -106,24 +107,35 @@ static int show_tty_driver(struct seq_fi
- /* iterator */
- static void *t_start(struct seq_file *m, loff_t *pos)
- {
-- struct list_head *p;
-+ struct tty_driver *drv;
-+
- loff_t l = *pos;
-- list_for_each(p, &tty_drivers)
-+ read_lock(&tty_driver_guard);
-+ list_for_each_entry(drv, &tty_drivers, tty_drivers) {
-+ if (!ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
-+ continue;
- if (!l--)
-- return list_entry(p, struct tty_driver, tty_drivers);
-+ return drv;
-+ }
- return NULL;
- }
-
- static void *t_next(struct seq_file *m, void *v, loff_t *pos)
- {
-- struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next;
-+ struct tty_driver *drv;
-+
- (*pos)++;
-- return p==&tty_drivers ? NULL :
-- list_entry(p, struct tty_driver, tty_drivers);
-+ drv = (struct tty_driver *)v;
-+ list_for_each_entry_continue(drv, &tty_drivers, tty_drivers) {
-+ if (ve_accessible_strict(VE_OWNER_TTYDRV(drv), get_exec_env()))
-+ return drv;
-+ }
-+ return NULL;
- }
-
- static void t_stop(struct seq_file *m, void *v)
- {
-+ read_unlock(&tty_driver_guard);
- }
-
- static struct seq_operations tty_drivers_op = {
-diff -upr linux-2.6.16.orig/fs/proc/root.c linux-2.6.16-026test009/fs/proc/root.c
---- linux-2.6.16.orig/fs/proc/root.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/root.c 2006-04-19 15:02:12.000000000 +0400
-@@ -20,7 +20,10 @@
-
- #include "internal.h"
-
--struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
-+#ifndef CONFIG_VE
-+struct proc_dir_entry *proc_net, *proc_net_stat;
-+#endif
-+struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
-
- #ifdef CONFIG_SYSCTL
- struct proc_dir_entry *proc_sys_root;
-@@ -32,12 +35,14 @@ static struct super_block *proc_get_sb(s
- return get_sb_single(fs_type, flags, data, proc_fill_super);
- }
-
--static struct file_system_type proc_fs_type = {
-+struct file_system_type proc_fs_type = {
- .name = "proc",
- .get_sb = proc_get_sb,
- .kill_sb = kill_anon_super,
- };
-
-+EXPORT_SYMBOL(proc_fs_type);
-+
- void __init proc_root_init(void)
- {
- int err = proc_init_inodecache();
-@@ -157,7 +162,9 @@ EXPORT_SYMBOL(create_proc_entry);
- EXPORT_SYMBOL(remove_proc_entry);
- EXPORT_SYMBOL(proc_root);
- EXPORT_SYMBOL(proc_root_fs);
-+#ifndef CONFIG_VE
- EXPORT_SYMBOL(proc_net);
- EXPORT_SYMBOL(proc_net_stat);
-+#endif
- EXPORT_SYMBOL(proc_bus);
- EXPORT_SYMBOL(proc_root_driver);
-diff -upr linux-2.6.16.orig/fs/proc/task_mmu.c linux-2.6.16-026test009/fs/proc/task_mmu.c
---- linux-2.6.16.orig/fs/proc/task_mmu.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/task_mmu.c 2006-04-19 15:02:12.000000000 +0400
-@@ -90,9 +90,12 @@ int proc_exe_link(struct inode *inode, s
- }
-
- if (vma) {
-- *mnt = mntget(vma->vm_file->f_vfsmnt);
-- *dentry = dget(vma->vm_file->f_dentry);
-- result = 0;
-+ result = d_root_check(vma->vm_file->f_dentry,
-+ vma->vm_file->f_vfsmnt);
-+ if (!result) {
-+ *mnt = mntget(vma->vm_file->f_vfsmnt);
-+ *dentry = dget(vma->vm_file->f_dentry);
-+ }
- }
-
- up_read(&mm->mmap_sem);
-diff -upr linux-2.6.16.orig/fs/proc/task_nommu.c linux-2.6.16-026test009/fs/proc/task_nommu.c
---- linux-2.6.16.orig/fs/proc/task_nommu.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/task_nommu.c 2006-04-19 15:02:12.000000000 +0400
-@@ -126,9 +126,12 @@ int proc_exe_link(struct inode *inode, s
- }
-
- if (vma) {
-- *mnt = mntget(vma->vm_file->f_vfsmnt);
-- *dentry = dget(vma->vm_file->f_dentry);
-- result = 0;
-+ result = d_root_check(vma->vm_file->f_dentry,
-+ vma->vm_file->f_vfsmnt);
-+ if (!result) {
-+ *mnt = mntget(vma->vm_file->f_vfsmnt);
-+ *dentry = dget(vma->vm_file->f_dentry);
-+ }
- }
-
- up_read(&mm->mmap_sem);
-diff -upr linux-2.6.16.orig/fs/proc/vmcore.c linux-2.6.16-026test009/fs/proc/vmcore.c
---- linux-2.6.16.orig/fs/proc/vmcore.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/proc/vmcore.c 2006-04-19 15:02:11.000000000 +0400
-@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *
- size_t buflen, loff_t *fpos)
- {
- ssize_t acc = 0, tmp;
-- size_t tsz, nr_bytes;
-- u64 start;
-+ size_t tsz;
-+ u64 start, nr_bytes;
- struct vmcore *curr_m = NULL;
-
- if (buflen == 0 || *fpos >= vmcore_size)
-diff -upr linux-2.6.16.orig/fs/quota.c linux-2.6.16-026test009/fs/quota.c
---- linux-2.6.16.orig/fs/quota.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/quota.c 2006-04-19 15:02:12.000000000 +0400
-@@ -81,11 +81,11 @@ static int generic_quotactl_valid(struct
- if (cmd == Q_GETQUOTA) {
- if (((type == USRQUOTA && current->euid != id) ||
- (type == GRPQUOTA && !in_egroup_p(id))) &&
-- !capable(CAP_SYS_ADMIN))
-+ !capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- }
- else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
-
- return 0;
-@@ -132,10 +132,10 @@ static int xqm_quotactl_valid(struct sup
- if (cmd == Q_XGETQUOTA) {
- if (((type == XQM_USRQUOTA && current->euid != id) ||
- (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
-- !capable(CAP_SYS_ADMIN))
-+ !capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- }
-
-@@ -216,7 +216,7 @@ restart:
- sb->s_count++;
- spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
-- if (sb->s_root && sb->s_qcop->quota_sync)
-+ if (sb->s_root && sb->s_qcop && sb->s_qcop->quota_sync)
- quota_sync_sb(sb, type);
- up_read(&sb->s_umount);
- spin_lock(&sb_lock);
-@@ -358,7 +358,7 @@ asmlinkage long sys_quotactl(unsigned in
- tmp = getname(special);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
-- bdev = lookup_bdev(tmp);
-+ bdev = lookup_bdev(tmp, FMODE_QUOTACTL);
- putname(tmp);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
-diff -upr linux-2.6.16.orig/fs/reiserfs/namei.c linux-2.6.16-026test009/fs/reiserfs/namei.c
---- linux-2.6.16.orig/fs/reiserfs/namei.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/reiserfs/namei.c 2006-04-19 15:02:12.000000000 +0400
-@@ -864,6 +864,9 @@ static int reiserfs_rmdir(struct inode *
- INITIALIZE_PATH(path);
- struct reiserfs_dir_entry de;
-
-+ inode = dentry->d_inode;
-+ DQUOT_INIT(inode);
-+
- /* we will be doing 2 balancings and update 2 stat data, we change quotas
- * of the owner of the directory and of the owner of the parent directory.
- * The quota structure is possibly deleted only on last iput => outside
-@@ -888,8 +891,6 @@ static int reiserfs_rmdir(struct inode *
- goto end_rmdir;
- }
-
-- inode = dentry->d_inode;
--
- reiserfs_update_inode_transaction(inode);
- reiserfs_update_inode_transaction(dir);
-
-@@ -952,6 +953,7 @@ static int reiserfs_unlink(struct inode
- unsigned long savelink;
-
- inode = dentry->d_inode;
-+ DQUOT_INIT(inode);
-
- /* in this transaction we can be doing at max two balancings and update
- * two stat datas, we change quotas of the owner of the directory and of
-@@ -1259,6 +1261,8 @@ static int reiserfs_rename(struct inode
-
- old_inode = old_dentry->d_inode;
- new_dentry_inode = new_dentry->d_inode;
-+ if (new_dentry_inode)
-+ DQUOT_INIT(new_dentry_inode);
-
- // make sure, that oldname still exists and points to an object we
- // are going to rename
-diff -upr linux-2.6.16.orig/fs/reiserfs/xattr.c linux-2.6.16-026test009/fs/reiserfs/xattr.c
---- linux-2.6.16.orig/fs/reiserfs/xattr.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/reiserfs/xattr.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1343,7 +1343,8 @@ static int reiserfs_check_acl(struct ino
- return error;
- }
-
--int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-+int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- /*
- * We don't do permission checks on the internal objects.
-@@ -1356,7 +1357,7 @@ int reiserfs_permission(struct inode *in
- * Stat data v1 doesn't support ACLs.
- */
- if (get_inode_sd_version(inode) == STAT_DATA_V1)
-- return generic_permission(inode, mask, NULL);
-+ return generic_permission(inode, mask, NULL, perm);
- else
-- return generic_permission(inode, mask, reiserfs_check_acl);
-+ return generic_permission(inode, mask, reiserfs_check_acl, perm);
- }
-diff -upr linux-2.6.16.orig/fs/select.c linux-2.6.16-026test009/fs/select.c
---- linux-2.6.16.orig/fs/select.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/select.c 2006-04-19 15:02:11.000000000 +0400
-@@ -24,6 +24,8 @@
- #include <linux/fs.h>
- #include <linux/rcupdate.h>
-
-+#include <ub/ub_mem.h>
-+
- #include <asm/uaccess.h>
-
- #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
-@@ -286,7 +288,7 @@ int do_select(int n, fd_set_bits *fds, s
-
- static void *select_bits_alloc(int size)
- {
-- return kmalloc(6 * size, GFP_KERNEL);
-+ return ub_kmalloc(6 * size, GFP_KERNEL);
- }
-
- static void select_bits_free(void *bits, int size)
-@@ -645,7 +647,7 @@ int do_sys_poll(struct pollfd __user *uf
- err = -ENOMEM;
- while(i!=0) {
- struct poll_list *pp;
-- pp = kmalloc(sizeof(struct poll_list)+
-+ pp = ub_kmalloc(sizeof(struct poll_list)+
- sizeof(struct pollfd)*
- (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),
- GFP_KERNEL);
-diff -upr linux-2.6.16.orig/fs/seq_file.c linux-2.6.16-026test009/fs/seq_file.c
---- linux-2.6.16.orig/fs/seq_file.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/seq_file.c 2006-04-19 15:02:12.000000000 +0400
-@@ -345,6 +345,8 @@ int seq_path(struct seq_file *m,
- if (m->count < m->size) {
- char *s = m->buf + m->count;
- char *p = d_path(dentry, mnt, s, m->size - m->count);
-+ if (IS_ERR(p) && PTR_ERR(p) != -ENAMETOOLONG)
-+ return 0;
- if (!IS_ERR(p)) {
- while (s <= p) {
- char c = *p++;
-diff -upr linux-2.6.16.orig/fs/simfs.c linux-2.6.16-026test009/fs/simfs.c
---- linux-2.6.16.orig/fs/simfs.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/simfs.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,319 @@
-+/*
-+ * fs/simfs.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/fs.h>
-+#include <linux/file.h>
-+#include <linux/init.h>
-+#include <linux/namei.h>
-+#include <linux/err.h>
-+#include <linux/module.h>
-+#include <linux/mount.h>
-+#include <linux/vzquota.h>
-+#include <linux/statfs.h>
-+#include <linux/virtinfo.h>
-+#include <linux/faudit.h>
-+#include <linux/genhd.h>
-+
-+#include <asm/unistd.h>
-+#include <asm/uaccess.h>
-+
-+/*
-+ * Superblock that owns the root dentry of a simfs superblock, i.e. the
-+ * filesystem simfs is layered on. The argument and the whole expansion are
-+ * parenthesised so the macro is safe with any pointer expression.
-+ */
-+#define SIMFS_GET_LOWER_FS_SB(sb) ((sb)->s_root->d_sb)
-+
-+static struct super_operations sim_super_ops;
-+
-+/*
-+ * getattr replacement used through the faudit notifier.
-+ *
-+ * Uses the inode's own ->getattr when it has one, otherwise falls back to
-+ * generic_fillattr() and derives blocks/blksize from the inode's superblock
-+ * when blksize was left at zero. If the mount belongs to simfs, the reported
-+ * device is replaced by the simfs superblock's s_dev.
-+ *
-+ * Returns 0 on success or the error from ->getattr.
-+ */
-+static int sim_getattr(struct vfsmount *mnt, struct dentry *dentry,
-+		struct kstat *stat)
-+{
-+	struct inode *inode = dentry->d_inode;
-+	struct super_block *sb;
-+
-+	if (inode->i_op->getattr) {
-+		int err = inode->i_op->getattr(mnt, dentry, stat);
-+
-+		if (err)
-+			return err;
-+	} else {
-+		generic_fillattr(inode, stat);
-+		if (!stat->blksize) {
-+			unsigned blocks;
-+
-+			/* Round the size up to whole fs blocks, report in 512-byte units. */
-+			sb = inode->i_sb;
-+			blocks = (stat->size + sb->s_blocksize-1) >>
-+							sb->s_blocksize_bits;
-+			stat->blocks = (sb->s_blocksize / 512) * blocks;
-+			stat->blksize = sb->s_blocksize;
-+		}
-+	}
-+
-+	/* Only simfs mounts get their device number overridden. */
-+	sb = mnt->mnt_sb;
-+	if (sb->s_op == &sim_super_ops)
-+		stat->dev = sb->s_dev;
-+	return 0;
-+}
-+
-+/*
-+ * Overlay quota limits onto statfs numbers obtained from the lower fs.
-+ *
-+ * @sb:  superblock passed to the quota notifier (via virt_info_quota).
-+ * @buf: statfs result, adjusted in place. It is left untouched unless the
-+ *       VIRTINFO_QUOTA_GETSTAT notifier call returns NOTIFY_OK.
-+ */
-+static void quota_get_stat(struct super_block *sb, struct kstatfs *buf)
-+{
-+ int err;
-+ struct dq_stat qstat;
-+ struct virt_info_quota q;
-+ long free_file, adj_file;
-+ s64 blk, free_blk, adj_blk;
-+ int bsize_bits;
-+
-+ q.super = sb;
-+ q.qstat = &qstat;
-+ err = virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_GETSTAT, &q);
-+ if (err != NOTIFY_OK)
-+ return;
-+
-+ /* index of the lowest set bit of f_bsize; assumes f_bsize is a non-zero power of two -- TODO confirm */
-+ bsize_bits = ffs(buf->f_bsize) - 1;
-+ /* presumably the quota limits are in bytes and are converted to blocks here -- TODO confirm */
-+ free_blk = (s64)(qstat.bsoftlimit - qstat.bcurrent) >> bsize_bits;
-+ if (free_blk < 0)
-+ free_blk = 0;
-+ /*
-+ * In the regular case, we always set buf->f_bfree and buf->f_blocks to
-+ * the values reported by quota. In case of real disk space shortage,
-+ * we adjust the values. We want this adjustment to look as if the
-+ * total disk space were reduced, not as if the usage were increased.
-+ * -- SAW
-+ */
-+ adj_blk = 0;
-+ if (buf->f_bfree < free_blk)
-+ adj_blk = free_blk - buf->f_bfree;
-+ buf->f_bfree = (long)(free_blk - adj_blk);
-+
-+ if (free_blk < buf->f_bavail)
-+ buf->f_bavail = (long)free_blk; /* min(f_bavail, free_blk) */
-+
-+ /* total = soft limit in blocks, reduced by the same shortage adjustment */
-+ blk = (qstat.bsoftlimit >> bsize_bits) - adj_blk;
-+ buf->f_blocks = blk > LONG_MAX ? LONG_MAX : blk;
-+
-+ /* same scheme for inodes */
-+ free_file = qstat.isoftlimit - qstat.icurrent;
-+ if (free_file < 0)
-+ free_file = 0;
-+ if (buf->f_ffree == -1)
-+ /*
-+ * One filesystem uses -1 to represent the fact that it doesn't
-+ * have a separate limit on the number of inodes.
-+ * Maybe because -1 is a good candidate for the maximum value
-+ * of the signed long type, maybe because it's just nice to have
-+ * an exceptional case... Guess what that filesystem is :-)
-+ * -- SAW
-+ */
-+ buf->f_ffree = free_file;
-+ adj_file = 0;
-+ if (buf->f_ffree < free_file)
-+ adj_file = free_file - buf->f_ffree;
-+ buf->f_ffree = free_file - adj_file;
-+ buf->f_files = qstat.isoftlimit - adj_file;
-+}
-+
-+/*
-+ * statfs for a simfs superblock: ask the lower filesystem, apply quota
-+ * limits, and copy the block/inode counters into @buf.
-+ *
-+ * Returns 0 without touching @buf when @sb is not a simfs superblock,
-+ * -ENOSYS when the lower filesystem has no ->statfs, or the lower
-+ * filesystem's error code.
-+ */
-+static int sim_statfs(struct super_block *sb, struct statfs *buf)
-+{
-+	struct kstatfs lower;
-+	struct super_block *lsb;
-+	int err;
-+
-+	if (sb->s_op != &sim_super_ops)
-+		return 0;
-+
-+	lsb = SIMFS_GET_LOWER_FS_SB(sb);
-+	if (!lsb || !lsb->s_op || !lsb->s_op->statfs)
-+		return -ENOSYS;
-+
-+	err = lsb->s_op->statfs(lsb, &lower);
-+	if (err)
-+		return err;
-+
-+	quota_get_stat(sb, &lower);
-+
-+	buf->f_files = lower.f_files;
-+	buf->f_ffree = lower.f_ffree;
-+	buf->f_blocks = lower.f_blocks;
-+	buf->f_bfree = lower.f_bfree;
-+	buf->f_bavail = lower.f_bavail;
-+	return 0;
-+}
-+
-+/*
-+ * 64-bit variant of sim_statfs(): same lookup of the lower filesystem and
-+ * same quota adjustment, with the counters widened to __u64 for @buf.
-+ *
-+ * Returns 0 without touching @buf when @sb is not a simfs superblock,
-+ * -ENOSYS when the lower filesystem has no ->statfs, or the lower
-+ * filesystem's error code.
-+ */
-+static int sim_statfs64(struct super_block *sb, struct statfs64 *buf)
-+{
-+	struct kstatfs lower;
-+	struct super_block *lsb;
-+	int err;
-+
-+	if (sb->s_op != &sim_super_ops)
-+		return 0;
-+
-+	lsb = SIMFS_GET_LOWER_FS_SB(sb);
-+	if (!lsb || !lsb->s_op || !lsb->s_op->statfs)
-+		return -ENOSYS;
-+
-+	err = lsb->s_op->statfs(lsb, &lower);
-+	if (err)
-+		return err;
-+
-+	quota_get_stat(sb, &lower);
-+
-+	buf->f_files = (__u64)lower.f_files;
-+	buf->f_ffree = (__u64)lower.f_ffree;
-+	buf->f_blocks = (__u64)lower.f_blocks;
-+	buf->f_bfree = (__u64)lower.f_bfree;
-+	buf->f_bavail = (__u64)lower.f_bavail;
-+	return 0;
-+}
-+
-+/*
-+ * VITYPE_FAUDIT notifier: route stat/statfs/statfs64 requests to the simfs
-+ * implementations.
-+ *
-+ * @n: request code; anything other than the three handled codes returns
-+ *     @old_ret unchanged.
-+ * @d: struct faudit_stat_arg describing the request; its ->err field
-+ *     receives the handler's result.
-+ *
-+ * Returns NOTIFY_OK when the handler succeeded, NOTIFY_BAD otherwise.
-+ */
-+static int sim_systemcall(struct vnotifier_block *me, unsigned long n,
-+		void *d, int old_ret)
-+{
-+	struct faudit_stat_arg *req = (struct faudit_stat_arg *)d;
-+	int rc;
-+
-+	switch (n) {
-+	case VIRTINFO_FAUDIT_STAT:
-+		rc = sim_getattr(req->mnt, req->dentry,
-+				(struct kstat *)req->stat);
-+		break;
-+	case VIRTINFO_FAUDIT_STATFS:
-+		rc = sim_statfs(req->mnt->mnt_sb,
-+				(struct statfs *)req->stat);
-+		break;
-+	case VIRTINFO_FAUDIT_STATFS64:
-+		rc = sim_statfs64(req->mnt->mnt_sb,
-+				(struct statfs64 *)req->stat);
-+		break;
-+	default:
-+		/* Not ours: leave the chain's previous verdict alone. */
-+		return old_ret;
-+	}
-+
-+	req->err = rc;
-+	if (rc)
-+		return NOTIFY_BAD;
-+	return NOTIFY_OK;
-+}
-+
-+/* ->get_quota_root hook: the inode behind the superblock's root dentry. */
-+static struct inode *sim_quota_root(struct super_block *sb)
-+{
-+ return sb->s_root->d_inode;
-+}
-+
-+/*
-+ * ->put_super hook: send VIRTINFO_QUOTA_OFF for this superblock and drop
-+ * the block_device reference stored in sb->s_bdev (taken with bdget() in
-+ * sim_get_sb()).
-+ */
-+void sim_put_super(struct super_block *sb)
-+{
-+ struct virt_info_quota viq;
-+
-+ viq.super = sb;
-+ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_OFF, &viq);
-+ bdput(sb->s_bdev);
-+}
-+
-+/*
-+ * Superblock operations of simfs. The address of this table is also what
-+ * the code in this file compares sb->s_op against to recognise a simfs
-+ * superblock.
-+ */
-+static struct super_operations sim_super_ops = {
-+ .get_quota_root = sim_quota_root,
-+ .put_super = sim_put_super,
-+};
-+
-+/*
-+ * sget() "set" callback: give the new superblock an anonymous device number,
-+ * take a reference on the looked-up directory as its root, and install the
-+ * simfs operations.
-+ *
-+ * @data: struct nameidata of the directory being exposed.
-+ *
-+ * Returns 0, or the error from set_anon_super().
-+ */
-+static int sim_fill_super(struct super_block *s, void *data)
-+{
-+	struct nameidata *lower = (struct nameidata *)data;
-+	int rc = set_anon_super(s, NULL);
-+
-+	if (rc)
-+		return rc;
-+
-+	s->s_root = dget(lower->dentry);
-+	s->s_op = &sim_super_ops;
-+	return 0;
-+}
-+
-+/*
-+ * ->get_sb for simfs.
-+ *
-+ * @opt is required and is looked up as a directory path; that directory
-+ * becomes the root of the new superblock (see sim_fill_super()). A
-+ * block_device is obtained for the superblock's anonymous dev number and
-+ * VIRTINFO_QUOTA_ON is sent for the superblock.
-+ *
-+ * Returns the superblock, or ERR_PTR(): -EINVAL when @opt is NULL, the
-+ * path_lookup()/sget() error, or -ENODEV when bdget() fails.
-+ */
-+struct super_block *sim_get_sb(struct file_system_type *type,
-+ int flags, const char *dev_name, void *opt)
-+{
-+ int err;
-+ struct nameidata nd;
-+ struct super_block *sb;
-+ struct block_device *bd;
-+ struct virt_info_quota viq;
-+ /* one placeholder partition shared by every simfs block_device */
-+ static struct hd_struct fake_hds;
-+
-+ sb = ERR_PTR(-EINVAL);
-+ if (opt == NULL)
-+ goto out;
-+
-+ err = path_lookup(opt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
-+ sb = ERR_PTR(err);
-+ if (err)
-+ goto out;
-+
-+ sb = sget(type, NULL, sim_fill_super, &nd);
-+ if (IS_ERR(sb))
-+ goto out_path;
-+
-+ bd = bdget(sb->s_dev);
-+ if (!bd)
-+ goto out_killsb;
-+
-+ sb->s_bdev = bd;
-+ bd->bd_part = &fake_hds;
-+ viq.super = sb;
-+ virtinfo_notifier_call(VITYPE_QUOTA, VIRTINFO_QUOTA_ON, &viq);
-+ /* success falls through: the path reference is dropped either way */
-+out_path:
-+ path_release(&nd);
-+out:
-+ return sb;
-+
-+out_killsb:
-+ /* presumably sget() returned with s_umount held for writing -- TODO confirm */
-+ up_write(&sb->s_umount);
-+ deactivate_super(sb);
-+ sb = ERR_PTR(-ENODEV);
-+ goto out_path;
-+}
-+
-+/* Filesystem type registered under the name "simfs". */
-+static struct file_system_type sim_fs_type = {
-+ .owner = THIS_MODULE,
-+ .name = "simfs",
-+ .get_sb = sim_get_sb,
-+ .kill_sb = kill_anon_super,
-+};
-+
-+/* Notifier block hooked into the VITYPE_FAUDIT chain by init_simfs(). */
-+static struct vnotifier_block sim_syscalls = {
-+ .notifier_call = sim_systemcall,
-+};
-+
-+/*
-+ * Module init: register the filesystem type and, only if that worked,
-+ * hook the faudit notifier. Returns the register_filesystem() result.
-+ */
-+static int __init init_simfs(void)
-+{
-+	int rc = register_filesystem(&sim_fs_type);
-+
-+	if (!rc)
-+		virtinfo_notifier_register(VITYPE_FAUDIT, &sim_syscalls);
-+	return rc;
-+}
-+
-+/* Module exit: undo init_simfs() in reverse order. */
-+static void __exit exit_simfs(void)
-+{
-+ virtinfo_notifier_unregister(VITYPE_FAUDIT, &sim_syscalls);
-+ unregister_filesystem(&sim_fs_type);
-+}
-+
-+/* Module metadata and entry points. */
-+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
-+MODULE_DESCRIPTION("Open Virtuozzo Simulation of File System");
-+MODULE_LICENSE("GPL v2");
-+
-+module_init(init_simfs);
-+module_exit(exit_simfs);
-diff -upr linux-2.6.16.orig/fs/smbfs/file.c linux-2.6.16-026test009/fs/smbfs/file.c
---- linux-2.6.16.orig/fs/smbfs/file.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/smbfs/file.c 2006-04-19 15:02:11.000000000 +0400
-@@ -387,7 +387,8 @@ smb_file_release(struct inode *inode, st
- * privileges, so we need our own check for this.
- */
- static int
--smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
-+smb_file_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- int mode = inode->i_mode;
- int error = 0;
-diff -upr linux-2.6.16.orig/fs/smbfs/inode.c linux-2.6.16-026test009/fs/smbfs/inode.c
---- linux-2.6.16.orig/fs/smbfs/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/smbfs/inode.c 2006-04-19 15:02:11.000000000 +0400
-@@ -233,7 +233,7 @@ smb_invalidate_inodes(struct smb_sb_info
- {
- VERBOSE("\n");
- shrink_dcache_sb(SB_of(server));
-- invalidate_inodes(SB_of(server));
-+ invalidate_inodes(SB_of(server), 0);
- }
-
- /*
-diff -upr linux-2.6.16.orig/fs/stat.c linux-2.6.16-026test009/fs/stat.c
---- linux-2.6.16.orig/fs/stat.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/stat.c 2006-04-19 15:02:12.000000000 +0400
-@@ -15,6 +15,7 @@
- #include <linux/namei.h>
- #include <linux/security.h>
- #include <linux/syscalls.h>
-+#include <linux/faudit.h>
-
- #include <asm/uaccess.h>
- #include <asm/unistd.h>
-@@ -42,11 +43,19 @@ int vfs_getattr(struct vfsmount *mnt, st
- {
- struct inode *inode = dentry->d_inode;
- int retval;
-+ struct faudit_stat_arg arg;
-
- retval = security_inode_getattr(mnt, dentry);
- if (retval)
- return retval;
-
-+ arg.mnt = mnt;
-+ arg.dentry = dentry;
-+ arg.stat = stat;
-+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_STAT, &arg)
-+ != NOTIFY_DONE)
-+ return arg.err;
-+
- if (inode->i_op->getattr)
- return inode->i_op->getattr(mnt, dentry, stat);
-
-diff -upr linux-2.6.16.orig/fs/super.c linux-2.6.16-026test009/fs/super.c
---- linux-2.6.16.orig/fs/super.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/super.c 2006-04-19 15:02:12.000000000 +0400
-@@ -23,6 +23,7 @@
- #include <linux/config.h>
- #include <linux/module.h>
- #include <linux/slab.h>
-+#include <linux/ve_owner.h>
- #include <linux/init.h>
- #include <linux/smp_lock.h>
- #include <linux/acct.h>
-@@ -69,6 +70,7 @@ static struct super_block *alloc_super(v
- INIT_LIST_HEAD(&s->s_io);
- INIT_LIST_HEAD(&s->s_files);
- INIT_LIST_HEAD(&s->s_instances);
-+ INIT_LIST_HEAD(&s->s_dshrinkers);
- INIT_HLIST_HEAD(&s->s_anon);
- INIT_LIST_HEAD(&s->s_inodes);
- init_rwsem(&s->s_umount);
-@@ -231,13 +233,14 @@ void generic_shutdown_super(struct super
- if (root) {
- sb->s_root = NULL;
- shrink_dcache_parent(root);
-- shrink_dcache_anon(&sb->s_anon);
-+ shrink_dcache_anon(sb);
- dput(root);
-+ dcache_shrinker_wait_sb(sb);
- fsync_super(sb);
- lock_super(sb);
- sb->s_flags &= ~MS_ACTIVE;
- /* bad name - it should be evict_inodes() */
-- invalidate_inodes(sb);
-+ invalidate_inodes(sb, 0);
- lock_kernel();
-
- if (sop->write_super && sb->s_dirt)
-@@ -246,7 +249,7 @@ void generic_shutdown_super(struct super
- sop->put_super(sb);
-
- /* Forget any remaining inodes */
-- if (invalidate_inodes(sb)) {
-+ if (invalidate_inodes(sb, 1)) {
- printk("VFS: Busy inodes after unmount of %s. "
- "Self-destruct in 5 seconds. Have a nice day...\n",
- sb->s_id);
-@@ -481,11 +484,20 @@ asmlinkage long sys_ustat(unsigned dev,
- struct super_block *s;
- struct ustat tmp;
- struct kstatfs sbuf;
-- int err = -EINVAL;
-+ dev_t kdev;
-+ int err;
-+
-+ kdev = new_decode_dev(dev);
-+#ifdef CONFIG_VE
-+ err = get_device_perms_ve(S_IFBLK, kdev, FMODE_READ);
-+ if (err)
-+ goto out;
-+#endif
-
-- s = user_get_super(new_decode_dev(dev));
-- if (s == NULL)
-- goto out;
-+ err = -EINVAL;
-+ s = user_get_super(kdev);
-+ if (s == NULL)
-+ goto out;
- err = vfs_statfs(s, &sbuf);
- drop_super(s);
- if (err)
-@@ -599,6 +611,13 @@ void emergency_remount(void)
- static struct idr unnamed_dev_idr;
- static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
-
-+/*
-+ * Major numbers available to unnamed (anonymous) devices, kept for
-+ * compatibility with coreutils that are still unaware of the new minor
-+ * sizes. Exported for use outside this file; presumably consumed by
-+ * make_unnamed_dev()/unnamed_dev_idx(), which are defined elsewhere.
-+ */
-+int unnamed_dev_majors[] = {
-+ 0, 144, 145, 146, 242, 243, 244, 245,
-+ 246, 247, 248, 249, 250, 251, 252, 253
-+};
-+EXPORT_SYMBOL(unnamed_dev_majors);
-+
- int set_anon_super(struct super_block *s, void *data)
- {
- int dev;
-@@ -616,13 +635,13 @@ int set_anon_super(struct super_block *s
- else if (error)
- return -EAGAIN;
-
-- if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
-+ if ((dev & MAX_ID_MASK) >= (1 << MINORBITS)) {
- spin_lock(&unnamed_dev_lock);
- idr_remove(&unnamed_dev_idr, dev);
- spin_unlock(&unnamed_dev_lock);
- return -EMFILE;
- }
-- s->s_dev = MKDEV(0, dev & MINORMASK);
-+ s->s_dev = make_unnamed_dev(dev);
- return 0;
- }
-
-@@ -630,8 +649,9 @@ EXPORT_SYMBOL(set_anon_super);
-
- void kill_anon_super(struct super_block *sb)
- {
-- int slot = MINOR(sb->s_dev);
-+ int slot;
-
-+ slot = unnamed_dev_idx(sb->s_dev);
- generic_shutdown_super(sb);
- spin_lock(&unnamed_dev_lock);
- idr_remove(&unnamed_dev_idr, slot);
-diff -upr linux-2.6.16.orig/fs/sysfs/bin.c linux-2.6.16-026test009/fs/sysfs/bin.c
---- linux-2.6.16.orig/fs/sysfs/bin.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/bin.c 2006-04-19 15:02:12.000000000 +0400
-@@ -120,6 +120,9 @@ static int open(struct inode * inode, st
- struct bin_attribute * attr = to_bin_attr(file->f_dentry);
- int error = -EINVAL;
-
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- if (!kobj || !attr)
- goto Done;
-
-@@ -196,6 +199,9 @@ int sysfs_create_bin_file(struct kobject
-
- int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
- {
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
- return 0;
- }
-diff -upr linux-2.6.16.orig/fs/sysfs/dir.c linux-2.6.16-026test009/fs/sysfs/dir.c
---- linux-2.6.16.orig/fs/sysfs/dir.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/dir.c 2006-04-19 15:02:12.000000000 +0400
-@@ -144,6 +144,9 @@ int sysfs_create_dir(struct kobject * ko
- struct dentry * parent;
- int error = 0;
-
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- BUG_ON(!kobj);
-
- if (kobj->parent)
-@@ -278,10 +281,14 @@ void sysfs_remove_subdir(struct dentry *
-
- void sysfs_remove_dir(struct kobject * kobj)
- {
-- struct dentry * dentry = dget(kobj->dentry);
-+ struct dentry * dentry;
- struct sysfs_dirent * parent_sd;
- struct sysfs_dirent * sd, * tmp;
-
-+ if (!ve_sysfs_alowed())
-+ return;
-+
-+ dentry = dget(kobj->dentry);
- if (!dentry)
- return;
-
-@@ -302,6 +309,7 @@ void sysfs_remove_dir(struct kobject * k
- * Drop reference from dget() on entrance.
- */
- dput(dentry);
-+ kobj->dentry = NULL;
- }
-
- int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
-@@ -309,6 +317,9 @@ int sysfs_rename_dir(struct kobject * ko
- int error = 0;
- struct dentry * new_dentry, * parent;
-
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- if (!strcmp(kobject_name(kobj), new_name))
- return -EINVAL;
-
-diff -upr linux-2.6.16.orig/fs/sysfs/file.c linux-2.6.16-026test009/fs/sysfs/file.c
---- linux-2.6.16.orig/fs/sysfs/file.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/file.c 2006-04-19 15:02:12.000000000 +0400
-@@ -183,7 +183,7 @@ fill_write_buffer(struct sysfs_buffer *
- return -ENOMEM;
-
- if (count >= PAGE_SIZE)
-- count = PAGE_SIZE;
-+ count = PAGE_SIZE - 1;
- error = copy_from_user(buffer->page,buf,count);
- buffer->needs_read_fill = 1;
- return error ? -EFAULT : count;
-@@ -380,6 +380,9 @@ int sysfs_add_file(struct dentry * dir,
-
- int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
- {
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- BUG_ON(!kobj || !kobj->dentry || !attr);
-
- return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
-@@ -398,6 +401,9 @@ int sysfs_update_file(struct kobject * k
- struct dentry * victim;
- int res = -ENOENT;
-
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- mutex_lock(&dir->d_inode->i_mutex);
- victim = lookup_one_len(attr->name, dir, strlen(attr->name));
- if (!IS_ERR(victim)) {
-@@ -473,6 +479,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
-
- void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
- {
-+ if (!ve_sysfs_alowed())
-+ return;
-+
- sysfs_hash_and_remove(kobj->dentry,attr->name);
- }
-
-diff -upr linux-2.6.16.orig/fs/sysfs/group.c linux-2.6.16-026test009/fs/sysfs/group.c
---- linux-2.6.16.orig/fs/sysfs/group.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/group.c 2006-04-19 15:02:12.000000000 +0400
-@@ -46,6 +46,9 @@ int sysfs_create_group(struct kobject *
- struct dentry * dir;
- int error;
-
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- BUG_ON(!kobj || !kobj->dentry);
-
- if (grp->name) {
-@@ -68,6 +71,9 @@ void sysfs_remove_group(struct kobject *
- {
- struct dentry * dir;
-
-+ if (!ve_sysfs_alowed())
-+ return;
-+
- if (grp->name)
- dir = lookup_one_len(grp->name, kobj->dentry,
- strlen(grp->name));
-diff -upr linux-2.6.16.orig/fs/sysfs/inode.c linux-2.6.16-026test009/fs/sysfs/inode.c
---- linux-2.6.16.orig/fs/sysfs/inode.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/inode.c 2006-04-19 15:02:12.000000000 +0400
-@@ -8,14 +8,13 @@
-
- #undef DEBUG
-
-+#include <linux/config.h>
- #include <linux/pagemap.h>
- #include <linux/namei.h>
- #include <linux/backing-dev.h>
- #include <linux/capability.h>
- #include "sysfs.h"
-
--extern struct super_block * sysfs_sb;
--
- static struct address_space_operations sysfs_aops = {
- .readpage = simple_readpage,
- .prepare_write = simple_prepare_write,
-@@ -227,12 +226,16 @@ void sysfs_drop_dentry(struct sysfs_dire
- void sysfs_hash_and_remove(struct dentry * dir, const char * name)
- {
- struct sysfs_dirent * sd;
-- struct sysfs_dirent * parent_sd = dir->d_fsdata;
-+ struct sysfs_dirent * parent_sd;
-+
-+ if (!dir)
-+ return;
-
- if (dir->d_inode == NULL)
- /* no inode means this hasn't been made visible yet */
- return;
-
-+ parent_sd = dir->d_fsdata;
- mutex_lock(&dir->d_inode->i_mutex);
- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
- if (!sd->s_element)
-diff -upr linux-2.6.16.orig/fs/sysfs/mount.c linux-2.6.16-026test009/fs/sysfs/mount.c
---- linux-2.6.16.orig/fs/sysfs/mount.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/mount.c 2006-04-19 15:02:12.000000000 +0400
-@@ -7,6 +7,7 @@
- #include <linux/fs.h>
- #include <linux/mount.h>
- #include <linux/pagemap.h>
-+#include <linux/module.h>
- #include <linux/init.h>
-
- #include "sysfs.h"
-@@ -14,8 +15,11 @@
- /* Random magic number */
- #define SYSFS_MAGIC 0x62656572
-
-+#ifndef CONFIG_VE
- struct vfsmount *sysfs_mount;
- struct super_block * sysfs_sb = NULL;
-+#endif
-+
- kmem_cache_t *sysfs_dir_cachep;
-
- static struct super_operations sysfs_ops = {
-@@ -31,6 +35,15 @@ static struct sysfs_dirent sysfs_root =
- .s_iattr = NULL,
- };
-
-+#ifdef CONFIG_VE
-+static void init_ve0_sysfs_root(void)
-+{
-+ get_ve0()->sysfs_root = &sysfs_root;
-+}
-+
-+#define sysfs_root (*(get_exec_env()->sysfs_root))
-+#endif
-+
- static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
- {
- struct inode *inode;
-@@ -72,16 +85,21 @@ static struct super_block *sysfs_get_sb(
- return get_sb_single(fs_type, flags, data, sysfs_fill_super);
- }
-
--static struct file_system_type sysfs_fs_type = {
-+struct file_system_type sysfs_fs_type = {
- .name = "sysfs",
- .get_sb = sysfs_get_sb,
- .kill_sb = kill_litter_super,
- };
-
-+EXPORT_SYMBOL(sysfs_fs_type);
-+
- int __init sysfs_init(void)
- {
- int err = -ENOMEM;
-
-+#ifdef CONFIG_VE
-+ init_ve0_sysfs_root();
-+#endif
- sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
- sizeof(struct sysfs_dirent),
- 0, 0, NULL, NULL);
-diff -upr linux-2.6.16.orig/fs/sysfs/symlink.c linux-2.6.16-026test009/fs/sysfs/symlink.c
---- linux-2.6.16.orig/fs/sysfs/symlink.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/symlink.c 2006-04-19 15:02:12.000000000 +0400
-@@ -66,6 +66,7 @@ static int sysfs_add_link(struct dentry
- if (!error)
- return 0;
-
-+ kobject_put(target);
- kfree(sl->link_name);
- exit2:
- kfree(sl);
-@@ -86,6 +87,9 @@ int sysfs_create_link(struct kobject * k
-
- BUG_ON(!kobj || !kobj->dentry || !name);
-
-+ if (!ve_sysfs_alowed())
-+ return 0;
-+
- mutex_lock(&dentry->d_inode->i_mutex);
- error = sysfs_add_link(dentry, name, target);
- mutex_unlock(&dentry->d_inode->i_mutex);
-@@ -101,6 +105,9 @@ int sysfs_create_link(struct kobject * k
-
- void sysfs_remove_link(struct kobject * kobj, const char * name)
- {
-+ if(!ve_sysfs_alowed())
-+ return;
-+
- sysfs_hash_and_remove(kobj->dentry,name);
- }
-
-diff -upr linux-2.6.16.orig/fs/sysfs/sysfs.h linux-2.6.16-026test009/fs/sysfs/sysfs.h
---- linux-2.6.16.orig/fs/sysfs/sysfs.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/sysfs/sysfs.h 2006-04-19 15:02:12.000000000 +0400
-@@ -1,5 +1,14 @@
-
--extern struct vfsmount * sysfs_mount;
-+#ifndef CONFIG_VE
-+extern struct vfsmount *sysfs_mount;
-+extern struct super_block *sysfs_sb;
-+#define ve_sysfs_alowed() (1)
-+#else
-+#define sysfs_mount (get_exec_env()->sysfs_mnt)
-+#define sysfs_sb (get_exec_env()->sysfs_sb)
-+#define ve_sysfs_alowed() (sysfs_sb != NULL)
-+#endif
-+
- extern kmem_cache_t *sysfs_dir_cachep;
-
- extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
-@@ -19,7 +28,6 @@ extern void sysfs_drop_dentry(struct sys
- extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-
- extern struct rw_semaphore sysfs_rename_sem;
--extern struct super_block * sysfs_sb;
- extern struct file_operations sysfs_dir_operations;
- extern struct file_operations sysfs_file_operations;
- extern struct file_operations bin_fops;
-diff -upr linux-2.6.16.orig/fs/vzdq_file.c linux-2.6.16-026test009/fs/vzdq_file.c
---- linux-2.6.16.orig/fs/vzdq_file.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/vzdq_file.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,852 @@
-+/*
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * This file contains Virtuozzo quota files as proc entry implementation.
-+ * It is required for std quota tools to work correctly as they are expecting
-+ * aquota.user and aquota.group files.
-+ */
-+
-+#include <linux/ctype.h>
-+#include <linux/slab.h>
-+#include <linux/list.h>
-+#include <linux/module.h>
-+#include <linux/proc_fs.h>
-+#include <linux/sysctl.h>
-+#include <linux/mount.h>
-+#include <linux/namespace.h>
-+#include <linux/quotaio_v2.h>
-+#include <asm/uaccess.h>
-+
-+#include <linux/ve.h>
-+#include <linux/ve_proto.h>
-+#include <linux/vzdq_tree.h>
-+#include <linux/vzquota.h>
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * File read operation
-+ *
-+ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
-+ * perhaps) abuse vz_quota_sem.
-+ * Taking a global semaphore for lengthy and user-controlled operations inside
-+ * VPSs is not a good idea in general.
-+ * In this case, the reasons for taking this semaphore are completely unclear,
-+ * especially taking into account that the only function that has comments
-+ * about the necessity to be called under this semaphore
-+ * (create_proc_quotafile) is actually called OUTSIDE it.
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+#define DQBLOCK_SIZE 1024
-+#define DQUOTBLKNUM 21U
-+#define DQTREE_DEPTH 4
-+#define TREENUM_2_BLKNUM(num) (((num) + 1) << 1)
-+#define ISINDBLOCK(num) ((num)%2 != 0)
-+#define FIRST_DATABLK 2 /* first even number */
-+#define LAST_IND_LEVEL (DQTREE_DEPTH - 1)
-+#define CONVERT_LEVEL(level) ((level) * (QUOTAID_EBITS/QUOTAID_BBITS))
-+#define GETLEVINDX(ind, lev) (((ind) >> QUOTAID_BBITS*(lev)) \
-+ & QUOTATREE_BMASK)
-+
-+#if (QUOTAID_EBITS / QUOTAID_BBITS) != (QUOTATREE_DEPTH / DQTREE_DEPTH)
-+#error xBITS and DQTREE_DEPTH does not correspond
-+#endif
-+
-+#define BLOCK_NOT_FOUND 1
-+
-+/* data for quota file -- one per proc entry */
-+struct quotatree_data {
-+ struct list_head list;
-+ struct vz_quota_master *qmblk;
-+ int type; /* type of the tree */
-+};
-+
-+/* serialized by vz_quota_sem */
-+static LIST_HEAD(qf_data_head);
-+
-+static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
-+static const u_int32_t vzquota_versions[] = V2_INITQVERSIONS;
-+
-+static inline loff_t get_depoff(int depth)
-+{
-+ loff_t res = 1;
-+ while (depth) {
-+ res += (1 << ((depth - 1)*QUOTAID_EBITS + 1));
-+ depth--;
-+ }
-+ return res;
-+}
-+
-+static inline loff_t get_blknum(loff_t num, int depth)
-+{
-+ loff_t res;
-+ res = (num << 1) + get_depoff(depth);
-+ return res;
-+}
-+
-+static int get_depth(loff_t num)
-+{
-+ int i;
-+ for (i = 0; i < DQTREE_DEPTH; i++) {
-+ if (num >= get_depoff(i) && (i == DQTREE_DEPTH - 1
-+ || num < get_depoff(i + 1)))
-+ return i;
-+ }
-+ return -1;
-+}
-+
-+static inline loff_t get_offset(loff_t num)
-+{
-+ loff_t res, tmp;
-+
-+ tmp = get_depth(num);
-+ if (tmp < 0)
-+ return -1;
-+ num -= get_depoff(tmp);
-+ BUG_ON(num < 0);
-+ res = num >> 1;
-+
-+ return res;
-+}
-+
-+static inline loff_t get_quot_blk_num(struct quotatree_tree *tree, int level)
-+{
-+ /* return maximum available block num */
-+ return tree->levels[level].freenum;
-+}
-+
-+static inline loff_t get_block_num(struct quotatree_tree *tree)
-+{
-+ loff_t ind_blk_num, quot_blk_num, max_ind, max_quot;
-+
-+ quot_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH) - 1);
-+ max_quot = TREENUM_2_BLKNUM(quot_blk_num);
-+ ind_blk_num = get_quot_blk_num(tree, CONVERT_LEVEL(DQTREE_DEPTH - 1));
-+ max_ind = (quot_blk_num) ? get_blknum(ind_blk_num, LAST_IND_LEVEL)
-+ : get_blknum(ind_blk_num, 0);
-+
-+ return (max_ind > max_quot) ? max_ind + 1 : max_quot + 1;
-+}
-+
-+/* Fill @buf with the quota file header (block 0 of the emulated file) */
-+static int read_header(void *buf, struct quotatree_tree *tree,
-+ struct dq_info *dq_ugid_info, int type)
-+{
-+ struct v2_disk_dqheader *dqh;
-+ struct v2_disk_dqinfo *dq_disk_info;
-+
-+ dqh = buf;
-+ dq_disk_info = buf + sizeof(struct v2_disk_dqheader);
-+
-+ dqh->dqh_magic = vzquota_magics[type];
-+ dqh->dqh_version = vzquota_versions[type];
-+
-+ dq_disk_info->dqi_bgrace = dq_ugid_info[type].bexpire;
-+ dq_disk_info->dqi_igrace = dq_ugid_info[type].iexpire;
-+ dq_disk_info->dqi_flags = 0; /* no flags */
-+ dq_disk_info->dqi_blocks = get_block_num(tree);
-+ dq_disk_info->dqi_free_blk = 0; /* first block in the file */
-+ dq_disk_info->dqi_free_entry = FIRST_DATABLK;
-+
-+ return 0;
-+}
-+
-+static int get_block_child(int depth, struct quotatree_node *p, u_int32_t *buf)
-+{
-+ int i, j, lev_num;
-+ /* Fill one index block: buf[i] is the file block number of child i, or 0. */
-+ lev_num = QUOTATREE_DEPTH/DQTREE_DEPTH - 1;
-+ for (i = 0; i < DQBLOCK_SIZE/sizeof(u_int32_t); i++) { /* buf holds DQBLOCK_SIZE bytes */
-+ struct quotatree_node *next, *parent;
-+
-+ parent = p;
-+ next = p;
-+ for (j = lev_num; j >= 0; j--) {
-+ if (!next->blocks[GETLEVINDX(i,j)]) {
-+ buf[i] = 0;
-+ goto bad_branch;
-+ }
-+ parent = next;
-+ next = next->blocks[GETLEVINDX(i,j)];
-+ }
-+ buf[i] = (depth == DQTREE_DEPTH - 1) ?
-+ TREENUM_2_BLKNUM(parent->num)
-+ : get_blknum(next->num, depth + 1);
-+
-+ bad_branch:
-+ ;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Write index block to disk (or buffer)
-+ * @buf has length 256*sizeof(u_int32_t) bytes
-+ */
-+static int read_index_block(int num, u_int32_t *buf,
-+ struct quotatree_tree *tree)
-+{
-+ struct quotatree_node *p;
-+ u_int32_t index;
-+ loff_t off;
-+ int depth, res;
-+
-+ res = BLOCK_NOT_FOUND;
-+ index = 0;
-+ depth = get_depth(num);
-+ off = get_offset(num);
-+ if (depth < 0 || off < 0)
-+ return -EINVAL;
-+
-+ list_for_each_entry(p, &tree->levels[CONVERT_LEVEL(depth)].usedlh,
-+ list) {
-+ if (p->num >= off)
-+ res = 0;
-+ if (p->num != off)
-+ continue;
-+ get_block_child(depth, p, buf);
-+ break;
-+ }
-+
-+ return res;
-+}
-+
-+static inline void convert_quot_format(struct v2_disk_dqblk *dq,
-+ struct vz_quota_ugid *vzq)
-+{
-+ dq->dqb_id = vzq->qugid_id;
-+ dq->dqb_ihardlimit = vzq->qugid_stat.ihardlimit;
-+ dq->dqb_isoftlimit = vzq->qugid_stat.isoftlimit;
-+ dq->dqb_curinodes = vzq->qugid_stat.icurrent;
-+ dq->dqb_bhardlimit = vzq->qugid_stat.bhardlimit / QUOTABLOCK_SIZE;
-+ dq->dqb_bsoftlimit = vzq->qugid_stat.bsoftlimit / QUOTABLOCK_SIZE;
-+ dq->dqb_curspace = vzq->qugid_stat.bcurrent;
-+ dq->dqb_btime = vzq->qugid_stat.btime;
-+ dq->dqb_itime = vzq->qugid_stat.itime;
-+}
-+
-+static int read_dquot(loff_t num, void *buf, struct quotatree_tree *tree)
-+{
-+ int res, i, entries = 0;
-+ struct v2_disk_dqdbheader *dq_header;
-+ struct quotatree_node *p;
-+ struct v2_disk_dqblk *blk = buf + sizeof(struct v2_disk_dqdbheader);
-+
-+ res = BLOCK_NOT_FOUND;
-+ dq_header = buf;
-+ memset(dq_header, 0, sizeof(*dq_header));
-+
-+ list_for_each_entry(p, &(tree->levels[QUOTATREE_DEPTH - 1].usedlh),
-+ list) {
-+ if (TREENUM_2_BLKNUM(p->num) >= num)
-+ res = 0;
-+ if (TREENUM_2_BLKNUM(p->num) != num)
-+ continue;
-+
-+ for (i = 0; i < QUOTATREE_BSIZE; i++) {
-+ if (!p->blocks[i])
-+ continue;
-+ convert_quot_format(blk + entries,
-+ (struct vz_quota_ugid *)p->blocks[i]);
-+ entries++;
-+ res = 0;
-+ }
-+ break;
-+ }
-+ dq_header->dqdh_entries = entries;
-+
-+ return res;
-+}
-+
-+static int read_block(int num, void *buf, struct quotatree_tree *tree,
-+ struct dq_info *dq_ugid_info, int magic)
-+{
-+ int res;
-+
-+ memset(buf, 0, DQBLOCK_SIZE);
-+ if (!num)
-+ res = read_header(buf, tree, dq_ugid_info, magic);
-+ else if (ISINDBLOCK(num))
-+ res = read_index_block(num, (u_int32_t*)buf, tree);
-+ else
-+ res = read_dquot(num, buf, tree);
-+
-+ return res;
-+}
-+
-+/*
-+ * FIXME: this function can handle quota files up to 2GB only.
-+ */
-+static int read_proc_quotafile(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ off_t blk_num, blk_off, buf_off;
-+ char *tmp;
-+ size_t buf_size;
-+ struct quotatree_data *qtd;
-+ struct quotatree_tree *tree;
-+ struct dq_info *dqi;
-+ int res;
-+
-+ qtd = data;
-+ down(&vz_quota_sem);
-+ down(&qtd->qmblk->dq_sem);
-+
-+ res = 0;
-+ tree = QUGID_TREE(qtd->qmblk, qtd->type);
-+ if (!tree) {
-+ *eof = 1;
-+ goto out_dq;
-+ }
-+
-+ res = -ENOMEM;
-+ tmp = kmalloc(DQBLOCK_SIZE, GFP_KERNEL);
-+ if (!tmp)
-+ goto out_dq;
-+
-+ dqi = &qtd->qmblk->dq_ugid_info[qtd->type];
-+
-+ buf_off = 0;
-+ buf_size = count;
-+ blk_num = off / DQBLOCK_SIZE;
-+ blk_off = off % DQBLOCK_SIZE;
-+
-+ while (buf_size > 0) {
-+ off_t len;
-+
-+ len = min((size_t)(DQBLOCK_SIZE-blk_off), buf_size);
-+ res = read_block(blk_num, tmp, tree, dqi, qtd->type);
-+ if (res < 0)
-+ goto out_err;
-+ if (res == BLOCK_NOT_FOUND) {
-+ *eof = 1;
-+ break;
-+ }
-+ memcpy(page + buf_off, tmp + blk_off, len);
-+
-+ blk_num++;
-+ buf_size -= len;
-+ blk_off = 0;
-+ buf_off += len;
-+ }
-+ res = buf_off;
-+
-+out_err:
-+ kfree(tmp);
-+ *start = NULL + count;
-+out_dq:
-+ up(&qtd->qmblk->dq_sem);
-+ up(&vz_quota_sem);
-+
-+ return res;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * /proc/vz/vzaquota/QID/aquota.* files
-+ *
-+ * FIXME: this code lacks serialization of read/readdir/lseek.
-+ * However, this problem should be fixed after the mainstream issue of what
-+ * appears to be non-atomic read and update of file position in sys_read.
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+static inline unsigned long vzdq_aquot_getino(dev_t dev)
-+{
-+ return 0xec000000UL + dev;
-+}
-+
-+static inline dev_t vzdq_aquot_getidev(struct inode *inode)
-+{
-+ return (dev_t)(unsigned long)PROC_I(inode)->op.proc_get_link;
-+}
-+
-+static inline void vzdq_aquot_setidev(struct inode *inode, dev_t dev)
-+{
-+ PROC_I(inode)->op.proc_get_link = (void *)(unsigned long)dev;
-+}
-+
-+static ssize_t vzdq_aquotf_read(struct file *file,
-+ char __user *buf, size_t size, loff_t *ppos)
-+{
-+ char *page;
-+ size_t bufsize;
-+ ssize_t l, l2, copied;
-+ char *start;
-+ struct inode *inode;
-+ struct block_device *bdev;
-+ struct super_block *sb;
-+ struct quotatree_data data;
-+ int eof, err;
-+
-+ err = -ENOMEM;
-+ page = (char *)__get_free_page(GFP_KERNEL);
-+ if (page == NULL)
-+ goto out_err;
-+
-+ err = -ENODEV;
-+ inode = file->f_dentry->d_inode;
-+ bdev = bdget(vzdq_aquot_getidev(inode));
-+ if (bdev == NULL)
-+ goto out_err;
-+ sb = get_super(bdev);
-+ bdput(bdev);
-+ if (sb == NULL)
-+ goto out_err;
-+ data.qmblk = vzquota_find_qmblk(sb);
-+ data.type = PROC_I(inode)->type - 1;
-+ drop_super(sb);
-+ if (data.qmblk == NULL || data.qmblk == VZ_QUOTA_BAD)
-+ goto out_err;
-+
-+ copied = 0;
-+ l = l2 = 0;
-+ while (1) {
-+ bufsize = min(size, (size_t)PAGE_SIZE);
-+ if (bufsize <= 0)
-+ break;
-+
-+ l = read_proc_quotafile(page, &start, *ppos, bufsize,
-+ &eof, &data);
-+ if (l <= 0)
-+ break;
-+
-+ l2 = copy_to_user(buf, page, l);
-+ copied += l - l2;
-+ if (l2)
-+ break;
-+
-+ buf += l;
-+ size -= l;
-+ *ppos += (unsigned long)start;
-+ l = l2 = 0;
-+ }
-+
-+ qmblk_put(data.qmblk);
-+ free_page((unsigned long)page);
-+ if (copied)
-+ return copied;
-+ else if (l2) /* last copy_to_user failed */
-+ return -EFAULT;
-+ else /* read error or EOF */
-+ return l;
-+
-+out_err:
-+ if (page != NULL)
-+ free_page((unsigned long)page);
-+ return err;
-+}
-+
-+static struct file_operations vzdq_aquotf_file_operations = {
-+ .read = &vzdq_aquotf_read,
-+};
-+
-+static struct inode_operations vzdq_aquotf_inode_operations = {
-+};
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * /proc/vz/vzaquota/QID directory
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+static int vzdq_aquotq_readdir(struct file *file, void *data, filldir_t filler)
-+{
-+ loff_t n;
-+ int err;
-+ /* Emits ".", "..", "aquota.user" and "aquota.group", resuming at f_pos. */
-+ n = file->f_pos;
-+ for (err = 0; !err; n++) {
-+ switch (n) {
-+ case 0:
-+ err = (*filler)(data, ".", 1, n,
-+ file->f_dentry->d_inode->i_ino,
-+ DT_DIR);
-+ break;
-+ case 1:
-+ err = (*filler)(data, "..", 2, n,
-+ parent_ino(file->f_dentry), DT_DIR);
-+ break;
-+ case 2:
-+ err = (*filler)(data, "aquota.user", 11, n,
-+ file->f_dentry->d_inode->i_ino
-+ + USRQUOTA + 1,
-+ DT_REG);
-+ break;
-+ case 3:
-+ err = (*filler)(data, "aquota.group", 12, n,
-+ file->f_dentry->d_inode->i_ino
-+ + GRPQUOTA + 1,
-+ DT_REG);
-+ break;
-+ default:
-+ goto out;
-+ }
-+ }
-+out:
-+ file->f_pos = err ? n - 1 : n; /* the loop's n++ also ran for a rejected entry */
-+ return 0; /* a rejected entry is not a readdir failure; matches vzdq_aquotd_readdir */
-+}
-+
-+struct vzdq_aquotq_lookdata {
-+ dev_t dev;
-+ int type;
-+};
-+
-+static int vzdq_aquotq_looktest(struct inode *inode, void *data)
-+{
-+ struct vzdq_aquotq_lookdata *d;
-+
-+ d = data;
-+ return inode->i_op == &vzdq_aquotf_inode_operations &&
-+ vzdq_aquot_getidev(inode) == d->dev &&
-+ PROC_I(inode)->type == d->type + 1;
-+}
-+
-+static int vzdq_aquotq_lookset(struct inode *inode, void *data)
-+{
-+ struct vzdq_aquotq_lookdata *d;
-+
-+ d = data;
-+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-+ inode->i_ino = vzdq_aquot_getino(d->dev) + d->type + 1;
-+ inode->i_mode = S_IFREG | S_IRUSR;
-+ inode->i_uid = 0;
-+ inode->i_gid = 0;
-+ inode->i_nlink = 1;
-+ inode->i_op = &vzdq_aquotf_inode_operations;
-+ inode->i_fop = &vzdq_aquotf_file_operations;
-+ PROC_I(inode)->type = d->type + 1;
-+ vzdq_aquot_setidev(inode, d->dev);
-+ return 0;
-+}
-+
-+static struct dentry *vzdq_aquotq_lookup(struct inode *dir,
-+ struct dentry *dentry,
-+ struct nameidata *nd)
-+{
-+ struct inode *inode;
-+ struct vzdq_aquotq_lookdata d;
-+ int k;
-+ /* Only "aquota.user" and "aquota.group" exist here; compare every byte. */
-+ if (dentry->d_name.len == 11) {
-+ if (memcmp(dentry->d_name.name, "aquota.user", 11))
-+ goto out;
-+ k = USRQUOTA;
-+ } else if (dentry->d_name.len == 12) {
-+ if (memcmp(dentry->d_name.name, "aquota.group", 12))
-+ goto out;
-+ k = GRPQUOTA;
-+ } else
-+ goto out;
-+ d.dev = vzdq_aquot_getidev(dir);
-+ d.type = k;
-+ inode = iget5_locked(dir->i_sb, dir->i_ino + k + 1,
-+ vzdq_aquotq_looktest, vzdq_aquotq_lookset, &d);
-+ if (inode == NULL)
-+ goto out;
-+ if (inode->i_state & I_NEW) /* a cached inode is returned already unlocked */
-+ unlock_new_inode(inode);
-+ d_add(dentry, inode);
-+ return NULL;
-+out:
-+ return ERR_PTR(-ENOENT);
-+}
-+
-+static struct file_operations vzdq_aquotq_file_operations = {
-+ .read = &generic_read_dir,
-+ .readdir = &vzdq_aquotq_readdir,
-+};
-+
-+static struct inode_operations vzdq_aquotq_inode_operations = {
-+ .lookup = &vzdq_aquotq_lookup,
-+};
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * /proc/vz/vzaquota directory
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+struct vzdq_aquot_de {
-+ struct list_head list;
-+ struct vfsmount *mnt;
-+};
-+
-+static int vzdq_aquot_buildmntlist(struct ve_struct *ve,
-+ struct list_head *head)
-+{
-+ struct vfsmount *rmnt, *mnt;
-+ struct vzdq_aquot_de *p;
-+ int err;
-+ /* Collect one entry per distinct superblock mounted at or below the root mount. */
-+#ifdef CONFIG_VE
-+ rmnt = mntget(ve->fs_rootmnt);
-+#else
-+ read_lock(&current->fs->lock);
-+ rmnt = mntget(current->fs->rootmnt);
-+ read_unlock(&current->fs->lock);
-+#endif
-+ mnt = rmnt;
-+ spin_lock(&vfsmount_lock);
-+ while (1) {
-+ list_for_each_entry(p, head, list) {
-+ if (p->mnt->mnt_sb == mnt->mnt_sb)
-+ goto skip;
-+ }
-+
-+ err = -ENOMEM;
-+ p = kmalloc(sizeof(*p), GFP_ATOMIC); /* vfsmount_lock is a spinlock: must not sleep */
-+ if (p == NULL)
-+ goto out;
-+ p->mnt = mntget(mnt);
-+ list_add_tail(&p->list, head);
-+
-+skip:
-+ err = 0;
-+ if (list_empty(&mnt->mnt_mounts)) {
-+ while (1) {
-+ if (mnt == rmnt)
-+ goto out;
-+ if (mnt->mnt_child.next !=
-+ &mnt->mnt_parent->mnt_mounts)
-+ break;
-+ mnt = mnt->mnt_parent;
-+ }
-+ mnt = list_entry(mnt->mnt_child.next,
-+ struct vfsmount, mnt_child);
-+ } else
-+ mnt = list_entry(mnt->mnt_mounts.next,
-+ struct vfsmount, mnt_child);
-+ }
-+out:
-+ spin_unlock(&vfsmount_lock);
-+ mntput(rmnt);
-+ return err;
-+}
-+
-+static void vzdq_aquot_releasemntlist(struct ve_struct *ve,
-+ struct list_head *head)
-+{
-+ struct vzdq_aquot_de *p;
-+
-+ while (!list_empty(head)) {
-+ p = list_entry(head->next, typeof(*p), list);
-+ mntput(p->mnt);
-+ list_del(&p->list);
-+ kfree(p);
-+ }
-+}
-+
-+static int vzdq_aquotd_readdir(struct file *file, void *data, filldir_t filler)
-+{
-+ struct ve_struct *ve, *old_ve;
-+ struct list_head mntlist;
-+ struct vzdq_aquot_de *de;
-+ struct super_block *sb;
-+ struct vz_quota_master *qmblk;
-+ loff_t i, n;
-+ char buf[24];
-+ int l, err;
-+
-+ i = 0;
-+ n = file->f_pos;
-+ ve = VE_OWNER_FSTYPE(file->f_dentry->d_sb->s_type);
-+ old_ve = set_exec_env(ve);
-+
-+ INIT_LIST_HEAD(&mntlist);
-+#ifdef CONFIG_VE
-+ /*
-+ * The only reason of disabling readdir for the host system is that
-+ * this readdir can be slow and CPU consuming with large number of VPSs
-+ * (or just mount points).
-+ */
-+ err = ve_is_super(ve);
-+#else
-+ err = 0;
-+#endif
-+ if (!err) {
-+ err = vzdq_aquot_buildmntlist(ve, &mntlist);
-+ if (err)
-+ goto out_err;
-+ }
-+
-+ if (i >= n) {
-+ if ((*filler)(data, ".", 1, i,
-+ file->f_dentry->d_inode->i_ino, DT_DIR))
-+ goto out_fill;
-+ }
-+ i++;
-+
-+ if (i >= n) {
-+ if ((*filler)(data, "..", 2, i,
-+ parent_ino(file->f_dentry), DT_DIR))
-+ goto out_fill;
-+ }
-+ i++;
-+
-+ list_for_each_entry (de, &mntlist, list) {
-+ sb = de->mnt->mnt_sb;
-+#ifdef CONFIG_VE
-+ if (get_device_perms_ve(S_IFBLK, sb->s_dev, FMODE_QUOTACTL))
-+ continue;
-+#endif
-+ qmblk = vzquota_find_qmblk(sb);
-+ if (qmblk == NULL || qmblk == VZ_QUOTA_BAD)
-+ continue;
-+
-+ qmblk_put(qmblk);
-+ i++;
-+ if (i <= n)
-+ continue;
-+
-+ l = sprintf(buf, "%08x", new_encode_dev(sb->s_dev));
-+ if ((*filler)(data, buf, l, i - 1,
-+ vzdq_aquot_getino(sb->s_dev), DT_DIR))
-+ break;
-+ }
-+
-+out_fill:
-+ err = 0;
-+ file->f_pos = i;
-+out_err:
-+ vzdq_aquot_releasemntlist(ve, &mntlist);
-+ (void)set_exec_env(old_ve);
-+ return err;
-+}
-+
-+static int vzdq_aquotd_looktest(struct inode *inode, void *data)
-+{
-+ return inode->i_op == &vzdq_aquotq_inode_operations &&
-+ vzdq_aquot_getidev(inode) == (dev_t)(unsigned long)data;
-+}
-+
-+static int vzdq_aquotd_lookset(struct inode *inode, void *data)
-+{
-+ dev_t dev;
-+
-+ dev = (dev_t)(unsigned long)data;
-+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-+ inode->i_ino = vzdq_aquot_getino(dev);
-+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
-+ inode->i_uid = 0;
-+ inode->i_gid = 0;
-+ inode->i_nlink = 2;
-+ inode->i_op = &vzdq_aquotq_inode_operations;
-+ inode->i_fop = &vzdq_aquotq_file_operations;
-+ vzdq_aquot_setidev(inode, dev);
-+ return 0;
-+}
-+
-+static struct dentry *vzdq_aquotd_lookup(struct inode *dir,
-+ struct dentry *dentry,
-+ struct nameidata *nd)
-+{
-+ struct ve_struct *ve, *old_ve;
-+ const unsigned char *s;
-+ int l;
-+ dev_t dev;
-+ struct inode *inode;
-+ /* Resolve a hexadecimal device-number name to its per-device quota directory. */
-+ ve = VE_OWNER_FSTYPE(dir->i_sb->s_type);
-+ old_ve = set_exec_env(ve);
-+#ifdef CONFIG_VE
-+ /*
-+ * Lookup is much lighter than readdir, so it can be allowed for the
-+ * host system. But it would be strange to be able to do lookup only
-+ * without readdir...
-+ */
-+ if (ve_is_super(ve))
-+ goto out;
-+#endif
-+
-+ dev = 0;
-+ l = dentry->d_name.len;
-+ if (l <= 0)
-+ goto out;
-+ for (s = dentry->d_name.name; l > 0; s++, l--) {
-+ if (!isxdigit(*s))
-+ goto out;
-+ if (dev & ~(~(dev_t)0 >> 4)) /* top nibble of dev_t in use: the shift below would overflow */
-+ goto out;
-+ dev <<= 4;
-+ if (isdigit(*s))
-+ dev += *s - '0';
-+ else if (islower(*s))
-+ dev += *s - 'a' + 10;
-+ else
-+ dev += *s - 'A' + 10;
-+ }
-+ dev = new_decode_dev(dev);
-+
-+#ifdef CONFIG_VE
-+ if (get_device_perms_ve(S_IFBLK, dev, FMODE_QUOTACTL))
-+ goto out;
-+#endif
-+
-+ inode = iget5_locked(dir->i_sb, vzdq_aquot_getino(dev),
-+ vzdq_aquotd_looktest, vzdq_aquotd_lookset,
-+ (void *)(unsigned long)dev);
-+ if (inode == NULL)
-+ goto out;
-+ if (inode->i_state & I_NEW) /* a cached inode is returned already unlocked */
-+ unlock_new_inode(inode);
-+ d_add(dentry, inode);
-+ (void)set_exec_env(old_ve);
-+ return NULL;
-+
-+out:
-+ (void)set_exec_env(old_ve);
-+ return ERR_PTR(-ENOENT);
-+}
-+
-+static struct file_operations vzdq_aquotd_file_operations = {
-+ .read = &generic_read_dir,
-+ .readdir = &vzdq_aquotd_readdir,
-+};
-+
-+static struct inode_operations vzdq_aquotd_inode_operations = {
-+ .lookup = &vzdq_aquotd_lookup,
-+};
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Initialization and deinitialization
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * FIXME: creation of proc entries here is unsafe with respect to module
-+ * unloading.
-+ */
-+void vzaquota_init(void)
-+{
-+ struct proc_dir_entry *de;
-+
-+ de = create_proc_glob_entry("vz/vzaquota",
-+ S_IFDIR | S_IRUSR | S_IXUSR, NULL);
-+ if (de != NULL) {
-+ de->proc_iops = &vzdq_aquotd_inode_operations;
-+ de->proc_fops = &vzdq_aquotd_file_operations;
-+ } else
-+ printk("VZDQ: vz/vzaquota creation failed\n");
-+#if defined(CONFIG_SYSCTL)
-+ de = create_proc_glob_entry("sys/fs/quota",
-+ S_IFDIR | S_IRUSR | S_IXUSR, NULL);
-+ if (de == NULL)
-+ printk("VZDQ: sys/fs/quota creation failed\n");
-+#endif
-+}
-+
-+void vzaquota_fini(void)
-+{
-+}
-diff -upr linux-2.6.16.orig/fs/vzdq_mgmt.c linux-2.6.16-026test009/fs/vzdq_mgmt.c
---- linux-2.6.16.orig/fs/vzdq_mgmt.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/vzdq_mgmt.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,735 @@
-+/*
-+ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/kernel.h>
-+#include <linux/string.h>
-+#include <linux/list.h>
-+#include <asm/semaphore.h>
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/dcache.h>
-+#include <linux/mount.h>
-+#include <linux/namei.h>
-+#include <linux/writeback.h>
-+#include <linux/gfp.h>
-+#include <asm/uaccess.h>
-+#include <linux/proc_fs.h>
-+#include <linux/quota.h>
-+#include <linux/vzctl_quota.h>
-+#include <linux/vzquota.h>
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Switching quota on.
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * check limits copied from user
-+ */
-+int vzquota_check_sane_limits(struct dq_stat *qstat)
-+{
-+ int err;
-+
-+ err = -EINVAL;
-+
-+ /* softlimit must not be greater than hardlimit */
-+ if (qstat->bsoftlimit > qstat->bhardlimit)
-+ goto out;
-+
-+ if (qstat->isoftlimit > qstat->ihardlimit)
-+ goto out;
-+
-+ err = 0;
-+out:
-+ return err;
-+}
-+
-+/*
-+ * check usage values copied from user
-+ */
-+int vzquota_check_sane_values(struct dq_stat *qstat)
-+{
-+ int err;
-+
-+ err = -EINVAL;
-+
-+ /* expiration time must not be set if softlimit was not exceeded */
-+ if (qstat->bcurrent < qstat->bsoftlimit && qstat->btime != (time_t)0)
-+ goto out;
-+
-+ if (qstat->icurrent < qstat->isoftlimit && qstat->itime != (time_t)0)
-+ goto out;
-+
-+ err = vzquota_check_sane_limits(qstat);
-+out:
-+ return err;
-+}
-+
-+/*
-+ * create new quota master block
-+ * this function should:
-+ * - copy limits and usage parameters from user buffer;
-+ * - allocate and initialize the quota block and insert it into the hash;
-+ */
-+static int vzquota_create(unsigned int quota_id, struct vz_quota_stat *u_qstat)
-+{
-+ int err;
-+ struct vz_quota_stat qstat;
-+ struct vz_quota_master *qmblk;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -EFAULT;
-+ if (copy_from_user(&qstat, u_qstat, sizeof(qstat)))
-+ goto out;
-+
-+ err = -EINVAL;
-+ if (quota_id == 0)
-+ goto out;
-+
-+ if (vzquota_check_sane_values(&qstat.dq_stat))
-+ goto out;
-+ err = 0;
-+ qmblk = vzquota_alloc_master(quota_id, &qstat);
-+
-+ if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
-+ err = PTR_ERR(qmblk);
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+/**
-+ * vzquota_on - turn quota on
-+ *
-+ * This function should:
-+ * - find and get refcnt of directory entry for quota root and corresponding
-+ * mountpoint;
-+ * - find corresponding quota block and mark it with given path;
-+ * - check quota tree;
-+ * - initialize quota for the tree root.
-+ */
-+static int vzquota_on(unsigned int quota_id, const char *quota_root)
-+{
-+ int err;
-+ struct nameidata nd;
-+ struct vz_quota_master *qmblk;
-+ struct super_block *dqsb;
-+
-+ dqsb = NULL;
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EBUSY;
-+ if (qmblk->dq_state != VZDQ_STARTING)
-+ goto out;
-+
-+ err = user_path_walk(quota_root, &nd);
-+ if (err)
-+ goto out;
-+ /* init path must be a directory */
-+ err = -ENOTDIR;
-+ if (!S_ISDIR(nd.dentry->d_inode->i_mode))
-+ goto out_path;
-+
-+ qmblk->dq_root_dentry = nd.dentry;
-+ qmblk->dq_root_mnt = nd.mnt;
-+ qmblk->dq_sb = nd.dentry->d_inode->i_sb;
-+ err = vzquota_get_super(qmblk->dq_sb);
-+ if (err)
-+ goto out_super;
-+
-+ /*
-+ * Serialization with quota initialization and operations is performed
-+ * through generation check: generation is memorized before qmblk is
-+ * found and compared under inode_qmblk_lock with assignment.
-+ *
-+ * Note that the dentry tree is shrunk only for high-level logical
-+ * serialization, purely as a courtesy to the user: to have consistent
-+ * quota statistics, files should be closed etc. on quota on.
-+ */
-+ err = vzquota_on_qmblk(qmblk->dq_sb, qmblk->dq_root_dentry->d_inode,
-+ qmblk);
-+ if (err)
-+ goto out_init;
-+ qmblk->dq_state = VZDQ_WORKING;
-+
-+ up(&vz_quota_sem);
-+ return 0;
-+
-+out_init:
-+ dqsb = qmblk->dq_sb;
-+out_super:
-+ /* clear for qmblk_put/quota_free_master */
-+ qmblk->dq_sb = NULL;
-+ qmblk->dq_root_dentry = NULL;
-+ qmblk->dq_root_mnt = NULL;
-+out_path:
-+ path_release(&nd);
-+out:
-+ if (dqsb)
-+ vzquota_put_super(dqsb);
-+ up(&vz_quota_sem);
-+ return err;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Switching quota off.
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * destroy quota block by ID
-+ */
-+static int vzquota_destroy(unsigned int quota_id)
-+{
-+ int err;
-+ struct vz_quota_master *qmblk;
-+ struct dentry *dentry;
-+ struct vfsmount *mnt;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EBUSY;
-+ if (qmblk->dq_state == VZDQ_WORKING)
-+ goto out; /* quota_off first */
-+
-+ list_del_init(&qmblk->dq_hash);
-+ dentry = qmblk->dq_root_dentry;
-+ qmblk->dq_root_dentry = NULL;
-+ mnt = qmblk->dq_root_mnt;
-+ qmblk->dq_root_mnt = NULL;
-+
-+ if (qmblk->dq_sb)
-+ vzquota_put_super(qmblk->dq_sb);
-+ up(&vz_quota_sem);
-+
-+ qmblk_put(qmblk);
-+ dput(dentry);
-+ mntput(mnt);
-+ return 0;
-+
-+out:
-+ up(&vz_quota_sem);
-+ return err;
-+}
-+
-+/**
-+ * vzquota_off - turn quota off
-+ */
-+
-+static int __vzquota_sync_list(struct list_head *lh,
-+ struct vz_quota_master *qmblk,
-+ enum writeback_sync_modes sync_mode)
-+{
-+ struct writeback_control wbc;
-+ LIST_HEAD(list);
-+ struct vz_quota_ilink *qlnk;
-+ struct inode *inode;
-+ int err;
-+
-+ memset(&wbc, 0, sizeof(wbc));
-+ wbc.sync_mode = sync_mode;
-+
-+ err = 0;
-+ while (!list_empty(lh) && !err) {
-+ if (need_resched()) {
-+ inode_qmblk_unlock(qmblk->dq_sb);
-+ schedule();
-+ inode_qmblk_lock(qmblk->dq_sb);
-+ }
-+
-+ qlnk = list_first_entry(lh, struct vz_quota_ilink, list);
-+ list_move(&qlnk->list, &list);
-+
-+ inode = igrab(QLNK_INODE(qlnk));
-+ if (!inode)
-+ continue;
-+
-+ inode_qmblk_unlock(qmblk->dq_sb);
-+
-+ wbc.nr_to_write = LONG_MAX;
-+ err = sync_inode(inode, &wbc);
-+ iput(inode);
-+
-+ inode_qmblk_lock(qmblk->dq_sb);
-+ }
-+
-+ list_splice(&list, lh);
-+ return err;
-+}
-+
-+static int vzquota_sync_list(struct list_head *lh,
-+ struct vz_quota_master *qmblk)
-+{
-+ int err;
-+
-+ err = __vzquota_sync_list(lh, qmblk, WB_SYNC_NONE);
-+ if (err)
-+ return err;
-+
-+ err = __vzquota_sync_list(lh, qmblk, WB_SYNC_ALL);
-+ if (err)
-+ return err;
-+
-+ return 0;
-+}
-+
-+static int vzquota_sync_inodes(struct vz_quota_master *qmblk)
-+{
-+ int err;
-+ LIST_HEAD(qlnk_list);
-+
-+ list_splice_init(&qmblk->dq_ilink_list, &qlnk_list);
-+ err = vzquota_sync_list(&qlnk_list, qmblk);
-+ if (!err && !list_empty(&qmblk->dq_ilink_list))
-+ err = -EBUSY;
-+ list_splice(&qlnk_list, &qmblk->dq_ilink_list);
-+
-+ return err;
-+}
-+
-+static int vzquota_off(unsigned int quota_id)
-+{
-+ int err;
-+ struct vz_quota_master *qmblk;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EALREADY;
-+ if (qmblk->dq_state != VZDQ_WORKING)
-+ goto out;
-+
-+ inode_qmblk_lock(qmblk->dq_sb); /* protects dq_ilink_list also */
-+ err = vzquota_sync_inodes(qmblk);
-+ if (err)
-+ goto out_unlock;
-+ inode_qmblk_unlock(qmblk->dq_sb);
-+
-+ err = vzquota_off_qmblk(qmblk->dq_sb, qmblk);
-+ if (err)
-+ goto out;
-+
-+ /* vzquota_destroy will free resources */
-+ qmblk->dq_state = VZDQ_STOPING;
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+
-+out_unlock:
-+ inode_qmblk_unlock(qmblk->dq_sb);
-+ goto out;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Other VZQUOTA ioctl's.
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * this function should:
-+ * - set new limits/buffer under quota master block lock
-+ * - if the new softlimit is less than usage, then set expiration time
-+ * - no need to alloc ugid hash table - we'll do that on demand
-+ */
-+int vzquota_update_limit(struct dq_stat *_qstat,
-+ struct dq_stat *qstat)
-+{
-+ int err;
-+
-+ err = -EINVAL;
-+ if (vzquota_check_sane_limits(qstat))
-+ goto out;
-+
-+ err = 0;
-+
-+ /* limits */
-+ _qstat->bsoftlimit = qstat->bsoftlimit;
-+ _qstat->bhardlimit = qstat->bhardlimit;
-+ /*
-+ * If the soft limit is exceeded, administrator can override the moment
-+ * when the grace period for limit exceeding ends.
-+ * Specifying the moment may be useful if the soft limit is set to be
-+ * lower than the current usage. In the latter case, if the grace
-+ * period end isn't specified, the grace period will start from the
-+ * moment of the first write operation.
-+ * There is a race with the user level. Soft limit may be already
-+ * exceeded before the limit change, and grace period end calculated by
-+ * the kernel will be overriden. User level may check if the limit is
-+ * already exceeded, but check and set calls are not atomic.
-+ * This race isn't dangerous. Under normal cicrumstances, the
-+ * difference between the grace period end calculated by the kernel and
-+ * the user level should be not greater than as the difference between
-+ * the moments of check and set calls, i.e. not bigger than the quota
-+ * timer resolution - 1 sec.
-+ */
-+ if (qstat->btime != (time_t)0 &&
-+ _qstat->bcurrent >= _qstat->bsoftlimit)
-+ _qstat->btime = qstat->btime;
-+
-+ _qstat->isoftlimit = qstat->isoftlimit;
-+ _qstat->ihardlimit = qstat->ihardlimit;
-+ if (qstat->itime != (time_t)0 &&
-+ _qstat->icurrent >= _qstat->isoftlimit)
-+ _qstat->itime = qstat->itime;
-+
-+out:
-+ return err;
-+}
-+
-+/*
-+ * Set new quota limits for the master identified by @quota_id.
-+ *
-+ * Looks the master up, copies the new struct vz_quota_stat from user
-+ * space and applies it under the master's data write lock.  dq_info is
-+ * replaced only if vzquota_update_limit() accepted the new limits.
-+ *
-+ * Returns -ENOENT if the master does not exist, -EFAULT if the user
-+ * buffer cannot be read, otherwise the result of vzquota_update_limit().
-+ */
-+static int vzquota_setlimit(unsigned int quota_id,
-+		struct vz_quota_stat *u_qstat)
-+{
-+	struct vz_quota_master *master;
-+	struct vz_quota_stat new_stat;
-+	int rc;
-+
-+	down(&vz_quota_sem);	/* for hash list protection */
-+
-+	master = vzquota_find_master(quota_id);
-+	if (master == NULL) {
-+		rc = -ENOENT;
-+	} else if (copy_from_user(&new_stat, u_qstat, sizeof(new_stat))) {
-+		rc = -EFAULT;
-+	} else {
-+		qmblk_data_write_lock(master);
-+		rc = vzquota_update_limit(&master->dq_stat, &new_stat.dq_stat);
-+		if (rc == 0)
-+			master->dq_info = new_stat.dq_info;
-+		qmblk_data_write_unlock(master);
-+	}
-+
-+	up(&vz_quota_sem);
-+	return rc;
-+}
-+
-+/*
-+ * Report the limits and usage of the master identified by @quota_id.
-+ *
-+ * dq_stat and dq_info are snapshotted under the master's data read lock
-+ * and then copied out to @u_qstat.
-+ *
-+ * Returns 0 on success, -ENOENT if the master does not exist, -EFAULT if
-+ * the user buffer cannot be written.
-+ */
-+static int vzquota_getstat(unsigned int quota_id,
-+		struct vz_quota_stat *u_qstat)
-+{
-+	struct vz_quota_master *master;
-+	struct vz_quota_stat snapshot;
-+	int rc = -ENOENT;
-+
-+	down(&vz_quota_sem);
-+
-+	master = vzquota_find_master(quota_id);
-+	if (master != NULL) {
-+		/* take a consistent copy while holding the lock */
-+		qmblk_data_read_lock(master);
-+		memcpy(&snapshot.dq_stat, &master->dq_stat,
-+				sizeof(snapshot.dq_stat));
-+		memcpy(&snapshot.dq_info, &master->dq_info,
-+				sizeof(snapshot.dq_info));
-+		qmblk_data_read_unlock(master);
-+
-+		if (copy_to_user(u_qstat, &snapshot, sizeof(snapshot)))
-+			rc = -EFAULT;
-+		else
-+			rc = 0;
-+	}
-+
-+	up(&vz_quota_sem);
-+	return rc;
-+}
-+
-+/*
-+ * Entry point for the per-VE disk quota control commands.
-+ *
-+ * The caller must hold both CAP_SYS_RESOURCE and CAP_SYS_ADMIN (the
-+ * call is meant to be used from the root of VE0 only), otherwise -EPERM
-+ * is returned.  Unknown commands yield -EINVAL; everything else returns
-+ * the result of the command handler.
-+ */
-+long do_vzquotactl(int cmd, unsigned int quota_id,
-+		struct vz_quota_stat *qstat, const char *ve_root)
-+{
-+	int rc;
-+
-+	if (!(capable(CAP_SYS_RESOURCE) && capable(CAP_SYS_ADMIN)))
-+		return -EPERM;
-+
-+	switch (cmd) {
-+	case VZ_DQ_CREATE:
-+		rc = vzquota_create(quota_id, qstat);
-+		break;
-+	case VZ_DQ_DESTROY:
-+		rc = vzquota_destroy(quota_id);
-+		break;
-+	case VZ_DQ_ON:
-+		rc = vzquota_on(quota_id, ve_root);
-+		break;
-+	case VZ_DQ_OFF:
-+		rc = vzquota_off(quota_id);
-+		break;
-+	case VZ_DQ_SETLIMIT:
-+		rc = vzquota_setlimit(quota_id, qstat);
-+		break;
-+	case VZ_DQ_GETSTAT:
-+		rc = vzquota_getstat(quota_id, qstat);
-+		break;
-+	default:
-+		rc = -EINVAL;
-+		break;
-+	}
-+
-+	return rc;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Proc filesystem routines
-+ * ---------------------------------------------------------------------*/
-+
-+#if defined(CONFIG_PROC_FS)
-+
-+#define QUOTA_UINT_LEN 15 /* room reserved for the printed quota id when a path is truncated */
-+#define QUOTA_TIME_LEN_FMT_UINT "%11u" /* time/expire column, numeric value */
-+#define QUOTA_NUM_LEN_FMT_UINT "%15u" /* usage/limit column, value printed with %u */
-+#define QUOTA_NUM_LEN_FMT_ULL "%15Lu" /* usage/limit column, value printed with %Lu */
-+#define QUOTA_TIME_LEN_FMT_STR "%11s" /* time/expire column, title string */
-+#define QUOTA_NUM_LEN_FMT_STR "%15s" /* usage/limit column, title string */
-+#define QUOTA_PROC_MAX_LINE_LEN 2048 /* size bound used for path truncation and for the free-space check in the read callback */
-+
-+/*
-+ * Write the column-title line of the quota proc file into @buffer.
-+ * Returns the number of characters produced by sprintf().
-+ */
-+static int print_proc_header(char * buffer)
-+{
-+	static const char header_fmt[] =
-+		"%-11s"
-+		QUOTA_NUM_LEN_FMT_STR
-+		QUOTA_NUM_LEN_FMT_STR
-+		QUOTA_NUM_LEN_FMT_STR
-+		QUOTA_TIME_LEN_FMT_STR
-+		QUOTA_TIME_LEN_FMT_STR
-+		"\n";
-+
-+	return sprintf(buffer, header_fmt,
-+			"qid: path",
-+			"usage", "softlimit", "hardlimit", "time", "expire");
-+}
-+
-+/*
-+ * Print the master record id and its root path as "<id>: <path>\n".
-+ *
-+ * @buffer:   destination of the formatted line
-+ * @path_buf: PAGE_SIZE scratch buffer handed to d_path(); may be NULL, in
-+ *            which case an empty path is printed for a working quota
-+ * @qp:       quota master to describe
-+ *
-+ * For a VZDQ_WORKING master the path is resolved with d_path(); an empty
-+ * string is printed if that fails.  A path that is too long for one proc
-+ * line is cut from the left and the cut is marked with a leading "...".
-+ * Masters in VZDQ_STARTING / VZDQ_STOPING state get a fixed placeholder.
-+ *
-+ * Returns the number of characters produced by sprintf().
-+ */
-+static int print_proc_master_id(char * buffer, char * path_buf,
-+		struct vz_quota_master * qp)
-+{
-+	char *path;
-+	int over;
-+
-+	path = NULL;
-+	switch (qp->dq_state) {
-+	case VZDQ_WORKING:
-+		if (!path_buf) {
-+			path = "";
-+			break;
-+		}
-+		path = d_path(qp->dq_root_dentry,
-+				qp->dq_root_mnt, path_buf, PAGE_SIZE);
-+		if (IS_ERR(path)) {
-+			path = "";
-+			break;
-+		}
-+		/* do not print large path, truncate it */
-+		over = strlen(path) -
-+			(QUOTA_PROC_MAX_LINE_LEN - 3 - 3 -
-+			 QUOTA_UINT_LEN);
-+		if (over > 0) {
-+			/*
-+			 * Keep three characters in front of the retained
-+			 * tail and turn all three of them into dots.  The
-+			 * third dot used to be written to path[3], which
-+			 * left path[2] untouched and clobbered a character
-+			 * of the retained tail.
-+			 */
-+			path += over - 3;
-+			path[0] = path[1] = path[2] = '.';
-+		}
-+		break;
-+	case VZDQ_STARTING:
-+		path = "-- started --";
-+		break;
-+	case VZDQ_STOPING:
-+		path = "-- stopped --";
-+		break;
-+	}
-+
-+	return sprintf(buffer, "%u: %s\n", qp->dq_id, path);
-+}
-+
-+/*
-+ * prints struct vz_quota_stat data
-+ *
-+ * Writes two lines into @buffer: a "1k-blocks" line with block usage and
-+ * limits from @qs shifted right by 10 (i.e. divided by 1024), and an
-+ * "inodes" line with inode usage and limits.  Each line ends with the
-+ * grace time from @qs and the expiration period from @qi.
-+ * Returns the number of characters produced by sprintf().
-+ * NOTE(review): the "%15Lu" / "%15u" conversions assume the block fields
-+ * are unsigned long long and the inode fields are unsigned int - confirm
-+ * against the definition of struct dq_stat.
-+ */
-+static int print_proc_stat(char * buffer, struct dq_stat *qs,
-+ struct dq_info *qi)
-+{
-+ return sprintf(buffer,
-+ "%11s"
-+ QUOTA_NUM_LEN_FMT_ULL
-+ QUOTA_NUM_LEN_FMT_ULL
-+ QUOTA_NUM_LEN_FMT_ULL
-+ QUOTA_TIME_LEN_FMT_UINT
-+ QUOTA_TIME_LEN_FMT_UINT
-+ "\n"
-+ "%11s"
-+ QUOTA_NUM_LEN_FMT_UINT
-+ QUOTA_NUM_LEN_FMT_UINT
-+ QUOTA_NUM_LEN_FMT_UINT
-+ QUOTA_TIME_LEN_FMT_UINT
-+ QUOTA_TIME_LEN_FMT_UINT
-+ "\n",
-+ "1k-blocks",
-+ qs->bcurrent >> 10,
-+ qs->bsoftlimit >> 10,
-+ qs->bhardlimit >> 10,
-+ (unsigned int)qs->btime,
-+ (unsigned int)qi->bexpire,
-+ "inodes",
-+ qs->icurrent,
-+ qs->isoftlimit,
-+ qs->ihardlimit,
-+ (unsigned int)qs->itime,
-+ (unsigned int)qi->iexpire);
-+}
-+
-+
-+/*
-+ * for /proc filesystem output
-+ *
-+ * read_proc callback: prints the header line followed by one record per
-+ * quota master found in vzquota_hash_table, walking the table under
-+ * vz_quota_sem.
-+ * Callers lacking CAP_SYS_ADMIN or CAP_SYS_RESOURCE only get the master
-+ * linked to the inode of their filesystem root; the "id: path" line is
-+ * printed only with CAP_SYS_ADMIN.
-+ * Records that end at or before offset @off are formatted and then
-+ * overwritten; *start is pointed at the first requested byte inside @page
-+ * and *eof is set once the whole hash table has been walked.
-+ * Returns the number of bytes available at *start (at most @count), or
-+ * -ENOMEM if the scratch page for d_path() cannot be allocated.
-+ */
-+static int vzquota_read_proc(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ int len, i;
-+ off_t printed = 0;
-+ char *p = page;
-+ struct vz_quota_master *qp;
-+ struct vz_quota_ilink *ql2;
-+ struct list_head *listp;
-+ char *path_buf;
-+
-+ path_buf = (char*)__get_free_page(GFP_KERNEL);
-+ if (path_buf == NULL)
-+ return -ENOMEM;
-+
-+ len = print_proc_header(p);
-+ printed += len;
-+ if (off < printed) /* keep header in output */ {
-+ *start = p + off;
-+ p += len;
-+ }
-+
-+ down(&vz_quota_sem);
-+
-+ /* traverse master hash table for all records */
-+ for (i = 0; i < vzquota_hash_size; i++) {
-+ list_for_each(listp, &vzquota_hash_table[i]) {
-+ qp = list_entry(listp,
-+ struct vz_quota_master, dq_hash);
-+
-+ /* Skip other VE's information if not root of VE0 */
-+ if ((!capable(CAP_SYS_ADMIN) ||
-+ !capable(CAP_SYS_RESOURCE))) {
-+ ql2 = INODE_QLNK(current->fs->root->d_inode);
-+ if (ql2 == NULL || qp != ql2->qmblk)
-+ continue;
-+ }
-+ /*
-+ * Now print the next record
-+ */
-+ len = 0;
-+ /* we print quotaid and path only in VE0 */
-+ if (capable(CAP_SYS_ADMIN))
-+ len += print_proc_master_id(p+len,path_buf, qp);
-+ len += print_proc_stat(p+len, &qp->dq_stat,
-+ &qp->dq_info);
-+ printed += len;
-+ /* skip unnecessary lines: p is not advanced, so the next record overwrites this one */
-+ if (printed <= off)
-+ continue;
-+ p += len;
-+ /* provide start offset; relies on *start being NULL on entry - TODO confirm against the caller */
-+ if (*start == NULL)
-+ *start = p + (off - printed);
-+ /* have we printed all requested size, or is less than one maximal record left in the page? */
-+ if (PAGE_SIZE - (p - page) < QUOTA_PROC_MAX_LINE_LEN ||
-+ (p - *start) >= count)
-+ goto out;
-+ }
-+ }
-+
-+ *eof = 1; /* checked all hash */
-+out:
-+ up(&vz_quota_sem);
-+
-+ len = 0;
-+ if (*start != NULL) {
-+ len = (p - *start);
-+ if (len > count)
-+ len = count;
-+ }
-+
-+ if (path_buf)
-+ free_page((unsigned long) path_buf);
-+
-+ return len;
-+}
-+
-+/*
-+ * Create the "vz/vzquota" proc entry and hook up the read callback.
-+ *
-+ * If the entry cannot be created directly, the "vz" directory is created
-+ * first and the entry is then created inside it.
-+ * Returns 0 on success, -EBUSY if either creation step fails.
-+ */
-+int vzquota_proc_init(void)
-+{
-+	struct proc_dir_entry *entry;
-+	struct proc_dir_entry *vz_dir;
-+
-+	entry = create_proc_entry("vz/vzquota", S_IFREG|S_IRUSR, NULL);
-+	if (entry == NULL) {
-+		/* the "vz" subdirectory may not exist yet */
-+		vz_dir = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
-+		if (vz_dir == NULL)
-+			return -EBUSY;
-+		entry = create_proc_entry("vzquota", S_IFREG|S_IRUSR, vz_dir);
-+		if (entry == NULL)
-+			return -EBUSY;
-+	}
-+
-+	entry->read_proc = vzquota_read_proc;
-+	entry->data = NULL;
-+	return 0;
-+}
-+
-+/* Remove the "vz/vzquota" proc entry, dropping the read callback with it. */
-+void vzquota_proc_release(void)
-+{
-+	remove_proc_entry("vz/vzquota", NULL);
-+}
-+
-+#endif
-diff -upr linux-2.6.16.orig/fs/vzdq_ops.c linux-2.6.16-026test009/fs/vzdq_ops.c
---- linux-2.6.16.orig/fs/vzdq_ops.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/vzdq_ops.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,565 @@
-+/*
-+ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/kernel.h>
-+#include <linux/types.h>
-+#include <asm/semaphore.h>
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/quota.h>
-+#include <linux/vzquota.h>
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Quota superblock operations - helper functions.
-+ * --------------------------------------------------------------------- */
-+
-+/* Account @number additional inodes in @dqstat. */
-+static inline void vzquota_incr_inodes(struct dq_stat *dqstat,
-+		unsigned long number)
-+{
-+	dqstat->icurrent = dqstat->icurrent + number;
-+}
-+
-+/* Account @number additional bytes of space in @dqstat. */
-+static inline void vzquota_incr_space(struct dq_stat *dqstat,
-+		__u64 number)
-+{
-+	dqstat->bcurrent = dqstat->bcurrent + number;
-+}
-+
-+/*
-+ * Subtract @number inodes from the usage in @dqstat, clamping at zero,
-+ * and clear the grace timestamp once usage is below the soft limit.
-+ */
-+static inline void vzquota_decr_inodes(struct dq_stat *dqstat,
-+		unsigned long number)
-+{
-+	if (dqstat->icurrent <= number)
-+		dqstat->icurrent = 0;
-+	else
-+		dqstat->icurrent -= number;
-+
-+	if (dqstat->icurrent < dqstat->isoftlimit)
-+		dqstat->itime = (time_t) 0;
-+}
-+
-+/*
-+ * Subtract @number bytes from the usage in @dqstat, clamping at zero,
-+ * and clear the grace timestamp once usage is below the soft limit.
-+ */
-+static inline void vzquota_decr_space(struct dq_stat *dqstat,
-+		__u64 number)
-+{
-+	if (dqstat->bcurrent <= number)
-+		dqstat->bcurrent = 0;
-+	else
-+		dqstat->bcurrent -= number;
-+
-+	if (dqstat->bcurrent < dqstat->bsoftlimit)
-+		dqstat->btime = (time_t) 0;
-+}
-+
-+/*
-+ * Print a quota warning once per master block and warning kind.
-+ *
-+ * @fmt is a printk format taking @dq_id as its only argument.  If @flag
-+ * is already set in dq_info->flags nothing is printed; otherwise the
-+ * message is printed and @flag is recorded.  With @flag == 0 the
-+ * message is printed on every call.
-+ *
-+ * TODO from the original author: use a better printk() message or a
-+ * /proc/vzquotamsg interface similar to /proc/kmsg.
-+ */
-+static inline void vzquota_warn(struct dq_info *dq_info, int dq_id, int flag,
-+		const char *fmt)
-+{
-+	if (!(dq_info->flags & flag)) {
-+		printk(fmt, dq_id);
-+		dq_info->flags |= flag;
-+	}
-+}
-+
-+/*
-+ * ignore_hardlimit -
-+ *
-+ * Intended to allow superuser of VE0 to overwrite hardlimits.
-+ *
-+ * ignore_hardlimit() has a very bad feature:
-+ *
-+ * writepage() operation for writable mapping of a file with holes
-+ * may trigger get_block() with wrong current and as a consequence,
-+ * opens a possibility to overcommit hardlimits
-+ */
-+/* for the reason above, it is disabled now: the function always returns 0
-+ * and the original check is kept under "#if 0" for reference only */
-+static inline int ignore_hardlimit(struct dq_info *dqstat)
-+{
-+#if 0
-+ return ve_is_super(get_exec_env()) &&
-+ capable(CAP_SYS_RESOURCE) &&
-+ (dqstat->options & VZ_QUOTA_OPT_RSQUASH);
-+#else
-+ return 0;
-+#endif
-+}
-+
-+/*
-+ * Check whether @number more inodes fit into the master limits @dqstat.
-+ *
-+ * Returns NO_QUOTA when the hard limit would be exceeded or when the
-+ * soft limit is exceeded and its grace period has run out (both unless
-+ * ignore_hardlimit() says otherwise).  The first time the soft limit is
-+ * exceeded the grace period is started and QUOTA_OK is returned.
-+ */
-+static int vzquota_check_inodes(struct dq_info *dq_info,
-+		struct dq_stat *dqstat,
-+		unsigned long number, int dq_id)
-+{
-+	if (number == 0)
-+		return QUOTA_OK;
-+
-+	if (dqstat->icurrent + number > dqstat->ihardlimit &&
-+			!ignore_hardlimit(dq_info)) {
-+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
-+			"VZ QUOTA: file hardlimit reached for id=%d\n");
-+		return NO_QUOTA;
-+	}
-+
-+	/* still within the soft limit: nothing else to do */
-+	if (dqstat->icurrent + number <= dqstat->isoftlimit)
-+		return QUOTA_OK;
-+
-+	if (dqstat->itime == (time_t)0) {
-+		/* soft limit crossed just now: start the grace period */
-+		vzquota_warn(dq_info, dq_id, 0,
-+			"VZ QUOTA: file softlimit exceeded "
-+			"for id=%d\n");
-+		dqstat->itime = CURRENT_TIME_SECONDS + dq_info->iexpire;
-+		return QUOTA_OK;
-+	}
-+
-+	if (CURRENT_TIME_SECONDS >= dqstat->itime &&
-+			!ignore_hardlimit(dq_info)) {
-+		vzquota_warn(dq_info, dq_id, VZ_QUOTA_INODES,
-+			"VZ QUOTA: file softlimit expired "
-+			"for id=%d\n");
-+		return NO_QUOTA;
-+	}
-+
-+	return QUOTA_OK;
-+}
-+
-+/*
-+ * Check whether @number more bytes fit into the master limits @dqstat.
-+ *
-+ * Same rules as for inodes, with one addition: a preallocation
-+ * (@prealloc != 0) never prints warnings and is refused as soon as it
-+ * would exceed the soft limit while no grace period is running.
-+ * Returns QUOTA_OK or NO_QUOTA.
-+ */
-+static int vzquota_check_space(struct dq_info *dq_info,
-+		struct dq_stat *dqstat,
-+		__u64 number, int dq_id, char prealloc)
-+{
-+	if (number == 0)
-+		return QUOTA_OK;
-+
-+	if (dqstat->bcurrent + number > dqstat->bhardlimit &&
-+			!ignore_hardlimit(dq_info)) {
-+		if (!prealloc)
-+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
-+				"VZ QUOTA: disk hardlimit reached "
-+				"for id=%d\n");
-+		return NO_QUOTA;
-+	}
-+
-+	/* still within the soft limit: nothing else to do */
-+	if (dqstat->bcurrent + number <= dqstat->bsoftlimit)
-+		return QUOTA_OK;
-+
-+	if (dqstat->btime == (time_t)0) {
-+		/*
-+		 * Original Linux quota doesn't allow
-+		 * preallocation to exceed softlimit so
-+		 * exceeding will be always printed
-+		 */
-+		if (prealloc)
-+			return NO_QUOTA;
-+
-+		vzquota_warn(dq_info, dq_id, 0,
-+			"VZ QUOTA: disk softlimit exceeded "
-+			"for id=%d\n");
-+		dqstat->btime = CURRENT_TIME_SECONDS + dq_info->bexpire;
-+		return QUOTA_OK;
-+	}
-+
-+	if (CURRENT_TIME_SECONDS >= dqstat->btime &&
-+			!ignore_hardlimit(dq_info)) {
-+		if (!prealloc)
-+			vzquota_warn(dq_info, dq_id, VZ_QUOTA_SPACE,
-+				"VZ QUOTA: disk quota "
-+				"softlimit expired "
-+				"for id=%d\n");
-+		return NO_QUOTA;
-+	}
-+
-+	return QUOTA_OK;
-+}
-+
-+/*
-+ * Check whether @number more inodes fit into the user/group quota
-+ * qugid[@type].
-+ *
-+ * A NULL entry is not limited, VZ_QUOTA_UGBAD always fails.  Nothing is
-+ * checked if the quota type is not enabled in qmblk->dq_flags or if
-+ * @number is 0.  Limits equal to 0 mean "no limit".  Exceeding the soft
-+ * limit for the first time starts the grace period.
-+ * Returns QUOTA_OK or NO_QUOTA.
-+ */
-+static int vzquota_check_ugid_inodes(struct vz_quota_master *qmblk,
-+		struct vz_quota_ugid *qugid[],
-+		int type, unsigned long number)
-+{
-+	struct dq_stat *st;
-+	struct dq_info *info;
-+
-+	if (qugid[type] == NULL)
-+		return QUOTA_OK;
-+	if (qugid[type] == VZ_QUOTA_UGBAD)
-+		return NO_QUOTA;
-+
-+	if ((type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ||
-+	    (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)))
-+		return QUOTA_OK;
-+	if (number == 0)
-+		return QUOTA_OK;
-+
-+	info = &qmblk->dq_ugid_info[type];
-+	st = &qugid[type]->qugid_stat;
-+
-+	if (st->ihardlimit != 0 &&
-+			st->icurrent + number > st->ihardlimit)
-+		return NO_QUOTA;
-+
-+	if (st->isoftlimit == 0 ||
-+			st->icurrent + number <= st->isoftlimit)
-+		return QUOTA_OK;
-+
-+	if (st->itime == (time_t)0) {
-+		/* soft limit crossed just now: start the grace period */
-+		st->itime = CURRENT_TIME_SECONDS + info->iexpire;
-+		return QUOTA_OK;
-+	}
-+	if (CURRENT_TIME_SECONDS >= st->itime)
-+		return NO_QUOTA;
-+
-+	return QUOTA_OK;
-+}
-+
-+/*
-+ * Check whether @number more bytes fit into the user/group quota
-+ * qugid[@type].
-+ *
-+ * A NULL entry is not limited, VZ_QUOTA_UGBAD always fails.  Nothing is
-+ * checked if the quota type is not enabled in qmblk->dq_flags or if
-+ * @number is 0.  Limits equal to 0 mean "no limit".  A preallocation
-+ * (@prealloc != 0) is refused when it would exceed the soft limit while
-+ * no grace period is running; otherwise the grace period is started.
-+ * Returns QUOTA_OK or NO_QUOTA.
-+ */
-+static int vzquota_check_ugid_space(struct vz_quota_master *qmblk,
-+		struct vz_quota_ugid *qugid[],
-+		int type, __u64 number, char prealloc)
-+{
-+	struct dq_stat *st;
-+	struct dq_info *info;
-+
-+	if (qugid[type] == NULL)
-+		return QUOTA_OK;
-+	if (qugid[type] == VZ_QUOTA_UGBAD)
-+		return NO_QUOTA;
-+
-+	if ((type == USRQUOTA && !(qmblk->dq_flags & VZDQ_USRQUOTA)) ||
-+	    (type == GRPQUOTA && !(qmblk->dq_flags & VZDQ_GRPQUOTA)))
-+		return QUOTA_OK;
-+	if (number == 0)
-+		return QUOTA_OK;
-+
-+	info = &qmblk->dq_ugid_info[type];
-+	st = &qugid[type]->qugid_stat;
-+
-+	if (st->bhardlimit != 0 &&
-+			st->bcurrent + number > st->bhardlimit)
-+		return NO_QUOTA;
-+
-+	if (st->bsoftlimit == 0 ||
-+			st->bcurrent + number <= st->bsoftlimit)
-+		return QUOTA_OK;
-+
-+	if (st->btime == (time_t)0) {
-+		/*
-+		 * Original Linux quota doesn't allow
-+		 * preallocation to exceed softlimit so
-+		 * exceeding will be always printed
-+		 */
-+		if (prealloc)
-+			return NO_QUOTA;
-+		st->btime = CURRENT_TIME_SECONDS + info->bexpire;
-+		return QUOTA_OK;
-+	}
-+	if (CURRENT_TIME_SECONDS >= st->btime)
-+		return NO_QUOTA;
-+
-+	return QUOTA_OK;
-+}
-+
-+/* ----------------------------------------------------------------------
-+ * Quota superblock operations
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * S_NOQUOTA note.
-+ * In the current kernel (2.6.8.1), S_NOQUOTA flag is set only for
-+ * - quota file (absent in our case)
-+ * - after explicit DQUOT_DROP (earlier than clear_inode) in functions like
-+ * filesystem-specific new_inode, before the inode gets outside links.
-+ * For the latter case, the only quota operation where care about S_NOQUOTA
-+ * might be required is vzquota_drop, but there S_NOQUOTA has already been
-+ * checked in DQUOT_DROP().
-+ * So, S_NOQUOTA may be ignored for now in the VZDQ code.
-+ *
-+ * The above note is not entirely correct.
-+ * Both for ext2 and ext3 filesystems, DQUOT_FREE_INODE is called from
-+ * delete_inode if new_inode fails (for example, because of inode quota
-+ * limits), so S_NOQUOTA check is needed in free_inode.
-+ * This seems to be the dark corner of the current quota API.
-+ */
-+
-+/*
-+ * dquot "initialize" callback: set up quota bookkeeping for @inode by
-+ * delegating to vzquota_inode_init_call().  @type is not used.
-+ * Always returns 0; the value is ignored by the caller.
-+ */
-+static int vzquota_initialize(struct inode *inode, int type)
-+{
-+	vzquota_inode_init_call(inode);
-+
-+	return 0;
-+}
-+
-+/*
-+ * dquot "drop" callback: release the quota bookkeeping of @inode by
-+ * delegating to vzquota_inode_drop_call().
-+ * Always returns 0; the value is ignored by the caller.
-+ */
-+static int vzquota_drop(struct inode *inode)
-+{
-+	vzquota_inode_drop_call(inode);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Allocate block callback.
-+ *
-+ * If (prealloc) disk quota exceeding warning is not printed.
-+ * See Linux quota to know why.
-+ *
-+ * The master-wide limit is checked first, then (with
-+ * CONFIG_VZ_QUOTA_UGID) the limit of every quota type; usage counters
-+ * are incremented only after all checks have passed.  On success the
-+ * bytes are also added to the inode with inode_add_bytes(), which
-+ * happens even when the inode has no quota master (NULL).
-+ *
-+ * Return:
-+ * QUOTA_OK == 0 on SUCCESS
-+ * NO_QUOTA == 1 if allocation should fail
-+ */
-+static int vzquota_alloc_space(struct inode *inode,
-+ qsize_t number, int prealloc)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_datast data;
-+ int ret = QUOTA_OK;
-+
-+ qmblk = vzquota_inode_data(inode, &data); /* presumably takes the lock released by vzquota_data_unlock() - confirm */
-+ if (qmblk == VZ_QUOTA_BAD)
-+ return NO_QUOTA;
-+ if (qmblk != NULL) {
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ int cnt;
-+ struct vz_quota_ugid * qugid[MAXQUOTAS];
-+#endif
-+
-+ /* checking first: master-wide limits */
-+ ret = vzquota_check_space(&qmblk->dq_info, &qmblk->dq_stat,
-+ number, qmblk->dq_id, prealloc);
-+ if (ret == NO_QUOTA)
-+ goto no_quota;
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-+ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
-+ ret = vzquota_check_ugid_space(qmblk, qugid,
-+ cnt, number, prealloc);
-+ if (ret == NO_QUOTA)
-+ goto no_quota;
-+ }
-+ /* check ok, may increment; VZ_QUOTA_UGBAD entries were rejected by the check above */
-+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-+ if (qugid[cnt] == NULL)
-+ continue;
-+ vzquota_incr_space(&qugid[cnt]->qugid_stat, number);
-+ }
-+#endif
-+ vzquota_incr_space(&qmblk->dq_stat, number);
-+ vzquota_data_unlock(inode, &data);
-+ }
-+
-+ inode_add_bytes(inode, number);
-+ might_sleep();
-+ return QUOTA_OK;
-+
-+no_quota:
-+ vzquota_data_unlock(inode, &data);
-+ return NO_QUOTA;
-+}
-+
-+/*
-+ * Allocate inodes callback.
-+ *
-+ * The master-wide inode limit is checked first, then (with
-+ * CONFIG_VZ_QUOTA_UGID) the limit of every quota type; usage counters
-+ * are incremented only after all checks have passed.  An inode without
-+ * a quota master (NULL) is accepted without any accounting.
-+ *
-+ * Return:
-+ * QUOTA_OK == 0 on SUCCESS
-+ * NO_QUOTA == 1 if allocation should fail
-+ */
-+static int vzquota_alloc_inode(const struct inode *inode, unsigned long number)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_datast data;
-+ int ret = QUOTA_OK;
-+
-+ qmblk = vzquota_inode_data((struct inode *)inode, &data); /* const is cast away for the helper; presumably takes the lock released by vzquota_data_unlock() - confirm */
-+ if (qmblk == VZ_QUOTA_BAD)
-+ return NO_QUOTA;
-+ if (qmblk != NULL) {
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ int cnt;
-+ struct vz_quota_ugid *qugid[MAXQUOTAS];
-+#endif
-+
-+ /* checking first: master-wide limits */
-+ ret = vzquota_check_inodes(&qmblk->dq_info, &qmblk->dq_stat,
-+ number, qmblk->dq_id);
-+ if (ret == NO_QUOTA)
-+ goto no_quota;
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-+ qugid[cnt] = INODE_QLNK(inode)->qugid[cnt];
-+ ret = vzquota_check_ugid_inodes(qmblk, qugid,
-+ cnt, number);
-+ if (ret == NO_QUOTA)
-+ goto no_quota;
-+ }
-+ /* check ok, may increment; VZ_QUOTA_UGBAD entries were rejected by the check above */
-+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-+ if (qugid[cnt] == NULL)
-+ continue;
-+ vzquota_incr_inodes(&qugid[cnt]->qugid_stat, number);
-+ }
-+#endif
-+ vzquota_incr_inodes(&qmblk->dq_stat, number);
-+ vzquota_data_unlock((struct inode *)inode, &data);
-+ }
-+
-+ might_sleep();
-+ return QUOTA_OK;
-+
-+no_quota:
-+ vzquota_data_unlock((struct inode *)inode, &data);
-+ return NO_QUOTA;
-+}
-+
-+/*
-+ * Free space callback.
-+ *
-+ * Subtracts @number bytes from the master usage and, with
-+ * CONFIG_VZ_QUOTA_UGID, from every valid user/group quota linked to
-+ * @inode, then removes the bytes from the inode itself.
-+ * Returns NO_QUOTA if the inode's quota data is VZ_QUOTA_BAD (not
-+ * checked by the caller), QUOTA_OK otherwise.
-+ */
-+static int vzquota_free_space(struct inode *inode, qsize_t number)
-+{
-+	struct vz_quota_datast data;
-+	struct vz_quota_master *master;
-+
-+	master = vzquota_inode_data(inode, &data);
-+	if (master == VZ_QUOTA_BAD)
-+		return NO_QUOTA;
-+
-+	if (master != NULL) {
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+		struct vz_quota_ugid *ug;
-+		int type;
-+#endif
-+
-+		vzquota_decr_space(&master->dq_stat, number);
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+		for (type = 0; type < MAXQUOTAS; type++) {
-+			ug = INODE_QLNK(inode)->qugid[type];
-+			if (ug != NULL && ug != VZ_QUOTA_UGBAD)
-+				vzquota_decr_space(&ug->qugid_stat, number);
-+		}
-+#endif
-+		vzquota_data_unlock(inode, &data);
-+	}
-+
-+	inode_sub_bytes(inode, number);
-+	might_sleep();
-+	return QUOTA_OK;
-+}
-+
-+/*
-+ * Free inodes callback.
-+ *
-+ * Inodes marked IS_NOQUOTA() are skipped.  Otherwise @number inodes are
-+ * subtracted from the master usage and, with CONFIG_VZ_QUOTA_UGID, from
-+ * every valid user/group quota linked to @inode.
-+ * Returns NO_QUOTA if the inode's quota data is VZ_QUOTA_BAD, QUOTA_OK
-+ * otherwise.
-+ */
-+static int vzquota_free_inode(const struct inode *inode, unsigned long number)
-+{
-+	struct vz_quota_datast data;
-+	struct vz_quota_master *master;
-+
-+	if (IS_NOQUOTA(inode))
-+		return QUOTA_OK;
-+
-+	master = vzquota_inode_data((struct inode *)inode, &data);
-+	if (master == VZ_QUOTA_BAD)
-+		return NO_QUOTA;
-+
-+	if (master != NULL) {
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+		struct vz_quota_ugid *ug;
-+		int type;
-+#endif
-+
-+		vzquota_decr_inodes(&master->dq_stat, number);
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+		for (type = 0; type < MAXQUOTAS; type++) {
-+			ug = INODE_QLNK(inode)->qugid[type];
-+			if (ug != NULL && ug != VZ_QUOTA_UGBAD)
-+				vzquota_decr_inodes(&ug->qugid_stat, number);
-+		}
-+#endif
-+		vzquota_data_unlock((struct inode *)inode, &data);
-+	}
-+
-+	might_sleep();
-+	return QUOTA_OK;
-+}
-+
-+#if defined(CONFIG_VZ_QUOTA_UGID)
-+
-+/*
-+ * Helper for quota transfer: check that one inode occupying @size bytes
-+ * can be added to the user/group quota qugid[@type].
-+ * Returns 0 if both the space and the inode check pass, -1 otherwise.
-+ */
-+static int vzquota_transfer_check(struct vz_quota_master *qmblk,
-+		struct vz_quota_ugid *qugid[],
-+		unsigned int type, __u64 size)
-+{
-+	if (vzquota_check_ugid_space(qmblk, qugid, type, size, 0) != QUOTA_OK)
-+		return -1;
-+	if (vzquota_check_ugid_inodes(qmblk, qugid, type, 1) != QUOTA_OK)
-+		return -1;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Move the usage of @inode (its bytes plus one inode) from the quotas
-+ * currently linked to it to the quotas in @qlnk, for every quota type
-+ * whose bit is set in @mask.
-+ *
-+ * All selected types are checked first; usage is moved only if every
-+ * check passes.  Returns 0 on success, -1 if a destination quota would
-+ * be exceeded.
-+ * NOTE(review): the old and new qugid entries of the selected types are
-+ * dereferenced unconditionally - presumably the caller guarantees they
-+ * are valid pointers; confirm.
-+ */
-+int vzquota_transfer_usage(struct inode *inode,
-+		int mask,
-+		struct vz_quota_ilink *qlnk)
-+{
-+	struct vz_quota_ugid *from;
-+	struct vz_quota_ugid *to;
-+	__u64 bytes;
-+	int type;
-+
-+	bytes = inode_get_bytes(inode);
-+
-+	/* pass 1: make sure every destination quota has room */
-+	for (type = 0; type < MAXQUOTAS; type++) {
-+		if ((mask & (1 << type)) &&
-+		    vzquota_transfer_check(qlnk->qmblk, qlnk->qugid,
-+				type, bytes))
-+			return -1;
-+	}
-+
-+	/* pass 2: move the usage */
-+	for (type = 0; type < MAXQUOTAS; type++) {
-+		if (!(mask & (1 << type)))
-+			continue;
-+		from = INODE_QLNK(inode)->qugid[type];
-+		vzquota_decr_space(&from->qugid_stat, bytes);
-+		vzquota_decr_inodes(&from->qugid_stat, 1);
-+		to = qlnk->qugid[type];
-+		vzquota_incr_space(&to->qugid_stat, bytes);
-+		vzquota_incr_inodes(&to->qugid_stat, 1);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * dquot "transfer" callback: move the inode between different user/group
-+ * quotas.  Maps a non-zero result of vzquota_inode_transfer_call() to
-+ * NO_QUOTA and zero to QUOTA_OK.
-+ */
-+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
-+{
-+	if (vzquota_inode_transfer_call(inode, iattr))
-+		return NO_QUOTA;
-+
-+	return QUOTA_OK;
-+}
-+
-+#else /* CONFIG_VZ_QUOTA_UGID */
-+
-+/* Without user/group quota support a transfer always succeeds. */
-+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
-+{
-+	return QUOTA_OK;
-+}
-+
-+#endif
-+
-+/*
-+ * dquot "rename" callback.  Maps a non-zero result of
-+ * vzquota_rename_check() to NO_QUOTA and zero to QUOTA_OK.
-+ *
-+ * Called under following semaphores:
-+ *	old_d->d_inode->i_sb->s_vfs_rename_sem
-+ *	old_d->d_inode->i_sem
-+ *	new_d->d_inode->i_sem
-+ * [not verified --SAW]
-+ */
-+static int vzquota_rename(struct inode *inode,
-+		struct inode *old_dir, struct inode *new_dir)
-+{
-+	if (vzquota_rename_check(inode, old_dir, new_dir))
-+		return NO_QUOTA;
-+
-+	return QUOTA_OK;
-+}
-+
-+/*
-+ * Superblock disk quota operations implemented by the VZ quota code.
-+ */
-+struct dquot_operations vz_quota_operations = {
-+	.initialize	= vzquota_initialize,
-+	.drop		= vzquota_drop,
-+	.alloc_space	= vzquota_alloc_space,
-+	.alloc_inode	= vzquota_alloc_inode,
-+	.free_space	= vzquota_free_space,
-+	.free_inode	= vzquota_free_inode,
-+	.transfer	= vzquota_transfer,
-+	.rename		= vzquota_rename
-+};
-diff -upr linux-2.6.16.orig/fs/vzdq_tree.c linux-2.6.16-026test009/fs/vzdq_tree.c
---- linux-2.6.16.orig/fs/vzdq_tree.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/vzdq_tree.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,286 @@
-+/*
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * This file contains Virtuozzo quota tree implementation
-+ */
-+
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/vzdq_tree.h>
-+
-+/*
-+ * Allocate an empty quota tree: no root, no leaves, and empty used/free
-+ * node lists on every level.
-+ * Returns the new tree, or NULL if the allocation fails.
-+ */
-+struct quotatree_tree *quotatree_alloc(void)
-+{
-+	struct quotatree_tree *qt;
-+	int level;
-+
-+	qt = kmalloc(sizeof(*qt), GFP_KERNEL);
-+	if (qt == NULL)
-+		return NULL;
-+
-+	for (level = 0; level < QUOTATREE_DEPTH; level++) {
-+		INIT_LIST_HEAD(&qt->levels[level].usedlh);
-+		INIT_LIST_HEAD(&qt->levels[level].freelh);
-+		qt->levels[level].freenum = 0;
-+	}
-+	qt->root = NULL;
-+	qt->leaf_num = 0;
-+
-+	return qt;
-+}
-+
-+/*
-+ * Descend from the root towards @id for at most @level levels, stopping
-+ * early at the first missing node.
-+ *
-+ * If @st is not NULL it receives the slot where the walk stopped and the
-+ * number of levels actually descended.
-+ * Returns the node containing that slot, or NULL if the walk never left
-+ * the root pointer.
-+ */
-+static struct quotatree_node *
-+quotatree_follow(struct quotatree_tree *tree, quotaid_t id, int level,
-+		struct quotatree_find_state *st)
-+{
-+	struct quotatree_node *node = NULL;
-+	void **slot = (void **)&tree->root;
-+	int depth, idx;
-+
-+	for (depth = 0; depth < level && *slot != NULL; depth++) {
-+		idx = (id >> QUOTATREE_BSHIFT(depth)) & QUOTATREE_BMASK;
-+		node = *slot;
-+		slot = &node->blocks[idx];
-+	}
-+
-+	if (st != NULL) {
-+		st->block = slot;
-+		st->level = depth;
-+	}
-+
-+	return node;
-+}
-+
-+/*
-+ * Look up the leaf stored for @id.
-+ *
-+ * @st is filled by quotatree_follow() and must not be NULL; it can be
-+ * handed to quotatree_insert() afterwards.
-+ * Returns the content of the leaf slot if the full depth was reached,
-+ * NULL otherwise.
-+ */
-+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
-+		struct quotatree_find_state *st)
-+{
-+	quotatree_follow(tree, id, QUOTATREE_DEPTH, st);
-+
-+	return st->level == QUOTATREE_DEPTH ? *st->block : NULL;
-+}
-+
-+/*
-+ * Return the @index-th leaf (counting from 0) in the order the used
-+ * leaf-level nodes and their slots are walked, or NULL if @index is out
-+ * of range.
-+ */
-+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index)
-+{
-+	struct quotatree_node *node;
-+	unsigned int to_skip = index;
-+	void *leaf;
-+	int slot;
-+
-+	if (index >= QTREE_LEAFNUM(tree))
-+		return NULL;
-+
-+	list_for_each_entry(node, &QTREE_LEAFLVL(tree)->usedlh, list) {
-+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++) {
-+			leaf = node->blocks[slot];
-+			if (leaf == NULL)
-+				continue;
-+			if (to_skip == 0)
-+				return leaf;
-+			to_skip--;
-+		}
-+	}
-+
-+	return NULL;
-+}
-+
-+/*
-+ * Return the data leaf (vz_quota_ugid) that follows the _existent_ ugid
-+ * @id in the tree, or NULL if there is none.
-+ *
-+ * The search continues in the remaining slots of the node holding @id
-+ * and then in the following nodes of the leaf-level used list.
-+ */
-+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id)
-+{
-+	struct quotatree_node *holder, *node;
-+	struct list_head *pos;
-+	int slot;
-+
-+	/* get the last-level node that refers to @id */
-+	holder = quotatree_follow(tree, id, QUOTATREE_DEPTH, NULL);
-+	if (!holder)
-+		return NULL;
-+
-+	slot = (id & QUOTATREE_BMASK) + 1;	/* next ugid */
-+	pos = &holder->list;
-+	do {
-+		node = list_entry(pos, struct quotatree_node, list);
-+		while (slot < QUOTATREE_BSIZE) {
-+			if (node->blocks[slot])
-+				return node->blocks[slot];
-+			slot++;
-+		}
-+		slot = 0;
-+		pos = pos->next;
-+	} while (pos != &QTREE_LEAFLVL(tree)->usedlh);
-+
-+	return NULL;
-+}
-+
-+/*
-+ * Store @data as the leaf for @id, creating the missing tree nodes.
-+ *
-+ * @st must describe where a preceding lookup for @id stopped.  Nodes are
-+ * taken from the per-level free list when possible and allocated
-+ * otherwise.
-+ * Returns 0 on success, -ENOMEM if a node cannot be allocated; nodes
-+ * linked in before the failure stay in the tree.
-+ */
-+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
-+		struct quotatree_find_state *st, void *data)
-+{
-+	struct quotatree_node *node;
-+	struct list_head *spare;
-+	int lvl, slot;
-+
-+	for (; st->level < QUOTATREE_DEPTH; st->level++) {
-+		lvl = st->level;
-+		spare = &tree->levels[lvl].freelh;
-+		if (list_empty(spare)) {
-+			node = kmalloc(sizeof(*node), GFP_KERNEL);
-+			if (node == NULL)
-+				return -ENOMEM;
-+			/* remember the block number within this level;
-+			 * it is used for quota file generation */
-+			node->num = tree->levels[lvl].freenum++;
-+		} else {
-+			node = list_entry(spare->next,
-+					struct quotatree_node, list);
-+			list_del(&node->list);
-+		}
-+		list_add(&node->list, &tree->levels[lvl].usedlh);
-+		memset(node->blocks, 0, sizeof(node->blocks));
-+		*st->block = node;
-+
-+		slot = (id >> QUOTATREE_BSHIFT(lvl)) & QUOTATREE_BMASK;
-+		st->block = node->blocks + slot;
-+	}
-+
-+	tree->leaf_num++;
-+	*st->block = data;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Clear the slot that the walk towards @id reaches after @level levels.
-+ * The leaf counter is decremented if the walk went down the full depth.
-+ * Returns the node that contains the cleared slot.
-+ */
-+static struct quotatree_node *
-+quotatree_remove_ptr(struct quotatree_tree *tree, quotaid_t id,
-+		int level)
-+{
-+	struct quotatree_find_state where;
-+	struct quotatree_node *owner;
-+
-+	owner = quotatree_follow(tree, id, level, &where);
-+	if (where.level == QUOTATREE_DEPTH)
-+		tree->leaf_num--;
-+	*where.block = NULL;
-+
-+	return owner;
-+}
-+
-+/*
-+ * Remove the leaf for @id and recycle nodes that became empty.
-+ *
-+ * Walking up from the leaf level down to level QUOTATREE_CDEPTH, every
-+ * node left without entries is moved to its level's free list and
-+ * unlinked from its parent; the walk stops at the first node that still
-+ * has entries.
-+ */
-+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id)
-+{
-+	struct quotatree_node *node;
-+	int level, slot;
-+
-+	node = quotatree_remove_ptr(tree, id, QUOTATREE_DEPTH);
-+	level = QUOTATREE_DEPTH - 1;
-+	while (level >= QUOTATREE_CDEPTH) {
-+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++) {
-+			if (node->blocks[slot] != NULL)
-+				return;
-+		}
-+		list_move(&node->list, &tree->levels[level].freelh);
-+		node = quotatree_remove_ptr(tree, id, level);
-+		level--;
-+	}
-+}
-+
-+#if 0 /* generic tree walker, compiled out */
-+static void quotatree_walk(struct quotatree_tree *tree,
-+ struct quotatree_node *node_start,
-+ quotaid_t id_start,
-+ int level_start, int level_end,
-+ int (*callback)(struct quotatree_tree *,
-+ quotaid_t id,
-+ int level,
-+ void *ptr,
-+ void *data),
-+ void *data)
-+{
-+ struct quotatree_node *p;
-+ int l, shift, index;
-+ quotaid_t id;
-+ struct quotatree_find_state st;
-+
-+ p = node_start;
-+ l = level_start;
-+ shift = (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
-+ id = id_start;
-+ index = 0;
-+
-+ /*
-+ * Invariants:
-+ * shift == (QUOTATREE_DEPTH - l) * QUOTAID_BBITS;
-+ * id & ((1 << shift) - 1) == 0
-+ * p is l-level node corresponding to id
-+ */
-+ do {
-+ if (!p)
-+ break;
-+
-+ if (l < level_end) {
-+ for (; index < QUOTATREE_BSIZE; index++)
-+ if (p->blocks[index] != NULL)
-+ break;
-+ if (index < QUOTATREE_BSIZE) {
-+ /* descend */
-+ p = p->blocks[index];
-+ l++;
-+ shift -= QUOTAID_BBITS;
-+ id += (quotaid_t)index << shift;
-+ index = 0;
-+ continue;
-+ }
-+ }
-+
-+ if ((*callback)(tree, id, l, p, data)) /* a non-zero callback result stops the walk */
-+ break;
-+
-+ /* ascend and move on to the next node */
-+ p = quotatree_follow(tree, id, l, &st);
-+
-+ index = ((id >> shift) & QUOTATREE_BMASK) + 1;
-+ l--;
-+ shift += QUOTAID_BBITS;
-+ id &= ~(((quotaid_t)1 << shift) - 1);
-+ } while (l >= level_start);
-+}
-+#endif
-+
-+/* Unlink and kfree() every quotatree_node linked on @node_list. */
-+static void free_list(struct list_head *node_list)
-+{
-+	struct quotatree_node *node;
-+
-+	while (!list_empty(node_list)) {
-+		node = list_entry(node_list->next,
-+				struct quotatree_node, list);
-+		list_del(&node->list);
-+		kfree(node);
-+	}
-+}
-+
-+/* Free the used and the spare nodes of every level of @tree. */
-+static inline void quotatree_free_nodes(struct quotatree_tree *tree)
-+{
-+	int level;
-+
-+	for (level = 0; level < QUOTATREE_DEPTH; level++) {
-+		free_list(&tree->levels[level].usedlh);
-+		free_list(&tree->levels[level].freelh);
-+	}
-+}
-+
-+/* Call @dtor on every leaf stored in the used leaf-level nodes of @tree. */
-+static void quotatree_free_leafs(struct quotatree_tree *tree,
-+		void (*dtor)(void *))
-+{
-+	struct quotatree_node *node;
-+	int slot;
-+
-+	list_for_each_entry(node, &QTREE_LEAFLVL(tree)->usedlh, list) {
-+		for (slot = 0; slot < QUOTATREE_BSIZE; slot++) {
-+			if (node->blocks[slot] != NULL)
-+				dtor(node->blocks[slot]);
-+		}
-+	}
-+}
-+
-+/*
-+ * Destroy @tree: run @dtor on every leaf, then free all tree nodes and
-+ * finally the tree structure itself.
-+ */
-+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *))
-+{
-+	/* leaves first: they are reached through the nodes freed next */
-+	quotatree_free_leafs(tree, dtor);
-+	quotatree_free_nodes(tree);
-+	kfree(tree);
-+}
-diff -upr linux-2.6.16.orig/fs/vzdq_ugid.c linux-2.6.16-026test009/fs/vzdq_ugid.c
---- linux-2.6.16.orig/fs/vzdq_ugid.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/vzdq_ugid.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,1116 @@
-+/*
-+ * Copyright (C) 2002 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * This file contains Virtuozzo UID/GID disk quota implementation
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/string.h>
-+#include <linux/slab.h>
-+#include <linux/list.h>
-+#include <linux/smp_lock.h>
-+#include <linux/rcupdate.h>
-+#include <asm/uaccess.h>
-+#include <linux/proc_fs.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/quota.h>
-+#include <linux/quotaio_v2.h>
-+#include <linux/virtinfo.h>
-+
-+#include <linux/vzctl.h>
-+#include <linux/vzctl_quota.h>
-+#include <linux/vzquota.h>
-+
-+/*
-+ * XXX
-+ * may be something is needed for sb->s_dquot->info[]?
-+ */
-+
-+#define USRQUOTA_MASK (1 << USRQUOTA) /* bit selecting the user quota type */
-+#define GRPQUOTA_MASK (1 << GRPQUOTA) /* bit selecting the group quota type */
-+#define QTYPE2MASK(type) (1 << (type)) /* bit for an arbitrary quota type index */
-+
-+static kmem_cache_t *vz_quota_ugid_cachep;
-+
-+/* guard to protect vz_quota_master from destroy in quota_on/off. Also protects
-+ * list on the hash table */
-+extern struct semaphore vz_quota_sem;
-+
-+/*
-+ * Take a reference on @qugid and return it.  The VZ_QUOTA_UGBAD marker
-+ * is returned unchanged without touching any counter.
-+ */
-+inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
-+{
-+	if (qugid == VZ_QUOTA_UGBAD)
-+		return qugid;
-+
-+	atomic_inc(&qugid->qugid_count);
-+	return qugid;
-+}
-+
-+/* we don't limit users with zero limits */
-+static inline int vzquota_fake_stat(struct dq_stat *stat)
-+{
-+ return stat->bhardlimit == 0 && stat->bsoftlimit == 0 &&
-+ stat->ihardlimit == 0 && stat->isoftlimit == 0;
-+}
-+
-+/* callback function for quotatree_free() */
-+static inline void vzquota_free_qugid(void *ptr)
-+{
-+ kmem_cache_free(vz_quota_ugid_cachep, ptr);
-+}
-+
-+/*
-+ * destroy ugid, if it have zero refcount, limits and usage
-+ * must be called under qmblk->dq_sem
-+ */
-+void vzquota_put_ugid(struct vz_quota_master *qmblk,
-+ struct vz_quota_ugid *qugid)
-+{
-+ if (qugid == VZ_QUOTA_UGBAD)
-+ return;
-+ qmblk_data_read_lock(qmblk);
-+ if (atomic_dec_and_test(&qugid->qugid_count) &&
-+ (qmblk->dq_flags & VZDQUG_FIXED_SET) == 0 &&
-+ vzquota_fake_stat(&qugid->qugid_stat) &&
-+ qugid->qugid_stat.bcurrent == 0 &&
-+ qugid->qugid_stat.icurrent == 0) {
-+ quotatree_remove(QUGID_TREE(qmblk, qugid->qugid_type),
-+ qugid->qugid_id);
-+ qmblk->dq_ugid_count--;
-+ vzquota_free_qugid(qugid);
-+ }
-+ qmblk_data_read_unlock(qmblk);
-+}
-+
-+/*
-+ * Get ugid block by its index, like it would present in array.
-+ * In reality, this is not array - this is leafs chain of the tree.
-+ * NULL if index is out of range.
-+ * qmblk semaphore is required to protect the tree.
-+ */
-+static inline struct vz_quota_ugid *
-+vzquota_get_byindex(struct vz_quota_master *qmblk, unsigned int index, int type)
-+{
-+ return quotatree_leaf_byindex(QUGID_TREE(qmblk, type), index);
-+}
-+
-+/*
-+ * get next element from ugid "virtual array"
-+ * ugid must be in current array and this array may not be changed between
-+ * two accesses (quaranteed by "stopped" quota state and quota semaphore)
-+ * qmblk semaphore is required to protect the tree
-+ */
-+static inline struct vz_quota_ugid *
-+vzquota_get_next(struct vz_quota_master *qmblk, struct vz_quota_ugid *qugid)
-+{
-+ return quotatree_get_next(QUGID_TREE(qmblk, qugid->qugid_type),
-+ qugid->qugid_id);
-+}
-+
-+/*
-+ * requires dq_sem
-+ */
-+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
-+ unsigned int quota_id, int type, int flags)
-+{
-+ struct vz_quota_ugid *qugid;
-+ struct quotatree_tree *tree;
-+ struct quotatree_find_state st;
-+
-+ tree = QUGID_TREE(qmblk, type);
-+ qugid = quotatree_find(tree, quota_id, &st);
-+ if (qugid)
-+ goto success;
-+
-+ /* caller does not want alloc */
-+ if (flags & VZDQUG_FIND_DONT_ALLOC)
-+ goto fail;
-+
-+ if (flags & VZDQUG_FIND_FAKE)
-+ goto doit;
-+
-+ /* check limit */
-+ if (qmblk->dq_ugid_count >= qmblk->dq_ugid_max)
-+ goto fail;
-+
-+ /* see comment at VZDQUG_FIXED_SET define */
-+ if (qmblk->dq_flags & VZDQUG_FIXED_SET)
-+ goto fail;
-+
-+doit:
-+ /* alloc new structure */
-+ qugid = kmem_cache_alloc(vz_quota_ugid_cachep,
-+ SLAB_NOFS | __GFP_NOFAIL);
-+ if (qugid == NULL)
-+ goto fail;
-+
-+ /* initialize new structure */
-+ qugid->qugid_id = quota_id;
-+ memset(&qugid->qugid_stat, 0, sizeof(qugid->qugid_stat));
-+ qugid->qugid_type = type;
-+ atomic_set(&qugid->qugid_count, 0);
-+
-+ /* insert in tree */
-+ if (quotatree_insert(tree, quota_id, &st, qugid) < 0)
-+ goto fail_insert;
-+ qmblk->dq_ugid_count++;
-+
-+success:
-+ vzquota_get_ugid(qugid);
-+ return qugid;
-+
-+fail_insert:
-+ vzquota_free_qugid(qugid);
-+fail:
-+ return VZ_QUOTA_UGBAD;
-+}
-+
-+/*
-+ * takes dq_sem, may schedule
-+ */
-+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
-+ unsigned int quota_id, int type, int flags)
-+{
-+ struct vz_quota_ugid *qugid;
-+
-+ down(&qmblk->dq_sem);
-+ qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
-+ up(&qmblk->dq_sem);
-+
-+ return qugid;
-+}
-+
-+/*
-+ * destroy all ugid records on given quota master
-+ */
-+void vzquota_kill_ugid(struct vz_quota_master *qmblk)
-+{
-+ BUG_ON((qmblk->dq_gid_tree == NULL && qmblk->dq_uid_tree != NULL) ||
-+ (qmblk->dq_uid_tree == NULL && qmblk->dq_gid_tree != NULL));
-+
-+ if (qmblk->dq_uid_tree != NULL) {
-+ quotatree_free(qmblk->dq_uid_tree, vzquota_free_qugid);
-+ quotatree_free(qmblk->dq_gid_tree, vzquota_free_qugid);
-+ }
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Management interface to ugid quota for (super)users.
-+ * --------------------------------------------------------------------- */
-+
-+/**
-+ * vzquota_find_qmblk - helper to emulate quota on virtual filesystems
-+ *
-+ * This function finds a quota master block corresponding to the root of
-+ * a virtual filesystem.
-+ * Returns a quota master block with reference taken, or %NULL if not under
-+ * quota, or %VZ_QUOTA_BAD if quota inconsistency is found (and all allocation
-+ * operations will fail).
-+ *
-+ * Note: this function uses vzquota_inode_qmblk().
-+ * The latter is a rather confusing function: it returns qmblk that used to be
-+ * on the inode some time ago (without guarantee that it still has any
-+ * relations to the inode). So, vzquota_find_qmblk() leaves it up to the
-+ * caller to think whether the inode could have changed its qmblk and what to
-+ * do in that case.
-+ * Currently, the callers appear to not care :(
-+ */
-+struct vz_quota_master *vzquota_find_qmblk(struct super_block *sb)
-+{
-+ struct inode *qrinode;
-+ struct vz_quota_master *qmblk;
-+
-+ qmblk = NULL;
-+ qrinode = NULL;
-+ if (sb->s_op->get_quota_root != NULL)
-+ qrinode = sb->s_op->get_quota_root(sb);
-+ if (qrinode != NULL)
-+ qmblk = vzquota_inode_qmblk(qrinode);
-+ return qmblk;
-+}
-+
-+static int vzquota_initialize2(struct inode *inode, int type)
-+{
-+ return QUOTA_OK;
-+}
-+
-+static int vzquota_drop2(struct inode *inode)
-+{
-+ return QUOTA_OK;
-+}
-+
-+static int vzquota_alloc_space2(struct inode *inode,
-+ qsize_t number, int prealloc)
-+{
-+ inode_add_bytes(inode, number);
-+ return QUOTA_OK;
-+}
-+
-+static int vzquota_alloc_inode2(const struct inode *inode, unsigned long number)
-+{
-+ return QUOTA_OK;
-+}
-+
-+static int vzquota_free_space2(struct inode *inode, qsize_t number)
-+{
-+ inode_sub_bytes(inode, number);
-+ return QUOTA_OK;
-+}
-+
-+static int vzquota_free_inode2(const struct inode *inode, unsigned long number)
-+{
-+ return QUOTA_OK;
-+}
-+
-+static int vzquota_transfer2(struct inode *inode, struct iattr *iattr)
-+{
-+ return QUOTA_OK;
-+}
-+
-+struct dquot_operations vz_quota_operations2 = {
-+ initialize: vzquota_initialize2,
-+ drop: vzquota_drop2,
-+ alloc_space: vzquota_alloc_space2,
-+ alloc_inode: vzquota_alloc_inode2,
-+ free_space: vzquota_free_space2,
-+ free_inode: vzquota_free_inode2,
-+ transfer: vzquota_transfer2
-+};
-+
-+static int vz_quota_on(struct super_block *sb, int type,
-+ int format_id, char *path)
-+{
-+ struct vz_quota_master *qmblk;
-+ int mask, mask2;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+
-+ mask = 0;
-+ mask2 = 0;
-+ sb->dq_op = &vz_quota_operations2;
-+ sb->s_qcop = &vz_quotactl_operations;
-+ if (type == USRQUOTA) {
-+ mask = DQUOT_USR_ENABLED;
-+ mask2 = VZDQ_USRQUOTA;
-+ }
-+ if (type == GRPQUOTA) {
-+ mask = DQUOT_GRP_ENABLED;
-+ mask2 = VZDQ_GRPQUOTA;
-+ }
-+ err = -EBUSY;
-+ if (qmblk->dq_flags & mask2)
-+ goto out;
-+
-+ err = 0;
-+ qmblk->dq_flags |= mask2;
-+ sb->s_dquot.flags |= mask;
-+
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+ return err;
-+}
-+
-+static int vz_quota_off(struct super_block *sb, int type)
-+{
-+ struct vz_quota_master *qmblk;
-+ int mask2;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+
-+ mask2 = 0;
-+ if (type == USRQUOTA)
-+ mask2 = VZDQ_USRQUOTA;
-+ if (type == GRPQUOTA)
-+ mask2 = VZDQ_GRPQUOTA;
-+ err = -EINVAL;
-+ if (!(qmblk->dq_flags & mask2))
-+ goto out;
-+
-+ qmblk->dq_flags &= ~mask2;
-+ err = 0;
-+
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+ return err;
-+}
-+
-+static int vz_quota_sync(struct super_block *sb, int type)
-+{
-+ return 0; /* vz quota is always uptodate */
-+}
-+
-+static int vz_get_dqblk(struct super_block *sb, int type,
-+ qid_t id, struct if_dqblk *di)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ugid *ugid;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+
-+ err = 0;
-+ ugid = vzquota_find_ugid(qmblk, id, type, VZDQUG_FIND_DONT_ALLOC);
-+ if (ugid != VZ_QUOTA_UGBAD) {
-+ qmblk_data_read_lock(qmblk);
-+ di->dqb_bhardlimit = ugid->qugid_stat.bhardlimit >> 10;
-+ di->dqb_bsoftlimit = ugid->qugid_stat.bsoftlimit >> 10;
-+ di->dqb_curspace = ugid->qugid_stat.bcurrent;
-+ di->dqb_ihardlimit = ugid->qugid_stat.ihardlimit;
-+ di->dqb_isoftlimit = ugid->qugid_stat.isoftlimit;
-+ di->dqb_curinodes = ugid->qugid_stat.icurrent;
-+ di->dqb_btime = ugid->qugid_stat.btime;
-+ di->dqb_itime = ugid->qugid_stat.itime;
-+ qmblk_data_read_unlock(qmblk);
-+ di->dqb_valid = QIF_ALL;
-+ vzquota_put_ugid(qmblk, ugid);
-+ } else {
-+ memset(di, 0, sizeof(*di));
-+ di->dqb_valid = QIF_ALL;
-+ }
-+
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+ return err;
-+}
-+
-+/* must be called under vz_quota_sem */
-+static int __vz_set_dqblk(struct vz_quota_master *qmblk,
-+ int type, qid_t id, struct if_dqblk *di)
-+{
-+ struct vz_quota_ugid *ugid;
-+
-+ ugid = vzquota_find_ugid(qmblk, id, type, 0);
-+ if (ugid == VZ_QUOTA_UGBAD)
-+ return -ESRCH;
-+
-+ qmblk_data_write_lock(qmblk);
-+ /*
-+ * Subtle compatibility breakage.
-+ *
-+ * Some old non-vz kernel quota didn't start grace period
-+ * if the new soft limit happens to be below the usage.
-+ * Non-vz kernel quota in 2.4.20 starts the grace period
-+ * (if it hasn't been started).
-+ * Current non-vz kernel performs even more complicated
-+ * manipulations...
-+ *
-+ * Also, current non-vz kernels have inconsistency related to
-+ * the grace time start. In regular operations the grace period
-+ * is started if the usage is greater than the soft limit (and,
-+ * strangely, is cancelled if the usage is less).
-+ * However, set_dqblk starts the grace period if the usage is greater
-+ * or equal to the soft limit.
-+ *
-+ * Here we try to mimic the behavior of the current non-vz kernel.
-+ */
-+ if (di->dqb_valid & QIF_BLIMITS) {
-+ ugid->qugid_stat.bhardlimit =
-+ (__u64)di->dqb_bhardlimit << 10;
-+ ugid->qugid_stat.bsoftlimit =
-+ (__u64)di->dqb_bsoftlimit << 10;
-+ if (di->dqb_bsoftlimit == 0 ||
-+ ugid->qugid_stat.bcurrent < ugid->qugid_stat.bsoftlimit)
-+ ugid->qugid_stat.btime = 0;
-+ else if (!(di->dqb_valid & QIF_BTIME))
-+ ugid->qugid_stat.btime = CURRENT_TIME_SECONDS
-+ + qmblk->dq_ugid_info[type].bexpire;
-+ else
-+ ugid->qugid_stat.btime = di->dqb_btime;
-+ }
-+ if (di->dqb_valid & QIF_ILIMITS) {
-+ ugid->qugid_stat.ihardlimit = di->dqb_ihardlimit;
-+ ugid->qugid_stat.isoftlimit = di->dqb_isoftlimit;
-+ if (di->dqb_isoftlimit == 0 ||
-+ ugid->qugid_stat.icurrent < ugid->qugid_stat.isoftlimit)
-+ ugid->qugid_stat.itime = 0;
-+ else if (!(di->dqb_valid & QIF_ITIME))
-+ ugid->qugid_stat.itime = CURRENT_TIME_SECONDS
-+ + qmblk->dq_ugid_info[type].iexpire;
-+ else
-+ ugid->qugid_stat.itime = di->dqb_itime;
-+ }
-+ qmblk_data_write_unlock(qmblk);
-+ vzquota_put_ugid(qmblk, ugid);
-+
-+ return 0;
-+}
-+
-+static int vz_set_dqblk(struct super_block *sb, int type,
-+ qid_t id, struct if_dqblk *di)
-+{
-+ struct vz_quota_master *qmblk;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+ err = __vz_set_dqblk(qmblk, type, id, di);
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+ return err;
-+}
-+
-+static int vz_get_dqinfo(struct super_block *sb, int type,
-+ struct if_dqinfo *ii)
-+{
-+ struct vz_quota_master *qmblk;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+
-+ err = 0;
-+ ii->dqi_bgrace = qmblk->dq_ugid_info[type].bexpire;
-+ ii->dqi_igrace = qmblk->dq_ugid_info[type].iexpire;
-+ ii->dqi_flags = 0;
-+ ii->dqi_valid = IIF_ALL;
-+
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+ return err;
-+}
-+
-+/* must be called under vz_quota_sem */
-+static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
-+ int type, struct if_dqinfo *ii)
-+{
-+ if (ii->dqi_valid & IIF_FLAGS)
-+ if (ii->dqi_flags & DQF_MASK)
-+ return -EINVAL;
-+
-+ if (ii->dqi_valid & IIF_BGRACE)
-+ qmblk->dq_ugid_info[type].bexpire = ii->dqi_bgrace;
-+ if (ii->dqi_valid & IIF_IGRACE)
-+ qmblk->dq_ugid_info[type].iexpire = ii->dqi_igrace;
-+ return 0;
-+}
-+
-+static int vz_set_dqinfo(struct super_block *sb, int type,
-+ struct if_dqinfo *ii)
-+{
-+ struct vz_quota_master *qmblk;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+ err = __vz_set_dqinfo(qmblk, type, ii);
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+ return err;
-+}
-+
-+#ifdef CONFIG_QUOTA_COMPAT
-+
-+#define Q_GETQUOTI_SIZE 1024
-+
-+#define UGID2DQBLK(dst, src) \
-+ do { \
-+ (dst).dqb_ihardlimit = (src)->qugid_stat.ihardlimit; \
-+ (dst).dqb_isoftlimit = (src)->qugid_stat.isoftlimit; \
-+ (dst).dqb_curinodes = (src)->qugid_stat.icurrent; \
-+ /* in 1K blocks */ \
-+ (dst).dqb_bhardlimit = (src)->qugid_stat.bhardlimit >> 10; \
-+ /* in 1K blocks */ \
-+ (dst).dqb_bsoftlimit = (src)->qugid_stat.bsoftlimit >> 10; \
-+ /* in bytes, 64 bit */ \
-+ (dst).dqb_curspace = (src)->qugid_stat.bcurrent; \
-+ (dst).dqb_btime = (src)->qugid_stat.btime; \
-+ (dst).dqb_itime = (src)->qugid_stat.itime; \
-+ } while (0)
-+
-+static int vz_get_quoti(struct super_block *sb, int type, qid_t idx,
-+ struct v2_disk_dqblk *dqblk)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct v2_disk_dqblk data;
-+ struct vz_quota_ugid *ugid;
-+ int count;
-+ int err;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
-+ err = -ESRCH;
-+ if (qmblk == NULL)
-+ goto out;
-+ err = -EIO;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out;
-+
-+ down(&qmblk->dq_sem);
-+ for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
-+ ugid != NULL && count < Q_GETQUOTI_SIZE;
-+ count++)
-+ {
-+ qmblk_data_read_lock(qmblk);
-+ UGID2DQBLK(data, ugid);
-+ qmblk_data_read_unlock(qmblk);
-+ data.dqb_id = ugid->qugid_id;
-+ if (copy_to_user(dqblk, &data, sizeof(data)))
-+ goto fault;
-+ dqblk++;
-+
-+ /* Find next entry */
-+ ugid = vzquota_get_next(qmblk, ugid);
-+ BUG_ON(ugid != NULL && ugid->qugid_type != type);
-+ }
-+ err = count;
-+out_ugid:
-+ up(&qmblk->dq_sem);
-+out:
-+ up(&vz_quota_sem);
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qmblk);
-+
-+ return err;
-+
-+fault:
-+ err = count ? count : -EFAULT;
-+ goto out_ugid;
-+}
-+
-+#endif
-+
-+struct quotactl_ops vz_quotactl_operations = {
-+ quota_on: vz_quota_on,
-+ quota_off: vz_quota_off,
-+ quota_sync: vz_quota_sync,
-+ get_info: vz_get_dqinfo,
-+ set_info: vz_set_dqinfo,
-+ get_dqblk: vz_get_dqblk,
-+ set_dqblk: vz_set_dqblk,
-+#ifdef CONFIG_QUOTA_COMPAT
-+ get_quoti: vz_get_quoti
-+#endif
-+};
-+
-+
-+/* ----------------------------------------------------------------------
-+ * Management interface for host system admins.
-+ * --------------------------------------------------------------------- */
-+
-+static int quota_ugid_addstat(unsigned int quota_id, unsigned int ugid_size,
-+ struct vz_quota_iface *u_ugid_buf)
-+{
-+ struct vz_quota_master *qmblk;
-+ int ret;
-+
-+ down(&vz_quota_sem);
-+
-+ ret = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ ret = -EBUSY;
-+ if (qmblk->dq_state != VZDQ_STARTING)
-+ goto out; /* working quota doesn't accept new ugids */
-+
-+ ret = 0;
-+ /* start to add ugids */
-+ for (ret = 0; ret < ugid_size; ret++) {
-+ struct vz_quota_iface ugid_buf;
-+ struct vz_quota_ugid *ugid;
-+
-+ if (copy_from_user(&ugid_buf, u_ugid_buf, sizeof(ugid_buf)))
-+ break;
-+
-+ if (ugid_buf.qi_type >= MAXQUOTAS)
-+ break; /* bad quota type - this is the only check */
-+
-+ ugid = vzquota_find_ugid(qmblk,
-+ ugid_buf.qi_id, ugid_buf.qi_type, 0);
-+ if (ugid == VZ_QUOTA_UGBAD) {
-+ qmblk->dq_flags |= VZDQUG_FIXED_SET;
-+ break; /* limit reached */
-+ }
-+
-+ /* update usage/limits
-+ * we can copy the data without the lock, because the data
-+ * cannot be modified in VZDQ_STARTING state */
-+ ugid->qugid_stat = ugid_buf.qi_stat;
-+
-+ vzquota_put_ugid(qmblk, ugid);
-+
-+ u_ugid_buf++; /* next user buffer */
-+ }
-+out:
-+ up(&vz_quota_sem);
-+
-+ return ret;
-+}
-+
-+static int quota_ugid_setgrace(unsigned int quota_id,
-+ struct dq_info u_dq_info[])
-+{
-+ struct vz_quota_master *qmblk;
-+ struct dq_info dq_info[MAXQUOTAS];
-+ struct dq_info *target;
-+ int err, type;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EBUSY;
-+ if (qmblk->dq_state != VZDQ_STARTING)
-+ goto out; /* working quota doesn't accept changing options */
-+
-+ err = -EFAULT;
-+ if (copy_from_user(dq_info, u_dq_info, sizeof(dq_info)))
-+ goto out;
-+
-+ err = 0;
-+
-+ /* update in qmblk */
-+ for (type = 0; type < MAXQUOTAS; type ++) {
-+ target = &qmblk->dq_ugid_info[type];
-+ target->bexpire = dq_info[type].bexpire;
-+ target->iexpire = dq_info[type].iexpire;
-+ }
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+static int do_quota_ugid_getstat(struct vz_quota_master *qmblk, int index, int size,
-+ struct vz_quota_iface *u_ugid_buf)
-+{
-+ int type, count;
-+ struct vz_quota_ugid *ugid;
-+
-+ if (QTREE_LEAFNUM(qmblk->dq_uid_tree) +
-+ QTREE_LEAFNUM(qmblk->dq_gid_tree)
-+ <= index)
-+ return 0;
-+
-+ count = 0;
-+
-+ type = index < QTREE_LEAFNUM(qmblk->dq_uid_tree) ? USRQUOTA : GRPQUOTA;
-+ if (type == GRPQUOTA)
-+ index -= QTREE_LEAFNUM(qmblk->dq_uid_tree);
-+
-+ /* loop through ugid and then qgid quota */
-+repeat:
-+ for (ugid = vzquota_get_byindex(qmblk, index, type);
-+ ugid != NULL && count < size;
-+ ugid = vzquota_get_next(qmblk, ugid), count++)
-+ {
-+ struct vz_quota_iface ugid_buf;
-+
-+ /* form interface buffer and send in to user-level */
-+ qmblk_data_read_lock(qmblk);
-+ memcpy(&ugid_buf.qi_stat, &ugid->qugid_stat,
-+ sizeof(ugid_buf.qi_stat));
-+ qmblk_data_read_unlock(qmblk);
-+ ugid_buf.qi_id = ugid->qugid_id;
-+ ugid_buf.qi_type = ugid->qugid_type;
-+
-+ if (copy_to_user(u_ugid_buf, &ugid_buf, sizeof(ugid_buf)))
-+ goto fault;
-+ u_ugid_buf++; /* next portion of user buffer */
-+ }
-+
-+ if (type == USRQUOTA && count < size) {
-+ type = GRPQUOTA;
-+ index = 0;
-+ goto repeat;
-+ }
-+
-+ return count;
-+
-+fault:
-+ return count ? count : -EFAULT;
-+}
-+
-+static int quota_ugid_getstat(unsigned int quota_id,
-+ int index, int size, struct vz_quota_iface *u_ugid_buf)
-+{
-+ struct vz_quota_master *qmblk;
-+ int err;
-+
-+ if (index < 0 || size < 0)
-+ return -EINVAL;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ down(&qmblk->dq_sem);
-+ err = do_quota_ugid_getstat(qmblk, index, size, u_ugid_buf);
-+ up(&qmblk->dq_sem);
-+
-+out:
-+ up(&vz_quota_sem);
-+ return err;
-+}
-+
-+static int quota_ugid_getgrace(unsigned int quota_id,
-+ struct dq_info u_dq_info[])
-+{
-+ struct vz_quota_master *qmblk;
-+ struct dq_info dq_info[MAXQUOTAS];
-+ struct dq_info *target;
-+ int err, type;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = 0;
-+ /* update from qmblk */
-+ for (type = 0; type < MAXQUOTAS; type ++) {
-+ target = &qmblk->dq_ugid_info[type];
-+ dq_info[type].bexpire = target->bexpire;
-+ dq_info[type].iexpire = target->iexpire;
-+ dq_info[type].flags = target->flags;
-+ }
-+
-+ if (copy_to_user(u_dq_info, dq_info, sizeof(dq_info)))
-+ err = -EFAULT;
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+static int quota_ugid_getconfig(unsigned int quota_id,
-+ struct vz_quota_ugid_stat *info)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ugid_stat kinfo;
-+ int err;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = 0;
-+ kinfo.limit = qmblk->dq_ugid_max;
-+ kinfo.count = qmblk->dq_ugid_count;
-+ kinfo.flags = qmblk->dq_flags;
-+
-+ if (copy_to_user(info, &kinfo, sizeof(kinfo)))
-+ err = -EFAULT;
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+static int quota_ugid_setconfig(unsigned int quota_id,
-+ struct vz_quota_ugid_stat *info)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ugid_stat kinfo;
-+ int err;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ENOENT;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EFAULT;
-+ if (copy_from_user(&kinfo, info, sizeof(kinfo)))
-+ goto out;
-+
-+ err = 0;
-+ qmblk->dq_ugid_max = kinfo.limit;
-+ if (qmblk->dq_state == VZDQ_STARTING) {
-+ qmblk->dq_flags = kinfo.flags;
-+ if (qmblk->dq_flags & VZDQUG_ON)
-+ qmblk->dq_flags |= VZDQ_USRQUOTA | VZDQ_GRPQUOTA;
-+ }
-+
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+static int quota_ugid_setlimit(unsigned int quota_id,
-+ struct vz_quota_ugid_setlimit *u_lim)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ugid_setlimit lim;
-+ int err;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ESRCH;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EFAULT;
-+ if (copy_from_user(&lim, u_lim, sizeof(lim)))
-+ goto out;
-+
-+ err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
-+
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+static int quota_ugid_setinfo(unsigned int quota_id,
-+ struct vz_quota_ugid_setinfo *u_info)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ugid_setinfo info;
-+ int err;
-+
-+ down(&vz_quota_sem);
-+
-+ err = -ESRCH;
-+ qmblk = vzquota_find_master(quota_id);
-+ if (qmblk == NULL)
-+ goto out;
-+
-+ err = -EFAULT;
-+ if (copy_from_user(&info, u_info, sizeof(info)))
-+ goto out;
-+
-+ err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
-+
-+out:
-+ up(&vz_quota_sem);
-+
-+ return err;
-+}
-+
-+/*
-+ * This is a system call to maintain UGID quotas
-+ * Note this call is allowed to run ONLY from VE0
-+ */
-+long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub)
-+{
-+ int ret;
-+
-+ ret = -EPERM;
-+ /* access allowed only from root of VE0 */
-+ if (!capable(CAP_SYS_RESOURCE) ||
-+ !capable(CAP_SYS_ADMIN))
-+ goto out;
-+
-+ switch (qub->cmd) {
-+ case VZ_DQ_UGID_GETSTAT:
-+ ret = quota_ugid_getstat(qub->quota_id,
-+ qub->ugid_index, qub->ugid_size,
-+ (struct vz_quota_iface *)qub->addr);
-+ break;
-+ case VZ_DQ_UGID_ADDSTAT:
-+ ret = quota_ugid_addstat(qub->quota_id, qub->ugid_size,
-+ (struct vz_quota_iface *)qub->addr);
-+ break;
-+ case VZ_DQ_UGID_GETGRACE:
-+ ret = quota_ugid_getgrace(qub->quota_id,
-+ (struct dq_info *)qub->addr);
-+ break;
-+ case VZ_DQ_UGID_SETGRACE:
-+ ret = quota_ugid_setgrace(qub->quota_id,
-+ (struct dq_info *)qub->addr);
-+ break;
-+ case VZ_DQ_UGID_GETCONFIG:
-+ ret = quota_ugid_getconfig(qub->quota_id,
-+ (struct vz_quota_ugid_stat *)qub->addr);
-+ break;
-+ case VZ_DQ_UGID_SETCONFIG:
-+ ret = quota_ugid_setconfig(qub->quota_id,
-+ (struct vz_quota_ugid_stat *)qub->addr);
-+ break;
-+ case VZ_DQ_UGID_SETLIMIT:
-+ ret = quota_ugid_setlimit(qub->quota_id,
-+ (struct vz_quota_ugid_setlimit *)
-+ qub->addr);
-+ break;
-+ case VZ_DQ_UGID_SETINFO:
-+ ret = quota_ugid_setinfo(qub->quota_id,
-+ (struct vz_quota_ugid_setinfo *)
-+ qub->addr);
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ goto out;
-+ }
-+out:
-+ return ret;
-+}
-+
-+static void ugid_quota_on_sb(struct super_block *sb)
-+{
-+ struct super_block *real_sb;
-+ struct vz_quota_master *qmblk;
-+
-+ if (!sb->s_op->get_quota_root)
-+ return;
-+
-+ real_sb = sb->s_op->get_quota_root(sb)->i_sb;
-+ if (real_sb->dq_op != &vz_quota_operations)
-+ return;
-+
-+ sb->dq_op = &vz_quota_operations2;
-+ sb->s_qcop = &vz_quotactl_operations;
-+ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
-+ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
-+ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
-+ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
-+
-+ qmblk = vzquota_find_qmblk(sb);
-+ if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
-+ return;
-+ down(&vz_quota_sem);
-+ if (qmblk->dq_flags & VZDQ_USRQUOTA)
-+ sb->s_dquot.flags |= DQUOT_USR_ENABLED;
-+ if (qmblk->dq_flags & VZDQ_GRPQUOTA)
-+ sb->s_dquot.flags |= DQUOT_GRP_ENABLED;
-+ up(&vz_quota_sem);
-+ qmblk_put(qmblk);
-+}
-+
-+static void ugid_quota_off_sb(struct super_block *sb)
-+{
-+ /* can't make quota off on mounted super block */
-+ BUG_ON(sb->s_root != NULL);
-+}
-+
-+static int ugid_notifier_call(struct vnotifier_block *self,
-+ unsigned long n, void *data, int old_ret)
-+{
-+ struct virt_info_quota *viq;
-+
-+ viq = (struct virt_info_quota *)data;
-+
-+ switch (n) {
-+ case VIRTINFO_QUOTA_ON:
-+ ugid_quota_on_sb(viq->super);
-+ break;
-+ case VIRTINFO_QUOTA_OFF:
-+ ugid_quota_off_sb(viq->super);
-+ break;
-+ case VIRTINFO_QUOTA_GETSTAT:
-+ break;
-+ default:
-+ return old_ret;
-+ }
-+ return NOTIFY_OK;
-+}
-+
-+static struct vnotifier_block ugid_notifier_block = {
-+ .notifier_call = ugid_notifier_call,
-+};
-+
-+/* ----------------------------------------------------------------------
-+ * Init/exit.
-+ * --------------------------------------------------------------------- */
-+
-+struct quota_format_type vz_quota_empty_v2_format = {
-+ qf_fmt_id: QFMT_VFS_V0,
-+ qf_ops: NULL,
-+ qf_owner: THIS_MODULE
-+};
-+
-+int vzquota_ugid_init()
-+{
-+ int err;
-+
-+ vz_quota_ugid_cachep = kmem_cache_create("vz_quota_ugid",
-+ sizeof(struct vz_quota_ugid),
-+ 0, SLAB_HWCACHE_ALIGN,
-+ NULL, NULL);
-+ if (vz_quota_ugid_cachep == NULL)
-+ goto err_slab;
-+
-+ err = register_quota_format(&vz_quota_empty_v2_format);
-+ if (err)
-+ goto err_reg;
-+
-+ virtinfo_notifier_register(VITYPE_QUOTA, &ugid_notifier_block);
-+ return 0;
-+
-+err_reg:
-+ kmem_cache_destroy(vz_quota_ugid_cachep);
-+ return err;
-+
-+err_slab:
-+ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
-+ return -ENOMEM;
-+}
-+
-+void vzquota_ugid_release()
-+{
-+ virtinfo_notifier_unregister(VITYPE_QUOTA, &ugid_notifier_block);
-+ unregister_quota_format(&vz_quota_empty_v2_format);
-+
-+ if (kmem_cache_destroy(vz_quota_ugid_cachep))
-+ printk(KERN_ERR "VZQUOTA: kmem_cache_destroy failed\n");
-+}
-diff -upr linux-2.6.16.orig/fs/vzdquot.c linux-2.6.16-026test009/fs/vzdquot.c
---- linux-2.6.16.orig/fs/vzdquot.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/fs/vzdquot.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,1705 @@
-+/*
-+ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * This file contains the core of Virtuozzo disk quota implementation:
-+ * maintenance of VZDQ information in inodes,
-+ * external interfaces,
-+ * module entry.
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/kernel.h>
-+#include <linux/string.h>
-+#include <linux/list.h>
-+#include <asm/atomic.h>
-+#include <linux/spinlock.h>
-+#include <asm/semaphore.h>
-+#include <linux/slab.h>
-+#include <linux/fs.h>
-+#include <linux/dcache.h>
-+#include <linux/quota.h>
-+#include <linux/rcupdate.h>
-+#include <linux/module.h>
-+#include <asm/uaccess.h>
-+#include <linux/vzctl.h>
-+#include <linux/vzctl_quota.h>
-+#include <linux/vzquota.h>
-+#include <linux/virtinfo.h>
-+#include <linux/vzdq_tree.h>
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Locking
-+ *
-+ * ---------------------------------------------------------------------- */
-+
-+/*
-+ * Serializes on/off and all other do_vzquotactl operations.
-+ * Protects qmblk hash.
-+ */
-+struct semaphore vz_quota_sem;
-+
-+/*
-+ * Data access locks
-+ * inode_qmblk
-+ * protects qmblk pointers in all inodes and qlnk content in general
-+ * (but not qmblk content);
-+ * also protects related qmblk invalidation procedures;
-+ * can't be per-inode because of vzquota_dtree_qmblk complications
-+ * and problems with serialization with quota_on,
-+ * but can be per-superblock;
-+ * qmblk_data
-+ * protects qmblk fields (such as current usage)
-+ * quota_data
-+ * protects charge/uncharge operations, thus, implies
-+ * qmblk_data lock and, if CONFIG_VZ_QUOTA_UGID, inode_qmblk lock
-+ * (to protect ugid pointers).
-+ *
-+ * Lock order:
-+ * inode_qmblk_lock -> dcache_lock
-+ * inode_qmblk_lock -> qmblk_data
-+ */
-+static spinlock_t vzdq_qmblk_lock = SPIN_LOCK_UNLOCKED;
-+
-+inline void inode_qmblk_lock(struct super_block *sb)
-+{
-+ spin_lock(&vzdq_qmblk_lock);
-+}
-+
-+inline void inode_qmblk_unlock(struct super_block *sb)
-+{
-+ spin_unlock(&vzdq_qmblk_lock);
-+}
-+
-+inline void qmblk_data_read_lock(struct vz_quota_master *qmblk)
-+{
-+ spin_lock(&qmblk->dq_data_lock);
-+}
-+
-+inline void qmblk_data_read_unlock(struct vz_quota_master *qmblk)
-+{
-+ spin_unlock(&qmblk->dq_data_lock);
-+}
-+
-+inline void qmblk_data_write_lock(struct vz_quota_master *qmblk)
-+{
-+ spin_lock(&qmblk->dq_data_lock);
-+}
-+
-+inline void qmblk_data_write_unlock(struct vz_quota_master *qmblk)
-+{
-+ spin_unlock(&qmblk->dq_data_lock);
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Master hash table handling.
-+ *
-+ * SMP not safe, serialied by vz_quota_sem within quota syscalls
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+static kmem_cache_t *vzquota_cachep;
-+
-+/*
-+ * Hash function.
-+ */
-+#define QHASH_BITS 6
-+#define VZ_QUOTA_HASH_SIZE (1 << QHASH_BITS)
-+#define QHASH_MASK (VZ_QUOTA_HASH_SIZE - 1)
-+
-+struct list_head vzquota_hash_table[VZ_QUOTA_HASH_SIZE];
-+int vzquota_hash_size = VZ_QUOTA_HASH_SIZE;
-+
-+static inline int vzquota_hash_func(unsigned int qid)
-+{
-+ return (((qid >> QHASH_BITS) ^ qid) & QHASH_MASK);
-+}
-+
-+/**
-+ * vzquota_alloc_master - alloc and instantiate master quota record
-+ * @quota_id: id of the new record; must not be present in the hash yet
-+ * @qstat: source of the initial dq_stat and dq_info values
-+ *
-+ * The record is created in VZDQ_STARTING state with a reference count of 1
-+ * and is linked into vzquota_hash_table before the function returns.
-+ *
-+ * Returns:
-+ * pointer to newly created record if SUCCESS
-+ * ERR_PTR(-ENOMEM) if out of memory
-+ * ERR_PTR(-EEXIST) if record with given quota_id already exist
-+ */
-+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
-+ struct vz_quota_stat *qstat)
-+{
-+ int err;
-+ struct vz_quota_master *qmblk;
-+
-+ err = -EEXIST;
-+ if (vzquota_find_master(quota_id) != NULL)
-+ goto out;
-+
-+ /* every failure from here on is an allocation failure */
-+ err = -ENOMEM;
-+ qmblk = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
-+ if (qmblk == NULL)
-+ goto out;
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ qmblk->dq_uid_tree = quotatree_alloc();
-+ if (!qmblk->dq_uid_tree)
-+ goto out_free;
-+
-+ qmblk->dq_gid_tree = quotatree_alloc();
-+ if (!qmblk->dq_gid_tree)
-+ goto out_free_tree;
-+#endif
-+
-+ /* the slab object is not zeroed, so each field is set explicitly */
-+ qmblk->dq_state = VZDQ_STARTING;
-+ init_MUTEX(&qmblk->dq_sem);
-+ spin_lock_init(&qmblk->dq_data_lock);
-+
-+ qmblk->dq_id = quota_id;
-+ qmblk->dq_stat = qstat->dq_stat;
-+ qmblk->dq_info = qstat->dq_info;
-+ qmblk->dq_root_dentry = NULL;
-+ qmblk->dq_root_mnt = NULL;
-+ qmblk->dq_sb = NULL;
-+ qmblk->dq_ugid_count = 0;
-+ qmblk->dq_ugid_max = 0;
-+ qmblk->dq_flags = 0;
-+ memset(qmblk->dq_ugid_info, 0, sizeof(qmblk->dq_ugid_info));
-+ INIT_LIST_HEAD(&qmblk->dq_ilink_list);
-+
-+ /* the initial reference belongs to the caller */
-+ atomic_set(&qmblk->dq_count, 1);
-+
-+ /* insert in hash chain */
-+ list_add(&qmblk->dq_hash,
-+ &vzquota_hash_table[vzquota_hash_func(quota_id)]);
-+
-+ /* success */
-+ return qmblk;
-+
-+ /* error unwinding, in reverse order of acquisition */
-+out_free_tree:
-+ quotatree_free(qmblk->dq_uid_tree, NULL);
-+out_free:
-+ kmem_cache_free(vzquota_cachep, qmblk);
-+out:
-+ return ERR_PTR(err);
-+}
-+
-+/*
-+ * Allocate a zero-filled placeholder master record flagged VZDQ_NOQUOT.
-+ * It starts in VZDQ_STOPING state with one reference held by the caller.
-+ * Returns NULL when the slab allocation fails.
-+ */
-+static struct vz_quota_master *vzquota_alloc_fake(void)
-+{
-+	struct vz_quota_master *fake;
-+
-+	fake = kmem_cache_alloc(vzquota_cachep, SLAB_KERNEL);
-+	if (fake != NULL) {
-+		memset(fake, 0, sizeof(*fake));
-+		fake->dq_state = VZDQ_STOPING;
-+		fake->dq_flags = VZDQ_NOQUOT;
-+		spin_lock_init(&fake->dq_data_lock);
-+		INIT_LIST_HEAD(&fake->dq_ilink_list);
-+		atomic_set(&fake->dq_count, 1);
-+	}
-+	return fake;
-+}
-+
-+/**
-+ * vzquota_find_master - look up a master record by quota id
-+ * @quota_id: id to search for
-+ *
-+ * Walks the hash bucket selected by vzquota_hash_func() and returns the
-+ * first record whose dq_id matches, or NULL if there is none.
-+ * The reference counter of the returned record is not touched.
-+ * Called under vz_quota_sem.
-+ */
-+struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
-+{
-+	struct list_head *bucket;
-+	struct vz_quota_master *cur;
-+
-+	bucket = &vzquota_hash_table[vzquota_hash_func(quota_id)];
-+	list_for_each_entry(cur, bucket, dq_hash)
-+		if (cur->dq_id == quota_id)
-+			return cur;
-+	return NULL;
-+}
-+
-+/**
-+ * vzquota_free_master - release resources taken by qmblk, freeing memory
-+ * @qmblk: master record to destroy
-+ *
-+ * qmblk is assumed to be already taken out from the hash.
-+ * Should be called outside vz_quota_sem.
-+ * No inode link may remain on qmblk->dq_ilink_list (enforced by BUG_ON).
-+ */
-+void vzquota_free_master(struct vz_quota_master *qmblk)
-+{
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ vzquota_kill_ugid(qmblk);
-+#endif
-+ BUG_ON(!list_empty(&qmblk->dq_ilink_list));
-+ kmem_cache_free(vzquota_cachep, qmblk);
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Passing quota information through current
-+ *
-+ * Used in inode -> qmblk lookup at inode creation stage (since at that
-+ * time there are no links between the inode being created and its parent
-+ * directory).
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/* Marker stored in current->magic when current->ino was set by this code. */
-+#define VZDQ_CUR_MAGIC 0x57d0fee2
-+
-+/* Returns non-zero if the current task carries the VZDQ_CUR_MAGIC marker. */
-+static inline int vzquota_cur_qmblk_check(void)
-+{
-+ return current->magic == VZDQ_CUR_MAGIC;
-+}
-+
-+/*
-+ * Returns the inode stashed in the current task; the value is returned
-+ * as is, the magic marker is not checked here.
-+ */
-+static inline struct inode *vzquota_cur_qmblk_fetch(void)
-+{
-+ return current->ino;
-+}
-+
-+/* Stash @data in the current task and mark it with VZDQ_CUR_MAGIC. */
-+static inline void vzquota_cur_qmblk_set(struct inode *data)
-+{
-+	current->magic = VZDQ_CUR_MAGIC;
-+	current->ino = data;
-+}
-+
-+/* Compiled out: would clear the magic marker of the current task. */
-+#if 0
-+static inline void vzquota_cur_qmblk_reset(void)
-+{
-+ current->magic = 0;
-+}
-+#endif
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Superblock quota operations
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * Kernel structure abuse.
-+ * We use files[0] pointer as an int variable:
-+ * reference counter of how many quota blocks uses this superblock.
-+ * files[1] is used for generations structure which helps us to track
-+ * when traversing of dentries is really required.
-+ *
-+ * NOTE(review): the text above looks outdated. The macros below use
-+ * s_dquot.vzdq_master for the per-superblock "no quota" master record and
-+ * reuse the storage of s_dquot.dqio_sem as a struct timeval timestamp.
-+ *
-+ * The macro argument is parenthesized so that any pointer expression can
-+ * be passed; __VZ_QUOTA_NOQUOTA() is still usable as an lvalue.
-+ */
-+#define __VZ_QUOTA_NOQUOTA(sb) (sb)->s_dquot.vzdq_master
-+#define __VZ_QUOTA_TSTAMP(sb) ((struct timeval *)\
-+	&(sb)->s_dquot.dqio_sem)
-+
-+#if defined(VZ_QUOTA_UNLOAD)
-+
-+/*
-+ * Count of quota trees using a superblock. The macro is used as an lvalue
-+ * (++ / --) by vzquota_get_super() and vzquota_put_super(); the argument is
-+ * parenthesized so that any pointer expression can be passed.
-+ */
-+#define __VZ_QUOTA_SBREF(sb) (sb)->s_dquot.vzdq_count
-+
-+/*
-+ * Quota operation tables found on a superblock before ours were installed.
-+ * They are recorded once (only while still NULL) and restored by
-+ * vzquota_put_super().
-+ */
-+struct dquot_operations *orig_dq_op;
-+struct quotactl_ops *orig_dq_cop;
-+
-+/**
-+ * vzquota_get_super - account for a new quoted tree under the superblock
-+ * @sb: superblock that gets one more VZ quota tree
-+ *
-+ * One superblock can have multiple directory subtrees with different VZ
-+ * quotas. We keep a counter of such subtrees and set VZ quota operations or
-+ * reset the default ones.
-+ *
-+ * Called under vz_quota_sem (from quota_on).
-+ *
-+ * Returns 0 on success, -EEXIST if another (non-VZ) user or group quota is
-+ * already enabled on @sb.
-+ */
-+int vzquota_get_super(struct super_block *sb)
-+{
-+ if (sb->dq_op != &vz_quota_operations) {
-+ /* first VZ quota tree on this superblock: install our ops */
-+ down(&sb->s_dquot.dqonoff_sem);
-+ if (sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) {
-+ up(&sb->s_dquot.dqonoff_sem);
-+ return -EEXIST;
-+ }
-+ /* remember the original tables only once, they are globals */
-+ if (orig_dq_op == NULL && sb->dq_op != NULL)
-+ orig_dq_op = sb->dq_op;
-+ sb->dq_op = &vz_quota_operations;
-+ if (orig_dq_cop == NULL && sb->s_qcop != NULL)
-+ orig_dq_cop = sb->s_qcop;
-+ /* XXX this may race with sys_quotactl */
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ sb->s_qcop = &vz_quotactl_operations;
-+#else
-+ sb->s_qcop = NULL;
-+#endif
-+ /* timestamp is read back by vzquota_det_qmblk_recalc() */
-+ do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
-+ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
-+
-+ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
-+ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
-+ sb->s_dquot.info[USRQUOTA].dqi_format = &vz_quota_empty_v2_format;
-+ sb->s_dquot.info[GRPQUOTA].dqi_format = &vz_quota_empty_v2_format;
-+ /*
-+ * To get quotaops.h call us we need to mark superblock
-+ * as having quota. These flags mark the moment when
-+ * our dq_op start to be called.
-+ *
-+ * The ordering of dq_op and s_dquot.flags assignment
-+ * needs to be enforced, but other CPUs do not do rmb()
-+ * between s_dquot.flags and dq_op accesses.
-+ */
-+ wmb(); synchronize_sched();
-+ sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
-+ __module_get(THIS_MODULE);
-+ up(&sb->s_dquot.dqonoff_sem);
-+ }
-+ /* protected by vz_quota_sem */
-+ __VZ_QUOTA_SBREF(sb)++;
-+ return 0;
-+}
-+
-+/**
-+ * vzquota_put_super - release superblock when one quota tree goes away
-+ * @sb: superblock that loses one VZ quota tree
-+ *
-+ * Decrements the per-superblock tree counter; when it reaches zero the
-+ * quota flags are cleared, the saved quota operation tables are put back
-+ * and the quota links of inodes without a qmblk are released.
-+ *
-+ * Called under vz_quota_sem.
-+ */
-+void vzquota_put_super(struct super_block *sb)
-+{
-+ int count;
-+
-+ count = --__VZ_QUOTA_SBREF(sb);
-+ if (count == 0) {
-+ down(&sb->s_dquot.dqonoff_sem);
-+ /* stop new calls into our dq_op before restoring the tables */
-+ sb->s_dquot.flags = 0;
-+ wmb(); synchronize_sched();
-+ /* dqio_sem storage was reused as a timestamp, re-create it */
-+ sema_init(&sb->s_dquot.dqio_sem, 1);
-+ sb->s_qcop = orig_dq_cop;
-+ sb->dq_op = orig_dq_op;
-+ inode_qmblk_lock(sb);
-+ quota_gen_put(SB_QGEN(sb));
-+ SB_QGEN(sb) = NULL;
-+ /* release qlnk's without qmblk */
-+ remove_inode_quota_links_list(&non_vzquota_inodes_lh,
-+ sb, NULL);
-+ /*
-+ * Races with quota initialization:
-+ * after this inode_qmblk_unlock all inode's generations are
-+ * invalidated, quota_inode_qmblk checks superblock operations.
-+ */
-+ inode_qmblk_unlock(sb);
-+ /*
-+ * Module refcounting: in theory, this is the best place
-+ * to call module_put(THIS_MODULE).
-+ * In reality, it can't be done because we can't be sure that
-+ * other CPUs do not enter our code segment through dq_op
-+ * cached long time ago. Quotaops interface isn't supposed to
-+ * go into modules currently (that is, into unloadable
-+ * modules). By omitting module_put, our module isn't
-+ * unloadable.
-+ */
-+ up(&sb->s_dquot.dqonoff_sem);
-+ }
-+}
-+
-+#else
-+
-+/*
-+ * Private replacement for a superblock's super_operations.
-+ * new_op is a copy of the filesystem's table with put_super redirected to
-+ * vzquota_shutdown_super(); old_op points to the original table so that it
-+ * can be restored at umount.
-+ */
-+struct vzquota_new_sop {
-+ struct super_operations new_op;
-+ struct super_operations *old_op;
-+};
-+
-+/**
-+ * vzquota_shutdown_super - callback on umount
-+ * @sb: superblock being shut down
-+ *
-+ * Installed as ->put_super by vzquota_get_super() through a private copy of
-+ * the filesystem's super_operations. Drops the superblock's "no quota"
-+ * master record, restores the original operations table, frees the private
-+ * copy and then chains to the filesystem's own ->put_super, if it has one.
-+ */
-+void vzquota_shutdown_super(struct super_block *sb)
-+{
-+	struct vz_quota_master *qmblk;
-+	struct vzquota_new_sop *sop;
-+
-+	qmblk = __VZ_QUOTA_NOQUOTA(sb);
-+	__VZ_QUOTA_NOQUOTA(sb) = NULL;
-+	if (qmblk != NULL)
-+		qmblk_put(qmblk);
-+	sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
-+	sb->s_op = sop->old_op;
-+	kfree(sop);
-+	/*
-+	 * ->put_super is an optional method. Our hook is installed
-+	 * unconditionally, so the original table may not provide one;
-+	 * calling through a NULL pointer here would oops at umount.
-+	 */
-+	if (sb->s_op->put_super != NULL)
-+		(*sb->s_op->put_super)(sb);
-+}
-+
-+/**
-+ * vzquota_get_super - account for a new quoted tree under the superblock
-+ * @sb: superblock that gets one more VZ quota tree
-+ *
-+ * One superblock can have multiple directory subtrees with different VZ
-+ * quotas.
-+ *
-+ * On the first call for @sb this allocates the "no quota" master record,
-+ * hooks ->put_super through a private copy of the super_operations and
-+ * installs the VZ quota operation tables.
-+ *
-+ * Called under vz_quota_sem (from vzquota_on).
-+ *
-+ * Returns 0 on success, -EEXIST if a non-VZ user or group quota is already
-+ * enabled on @sb, -ENOMEM if an allocation fails.
-+ */
-+int vzquota_get_super(struct super_block *sb)
-+{
-+	struct vz_quota_master *qnew;
-+	struct vzquota_new_sop *sop;
-+	int err;
-+
-+	down(&sb->s_dquot.dqonoff_sem);
-+	err = -EEXIST;
-+	if ((sb->s_dquot.flags & (DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED)) &&
-+	    sb->dq_op != &vz_quota_operations)
-+		goto out_up;
-+
-+	/* all failures below are allocation failures, not conflicts */
-+	err = -ENOMEM;
-+
-+	/*
-+	 * This allocation code should be under sb->dq_op check below, but
-+	 * it doesn't really matter...
-+	 */
-+	if (__VZ_QUOTA_NOQUOTA(sb) == NULL) {
-+		qnew = vzquota_alloc_fake();
-+		if (qnew == NULL)
-+			goto out_up;
-+		__VZ_QUOTA_NOQUOTA(sb) = qnew;
-+	}
-+
-+	if (sb->dq_op != &vz_quota_operations) {
-+		sop = kmalloc(sizeof(*sop), GFP_KERNEL);
-+		if (sop == NULL) {
-+			vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
-+			__VZ_QUOTA_NOQUOTA(sb) = NULL;
-+			goto out_up;
-+		}
-+		/* clone the fs operations and intercept umount */
-+		memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
-+		sop->new_op.put_super = &vzquota_shutdown_super;
-+		sop->old_op = sb->s_op;
-+		sb->s_op = &sop->new_op;
-+
-+		sb->dq_op = &vz_quota_operations;
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+		sb->s_qcop = &vz_quotactl_operations;
-+#else
-+		sb->s_qcop = NULL;
-+#endif
-+		/* timestamp is read back by vzquota_det_qmblk_recalc() */
-+		do_gettimeofday(__VZ_QUOTA_TSTAMP(sb));
-+
-+		memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
-+		/* these 2 list heads are checked in sync_dquots() */
-+		INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
-+		INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
-+		sb->s_dquot.info[USRQUOTA].dqi_format =
-+			&vz_quota_empty_v2_format;
-+		sb->s_dquot.info[GRPQUOTA].dqi_format =
-+			&vz_quota_empty_v2_format;
-+
-+		/*
-+		 * To get quotaops.h to call us we need to mark superblock
-+		 * as having quota. These flags mark the moment when
-+		 * our dq_op start to be called.
-+		 *
-+		 * The ordering of dq_op and s_dquot.flags assignment
-+		 * needs to be enforced, but other CPUs do not do rmb()
-+		 * between s_dquot.flags and dq_op accesses.
-+		 */
-+		wmb(); synchronize_sched();
-+		sb->s_dquot.flags = DQUOT_USR_ENABLED|DQUOT_GRP_ENABLED;
-+	}
-+	err = 0;
-+
-+out_up:
-+	up(&sb->s_dquot.dqonoff_sem);
-+	return err;
-+}
-+
-+/**
-+ * vzquota_put_super - one quota tree less on this superblock
-+ * @sb: superblock (unused)
-+ *
-+ * Intentionally a no-op in this configuration, see the comment in the body.
-+ *
-+ * Called under vz_quota_sem.
-+ */
-+void vzquota_put_super(struct super_block *sb)
-+{
-+ /*
-+ * Even if this put is the last one,
-+ * sb->s_dquot.flags can't be cleared, because otherwise vzquota_drop
-+ * won't be called and the remaining qmblk references won't be put.
-+ */
-+}
-+
-+#endif
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Helpers for inode -> qmblk link maintenance
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * __VZ_QUOTA_EMPTY is a sentinel pointer value stored in qlnk->qmblk for a
-+ * link that has no master record assigned; it must never be dereferenced.
-+ */
-+#define __VZ_QUOTA_EMPTY ((void *)0xbdbdbdbd)
-+/* True if @qm is flagged VZDQ_NOQUOT; the @sb argument is not used. */
-+#define VZ_QUOTA_IS_NOQUOTA(qm, sb) ((qm)->dq_flags & VZDQ_NOQUOT)
-+/* i_op value compared against in vzquota_det_qmblk_recalc(). */
-+#define VZ_QUOTA_EMPTY_IOPS (&vfs_empty_iops)
-+extern struct inode_operations vfs_empty_iops;
-+
-+/*
-+ * Tell whether the qmblk recorded in the inode's quota link can be used
-+ * as is: VZ_QUOTA_BAD counts as actual, an empty link does not, and a real
-+ * master record is actual unless it carries VZDQ_NOACT (invalidated).
-+ */
-+static int VZ_QUOTA_IS_ACTUAL(struct inode *inode)
-+{
-+	struct vz_quota_master *qm = INODE_QLNK(inode)->qmblk;
-+
-+	if (qm == VZ_QUOTA_BAD)
-+		return 1;
-+	if (qm == __VZ_QUOTA_EMPTY)
-+		return 0;
-+	return !(qm->dq_flags & VZDQ_NOACT);
-+}
-+
-+/* Returns non-zero if @qlnk holds the __VZ_QUOTA_EMPTY sentinel. */
-+static inline int vzquota_qlnk_is_empty(struct vz_quota_ilink *qlnk)
-+{
-+ return qlnk->qmblk == __VZ_QUOTA_EMPTY;
-+}
-+
-+/*
-+ * Mark @qlnk as empty and record VZ_QUOTAO_SETE as its origin.
-+ * No reference is dropped here and the list linkage is left untouched.
-+ */
-+static inline void vzquota_qlnk_set_empty(struct vz_quota_ilink *qlnk)
-+{
-+ qlnk->qmblk = __VZ_QUOTA_EMPTY;
-+ qlnk->origin = VZ_QUOTAO_SETE;
-+}
-+
-+/*
-+ * Bring @qlnk into its pristine state: everything zeroed, list head
-+ * self-linked, qmblk set to the empty sentinel, origin VZ_QUOTAO_INIT.
-+ */
-+void vzquota_qlnk_init(struct vz_quota_ilink *qlnk)
-+{
-+	memset(qlnk, 0, sizeof(*qlnk));
-+	INIT_LIST_HEAD(&qlnk->list);
-+	qlnk->qmblk = __VZ_QUOTA_EMPTY;
-+	qlnk->origin = VZ_QUOTAO_INIT;
-+}
-+
-+/*
-+ * Drop the references held by @qlnk: the ugid references (under
-+ * qmblk->dq_sem, when CONFIG_VZ_QUOTA_UGID is set) and then the qmblk
-+ * reference itself. Empty links are left alone; NULL and VZ_QUOTA_BAD
-+ * qmblk values hold no references. May sleep.
-+ * The qmblk and qugid fields are not reset, only origin is updated.
-+ */
-+void vzquota_qlnk_destroy(struct vz_quota_ilink *qlnk)
-+{
-+ might_sleep();
-+ if (vzquota_qlnk_is_empty(qlnk))
-+ return;
-+#if defined(CONFIG_VZ_QUOTA_UGID)
-+ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD) {
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ugid *quid, *qgid;
-+ qmblk = qlnk->qmblk;
-+ quid = qlnk->qugid[USRQUOTA];
-+ qgid = qlnk->qugid[GRPQUOTA];
-+ /* take dq_sem only when there is something to put */
-+ if (quid != NULL || qgid != NULL) {
-+ down(&qmblk->dq_sem);
-+ if (qgid != NULL)
-+ vzquota_put_ugid(qmblk, qgid);
-+ if (quid != NULL)
-+ vzquota_put_ugid(qmblk, quid);
-+ up(&qmblk->dq_sem);
-+ }
-+ }
-+#endif
-+ if (qlnk->qmblk != NULL && qlnk->qmblk != VZ_QUOTA_BAD)
-+ qmblk_put(qlnk->qmblk);
-+ qlnk->origin = VZ_QUOTAO_DESTR;
-+}
-+
-+/**
-+ * vzquota_qlnk_swap - swap inode's and temporary vz_quota_ilink contents
-+ * @qlt: temporary
-+ * @qli: inode's
-+ *
-+ * Locking is provided by the caller (depending on the context).
-+ * After swap, @qli is inserted into the corresponding dq_ilink_list,
-+ * @qlt list is reinitialized.
-+ * The qmblk pointers and all MAXQUOTAS qugid pointers are exchanged; no
-+ * reference counts are changed here.
-+ */
-+static void vzquota_qlnk_swap(struct vz_quota_ilink *qlt,
-+ struct vz_quota_ilink *qli)
-+{
-+ struct vz_quota_master *qb;
-+ struct vz_quota_ugid *qu;
-+ int i;
-+
-+ qb = qlt->qmblk;
-+ qlt->qmblk = qli->qmblk;
-+ qli->qmblk = qb;
-+ /* unlink the inode's link from its previous master record */
-+ list_del_init(&qli->list);
-+ /* sentinel values have no dq_ilink_list to join */
-+ if (qb != __VZ_QUOTA_EMPTY && qb != VZ_QUOTA_BAD)
-+ list_add(&qli->list, &qb->dq_ilink_list);
-+ INIT_LIST_HEAD(&qlt->list);
-+ qli->origin = VZ_QUOTAO_SWAP;
-+
-+ for (i = 0; i < MAXQUOTAS; i++) {
-+ qu = qlt->qugid[i];
-+ qlt->qugid[i] = qli->qugid[i];
-+ qli->qugid[i] = qu;
-+ }
-+}
-+
-+/**
-+ * vzquota_qlnk_reinit_locked - destroy qlnk content, called under locks
-+ * @qlnk: link to reset to the empty state
-+ * @inode: inode whose superblock's inode_qmblk lock is held
-+ *
-+ * Called under dcache_lock and inode_qmblk locks.
-+ * Returns 1 if locks were dropped inside, 0 if atomic.
-+ */
-+static int vzquota_qlnk_reinit_locked(struct vz_quota_ilink *qlnk,
-+ struct inode *inode)
-+{
-+ if (vzquota_qlnk_is_empty(qlnk))
-+ return 0;
-+ /* VZ_QUOTA_BAD holds no references, so no sleeping destroy is needed */
-+ if (qlnk->qmblk == VZ_QUOTA_BAD) {
-+ vzquota_qlnk_set_empty(qlnk);
-+ return 0;
-+ }
-+ /* vzquota_qlnk_destroy() may sleep: drop both locks around it */
-+ spin_unlock(&dcache_lock);
-+ inode_qmblk_unlock(inode->i_sb);
-+ vzquota_qlnk_destroy(qlnk);
-+ vzquota_qlnk_init(qlnk);
-+ /* retake in the original order: inode_qmblk first, then dcache_lock */
-+ inode_qmblk_lock(inode->i_sb);
-+ spin_lock(&dcache_lock);
-+ return 1;
-+}
-+
-+#if defined(CONFIG_VZ_QUOTA_UGID)
-+/**
-+ * vzquota_qlnk_reinit_attr - destroy and reinit qlnk content
-+ * @qlnk: link to reset to the empty state
-+ * @inode: inode whose superblock's inode_qmblk lock is held
-+ * @qmblk: master record whose data write lock is held
-+ *
-+ * Similar to vzquota_qlnk_reinit_locked, called under different locks
-+ * (inode_qmblk lock and the qmblk data write lock).
-+ * Returns 1 if locks were dropped inside, 0 if @qlnk was already empty.
-+ */
-+static int vzquota_qlnk_reinit_attr(struct vz_quota_ilink *qlnk,
-+ struct inode *inode,
-+ struct vz_quota_master *qmblk)
-+{
-+ if (vzquota_qlnk_is_empty(qlnk))
-+ return 0;
-+ /* may be optimized if qlnk->qugid all NULLs */
-+ qmblk_data_write_unlock(qmblk);
-+ inode_qmblk_unlock(inode->i_sb);
-+ vzquota_qlnk_destroy(qlnk);
-+ vzquota_qlnk_init(qlnk);
-+ inode_qmblk_lock(inode->i_sb);
-+ qmblk_data_write_lock(qmblk);
-+ return 1;
-+}
-+#endif
-+
-+/**
-+ * vzquota_qlnk_fill - fill vz_quota_ilink content
-+ * @qlnk: vz_quota_ilink to fill
-+ * @inode: inode for which @qlnk is filled (i_sb, i_uid, i_gid)
-+ * @qmblk: qmblk to which this @qlnk will belong
-+ *
-+ * Called under dcache_lock and inode_qmblk locks.
-+ * Returns 1 if locks were dropped inside, 0 if atomic.
-+ * @qlnk is expected to be empty.
-+ *
-+ * A reference on @qmblk is taken unless it is VZ_QUOTA_BAD. With
-+ * CONFIG_VZ_QUOTA_UGID, and only when @qmblk has VZDQUG_ON set and is not a
-+ * "no quota" record, the uid/gid records are looked up under
-+ * qmblk->dq_sem, which requires dropping both spinlocks.
-+ */
-+static int vzquota_qlnk_fill(struct vz_quota_ilink *qlnk,
-+ struct inode *inode,
-+ struct vz_quota_master *qmblk)
-+{
-+ if (qmblk != VZ_QUOTA_BAD)
-+ qmblk_get(qmblk);
-+ qlnk->qmblk = qmblk;
-+
-+#if defined(CONFIG_VZ_QUOTA_UGID)
-+ if (qmblk != VZ_QUOTA_BAD &&
-+ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
-+ (qmblk->dq_flags & VZDQUG_ON)) {
-+ struct vz_quota_ugid *quid, *qgid;
-+
-+ /* dq_sem is a semaphore: it cannot be taken under spinlocks */
-+ spin_unlock(&dcache_lock);
-+ inode_qmblk_unlock(inode->i_sb);
-+
-+ down(&qmblk->dq_sem);
-+ quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
-+ qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
-+ up(&qmblk->dq_sem);
-+
-+ inode_qmblk_lock(inode->i_sb);
-+ spin_lock(&dcache_lock);
-+ qlnk->qugid[USRQUOTA] = quid;
-+ qlnk->qugid[GRPQUOTA] = qgid;
-+ return 1;
-+ }
-+#endif
-+
-+ return 0;
-+}
-+
-+#if defined(CONFIG_VZ_QUOTA_UGID)
-+/**
-+ * vzquota_qlnk_fill_attr - fill vz_quota_ilink content for uid, gid
-+ * @qlnk: vz_quota_ilink to fill
-+ * @inode: inode being changed; its current qugid entries are reused for
-+ * the quota types not selected in @mask
-+ * @iattr: new attributes (ia_uid, ia_gid)
-+ * @mask: bit (1 << USRQUOTA) and/or (1 << GRPQUOTA) for each id that changes
-+ * @qmblk: qmblk to which this @qlnk will belong
-+ *
-+ * This function is a helper for vzquota_transfer, and differs from
-+ * vzquota_qlnk_fill only by locking.
-+ *
-+ * Called under the inode_qmblk lock and the qmblk data write lock.
-+ * Returns 1 if the locks were dropped inside (non-zero @mask), 0 otherwise;
-+ * in the latter case only qlnk->qmblk is set.
-+ */
-+static int vzquota_qlnk_fill_attr(struct vz_quota_ilink *qlnk,
-+ struct inode *inode,
-+ struct iattr *iattr,
-+ int mask,
-+ struct vz_quota_master *qmblk)
-+{
-+ qmblk_get(qmblk);
-+ qlnk->qmblk = qmblk;
-+
-+ if (mask) {
-+ struct vz_quota_ugid *quid, *qgid;
-+
-+ quid = qgid = NULL; /* to make gcc happy */
-+ /* unchanged ids: take another reference on the inode's record */
-+ if (!(mask & (1 << USRQUOTA)))
-+ quid = vzquota_get_ugid(INODE_QLNK(inode)->
-+ qugid[USRQUOTA]);
-+ if (!(mask & (1 << GRPQUOTA)))
-+ qgid = vzquota_get_ugid(INODE_QLNK(inode)->
-+ qugid[GRPQUOTA]);
-+
-+ qmblk_data_write_unlock(qmblk);
-+ inode_qmblk_unlock(inode->i_sb);
-+
-+ /* changed ids: look the new record up under dq_sem */
-+ down(&qmblk->dq_sem);
-+ if (mask & (1 << USRQUOTA))
-+ quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
-+ USRQUOTA, 0);
-+ if (mask & (1 << GRPQUOTA))
-+ qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
-+ GRPQUOTA, 0);
-+ up(&qmblk->dq_sem);
-+
-+ inode_qmblk_lock(inode->i_sb);
-+ qmblk_data_write_lock(qmblk);
-+ qlnk->qugid[USRQUOTA] = quid;
-+ qlnk->qugid[GRPQUOTA] = qgid;
-+ return 1;
-+ }
-+
-+ return 0;
-+}
-+#endif
-+
-+/**
-+ * __vzquota_inode_init - make sure inode's qlnk is initialized
-+ * @inode: inode to check
-+ * @origin: value recorded in the link's origin field (always written)
-+ *
-+ * May be called if qlnk is already initialized, detects this situation itself.
-+ * Called under inode_qmblk_lock.
-+ *
-+ * i_dquot[USRQUOTA] doubles as the "initialized" marker: NODQUOT means the
-+ * link was never set up, any other value means it was.
-+ */
-+static void __vzquota_inode_init(struct inode *inode, unsigned char origin)
-+{
-+ if (inode->i_dquot[USRQUOTA] == NODQUOT) {
-+ vzquota_qlnk_init(INODE_QLNK(inode));
-+ inode->i_dquot[USRQUOTA] = (void *)~(unsigned long)NODQUOT;
-+ }
-+ INODE_QLNK(inode)->origin = origin;
-+}
-+
-+/**
-+ * vzquota_inode_drop - destroy VZ quota information in the inode
-+ * @inode: inode whose quota link is torn down
-+ *
-+ * Inode must not be externally accessible or dirty.
-+ *
-+ * The inode's link content is moved into a temporary under the inode_qmblk
-+ * lock and destroyed after the lock is released, because
-+ * vzquota_qlnk_destroy() may sleep.
-+ */
-+static void vzquota_inode_drop(struct inode *inode)
-+{
-+ struct vz_quota_ilink qlnk;
-+
-+ vzquota_qlnk_init(&qlnk);
-+ inode_qmblk_lock(inode->i_sb);
-+ vzquota_qlnk_swap(&qlnk, INODE_QLNK(inode));
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_DRCAL;
-+ /* mark the link as uninitialized for __vzquota_inode_init() */
-+ inode->i_dquot[USRQUOTA] = NODQUOT;
-+ inode_qmblk_unlock(inode->i_sb);
-+ vzquota_qlnk_destroy(&qlnk);
-+}
-+
-+/**
-+ * vzquota_inode_qmblk_set - initialize inode's qlnk
-+ * @inode: inode to be initialized
-+ * @qmblk: quota master block to which this inode should belong (may be BAD)
-+ * @qlnk: placeholder to store data to resolve locking issues
-+ *
-+ * Returns 1 if locks were dropped and rechecks possibly needed, 0 otherwise.
-+ * Called under dcache_lock and inode_qmblk locks.
-+ * @qlnk will be destroyed in the caller chain.
-+ *
-+ * It is not mandatory to restart parent checks since quota on/off currently
-+ * shrinks dentry tree and checks that there are no outside references.
-+ * But if at some time that shrink is removed, restarts will be required.
-+ * Additionally, the restarts prevent inconsistencies if the dentry tree
-+ * changes (inode is moved). This is not a big deal, but anyway...
-+ *
-+ * A NULL @qmblk is reported with a stack dump and treated as VZ_QUOTA_BAD.
-+ */
-+static int vzquota_inode_qmblk_set(struct inode *inode,
-+ struct vz_quota_master *qmblk,
-+ struct vz_quota_ilink *qlnk)
-+{
-+ if (qmblk == NULL) {
-+ printk(KERN_ERR "VZDQ: NULL in set, "
-+ "orig %u, dev %s, inode %lu, fs %s\n",
-+ INODE_QLNK(inode)->origin,
-+ inode->i_sb->s_id, inode->i_ino,
-+ inode->i_sb->s_type->name);
-+ printk(KERN_ERR "current %d (%s), VE %d\n",
-+ current->pid, current->comm,
-+ VEID(get_exec_env()));
-+ dump_stack();
-+ qmblk = VZ_QUOTA_BAD;
-+ }
-+ /*
-+ * Prepare @qlnk so that it refers to @qmblk. Whenever a step had to
-+ * drop the locks, return 1 and let the caller re-validate and retry;
-+ * @qlnk keeps what has been prepared so far.
-+ */
-+ while (1) {
-+ if (vzquota_qlnk_is_empty(qlnk) &&
-+ vzquota_qlnk_fill(qlnk, inode, qmblk))
-+ return 1;
-+ if (qlnk->qmblk == qmblk)
-+ break;
-+ /* prepared for a different qmblk on an earlier pass: reset */
-+ if (vzquota_qlnk_reinit_locked(qlnk, inode))
-+ return 1;
-+ }
-+ /* atomically install the prepared link into the inode */
-+ vzquota_qlnk_swap(qlnk, INODE_QLNK(inode));
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_QSET;
-+ return 0;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * vzquota_inode_qmblk (inode -> qmblk lookup) parts
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/*
-+ * Check that @inode has at least one dentry alias.
-+ * Returns 0 if it does; otherwise logs an error and returns -1.
-+ */
-+static int vzquota_dparents_check_attach(struct inode *inode)
-+{
-+	if (list_empty(&inode->i_dentry)) {
-+		printk(KERN_ERR "VZDQ: no parent for "
-+				"dev %s, inode %lu, fs %s\n",
-+				inode->i_sb->s_id,
-+				inode->i_ino,
-+				inode->i_sb->s_type->name);
-+		return -1;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Walk all dentry aliases of @inode and return the inode of the first
-+ * parent directory whose quota link is not actual, or NULL if every parent
-+ * is actual. As a side effect each visited parent gets its quota link
-+ * initialized, and its origin field is set to VZ_QUOTAO_ACT.
-+ */
-+static struct inode *vzquota_dparents_check_actual(struct inode *inode)
-+{
-+ struct dentry *de;
-+
-+ list_for_each_entry(de, &inode->i_dentry, d_alias) {
-+ if (de->d_parent == de) /* detached dentry, perhaps */
-+ continue;
-+ /* first access to parent, make sure its qlnk initialized */
-+ __vzquota_inode_init(de->d_parent->d_inode, VZ_QUOTAO_ACT);
-+ if (!VZ_QUOTA_IS_ACTUAL(de->d_parent->d_inode))
-+ return de->d_parent->d_inode;
-+ }
-+ return NULL;
-+}
-+
-+/*
-+ * Determine the qmblk shared by all parents of @inode.
-+ * Returns that qmblk, or VZ_QUOTA_BAD (with a warning) when the parents
-+ * disagree or when no attached parent was found. No reference is taken.
-+ */
-+static struct vz_quota_master *vzquota_dparents_check_same(struct inode *inode)
-+{
-+ struct dentry *de;
-+ struct vz_quota_master *qmblk;
-+
-+ qmblk = NULL;
-+ list_for_each_entry(de, &inode->i_dentry, d_alias) {
-+ if (de->d_parent == de) /* detached dentry, perhaps */
-+ continue;
-+ /* the first attached parent defines the expected qmblk */
-+ if (qmblk == NULL) {
-+ qmblk = INODE_QLNK(de->d_parent->d_inode)->qmblk;
-+ continue;
-+ }
-+ if (INODE_QLNK(de->d_parent->d_inode)->qmblk != qmblk) {
-+ printk(KERN_WARNING "VZDQ: multiple quotas for "
-+ "dev %s, inode %lu, fs %s\n",
-+ inode->i_sb->s_id,
-+ inode->i_ino,
-+ inode->i_sb->s_type->name);
-+ qmblk = VZ_QUOTA_BAD;
-+ break;
-+ }
-+ }
-+ if (qmblk == NULL) {
-+ printk(KERN_WARNING "VZDQ: not attached to tree, "
-+ "dev %s, inode %lu, fs %s\n",
-+ inode->i_sb->s_id,
-+ inode->i_ino,
-+ inode->i_sb->s_type->name);
-+ qmblk = VZ_QUOTA_BAD;
-+ }
-+ return qmblk;
-+}
-+
-+/*
-+ * Actualize one inode on the ancestor chain of @inode.
-+ *
-+ * Starting from @inode, climb through not-actual parents until an inode is
-+ * found whose parents are all actual (or the filesystem root is reached),
-+ * then assign a qmblk to that inode only. The caller is expected to call
-+ * again until the whole branch is actual.
-+ *
-+ * Called under dcache_lock and the inode_qmblk lock of @refinode's
-+ * superblock. Both locks are always dropped and retaken before return
-+ * (to destroy the temporary link and iput() the pinned inode), so the
-+ * caller must re-validate its state afterwards.
-+ */
-+static void vzquota_dbranch_actualize(struct inode *inode,
-+ struct inode *refinode)
-+{
-+ struct inode *pinode;
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ilink qlnk;
-+
-+ vzquota_qlnk_init(&qlnk);
-+
-+start:
-+ if (inode == inode->i_sb->s_root->d_inode) {
-+ /* filesystem root */
-+ /* pin the inode: the locks may be dropped while setting */
-+ atomic_inc(&inode->i_count);
-+ do {
-+ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
-+ } while (vzquota_inode_qmblk_set(inode, qmblk, &qlnk));
-+ goto out;
-+ }
-+
-+ /* climb to the topmost not-actual ancestor */
-+ if (!vzquota_dparents_check_attach(inode)) {
-+ pinode = vzquota_dparents_check_actual(inode);
-+ if (pinode != NULL) {
-+ inode = pinode;
-+ goto start;
-+ }
-+ }
-+
-+ /* pin the inode: the locks may be dropped while setting */
-+ atomic_inc(&inode->i_count);
-+ while (1) {
-+ if (VZ_QUOTA_IS_ACTUAL(inode)) /* actualized without us */
-+ break;
-+ /*
-+ * Need to check parents again if we have slept inside
-+ * vzquota_inode_qmblk_set() in the loop.
-+ * If the state of parents is different, just return and repeat
-+ * the actualizing process again from the inode passed to
-+ * vzquota_inode_qmblk_recalc().
-+ */
-+ if (!vzquota_dparents_check_attach(inode)) {
-+ if (vzquota_dparents_check_actual(inode) != NULL)
-+ break;
-+ qmblk = vzquota_dparents_check_same(inode);
-+ } else
-+ qmblk = VZ_QUOTA_BAD;
-+ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk)){/* success */
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_ACT;
-+ break;
-+ }
-+ }
-+
-+out:
-+ /* destroy and iput() may sleep: run them without the locks */
-+ spin_unlock(&dcache_lock);
-+ inode_qmblk_unlock(refinode->i_sb);
-+ vzquota_qlnk_destroy(&qlnk);
-+ iput(inode);
-+ inode_qmblk_lock(refinode->i_sb);
-+ spin_lock(&dcache_lock);
-+}
-+
-+/*
-+ * Recalculate the qmblk of an inode that has dentry aliases.
-+ * @qlnk is caller-provided scratch space, destroyed by the caller.
-+ *
-+ * The filesystem root gets the superblock's "no quota" record. Any other
-+ * inode takes the qmblk common to its parents after all not-actual
-+ * ancestors have been actualized; an inode without attached parents gets
-+ * VZ_QUOTA_BAD. Called with dcache_lock held (taken by
-+ * vzquota_inode_qmblk_recalc()); the locks may be dropped and retaken
-+ * inside.
-+ */
-+static void vzquota_dtree_qmblk_recalc(struct inode *inode,
-+ struct vz_quota_ilink *qlnk)
-+{
-+ struct inode *pinode;
-+ struct vz_quota_master *qmblk;
-+
-+ if (inode == inode->i_sb->s_root->d_inode) {
-+ /* filesystem root */
-+ do {
-+ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
-+ } while (vzquota_inode_qmblk_set(inode, qmblk, qlnk));
-+ return;
-+ }
-+
-+start:
-+ if (VZ_QUOTA_IS_ACTUAL(inode))
-+ return;
-+ /*
-+ * Here qmblk is (re-)initialized for all ancestors.
-+ * This is not a very efficient procedure, but it guarantees that
-+ * the quota tree is consistent (that is, the inode doesn't have two
-+ * ancestors with different qmblk).
-+ */
-+ if (!vzquota_dparents_check_attach(inode)) {
-+ pinode = vzquota_dparents_check_actual(inode);
-+ if (pinode != NULL) {
-+ /* locks were dropped inside: start over */
-+ vzquota_dbranch_actualize(pinode, inode);
-+ goto start;
-+ }
-+ qmblk = vzquota_dparents_check_same(inode);
-+ } else
-+ qmblk = VZ_QUOTA_BAD;
-+
-+ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
-+ goto start;
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_DTREE;
-+}
-+
-+/*
-+ * Recalculate the qmblk of an inode that has no dentry aliases.
-+ * @qlnk is caller-provided scratch space, destroyed by the caller.
-+ *
-+ * The expected case is an inode under creation: the parent directory was
-+ * stashed in the current task by vzquota_cur_qmblk_set() and the new inode
-+ * inherits the parent's qmblk. Every check below sets qmblk and msg
-+ * before testing, so that on failure the "fail" path can report msg and
-+ * still assign the fallback qmblk chosen at that point (the superblock's
-+ * "no quota" record for the first check, VZ_QUOTA_BAD for the others).
-+ */
-+static void vzquota_det_qmblk_recalc(struct inode *inode,
-+ struct vz_quota_ilink *qlnk)
-+{
-+ struct inode *parent;
-+ struct vz_quota_master *qmblk;
-+ char *msg;
-+ int cnt;
-+ time_t timeout;
-+
-+ cnt = 0;
-+ parent = NULL;
-+start:
-+ /*
-+ * qmblk of detached inodes shouldn't be considered as not actual.
-+ * They are not in any dentry tree, so quota on/off shouldn't affect
-+ * them.
-+ */
-+ if (!vzquota_qlnk_is_empty(INODE_QLNK(inode)))
-+ return;
-+
-+ /* this failure is reported only 3+ seconds after quota setup on sb */
-+ timeout = 3;
-+ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
-+ msg = "detached inode not in creation";
-+ if (inode->i_op != VZ_QUOTA_EMPTY_IOPS)
-+ goto fail;
-+ qmblk = VZ_QUOTA_BAD;
-+ msg = "unexpected creation context";
-+ if (!vzquota_cur_qmblk_check())
-+ goto fail;
-+ /* NOTE(review): with timeout 0 the failures below look always logged */
-+ timeout = 0;
-+ parent = vzquota_cur_qmblk_fetch();
-+ msg = "uninitialized parent";
-+ if (vzquota_qlnk_is_empty(INODE_QLNK(parent)))
-+ goto fail;
-+ msg = "parent not in tree";
-+ if (list_empty(&parent->i_dentry))
-+ goto fail;
-+ msg = "parent has 0 refcount";
-+ if (!atomic_read(&parent->i_count))
-+ goto fail;
-+ msg = "parent has different sb";
-+ if (parent->i_sb != inode->i_sb)
-+ goto fail;
-+ if (!VZ_QUOTA_IS_ACTUAL(parent)) {
-+ /* locks were dropped inside: redo all checks */
-+ vzquota_dbranch_actualize(parent, inode);
-+ goto start;
-+ }
-+
-+ qmblk = INODE_QLNK(parent)->qmblk;
-+set:
-+ if (vzquota_inode_qmblk_set(inode, qmblk, qlnk))
-+ goto start;
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_DET;
-+ return;
-+
-+fail:
-+ {
-+ struct timeval tv, tvo;
-+ /* tv = now - timestamp stored when quota was set up on the sb */
-+ do_gettimeofday(&tv);
-+ memcpy(&tvo, __VZ_QUOTA_TSTAMP(inode->i_sb), sizeof(tvo));
-+ tv.tv_sec -= tvo.tv_sec;
-+ if (tv.tv_usec < tvo.tv_usec) {
-+ tv.tv_sec--;
-+ tv.tv_usec += USEC_PER_SEC - tvo.tv_usec;
-+ } else
-+ tv.tv_usec -= tvo.tv_usec;
-+ /* within the grace period: assign the fallback silently */
-+ if (tv.tv_sec < timeout)
-+ goto set;
-+ printk(KERN_ERR "VZDQ: %s, orig %u,"
-+ " dev %s, inode %lu, fs %s\n",
-+ msg, INODE_QLNK(inode)->origin,
-+ inode->i_sb->s_id, inode->i_ino,
-+ inode->i_sb->s_type->name);
-+ /* task details and stack are dumped once per call */
-+ if (!cnt++) {
-+ printk(KERN_ERR "current %d (%s), VE %d,"
-+ " time %ld.%06ld\n",
-+ current->pid, current->comm,
-+ VEID(get_exec_env()),
-+ tv.tv_sec, tv.tv_usec);
-+ dump_stack();
-+ }
-+ if (parent != NULL)
-+ printk(KERN_ERR "VZDQ: parent of %lu is %lu\n",
-+ inode->i_ino, parent->i_ino);
-+ }
-+ goto set;
-+}
-+
-+/*
-+ * Recompute the qmblk of @inode under dcache_lock, picking the detached
-+ * or the in-tree procedure depending on whether the inode has any dentry
-+ * alias. @qlnk is scratch space owned by the caller.
-+ */
-+static void vzquota_inode_qmblk_recalc(struct inode *inode,
-+		struct vz_quota_ilink *qlnk)
-+{
-+	spin_lock(&dcache_lock);
-+	if (list_empty(&inode->i_dentry))
-+		vzquota_det_qmblk_recalc(inode, qlnk);
-+	else
-+		vzquota_dtree_qmblk_recalc(inode, qlnk);
-+	spin_unlock(&dcache_lock);
-+}
-+
-+/**
-+ * vzquota_inode_qmblk - obtain inode's qmblk
-+ * @inode: inode to look up
-+ *
-+ * Returns qmblk with refcounter taken, %NULL if not under
-+ * VZ quota or %VZ_QUOTA_BAD.
-+ * %NULL is returned both when the superblock does not use VZ quota
-+ * operations and when the inode belongs to a "no quota" record.
-+ * May sleep.
-+ *
-+ * FIXME: This function should be removed when vzquota_find_qmblk /
-+ * get_quota_root / vzquota_dstat code is cleaned up.
-+ */
-+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ilink qlnk;
-+
-+ might_sleep();
-+
-+ if (inode->i_sb->dq_op != &vz_quota_operations)
-+ return NULL;
-+#if defined(VZ_QUOTA_UNLOAD)
-+#error Make sure qmblk does not disappear
-+#endif
-+
-+ /* qlnk is scratch space for the recalculation, destroyed below */
-+ vzquota_qlnk_init(&qlnk);
-+ inode_qmblk_lock(inode->i_sb);
-+ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
-+
-+ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
-+ !VZ_QUOTA_IS_ACTUAL(inode))
-+ vzquota_inode_qmblk_recalc(inode, &qlnk);
-+
-+ qmblk = INODE_QLNK(inode)->qmblk;
-+ if (qmblk != VZ_QUOTA_BAD) {
-+ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb))
-+ qmblk_get(qmblk);
-+ else
-+ qmblk = NULL;
-+ }
-+
-+ inode_qmblk_unlock(inode->i_sb);
-+ vzquota_qlnk_destroy(&qlnk);
-+ return qmblk;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Calls from quota operations
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/**
-+ * vzquota_inode_init_call - call from DQUOT_INIT
-+ * @inode: inode whose quota link must be set up
-+ *
-+ * Initializes the inode's quota link and, if the inode has dentry aliases,
-+ * records it in the current task so that an inode created next can find
-+ * its parent (see vzquota_det_qmblk_recalc()).
-+ */
-+void vzquota_inode_init_call(struct inode *inode)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_datast data;
-+
-+ /* initializes inode's quota inside */
-+ qmblk = vzquota_inode_data(inode, &data);
-+ /* locks are held only for a real qmblk; nothing else is needed here */
-+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
-+ vzquota_data_unlock(inode, &data);
-+
-+ /*
-+ * The check is needed for repeated new_inode() calls from a single
-+ * ext3 call like create or mkdir in case of -ENOSPC.
-+ */
-+ spin_lock(&dcache_lock);
-+ if (!list_empty(&inode->i_dentry))
-+ vzquota_cur_qmblk_set(inode);
-+ spin_unlock(&dcache_lock);
-+}
-+
-+/**
-+ * vzquota_inode_drop_call - call from DQUOT_DROP
-+ * @inode: inode whose VZ quota information is destroyed
-+ *
-+ * Thin exported wrapper around the static vzquota_inode_drop().
-+ */
-+void vzquota_inode_drop_call(struct inode *inode)
-+{
-+ vzquota_inode_drop(inode);
-+}
-+
-+/**
-+ * vzquota_inode_data - initialize (if nec.) and lock inode quota ptrs
-+ * @inode: the inode
-+ * @data: storage space
-+ *
-+ * Returns: qmblk is NULL or VZ_QUOTA_BAD or actualized qmblk.
-+ * On return if qmblk is neither NULL nor VZ_QUOTA_BAD:
-+ * qmblk in inode's qlnk is the same as returned,
-+ * ugid pointers inside inode's qlnk are valid,
-+ * some locks are taken (and should be released by vzquota_data_unlock).
-+ * If qmblk is NULL or VZ_QUOTA_BAD, locks are NOT taken.
-+ *
-+ * The locks held on such a return are the inode_qmblk lock of the inode's
-+ * superblock and the qmblk data write lock. No extra reference is taken
-+ * on the returned qmblk. May sleep.
-+ * NOTE(review): on the NULL / VZ_QUOTA_BAD paths data->qlnk is not
-+ * destroyed here; confirm that callers cannot leak what the recalculation
-+ * may have left in it.
-+ */
-+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
-+ struct vz_quota_datast *data)
-+{
-+ struct vz_quota_master *qmblk;
-+
-+ might_sleep();
-+
-+ vzquota_qlnk_init(&data->qlnk);
-+ inode_qmblk_lock(inode->i_sb);
-+ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
-+
-+ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
-+ !VZ_QUOTA_IS_ACTUAL(inode))
-+ vzquota_inode_qmblk_recalc(inode, &data->qlnk);
-+
-+ qmblk = INODE_QLNK(inode)->qmblk;
-+ if (qmblk != VZ_QUOTA_BAD) {
-+ if (!VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb)) {
-+ /*
-+ * Note that in the current implementation,
-+ * inode_qmblk_lock can theoretically be dropped here.
-+ * This place is serialized with quota_off because
-+ * quota_off fails when there are extra dentry
-+ * references and syncs inodes before removing quota
-+ * information from them.
-+ * However, quota usage information should stop being
-+ * updated immediately after vzquota_off.
-+ */
-+ qmblk_data_write_lock(qmblk);
-+ } else {
-+ /* "no quota" record: report as not under quota */
-+ inode_qmblk_unlock(inode->i_sb);
-+ qmblk = NULL;
-+ }
-+ } else {
-+ inode_qmblk_unlock(inode->i_sb);
-+ }
-+ return qmblk;
-+}
-+
-+/*
-+ * Counterpart of vzquota_inode_data() for the case when it returned a real
-+ * qmblk: releases the qmblk data write lock and the inode_qmblk lock, then
-+ * destroys the scratch link kept in @data.
-+ */
-+void vzquota_data_unlock(struct inode *inode,
-+ struct vz_quota_datast *data)
-+{
-+ qmblk_data_write_unlock(INODE_QLNK(inode)->qmblk);
-+ inode_qmblk_unlock(inode->i_sb);
-+ vzquota_qlnk_destroy(&data->qlnk);
-+}
-+
-+#if defined(CONFIG_VZ_QUOTA_UGID)
-+/**
-+ * vzquota_inode_transfer_call - call from vzquota_transfer
-+ * @inode: inode whose owner and/or group is being changed
-+ * @iattr: new attributes; ia_uid / ia_gid are used when ATTR_UID / ATTR_GID
-+ * is set in ia_valid
-+ *
-+ * Builds a new quota link for the new uid/gid, moves the usage with
-+ * vzquota_transfer_usage() and, if that succeeds, installs the new link
-+ * into the inode.
-+ *
-+ * Returns NO_QUOTA if the inode's qmblk is VZ_QUOTA_BAD, QUOTA_OK if the
-+ * inode is not under quota or ugid quota is off, otherwise the result of
-+ * vzquota_transfer_usage(). May sleep.
-+ */
-+int vzquota_inode_transfer_call(struct inode *inode, struct iattr *iattr)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_datast data;
-+ struct vz_quota_ilink qlnew;
-+ int mask;
-+ int ret;
-+
-+ might_sleep();
-+ vzquota_qlnk_init(&qlnew);
-+start:
-+ qmblk = vzquota_inode_data(inode, &data);
-+ ret = NO_QUOTA;
-+ if (qmblk == VZ_QUOTA_BAD)
-+ goto out_destr;
-+ ret = QUOTA_OK;
-+ if (qmblk == NULL)
-+ goto out_destr;
-+ /* keep qmblk alive across the lock drops below */
-+ qmblk_get(qmblk);
-+
-+ ret = QUOTA_OK;
-+ if (!(qmblk->dq_flags & VZDQUG_ON))
-+ /* no ugid quotas */
-+ goto out_unlock;
-+
-+ /* one bit per quota type whose id really changes */
-+ mask = 0;
-+ if ((iattr->ia_valid & ATTR_UID) && iattr->ia_uid != inode->i_uid)
-+ mask |= 1 << USRQUOTA;
-+ if ((iattr->ia_valid & ATTR_GID) && iattr->ia_gid != inode->i_gid)
-+ mask |= 1 << GRPQUOTA;
-+ /*
-+ * Prepare qlnew for the current qmblk. A "break" means the locks were
-+ * dropped inside a helper, so everything is re-validated from "start";
-+ * qlnew keeps its content across the restart.
-+ */
-+ while (1) {
-+ if (vzquota_qlnk_is_empty(&qlnew) &&
-+ vzquota_qlnk_fill_attr(&qlnew, inode, iattr, mask, qmblk))
-+ break;
-+ if (qlnew.qmblk == INODE_QLNK(inode)->qmblk &&
-+ qlnew.qmblk == qmblk)
-+ goto finish;
-+ if (vzquota_qlnk_reinit_attr(&qlnew, inode, qmblk))
-+ break;
-+ }
-+
-+ /* prepare for restart */
-+ vzquota_data_unlock(inode, &data);
-+ qmblk_put(qmblk);
-+ goto start;
-+
-+finish:
-+ /* all references obtained successfully */
-+ ret = vzquota_transfer_usage(inode, mask, &qlnew);
-+ if (!ret) {
-+ /* the old link ends up in qlnew and is destroyed below */
-+ vzquota_qlnk_swap(&qlnew, INODE_QLNK(inode));
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_TRANS;
-+ }
-+out_unlock:
-+ vzquota_data_unlock(inode, &data);
-+ qmblk_put(qmblk);
-+out_destr:
-+ vzquota_qlnk_destroy(&qlnew);
-+ return ret;
-+}
-+#endif
-+
-+/*
-+ * vzquota_rename_check - check whether a rename keeps the inode in its quota
-+ * @inode: inode being renamed
-+ * @old_dir: directory the inode is moved from
-+ * @new_dir: directory the inode is moved to
-+ *
-+ * Returns 0 if the rename is allowed, -1 otherwise. A rename across
-+ * superblocks is always refused. Otherwise the inode and @new_dir must
-+ * belong to the same qmblk, except that the root of a quota tree may be
-+ * moved between two directories that are both outside any quota.
-+ * May sleep.
-+ */
-+int vzquota_rename_check(struct inode *inode,
-+ struct inode *old_dir, struct inode *new_dir)
-+{
-+ struct vz_quota_master *qmblk;
-+ struct vz_quota_ilink qlnk1, qlnk2;
-+ int c, ret;
-+
-+ if (inode->i_sb != old_dir->i_sb || inode->i_sb != new_dir->i_sb)
-+ return -1;
-+
-+ might_sleep();
-+
-+ vzquota_qlnk_init(&qlnk1);
-+ vzquota_qlnk_init(&qlnk2);
-+ inode_qmblk_lock(inode->i_sb);
-+ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
-+ __vzquota_inode_init(old_dir, VZ_QUOTAO_INICAL);
-+ __vzquota_inode_init(new_dir, VZ_QUOTAO_INICAL);
-+
-+ /*
-+ * A recalculation may drop the locks, which can invalidate the other
-+ * inode again: repeat until a full pass needs no recalculation.
-+ */
-+ do {
-+ c = 0;
-+ if (vzquota_qlnk_is_empty(INODE_QLNK(inode)) ||
-+ !VZ_QUOTA_IS_ACTUAL(inode)) {
-+ vzquota_inode_qmblk_recalc(inode, &qlnk1);
-+ c++;
-+ }
-+ if (vzquota_qlnk_is_empty(INODE_QLNK(new_dir)) ||
-+ !VZ_QUOTA_IS_ACTUAL(new_dir)) {
-+ vzquota_inode_qmblk_recalc(new_dir, &qlnk2);
-+ c++;
-+ }
-+ } while (c);
-+
-+ ret = 0;
-+ qmblk = INODE_QLNK(inode)->qmblk;
-+ if (qmblk != INODE_QLNK(new_dir)->qmblk) {
-+ ret = -1;
-+ /*
-+ * NOTE(review): VZ_QUOTA_IS_NOQUOTA() dereferences its argument.
-+ * new_dir's qmblk is not checked against VZ_QUOTA_BAD here, and
-+ * old_dir is only initialized above, never recalculated, so its
-+ * qmblk might still be the empty sentinel. Confirm that neither
-+ * can reach this point.
-+ */
-+ if (qmblk != VZ_QUOTA_BAD &&
-+ !VZ_QUOTA_IS_NOQUOTA(qmblk, inode->i_sb) &&
-+ qmblk->dq_root_dentry->d_inode == inode &&
-+ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(new_dir)->qmblk,
-+ inode->i_sb) &&
-+ VZ_QUOTA_IS_NOQUOTA(INODE_QLNK(old_dir)->qmblk,
-+ inode->i_sb))
-+ /* quota root rename is allowed */
-+ ret = 0;
-+ }
-+
-+ inode_qmblk_unlock(inode->i_sb);
-+ vzquota_qlnk_destroy(&qlnk2);
-+ vzquota_qlnk_destroy(&qlnk1);
-+ return ret;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * qmblk-related parts of on/off operations
-+ *
-+ * --------------------------------------------------------------------- */
-+
-+/**
-+ * vzquota_check_dtree - check dentry tree if quota on/off is allowed
-+ *
-+ * This function doesn't allow quota to be turned on/off if some dentries in
-+ * the tree have external references.
-+ * In addition to technical reasons, it enforces user-space correctness:
-+ * current usage (taken from or reported to the user space) can be meaningful
-+ * and accurate only if the tree is not being modified.
-+ * Side effect: additional vfsmount structures referencing the tree (bind
-+ * mounts of tree nodes to some other places) are not allowed at on/off time.
-+ */
-+int vzquota_check_dtree(struct vz_quota_master *qmblk, int off)
-+{
-+ struct dentry *dentry;
-+ int err, count;
-+
-+ err = -EBUSY;
-+ dentry = qmblk->dq_root_dentry;
-+
-+ if (d_unhashed(dentry) && dentry != dentry->d_sb->s_root)
-+ goto unhashed;
-+
-+ /* attempt to shrink */
-+ if (!list_empty(&dentry->d_subdirs)) {
-+ spin_unlock(&dcache_lock);
-+ inode_qmblk_unlock(dentry->d_sb);
-+ shrink_dcache_parent(dentry);
-+ inode_qmblk_lock(dentry->d_sb);
-+ spin_lock(&dcache_lock);
-+ if (!list_empty(&dentry->d_subdirs))
-+ goto out;
-+
-+ count = 1;
-+ if (dentry == dentry->d_sb->s_root)
-+ count += 2; /* sb and mnt refs */
-+ if (atomic_read(&dentry->d_count) < count) {
-+ printk(KERN_ERR "%s: too small count %d vs %d.\n",
-+ __FUNCTION__,
-+ atomic_read(&dentry->d_count), count);
-+ goto out;
-+ }
-+ if (atomic_read(&dentry->d_count) > count)
-+ goto out;
-+ }
-+
-+ err = 0;
-+out:
-+ return err;
-+
-+unhashed:
-+ /*
-+ * Quota root has been removed.
-+ * Allow quota to be turned off, but not on.
-+ */
-+ if (off)
-+ err = 0;
-+ goto out;
-+}
-+
-+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
-+ struct vz_quota_master *qmblk)
-+{
-+ struct vz_quota_ilink qlnk;
-+ struct vz_quota_master *qold, *qnew;
-+ int err;
-+
-+ might_sleep();
-+
-+ qold = NULL;
-+ qnew = vzquota_alloc_fake();
-+ if (qnew == NULL)
-+ return -ENOMEM;
-+
-+ vzquota_qlnk_init(&qlnk);
-+ inode_qmblk_lock(sb);
-+ __vzquota_inode_init(inode, VZ_QUOTAO_INICAL);
-+
-+ spin_lock(&dcache_lock);
-+ while (1) {
-+ err = vzquota_check_dtree(qmblk, 0);
-+ if (err)
-+ break;
-+ if (!vzquota_inode_qmblk_set(inode, qmblk, &qlnk))
-+ break;
-+ }
-+ INODE_QLNK(inode)->origin = VZ_QUOTAO_ON;
-+ spin_unlock(&dcache_lock);
-+
-+ if (!err) {
-+ qold = __VZ_QUOTA_NOQUOTA(sb);
-+ qold->dq_flags |= VZDQ_NOACT;
-+ __VZ_QUOTA_NOQUOTA(sb) = qnew;
-+ }
-+
-+ inode_qmblk_unlock(sb);
-+ vzquota_qlnk_destroy(&qlnk);
-+ if (qold != NULL)
-+ qmblk_put(qold);
-+
-+ return err;
-+}
-+
-+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk)
-+{
-+ int ret;
-+
-+ ret = 0;
-+ inode_qmblk_lock(sb);
-+
-+ spin_lock(&dcache_lock);
-+ if (vzquota_check_dtree(qmblk, 1))
-+ ret = -EBUSY;
-+ spin_unlock(&dcache_lock);
-+
-+ if (!ret)
-+ qmblk->dq_flags |= VZDQ_NOACT | VZDQ_NOQUOT;
-+ inode_qmblk_unlock(sb);
-+ return ret;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * External interfaces
-+ *
-+ * ---------------------------------------------------------------------*/
-+
-+static int vzquota_ioctl(struct inode *ino, struct file *file,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ int err;
-+ struct vzctl_quotactl qb;
-+ struct vzctl_quotaugidctl qub;
-+
-+ switch (cmd) {
-+ case VZCTL_QUOTA_CTL:
-+ err = -ENOTTY;
-+ break;
-+ case VZCTL_QUOTA_NEW_CTL:
-+ err = -EFAULT;
-+ if (copy_from_user(&qb, (void *)arg, sizeof(qb)))
-+ break;
-+ err = do_vzquotactl(qb.cmd, qb.quota_id,
-+ qb.qstat, qb.ve_root);
-+ break;
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ case VZCTL_QUOTA_UGID_CTL:
-+ err = -EFAULT;
-+ if (copy_from_user(&qub, (void *)arg, sizeof(qub)))
-+ break;
-+ err = do_vzquotaugidctl(&qub);
-+ break;
-+#endif
-+ default:
-+ err = -ENOTTY;
-+ }
-+ might_sleep(); /* debug */
-+ return err;
-+}
-+
-+static struct vzioctlinfo vzdqcalls = {
-+ .type = VZDQCTLTYPE,
-+ .func = vzquota_ioctl,
-+ .owner = THIS_MODULE,
-+};
-+
-+/**
-+ * vzquota_dstat - get quota usage info for virtual superblock
-+ */
-+static int vzquota_dstat(struct super_block *super, struct dq_stat *qstat)
-+{
-+ struct vz_quota_master *qmblk;
-+
-+ qmblk = vzquota_find_qmblk(super);
-+ if (qmblk == NULL)
-+ return -ENOENT;
-+ if (qmblk == VZ_QUOTA_BAD) {
-+ memset(qstat, 0, sizeof(*qstat));
-+ return 0;
-+ }
-+
-+ qmblk_data_read_lock(qmblk);
-+ memcpy(qstat, &qmblk->dq_stat, sizeof(*qstat));
-+ qmblk_data_read_unlock(qmblk);
-+ qmblk_put(qmblk);
-+ return 0;
-+}
-+
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Init/exit helpers
-+ *
-+ * ---------------------------------------------------------------------*/
-+
-+static int vzquota_cache_init(void)
-+{
-+ int i;
-+
-+ vzquota_cachep = kmem_cache_create("vz_quota_master",
-+ sizeof(struct vz_quota_master),
-+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ if (vzquota_cachep == NULL) {
-+ printk(KERN_ERR "Cannot create VZ_QUOTA SLAB cache\n");
-+ goto nomem2;
-+ }
-+ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
-+ INIT_LIST_HEAD(&vzquota_hash_table[i]);
-+
-+ return 0;
-+
-+nomem2:
-+ return -ENOMEM;
-+}
-+
-+static void vzquota_cache_release(void)
-+{
-+ int i;
-+
-+ /* sanity check */
-+ for (i = 0; i < VZ_QUOTA_HASH_SIZE; i++)
-+ if (!list_empty(&vzquota_hash_table[i]))
-+ BUG();
-+
-+ /* release caches */
-+ if (kmem_cache_destroy(vzquota_cachep))
-+ printk(KERN_ERR
-+ "VZQUOTA: vz_quota_master kmem_cache_destroy failed\n");
-+ vzquota_cachep = NULL;
-+}
-+
-+static int quota_notifier_call(struct vnotifier_block *self,
-+ unsigned long n, void *data, int err)
-+{
-+ struct virt_info_quota *viq;
-+ struct super_block *sb;
-+
-+ viq = (struct virt_info_quota *)data;
-+ switch (n) {
-+ case VIRTINFO_QUOTA_ON:
-+ err = NOTIFY_BAD;
-+ if (!try_module_get(THIS_MODULE))
-+ break;
-+ sb = viq->super;
-+ memset(&sb->s_dquot.info, 0, sizeof(sb->s_dquot.info));
-+ INIT_LIST_HEAD(&sb->s_dquot.info[USRQUOTA].dqi_dirty_list);
-+ INIT_LIST_HEAD(&sb->s_dquot.info[GRPQUOTA].dqi_dirty_list);
-+ err = NOTIFY_OK;
-+ break;
-+ case VIRTINFO_QUOTA_OFF:
-+ module_put(THIS_MODULE);
-+ err = NOTIFY_OK;
-+ break;
-+ case VIRTINFO_QUOTA_GETSTAT:
-+ err = NOTIFY_BAD;
-+ if (vzquota_dstat(viq->super, viq->qstat))
-+ break;
-+ err = NOTIFY_OK;
-+ break;
-+ }
-+ return err;
-+}
-+
-+struct vnotifier_block quota_notifier_block = {
-+ .notifier_call = quota_notifier_call,
-+ .priority = INT_MAX,
-+};
-+
-+/* ----------------------------------------------------------------------
-+ *
-+ * Init/exit procedures
-+ *
-+ * ---------------------------------------------------------------------*/
-+
-+static int __init vzquota_init(void)
-+{
-+ int err;
-+
-+ if ((err = vzquota_cache_init()) != 0)
-+ goto out_cache;
-+
-+ if ((err = vzquota_proc_init()) != 0)
-+ goto out_proc;
-+
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+ if ((err = vzquota_ugid_init()) != 0)
-+ goto out_ugid;
-+#endif
-+
-+ init_MUTEX(&vz_quota_sem);
-+ vzioctl_register(&vzdqcalls);
-+ virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
-+#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
-+ vzaquota_init();
-+#endif
-+
-+ return 0;
-+
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+out_ugid:
-+ vzquota_proc_release();
-+#endif
-+out_proc:
-+ vzquota_cache_release();
-+out_cache:
-+ return err;
-+}
-+
-+#if defined(VZ_QUOTA_UNLOAD)
-+static void __exit vzquota_release(void)
-+{
-+ virtinfo_notifier_unregister(VITYPE_QUOTA, &quota_notifier_block);
-+ vzioctl_unregister(&vzdqcalls);
-+#ifdef CONFIG_VZ_QUOTA_UGID
-+#ifdef CONFIG_PROC_FS
-+ vzaquota_fini();
-+#endif
-+ vzquota_ugid_release();
-+#endif
-+ vzquota_proc_release();
-+ vzquota_cache_release();
-+}
-+#endif
-+
-+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
-+MODULE_DESCRIPTION("Virtuozzo Disk Quota");
-+MODULE_LICENSE("GPL v2");
-+
-+module_init(vzquota_init)
-+#if defined(VZ_QUOTA_UNLOAD)
-+module_exit(vzquota_release)
-+#endif
-diff -upr linux-2.6.16.orig/fs/xattr.c linux-2.6.16-026test009/fs/xattr.c
---- linux-2.6.16.orig/fs/xattr.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/xattr.c 2006-04-19 15:02:11.000000000 +0400
-@@ -58,7 +58,7 @@ xattr_permission(struct inode *inode, co
- return -EPERM;
- }
-
-- return permission(inode, mask, NULL);
-+ return permission(inode, mask, NULL, NULL);
- }
-
- int
-diff -upr linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_aops.c linux-2.6.16-026test009/fs/xfs/linux-2.6/xfs_aops.c
---- linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_aops.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/xfs/linux-2.6/xfs_aops.c 2006-04-19 15:02:11.000000000 +0400
-@@ -616,7 +616,7 @@ xfs_is_delayed_page(
- acceptable = (type == IOMAP_UNWRITTEN);
- else if (buffer_delay(bh))
- acceptable = (type == IOMAP_DELAY);
-- else if (buffer_mapped(bh))
-+ else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable = (type == 0);
- else
- break;
-diff -upr linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.16-026test009/fs/xfs/linux-2.6/xfs_iops.c
---- linux-2.6.16.orig/fs/xfs/linux-2.6/xfs_iops.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/fs/xfs/linux-2.6/xfs_iops.c 2006-04-19 15:02:11.000000000 +0400
-@@ -615,7 +615,8 @@ STATIC int
- linvfs_permission(
- struct inode *inode,
- int mode,
-- struct nameidata *nd)
-+ struct nameidata *nd,
-+ struct exec_perm *perm)
- {
- vnode_t *vp = LINVFS_GET_VP(inode);
- int error;
-@@ -673,8 +674,7 @@ linvfs_setattr(
- if (ia_valid & ATTR_ATIME) {
- vattr.va_mask |= XFS_AT_ATIME;
- vattr.va_atime = attr->ia_atime;
-- if (ia_valid & ATTR_ATIME_SET)
-- inode->i_atime = attr->ia_atime;
-+ inode->i_atime = attr->ia_atime;
- }
- if (ia_valid & ATTR_MTIME) {
- vattr.va_mask |= XFS_AT_MTIME;
-diff -upr linux-2.6.16.orig/include/asm-arm26/tlbflush.h linux-2.6.16-026test009/include/asm-arm26/tlbflush.h
---- linux-2.6.16.orig/include/asm-arm26/tlbflush.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-arm26/tlbflush.h 2006-04-19 15:02:12.000000000 +0400
-@@ -25,7 +25,7 @@ static inline void memc_update_all(void)
- {
- struct task_struct *p;
- cpu_memc_update_all(init_mm.pgd);
-- for_each_process(p) {
-+ for_each_process_all(p) {
- if (!p->mm)
- continue;
- cpu_memc_update_all(p->mm->pgd);
-diff -upr linux-2.6.16.orig/include/asm-generic/atomic.h linux-2.6.16-026test009/include/asm-generic/atomic.h
---- linux-2.6.16.orig/include/asm-generic/atomic.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-generic/atomic.h 2006-04-19 15:02:12.000000000 +0400
-@@ -66,6 +66,13 @@ static inline void atomic_long_sub(long
- atomic64_sub(i, v);
- }
-
-+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-+{
-+ atomic64_t *v = (atomic64_t *)l;
-+
-+ return atomic64_add_negative(i, v);
-+}
-+
- #else
-
- typedef atomic_t atomic_long_t;
-@@ -113,5 +120,12 @@ static inline void atomic_long_sub(long
- atomic_sub(i, v);
- }
-
-+static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-+{
-+ atomic_t *v = (atomic_t *)l;
-+
-+ return atomic_add_negative(i, v);
-+}
-+
- #endif
- #endif
-diff -upr linux-2.6.16.orig/include/asm-i386/bug.h linux-2.6.16-026test009/include/asm-i386/bug.h
---- linux-2.6.16.orig/include/asm-i386/bug.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/bug.h 2006-04-19 15:02:11.000000000 +0400
-@@ -14,7 +14,10 @@
- #ifdef CONFIG_DEBUG_BUGVERBOSE
- #define BUG() \
- __asm__ __volatile__( "ud2\n" \
-+ "\t.byte 0x66\n"\
-+ "\t.byte 0xb8\n" /* mov $xxx, %ax */\
- "\t.word %c0\n" \
-+ "\t.byte 0xb8\n" /* mov $xxx, %eax */\
- "\t.long %c1\n" \
- : : "i" (__LINE__), "i" (__FILE__))
- #else
-diff -upr linux-2.6.16.orig/include/asm-i386/elf.h linux-2.6.16-026test009/include/asm-i386/elf.h
---- linux-2.6.16.orig/include/asm-i386/elf.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/elf.h 2006-04-19 15:02:12.000000000 +0400
-@@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
- For the moment, we have only optimizations for the Intel generations,
- but that could change... */
-
--#define ELF_PLATFORM (system_utsname.machine)
-+#define ELF_PLATFORM (ve_utsname.machine)
-
- #ifdef __KERNEL__
- #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
-@@ -136,8 +136,10 @@ extern void __kernel_vsyscall;
-
- #define ARCH_DLINFO \
- do { \
-+ if (sysctl_at_vsyscall) { \
- NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
- NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
-+ } \
- } while (0)
-
- /*
-diff -upr linux-2.6.16.orig/include/asm-i386/mman.h linux-2.6.16-026test009/include/asm-i386/mman.h
---- linux-2.6.16.orig/include/asm-i386/mman.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/mman.h 2006-04-19 15:02:11.000000000 +0400
-@@ -10,6 +10,7 @@
- #define MAP_NORESERVE 0x4000 /* don't check for reservations */
- #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
- #define MAP_NONBLOCK 0x10000 /* do not block on IO */
-+#define MAP_EXECPRIO 0x20000 /* do soft ubc charge */
-
- #define MCL_CURRENT 1 /* lock all current mappings */
- #define MCL_FUTURE 2 /* lock all future mappings */
-diff -upr linux-2.6.16.orig/include/asm-i386/nmi.h linux-2.6.16-026test009/include/asm-i386/nmi.h
---- linux-2.6.16.orig/include/asm-i386/nmi.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/nmi.h 2006-04-19 15:02:11.000000000 +0400
-@@ -17,6 +17,7 @@ typedef int (*nmi_callback_t)(struct pt_
- * set. Return 1 if the NMI was handled.
- */
- void set_nmi_callback(nmi_callback_t callback);
-+void set_nmi_ipi_callback(nmi_callback_t callback);
-
- /**
- * unset_nmi_callback
-@@ -24,5 +25,6 @@ void set_nmi_callback(nmi_callback_t cal
- * Remove the handler previously set.
- */
- void unset_nmi_callback(void);
-+void unset_nmi_ipi_callback(void);
-
- #endif /* ASM_NMI_H */
-diff -upr linux-2.6.16.orig/include/asm-i386/thread_info.h linux-2.6.16-026test009/include/asm-i386/thread_info.h
---- linux-2.6.16.orig/include/asm-i386/thread_info.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/thread_info.h 2006-04-19 15:02:12.000000000 +0400
-@@ -101,13 +101,13 @@ register unsigned long current_stack_poi
- ({ \
- struct thread_info *ret; \
- \
-- ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \
-+ ret = kmalloc(THREAD_SIZE, GFP_KERNEL_UBC); \
- if (ret) \
- memset(ret, 0, THREAD_SIZE); \
- ret; \
- })
- #else
--#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
-+#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL_UBC)
- #endif
-
- #define free_thread_info(info) kfree(info)
-@@ -142,7 +142,8 @@ register unsigned long current_stack_poi
- #define TIF_SECCOMP 8 /* secure computing */
- #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
- #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
--#define TIF_MEMDIE 17
-+#define TIF_FREEZE 17 /* Freeze request, atomic version of PF_FREEZE */
-+#define TIF_MEMDIE 18
-
- #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
- #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
-diff -upr linux-2.6.16.orig/include/asm-i386/timex.h linux-2.6.16-026test009/include/asm-i386/timex.h
---- linux-2.6.16.orig/include/asm-i386/timex.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/timex.h 2006-04-19 15:02:12.000000000 +0400
-@@ -36,13 +36,17 @@ static inline cycles_t get_cycles (void)
- {
- unsigned long long ret=0;
-
--#ifndef CONFIG_X86_TSC
-- if (!cpu_has_tsc)
-- return 0;
--#endif
--
- #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
- rdtscll(ret);
-+#elif defined(CONFIG_VE)
-+ /*
-+ * get_cycles is used in the following calculations:
-+ * - VPS idle and iowait times in kernel/sched.h
-+ * - task's sleep time to be shown with SysRq-t
-+ * - kstat latencies in linux/vzstat.h
-+ * - sched latency via wakeup_stamp in linux/ve_task.h
-+ */
-+#warning "some of VPS statistics won't be correct without get_cycles() (kstat_lat, ve_idle, etc)"
- #endif
- return ret;
- }
-diff -upr linux-2.6.16.orig/include/asm-i386/unistd.h linux-2.6.16-026test009/include/asm-i386/unistd.h
---- linux-2.6.16.orig/include/asm-i386/unistd.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-i386/unistd.h 2006-04-19 15:02:11.000000000 +0400
-@@ -316,8 +316,11 @@
- #define __NR_pselect6 308
- #define __NR_ppoll 309
- #define __NR_unshare 310
--
--#define NR_syscalls 311
-+#define __NR_getluid 510
-+#define __NR_setluid 511
-+#define __NR_setublimit 512
-+#define __NR_ubstat 513
-+#define NR_syscalls 513
-
- /*
- * user-visible error numbers are in the range -1 - -128: see
-diff -upr linux-2.6.16.orig/include/asm-ia64/mman.h linux-2.6.16-026test009/include/asm-ia64/mman.h
---- linux-2.6.16.orig/include/asm-ia64/mman.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-ia64/mman.h 2006-04-19 15:02:11.000000000 +0400
-@@ -18,6 +18,7 @@
- #define MAP_NORESERVE 0x04000 /* don't check for reservations */
- #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */
- #define MAP_NONBLOCK 0x10000 /* do not block on IO */
-+#define MAP_EXECPRIO 0x20000 /* soft ubc charge */
-
- #define MCL_CURRENT 1 /* lock all current mappings */
- #define MCL_FUTURE 2 /* lock all future mappings */
-diff -upr linux-2.6.16.orig/include/asm-ia64/pgalloc.h linux-2.6.16-026test009/include/asm-ia64/pgalloc.h
---- linux-2.6.16.orig/include/asm-ia64/pgalloc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-ia64/pgalloc.h 2006-04-19 15:02:11.000000000 +0400
-@@ -20,6 +20,8 @@
- #include <linux/page-flags.h>
- #include <linux/threads.h>
-
-+#include <ub/ub_mem.h>
-+
- #include <asm/mmu_context.h>
-
- DECLARE_PER_CPU(unsigned long *, __pgtable_quicklist);
-@@ -38,7 +40,7 @@ static inline long pgtable_quicklist_tot
- return ql_size;
- }
-
--static inline void *pgtable_quicklist_alloc(void)
-+static inline void *pgtable_quicklist_alloc(int charge)
- {
- unsigned long *ret = NULL;
-
-@@ -46,13 +48,19 @@ static inline void *pgtable_quicklist_al
-
- ret = pgtable_quicklist;
- if (likely(ret != NULL)) {
-+ if (ub_page_charge(virt_to_page(ret), 0,
-+ charge ? __GFP_UBC|__GFP_SOFT_UBC : 0))
-+ goto out;
-+
- pgtable_quicklist = (unsigned long *)(*ret);
- ret[0] = 0;
- --pgtable_quicklist_size;
-+out:
- preempt_enable();
- } else {
- preempt_enable();
-- ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-+ ret = (unsigned long *)__get_free_page(GFP_KERNEL | __GFP_ZERO |
-+ (charge ? __GFP_UBC | __GFP_SOFT_UBC : 0));
- }
-
- return ret;
-@@ -70,6 +78,7 @@ static inline void pgtable_quicklist_fre
- #endif
-
- preempt_disable();
-+ ub_page_uncharge(virt_to_page(pgtable_entry), 0);
- *(unsigned long *)pgtable_entry = (unsigned long)pgtable_quicklist;
- pgtable_quicklist = (unsigned long *)pgtable_entry;
- ++pgtable_quicklist_size;
-@@ -78,7 +87,7 @@ static inline void pgtable_quicklist_fre
-
- static inline pgd_t *pgd_alloc(struct mm_struct *mm)
- {
-- return pgtable_quicklist_alloc();
-+ return pgtable_quicklist_alloc(1);
- }
-
- static inline void pgd_free(pgd_t * pgd)
-@@ -95,7 +104,7 @@ pgd_populate(struct mm_struct *mm, pgd_t
-
- static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
- {
-- return pgtable_quicklist_alloc();
-+ return pgtable_quicklist_alloc(1);
- }
-
- static inline void pud_free(pud_t * pud)
-@@ -113,7 +122,7 @@ pud_populate(struct mm_struct *mm, pud_t
-
- static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
- {
-- return pgtable_quicklist_alloc();
-+ return pgtable_quicklist_alloc(1);
- }
-
- static inline void pmd_free(pmd_t * pmd)
-@@ -138,13 +147,13 @@ pmd_populate_kernel(struct mm_struct *mm
- static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long addr)
- {
-- return virt_to_page(pgtable_quicklist_alloc());
-+ return virt_to_page(pgtable_quicklist_alloc(1));
- }
-
- static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long addr)
- {
-- return pgtable_quicklist_alloc();
-+ return pgtable_quicklist_alloc(0);
- }
-
- static inline void pte_free(struct page *pte)
-diff -upr linux-2.6.16.orig/include/asm-ia64/processor.h linux-2.6.16-026test009/include/asm-ia64/processor.h
---- linux-2.6.16.orig/include/asm-ia64/processor.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-ia64/processor.h 2006-04-19 15:02:12.000000000 +0400
-@@ -306,7 +306,7 @@ struct thread_struct {
- regs->loadrs = 0; \
- regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \
- regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \
-- if (unlikely(!current->mm->dumpable)) { \
-+ if (unlikely(!current->mm->dumpable || !current->mm->vps_dumpable)) { \
- /* \
- * Zap scratch regs to avoid leaking bits between processes with different \
- * uid/privileges. \
-diff -upr linux-2.6.16.orig/include/asm-ia64/unistd.h linux-2.6.16-026test009/include/asm-ia64/unistd.h
---- linux-2.6.16.orig/include/asm-ia64/unistd.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-ia64/unistd.h 2006-04-19 15:02:11.000000000 +0400
-@@ -285,12 +285,17 @@
- #define __NR_faccessat 1293
- /* 1294, 1295 reserved for pselect/ppoll */
- #define __NR_unshare 1296
-+#define __NR_getluid 1505
-+#define __NR_setluid 1506
-+#define __NR_setublimit 1507
-+#define __NR_ubstat 1508
-
- #ifdef __KERNEL__
-
- #include <linux/config.h>
-
--#define NR_syscalls 273 /* length of syscall table */
-+/* length of syscall table */
-+#define NR_syscalls (__NR_ubstat - __NR_ni_syscall + 1)
-
- #define __ARCH_WANT_SYS_RT_SIGACTION
-
-diff -upr linux-2.6.16.orig/include/asm-m32r/smp.h linux-2.6.16-026test009/include/asm-m32r/smp.h
---- linux-2.6.16.orig/include/asm-m32r/smp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-m32r/smp.h 2006-04-19 15:02:11.000000000 +0400
-@@ -67,7 +67,8 @@ extern volatile int cpu_2_physid[NR_CPUS
- #define raw_smp_processor_id() (current_thread_info()->cpu)
-
- extern cpumask_t cpu_callout_map;
--#define cpu_possible_map cpu_callout_map
-+extern cpumask_t cpu_possible_map;
-+extern cpumask_t cpu_present_map;
-
- static __inline__ int hard_smp_processor_id(void)
- {
-diff -upr linux-2.6.16.orig/include/asm-m32r/uaccess.h linux-2.6.16-026test009/include/asm-m32r/uaccess.h
---- linux-2.6.16.orig/include/asm-m32r/uaccess.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-m32r/uaccess.h 2006-04-19 15:02:11.000000000 +0400
-@@ -5,17 +5,9 @@
- * linux/include/asm-m32r/uaccess.h
- *
- * M32R version.
-- * Copyright (C) 2004 Hirokazu Takata <takata at linux-m32r.org>
-+ * Copyright (C) 2004, 2006 Hirokazu Takata <takata at linux-m32r.org>
- */
-
--#undef UACCESS_DEBUG
--
--#ifdef UACCESS_DEBUG
--#define UAPRINTK(args...) printk(args)
--#else
--#define UAPRINTK(args...)
--#endif /* UACCESS_DEBUG */
--
- /*
- * User space memory access functions
- */
-@@ -38,27 +30,29 @@
- #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
- #ifdef CONFIG_MMU
-+
- #define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
- #define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
--#else
--#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
--#define USER_DS MAKE_MM_SEG(0xFFFFFFFF)
--#endif /* CONFIG_MMU */
--
- #define get_ds() (KERNEL_DS)
--#ifdef CONFIG_MMU
- #define get_fs() (current_thread_info()->addr_limit)
- #define set_fs(x) (current_thread_info()->addr_limit = (x))
--#else
-+
-+#else /* not CONFIG_MMU */
-+
-+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
-+#define USER_DS MAKE_MM_SEG(0xFFFFFFFF)
-+#define get_ds() (KERNEL_DS)
-+
- static inline mm_segment_t get_fs(void)
- {
-- return USER_DS;
-+ return USER_DS;
- }
-
- static inline void set_fs(mm_segment_t s)
- {
- }
--#endif /* CONFIG_MMU */
-+
-+#endif /* not CONFIG_MMU */
-
- #define segment_eq(a,b) ((a).seg == (b).seg)
-
-@@ -83,9 +77,9 @@ static inline void set_fs(mm_segment_t s
- " subx %0, %0\n" \
- " cmpu %4, %1\n" \
- " subx %0, %5\n" \
-- : "=&r"(flag), "=r"(sum) \
-- : "1"(addr), "r"((int)(size)), \
-- "r"(current_thread_info()->addr_limit.seg), "r"(0) \
-+ : "=&r" (flag), "=r" (sum) \
-+ : "1" (addr), "r" ((int)(size)), \
-+ "r" (current_thread_info()->addr_limit.seg), "r" (0) \
- : "cbit" ); \
- flag; })
-
-@@ -113,10 +107,10 @@ static inline void set_fs(mm_segment_t s
- #else
- static inline int access_ok(int type, const void *addr, unsigned long size)
- {
-- extern unsigned long memory_start, memory_end;
-- unsigned long val = (unsigned long)addr;
-+ extern unsigned long memory_start, memory_end;
-+ unsigned long val = (unsigned long)addr;
-
-- return ((val >= memory_start) && ((val + size) < memory_end));
-+ return ((val >= memory_start) && ((val + size) < memory_end));
- }
- #endif /* CONFIG_MMU */
-
-@@ -155,39 +149,6 @@ extern int fixup_exception(struct pt_reg
- * accesses to the same area of user memory).
- */
-
--extern void __get_user_1(void);
--extern void __get_user_2(void);
--extern void __get_user_4(void);
--
--#ifndef MODULE
--#define __get_user_x(size,ret,x,ptr) \
-- __asm__ __volatile__( \
-- " mv r0, %0\n" \
-- " mv r1, %1\n" \
-- " bl __get_user_" #size "\n" \
-- " mv %0, r0\n" \
-- " mv %1, r1\n" \
-- : "=r"(ret), "=r"(x) \
-- : "0"(ptr) \
-- : "r0", "r1", "r14" )
--#else /* MODULE */
--/*
-- * Use "jl" instead of "bl" for MODULE
-- */
--#define __get_user_x(size,ret,x,ptr) \
-- __asm__ __volatile__( \
-- " mv r0, %0\n" \
-- " mv r1, %1\n" \
-- " seth lr, #high(__get_user_" #size ")\n" \
-- " or3 lr, lr, #low(__get_user_" #size ")\n" \
-- " jl lr\n" \
-- " mv %0, r0\n" \
-- " mv %1, r1\n" \
-- : "=r"(ret), "=r"(x) \
-- : "0"(ptr) \
-- : "r0", "r1", "r14" )
--#endif
--
- /* Careful: we have to cast the result to the type of the pointer for sign
- reasons */
- /**
-@@ -208,20 +169,7 @@ extern void __get_user_4(void);
- * On error, the variable @x is set to zero.
- */
- #define get_user(x,ptr) \
--({ int __ret_gu; \
-- unsigned long __val_gu; \
-- __chk_user_ptr(ptr); \
-- switch(sizeof (*(ptr))) { \
-- case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \
-- case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \
-- case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \
-- default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; \
-- } \
-- (x) = (__typeof__(*(ptr)))__val_gu; \
-- __ret_gu; \
--})
--
--extern void __put_user_bad(void);
-+ __get_user_check((x),(ptr),sizeof(*(ptr)))
-
- /**
- * put_user: - Write a simple value into user space.
-@@ -240,8 +188,7 @@ extern void __put_user_bad(void);
- * Returns zero on success, or -EFAULT on error.
- */
- #define put_user(x,ptr) \
-- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
--
-+ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
- /**
- * __get_user: - Get a simple variable from user space, with less checking.
-@@ -264,8 +211,64 @@ extern void __put_user_bad(void);
- * On error, the variable @x is set to zero.
- */
- #define __get_user(x,ptr) \
-- __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
-+ __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
-
-+#define __get_user_nocheck(x,ptr,size) \
-+({ \
-+ long __gu_err = 0; \
-+ unsigned long __gu_val; \
-+ might_sleep(); \
-+ __get_user_size(__gu_val,(ptr),(size),__gu_err); \
-+ (x) = (__typeof__(*(ptr)))__gu_val; \
-+ __gu_err; \
-+})
-+
-+#define __get_user_check(x,ptr,size) \
-+({ \
-+ long __gu_err = -EFAULT; \
-+ unsigned long __gu_val = 0; \
-+ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
-+ might_sleep(); \
-+ if (access_ok(VERIFY_READ,__gu_addr,size)) \
-+ __get_user_size(__gu_val,__gu_addr,(size),__gu_err); \
-+ (x) = (__typeof__(*(ptr)))__gu_val; \
-+ __gu_err; \
-+})
-+
-+extern long __get_user_bad(void);
-+
-+#define __get_user_size(x,ptr,size,retval) \
-+do { \
-+ retval = 0; \
-+ __chk_user_ptr(ptr); \
-+ switch (size) { \
-+ case 1: __get_user_asm(x,ptr,retval,"ub"); break; \
-+ case 2: __get_user_asm(x,ptr,retval,"uh"); break; \
-+ case 4: __get_user_asm(x,ptr,retval,""); break; \
-+ default: (x) = __get_user_bad(); \
-+ } \
-+} while (0)
-+
-+#define __get_user_asm(x, addr, err, itype) \
-+ __asm__ __volatile__( \
-+ " .fillinsn\n" \
-+ "1: ld"itype" %1,@%2\n" \
-+ " .fillinsn\n" \
-+ "2:\n" \
-+ ".section .fixup,\"ax\"\n" \
-+ " .balign 4\n" \
-+ "3: ldi %0,%3\n" \
-+ " seth r14,#high(2b)\n" \
-+ " or3 r14,r14,#low(2b)\n" \
-+ " jmp r14\n" \
-+ ".previous\n" \
-+ ".section __ex_table,\"a\"\n" \
-+ " .balign 4\n" \
-+ " .long 1b,3b\n" \
-+ ".previous" \
-+ : "=&r" (err), "=&r" (x) \
-+ : "r" (addr), "i" (-EFAULT), "0" (err) \
-+ : "r14", "memory")
-
- /**
- * __put_user: - Write a simple value into user space, with less checking.
-@@ -287,11 +290,13 @@ extern void __put_user_bad(void);
- * Returns zero on success, or -EFAULT on error.
- */
- #define __put_user(x,ptr) \
-- __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-+ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-+
-
- #define __put_user_nocheck(x,ptr,size) \
- ({ \
- long __pu_err; \
-+ might_sleep(); \
- __put_user_size((x),(ptr),(size),__pu_err); \
- __pu_err; \
- })
-@@ -308,28 +313,28 @@ extern void __put_user_bad(void);
- })
-
- #if defined(__LITTLE_ENDIAN__)
--#define __put_user_u64(x, addr, err) \
-- __asm__ __volatile__( \
-- " .fillinsn\n" \
-- "1: st %L1,@%2\n" \
-- " .fillinsn\n" \
-- "2: st %H1,@(4,%2)\n" \
-- " .fillinsn\n" \
-- "3:\n" \
-- ".section .fixup,\"ax\"\n" \
-- " .balign 4\n" \
-- "4: ldi %0,%3\n" \
-- " seth r14,#high(3b)\n" \
-- " or3 r14,r14,#low(3b)\n" \
-- " jmp r14\n" \
-- ".previous\n" \
-- ".section __ex_table,\"a\"\n" \
-- " .balign 4\n" \
-- " .long 1b,4b\n" \
-- " .long 2b,4b\n" \
-- ".previous" \
-- : "=&r"(err) \
-- : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \
-+#define __put_user_u64(x, addr, err) \
-+ __asm__ __volatile__( \
-+ " .fillinsn\n" \
-+ "1: st %L1,@%2\n" \
-+ " .fillinsn\n" \
-+ "2: st %H1,@(4,%2)\n" \
-+ " .fillinsn\n" \
-+ "3:\n" \
-+ ".section .fixup,\"ax\"\n" \
-+ " .balign 4\n" \
-+ "4: ldi %0,%3\n" \
-+ " seth r14,#high(3b)\n" \
-+ " or3 r14,r14,#low(3b)\n" \
-+ " jmp r14\n" \
-+ ".previous\n" \
-+ ".section __ex_table,\"a\"\n" \
-+ " .balign 4\n" \
-+ " .long 1b,4b\n" \
-+ " .long 2b,4b\n" \
-+ ".previous" \
-+ : "=&r" (err) \
-+ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err) \
- : "r14", "memory")
-
- #elif defined(__BIG_ENDIAN__)
-@@ -353,13 +358,15 @@ extern void __put_user_bad(void);
- " .long 1b,4b\n" \
- " .long 2b,4b\n" \
- ".previous" \
-- : "=&r"(err) \
-- : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \
-+ : "=&r" (err) \
-+ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err) \
- : "r14", "memory")
- #else
- #error no endian defined
- #endif
-
-+extern void __put_user_bad(void);
-+
- #define __put_user_size(x,ptr,size,retval) \
- do { \
- retval = 0; \
-@@ -398,52 +405,8 @@ struct __large_struct { unsigned long bu
- " .balign 4\n" \
- " .long 1b,3b\n" \
- ".previous" \
-- : "=&r"(err) \
-- : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \
-- : "r14", "memory")
--
--#define __get_user_nocheck(x,ptr,size) \
--({ \
-- long __gu_err; \
-- unsigned long __gu_val; \
-- __get_user_size(__gu_val,(ptr),(size),__gu_err); \
-- (x) = (__typeof__(*(ptr)))__gu_val; \
-- __gu_err; \
--})
--
--extern long __get_user_bad(void);
--
--#define __get_user_size(x,ptr,size,retval) \
--do { \
-- retval = 0; \
-- __chk_user_ptr(ptr); \
-- switch (size) { \
-- case 1: __get_user_asm(x,ptr,retval,"ub"); break; \
-- case 2: __get_user_asm(x,ptr,retval,"uh"); break; \
-- case 4: __get_user_asm(x,ptr,retval,""); break; \
-- default: (x) = __get_user_bad(); \
-- } \
--} while (0)
--
--#define __get_user_asm(x, addr, err, itype) \
-- __asm__ __volatile__( \
-- " .fillinsn\n" \
-- "1: ld"itype" %1,@%2\n" \
-- " .fillinsn\n" \
-- "2:\n" \
-- ".section .fixup,\"ax\"\n" \
-- " .balign 4\n" \
-- "3: ldi %0,%3\n" \
-- " seth r14,#high(2b)\n" \
-- " or3 r14,r14,#low(2b)\n" \
-- " jmp r14\n" \
-- ".previous\n" \
-- ".section __ex_table,\"a\"\n" \
-- " .balign 4\n" \
-- " .long 1b,3b\n" \
-- ".previous" \
-- : "=&r"(err), "=&r"(x) \
-- : "r"(addr), "i"(-EFAULT), "0"(err) \
-+ : "=&r" (err) \
-+ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err) \
- : "r14", "memory")
-
- /*
-@@ -453,7 +416,6 @@ do { \
- * anything, so this is accurate.
- */
-
--
- /*
- * Copy To/From Userspace
- */
-@@ -511,8 +473,9 @@ do { \
- " .long 2b,9b\n" \
- " .long 3b,9b\n" \
- ".previous\n" \
-- : "=&r"(__dst), "=&r"(__src), "=&r"(size), "=&r"(__c) \
-- : "0"(to), "1"(from), "2"(size), "3"(size / 4) \
-+ : "=&r" (__dst), "=&r" (__src), "=&r" (size), \
-+ "=&r" (__c) \
-+ : "0" (to), "1" (from), "2" (size), "3" (size / 4) \
- : "r14", "memory"); \
- } while (0)
-
-@@ -573,8 +536,9 @@ do { \
- " .long 2b,7b\n" \
- " .long 3b,7b\n" \
- ".previous\n" \
-- : "=&r"(__dst), "=&r"(__src), "=&r"(size), "=&r"(__c) \
-- : "0"(to), "1"(from), "2"(size), "3"(size / 4) \
-+ : "=&r" (__dst), "=&r" (__src), "=&r" (size), \
-+ "=&r" (__c) \
-+ : "0" (to), "1" (from), "2" (size), "3" (size / 4) \
- : "r14", "memory"); \
- } while (0)
-
-@@ -676,7 +640,7 @@ unsigned long __generic_copy_from_user(v
- #define copy_from_user(to,from,n) \
- ({ \
- might_sleep(); \
--__generic_copy_from_user((to),(from),(n)); \
-+ __generic_copy_from_user((to),(from),(n)); \
- })
-
- long __must_check strncpy_from_user(char *dst, const char __user *src,
-diff -upr linux-2.6.16.orig/include/asm-powerpc/floppy.h linux-2.6.16-026test009/include/asm-powerpc/floppy.h
---- linux-2.6.16.orig/include/asm-powerpc/floppy.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-powerpc/floppy.h 2006-04-19 15:02:11.000000000 +0400
-@@ -35,6 +35,7 @@
- #ifdef CONFIG_PCI
-
- #include <linux/pci.h>
-+#include <asm/ppc-pci.h> /* for ppc64_isabridge_dev */
-
- #define fd_dma_setup(addr,size,mode,io) powerpc_fd_dma_setup(addr,size,mode,io)
-
-@@ -52,12 +53,12 @@ static __inline__ int powerpc_fd_dma_set
- if (bus_addr
- && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
- /* different from last time -- unmap prev */
-- pci_unmap_single(NULL, bus_addr, prev_size, prev_dir);
-+ pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir);
- bus_addr = 0;
- }
-
- if (!bus_addr) /* need to map it */
-- bus_addr = pci_map_single(NULL, addr, size, dir);
-+ bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir);
-
- /* remember this one as prev */
- prev_addr = addr;
-diff -upr linux-2.6.16.orig/include/asm-powerpc/pgalloc.h linux-2.6.16-026test009/include/asm-powerpc/pgalloc.h
---- linux-2.6.16.orig/include/asm-powerpc/pgalloc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-powerpc/pgalloc.h 2006-04-19 15:02:11.000000000 +0400
-@@ -33,7 +33,8 @@ extern kmem_cache_t *pgtable_cache[];
-
- static inline pgd_t *pgd_alloc(struct mm_struct *mm)
- {
-- return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
-+ return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM],
-+ GFP_KERNEL_UBC | __GFP_SOFT_UBC);
- }
-
- static inline void pgd_free(pgd_t *pgd)
-@@ -48,7 +49,7 @@ static inline void pgd_free(pgd_t *pgd)
- static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
- {
- return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
-- GFP_KERNEL|__GFP_REPEAT);
-+ GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
- }
-
- static inline void pud_free(pud_t *pud)
-@@ -84,7 +85,7 @@ static inline void pmd_populate_kernel(s
- static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
- {
- return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
-- GFP_KERNEL|__GFP_REPEAT);
-+ GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT);
- }
-
- static inline void pmd_free(pmd_t *pmd)
-@@ -92,17 +93,21 @@ static inline void pmd_free(pmd_t *pmd)
- kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
- }
-
-+static inline pte_t *__pte_alloc(gfp_t flags)
-+{
-+ return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], flags);
-+}
-+
- static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
- {
-- return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
-- GFP_KERNEL|__GFP_REPEAT);
-+ return __pte_alloc(GFP_KERNEL | __GFP_REPEAT);
- }
-
- static inline struct page *pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
- {
-- return virt_to_page(pte_alloc_one_kernel(mm, address));
-+ return virt_to_page(__pte_alloc(GFP_KERNEL_UBC | __GFP_SOFT_UBC));
- }
-
- static inline void pte_free_kernel(pte_t *pte)
-diff -upr linux-2.6.16.orig/include/asm-powerpc/unistd.h linux-2.6.16-026test009/include/asm-powerpc/unistd.h
---- linux-2.6.16.orig/include/asm-powerpc/unistd.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-powerpc/unistd.h 2006-04-19 15:02:11.000000000 +0400
-@@ -301,8 +301,12 @@
- #define __NR_pselect6 280
- #define __NR_ppoll 281
- #define __NR_unshare 282
--
--#define __NR_syscalls 283
-+#define __NR_getluid 410
-+#define __NR_setluid 411
-+#define __NR_setublimit 412
-+#define __NR_ubstat 413
-+
-+#define NR_syscalls 414
-
- #ifdef __KERNEL__
- #define __NR__exit __NR_exit
-diff -upr linux-2.6.16.orig/include/asm-s390/pgalloc.h linux-2.6.16-026test009/include/asm-s390/pgalloc.h
---- linux-2.6.16.orig/include/asm-s390/pgalloc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-s390/pgalloc.h 2006-04-19 15:02:11.000000000 +0400
-@@ -34,12 +34,12 @@ static inline pgd_t *pgd_alloc(struct mm
- int i;
-
- #ifndef __s390x__
-- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,1);
-+ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 1);
- if (pgd != NULL)
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
- pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
- #else /* __s390x__ */
-- pgd = (pgd_t *) __get_free_pages(GFP_KERNEL,2);
-+ pgd = (pgd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2);
- if (pgd != NULL)
- for (i = 0; i < PTRS_PER_PGD; i++)
- pgd_clear(pgd + i);
-@@ -72,7 +72,7 @@ static inline pmd_t * pmd_alloc_one(stru
- pmd_t *pmd;
- int i;
-
-- pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, 2);
-+ pmd = (pmd_t *) __get_free_pages(GFP_KERNEL_UBC | __GFP_SOFT_UBC, 2);
- if (pmd != NULL) {
- for (i=0; i < PTRS_PER_PMD; i++)
- pmd_clear(pmd+i);
-@@ -118,16 +118,13 @@ pmd_populate(struct mm_struct *mm, pmd_t
- pmd_populate_kernel(mm, pmd, (pte_t *)((page-mem_map) << PAGE_SHIFT));
- }
-
--/*
-- * page table entry allocation/free routines.
-- */
--static inline pte_t *
--pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
-+static inline pte_t *pte_alloc(struct mm_struct *mm, unsigned long vmaddr,
-+ gfp_t mask)
- {
- pte_t *pte;
- int i;
-
-- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-+ pte = (pte_t *)__get_free_page(mask);
- if (pte != NULL) {
- for (i=0; i < PTRS_PER_PTE; i++) {
- pte_clear(mm, vmaddr, pte+i);
-@@ -137,10 +134,20 @@ pte_alloc_one_kernel(struct mm_struct *m
- return pte;
- }
-
-+/*
-+ * page table entry allocation/free routines.
-+ */
-+static inline pte_t *
-+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
-+{
-+ return pte_alloc(mm, vmaddr, GFP_KERNEL | __GFP_REPEAT);
-+}
-+
- static inline struct page *
- pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
- {
-- pte_t *pte = pte_alloc_one_kernel(mm, vmaddr);
-+ pte_t *pte = pte_alloc(mm, vmaddr, GFP_KERNEL_UBC | __GFP_SOFT_UBC |
-+ __GFP_REPEAT);
- if (pte)
- return virt_to_page(pte);
- return 0;
-diff -upr linux-2.6.16.orig/include/asm-sh64/pgalloc.h linux-2.6.16-026test009/include/asm-sh64/pgalloc.h
---- linux-2.6.16.orig/include/asm-sh64/pgalloc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-sh64/pgalloc.h 2006-04-19 15:02:12.000000000 +0400
-@@ -173,7 +173,7 @@ static inline void set_pgdir(unsigned lo
- pgd_t *pgd;
-
- read_lock(&tasklist_lock);
-- for_each_process(p) {
-+ for_each_process_all(p) {
- if (!p->mm)
- continue;
- *pgd_offset(p->mm,address) = entry;
-diff -upr linux-2.6.16.orig/include/asm-x86_64/mman.h linux-2.6.16-026test009/include/asm-x86_64/mman.h
---- linux-2.6.16.orig/include/asm-x86_64/mman.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/mman.h 2006-04-19 15:02:11.000000000 +0400
-@@ -12,6 +12,7 @@
- #define MAP_NORESERVE 0x4000 /* don't check for reservations */
- #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
- #define MAP_NONBLOCK 0x10000 /* do not block on IO */
-+#define MAP_EXECPRIO 0x20000 /* soft ubc charge */
-
- #define MCL_CURRENT 1 /* lock all current mappings */
- #define MCL_FUTURE 2 /* lock all future mappings */
-diff -upr linux-2.6.16.orig/include/asm-x86_64/nmi.h linux-2.6.16-026test009/include/asm-x86_64/nmi.h
---- linux-2.6.16.orig/include/asm-x86_64/nmi.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/nmi.h 2006-04-19 15:02:11.000000000 +0400
-@@ -24,6 +24,9 @@ void set_nmi_callback(nmi_callback_t cal
- * Remove the handler previously set.
- */
- void unset_nmi_callback(void);
-+
-+void set_nmi_ipi_callback(nmi_callback_t callback);
-+void unset_nmi_ipi_callback(void);
-
- #ifdef CONFIG_PM
-
-diff -upr linux-2.6.16.orig/include/asm-x86_64/pgalloc.h linux-2.6.16-026test009/include/asm-x86_64/pgalloc.h
---- linux-2.6.16.orig/include/asm-x86_64/pgalloc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/pgalloc.h 2006-04-19 15:02:11.000000000 +0400
-@@ -31,12 +31,14 @@ static inline void pmd_free(pmd_t *pmd)
-
- static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
- {
-- return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-+ return (pmd_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
-+ __GFP_SOFT_UBC);
- }
-
- static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
- {
-- return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-+ return (pud_t *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
-+ __GFP_SOFT_UBC);
- }
-
- static inline void pud_free (pud_t *pud)
-@@ -48,7 +50,8 @@ static inline void pud_free (pud_t *pud)
- static inline pgd_t *pgd_alloc(struct mm_struct *mm)
- {
- unsigned boundary;
-- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL_UBC|__GFP_REPEAT|
-+ __GFP_SOFT_UBC);
- if (!pgd)
- return NULL;
- /*
-@@ -77,7 +80,8 @@ static inline pte_t *pte_alloc_one_kerne
-
- static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
- {
-- void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-+ void *p = (void *)get_zeroed_page(GFP_KERNEL_UBC|__GFP_REPEAT|
-+ __GFP_SOFT_UBC);
- if (!p)
- return NULL;
- return virt_to_page(p);
-diff -upr linux-2.6.16.orig/include/asm-x86_64/processor.h linux-2.6.16-026test009/include/asm-x86_64/processor.h
---- linux-2.6.16.orig/include/asm-x86_64/processor.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/processor.h 2006-04-19 15:02:12.000000000 +0400
-@@ -167,7 +167,7 @@ static inline void clear_in_cr4 (unsigne
- /* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
--#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
-+#define IA32_PAGE_OFFSET 0xc0000000
-
- #define TASK_SIZE (test_thread_flag(TIF_IA32) ? IA32_PAGE_OFFSET : TASK_SIZE64)
- #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? IA32_PAGE_OFFSET : TASK_SIZE64)
-diff -upr linux-2.6.16.orig/include/asm-x86_64/segment.h linux-2.6.16-026test009/include/asm-x86_64/segment.h
---- linux-2.6.16.orig/include/asm-x86_64/segment.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/segment.h 2006-04-19 15:02:12.000000000 +0400
-@@ -3,29 +3,28 @@
-
- #include <asm/cache.h>
-
--#define __KERNEL_CS 0x10
--#define __KERNEL_DS 0x18
--
--#define __KERNEL32_CS 0x38
--
-+#define GDT_ENTRY_BOOT_CS 2
-+#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
-+#define GDT_ENTRY_BOOT_DS 3
-+#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
-+#define GDT_ENTRY_TSS 4 /* needs two entries */
- /*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
- */
-+#define GDT_ENTRY_TLS_MIN 6
-+#define GDT_ENTRY_TLS_MAX 8
-
--#define __USER32_CS 0x23 /* 4*8+3 */
--#define __USER_DS 0x2b /* 5*8+3 */
--#define __USER_CS 0x33 /* 6*8+3 */
-+#define GDT_ENTRY_LDT 9 /* needs two entries */
-+#define __KERNEL32_CS 0x58 /* 11*8 */
-+#define __KERNEL_CS 0x60 /* 12*8 */
-+#define __KERNEL_DS 0x68 /* 13*8 */
-+#define __USER32_CS 0x73 /* 14*8+3 */
-+#define __USER_DS 0x7b /* 15*8+3 */
- #define __USER32_DS __USER_DS
--
--#define GDT_ENTRY_TLS 1
--#define GDT_ENTRY_TSS 8 /* needs two entries */
--#define GDT_ENTRY_LDT 10 /* needs two entries */
--#define GDT_ENTRY_TLS_MIN 12
--#define GDT_ENTRY_TLS_MAX 14
--/* 15 free */
-+#define __USER_CS 0x83 /* 16*8+3 */
-
- #define GDT_ENTRY_TLS_ENTRIES 3
-
-@@ -37,7 +36,7 @@
- #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
- #define IDT_ENTRIES 256
--#define GDT_ENTRIES 16
-+#define GDT_ENTRIES 32
- #define GDT_SIZE (GDT_ENTRIES * 8)
- #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
-
-diff -upr linux-2.6.16.orig/include/asm-x86_64/signal.h linux-2.6.16-026test009/include/asm-x86_64/signal.h
---- linux-2.6.16.orig/include/asm-x86_64/signal.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/signal.h 2006-04-19 15:02:12.000000000 +0400
-@@ -23,11 +23,6 @@ typedef struct {
- unsigned long sig[_NSIG_WORDS];
- } sigset_t;
-
--
--struct pt_regs;
--asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
--
--
- #else
- /* Here we must cater to libcs that poke about in kernel headers. */
-
-diff -upr linux-2.6.16.orig/include/asm-x86_64/thread_info.h linux-2.6.16-026test009/include/asm-x86_64/thread_info.h
---- linux-2.6.16.orig/include/asm-x86_64/thread_info.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/thread_info.h 2006-04-19 15:02:12.000000000 +0400
-@@ -74,7 +74,7 @@ static inline struct thread_info *stack_
-
- /* thread information allocation */
- #define alloc_thread_info(tsk) \
-- ((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
-+ ((struct thread_info *) __get_free_pages(GFP_KERNEL_UBC,THREAD_ORDER))
- #define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
-
- #else /* !__ASSEMBLY__ */
-@@ -101,11 +101,13 @@ static inline struct thread_info *stack_
- #define TIF_IRET 5 /* force IRET */
- #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
- #define TIF_SECCOMP 8 /* secure computing */
-+#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
- #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
- #define TIF_IA32 17 /* 32bit process */
- #define TIF_FORK 18 /* ret_from_fork */
- #define TIF_ABI_PENDING 19
--#define TIF_MEMDIE 20
-+#define TIF_FREEZE 20
-+#define TIF_MEMDIE 21
-
- #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
- #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
-@@ -115,6 +117,7 @@ static inline struct thread_info *stack_
- #define _TIF_IRET (1<<TIF_IRET)
- #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
- #define _TIF_SECCOMP (1<<TIF_SECCOMP)
-+#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
- #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
- #define _TIF_IA32 (1<<TIF_IA32)
- #define _TIF_FORK (1<<TIF_FORK)
-diff -upr linux-2.6.16.orig/include/asm-x86_64/unistd.h linux-2.6.16-026test009/include/asm-x86_64/unistd.h
---- linux-2.6.16.orig/include/asm-x86_64/unistd.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/asm-x86_64/unistd.h 2006-04-19 15:02:12.000000000 +0400
-@@ -605,8 +605,16 @@ __SYSCALL(__NR_pselect6, sys_ni_syscall)
- __SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */
- #define __NR_unshare 272
- __SYSCALL(__NR_unshare, sys_unshare)
--
--#define __NR_syscall_max __NR_unshare
-+#define __NR_getluid 500
-+__SYSCALL(__NR_getluid, sys_getluid)
-+#define __NR_setluid 501
-+__SYSCALL(__NR_setluid, sys_setluid)
-+#define __NR_setublimit 502
-+__SYSCALL(__NR_setublimit, sys_setublimit)
-+#define __NR_ubstat 503
-+__SYSCALL(__NR_ubstat, sys_ubstat)
-+
-+#define __NR_syscall_max __NR_ubstat
-
- #ifndef __NO_STUBS
-
-@@ -645,6 +653,7 @@ do { \
- #define __ARCH_WANT_SYS_RT_SIGACTION
- #define __ARCH_WANT_SYS_TIME
- #define __ARCH_WANT_COMPAT_SYS_TIME
-+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
- #endif
-
- #ifndef __KERNEL_SYSCALLS__
-diff -upr linux-2.6.16.orig/include/linux/aio.h linux-2.6.16-026test009/include/linux/aio.h
---- linux-2.6.16.orig/include/linux/aio.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/aio.h 2006-04-19 15:02:12.000000000 +0400
-@@ -247,4 +247,8 @@ static inline struct kiocb *list_kiocb(s
- extern unsigned long aio_nr;
- extern unsigned long aio_max_nr;
-
-+void wait_for_all_aios(struct kioctx *ctx);
-+extern kmem_cache_t *kioctx_cachep;
-+extern void aio_kick_handler(void *);
-+
- #endif /* __LINUX__AIO_H */
-diff -upr linux-2.6.16.orig/include/linux/binfmts.h linux-2.6.16-026test009/include/linux/binfmts.h
---- linux-2.6.16.orig/include/linux/binfmts.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/binfmts.h 2006-04-19 15:02:11.000000000 +0400
-@@ -2,6 +2,7 @@
- #define _LINUX_BINFMTS_H
-
- #include <linux/capability.h>
-+#include <linux/fs.h>
-
- struct pt_regs;
-
-@@ -28,6 +29,7 @@ struct linux_binprm{
- int sh_bang;
- struct file * file;
- int e_uid, e_gid;
-+ struct exec_perm perm;
- kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
- void *security;
- int argc, envc;
-diff -upr linux-2.6.16.orig/include/linux/capability.h linux-2.6.16-026test009/include/linux/capability.h
---- linux-2.6.16.orig/include/linux/capability.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/capability.h 2006-04-19 15:02:12.000000000 +0400
-@@ -146,12 +146,9 @@ typedef __u32 kernel_cap_t;
-
- #define CAP_NET_BROADCAST 11
-
--/* Allow interface configuration */
- /* Allow administration of IP firewall, masquerading and accounting */
- /* Allow setting debug option on sockets */
- /* Allow modification of routing tables */
--/* Allow setting arbitrary process / process group ownership on
-- sockets */
- /* Allow binding to any address for transparent proxying */
- /* Allow setting TOS (type of service) */
- /* Allow setting promiscuous mode */
-@@ -200,24 +197,19 @@ typedef __u32 kernel_cap_t;
-
- /* Allow configuration of the secure attention key */
- /* Allow administration of the random device */
--/* Allow examination and configuration of disk quotas */
- /* Allow configuring the kernel's syslog (printk behaviour) */
- /* Allow setting the domainname */
- /* Allow setting the hostname */
- /* Allow calling bdflush() */
--/* Allow mount() and umount(), setting up new smb connection */
-+/* Allow setting up new smb connection */
- /* Allow some autofs root ioctls */
- /* Allow nfsservctl */
- /* Allow VM86_REQUEST_IRQ */
- /* Allow to read/write pci config on alpha */
- /* Allow irix_prctl on mips (setstacksize) */
- /* Allow flushing all cache on m68k (sys_cacheflush) */
--/* Allow removing semaphores */
--/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
-- and shared memory */
- /* Allow locking/unlocking of shared memory segment */
- /* Allow turning swap on/off */
--/* Allow forged pids on socket credentials passing */
- /* Allow setting readahead and flushing buffers on block devices */
- /* Allow setting geometry in floppy driver */
- /* Allow turning DMA on/off in xd driver */
-@@ -288,7 +280,52 @@ typedef __u32 kernel_cap_t;
-
- #define CAP_AUDIT_CONTROL 30
-
-+/*
-+ * Important note: VZ capability numbers overlap with the CAP_AUDIT ones;
-+ * this is for compatibility reasons and is harmless:
-+ * both VZ and Audit/SELinux caps are disabled in VPSs.
-+ */
-+
-+/* Allow access to all information. Otherwise some structures will be
-+ hidden to ensure that different Virtual Environments on the same node do
-+ not interact */
-+#define CAP_SETVEID 29
-+
-+#define CAP_VE_ADMIN 30
-+
- #ifdef __KERNEL__
-+
-+#include <linux/config.h>
-+
-+#ifdef CONFIG_VE
-+
-+/* Replacement for CAP_NET_ADMIN:
-+ delegated rights to the Virtual environment of its network administration.
-+ For now the following rights have been delegated:
-+
-+ Allow setting arbitrary process / process group ownership on sockets
-+ Allow interface configuration
-+ */
-+#define CAP_VE_NET_ADMIN CAP_VE_ADMIN
-+
-+/* Replacement for CAP_SYS_ADMIN:
-+ delegated rights to the Virtual environment of its administration.
-+ For now the following rights have been delegated:
-+ */
-+/* Allow mount/umount/remount */
-+/* Allow examination and configuration of disk quotas */
-+/* Allow removing semaphores */
-+/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
-+ and shared memory */
-+/* Allow locking/unlocking of shared memory segment */
-+/* Allow forged pids on socket credentials passing */
-+
-+#define CAP_VE_SYS_ADMIN CAP_VE_ADMIN
-+#else
-+#define CAP_VE_NET_ADMIN CAP_NET_ADMIN
-+#define CAP_VE_SYS_ADMIN CAP_SYS_ADMIN
-+#endif
-+
- /*
- * Bounding set
- */
-@@ -352,9 +389,14 @@ static inline kernel_cap_t cap_invert(ke
- #define cap_issubset(a,set) (!(cap_t(a) & ~cap_t(set)))
-
- #define cap_clear(c) do { cap_t(c) = 0; } while(0)
-+#ifndef CONFIG_VE
- #define cap_set_full(c) do { cap_t(c) = ~0; } while(0)
-+#else
-+#define cap_set_full(c) \
-+ do {cap_t(c) = ve_is_super(get_exec_env()) ? ~0 : \
-+ get_exec_env()->cap_default; } while(0)
-+#endif
- #define cap_mask(c,mask) do { cap_t(c) &= cap_t(mask); } while(0)
--
- #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK)
-
- extern int capable(int cap);
-diff -upr linux-2.6.16.orig/include/linux/coda_linux.h linux-2.6.16-026test009/include/linux/coda_linux.h
---- linux-2.6.16.orig/include/linux/coda_linux.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/coda_linux.h 2006-04-19 15:02:11.000000000 +0400
-@@ -38,7 +38,8 @@ extern struct file_operations coda_ioctl
- int coda_open(struct inode *i, struct file *f);
- int coda_flush(struct file *f);
- int coda_release(struct inode *i, struct file *f);
--int coda_permission(struct inode *inode, int mask, struct nameidata *nd);
-+int coda_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *);
- int coda_revalidate_inode(struct dentry *);
- int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
- int coda_setattr(struct dentry *, struct iattr *);
-diff -upr linux-2.6.16.orig/include/linux/compat.h linux-2.6.16-026test009/include/linux/compat.h
---- linux-2.6.16.orig/include/linux/compat.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/compat.h 2006-04-19 15:02:12.000000000 +0400
-@@ -181,5 +181,7 @@ static inline int compat_timespec_compar
- return lhs->tv_nsec - rhs->tv_nsec;
- }
-
-+extern long compat_nanosleep_restart(struct restart_block *restart);
-+
- #endif /* CONFIG_COMPAT */
- #endif /* _LINUX_COMPAT_H */
-diff -upr linux-2.6.16.orig/include/linux/cpt_image.h linux-2.6.16-026test009/include/linux/cpt_image.h
---- linux-2.6.16.orig/include/linux/cpt_image.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/cpt_image.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,1427 @@
-+/*
-+ *
-+ * include/linux/cpt_image.h
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __CPT_IMAGE_H_
-+#define __CPT_IMAGE_H_ 1
-+
-+#define CPT_NULL (~0ULL)
-+#define CPT_NOINDEX (~0U)
-+
-+/*
-+ * Image file layout.
-+ *
-+ * - major header
-+ * - sections[]
-+ *
-+ * Each section is:
-+ * - section header
-+ * - array of objects
-+ *
-+ * All data records are arch independent, 64 bit aligned.
-+ */
-+
-+enum _cpt_object_type
-+{
-+ CPT_OBJ_TASK = 0,
-+ CPT_OBJ_MM,
-+ CPT_OBJ_FS,
-+ CPT_OBJ_FILES,
-+ CPT_OBJ_FILE,
-+ CPT_OBJ_SIGHAND_STRUCT,
-+ CPT_OBJ_SIGNAL_STRUCT,
-+ CPT_OBJ_TTY,
-+ CPT_OBJ_SOCKET,
-+ CPT_OBJ_SYSVSEM_UNDO,
-+ CPT_OBJ_NAMESPACE,
-+ CPT_OBJ_SYSV_SHM,
-+ CPT_OBJ_INODE,
-+ CPT_OBJ_UBC,
-+ CPT_OBJ_SLM_SGREG,
-+ CPT_OBJ_SLM_REGOBJ,
-+ CPT_OBJ_SLM_MM,
-+ CPT_OBJ_MAX,
-+ /* The objects above are stored in memory while checkpointing */
-+
-+ CPT_OBJ_VMA = 1024,
-+ CPT_OBJ_FILEDESC,
-+ CPT_OBJ_SIGHANDLER,
-+ CPT_OBJ_SIGINFO,
-+ CPT_OBJ_LASTSIGINFO,
-+ CPT_OBJ_SYSV_SEM,
-+ CPT_OBJ_SKB,
-+ CPT_OBJ_FLOCK,
-+ CPT_OBJ_OPENREQ,
-+ CPT_OBJ_VFSMOUNT,
-+ CPT_OBJ_TRAILER,
-+ CPT_OBJ_SYSVSEM_UNDO_REC,
-+ CPT_OBJ_NET_DEVICE,
-+ CPT_OBJ_NET_IFADDR,
-+ CPT_OBJ_NET_ROUTE,
-+ CPT_OBJ_NET_CONNTRACK,
-+ CPT_OBJ_NET_CONNTRACK_EXPECT,
-+ CPT_OBJ_AIO_CONTEXT,
-+ CPT_OBJ_VEINFO,
-+ CPT_OBJ_EPOLL,
-+ CPT_OBJ_EPOLL_FILE,
-+
-+ CPT_OBJ_X86_REGS = 4096,
-+ CPT_OBJ_X86_64_REGS,
-+ CPT_OBJ_PAGES,
-+ CPT_OBJ_COPYPAGES,
-+ CPT_OBJ_REMAPPAGES,
-+ CPT_OBJ_LAZYPAGES,
-+ CPT_OBJ_NAME,
-+ CPT_OBJ_BITS,
-+ CPT_OBJ_REF,
-+};
-+
-+#define CPT_ALIGN(n) (((n)+7)&~7)
-+
-+struct cpt_major_hdr
-+{
-+ __u8 cpt_signature[4]; /* Magic number */
-+ __u16 cpt_hdrlen; /* Length of this header */
-+ __u16 cpt_image_version; /* Format of this file; mbz */
-+ __u16 cpt_os_arch; /* Architecture */
-+#define CPT_OS_ARCH_I386 0
-+#define CPT_OS_ARCH_EMT64 1
-+#define CPT_OS_ARCH_IA64 2
-+ __u16 __cpt_pad1;
-+ __u32 cpt_os_version; /* Version of kernel, where image was done */
-+ __u32 cpt_os_features; /* Kernel features: SMP etc. */
-+ __u16 cpt_pagesize; /* Page size used by OS */
-+ __u16 cpt_hz; /* HZ used by OS */
-+ __u64 cpt_start_jiffies64; /* Jiffies */
-+ __u32 cpt_start_sec; /* Seconds */
-+ __u32 cpt_start_nsec; /* Nanoseconds */
-+ __u32 cpt_cpu_caps[4]; /* CPU capabilities */
-+ __u32 cpt_kernel_config[4]; /* Kernel config */
-+ __u64 cpt_iptables_mask; /* Used netfilter modules */
-+} __attribute__ ((aligned (8)));
-+
-+#define CPT_SIGNATURE0 0x79
-+#define CPT_SIGNATURE1 0x1c
-+#define CPT_SIGNATURE2 0x01
-+#define CPT_SIGNATURE3 0x63
-+
-+#define CPT_CPU_X86_CMOV 0
-+#define CPT_CPU_X86_FXSR 1
-+#define CPT_CPU_X86_SSE 2
-+#define CPT_CPU_X86_SSE2 3
-+#define CPT_CPU_X86_MMX 4
-+#define CPT_CPU_X86_3DNOW 5
-+#define CPT_CPU_X86_3DNOW2 6
-+#define CPT_CPU_X86_SEP 7
-+#define CPT_CPU_X86_EMT64 8
-+#define CPT_CPU_X86_IA64 9
-+
-+#define CPT_KERNEL_CONFIG_PAE 0
-+
-+struct cpt_section_hdr
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_section;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_align;
-+} __attribute__ ((aligned (8)));
-+
-+enum
-+{
-+ CPT_SECT_ERROR, /* Error section, content is string */
-+ CPT_SECT_VEINFO,
-+ CPT_SECT_FILES, /* Files. Content is array of file objects */
-+ CPT_SECT_TASKS,
-+ CPT_SECT_MM,
-+ CPT_SECT_FILES_STRUCT,
-+ CPT_SECT_FS,
-+ CPT_SECT_SIGHAND_STRUCT,
-+ CPT_SECT_TTY,
-+ CPT_SECT_SOCKET,
-+ CPT_SECT_NAMESPACE,
-+ CPT_SECT_SYSVSEM_UNDO,
-+ CPT_SECT_INODE, /* Inodes with i->i_nlink==0 and
-+ * deleted dentries with inodes not
-+ * referenced inside dumped process.
-+ */
-+ CPT_SECT_SYSV_SHM,
-+ CPT_SECT_SYSV_SEM,
-+ CPT_SECT_ORPHANS,
-+ CPT_SECT_NET_DEVICE,
-+ CPT_SECT_NET_IFADDR,
-+ CPT_SECT_NET_ROUTE,
-+ CPT_SECT_NET_IPTABLES,
-+ CPT_SECT_NET_CONNTRACK,
-+ CPT_SECT_NET_CONNTRACK_VE0,
-+ CPT_SECT_UTSNAME,
-+ CPT_SECT_TRAILER,
-+ CPT_SECT_UBC,
-+ CPT_SECT_SLM_SGREGS,
-+ CPT_SECT_SLM_REGOBJS,
-+/* Due to a silly mistake we cannot index sections beyond this value */
-+#define CPT_SECT_MAX_INDEX (CPT_SECT_SLM_REGOBJS+1)
-+ CPT_SECT_EPOLL,
-+ CPT_SECT_MAX
-+};
-+
-+struct cpt_major_tail
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_lazypages;
-+ __u32 cpt_64bit;
-+ __u64 cpt_sections[CPT_SECT_MAX_INDEX];
-+ __u32 cpt_nsect;
-+ __u8 cpt_signature[4]; /* Magic number */
-+} __attribute__ ((aligned (8)));
-+
-+
-+/* Common object header. */
-+struct cpt_object_hdr
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+} __attribute__ ((aligned (8)));
-+
-+enum _cpt_content_type {
-+ CPT_CONTENT_VOID,
-+ CPT_CONTENT_ARRAY,
-+ CPT_CONTENT_DATA,
-+ CPT_CONTENT_NAME,
-+
-+ CPT_CONTENT_STACK,
-+ CPT_CONTENT_X86_FPUSTATE_OLD,
-+ CPT_CONTENT_X86_FPUSTATE,
-+ CPT_CONTENT_MM_CONTEXT,
-+ CPT_CONTENT_SEMARRAY,
-+ CPT_CONTENT_SEMUNDO,
-+ CPT_CONTENT_NLMARRAY,
-+ CPT_CONTENT_MAX
-+};
-+
-+/* CPT_OBJ_BITS: encode array of bytes */
-+struct cpt_obj_bits
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_size;
-+ __u32 __cpt_pad1;
-+} __attribute__ ((aligned (8)));
-+
-+/* CPT_OBJ_REF: a reference to another object */
-+struct cpt_obj_ref
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_pos;
-+} __attribute__ ((aligned (8)));
-+
-+/* CPT_OBJ_VEINFO: various ve specific data */
-+struct cpt_veinfo_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ /* ipc ctls */
-+ __u32 shm_ctl_max;
-+ __u32 shm_ctl_all;
-+ __u32 shm_ctl_mni;
-+ __u32 msg_ctl_max;
-+ __u32 msg_ctl_mni;
-+ __u32 msg_ctl_mnb;
-+ __u32 sem_ctl_arr[4];
-+
-+ /* start time */
-+ __u64 start_timespec_delta;
-+ __u64 start_jiffies_delta;
-+} __attribute__ ((aligned (8)));
-+
-+/* CPT_OBJ_FILE: one struct file */
-+struct cpt_file_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_flags;
-+ __u32 cpt_mode;
-+ __u64 cpt_pos;
-+ __u32 cpt_uid;
-+ __u32 cpt_gid;
-+
-+ __u32 cpt_i_mode;
-+ __u32 cpt_lflags;
-+#define CPT_DENTRY_DELETED 1
-+#define CPT_DENTRY_ROOT 2
-+#define CPT_DENTRY_CLONING 4
-+#define CPT_DENTRY_PROC 8
-+#define CPT_DENTRY_EPOLL 0x10
-+ __u64 cpt_inode;
-+ __u64 cpt_priv;
-+
-+ __u32 cpt_fown_fd;
-+ __u32 cpt_fown_pid;
-+ __u32 cpt_fown_uid;
-+ __u32 cpt_fown_euid;
-+ __u32 cpt_fown_signo;
-+ __u32 __cpt_pad1;
-+} __attribute__ ((aligned (8)));
-+/* Followed by file name, encoded as CPT_OBJ_NAME */
-+
-+struct cpt_epoll_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_file;
-+} __attribute__ ((aligned (8)));
-+/* Followed by array of struct cpt_epoll_file */
-+
-+struct cpt_epoll_file_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_file;
-+ __u32 cpt_fd;
-+ __u32 cpt_events;
-+ __u64 cpt_data;
-+ __u32 cpt_revents;
-+ __u32 cpt_ready;
-+} __attribute__ ((aligned (8)));
-+
-+
-+/* CPT_OBJ_FILEDESC: one file descriptor */
-+struct cpt_fd_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_fd;
-+ __u32 cpt_flags;
-+#define CPT_FD_FLAG_CLOSEEXEC 1
-+ __u64 cpt_file;
-+} __attribute__ ((aligned (8)));
-+
-+/* CPT_OBJ_FILES: one files_struct */
-+struct cpt_files_struct_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_index;
-+ __u32 cpt_max_fds;
-+ __u32 cpt_next_fd;
-+ __u32 __cpt_pad1;
-+} __attribute__ ((aligned (8)));
-+/* Followed by array of cpt_fd_image */
-+
-+/* CPT_OBJ_FS: one fs_struct */
-+struct cpt_fs_struct_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_umask;
-+ __u32 __cpt_pad1;
-+} __attribute__ ((aligned (8)));
-+/* Followed by two/three CPT_OBJ_FILENAME for root, pwd and, optionally, altroot */
-+
-+/* CPT_OBJ_INODE: one struct inode */
-+struct cpt_inode_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_dev;
-+ __u64 cpt_ino;
-+ __u32 cpt_mode;
-+ __u32 cpt_nlink;
-+ __u32 cpt_uid;
-+ __u32 cpt_gid;
-+ __u64 cpt_rdev;
-+ __u64 cpt_size;
-+ __u64 cpt_blksize;
-+ __u64 cpt_atime;
-+ __u64 cpt_mtime;
-+ __u64 cpt_ctime;
-+ __u64 cpt_blocks;
-+ __u32 cpt_sb;
-+ __u32 __cpt_pad1;
-+} __attribute__ ((aligned (8)));
-+
-+/* CPT_OBJ_VFSMOUNT: one vfsmount */
-+struct cpt_vfsmount_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_mntflags;
-+ __u32 cpt_flags;
-+} __attribute__ ((aligned (8)));
-+
-+
-+struct cpt_flock_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_owner;
-+ __u32 cpt_pid;
-+ __u64 cpt_start;
-+ __u64 cpt_end;
-+ __u32 cpt_flags;
-+ __u32 cpt_type;
-+} __attribute__ ((aligned (8)));
-+
-+
-+struct cpt_tty_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_flags;
-+ __u32 cpt_link;
-+ __u32 cpt_index;
-+ __u32 cpt_drv_type;
-+ __u32 cpt_drv_subtype;
-+ __u32 cpt_drv_flags;
-+ __u8 cpt_packet;
-+ __u8 cpt_stopped;
-+ __u8 cpt_hw_stopped;
-+ __u8 cpt_flow_stopped;
-+
-+ __u32 cpt_canon_data;
-+ __u32 cpt_canon_head;
-+ __u32 cpt_canon_column;
-+ __u32 cpt_column;
-+ __u8 cpt_ctrl_status;
-+ __u8 cpt_erasing;
-+ __u8 cpt_lnext;
-+ __u8 cpt_icanon;
-+ __u8 cpt_raw;
-+ __u8 cpt_real_raw;
-+ __u8 cpt_closing;
-+ __u8 __cpt_pad1;
-+ __u16 cpt_minimum_to_wake;
-+ __u16 __cpt_pad2;
-+ __u32 cpt_pgrp;
-+ __u32 cpt_session;
-+ __u32 cpt_c_line;
-+ __u8 cpt_name[64];
-+ __u16 cpt_ws_row;
-+ __u16 cpt_ws_col;
-+ __u16 cpt_ws_prow;
-+ __u16 cpt_ws_pcol;
-+ __u8 cpt_c_cc[32];
-+ __u32 cpt_c_iflag;
-+ __u32 cpt_c_oflag;
-+ __u32 cpt_c_cflag;
-+ __u32 cpt_c_lflag;
-+ __u32 cpt_read_flags[4096/32];
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_sock_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_file;
-+ __u32 cpt_parent;
-+ __u32 cpt_index;
-+
-+ __u64 cpt_ssflags;
-+ __u16 cpt_type;
-+ __u16 cpt_family;
-+ __u8 cpt_sstate;
-+ __u8 cpt_passcred;
-+ __u8 cpt_state;
-+ __u8 cpt_reuse;
-+
-+ __u8 cpt_zapped;
-+ __u8 cpt_shutdown;
-+ __u8 cpt_userlocks;
-+ __u8 cpt_no_check;
-+ __u8 cpt_debug;
-+ __u8 cpt_rcvtstamp;
-+ __u8 cpt_localroute;
-+ __u8 cpt_protocol;
-+
-+ __u32 cpt_err;
-+ __u32 cpt_err_soft;
-+
-+ __u16 cpt_max_ack_backlog;
-+ __u16 __cpt_pad1;
-+ __u32 cpt_priority;
-+
-+ __u32 cpt_rcvlowat;
-+ __u32 cpt_bound_dev_if;
-+
-+ __u64 cpt_rcvtimeo;
-+ __u64 cpt_sndtimeo;
-+ __u32 cpt_rcvbuf;
-+ __u32 cpt_sndbuf;
-+ __u64 cpt_flags;
-+ __u64 cpt_lingertime;
-+ __u32 cpt_peer_pid;
-+ __u32 cpt_peer_uid;
-+
-+ __u32 cpt_peer_gid;
-+ __u32 cpt_laddrlen;
-+ __u32 cpt_laddr[128/4];
-+ __u32 cpt_raddrlen;
-+ __u32 cpt_raddr[128/4];
-+ /* AF_UNIX */
-+ __u32 cpt_peer;
-+
-+ __u8 cpt_socketpair;
-+ __u8 cpt_deleted;
-+ __u16 __cpt_pad4;
-+ __u32 __cpt_pad5;
-+/*
-+ struct sk_filter *sk_filter;
-+ */
-+
-+ __u64 cpt_stamp;
-+ __u32 cpt_daddr;
-+ __u16 cpt_dport;
-+ __u16 cpt_sport;
-+
-+ __u32 cpt_saddr;
-+ __u32 cpt_rcv_saddr;
-+
-+ __u32 cpt_uc_ttl;
-+ __u32 cpt_tos;
-+
-+ __u32 cpt_cmsg_flags;
-+ __u32 cpt_mc_index;
-+
-+ __u32 cpt_mc_addr;
-+/*
-+ struct ip_options *opt;
-+ struct ip_mc_socklist *mc_list;
-+ */
-+ __u8 cpt_hdrincl;
-+ __u8 cpt_mc_ttl;
-+ __u8 cpt_mc_loop;
-+ __u8 cpt_pmtudisc;
-+
-+ __u8 cpt_recverr;
-+ __u8 cpt_freebind;
-+ __u16 cpt_idcounter;
-+ __u32 cpt_cork_flags;
-+
-+ __u32 cpt_cork_fragsize;
-+ __u32 cpt_cork_length;
-+ __u32 cpt_cork_addr;
-+ __u32 cpt_cork_saddr;
-+ __u32 cpt_cork_daddr;
-+ __u32 cpt_cork_oif;
-+
-+ __u32 cpt_udp_pending;
-+ __u32 cpt_udp_corkflag;
-+ __u16 cpt_udp_encap;
-+ __u16 cpt_udp_len;
-+ __u32 __cpt_pad7;
-+
-+ __u64 cpt_saddr6[2];
-+ __u64 cpt_rcv_saddr6[2];
-+ __u64 cpt_daddr6[2];
-+ __u32 cpt_flow_label6;
-+ __u32 cpt_frag_size6;
-+ __u32 cpt_hop_limit6;
-+ __u32 cpt_mcast_hops6;
-+
-+ __u32 cpt_mcast_oif6;
-+ __u8 cpt_rxopt6;
-+ __u8 cpt_mc_loop6;
-+ __u8 cpt_recverr6;
-+ __u8 cpt_sndflow6;
-+
-+ __u8 cpt_pmtudisc6;
-+ __u8 cpt_ipv6only6;
-+ __u8 cpt_mapped;
-+ __u8 __cpt_pad8;
-+ __u32 cpt_pred_flags;
-+
-+ __u32 cpt_rcv_nxt;
-+ __u32 cpt_snd_nxt;
-+
-+ __u32 cpt_snd_una;
-+ __u32 cpt_snd_sml;
-+
-+ __u32 cpt_rcv_tstamp;
-+ __u32 cpt_lsndtime;
-+
-+ __u8 cpt_tcp_header_len;
-+ __u8 cpt_ack_pending;
-+ __u8 cpt_quick;
-+ __u8 cpt_pingpong;
-+ __u8 cpt_blocked;
-+ __u8 __cpt_pad9;
-+ __u16 __cpt_pad10;
-+
-+ __u32 cpt_ato;
-+ __u32 cpt_ack_timeout;
-+
-+ __u32 cpt_lrcvtime;
-+ __u16 cpt_last_seg_size;
-+ __u16 cpt_rcv_mss;
-+
-+ __u32 cpt_snd_wl1;
-+ __u32 cpt_snd_wnd;
-+
-+ __u32 cpt_max_window;
-+ __u32 cpt_pmtu_cookie;
-+
-+ __u32 cpt_mss_cache;
-+ __u16 cpt_mss_cache_std;
-+ __u16 cpt_mss_clamp;
-+
-+ __u16 cpt_ext_header_len;
-+ __u16 cpt_ext2_header_len;
-+ __u8 cpt_ca_state;
-+ __u8 cpt_retransmits;
-+ __u8 cpt_reordering;
-+ __u8 cpt_frto_counter;
-+
-+ __u32 cpt_frto_highmark;
-+ __u8 cpt_adv_cong;
-+ __u8 cpt_defer_accept;
-+ __u8 cpt_backoff;
-+ __u8 __cpt_pad11;
-+
-+ __u32 cpt_srtt;
-+ __u32 cpt_mdev;
-+
-+ __u32 cpt_mdev_max;
-+ __u32 cpt_rttvar;
-+
-+ __u32 cpt_rtt_seq;
-+ __u32 cpt_rto;
-+
-+ __u32 cpt_packets_out;
-+ __u32 cpt_left_out;
-+
-+ __u32 cpt_retrans_out;
-+ __u32 cpt_snd_ssthresh;
-+
-+ __u32 cpt_snd_cwnd;
-+ __u16 cpt_snd_cwnd_cnt;
-+ __u16 cpt_snd_cwnd_clamp;
-+
-+ __u32 cpt_snd_cwnd_used;
-+ __u32 cpt_snd_cwnd_stamp;
-+
-+ __u32 cpt_timeout;
-+ __u32 cpt_ka_timeout;
-+
-+ __u32 cpt_rcv_wnd;
-+ __u32 cpt_rcv_wup;
-+
-+ __u32 cpt_write_seq;
-+ __u32 cpt_pushed_seq;
-+
-+ __u32 cpt_copied_seq;
-+ __u8 cpt_tstamp_ok;
-+ __u8 cpt_wscale_ok;
-+ __u8 cpt_sack_ok;
-+ __u8 cpt_saw_tstamp;
-+
-+ __u8 cpt_snd_wscale;
-+ __u8 cpt_rcv_wscale;
-+ __u8 cpt_nonagle;
-+ __u8 cpt_keepalive_probes;
-+ __u32 cpt_rcv_tsval;
-+
-+ __u32 cpt_rcv_tsecr;
-+ __u32 cpt_ts_recent;
-+
-+ __u64 cpt_ts_recent_stamp;
-+ __u16 cpt_user_mss;
-+ __u8 cpt_dsack;
-+ __u8 cpt_eff_sacks;
-+ __u32 cpt_sack_array[2*5];
-+ __u32 cpt_window_clamp;
-+
-+ __u32 cpt_rcv_ssthresh;
-+ __u8 cpt_probes_out;
-+ __u8 cpt_num_sacks;
-+ __u16 cpt_advmss;
-+
-+ __u8 cpt_syn_retries;
-+ __u8 cpt_ecn_flags;
-+ __u16 cpt_prior_ssthresh;
-+ __u32 cpt_lost_out;
-+
-+ __u32 cpt_sacked_out;
-+ __u32 cpt_fackets_out;
-+
-+ __u32 cpt_high_seq;
-+ __u32 cpt_retrans_stamp;
-+
-+ __u32 cpt_undo_marker;
-+ __u32 cpt_undo_retrans;
-+
-+ __u32 cpt_urg_seq;
-+ __u16 cpt_urg_data;
-+ __u8 cpt_pending;
-+ __u8 cpt_urg_mode;
-+
-+ __u32 cpt_snd_up;
-+ __u32 cpt_keepalive_time;
-+
-+ __u32 cpt_keepalive_intvl;
-+ __u32 cpt_linger2;
-+
-+ __u32 cpt_rcvrtt_rtt;
-+ __u32 cpt_rcvrtt_seq;
-+
-+ __u32 cpt_rcvrtt_time;
-+ __u32 __cpt_pad12;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_openreq_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_rcv_isn;
-+ __u32 cpt_snt_isn;
-+
-+ __u16 cpt_rmt_port;
-+ __u16 cpt_mss;
-+ __u8 cpt_family;
-+ __u8 cpt_retrans;
-+ __u8 cpt_snd_wscale;
-+ __u8 cpt_rcv_wscale;
-+
-+ __u8 cpt_tstamp_ok;
-+ __u8 cpt_sack_ok;
-+ __u8 cpt_wscale_ok;
-+ __u8 cpt_ecn_ok;
-+ __u8 cpt_acked;
-+ __u8 __cpt_pad1;
-+ __u16 __cpt_pad2;
-+
-+ __u32 cpt_window_clamp;
-+ __u32 cpt_rcv_wnd;
-+ __u32 cpt_ts_recent;
-+ __u32 cpt_iif;
-+ __u64 cpt_expires;
-+
-+ __u64 cpt_loc_addr[2];
-+ __u64 cpt_rmt_addr[2];
-+/*
-+ struct ip_options *opt;
-+ */
-+
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_skb_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_owner;
-+ __u32 cpt_queue;
-+#define CPT_SKB_NQ 0
-+#define CPT_SKB_RQ 1
-+#define CPT_SKB_WQ 2
-+#define CPT_SKB_OFOQ 3
-+
-+ __u64 cpt_stamp;
-+ __u32 cpt_len;
-+ __u32 cpt_hspace;
-+ __u32 cpt_tspace;
-+ __u32 cpt_h;
-+ __u32 cpt_nh;
-+ __u32 cpt_mac;
-+
-+ __u64 cpt_cb[5];
-+ __u32 cpt_mac_len;
-+ __u32 cpt_csum;
-+ __u8 cpt_local_df;
-+ __u8 cpt_pkt_type;
-+ __u8 cpt_ip_summed;
-+ __u8 __cpt_pad1;
-+ __u32 cpt_priority;
-+ __u16 cpt_protocol;
-+ __u16 cpt_security;
-+ __u16 cpt_tso_segs;
-+ __u16 cpt_tso_size;
-+} __attribute__ ((aligned (8)));
-+
-+
-+struct cpt_sysvshm_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_key;
-+ __u64 cpt_uid;
-+ __u64 cpt_gid;
-+ __u64 cpt_cuid;
-+ __u64 cpt_cgid;
-+ __u64 cpt_mode;
-+ __u64 cpt_seq;
-+
-+ __u32 cpt_id;
-+ __u32 cpt_mlockuser;
-+ __u64 cpt_segsz;
-+ __u64 cpt_atime;
-+ __u64 cpt_ctime;
-+ __u64 cpt_dtime;
-+ __u64 cpt_creator;
-+ __u64 cpt_last;
-+} __attribute__ ((aligned (8)));
-+
-+
-+struct cpt_sysvsem_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_key;
-+ __u64 cpt_uid;
-+ __u64 cpt_gid;
-+ __u64 cpt_cuid;
-+ __u64 cpt_cgid;
-+ __u64 cpt_mode;
-+ __u64 cpt_seq;
-+ __u32 cpt_id;
-+ __u32 __cpt_pad1;
-+
-+ __u64 cpt_otime;
-+ __u64 cpt_ctime;
-+} __attribute__ ((aligned (8)));
-+/* Content is array of pairs semval/sempid */
-+
-+struct cpt_sysvsem_undo_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_id;
-+ __u32 cpt_nsem;
-+} __attribute__ ((aligned (8)));
-+
-+
-+struct cpt_mm_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_start_code;
-+ __u64 cpt_end_code;
-+ __u64 cpt_start_data;
-+ __u64 cpt_end_data;
-+ __u64 cpt_start_brk;
-+ __u64 cpt_brk;
-+ __u64 cpt_start_stack;
-+ __u64 cpt_start_arg;
-+ __u64 cpt_end_arg;
-+ __u64 cpt_start_env;
-+ __u64 cpt_end_env;
-+ __u64 cpt_def_flags;
-+ __u64 cpt_mmub;
-+ __u8 cpt_dumpable;
-+ __u8 cpt_vps_dumpable;
-+ __u8 cpt_used_hugetlb;
-+ __u8 __cpt_pad;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_page_block
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_start;
-+ __u64 cpt_end;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_remappage_block
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_start;
-+ __u64 cpt_end;
-+ __u64 cpt_pgoff;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_copypage_block
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_start;
-+ __u64 cpt_end;
-+ __u64 cpt_source;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_lazypage_block
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_start;
-+ __u64 cpt_end;
-+ __u64 cpt_index;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_vma_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_file;
-+ __u32 cpt_type;
-+#define CPT_VMA_TYPE_0 0
-+#define CPT_VMA_TYPE_SHM 1
-+ __u32 cpt_anonvma;
-+ __u64 cpt_anonvmaid;
-+
-+ __u64 cpt_start;
-+ __u64 cpt_end;
-+ __u64 cpt_flags;
-+ __u64 cpt_pgprot;
-+ __u64 cpt_pgoff;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_aio_ctx_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_max_reqs;
-+ __u32 cpt_ring_pages;
-+ __u32 cpt_tail;
-+ __u32 cpt_nr;
-+ __u64 cpt_mmap_base;
-+ /* Data (io_event's) and struct aio_ring are stored in user space VM */
-+} __attribute__ ((aligned (8)));
-+
-+
-+/* Format of MM section.
-+ *
-+ * It is array of MM objects (mm_struct). Each MM object is
-+ * header, encoding mm_struct, followed by array of VMA objects.
-+ * Each VMA consists of VMA header, encoding vm_area_struct, and
-+ * if the VMA contains copied pages, the header is followed by
-+ * array of tuples start-end each followed by data.
-+ *
-+ * ATTN: no block/page alignment. Only 64bit alignment. This might be not good?
-+ */
-+
-+struct cpt_restart_block {
-+ __u64 fn;
-+#define CPT_RBL_0 0
-+#define CPT_RBL_NANOSLEEP 1
-+#define CPT_RBL_COMPAT_NANOSLEEP 2
-+ __u64 arg0;
-+ __u64 arg1;
-+ __u64 arg2;
-+ __u64 arg3;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_siginfo_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_qflags;
-+ __u32 cpt_signo;
-+ __u32 cpt_errno;
-+ __u32 cpt_code;
-+
-+ __u64 cpt_sigval;
-+ __u32 cpt_pid;
-+ __u32 cpt_uid;
-+ __u64 cpt_utime;
-+ __u64 cpt_stime;
-+
-+ __u64 cpt_user;
-+} __attribute__ ((aligned (8)));
-+
-+/* Portable representations of segment registers */
-+
-+#define CPT_SEG_ZERO 0
-+#define CPT_SEG_TLS1 1
-+#define CPT_SEG_TLS2 2
-+#define CPT_SEG_TLS3 3
-+#define CPT_SEG_USER32_DS 4
-+#define CPT_SEG_USER32_CS 5
-+#define CPT_SEG_USER64_DS 6
-+#define CPT_SEG_USER64_CS 7
-+#define CPT_SEG_LDT 256
-+
-+struct cpt_x86_regs
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_debugreg[8];
-+ __u32 cpt_fs;
-+ __u32 cpt_gs;
-+
-+ __u32 cpt_ebx;
-+ __u32 cpt_ecx;
-+ __u32 cpt_edx;
-+ __u32 cpt_esi;
-+ __u32 cpt_edi;
-+ __u32 cpt_ebp;
-+ __u32 cpt_eax;
-+ __u32 cpt_xds;
-+ __u32 cpt_xes;
-+ __u32 cpt_orig_eax;
-+ __u32 cpt_eip;
-+ __u32 cpt_xcs;
-+ __u32 cpt_eflags;
-+ __u32 cpt_esp;
-+ __u32 cpt_xss;
-+ __u32 cpt_pad;
-+};
-+
-+struct cpt_x86_64_regs
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_debugreg[8];
-+
-+ __u64 cpt_fsbase;
-+ __u64 cpt_gsbase;
-+ __u32 cpt_fsindex;
-+ __u32 cpt_gsindex;
-+ __u32 cpt_ds;
-+ __u32 cpt_es;
-+
-+ __u64 cpt_r15;
-+ __u64 cpt_r14;
-+ __u64 cpt_r13;
-+ __u64 cpt_r12;
-+ __u64 cpt_rbp;
-+ __u64 cpt_rbx;
-+ __u64 cpt_r11;
-+ __u64 cpt_r10;
-+ __u64 cpt_r9;
-+ __u64 cpt_r8;
-+ __u64 cpt_rax;
-+ __u64 cpt_rcx;
-+ __u64 cpt_rdx;
-+ __u64 cpt_rsi;
-+ __u64 cpt_rdi;
-+ __u64 cpt_orig_rax;
-+ __u64 cpt_rip;
-+ __u64 cpt_cs;
-+ __u64 cpt_eflags;
-+ __u64 cpt_rsp;
-+ __u64 cpt_ss;
-+};
-+
-+struct cpt_task_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_state;
-+ __u64 cpt_flags;
-+ __u64 cpt_ptrace;
-+ __u32 cpt_prio;
-+ __u32 cpt_static_prio;
-+ __u32 cpt_policy;
-+ __u32 cpt_rt_priority;
-+
-+ /* struct thread_info */
-+ __u64 cpt_exec_domain;
-+ __u64 cpt_thrflags;
-+ __u64 cpt_thrstatus;
-+ __u64 cpt_addr_limit;
-+
-+ __u64 cpt_personality;
-+
-+ __u64 cpt_mm;
-+ __u64 cpt_files;
-+ __u64 cpt_fs;
-+ __u64 cpt_signal;
-+ __u64 cpt_sighand;
-+ __u64 cpt_sigblocked;
-+ __u64 cpt_sigrblocked;
-+ __u64 cpt_sigpending;
-+ __u64 cpt_namespace;
-+ __u64 cpt_sysvsem_undo;
-+ __u32 cpt_pid;
-+ __u32 cpt_tgid;
-+ __u32 cpt_ppid;
-+ __u32 cpt_rppid;
-+ __u32 cpt_pgrp;
-+ __u32 cpt_session;
-+ __u32 cpt_old_pgrp;
-+ __u32 __cpt_pad;
-+ __u32 cpt_leader;
-+ __u8 cpt_pn_state;
-+ __u8 cpt_stopped_state;
-+ __u8 cpt_sigsuspend_state;
-+ __u8 cpt_64bit;
-+ __u64 cpt_set_tid;
-+ __u64 cpt_clear_tid;
-+ __u32 cpt_exit_code;
-+ __u32 cpt_exit_signal;
-+ __u32 cpt_pdeath_signal;
-+ __u32 cpt_user;
-+ __u32 cpt_uid;
-+ __u32 cpt_euid;
-+ __u32 cpt_suid;
-+ __u32 cpt_fsuid;
-+ __u32 cpt_gid;
-+ __u32 cpt_egid;
-+ __u32 cpt_sgid;
-+ __u32 cpt_fsgid;
-+ __u32 cpt_ngids;
-+ __u32 cpt_gids[32];
-+ __u32 __cpt_pad2;
-+ __u64 cpt_ecap;
-+ __u64 cpt_icap;
-+ __u64 cpt_pcap;
-+ __u8 cpt_comm[16];
-+ __u64 cpt_tls[3];
-+ struct cpt_restart_block cpt_restart;
-+ __u64 cpt_it_real_value; /* V0: jiffies, V1: nsec */
-+ __u64 cpt_it_real_incr; /* V0: jiffies, V1: nsec */
-+ __u64 cpt_it_prof_value;
-+ __u64 cpt_it_prof_incr;
-+ __u64 cpt_it_virt_value;
-+ __u64 cpt_it_virt_incr;
-+
-+ __u16 cpt_used_math;
-+ __u8 cpt_keepcap;
-+ __u8 cpt_did_exec;
-+ __u32 cpt_ptrace_message;
-+
-+ __u64 cpt_utime;
-+ __u64 cpt_stime;
-+ __u64 cpt_starttime; /* V0: jiffies, V1: timespec */
-+ __u64 cpt_nvcsw;
-+ __u64 cpt_nivcsw;
-+ __u64 cpt_min_flt;
-+ __u64 cpt_maj_flt;
-+
-+ __u64 cpt_sigsuspend_blocked;
-+ __u64 cpt_cutime, cpt_cstime;
-+ __u64 cpt_cnvcsw, cpt_cnivcsw;
-+ __u64 cpt_cmin_flt, cpt_cmaj_flt;
-+
-+#define CPT_RLIM_NLIMITS 16
-+ __u64 cpt_rlim_cur[CPT_RLIM_NLIMITS];
-+ __u64 cpt_rlim_max[CPT_RLIM_NLIMITS];
-+
-+ __u64 cpt_task_ub;
-+ __u64 cpt_exec_ub;
-+ __u64 cpt_mm_ub;
-+ __u64 cpt_fork_sub;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_signal_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_leader;
-+ __u8 cpt_pgrp_type;
-+ __u8 cpt_old_pgrp_type;
-+ __u8 cpt_session_type;
-+#define CPT_PGRP_NORMAL 0
-+#define CPT_PGRP_ORPHAN 1
-+#define CPT_PGRP_STRAY 2
-+ __u8 __cpt_pad1;
-+ __u64 cpt_pgrp;
-+ __u64 cpt_old_pgrp;
-+ __u64 cpt_session;
-+ __u64 cpt_sigpending;
-+ __u64 cpt_ctty;
-+
-+ __u32 cpt_curr_target;
-+ __u32 cpt_group_exit;
-+ __u32 cpt_group_exit_code;
-+ __u32 cpt_group_exit_task;
-+ __u32 cpt_notify_count;
-+ __u32 cpt_group_stop_count;
-+ __u32 cpt_stop_state;
-+ __u32 __cpt_pad2;
-+
-+ __u64 cpt_utime, cpt_stime, cpt_cutime, cpt_cstime;
-+ __u64 cpt_nvcsw, cpt_nivcsw, cpt_cnvcsw, cpt_cnivcsw;
-+ __u64 cpt_min_flt, cpt_maj_flt, cpt_cmin_flt, cpt_cmaj_flt;
-+
-+ __u64 cpt_rlim_cur[CPT_RLIM_NLIMITS];
-+ __u64 cpt_rlim_max[CPT_RLIM_NLIMITS];
-+} __attribute__ ((aligned (8)));
-+/* Followed by list of posix timers. */
-+
-+struct cpt_sighand_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+} __attribute__ ((aligned (8)));
-+/* Followed by list of sighandlers. */
-+
-+struct cpt_sighandler_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_signo;
-+ __u32 __cpt_pad1;
-+ __u64 cpt_handler;
-+ __u64 cpt_restorer;
-+ __u64 cpt_flags;
-+ __u64 cpt_mask;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_netdev_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_index;
-+ __u32 cpt_flags;
-+ __u8 cpt_name[16];
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_ifaddr_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u32 cpt_index;
-+ __u8 cpt_family;
-+ __u8 cpt_masklen;
-+ __u8 cpt_flags;
-+ __u8 cpt_scope;
-+ __u32 cpt_address[4];
-+ __u32 cpt_peer[4];
-+ __u32 cpt_broadcast[4];
-+ __u8 cpt_label[16];
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_ipct_tuple
-+{
-+ __u32 cpt_src;
-+ __u16 cpt_srcport;
-+ __u16 __cpt_pad1;
-+
-+ __u32 cpt_dst;
-+ __u16 cpt_dstport;
-+ __u8 cpt_protonum;
-+ __u8 cpt_dir; /* TEMPORARY HACK TO VALIDATE CODE */
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_nat_manip
-+{
-+ __u8 cpt_direction;
-+ __u8 cpt_hooknum;
-+ __u8 cpt_maniptype;
-+ __u8 __cpt_pad1;
-+
-+ __u32 cpt_manip_addr;
-+ __u16 cpt_manip_port;
-+ __u16 __cpt_pad2;
-+ __u32 __cpt_pad3;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_nat_seq
-+{
-+ __u32 cpt_correction_pos;
-+ __u32 cpt_offset_before;
-+ __u32 cpt_offset_after;
-+ __u32 __cpt_pad1;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_ip_connexpect_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_timeout;
-+ __u32 cpt_sibling_conntrack; /* Index of child conntrack */
-+ __u32 cpt_seq; /* id in 2.6.15 */
-+
-+ struct cpt_ipct_tuple cpt_ct_tuple; /* NU 2.6.15 */
-+ struct cpt_ipct_tuple cpt_tuple;
-+ struct cpt_ipct_tuple cpt_mask;
-+
-+ /* union ip_conntrack_expect_help. Used by ftp, irc, amanda */
-+ __u32 cpt_help[3]; /* NU 2.6.15 */
-+ __u16 cpt_manip_proto;
-+ __u8 cpt_dir;
-+ __u8 cpt_flags;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_ip_conntrack_image
-+{
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ struct cpt_ipct_tuple cpt_tuple[2];
-+ __u64 cpt_status;
-+ __u64 cpt_timeout;
-+ __u32 cpt_index;
-+ __u8 cpt_ct_helper;
-+ __u8 cpt_nat_helper;
-+ __u16 cpt_pad1;
-+
-+ /* union ip_conntrack_proto. Used by tcp and icmp. */
-+ __u32 cpt_proto_data[12];
-+
-+ /* union ip_conntrack_help. Used by ftp and pptp helper.
-+ * We do not support pptp...
-+ */
-+ __u32 cpt_help_data[6];
-+
-+ /* nat info */
-+ __u32 cpt_initialized; /* NU 2.6.15 */
-+ __u32 cpt_num_manips; /* NU 2.6.15 */
-+ struct cpt_nat_manip cpt_nat_manips[6]; /* NU 2.6.15 */
-+
-+ struct cpt_nat_seq cpt_nat_seq[2];
-+
-+ __u32 cpt_masq_index;
-+ __u32 cpt_id;
-+ __u32 cpt_mark;
-+} __attribute__ ((aligned (8)));
-+
-+struct cpt_beancounter_image {
-+ __u64 cpt_next;
-+ __u32 cpt_object;
-+ __u16 cpt_hdrlen;
-+ __u16 cpt_content;
-+
-+ __u64 cpt_parent;
-+ __u32 cpt_id;
-+ __u32 __cpt_pad;
-+ __u64 cpt_parms[32 * 6 * 2];
-+} __attribute__ ((aligned (8)));
-+
-+#ifdef __KERNEL__
-+
-+static inline void *cpt_ptr_import(__u64 ptr)
-+{
-+ return (void*)(unsigned long)ptr;
-+}
-+
-+static inline __u64 cpt_ptr_export(void __user *ptr)
-+{
-+ return (__u64)(unsigned long)ptr;
-+}
-+
-+static inline void cpt_sigset_import(sigset_t *sig, __u64 ptr)
-+{
-+ memcpy(sig, &ptr, sizeof(*sig));
-+}
-+
-+static inline __u64 cpt_sigset_export(sigset_t *sig)
-+{
-+ return *(__u64*)sig;
-+}
-+
-+static inline __u64 cpt_timespec_export(struct timespec *tv)
-+{
-+ return (((u64)tv->tv_sec) << 32) + tv->tv_nsec;
-+}
-+
-+static inline void cpt_timespec_import(struct timespec *tv, __u64 val)
-+{
-+ tv->tv_sec = val>>32;
-+ tv->tv_nsec = (val&0xFFFFFFFF);
-+}
-+
-+static inline __u64 cpt_timeval_export(struct timeval *tv)
-+{
-+ return (((u64)tv->tv_sec) << 32) + tv->tv_usec;
-+}
-+
-+static inline void cpt_timeval_import(struct timeval *tv, __u64 val)
-+{
-+ tv->tv_sec = val>>32;
-+ tv->tv_usec = (val&0xFFFFFFFF);
-+}
-+
-+#endif
-+
-+#endif /* __CPT_IMAGE_H_ */
-diff -upr linux-2.6.16.orig/include/linux/cpt_ioctl.h linux-2.6.16-026test009/include/linux/cpt_ioctl.h
---- linux-2.6.16.orig/include/linux/cpt_ioctl.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/cpt_ioctl.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,41 @@
-+/*
-+ *
-+ * include/linux/cpt_ioctl.h
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _CPT_IOCTL_H_
-+#define _CPT_IOCTL_H_ 1
-+
-+#include <linux/types.h>
-+#include <linux/ioctl.h>
-+
-+#define CPTCTLTYPE '-'
-+#define CPT_SET_DUMPFD _IOW(CPTCTLTYPE, 1, int)
-+#define CPT_SET_STATUSFD _IOW(CPTCTLTYPE, 2, int)
-+#define CPT_SET_LOCKFD _IOW(CPTCTLTYPE, 3, int)
-+#define CPT_SET_VEID _IOW(CPTCTLTYPE, 4, int)
-+#define CPT_SUSPEND _IO(CPTCTLTYPE, 5)
-+#define CPT_DUMP _IO(CPTCTLTYPE, 6)
-+#define CPT_UNDUMP _IO(CPTCTLTYPE, 7)
-+#define CPT_RESUME _IO(CPTCTLTYPE, 8)
-+#define CPT_KILL _IO(CPTCTLTYPE, 9)
-+#define CPT_JOIN_CONTEXT _IO(CPTCTLTYPE, 10)
-+#define CPT_GET_CONTEXT _IOW(CPTCTLTYPE, 11, unsigned int)
-+#define CPT_PUT_CONTEXT _IO(CPTCTLTYPE, 12)
-+#define CPT_SET_PAGEINFDIN _IOW(CPTCTLTYPE, 13, int)
-+#define CPT_SET_PAGEINFDOUT _IOW(CPTCTLTYPE, 14, int)
-+#define CPT_PAGEIND _IO(CPTCTLTYPE, 15)
-+#define CPT_VMPREP _IOW(CPTCTLTYPE, 16, int)
-+#define CPT_SET_LAZY _IOW(CPTCTLTYPE, 17, int)
-+#define CPT_SET_CPU_FLAGS _IOW(CPTCTLTYPE, 18, unsigned int)
-+#define CPT_TEST_CAPS _IOW(CPTCTLTYPE, 19, unsigned int)
-+#define CPT_TEST_VECAPS _IOW(CPTCTLTYPE, 20, unsigned int)
-+#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/linux/cpu.h linux-2.6.16-026test009/include/linux/cpu.h
---- linux-2.6.16.orig/include/linux/cpu.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/cpu.h 2006-04-19 15:02:11.000000000 +0400
-@@ -32,7 +32,7 @@ struct cpu {
- };
-
- extern int register_cpu(struct cpu *, int, struct node *);
--extern struct sys_device *get_cpu_sysdev(int cpu);
-+extern struct sys_device *get_cpu_sysdev(unsigned cpu);
- #ifdef CONFIG_HOTPLUG_CPU
- extern void unregister_cpu(struct cpu *, struct node *);
- #endif
-diff -upr linux-2.6.16.orig/include/linux/dcache.h linux-2.6.16-026test009/include/linux/dcache.h
---- linux-2.6.16.orig/include/linux/dcache.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/dcache.h 2006-04-19 15:02:12.000000000 +0400
-@@ -9,6 +9,8 @@
- #include <linux/cache.h>
- #include <linux/rcupdate.h>
-
-+#include <ub/ub_dcache.h>
-+
- struct nameidata;
- struct vfsmount;
-
-@@ -111,6 +113,9 @@ struct dentry {
- struct dcookie_struct *d_cookie; /* cookie, if any */
- #endif
- int d_mounted;
-+#ifdef CONFIG_USER_RESOURCE
-+ struct dentry_beancounter dentry_bc;
-+#endif
- unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
- };
-
-@@ -161,7 +166,11 @@ d_iput: no no no yes
-
- #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
- #define DCACHE_UNHASHED 0x0010
-+#define DCACHE_VIRTUAL 0x0100 /* ve accessible */
-+
-+extern void mark_tree_virtual(struct vfsmount *m, struct dentry *d);
-
-+extern kmem_cache_t *dentry_cache;
- extern spinlock_t dcache_lock;
-
- /**
-@@ -215,7 +224,8 @@ extern struct dentry * d_alloc_anon(stru
- extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
- extern void shrink_dcache_sb(struct super_block *);
- extern void shrink_dcache_parent(struct dentry *);
--extern void shrink_dcache_anon(struct hlist_head *);
-+extern void shrink_dcache_anon(struct super_block *);
-+extern void dcache_shrinker_wait_sb(struct super_block *sb);
- extern int d_invalidate(struct dentry *);
-
- /* only used at mount-time */
-@@ -277,6 +287,7 @@ extern struct dentry * __d_lookup(struct
- /* validate "insecure" dentry pointer */
- extern int d_validate(struct dentry *, struct dentry *);
-
-+extern int d_root_check(struct dentry *, struct vfsmount *);
- extern char * d_path(struct dentry *, struct vfsmount *, char *, int);
-
- /* Allocation counts.. */
-@@ -297,6 +308,8 @@ extern char * d_path(struct dentry *, st
- static inline struct dentry *dget(struct dentry *dentry)
- {
- if (dentry) {
-+ if (ub_dget_testone(dentry))
-+ BUG();
- BUG_ON(!atomic_read(&dentry->d_count));
- atomic_inc(&dentry->d_count);
- }
-@@ -340,6 +353,8 @@ extern struct dentry *lookup_create(stru
-
- extern int sysctl_vfs_cache_pressure;
-
-+extern int check_area_access_ve(struct dentry *, struct vfsmount *);
-+extern int check_area_execute_ve(struct dentry *, struct vfsmount *);
- #endif /* __KERNEL__ */
-
- #endif /* __LINUX_DCACHE_H */
-diff -upr linux-2.6.16.orig/include/linux/devpts_fs.h linux-2.6.16-026test009/include/linux/devpts_fs.h
---- linux-2.6.16.orig/include/linux/devpts_fs.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/devpts_fs.h 2006-04-19 15:02:12.000000000 +0400
-@@ -21,6 +21,15 @@ int devpts_pty_new(struct tty_struct *tt
- struct tty_struct *devpts_get_tty(int number); /* get tty structure */
- void devpts_pty_kill(int number); /* unlink */
-
-+struct devpts_config {
-+ int setuid;
-+ int setgid;
-+ uid_t uid;
-+ gid_t gid;
-+ umode_t mode;
-+};
-+
-+extern struct devpts_config devpts_config;
- #else
-
- /* Dummy stubs in the no-pty case */
-diff -upr linux-2.6.16.orig/include/linux/elfcore.h linux-2.6.16-026test009/include/linux/elfcore.h
---- linux-2.6.16.orig/include/linux/elfcore.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/elfcore.h 2006-04-19 15:02:12.000000000 +0400
-@@ -7,6 +7,8 @@
- #include <linux/user.h>
- #include <linux/ptrace.h>
-
-+extern int sysctl_at_vsyscall;
-+
- struct elf_siginfo
- {
- int si_signo; /* signal number */
-diff -upr linux-2.6.16.orig/include/linux/eventpoll.h linux-2.6.16-026test009/include/linux/eventpoll.h
---- linux-2.6.16.orig/include/linux/eventpoll.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/eventpoll.h 2006-04-19 15:02:12.000000000 +0400
-@@ -85,6 +85,91 @@ static inline void eventpoll_release(str
- eventpoll_release_file(file);
- }
-
-+struct epoll_filefd {
-+ struct file *file;
-+ int fd;
-+};
-+
-+/*
-+ * This structure is stored inside the "private_data" member of the file
-+ * structure and represents the main data structure for the eventpoll
-+ * interface.
-+ */
-+struct eventpoll {
-+ /* Protects access to this structure */
-+ rwlock_t lock;
-+
-+ /*
-+ * This semaphore is used to ensure that files are not removed
-+ * while epoll is using them. This is read-held during the event
-+ * collection loop and it is write-held during the file cleanup
-+ * path, the epoll file exit code and the ctl operations.
-+ */
-+ struct rw_semaphore sem;
-+
-+ /* Wait queue used by sys_epoll_wait() */
-+ wait_queue_head_t wq;
-+
-+ /* Wait queue used by file->poll() */
-+ wait_queue_head_t poll_wait;
-+
-+ /* List of ready file descriptors */
-+ struct list_head rdllist;
-+
-+ /* RB-Tree root used to store monitored fd structs */
-+ struct rb_root rbr;
-+};
-+
-+/*
-+ * Each file descriptor added to the eventpoll interface will
-+ * have an entry of this type linked to the RB-tree.
-+ */
-+struct epitem {
-+ /* RB-Tree node used to link this structure to the eventpoll rb-tree */
-+ struct rb_node rbn;
-+
-+ /* List header used to link this structure to the eventpoll ready list */
-+ struct list_head rdllink;
-+
-+ /* The file descriptor information this item refers to */
-+ struct epoll_filefd ffd;
-+
-+ /* Number of active wait queues attached to poll operations */
-+ int nwait;
-+
-+ /* List containing poll wait queues */
-+ struct list_head pwqlist;
-+
-+ /* The "container" of this item */
-+ struct eventpoll *ep;
-+
-+ /* The structure that describes the interested events and the source fd */
-+ struct epoll_event event;
-+
-+ /*
-+ * Used to keep track of the usage count of the structure. This prevents
-+ * the structure from disappearing from underneath our processing.
-+ */
-+ atomic_t usecnt;
-+
-+ /* List header used to link this item to the "struct file" items list */
-+ struct list_head fllink;
-+
-+ /* List header used to link the item to the transfer list */
-+ struct list_head txlink;
-+
-+ /*
-+ * This is used during the collection/transfer of events to userspace
-+ * to pin items empty events set.
-+ */
-+ unsigned int revents;
-+};
-+
-+extern struct semaphore epsem;
-+struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
-+int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-+ struct file *tfile, int fd);
-+void ep_release_epitem(struct epitem *epi);
-
- #else
-
-diff -upr linux-2.6.16.orig/include/linux/faudit.h linux-2.6.16-026test009/include/linux/faudit.h
---- linux-2.6.16.orig/include/linux/faudit.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/faudit.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,38 @@
-+/*
-+ * include/linux/faudit.h
-+ *
-+ * Copyright (C) 2005 SWSoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __FAUDIT_H_
-+#define __FAUDIT_H_
-+
-+#include <linux/config.h>
-+#include <linux/virtinfo.h>
-+
-+struct vfsmount;
-+struct dentry;
-+struct pt_regs;
-+
-+struct faudit_regs_arg {
-+ int err;
-+ struct pt_regs *regs;
-+};
-+
-+struct faudit_stat_arg {
-+ int err;
-+ struct vfsmount *mnt;
-+ struct dentry *dentry;
-+ void *stat;
-+};
-+
-+#define VIRTINFO_FAUDIT (0)
-+#define VIRTINFO_FAUDIT_STAT (VIRTINFO_FAUDIT + 0)
-+#define VIRTINFO_FAUDIT_STATFS (VIRTINFO_FAUDIT + 1)
-+#define VIRTINFO_FAUDIT_STATFS64 (VIRTINFO_FAUDIT + 2)
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/linux/fb.h linux-2.6.16-026test009/include/linux/fb.h
---- linux-2.6.16.orig/include/linux/fb.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/fb.h 2006-04-19 15:02:11.000000000 +0400
-@@ -839,12 +839,10 @@ struct fb_info {
- #define FB_LEFT_POS(bpp) (32 - bpp)
- #define FB_SHIFT_HIGH(val, bits) ((val) >> (bits))
- #define FB_SHIFT_LOW(val, bits) ((val) << (bits))
--#define FB_BIT_NR(b) (7 - (b))
- #else
- #define FB_LEFT_POS(bpp) (0)
- #define FB_SHIFT_HIGH(val, bits) ((val) << (bits))
- #define FB_SHIFT_LOW(val, bits) ((val) >> (bits))
--#define FB_BIT_NR(b) (b)
- #endif
-
- /*
-diff -upr linux-2.6.16.orig/include/linux/fs.h linux-2.6.16-026test009/include/linux/fs.h
---- linux-2.6.16.orig/include/linux/fs.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/fs.h 2006-04-19 15:02:12.000000000 +0400
-@@ -7,6 +7,7 @@
- */
-
- #include <linux/config.h>
-+#include <linux/ve_owner.h>
- #include <linux/limits.h>
- #include <linux/ioctl.h>
-
-@@ -64,6 +65,7 @@ extern int dir_notify_enable;
- #define FMODE_LSEEK 4
- #define FMODE_PREAD 8
- #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */
-+#define FMODE_QUOTACTL 4
-
- #define RW_MASK 1
- #define RWA_MASK 2
-@@ -83,6 +85,7 @@ extern int dir_notify_enable;
- /* public flags for file_system_type */
- #define FS_REQUIRES_DEV 1
- #define FS_BINARY_MOUNTDATA 2
-+#define FS_VIRTUALIZED 64 /* Can mount this fstype inside ve */
- #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
- #define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon
- * as nfs_rename() will be cleaned up
-@@ -297,6 +300,9 @@ struct iattr {
- * Includes for diskquotas.
- */
- #include <linux/quota.h>
-+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
-+#include <linux/vzquota_qlnk.h>
-+#endif
-
- /**
- * enum positive_aop_returns - aop return codes with specific semantics
-@@ -493,6 +499,9 @@ struct inode {
- #ifdef CONFIG_QUOTA
- struct dquot *i_dquot[MAXQUOTAS];
- #endif
-+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
-+ struct vz_quota_ilink i_qlnk;
-+#endif
- /* These three should probably be a union */
- struct list_head i_devices;
- struct pipe_inode_info *i_pipe;
-@@ -527,6 +536,8 @@ struct inode {
- #endif
- };
-
-+extern kmem_cache_t *inode_cachep;
-+
- /*
- * NOTE: in a 32bit arch with a preemptable kernel and
- * an UP compile the i_size_read/write must be atomic
-@@ -588,6 +599,20 @@ static inline unsigned imajor(struct ino
-
- extern struct block_device *I_BDEV(struct inode *inode);
-
-+struct exec_perm {
-+ umode_t mode;
-+ uid_t uid, gid;
-+ int set;
-+};
-+
-+static inline void set_exec_perm(struct exec_perm *perm, struct inode *ino)
-+{
-+ perm->set = 1;
-+ perm->mode = ino->i_mode;
-+ perm->uid = ino->i_uid;
-+ perm->gid = ino->i_gid;
-+}
-+
- struct fown_struct {
- rwlock_t lock; /* protects pid, uid, euid fields */
- int pid; /* pid or -pgrp where SIGIO should be sent */
-@@ -646,7 +671,10 @@ struct file {
- spinlock_t f_ep_lock;
- #endif /* #ifdef CONFIG_EPOLL */
- struct address_space *f_mapping;
-+ struct ve_struct *owner_env;
- };
-+DCL_VE_OWNER_PROTO(FILP, struct file, owner_env)
-+
- extern spinlock_t files_lock;
- #define file_list_lock() spin_lock(&files_lock);
- #define file_list_unlock() spin_unlock(&files_lock);
-@@ -710,6 +738,9 @@ struct file_lock {
- struct file *fl_file;
- unsigned char fl_flags;
- unsigned char fl_type;
-+#ifdef CONFIG_USER_RESOURCE
-+ unsigned char fl_charged;
-+#endif
- loff_t fl_start;
- loff_t fl_end;
-
-@@ -831,6 +862,7 @@ struct super_block {
- struct list_head s_io; /* parked for writeback */
- struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
- struct list_head s_files;
-+ struct list_head s_dshrinkers; /* active dcache shrinkers */
-
- struct block_device *s_bdev;
- struct list_head s_instances;
-@@ -902,7 +934,7 @@ static inline void unlock_super(struct s
- /*
- * VFS helper functions..
- */
--extern int vfs_permission(struct nameidata *, int);
-+extern int vfs_permission(struct nameidata *, int, struct exec_perm *);
- extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
- extern int vfs_mkdir(struct inode *, struct dentry *, int);
- extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
-@@ -1041,7 +1073,8 @@ struct inode_operations {
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
- void (*truncate) (struct inode *);
-- int (*permission) (struct inode *, int, struct nameidata *);
-+ int (*permission) (struct inode *, int, struct nameidata *,
-+ struct exec_perm *);
- int (*setattr) (struct dentry *, struct iattr *);
- int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
- int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
-@@ -1089,6 +1122,8 @@ struct super_operations {
-
- ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
- ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
-+
-+ struct inode *(*get_quota_root)(struct super_block *);
- };
-
- /* Inode state bits. Protected by inode_lock. */
-@@ -1246,8 +1281,14 @@ struct file_system_type {
- struct module *owner;
- struct file_system_type * next;
- struct list_head fs_supers;
-+ struct ve_struct *owner_env;
- };
-
-+DCL_VE_OWNER_PROTO(FSTYPE, struct file_system_type, owner_env)
-+
-+void get_filesystem(struct file_system_type *fs);
-+void put_filesystem(struct file_system_type *fs);
-+
- struct super_block *get_sb_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int));
-@@ -1285,6 +1326,7 @@ extern struct vfsmount *kern_mount(struc
- extern int may_umount_tree(struct vfsmount *);
- extern int may_umount(struct vfsmount *);
- extern void umount_tree(struct vfsmount *, int, struct list_head *);
-+#define kern_umount mntput
- extern void release_mounts(struct list_head *);
- extern long do_mount(char *, char *, char *, unsigned long, void *);
- extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
-@@ -1401,7 +1443,7 @@ extern void release_chrdev_list(void *);
- #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
- extern const char *__bdevname(dev_t, char *buffer);
- extern const char *bdevname(struct block_device *bdev, char *buffer);
--extern struct block_device *lookup_bdev(const char *);
-+extern struct block_device *lookup_bdev(const char *, int mode);
- extern struct block_device *open_bdev_excl(const char *, int, void *);
- extern void close_bdev_excl(struct block_device *);
- extern void *acquire_blkdev_list(void);
-@@ -1433,7 +1475,7 @@ extern int fs_may_remount_ro(struct supe
- #define bio_data_dir(bio) ((bio)->bi_rw & 1)
-
- extern int check_disk_change(struct block_device *);
--extern int invalidate_inodes(struct super_block *);
-+extern int invalidate_inodes(struct super_block *, int);
- extern int __invalidate_device(struct block_device *);
- extern int invalidate_partition(struct gendisk *, int);
- unsigned long invalidate_mapping_pages(struct address_space *mapping,
-@@ -1463,9 +1505,10 @@ extern int do_remount_sb(struct super_bl
- void *data, int force);
- extern sector_t bmap(struct inode *, sector_t);
- extern int notify_change(struct dentry *, struct iattr *);
--extern int permission(struct inode *, int, struct nameidata *);
-+extern int permission(struct inode *, int, struct nameidata *,
-+ struct exec_perm *);
- extern int generic_permission(struct inode *, int,
-- int (*check_acl)(struct inode *, int));
-+ int (*check_acl)(struct inode *, int), struct exec_perm *);
-
- extern int get_write_access(struct inode *);
- extern int deny_write_access(struct file *);
-@@ -1484,7 +1527,9 @@ extern int open_namei(int dfd, const cha
- extern int may_open(struct nameidata *, int, int);
-
- extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
--extern struct file * open_exec(const char *);
-+
-+struct linux_binprm;
-+extern struct file * open_exec(const char *, struct linux_binprm *);
-
- /* fs/dcache.c -- generic fs support functions */
- extern int is_subdir(struct dentry *, struct dentry *);
-diff -upr linux-2.6.16.orig/include/linux/genhd.h linux-2.6.16-026test009/include/linux/genhd.h
---- linux-2.6.16.orig/include/linux/genhd.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/genhd.h 2006-04-19 15:02:12.000000000 +0400
-@@ -421,6 +421,7 @@ static inline struct block_device *bdget
- return bdget(MKDEV(disk->major, disk->first_minor) + index);
- }
-
-+extern struct subsystem block_subsys;
- #endif
-
- #endif
-diff -upr linux-2.6.16.orig/include/linux/gfp.h linux-2.6.16-026test009/include/linux/gfp.h
---- linux-2.6.16.orig/include/linux/gfp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/gfp.h 2006-04-19 15:02:11.000000000 +0400
-@@ -47,6 +47,8 @@ struct vm_area_struct;
- #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
- #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
- #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
-+#define __GFP_UBC ((__force gfp_t)0x40000u)/* charge kmem in buddy and slab */
-+#define __GFP_SOFT_UBC ((__force gfp_t)0x80000u)/* use soft charging */
-
- #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
- #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
-@@ -55,14 +57,17 @@ struct vm_area_struct;
- #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
- __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-- __GFP_NOMEMALLOC|__GFP_HARDWALL)
-+ __GFP_NOMEMALLOC|__GFP_HARDWALL| \
-+ __GFP_UBC|__GFP_SOFT_UBC)
-
- /* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
- #define GFP_ATOMIC (__GFP_HIGH)
- #define GFP_NOIO (__GFP_WAIT)
- #define GFP_NOFS (__GFP_WAIT | __GFP_IO)
- #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
-+#define GFP_KERNEL_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_UBC)
- #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
-+#define GFP_USER_UBC (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | __GFP_UBC)
- #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
- __GFP_HIGHMEM)
-
-diff -upr linux-2.6.16.orig/include/linux/hrtimer.h linux-2.6.16-026test009/include/linux/hrtimer.h
---- linux-2.6.16.orig/include/linux/hrtimer.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/hrtimer.h 2006-04-19 15:02:12.000000000 +0400
-@@ -140,4 +140,9 @@ extern void hrtimer_run_queues(void);
- /* Bootup initialization: */
- extern void __init hrtimers_init(void);
-
-+extern long nanosleep_restart(struct restart_block *restart);
-+
-+extern ktime_t schedule_hrtimer(struct hrtimer *timer,
-+ const enum hrtimer_mode mode);
-+
- #endif
-diff -upr linux-2.6.16.orig/include/linux/inetdevice.h linux-2.6.16-026test009/include/linux/inetdevice.h
---- linux-2.6.16.orig/include/linux/inetdevice.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/inetdevice.h 2006-04-19 15:02:12.000000000 +0400
-@@ -34,6 +34,12 @@ struct ipv4_devconf
- };
-
- extern struct ipv4_devconf ipv4_devconf;
-+extern struct ipv4_devconf ipv4_devconf_dflt;
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_ipv4_devconf (*(get_exec_env()->_ipv4_devconf))
-+#else
-+#define ve_ipv4_devconf ipv4_devconf
-+#endif
-
- struct in_device
- {
-@@ -60,29 +66,29 @@ struct in_device
- };
-
- #define IN_DEV_FORWARD(in_dev) ((in_dev)->cnf.forwarding)
--#define IN_DEV_MFORWARD(in_dev) (ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
--#define IN_DEV_RPFILTER(in_dev) (ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
--#define IN_DEV_SOURCE_ROUTE(in_dev) (ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
--#define IN_DEV_BOOTP_RELAY(in_dev) (ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
--
--#define IN_DEV_LOG_MARTIANS(in_dev) (ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
--#define IN_DEV_PROXY_ARP(in_dev) (ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
--#define IN_DEV_SHARED_MEDIA(in_dev) (ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
--#define IN_DEV_TX_REDIRECTS(in_dev) (ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
--#define IN_DEV_SEC_REDIRECTS(in_dev) (ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
-+#define IN_DEV_MFORWARD(in_dev) (ve_ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
-+#define IN_DEV_RPFILTER(in_dev) (ve_ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
-+#define IN_DEV_SOURCE_ROUTE(in_dev) (ve_ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
-+#define IN_DEV_BOOTP_RELAY(in_dev) (ve_ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
-+
-+#define IN_DEV_LOG_MARTIANS(in_dev) (ve_ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
-+#define IN_DEV_PROXY_ARP(in_dev) (ve_ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
-+#define IN_DEV_SHARED_MEDIA(in_dev) (ve_ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
-+#define IN_DEV_TX_REDIRECTS(in_dev) (ve_ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
-+#define IN_DEV_SEC_REDIRECTS(in_dev) (ve_ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
- #define IN_DEV_IDTAG(in_dev) ((in_dev)->cnf.tag)
- #define IN_DEV_MEDIUM_ID(in_dev) ((in_dev)->cnf.medium_id)
- #define IN_DEV_PROMOTE_SECONDARIES(in_dev) (ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
-
- #define IN_DEV_RX_REDIRECTS(in_dev) \
- ((IN_DEV_FORWARD(in_dev) && \
-- (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
-+ (ve_ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
- || (!IN_DEV_FORWARD(in_dev) && \
-- (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
-+ (ve_ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
-
--#define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
--#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
--#define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
-+#define IN_DEV_ARPFILTER(in_dev) (ve_ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
-+#define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ve_ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
-+#define IN_DEV_ARP_IGNORE(in_dev) (max(ve_ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
-
- struct in_ifaddr
- {
-@@ -113,6 +119,7 @@ extern u32 inet_select_addr(const struc
- extern u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
- extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
- extern void inet_forward_change(void);
-+extern void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy);
-
- static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
- {
-@@ -180,6 +187,10 @@ static inline void in_dev_put(struct in_
- #define __in_dev_put(idev) atomic_dec(&(idev)->refcnt)
- #define in_dev_hold(idev) atomic_inc(&(idev)->refcnt)
-
-+struct ve_struct;
-+extern int devinet_sysctl_init(struct ve_struct *);
-+extern void devinet_sysctl_fini(struct ve_struct *);
-+extern void devinet_sysctl_free(struct ve_struct *);
- #endif /* __KERNEL__ */
-
- static __inline__ __u32 inet_make_mask(int logmask)
-diff -upr linux-2.6.16.orig/include/linux/jbd.h linux-2.6.16-026test009/include/linux/jbd.h
---- linux-2.6.16.orig/include/linux/jbd.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/jbd.h 2006-04-19 15:02:11.000000000 +0400
-@@ -245,10 +245,15 @@ typedef struct journal_superblock_s
- #define J_ASSERT(assert) \
- do { \
- if (!(assert)) { \
-+ unsigned long stack; \
- printk (KERN_EMERG \
- "Assertion failure in %s() at %s:%d: \"%s\"\n", \
- __FUNCTION__, __FILE__, __LINE__, # assert); \
-- BUG(); \
-+ printk("Stack=%p current=%p pid=%d ve=%d comm='%s'\n", \
-+ &stack, current, current->pid, \
-+ get_exec_env()->veid, \
-+ current->comm); \
-+ dump_stack(); \
- } \
- } while (0)
-
-diff -upr linux-2.6.16.orig/include/linux/kdev_t.h linux-2.6.16-026test009/include/linux/kdev_t.h
---- linux-2.6.16.orig/include/linux/kdev_t.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/kdev_t.h 2006-04-19 15:02:12.000000000 +0400
-@@ -87,6 +87,57 @@ static inline unsigned sysv_minor(u32 de
- return dev & 0x3ffff;
- }
-
-+#define UNNAMED_MAJOR_COUNT 16
-+
-+#if UNNAMED_MAJOR_COUNT > 1
-+
-+extern int unnamed_dev_majors[UNNAMED_MAJOR_COUNT];
-+
-+static inline dev_t make_unnamed_dev(int idx)
-+{
-+ /*
-+ * Here we transfer bits from 8 to 8+log2(UNNAMED_MAJOR_COUNT) of the
-+ * unnamed device index into major number.
-+ */
-+ return MKDEV(unnamed_dev_majors[(idx >> 8) & (UNNAMED_MAJOR_COUNT - 1)],
-+ idx & ~((UNNAMED_MAJOR_COUNT - 1) << 8));
-+}
-+
-+static inline int unnamed_dev_idx(dev_t dev)
-+{
-+ int i;
-+ for (i = 0; i < UNNAMED_MAJOR_COUNT &&
-+ MAJOR(dev) != unnamed_dev_majors[i]; i++);
-+ return MINOR(dev) | (i << 8);
-+}
-+
-+static inline int is_unnamed_dev(dev_t dev)
-+{
-+ int i;
-+ for (i = 0; i < UNNAMED_MAJOR_COUNT &&
-+ MAJOR(dev) != unnamed_dev_majors[i]; i++);
-+ return i < UNNAMED_MAJOR_COUNT;
-+}
-+
-+#else /* UNNAMED_MAJOR_COUNT */
-+
-+static inline dev_t make_unnamed_dev(int idx)
-+{
-+ return MKDEV(0, idx);
-+}
-+
-+static inline int unnamed_dev_idx(dev_t dev)
-+{
-+ return MINOR(dev);
-+}
-+
-+static inline int is_unnamed_dev(dev_t dev)
-+{
-+ return MAJOR(dev) == 0;
-+}
-+
-+#endif /* UNNAMED_MAJOR_COUNT */
-+
-
- #else /* __KERNEL__ */
-
-diff -upr linux-2.6.16.orig/include/linux/kernel.h linux-2.6.16-026test009/include/linux/kernel.h
---- linux-2.6.16.orig/include/linux/kernel.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/kernel.h 2006-04-19 15:02:12.000000000 +0400
-@@ -132,6 +132,9 @@ asmlinkage int vprintk(const char *fmt,
- __attribute__ ((format (printf, 1, 0)));
- asmlinkage int printk(const char * fmt, ...)
- __attribute__ ((format (printf, 1, 2)));
-+asmlinkage int ve_printk(int, const char * fmt, ...)
-+ __attribute__ ((format (printf, 2, 3)));
-+void prepare_printk(void);
- #else
- static inline int vprintk(const char *s, va_list args)
- __attribute__ ((format (printf, 1, 0)));
-@@ -139,8 +142,16 @@ static inline int vprintk(const char *s,
- static inline int printk(const char *s, ...)
- __attribute__ ((format (printf, 1, 2)));
- static inline int printk(const char *s, ...) { return 0; }
-+static inline int ve_printk(int d, const char *s, ...)
-+ __attribute__ ((format (printf, 2, 3)));
-+static inline int ve_printk(int d, const char *s, ...) { return 0; } /* stub: ignores its arguments */
-+#define prepare_printk() do { } while (0)
- #endif
-
-+#define VE0_LOG 1
-+#define VE_LOG 2
-+#define VE_LOG_BOTH (VE0_LOG | VE_LOG)
-+
- unsigned long int_sqrt(unsigned long);
-
- static inline int __attribute_pure__ long_log2(unsigned long x)
-@@ -171,10 +182,13 @@ static inline void console_verbose(void)
- }
-
- extern void bust_spinlocks(int yes);
-+extern void wake_up_klogd(void);
- extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
- extern __deprecated_for_modules int panic_timeout;
- extern int panic_on_oops;
-+extern int decode_call_traces;
- extern int tainted;
-+extern int kernel_text_csum_broken;
- extern const char *print_tainted(void);
- extern void add_taint(unsigned);
-
-diff -upr linux-2.6.16.orig/include/linux/kmem_cache.h linux-2.6.16-026test009/include/linux/kmem_cache.h
---- linux-2.6.16.orig/include/linux/kmem_cache.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/kmem_cache.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,199 @@
-+#ifndef __KMEM_CACHE_H__
-+#define __KMEM_CACHE_H__
-+#include <linux/threads.h>
-+#include <linux/smp.h>
-+#include <linux/spinlock.h>
-+#include <linux/list.h>
-+#include <linux/mm.h>
-+#include <asm/atomic.h>
-+
-+/*
-+ * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
-+ * SLAB_RED_ZONE & SLAB_POISON.
-+ * 0 for faster, smaller code (especially in the critical paths).
-+ *
-+ * STATS - 1 to collect stats for /proc/slabinfo.
-+ * 0 for faster, smaller code (especially in the critical paths).
-+ *
-+ * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
-+ */
-+
-+#ifdef CONFIG_DEBUG_SLAB
-+#define SLAB_DEBUG 1
-+#define SLAB_STATS 1
-+#define SLAB_FORCED_DEBUG 1
-+#else
-+#define SLAB_DEBUG 0
-+#define SLAB_STATS 0
-+#define SLAB_FORCED_DEBUG 0
-+#endif
-+
-+/*
-+ * struct array_cache
-+ *
-+ * Purpose:
-+ * - LIFO ordering, to hand out cache-warm objects from _alloc
-+ * - reduce the number of linked list operations
-+ * - reduce spinlock operations
-+ *
-+ * The limit is stored in the per-cpu structure to reduce the data cache
-+ * footprint.
-+ *
-+ */
-+struct array_cache {
-+ unsigned int avail;
-+ unsigned int limit;
-+ unsigned int batchcount;
-+ unsigned int touched;
-+ spinlock_t lock;
-+ void *entry[0]; /*
-+ * Must have this definition in here for the proper
-+ * alignment of array_cache. Also simplifies accessing
-+ * the entries.
-+ * [0] is for gcc 2.95. It should really be [].
-+ */
-+};
-+
-+/* bootstrap: The caches do not work without cpuarrays anymore,
-+ * but the cpuarrays are allocated from the generic caches...
-+ */
-+#define BOOT_CPUCACHE_ENTRIES 1
-+struct arraycache_init {
-+ struct array_cache cache;
-+ void *entries[BOOT_CPUCACHE_ENTRIES];
-+};
-+
-+/*
-+ * The slab lists for all objects.
-+ */
-+struct kmem_list3 {
-+ struct list_head slabs_partial; /* partial list first, better asm code */
-+ struct list_head slabs_full;
-+ struct list_head slabs_free;
-+ unsigned long free_objects;
-+ unsigned long next_reap;
-+ int free_touched;
-+ unsigned int free_limit;
-+ unsigned int colour_next; /* Per-node cache coloring */
-+ spinlock_t list_lock;
-+ struct array_cache *shared; /* shared per node */
-+ struct array_cache **alien; /* on other nodes */
-+};
-+
-+/*
-+ * struct kmem_cache
-+ *
-+ * manages a cache.
-+ */
-+
-+struct kmem_cache {
-+/* 1) per-cpu data, touched during every alloc/free */
-+ struct array_cache *array[NR_CPUS];
-+ unsigned int batchcount;
-+ unsigned int limit;
-+ unsigned int shared;
-+ unsigned int buffer_size;
-+/* 2) touched by every alloc & free from the backend */
-+ struct kmem_list3 *nodelists[MAX_NUMNODES];
-+ unsigned int flags; /* constant flags */
-+ unsigned int num; /* # of objs per slab */
-+ spinlock_t spinlock;
-+
-+/* 3) cache_grow/shrink */
-+ /* order of pgs per slab (2^n) */
-+ unsigned int gfporder;
-+
-+ /* force GFP flags, e.g. GFP_DMA */
-+ gfp_t gfpflags;
-+
-+ size_t colour; /* cache colouring range */
-+ unsigned int colour_off; /* colour offset */
-+ struct kmem_cache *slabp_cache;
-+ unsigned int slab_size;
-+ unsigned int dflags; /* dynamic flags */
-+
-+ /* constructor func */
-+ void (*ctor) (void *, struct kmem_cache *, unsigned long);
-+
-+ /* de-constructor func */
-+ void (*dtor) (void *, struct kmem_cache *, unsigned long);
-+
-+/* 4) cache creation/removal */
-+ const char *name;
-+ struct list_head next;
-+
-+/* 5) statistics */
-+#if SLAB_STATS
-+ unsigned long num_active;
-+ unsigned long num_allocations;
-+ unsigned long high_mark;
-+ unsigned long grown;
-+ unsigned long reaped;
-+ unsigned long errors;
-+ unsigned long max_freeable;
-+ unsigned long node_allocs;
-+ unsigned long node_frees;
-+ atomic_t allochit;
-+ atomic_t allocmiss;
-+ atomic_t freehit;
-+ atomic_t freemiss;
-+#endif
-+#if SLAB_DEBUG
-+ /*
-+ * If debugging is enabled, then the allocator can add additional
-+ * fields and/or padding to every object. buffer_size contains the total
-+ * object size including these internal fields, the following two
-+ * variables contain the offset to the user object and its size.
-+ */
-+ int obj_offset;
-+ int obj_size;
-+#endif
-+#ifdef CONFIG_USER_RESOURCE
-+ unsigned int objuse;
-+#endif
-+};
-+
-+#define CFLGS_OFF_SLAB (0x80000000UL)
-+#define CFLGS_ENVIDS (0x04000000UL)
-+#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
-+#define ENVIDS(x) ((x)->flags & CFLGS_ENVIDS)
-+#define kmem_mark_nocharge(c) do { (c)->flags |= SLAB_NO_CHARGE; } while (0)
-+
-+struct slab;
-+/* Functions for storing/retrieving the cachep and or slab from the
-+ * global 'mem_map'. These are used to find the slab an obj belongs to.
-+ * With kfree(), these are used to find the cache which an obj belongs to.
-+ */
-+static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
-+{
-+ page->lru.next = (struct list_head *)cache;
-+}
-+
-+static inline struct kmem_cache *page_get_cache(struct page *page)
-+{
-+ return (struct kmem_cache *)page->lru.next;
-+}
-+
-+static inline void page_set_slab(struct page *page, struct slab *slab)
-+{
-+ page->lru.prev = (struct list_head *)slab;
-+}
-+
-+static inline struct slab *page_get_slab(struct page *page)
-+{
-+ return (struct slab *)page->lru.prev;
-+}
-+
-+static inline struct kmem_cache *virt_to_cache(const void *obj)
-+{
-+ struct page *page = virt_to_page(obj);
-+ return page_get_cache(page);
-+}
-+
-+static inline struct slab *virt_to_slab(const void *obj)
-+{
-+ struct page *page = virt_to_page(obj);
-+ return page_get_slab(page);
-+}
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/linux/kmem_slab.h linux-2.6.16-026test009/include/linux/kmem_slab.h
---- linux-2.6.16.orig/include/linux/kmem_slab.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/kmem_slab.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,71 @@
-+#ifndef __KMEM_SLAB_H__
-+#define __KMEM_SLAB_H__
-+
-+/*
-+ * kmem_bufctl_t:
-+ *
-+ * Bufctl's are used for linking objs within a slab
-+ * linked offsets.
-+ *
-+ * This implementation relies on "struct page" for locating the cache &
-+ * slab an object belongs to.
-+ * This allows the bufctl structure to be small (one int), but limits
-+ * the number of objects a slab (not a cache) can contain when off-slab
-+ * bufctls are used. The limit is the size of the largest general cache
-+ * that does not use off-slab slabs.
-+ * For 32bit archs with 4 kB pages, this is 56.
-+ * This is not serious, as it is only for large objects, when it is unwise
-+ * to have too many per slab.
-+ * Note: This limit can be raised by introducing a general cache whose size
-+ * is less than 512 (PAGE_SIZE<<3), but greater than 256.
-+ */
-+
-+typedef unsigned int kmem_bufctl_t;
-+#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
-+#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
-+#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
-+
-+/*
-+ * struct slab
-+ *
-+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
-+ * for a slab, or allocated from a general cache.
-+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
-+ */
-+struct slab {
-+ struct list_head list;
-+ unsigned long colouroff;
-+ void *s_mem; /* including colour offset */
-+ unsigned int inuse; /* num of objs active in slab */
-+ kmem_bufctl_t free;
-+ unsigned short nodeid;
-+};
-+
-+/*
-+ * struct slab_rcu
-+ *
-+ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
-+ * arrange for kmem_freepages to be called via RCU. This is useful if
-+ * we need to approach a kernel structure obliquely, from its address
-+ * obtained without the usual locking. We can lock the structure to
-+ * stabilize it and check it's still at the given address, only if we
-+ * can be sure that the memory has not been meanwhile reused for some
-+ * other kind of object (which our subsystem's lock might corrupt).
-+ *
-+ * rcu_read_lock before reading the address, then rcu_read_unlock after
-+ * taking the spinlock within the structure expected at that address.
-+ *
-+ * We assume struct slab_rcu can overlay struct slab when destroying.
-+ */
-+struct slab_rcu {
-+ struct rcu_head head;
-+ struct kmem_cache *cachep;
-+ void *addr;
-+};
-+
-+static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
-+{
-+ return (kmem_bufctl_t *) (slabp + 1);
-+}
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/linux/list.h linux-2.6.16-026test009/include/linux/list.h
---- linux-2.6.16.orig/include/linux/list.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/list.h 2006-04-19 15:02:12.000000000 +0400
-@@ -325,6 +325,9 @@ static inline void list_splice_init(stru
- #define list_entry(ptr, type, member) \
- container_of(ptr, type, member)
-
-+#define list_first_entry(ptr, type, member) \
-+ container_of((ptr)->next, type, member)
-+
- /**
- * list_for_each - iterate over a list
- * @pos: the &struct list_head to use as a loop counter.
-@@ -411,6 +414,20 @@ static inline void list_splice_init(stru
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
- /**
-+ * list_for_each_entry_continue_reverse - iterate backwards over list of given
-+ * type continuing after existing point
-+ * @pos: the type * to use as a loop counter.
-+ * @head: the head for your list.
-+ * @member: the name of the list_struct within the struct.
-+ */
-+#define list_for_each_entry_continue_reverse(pos, head, member) \
-+ for (pos = list_entry(pos->member.prev, typeof(*pos), member), \
-+ prefetch(pos->member.prev); \
-+ &pos->member != (head); \
-+ pos = list_entry(pos->member.prev, typeof(*pos), member), \
-+ prefetch(pos->member.prev))
-+
-+/**
- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @pos: the type * to use as a loop counter.
- * @n: another type * to use as temporary storage
-diff -upr linux-2.6.16.orig/include/linux/major.h linux-2.6.16-026test009/include/linux/major.h
---- linux-2.6.16.orig/include/linux/major.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/major.h 2006-04-19 15:02:12.000000000 +0400
-@@ -165,4 +165,7 @@
-
- #define VIOTAPE_MAJOR 230
-
-+#define UNNAMED_EXTRA_MAJOR 130
-+#define UNNAMED_EXTRA_MAJOR_COUNT 120
-+
- #endif
-diff -upr linux-2.6.16.orig/include/linux/mm.h linux-2.6.16-026test009/include/linux/mm.h
---- linux-2.6.16.orig/include/linux/mm.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/mm.h 2006-04-19 15:02:12.000000000 +0400
-@@ -41,6 +41,27 @@ extern int sysctl_legacy_va_layout;
-
- #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
-
-+#include <linux/mm_counter.h>
-+
-+#ifdef CONFIG_USER_RESOURCE
-+#define set_vma_rss(vma, v) set_mm_counter(vma, vm_rss, v)
-+#define get_vma_rss(vma) get_mm_counter(vma, vm_rss)
-+#define inc_vma_rss(vma) inc_mm_counter(vma, vm_rss)
-+#define dec_vma_rss(vma) dec_mm_counter(vma, vm_rss)
-+#define add_vma_rss(vma, v) add_mm_counter(vma, vm_rss, v)
-+#define sub_vma_rss(vma, v) do { \
-+ if (unlikely(dec_mm_counter_chk(vma, vm_rss, v))) \
-+ warn_bad_rss(vma, v); \
-+ } while (0)
-+#else
-+#define set_vma_rss(vma, v) do { } while (0)
-+#define get_vma_rss(vma) (0)
-+#define inc_vma_rss(vma) do { } while (0)
-+#define dec_vma_rss(vma) do { } while (0)
-+#define add_vma_rss(vma, v) do { } while (0)
-+#define sub_vma_rss(vma, v) do { } while (0)
-+#endif
-+
- /*
- * Linux kernel virtual memory manager primitives.
- * The idea being to have a "virtual" mm in the same way
-@@ -111,6 +132,9 @@ struct vm_area_struct {
- #ifdef CONFIG_NUMA
- struct mempolicy *vm_policy; /* NUMA policy for the VMA */
- #endif
-+#ifdef CONFIG_USER_RESOURCE
-+ mm_counter_t _vm_rss;
-+#endif
- };
-
- /*
-@@ -229,10 +253,9 @@ struct page {
- unsigned long private; /* Mapping-private opaque data:
- * usually used for buffer_heads
- * if PagePrivate set; used for
-- * swp_entry_t if PageSwapCache.
-- * When page is free, this
-+ * swp_entry_t if PageSwapCache;
- * indicates order in the buddy
-- * system.
-+ * system if PG_buddy is set.
- */
- struct address_space *mapping; /* If low bit clear, points to
- * inode address_space, or NULL.
-@@ -264,6 +287,12 @@ struct page {
- void *virtual; /* Kernel virtual address (NULL if
- not kmapped, ie. highmem) */
- #endif /* WANT_PAGE_VIRTUAL */
-+#ifdef CONFIG_USER_RESOURCE
-+ union {
-+ struct user_beancounter *page_ub;
-+ struct page_beancounter *page_pb;
-+ } bc;
-+#endif
- };
-
- #define page_private(page) ((page)->private)
-@@ -636,16 +665,9 @@ struct page *shmem_nopage(struct vm_area
- int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
- struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
- unsigned long addr);
--int shmem_lock(struct file *file, int lock, struct user_struct *user);
- #else
- #define shmem_nopage filemap_nopage
-
--static inline int shmem_lock(struct file *file, int lock,
-- struct user_struct *user)
--{
-- return 0;
--}
--
- static inline int shmem_set_policy(struct vm_area_struct *vma,
- struct mempolicy *new)
- {
-@@ -706,7 +728,9 @@ void free_pgd_range(struct mmu_gather **
- void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
- unsigned long floor, unsigned long ceiling);
- int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-- struct vm_area_struct *vma);
-+ struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
-+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
-+ unsigned long addr, size_t size);
- int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
- unsigned long size, pgprot_t prot);
- void unmap_mapping_range(struct address_space *mapping,
-diff -upr linux-2.6.16.orig/include/linux/mm_counter.h linux-2.6.16-026test009/include/linux/mm_counter.h
---- linux-2.6.16.orig/include/linux/mm_counter.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/mm_counter.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,32 @@
-+#ifndef __MM_COUNTER_H_
-+#define __MM_COUNTER_H_
-+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-+/*
-+ * The mm counters are not protected by its page_table_lock,
-+ * so must be incremented atomically.
-+ */
-+#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
-+#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
-+#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
-+#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
-+#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-+#define dec_mm_counter_chk(mm, member, value) \
-+ atomic_long_add_negative(-(value), &(mm)->_##member)
-+typedef atomic_long_t mm_counter_t;
-+
-+#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
-+/*
-+ * Counters are protected by page_table_lock, so plain arithmetic suffices.
-+ * dec_mm_counter_chk() casts to long because mm_counter_t is unsigned here.
-+ */
-+#define set_mm_counter(mm, member, value) ((mm)->_##member = (value))
-+#define get_mm_counter(mm, member) ((mm)->_##member)
-+#define add_mm_counter(mm, member, value) ((mm)->_##member += (value))
-+#define inc_mm_counter(mm, member) ((mm)->_##member++)
-+#define dec_mm_counter(mm, member) ((mm)->_##member--)
-+#define dec_mm_counter_chk(mm, member, value) \
-+ ((long)((mm)->_##member -= (value)) < 0)
-+typedef unsigned long mm_counter_t;
-+
-+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
-+#endif
-diff -upr linux-2.6.16.orig/include/linux/msg.h linux-2.6.16-026test009/include/linux/msg.h
---- linux-2.6.16.orig/include/linux/msg.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/msg.h 2006-04-19 15:02:12.000000000 +0400
-@@ -92,6 +92,8 @@ struct msg_queue {
- struct list_head q_senders;
- };
-
-+int sysvipc_walk_msg(int (*func)(int, struct msg_queue*, void *), void *arg);
-+
- #endif /* __KERNEL__ */
-
- #endif /* _LINUX_MSG_H */
-diff -upr linux-2.6.16.orig/include/linux/namei.h linux-2.6.16-026test009/include/linux/namei.h
---- linux-2.6.16.orig/include/linux/namei.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/namei.h 2006-04-19 15:02:12.000000000 +0400
-@@ -48,12 +48,15 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
- #define LOOKUP_PARENT 16
- #define LOOKUP_NOALT 32
- #define LOOKUP_REVAL 64
-+#define LOOKUP_STRICT 128 /* no symlinks or other filesystems */
-+
- /*
- * Intent data
- */
- #define LOOKUP_OPEN (0x0100)
- #define LOOKUP_CREATE (0x0200)
- #define LOOKUP_ACCESS (0x0400)
-+#define LOOKUP_NOAREACHECK (0x0800) /* no area check on lookup */
-
- extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
- extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
-diff -upr linux-2.6.16.orig/include/linux/namespace.h linux-2.6.16-026test009/include/linux/namespace.h
---- linux-2.6.16.orig/include/linux/namespace.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/namespace.h 2006-04-19 15:02:12.000000000 +0400
-@@ -13,6 +13,8 @@ struct namespace {
- int event;
- };
-
-+extern struct rw_semaphore namespace_sem;
-+
- extern int copy_namespace(int, struct task_struct *);
- extern void __put_namespace(struct namespace *namespace);
- extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
-diff -upr linux-2.6.16.orig/include/linux/netdevice.h linux-2.6.16-026test009/include/linux/netdevice.h
---- linux-2.6.16.orig/include/linux/netdevice.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netdevice.h 2006-04-19 15:02:12.000000000 +0400
-@@ -37,6 +37,7 @@
- #include <linux/config.h>
- #include <linux/device.h>
- #include <linux/percpu.h>
-+#include <linux/ctype.h>
-
- struct divert_blk;
- struct vlan_group;
-@@ -233,6 +234,11 @@ enum netdev_state_t
- __LINK_STATE_LINKWATCH_PENDING
- };
-
-+struct netdev_bc {
-+ struct user_beancounter *exec_ub, *owner_ub;
-+};
-+
-+#define netdev_bc(dev) (&(dev)->dev_bc)
-
- /*
- * This structure holds at boot time configured netdevice settings. They
-@@ -309,6 +315,8 @@ struct net_device
- #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
- #define NETIF_F_LLTX 4096 /* LockLess TX */
- #define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/
-+#define NETIF_F_VIRTUAL 0x40000000 /* can be registered in ve */
-+#define NETIF_F_VENET 0x80000000 /* Device is VENET device */
-
- struct net_device *next_sched;
-
-@@ -431,6 +439,7 @@ struct net_device
- enum { NETREG_UNINITIALIZED=0,
- NETREG_REGISTERING, /* called register_netdevice */
- NETREG_REGISTERED, /* completed register todo */
-+ NETREG_REGISTER_ERR, /* register todo failed */
- NETREG_UNREGISTERING, /* called unregister_netdevice */
- NETREG_UNREGISTERED, /* completed unregister todo */
- NETREG_RELEASED, /* called free_netdev */
-@@ -500,8 +509,18 @@ struct net_device
- struct divert_blk *divert;
- #endif /* CONFIG_NET_DIVERT */
-
-+ unsigned orig_mtu; /* MTU value before move to VE */
-+ struct ve_struct *owner_env; /* Owner VE of the interface */
-+ struct netdev_bc dev_bc;
-+
- /* class/net/name entry */
- struct class_device class_dev;
-+
-+#ifdef CONFIG_VE
-+ /* Entry in the global device list, used to keep track of device
-+ * name assignment */
-+ struct list_head dev_global_list_entry;
-+#endif
- };
-
- #define NETDEV_ALIGN 32
-@@ -535,9 +554,23 @@ struct packet_type {
- #include <linux/notifier.h>
-
- extern struct net_device loopback_dev; /* The loopback */
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define loopback_dev (*get_exec_env()->_loopback_dev)
-+#define ve0_loopback (*get_ve0()->_loopback_dev)
-+#define dev_base (get_exec_env()->_net_dev_base)
-+#define visible_dev_head(x) (&(x)->_net_dev_head)
-+#define visible_dev_index_head(x) (&(x)->_net_dev_index_head)
-+#else
- extern struct net_device *dev_base; /* All devices */
-+#define ve0_loopback loopback_dev
-+#define visible_dev_head(x) NULL
-+#define visible_dev_index_head(x) NULL
-+#endif
- extern rwlock_t dev_base_lock; /* Device list lock */
-
-+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env);
-+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env);
-+
- extern int netdev_boot_setup_check(struct net_device *dev);
- extern unsigned long netdev_boot_base(const char *prefix, int unit);
- extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
-@@ -554,6 +587,7 @@ extern int dev_alloc_name(struct net_de
- extern int dev_open(struct net_device *dev);
- extern int dev_close(struct net_device *dev);
- extern int dev_queue_xmit(struct sk_buff *skb);
-+extern int dev_set_mtu(struct net_device *dev, int new_mtu);
- extern int register_netdevice(struct net_device *dev);
- extern int unregister_netdevice(struct net_device *dev);
- extern void free_netdev(struct net_device *dev);
-@@ -951,6 +985,18 @@ extern void dev_seq_stop(struct seq_file
-
- extern void linkwatch_run_queue(void);
-
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+static inline int ve_is_dev_movable(struct net_device *dev)
-+{
-+ return !(dev->features & NETIF_F_VIRTUAL);
-+}
-+#else /* !CONFIG_VE_NETDEV && !CONFIG_VE_NETDEV_MODULE */
-+static inline int ve_is_dev_movable(struct net_device *dev)
-+{
-+ return 0;
-+}
-+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
-+
- #endif /* __KERNEL__ */
-
- #endif /* _LINUX_DEV_H */
-diff -upr linux-2.6.16.orig/include/linux/netfilter/nf_conntrack_ftp.h linux-2.6.16-026test009/include/linux/netfilter/nf_conntrack_ftp.h
---- linux-2.6.16.orig/include/linux/netfilter/nf_conntrack_ftp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter/nf_conntrack_ftp.h 2006-04-19 15:02:12.000000000 +0400
-@@ -32,13 +32,22 @@ struct ip_conntrack_expect;
-
- /* For NAT to hook in when we find a packet which describes what other
- * connection we should expect. */
--extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
-+typedef unsigned int (*ip_nat_helper_ftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq);
-+extern ip_nat_helper_ftp_hook ip_nat_ftp_hook;
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_ip_nat_ftp_hook \
-+ ((ip_nat_helper_ftp_hook) \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook))
-+#else
-+#define ve_ip_nat_ftp_hook ip_nat_ftp_hook
-+#endif
- #endif /* __KERNEL__ */
-
- #endif /* _NF_CONNTRACK_FTP_H */
-diff -upr linux-2.6.16.orig/include/linux/netfilter/x_tables.h linux-2.6.16-026test009/include/linux/netfilter/x_tables.h
---- linux-2.6.16.orig/include/linux/netfilter/x_tables.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter/x_tables.h 2006-04-19 15:02:12.000000000 +0400
-@@ -80,12 +80,19 @@ struct xt_counters_info
-
- #ifdef __KERNEL__
-
-+#include <linux/config.h>
- #include <linux/netdevice.h>
-
- #define ASSERT_READ_LOCK(x)
- #define ASSERT_WRITE_LOCK(x)
- #include <linux/netfilter_ipv4/listhelp.h>
-
-+#ifdef CONFIG_COMPAT
-+#define COMPAT_TO_USER 1
-+#define COMPAT_FROM_USER -1
-+#define COMPAT_CALC_SIZE 0
-+#endif
-+
- struct xt_match
- {
- struct list_head list;
-@@ -118,6 +125,10 @@ struct xt_match
- /* Called when entry of this type deleted. */
- void (*destroy)(void *matchinfo, unsigned int matchinfosize);
-
-+#ifdef CONFIG_COMPAT
-+ /* Called when userspace alignment differs from the kernel's */
-+ int (*compat)(void *match, void **dstptr, int *size, int convert);
-+#endif
- /* Set this to THIS_MODULE if you are a module, otherwise NULL */
- struct module *me;
- };
-@@ -154,6 +165,10 @@ struct xt_target
- /* Called when entry of this type deleted. */
- void (*destroy)(void *targinfo, unsigned int targinfosize);
-
-+#ifdef CONFIG_COMPAT
-+ /* Called when userspace alignment differs from the kernel's */
-+ int (*compat)(void *target, void **dstptr, int *size, int convert);
-+#endif
- /* Set this to THIS_MODULE if you are a module, otherwise NULL */
- struct module *me;
- };
-@@ -211,6 +226,10 @@ extern int xt_register_table(struct xt_t
- struct xt_table_info *bootstrap,
- struct xt_table_info *newinfo);
- extern void *xt_unregister_table(struct xt_table *table);
-+extern struct xt_table *virt_xt_register_table(struct xt_table *table,
-+ struct xt_table_info *bootstrap,
-+ struct xt_table_info *newinfo);
-+extern void *virt_xt_unregister_table(struct xt_table *table);
-
- extern struct xt_table_info *xt_replace_table(struct xt_table *table,
- unsigned int num_counters,
-@@ -233,6 +252,34 @@ extern void xt_proto_fini(int af);
- extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
- extern void xt_free_table_info(struct xt_table_info *info);
-
-+#ifdef CONFIG_COMPAT
-+#include <net/compat.h>
-+
-+/* FIXME: this works only for 32-bit tasks; the whole approach needs to
-+ * change so that the alignment is calculated as a function of the
-+ * current task's alignment */
-+
-+struct compat_xt_counters
-+{
-+ u_int32_t cnt[4];
-+};
-+
-+struct compat_xt_counters_info
-+{
-+ char name[XT_TABLE_MAXNAMELEN];
-+ compat_uint_t num_counters;
-+ struct compat_xt_counters counters[0];
-+};
-+
-+#define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \
-+ & ~(__alignof__(struct compat_xt_counters)-1))
-+
-+extern int ipt_match_align_compat(void *match, void **dstptr,
-+ int *size, int off, int convert);
-+extern int ipt_target_align_compat(void *target, void **dstptr,
-+ int *size, int off, int convert);
-+
-+#endif /* CONFIG_COMPAT */
- #endif /* __KERNEL__ */
-
- #endif /* _X_TABLES_H */
-diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_conntrack.h linux-2.6.16-026test009/include/linux/netfilter/xt_conntrack.h
---- linux-2.6.16.orig/include/linux/netfilter/xt_conntrack.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter/xt_conntrack.h 2006-04-19 15:02:11.000000000 +0400
-@@ -5,6 +5,7 @@
- #ifndef _XT_CONNTRACK_H
- #define _XT_CONNTRACK_H
-
-+#include <linux/config.h>
- #include <linux/netfilter/nf_conntrack_tuple_common.h>
- #include <linux/in.h>
-
-@@ -60,4 +61,21 @@ struct xt_conntrack_info
- /* Inverse flags */
- u_int8_t invflags;
- };
-+
-+#ifdef CONFIG_COMPAT
-+struct compat_xt_conntrack_info
-+{
-+ compat_uint_t statemask, statusmask;
-+
-+ struct ip_conntrack_tuple tuple[IP_CT_DIR_MAX];
-+ struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX];
-+
-+ compat_ulong_t expires_min, expires_max;
-+
-+ /* Flags word */
-+ u_int8_t flags;
-+ /* Inverse flags */
-+ u_int8_t invflags;
-+};
-+#endif
- #endif /*_XT_CONNTRACK_H*/
-diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_helper.h linux-2.6.16-026test009/include/linux/netfilter/xt_helper.h
---- linux-2.6.16.orig/include/linux/netfilter/xt_helper.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter/xt_helper.h 2006-04-19 15:02:11.000000000 +0400
-@@ -1,8 +1,17 @@
- #ifndef _XT_HELPER_H
- #define _XT_HELPER_H
-
-+#include <linux/config.h>
-+
- struct xt_helper_info {
- int invert;
- char name[30];
- };
-+
-+#ifdef CONFIG_COMPAT
-+struct compat_xt_helper_info {
-+ compat_int_t invert;
-+ char name[30];
-+};
-+#endif
- #endif /* _XT_HELPER_H */
-diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_limit.h linux-2.6.16-026test009/include/linux/netfilter/xt_limit.h
---- linux-2.6.16.orig/include/linux/netfilter/xt_limit.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter/xt_limit.h 2006-04-19 15:02:11.000000000 +0400
-@@ -1,6 +1,8 @@
- #ifndef _XT_RATE_H
- #define _XT_RATE_H
-
-+#include <linux/config.h>
-+
- /* timings are in milliseconds. */
- #define XT_LIMIT_SCALE 10000
-
-@@ -18,4 +20,19 @@ struct xt_rateinfo {
- /* Ugly, ugly fucker. */
- struct xt_rateinfo *master;
- };
-+
-+#ifdef CONFIG_COMPAT
-+struct compat_xt_rateinfo {
-+ u_int32_t avg; /* Average secs between packets * scale */
-+ u_int32_t burst; /* Period multiplier for upper limit. */
-+
-+ /* Used internally by the kernel */
-+ compat_ulong_t prev;
-+ u_int32_t credit;
-+ u_int32_t credit_cap, cost;
-+
-+ /* Ugly, ugly fucker. */
-+ compat_uptr_t master;
-+};
-+#endif
- #endif /*_XT_RATE_H*/
-diff -upr linux-2.6.16.orig/include/linux/netfilter/xt_state.h linux-2.6.16-026test009/include/linux/netfilter/xt_state.h
---- linux-2.6.16.orig/include/linux/netfilter/xt_state.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter/xt_state.h 2006-04-19 15:02:11.000000000 +0400
-@@ -1,6 +1,8 @@
- #ifndef _XT_STATE_H
- #define _XT_STATE_H
-
-+#include <linux/config.h>
-+
- #define XT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
- #define XT_STATE_INVALID (1 << 0)
-
-@@ -10,4 +12,11 @@ struct xt_state_info
- {
- unsigned int statemask;
- };
-+
-+#ifdef CONFIG_COMPAT
-+struct compat_xt_state_info
-+{
-+ compat_uint_t statemask;
-+};
-+#endif
- #endif /*_XT_STATE_H*/
-diff -upr linux-2.6.16.orig/include/linux/netfilter.h linux-2.6.16-026test009/include/linux/netfilter.h
---- linux-2.6.16.orig/include/linux/netfilter.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter.h 2006-04-19 15:02:12.000000000 +0400
-@@ -107,12 +107,21 @@ struct nf_info
- int nf_register_hook(struct nf_hook_ops *reg);
- void nf_unregister_hook(struct nf_hook_ops *reg);
-
-+int virt_nf_register_hook(struct nf_hook_ops *reg);
-+int virt_nf_unregister_hook(struct nf_hook_ops *reg);
-+
- /* Functions to register get/setsockopt ranges (non-inclusive). You
- need to check permissions yourself! */
- int nf_register_sockopt(struct nf_sockopt_ops *reg);
- void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
-
-+#ifdef CONFIG_VE_IPTABLES
-+#define ve_nf_hooks \
-+ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
-+#else
- extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
-+#define ve_nf_hooks nf_hooks
-+#endif
-
- /* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
- * disappear once iptables is replaced with pkttables. Please DO NOT use them
-@@ -190,7 +199,7 @@ static inline int nf_hook_thresh(int pf,
- if (!cond)
- return 1;
- #ifndef CONFIG_NETFILTER_DEBUG
-- if (list_empty(&nf_hooks[pf][hook]))
-+ if (list_empty(&ve_nf_hooks[pf][hook]))
- return 1;
- #endif
- return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack.h 2006-04-19 15:02:12.000000000 +0400
-@@ -71,6 +71,11 @@ do { \
-
- struct ip_conntrack_helper;
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/ve.h>
-+#include <linux/ve_owner.h>
-+#endif
-+
- struct ip_conntrack
- {
- /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
-@@ -122,8 +127,15 @@ struct ip_conntrack
- /* Traversed often, so hopefully in different cacheline to top */
- /* These are my tuples; original and reply */
- struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
-+#ifdef CONFIG_VE_IPTABLES
-+ struct ve_struct *ct_owner_env;
-+#endif
- };
-
-+#ifdef CONFIG_VE_IPTABLES
-+DCL_VE_OWNER_PROTO(CT, struct ip_conntrack, ct_owner_env)
-+#endif
-+
- struct ip_conntrack_expect
- {
- /* Internal linked list (global expectation list) */
-@@ -232,7 +244,15 @@ extern void ip_conntrack_tcp_update(stru
- enum ip_conntrack_dir dir);
-
- /* Call me when a conntrack is destroyed. */
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_ip_conntrack_destroyed \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_destroyed)
-+#else
- extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
-+#define ve_ip_conntrack_destroyed ip_conntrack_destroyed
-+#endif
-+
-
- /* Fake conntrack entry for untracked connections */
- extern struct ip_conntrack ip_conntrack_untracked;
-@@ -261,7 +281,7 @@ extern void ip_conntrack_proto_put(struc
- extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
-
- extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
-- struct ip_conntrack_tuple *);
-+ struct ip_conntrack_tuple *, struct user_beancounter *);
-
- extern void ip_conntrack_free(struct ip_conntrack *ct);
-
-@@ -270,6 +290,8 @@ extern void ip_conntrack_hash_insert(str
- extern struct ip_conntrack_expect *
- __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
-
-+extern void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp);
-+
- extern struct ip_conntrack_expect *
- ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
-
-@@ -291,6 +313,7 @@ static inline int is_dying(struct ip_con
- }
-
- extern unsigned int ip_conntrack_htable_size;
-+extern int ip_conntrack_disable_ve0;
-
- #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
-
-@@ -341,6 +364,9 @@ ip_conntrack_event_cache(enum ip_conntra
- struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
- struct ip_conntrack_ecache *ecache;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return;
-+
- local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- if (ct != ecache->ct)
-@@ -352,7 +378,7 @@ ip_conntrack_event_cache(enum ip_conntra
- static inline void ip_conntrack_event(enum ip_conntrack_events event,
- struct ip_conntrack *ct)
- {
-- if (is_confirmed(ct) && !is_dying(ct))
-+ if (is_confirmed(ct) && !is_dying(ct) && ve_is_super(get_exec_env()))
- notifier_call_chain(&ip_conntrack_chain, event, ct);
- }
-
-@@ -360,7 +386,8 @@ static inline void
- ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
- struct ip_conntrack_expect *exp)
- {
-- notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
-+ if (ve_is_super(get_exec_env()))
-+ notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
- }
- #else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
- static inline void ip_conntrack_event_cache(enum ip_conntrack_events event,
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_core.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_core.h 2006-04-19 15:02:12.000000000 +0400
-@@ -3,7 +3,6 @@
- #include <linux/netfilter.h>
-
- #define MAX_IP_CT_PROTO 256
--extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
-
- /* This header is used to share core functionality between the
- standalone connection tracking module, and the compatibility layer's use
-@@ -54,8 +53,26 @@ static inline int ip_conntrack_confirm(s
-
- extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_ip_ct_protos \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_protos)
-+#define ve_ip_conntrack_hash \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_hash)
-+#define ve_ip_conntrack_expect_list \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_expect_list)
-+#define ve_ip_conntrack_vmalloc \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_vmalloc)
-+#else
-+extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
- extern struct list_head *ip_conntrack_hash;
- extern struct list_head ip_conntrack_expect_list;
-+#define ve_ip_ct_protos ip_ct_protos
-+#define ve_ip_conntrack_hash ip_conntrack_hash
-+#define ve_ip_conntrack_expect_list ip_conntrack_expect_list
-+#define ve_ip_conntrack_vmalloc ip_conntrack_vmalloc
-+#endif /* CONFIG_VE_IPTABLES */
-+
- extern rwlock_t ip_conntrack_lock;
- #endif /* _IP_CONNTRACK_CORE_H */
-
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_helper.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2006-04-19 15:02:12.000000000 +0400
-@@ -31,6 +31,9 @@ struct ip_conntrack_helper
- extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
- extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
-
-+extern int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *);
-+extern void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
-+
- /* Allocate space for an expectation: this is mandatory before calling
- ip_conntrack_expect_related. You will have to call put afterwards. */
- extern struct ip_conntrack_expect *
-@@ -41,4 +44,5 @@ extern void ip_conntrack_expect_put(stru
- extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
- extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
-
-+extern struct list_head helpers;
- #endif /*_IP_CONNTRACK_HELPER_H*/
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_irc.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_irc.h 2006-04-19 15:02:12.000000000 +0400
-@@ -14,16 +14,26 @@
- #ifndef _IP_CONNTRACK_IRC_H
- #define _IP_CONNTRACK_IRC_H
-
-+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-+
- /* This structure exists only once per master */
- struct ip_ct_irc_master {
- };
-
- #ifdef __KERNEL__
--extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-- enum ip_conntrack_info ctinfo,
-- unsigned int matchoff,
-- unsigned int matchlen,
-- struct ip_conntrack_expect *exp);
-+typedef unsigned int (*ip_nat_helper_irc_hook)(struct sk_buff **,
-+ enum ip_conntrack_info, unsigned int, unsigned int,
-+ struct ip_conntrack_expect *);
-+
-+extern ip_nat_helper_irc_hook ip_nat_irc_hook;
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_ip_nat_irc_hook \
-+ ((ip_nat_helper_irc_hook) \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_irc_hook))
-+#else
-+#define ve_ip_nat_irc_hook ip_nat_irc_hook
-+#endif
-
- #define IRC_PORT 6667
-
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_conntrack_protocol.h 2006-04-19 15:02:12.000000000 +0400
-@@ -67,6 +67,7 @@ struct ip_conntrack_protocol
- /* Protocol registration. */
- extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
- extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
-+
- /* Existing built-in protocols */
- extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
- extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
-@@ -74,6 +75,41 @@ extern struct ip_conntrack_protocol ip_c
- extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
- extern int ip_conntrack_protocol_tcp_init(void);
-
-+#if defined(CONFIG_VE_IPTABLES) && defined(CONFIG_SYSCTL)
-+#include <linux/sched.h>
-+#define ve_ip_ct_tcp_timeouts \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeouts)
-+#define ve_ip_ct_udp_timeout \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout)
-+#define ve_ip_ct_udp_timeout_stream \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_udp_timeout_stream)
-+#define ve_ip_ct_icmp_timeout \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_icmp_timeout)
-+#define ve_ip_ct_generic_timeout \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_generic_timeout)
-+#define ve_ip_ct_log_invalid \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_log_invalid)
-+#define ve_ip_ct_tcp_timeout_max_retrans \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_timeout_max_retrans)
-+#define ve_ip_ct_tcp_loose \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_loose)
-+#define ve_ip_ct_tcp_be_liberal \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_be_liberal)
-+#define ve_ip_ct_tcp_max_retrans \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_tcp_max_retrans)
-+#else
-+#define ve_ip_ct_tcp_timeouts *tcp_timeouts
-+#define ve_ip_ct_udp_timeout ip_ct_udp_timeout
-+#define ve_ip_ct_udp_timeout_stream ip_ct_udp_timeout_stream
-+#define ve_ip_ct_icmp_timeout ip_ct_icmp_timeout
-+#define ve_ip_ct_generic_timeout ip_ct_generic_timeout
-+#define ve_ip_ct_log_invalid ip_ct_log_invalid
-+#define ve_ip_ct_tcp_timeout_max_retrans ip_ct_tcp_timeout_max_retrans
-+#define ve_ip_ct_tcp_loose ip_ct_tcp_loose
-+#define ve_ip_ct_tcp_be_liberal ip_ct_tcp_be_liberal
-+#define ve_ip_ct_tcp_max_retrans ip_ct_tcp_max_retrans
-+#endif
-+
- /* Log invalid packets */
- extern unsigned int ip_ct_log_invalid;
-
-@@ -85,10 +121,10 @@ extern int ip_ct_port_nfattr_to_tuple(st
- #ifdef CONFIG_SYSCTL
- #ifdef DEBUG_INVALID_PACKETS
- #define LOG_INVALID(proto) \
-- (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
-+ (ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW)
- #else
- #define LOG_INVALID(proto) \
-- ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
-+ ((ve_ip_ct_log_invalid == (proto) || ve_ip_ct_log_invalid == IPPROTO_RAW) \
- && net_ratelimit())
- #endif
- #else
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_nat.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_nat.h 2006-04-19 15:02:12.000000000 +0400
-@@ -1,5 +1,6 @@
- #ifndef _IP_NAT_H
- #define _IP_NAT_H
-+#include <linux/config.h>
- #include <linux/netfilter_ipv4.h>
- #include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-
-@@ -72,10 +73,29 @@ extern unsigned int ip_nat_setup_info(st
- extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack);
-
-+extern void ip_nat_hash_conntrack(struct ip_conntrack *conntrack);
-+
- /* Calculate relative checksum. */
- extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
- u_int32_t newval,
- u_int16_t oldcheck);
-+
-+#ifdef CONFIG_COMPAT
-+#include <net/compat.h>
-+
-+struct compat_ip_nat_range
-+{
-+ compat_uint_t flags;
-+ u_int32_t min_ip, max_ip;
-+ union ip_conntrack_manip_proto min, max;
-+};
-+
-+struct compat_ip_nat_multi_range
-+{
-+ compat_uint_t rangesize;
-+ struct compat_ip_nat_range range[1];
-+};
-+#endif
- #else /* !__KERNEL__: iptables wants this to compile. */
- #define ip_nat_multi_range ip_nat_multi_range_compat
- #endif /*__KERNEL__*/
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat_rule.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_nat_rule.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_nat_rule.h 2006-04-19 15:02:12.000000000 +0400
-@@ -6,7 +6,7 @@
-
- #ifdef __KERNEL__
-
--extern int ip_nat_rule_init(void) __init;
-+extern int ip_nat_rule_init(void);
- extern void ip_nat_rule_cleanup(void);
- extern int ip_nat_rule_find(struct sk_buff **pskb,
- unsigned int hooknum,
-diff -upr linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_tables.h
---- linux-2.6.16.orig/include/linux/netfilter_ipv4/ip_tables.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/netfilter_ipv4/ip_tables.h 2006-04-19 15:02:12.000000000 +0400
-@@ -16,6 +16,7 @@
- #define _IPTABLES_H
-
- #ifdef __KERNEL__
-+#include <linux/config.h>
- #include <linux/if.h>
- #include <linux/types.h>
- #include <linux/in.h>
-@@ -330,7 +331,7 @@ extern void ipt_init(void) __init;
- //#define ipt_register_table(tbl, repl) xt_register_table(AF_INET, tbl, repl)
- //#define ipt_unregister_table(tbl) xt_unregister_table(AF_INET, tbl)
-
--extern int ipt_register_table(struct ipt_table *table,
-+extern struct ipt_table *ipt_register_table(struct ipt_table *table,
- const struct ipt_replace *repl);
- extern void ipt_unregister_table(struct ipt_table *table);
-
-@@ -364,5 +365,62 @@ extern unsigned int ipt_do_table(struct
- void *userdata);
-
- #define IPT_ALIGN(s) XT_ALIGN(s)
-+
-+#ifdef CONFIG_COMPAT
-+#include <net/compat.h>
-+
-+struct compat_ipt_getinfo
-+{
-+ char name[IPT_TABLE_MAXNAMELEN];
-+ compat_uint_t valid_hooks;
-+ compat_uint_t hook_entry[NF_IP_NUMHOOKS];
-+ compat_uint_t underflow[NF_IP_NUMHOOKS];
-+ compat_uint_t num_entries;
-+ compat_uint_t size;
-+};
-+
-+struct compat_ipt_entry
-+{
-+ struct ipt_ip ip;
-+ compat_uint_t nfcache;
-+ u_int16_t target_offset;
-+ u_int16_t next_offset;
-+ compat_uint_t comefrom;
-+ struct compat_xt_counters counters;
-+ unsigned char elems[0];
-+};
-+
-+struct compat_ipt_entry_match
-+{
-+ union {
-+ struct {
-+ u_int16_t match_size;
-+ char name[IPT_FUNCTION_MAXNAMELEN];
-+ } user;
-+ u_int16_t match_size;
-+ } u;
-+ unsigned char data[0];
-+};
-+
-+struct compat_ipt_entry_target
-+{
-+ union {
-+ struct {
-+ u_int16_t target_size;
-+ char name[IPT_FUNCTION_MAXNAMELEN];
-+ } user;
-+ u_int16_t target_size;
-+ } u;
-+ unsigned char data[0];
-+};
-+
-+#define COMPAT_IPT_ALIGN(s) COMPAT_XT_ALIGN(s)
-+
-+extern int ipt_match_align_compat(void *match, void **dstptr,
-+ int *size, int off, int convert);
-+extern int ipt_target_align_compat(void *target, void **dstptr,
-+ int *size, int off, int convert);
-+
-+#endif /* CONFIG_COMPAT */
- #endif /*__KERNEL__*/
- #endif /* _IPTABLES_H */
-diff -upr linux-2.6.16.orig/include/linux/nfcalls.h linux-2.6.16-026test009/include/linux/nfcalls.h
---- linux-2.6.16.orig/include/linux/nfcalls.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/nfcalls.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,233 @@
-+/*
-+ * include/linux/nfcalls.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _LINUX_NFCALLS_H
-+#define _LINUX_NFCALLS_H
-+
-+#include <linux/rcupdate.h>
-+
-+#ifdef CONFIG_MODULES
-+extern struct module no_module;
-+
-+#define DECL_KSYM_MODULE(name) \
-+ extern struct module *vz_mod_##name
-+#define DECL_KSYM_CALL(type, name, args) \
-+ extern type (*vz_##name) args
-+
-+#define INIT_KSYM_MODULE(name) \
-+ struct module *vz_mod_##name = &no_module; \
-+ EXPORT_SYMBOL(vz_mod_##name)
-+#define INIT_KSYM_CALL(type, name, args) \
-+ type (*vz_##name) args; \
-+ EXPORT_SYMBOL(vz_##name)
-+
-+#define __KSYMERRCALL(err, type, mod, name, args) \
-+({ \
-+ type ret = (type)err; \
-+ if (!__vzksym_module_get(vz_mod_##mod)) { \
-+ if (vz_##name) \
-+ ret = ((*vz_##name)args); \
-+ __vzksym_module_put(vz_mod_##mod); \
-+ } \
-+ ret; \
-+})
-+#define __KSYMSAFECALL_VOID(mod, name, args) \
-+do { \
-+ if (!__vzksym_module_get(vz_mod_##mod)) { \
-+ if (vz_##name) \
-+ ((*vz_##name)args); \
-+ __vzksym_module_put(vz_mod_##mod); \
-+ } \
-+} while (0)
-+#else
-+#define DECL_KSYM_CALL(type, name, args) \
-+ extern type name args
-+#define INIT_KSYM_MODULE(name)
-+#define INIT_KSYM_CALL(type, name, args) \
-+ type name args
-+#define __KSYMERRCALL(err, type, mod, name, args) ((*name)args)
-+#define __KSYMSAFECALL_VOID(mod, name, args) ((*name)args)
-+#endif
-+
-+#define KSYMERRCALL(err, mod, name, args) \
-+ __KSYMERRCALL(err, int, mod, name, args)
-+#define KSYMSAFECALL(type, mod, name, args) \
-+ __KSYMERRCALL(0, type, mod, name, args)
-+#define KSYMSAFECALL_VOID(mod, name, args) \
-+ __KSYMSAFECALL_VOID(mod, name, args)
-+
-+#if defined(CONFIG_VE) && defined(CONFIG_MODULES)
-+/* should be called _after_ KSYMRESOLVE's */
-+#define KSYMMODRESOLVE(name) \
-+ __vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
-+#define KSYMMODUNRESOLVE(name) \
-+ __vzksym_modunresolve(&vz_mod_##name)
-+
-+#define KSYMRESOLVE(name) \
-+ vz_##name = &name
-+#define KSYMUNRESOLVE(name) \
-+ vz_##name = NULL
-+#else
-+#define KSYMRESOLVE(name) do { } while (0)
-+#define KSYMUNRESOLVE(name) do { } while (0)
-+#define KSYMMODRESOLVE(name) do { } while (0)
-+#define KSYMMODUNRESOLVE(name) do { } while (0)
-+#endif
-+
-+#ifdef CONFIG_MODULES
-+static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
-+{
-+ /*
-+ * we want to be sure that pointer updates are visible first:
-+ * 1. wmb() is here only for peace of mind
-+ * (note, no rmb() in KSYMSAFECALL)
-+ * 2. synchronize_sched() guarantees that updates are visible
-+ * on all cpus and allows us to remove rmb() in KSYMSAFECALL
-+ */
-+ wmb(); synchronize_sched();
-+ *modp = mod;
-+ /* just to be sure, our changes are visible as soon as possible */
-+ wmb(); synchronize_sched();
-+}
-+
-+static inline void __vzksym_modunresolve(struct module **modp)
-+{
-+ /*
-+ * try_module_get() in KSYMSAFECALL should fail at this moment since
-+ * THIS_MODULE is in unloading state (we should be called from fini),
-+ * no need to synchronize pointers/ve_module updates.
-+ */
-+ *modp = &no_module;
-+ /*
-+ * synchronize_sched() guarantees here that we see
-+ * updated module pointer before the module really gets away
-+ */
-+ synchronize_sched();
-+}
-+
-+static inline int __vzksym_module_get(struct module *mod)
-+{
-+ /*
-+ * we want to avoid rmb(), so use synchronize_sched() in
-+ * __vzksym_mod[un]resolve() and smp_read_barrier_depends() here...
-+ */
-+ smp_read_barrier_depends(); /* for module loading */
-+ if (!try_module_get(mod))
-+ return -EBUSY;
-+
-+ return 0;
-+}
-+
-+static inline void __vzksym_module_put(struct module *mod)
-+{
-+ module_put(mod);
-+}
-+#endif
-+
-+#if defined(CONFIG_VE_IPTABLES)
-+#ifdef CONFIG_MODULES
-+DECL_KSYM_MODULE(x_tables);
-+DECL_KSYM_MODULE(xt_tcpudp);
-+DECL_KSYM_MODULE(ip_tables);
-+DECL_KSYM_MODULE(iptable_filter);
-+DECL_KSYM_MODULE(iptable_mangle);
-+DECL_KSYM_MODULE(xt_limit);
-+DECL_KSYM_MODULE(ipt_multiport);
-+DECL_KSYM_MODULE(ipt_tos);
-+DECL_KSYM_MODULE(ipt_TOS);
-+DECL_KSYM_MODULE(ipt_REJECT);
-+DECL_KSYM_MODULE(ipt_TCPMSS);
-+DECL_KSYM_MODULE(xt_tcpmss);
-+DECL_KSYM_MODULE(ipt_ttl);
-+DECL_KSYM_MODULE(ipt_LOG);
-+DECL_KSYM_MODULE(xt_length);
-+DECL_KSYM_MODULE(ip_conntrack);
-+DECL_KSYM_MODULE(ip_conntrack_ftp);
-+DECL_KSYM_MODULE(ip_conntrack_irc);
-+DECL_KSYM_MODULE(xt_conntrack);
-+DECL_KSYM_MODULE(xt_state);
-+DECL_KSYM_MODULE(xt_helper);
-+DECL_KSYM_MODULE(ip_nat);
-+DECL_KSYM_MODULE(iptable_nat);
-+DECL_KSYM_MODULE(ip_nat_ftp);
-+DECL_KSYM_MODULE(ip_nat_irc);
-+DECL_KSYM_MODULE(ipt_REDIRECT);
-+#endif
-+
-+struct sk_buff;
-+
-+DECL_KSYM_CALL(int, init_netfilter, (void));
-+DECL_KSYM_CALL(int, init_xtables, (void));
-+DECL_KSYM_CALL(int, init_xt_tcpudp, (void));
-+DECL_KSYM_CALL(int, init_iptables, (void));
-+DECL_KSYM_CALL(int, init_iptable_filter, (void));
-+DECL_KSYM_CALL(int, init_iptable_mangle, (void));
-+DECL_KSYM_CALL(int, init_xt_limit, (void));
-+DECL_KSYM_CALL(int, init_iptable_multiport, (void));
-+DECL_KSYM_CALL(int, init_iptable_tos, (void));
-+DECL_KSYM_CALL(int, init_iptable_TOS, (void));
-+DECL_KSYM_CALL(int, init_iptable_REJECT, (void));
-+DECL_KSYM_CALL(int, init_iptable_TCPMSS, (void));
-+DECL_KSYM_CALL(int, init_xt_tcpmss, (void));
-+DECL_KSYM_CALL(int, init_iptable_ttl, (void));
-+DECL_KSYM_CALL(int, init_iptable_LOG, (void));
-+DECL_KSYM_CALL(int, init_xt_length, (void));
-+DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
-+DECL_KSYM_CALL(int, init_iptable_ftp, (void));
-+DECL_KSYM_CALL(int, init_iptable_irc, (void));
-+DECL_KSYM_CALL(int, init_xt_conntrack_match, (void));
-+DECL_KSYM_CALL(int, init_xt_state, (void));
-+DECL_KSYM_CALL(int, init_xt_helper, (void));
-+DECL_KSYM_CALL(int, ip_nat_init, (void));
-+DECL_KSYM_CALL(int, init_iptable_nat, (void));
-+DECL_KSYM_CALL(int, init_iptable_nat_ftp, (void));
-+DECL_KSYM_CALL(int, init_iptable_nat_irc, (void));
-+DECL_KSYM_CALL(int, init_iptable_REDIRECT, (void));
-+DECL_KSYM_CALL(void, fini_iptable_nat_irc, (void));
-+DECL_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
-+DECL_KSYM_CALL(void, fini_iptable_nat, (void));
-+DECL_KSYM_CALL(void, ip_nat_cleanup, (void));
-+DECL_KSYM_CALL(void, fini_xt_helper, (void));
-+DECL_KSYM_CALL(void, fini_xt_state, (void));
-+DECL_KSYM_CALL(void, fini_xt_conntrack_match, (void));
-+DECL_KSYM_CALL(void, fini_iptable_irc, (void));
-+DECL_KSYM_CALL(void, fini_iptable_ftp, (void));
-+DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
-+DECL_KSYM_CALL(void, fini_xt_length, (void));
-+DECL_KSYM_CALL(void, fini_iptable_LOG, (void));
-+DECL_KSYM_CALL(void, fini_iptable_ttl, (void));
-+DECL_KSYM_CALL(void, fini_xt_tcpmss, (void));
-+DECL_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
-+DECL_KSYM_CALL(void, fini_iptable_REJECT, (void));
-+DECL_KSYM_CALL(void, fini_iptable_TOS, (void));
-+DECL_KSYM_CALL(void, fini_iptable_tos, (void));
-+DECL_KSYM_CALL(void, fini_iptable_multiport, (void));
-+DECL_KSYM_CALL(void, fini_xt_limit, (void));
-+DECL_KSYM_CALL(void, fini_iptable_filter, (void));
-+DECL_KSYM_CALL(void, fini_iptable_mangle, (void));
-+DECL_KSYM_CALL(void, fini_iptables, (void));
-+DECL_KSYM_CALL(void, fini_xt_tcpudp, (void));
-+DECL_KSYM_CALL(void, fini_xtables, (void));
-+DECL_KSYM_CALL(void, fini_netfilter, (void));
-+DECL_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
-+
-+DECL_KSYM_CALL(void, ipt_flush_table, (struct xt_table *table));
-+#endif /* CONFIG_VE_IPTABLES */
-+
-+#ifdef CONFIG_VE_CALLS_MODULE
-+DECL_KSYM_MODULE(vzmon);
-+DECL_KSYM_CALL(int, real_get_device_perms_ve,
-+ (int dev_type, dev_t dev, int access_mode));
-+DECL_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
-+DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
-+DECL_KSYM_CALL(void, real_update_load_avg_ve, (void));
-+#endif
-+
-+#endif /* _LINUX_NFCALLS_H */
-diff -upr linux-2.6.16.orig/include/linux/nfs_fs.h linux-2.6.16-026test009/include/linux/nfs_fs.h
---- linux-2.6.16.orig/include/linux/nfs_fs.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/nfs_fs.h 2006-04-19 15:02:11.000000000 +0400
-@@ -296,7 +296,7 @@ extern struct inode *nfs_fhget(struct su
- extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
- extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
- extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
--extern int nfs_permission(struct inode *, int, struct nameidata *);
-+extern int nfs_permission(struct inode *, int, struct nameidata *, struct exec_perm *);
- extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
- extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
- extern int nfs_open(struct inode *, struct file *);
-diff -upr linux-2.6.16.orig/include/linux/notifier.h linux-2.6.16-026test009/include/linux/notifier.h
---- linux-2.6.16.orig/include/linux/notifier.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/notifier.h 2006-04-19 15:02:12.000000000 +0400
-@@ -27,8 +27,9 @@ extern int notifier_call_chain(struct no
-
- #define NOTIFY_DONE 0x0000 /* Don't care */
- #define NOTIFY_OK 0x0001 /* Suits me */
-+#define NOTIFY_FAIL 0x0002 /* Reject */
- #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */
--#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */
-+#define NOTIFY_BAD (NOTIFY_STOP_MASK|NOTIFY_FAIL) /* Bad/Veto action */
- /*
- * Clean way to return from the notifier and stop further calls.
- */
-diff -upr linux-2.6.16.orig/include/linux/page-flags.h linux-2.6.16-026test009/include/linux/page-flags.h
---- linux-2.6.16.orig/include/linux/page-flags.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/page-flags.h 2006-04-19 15:02:11.000000000 +0400
-@@ -74,7 +74,9 @@
- #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
- #define PG_reclaim 17 /* To be reclaimed asap */
- #define PG_nosave_free 18 /* Free, should not be written */
--#define PG_uncached 19 /* Page has been mapped as uncached */
-+#define PG_buddy 19 /* Page is free, on buddy lists */
-+
-+#define PG_uncached 20 /* Page has been mapped as uncached */
-
- /*
- * Global page accounting. One instance per CPU. Only unsigned longs are
-@@ -319,6 +321,10 @@ extern void __mod_page_state_offset(unsi
- #define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags)
- #define ClearPageNosaveFree(page) clear_bit(PG_nosave_free, &(page)->flags)
-
-+#define PageBuddy(page) test_bit(PG_buddy, &(page)->flags)
-+#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags)
-+#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags)
-+
- #define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
- #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
- #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
-diff -upr linux-2.6.16.orig/include/linux/pid.h linux-2.6.16-026test009/include/linux/pid.h
---- linux-2.6.16.orig/include/linux/pid.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/pid.h 2006-04-19 15:02:12.000000000 +0400
-@@ -1,6 +1,18 @@
- #ifndef _LINUX_PID_H
- #define _LINUX_PID_H
-
-+#define VPID_BIT 10
-+#define VPID_DIV (1<<VPID_BIT)
-+
-+#ifdef CONFIG_VE
-+#define __is_virtual_pid(pid) ((pid) & VPID_DIV)
-+#define is_virtual_pid(pid) \
-+ (__is_virtual_pid(pid) || ((pid)==1 && !ve_is_super(get_exec_env())))
-+#else
-+#define __is_virtual_pid(pid) 0
-+#define is_virtual_pid(pid) 0
-+#endif
-+
- enum pid_type
- {
- PIDTYPE_PID,
-@@ -15,6 +27,9 @@ struct pid
- /* Try to keep pid_chain in the same cacheline as nr for find_pid */
- int nr;
- struct hlist_node pid_chain;
-+#ifdef CONFIG_VE
-+ int vnr;
-+#endif
- /* list of pids with the same nr, only one of them is in the hash */
- struct list_head pid_list;
- };
-@@ -40,16 +55,89 @@ extern int alloc_pidmap(void);
- extern void FASTCALL(free_pidmap(int));
- extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
-
--#define do_each_task_pid(who, type, task) \
-- if ((task = find_task_by_pid_type(type, who))) { \
-+#ifndef CONFIG_VE
-+
-+#define vpid_to_pid(pid) (pid)
-+#define __vpid_to_pid(pid) (pid)
-+#define pid_type_to_vpid(type, pid) (pid)
-+#define __pid_type_to_vpid(type, pid) (pid)
-+
-+#define comb_vpid_to_pid(pid) (pid)
-+#define comb_pid_to_vpid(pid) (pid)
-+
-+#else
-+
-+struct ve_struct;
-+extern void free_vpid(int vpid, struct ve_struct *ve);
-+extern int alloc_vpid(int pid, int vpid);
-+extern int vpid_to_pid(int pid);
-+extern int __vpid_to_pid(int pid);
-+extern pid_t pid_type_to_vpid(int type, pid_t pid);
-+extern pid_t _pid_type_to_vpid(int type, pid_t pid);
-+
-+static inline int comb_vpid_to_pid(int vpid)
-+{
-+ int pid = vpid;
-+
-+ if (vpid > 0) {
-+ pid = vpid_to_pid(vpid);
-+ if (unlikely(pid < 0))
-+ return 0;
-+ } else if (vpid < 0) {
-+ pid = vpid_to_pid(-vpid);
-+ if (unlikely(pid < 0))
-+ return 0;
-+ pid = -pid;
-+ }
-+ return pid;
-+}
-+
-+static inline int comb_pid_to_vpid(int pid)
-+{
-+ int vpid = pid;
-+
-+ if (pid > 0) {
-+ vpid = pid_type_to_vpid(PIDTYPE_PID, pid);
-+ if (unlikely(vpid < 0))
-+ return 0;
-+ } else if (pid < 0) {
-+ vpid = pid_type_to_vpid(PIDTYPE_PGID, -pid);
-+ if (unlikely(vpid < 0))
-+ return 0;
-+ vpid = -vpid;
-+ }
-+ return vpid;
-+}
-+#endif
-+
-+#define do_each_task_pid_all(who, type, task) \
-+ if ((task = find_task_by_pid_type_all(type, who))) { \
- prefetch((task)->pids[type].pid_list.next); \
- do {
-
--#define while_each_task_pid(who, type, task) \
-+#define while_each_task_pid_all(who, type, task) \
- } while (task = pid_task((task)->pids[type].pid_list.next,\
- type), \
- prefetch((task)->pids[type].pid_list.next), \
- hlist_unhashed(&(task)->pids[type].pid_chain)); \
- } \
-
-+#ifndef CONFIG_VE
-+#define __do_each_task_pid_ve(who, type, task, owner) \
-+ do_each_task_pid_all(who, type, task)
-+#define __while_each_task_pid_ve(who, type, task, owner) \
-+ while_each_task_pid_all(who, type, task)
-+#else /* CONFIG_VE */
-+#define __do_each_task_pid_ve(who, type, task, owner) \
-+ do_each_task_pid_all(who, type, task) \
-+ if (ve_accessible(VE_TASK_INFO(task)->owner_env, owner))
-+#define __while_each_task_pid_ve(who, type, task, owner) \
-+ while_each_task_pid_all(who, type, task)
-+#endif /* CONFIG_VE */
-+
-+#define do_each_task_pid_ve(who, type, task) \
-+ __do_each_task_pid_ve(who, type, task, get_exec_env()) /* no ';': it would end the ve_accessible() if */
-+#define while_each_task_pid_ve(who, type, task) \
-+ __while_each_task_pid_ve(who, type, task, get_exec_env()) /* caller supplies the ';' */
-+
- #endif /* _LINUX_PID_H */
-diff -upr linux-2.6.16.orig/include/linux/proc_fs.h linux-2.6.16-026test009/include/linux/proc_fs.h
---- linux-2.6.16.orig/include/linux/proc_fs.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/proc_fs.h 2006-04-19 15:02:12.000000000 +0400
-@@ -78,7 +78,7 @@ struct kcore_list {
- struct vmcore {
- struct list_head list;
- unsigned long long paddr;
-- unsigned long size;
-+ unsigned long long size;
- loff_t offset;
- };
-
-@@ -86,8 +86,14 @@ struct vmcore {
-
- extern struct proc_dir_entry proc_root;
- extern struct proc_dir_entry *proc_root_fs;
-+#ifdef CONFIG_VE
-+#include <linux/sched.h>
-+#define proc_net (get_exec_env()->_proc_net)
-+#define proc_net_stat (get_exec_env()->_proc_net_stat)
-+#else
- extern struct proc_dir_entry *proc_net;
- extern struct proc_dir_entry *proc_net_stat;
-+#endif
- extern struct proc_dir_entry *proc_bus;
- extern struct proc_dir_entry *proc_root_driver;
- extern struct proc_dir_entry *proc_root_kcore;
-@@ -98,8 +104,8 @@ extern void proc_misc_init(void);
- struct mm_struct;
-
- struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
--struct dentry *proc_pid_unhash(struct task_struct *p);
--void proc_pid_flush(struct dentry *proc_dentry);
-+void proc_pid_unhash(struct task_struct *p, struct dentry * [2]);
-+void proc_pid_flush(struct dentry *proc_dentry[2]);
- int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
- unsigned long task_vsize(struct mm_struct *);
- int task_statm(struct mm_struct *, int *, int *, int *, int *);
-@@ -107,7 +113,11 @@ char *task_mem(struct mm_struct *, char
-
- extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
- struct proc_dir_entry *parent);
-+extern struct proc_dir_entry *create_proc_glob_entry(const char *name,
-+ mode_t mode,
-+ struct proc_dir_entry *parent);
- extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
-+extern void remove_proc_glob_entry(const char *name, struct proc_dir_entry *parent);
-
- extern struct vfsmount *proc_mnt;
- extern int proc_fill_super(struct super_block *,void *,int);
-@@ -194,6 +204,15 @@ static inline struct proc_dir_entry *pro
- return res;
- }
-
-+static inline struct proc_dir_entry *proc_glob_fops_create(const char *name,
-+ mode_t mode, struct file_operations *fops)
-+{
-+ struct proc_dir_entry *res = create_proc_glob_entry(name, mode, NULL);
-+ if (res)
-+ res->proc_fops = fops;
-+ return res;
-+}
-+
- static inline void proc_net_remove(const char *name)
- {
- remove_proc_entry(name,proc_net);
-@@ -206,16 +225,21 @@ static inline void proc_net_remove(const
- #define proc_bus NULL
-
- #define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; })
-+#define proc_glob_fops_create(name, mode, fops) ({ (void)(mode), NULL; })
- #define proc_net_create(name, mode, info) ({ (void)(mode), NULL; })
- static inline void proc_net_remove(const char *name) {}
-
--static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; }
--static inline void proc_pid_flush(struct dentry *proc_dentry) { }
-+static inline void proc_pid_unhash(struct task_struct *p,
-+ struct dentry *d[2]) { /* stub: same void signature as the real prototype */ }
-+static inline void proc_pid_flush(struct dentry *proc_dentry[2]) { }
-
- static inline struct proc_dir_entry *create_proc_entry(const char *name,
- mode_t mode, struct proc_dir_entry *parent) { return NULL; }
-+static inline struct proc_dir_entry *create_proc_glob_entry(const char *name,
-+ mode_t mode, struct proc_dir_entry *parent) { return NULL; }
-
- #define remove_proc_entry(name, parent) do {} while (0)
-+#define remove_proc_glob_entry(name, parent) do {} while (0)
-
- static inline struct proc_dir_entry *proc_symlink(const char *name,
- struct proc_dir_entry *parent,const char *dest) {return NULL;}
-@@ -266,4 +290,18 @@ static inline struct proc_dir_entry *PDE
- return PROC_I(inode)->pde;
- }
-
-+static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
-+{
-+ if (de)
-+ atomic_inc(&de->count);
-+ return de;
-+}
-+
-+extern void de_put(struct proc_dir_entry *);
-+
-+#define LPDE(inode) (PROC_I((inode))->pde)
-+#ifdef CONFIG_VE
-+#define GPDE(inode) (*(struct proc_dir_entry **)(&(inode)->i_pipe))
-+#endif
-+
- #endif /* _LINUX_PROC_FS_H */
-diff -upr linux-2.6.16.orig/include/linux/quota.h linux-2.6.16-026test009/include/linux/quota.h
---- linux-2.6.16.orig/include/linux/quota.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/quota.h 2006-04-19 15:02:12.000000000 +0400
-@@ -37,7 +37,6 @@
-
- #include <linux/errno.h>
- #include <linux/types.h>
--#include <linux/spinlock.h>
-
- #define __DQUOT_VERSION__ "dquot_6.5.1"
- #define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
-@@ -45,8 +44,6 @@
- typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
- typedef __u64 qsize_t; /* Type in which we store sizes */
-
--extern spinlock_t dq_data_lock;
--
- /* Size of blocks in which are counted size limits */
- #define QUOTABLOCK_BITS 10
- #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
-@@ -133,6 +130,10 @@ struct if_dqinfo {
-
- #ifdef __KERNEL__
-
-+#include <linux/spinlock.h>
-+
-+extern spinlock_t dq_data_lock;
-+
- #include <linux/dqblk_xfs.h>
- #include <linux/dqblk_v1.h>
- #include <linux/dqblk_v2.h>
-@@ -242,6 +243,8 @@ struct quota_format_ops {
- int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */
- };
-
-+struct inode;
-+struct iattr;
- /* Operations working with dquots */
- struct dquot_operations {
- int (*initialize) (struct inode *, int);
-@@ -256,9 +259,11 @@ struct dquot_operations {
- int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */
- int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */
- int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */
-+ int (*rename) (struct inode *, struct inode *, struct inode *);
- };
-
- /* Operations handling requests from userspace */
-+struct v2_disk_dqblk;
- struct quotactl_ops {
- int (*quota_on)(struct super_block *, int, int, char *);
- int (*quota_off)(struct super_block *, int);
-@@ -271,6 +276,9 @@ struct quotactl_ops {
- int (*set_xstate)(struct super_block *, unsigned int, int);
- int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
- int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
-+#ifdef CONFIG_QUOTA_COMPAT
-+ int (*get_quoti)(struct super_block *, int, unsigned int, struct v2_disk_dqblk *);
-+#endif
- };
-
- struct quota_format_type {
-@@ -291,6 +299,10 @@ struct quota_info {
- struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
- struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
- struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
-+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
-+ struct vz_quota_master *vzdq_master;
-+ int vzdq_count;
-+#endif
- };
-
- /* Inline would be better but we need to dereference super_block which is not defined yet */
-diff -upr linux-2.6.16.orig/include/linux/quotaops.h linux-2.6.16-026test009/include/linux/quotaops.h
---- linux-2.6.16.orig/include/linux/quotaops.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/quotaops.h 2006-04-19 15:02:12.000000000 +0400
-@@ -171,6 +171,19 @@ static __inline__ int DQUOT_TRANSFER(str
- return 0;
- }
-
-+static __inline__ int DQUOT_RENAME(struct inode *inode,
-+ struct inode *old_dir, struct inode *new_dir)
-+{
-+ struct dquot_operations *q_op;
-+
-+ q_op = inode->i_sb->dq_op;
-+ if (q_op && q_op->rename) {
-+ if (q_op->rename(inode, old_dir, new_dir) == NO_QUOTA)
-+ return 1;
-+ }
-+ return 0;
-+}
-+
- /* The following two functions cannot be called inside a transaction */
- #define DQUOT_SYNC(sb) sync_dquots(sb, -1)
-
-@@ -197,6 +210,7 @@ static __inline__ int DQUOT_OFF(struct s
- #define DQUOT_SYNC(sb) do { } while(0)
- #define DQUOT_OFF(sb) do { } while(0)
- #define DQUOT_TRANSFER(inode, iattr) (0)
-+#define DQUOT_RENAME(inode, old_dir, new_dir) (0)
- static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
- {
- inode_add_bytes(inode, nr);
-diff -upr linux-2.6.16.orig/include/linux/raid/raid1.h linux-2.6.16-026test009/include/linux/raid/raid1.h
---- linux-2.6.16.orig/include/linux/raid/raid1.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/raid/raid1.h 2006-04-19 15:02:11.000000000 +0400
-@@ -130,6 +130,6 @@ struct r1bio_s {
- * with failure when last write completes (and all failed).
- * Record that bi_end_io was called with this flag...
- */
--#define R1BIO_Returned 4
-+#define R1BIO_Returned 6
-
- #endif
-diff -upr linux-2.6.16.orig/include/linux/reiserfs_xattr.h linux-2.6.16-026test009/include/linux/reiserfs_xattr.h
---- linux-2.6.16.orig/include/linux/reiserfs_xattr.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/reiserfs_xattr.h 2006-04-19 15:02:11.000000000 +0400
-@@ -42,7 +42,8 @@ int reiserfs_removexattr(struct dentry *
- int reiserfs_delete_xattrs(struct inode *inode);
- int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
- int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
--int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd);
-+int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
-+ struct exec_perm *);
-
- int reiserfs_xattr_del(struct inode *, const char *);
- int reiserfs_xattr_get(const struct inode *, const char *, void *, size_t);
-diff -upr linux-2.6.16.orig/include/linux/rmap.h linux-2.6.16-026test009/include/linux/rmap.h
---- linux-2.6.16.orig/include/linux/rmap.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/rmap.h 2006-04-19 15:02:12.000000000 +0400
-@@ -74,6 +74,7 @@ void page_add_anon_rmap(struct page *, s
- void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
- void page_add_file_rmap(struct page *);
- void page_remove_rmap(struct page *);
-+struct anon_vma *page_lock_anon_vma(struct page *page);
-
- /**
- * page_dup_rmap - duplicate pte mapping to a page
-diff -upr linux-2.6.16.orig/include/linux/rtc.h linux-2.6.16-026test009/include/linux/rtc.h
---- linux-2.6.16.orig/include/linux/rtc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/rtc.h 2006-04-19 15:02:11.000000000 +0400
-@@ -11,8 +11,6 @@
- #ifndef _LINUX_RTC_H_
- #define _LINUX_RTC_H_
-
--#include <linux/interrupt.h>
--
- /*
- * The struct used to pass data via the following ioctl. Similar to the
- * struct tm in <time.h>, but it needs to be here so that the kernel
-@@ -95,6 +93,8 @@ struct rtc_pll_info {
-
- #ifdef __KERNEL__
-
-+#include <linux/interrupt.h>
-+
- typedef struct rtc_task {
- void (*func)(void *private_data);
- void *private_data;
-diff -upr linux-2.6.16.orig/include/linux/sched.h linux-2.6.16-026test009/include/linux/sched.h
---- linux-2.6.16.orig/include/linux/sched.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/sched.h 2006-04-19 15:02:12.000000000 +0400
-@@ -38,7 +38,10 @@
-
- #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
-
-+#include <ub/ub_task.h>
-+
- struct exec_domain;
-+struct ve_struct;
-
- /*
- * cloning flags:
-@@ -92,15 +95,34 @@ extern unsigned long avenrun[]; /* Load
- load += n*(FIXED_1-exp); \
- load >>= FSHIFT;
-
-+#define LOAD_INT(x) ((x) >> FSHIFT)
-+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-+
- extern unsigned long total_forks;
- extern int nr_threads;
- extern int last_pid;
- DECLARE_PER_CPU(unsigned long, process_counts);
- extern int nr_processes(void);
-+
-+extern unsigned long nr_sleeping(void);
-+extern unsigned long nr_stopped(void);
-+extern unsigned long nr_zombie;
-+extern atomic_t nr_dead;
- extern unsigned long nr_running(void);
- extern unsigned long nr_uninterruptible(void);
- extern unsigned long nr_iowait(void);
-
-+#ifdef CONFIG_VE
-+struct ve_struct;
-+extern unsigned long nr_running_ve(struct ve_struct *);
-+extern unsigned long nr_iowait_ve(struct ve_struct *);
-+extern unsigned long nr_uninterruptible_ve(struct ve_struct *);
-+#else
-+#define nr_running_ve(ve) 0
-+#define nr_iowait_ve(ve) 0
-+#define nr_uninterruptible_ve(ve) 0
-+#endif
-+
- #include <linux/time.h>
- #include <linux/param.h>
- #include <linux/resource.h>
-@@ -189,6 +211,7 @@ extern cpumask_t nohz_cpu_mask;
-
- extern void show_state(void);
- extern void show_regs(struct pt_regs *);
-+extern void smp_show_regs(struct pt_regs *, void *);
-
- /*
- * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
-@@ -252,31 +275,7 @@ arch_get_unmapped_area_topdown(struct fi
- extern void arch_unmap_area(struct mm_struct *, unsigned long);
- extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
-
--#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
--/*
-- * The mm counters are not protected by its page_table_lock,
-- * so must be incremented atomically.
-- */
--#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
--#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
--#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
--#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
--#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
--typedef atomic_long_t mm_counter_t;
--
--#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
--/*
-- * The mm counters are protected by its page_table_lock,
-- * so can be incremented directly.
-- */
--#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
--#define get_mm_counter(mm, member) ((mm)->_##member)
--#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
--#define inc_mm_counter(mm, member) (mm)->_##member++
--#define dec_mm_counter(mm, member) (mm)->_##member--
--typedef unsigned long mm_counter_t;
--
--#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
-+#include <linux/mm_counter.h>
-
- #define get_mm_rss(mm) \
- (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
-@@ -332,6 +331,7 @@ struct mm_struct {
- unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
-
- unsigned dumpable:2;
-+ unsigned vps_dumpable:1;
- cpumask_t cpu_vm_mask;
-
- /* Architecture-specific MM context */
-@@ -348,6 +348,9 @@ struct mm_struct {
- /* aio bits */
- rwlock_t ioctx_list_lock;
- struct kioctx *ioctx_list;
-+#ifdef CONFIG_USER_RESOURCE
-+ struct user_beancounter *mm_ub;
-+#endif
- };
-
- struct sighand_struct {
-@@ -364,6 +367,9 @@ static inline void sighand_free(struct s
- call_rcu(&sp->rcu, sighand_free_cb);
- }
-
-+#include <linux/ve.h>
-+#include <linux/ve_task.h>
-+
- /*
- * NOTE! "signal_struct" does not have it's own
- * locking, because a shared signal_struct always
-@@ -846,6 +852,11 @@ struct task_struct {
-
- unsigned long ptrace_message;
- siginfo_t *last_siginfo; /* For ptrace use. */
-+
-+/* state tracking for suspend */
-+ __u8 pn_state;
-+ __u8 stopped_state:1;
-+
- /*
- * current io wait handle: wait queue entry to use for io waits
- * If this thread is processing aio, this points at the waitqueue
-@@ -871,6 +882,16 @@ struct task_struct {
- #endif
- atomic_t fs_excl; /* holding fs exclusive resources */
- struct rcu_head rcu;
-+#ifdef CONFIG_USER_RESOURCE
-+ struct task_beancounter task_bc;
-+#endif
-+#ifdef CONFIG_VE
-+ struct ve_task_info ve_task_info;
-+#endif
-+#if defined(CONFIG_VZ_QUOTA) || defined(CONFIG_VZ_QUOTA_MODULE)
-+ unsigned long magic;
-+ struct inode *ino;
-+#endif
- };
-
- static inline pid_t process_group(struct task_struct *tsk)
-@@ -929,6 +950,43 @@ static inline void put_task_struct(struc
- #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */
- #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */
-
-+#ifndef CONFIG_VE /* CONFIG_VE unset: the four helpers expand to empty statements */
-+#define set_pn_state(tsk, state) do { } while(0)
-+#define clear_pn_state(tsk) do { } while(0)
-+#define set_stop_state(tsk) do { } while(0)
-+#define clear_stop_state(tsk) do { } while(0)
-+#else /* CONFIG_VE: PN_STOP_* codes (presumably the values given to set_pn_state() - confirm) */
-+#define PN_STOP_TF 1 /* was not in 2.6.8 */
-+#define PN_STOP_TF_RT 2 /* was not in 2.6.8 */
-+#define PN_STOP_ENTRY 3
-+#define PN_STOP_FORK 4
-+#define PN_STOP_VFORK 5
-+#define PN_STOP_SIGNAL 6
-+#define PN_STOP_EXIT 7
-+#define PN_STOP_EXEC 8
-+#define PN_STOP_LEAVE 9
-+
-+static inline void set_pn_state(struct task_struct *tsk, int state)
-+{
-+ tsk->pn_state = state; /* pn_state is a __u8 in task_struct, so state is stored narrowed */
-+}
-+
-+static inline void clear_pn_state(struct task_struct *tsk)
-+{
-+ tsk->pn_state = 0; /* 0 is not one of the PN_STOP_* codes above */
-+}
-+
-+static inline void set_stop_state(struct task_struct *tsk)
-+{
-+ tsk->stopped_state = 1; /* stopped_state is a one-bit field in task_struct */
-+}
-+
-+static inline void clear_stop_state(struct task_struct *tsk)
-+{
-+ tsk->stopped_state = 0;
-+}
-+#endif /* CONFIG_VE */
-+
- /*
- * Only the _current_ task can read/write to tsk->flags, but other
- * tasks can access tsk->flags in readonly mode for example
-@@ -968,6 +1026,21 @@ static inline int set_cpus_allowed(task_
- extern unsigned long long sched_clock(void);
- extern unsigned long long current_sched_time(const task_t *current_task);
-
-+static inline unsigned long cycles_to_clocks(cycles_t cycles)
-+{
-+ extern unsigned long cycles_per_clock; /* divisor; defined outside this header */
-+ do_div(cycles, cycles_per_clock); /* do_div() divides 'cycles' in place, leaving the quotient */
-+ return cycles; /* NOTE(review): converted to unsigned long; truncates if cycles_t is wider - confirm */
-+}
-+
-+static inline u64 cycles_to_jiffies(cycles_t cycles)
-+{
-+ extern unsigned long cycles_per_jiffy; /* divisor; defined outside this header */
-+ do_div(cycles, cycles_per_jiffy); /* do_div() divides 'cycles' in place, leaving the quotient */
-+ return cycles; /* quotient: cycles / cycles_per_jiffy */
-+}
-+
-+
- /* sched_exec is called by processes performing an exec */
- #ifdef CONFIG_SMP
- extern void sched_exec(void);
-@@ -1020,12 +1093,227 @@ extern struct task_struct init_task;
-
- extern struct mm_struct init_mm;
-
--#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr)
--extern struct task_struct *find_task_by_pid_type(int type, int pid);
-+#define find_task_by_pid_all(nr) \
-+ find_task_by_pid_type_all(PIDTYPE_PID, nr)
-+extern struct task_struct *find_task_by_pid_type_all(int type, int pid);
- extern void set_special_pids(pid_t session, pid_t pgrp);
- extern void __set_special_pids(pid_t session, pid_t pgrp);
-
-+#ifndef CONFIG_VE
-+#define find_task_by_pid_ve find_task_by_pid_all
-+
-+#define get_exec_env() ((struct ve_struct *)NULL)
-+#define set_exec_env(new_env) ((struct ve_struct *)NULL)
-+
-+#define ve_is_super(env) 1
-+#define ve_accessible(target, owner) 1
-+#define ve_accessible_strict(target, owner) 1
-+#define ve_accessible_veid(target, owner) 1
-+#define ve_accessible_strict_veid(target, owner) 1
-+
-+#define VEID(envid) 0
-+#define get_ve0() NULL
-+
-+static inline pid_t virt_pid(struct task_struct *tsk)
-+{
-+ return tsk->pid;
-+}
-+
-+static inline pid_t virt_tgid(struct task_struct *tsk)
-+{
-+ return tsk->tgid;
-+}
-+
-+static inline pid_t virt_pgid(struct task_struct *tsk)
-+{
-+ return tsk->signal->pgrp;
-+}
-+
-+static inline pid_t virt_sid(struct task_struct *tsk)
-+{
-+ return tsk->signal->session;
-+}
-+
-+#define get_task_pid_ve(tsk, ve) get_task_pid(tsk)
-+
-+static inline pid_t get_task_pid(struct task_struct *tsk)
-+{
-+ return tsk->pid;
-+}
-+
-+static inline pid_t get_task_tgid(struct task_struct *tsk)
-+{
-+ return tsk->tgid;
-+}
-+
-+static inline pid_t get_task_pgid(struct task_struct *tsk)
-+{
-+ return tsk->signal->pgrp;
-+}
-+
-+static inline pid_t get_task_sid(struct task_struct *tsk)
-+{
-+ return tsk->signal->session;
-+}
-+
-+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
-+{
-+}
-+
-+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
-+{
-+}
-+
-+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
-+{
-+}
-+
-+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
-+{
-+}
-+
-+static inline pid_t get_task_ppid(struct task_struct *p)
-+{
-+ return pid_alive(p) ? p->group_leader->real_parent->tgid : 0;
-+}
-+
-+#else /* CONFIG_VE */
-+
-+#include <asm/current.h>
-+#include <linux/ve.h>
-+
-+extern struct ve_struct ve0;
-+
-+#define find_task_by_pid_ve(nr) \
-+ find_task_by_pid_type_ve(PIDTYPE_PID, nr)
-+
-+extern struct task_struct *find_task_by_pid_type_ve(int type, int pid);
-+
-+#define get_ve0() (&ve0)
-+#define VEID(envid) ((envid)->veid)
-+
-+#define get_exec_env() (VE_TASK_INFO(current)->exec_env)
-+static inline struct ve_struct *set_exec_env(struct ve_struct *new_env)
-+{
-+	/* Make new_env current's execution environment; return the one replaced. */
-+	struct ve_struct *prev_env = VE_TASK_INFO(current)->exec_env;
-+
-+	VE_TASK_INFO(current)->exec_env = new_env;
-+
-+	return prev_env;
-+}
-+
-+#define ve_is_super(env) ((env) == get_ve0())
-+#define ve_accessible_strict(target, owner) ((target) == (owner))
-+static inline int ve_accessible(struct ve_struct *target,
-+ struct ve_struct *owner) {
-+ return ve_is_super(owner) || ve_accessible_strict(target, owner);
-+}
-+
-+#define ve_accessible_strict_veid(target, owner) ((target) == (owner))
-+static inline int ve_accessible_veid(envid_t target, envid_t owner)
-+{
-+	/* ve0's id reaches every target; any other id only reaches itself */
-+	return owner == VEID(get_ve0()) || ve_accessible_strict_veid(target, owner);
-+}
-+
-+static inline pid_t virt_pid(struct task_struct *tsk)
-+{
-+ return tsk->pids[PIDTYPE_PID].vnr;
-+}
-+
-+static inline pid_t virt_tgid(struct task_struct *tsk)
-+{
-+ return tsk->pids[PIDTYPE_TGID].vnr;
-+}
-+
-+static inline pid_t virt_pgid(struct task_struct *tsk)
-+{
-+ return tsk->pids[PIDTYPE_PGID].vnr;
-+}
-+
-+static inline pid_t virt_sid(struct task_struct *tsk)
-+{
-+ return tsk->pids[PIDTYPE_SID].vnr;
-+}
-+
-+static inline pid_t get_task_pid_ve(struct task_struct *tsk, struct ve_struct *env)
-+{
-+ return ve_is_super(env) ? tsk->pid : virt_pid(tsk);
-+}
-+
-+static inline pid_t get_task_pid(struct task_struct *tsk)
-+{
-+ return get_task_pid_ve(tsk, get_exec_env());
-+}
-+
-+static inline pid_t get_task_tgid(struct task_struct *tsk)
-+{
-+ return ve_is_super(get_exec_env()) ? tsk->tgid : virt_tgid(tsk);
-+}
-+
-+static inline pid_t get_task_pgid(struct task_struct *tsk)
-+{
-+ return ve_is_super(get_exec_env()) ? tsk->signal->pgrp : virt_pgid(tsk);
-+}
-+
-+static inline pid_t get_task_sid(struct task_struct *tsk)
-+{
-+ return ve_is_super(get_exec_env()) ? tsk->signal->session : virt_sid(tsk);
-+}
-+
-+static inline void set_virt_pid(struct task_struct *tsk, pid_t pid)
-+{
-+ tsk->pids[PIDTYPE_PID].vnr = pid;
-+}
-+
-+static inline void set_virt_tgid(struct task_struct *tsk, pid_t pid)
-+{
-+ tsk->pids[PIDTYPE_TGID].vnr = pid;
-+}
-+
-+static inline void set_virt_pgid(struct task_struct *tsk, pid_t pid)
-+{
-+ tsk->pids[PIDTYPE_PGID].vnr = pid;
-+}
-+
-+static inline void set_virt_sid(struct task_struct *tsk, pid_t pid)
-+{
-+ tsk->pids[PIDTYPE_SID].vnr = pid;
-+}
-+
-+static inline pid_t get_task_ppid(struct task_struct *p)
-+{
-+	struct ve_struct *cur_env = get_exec_env();
-+	struct task_struct *real_parent;
-+
-+	/* A dead task, or one the calling VE sees as pid 1, reports no parent. */
-+	if (!pid_alive(p) || get_task_pid_ve(p, cur_env) == 1)
-+		return 0;
-+	/* A parent the calling VE cannot access is reported as pid 1. */
-+	real_parent = p->group_leader->real_parent;
-+	if (!ve_accessible(VE_TASK_INFO(real_parent)->owner_env, cur_env))
-+		return 1;
-+	return get_task_tgid(real_parent);
-+}
-+
-+void ve_sched_get_cpu_stat(struct ve_struct *envid, cycles_t *idle,
-+ cycles_t *strv, unsigned int cpu);
-+void ve_sched_attach(struct ve_struct *envid);
-+
-+#endif /* CONFIG_VE */
-+
-+
-+#ifdef CONFIG_VE
-+extern cycles_t ve_sched_get_idle_time(struct ve_struct *, int);
-+extern cycles_t ve_sched_get_iowait_time(struct ve_struct *, int);
-+#else
-+#define ve_sched_get_idle_time(ve, cpu) 0
-+#define ve_sched_get_iowait_time(ve, cpu) 0
-+#endif
-+
- /* per-UID process charging. */
-+extern int set_user(uid_t new_ruid, int dumpclear);
- extern struct user_struct * alloc_uid(uid_t);
- static inline struct user_struct *get_uid(struct user_struct *u)
- {
-@@ -1161,6 +1449,13 @@ extern task_t *child_reaper;
-
- extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
- extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
-+extern long do_fork_pid(unsigned long clone_flags,
-+ unsigned long stack_start,
-+ struct pt_regs *regs,
-+ unsigned long stack_size,
-+ int __user *parent_tidptr,
-+ int __user *child_tidptr,
-+ long pid0);
- task_t *fork_idle(int);
-
- extern void set_task_comm(struct task_struct *tsk, char *from);
-@@ -1187,22 +1482,100 @@ extern void wait_task_inactive(task_t *
- add_parent(p, (p)->parent); \
- } while (0)
-
--#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks)
--#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
-+#define next_task_all(p) list_entry((p)->tasks.next, struct task_struct, tasks)
-+#define prev_task_all(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
-
--#define for_each_process(p) \
-- for (p = &init_task ; (p = next_task(p)) != &init_task ; )
-+#define for_each_process_all(p) \
-+ for (p = &init_task ; (p = next_task_all(p)) != &init_task ; )
-
- /*
- * Careful: do_each_thread/while_each_thread is a double loop so
- * 'break' will not work as expected - use goto instead.
- */
--#define do_each_thread(g, t) \
-- for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
-+#define do_each_thread_all(g, t) \
-+ for (g = t = &init_task ; (g = t = next_task_all(g)) != &init_task ; ) do
-
--#define while_each_thread(g, t) \
-+#define while_each_thread_all(g, t) \
- while ((t = next_thread(t)) != g)
-
-+#ifndef CONFIG_VE
-+
-+#define SET_VE_LINKS(p)
-+#define REMOVE_VE_LINKS(p)
-+#define for_each_process_ve(p) for_each_process_all(p)
-+#define do_each_thread_ve(g, t) do_each_thread_all(g, t)
-+#define while_each_thread_ve(g, t) while_each_thread_all(g, t)
-+#define first_task_ve() next_task_ve(&init_task)
-+#define __first_task_ve(owner) next_task_ve(&init_task)
-+#define __next_task_ve(owner, p) next_task_ve(p)
-+#define next_task_ve(p) \
-+ (next_task_all(p) != &init_task ? next_task_all(p) : NULL)
-+
-+#else /* CONFIG_VE */
-+
-+#define SET_VE_LINKS(p) \
-+ do { \
-+ if (thread_group_leader(p)) \
-+ list_add_tail(&VE_TASK_INFO(p)->vetask_list, \
-+ &VE_TASK_INFO(p)->owner_env->vetask_lh); \
-+ } while (0)
-+
-+#define REMOVE_VE_LINKS(p) \
-+ do { \
-+ if (thread_group_leader(p)) \
-+ list_del(&VE_TASK_INFO(p)->vetask_list); \
-+ } while(0)
-+
-+static inline task_t *__first_task_ve(struct ve_struct *ve)
-+{
-+	task_t *first;
-+
-+	if (unlikely(ve_is_super(ve))) {
-+		/* ve0 walks the global list; getting init_task back means none */
-+		first = next_task_all(&init_task);
-+		return first != &init_task ? first : NULL;
-+	}
-+	/*
-+	 * ve->init_entry would probably do too, but the list head is clearer.
-+	 */
-+	BUG_ON(list_empty(&ve->vetask_lh));
-+	return VE_TASK_LIST_2_TASK(ve->vetask_lh.next);
-+}
-+
-+static inline task_t *__next_task_ve(struct ve_struct *ve, task_t *tsk)
-+{
-+	struct list_head *link;
-+
-+	if (unlikely(ve_is_super(ve))) {
-+		/* ve0: step along the global list, stopping at init_task */
-+		tsk = next_task_all(tsk);
-+		return tsk != &init_task ? tsk : NULL;
-+	}
-+
-+	/* Any other VE may only be walked from a task it owns. */
-+	BUG_ON(VE_TASK_INFO(tsk)->owner_env != ve);
-+	/* Arriving back at the list head means the VE's list is exhausted. */
-+	link = VE_TASK_INFO(tsk)->vetask_list.next;
-+	if (link == &ve->vetask_lh)
-+		return NULL;
-+	return VE_TASK_LIST_2_TASK(link);
-+}
-+
-+#define first_task_ve() __first_task_ve(get_exec_env())
-+#define next_task_ve(p) __next_task_ve(get_exec_env(), p)
-+/* no one uses prev_task_ve(), copy next_task_ve() if needed */
-+
-+#define for_each_process_ve(p) \
-+ for (p = first_task_ve(); p != NULL ; p = next_task_ve(p))
-+
-+#define do_each_thread_ve(g, t) \
-+ for (g = t = first_task_ve() ; g != NULL; g = t = next_task_ve(g)) do
-+
-+#define while_each_thread_ve(g, t) \
-+ while ((t = next_thread(t)) != g)
-+
-+#endif /* CONFIG_VE */
-+
- extern task_t * FASTCALL(next_thread(const task_t *p));
-
- #define thread_group_leader(p) (p->pid == p->tgid)
-@@ -1401,7 +1774,7 @@ static inline int frozen(struct task_str
- */
- static inline int freezing(struct task_struct *p)
- {
-- return p->flags & PF_FREEZE;
-+ return test_tsk_thread_flag(p, TIF_FREEZE);
- }
-
- /*
-@@ -1410,7 +1783,7 @@ static inline int freezing(struct task_s
- */
- static inline void freeze(struct task_struct *p)
- {
-- p->flags |= PF_FREEZE;
-+ set_tsk_thread_flag(p, TIF_FREEZE);
- }
-
- /*
-@@ -1431,7 +1804,8 @@ static inline int thaw_process(struct ta
- */
- static inline void frozen_process(struct task_struct *p)
- {
-- p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
-+ clear_tsk_thread_flag(p, TIF_FREEZE);
-+ p->flags |= PF_FROZEN;
- }
-
- extern void refrigerator(void);
-diff -upr linux-2.6.16.orig/include/linux/sem.h linux-2.6.16-026test009/include/linux/sem.h
---- linux-2.6.16.orig/include/linux/sem.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/sem.h 2006-04-19 15:02:12.000000000 +0400
-@@ -155,6 +155,9 @@ static inline void exit_sem(struct task_
- }
- #endif
-
-+int sysvipc_walk_sem(int (*func)(int, struct sem_array*, void *), void *arg);
-+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg);
-+
- #endif /* __KERNEL__ */
-
- #endif /* _LINUX_SEM_H */
-diff -upr linux-2.6.16.orig/include/linux/shm.h linux-2.6.16-026test009/include/linux/shm.h
---- linux-2.6.16.orig/include/linux/shm.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/shm.h 2006-04-19 15:02:12.000000000 +0400
-@@ -86,6 +86,7 @@ struct shmid_kernel /* private to the ke
- pid_t shm_cprid;
- pid_t shm_lprid;
- struct user_struct *mlock_user;
-+ struct ipc_ids *_shm_ids;
- };
-
- /* shm_mode upper byte flags */
-@@ -104,6 +105,9 @@ static inline long do_shmat(int shmid, c
- }
- #endif
-
-+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg);
-+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg);
-+
- #endif /* __KERNEL__ */
-
- #endif /* _LINUX_SHM_H_ */
-diff -upr linux-2.6.16.orig/include/linux/shmem_fs.h linux-2.6.16-026test009/include/linux/shmem_fs.h
---- linux-2.6.16.orig/include/linux/shmem_fs.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/shmem_fs.h 2006-04-19 15:02:11.000000000 +0400
-@@ -19,6 +19,9 @@ struct shmem_inode_info {
- swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */
- struct list_head swaplist; /* chain of maybes on swap */
- struct inode vfs_inode;
-+#ifdef CONFIG_USER_RESOURCE
-+ struct user_beancounter *shmi_ub;
-+#endif
- };
-
- struct shmem_sb_info {
-diff -upr linux-2.6.16.orig/include/linux/signal.h linux-2.6.16-026test009/include/linux/signal.h
---- linux-2.6.16.orig/include/linux/signal.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/signal.h 2006-04-19 15:02:12.000000000 +0400
-@@ -3,6 +3,7 @@
-
- #include <linux/list.h>
- #include <linux/spinlock.h>
-+#include <linux/slab.h>
- #include <asm/signal.h>
- #include <asm/siginfo.h>
-
-@@ -41,6 +42,9 @@ struct sigqueue {
- int flags;
- siginfo_t info;
- struct user_struct *user;
-+#ifdef CONFIG_USER_RESOURCE
-+ struct user_beancounter *sig_ub;
-+#endif
- };
-
- /* flags values. */
-@@ -263,6 +267,8 @@ extern int sigprocmask(int, sigset_t *,
- struct pt_regs;
- extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
-
-+extern kmem_cache_t *sigqueue_cachep;
-+
- #endif /* __KERNEL__ */
-
- #endif /* _LINUX_SIGNAL_H */
-diff -upr linux-2.6.16.orig/include/linux/skbuff.h linux-2.6.16-026test009/include/linux/skbuff.h
---- linux-2.6.16.orig/include/linux/skbuff.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/skbuff.h 2006-04-19 15:02:12.000000000 +0400
-@@ -19,6 +19,7 @@
- #include <linux/compiler.h>
- #include <linux/time.h>
- #include <linux/cache.h>
-+#include <linux/ve_owner.h>
-
- #include <asm/atomic.h>
- #include <asm/types.h>
-@@ -211,6 +212,8 @@ enum {
- * @tc_verd: traffic control verdict
- */
-
-+#include <ub/ub_sk.h>
-+
- struct sk_buff {
- /* These two members must be first. */
- struct sk_buff *next;
-@@ -294,13 +297,18 @@ struct sk_buff {
- *data,
- *tail,
- *end;
-+ struct skb_beancounter skb_bc;
-+ struct ve_struct *owner_env;
- };
-
-+DCL_VE_OWNER_PROTO(SKB, struct sk_buff, owner_env)
-+
- #ifdef __KERNEL__
- /*
- * Handling routines are only of interest to the kernel
- */
- #include <linux/slab.h>
-+#include <ub/ub_net.h>
-
- #include <asm/system.h>
-
-@@ -1007,6 +1015,8 @@ static inline int pskb_trim(struct sk_bu
- */
- static inline void skb_orphan(struct sk_buff *skb)
- {
-+ ub_skb_uncharge(skb);
-+
- if (skb->destructor)
- skb->destructor(skb);
- skb->destructor = NULL;
-diff -upr linux-2.6.16.orig/include/linux/slab.h linux-2.6.16-026test009/include/linux/slab.h
---- linux-2.6.16.orig/include/linux/slab.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/slab.h 2006-04-19 15:02:11.000000000 +0400
-@@ -48,6 +48,26 @@ typedef struct kmem_cache kmem_cache_t;
- #define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */
- #define SLAB_DESTROY_BY_RCU 0x00080000UL /* defer freeing pages to RCU */
-
-+/*
-+ * allocation rules:                   __GFP_UBC           0
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ *  cache (SLAB_UBC)                   charge              charge
-+ *                                     (usual caches: mm, vma, task_struct, ...)
-+ *
-+ *  cache (SLAB_UBC | SLAB_NO_CHARGE)  charge              ---
-+ *                                     (ub_kmalloc)        (kmalloc)
-+ *
-+ *  cache (no UB flags)                BUG()               ---
-+ *                                                         (nonub caches, mempools)
-+ *
-+ *  pages                              charge              ---
-+ *                                     (ub_vmalloc,        (vmalloc,
-+ *                                      poll, fdsets, ...)  non-ub allocs)
-+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-+ */
-+#define SLAB_UBC 0x20000000UL /* alloc space for ubs ... */
-+#define SLAB_NO_CHARGE 0x40000000UL /* ... but don't charge */
-+
- /* flags passed to a constructor func */
- #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
- #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
-diff -upr linux-2.6.16.orig/include/linux/smp.h linux-2.6.16-026test009/include/linux/smp.h
---- linux-2.6.16.orig/include/linux/smp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/smp.h 2006-04-19 15:02:11.000000000 +0400
-@@ -10,6 +10,9 @@
-
- extern void cpu_idle(void);
-
-+struct pt_regs;
-+typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
-+
- #ifdef CONFIG_SMP
-
- #include <linux/preempt.h>
-@@ -49,6 +52,8 @@ extern int __cpu_up(unsigned int cpunum)
- */
- extern void smp_cpus_done(unsigned int max_cpus);
-
-+extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
-+
- /*
- * Call a function on all other processors
- */
-@@ -99,6 +104,12 @@ static inline void smp_send_reschedule(i
- #define num_booting_cpus() 1
- #define smp_prepare_boot_cpu() do {} while (0)
-
-+static inline int smp_nmi_call_function(smp_nmi_function func,
-+ void *info, int wait)
-+{
-+ return 0; /* !SMP stub: func is never invoked here; always reports 0 */
-+}
-+
- #endif /* !SMP */
-
- /*
-diff -upr linux-2.6.16.orig/include/linux/socket.h linux-2.6.16-026test009/include/linux/socket.h
---- linux-2.6.16.orig/include/linux/socket.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/socket.h 2006-04-19 15:02:12.000000000 +0400
-@@ -300,6 +300,7 @@ extern int memcpy_toiovec(struct iovec *
- extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
- extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
- extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
-+extern int vz_security_proto_check(int family, int type, int protocol);
-
- #endif
- #endif /* not kernel and not glibc */
-diff -upr linux-2.6.16.orig/include/linux/swap.h linux-2.6.16-026test009/include/linux/swap.h
---- linux-2.6.16.orig/include/linux/swap.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/swap.h 2006-04-19 15:02:12.000000000 +0400
-@@ -80,6 +80,7 @@ struct address_space;
- struct sysinfo;
- struct writeback_control;
- struct zone;
-+struct user_beancounter;
-
- /*
- * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
-@@ -119,6 +120,7 @@ enum {
- /*
- * The in-memory structure used to track swap areas.
- */
-+struct user_beancounter;
- struct swap_info_struct {
- unsigned int flags;
- int prio; /* swap priority */
-@@ -136,6 +138,9 @@ struct swap_info_struct {
- unsigned int max;
- unsigned int inuse_pages;
- int next; /* next entry on swap list */
-+#ifdef CONFIG_USER_SWAP_ACCOUNTING
-+ struct user_beancounter **swap_ubs;
-+#endif
- };
-
- struct swap_list_t {
-@@ -240,7 +245,7 @@ extern long total_swap_pages;
- extern unsigned int nr_swapfiles;
- extern struct swap_info_struct swap_info[];
- extern void si_swapinfo(struct sysinfo *);
--extern swp_entry_t get_swap_page(void);
-+extern swp_entry_t get_swap_page(struct user_beancounter *);
- extern swp_entry_t get_swap_page_of_type(int type);
- extern int swap_duplicate(swp_entry_t);
- extern int valid_swaphandles(swp_entry_t, unsigned long *);
-@@ -253,7 +258,9 @@ extern int remove_exclusive_swap_page(st
- struct backing_dev_info;
-
- extern spinlock_t swap_lock;
--extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page);
-+struct page_beancounter;
-+extern int remove_vma_swap(struct vm_area_struct *vma, struct page *page,
-+ struct page_beancounter **pb);
-
- /* linux/mm/thrash.c */
- extern struct mm_struct * swap_token_mm;
-@@ -310,7 +317,7 @@ static inline int remove_exclusive_swap_
- return 0;
- }
-
--static inline swp_entry_t get_swap_page(void)
-+static inline swp_entry_t get_swap_page(struct user_beancounter *ub)
- {
- swp_entry_t entry;
- entry.val = 0;
-diff -upr linux-2.6.16.orig/include/linux/sysctl.h linux-2.6.16-026test009/include/linux/sysctl.h
---- linux-2.6.16.orig/include/linux/sysctl.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/sysctl.h 2006-04-19 15:02:12.000000000 +0400
-@@ -148,6 +148,8 @@ enum
- KERN_SPIN_RETRY=70, /* int: number of spinlock retries */
- KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */
- KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
-+ KERN_VIRT_PIDS=202, /* int: VE pids virtualization */
-+ KERN_VIRT_OSRELEASE=205,/* virtualization of utsname.release */
- };
-
-
-@@ -401,6 +403,7 @@ enum
-
- enum {
- NET_IPV4_ROUTE_FLUSH=1,
-+ NET_IPV4_ROUTE_SRC_CHECK=188,
- NET_IPV4_ROUTE_MIN_DELAY=2,
- NET_IPV4_ROUTE_MAX_DELAY=3,
- NET_IPV4_ROUTE_GC_THRESH=4,
-@@ -760,6 +763,12 @@ enum
- FS_AIO_NR=18, /* current system-wide number of aio requests */
- FS_AIO_MAX_NR=19, /* system-wide maximum number of aio requests */
- FS_INOTIFY=20, /* inotify submenu */
-+ FS_AT_VSYSCALL=21, /* int: to announce vsyscall data */
-+};
-+
-+/* /proc/sys/debug */
-+enum {
-+ DBG_DECODE_CALLTRACES = 1, /* int: decode call traces on oops */
- };
-
- /* /proc/sys/fs/quota/ */
-@@ -900,6 +909,8 @@ extern int proc_doulongvec_minmax(ctl_ta
- void __user *, size_t *, loff_t *);
- extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int,
- struct file *, void __user *, size_t *, loff_t *);
-+extern int proc_doutsstring(ctl_table *table, int write, struct file *,
-+ void __user *, size_t *, loff_t *);
-
- extern int do_sysctl (int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
-@@ -954,6 +965,8 @@ extern ctl_handler sysctl_ms_jiffies;
- */
-
- /* A sysctl table is an array of struct ctl_table: */
-+struct ve_struct;
-+
- struct ctl_table
- {
- int ctl_name; /* Binary ID */
-@@ -967,6 +980,7 @@ struct ctl_table
- struct proc_dir_entry *de; /* /proc control block */
- void *extra1;
- void *extra2;
-+ struct ve_struct *owner_env;
- };
-
- /* struct ctl_table_header is used to maintain dynamic lists of
-@@ -983,6 +997,9 @@ struct ctl_table_header * register_sysct
- int insert_at_head);
- void unregister_sysctl_table(struct ctl_table_header * table);
-
-+ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr);
-+void free_sysctl_clone(ctl_table *clone);
-+
- #else /* __KERNEL__ */
-
- #endif /* __KERNEL__ */
-diff -upr linux-2.6.16.orig/include/linux/tty.h linux-2.6.16-026test009/include/linux/tty.h
---- linux-2.6.16.orig/include/linux/tty.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/tty.h 2006-04-19 15:02:12.000000000 +0400
-@@ -238,8 +238,11 @@ struct tty_struct {
- spinlock_t read_lock;
- /* If the tty has a pending do_SAK, queue it here - akpm */
- struct work_struct SAK_work;
-+ struct ve_struct *owner_env;
- };
-
-+DCL_VE_OWNER_PROTO(TTY, struct tty_struct, owner_env)
-+
- /* tty magic number */
- #define TTY_MAGIC 0x5401
-
-@@ -266,6 +269,7 @@ struct tty_struct {
- #define TTY_PTY_LOCK 16 /* pty private */
- #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
- #define TTY_HUPPED 18 /* Post driver->hangup() */
-+#define TTY_CHARGED 19 /* Charged as ub resource */
-
- #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
-
-diff -upr linux-2.6.16.orig/include/linux/tty_driver.h linux-2.6.16-026test009/include/linux/tty_driver.h
---- linux-2.6.16.orig/include/linux/tty_driver.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/tty_driver.h 2006-04-19 15:02:12.000000000 +0400
-@@ -115,6 +115,7 @@
- * character to the device.
- */
-
-+#include <linux/ve_owner.h>
- #include <linux/fs.h>
- #include <linux/list.h>
- #include <linux/cdev.h>
-@@ -214,9 +215,18 @@ struct tty_driver {
- unsigned int set, unsigned int clear);
-
- struct list_head tty_drivers;
-+ struct ve_struct *owner_env;
- };
-
-+DCL_VE_OWNER_PROTO(TTYDRV, struct tty_driver, owner_env)
-+
-+#ifdef CONFIG_LEGACY_PTYS
-+extern struct tty_driver *pty_driver;
-+extern struct tty_driver *pty_slave_driver;
-+#endif
-+
- extern struct list_head tty_drivers;
-+extern rwlock_t tty_driver_guard;
-
- struct tty_driver *alloc_tty_driver(int lines);
- void put_tty_driver(struct tty_driver *driver);
-diff -upr linux-2.6.16.orig/include/linux/ve.h linux-2.6.16-026test009/include/linux/ve.h
---- linux-2.6.16.orig/include/linux/ve.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/ve.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,313 @@
-+/*
-+ * include/linux/ve.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _LINUX_VE_H
-+#define _LINUX_VE_H
-+
-+#include <linux/config.h>
-+
-+#ifndef __ENVID_T_DEFINED__
-+typedef unsigned envid_t;
-+#define __ENVID_T_DEFINED__
-+#endif
-+
-+#include <linux/types.h>
-+#include <linux/capability.h>
-+#include <linux/utsname.h>
-+#include <linux/sysctl.h>
-+#include <linux/vzstat.h>
-+#include <linux/kobject.h>
-+
-+#ifdef VZMON_DEBUG
-+# define VZTRACE(fmt,args...) \
-+ printk(KERN_DEBUG fmt, ##args)
-+#else
-+# define VZTRACE(fmt,args...)
-+#endif /* VZMON_DEBUG */
-+
-+struct tty_driver;
-+struct devpts_config;
-+struct task_struct;
-+struct new_utsname;
-+struct file_system_type;
-+struct icmp_mib;
-+struct ipstats_mib; /* tag used by ve_struct::_ip_statistics (was the stale 'ip_mib') */
-+struct tcp_mib;
-+struct udp_mib;
-+struct linux_mib;
-+struct fib_info;
-+struct fib_rule;
-+struct veip_struct;
-+struct ve_monitor;
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+struct fib_table;
-+struct devcnfv4_struct;
-+#ifdef CONFIG_VE_IPTABLES
-+struct xt_af;
-+struct xt_table;
-+struct xt_target;
-+struct ip_conntrack;
-+typedef unsigned int (*ip_nat_helper_func)(void);
-+struct ve_ip_conntrack {
-+ struct list_head *_ip_conntrack_hash;
-+ struct list_head _ip_conntrack_expect_list;
-+ struct list_head _ip_conntrack_unconfirmed;
-+ struct ip_conntrack_protocol ** _ip_ct_protos;
-+ struct list_head _ip_conntrack_helpers;
-+ int _ip_conntrack_max;
-+ int _ip_conntrack_vmalloc;
-+ atomic_t _ip_conntrack_count;
-+ void (*_ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
-+#ifdef CONFIG_SYSCTL
-+ unsigned long _ip_ct_tcp_timeouts[10];
-+ unsigned long _ip_ct_udp_timeout;
-+ unsigned long _ip_ct_udp_timeout_stream;
-+ unsigned long _ip_ct_icmp_timeout;
-+ unsigned long _ip_ct_generic_timeout;
-+ unsigned int _ip_ct_log_invalid;
-+ unsigned long _ip_ct_tcp_timeout_max_retrans;
-+ int _ip_ct_tcp_loose;
-+ int _ip_ct_tcp_be_liberal;
-+ int _ip_ct_tcp_max_retrans;
-+ struct ctl_table_header *_ip_ct_sysctl_header;
-+ ctl_table *_ip_ct_net_table;
-+ ctl_table *_ip_ct_ipv4_table;
-+ ctl_table *_ip_ct_netfilter_table;
-+ ctl_table *_ip_ct_sysctl_table;
-+#endif /*CONFIG_SYSCTL*/
-+
-+ struct ip_nat_protocol **_ip_nat_protos;
-+ ip_nat_helper_func _ip_nat_ftp_hook;
-+ ip_nat_helper_func _ip_nat_irc_hook;
-+ struct list_head *_ip_nat_bysource;
-+ struct xt_table *_ip_nat_table;
-+
-+ /* resource accounting */
-+ struct user_beancounter *ub;
-+};
-+#endif
-+#endif
-+
-+#define UIDHASH_BITS_VE 6
-+#define UIDHASH_SZ_VE (1 << UIDHASH_BITS_VE)
-+
-+struct ve_cpu_stats { /* one entry per CPU in ve_struct.ve_cpu_stats[], reached via VE_CPU_STATS() */
-+ cycles_t idle_time;
-+ cycles_t iowait_time;
-+ cycles_t strt_idle_time;
-+ cycles_t used_time;
-+ seqcount_t stat_lock; /* NOTE(review): presumably guards the cycle counters above - confirm */
-+ int nr_running;
-+ int nr_unint;
-+ int nr_iowait;
-+ cputime64_t user;
-+ cputime64_t nice;
-+ cputime64_t system;
-+} ____cacheline_aligned;
-+
-+struct ve_struct { /* per-VE state; the global instance ve0 is what get_ve0() returns */
-+ struct ve_struct *prev;
-+ struct ve_struct *next;
-+
-+ envid_t veid; /* numeric id, read through VEID() */
-+ struct task_struct *init_entry;
-+ struct list_head vetask_lh; /* thread-group leaders owned by this VE, linked by SET_VE_LINKS() */
-+ kernel_cap_t cap_default;
-+ atomic_t pcounter;
-+ /* ref counter to ve from ipc */
-+ atomic_t counter;
-+ unsigned int class_id;
-+ struct veip_struct *veip;
-+ struct rw_semaphore op_sem;
-+ int is_running;
-+ int is_locked;
-+ int virt_pids;
-+ /* see vzcalluser.h for VE_FEATURE_XXX definitions */
-+ __u64 features;
-+
-+/* VE's root */
-+ struct vfsmount *fs_rootmnt;
-+ struct dentry *fs_root;
-+
-+/* sysctl */
-+ struct new_utsname *utsname;
-+ struct list_head sysctl_lh;
-+ struct ctl_table_header *kern_header;
-+ struct ctl_table *kern_table;
-+ struct ctl_table_header *quota_header;
-+ struct ctl_table *quota_table;
-+ struct file_system_type *proc_fstype;
-+ struct vfsmount *proc_mnt;
-+ struct proc_dir_entry *proc_root;
-+ struct proc_dir_entry *proc_sys_root;
-+ struct proc_dir_entry *_proc_net;
-+ struct proc_dir_entry *_proc_net_stat;
-+
-+/* SYSV IPC */
-+ struct ipc_ids *_shm_ids;
-+ struct ipc_ids *_msg_ids;
-+ struct ipc_ids *_sem_ids;
-+ int _used_sems;
-+ int _shm_tot;
-+ size_t _shm_ctlmax;
-+ size_t _shm_ctlall;
-+ int _shm_ctlmni;
-+ int _msg_ctlmax;
-+ int _msg_ctlmni;
-+ int _msg_ctlmnb;
-+ int _sem_ctls[4];
-+
-+/* BSD pty's */
-+ struct tty_driver *pty_driver;
-+ struct tty_driver *pty_slave_driver;
-+
-+#ifdef CONFIG_UNIX98_PTYS
-+ struct tty_driver *ptm_driver;
-+ struct tty_driver *pts_driver;
-+ struct idr *allocated_ptys;
-+ struct file_system_type *devpts_fstype;
-+ struct vfsmount *devpts_mnt;
-+ struct dentry *devpts_root;
-+ struct devpts_config *devpts_config;
-+#endif /* CONFIG_UNIX98_PTYS */
-+
-+ struct file_system_type *shmem_fstype;
-+ struct vfsmount *shmem_mnt;
-+#ifdef CONFIG_SYSFS
-+ struct file_system_type *sysfs_fstype;
-+ struct vfsmount *sysfs_mnt;
-+ struct super_block *sysfs_sb;
-+ struct sysfs_dirent *sysfs_root;
-+#endif /* CONFIG_SYSFS */
-+ struct subsystem *class_subsys;
-+ struct subsystem *class_obj_subsys;
-+ struct class *net_class;
-+
-+/* User uids hash */
-+ struct list_head uidhash_table[UIDHASH_SZ_VE];
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ struct hlist_head _net_dev_head;
-+ struct hlist_head _net_dev_index_head;
-+ struct net_device *_net_dev_base, **_net_dev_tail;
-+ int ifindex;
-+ struct net_device *_loopback_dev;
-+ struct net_device *_venet_dev;
-+ struct ipv4_devconf *_ipv4_devconf;
-+ struct ipv4_devconf *_ipv4_devconf_dflt;
-+ struct ctl_table_header *forward_header;
-+ struct ctl_table *forward_table;
-+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
-+ unsigned long rt_flush_required;
-+
-+/* per VE CPU stats */
-+ struct timespec start_timespec;
-+ u64 start_jiffies;
-+ cycles_t start_cycles;
-+ unsigned long avenrun[3]; /* loadavg data */
-+
-+ cycles_t cpu_used_ve;
-+ struct kstat_lat_pcpu_struct sched_lat_ve;
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ struct hlist_head *_fib_info_hash;
-+ struct hlist_head *_fib_info_laddrhash;
-+ int _fib_hash_size;
-+ int _fib_info_cnt;
-+
-+ struct fib_rule *_local_rule;
-+ struct fib_rule *_fib_rules;
-+#ifdef CONFIG_IP_MULTIPLE_TABLES
-+ /* XXX: why a magic constant? */
-+ struct fib_table *_fib_tables[256]; /* RT_TABLE_MAX - for now */
-+#else
-+ struct fib_table *_main_table;
-+ struct fib_table *_local_table;
-+#endif /* CONFIG_IP_MULTIPLE_TABLES */
-+ struct icmp_mib *_icmp_statistics[2];
-+ struct ipstats_mib *_ip_statistics[2];
-+ struct tcp_mib *_tcp_statistics[2];
-+ struct udp_mib *_udp_statistics[2];
-+ struct linux_mib *_net_statistics[2];
-+ struct venet_stat *stat;
-+#ifdef CONFIG_VE_IPTABLES
-+/* core/netfilter.c virtualization */
-+ void *_nf_hooks;
-+ struct xt_table *_ve_ipt_filter_pf; /* packet_filter struct */
-+ struct xt_table *_ipt_mangle_table;
-+ struct xt_af *_xt;
-+ struct xt_target *_ipt_standard_target;
-+
-+ __u64 _iptables_modules;
-+ struct ve_ip_conntrack *_ip_conntrack;
-+#endif /* CONFIG_VE_IPTABLES */
-+#endif /* CONFIG_VE_NETDEV || CONFIG_VE_NETDEV_MODULE */
-+ wait_queue_head_t *_log_wait;
-+ unsigned long *_log_start;
-+ unsigned long *_log_end;
-+ unsigned long *_logged_chars;
-+ char *log_buf;
-+#define VE_DEFAULT_LOG_BUF_LEN 4096
-+
-+ struct ve_cpu_stats ve_cpu_stats[NR_CPUS] ____cacheline_aligned; /* indexed by CPU, see VE_CPU_STATS() */
-+ unsigned long down_at;
-+ struct list_head cleanup_list;
-+
-+ unsigned long jiffies_fixup;
-+ unsigned char disable_net;
-+ unsigned char sparse_vpid;
-+ struct ve_monitor *monitor;
-+ struct proc_dir_entry *monitor_proc;
-+};
-+
-+#define VE_CPU_STATS(ve, cpu) (&((ve)->ve_cpu_stats[(cpu)]))
-+
-+extern int nr_ve;
-+
-+#ifdef CONFIG_VE
-+
-+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode);
-+void do_env_cleanup(struct ve_struct *envid);
-+void do_update_load_avg_ve(void);
-+void do_env_free(struct ve_struct *ptr);
-+
-+#define ve_utsname (*get_exec_env()->utsname)
-+
-+static inline struct ve_struct *get_ve(struct ve_struct *ptr)
-+{ /* take a reference: bump ptr->counter when ptr is non-NULL, return ptr */
-+ if (ptr != NULL)
-+ atomic_inc(&ptr->counter);
-+ return ptr;
-+}
-+
-+static inline void put_ve(struct ve_struct *ptr)
-+{ /* drop a reference; NULL is accepted and ignored */
-+ if (ptr && atomic_dec_and_test(&ptr->counter)) { /* counter reached zero */
-+ if (atomic_read(&ptr->pcounter) > 0) /* pcounter must be zero by now */
-+ BUG();
-+ if (ptr->is_running) /* a VE being freed must not be running */
-+ BUG();
-+ do_env_free(ptr);
-+ }
-+}
-+
-+#ifdef CONFIG_FAIRSCHED
-+#define ve_cpu_online_map(ve, mask) fairsched_cpu_online_map((ve)->veid, mask)
-+#else /* !CONFIG_FAIRSCHED: copy the global cpu_online_map into *mask */
-+#define ve_cpu_online_map(ve, mask) do { *(mask) = cpu_online_map; } while (0)
-+#endif /* CONFIG_FAIRSCHED */
-+#else /* CONFIG_VE */
-+#define ve_utsname system_utsname
-+#define get_ve(ve) (NULL)
-+#define put_ve(ve) do { } while (0)
-+#endif /* CONFIG_VE */
-+
-+#endif /* _LINUX_VE_H */
-diff -upr linux-2.6.16.orig/include/linux/ve_owner.h linux-2.6.16-026test009/include/linux/ve_owner.h
---- linux-2.6.16.orig/include/linux/ve_owner.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/ve_owner.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,32 @@
-+/*
-+ * include/linux/ve_owner.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __VE_OWNER_H__
-+#define __VE_OWNER_H__
-+
-+#include <linux/config.h>
-+#include <linux/vmalloc.h>
-+
-+
-+#define DCL_VE_OWNER(name, type, member)
-+ /* prototype declares static inline functions */
-+
-+#define DCL_VE_OWNER_PROTO(name, type, member) \
-+type; \
-+static inline struct ve_struct *VE_OWNER_##name(const type *obj) \
-+{ \
-+ return obj->member; \
-+} \
-+static inline void SET_VE_OWNER_##name(type *obj, struct ve_struct *ve) \
-+{ \
-+ obj->member = ve; \
-+}
-+
-+#endif /* __VE_OWNER_H__ */
-diff -upr linux-2.6.16.orig/include/linux/ve_proto.h linux-2.6.16-026test009/include/linux/ve_proto.h
---- linux-2.6.16.orig/include/linux/ve_proto.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/ve_proto.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,75 @@
-+/*
-+ * include/linux/ve_proto.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __VE_H__
-+#define __VE_H__
-+
-+#ifdef CONFIG_VE
-+
-+extern struct semaphore ve_call_guard;
-+extern rwlock_t ve_call_lock;
-+
-+#ifdef CONFIG_SYSVIPC
-+extern void prepare_ipc(void);
-+extern int init_ve_ipc(struct ve_struct *);
-+extern void fini_ve_ipc(struct ve_struct *);
-+extern void ve_ipc_cleanup(void);
-+#endif
-+
-+#ifdef CONFIG_UNIX98_PTYS
-+extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */
-+extern struct tty_driver *pts_driver; /* Unix98 pty slaves; for /dev/ptmx */
-+#endif
-+
-+extern rwlock_t tty_driver_guard;
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+void ip_fragment_cleanup(struct ve_struct *envid);
-+void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
-+struct fib_table * fib_hash_init(int id);
-+int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr);
-+extern int main_loopback_init(struct net_device*);
-+int venet_init(void);
-+#endif
-+
-+extern struct ve_struct *ve_list_head;
-+extern rwlock_t ve_list_guard;
-+extern struct ve_struct *get_ve_by_id(envid_t);
-+extern struct ve_struct *__find_ve_by_id(envid_t);
-+
-+struct env_create_param2;
-+extern int real_env_create(envid_t veid, unsigned flags, u32 class_id,
-+ struct env_create_param2 *data, int datalen);
-+
-+extern int do_setdevperms(envid_t veid, unsigned type,
-+ dev_t dev, unsigned mask);
-+
-+#define VE_HOOK_INIT 0
-+#define VE_HOOK_FINI 1
-+#define VE_MAX_HOOKS 2
-+
-+typedef int ve_hookfn(unsigned int hooknum, void *data);
-+
-+struct ve_hook
-+{
-+ struct list_head list;
-+ ve_hookfn *hook;
-+ ve_hookfn *undo;
-+ struct module *owner;
-+ int hooknum;
-+ /* Functions are called in ascending priority. */
-+ int priority;
-+};
-+
-+extern int ve_hook_register(struct ve_hook *vh);
-+extern void ve_hook_unregister(struct ve_hook *vh);
-+
-+#endif /* CONFIG_VE */
-+#endif /* __VE_H__ */
-diff -upr linux-2.6.16.orig/include/linux/ve_task.h linux-2.6.16-026test009/include/linux/ve_task.h
---- linux-2.6.16.orig/include/linux/ve_task.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/ve_task.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,34 @@
-+/*
-+ * include/linux/ve_task.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __VE_TASK_H__
-+#define __VE_TASK_H__
-+
-+#include <linux/seqlock.h>
-+
-+struct ve_task_info {
-+/* virtualization */
-+ struct ve_struct *owner_env;
-+ struct ve_struct *exec_env;
-+ struct list_head vetask_list;
-+ struct dentry *glob_proc_dentry;
-+/* statistics: scheduling latency */
-+ cycles_t sleep_time;
-+ cycles_t sched_time;
-+ cycles_t sleep_stamp;
-+ cycles_t wakeup_stamp;
-+ seqcount_t wakeup_lock;
-+};
-+
-+#define VE_TASK_INFO(task) (&(task)->ve_task_info)
-+#define VE_TASK_LIST_2_TASK(lh) \
-+ list_entry(lh, struct task_struct, ve_task_info.vetask_list)
-+
-+#endif /* __VE_TASK_H__ */
-diff -upr linux-2.6.16.orig/include/linux/venet.h linux-2.6.16-026test009/include/linux/venet.h
---- linux-2.6.16.orig/include/linux/venet.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/venet.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,68 @@
-+/*
-+ * include/linux/venet.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _VENET_H
-+#define _VENET_H
-+
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/vzcalluser.h>
-+
-+#define VEIP_HASH_SZ 512
-+
-+struct ve_struct;
-+struct venet_stat;
-+struct ip_entry_struct
-+{
-+ __u32 ip;
-+ struct ve_struct *active_env;
-+ struct venet_stat *stat;
-+ struct veip_struct *veip;
-+ struct list_head ip_hash;
-+ struct list_head ve_list;
-+};
-+
-+struct veip_struct
-+{
-+ struct list_head src_lh;
-+ struct list_head dst_lh;
-+ struct list_head ip_lh;
-+ struct list_head list;
-+ envid_t veid;
-+};
-+
-+/* veip_hash_lock should be taken for write by caller */
-+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip);
-+/* veip_hash_lock should be taken for write by caller */
-+void ip_entry_unhash(struct ip_entry_struct *entry);
-+/* veip_hash_lock should be taken for read by caller */
-+struct ip_entry_struct *ip_entry_lookup(u32 addr);
-+
-+/* veip_hash_lock should be taken for read by caller */
-+struct veip_struct *veip_find(envid_t veid);
-+/* veip_hash_lock should be taken for write by caller */
-+struct veip_struct *veip_findcreate(envid_t veid);
-+/* veip_hash_lock should be taken for write by caller */
-+void veip_put(struct veip_struct *veip);
-+
-+int veip_start(struct ve_struct *ve);
-+void veip_stop(struct ve_struct *ve);
-+int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr);
-+int veip_entry_del(envid_t veid, struct sockaddr_in *addr);
-+int venet_change_skb_owner(struct sk_buff *skb);
-+
-+extern struct list_head ip_entry_hash_table[];
-+extern rwlock_t veip_hash_lock;
-+
-+#ifdef CONFIG_PROC_FS
-+int veip_seq_show(struct seq_file *m, void *v);
-+#endif /* CONFIG_PROC_FS */
-+
-+#endif /* _VENET_H */
-diff -upr linux-2.6.16.orig/include/linux/veprintk.h linux-2.6.16-026test009/include/linux/veprintk.h
---- linux-2.6.16.orig/include/linux/veprintk.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/veprintk.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,38 @@
-+/*
-+ * include/linux/veprintk.h
-+ *
-+ * Copyright (C) 2006 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __VE_PRINTK_H__
-+#define __VE_PRINTK_H__
-+
-+#ifdef CONFIG_VE
-+
-+#define ve_log_wait (*(get_exec_env()->_log_wait))
-+#define ve_log_start (*(get_exec_env()->_log_start))
-+#define ve_log_end (*(get_exec_env()->_log_end))
-+#define ve_logged_chars (*(get_exec_env()->_logged_chars))
-+#define ve_log_buf (get_exec_env()->log_buf)
-+#define ve_log_buf_len (ve_is_super(get_exec_env()) ? \
-+ log_buf_len : VE_DEFAULT_LOG_BUF_LEN)
-+#define VE_LOG_BUF_MASK (ve_log_buf_len - 1)
-+#define VE_LOG_BUF(idx) (ve_log_buf[(idx) & VE_LOG_BUF_MASK])
-+
-+#else
-+
-+#define ve_log_wait log_wait
-+#define ve_log_start log_start
-+#define ve_log_end log_end
-+#define ve_logged_chars logged_chars
-+#define ve_log_buf log_buf
-+#define ve_log_buf_len log_buf_len
-+#define VE_LOG_BUF_MASK LOG_BUF_MASK
-+#define VE_LOG_BUF(idx) LOG_BUF(idx)
-+
-+#endif /* CONFIG_VE */
-+#endif /* __VE_PRINTK_H__ */
-diff -upr linux-2.6.16.orig/include/linux/virtinfo.h linux-2.6.16-026test009/include/linux/virtinfo.h
---- linux-2.6.16.orig/include/linux/virtinfo.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/virtinfo.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,52 @@
-+/*
-+ * include/linux/virtinfo.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __LINUX_VIRTINFO_H
-+#define __LINUX_VIRTINFO_H
-+
-+#include <linux/kernel.h>
-+#include <linux/page-flags.h>
-+#include <linux/rwsem.h>
-+#include <linux/notifier.h>
-+
-+struct vnotifier_block
-+{
-+ int (*notifier_call)(struct vnotifier_block *self,
-+ unsigned long, void *, int);
-+ struct vnotifier_block *next;
-+ int priority;
-+};
-+
-+void virtinfo_notifier_register(int type, struct vnotifier_block *nb);
-+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb);
-+int virtinfo_notifier_call(int type, unsigned long n, void *data);
-+
-+struct meminfo {
-+ struct sysinfo si;
-+ unsigned long active, inactive;
-+ unsigned long cache, swapcache;
-+ unsigned long committed_space;
-+ unsigned long allowed;
-+ struct page_state ps;
-+ unsigned long vmalloc_total, vmalloc_used, vmalloc_largest;
-+};
-+
-+#define VIRTINFO_MEMINFO 0
-+#define VIRTINFO_ENOUGHMEM 1
-+
-+enum virt_info_types {
-+ VITYPE_GENERAL,
-+ VITYPE_FAUDIT,
-+ VITYPE_QUOTA,
-+
-+ VIRT_TYPES
-+};
-+
-+#endif /* __LINUX_VIRTINFO_H */
-diff -upr linux-2.6.16.orig/include/linux/vmalloc.h linux-2.6.16-026test009/include/linux/vmalloc.h
---- linux-2.6.16.orig/include/linux/vmalloc.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vmalloc.h 2006-04-19 15:02:12.000000000 +0400
-@@ -18,6 +18,10 @@
- #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */
- #endif
-
-+/* align size to 2^n page boundary */
-+#define POWER2_PAGE_ALIGN(size) \
-+ ((typeof(size))(1UL << (PAGE_SHIFT + get_order(size))))
-+
- struct vm_struct {
- void *addr;
- unsigned long size;
-@@ -36,6 +40,8 @@ extern void *vmalloc_node(unsigned long
- extern void *vmalloc_exec(unsigned long size);
- extern void *vmalloc_32(unsigned long size);
- extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
-+extern void *vmalloc_best(unsigned long size);
-+extern void *ub_vmalloc_best(unsigned long size);
- extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
- pgprot_t prot);
- extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
-@@ -52,6 +58,9 @@ extern void vunmap(void *addr);
- extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
- extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
- unsigned long start, unsigned long end);
-+extern struct vm_struct * get_vm_area_best(unsigned long size,
-+ unsigned long flags);
-+extern void vprintstat(void);
- extern struct vm_struct *get_vm_area_node(unsigned long size,
- unsigned long flags, int node);
- extern struct vm_struct *remove_vm_area(void *addr);
-diff -upr linux-2.6.16.orig/include/linux/vzcalluser.h linux-2.6.16-026test009/include/linux/vzcalluser.h
---- linux-2.6.16.orig/include/linux/vzcalluser.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzcalluser.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,221 @@
-+/*
-+ * include/linux/vzcalluser.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _LINUX_VZCALLUSER_H
-+#define _LINUX_VZCALLUSER_H
-+
-+#include <linux/types.h>
-+#include <linux/ioctl.h>
-+
-+#define KERN_VZ_PRIV_RANGE 51
-+
-+#ifndef __ENVID_T_DEFINED__
-+typedef unsigned envid_t;
-+#define __ENVID_T_DEFINED__
-+#endif
-+
-+/*
-+ * VE management ioctls
-+ */
-+
-+struct vzctl_old_env_create {
-+ envid_t veid;
-+ unsigned flags;
-+#define VE_CREATE 1 /* Create VE, VE_ENTER added automatically */
-+#define VE_EXCLUSIVE 2 /* Fail if exists */
-+#define VE_ENTER 4 /* Enter existing VE */
-+#define VE_TEST 8 /* Test if VE exists */
-+#define VE_LOCK 16 /* Do not allow entering created VE */
-+#define VE_SKIPLOCK 32 /* Allow entering embryonic VE */
-+ __u32 addr;
-+};
-+
-+struct vzctl_mark_env_to_down {
-+ envid_t veid;
-+};
-+
-+struct vzctl_setdevperms {
-+ envid_t veid;
-+ unsigned type;
-+#define VE_USE_MAJOR 010 /* Test MAJOR supplied in rule */
-+#define VE_USE_MINOR 030 /* Test MINOR supplied in rule */
-+#define VE_USE_MASK 030 /* Testing mask, VE_USE_MAJOR|VE_USE_MINOR */
-+ unsigned dev;
-+ unsigned mask;
-+};
-+
-+struct vzctl_ve_netdev {
-+ envid_t veid;
-+ int op;
-+#define VE_NETDEV_ADD 1
-+#define VE_NETDEV_DEL 2
-+ char *dev_name;
-+};
-+
-+/* these masks represent modules */
-+#define VE_IP_IPTABLES_MOD (1U<<0)
-+#define VE_IP_FILTER_MOD (1U<<1)
-+#define VE_IP_MANGLE_MOD (1U<<2)
-+#define VE_IP_MATCH_LIMIT_MOD (1U<<3)
-+#define VE_IP_MATCH_MULTIPORT_MOD (1U<<4)
-+#define VE_IP_MATCH_TOS_MOD (1U<<5)
-+#define VE_IP_TARGET_TOS_MOD (1U<<6)
-+#define VE_IP_TARGET_REJECT_MOD (1U<<7)
-+#define VE_IP_TARGET_TCPMSS_MOD (1U<<8)
-+#define VE_IP_MATCH_TCPMSS_MOD (1U<<9)
-+#define VE_IP_MATCH_TTL_MOD (1U<<10)
-+#define VE_IP_TARGET_LOG_MOD (1U<<11)
-+#define VE_IP_MATCH_LENGTH_MOD (1U<<12)
-+#define VE_IP_CONNTRACK_MOD (1U<<14)
-+#define VE_IP_CONNTRACK_FTP_MOD (1U<<15)
-+#define VE_IP_CONNTRACK_IRC_MOD (1U<<16)
-+#define VE_IP_MATCH_CONNTRACK_MOD (1U<<17)
-+#define VE_IP_MATCH_STATE_MOD (1U<<18)
-+#define VE_IP_MATCH_HELPER_MOD (1U<<19)
-+#define VE_IP_NAT_MOD (1U<<20)
-+#define VE_IP_NAT_FTP_MOD (1U<<21)
-+#define VE_IP_NAT_IRC_MOD (1U<<22)
-+#define VE_IP_TARGET_REDIRECT_MOD (1U<<23)
-+
-+/* these masks represent modules with their dependencies */
-+#define VE_IP_IPTABLES (VE_IP_IPTABLES_MOD)
-+#define VE_IP_FILTER (VE_IP_FILTER_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MANGLE (VE_IP_MANGLE_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MATCH_LIMIT (VE_IP_MATCH_LIMIT_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MATCH_MULTIPORT (VE_IP_MATCH_MULTIPORT_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MATCH_TOS (VE_IP_MATCH_TOS_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_TARGET_TOS (VE_IP_TARGET_TOS_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_TARGET_REJECT (VE_IP_TARGET_REJECT_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_TARGET_TCPMSS (VE_IP_TARGET_TCPMSS_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MATCH_TCPMSS (VE_IP_MATCH_TCPMSS_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MATCH_TTL (VE_IP_MATCH_TTL_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_TARGET_LOG (VE_IP_TARGET_LOG_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MATCH_LENGTH (VE_IP_MATCH_LENGTH_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_MATCH_CONNTRACK (VE_IP_MATCH_CONNTRACK_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_MATCH_STATE (VE_IP_MATCH_STATE_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_MATCH_HELPER (VE_IP_MATCH_HELPER_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_NAT (VE_IP_NAT_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD \
-+ | VE_IP_NAT | VE_IP_CONNTRACK_FTP)
-+#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD \
-+ | VE_IP_NAT | VE_IP_CONNTRACK_IRC)
-+#define VE_IP_TARGET_REDIRECT (VE_IP_TARGET_REDIRECT_MOD \
-+ | VE_IP_NAT)
-+
-+/* safe iptables mask to be used by default */
-+#define VE_IP_DEFAULT \
-+ (VE_IP_IPTABLES | \
-+ VE_IP_FILTER | VE_IP_MANGLE | \
-+ VE_IP_MATCH_LIMIT | VE_IP_MATCH_MULTIPORT | \
-+ VE_IP_MATCH_TOS | VE_IP_TARGET_REJECT | \
-+ VE_IP_TARGET_TCPMSS | VE_IP_MATCH_TCPMSS | \
-+ VE_IP_MATCH_TTL | VE_IP_MATCH_LENGTH)
-+
-+#define VE_IPT_CMP(x,y) (((x) & (y)) == (y))
-+
-+struct vzctl_env_create_cid {
-+ envid_t veid;
-+ unsigned flags;
-+ __u32 class_id;
-+};
-+
-+struct vzctl_env_create {
-+ envid_t veid;
-+ unsigned flags;
-+ __u32 class_id;
-+};
-+
-+struct env_create_param {
-+ __u64 iptables_mask;
-+};
-+
-+#define VZCTL_ENV_CREATE_DATA_MINLEN sizeof(struct env_create_param)
-+
-+struct env_create_param2 {
-+ __u64 iptables_mask;
-+ __u64 feature_mask;
-+#define VE_FEATURE_SYSFS (1ULL << 0)
-+ __u32 total_vcpus; /* 0 - don't care, same as in host */
-+};
-+#define VZCTL_ENV_CREATE_DATA_MAXLEN sizeof(struct env_create_param2)
-+
-+typedef struct env_create_param2 env_create_param_t;
-+
-+struct vzctl_env_create_data {
-+ envid_t veid;
-+ unsigned flags;
-+ __u32 class_id;
-+ env_create_param_t *data;
-+ int datalen;
-+};
-+
-+struct vz_load_avg {
-+ int val_int;
-+ int val_frac;
-+};
-+
-+struct vz_cpu_stat {
-+ unsigned long user_jif;
-+ unsigned long nice_jif;
-+ unsigned long system_jif;
-+ unsigned long uptime_jif;
-+ __u64 idle_clk;
-+ __u64 strv_clk;
-+ __u64 uptime_clk;
-+ struct vz_load_avg avenrun[3]; /* loadavg data */
-+};
-+
-+struct vzctl_cpustatctl {
-+ envid_t veid;
-+ struct vz_cpu_stat *cpustat;
-+};
-+
-+#define VZCTLTYPE '.'
-+#define VZCTL_OLD_ENV_CREATE _IOW(VZCTLTYPE, 0, \
-+ struct vzctl_old_env_create)
-+#define VZCTL_MARK_ENV_TO_DOWN _IOW(VZCTLTYPE, 1, \
-+ struct vzctl_mark_env_to_down)
-+#define VZCTL_SETDEVPERMS _IOW(VZCTLTYPE, 2, \
-+ struct vzctl_setdevperms)
-+#define VZCTL_ENV_CREATE_CID _IOW(VZCTLTYPE, 4, \
-+ struct vzctl_env_create_cid)
-+#define VZCTL_ENV_CREATE _IOW(VZCTLTYPE, 5, \
-+ struct vzctl_env_create)
-+#define VZCTL_GET_CPU_STAT _IOW(VZCTLTYPE, 6, \
-+ struct vzctl_cpustatctl)
-+#define VZCTL_ENV_CREATE_DATA _IOW(VZCTLTYPE, 10, \
-+ struct vzctl_env_create_data)
-+#define VZCTL_VE_NETDEV _IOW(VZCTLTYPE, 11, \
-+ struct vzctl_ve_netdev)
-+
-+
-+#endif /* _LINUX_VZCALLUSER_H */
-diff -upr linux-2.6.16.orig/include/linux/vzctl.h linux-2.6.16-026test009/include/linux/vzctl.h
---- linux-2.6.16.orig/include/linux/vzctl.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzctl.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,30 @@
-+/*
-+ * include/linux/vzctl.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _LINUX_VZCTL_H
-+#define _LINUX_VZCTL_H
-+
-+#include <linux/list.h>
-+
-+struct module;
-+struct inode;
-+struct file;
-+struct vzioctlinfo {
-+ unsigned type;
-+ int (*func)(struct inode *, struct file *,
-+ unsigned int, unsigned long);
-+ struct module *owner;
-+ struct list_head list;
-+};
-+
-+extern void vzioctl_register(struct vzioctlinfo *inf);
-+extern void vzioctl_unregister(struct vzioctlinfo *inf);
-+
-+#endif /* _LINUX_VZCTL_H */
-diff -upr linux-2.6.16.orig/include/linux/vzctl_quota.h linux-2.6.16-026test009/include/linux/vzctl_quota.h
---- linux-2.6.16.orig/include/linux/vzctl_quota.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzctl_quota.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,43 @@
-+/*
-+ * include/linux/vzctl_quota.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __LINUX_VZCTL_QUOTA_H__
-+#define __LINUX_VZCTL_QUOTA_H__
-+
-+/*
-+ * Quota management ioctl
-+ */
-+
-+struct vz_quota_stat;
-+struct vzctl_quotactl {
-+ int cmd;
-+ unsigned int quota_id;
-+ struct vz_quota_stat *qstat;
-+ char *ve_root;
-+};
-+
-+struct vzctl_quotaugidctl {
-+ int cmd; /* subcommand */
-+ unsigned int quota_id; /* quota id where it applies to */
-+ unsigned int ugid_index;/* for reading statistic. index of first
-+ uid/gid record to read */
-+ unsigned int ugid_size; /* size of ugid_buf array */
-+ void *addr; /* user-level buffer */
-+};
-+
-+#define VZDQCTLTYPE '+'
-+#define VZCTL_QUOTA_CTL _IOWR(VZDQCTLTYPE, 1, \
-+ struct vzctl_quotactl)
-+#define VZCTL_QUOTA_NEW_CTL _IOWR(VZDQCTLTYPE, 2, \
-+ struct vzctl_quotactl)
-+#define VZCTL_QUOTA_UGID_CTL _IOWR(VZDQCTLTYPE, 3, \
-+ struct vzctl_quotaugidctl)
-+
-+#endif /* __LINUX_VZCTL_QUOTA_H__ */
-diff -upr linux-2.6.16.orig/include/linux/vzctl_venet.h linux-2.6.16-026test009/include/linux/vzctl_venet.h
---- linux-2.6.16.orig/include/linux/vzctl_venet.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzctl_venet.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,36 @@
-+/*
-+ * include/linux/vzctl_venet.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _VZCTL_VENET_H
-+#define _VZCTL_VENET_H
-+
-+#include <linux/types.h>
-+#include <linux/ioctl.h>
-+
-+#ifndef __ENVID_T_DEFINED__
-+typedef unsigned envid_t;
-+#define __ENVID_T_DEFINED__
-+#endif
-+
-+struct vzctl_ve_ip_map {
-+ envid_t veid;
-+ int op;
-+#define VE_IP_ADD 1
-+#define VE_IP_DEL 2
-+ struct sockaddr *addr;
-+ int addrlen;
-+};
-+
-+#define VENETCTLTYPE '('
-+
-+#define VENETCTL_VE_IP_MAP _IOW(VENETCTLTYPE, 3, \
-+ struct vzctl_ve_ip_map)
-+
-+#endif /* _VZCTL_VENET_H */
-diff -upr linux-2.6.16.orig/include/linux/vzdq_tree.h linux-2.6.16-026test009/include/linux/vzdq_tree.h
---- linux-2.6.16.orig/include/linux/vzdq_tree.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzdq_tree.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,99 @@
-+/*
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * This file contains Virtuozzo disk quota tree definition
-+ */
-+
-+#ifndef _VZDQ_TREE_H
-+#define _VZDQ_TREE_H
-+
-+#include <linux/list.h>
-+#include <asm/string.h>
-+
-+typedef unsigned int quotaid_t;
-+#define QUOTAID_BITS 32
-+#define QUOTAID_BBITS 4
-+#define QUOTAID_EBITS 8
-+
-+#if QUOTAID_EBITS % QUOTAID_BBITS
-+#error Quota bit assumption failure
-+#endif
-+
-+#define QUOTATREE_BSIZE (1 << QUOTAID_BBITS)
-+#define QUOTATREE_BMASK (QUOTATREE_BSIZE - 1)
-+#define QUOTATREE_DEPTH ((QUOTAID_BITS + QUOTAID_BBITS - 1) \
-+ / QUOTAID_BBITS)
-+#define QUOTATREE_EDEPTH ((QUOTAID_BITS + QUOTAID_EBITS - 1) \
-+ / QUOTAID_EBITS)
-+#define QUOTATREE_BSHIFT(lvl) ((QUOTATREE_DEPTH - (lvl) - 1) * QUOTAID_BBITS)
-+
-+/*
-+ * Depth of keeping unused node (not inclusive).
-+ * 0 means release all nodes including root,
-+ * QUOTATREE_DEPTH means never release nodes.
-+ * Current value: release all nodes strictly after QUOTATREE_EDEPTH
-+ * (measured in external shift units).
-+ */
-+#define QUOTATREE_CDEPTH (QUOTATREE_DEPTH \
-+ - 2 * QUOTATREE_DEPTH / QUOTATREE_EDEPTH \
-+ + 1)
-+
-+/*
-+ * Levels 0..(QUOTATREE_DEPTH-1) are tree nodes.
-+ * On level i the maximal number of nodes is 2^(i*QUOTAID_BBITS),
-+ * and each node contains 2^QUOTAID_BBITS pointers.
-+ * Level 0 is a (single) tree root node.
-+ *
-+ * Nodes of level (QUOTATREE_DEPTH-1) contain pointers to caller's data.
-+ * Nodes of lower levels contain pointers to nodes.
-+ *
-+ * Double pointer in array of i-level node, pointing to a (i+1)-level node
-+ * (such as inside quotatree_find_state) are marked by level (i+1), not i.
-+ * Level 0 double pointer is a pointer to root inside tree struct.
-+ *
-+ * The tree is permanent, i.e. all allocated index blocks are kept alive to
-+ * preserve the block numbers in the quota file tree and keep its changes
-+ * local.
-+ */
-+struct quotatree_node {
-+ struct list_head list;
-+ quotaid_t num;
-+ void *blocks[QUOTATREE_BSIZE];
-+};
-+
-+struct quotatree_level {
-+ struct list_head usedlh, freelh;
-+ quotaid_t freenum;
-+};
-+
-+struct quotatree_tree {
-+ struct quotatree_level levels[QUOTATREE_DEPTH];
-+ struct quotatree_node *root;
-+ unsigned int leaf_num;
-+};
-+
-+struct quotatree_find_state {
-+ void **block;
-+ int level;
-+};
-+
-+/* number of leaves (objects) and leaf level of the tree */
-+#define QTREE_LEAFNUM(tree) ((tree)->leaf_num)
-+#define QTREE_LEAFLVL(tree) (&(tree)->levels[QUOTATREE_DEPTH - 1])
-+
-+struct quotatree_tree *quotatree_alloc(void);
-+void *quotatree_find(struct quotatree_tree *tree, quotaid_t id,
-+ struct quotatree_find_state *st);
-+int quotatree_insert(struct quotatree_tree *tree, quotaid_t id,
-+ struct quotatree_find_state *st, void *data);
-+void quotatree_remove(struct quotatree_tree *tree, quotaid_t id);
-+void quotatree_free(struct quotatree_tree *tree, void (*dtor)(void *));
-+void *quotatree_get_next(struct quotatree_tree *tree, quotaid_t id);
-+void *quotatree_leaf_byindex(struct quotatree_tree *tree, unsigned int index);
-+
-+#endif /* _VZDQ_TREE_H */
-+
-diff -upr linux-2.6.16.orig/include/linux/vzquota.h linux-2.6.16-026test009/include/linux/vzquota.h
---- linux-2.6.16.orig/include/linux/vzquota.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzquota.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,291 @@
-+/*
-+ *
-+ * Copyright (C) 2001-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * This file contains Virtuozzo disk quota implementation
-+ */
-+
-+#ifndef _VZDQUOTA_H
-+#define _VZDQUOTA_H
-+
-+#include <linux/types.h>
-+#include <linux/quota.h>
-+
-+/* vzquotactl syscall commands */
-+#define VZ_DQ_CREATE 5 /* create quota master block */
-+#define VZ_DQ_DESTROY 6 /* destroy qmblk */
-+#define VZ_DQ_ON 7 /* mark dentry with already created qmblk */
-+#define VZ_DQ_OFF 8 /* remove mark, don't destroy qmblk */
-+#define VZ_DQ_SETLIMIT 9 /* set new limits */
-+#define VZ_DQ_GETSTAT 10 /* get usage statistic */
-+/* set of syscalls to maintain UGID quotas */
-+#define VZ_DQ_UGID_GETSTAT 1 /* get usage/limits for ugid(s) */
-+#define VZ_DQ_UGID_ADDSTAT 2 /* set usage/limits statistic for ugid(s) */
-+#define VZ_DQ_UGID_GETGRACE 3 /* get expire times */
-+#define VZ_DQ_UGID_SETGRACE 4 /* set expire times */
-+#define VZ_DQ_UGID_GETCONFIG 5 /* get ugid_max limit, cnt, flags of qmblk */
-+#define VZ_DQ_UGID_SETCONFIG 6 /* set ugid_max limit, flags of qmblk */
-+#define VZ_DQ_UGID_SETLIMIT 7 /* set ugid B/I limits */
-+#define VZ_DQ_UGID_SETINFO 8 /* set ugid info */
-+
-+/* common structure for vz and ugid quota */
-+struct dq_stat {
-+ /* blocks limits */
-+ __u64 bhardlimit; /* absolute limit in bytes */
-+ __u64 bsoftlimit; /* preferred limit in bytes */
-+ time_t btime; /* time limit for excessive disk use */
-+ __u64 bcurrent; /* current bytes count */
-+ /* inodes limits */
-+ __u32 ihardlimit; /* absolute limit on allocated inodes */
-+ __u32 isoftlimit; /* preferred inode limit */
-+ time_t itime; /* time limit for excessive inode use */
-+ __u32 icurrent; /* current # allocated inodes */
-+};
-+
-+/* One second resolution for grace times */
-+#define CURRENT_TIME_SECONDS (get_seconds())
-+
-+/* Values for dq_info->flags */
-+#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */
-+#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */
-+
-+struct dq_info {
-+ time_t bexpire; /* expire timeout for excessive disk use */
-+ time_t iexpire; /* expire timeout for excessive inode use */
-+ unsigned flags; /* see previous defines (VZ_QUOTA_INODES, VZ_QUOTA_SPACE) */
-+};
-+
-+struct vz_quota_stat {
-+ struct dq_stat dq_stat;
-+ struct dq_info dq_info;
-+};
-+
-+/* UID/GID interface record - for user-kernel level exchange */
-+struct vz_quota_iface {
-+ unsigned int qi_id; /* UID/GID this applies to */
-+ unsigned int qi_type; /* USRQUOTA|GRPQUOTA */
-+ struct dq_stat qi_stat; /* limits, options, usage stats */
-+};
-+
-+/* values for flags and dq_flags */
-+/* this flag is set if the userspace has been unable to provide usage
-+ * information about all ugids
-+ * if the flag is set, we don't allocate new UG quota blocks (their
-+ * current usage is unknown) or free existing UG quota blocks (not to
-+ * lose information that this block is ok) */
-+#define VZDQUG_FIXED_SET 0x01
-+/* permit to use ugid quota */
-+#define VZDQUG_ON 0x02
-+#define VZDQ_USRQUOTA 0x10
-+#define VZDQ_GRPQUOTA 0x20
-+#define VZDQ_NOACT 0x1000 /* not actual */
-+#define VZDQ_NOQUOT 0x2000 /* not under quota tree */
-+
-+struct vz_quota_ugid_stat {
-+ unsigned int limit; /* max amount of ugid records */
-+ unsigned int count; /* amount of ugid records */
-+ unsigned int flags;
-+};
-+
-+struct vz_quota_ugid_setlimit {
-+ unsigned int type; /* quota type (USR/GRP) */
-+ unsigned int id; /* ugid */
-+ struct if_dqblk dqb; /* limits info */
-+};
-+
-+struct vz_quota_ugid_setinfo {
-+ unsigned int type; /* quota type (USR/GRP) */
-+ struct if_dqinfo dqi; /* grace info */
-+};
-+
-+#ifdef __KERNEL__
-+#include <linux/list.h>
-+#include <asm/atomic.h>
-+#include <asm/semaphore.h>
-+#include <linux/time.h>
-+#include <linux/vzquota_qlnk.h>
-+#include <linux/vzdq_tree.h>
-+
-+/* Values for dq_info flags */
-+#define VZ_QUOTA_INODES 0x01 /* inodes limit warning printed */
-+#define VZ_QUOTA_SPACE 0x02 /* space limit warning printed */
-+
-+/* values for dq_state */
-+#define VZDQ_STARTING 0 /* created, not turned on yet */
-+#define VZDQ_WORKING 1 /* quota created, turned on */
-+#define VZDQ_STOPING 2 /* created, turned on and off */
-+
-+/* master quota record - one per VE id (veid) */
-+struct vz_quota_master {
-+ struct list_head dq_hash; /* link in the quota hash list */
-+ atomic_t dq_count; /* inode reference count; qmblk_put() frees at zero */
-+ unsigned int dq_flags; /* flag bits, see VZDQUG_FIXED_SET */
-+ unsigned int dq_state; /* VZDQ_STARTING, VZDQ_WORKING or VZDQ_STOPING */
-+ unsigned int dq_id; /* VEID this applies to */
-+ struct dq_stat dq_stat; /* limits, grace, usage stats */
-+ struct dq_info dq_info; /* grace times and flags */
-+ spinlock_t dq_data_lock; /* protects dq_stat */
-+
-+ struct semaphore dq_sem; /* semaphore protecting
-+ the ugid tree */
-+
-+ struct list_head dq_ilink_list; /* list of vz_quota_ilink */
-+ struct quotatree_tree *dq_uid_tree; /* vz_quota_ugid tree for UIDs */
-+ struct quotatree_tree *dq_gid_tree; /* vz_quota_ugid tree for GIDs */
-+ unsigned int dq_ugid_count; /* current number of ugid records */
-+ unsigned int dq_ugid_max; /* maximum number of ugid records */
-+ struct dq_info dq_ugid_info[MAXQUOTAS]; /* ugid grace times, one per quota type */
-+
-+ struct dentry *dq_root_dentry;/* dentry of fs tree */
-+ struct vfsmount *dq_root_mnt; /* vfsmount of this dentry */
-+ struct super_block *dq_sb; /* superblock of our quota root */
-+};
-+
-+/* UID/GID quota record - one per pair (quota master, uid or gid) */
-+struct vz_quota_ugid {
-+ unsigned int qugid_id; /* UID/GID this applies to */
-+ struct dq_stat qugid_stat; /* limits, options, usage stats */
-+ int qugid_type; /* USRQUOTA or GRPQUOTA */
-+ atomic_t qugid_count; /* reference count */
-+};
-+
-+#define VZ_QUOTA_UGBAD ((struct vz_quota_ugid *)0xfeafea11)
-+
-+struct vz_quota_datast {
-+ struct vz_quota_ilink qlnk;
-+};
-+
-+#define VIRTINFO_QUOTA_GETSTAT 0
-+#define VIRTINFO_QUOTA_ON 1
-+#define VIRTINFO_QUOTA_OFF 2
-+
-+struct virt_info_quota {
-+ struct super_block *super;
-+ struct dq_stat *qstat;
-+};
-+
-+/*
-+ * Interface to VZ quota core
-+ */
-+#define INODE_QLNK(inode) (&(inode)->i_qlnk)
-+#define QLNK_INODE(qlnk) container_of((qlnk), struct inode, i_qlnk)
-+
-+#define VZ_QUOTA_BAD ((struct vz_quota_master *)0xefefefef)
-+
-+#define VZ_QUOTAO_SETE 1
-+#define VZ_QUOTAO_INIT 2
-+#define VZ_QUOTAO_DESTR 3
-+#define VZ_QUOTAO_SWAP 4
-+#define VZ_QUOTAO_INICAL 5
-+#define VZ_QUOTAO_DRCAL 6
-+#define VZ_QUOTAO_QSET 7
-+#define VZ_QUOTAO_TRANS 8
-+#define VZ_QUOTAO_ACT 9
-+#define VZ_QUOTAO_DTREE 10
-+#define VZ_QUOTAO_DET 11
-+#define VZ_QUOTAO_ON 12
-+
-+extern struct semaphore vz_quota_sem;
-+void inode_qmblk_lock(struct super_block *sb);
-+void inode_qmblk_unlock(struct super_block *sb);
-+void qmblk_data_read_lock(struct vz_quota_master *qmblk);
-+void qmblk_data_read_unlock(struct vz_quota_master *qmblk);
-+void qmblk_data_write_lock(struct vz_quota_master *qmblk);
-+void qmblk_data_write_unlock(struct vz_quota_master *qmblk);
-+
-+/* for quota operations */
-+void vzquota_inode_init_call(struct inode *inode);
-+void vzquota_inode_drop_call(struct inode *inode);
-+int vzquota_inode_transfer_call(struct inode *, struct iattr *);
-+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
-+ struct vz_quota_datast *);
-+void vzquota_data_unlock(struct inode *inode, struct vz_quota_datast *);
-+int vzquota_rename_check(struct inode *inode,
-+ struct inode *old_dir, struct inode *new_dir);
-+struct vz_quota_master *vzquota_inode_qmblk(struct inode *inode);
-+/* for second-level quota */
-+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
-+/* for management operations */
-+struct vz_quota_master *vzquota_alloc_master(unsigned int quota_id,
-+ struct vz_quota_stat *qstat);
-+void vzquota_free_master(struct vz_quota_master *);
-+struct vz_quota_master *vzquota_find_master(unsigned int quota_id);
-+int vzquota_on_qmblk(struct super_block *sb, struct inode *inode,
-+ struct vz_quota_master *qmblk);
-+int vzquota_off_qmblk(struct super_block *sb, struct vz_quota_master *qmblk);
-+int vzquota_get_super(struct super_block *sb);
-+void vzquota_put_super(struct super_block *sb);
-+
-+static inline struct vz_quota_master *qmblk_get(struct vz_quota_master *qmblk)
-+{
-+ if (!atomic_read(&qmblk->dq_count)) /* a zero count here is treated as a bug */
-+ BUG();
-+ atomic_inc(&qmblk->dq_count); /* take one more reference */
-+ return qmblk; /* hand back the same pointer */
-+}
-+
-+static inline void __qmblk_put(struct vz_quota_master *qmblk)
-+{
-+ atomic_dec(&qmblk->dq_count); /* drop a reference; never frees, even at zero */
-+}
-+
-+static inline void qmblk_put(struct vz_quota_master *qmblk)
-+{
-+ if (!atomic_dec_and_test(&qmblk->dq_count)) /* other references remain */
-+ return;
-+ vzquota_free_master(qmblk); /* last reference dropped: release the record */
-+}
-+
-+extern struct list_head vzquota_hash_table[];
-+extern int vzquota_hash_size;
-+
-+/*
-+ * Interface to VZ UGID quota
-+ */
-+extern struct quotactl_ops vz_quotactl_operations;
-+extern struct dquot_operations vz_quota_operations2;
-+extern struct quota_format_type vz_quota_empty_v2_format;
-+
-+#define QUGID_TREE(qmblk, type) (((type) == USRQUOTA) ? \
-+ (qmblk)->dq_uid_tree : \
-+ (qmblk)->dq_gid_tree) /* any type other than USRQUOTA selects the gid tree */
-+
-+#define VZDQUG_FIND_DONT_ALLOC 1
-+#define VZDQUG_FIND_FAKE 2
-+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
-+ unsigned int quota_id, int type, int flags);
-+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
-+ unsigned int quota_id, int type, int flags);
-+struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid);
-+void vzquota_put_ugid(struct vz_quota_master *qmblk,
-+ struct vz_quota_ugid *qugid);
-+void vzquota_kill_ugid(struct vz_quota_master *qmblk);
-+int vzquota_ugid_init(void);
-+void vzquota_ugid_release(void);
-+int vzquota_transfer_usage(struct inode *inode, int mask,
-+ struct vz_quota_ilink *qlnk);
-+
-+struct vzctl_quotaugidctl;
-+long do_vzquotaugidctl(struct vzctl_quotaugidctl *qub);
-+
-+/*
-+ * Other VZ quota parts
-+ */
-+extern struct dquot_operations vz_quota_operations;
-+
-+long do_vzquotactl(int cmd, unsigned int quota_id,
-+ struct vz_quota_stat *qstat, const char *ve_root);
-+int vzquota_proc_init(void);
-+void vzquota_proc_release(void);
-+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
-+extern struct semaphore vz_quota_sem;
-+
-+void vzaquota_init(void);
-+void vzaquota_fini(void);
-+
-+#endif /* __KERNEL__ */
-+
-+#endif /* _VZDQUOTA_H */
-diff -upr linux-2.6.16.orig/include/linux/vzquota_qlnk.h linux-2.6.16-026test009/include/linux/vzquota_qlnk.h
---- linux-2.6.16.orig/include/linux/vzquota_qlnk.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzquota_qlnk.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,25 @@
-+/*
-+ * include/linux/vzquota_qlnk.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _VZDQUOTA_QLNK_H
-+#define _VZDQUOTA_QLNK_H
-+
-+struct vz_quota_master;
-+struct vz_quota_ugid;
-+
-+/* inode link, used to track inodes using quota via dq_ilink_list */
-+struct vz_quota_ilink {
-+ struct vz_quota_master *qmblk; /* NOTE(review): presumably the master record of the inode - confirm */
-+ struct vz_quota_ugid *qugid[MAXQUOTAS]; /* one ugid record pointer per quota type */
-+ struct list_head list; /* NOTE(review): presumably the dq_ilink_list linkage - confirm */
-+ unsigned char origin; /* NOTE(review): looks like a VZ_QUOTAO_* code - confirm */
-+};
-+
-+#endif /* _VZDQUOTA_QLNK_H */
-diff -upr linux-2.6.16.orig/include/linux/vzratelimit.h linux-2.6.16-026test009/include/linux/vzratelimit.h
---- linux-2.6.16.orig/include/linux/vzratelimit.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzratelimit.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,28 @@
-+/*
-+ * include/linux/vzratelimit.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __VZ_RATELIMIT_H__
-+#define __VZ_RATELIMIT_H__
-+
-+/*
-+ * Generic ratelimiting stuff.
-+ */
-+
-+struct vz_rate_info {
-+ int burst; /* NOTE(review): presumably the allowed burst size - confirm */
-+ int interval; /* jiffies per event */
-+ int bucket; /* kind of leaky bucket */
-+ unsigned long last; /* time of the last event */
-+};
-+
-+/* Return true if rate limit permits. */
-+int vz_ratelimit(struct vz_rate_info *p);
-+
-+#endif /* __VZ_RATELIMIT_H__ */
-diff -upr linux-2.6.16.orig/include/linux/vzstat.h linux-2.6.16-026test009/include/linux/vzstat.h
---- linux-2.6.16.orig/include/linux/vzstat.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/linux/vzstat.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,182 @@
-+/*
-+ * include/linux/vzstat.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __VZSTAT_H__
-+#define __VZSTAT_H__
-+
-+struct swap_cache_info_struct {
-+ unsigned long add_total;
-+ unsigned long del_total;
-+ unsigned long find_success;
-+ unsigned long find_total;
-+ unsigned long noent_race;
-+ unsigned long exist_race;
-+ unsigned long remove_race;
-+};
-+
-+struct kstat_lat_snap_struct {
-+ cycles_t maxlat, totlat; /* largest sample and sum of all samples */
-+ unsigned long count; /* number of samples added */
-+};
-+struct kstat_lat_pcpu_snap_struct {
-+ cycles_t maxlat, totlat; /* largest sample and sum of all samples */
-+ unsigned long count; /* number of samples added */
-+ seqcount_t lock; /* write section wraps updates in KSTAT_LAT_PCPU_ADD() */
-+} ____cacheline_aligned_in_smp;
-+
-+struct kstat_lat_struct {
-+ struct kstat_lat_snap_struct cur, last; /* KSTAT_LAT_UPDATE() copies cur to last */
-+ cycles_t avg[3]; /* CALC_LOAD averages of maxlat with EXP_1, EXP_5, EXP_15 */
-+};
-+struct kstat_lat_pcpu_struct {
-+ struct kstat_lat_pcpu_snap_struct cur[NR_CPUS]; /* one accumulator per CPU */
-+ cycles_t max_snap; /* merged into the averaged maximum, then zeroed, on update */
-+ struct kstat_lat_snap_struct last; /* totals over all CPUs from the last update */
-+ cycles_t avg[3]; /* CALC_LOAD averages with EXP_1, EXP_5, EXP_15 */
-+};
-+
-+struct kstat_perf_snap_struct {
-+ cycles_t wall_tottime, cpu_tottime; /* summed durations: elapsed, and elapsed minus sleep */
-+ cycles_t wall_maxdur, cpu_maxdur; /* longest single duration of each kind */
-+ unsigned long count; /* number of KSTAT_PERF_LEAVE() passes */
-+};
-+struct kstat_perf_struct {
-+ struct kstat_perf_snap_struct cur, last;
-+};
-+
-+struct kstat_zone_avg {
-+ unsigned long free_pages_avg[3],
-+ nr_active_avg[3],
-+ nr_inactive_avg[3];
-+};
-+
-+#define KSTAT_ALLOCSTAT_NR 5
-+
-+struct kernel_stat_glob {
-+ unsigned long nr_unint_avg[3];
-+
-+ unsigned long alloc_fails[KSTAT_ALLOCSTAT_NR];
-+ struct kstat_lat_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
-+ struct kstat_lat_pcpu_struct sched_lat;
-+ struct kstat_lat_struct swap_in;
-+
-+ struct kstat_perf_struct ttfp, cache_reap,
-+ refill_inact, shrink_icache, shrink_dcache;
-+
-+ struct kstat_zone_avg zone_avg[3]; /* MAX_NR_ZONES */
-+} ____cacheline_aligned;
-+
-+extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
-+extern spinlock_t kstat_glb_lock;
-+
-+#ifdef CONFIG_VE
-+#define KSTAT_PERF_ENTER(name) \
-+ unsigned long flags; \
-+ cycles_t start, sleep_time; \
-+ /* declares locals in the caller's scope; snapshot cycles and sleep time */ \
-+ start = get_cycles(); \
-+ sleep_time = VE_TASK_INFO(current)->sleep_time;
-+/* Must follow KSTAT_PERF_ENTER() in the same scope; updates kstat_glob.name.cur */
-+#define KSTAT_PERF_LEAVE(name) \
-+ spin_lock_irqsave(&kstat_glb_lock, flags); \
-+ kstat_glob.name.cur.count++; \
-+ start = get_cycles() - start; \
-+ if (kstat_glob.name.cur.wall_maxdur < start) \
-+ kstat_glob.name.cur.wall_maxdur = start;\
-+ kstat_glob.name.cur.wall_tottime += start; \
-+ start -= VE_TASK_INFO(current)->sleep_time - \
-+ sleep_time; \
-+ if (kstat_glob.name.cur.cpu_maxdur < start) \
-+ kstat_glob.name.cur.cpu_maxdur = start; \
-+ kstat_glob.name.cur.cpu_tottime += start; \
-+ spin_unlock_irqrestore(&kstat_glb_lock, flags);
-+
-+#else /* !CONFIG_VE: both macros expand to nothing */
-+#define KSTAT_PERF_ENTER(name)
-+#define KSTAT_PERF_LEAVE(name)
-+#endif /* CONFIG_VE */
-+
-+/*
-+ * Add another statistics reading: count it, track the maximum, sum the total.
-+ * Serialization is the caller's responsibility.
-+ */
-+static inline void KSTAT_LAT_ADD(struct kstat_lat_struct *p,
-+ cycles_t dur)
-+{
-+ p->cur.count++;
-+ if (p->cur.maxlat < dur) /* keep the running maximum */
-+ p->cur.maxlat = dur;
-+ p->cur.totlat += dur;
-+}
-+
-+static inline void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, int cpu,
-+ cycles_t dur)
-+{
-+ struct kstat_lat_pcpu_snap_struct *cur;
-+
-+ cur = &p->cur[cpu]; /* slot of the given cpu; cpu is not range-checked here */
-+ write_seqcount_begin(&cur->lock); /* open the seqcount write section */
-+ cur->count++;
-+ if (cur->maxlat < dur) /* keep the running maximum */
-+ cur->maxlat = dur;
-+ cur->totlat += dur;
-+ write_seqcount_end(&cur->lock); /* close the seqcount write section */
-+}
-+
-+/*
-+ * Copy current statistics to last, restart cur.maxlat, refresh the averages.
-+ * Serialization is the caller's responsibility.
-+ */
-+static inline void KSTAT_LAT_UPDATE(struct kstat_lat_struct *p)
-+{
-+ cycles_t m;
-+ memcpy(&p->last, &p->cur, sizeof(p->last));
-+ p->cur.maxlat = 0; /* only maxlat restarts; count and totlat keep accumulating */
-+ m = p->last.maxlat;
-+ CALC_LOAD(p->avg[0], EXP_1, m);
-+ CALC_LOAD(p->avg[1], EXP_5, m);
-+ CALC_LOAD(p->avg[2], EXP_15, m);
-+}
-+
-+static inline void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
-+{
-+ unsigned i, cpu;
-+ struct kstat_lat_pcpu_snap_struct snap, *cur;
-+ cycles_t m;
-+
-+ memset(&p->last, 0, sizeof(p->last)); /* last is rebuilt from scratch below */
-+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
-+ cur = &p->cur[cpu];
-+ do { /* retry until a consistent copy of this CPU's slot is read */
-+ i = read_seqcount_begin(&cur->lock);
-+ memcpy(&snap, cur, sizeof(snap));
-+ } while (read_seqcount_retry(&cur->lock, i));
-+ /*
-+ * The read above and this reset of maxlat are not atomic,
-+ * but this is OK, since it happens rarely and losing
-+ * a couple of peaks is not essential. xemul
-+ */
-+ cur->maxlat = 0;
-+
-+ p->last.count += snap.count; /* sum counts and totals over all CPUs */
-+ p->last.totlat += snap.totlat;
-+ if (p->last.maxlat < snap.maxlat) /* maximum over all CPUs */
-+ p->last.maxlat = snap.maxlat;
-+ }
-+
-+ m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
-+ CALC_LOAD(p->avg[0], EXP_1, m);
-+ CALC_LOAD(p->avg[1], EXP_5, m);
-+ CALC_LOAD(p->avg[2], EXP_15, m);
-+ /* reset max_snap to calculate it correctly next time */
-+ p->max_snap = 0;
-+}
-+
-+#endif /* __VZSTAT_H__ */
-diff -upr linux-2.6.16.orig/include/net/af_unix.h linux-2.6.16-026test009/include/net/af_unix.h
---- linux-2.6.16.orig/include/net/af_unix.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/af_unix.h 2006-04-19 15:02:12.000000000 +0400
-@@ -19,23 +19,37 @@ extern atomic_t unix_tot_inflight;
-
- static inline struct sock *first_unix_socket(int *i)
- {
-+ struct sock *s;
-+ struct ve_struct *ve;
-+
-+ ve = get_exec_env(); /* environment the caller currently executes in */
- for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-- if (!hlist_empty(&unix_socket_table[*i]))
-- return __sk_head(&unix_socket_table[*i]);
-+ for (s = sk_head(&unix_socket_table[*i]);
-+ s != NULL && !ve_accessible(s->sk_owner_env, ve);
-+ s = sk_next(s)); /* empty body: skip sockets not accessible from ve */
-+ if (s != NULL) /* first accessible socket in this chain */
-+ return s;
- }
- return NULL;
- }
-
- static inline struct sock *next_unix_socket(int *i, struct sock *s)
- {
-- struct sock *next = sk_next(s);
-- /* More in this chain? */
-- if (next)
-- return next;
-+ struct ve_struct *ve;
-+
-+ ve = get_exec_env(); /* environment the caller currently executes in */
-+ for (s = sk_next(s); s != NULL; s = sk_next(s)) { /* rest of the current chain */
-+ if (!ve_accessible(s->sk_owner_env, ve))
-+ continue;
-+ return s; /* next accessible socket in the same chain */
-+ }
- /* Look for next non-empty chain. */
- for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-- if (!hlist_empty(&unix_socket_table[*i]))
-- return __sk_head(&unix_socket_table[*i]);
-+ for (s = sk_head(&unix_socket_table[*i]);
-+ s != NULL && !ve_accessible(s->sk_owner_env, ve);
-+ s = sk_next(s)); /* empty body: skip sockets not accessible from ve */
-+ if (s != NULL) /* first accessible socket in this chain */
-+ return s;
- }
- return NULL;
- }
-diff -upr linux-2.6.16.orig/include/net/compat.h linux-2.6.16-026test009/include/net/compat.h
---- linux-2.6.16.orig/include/net/compat.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/compat.h 2006-04-19 15:02:11.000000000 +0400
-@@ -23,6 +23,14 @@ struct compat_cmsghdr {
- compat_int_t cmsg_type;
- };
-
-+#if defined(CONFIG_X86_64)
-+#define is_current_32bits() (current_thread_info()->flags & _TIF_IA32) /* masked flag bits, not necessarily 1 */
-+#elif defined(CONFIG_IA64)
-+#define is_current_32bits() (IS_IA32_PROCESS(ia64_task_regs(current)))
-+#else /* any other configuration: constant 0 */
-+#define is_current_32bits() 0
-+#endif /* CONFIG_X86_64 / CONFIG_IA64 */
-+
- #else /* defined(CONFIG_COMPAT) */
- #define compat_msghdr msghdr /* to avoid compiler warnings */
- #endif /* defined(CONFIG_COMPAT) */
-diff -upr linux-2.6.16.orig/include/net/flow.h linux-2.6.16-026test009/include/net/flow.h
---- linux-2.6.16.orig/include/net/flow.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/flow.h 2006-04-19 15:02:12.000000000 +0400
-@@ -10,6 +10,7 @@
- #include <linux/in6.h>
- #include <asm/atomic.h>
-
-+struct ve_struct;
- struct flowi {
- int oif;
- int iif;
-@@ -78,6 +79,9 @@ struct flowi {
- #define fl_icmp_type uli_u.icmpt.type
- #define fl_icmp_code uli_u.icmpt.code
- #define fl_ipsec_spi uli_u.spi
-+#ifdef CONFIG_VE
-+ struct ve_struct *owner_env;
-+#endif
- } __attribute__((__aligned__(BITS_PER_LONG/8)));
-
- #define FLOW_DIR_IN 0
-diff -upr linux-2.6.16.orig/include/net/icmp.h linux-2.6.16-026test009/include/net/icmp.h
---- linux-2.6.16.orig/include/net/icmp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/icmp.h 2006-04-19 15:02:12.000000000 +0400
-@@ -31,9 +31,14 @@ struct icmp_err {
-
- extern struct icmp_err icmp_err_convert[];
- DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
--#define ICMP_INC_STATS(field) SNMP_INC_STATS(icmp_statistics, field)
--#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field)
--#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field)
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_icmp_statistics (get_exec_env()->_icmp_statistics)
-+#else
-+#define ve_icmp_statistics icmp_statistics
-+#endif
-+#define ICMP_INC_STATS(field) SNMP_INC_STATS(ve_icmp_statistics, field)
-+#define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_icmp_statistics, field)
-+#define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_icmp_statistics, field)
-
- struct dst_entry;
- struct net_proto_family;
-diff -upr linux-2.6.16.orig/include/net/inet_hashtables.h linux-2.6.16-026test009/include/net/inet_hashtables.h
---- linux-2.6.16.orig/include/net/inet_hashtables.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/inet_hashtables.h 2006-04-19 15:02:12.000000000 +0400
-@@ -24,6 +24,7 @@
- #include <linux/spinlock.h>
- #include <linux/types.h>
- #include <linux/wait.h>
-+#include <linux/ve_owner.h>
-
- #include <net/inet_connection_sock.h>
- #include <net/inet_sock.h>
-@@ -75,11 +76,13 @@ struct inet_ehash_bucket {
- * ports are created in O(1) time? I thought so. ;-) -DaveM
- */
- struct inet_bind_bucket {
-+ struct ve_struct *owner_env;
- unsigned short port;
- signed short fastreuse;
- struct hlist_node node;
- struct hlist_head owners;
- };
-+DCL_VE_OWNER_PROTO(TB, struct inet_bind_bucket, owner_env)
-
- #define inet_bind_bucket_for_each(tb, node, head) \
- hlist_for_each_entry(tb, node, head, node)
-@@ -139,37 +142,43 @@ static inline struct inet_ehash_bucket *
- extern struct inet_bind_bucket *
- inet_bind_bucket_create(kmem_cache_t *cachep,
- struct inet_bind_hashbucket *head,
-- const unsigned short snum);
-+ const unsigned short snum,
-+ struct ve_struct *env);
- extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
- struct inet_bind_bucket *tb);
-
--static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
-+static inline int inet_bhashfn(const __u16 lport, const int bhash_size,
-+ unsigned veid) /* VE id mixed into the bucket index */
- {
-- return lport & (bhash_size - 1);
-+ return ((lport + (veid ^ (veid >> 16))) & (bhash_size - 1)); /* port plus folded veid, masked to table size */
- }
-
- extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum);
-
- /* These can have wildcards, don't try too hard. */
--static inline int inet_lhashfn(const unsigned short num)
-+static inline int inet_lhashfn(const unsigned short num, unsigned veid) /* veid is mixed into the index */
- {
-- return num & (INET_LHTABLE_SIZE - 1);
-+ return ((num + (veid ^ (veid >> 16))) & (INET_LHTABLE_SIZE - 1)); /* port plus folded veid, masked to table size */
- }
-
- static inline int inet_sk_listen_hashfn(const struct sock *sk)
- {
-- return inet_lhashfn(inet_sk(sk)->num);
-+ return inet_lhashfn(inet_sk(sk)->num, VEID(VE_OWNER_SK(sk)));
- }
-
- /* Caller must disable local BH processing. */
- static inline void __inet_inherit_port(struct inet_hashinfo *table,
- struct sock *sk, struct sock *child)
- {
-- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
-- struct inet_bind_hashbucket *head = &table->bhash[bhash];
-+ int bhash;
-+ struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
-
-+ bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size,
-+ VEID(VE_OWNER_SK(child)));
-+ head = &table->bhash[bhash];
-+
- spin_lock(&head->lock);
- tb = inet_csk(sk)->icsk_bind_hash;
- sk_add_bind_node(child, &tb->owners);
-@@ -275,7 +284,8 @@ static inline int inet_iif(const struct
- extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
- const u32 daddr,
- const unsigned short hnum,
-- const int dif);
-+ const int dif,
-+ struct ve_struct *env);
-
- /* Optimize the common listener case. */
- static inline struct sock *
-@@ -285,18 +295,21 @@ static inline struct sock *
- {
- struct sock *sk = NULL;
- const struct hlist_head *head;
-+ struct ve_struct *env;
-
-+ env = get_exec_env();
- read_lock(&hashinfo->lhash_lock);
-- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
-+ head = &hashinfo->listening_hash[inet_lhashfn(hnum, VEID(env))];
- if (!hlist_empty(head)) {
- const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
- if (inet->num == hnum && !sk->sk_node.next &&
-+ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
- (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
- (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
- !sk->sk_bound_dev_if)
- goto sherry_cache;
-- sk = __inet_lookup_listener(head, daddr, hnum, dif);
-+ sk = __inet_lookup_listener(head, daddr, hnum, dif, env);
- }
- if (sk) {
- sherry_cache:
-@@ -323,25 +336,25 @@ sherry_cache:
- #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
- const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
- #endif /* __BIG_ENDIAN */
--#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
- (((__sk)->sk_hash == (__hash)) && \
- ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
- ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
--#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
-+#define INET_TW_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
- (((__sk)->sk_hash == (__hash)) && \
- ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
- ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- #else /* 32-bit arch */
- #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
--#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
-+#define INET_MATCH_ALLVE(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
- (((__sk)->sk_hash == (__hash)) && \
- (inet_sk(__sk)->daddr == (__saddr)) && \
- (inet_sk(__sk)->rcv_saddr == (__daddr)) && \
- ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
--#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
-+#define INET_TW_MATCH_ALLVE(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
- (((__sk)->sk_hash == (__hash)) && \
- (inet_twsk(__sk)->tw_daddr == (__saddr)) && \
- (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
-@@ -349,6 +362,18 @@ sherry_cache:
- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- #endif /* 64-bit arch */
-
-+#define INET_MATCH(__sk, __hash, __cookie, __saddr, \
-+ __daddr, __ports, __dif, __ve) \
-+ (INET_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr), \
-+ (__daddr), (__ports), (__dif)) \
-+ && ve_accessible_strict(VE_OWNER_SK(__sk), (__ve))) /* INET_MATCH_ALLVE plus a check of the socket's owner VE */
-+
-+#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, \
-+ __daddr, __ports, __dif, __ve) \
-+ (INET_TW_MATCH_ALLVE((__sk), (__hash), (__cookie), (__saddr), \
-+ (__daddr), (__ports), (__dif)) \
-+ && ve_accessible_strict(inet_twsk(__sk)->tw_owner_env, VEID(__ve))) /* tw_owner_env holds an envid_t, so it is checked against VEID(__ve) */
-+
- /*
- * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
- * not check it for lookups anymore, thanks Alexey. -DaveM
-@@ -368,19 +393,25 @@ static inline struct sock *
- /* Optimize here for direct hit, only listening connections can
- * have wildcards anyways.
- */
-- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
-- struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
--
-+ unsigned int hash;
-+ struct inet_ehash_bucket *head;
-+ struct ve_struct *env;
-+
-+ env = get_exec_env();
-+ hash = inet_ehashfn(daddr, hnum, saddr, sport, VEID(env));
-+ head = inet_ehash_bucket(hashinfo, hash);
- prefetch(head->chain.first);
- read_lock(&head->lock);
- sk_for_each(sk, node, &head->chain) {
-- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
-+ if (INET_MATCH(sk, hash, acookie, saddr, daddr,
-+ ports, dif, env))
- goto hit; /* You sunk my battleship! */
- }
-
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
-- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
-+ if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr,
-+ ports, dif, env))
- goto hit;
- }
- sk = NULL;
-diff -upr linux-2.6.16.orig/include/net/inet_sock.h linux-2.6.16-026test009/include/net/inet_sock.h
---- linux-2.6.16.orig/include/net/inet_sock.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/inet_sock.h 2006-04-19 15:02:12.000000000 +0400
-@@ -171,9 +171,10 @@ static inline void inet_sk_copy_descenda
- extern int inet_sk_rebuild_header(struct sock *sk);
-
- static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
-- const __u32 faddr, const __u16 fport)
-+ const __u32 faddr, const __u16 fport,
-+ const envid_t veid) /* VE id, folded into the hash below */
- {
-- unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
-+ unsigned int h = (laddr ^ lport) ^ (faddr ^ fport) ^ (veid ^ (veid >> 16)); /* unsigned: well-defined shifts */
- h ^= h >> 16;
- h ^= h >> 8;
- return h;
-@@ -186,8 +187,9 @@ static inline int inet_sk_ehashfn(const
- const __u16 lport = inet->num;
- const __u32 faddr = inet->daddr;
- const __u16 fport = inet->dport;
-+ envid_t veid = VEID(VE_OWNER_SK(sk));
-
-- return inet_ehashfn(laddr, lport, faddr, fport);
-+ return inet_ehashfn(laddr, lport, faddr, fport, veid);
- }
-
- #endif /* _INET_SOCK_H */
-diff -upr linux-2.6.16.orig/include/net/inet_timewait_sock.h linux-2.6.16-026test009/include/net/inet_timewait_sock.h
---- linux-2.6.16.orig/include/net/inet_timewait_sock.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/inet_timewait_sock.h 2006-04-19 15:02:12.000000000 +0400
-@@ -134,6 +134,7 @@ struct inet_timewait_sock {
- unsigned long tw_ttd;
- struct inet_bind_bucket *tw_tb;
- struct hlist_node tw_death_node;
-+ envid_t tw_owner_env;
- };
-
- static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
-diff -upr linux-2.6.16.orig/include/net/ip.h linux-2.6.16-026test009/include/net/ip.h
---- linux-2.6.16.orig/include/net/ip.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/ip.h 2006-04-19 15:02:12.000000000 +0400
-@@ -95,6 +95,7 @@ extern int ip_local_deliver(struct sk_b
- extern int ip_mr_input(struct sk_buff *skb);
- extern int ip_output(struct sk_buff *skb);
- extern int ip_mc_output(struct sk_buff *skb);
-+extern int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
- extern int ip_do_nat(struct sk_buff *skb);
- extern void ip_send_check(struct iphdr *ip);
- extern int ip_queue_xmit(struct sk_buff *skb, int ipfragok);
-@@ -152,15 +153,25 @@ struct ipv4_config
-
- extern struct ipv4_config ipv4_config;
- DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
--#define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field)
--#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field)
--#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field)
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_ip_statistics (get_exec_env()->_ip_statistics)
-+#else
-+#define ve_ip_statistics ip_statistics
-+#endif
-+#define IP_INC_STATS(field) SNMP_INC_STATS(ve_ip_statistics, field)
-+#define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_ip_statistics, field)
-+#define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_ip_statistics, field)
- DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
--#define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field)
--#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field)
--#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(net_statistics, field)
--#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(net_statistics, field, adnd)
--#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(net_statistics, field, adnd)
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_net_statistics (get_exec_env()->_net_statistics)
-+#else
-+#define ve_net_statistics net_statistics
-+#endif
-+#define NET_INC_STATS(field) SNMP_INC_STATS(ve_net_statistics, field)
-+#define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_net_statistics, field)
-+#define NET_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_net_statistics, field)
-+#define NET_ADD_STATS_BH(field, adnd) SNMP_ADD_STATS_BH(ve_net_statistics, field, adnd)
-+#define NET_ADD_STATS_USER(field, adnd) SNMP_ADD_STATS_USER(ve_net_statistics, field, adnd)
-
- extern int sysctl_local_port_range[2];
- extern int sysctl_ip_default_ttl;
-@@ -380,4 +391,11 @@ extern int ip_misc_proc_init(void);
-
- extern struct ctl_table ipv4_table[];
-
-+#ifdef CONFIG_SYSCTL
-+extern int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
-+ void __user *buffer, size_t *lenp, loff_t *ppos);
-+extern int ipv4_sysctl_forward_strategy(ctl_table *table, int __user *name,
-+ int nlen, void __user *oldval, size_t __user *oldlenp,
-+ void __user *newval, size_t newlen, void **context);
-+#endif
- #endif /* _IP_H */
-diff -upr linux-2.6.16.orig/include/net/ip_fib.h linux-2.6.16-026test009/include/net/ip_fib.h
---- linux-2.6.16.orig/include/net/ip_fib.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/ip_fib.h 2006-04-19 15:02:12.000000000 +0400
-@@ -168,10 +168,22 @@ struct fib_table {
- unsigned char tb_data[0];
- };
-
-+struct fn_zone;
-+struct fn_hash
-+{
-+ struct fn_zone *fn_zones[33];
-+ struct fn_zone *fn_zone_list;
-+};
-+
- #ifndef CONFIG_IP_MULTIPLE_TABLES
-
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ip_fib_local_table get_exec_env()->_local_table
-+#define ip_fib_main_table get_exec_env()->_main_table
-+#else
- extern struct fib_table *ip_fib_local_table;
- extern struct fib_table *ip_fib_main_table;
-+#endif
-
- static inline struct fib_table *fib_get_table(int id)
- {
-@@ -203,7 +215,12 @@ static inline void fib_select_default(co
- #define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL])
- #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN])
-
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define fib_tables get_exec_env()->_fib_tables
-+#else
- extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
-+#endif
-+
- extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
- extern struct fib_table *__fib_new_table(int id);
- extern void fib_rule_put(struct fib_rule *r);
-@@ -250,10 +267,19 @@ extern u32 __fib_res_prefsrc(struct fib
-
- /* Exported by fib_hash.c */
- extern struct fib_table *fib_hash_init(int id);
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+struct ve_struct;
-+extern int init_ve_route(struct ve_struct *ve);
-+extern void fini_ve_route(struct ve_struct *ve);
-+#else
-+#define init_ve_route(ve) (0)
-+#define fini_ve_route(ve) do { } while (0)
-+#endif
-
- #ifdef CONFIG_IP_MULTIPLE_TABLES
- /* Exported by fib_rules.c */
--
-+extern int fib_rules_create(void);
-+extern void fib_rules_destroy(void);
- extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
- extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
- extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
-diff -upr linux-2.6.16.orig/include/net/netlink_sock.h linux-2.6.16-026test009/include/net/netlink_sock.h
---- linux-2.6.16.orig/include/net/netlink_sock.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/netlink_sock.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,22 @@
-+#ifndef __NET_NETLINK_SOCK_H
-+#define __NET_NETLINK_SOCK_H
-+
-+struct netlink_sock {
-+ /* struct sock has to be the first member of netlink_sock */
-+ struct sock sk;
-+ u32 pid;
-+ u32 dst_pid;
-+ u32 dst_group;
-+ u32 flags;
-+ u32 subscriptions;
-+ u32 ngroups;
-+ unsigned long *groups;
-+ unsigned long state;
-+ wait_queue_head_t wait;
-+ struct netlink_callback *cb;
-+ spinlock_t cb_lock;
-+ void (*data_ready)(struct sock *sk, int bytes);
-+ struct module *module;
-+};
-+
-+#endif /* __NET_NETLINK_SOCK_H */
-diff -upr linux-2.6.16.orig/include/net/route.h linux-2.6.16-026test009/include/net/route.h
---- linux-2.6.16.orig/include/net/route.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/route.h 2006-04-19 15:02:12.000000000 +0400
-@@ -201,4 +201,14 @@ static inline struct inet_peer *rt_get_p
-
- extern ctl_table ipv4_route_table[];
-
-+#ifdef CONFIG_SYSCTL
-+extern int ipv4_flush_delay;
-+extern int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
-+ struct file *filp, void __user *buffer, size_t *lenp,
-+ loff_t *ppos);
-+extern int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
-+ int __user *name, int nlen, void __user *oldval,
-+ size_t __user *oldlenp, void __user *newval,
-+ size_t newlen, void **context);
-+#endif
- #endif /* _ROUTE_H */
-diff -upr linux-2.6.16.orig/include/net/scm.h linux-2.6.16-026test009/include/net/scm.h
---- linux-2.6.16.orig/include/net/scm.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/scm.h 2006-04-19 15:02:12.000000000 +0400
-@@ -40,7 +40,7 @@ static __inline__ int scm_send(struct so
- memset(scm, 0, sizeof(*scm));
- scm->creds.uid = current->uid;
- scm->creds.gid = current->gid;
-- scm->creds.pid = current->tgid;
-+ scm->creds.pid = virt_tgid(current);
- if (msg->msg_controllen <= 0)
- return 0;
- return __scm_send(sock, msg, scm);
-diff -upr linux-2.6.16.orig/include/net/sock.h linux-2.6.16-026test009/include/net/sock.h
---- linux-2.6.16.orig/include/net/sock.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/sock.h 2006-04-19 15:02:12.000000000 +0400
-@@ -55,6 +55,8 @@
- #include <net/dst.h>
- #include <net/checksum.h>
-
-+#include <ub/ub_net.h>
-+
- /*
- * This structure really needs to be cleaned up.
- * Most of it is for TCP, and not used by any of
-@@ -251,8 +253,12 @@ struct sock {
- int (*sk_backlog_rcv)(struct sock *sk,
- struct sk_buff *skb);
- void (*sk_destruct)(struct sock *sk);
-+ struct sock_beancounter sk_bc;
-+ struct ve_struct *sk_owner_env;
- };
-
-+DCL_VE_OWNER_PROTO(SK, struct sock, sk_owner_env)
-+
- /*
- * Hashed lists helper routines
- */
-@@ -485,7 +491,8 @@ static inline void sk_add_backlog(struct
- })
-
- extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
--extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
-+extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p,
-+ unsigned long amount);
- extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
- extern int sk_stream_error(struct sock *sk, int flags, int err);
- extern void sk_stream_kill_queues(struct sock *sk);
-@@ -706,8 +713,11 @@ static inline void sk_stream_writequeue_
-
- static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
- {
-- return (int)skb->truesize <= sk->sk_forward_alloc ||
-- sk_stream_mem_schedule(sk, skb->truesize, 1);
-+ if ((int)skb->truesize > sk->sk_forward_alloc &&
-+ !sk_stream_mem_schedule(sk, skb->truesize, 1))
-+ /* The situation is bad according to mainstream. Den */
-+ return 0;
-+ return ub_tcprcvbuf_charge(sk, skb) == 0;
- }
-
- static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
-@@ -765,6 +775,11 @@ extern struct sk_buff *sock_alloc_send
- unsigned long size,
- int noblock,
- int *errcode);
-+extern struct sk_buff *sock_alloc_send_skb2(struct sock *sk,
-+ unsigned long size,
-+ unsigned long size2,
-+ int noblock,
-+ int *errcode);
- extern void *sock_kmalloc(struct sock *sk, int size,
- gfp_t priority);
- extern void sock_kfree_s(struct sock *sk, void *mem, int size);
-@@ -1142,6 +1157,10 @@ static inline int sock_queue_rcv_skb(str
- goto out;
- }
-
-+ err = ub_sockrcvbuf_charge(sk, skb);
-+ if (err < 0)
-+ goto out;
-+
- /* It would be deadlock, if sock_queue_rcv_skb is used
- with socket lock! We assume that users of this
- function are lock free.
-diff -upr linux-2.6.16.orig/include/net/tcp.h linux-2.6.16-026test009/include/net/tcp.h
---- linux-2.6.16.orig/include/net/tcp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/tcp.h 2006-04-19 15:02:12.000000000 +0400
-@@ -40,6 +40,7 @@
- #include <net/tcp_states.h>
-
- #include <linux/seq_file.h>
-+#include <ub/ub_net.h>
-
- extern struct inet_hashinfo tcp_hashinfo;
-
-@@ -250,12 +251,17 @@ static inline int between(__u32 seq1, __
- extern struct proto tcp_prot;
-
- DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
--#define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
--#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
--#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
--#define TCP_DEC_STATS(field) SNMP_DEC_STATS(tcp_statistics, field)
--#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
--#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_tcp_statistics (get_exec_env()->_tcp_statistics)
-+#else
-+#define ve_tcp_statistics tcp_statistics
-+#endif
-+#define TCP_INC_STATS(field) SNMP_INC_STATS(ve_tcp_statistics, field)
-+#define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_tcp_statistics, field)
-+#define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_tcp_statistics, field)
-+#define TCP_DEC_STATS(field) SNMP_DEC_STATS(ve_tcp_statistics, field)
-+#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ve_tcp_statistics, field, val)
-+#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(ve_tcp_statistics, field, val)
-
- extern void tcp_v4_err(struct sk_buff *skb, u32);
-
-@@ -493,7 +499,7 @@ extern u32 __tcp_select_window(struct so
- * to use only the low 32-bits of jiffies and hide the ugly
- * casts with the following macro.
- */
--#define tcp_time_stamp ((__u32)(jiffies))
-+#define tcp_time_stamp ((__u32)(jiffies + get_exec_env()->jiffies_fixup))
-
- /* This is what the send packet queuing engine uses to pass
- * TCP per-packet control information to the transmission
-diff -upr linux-2.6.16.orig/include/net/udp.h linux-2.6.16-026test009/include/net/udp.h
---- linux-2.6.16.orig/include/net/udp.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/include/net/udp.h 2006-04-19 15:02:12.000000000 +0400
-@@ -39,13 +39,19 @@ extern rwlock_t udp_hash_lock;
-
- extern int udp_port_rover;
-
--static inline int udp_lport_inuse(u16 num)
-+static inline int udp_hashfn(u16 num, unsigned veid)
-+{
-+ return ((num + (veid ^ (veid >> 16))) & (UDP_HTABLE_SIZE - 1));
-+}
-+
-+static inline int udp_lport_inuse(u16 num, struct ve_struct *env)
- {
- struct sock *sk;
- struct hlist_node *node;
-
-- sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)])
-- if (inet_sk(sk)->num == num)
-+ sk_for_each(sk, node, &udp_hash[udp_hashfn(num, VEID(env))])
-+ if (inet_sk(sk)->num == num &&
-+ ve_accessible_strict(sk->sk_owner_env, env))
- return 1;
- return 0;
- }
-@@ -75,9 +81,14 @@ extern unsigned int udp_poll(struct file
- poll_table *wait);
-
- DECLARE_SNMP_STAT(struct udp_mib, udp_statistics);
--#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field)
--#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field)
--#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field)
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_udp_statistics (get_exec_env()->_udp_statistics)
-+#else
-+#define ve_udp_statistics udp_statistics
-+#endif
-+#define UDP_INC_STATS(field) SNMP_INC_STATS(ve_udp_statistics, field)
-+#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ve_udp_statistics, field)
-+#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ve_udp_statistics, field)
-
- /* /proc */
- struct udp_seq_afinfo {
-diff -upr linux-2.6.16.orig/include/ub/beancounter.h linux-2.6.16-026test009/include/ub/beancounter.h
---- linux-2.6.16.orig/include/ub/beancounter.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/beancounter.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,329 @@
-+/*
-+ * include/ub/beancounter.h
-+ *
-+ * Copyright (C) 1999-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * Andrey Savochkin saw@sw-soft.com
-+ *
-+ */
-+
-+#ifndef _LINUX_BEANCOUNTER_H
-+#define _LINUX_BEANCOUNTER_H
-+
-+#include <linux/config.h>
-+
-+/*
-+ * Generic ratelimiting stuff.
-+ */
-+
-+struct ub_rate_info {
-+ int burst;
-+ int interval; /* jiffy_t per event */
-+ int bucket; /* kind of leaky bucket */
-+ unsigned long last; /* last event */
-+};
-+
-+/* Return true if rate limit permits. */
-+int ub_ratelimit(struct ub_rate_info *);
-+
-+
-+/*
-+ * This magic is used to distinguish user beancounter and pages beancounter
-+ * in struct page. page_ub and page_bc are placed in union and MAGIC
-+ * ensures us that we don't use pbc as ubc in ub_page_uncharge().
-+ */
-+#define UB_MAGIC 0x62756275
-+
-+/*
-+ * Resource list.
-+ */
-+
-+#define UB_KMEMSIZE 0 /* Unswappable kernel memory size including
-+ * struct task, page directories, etc.
-+ */
-+#define UB_LOCKEDPAGES 1 /* Mlock()ed pages. */
-+#define UB_PRIVVMPAGES 2 /* Total number of pages, counting potentially
-+ * private pages as private and used.
-+ */
-+#define UB_SHMPAGES 3 /* IPC SHM segment size. */
-+#define UB_ZSHMPAGES 4 /* Anonymous shared memory. */
-+#define UB_NUMPROC 5 /* Number of processes. */
-+#define UB_PHYSPAGES 6 /* All resident pages, for swapout guarantee. */
-+#define UB_VMGUARPAGES 7 /* Guarantee for memory allocation,
-+ * checked against PRIVVMPAGES.
-+ */
-+#define UB_OOMGUARPAGES 8 /* Guarantees against OOM kill.
-+ * Only limit is used, no accounting.
-+ */
-+#define UB_NUMTCPSOCK 9 /* Number of TCP sockets. */
-+#define UB_NUMFLOCK 10 /* Number of file locks. */
-+#define UB_NUMPTY 11 /* Number of PTYs. */
-+#define UB_NUMSIGINFO 12 /* Number of siginfos. */
-+#define UB_TCPSNDBUF 13 /* Total size of tcp send buffers. */
-+#define UB_TCPRCVBUF 14 /* Total size of tcp receive buffers. */
-+#define UB_OTHERSOCKBUF 15 /* Total size of other socket
-+ * send buffers (all buffers for PF_UNIX).
-+ */
-+#define UB_DGRAMRCVBUF 16 /* Total size of other socket
-+ * receive buffers.
-+ */
-+#define UB_NUMOTHERSOCK 17 /* Number of other sockets. */
-+#define UB_DCACHESIZE 18 /* Size of busy dentry/inode cache. */
-+#define UB_NUMFILE 19 /* Number of open files. */
-+
-+#define UB_RESOURCES 24
-+
-+#define UB_UNUSEDPRIVVM (UB_RESOURCES + 0)
-+#define UB_TMPFSPAGES (UB_RESOURCES + 1)
-+#define UB_SWAPPAGES (UB_RESOURCES + 2)
-+#define UB_HELDPAGES (UB_RESOURCES + 3)
-+
-+struct ubparm {
-+ /*
-+ * A barrier over which resource allocations are failed gracefully.
-+ * If the amount of consumed memory is over the barrier further sbrk()
-+ * or mmap() calls fail, the existing processes are not killed.
-+ */
-+ unsigned long barrier;
-+ /* hard resource limit */
-+ unsigned long limit;
-+ /* consumed resources */
-+ unsigned long held;
-+ /* maximum amount of consumed resources through the last period */
-+ unsigned long maxheld;
-+ /* minimum amount of consumed resources through the last period */
-+ unsigned long minheld;
-+ /* count of failed charges */
-+ unsigned long failcnt;
-+};
-+
-+/*
-+ * Kernel internal part.
-+ */
-+
-+#ifdef __KERNEL__
-+
-+#include <ub/ub_debug.h>
-+#include <linux/interrupt.h>
-+#include <asm/atomic.h>
-+#include <linux/spinlock.h>
-+#include <linux/cache.h>
-+#include <linux/threads.h>
-+
-+/*
-+ * UB_MAXVALUE is essentially LONG_MAX declared in a cross-compiling safe form.
-+ */
-+#define UB_MAXVALUE ( (1UL << (sizeof(unsigned long)*8-1)) - 1)
-+
-+
-+/*
-+ * Resource management structures
-+ * Serialization issues:
-+ * beancounter list management is protected via ub_hash_lock
-+ * task pointers are set only for current task and only once
-+ * refcount is managed atomically
-+ * value and limit comparison and change are protected by per-ub spinlock
-+ */
-+
-+struct page_beancounter;
-+struct task_beancounter;
-+struct sock_beancounter;
-+
-+struct page_private {
-+ unsigned long ubp_unused_privvmpages;
-+ unsigned long ubp_tmpfs_respages;
-+ unsigned long ubp_swap_pages;
-+ unsigned long long ubp_held_pages;
-+};
-+
-+struct sock_private {
-+ unsigned long ubp_rmem_thres;
-+ unsigned long ubp_wmem_pressure;
-+ unsigned long ubp_maxadvmss;
-+ unsigned long ubp_rmem_pressure;
-+#define UB_RMEM_EXPAND 0
-+#define UB_RMEM_KEEP 1
-+#define UB_RMEM_SHRINK 2
-+ struct list_head ubp_other_socks;
-+ struct list_head ubp_tcp_socks;
-+ atomic_t ubp_orphan_count;
-+};
-+
-+struct ub_perfstat {
-+ unsigned long unmap;
-+ unsigned long swapin;
-+
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+ long pages_charged;
-+ long vmalloc_charged;
-+ long pbcs;
-+#endif
-+} ____cacheline_aligned_in_smp;
-+
-+struct user_beancounter
-+{
-+ unsigned long ub_magic;
-+ atomic_t ub_refcount;
-+ struct user_beancounter *ub_next;
-+ spinlock_t ub_lock;
-+ uid_t ub_uid;
-+
-+ struct ub_rate_info ub_limit_rl;
-+ int ub_oom_noproc;
-+
-+ struct page_private ppriv;
-+#define ub_unused_privvmpages ppriv.ubp_unused_privvmpages
-+#define ub_tmpfs_respages ppriv.ubp_tmpfs_respages
-+#define ub_swap_pages ppriv.ubp_swap_pages
-+#define ub_held_pages ppriv.ubp_held_pages
-+ struct sock_private spriv;
-+#define ub_rmem_thres spriv.ubp_rmem_thres
-+#define ub_maxadvmss spriv.ubp_maxadvmss
-+#define ub_rmem_pressure spriv.ubp_rmem_pressure
-+#define ub_wmem_pressure spriv.ubp_wmem_pressure
-+#define ub_tcp_sk_list spriv.ubp_tcp_socks
-+#define ub_other_sk_list spriv.ubp_other_socks
-+#define ub_orphan_count spriv.ubp_orphan_count
-+
-+ struct user_beancounter *parent;
-+ void *private_data;
-+
-+ /* resources statistic and settings */
-+ struct ubparm ub_parms[UB_RESOURCES];
-+ /* resources statistic for last interval */
-+ struct ubparm ub_store[UB_RESOURCES];
-+
-+ struct ub_perfstat ub_stat[NR_CPUS];
-+
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+ struct list_head ub_cclist;
-+#endif
-+};
-+
-+enum severity { UB_HARD, UB_SOFT, UB_FORCE };
-+
-+static inline int ub_barrier_hit(struct user_beancounter *ub, int resource)
-+{
-+ return ub->ub_parms[resource].held > ub->ub_parms[resource].barrier;
-+}
-+
-+static inline int ub_hfbarrier_hit(struct user_beancounter *ub, int resource)
-+{
-+ return (ub->ub_parms[resource].held >
-+ ((ub->ub_parms[resource].barrier) >> 1));
-+}
-+
-+#ifndef CONFIG_USER_RESOURCE
-+
-+extern inline struct user_beancounter *get_beancounter_byuid
-+ (uid_t uid, int create) { return NULL; }
-+extern inline struct user_beancounter *get_beancounter
-+ (struct user_beancounter *ub) { return NULL; }
-+extern inline void put_beancounter(struct user_beancounter *ub) {;}
-+
-+static inline void ub_init_cache(unsigned long mempages) { };
-+static inline void ub_init_ub0(void) { };
-+
-+#define get_ub0() NULL
-+
-+#else /* CONFIG_USER_RESOURCE */
-+
-+/*
-+ * Charge/uncharge operations
-+ */
-+
-+extern int __charge_beancounter_locked(struct user_beancounter *ub,
-+ int resource, unsigned long val, enum severity strict);
-+
-+extern void __uncharge_beancounter_locked(struct user_beancounter *ub,
-+ int resource, unsigned long val);
-+
-+extern void __put_beancounter(struct user_beancounter *ub);
-+
-+extern void uncharge_warn(struct user_beancounter *ub, int resource,
-+ unsigned long val, unsigned long held);
-+
-+extern const char *ub_rnames[];
-+/*
-+ * Put a beancounter reference
-+ */
-+
-+static inline void put_beancounter(struct user_beancounter *ub)
-+{
-+ if (unlikely(ub == NULL))
-+ return;
-+
-+ __put_beancounter(ub);
-+}
-+
-+/*
-+ * Create a new beancounter reference
-+ */
-+extern struct user_beancounter *get_beancounter_byuid(uid_t uid, int create);
-+
-+static inline
-+struct user_beancounter *get_beancounter(struct user_beancounter *ub)
-+{
-+ if (unlikely(ub == NULL))
-+ return NULL;
-+
-+ atomic_inc(&ub->ub_refcount);
-+ return ub;
-+}
-+
-+extern struct user_beancounter *get_subbeancounter_byid(
-+ struct user_beancounter *,
-+ int id, int create);
-+extern struct user_beancounter *subbeancounter_findcreate(
-+ struct user_beancounter *p, int id);
-+
-+extern struct user_beancounter ub0;
-+
-+extern void ub_init_cache(unsigned long);
-+extern void ub_init_ub0(void);
-+#define get_ub0() (&ub0)
-+
-+extern void print_ub_uid(struct user_beancounter *ub, char *buf, int size);
-+
-+/*
-+ * Resource charging
-+ * Change user's account and compare against limits
-+ */
-+
-+static inline void ub_adjust_maxheld(struct user_beancounter *ub, int resource)
-+{
-+ if (ub->ub_parms[resource].maxheld < ub->ub_parms[resource].held)
-+ ub->ub_parms[resource].maxheld = ub->ub_parms[resource].held;
-+ if (ub->ub_parms[resource].minheld > ub->ub_parms[resource].held)
-+ ub->ub_parms[resource].minheld = ub->ub_parms[resource].held;
-+}
-+
-+#endif /* CONFIG_USER_RESOURCE */
-+
-+#include <ub/ub_decl.h>
-+UB_DECLARE_FUNC(int, charge_beancounter(struct user_beancounter *ub,
-+ int resource, unsigned long val, enum severity strict))
-+UB_DECLARE_VOID_FUNC(uncharge_beancounter(struct user_beancounter *ub,
-+ int resource, unsigned long val))
-+
-+UB_DECLARE_VOID_FUNC(charge_beancounter_notop(struct user_beancounter *ub,
-+ int resource, unsigned long val))
-+UB_DECLARE_VOID_FUNC(uncharge_beancounter_notop(struct user_beancounter *ub,
-+ int resource, unsigned long val))
-+
-+#ifndef CONFIG_USER_RESOURCE_PROC
-+static inline void ub_init_proc(void) { };
-+#else
-+extern void ub_init_proc(void);
-+#endif
-+
-+#ifdef CONFIG_USER_RSS_ACCOUNTING
-+extern void ub_init_pbc(void);
-+#else
-+static inline void ub_init_pbc(void) { } /* stub: no RSS accounting */
-+#endif
-+#endif /* __KERNEL__ */
-+#endif /* _LINUX_BEANCOUNTER_H */
-diff -upr linux-2.6.16.orig/include/ub/ub_dcache.h linux-2.6.16-026test009/include/ub/ub_dcache.h
---- linux-2.6.16.orig/include/ub/ub_dcache.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_dcache.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,57 @@
-+/*
-+ * include/ub/ub_dcache.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_DCACHE_H_
-+#define __UB_DCACHE_H_
-+
-+#include <ub/ub_decl.h>
-+
-+/*
-+ * UB_DCACHESIZE accounting
-+ */
-+
-+struct dentry_beancounter
-+{
-+ /*
-+ * d_inuse =
-+ * <number of external refs> +
-+ * <number of 'used' children>
-+ *
-+ * d_inuse == -1 means that dentry is unused
-+ * state change -1 => 0 causes charge
-+ * state change 0 => -1 causes uncharge
-+ */
-+ atomic_t d_inuse;
-+ /* charged size, including name length if name is not inline */
-+ unsigned long d_ubsize;
-+ struct user_beancounter *d_ub;
-+};
-+
-+struct dentry;
-+
-+UB_DECLARE_FUNC(int, ub_dentry_alloc(struct dentry *d))
-+UB_DECLARE_VOID_FUNC(ub_dentry_charge_nofail(struct dentry *d))
-+UB_DECLARE_VOID_FUNC(ub_dentry_uncharge(struct dentry *d))
-+
-+#ifdef CONFIG_USER_RESOURCE
-+UB_DECLARE_FUNC(int, ub_dentry_charge(struct dentry *d))
-+#define ub_dget_testone(d) (atomic_inc_and_test(&(d)->dentry_bc.d_inuse))
-+#define ub_dput_testzero(d) (atomic_add_negative(-1, &(d)->dentry_bc.d_inuse))
-+#define INUSE_INIT 0
-+#else
-+#define ub_dentry_charge(d) ({ \
-+ spin_unlock(&d->d_lock); \
-+ rcu_read_unlock(); \
-+ 0; \
-+ })
-+#define ub_dget_testone(d) (0)
-+#define ub_dput_testzero(d) (0)
-+#endif
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_debug.h linux-2.6.16-026test009/include/ub/ub_debug.h
---- linux-2.6.16.orig/include/ub/ub_debug.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_debug.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,95 @@
-+/*
-+ * include/ub/ub_debug.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_DEBUG_H_
-+#define __UB_DEBUG_H_
-+
-+/*
-+ * general debugging
-+ */
-+
-+#define UBD_ALLOC 0x1
-+#define UBD_CHARGE 0x2
-+#define UBD_LIMIT 0x4
-+#define UBD_TRACE 0x8
-+
-+/*
-+ * ub_net debugging
-+ */
-+
-+#define UBD_NET_SOCKET 0x10
-+#define UBD_NET_SLEEP 0x20
-+#define UBD_NET_SEND 0x40
-+#define UBD_NET_RECV 0x80
-+
-+/*
-+ * Main routines
-+ */
-+
-+#define UB_DEBUG (0)
-+#define DEBUG_RESOURCE (0ULL)
-+
-+#define ub_dbg_cond(__cond, __str, args...) \
-+ do { \
-+ if ((__cond) != 0) \
-+ printk(__str, ##args); \
-+ } while(0)
-+
-+#define ub_debug(__section, __str, args...) \
-+ ub_dbg_cond(UB_DEBUG & (__section), __str, ##args)
-+
-+#define ub_debug_resource(__resource, __str, args...) \
-+ ub_dbg_cond((UB_DEBUG & UBD_CHARGE) && \
-+ (DEBUG_RESOURCE & (1 << (__resource))), \
-+ __str, ##args)
-+
-+#if UB_DEBUG & UBD_TRACE
-+#define ub_debug_trace(__cond, __b, __r) \
-+ do { \
-+ static struct ub_rate_info ri = { __b, __r }; \
-+ if ((__cond) != 0 && ub_ratelimit(&ri)) \
-+ dump_stack(); \
-+ } while(0)
-+#else
-+#define ub_debug_trace(__cond, __burst, __rate)
-+#endif
-+
-+#include <linux/config.h>
-+
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+#include <linux/list.h>
-+#include <linux/kmem_cache.h>
-+
-+struct user_beancounter;
-+struct ub_cache_counter {
-+ struct list_head ulist;
-+ struct ub_cache_counter *next;
-+ struct user_beancounter *ub;
-+ kmem_cache_t *cachep;
-+ unsigned long counter;
-+};
-+
-+extern spinlock_t cc_lock;
-+extern void init_cache_counters(void);
-+extern void ub_free_counters(struct user_beancounter *);
-+extern void ub_kmemcache_free(kmem_cache_t *cachep);
-+
-+struct vm_struct;
-+extern void inc_vmalloc_charged(struct vm_struct *, int);
-+extern void dec_vmalloc_charged(struct vm_struct *);
-+#else
-+#define init_cache_counters() do { } while (0)
-+#define inc_vmalloc_charged(vm, f) do { } while (0)
-+#define dec_vmalloc_charged(vm) do { } while (0)
-+#define ub_free_counters(ub) do { } while (0)
-+#define ub_kmemcache_free(cachep) do { } while (0)
-+#endif
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_decl.h linux-2.6.16-026test009/include/ub/ub_decl.h
---- linux-2.6.16.orig/include/ub/ub_decl.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_decl.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,40 @@
-+/*
-+ * include/ub/ub_decl.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_DECL_H_
-+#define __UB_DECL_H_
-+
-+#include <linux/config.h>
-+
-+/*
-+ * Naming convention:
-+ * ub_<section|object>_<operation>
-+ */
-+
-+#ifdef CONFIG_USER_RESOURCE
-+
-+#define UB_DECLARE_FUNC(ret_type, decl) extern ret_type decl;
-+#define UB_DECLARE_VOID_FUNC(decl) extern void decl;
-+
-+#else /* CONFIG_USER_RESOURCE */
-+
-+#define UB_DECLARE_FUNC(ret_type, decl) \
-+ static inline ret_type decl \
-+ { \
-+ return (ret_type)0; \
-+ }
-+#define UB_DECLARE_VOID_FUNC(decl) \
-+ static inline void decl \
-+ { \
-+ }
-+
-+#endif /* CONFIG_USER_RESOURCE */
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_hash.h linux-2.6.16-026test009/include/ub/ub_hash.h
---- linux-2.6.16.orig/include/ub/ub_hash.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_hash.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,41 @@
-+/*
-+ * include/ub/ub_hash.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _LINUX_UBHASH_H
-+#define _LINUX_UBHASH_H
-+
-+#ifdef __KERNEL__
-+
-+#define UB_HASH_SIZE 256
-+
-+struct ub_hash_slot {
-+ struct user_beancounter *ubh_beans;
-+};
-+
-+extern struct ub_hash_slot ub_hash[];
-+extern spinlock_t ub_hash_lock;
-+
-+#ifdef CONFIG_USER_RESOURCE
-+
-+/*
-+ * Iterate over beancounters
-+ * @__slot - hash slot
-+ * @__ubp - beancounter ptr
-+ * Can use break :)
-+ */
-+#define for_each_beancounter(__slot, __ubp) \
-+ for (__slot = 0, __ubp = NULL; \
-+ __slot < UB_HASH_SIZE && __ubp == NULL; __slot++) \
-+ for (__ubp = ub_hash[__slot].ubh_beans; __ubp; \
-+ __ubp = __ubp->ub_next)
-+
-+#endif /* CONFIG_USER_RESOURCE */
-+#endif /* __KERNEL__ */
-+#endif /* _LINUX_UBHASH_H */
-diff -upr linux-2.6.16.orig/include/ub/ub_mem.h linux-2.6.16-026test009/include/ub/ub_mem.h
---- linux-2.6.16.orig/include/ub/ub_mem.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_mem.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,85 @@
-+/*
-+ * include/ub/ub_mem.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_SLAB_H_
-+#define __UB_SLAB_H_
-+
-+#include <linux/config.h>
-+#include <linux/kmem_slab.h>
-+#include <ub/beancounter.h>
-+#include <ub/ub_decl.h>
-+
-+/*
-+ * UB_KMEMSIZE accounting
-+ */
-+
-+#ifdef CONFIG_UBC_DEBUG_ITEMS
-+#define CHARGE_ORDER(__o) (1 << __o)
-+#define CHARGE_SIZE(__s) 1
-+#else
-+#define CHARGE_ORDER(__o) (PAGE_SIZE << (__o))
-+#define CHARGE_SIZE(__s) (__s)
-+#endif
-+
-+#define page_ub(__page) ((__page)->bc.page_ub)
-+
-+struct mm_struct;
-+struct page;
-+
-+UB_DECLARE_FUNC(struct user_beancounter *, slab_ub(void *obj))
-+UB_DECLARE_FUNC(struct user_beancounter *, vmalloc_ub(void *obj))
-+UB_DECLARE_FUNC(struct user_beancounter *, mem_ub(void *obj))
-+
-+UB_DECLARE_FUNC(int, ub_page_charge(struct page *page, int order, int mask))
-+UB_DECLARE_VOID_FUNC(ub_page_uncharge(struct page *page, int order))
-+UB_DECLARE_FUNC(int, ub_slab_charge(void *objp, int flags))
-+UB_DECLARE_VOID_FUNC(ub_slab_uncharge(void *obj))
-+
-+#define slab_ubcs(cachep, slabp) ((struct user_beancounter **)\
-+ (ALIGN((unsigned long)(slab_bufctl(slabp) + (cachep)->num),\
-+ sizeof(void *))))
-+
-+#ifdef CONFIG_USER_RESOURCE
-+/* Flags without __GFP_UBC must comply with vmalloc */
-+#define ub_vmalloc(size) __vmalloc(size, \
-+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_UBC, PAGE_KERNEL)
-+#define ub_vmalloc_node(size, node) __vmalloc_node(size, \
-+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_UBC, PAGE_KERNEL, node)
-+#define ub_kmalloc(size, flags) kmalloc(size, ((flags) | __GFP_UBC))
-+extern struct user_beancounter *ub_select_worst(long *);
-+
-+/* mm/slab.c needed stuff */
-+#define UB_ALIGN(flags) ((flags) & SLAB_UBC ? sizeof(void *) : 1)
-+#define UB_EXTRA(flags) ((flags) & SLAB_UBC ? sizeof(void *) : 0)
-+#define set_cache_objuse(cachep) do { \
-+ (cachep)->objuse = ((PAGE_SIZE << (cachep)->gfporder) + \
-+ (cachep)->num - 1) / (cachep)->num; \
-+ if (!OFF_SLAB(cachep)) \
-+ break; \
-+ (cachep)->objuse += ((cachep)->slabp_cache->objuse + \
-+ (cachep)->num - 1) / (cachep)->num; \
-+ } while (0)
-+#define init_slab_ubps(cachep, slabp) do { \
-+ if (!((cachep)->flags & SLAB_UBC)) \
-+ break; \
-+ memset(slab_ubcs(cachep, slabp), 0, \
-+ (cachep)->num * sizeof(void *)); \
-+ } while (0)
-+#define kmem_obj_memusage(o) (virt_to_cache(o)->objuse)
-+#else
-+#define ub_vmalloc(size) vmalloc(size)
-+#define ub_vmalloc_node(size, node) vmalloc_node(size, node)
-+#define ub_kmalloc(size, flags) kmalloc(size, flags)
-+#define UB_ALIGN(flags) 1
-+#define UB_EXTRA(flags) 0
-+#define set_cache_objuse(c) do { } while (0)
-+#define init_slab_ubps(c, s) do { } while (0)
-+#endif
-+#endif /* __UB_SLAB_H_ */
-diff -upr linux-2.6.16.orig/include/ub/ub_misc.h linux-2.6.16-026test009/include/ub/ub_misc.h
---- linux-2.6.16.orig/include/ub/ub_misc.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_misc.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,49 @@
-+/*
-+ * include/ub/ub_misc.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_MISC_H_
-+#define __UB_MISC_H_
-+
-+#include <ub/ub_decl.h>
-+
-+struct tty_struct;
-+struct file;
-+struct file_lock;
-+struct sigqueue;
-+
-+UB_DECLARE_FUNC(int, ub_file_charge(struct file *f))
-+UB_DECLARE_VOID_FUNC(ub_file_uncharge(struct file *f))
-+UB_DECLARE_FUNC(int, ub_flock_charge(struct file_lock *fl, int hard))
-+UB_DECLARE_VOID_FUNC(ub_flock_uncharge(struct file_lock *fl))
-+UB_DECLARE_FUNC(int, ub_siginfo_charge(struct sigqueue *q,
-+ struct user_beancounter *ub))
-+UB_DECLARE_VOID_FUNC(ub_siginfo_uncharge(struct sigqueue *q))
-+UB_DECLARE_FUNC(int, ub_task_charge(struct task_struct *parent,
-+ struct task_struct *task))
-+UB_DECLARE_VOID_FUNC(ub_task_uncharge(struct task_struct *task))
-+UB_DECLARE_FUNC(int, ub_pty_charge(struct tty_struct *tty))
-+UB_DECLARE_VOID_FUNC(ub_pty_uncharge(struct tty_struct *tty))
-+
-+#ifdef CONFIG_USER_RESOURCE
-+#define set_flock_charged(fl) do { (fl)->fl_charged = 1; } while (0)
-+#define set_mm_ub(mm, tsk) do { \
-+ (mm)->mm_ub = get_beancounter(tsk ? \
-+ tsk->task_bc.task_ub : get_exec_ub()); \
-+ } while (0)
-+#define put_mm_ub(mm) do { \
-+ put_beancounter((mm)->mm_ub); \
-+ (mm)->mm_ub = NULL; \
-+ } while (0)
-+#else
-+#define set_flock_charged(fl) do { } while (0)
-+#define set_mm_ub(mm, tsk) do { } while (0)
-+#define put_mm_ub(mm) do { } while (0)
-+#endif
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_net.h linux-2.6.16-026test009/include/ub/ub_net.h
---- linux-2.6.16.orig/include/ub/ub_net.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_net.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,141 @@
-+/*
-+ * include/ub/ub_net.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_NET_H_
-+#define __UB_NET_H_
-+
-+/*
-+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
-+ */
-+
-+#include <ub/ub_decl.h>
-+#include <ub/ub_sk.h>
-+
-+#define bid2sid(__bufid) \
-+ ((__bufid) == UB_TCPSNDBUF ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK)
-+
-+#define SOCK_MIN_UBCSPACE ((int)((2048 - sizeof(struct skb_shared_info)) & \
-+ ~(SMP_CACHE_BYTES-1)))
-+#define SOCK_MIN_UBCSPACE_CH skb_charge_size(SOCK_MIN_UBCSPACE)
-+
-+
-+#define IS_TCP_SOCK(__family, __type) \
-+ ((__family) == PF_INET && (__type) == SOCK_STREAM)
-+
-+UB_DECLARE_FUNC(int, ub_sock_charge(struct sock *sk, int family, int type))
-+UB_DECLARE_FUNC(int, ub_tcp_sock_charge(struct sock *sk))
-+UB_DECLARE_FUNC(int, ub_other_sock_charge(struct sock *sk))
-+UB_DECLARE_VOID_FUNC(ub_sock_uncharge(struct sock *sk))
-+UB_DECLARE_VOID_FUNC(ub_skb_uncharge(struct sk_buff *skb))
-+UB_DECLARE_FUNC(int, ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask))
-+UB_DECLARE_VOID_FUNC(ub_skb_free_bc(struct sk_buff *skb))
-+UB_DECLARE_FUNC(int, ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk))
-+UB_DECLARE_FUNC(int, ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb))
-+UB_DECLARE_VOID_FUNC(ub_sock_snd_queue_add(struct sock *sk, int resource,
-+ unsigned long size))
-+UB_DECLARE_FUNC(long, ub_sock_wait_for_space(struct sock *sk, long timeo,
-+ unsigned long size))
-+
-+UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb))
-+UB_DECLARE_FUNC(int, ub_tcprcvbuf_charge_forced(struct sock *sk,
-+ struct sk_buff *skb))
-+UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb))
-+UB_DECLARE_FUNC(int, ub_tcpsndbuf_charge_forced(struct sock *sk,
-+ struct sk_buff *skb))
-+
-+/* Charge size */
-+static inline unsigned long skb_charge_datalen(unsigned long chargesize)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+ unsigned long slabsize;
-+
-+ chargesize -= sizeof(struct sk_buff);
-+ slabsize = 64;
-+ do {
-+ slabsize <<= 1;
-+ } while (slabsize <= chargesize);
-+
-+ slabsize >>= 1;
-+ return (slabsize - sizeof(struct skb_shared_info)) &
-+ ~(SMP_CACHE_BYTES-1);
-+#else
-+ return 0;
-+#endif
-+}
-+
-+static inline unsigned long skb_charge_size_gen(unsigned long size)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+ unsigned int slabsize;
-+
-+ size = SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
-+ slabsize = 32; /* min size is 64 because of skb_shared_info */
-+ do {
-+ slabsize <<= 1;
-+ } while (slabsize < size);
-+
-+ return slabsize + sizeof(struct sk_buff);
-+#else
-+ return 0;
-+#endif
-+
-+}
-+
-+static inline unsigned long skb_charge_size_const(unsigned long size)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+	const unsigned long need = SKB_DATA_ALIGN(size) +
-+			sizeof(struct skb_shared_info);	/* slab payload */
-+
-+	if (need <= 64)
-+		return 64 + sizeof(struct sk_buff);
-+	if (need <= 128)
-+		return 128 + sizeof(struct sk_buff);
-+	if (need <= 256)
-+		return 256 + sizeof(struct sk_buff);
-+	if (need <= 512)
-+		return 512 + sizeof(struct sk_buff);
-+	if (need <= 1024)
-+		return 1024 + sizeof(struct sk_buff);
-+	if (need <= 2048)
-+		return 2048 + sizeof(struct sk_buff);
-+	if (need <= 4096)
-+		return 4096 + sizeof(struct sk_buff);
-+	return (unsigned int)skb_charge_size_gen(size);	/* beyond 4096 */
-+#else
-+	return 0;
-+#endif
-+}
-+
-+
-+#define skb_charge_size(__size) \
-+ (__builtin_constant_p(__size) ? \
-+ skb_charge_size_const(__size) : \
-+ skb_charge_size_gen(__size))
-+
-+UB_DECLARE_FUNC(int, skb_charge_fullsize(struct sk_buff *skb))
-+UB_DECLARE_VOID_FUNC(ub_skb_set_charge(struct sk_buff *skb,
-+ struct sock *sk, unsigned long size, int res))
-+
-+/* Poll reserv */
-+UB_DECLARE_FUNC(int, ub_sock_makewres_other(struct sock *sk, unsigned long sz))
-+UB_DECLARE_FUNC(int, ub_sock_makewres_tcp(struct sock *sk, unsigned long size))
-+UB_DECLARE_FUNC(int, ub_sock_getwres_other(struct sock *sk, unsigned long size))
-+UB_DECLARE_FUNC(int, ub_sock_getwres_tcp(struct sock *sk, unsigned long size))
-+UB_DECLARE_VOID_FUNC(ub_sock_retwres_other(struct sock *sk, unsigned long size,
-+ unsigned long ressize))
-+UB_DECLARE_VOID_FUNC(ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
-+ unsigned long ressize))
-+UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_other(struct sock *sk,
-+ unsigned long size))
-+UB_DECLARE_VOID_FUNC(ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz))
-+UB_DECLARE_VOID_FUNC(ub_sock_sndqueuedel(struct sock *sk))
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_orphan.h linux-2.6.16-026test009/include/ub/ub_orphan.h
---- linux-2.6.16.orig/include/ub/ub_orphan.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_orphan.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,56 @@
-+/*
-+ * include/ub/ub_orphan.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_ORPHAN_H_
-+#define __UB_ORPHAN_H_
-+
-+#include <net/tcp.h>
-+
-+#include "ub/beancounter.h"
-+#include "ub/ub_net.h"
-+
-+
-+static inline atomic_t *__ub_get_orphan_count_ptr(struct sock *sk)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+	if (sock_bc(sk)->ub != NULL)	/* socket carries a beancounter */
-+		return &sock_bc(sk)->ub->ub_orphan_count;
-+#endif
-+	return sk->sk_prot->orphan_count;	/* else the protocol's counter */
-+}
-+
-+static inline void ub_inc_orphan_count(struct sock *sk)
-+{
-+ atomic_inc(__ub_get_orphan_count_ptr(sk));
-+}
-+
-+static inline void ub_dec_orphan_count(struct sock *sk)
-+{
-+ atomic_dec(__ub_get_orphan_count_ptr(sk));
-+}
-+
-+static inline int ub_get_orphan_count(struct sock *sk)
-+{
-+ return atomic_read(__ub_get_orphan_count_ptr(sk));
-+}
-+
-+extern int __ub_too_many_orphans(struct sock *sk, int count);
-+static inline int ub_too_many_orphans(struct sock *sk, int count)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+ if (__ub_too_many_orphans(sk, count))
-+ return 1;
-+#endif
-+ return (ub_get_orphan_count(sk) > sysctl_tcp_max_orphans ||
-+ (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-+ atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]));
-+}
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_page.h linux-2.6.16-026test009/include/ub/ub_page.h
---- linux-2.6.16.orig/include/ub/ub_page.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_page.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,48 @@
-+/*
-+ * include/ub/ub_page.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_PAGE_H_
-+#define __UB_PAGE_H_
-+
-+#include <linux/config.h>
-+
-+/*
-+ * Page_beancounters
-+ */
-+
-+struct page;
-+struct user_beancounter;
-+
-+#define PB_MAGIC 0x62700001UL
-+
-+struct page_beancounter {
-+ unsigned long pb_magic;
-+ struct page *page;
-+ struct user_beancounter *ub;
-+ struct page_beancounter *next_hash;
-+ unsigned refcount;
-+ struct list_head page_list;
-+};
-+
-+#define PB_REFCOUNT_BITS 24
-+#define PB_SHIFT_GET(c) ((c) >> PB_REFCOUNT_BITS)
-+#define PB_SHIFT_INC(c) ((c) += (1 << PB_REFCOUNT_BITS))
-+#define PB_SHIFT_DEC(c) ((c) -= (1 << PB_REFCOUNT_BITS))
-+#define PB_COUNT_GET(c) ((c) & ((1 << PB_REFCOUNT_BITS) - 1))
-+#define PB_COUNT_INC(c) ((c)++)
-+#define PB_COUNT_DEC(c) ((c)--)
-+#define PB_REFCOUNT_MAKE(s, c) (((s) << PB_REFCOUNT_BITS) + (c))
-+
-+#define page_pbc(__page) ((__page)->bc.page_pb)
-+
-+struct address_space;
-+extern int is_shmem_mapping(struct address_space *);
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_sk.h linux-2.6.16-026test009/include/ub/ub_sk.h
---- linux-2.6.16.orig/include/ub/ub_sk.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_sk.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,43 @@
-+/*
-+ * include/ub/ub_sk.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_SK_H_
-+#define __UB_SK_H_
-+
-+#include <linux/config.h>
-+#include <ub/ub_task.h>
-+
-+struct sock;
-+struct sk_buff;
-+
-+struct skb_beancounter {
-+ struct user_beancounter *ub;
-+ unsigned long charged:27, resource:5;
-+};
-+
-+struct sock_beancounter {
-+ /*
-+ * already charged for future sends, to make poll work;
-+ * changes are protected by bc spinlock, read is under socket
-+ * semaphore for sends and unprotected in poll
-+ */
-+ unsigned long poll_reserv;
-+ unsigned long ub_waitspc; /* space waiting for */
-+ unsigned long ub_wcharged;
-+ struct list_head ub_sock_list;
-+ struct user_beancounter *ub;
-+};
-+
-+#define sock_bc(__sk) (&(__sk)->sk_bc)
-+#define skb_bc(__skb) (&(__skb)->skb_bc)
-+#define skbc_sock(__skbc) (container_of(__skbc, struct sock, sk_bc))
-+#define sock_has_ubc(__sk) (sock_bc(__sk)->ub != NULL)
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_stat.h linux-2.6.16-026test009/include/ub/ub_stat.h
---- linux-2.6.16.orig/include/ub/ub_stat.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_stat.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,70 @@
-+/*
-+ * include/ub/ub_stat.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_STAT_H_
-+#define __UB_STAT_H_
-+
-+/* sys_ubstat commands list */
-+#define UBSTAT_READ_ONE 0x010000
-+#define UBSTAT_READ_ALL 0x020000
-+#define UBSTAT_READ_FULL 0x030000
-+#define UBSTAT_UBLIST 0x040000
-+#define UBSTAT_UBPARMNUM 0x050000
-+#define UBSTAT_GETTIME 0x060000
-+
-+#define UBSTAT_CMD(func) ((func) & 0xF0000)
-+#define UBSTAT_PARMID(func) ((func) & 0x0FFFF)
-+
-+#define TIME_MAX_SEC (LONG_MAX / HZ)
-+#define TIME_MAX_JIF (TIME_MAX_SEC * HZ)
-+
-+typedef unsigned long ubstattime_t;
-+
-+typedef struct {
-+ ubstattime_t start_time;
-+ ubstattime_t end_time;
-+ ubstattime_t cur_time;
-+} ubgettime_t;
-+
-+typedef struct {
-+ long maxinterval;
-+ int signum;
-+} ubnotifrq_t;
-+
-+typedef struct {
-+ unsigned long maxheld;
-+ unsigned long failcnt;
-+} ubstatparm_t;
-+
-+typedef struct {
-+ unsigned long barrier;
-+ unsigned long limit;
-+ unsigned long held;
-+ unsigned long maxheld;
-+ unsigned long minheld;
-+ unsigned long failcnt;
-+ unsigned long __unused1;
-+ unsigned long __unused2;
-+} ubstatparmf_t;
-+
-+typedef struct {
-+ ubstattime_t start_time;
-+ ubstattime_t end_time;
-+ ubstatparmf_t param[0];
-+} ubstatfull_t;
-+
-+#ifdef __KERNEL__
-+struct ub_stat_notify {
-+ struct list_head list;
-+ struct task_struct *task;
-+ int signum;
-+};
-+#endif
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_task.h linux-2.6.16-026test009/include/ub/ub_task.h
---- linux-2.6.16.orig/include/ub/ub_task.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_task.h 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,49 @@
-+/*
-+ * include/ub/ub_task.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_TASK_H_
-+#define __UB_TASK_H_
-+
-+#include <linux/config.h>
-+
-+struct user_beancounter;
-+
-+
-+#ifdef CONFIG_USER_RESOURCE
-+
-+struct task_beancounter {
-+ struct user_beancounter *exec_ub;
-+ struct user_beancounter *task_ub;
-+ struct user_beancounter *fork_sub;
-+ void *task_fnode, *task_freserv;
-+ unsigned long oom_generation;
-+ unsigned long task_data[4];
-+};
-+
-+#define get_exec_ub() (current->task_bc.exec_ub)
-+#define get_task_ub(__task) ((__task)->task_bc.task_ub)
-+#define set_exec_ub(__newub) \
-+({ /* install __newub as current's exec_ub; evaluates to the old one */ \
-+	struct user_beancounter *__old_ub; \
-+	struct task_beancounter *__tbc; \
-+	__tbc = &current->task_bc; \
-+	__old_ub = __tbc->exec_ub; \
-+	__tbc->exec_ub = (__newub); \
-+	__old_ub; \
-+})
-+
-+#else /* CONFIG_USER_RESOURCE */
-+
-+#define get_exec_ub() (NULL)
-+#define get_task_ub(task) (NULL)
-+#define set_exec_ub(__ub) (NULL)
-+
-+#endif /* CONFIG_USER_RESOURCE */
-+#endif /* __UB_TASK_H_ */
-diff -upr linux-2.6.16.orig/include/ub/ub_tcp.h linux-2.6.16-026test009/include/ub/ub_tcp.h
---- linux-2.6.16.orig/include/ub/ub_tcp.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_tcp.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,79 @@
-+/*
-+ * include/ub/ub_tcp.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_TCP_H_
-+#define __UB_TCP_H_
-+
-+/*
-+ * UB_NUMXXXSOCK, UB_XXXBUF accounting
-+ */
-+
-+#include <ub/ub_sk.h>
-+#include <ub/beancounter.h>
-+
-+static inline void ub_tcp_update_maxadvmss(struct sock *sk)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+	if (!sock_has_ubc(sk) ||
-+	    sock_bc(sk)->ub->ub_maxadvmss >= tcp_sk(sk)->advmss)
-+		return;
-+
-+	/* NOTE(review): stores a charge size, yet compares raw advmss above */
-+	sock_bc(sk)->ub->ub_maxadvmss =
-+		skb_charge_size(MAX_HEADER + sizeof(struct iphdr)
-+			+ sizeof(struct tcphdr) + tcp_sk(sk)->advmss);
-+#endif
-+}
-+
-+static inline int ub_tcp_rmem_allows_expand(struct sock *sk)
-+{
-+	/* global TCP memory pressure rules out growth for every socket */
-+	if (tcp_memory_pressure)
-+		return 0;
-+#ifdef CONFIG_USER_RESOURCE
-+	if (sock_has_ubc(sk)) {
-+		struct user_beancounter *owner = sock_bc(sk)->ub;
-+
-+		/* EXPAND allows, SHRINK forbids, otherwise use the threshold */
-+		if (owner->ub_rmem_pressure == UB_RMEM_EXPAND)
-+			return 1;
-+		return owner->ub_rmem_pressure != UB_RMEM_SHRINK &&
-+			sk->sk_rcvbuf <= owner->ub_rmem_thres;
-+	}
-+#endif
-+	return 1;
-+}
-+
-+static inline int ub_tcp_memory_pressure(struct sock *sk)
-+{
-+ if (tcp_memory_pressure)
-+ return 1;
-+#ifdef CONFIG_USER_RESOURCE
-+ if (sock_has_ubc(sk))
-+ return sock_bc(sk)->ub->ub_rmem_pressure != UB_RMEM_EXPAND;
-+#endif
-+ return 0;
-+}
-+
-+static inline int ub_tcp_shrink_rcvbuf(struct sock *sk)
-+{
-+ if (tcp_memory_pressure)
-+ return 1;
-+#ifdef CONFIG_USER_RESOURCE
-+ if (sock_has_ubc(sk))
-+ return sock_bc(sk)->ub->ub_rmem_pressure == UB_RMEM_SHRINK;
-+#endif
-+ return 0;
-+}
-+
-+UB_DECLARE_FUNC(int, ub_sock_tcp_chargepage(struct sock *sk))
-+UB_DECLARE_VOID_FUNC(ub_sock_tcp_detachpage(struct sock *sk))
-+
-+#endif
-diff -upr linux-2.6.16.orig/include/ub/ub_vmpages.h linux-2.6.16-026test009/include/ub/ub_vmpages.h
---- linux-2.6.16.orig/include/ub/ub_vmpages.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/include/ub/ub_vmpages.h 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,167 @@
-+/*
-+ * include/ub/ub_vmpages.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef __UB_PAGES_H_
-+#define __UB_PAGES_H_
-+
-+#include <linux/linkage.h>
-+#include <linux/config.h>
-+#include <ub/beancounter.h>
-+#include <ub/ub_decl.h>
-+
-+/*
-+ * Check whether vma has private or copy-on-write mapping.
-+ * Should match checks in ub_protected_charge().
-+ */
-+#define VM_UB_PRIVATE(__flags, __file) \
-+ ( ((__flags) & VM_WRITE) ? \
-+ (__file) == NULL || !((__flags) & VM_SHARED) : \
-+ 0 \
-+ )
-+
-+/* Mprotect charging result */
-+#define PRIVVM_ERROR -1
-+#define PRIVVM_NO_CHARGE 0 /* UB_DECLARE_FUNC retval with ubc off */
-+#define PRIVVM_TO_PRIVATE 1
-+#define PRIVVM_TO_SHARED 2
-+
-+UB_DECLARE_FUNC(int, ub_protected_charge(struct mm_struct *mm,
-+ unsigned long size,
-+ unsigned long newflags,
-+ struct vm_area_struct *vma))
-+
-+UB_DECLARE_VOID_FUNC(ub_unused_privvm_add(struct mm_struct *mm,
-+ struct vm_area_struct *vma,
-+ unsigned long num))
-+#define ub_unused_privvm_inc(mm, vma) ub_unused_privvm_add(mm, vma, 1)
-+UB_DECLARE_VOID_FUNC(ub_unused_privvm_sub(struct mm_struct *mm,
-+ struct vm_area_struct *vma,
-+ unsigned long num))
-+#define ub_unused_privvm_dec(mm, vma) ub_unused_privvm_sub(mm, vma, 1)
-+
-+UB_DECLARE_VOID_FUNC(__ub_unused_privvm_dec(struct mm_struct *mm,
-+ long sz))
-+
-+UB_DECLARE_FUNC(int, ub_memory_charge(struct mm_struct *mm,
-+ unsigned long size,
-+ unsigned vm_flags,
-+ struct file *vm_file,
-+ int strict))
-+UB_DECLARE_VOID_FUNC(ub_memory_uncharge(struct mm_struct *mm,
-+ unsigned long size,
-+ unsigned vm_flags,
-+ struct file *vm_file))
-+
-+struct shmem_inode_info;
-+UB_DECLARE_FUNC(int, ub_shmpages_charge(struct shmem_inode_info *i,
-+ unsigned long sz))
-+UB_DECLARE_VOID_FUNC(ub_shmpages_uncharge(struct shmem_inode_info *i,
-+ unsigned long sz))
-+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_inc(struct shmem_inode_info *shi))
-+UB_DECLARE_VOID_FUNC(ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
-+ unsigned long size))
-+#define ub_tmpfs_respages_dec(shi) ub_tmpfs_respages_sub(shi, 1)
-+
-+#ifdef CONFIG_USER_RESOURCE
-+#define shmi_ub_set(shi, ub) do { \
-+ (shi)->shmi_ub = get_beancounter(ub); \
-+ } while (0)
-+#define shmi_ub_put(shi) do { \
-+ put_beancounter((shi)->shmi_ub); \
-+ (shi)->shmi_ub = NULL; \
-+ } while (0)
-+#else
-+#define shmi_ub_set(shi, ub) do { } while (0)
-+#define shmi_ub_put(shi) do { } while (0)
-+#endif
-+
-+UB_DECLARE_FUNC(int, ub_locked_charge(struct mm_struct *mm,
-+ unsigned long size))
-+UB_DECLARE_VOID_FUNC(ub_locked_uncharge(struct mm_struct *mm,
-+ unsigned long size))
-+UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi,
-+ unsigned long size))
-+UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi,
-+ unsigned long size))
-+
-+UB_DECLARE_FUNC(unsigned long, pages_in_vma_range(struct vm_area_struct *vma,
-+ unsigned long addr, unsigned long end))
-+UB_DECLARE_VOID_FUNC(warn_bad_rss(struct vm_area_struct *vma,
-+ unsigned long freed))
-+#define pages_in_vma(vma)	(pages_in_vma_range((vma), \
-+				(vma)->vm_start, (vma)->vm_end))
-+
-+#define UB_PAGE_WEIGHT_SHIFT 24
-+#define UB_PAGE_WEIGHT (1 << UB_PAGE_WEIGHT_SHIFT)
-+
-+struct page_beancounter;
-+#define PBC_COPY_SAME ((struct page_beancounter *) 1)
-+
-+/* Mprotect charging result */
-+#define PRIVVM_ERROR -1
-+#define PRIVVM_NO_CHARGE 0
-+#define PRIVVM_TO_PRIVATE 1
-+#define PRIVVM_TO_SHARED 2
-+
-+extern void fastcall __ub_update_physpages(struct user_beancounter *ub);
-+extern void fastcall __ub_update_oomguarpages(struct user_beancounter *ub);
-+extern void fastcall __ub_update_privvm(struct user_beancounter *ub);
-+
-+#ifdef CONFIG_USER_RSS_ACCOUNTING
-+#define PB_DECLARE_FUNC(ret, decl) UB_DECLARE_FUNC(ret, decl)
-+#define PB_DECLARE_VOID_FUNC(decl) UB_DECLARE_VOID_FUNC(decl)
-+#else
-+#define PB_DECLARE_FUNC(ret, decl) static inline ret decl {return (ret)0;}
-+#define PB_DECLARE_VOID_FUNC(decl) static inline void decl { }
-+#endif
-+
-+PB_DECLARE_FUNC(int, pb_alloc(struct page_beancounter **pbc))
-+PB_DECLARE_FUNC(int, pb_alloc_list(struct page_beancounter **pbc, int num))
-+PB_DECLARE_FUNC(int, pb_alloc_all(struct page_beancounter **pbc))
-+PB_DECLARE_VOID_FUNC(pb_add_ref(struct page *page,
-+ struct mm_struct *mm,
-+ struct page_beancounter **pbc))
-+PB_DECLARE_VOID_FUNC(pb_dup_ref(struct page *page,
-+ struct mm_struct *mm,
-+ struct page_beancounter **pbc))
-+PB_DECLARE_VOID_FUNC(pb_free_list(struct page_beancounter **pb))
-+PB_DECLARE_VOID_FUNC(pb_free(struct page_beancounter **pb))
-+PB_DECLARE_VOID_FUNC(pb_remove_ref(struct page *page,
-+ struct mm_struct *mm))
-+
-+PB_DECLARE_FUNC(struct user_beancounter *, pb_grab_page_ub(struct page *page))
-+
-+#ifdef CONFIG_USER_SWAP_ACCOUNTING
-+#define SWP_DECLARE_FUNC(ret, decl)	UB_DECLARE_FUNC(ret, decl)
-+#define SWP_DECLARE_VOID_FUNC(decl)	UB_DECLARE_VOID_FUNC(decl)
-+#else
-+#define SWP_DECLARE_FUNC(ret, decl)	static inline ret decl {return (ret)0;}
-+#define SWP_DECLARE_VOID_FUNC(decl)	static inline void decl { }
-+#endif
-+
-+struct swap_info_struct;
-+SWP_DECLARE_FUNC(int, ub_swap_init(struct swap_info_struct *si, pgoff_t n))
-+SWP_DECLARE_VOID_FUNC(ub_swap_fini(struct swap_info_struct *si))
-+SWP_DECLARE_VOID_FUNC(ub_swapentry_inc(struct swap_info_struct *si, pgoff_t n,
-+			struct user_beancounter *ub))
-+SWP_DECLARE_VOID_FUNC(ub_swapentry_dec(struct swap_info_struct *si, pgoff_t n))
-+
-+#ifdef CONFIG_USER_RESOURCE
-+#define ub_unmap_inc(mm)	do {					\
-+		(mm)->mm_ub->ub_stat[smp_processor_id()].unmap++;	\
-+	} while (0)
-+#define ub_swapin_inc(mm)	do {					\
-+		(mm)->mm_ub->ub_stat[smp_processor_id()].swapin++;	\
-+	} while (0)
-+#else
-+#define ub_unmap_inc(mm)	do { } while (0)
-+#define ub_swapin_inc(mm)	do { } while (0)
-+#endif
-+#endif /* __UB_PAGES_H_: closed here so the swap hooks stay guarded */
-diff -upr linux-2.6.16.orig/init/calibrate.c linux-2.6.16-026test009/init/calibrate.c
---- linux-2.6.16.orig/init/calibrate.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/init/calibrate.c 2006-04-19 15:02:12.000000000 +0400
-@@ -7,6 +7,7 @@
- #include <linux/sched.h>
- #include <linux/delay.h>
- #include <linux/init.h>
-+#include <linux/module.h>
-
- #include <asm/timex.h>
-
-@@ -105,6 +106,60 @@ static unsigned long __devinit calibrate
- static unsigned long __devinit calibrate_delay_direct(void) {return 0;}
- #endif
-
-+unsigned long cycles_per_jiffy, cycles_per_clock;
-+
-+static __devinit void calibrate_cycles(void)
-+{
-+	unsigned long ticks;
-+	cycles_t time;
-+
-+	ticks = jiffies;
-+	while (ticks == jiffies)
-+		/* wait for jiffies to change so sampling starts on an edge */;
-+	time = get_cycles();
-+	ticks = jiffies;
-+	while (ticks == jiffies)
-+		/* spin until jiffies changes again: one whole jiffy */;
-+
-+	time = get_cycles() - time;
-+	cycles_per_jiffy = time;
-+	if (((unsigned long long)time >> 32) != 0) { /* no UB if 32-bit */
-+		printk(KERN_WARNING "CPU too fast! timings are incorrect\n");
-+		cycles_per_jiffy = -1;
-+	}
-+}
-+
-+EXPORT_SYMBOL(cycles_per_jiffy);
-+EXPORT_SYMBOL(cycles_per_clock);
-+
-+static __devinit void calc_cycles_per_jiffy(void)
-+{
-+#if defined(__i386__)
-+	extern unsigned long fast_gettimeoffset_quotient;
-+	unsigned long low, high;
-+
-+	if (fast_gettimeoffset_quotient != 0) {
-+		__asm__("divl %2"
-+			:"=a" (low), "=d" (high)
-+			:"r" (fast_gettimeoffset_quotient),
-+			"0" (0), "1" (1000000/HZ));
-+		cycles_per_jiffy = low;
-+	}
-+#endif
-+	if (cycles_per_jiffy == 0)
-+		calibrate_cycles();
-+
-+	if (cycles_per_jiffy == 0) {
-+		printk(KERN_WARNING "Cycles are stuck! "
-+			"Some VPS statistics will not be available.\n");
-+		/* to prevent division by zero in cycles_to_(clocks|jiffies) */
-+		cycles_per_jiffy = 1;
-+		cycles_per_clock = 1;
-+	} else	/* exact even if HZ % CLOCKS_PER_SEC != 0; no overflow */
-+		cycles_per_clock = (cycles_per_jiffy / CLOCKS_PER_SEC) * HZ +
-+			((cycles_per_jiffy % CLOCKS_PER_SEC) * HZ) / CLOCKS_PER_SEC;
-+}
-+
- /*
- * This is the number of bits of precision for the loops_per_jiffy. Each
- * bit takes on average 1.5/HZ seconds. This (like the original) is a little
-@@ -170,4 +225,5 @@ void __devinit calibrate_delay(void)
- loops_per_jiffy);
- }
-
-+ calc_cycles_per_jiffy();
- }
-diff -upr linux-2.6.16.orig/init/main.c linux-2.6.16-026test009/init/main.c
---- linux-2.6.16.orig/init/main.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/init/main.c 2006-04-19 15:02:12.000000000 +0400
-@@ -48,6 +48,8 @@
- #include <linux/mempolicy.h>
- #include <linux/key.h>
-
-+#include <ub/beancounter.h>
-+
- #include <asm/io.h>
- #include <asm/bugs.h>
- #include <asm/setup.h>
-@@ -104,6 +106,20 @@ extern void tc_init(void);
- enum system_states system_state;
- EXPORT_SYMBOL(system_state);
-
-+#ifdef CONFIG_VE
-+extern void init_ve_system(void);
-+extern void prepare_ve0_process(struct task_struct *tsk);
-+extern void prepare_ve0_proc_root(void);
-+extern void prepare_ve0_sysctl(void);
-+extern void prepare_ve0_loopback(void);
-+#else
-+#define init_ve_system() do { } while (0)
-+#define prepare_ve0_process(tsk) do { } while (0)
-+#define prepare_ve0_proc_root() do { } while (0)
-+#define prepare_ve0_sysctl() do { } while (0)
-+#define prepare_ve0_loopback() do { } while (0)
-+#endif
-+
- /*
- * Boot command-line arguments
- */
-@@ -447,6 +463,10 @@ asmlinkage void __init start_kernel(void
- * enable them
- */
- lock_kernel();
-+ /*
-+ * Prepare ub0 to account early allocations if any
-+ */
-+ ub_init_ub0();
- page_address_init();
- printk(KERN_NOTICE);
- printk(linux_banner);
-@@ -459,6 +479,8 @@ asmlinkage void __init start_kernel(void
- */
- smp_prepare_boot_cpu();
-
-+ prepare_ve0_process(&init_task);
-+
- /*
- * Set up the scheduler prior starting any interrupts (such as the
- * timer interrupt). Full topology setup happens at smp_init()
-@@ -524,6 +546,7 @@ asmlinkage void __init start_kernel(void
- #endif
- fork_init(num_physpages);
- proc_caches_init();
-+ ub_init_cache(num_physpages);
- buffer_init();
- unnamed_dev_init();
- key_init();
-@@ -534,7 +557,10 @@ asmlinkage void __init start_kernel(void
- /* rootfs populating might need page-writeback */
- page_writeback_init();
- #ifdef CONFIG_PROC_FS
-+ prepare_ve0_proc_root();
-+ prepare_ve0_sysctl();
- proc_root_init();
-+ ub_init_proc();
- #endif
- cpuset_init();
-
-@@ -542,6 +568,10 @@ asmlinkage void __init start_kernel(void
-
- acpi_early_init(); /* before LAPIC and SMP init */
-
-+#ifdef CONFIG_USER_RESOURCE
-+ ub_init_pbc();
-+#endif
-+
- /* Do the rest non-__init'ed, we're now alive */
- rest_init();
- }
-@@ -603,6 +633,9 @@ static void __init do_initcalls(void)
- */
- static void __init do_basic_setup(void)
- {
-+ prepare_ve0_loopback();
-+ init_ve_system();
-+
- /* drivers will send hotplug events */
- init_workqueues();
- usermodehelper_init();
-diff -upr linux-2.6.16.orig/init/version.c linux-2.6.16-026test009/init/version.c
---- linux-2.6.16.orig/init/version.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/init/version.c 2006-04-19 15:02:12.000000000 +0400
-@@ -28,6 +28,12 @@ struct new_utsname system_utsname = {
-
- EXPORT_SYMBOL(system_utsname);
-
-+struct new_utsname virt_utsname = {
-+ /* we need only this field */
-+ .release = UTS_RELEASE,
-+};
-+EXPORT_SYMBOL(virt_utsname);
-+
- const char linux_banner[] =
- "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
- LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
-diff -upr linux-2.6.16.orig/ipc/mqueue.c linux-2.6.16-026test009/ipc/mqueue.c
---- linux-2.6.16.orig/ipc/mqueue.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/mqueue.c 2006-04-19 15:02:11.000000000 +0400
-@@ -639,7 +639,8 @@ static int oflag2acc[O_ACCMODE] = { MAY_
- return ERR_PTR(-EINVAL);
- }
-
-- if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) {
-+ if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE],
-+ NULL, NULL)) {
- dput(dentry);
- mntput(mqueue_mnt);
- return ERR_PTR(-EACCES);
-diff -upr linux-2.6.16.orig/ipc/msg.c linux-2.6.16-026test009/ipc/msg.c
---- linux-2.6.16.orig/ipc/msg.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/msg.c 2006-04-19 15:02:12.000000000 +0400
-@@ -88,6 +88,45 @@ void __init msg_init (void)
- sysvipc_msg_proc_show);
- }
-
-+#ifdef CONFIG_VE
-+void __init prepare_msg(void)
-+{
-+ get_ve0()->_msg_ids = &msg_ids;
-+ get_ve0()->_msg_ctlmax = msg_ctlmax;
-+ get_ve0()->_msg_ctlmnb = msg_ctlmnb;
-+ get_ve0()->_msg_ctlmni = msg_ctlmni;
-+}
-+
-+#define msg_ids (*(get_exec_env()->_msg_ids))
-+#define msg_ctlmax (get_exec_env()->_msg_ctlmax)
-+#define msg_ctlmnb (get_exec_env()->_msg_ctlmnb)
-+#define msg_ctlmni (get_exec_env()->_msg_ctlmni)
-+
-+void init_ve_ipc_msg(void)
-+{
-+ msg_ctlmax = MSGMAX;
-+ msg_ctlmnb = MSGMNB;
-+ msg_ctlmni = MSGMNI;
-+ ipc_init_ids(&msg_ids, MSGMNI);
-+}
-+
-+void cleanup_ve_ipc_msg(void)
-+{
-+	int slot;
-+
-+	down(&msg_ids.sem);
-+	for (slot = 0; slot <= msg_ids.max_id; slot++) {
-+		struct msg_queue *queue = msg_lock(slot);
-+
-+		/* empty slots are skipped; a locked queue goes straight to */
-+		/* freeque(), which presumably also drops the lock - confirm */
-+		if (queue != NULL)
-+			freeque(queue, slot);
-+	}
-+	up(&msg_ids.sem);
-+}
-+#endif
-+
- static int newque (key_t key, int msgflg)
- {
- int id;
-@@ -108,7 +147,7 @@ static int newque (key_t key, int msgflg
- return retval;
- }
-
-- id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
-+ id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni, -1);
- if(id == -1) {
- security_msg_queue_free(msq);
- ipc_rcu_putref(msq);
-@@ -450,7 +489,7 @@ asmlinkage long sys_msgctl (int msqid, i
- ipcp = &msq->q_perm;
- err = -EPERM;
- if (current->euid != ipcp->cuid &&
-- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
-+ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN))
- /* We _could_ check for CAP_CHOWN above, but we don't */
- goto out_unlock_up;
-
-@@ -540,7 +579,7 @@ static inline int pipelined_send(struct
- msr->r_msg = ERR_PTR(-E2BIG);
- } else {
- msr->r_msg = NULL;
-- msq->q_lrpid = msr->r_tsk->pid;
-+ msq->q_lrpid = virt_pid(msr->r_tsk);
- msq->q_rtime = get_seconds();
- wake_up_process(msr->r_tsk);
- smp_mb();
-@@ -622,7 +661,7 @@ asmlinkage long sys_msgsnd (int msqid, s
- }
- }
-
-- msq->q_lspid = current->tgid;
-+ msq->q_lspid = virt_tgid(current);
- msq->q_stime = get_seconds();
-
- if(!pipelined_send(msq,msg)) {
-@@ -718,7 +757,7 @@ asmlinkage long sys_msgrcv (int msqid, s
- list_del(&msg->m_list);
- msq->q_qnum--;
- msq->q_rtime = get_seconds();
-- msq->q_lrpid = current->tgid;
-+ msq->q_lrpid = virt_tgid(current);
- msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts,&msg_bytes);
- atomic_dec(&msg_hdrs);
-@@ -833,3 +872,27 @@ static int sysvipc_msg_proc_show(struct
- msq->q_ctime);
- }
- #endif
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_MODULE
-+#include <linux/module.h>
-+
-+int sysvipc_walk_msg(int (*func)(int i, struct msg_queue*, void *), void *arg)
-+{
-+	int slot;
-+	int ret = 0;
-+
-+	down(&msg_ids.sem);
-+	/* visit every live queue; the first non-zero callback result stops */
-+	for (slot = 0; ret == 0 && slot <= msg_ids.max_id; slot++) {
-+		struct msg_queue *queue = msg_lock(slot);
-+
-+		if (queue == NULL)
-+			continue;
-+		ret = func(msg_buildid(slot, queue->q_perm.seq), queue, arg);
-+		msg_unlock(queue);
-+	}
-+	up(&msg_ids.sem);
-+	return ret;
-+}
-+EXPORT_SYMBOL_GPL(sysvipc_walk_msg);
-+#endif
-diff -upr linux-2.6.16.orig/ipc/msgutil.c linux-2.6.16-026test009/ipc/msgutil.c
---- linux-2.6.16.orig/ipc/msgutil.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/msgutil.c 2006-04-19 15:02:11.000000000 +0400
-@@ -17,6 +17,8 @@
-
- #include "util.h"
-
-+#include <ub/ub_mem.h>
-+
- struct msg_msgseg {
- struct msg_msgseg* next;
- /* the next part of the message follows immediately */
-@@ -36,7 +38,7 @@ struct msg_msg *load_msg(const void __us
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
-- msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
-+ msg = (struct msg_msg *)ub_kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
- if (msg == NULL)
- return ERR_PTR(-ENOMEM);
-
-@@ -56,7 +58,7 @@ struct msg_msg *load_msg(const void __us
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
-- seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
-+ seg = (struct msg_msgseg *)ub_kmalloc(sizeof(*seg) + alen,
- GFP_KERNEL);
- if (seg == NULL) {
- err = -ENOMEM;
-diff -upr linux-2.6.16.orig/ipc/sem.c linux-2.6.16-026test009/ipc/sem.c
---- linux-2.6.16.orig/ipc/sem.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/sem.c 2006-04-19 15:02:12.000000000 +0400
-@@ -78,6 +78,7 @@
- #include <asm/uaccess.h>
- #include "util.h"
-
-+#include <ub/ub_mem.h>
-
- #define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id))
- #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
-@@ -88,7 +89,7 @@
- ipc_buildid(&sem_ids, id, seq)
- static struct ipc_ids sem_ids;
-
--static int newary (key_t, int, int);
-+static int newary (key_t, int, int, int);
- static void freeary (struct sem_array *sma, int id);
- #ifdef CONFIG_PROC_FS
- static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
-@@ -124,6 +125,48 @@ void __init sem_init (void)
- sysvipc_sem_proc_show);
- }
-
-+#ifdef CONFIG_VE
-+void __init prepare_sem(void)
-+{
-+ get_ve0()->_sem_ids = &sem_ids;
-+ get_ve0()->_used_sems = used_sems;
-+ get_ve0()->_sem_ctls[0] = sem_ctls[0];
-+ get_ve0()->_sem_ctls[1] = sem_ctls[1];
-+ get_ve0()->_sem_ctls[2] = sem_ctls[2];
-+ get_ve0()->_sem_ctls[3] = sem_ctls[3];
-+}
-+
-+#define sem_ids (*(get_exec_env()->_sem_ids))
-+#define used_sems (get_exec_env()->_used_sems)
-+#define sem_ctls (get_exec_env()->_sem_ctls)
-+
-+void init_ve_ipc_sem(void)
-+{
-+ used_sems = 0;
-+ sem_ctls[0] = SEMMSL;
-+ sem_ctls[1] = SEMMNS;
-+ sem_ctls[2] = SEMOPM;
-+ sem_ctls[3] = SEMMNI;
-+ ipc_init_ids(&sem_ids, SEMMNI);
-+}
-+
-+void cleanup_ve_ipc_sem(void)
-+{
-+	int slot;
-+
-+	down(&sem_ids.sem);
-+	for (slot = 0; slot <= sem_ids.max_id; slot++) {
-+		struct sem_array *set = sem_lock(slot);
-+
-+		/* empty slots are skipped; a locked set goes straight to */
-+		/* freeary(), which presumably also drops the lock - confirm */
-+		if (set != NULL)
-+			freeary(set, slot);
-+	}
-+	up(&sem_ids.sem);
-+}
-+#endif
-+
- /*
- * Lockless wakeup algorithm:
- * Without the check/retry algorithm a lockless wakeup is possible:
-@@ -158,7 +201,7 @@ void __init sem_init (void)
- */
- #define IN_WAKEUP 1
-
--static int newary (key_t key, int nsems, int semflg)
-+static int newary (key_t key, int semid, int nsems, int semflg)
- {
- int id;
- int retval;
-@@ -187,7 +230,7 @@ static int newary (key_t key, int nsems,
- return retval;
- }
-
-- id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
-+ id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni, semid);
- if(id == -1) {
- security_sem_free(sma);
- ipc_rcu_putref(sma);
-@@ -217,12 +260,12 @@ asmlinkage long sys_semget (key_t key, i
- down(&sem_ids.sem);
-
- if (key == IPC_PRIVATE) {
-- err = newary(key, nsems, semflg);
-+ err = newary(key, -1, nsems, semflg);
- } else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */
- if (!(semflg & IPC_CREAT))
- err = -ENOENT;
- else
-- err = newary(key, nsems, semflg);
-+ err = newary(key, -1, nsems, semflg);
- } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
- err = -EEXIST;
- } else {
-@@ -743,7 +786,7 @@ static int semctl_main(int semid, int se
- for (un = sma->undo; un; un = un->id_next)
- un->semadj[semnum] = 0;
- curr->semval = val;
-- curr->sempid = current->tgid;
-+ curr->sempid = virt_tgid(current);
- sma->sem_ctime = get_seconds();
- /* maybe some queued-up processes were waiting for this */
- update_queue(sma);
-@@ -823,7 +866,7 @@ static int semctl_down(int semid, int se
- ipcp = &sma->sem_perm;
-
- if (current->euid != ipcp->cuid &&
-- current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
-+ current->euid != ipcp->uid && !capable(CAP_VE_SYS_ADMIN)) {
- err=-EPERM;
- goto out_unlock;
- }
-@@ -944,7 +987,8 @@ static inline int get_undo_list(struct s
- undo_list = current->sysvsem.undo_list;
- if (!undo_list) {
- size = sizeof(struct sem_undo_list);
-- undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
-+ undo_list = (struct sem_undo_list *) ub_kmalloc(size,
-+ GFP_KERNEL);
- if (undo_list == NULL)
- return -ENOMEM;
- memset(undo_list, 0, size);
-@@ -1008,7 +1052,8 @@ static struct sem_undo *find_undo(int se
- ipc_rcu_getref(sma);
- sem_unlock(sma);
-
-- new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
-+ new = (struct sem_undo *) ub_kmalloc(sizeof(struct sem_undo) +
-+ sizeof(short)*nsems, GFP_KERNEL);
- if (!new) {
- ipc_lock_by_ptr(&sma->sem_perm);
- ipc_rcu_putref(sma);
-@@ -1066,7 +1111,7 @@ asmlinkage long sys_semtimedop(int semid
- if (nsops > sc_semopm)
- return -E2BIG;
- if(nsops > SEMOPM_FAST) {
-- sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
-+ sops = ub_kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
- if(sops==NULL)
- return -ENOMEM;
- }
-@@ -1150,7 +1195,7 @@ retry_undos:
- queue.sops = sops;
- queue.nsops = nsops;
- queue.undo = un;
-- queue.pid = current->tgid;
-+ queue.pid = virt_tgid(current);
- queue.id = semid;
- queue.alter = alter;
- if (alter)
-@@ -1320,7 +1365,7 @@ found:
- sem->semval = 0;
- if (sem->semval > SEMVMX)
- sem->semval = SEMVMX;
-- sem->sempid = current->tgid;
-+ sem->sempid = virt_tgid(current);
- }
- }
- sma->sem_otime = get_seconds();
-@@ -1351,3 +1396,48 @@ static int sysvipc_sem_proc_show(struct
- sma->sem_ctime);
- }
- #endif
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_MODULE
-+#include <linux/module.h>
-+
-+int sysvipc_setup_sem(key_t key, int semid, size_t size, int semflg)
-+{ /* Make sure a semaphore array is present at id semid, creating it with newary() when sem_lock() finds none; returns 0, or newary()'s result when that is negative. */
-+ int err = 0;
-+ struct sem_array *sma;
-+
-+ down(&sem_ids.sem);
-+ sma = sem_lock(semid);
-+ if (!sma) {
-+ err = newary(key, semid, size, semflg); /* semid goes in as the requested id, size as the nsems argument */
-+ if (err >= 0)
-+ sma = sem_lock(semid);
-+ }
-+ if (sma) /* still NULL when newary() failed or the second sem_lock() found nothing */
-+ sem_unlock(sma);
-+ up(&sem_ids.sem);
-+
-+ return err > 0 ? 0 : err; /* a positive newary() result is reported as plain 0 */
-+}
-+EXPORT_SYMBOL_GPL(sysvipc_setup_sem);
-+
-+int sysvipc_walk_sem(int (*func)(int i, struct sem_array*, void *), void *arg)
-+{ /* Call func on every slot 0..sem_ids.max_id that sem_lock() can lock, holding sem_ids.sem throughout; stops at and returns the first non-zero result from func, else 0. */
-+ int i;
-+ int err = 0;
-+ struct sem_array *sma;
-+
-+ down(&sem_ids.sem);
-+ for (i = 0; i <= sem_ids.max_id; i++) {
-+ if ((sma = sem_lock(i)) == NULL)
-+ continue;
-+ err = func(sem_buildid(i,sma->sem_perm.seq), sma, arg); /* func receives the id built from slot and sequence number, and runs with the array still locked */
-+ sem_unlock(sma);
-+ if (err)
-+ break;
-+ }
-+ up(&sem_ids.sem);
-+ return err;
-+}
-+EXPORT_SYMBOL_GPL(sysvipc_walk_sem);
-+EXPORT_SYMBOL_GPL(exit_sem);
-+#endif
-diff -upr linux-2.6.16.orig/ipc/shm.c linux-2.6.16-026test009/ipc/shm.c
---- linux-2.6.16.orig/ipc/shm.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/shm.c 2006-04-19 15:02:12.000000000 +0400
-@@ -30,9 +30,13 @@
- #include <linux/capability.h>
- #include <linux/ptrace.h>
- #include <linux/seq_file.h>
-+#include <linux/shmem_fs.h>
-
- #include <asm/uaccess.h>
-
-+#include <ub/beancounter.h>
-+#include <ub/ub_vmpages.h>
-+
- #include "util.h"
-
- static struct file_operations shm_file_operations;
-@@ -46,9 +50,11 @@ static struct ipc_ids shm_ids;
- #define shm_buildid(id, seq) \
- ipc_buildid(&shm_ids, id, seq)
-
--static int newseg (key_t key, int shmflg, size_t size);
-+static int newseg (key_t key, int shmid, int shmflg, size_t size);
- static void shm_open (struct vm_area_struct *shmd);
- static void shm_close (struct vm_area_struct *shmd);
-+static void shm_destroy (struct shmid_kernel *shmd);
-+static void do_shm_rmid(struct shmid_kernel *shp);
- #ifdef CONFIG_PROC_FS
- static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
- #endif
-@@ -68,6 +74,68 @@ void __init shm_init (void)
- sysvipc_shm_proc_show);
- }
-
-+#ifdef CONFIG_VE
-+void __init prepare_shm(void)
-+{ /* Publish the file-level shm state in the structure returned by get_ve0(). Sits above the #define block that follows, so the names used here are still the plain file-level variables. */
-+ get_ve0()->_shm_ids = &shm_ids; /* stored by address; the remaining fields are copied by value */
-+ get_ve0()->_shm_ctlmax = shm_ctlmax;
-+ get_ve0()->_shm_ctlall = shm_ctlall;
-+ get_ve0()->_shm_ctlmni = shm_ctlmni;
-+ get_ve0()->_shm_tot = shm_tot;
-+}
-+
-+#define shm_ids (*(get_exec_env()->_shm_ids)) /* from this point on, these five names expand to fields of the environment returned by get_exec_env() */
-+#define shm_ctlmax (get_exec_env()->_shm_ctlmax)
-+#define shm_ctlall (get_exec_env()->_shm_ctlall)
-+#define shm_ctlmni (get_exec_env()->_shm_ctlmni)
-+#define shm_total (get_exec_env()->_shm_tot) /* named shm_total rather than shm_tot; the #else branch below gives the reason (a struct field is named shm_tot) */
-+
-+void init_ve_ipc_shm(void)
-+{ /* Reset the shm limits and page counter and initialise the id table; every name below is one of the get_exec_env() macros defined above. */
-+ shm_ctlmax = SHMMAX;
-+ shm_ctlall = SHMALL;
-+ shm_ctlmni = SHMMNI;
-+ shm_total = 0;
-+ ipc_init_ids(&shm_ids, 1); /* shm_ids dereferences get_exec_env()->_shm_ids, so that pointer has to be valid before this call */
-+}
-+
-+void cleanup_ve_ipc_shm(void)
-+{ /* Run do_shm_rmid() on every segment that shm_lock() can lock in slots 0..shm_ids.max_id, holding shm_ids.sem for the whole walk. */
-+ int i;
-+ struct shmid_kernel *shp;
-+
-+ down(&shm_ids.sem);
-+ for (i = 0; i <= shm_ids.max_id; i++) {
-+ shp = shm_lock(i);
-+ if (shp == NULL)
-+ continue;
-+
-+ do_shm_rmid(shp); /* also releases the lock taken by shm_lock(): do_shm_rmid() ends in shm_unlock() or in shm_destroy(), which unlocks */
-+ }
-+ up(&shm_ids.sem);
-+}
-+#define sb_ve(sb) VE_OWNER_FSTYPE(sb->s_type)
-+#define shm_total_sb(sb) (&sb_ve(sb)->_shm_tot)
-+#define shm_lock_sb(id, sb) ((struct shmid_kernel *) \
-+ ipc_lock(sb_ve(sb)->_shm_ids, id))
-+#else
-+/* renamed since there is a struct field named shm_tot */
-+#define shm_total shm_tot
-+#define shm_total_sb(sb) (&shm_tot)
-+#define shm_lock_sb(id, sb) shm_lock(id)
-+#endif
-+
-+static void do_shm_rmid(struct shmid_kernel *shp)
-+{ /* Removal step shared by sys_shmctl() and cleanup_ve_ipc_shm(); shp is passed in locked and is unlocked on both branches. */
-+ if (shp->shm_nattch){ /* still attached: only flag the segment and hide its key */
-+ shp->shm_perm.mode |= SHM_DEST;
-+ /* Do not find it any more */
-+ shp->shm_perm.key = IPC_PRIVATE;
-+ shm_unlock(shp);
-+ } else
-+ shm_destroy (shp); /* no attaches: tear it down now */
-+}
-+
- static inline int shm_checkid(struct shmid_kernel *s, int id)
- {
- if (ipc_checkid(&shm_ids,&s->shm_perm,id))
-@@ -75,25 +143,25 @@ static inline int shm_checkid(struct shm
- return 0;
- }
-
--static inline struct shmid_kernel *shm_rmid(int id)
-+static inline struct shmid_kernel *shm_rmid(struct ipc_ids *ids, int id)
- {
-- return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
-+ return (struct shmid_kernel *)ipc_rmid(ids,id);
- }
-
--static inline int shm_addid(struct shmid_kernel *shp)
-+static inline int shm_addid(struct shmid_kernel *shp, int reqid)
- {
-- return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
-+ return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni, reqid);
- }
-
-
-
--static inline void shm_inc (int id) {
-+static inline void shm_inc(int id, struct super_block *sb) {
- struct shmid_kernel *shp;
-
-- if(!(shp = shm_lock(id)))
-+ if(!(shp = shm_lock_sb(id, sb)))
- BUG();
- shp->shm_atim = get_seconds();
-- shp->shm_lprid = current->tgid;
-+ shp->shm_lprid = virt_tgid(current);
- shp->shm_nattch++;
- shm_unlock(shp);
- }
-@@ -101,7 +169,50 @@ static inline void shm_inc (int id) {
- /* This is called by fork, once for every shm attach. */
- static void shm_open (struct vm_area_struct *shmd)
- {
-- shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
-+ shm_inc(shmd->vm_file->f_dentry->d_inode->i_ino,
-+ shmd->vm_file->f_dentry->d_inode->i_sb);
-+}
-+
-+static int shmem_lock(struct shmid_kernel *shp, int lock,
-+ struct user_struct *user)
-+{ /* Set (lock != 0) or clear (lock == 0) VM_LOCKED on the shmem inode behind shp, adjusting the ub_lockedshm_* charge alongside; returns 0 or -ENOMEM. */
-+ struct file *file = shp->shm_file;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct shmem_inode_info *info = SHMEM_I(inode);
-+ unsigned long size;
-+
-+ size = shp->shm_segsz + PAGE_SIZE - 1; /* passed unshifted and unmasked to the ub_lockedshm_* calls */
-+
-+#ifdef CONFIG_SHMEM
-+ spin_lock(&info->lock);
-+ if (lock && !(info->flags & VM_LOCKED)) {
-+ if (ub_lockedshm_charge(info, size) < 0)
-+ goto out_ch;
-+
-+ if (!user_shm_lock(inode->i_size, user)) /* the per-user accounting is based on inode->i_size, not on size */
-+ goto out_user;
-+ info->flags |= VM_LOCKED;
-+ }
-+ if (!lock && (info->flags & VM_LOCKED) && user) { /* unlocking is skipped entirely when user is NULL */
-+ ub_lockedshm_uncharge(info, size);
-+ user_shm_unlock(inode->i_size, user);
-+ info->flags &= ~VM_LOCKED;
-+ }
-+ spin_unlock(&info->lock);
-+ return 0;
-+
-+out_user: /* user_shm_lock() refused: give back the charge taken just before it */
-+ ub_lockedshm_uncharge(info, size);
-+out_ch:
-+ spin_unlock(&info->lock);
-+ return -ENOMEM;
-+#else /* this branch only adjusts the charge; it does not touch info->flags or the per-user accounting */
-+ if (lock && ub_lockedshm_charge(info, size))
-+ return -ENOMEM;
-+ if (!lock)
-+ ub_lockedshm_uncharge(info, size);
-+ return 0;
-+#endif
-+}
-
- /*
-@@ -114,15 +225,24 @@ static void shm_open (struct vm_area_str
- */
- static void shm_destroy (struct shmid_kernel *shp)
- {
-- shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-- shm_rmid (shp->id);
-+ int numpages, *shm_totalp;
-+ struct file *f;
-+ struct super_block *sb;
-+
-+ f = shp->shm_file;
-+ sb = f->f_dentry->d_inode->i_sb; /* superblock of the backing file; shm_total_sb() picks the page counter from it */
-+ numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-+ shm_totalp = shm_total_sb(sb);
-+ *shm_totalp -= numpages;
-+
-+ shm_rmid (shp->_shm_ids, shp->id); /* uses the ipc_ids pointer stored in the segment rather than the shm_ids macro */
- shm_unlock(shp);
- if (!is_file_hugepages(shp->shm_file))
-- shmem_lock(shp->shm_file, 0, shp->mlock_user);
-+ shmem_lock(shp, 0, shp->mlock_user); /* shmem_lock() now takes the segment itself, not its file */
- else
- user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
- shp->mlock_user);
-- fput (shp->shm_file);
-+ fput(f); /* f is the shp->shm_file value read at the top */
- security_shm_free(shp);
- ipc_rcu_putref(shp);
- }
-@@ -138,12 +258,24 @@ static void shm_close (struct vm_area_st
- struct file * file = shmd->vm_file;
- int id = file->f_dentry->d_inode->i_ino;
- struct shmid_kernel *shp;
-+ struct super_block *sb;
-+ struct ipc_ids *ids;
-+#ifdef CONFIG_VE
-+ struct ve_struct *ve;
-+
-+ sb = file->f_dentry->d_inode->i_sb;
-+ ve = get_ve(sb_ve(sb));
-+ ids = ve->_shm_ids;
-+#else
-+ sb = file->f_dentry->d_inode->i_sb;
-+ ids = &shm_ids;
-+#endif
-
-- down (&shm_ids.sem);
-+ down (&ids->sem);
- /* remove from the list of attaches of the shm segment */
-- if(!(shp = shm_lock(id)))
-+ if(!(shp = shm_lock_sb(id, sb)))
- BUG();
-- shp->shm_lprid = current->tgid;
-+ shp->shm_lprid = virt_tgid(current);
- shp->shm_dtim = get_seconds();
- shp->shm_nattch--;
- if(shp->shm_nattch == 0 &&
-@@ -151,7 +283,10 @@ static void shm_close (struct vm_area_st
- shm_destroy (shp);
- else
- shm_unlock(shp);
-- up (&shm_ids.sem);
-+ up(&ids->sem);
-+#ifdef CONFIG_VE
-+ put_ve(ve);
-+#endif
- }
-
- static int shm_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -161,7 +296,10 @@ static int shm_mmap(struct file * file,
- ret = shmem_mmap(file, vma);
- if (ret == 0) {
- vma->vm_ops = &shm_vm_ops;
-- shm_inc(file->f_dentry->d_inode->i_ino);
-+ if (!(vma->vm_flags & VM_WRITE))
-+ vma->vm_flags &= ~VM_MAYWRITE;
-+ shm_inc(file->f_dentry->d_inode->i_ino,
-+ file->f_dentry->d_inode->i_sb);
- }
-
- return ret;
-@@ -184,19 +322,19 @@ static struct vm_operations_struct shm_v
- #endif
- };
-
--static int newseg (key_t key, int shmflg, size_t size)
-+static int newseg (key_t key, int shmid, int shmflg, size_t size)
- {
- int error;
- struct shmid_kernel *shp;
- int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
- struct file * file;
-- char name[13];
-+ char name[26];
- int id;
-
- if (size < SHMMIN || size > shm_ctlmax)
- return -EINVAL;
-
-- if (shm_tot + numpages >= shm_ctlall)
-+ if (shm_total + numpages >= shm_ctlall)
- return -ENOSPC;
-
- shp = ipc_rcu_alloc(sizeof(*shp));
-@@ -227,7 +365,11 @@ static int newseg (key_t key, int shmflg
- if ((shmflg & SHM_NORESERVE) &&
- sysctl_overcommit_memory != OVERCOMMIT_NEVER)
- acctflag = 0;
-+#ifdef CONFIG_VE
-+ sprintf (name, "VE%d.SYSV%08x", get_exec_env()->veid, key);
-+#else
- sprintf (name, "SYSV%08x", key);
-+#endif
- file = shmem_file_setup(name, size, acctflag);
- }
- error = PTR_ERR(file);
-@@ -235,17 +377,18 @@ static int newseg (key_t key, int shmflg
- goto no_file;
-
- error = -ENOSPC;
-- id = shm_addid(shp);
-+ id = shm_addid(shp, shmid);
- if(id == -1)
- goto no_id;
-
-- shp->shm_cprid = current->tgid;
-+ shp->shm_cprid = virt_tgid(current);
- shp->shm_lprid = 0;
- shp->shm_atim = shp->shm_dtim = 0;
- shp->shm_ctim = get_seconds();
- shp->shm_segsz = size;
- shp->shm_nattch = 0;
- shp->id = shm_buildid(id,shp->shm_perm.seq);
-+ shp->_shm_ids = &shm_ids;
- shp->shm_file = file;
- file->f_dentry->d_inode->i_ino = shp->id;
-
-@@ -253,7 +396,7 @@ static int newseg (key_t key, int shmflg
- if (!(shmflg & SHM_HUGETLB))
- file->f_op = &shm_file_operations;
-
-- shm_tot += numpages;
-+ shm_total += numpages;
- shm_unlock(shp);
- return shp->id;
-
-@@ -272,12 +415,12 @@ asmlinkage long sys_shmget (key_t key, s
-
- down(&shm_ids.sem);
- if (key == IPC_PRIVATE) {
-- err = newseg(key, shmflg, size);
-+ err = newseg(key, -1, shmflg, size);
- } else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
- if (!(shmflg & IPC_CREAT))
- err = -ENOENT;
- else
-- err = newseg(key, shmflg, size);
-+ err = newseg(key, -1, shmflg, size);
- } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
- err = -EEXIST;
- } else {
-@@ -470,7 +613,7 @@ asmlinkage long sys_shmctl (int shmid, i
- down(&shm_ids.sem);
- shm_info.used_ids = shm_ids.in_use;
- shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
-- shm_info.shm_tot = shm_tot;
-+ shm_info.shm_tot = shm_total;
- shm_info.swap_attempts = 0;
- shm_info.swap_successes = 0;
- err = shm_ids.max_id;
-@@ -557,14 +700,14 @@ asmlinkage long sys_shmctl (int shmid, i
- if(cmd==SHM_LOCK) {
- struct user_struct * user = current->user;
- if (!is_file_hugepages(shp->shm_file)) {
-- err = shmem_lock(shp->shm_file, 1, user);
-+ err = shmem_lock(shp, 1, user);
- if (!err) {
- shp->shm_perm.mode |= SHM_LOCKED;
- shp->mlock_user = user;
- }
- }
- } else if (!is_file_hugepages(shp->shm_file)) {
-- shmem_lock(shp->shm_file, 0, shp->mlock_user);
-+ shmem_lock(shp, 0, shp->mlock_user);
- shp->shm_perm.mode &= ~SHM_LOCKED;
- shp->mlock_user = NULL;
- }
-@@ -594,7 +737,7 @@ asmlinkage long sys_shmctl (int shmid, i
-
- if (current->euid != shp->shm_perm.uid &&
- current->euid != shp->shm_perm.cuid &&
-- !capable(CAP_SYS_ADMIN)) {
-+ !capable(CAP_VE_SYS_ADMIN)) {
- err=-EPERM;
- goto out_unlock_up;
- }
-@@ -603,13 +746,7 @@ asmlinkage long sys_shmctl (int shmid, i
- if (err)
- goto out_unlock_up;
-
-- if (shp->shm_nattch){
-- shp->shm_perm.mode |= SHM_DEST;
-- /* Do not find it any more */
-- shp->shm_perm.key = IPC_PRIVATE;
-- shm_unlock(shp);
-- } else
-- shm_destroy (shp);
-+ do_shm_rmid(shp);
- up(&shm_ids.sem);
- goto out;
- }
-@@ -633,7 +770,7 @@ asmlinkage long sys_shmctl (int shmid, i
- err=-EPERM;
- if (current->euid != shp->shm_perm.uid &&
- current->euid != shp->shm_perm.cuid &&
-- !capable(CAP_SYS_ADMIN)) {
-+ !capable(CAP_VE_SYS_ADMIN)) {
- goto out_unlock_up;
- }
-
-@@ -916,3 +1053,55 @@ static int sysvipc_shm_proc_show(struct
- shp->shm_ctim);
- }
- #endif
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_MODULE
-+#include <linux/module.h>
-+
-+struct file * sysvipc_setup_shm(key_t key, int shmid, size_t size, int shmflg)
-+{ /* Return the backing file of the segment at id shmid with an extra reference (get_file), creating the segment through newseg() when shm_lock() finds none; failures come back as ERR_PTR values. */
-+ struct shmid_kernel *shp;
-+ struct file *file;
-+
-+ down(&shm_ids.sem);
-+ shp = shm_lock(shmid); /* NOTE(review): a segment found here is used as-is; key and size are not compared against it in this function */
-+ if (!shp) {
-+ int err;
-+
-+ err = newseg(key, shmid, shmflg, size); /* shmid is handed over as the requested id */
-+ file = ERR_PTR(err);
-+ if (err < 0)
-+ goto out;
-+ shp = shm_lock(shmid);
-+ }
-+ file = ERR_PTR(-EINVAL); /* result if the segment could not be locked after creation */
-+ if (shp) {
-+ file = shp->shm_file;
-+ get_file(file);
-+ shm_unlock(shp);
-+ }
-+out:
-+ up(&shm_ids.sem);
-+ return file;
-+}
-+EXPORT_SYMBOL_GPL(sysvipc_setup_shm);
-+
-+int sysvipc_walk_shm(int (*func)(struct shmid_kernel*, void *), void *arg)
-+{ /* Call func on every slot 0..shm_ids.max_id that shm_lock() can lock, holding shm_ids.sem throughout; stops at and returns the first non-zero result from func, else 0. */
-+ int i;
-+ int err = 0;
-+ struct shmid_kernel* shp;
-+
-+ down(&shm_ids.sem);
-+ for(i = 0; i <= shm_ids.max_id; i++) {
-+ if ((shp = shm_lock(i)) == NULL)
-+ continue;
-+ err = func(shp, arg); /* func runs with the segment still locked */
-+ shm_unlock(shp);
-+ if (err)
-+ break;
-+ }
-+ up(&shm_ids.sem);
-+ return err;
-+}
-+EXPORT_SYMBOL_GPL(sysvipc_walk_shm);
-+#endif
-diff -upr linux-2.6.16.orig/ipc/util.c linux-2.6.16-026test009/ipc/util.c
---- linux-2.6.16.orig/ipc/util.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/util.c 2006-04-19 15:02:13.000000000 +0400
-@@ -13,6 +13,7 @@
- */
-
- #include <linux/config.h>
-+#include <linux/module.h>
- #include <linux/mm.h>
- #include <linux/shm.h>
- #include <linux/init.h>
-@@ -30,6 +31,8 @@
-
- #include <asm/unistd.h>
-
-+#include <ub/ub_mem.h>
-+
- #include "util.h"
-
- struct ipc_proc_iface {
-@@ -65,7 +68,7 @@ __initcall(ipc_init);
- * array itself.
- */
-
--void __init ipc_init_ids(struct ipc_ids* ids, int size)
-+void __ve_init ipc_init_ids(struct ipc_ids* ids, int size)
- {
- int i;
- sema_init(&ids->sem,1);
-@@ -94,7 +97,21 @@ void __init ipc_init_ids(struct ipc_ids*
- ids->entries->size = size;
- for(i=0;i<size;i++)
- ids->entries->p[i] = NULL;
-+
-+ ids->owner_env = get_exec_env();
-+}
-+
-+#ifdef CONFIG_VE
-+static inline void ipc_free_ids(struct ipc_ids *ids)
-+{ /* Release one ipc_ids container; a NULL argument is accepted and ignored. */
-+ if (ids == NULL)
-+ return;
-+
-+ if (ids->entries != &ids->nullentry) /* nullentry is embedded in *ids and goes away with the kfree() below */
-+ ipc_rcu_putref(ids->entries);
-+ kfree(ids);
-+}
-+#endif
-
- #ifdef CONFIG_PROC_FS
- static struct file_operations sysvipc_proc_fops;
-@@ -182,8 +199,7 @@ static int grow_ary(struct ipc_ids* ids,
- if(new == NULL)
- return size;
- new->size = newsize;
-- memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size +
-- sizeof(struct ipc_id_ary));
-+ memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
- for(i=size;i<newsize;i++) {
- new->p[i] = NULL;
- }
-@@ -213,10 +229,20 @@ static int grow_ary(struct ipc_ids* ids,
- * Called with ipc_ids.sem held.
- */
-
--int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
-+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid)
- {
- int id;
-
-+ if (reqid >= 0) {
-+ id = reqid%SEQ_MULTIPLIER;
-+ size = grow_ary(ids,id+1);
-+ if (id >= size)
-+ return -1;
-+ if (ids->entries->p[id] == NULL)
-+ goto found;
-+ return -1;
-+ }
-+
- size = grow_ary(ids,size);
-
- /*
-@@ -229,16 +255,21 @@ int ipc_addid(struct ipc_ids* ids, struc
- }
- return -1;
- found:
-- ids->in_use++;
-+ if (ids->in_use++ == 0)
-+ (void)get_ve(ids->owner_env);
- if (id > ids->max_id)
- ids->max_id = id;
-
- new->cuid = new->uid = current->euid;
- new->gid = new->cgid = current->egid;
-
-- new->seq = ids->seq++;
-- if(ids->seq > ids->seq_max)
-- ids->seq = 0;
-+ if (reqid >= 0) {
-+ new->seq = reqid/SEQ_MULTIPLIER;
-+ } else {
-+ new->seq = ids->seq++;
-+ if(ids->seq > ids->seq_max)
-+ ids->seq = 0;
-+ }
-
- spin_lock_init(&new->lock);
- new->deleted = 0;
-@@ -276,7 +307,8 @@ struct kern_ipc_perm* ipc_rmid(struct ip
- ids->entries->p[lid] = NULL;
- if(p==NULL)
- BUG();
-- ids->in_use--;
-+ if (--ids->in_use == 0)
-+ put_ve(ids->owner_env);
-
- if (lid == ids->max_id) {
- do {
-@@ -302,9 +334,9 @@ void* ipc_alloc(int size)
- {
- void* out;
- if(size > PAGE_SIZE)
-- out = vmalloc(size);
-+ out = ub_vmalloc(size);
- else
-- out = kmalloc(size, GFP_KERNEL);
-+ out = ub_kmalloc(size, GFP_KERNEL);
- return out;
- }
-
-@@ -387,14 +419,14 @@ void* ipc_rcu_alloc(int size)
- * workqueue if necessary (for vmalloc).
- */
- if (rcu_use_vmalloc(size)) {
-- out = vmalloc(HDRLEN_VMALLOC + size);
-+ out = ub_vmalloc(HDRLEN_VMALLOC + size);
- if (out) {
- out += HDRLEN_VMALLOC;
- container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
- container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
- }
- } else {
-- out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
-+ out = ub_kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
- if (out) {
- out += HDRLEN_KMALLOC;
- container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
-@@ -603,6 +635,71 @@ int ipc_checkid(struct ipc_ids* ids, str
- return 0;
- }
-
-+#ifdef CONFIG_VE
-+void __init prepare_ipc(void)
-+{ /* Run the three per-subsystem prepare_*() hooks, in msg, sem, shm order. */
-+ prepare_msg();
-+ prepare_sem();
-+ prepare_shm();
-+}
-+
-+int init_ve_ipc(struct ve_struct * envid)
-+{ /* Allocate the msg, sem and shm ipc_ids containers for envid and run the per-subsystem init hooks; returns 0, or -ENOMEM if any allocation fails. */
-+ envid->_msg_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *), /* NOTE(review): the reason for the extra sizeof(void *) is not visible here */
-+ GFP_KERNEL);
-+ if (envid->_msg_ids == NULL)
-+ goto out_nomem;
-+ envid->_sem_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
-+ GFP_KERNEL);
-+ if (envid->_sem_ids == NULL)
-+ goto out_free_msg;
-+ envid->_shm_ids = kmalloc(sizeof(struct ipc_ids) + sizeof(void *),
-+ GFP_KERNEL);
-+ if (envid->_shm_ids == NULL)
-+ goto out_free_sem;
-+
-+ init_ve_ipc_msg(); /* NOTE(review): these hooks take no argument; init_ve_ipc_shm() reaches its table through get_exec_env(), so envid is presumably the current exec env -- confirm against the caller */
-+ init_ve_ipc_sem();
-+ init_ve_ipc_shm();
-+ return 0;
-+
-+out_free_sem: /* NOTE(review): the freed pointers stay in envid (not reset to NULL); confirm no caller runs ve_ipc_free() after a failure */
-+ kfree(envid->_sem_ids);
-+out_free_msg:
-+ kfree(envid->_msg_ids);
-+out_nomem:
-+ return -ENOMEM;
-+}
-+
-+void ve_ipc_cleanup(void)
-+{ /* Run the three per-subsystem cleanup hooks; none takes an argument (cleanup_ve_ipc_shm() works on the table of the current exec env). */
-+ cleanup_ve_ipc_msg();
-+ cleanup_ve_ipc_sem();
-+ cleanup_ve_ipc_shm();
-+}
-+
-+void ve_ipc_free(struct ve_struct *env)
-+{ /* Free the three ipc_ids containers of env and clear the pointers; ipc_free_ids() ignores NULL, so repeated calls are harmless. */
-+ ipc_free_ids(env->_msg_ids);
-+ ipc_free_ids(env->_sem_ids);
-+ ipc_free_ids(env->_shm_ids);
-+ env->_msg_ids = NULL;
-+ env->_sem_ids = NULL;
-+ env->_shm_ids = NULL;
-+}
-+
-+void fini_ve_ipc(struct ve_struct *ptr)
-+{ /* Full teardown: remove the IPC objects, then free the containers of ptr. */
-+ ve_ipc_cleanup(); /* takes no argument, unlike ve_ipc_free(ptr) below */
-+ ve_ipc_free(ptr);
-+}
-+
-+EXPORT_SYMBOL(init_ve_ipc);
-+EXPORT_SYMBOL(ve_ipc_cleanup);
-+EXPORT_SYMBOL(ve_ipc_free);
-+EXPORT_SYMBOL(fini_ve_ipc);
-+#endif /* CONFIG_VE */
-+
- #ifdef __ARCH_WANT_IPC_PARSE_VERSION
-
-
-diff -upr linux-2.6.16.orig/ipc/util.h linux-2.6.16-026test009/ipc/util.h
---- linux-2.6.16.orig/ipc/util.h 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/ipc/util.h 2006-04-19 15:02:12.000000000 +0400
-@@ -15,6 +15,22 @@ void sem_init (void);
- void msg_init (void);
- void shm_init (void);
-
-+#ifdef CONFIG_VE
-+void prepare_msg(void);
-+void prepare_sem(void);
-+void prepare_shm(void);
-+void init_ve_ipc_msg(void);
-+void init_ve_ipc_sem(void);
-+void init_ve_ipc_shm(void);
-+void cleanup_ve_ipc_msg(void);
-+void cleanup_ve_ipc_sem(void);
-+void cleanup_ve_ipc_shm(void);
-+
-+#define __ve_init
-+#else
-+#define __ve_init __init
-+#endif
-+
- struct ipc_id_ary {
- int size;
- struct kern_ipc_perm *p[0];
-@@ -28,10 +44,11 @@ struct ipc_ids {
- struct semaphore sem;
- struct ipc_id_ary nullentry;
- struct ipc_id_ary* entries;
-+ struct ve_struct *owner_env;
- };
-
- struct seq_file;
--void __init ipc_init_ids(struct ipc_ids* ids, int size);
-+void __ve_init ipc_init_ids(struct ipc_ids *ids, int size);
- #ifdef CONFIG_PROC_FS
- void __init ipc_init_proc_interface(const char *path, const char *header,
- struct ipc_ids *ids,
-@@ -42,7 +59,7 @@ void __init ipc_init_proc_interface(cons
-
- /* must be called with ids->sem acquired.*/
- int ipc_findkey(struct ipc_ids* ids, key_t key);
--int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
-+int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size, int reqid);
-
- /* must be called with both locks acquired. */
- struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
-diff -upr linux-2.6.16.orig/kernel/Kconfig.openvz linux-2.6.16-026test009/kernel/Kconfig.openvz
---- linux-2.6.16.orig/kernel/Kconfig.openvz 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/Kconfig.openvz 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,54 @@
-+# Copyright (C) 2005 SWsoft
-+# All rights reserved.
-+# Licensing governed by "linux/COPYING.SWsoft" file.
-+
-+menu "OpenVZ"
-+
-+config VE
-+ bool "Virtual Environment support"
-+ default y
-+ help
-+ This option adds support of virtual Linux running on the original box
-+ with fully supported virtual network driver, tty subsystem and
-+ configurable access for hardware and other resources.
-+
-+config VE_CALLS
-+ tristate "VE calls interface"
-+ depends on VE
-+ default m
-+ help
-+ This option controls how to build vzmon code containing VE calls.
-+ By default it is built as the module vzmon.o.
-+
-+config VE_NETDEV
-+ tristate "VE networking"
-+ depends on VE
-+ default m
-+ help
-+ This option controls whether to build VE networking code.
-+
-+config VE_IPTABLES
-+ bool "VE netfiltering"
-+ depends on VE && VE_NETDEV && INET && NETFILTER
-+ default y
-+ help
-+ This option controls whether to build VE netfiltering code.
-+
-+config VZ_WDOG
-+ tristate "VE watchdog module"
-+ depends on VE
-+ default m
-+ help
-+ This option controls building of vzwdog module, which dumps
-+ a lot of useful system info on console periodically.
-+
-+config VZ_CHECKPOINT
-+ tristate "Checkpointing & restoring Virtual Environments"
-+ depends on SOFTWARE_SUSPEND
-+ default m
-+ help
-+ This option adds two modules, "cpt" and "rst", which allow
-+ saving a running Virtual Environment and restoring it
-+ on another host (live migration) or on the same host (checkpointing).
-+
-+endmenu
-diff -upr linux-2.6.16.orig/kernel/Makefile linux-2.6.16-026test009/kernel/Makefile
---- linux-2.6.16.orig/kernel/Makefile 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/Makefile 2006-04-19 15:02:12.000000000 +0400
-@@ -10,6 +10,18 @@ obj-y = sched.o fork.o exec_domain.o
- kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o
-
-+obj-y += ub/
-+
-+obj-$(CONFIG_VE) += ve.o
-+obj-$(CONFIG_VE) += veowner.o
-+obj-$(CONFIG_VE_CALLS) += vzdev.o
-+obj-$(CONFIG_VZ_WDOG) += vzwdog.o
-+obj-$(CONFIG_VE_CALLS) += vzmon.o
-+
-+vzmon-objs = vecalls.o
-+
-+obj-$(CONFIG_VZ_CHECKPOINT) += cpt/
-+
- obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
- obj-$(CONFIG_FUTEX) += futex.o
- obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-diff -upr linux-2.6.16.orig/kernel/audit.c linux-2.6.16-026test009/kernel/audit.c
---- linux-2.6.16.orig/kernel/audit.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/audit.c 2006-04-19 15:02:12.000000000 +0400
-@@ -372,6 +372,9 @@ static int audit_receive_msg(struct sk_b
- uid_t loginuid; /* loginuid of sender */
- struct audit_sig_info sig_data;
-
-+ if (!ve_is_super(VE_OWNER_SKB(skb)))
-+ return -ECONNREFUSED;
-+
- err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
- if (err)
- return err;
-diff -upr linux-2.6.16.orig/kernel/capability.c linux-2.6.16-026test009/kernel/capability.c
---- linux-2.6.16.orig/kernel/capability.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/capability.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,6 +25,7 @@ EXPORT_SYMBOL(cap_bset);
- * Locking rule: acquire this prior to tasklist_lock.
- */
- static DEFINE_SPINLOCK(task_capability_lock);
-+EXPORT_SYMBOL(task_capability_lock);
-
- /*
- * For sys_getproccap() and sys_setproccap(), any of the three
-@@ -67,8 +68,8 @@ asmlinkage long sys_capget(cap_user_head
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
-- if (pid && pid != current->pid) {
-- target = find_task_by_pid(pid);
-+ if (pid && pid != virt_pid(current)) {
-+ target = find_task_by_pid_ve(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
-@@ -100,9 +101,13 @@ static inline int cap_set_pg(int pgrp, k
- int ret = -EPERM;
- int found = 0;
-
-- do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
-+ pgrp = vpid_to_pid(pgrp);
-+ if (pgrp < 0)
-+ return ret;
-+
-+ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, g) {
- target = g;
-- while_each_thread(g, target) {
-+ while_each_thread_ve(g, target) {
- if (!security_capset_check(target, effective,
- inheritable,
- permitted)) {
-@@ -113,7 +118,7 @@ static inline int cap_set_pg(int pgrp, k
- }
- found = 1;
- }
-- } while_each_task_pid(pgrp, PIDTYPE_PGID, g);
-+ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, g);
-
- if (!found)
- ret = 0;
-@@ -132,7 +137,7 @@ static inline int cap_set_all(kernel_cap
- int ret = -EPERM;
- int found = 0;
-
-- do_each_thread(g, target) {
-+ do_each_thread_ve(g, target) {
- if (target == current || target->pid == 1)
- continue;
- found = 1;
-@@ -141,7 +146,7 @@ static inline int cap_set_all(kernel_cap
- continue;
- ret = 0;
- security_capset_set(target, effective, inheritable, permitted);
-- } while_each_thread(g, target);
-+ } while_each_thread_ve(g, target);
-
- if (!found)
- ret = 0;
-@@ -188,7 +193,7 @@ asmlinkage long sys_capset(cap_user_head
- if (get_user(pid, &header->pid))
- return -EFAULT;
-
-- if (pid && pid != current->pid && !capable(CAP_SETPCAP))
-+ if (pid && pid != virt_pid(current) && !capable(CAP_SETPCAP))
- return -EPERM;
-
- if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
-@@ -199,8 +204,8 @@ asmlinkage long sys_capset(cap_user_head
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
-- if (pid > 0 && pid != current->pid) {
-- target = find_task_by_pid(pid);
-+ if (pid > 0 && pid != virt_pid(current)) {
-+ target = find_task_by_pid_ve(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
-diff -upr linux-2.6.16.orig/kernel/compat.c linux-2.6.16-026test009/kernel/compat.c
---- linux-2.6.16.orig/kernel/compat.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/compat.c 2006-04-19 15:02:12.000000000 +0400
-@@ -21,6 +21,8 @@
- #include <linux/syscalls.h>
- #include <linux/unistd.h>
- #include <linux/security.h>
-+#include <linux/hrtimer.h>
-+#include <linux/module.h>
-
- #include <asm/uaccess.h>
-
-@@ -38,61 +40,73 @@ int put_compat_timespec(const struct tim
- __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
- }
-
--static long compat_nanosleep_restart(struct restart_block *restart)
-+long compat_nanosleep_restart(struct restart_block *restart) /* static dropped by this patch; the symbol is exported right after the function */
- {
-- unsigned long expire = restart->arg0, now = jiffies;
- struct compat_timespec __user *rmtp;
-+ struct timespec tu;
-+ void *rfn_save = restart->fn;
-+ struct hrtimer timer;
-+ ktime_t rem;
-
-- /* Did it expire while we handled signals? */
-- if (!time_after(expire, now))
-- return 0;
-+ restart->fn = do_no_restart_syscall; /* the saved handler is put back only on the -ERESTART_RESTARTBLOCK path at the end */
-+
-+ hrtimer_init(&timer, (clockid_t) restart->arg3, HRTIMER_ABS); /* arg3 is filled by compat_sys_nanosleep() from timer.base->index */
-+
-+ timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; /* rebuild the 64-bit expiry: arg0 holds the low 32 bits, arg1 the rest */
-
-- expire = schedule_timeout_interruptible(expire - now);
-- if (expire == 0)
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ rem = schedule_hrtimer(&timer, HRTIMER_ABS);
-+
-+ if (rem.tv64 <= 0)
- return 0;
-
-- rmtp = (struct compat_timespec __user *)restart->arg1;
-- if (rmtp) {
-- struct compat_timespec ct;
-- struct timespec t;
--
-- jiffies_to_timespec(expire, &t);
-- ct.tv_sec = t.tv_sec;
-- ct.tv_nsec = t.tv_nsec;
-- if (copy_to_user(rmtp, &ct, sizeof(ct)))
-- return -EFAULT;
-- }
-- /* The 'restart' block is already filled in */
-+ rmtp = (struct compat_timespec __user *) restart->arg2; /* arg2 carries the user pointer for the remaining time */
-+ tu = ktime_to_timespec(rem);
-+ if (rmtp && put_compat_timespec(&tu, rmtp))
-+ return -EFAULT;
-+
-+ restart->fn = rfn_save;
-+
-+ /* The other values in restart are already filled in */
- return -ERESTART_RESTARTBLOCK;
- }
-+EXPORT_SYMBOL_GPL(compat_nanosleep_restart);
-
- asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
- struct compat_timespec __user *rmtp)
- {
- struct timespec t;
- struct restart_block *restart;
-- unsigned long expire;
-+ struct hrtimer timer;
-+ ktime_t rem;
-
- if (get_compat_timespec(&t, rqtp))
- return -EFAULT;
-
-- if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
-+ if (!timespec_valid(&t))
- return -EINVAL;
-
-- expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
-- expire = schedule_timeout_interruptible(expire);
-- if (expire == 0)
-+ hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_REL);
-+
-+ timer.expires = timespec_to_ktime(t); /* relative timeout, taken directly from the user-supplied timespec */
-+
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ rem = schedule_hrtimer(&timer, HRTIMER_REL);
-+ if (rem.tv64 <= 0)
- return 0;
-
-- if (rmtp) {
-- jiffies_to_timespec(expire, &t);
-- if (put_compat_timespec(&t, rmtp))
-- return -EFAULT;
-- }
-+ t = ktime_to_timespec(rem); /* t is reused to hold the remaining time */
-+
-+ if (rmtp && put_compat_timespec(&t, rmtp))
-+ return -EFAULT;
-+
- restart = &current_thread_info()->restart_block;
- restart->fn = compat_nanosleep_restart;
-- restart->arg0 = jiffies + expire;
-- restart->arg1 = (unsigned long) rmtp;
-+ restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; /* NOTE(review): the restart handler re-arms with HRTIMER_ABS, so timer.expires is presumably absolute by now -- confirm in schedule_hrtimer() */
-+ restart->arg1 = timer.expires.tv64 >> 32; /* expiry is split into low (arg0) and high (arg1) halves */
-+ restart->arg2 = (unsigned long) rmtp;
-+ restart->arg3 = (unsigned long) timer.base->index; /* read back as the clock id by compat_nanosleep_restart() */
-+
- return -ERESTART_RESTARTBLOCK;
- }
-
-diff -upr linux-2.6.16.orig/kernel/configs.c linux-2.6.16-026test009/kernel/configs.c
---- linux-2.6.16.orig/kernel/configs.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/configs.c 2006-04-19 15:02:12.000000000 +0400
-@@ -89,8 +89,7 @@ static int __init ikconfig_init(void)
- struct proc_dir_entry *entry;
-
- /* create the current config file */
-- entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO,
-- &proc_root);
-+ entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, NULL);
- if (!entry)
- return -ENOMEM;
-
-diff -upr linux-2.6.16.orig/kernel/cpt/Makefile linux-2.6.16-026test009/kernel/cpt/Makefile
---- linux-2.6.16.orig/kernel/cpt/Makefile 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/Makefile 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,34 @@
-+#
-+#
-+# kernel/cpt/Makefile
-+#
-+# Copyright (C) 2000-2005 SWsoft
-+# All rights reserved.
-+#
-+# Licensing governed by "linux/COPYING.SWsoft" file.
-+
-+obj-$(CONFIG_VZ_CHECKPOINT) += vzcpt.o vzrst.o
-+
-+vzcpt-objs := cpt_proc.o cpt_dump.o cpt_obj.o cpt_context.o cpt_process.o \
-+ cpt_mm.o cpt_files.o cpt_kernel.o \
-+ cpt_socket.o cpt_socket_in.o cpt_tty.o cpt_sysvipc.o cpt_net.o \
-+ cpt_conntrack.o cpt_ubc.o cpt_epoll.o
-+
-+vzrst-objs := rst_proc.o rst_undump.o cpt_obj.o rst_context.o rst_process.o \
-+ rst_mm.o rst_files.o cpt_kernel.o \
-+ rst_socket.o rst_socket_in.o rst_tty.o rst_sysvipc.o rst_net.o \
-+ rst_conntrack.o rst_ubc.o rst_epoll.o
-+
-+ifeq ($(CONFIG_VZ_CHECKPOINT_LAZY), y)
-+vzcpt-objs += cpt_pagein.o
-+vzrst-objs += rst_pagein.o
-+endif
-+
-+ifeq ($(CONFIG_X86_64), y)
-+vzcpt-objs += cpt_x8664.o
-+vzrst-objs += cpt_x8664.o rst_x8664.o
-+endif
-+
-+ifeq ($(CONFIG_X86_32), y)
-+vzrst-objs += rst_i386.o
-+endif
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_conntrack.c linux-2.6.16-026test009/kernel/cpt/cpt_conntrack.c
---- linux-2.6.16.orig/kernel/cpt/cpt_conntrack.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_conntrack.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,370 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_conntrack.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/netdevice.h>
-+#include <linux/inetdevice.h>
-+#include <linux/rtnetlink.h>
-+#include <linux/unistd.h>
-+#include <linux/ve.h>
-+#include <linux/vzcalluser.h>
-+#include <linux/cpt_image.h>
-+#include <linux/icmp.h>
-+#include <linux/ip.h>
-+
-+#if defined(CONFIG_VE_IPTABLES) && \
-+ (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
-+
-+#include <linux/netfilter.h>
-+#include <linux/netfilter_ipv4/ip_conntrack.h>
-+#include <linux/netfilter_ipv4/ip_nat.h>
-+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+
-+/* How does it work?
-+ *
-+ * Network is disabled, so new conntrack entries will not appear.
-+ * However, some of them can disappear because of timeouts.
-+ *
-+ * So, we take read_lock, collect all required information atomically,
-+ * essentially, creating parallel "refcount" structures holding pointers.
-+ * We delete conntrack timers as well, so the structures cannot disappear
-+ * after releasing the lock. Now, after releasing lock we can dump everything
-+ * safely. And on exit we restore timers to their original values.
-+ *
-+ * Note, this approach is not going to work in VE0.
-+ */
-+
-+struct ct_holder
-+{
-+ struct ct_holder *next;
-+ struct ip_conntrack_tuple_hash *cth;
-+ int index;
-+};
-+
-+static void encode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple)
-+{
-+ v->cpt_dst = tuple->dst.ip;
-+ v->cpt_dstport = tuple->dst.u.all;
-+ v->cpt_protonum = tuple->dst.protonum;
-+ v->cpt_dir = tuple->dst.dir;
-+
-+ v->cpt_src = tuple->src.ip;
-+ v->cpt_srcport = tuple->src.u.all;
-+}
-+
-+static int dump_one_expect(struct cpt_ip_connexpect_image *v,
-+ struct ip_conntrack_expect *exp,
-+ int sibling, cpt_context_t *ctx)
-+{
-+ int err = 0;
-+
-+ v->cpt_next = sizeof(*v);
-+ v->cpt_object = CPT_OBJ_NET_CONNTRACK_EXPECT;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_VOID;
-+
-+ encode_tuple(&v->cpt_tuple, &exp->tuple);
-+ encode_tuple(&v->cpt_mask, &exp->mask);
-+ v->cpt_sibling_conntrack = sibling;
-+ v->cpt_flags = exp->flags;
-+ v->cpt_seq = exp->id;
-+ v->cpt_dir = 0;
-+ v->cpt_manip_proto = 0;
-+#ifdef CONFIG_IP_NF_NAT_NEEDED
-+ v->cpt_manip_proto = exp->saved_proto.all;
-+ v->cpt_dir = exp->dir;
-+#endif
-+ v->cpt_timeout = 0;
-+ if (exp->master->helper->timeout)
-+ v->cpt_timeout = exp->timeout.expires - jiffies;
-+ return err;
-+}
-+
-+/* NOTE. We use one page to dump the list of expectations. This may not be
-+ * enough in theory. In practice there is only one expectation per conntrack
-+ * record. Moreover, taking into account that _ALL_ expectations are saved in
-+ * one global list, which is looked up for each incoming/outgoing packet, the
-+ * system would be severely dead if even one conntrack had that many
-+ * expectations. In short, I am not going to repair this.
-+ */
-+
-+static int dump_expect_list(struct ip_conntrack *ct, struct ct_holder *list,
-+ cpt_context_t *ctx)
-+{
-+ int err = 0;
-+ unsigned long pg;
-+ struct cpt_ip_connexpect_image *v;
-+ struct ip_conntrack_expect *exp;
-+
-+ if (ct->expecting == 0)
-+ return err;
-+ if (ct->expecting*sizeof(struct cpt_ip_connexpect_image) > PAGE_SIZE)
-+ return -ENOBUFS;
-+
-+ pg = __get_free_page(GFP_KERNEL);
-+ if (!pg)
-+ return -ENOMEM;
-+ v = (struct cpt_ip_connexpect_image *)pg;
-+
-+ read_lock_bh(&ip_conntrack_lock);
-+ list_for_each_entry(exp, &ve_ip_conntrack_expect_list, list) {
-+ int sibling;
-+
-+ if (exp->master != ct)
-+ continue;
-+
-+ if (ct->helper == NULL) {
-+ eprintk_ctx("conntrack: no helper and non-trivial expectation\n");
-+ err = -EINVAL;
-+ break;
-+ }
-+
-+ sibling = 0;
-+#if 0
-+ /* That's all? No need to calculate sibling? */
-+ if (exp->sibling) {
-+ struct ct_holder *c;
-+ for (c = list; c; c = c->next) {
-+ if (tuplehash_to_ctrack(c->cth) == exp->sibling) {
-+ sibling = c->index;
-+ break;
-+ }
-+ }
-+ /* NOTE: exp->sibling could be not "confirmed" and, hence,
-+ * out of hash table. We should just ignore such a sibling,
-+ * the connection is going to be retried, the packet
-+ * apparently was lost somewhere.
-+ */
-+ if (sibling == 0)
-+ dprintk_ctx("sibling conntrack is not found\n");
-+ }
-+#endif
-+
-+ /* If the expectation still does not have exp->sibling
-+ * and timer is not running, it is about to die on another
-+ * cpu. Skip it. */
-+ if (!sibling &&
-+ ct->helper->timeout &&
-+ !timer_pending(&exp->timeout)) {
-+ dprintk_ctx("conntrack: expectation: no timer\n");
-+ continue;
-+ }
-+
-+ err = dump_one_expect(v, exp, sibling, ctx);
-+ if (err)
-+ break;
-+
-+ v++;
-+ }
-+ read_unlock_bh(&ip_conntrack_lock);
-+
-+ if (err == 0 && (unsigned long)v != pg)
-+ ctx->write((void*)pg, (unsigned long)v - pg, ctx);
-+
-+ free_page(pg);
-+ return err;
-+}
-+
-+/* Dump one conntrack (header image, then its expectations) as one object.
-+ * Returns 0, -EINVAL on a layout mismatch, or dump_expect_list()'s error. */
-+static int dump_one_ct(struct ct_holder *c, struct ct_holder *list,
-+		       cpt_context_t *ctx)
-+{
-+	struct ip_conntrack_tuple_hash *h = c->cth;
-+	struct ip_conntrack *ct = tuplehash_to_ctrack(h);
-+	struct cpt_ip_conntrack_image v;
-+	int err = 0;
-+
-+	if (sizeof(v.cpt_proto_data) != sizeof(ct->proto)) {
-+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
-+		return -EINVAL;
-+	}
-+	if (sizeof(v.cpt_help_data) != sizeof(ct->help)) {
-+		eprintk_ctx("conntrack module ct->help version mismatch\n");
-+		return -EINVAL;
-+	}
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	/* v is written to the image verbatim: clear it first so that padding
-+	 * and fields skipped by the #ifdefs below do not leak stack data. */
-+	memset(&v, 0, sizeof(v));
-+	v.cpt_next = CPT_NULL;
-+	v.cpt_object = CPT_OBJ_NET_CONNTRACK;
-+	v.cpt_hdrlen = sizeof(v);
-+	v.cpt_content = CPT_CONTENT_ARRAY;
-+
-+	read_lock_bh(&ip_conntrack_lock);
-+	v.cpt_status = ct->status;
-+	v.cpt_timeout = ct->timeout.expires - jiffies;
-+	v.cpt_ct_helper = (ct->helper != NULL);
-+	v.cpt_index = c->index;
-+	v.cpt_id = ct->id;
-+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-+	v.cpt_mark = ct->mark;
-+#endif
-+	encode_tuple(&v.cpt_tuple[0], &ct->tuplehash[0].tuple);
-+	encode_tuple(&v.cpt_tuple[1], &ct->tuplehash[1].tuple);
-+	memcpy(&v.cpt_proto_data, &ct->proto, sizeof(v.cpt_proto_data));
-+	memcpy(&v.cpt_help_data, &ct->help, sizeof(v.cpt_help_data));
-+
-+	/* cpt_initialized, cpt_num_manips and cpt_nat_helper stay zero. */
-+#ifdef CONFIG_IP_NF_NAT_NEEDED
-+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-+	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
-+	v.cpt_masq_index = ct->nat.masq_index;
-+#endif
-+	/* "help" data is used by pptp, difficult to support */
-+	v.cpt_nat_seq[0].cpt_correction_pos = ct->nat.info.seq[0].correction_pos;
-+	v.cpt_nat_seq[0].cpt_offset_before = ct->nat.info.seq[0].offset_before;
-+	v.cpt_nat_seq[0].cpt_offset_after = ct->nat.info.seq[0].offset_after;
-+	v.cpt_nat_seq[1].cpt_correction_pos = ct->nat.info.seq[1].correction_pos;
-+	v.cpt_nat_seq[1].cpt_offset_before = ct->nat.info.seq[1].offset_before;
-+	v.cpt_nat_seq[1].cpt_offset_after = ct->nat.info.seq[1].offset_after;
-+#endif
-+	read_unlock_bh(&ip_conntrack_lock);
-+
-+	ctx->write(&v, sizeof(v), ctx);
-+	err = dump_expect_list(ct, list, ctx);
-+
-+	cpt_close_object(ctx);
-+	return err;
-+}
-+
-+int cpt_dump_ip_conntrack(cpt_context_t * ctx)
-+{
-+ struct ct_holder *ct_list = NULL;
-+ struct ct_holder *c, **cp;
-+ int err = 0;
-+ int index = 0;
-+ int idx;
-+
-+ if (get_exec_env()->_ip_conntrack == NULL)
-+ return 0;
-+
-+ for (idx = atomic_read(&(get_exec_env()->_ip_conntrack->_ip_conntrack_count)); idx >= 0; idx--) {
-+ c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
-+ if (c == NULL) {
-+ err = -ENOMEM;
-+ goto done;
-+ }
-+ memset(c, 0, sizeof(struct ct_holder));
-+ c->next = ct_list;
-+ ct_list = c;
-+ }
-+
-+ c = ct_list;
-+
-+ read_lock_bh(&ip_conntrack_lock);
-+ for (idx = 0; idx < ip_conntrack_htable_size; idx++) {
-+ struct ip_conntrack_tuple_hash *h;
-+ list_for_each_entry(h, &ve_ip_conntrack_hash[idx], list) {
-+ /* Skip reply tuples, they are covered by original
-+ * direction. */
-+ if (DIRECTION(h))
-+ continue;
-+
-+ /* Oops, we have not enough of holders...
-+ * It is impossible. */
-+ if (unlikely(c == NULL)) {
-+ read_unlock_bh(&ip_conntrack_lock);
-+ eprintk_ctx("unexpected conntrack appeared\n");
-+ err = -ENOMEM;
-+ goto done;
-+ }
-+
-+ /* If timer is not running, it means that it
-+ * has just been scheduled on another cpu.
-+ * We should skip this conntrack, it is about to be
-+ * destroyed. */
-+ if (!del_timer(&tuplehash_to_ctrack(h)->timeout)) {
-+ dprintk_ctx("conntrack: no timer\n");
-+ continue;
-+ }
-+
-+ /* Timer is deleted. refcnt is _not_ decreased.
-+ * We are going to restore the timer on exit
-+ * from this function. */
-+ c->cth = h;
-+ c->index = ++index;
-+ c = c->next;
-+ }
-+ }
-+ read_unlock_bh(&ip_conntrack_lock);
-+
-+ /* No conntracks? Good. */
-+ if (index == 0)
-+ goto done;
-+
-+ /* Comb the list a little. */
-+ cp = &ct_list;
-+ while ((c = *cp) != NULL) {
-+ /* Discard unused entries; they can appear, if some
-+ * entries were timed out since we preallocated the list.
-+ */
-+ if (c->cth == NULL) {
-+ *cp = c->next;
-+ kfree(c);
-+ continue;
-+ }
-+
-+ /* Move conntracks attached to expectations to the beginning
-+ * of the list. */
-+ if (tuplehash_to_ctrack(c->cth)->master && c != ct_list) {
-+ *cp = c->next;
-+ c->next = ct_list;
-+ ct_list = c;
-+ dprintk_ctx("conntrack: %d moved in list\n", c->index);
-+ continue;
-+ }
-+ cp = &c->next;
-+ }
-+
-+ cpt_open_section(ctx, CPT_SECT_NET_CONNTRACK);
-+
-+ for (c = ct_list; c; c = c->next) {
-+ err = dump_one_ct(c, ct_list, ctx);
-+ if (err)
-+ goto done;
-+ }
-+
-+ cpt_close_section(ctx);
-+
-+done:
-+ while ((c = ct_list) != NULL) {
-+ ct_list = c->next;
-+ if (c->cth) {
-+ /* Restore timer. refcnt is preserved. */
-+ add_timer(&tuplehash_to_ctrack(c->cth)->timeout);
-+ }
-+ kfree(c);
-+ }
-+ return err;
-+}
-+
-+#endif
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_context.c linux-2.6.16-026test009/kernel/cpt/cpt_context.c
---- linux-2.6.16.orig/kernel/cpt/cpt_context.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_context.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,255 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_context.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/pagemap.h>
-+
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+
-+static void file_write(const void *addr, size_t count, struct cpt_context *ctx)
-+{
-+	mm_segment_t oldfs;
-+	ssize_t err = -EBADF;	/* reported when there is no file or write method */
-+	struct file *file = ctx->file;
-+	/* Write at file->f_pos; only the first failure is kept in write_error. */
-+	oldfs = get_fs(); set_fs(KERNEL_DS);
-+	if (file && file->f_op && file->f_op->write)
-+		err = file->f_op->write(file, addr, count, &file->f_pos);
-+	set_fs(oldfs);
-+	if (err != count && !ctx->write_error)
-+		ctx->write_error = err < 0 ? err : -EIO;
-+}
-+
-+static void file_pwrite(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
-+{
-+	mm_segment_t oldfs;
-+	ssize_t err = -EBADF;	/* reported when there is no file or write method */
-+	struct file *file = ctx->file;
-+	/* Write at the caller-supplied pos; first failure kept in write_error. */
-+	oldfs = get_fs(); set_fs(KERNEL_DS);
-+	if (file && file->f_op && file->f_op->write)
-+		err = file->f_op->write(file, addr, count, &pos);
-+	set_fs(oldfs);
-+	if (err != count && !ctx->write_error)
-+		ctx->write_error = err < 0 ? err : -EIO;
-+}
-+
-+static void file_align(struct cpt_context *ctx)
-+{
-+ struct file *file = ctx->file;
-+
-+ if (file)
-+ file->f_pos = CPT_ALIGN(file->f_pos);
-+}
-+
-+void cpt_context_init(struct cpt_context *ctx)
-+{
-+ int i;
-+
-+ memset(ctx, 0, sizeof(*ctx));
-+
-+ init_MUTEX(&ctx->main_sem);
-+ ctx->refcount = 1;
-+
-+ ctx->current_section = -1;
-+ ctx->current_object = -1;
-+ ctx->pagesize = PAGE_SIZE;
-+ ctx->write = file_write;
-+ ctx->pwrite = file_pwrite;
-+ ctx->align = file_align;
-+ for (i=0; i < CPT_SECT_MAX; i++)
-+ ctx->sections[i] = CPT_NULL;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ init_completion(&ctx->pgin_notify);
-+#endif
-+ cpt_object_init(ctx);
-+}
-+
-+int cpt_open_dumpfile(struct cpt_context *ctx)
-+{
-+ ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
-+ if (ctx->tmpbuf == NULL)
-+ return -ENOMEM;
-+ __cpt_release_buf(ctx);
-+ return 0;
-+}
-+
-+int cpt_close_dumpfile(struct cpt_context *ctx)
-+{
-+ if (ctx->file) {
-+ fput(ctx->file);
-+ ctx->file = NULL;
-+ }
-+ if (ctx->tmpbuf) {
-+ free_page((unsigned long)ctx->tmpbuf);
-+ ctx->tmpbuf = NULL;
-+ }
-+ if (ctx->write_error)
-+ eprintk_ctx("error while writing dump file: %d\n", ctx->write_error);
-+ return ctx->write_error;
-+}
-+
-+int cpt_major_hdr_out(struct cpt_context *ctx)
-+{
-+ struct cpt_major_hdr hdr;
-+
-+ if (ctx->file == NULL)
-+ return 0;
-+
-+ memset(&hdr, 0, sizeof(hdr));
-+ hdr.cpt_signature[0] = CPT_SIGNATURE0;
-+ hdr.cpt_signature[1] = CPT_SIGNATURE1;
-+ hdr.cpt_signature[2] = CPT_SIGNATURE2;
-+ hdr.cpt_signature[3] = CPT_SIGNATURE3;
-+ hdr.cpt_hdrlen = sizeof(hdr);
-+ hdr.cpt_image_version = 1;
-+#ifdef CONFIG_X86_32
-+ hdr.cpt_os_arch = CPT_OS_ARCH_I386;
-+#endif
-+#ifdef CONFIG_X86_64
-+ hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
-+#endif
-+ hdr.cpt_os_version = 0;
-+ hdr.cpt_os_features = 0;
-+ hdr.cpt_pagesize = PAGE_SIZE;
-+ hdr.cpt_hz = HZ;
-+ hdr.cpt_start_jiffies64 = ctx->virt_jiffies64;
-+ hdr.cpt_start_sec = ctx->start_time.tv_sec;
-+ hdr.cpt_start_nsec = ctx->start_time.tv_nsec;
-+ hdr.cpt_cpu_caps[0] = ctx->src_cpu_flags;
-+ hdr.cpt_kernel_config[0] = ctx->kernel_config_flags;
-+ hdr.cpt_iptables_mask = ctx->iptables_mask;
-+
-+ ctx->write(&hdr, sizeof(hdr), ctx);
-+ return 0;
-+}
-+
-+int cpt_close_section(struct cpt_context *ctx)
-+{
-+ if (ctx->file && ctx->current_section >= 0) {
-+ __u64 next = ctx->file->f_pos - ctx->current_section;
-+ ctx->pwrite(&next, 8, ctx, ctx->current_section);
-+ ctx->current_section = -1;
-+ }
-+ return 0;
-+}
-+EXPORT_SYMBOL(cpt_close_section);
-+
-+int cpt_open_section(struct cpt_context *ctx, __u32 type)
-+{
-+ struct cpt_section_hdr hdr;
-+
-+ if (ctx->file == NULL)
-+ return 0;
-+
-+ cpt_close_section(ctx);
-+
-+ ctx->current_section = ctx->file->f_pos;
-+ ctx->sections[type] = ctx->current_section;
-+
-+ hdr.cpt_next = 0;
-+ hdr.cpt_section = type;
-+ hdr.cpt_hdrlen = sizeof(hdr);
-+ hdr.cpt_align = 0;
-+ ctx->write(&hdr, sizeof(hdr), ctx);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(cpt_open_section);
-+
-+
-+int cpt_close_object(struct cpt_context *ctx)
-+{
-+ if (ctx->file && ctx->current_object >= 0) {
-+ __u64 next = ctx->file->f_pos - ctx->current_object;
-+ ctx->pwrite(&next, 8, ctx, ctx->current_object);
-+ ctx->current_object = -1;
-+ }
-+ return 0;
-+}
-+EXPORT_SYMBOL(cpt_close_object);
-+
-+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+ if (ctx->file == NULL)
-+ return 0;
-+
-+ cpt_close_object(ctx);
-+
-+ ctx->current_object = ctx->file->f_pos;
-+ if (obj)
-+ cpt_obj_setpos(obj, ctx->current_object, ctx);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(cpt_open_object);
-+
-+int cpt_push_object(loff_t *saved, struct cpt_context *ctx)
-+{
-+ if (ctx->file) {
-+ *saved = ctx->current_object;
-+ ctx->current_object = ctx->file->f_pos;
-+ }
-+ return 0;
-+}
-+EXPORT_SYMBOL(cpt_push_object);
-+
-+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx)
-+{
-+ ctx->current_object = *saved;
-+ return 0;
-+}
-+EXPORT_SYMBOL(cpt_pop_object);
-+
-+int cpt_dump_tail(struct cpt_context *ctx)
-+{
-+ struct cpt_major_tail hdr;
-+ int i;
-+
-+ if (ctx->file == NULL)
-+ return 0;
-+
-+ cpt_open_section(ctx, CPT_SECT_TRAILER);
-+ memset(&hdr, 0, sizeof(hdr));
-+ hdr.cpt_next = sizeof(hdr);
-+ hdr.cpt_object = CPT_OBJ_TRAILER;
-+ hdr.cpt_hdrlen = sizeof(hdr);
-+ hdr.cpt_content = CPT_CONTENT_VOID;
-+ hdr.cpt_lazypages = 0;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ hdr.cpt_lazypages = ctx->lazypages;
-+#endif
-+ hdr.cpt_64bit = ctx->tasks64;
-+ hdr.cpt_signature[0] = CPT_SIGNATURE0;
-+ hdr.cpt_signature[1] = CPT_SIGNATURE1;
-+ hdr.cpt_signature[2] = CPT_SIGNATURE2;
-+ hdr.cpt_signature[3] = CPT_SIGNATURE3;
-+ hdr.cpt_nsect = CPT_SECT_MAX_INDEX;
-+ for (i = 0; i < CPT_SECT_MAX_INDEX; i++)
-+ hdr.cpt_sections[i] = ctx->sections[i];
-+
-+ ctx->write(&hdr, sizeof(hdr), ctx);
-+ cpt_close_section(ctx);
-+ return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_context.h linux-2.6.16-026test009/kernel/cpt/cpt_context.h
---- linux-2.6.16.orig/kernel/cpt/cpt_context.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_context.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,196 @@
-+#include <linux/fs.h>
-+#include <asm/uaccess.h>
-+
-+#define CPT_CTX_ERROR -1
-+#define CPT_CTX_IDLE 0
-+#define CPT_CTX_SUSPENDING 1
-+#define CPT_CTX_SUSPENDED 2
-+#define CPT_CTX_DUMPING 3
-+#define CPT_CTX_UNDUMPING 4
-+#define CPT_CTX_UNDUMPED 5
-+
-+#define CPT_TID(tsk) (tsk)->pid, virt_pid(tsk), (tsk)->comm
-+#define CPT_FID "%d,%d(%s)"
-+
-+
-+typedef struct cpt_context
-+{
-+ struct list_head ctx_list;
-+ int refcount;
-+ int ctx_state;
-+ int objcount;
-+ int sticky;
-+ struct semaphore main_sem;
-+
-+ struct file *errorfile;
-+ struct file *statusfile;
-+ struct file *lockfile;
-+
-+ int errno;
-+ char *error_msg;
-+ loff_t err_offset;
-+
-+ struct file *file;
-+ char *tmpbuf;
-+ int pagesize;
-+
-+ loff_t current_section;
-+ loff_t current_object;
-+
-+ loff_t sections[CPT_SECT_MAX];
-+
-+ __u32 errormask;
-+ __u32 write_error;
-+
-+ struct list_head object_array[CPT_OBJ_MAX];
-+
-+ void (*write)(const void *addr, size_t count, struct cpt_context *ctx);
-+ void (*pwrite)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
-+ ssize_t (*read)(void *addr, size_t count, struct cpt_context *ctx);
-+ ssize_t (*pread)(void *addr, size_t count, struct cpt_context *ctx, loff_t pos);
-+ void (*align)(struct cpt_context *ctx);
-+ int ve_id;
-+ int contextid;
-+ __u64 cpt_jiffies64; /* Host jiffies64 at the moment of cpt/rst,
-+ * corresponding to start_time */
-+ __u64 virt_jiffies64; /* Virtual jiffies64. It is == cpt_jiffies64 when
-+ * VE did not migrate. */
-+ struct timespec start_time;
-+ struct timespec delta_time;
-+ int image_version;
-+ int lo_index;
-+ int lo_index_old;
-+ int venet_index;
-+ int venet_index_old;
-+ __u64 iptables_mask;
-+
-+#define CPT_ANONVMA_HBITS (sizeof(void*) == 4 ? 10 : 9)
-+#define CPT_ANONVMA_HSIZE (1<<CPT_ANONVMA_HBITS)
-+ struct hlist_head *anonvmas;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ struct file *pagein_file_in;
-+ struct file *pagein_file_out;
-+ int lazy_vm;
-+ int lazypages;
-+ int lazytype;
-+ task_t *pgin_task;
-+ unsigned long last_pagein;
-+ struct pagein_desc **pgin_dir;
-+ struct pgin_device *pagein_dev;
-+ struct completion pgin_notify;
-+ struct completion *pgind_completion;
-+ struct swap_info_struct *pgin_swp;
-+#endif
-+ int tasks64;
-+ __u32 src_cpu_flags;
-+ __u32 dst_cpu_flags;
-+ __u32 kernel_config_flags;
-+
-+ struct filejob *filejob_queue;
-+} cpt_context_t;
-+
-+typedef struct {
-+ int pid;
-+ cpt_context_t *ctx;
-+ struct completion done;
-+} pagein_info_t;
-+
-+int pagein_info_printf(char *buf, cpt_context_t *ctx);
-+
-+int cpt_open_dumpfile(struct cpt_context *);
-+int cpt_close_dumpfile(struct cpt_context *);
-+int rst_open_dumpfile(struct cpt_context *);
-+void rst_close_dumpfile(struct cpt_context *);
-+void cpt_context_init(struct cpt_context *);
-+void rst_context_init(struct cpt_context *);
-+void cpt_context_destroy(struct cpt_context *);
-+
-+void rst_report_error(int err, cpt_context_t *ctx);
-+
-+
-+int cpt_major_hdr_out(struct cpt_context *ctx);
-+int cpt_dump_tail(struct cpt_context *ctx);
-+int cpt_close_section(struct cpt_context *ctx);
-+int cpt_open_section(struct cpt_context *ctx, __u32 type);
-+int cpt_close_object(struct cpt_context *ctx);
-+int cpt_open_object(cpt_object_t *obj, struct cpt_context *ctx);
-+int cpt_push_object(loff_t *saved, struct cpt_context *ctx);
-+int cpt_pop_object(loff_t *saved, struct cpt_context *ctx);
-+
-+int rst_get_section(int type, struct cpt_context * ctx, loff_t *, loff_t *);
-+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx);
-+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx);
-+void rst_put_name(__u8 *name, struct cpt_context *ctx);
-+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx);
-+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx);
-+
-+#define rst_get_object(type, pos, tmp, ctx) \
-+ _rst_get_object((type), (pos), (tmp), sizeof(*(tmp)), (ctx))
-+
-+extern int debug_level;
-+
-+#define cpt_printk(lvl, fmt, args...) do { \
-+ if (lvl <= debug_level) \
-+ printk(fmt, ##args); \
-+ } while (0)
-+
-+#define dprintk(a...) cpt_printk(3, "CPT DBG: " a)
-+#define dprintk_ctx(f, arg...) dprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
-+
-+#define wprintk(a...) cpt_printk(2, "CPT WRN: " a)
-+#define wprintk_ctx(f, arg...) wprintk("%p,%u: " f, ctx, ctx->ve_id, ##arg)
-+
-+#define eprintk(a...) cpt_printk(1, "CPT ERR: " a)
-+#define eprintk_ctx(f, arg...) \
-+do {	/* scnprintf(): bytes stored, so err_offset stays < PAGE_SIZE */ \
-+	eprintk("%p,%u :" f, ctx, ctx->ve_id, ##arg); \
-+	if (ctx->error_msg && ctx->err_offset < PAGE_SIZE) \
-+		ctx->err_offset += scnprintf((char*)(ctx->error_msg + \
-+			ctx->err_offset), \
-+			PAGE_SIZE - ctx->err_offset, f, ##arg); \
-+} while(0)
-+
-+#define CPT_TMPBUF_FREE 0x789adf12
-+#define CPT_TMPBUF_BUSY 0xabcd9876
-+
-+static inline void *cpt_get_buf(cpt_context_t *ctx)
-+{
-+ void *buf = ctx->tmpbuf;
-+
-+ BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_FREE);
-+ *(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_BUSY;
-+ return buf;
-+}
-+
-+static inline void __cpt_release_buf(cpt_context_t *ctx)
-+{
-+ void *buf = ctx->tmpbuf;
-+
-+ *(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
-+}
-+
-+static inline void cpt_release_buf(cpt_context_t *ctx)
-+{
-+ void *buf = ctx->tmpbuf;
-+
-+ BUG_ON(*(u32*)(buf + PAGE_SIZE - 4) != CPT_TMPBUF_BUSY);
-+ *(u32*)(buf + PAGE_SIZE - 4) = CPT_TMPBUF_FREE;
-+}
-+
-+static inline void cpt_flush_error(cpt_context_t *ctx)
-+{
-+ mm_segment_t oldfs;
-+
-+ if (ctx->errorfile && ctx->error_msg && ctx->err_offset) {
-+ if (ctx->errorfile->f_op && ctx->errorfile->f_op->write) {
-+ oldfs = get_fs();
-+ set_fs(KERNEL_DS);
-+ ctx->errorfile->f_op->write(ctx->errorfile,
-+ ctx->error_msg, ctx->err_offset,
-+ &ctx->errorfile->f_pos);
-+ set_fs(oldfs);
-+ }
-+ ctx->error_msg[0] = 0;
-+ ctx->err_offset = 0;
-+ }
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_dump.c linux-2.6.16-026test009/kernel/cpt/cpt_dump.c
---- linux-2.6.16.orig/kernel/cpt/cpt_dump.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_dump.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,838 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_dump.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/pagemap.h>
-+#include <linux/ptrace.h>
-+#include <linux/smp_lock.h>
-+#include <linux/ve.h>
-+#include <linux/ve_proto.h>
-+#include <linux/virtinfo.h>
-+#include <ub/ub_task.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_dump.h"
-+#include "cpt_files.h"
-+#include "cpt_mm.h"
-+#include "cpt_process.h"
-+#include "cpt_net.h"
-+#include "cpt_socket.h"
-+#include "cpt_ubc.h"
-+#include "cpt_kernel.h"
-+
-+
-+static int vps_child_level(task_t *root, task_t *c)
-+{
-+ int level = 0;
-+ int veid = VE_TASK_INFO(c)->owner_env->veid;
-+
-+ while (VE_TASK_INFO(c)->owner_env->veid == veid) {
-+ if (c->pid != c->tgid)
-+ c = c->group_leader;
-+ if (c == root)
-+ return level;
-+
-+ c = c->real_parent;
-+ level++;
-+ }
-+ return -1;
-+}
-+
-+static inline int freezable(struct task_struct * p)
-+{
-+ if (p->exit_state)
-+ return 0;
-+
-+ switch (p->state) {
-+ case EXIT_ZOMBIE:
-+ case EXIT_DEAD:
-+ case TASK_STOPPED:
-+#if TASK_TRACED != TASK_STOPPED
-+ case TASK_TRACED:
-+#endif
-+ return 0;
-+ default:
-+ return 1;
-+ }
-+}
-+
-+/*
-+ * A comment is necessary about PF_FREEZE, PF_FROZEN, TIF_FREEZE...
-+ *
-+ * SWSUSP uses the PF_FREEZE flag in tsk->flags, raising it in the context
-+ * of another process. Apparently, this is unacceptable on SMP.
-+ * Let's take freeze_processes() in kernel/power/process.c as an example.
-+ * Unserialized modifications of tsk->flags easily
-+ * (believe it or not, but it happens with probability of almost 100% :-))
-+ * create the situation when setting PF_FREEZE in freeze_processes(),
-+ * which quickly spins raising PF_FREEZE of all the processes,
-+ * _clears_ PF_FROZEN just set in refrigerator(), so that suspend deadlocks.
-+ *
-+ * So, to make things clean, we require that those flags may be modified
-+ * only under tsk->sighand->siglock, which is quite natural because PF_FREEZE
-+ * is just a kind of signal.
-+ *
-+ * It is not enough, because we are still not allowed to change tsk->flags
-+ * in the context of another process: we can corrupt other flags when the
-+ * process running on another cpu modifies them. So, we use TIF_FREEZE in
-+ * thread flags, which can be changed atomically.
-+ *
-+ * PF_FROZEN also changes in context of another process, but this happens
-+ * only when the process is already in refrigerator() which does not modify
-+ * tsk->flags.
-+ */
-+
-+static int vps_stop_tasks(struct cpt_context *ctx)
-+{
-+ unsigned long start_time = jiffies;
-+ int err;
-+ task_t *p, *g;
-+ int todo;
-+ int round = 0;
-+
-+ do_gettimespec(&ctx->start_time);
-+ ctx->cpt_jiffies64 = get_jiffies_64();
-+ ctx->virt_jiffies64 = ctx->cpt_jiffies64 + get_exec_env()->jiffies_fixup;
-+
-+ read_lock(&tasklist_lock);
-+ for(;;) {
-+ task_t *root;
-+ todo = 0;
-+
-+ root = find_task_by_pid_ve(1);
-+ if (!root) {
-+ read_unlock(&tasklist_lock);
-+ eprintk_ctx("cannot find ve init\n");
-+ return -ESRCH;
-+ }
-+
-+ do_each_thread_ve(g, p) {
-+ if (vps_child_level(root, p) >= 0) {
-+ if (!is_virtual_pid(virt_pid(p))) {
-+ eprintk_ctx("external process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
-+ todo = -1;
-+ goto out;
-+ }
-+ if (p->vfork_done) {
-+ /* Task between vfork()...exec()
-+ * cannot be frozen, because parent
-+ * waits in uninterruptible state.
-+ * So, we do nothing, waiting for
-+ * exec(), unless:
-+ */
-+ if (p->state == TASK_STOPPED ||
-+ p->state == TASK_TRACED) {
-+ eprintk_ctx("task %d/%d(%s) is stopped while vfork(). Checkpointing is impossible.\n", virt_pid(p), p->pid, p->comm);
-+ todo = -1;
-+ /* It is fatal, _user_ stopped
-+ * vfork()ing task, so that we
-+ * cannot suspend now.
-+ */
-+ } else {
-+ todo = -3;
-+ }
-+ goto out;
-+ }
-+ if (p->state == TASK_TRACED
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
-+ && !p->stopped_state
-+#endif
-+ ) {
-+ int ptrace_id = p->pn_state;
-+ /* Debugger waits for signal. */
-+ switch (ptrace_id) {
-+ case PN_STOP_TF:
-+ case PN_STOP_TF_RT:
-+ case PN_STOP_ENTRY:
-+ case PN_STOP_FORK:
-+ case PN_STOP_VFORK:
-+ case PN_STOP_SIGNAL:
-+ case PN_STOP_EXIT:
-+ case PN_STOP_LEAVE:
-+ break;
-+ default:
-+ eprintk_ctx("task %d/%d(%s) is stopped by debugger while %d.\n", virt_pid(p), p->pid, p->comm, ptrace_id);
-+ todo = -1;
-+ goto out;
-+ }
-+ }
-+ if (p->flags & PF_NOFREEZE)
-+ goto out;
-+ if (p->flags & PF_FROZEN)
-+ continue;
-+ if (!freezable(p))
-+ continue;
-+
-+ spin_lock_irq(&p->sighand->siglock);
-+ set_tsk_thread_flag(p, TIF_FREEZE);
-+ signal_wake_up(p, 0);
-+ spin_unlock_irq(&p->sighand->siglock);
-+
-+ if (round == 10)
-+ wprintk_ctx("%d/%d(%s) is running\n", virt_pid(p), p->pid, p->comm);
-+
-+ todo++;
-+ } else {
-+ if (p != current) {
-+ eprintk_ctx("foreign process %d/%d(%s) inside VPS (e.g. vzctl enter or vzctl exec).\n", virt_pid(p), p->pid, p->comm);
-+ todo = -1;
-+ goto out;
-+ }
-+ }
-+ } while_each_thread_ve(g, p);
-+
-+out:
-+ if (todo &&
-+ (time_after(jiffies, start_time + 10*HZ) ||
-+ signal_pending(current) || todo < 0)) {
-+ do_each_thread_ve(g, p) {
-+ if (vps_child_level(root, p) >= 0) {
-+ spin_lock_irq(&p->sighand->siglock);
-+ clear_tsk_thread_flag(p, TIF_FREEZE);
-+ if (p->flags & PF_FROZEN) {
-+ p->flags &= ~PF_FROZEN;
-+ wake_up_process(p);
-+ }
-+ spin_unlock_irq(&p->sighand->siglock);
-+ }
-+ } while_each_thread_ve(g, p);
-+ if (todo > 0)
-+ todo = -2;
-+ /* This is sign of failure of printk(), which is not
-+ * ours. So, no prefixes. */
-+ printk(">\n");
-+ }
-+
-+ read_unlock(&tasklist_lock);
-+
-+ if (!todo)
-+ return 0;
-+
-+ if (todo == -1) {
-+ eprintk_ctx("suspend is impossible now.\n");
-+ return -EAGAIN;
-+ }
-+
-+ if (todo == -2) {
-+ eprintk_ctx("interrupted or timed out.\n");
-+ return -EINTR;
-+ }
-+
-+ if (time_after(jiffies, start_time + 10*HZ) ||
-+ signal_pending(current)) {
-+ if (todo == -3) {
-+ eprintk_ctx("vfork() is active, suspend is impossible now.\n");
-+ } else {
-+ eprintk_ctx("suspend is impossible, reason %d\n", todo);
-+ }
-+ return -EAGAIN;
-+ }
-+
-+ if (todo < 0 || round > 0) {
-+ current->state = TASK_INTERRUPTIBLE;
-+ schedule_timeout(HZ/50);
-+ } else {
-+ yield();
-+ }
-+
-+ read_lock(&tasklist_lock);
-+ round++;
-+ }
-+
-+ read_unlock(&tasklist_lock);
-+ return err;
-+}
-+
-+static int cpt_unlock_ve(struct cpt_context *ctx)
-+{
-+ struct ve_struct *env;
-+
-+ env = get_ve_by_id(ctx->ve_id);
-+ if (!env)
-+ return -ESRCH;
-+ down_write(&env->op_sem);
-+ env->is_locked = 0;
-+ up_write(&env->op_sem);
-+ put_ve(env);
-+ return 0;
-+}
-+
-+int cpt_resume(struct cpt_context *ctx)
-+{
-+ cpt_object_t *obj;
-+
-+ cpt_unlock_sockets(ctx);
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ if (ctx->pgin_task) {
-+ wait_for_completion(&ctx->pgin_notify);
-+ put_task_struct(ctx->pgin_task);
-+ ctx->pgin_task = NULL;
-+ }
-+#endif
-+
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+
-+ spin_lock_irq(&tsk->sighand->siglock);
-+ clear_tsk_thread_flag(tsk, TIF_FREEZE);
-+ if (tsk->flags & PF_FROZEN) {
-+ tsk->flags &= ~PF_FROZEN;
-+ wake_up_process(tsk);
-+ } else if (freezable(tsk)) {
-+ eprintk_ctx("strange, %s not frozen\n", tsk->comm );
-+ }
-+ spin_unlock_irq(&tsk->sighand->siglock);
-+ put_task_struct(tsk);
-+ }
-+
-+ cpt_resume_network(ctx);
-+
-+ cpt_unlock_ve(ctx);
-+
-+ cpt_finish_ubc(ctx);
-+ cpt_object_destroy(ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Kill every task recorded in the checkpoint context.
-+ *
-+ * Returns -EINVAL when ctx->ve_id is zero and -ESRCH when the VE cannot
-+ * be found.  After that point err is never modified, so the function
-+ * returns 0.
-+ *
-+ * The task whose virt_pid() is 1 is remembered and killed last, after
-+ * all other tasks were sent SIGKILL and the caller yielded once.
-+ */
-+int cpt_kill(struct cpt_context *ctx)
-+{
-+ int err = 0;
-+ struct ve_struct *env;
-+ cpt_object_t *obj;
-+ task_t *root_task = NULL;
-+ long delay;
-+
-+ if (!ctx->ve_id)
-+ return -EINVAL;
-+
-+ env = get_ve_by_id(ctx->ve_id);
-+ if (!env)
-+ return -ESRCH;
-+
-+ /* from here cpt_kill succeeds */
-+ if (VE_TASK_INFO(current)->owner_env == env) {
-+ wprintk_ctx("attempt to kill ve from inside, escaping...\n");
-+
-+ /* Re-home the calling task to ve0 and move one process count
-+ * from env to ve0, all under tasklist_lock. */
-+ write_lock_irq(&tasklist_lock);
-+ VE_TASK_INFO(current)->owner_env = get_ve0();
-+ REMOVE_VE_LINKS(current);
-+ SET_VE_LINKS(current);
-+
-+ atomic_inc(&get_ve0()->pcounter);
-+ atomic_dec(&env->pcounter);
-+ write_unlock_irq(&tasklist_lock);
-+ set_exec_env(get_ve0());
-+ }
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ if (ctx->pgin_task) {
-+ wait_for_completion(&ctx->pgin_notify);
-+ put_task_struct(ctx->pgin_task);
-+ ctx->pgin_task = NULL;
-+ }
-+#endif
-+
-+ cpt_kill_sockets(ctx);
-+
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+
-+ /* Already exiting: only drop the reference. */
-+ if (tsk->exit_state) {
-+ put_task_struct(tsk);
-+ continue;
-+ }
-+
-+ /* Defer the task with virtual pid 1; its reference is kept
-+ * until the block after the loop. */
-+ if (virt_pid(tsk) == 1) {
-+ root_task = tsk;
-+ continue;
-+ }
-+
-+ /* Detach a traced task: clear ptrace and, if it sits on a
-+ * ptrace list, relink it under its real parent. */
-+ if (tsk->ptrace) {
-+ write_lock_irq(&tasklist_lock);
-+ tsk->ptrace = 0;
-+ if (!list_empty(&tsk->ptrace_list)) {
-+ list_del_init(&tsk->ptrace_list);
-+ REMOVE_LINKS(tsk);
-+ tsk->parent = tsk->real_parent;
-+ SET_LINKS(tsk);
-+ }
-+ write_unlock_irq(&tasklist_lock);
-+ }
-+
-+ send_sig(SIGKILL, tsk, 1);
-+
-+ /* Block everything except SIGKILL, mark a signal pending and
-+ * clear the freeze flags so the woken task can see it. */
-+ spin_lock_irq(&tsk->sighand->siglock);
-+ sigfillset(&tsk->blocked);
-+ sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
-+ set_tsk_thread_flag(tsk, TIF_SIGPENDING);
-+ clear_tsk_thread_flag(tsk, TIF_FREEZE);
-+ if (tsk->flags & PF_FROZEN)
-+ tsk->flags &= ~PF_FROZEN;
-+ spin_unlock_irq(&tsk->sighand->siglock);
-+
-+ wake_up_process(tsk);
-+ put_task_struct(tsk);
-+ }
-+
-+ yield();
-+
-+ /* Same kill sequence as in the loop, applied to the deferred task. */
-+ if (root_task != NULL) {
-+ send_sig(SIGKILL, root_task, 1);
-+
-+ spin_lock_irq(&root_task->sighand->siglock);
-+ sigfillset(&root_task->blocked);
-+ sigdelsetmask(&root_task->blocked, sigmask(SIGKILL));
-+ set_tsk_thread_flag(root_task, TIF_SIGPENDING);
-+ clear_tsk_thread_flag(root_task, TIF_FREEZE);
-+ if (root_task->flags & PF_FROZEN)
-+ root_task->flags &= ~PF_FROZEN;
-+ spin_unlock_irq(&root_task->sighand->siglock);
-+
-+ wake_up_process(root_task);
-+ put_task_struct(root_task);
-+ }
-+
-+ cpt_finish_ubc(ctx);
-+ cpt_object_destroy(ctx);
-+
-+ /* Poll until env->counter reads 1 or a signal is pending for the
-+ * caller.  The sleep doubles each round until it reaches HZ or
-+ * more, after which it is set to HZ jiffies. */
-+ delay = 1;
-+ while (atomic_read(&env->counter) != 1) {
-+ if (signal_pending(current))
-+ break;
-+ current->state = TASK_INTERRUPTIBLE;
-+ delay = (delay < HZ) ? (delay << 1) : HZ;
-+ schedule_timeout(delay);
-+ }
-+ put_ve(env);
-+
-+ return err;
-+}
-+
-+/*
-+ * Pass the three beancounters held in @t's task_bc (exec_ub, task_ub and
-+ * fork_sub) to cpt_add_ubc() for context @ctx.
-+ */
-+static void collect_task_ubc(task_t *t, struct cpt_context *ctx)
-+{
-+	cpt_add_ubc(t->task_bc.exec_ub, ctx);
-+	cpt_add_ubc(t->task_bc.task_ub, ctx);
-+	cpt_add_ubc(t->task_bc.fork_sub, ctx);
-+}
-+
-+/*
-+ * Create a CPT_OBJ_TASK object for @child and insert it after @head.
-+ *
-+ * The caller passes in a task reference.  On failure (the task is
-+ * freezable but not frozen, or the object cannot be allocated) that
-+ * reference is dropped and NULL is returned.  On success the new object
-+ * is returned with o_count set to 1.
-+ */
-+static cpt_object_t * remember_task(task_t * child, cpt_object_t * head,
-+		cpt_context_t * ctx)
-+{
-+	cpt_object_t *entry;
-+
-+	if (freezable(child) && !(child->flags & PF_FROZEN)) {
-+		eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(child));
-+		goto drop_ref;
-+	}
-+
-+	/* A task must never be collected twice. */
-+	if (lookup_cpt_object(CPT_OBJ_TASK, child, ctx) != NULL)
-+		BUG();
-+
-+	entry = alloc_cpt_object(GFP_KERNEL, ctx);
-+	if (entry == NULL)
-+		goto drop_ref;
-+
-+	entry->o_count = 1;
-+	cpt_obj_setobj(entry, child, ctx);
-+	insert_cpt_object(CPT_OBJ_TASK, entry, head, ctx);
-+	collect_task_ubc(child, ctx);
-+	return entry;
-+
-+drop_ref:
-+	put_task_struct(child);
-+	return NULL;
-+}
-+
-+/*
-+ * Build the list of CPT_OBJ_TASK objects, starting from the task found
-+ * by find_task_by_pid_ve(1) and following threads, children and ptrace
-+ * children of every task already on the list.
-+ *
-+ * Returns 0 on success, -ESRCH when the root task is not found,
-+ * -EINVAL for a freezable task that is not frozen or for a thread whose
-+ * real_parent differs from its leader's, and -ENOMEM when an object
-+ * cannot be added.
-+ *
-+ * NOTE(review): remember_task() also returns NULL for a non-frozen
-+ * task, and that case is reported here as -ENOMEM as well.
-+ */
-+static int vps_collect_tasks(struct cpt_context *ctx)
-+{
-+ int err = -ESRCH;
-+ cpt_object_t *obj;
-+ task_t *root;
-+
-+ /* Look up the root task and take a reference under tasklist_lock. */
-+ read_lock(&tasklist_lock);
-+ root = find_task_by_pid_ve(1);
-+ if (root)
-+ get_task_struct(root);
-+ read_unlock(&tasklist_lock);
-+
-+ if (!root) {
-+ err = -ESRCH;
-+ eprintk_ctx("vps_collect_tasks: cannot find root\n");
-+ goto out;
-+ }
-+
-+ if ((obj = alloc_cpt_object(GFP_KERNEL, ctx)) == NULL) {
-+ put_task_struct(root);
-+ return -ENOMEM;
-+ }
-+ obj->o_count = 1;
-+ cpt_obj_setobj(obj, root, ctx);
-+ intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
-+ collect_task_ubc(root, ctx);
-+
-+ /* Collect process subtree recursively */
-+ /* Objects added by remember_task() below are inserted after "head",
-+ * so this same loop reaches them later. */
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ cpt_object_t *head = obj;
-+ task_t *tsk = obj->o_obj;
-+ task_t *child;
-+
-+ if (freezable(tsk) && !(tsk->flags&PF_FROZEN)) {
-+ eprintk_ctx("process " CPT_FID " is not frozen\n", CPT_TID(tsk));
-+ err = -EINVAL;
-+ goto out;
-+ }
-+
-+ wait_task_inactive(tsk);
-+
-+ /* For a thread group leader, walk the other threads of the group. */
-+ if (tsk->pid == tsk->tgid) {
-+ child = tsk;
-+ for (;;) {
-+ read_lock(&tasklist_lock);
-+ child = next_thread(child);
-+ if (child != tsk)
-+ get_task_struct(child);
-+ read_unlock(&tasklist_lock);
-+
-+ /* Wrapped around to the leader: all threads visited. */
-+ if (child == tsk)
-+ break;
-+
-+ if (child->real_parent != tsk->real_parent) {
-+ put_task_struct(child);
-+ eprintk_ctx("illegal thread structure, kernel bug\n");
-+ return -EINVAL;
-+ }
-+
-+ if ((head = remember_task(child, head, ctx)) == NULL)
-+ return -ENOMEM;
-+ }
-+ }
-+
-+ /* About locking. VE is frozen. But lists of children
-+ * may change at least for init, when entered task reparents
-+ * to init and when reparented task exits. If we take care
-+ * of this case, we still can unlock while scanning
-+ * tasklists.
-+ */
-+ read_lock(&tasklist_lock);
-+ list_for_each_entry(child, &tsk->children, sibling) {
-+ /* Only thread group leaders whose real parent is tsk. */
-+ if (child->real_parent != tsk)
-+ continue;
-+ if (child->pid != child->tgid)
-+ continue;
-+ get_task_struct(child);
-+ /* The lock is dropped around remember_task() and retaken
-+ * before the list walk continues. */
-+ read_unlock(&tasklist_lock);
-+
-+ if ((head = remember_task(child, head, ctx)) == NULL)
-+ return -ENOMEM;
-+
-+ read_lock(&tasklist_lock);
-+ }
-+
-+ /* Same filter and locking pattern for the ptrace_children list. */
-+ list_for_each_entry(child, &tsk->ptrace_children, ptrace_list) {
-+ if (child->real_parent != tsk)
-+ continue;
-+ if (child->pid != child->tgid)
-+ continue;
-+ get_task_struct(child);
-+ read_unlock(&tasklist_lock);
-+
-+ if ((head = remember_task(child, head, ctx)) == NULL)
-+ return -ENOMEM;
-+
-+ read_lock(&tasklist_lock);
-+ }
-+ read_unlock(&tasklist_lock);
-+ }
-+
-+ return 0;
-+
-+out:
-+ return err;
-+}
-+
-+/*
-+ * Run the resource collectors in a fixed order: mm, sysv, files, fs,
-+ * namespace, signals.  Stops at the first collector that returns
-+ * non-zero and returns that value; returns 0 when all succeed.
-+ */
-+static int cpt_collect(struct cpt_context *ctx)
-+{
-+	int err;
-+
-+	err = cpt_collect_mm(ctx);
-+	if (err)
-+		return err;
-+
-+	err = cpt_collect_sysv(ctx);
-+	if (err)
-+		return err;
-+
-+	err = cpt_collect_files(ctx);
-+	if (err)
-+		return err;
-+
-+	err = cpt_collect_fs(ctx);
-+	if (err)
-+		return err;
-+
-+	err = cpt_collect_namespace(ctx);
-+	if (err)
-+		return err;
-+
-+	return cpt_collect_signals(ctx);
-+}
-+
-+/*
-+ * Write the CPT_SECT_VEINFO section: one cpt_veinfo_image holding the
-+ * shm/msg/sem limits of the current exec env, plus the time and jiffies
-+ * elapsed since the values stored in ve->start_timespec and
-+ * ve->start_jiffies.  Always returns 0.
-+ */
-+static int cpt_dump_veinfo(cpt_context_t *ctx)
-+{
-+	struct cpt_veinfo_image img;
-+	struct ve_struct *ve;
-+	struct timespec elapsed;
-+	int k;
-+
-+	cpt_open_section(ctx, CPT_SECT_VEINFO);
-+	cpt_open_object(NULL, ctx);
-+
-+	img.cpt_next = CPT_NULL;
-+	img.cpt_object = CPT_OBJ_VEINFO;
-+	img.cpt_hdrlen = sizeof(img);
-+	img.cpt_content = CPT_CONTENT_VOID;
-+
-+	ve = get_exec_env();
-+
-+	img.shm_ctl_all = ve->_shm_ctlall;
-+	img.shm_ctl_max = ve->_shm_ctlmax;
-+	img.shm_ctl_mni = ve->_shm_ctlmni;
-+
-+	img.msg_ctl_max = ve->_msg_ctlmax;
-+	img.msg_ctl_mni = ve->_msg_ctlmni;
-+	img.msg_ctl_mnb = ve->_msg_ctlmnb;
-+
-+	/* The image array must have the same size as the VE's array. */
-+	BUG_ON(sizeof(ve->_sem_ctls) != sizeof(img.sem_ctl_arr));
-+	for (k = 0; k < 4; k++)
-+		img.sem_ctl_arr[k] = ve->_sem_ctls[k];
-+
-+	/* Monotonic time now minus the VE start time, normalized. */
-+	do_posix_clock_monotonic_gettime(&elapsed);
-+	_set_normalized_timespec(&elapsed,
-+			elapsed.tv_sec - ve->start_timespec.tv_sec,
-+			elapsed.tv_nsec - ve->start_timespec.tv_nsec);
-+	img.start_timespec_delta = cpt_timespec_export(&elapsed);
-+	img.start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
-+
-+	ctx->write(&img, sizeof(img), ctx);
-+	cpt_close_object(ctx);
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Write string @s, including its terminating NUL, as one CPT_OBJ_NAME
-+ * object and then call ctx->align().
-+ */
-+static void cpt_dump_uts_field(const char *s, cpt_context_t *ctx)
-+{
-+	struct cpt_object_hdr hdr;
-+	int len = strlen(s);
-+
-+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(len + 1);
-+	hdr.cpt_object = CPT_OBJ_NAME;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = CPT_CONTENT_NAME;
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+	ctx->write(s, len + 1, ctx);
-+	ctx->align(ctx);
-+}
-+
-+/*
-+ * Write the CPT_SECT_UTSNAME section: the nodename followed by the
-+ * domainname from ve_utsname, each as a name object.  Always returns 0.
-+ */
-+static int cpt_dump_utsname(cpt_context_t *ctx)
-+{
-+	cpt_open_section(ctx, CPT_SECT_UTSNAME);
-+
-+	cpt_dump_uts_field(ve_utsname.nodename, ctx);
-+	cpt_dump_uts_field(ve_utsname.domainname, ctx);
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Write the checkpoint image for the VE named by ctx->ve_id.
-+ *
-+ * Returns -EINVAL when ve_id is zero and -ESRCH when the VE is not
-+ * found, not running or not locked.  Otherwise the dump file is opened,
-+ * the sections are written in a fixed order and the file is closed.
-+ * The first dump error is returned; if there was none, the result of
-+ * cpt_close_dumpfile() is returned.
-+ */
-+int cpt_dump(struct cpt_context *ctx)
-+{
-+	struct ve_struct *oldenv, *env;
-+	int err, err2 = 0;
-+
-+	if (!ctx->ve_id)
-+		return -EINVAL;
-+
-+	env = get_ve_by_id(ctx->ve_id);
-+	if (!env)
-+		return -ESRCH;
-+
-+	down_read(&env->op_sem);
-+
-+	if (!env->is_running || !env->is_locked) {
-+		err = -ESRCH;
-+		goto out_noenv;
-+	}
-+
-+	oldenv = set_exec_env(env);
-+
-+	/* Phase 2: real checkpointing */
-+	err = cpt_open_dumpfile(ctx);
-+	if (err)
-+		goto out;
-+
-+	cpt_major_hdr_out(ctx);
-+
-+	/* The first failing section skips all the remaining ones. */
-+	if ((err = cpt_dump_veinfo(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_ubc(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_ifinfo(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_files(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_files_struct(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_fs_struct(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_namespace(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_sighand(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_vm(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_sysvsem(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_tasks(ctx)) != 0)
-+		goto close_dump;
-+	if ((err = cpt_dump_orphaned_sockets(ctx)) != 0)
-+		goto close_dump;
-+#if defined(CONFIG_VE_IPTABLES) && \
-+    (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
-+	if ((err = cpt_dump_ip_conntrack(ctx)) != 0)
-+		goto close_dump;
-+#endif
-+	if ((err = cpt_dump_utsname(ctx)) != 0)
-+		goto close_dump;
-+
-+	err = cpt_dump_tail(ctx);
-+
-+close_dump:
-+	/* The dump file is closed whether or not a section failed. */
-+	err2 = cpt_close_dumpfile(ctx);
-+
-+out:
-+	set_exec_env(oldenv);
-+out_noenv:
-+	up_read(&env->op_sem);
-+	put_ve(env);
-+	return err ? err : err2;
-+}
-+
-+/*
-+ * Suspend a VE and collect its state into the checkpoint context.
-+ *
-+ * When ctx->ve_id is zero the caller's own exec env is used, unless that
-+ * is ve0, in which case -EINVAL is returned.  Returns -ESRCH when the VE
-+ * is not found or not running, -EBUSY when it is already locked, and
-+ * otherwise the first error from the stop/suspend/collect phases, or 0.
-+ *
-+ * env->is_locked is set to 1 here and is not cleared by this function,
-+ * including on the error paths that go through "out".
-+ */
-+int cpt_vps_suspend(struct cpt_context *ctx)
-+{
-+ struct ve_struct *oldenv, *env;
-+ int err = 0;
-+
-+ ctx->kernel_config_flags = test_kernel_config();
-+ cpt_object_init(ctx);
-+
-+ if (!ctx->ve_id) {
-+ env = get_exec_env();
-+ if (env == get_ve0())
-+ return -EINVAL;
-+ wprintk("undefined ve_id\n");
-+ ctx->ve_id = env->veid;
-+ /* Take a reference so both branches end with one to put. */
-+ get_ve(env);
-+ } else {
-+ env = get_ve_by_id(ctx->ve_id);
-+ if (!env)
-+ return -ESRCH;
-+ }
-+
-+ ctx->iptables_mask = env->_iptables_modules;
-+
-+ /* is_running/is_locked are checked and is_locked is set with
-+ * op_sem held for write. */
-+ down_write(&env->op_sem);
-+ err = -ESRCH;
-+ if (!env->is_running)
-+ goto out_noenv;
-+
-+ err = -EBUSY;
-+ if (env->is_locked)
-+ goto out_noenv;
-+ env->is_locked = 1;
-+ /* From here on op_sem is held for read; "out" releases it
-+ * with up_read(). */
-+ downgrade_write(&env->op_sem);
-+
-+ oldenv = set_exec_env(env);
-+
-+ /* Phase 0: find and stop all the tasks */
-+ if ((err = vps_stop_tasks(ctx)) != 0)
-+ goto out;
-+
-+ if ((err = cpt_suspend_network(ctx)) != 0)
-+ goto out;
-+
-+ /* At the moment all the state is frozen. We do not need to lock
-+ * the state, which can be changed only if the tasks are running.
-+ */
-+
-+ /* Phase 1: collect task tree */
-+ if ((err = vps_collect_tasks(ctx)) != 0)
-+ goto out;
-+
-+ /* Phase 1': collect all the resources */
-+ if ((err = cpt_collect(ctx)) != 0)
-+ goto out;
-+
-+out:
-+ set_exec_env(oldenv);
-+ up_read(&env->op_sem);
-+ put_ve(env);
-+ return err;
-+
-+out_noenv:
-+ /* Reached before the downgrade: op_sem is still held for write
-+ * and the exec env was not switched. */
-+ up_write(&env->op_sem);
-+ put_ve(env);
-+ return err;
-+}
-+
-+/*
-+ * Compute the CPU capability mask for the VE named by ctx->ve_id and
-+ * store it in *caps.
-+ *
-+ * The CMOV bit is copied from test_cpu_caps().  The remaining flags,
-+ * with the EMT64 and IA64 bits cleared, are added when any task of the
-+ * VE has used math.  On CONFIG_X86_64 the EMT64 bit is added when any
-+ * task does not have _TIF_IA32 set.
-+ *
-+ * Returns 0, -EINVAL when ve_id is zero, or -ESRCH when the VE is not
-+ * found.
-+ */
-+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps)
-+{
-+	unsigned int cpu_flags = test_cpu_caps();
-+	struct ve_struct *env;
-+	task_t *tsk;
-+
-+	if (!ctx->ve_id)
-+		return -EINVAL;
-+
-+	env = get_ve_by_id(ctx->ve_id);
-+	if (env == NULL)
-+		return -ESRCH;
-+
-+	*caps = cpu_flags & (1<<CPT_CPU_X86_CMOV);
-+	cpu_flags &= ~((1<<CPT_CPU_X86_EMT64)|(1<<CPT_CPU_X86_IA64));
-+
-+	read_lock(&tasklist_lock);
-+	tsk = __first_task_ve(env);
-+	while (tsk != NULL) {
-+		if (tsk_used_math(tsk))
-+			*caps |= cpu_flags;
-+#ifdef CONFIG_X86_64
-+		if (!(tsk->thread_info->flags & _TIF_IA32))
-+			*caps |= (1<<CPT_CPU_X86_EMT64);
-+#endif
-+		tsk = __next_task_ve(env, tsk);
-+	}
-+	read_unlock(&tasklist_lock);
-+	put_ve(env);
-+
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_dump.h linux-2.6.16-026test009/kernel/cpt/cpt_dump.h
---- linux-2.6.16.orig/kernel/cpt/cpt_dump.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_dump.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,14 @@
-+/* Checkpoint (cpt_*) and restore (rst_*) entry points.  All take the
-+ * checkpoint context and return int. */
-+int cpt_dump(struct cpt_context *cpt);
-+int rst_undump(struct cpt_context *cpt);
-+int cpt_suspend(struct cpt_context *cpt);
-+int cpt_resume(struct cpt_context *cpt);
-+int cpt_kill(struct cpt_context *cpt);
-+int rst_clean(struct cpt_context *cpt);
-+int rst_resume(struct cpt_context *cpt);
-+int rst_kill(struct cpt_context *cpt);
-+
-+/* NOTE(review): presumably freezes or thaws a single pid depending on
-+ * "freeze" - confirm against the definition. */
-+int cpt_freeze_one(pid_t pid, int freeze);
-+int cpt_vps_suspend(struct cpt_context *ctx);
-+int vps_rst_undump(struct cpt_context *ctx);
-+
-+/* Stores a CPU capability mask in *caps. */
-+int cpt_vps_caps(struct cpt_context *ctx, __u32 *caps);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_epoll.c linux-2.6.16-026test009/kernel/cpt/cpt_epoll.c
---- linux-2.6.16.orig/kernel/cpt/cpt_epoll.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_epoll.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,116 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_epoll.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/namespace.h>
-+#include <linux/mount.h>
-+#include <linux/namei.h>
-+#include <linux/smp_lock.h>
-+#include <asm/uaccess.h>
-+#include <linux/vzcalluser.h>
-+#include <linux/eventpoll.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_files.h"
-+#include "cpt_kernel.h"
-+#include "cpt_fsmagic.h"
-+#include "cpt_syscalls.h"
-+
-+extern struct file_operations eventpoll_fops;
-+
-+/*
-+ * Dump one epoll file: a cpt_epoll_image header followed by one
-+ * cpt_epoll_file_image per item in the eventpoll's rb-tree.
-+ *
-+ * Returns -EINVAL when the file is not an eventpoll file or has no
-+ * private data, -EBUSY when an item refers to a file that is not in
-+ * the CPT_OBJ_FILE table, and 0 otherwise.
-+ */
-+int cpt_dump_epolldev(cpt_object_t *obj, cpt_context_t *ctx)
-+{
-+	struct file *file = obj->o_obj;
-+	struct cpt_epoll_image hdr;
-+	struct eventpoll *ep;
-+	struct rb_node *node;
-+	int err = 0;
-+
-+	if (file->f_op != &eventpoll_fops) {
-+		eprintk_ctx("bad epoll file\n");
-+		return -EINVAL;
-+	}
-+
-+	/* eventpoll.c does not protect open /proc/N/fd, silly.
-+	 * Opener will get an invalid file with uninitialized private_data
-+	 */
-+	ep = file->private_data;
-+	if (unlikely(ep == NULL)) {
-+		eprintk_ctx("bad epoll device\n");
-+		return -EINVAL;
-+	}
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	hdr.cpt_next = CPT_NULL;
-+	hdr.cpt_object = CPT_OBJ_EPOLL;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = CPT_CONTENT_ARRAY;
-+	hdr.cpt_file = obj->o_pos;
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+
-+	/* The item tree is walked with epsem held. */
-+	down(&epsem);
-+	node = rb_first(&ep->rbr);
-+	while (node != NULL) {
-+		struct epitem *epi = rb_entry(node, struct epitem, rbn);
-+		struct cpt_epoll_file_image efi;
-+		cpt_object_t *target;
-+		loff_t saved_obj;
-+
-+		target = lookup_cpt_object(CPT_OBJ_FILE, epi->ffd.file, ctx);
-+		if (target == NULL) {
-+			eprintk_ctx("epoll device refers to an external file\n");
-+			err = -EBUSY;
-+			break;
-+		}
-+
-+		cpt_push_object(&saved_obj, ctx);
-+		cpt_open_object(NULL, ctx);
-+
-+		efi.cpt_next = CPT_NULL;
-+		efi.cpt_object = CPT_OBJ_EPOLL_FILE;
-+		efi.cpt_hdrlen = sizeof(efi);
-+		efi.cpt_content = CPT_CONTENT_VOID;
-+		efi.cpt_file = target->o_pos;
-+		efi.cpt_fd = epi->ffd.fd;
-+		efi.cpt_events = epi->event.events;
-+		efi.cpt_data = epi->event.data;
-+		efi.cpt_revents = epi->revents;
-+		/* 1 when the item is linked on a ready list, else 0. */
-+		if (list_empty(&epi->rdllink))
-+			efi.cpt_ready = 0;
-+		else
-+			efi.cpt_ready = 1;
-+
-+		ctx->write(&efi, sizeof(efi), ctx);
-+		cpt_close_object(ctx);
-+		cpt_pop_object(&saved_obj, ctx);
-+
-+		node = rb_next(node);
-+	}
-+	up(&epsem);
-+
-+	cpt_close_object(ctx);
-+
-+	return err;
-+}
-+
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_files.c linux-2.6.16-026test009/kernel/cpt/cpt_files.c
---- linux-2.6.16.orig/kernel/cpt/cpt_files.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_files.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,1343 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_files.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/namespace.h>
-+#include <linux/mount.h>
-+#include <linux/namei.h>
-+#include <linux/smp_lock.h>
-+#include <linux/pagemap.h>
-+#include <asm/uaccess.h>
-+#include <linux/vzcalluser.h>
-+#include <linux/ve_proto.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_files.h"
-+#include "cpt_socket.h"
-+#include "cpt_kernel.h"
-+#include "cpt_fsmagic.h"
-+#include "cpt_syscalls.h"
-+
-+/*
-+ * Print the d_path() of (@d, @mnt) as "<path>" via printk.  Prints
-+ * nothing when no page can be allocated or d_path() returns an error.
-+ */
-+void cpt_printk_dentry(struct dentry *d, struct vfsmount *mnt)
-+{
-+	unsigned long page = __get_free_page(GFP_KERNEL);
-+	char *name;
-+
-+	if (page == 0)
-+		return;
-+
-+	name = d_path(d, mnt, (char *)page, PAGE_SIZE);
-+	if (!IS_ERR(name))
-+		printk("<%s>", name);
-+
-+	free_page(page);
-+}
-+
-+/*
-+ * Check that looking up @path leads back to the same (@d, @mnt) pair.
-+ *
-+ * The check is made only for absolute paths of hashed, non-root
-+ * dentries; all other cases return 0.  Returns -EINVAL when the lookup
-+ * fails or resolves to a different dentry or mount.
-+ */
-+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
-+			 cpt_context_t *ctx)
-+{
-+	struct nameidata nd;
-+	int same;
-+
-+	if (path[0] != '/' || IS_ROOT(d) || d_unhashed(d))
-+		return 0;
-+
-+	if (path_lookup(path, 0, &nd)) {
-+		eprintk_ctx("d_path cannot be looked up %s\n", path);
-+		return -EINVAL;
-+	}
-+
-+	same = (nd.dentry == d && nd.mnt == mnt);
-+	if (!same)
-+		eprintk_ctx("d_path is invisible %s\n", path);
-+	path_release(&nd);
-+
-+	return same ? 0 : -EINVAL;
-+}
-+
-+/*
-+ * Write the path of (@d, @mnt) as a CPT_OBJ_NAME object.
-+ *
-+ * When d_path() fails an empty name is written instead; the d_path
-+ * error is returned, except that -EINVAL is turned into 0.  When the
-+ * path is obtained but cpt_verify_overmount() rejects it, nothing is
-+ * written and -EINVAL is returned.  Otherwise returns 0.
-+ */
-+int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
-+{
-+	struct cpt_object_hdr hdr;
-+	char *buf = cpt_get_buf(ctx);
-+	char *name;
-+	int len;
-+
-+	name = d_path(d, mnt, buf, PAGE_SIZE);
-+
-+	hdr.cpt_object = CPT_OBJ_NAME;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = CPT_CONTENT_NAME;
-+
-+	if (IS_ERR(name)) {
-+		char empty = 0;
-+
-+		len = PTR_ERR(name);
-+		/* VZ changes d_path() to return EINVAL, when path
-+		 * is not supposed to be visible inside VE. */
-+		if (len == -EINVAL)
-+			len = 0;
-+		else
-+			eprintk_ctx("d_path err=%d\n", len);
-+
-+		hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(1);
-+
-+		ctx->write(&hdr, sizeof(hdr), ctx);
-+		ctx->write(&empty, 1, ctx);
-+		ctx->align(ctx);
-+
-+		__cpt_release_buf(ctx);
-+		return len;
-+	}
-+
-+	/* Length is measured from the returned pointer to the last
-+	 * byte of the page, where the terminator is then stored. */
-+	len = buf + PAGE_SIZE - 1 - name;
-+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(len + 1);
-+	name[len] = 0;
-+
-+	if (cpt_verify_overmount(name, d, mnt, ctx)) {
-+		__cpt_release_buf(ctx);
-+		return -EINVAL;
-+	}
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+	ctx->write(name, len + 1, ctx);
-+	ctx->align(ctx);
-+	__cpt_release_buf(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Write string @s, including its terminating NUL, as a CPT_OBJ_NAME
-+ * object and then call ctx->align().  Always returns 0.
-+ */
-+int cpt_dump_string(const char *s, struct cpt_context *ctx)
-+{
-+	struct cpt_object_hdr hdr;
-+	int nbytes = strlen(s) + 1;
-+
-+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(nbytes);
-+	hdr.cpt_object = CPT_OBJ_NAME;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = CPT_CONTENT_NAME;
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+	ctx->write(s, nbytes, ctx);
-+	ctx->align(ctx);
-+	return 0;
-+}
-+
-+/* Dump the name of @file via cpt_dump_dentry() on its dentry and mount. */
-+int cpt_dump_filename(struct file *file, struct cpt_context *ctx)
-+{
-+	struct dentry *dentry = file->f_dentry;
-+	struct vfsmount *mnt = file->f_vfsmnt;
-+
-+	return cpt_dump_dentry(dentry, mnt, ctx);
-+}
-+
-+/*
-+ * Write a cpt_inode_image for the inode behind (@d, @mnt).
-+ *
-+ * Most attributes come from vfs_getattr(); dev, ino, rdev and the
-+ * superblock magic are read from the inode itself.  Returns the
-+ * vfs_getattr() error, in which case nothing is written, or 0.
-+ */
-+int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
-+{
-+	struct cpt_inode_image *img = cpt_get_buf(ctx);
-+	struct inode *ino;
-+	struct kstat st;
-+	int err;
-+
-+	img->cpt_next = sizeof(*img);
-+	img->cpt_object = CPT_OBJ_INODE;
-+	img->cpt_hdrlen = sizeof(*img);
-+	img->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	err = vfs_getattr(mnt, d, &st);
-+	if (err != 0) {
-+		cpt_release_buf(ctx);
-+		return err;
-+	}
-+
-+	ino = d->d_inode;
-+
-+	img->cpt_dev = ino->i_sb->s_dev;
-+	img->cpt_ino = ino->i_ino;
-+	img->cpt_mode = st.mode;
-+	img->cpt_nlink = st.nlink;
-+	img->cpt_uid = st.uid;
-+	img->cpt_gid = st.gid;
-+	img->cpt_rdev = ino->i_rdev;
-+	img->cpt_size = st.size;
-+	img->cpt_atime = cpt_timespec_export(&st.atime);
-+	img->cpt_mtime = cpt_timespec_export(&st.mtime);
-+	img->cpt_ctime = cpt_timespec_export(&st.ctime);
-+	img->cpt_blksize = st.blksize;
-+	img->cpt_blocks = st.blocks;
-+	img->cpt_sb = ino->i_sb->s_magic;
-+
-+	ctx->write(img, sizeof(*img), ctx);
-+	cpt_release_buf(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Collect files_structs, files and inodes of all collected tasks into
-+ * the object table and check that nothing outside holds a reference.
-+ *
-+ * Returns -ENOMEM when an object cannot be added, -EBUSY when a
-+ * files_struct or file has a reference count different from the number
-+ * of times it was collected or when a file lives on an unsupported
-+ * filesystem, an error from the passed-fd/tty/socket collectors, or
-+ * the result of cpt_index_sockets().
-+ */
-+int cpt_collect_files(cpt_context_t * ctx)
-+{
-+ int err;
-+ cpt_object_t *obj;
-+ int index = 0;
-+
-+ /* Collect process fd sets */
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+ if (tsk->files && cpt_object_add(CPT_OBJ_FILES, tsk->files, ctx) == NULL)
-+ return -ENOMEM;
-+ }
-+
-+ /* Collect files from fd sets */
-+ for_each_object(obj, CPT_OBJ_FILES) {
-+ int fd;
-+ struct files_struct *f = obj->o_obj;
-+
-+ /* Number the files_structs in collection order. */
-+ cpt_obj_setindex(obj, index++, ctx);
-+
-+ /* o_count is compared with the real reference count. */
-+ if (obj->o_count != atomic_read(&f->count)) {
-+ eprintk_ctx("files_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&f->count));
-+ return -EBUSY;
-+ }
-+
-+ for (fd = 0; fd < f->fdt->max_fds; fd++) {
-+ struct file *file = fcheck_files(f, fd);
-+ if (file && cpt_object_add(CPT_OBJ_FILE, file, ctx) == NULL)
-+ return -ENOMEM;
-+ }
-+ }
-+
-+ /* Collect files queued by AF_UNIX sockets. */
-+ if ((err = cpt_collect_passedfds(ctx)) < 0)
-+ return err;
-+
-+ /* OK. At this point we should count all the references. */
-+ for_each_object(obj, CPT_OBJ_FILE) {
-+ struct file *file = obj->o_obj;
-+ struct file *parent;
-+ cpt_object_t *ino_obj;
-+
-+ if (obj->o_count != atomic_read(&file->f_count)) {
-+ eprintk_ctx("file struct is referenced outside %d %d\n", obj->o_count, atomic_read(&file->f_count));
-+ cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
-+ return -EBUSY;
-+ }
-+
-+ /* Files on these filesystems are refused. */
-+ switch (file->f_dentry->d_inode->i_sb->s_magic) {
-+ case FSMAGIC_FUTEX:
-+ case FSMAGIC_MQUEUE:
-+ case FSMAGIC_BDEV:
-+ eprintk_ctx("file on unsupported FS: magic %08lx\n", file->f_dentry->d_inode->i_sb->s_magic);
-+ return -EBUSY;
-+ }
-+
-+ /* Collect inode. It is necessary mostly to resolve deleted
-+ * hard links. */
-+ ino_obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
-+ if (ino_obj == NULL)
-+ return -ENOMEM;
-+
-+ /* Remember this file as the inode's parent when none is set
-+ * yet, or when the current parent's dentry is unhashed and
-+ * not a root. */
-+ parent = ino_obj->o_parent;
-+ if (!parent || (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry)))
-+ ino_obj->o_parent = file;
-+
-+ /* Character devices with a pty or TTYAUX major get their tty
-+ * collected as well. */
-+ if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
-+ int maj = imajor(file->f_dentry->d_inode);
-+ if (maj == PTY_MASTER_MAJOR ||
-+ (maj >= UNIX98_PTY_MASTER_MAJOR &&
-+ maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
-+ maj == PTY_SLAVE_MAJOR ||
-+ maj == UNIX98_PTY_SLAVE_MAJOR ||
-+ maj == TTYAUX_MAJOR) {
-+ err = cpt_collect_tty(file, ctx);
-+ if (err)
-+ return err;
-+ }
-+ }
-+
-+ if (S_ISSOCK(file->f_dentry->d_inode->i_mode)) {
-+ err = cpt_collect_socket(file, ctx);
-+ if (err)
-+ return err;
-+ }
-+ }
-+
-+ err = cpt_index_sockets(ctx);
-+
-+ return err;
-+}
-+
-+/* /dev/ptmx is special, all the files share one inode, but real tty backend
-+ * is attached via file->private_data.
-+ */
-+
-+/* True when @ino is a character device with rdev (TTYAUX_MAJOR, 2). */
-+static inline int is_cloning_inode(struct inode *ino)
-+{
-+	if (!S_ISCHR(ino->i_mode))
-+		return 0;
-+
-+	return ino->i_rdev == MKDEV(TTYAUX_MAJOR,2);
-+}
-+
-+/*
-+ * Write one cpt_flock_image for lock @fl with owner index @owner.
-+ *
-+ * A non-virtual fl_pid is converted with _pid_type_to_vpid().  When the
-+ * conversion yields -1, an FL_FLOCK lock is stored with pid 0, while
-+ * any other lock fails with -EBUSY and nothing is written.  Returns 0
-+ * on success.
-+ */
-+static int dump_one_flock(struct file_lock *fl, int owner, struct cpt_context *ctx)
-+{
-+	struct cpt_flock_image *img = cpt_get_buf(ctx);
-+	pid_t vpid = fl->fl_pid;
-+
-+	img->cpt_next = sizeof(*img);
-+	img->cpt_object = CPT_OBJ_FLOCK;
-+	img->cpt_hdrlen = sizeof(*img);
-+	img->cpt_content = CPT_CONTENT_VOID;
-+
-+	img->cpt_owner = owner;
-+
-+	if (vpid != 0 && !is_virtual_pid(fl->fl_pid)) {
-+		vpid = _pid_type_to_vpid(PIDTYPE_TGID, fl->fl_pid);
-+		if (vpid == -1) {
-+			if ((fl->fl_flags & FL_FLOCK) == 0) {
-+				eprintk_ctx("posix lock from another VE?\n");
-+				cpt_release_buf(ctx);
-+				return -EBUSY;
-+			}
-+			vpid = 0;
-+		}
-+	}
-+
-+	img->cpt_pid = vpid;
-+	img->cpt_start = fl->fl_start;
-+	img->cpt_end = fl->fl_end;
-+	img->cpt_flags = fl->fl_flags;
-+	img->cpt_type = fl->fl_type;
-+
-+	ctx->write(img, sizeof(*img), ctx);
-+	cpt_release_buf(ctx);
-+	return 0;
-+}
-+
-+
-+/*
-+ * Dump every lock on @file's inode that belongs to @file.
-+ *
-+ * The inode's i_flock list is walked under the big kernel lock.
-+ * Lease locks are not supported.  A POSIX lock is written with the
-+ * index of the collected files_struct that owns it; a flock lock is
-+ * written with owner -1.
-+ *
-+ * Returns 0 on success, -EINVAL for a lease lock or a POSIX lock whose
-+ * owner is not a collected files_struct, or the error returned by
-+ * dump_one_flock().  The walk stops at the first error: a failure of
-+ * dump_one_flock() is propagated to the caller, and no further locks
-+ * are written once the image is known to be unusable.
-+ */
-+int cpt_dump_flock(struct file *file, struct cpt_context *ctx)
-+{
-+	int err = 0;
-+	struct file_lock *fl;
-+
-+	lock_kernel();
-+	for (fl = file->f_dentry->d_inode->i_flock;
-+	     fl; fl = fl->fl_next) {
-+		if (file != fl->fl_file)
-+			continue;
-+
-+		if (fl->fl_flags & FL_LEASE) {
-+			eprintk_ctx("lease lock is not supported\n");
-+			err = -EINVAL;
-+			break;
-+		}
-+
-+		if (fl->fl_flags & FL_POSIX) {
-+			cpt_object_t *obj;
-+
-+			obj = lookup_cpt_object(CPT_OBJ_FILES, fl->fl_owner, ctx);
-+			if (obj == NULL) {
-+				eprintk_ctx("unknown lock owner %p\n", fl->fl_owner);
-+				err = -EINVAL;
-+				break;
-+			}
-+			err = dump_one_flock(fl, obj->o_index, ctx);
-+		} else if (fl->fl_flags & FL_FLOCK) {
-+			err = dump_one_flock(fl, -1, ctx);
-+		}
-+
-+		if (err)
-+			break;
-+	}
-+	unlock_kernel();
-+	return err;
-+}
-+
-+/*
-+ * Convert a combined pid value to its virtual form: a positive value
-+ * is converted as a PID, a negative one as a PGID (the sign is kept),
-+ * and zero is returned unchanged.  Returns 0 when the conversion fails.
-+ */
-+static int __comb_pid_to_vpid(int pid)
-+{
-+	int vpid;
-+
-+	if (pid == 0)
-+		return pid;
-+
-+	if (pid > 0) {
-+		vpid = _pid_type_to_vpid(PIDTYPE_PID, pid);
-+		if (unlikely(vpid < 0)) {
-+			dprintk("pid %d does not exist amymore.\n", pid);
-+			return 0;
-+		}
-+		return vpid;
-+	}
-+
-+	vpid = _pid_type_to_vpid(PIDTYPE_PGID, -pid);
-+	if (unlikely(vpid < 0)) {
-+		dprintk("pgid %d does not exist amymore.\n", -pid);
-+		return 0;
-+	}
-+	return -vpid;
-+}
-+
-+/*
-+ * Write the image of one collected file.
-+ *
-+ * @obj:  the CPT_OBJ_FILE object for @file; its o_index is used as the
-+ *        socket index for socket files
-+ * @file: the file to dump
-+ * @ctx:  checkpoint context
-+ *
-+ * Emits a cpt_file_image, then the file name (not for sockets) and the
-+ * file's locks when the inode has any.
-+ *
-+ * Returns 0 on success, the vfs_getattr() error when the attributes
-+ * cannot be read, -EINVAL for a socket without an index, or the error
-+ * from cpt_dump_filename() / cpt_dump_flock().  On the early error
-+ * returns the object opened by cpt_open_object() is left open.
-+ */
-+static int dump_one_file(cpt_object_t *obj, struct file *file, cpt_context_t *ctx)
-+{
-+	int err = 0;
-+	cpt_object_t *iobj;
-+	struct cpt_file_image *v = cpt_get_buf(ctx);
-+	struct kstat sbuf;
-+
-+	cpt_open_object(obj, ctx);
-+
-+	v->cpt_next = CPT_NULL;
-+	v->cpt_object = CPT_OBJ_FILE;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	v->cpt_flags = file->f_flags;
-+	v->cpt_mode = file->f_mode;
-+	v->cpt_pos = file->f_pos;
-+	v->cpt_uid = file->f_uid;
-+	v->cpt_gid = file->f_gid;
-+
-+	/* sbuf.mode selects the branches below, so a failed getattr must
-+	 * not be ignored: sbuf would be read uninitialised. */
-+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &sbuf);
-+	if (err) {
-+		eprintk_ctx("dump_one_file: vfs_getattr: %d\n", err);
-+		cpt_release_buf(ctx);
-+		return err;
-+	}
-+
-+	v->cpt_i_mode = sbuf.mode;
-+	v->cpt_lflags = 0;
-+	if (IS_ROOT(file->f_dentry))
-+		v->cpt_lflags |= CPT_DENTRY_ROOT;
-+	else if (d_unhashed(file->f_dentry))
-+		v->cpt_lflags |= CPT_DENTRY_DELETED;
-+	if (is_cloning_inode(file->f_dentry->d_inode))
-+		v->cpt_lflags |= CPT_DENTRY_CLONING;
-+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC)
-+		v->cpt_lflags |= CPT_DENTRY_PROC;
-+
-+	/* Position of the inode object, when the inode was collected. */
-+	v->cpt_inode = CPT_NULL;
-+	iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
-+	if (iobj)
-+		v->cpt_inode = iobj->o_pos;
-+
-+	v->cpt_priv = CPT_NULL;
-+	v->cpt_fown_fd = -1;
-+	if (S_ISCHR(sbuf.mode)) {
-+		/* For a collected tty, private data points at its object. */
-+		iobj = lookup_cpt_object(CPT_OBJ_TTY, file->private_data, ctx);
-+		if (iobj) {
-+			v->cpt_priv = iobj->o_pos;
-+			if (file->f_flags&FASYNC)
-+				v->cpt_fown_fd = cpt_tty_fasync(file, ctx);
-+		}
-+	}
-+	if (S_ISSOCK(sbuf.mode)) {
-+		if (obj->o_index < 0) {
-+			eprintk_ctx("BUG: no socket index\n");
-+			cpt_release_buf(ctx);
-+			return -EINVAL;
-+		}
-+		v->cpt_priv = obj->o_index;
-+		if (file->f_flags&FASYNC)
-+			v->cpt_fown_fd = cpt_socket_fasync(file, ctx);
-+	}
-+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
-+		v->cpt_priv = file->f_dentry->d_inode->i_ino;
-+		v->cpt_lflags |= CPT_DENTRY_EPOLL;
-+	}
-+
-+	v->cpt_fown_pid = __comb_pid_to_vpid((int)file->f_owner.pid);
-+	v->cpt_fown_uid = file->f_owner.uid;
-+	v->cpt_fown_euid = file->f_owner.euid;
-+	v->cpt_fown_signo = file->f_owner.signum;
-+
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_release_buf(ctx);
-+
-+	/* The buffer behind v is released; use sbuf.mode from here on. */
-+	if (!S_ISSOCK(sbuf.mode)) {
-+		err = cpt_dump_filename(file, ctx);
-+		if (err)
-+			return err;
-+	}
-+
-+	if (file->f_dentry->d_inode->i_flock)
-+		err = cpt_dump_flock(file, ctx);
-+
-+	cpt_close_object(ctx);
-+
-+	return err;
-+}
-+
-+/* About this weird function... Crappy code dealing with SYSV shared memory
-+ * defines TMPFS inode and file with f_op doing only mmap. So...
-+ * Maybe, this is wrong and leaks something. It is clear access to
-+ * SYSV shmem via mmap is quite unusual and impossible from user space.
-+ */
-+/*
-+ * Dump the whole content of @file as one CPT_OBJ_BITS object by mapping
-+ * it read-only into the current mm and writing the mapping out.
-+ *
-+ * Returns the mapping error when do_mmap_pgoff() fails, otherwise 0.
-+ *
-+ * NOTE(review): no mmap_sem handling is visible around do_mmap_pgoff()
-+ * and do_munmap() here - confirm whether the caller holds it.
-+ * NOTE(review): the return value of do_munmap() is not checked.
-+ */
-+static int dump_content_shm(struct file *file, struct cpt_context *ctx)
-+{
-+ struct cpt_obj_bits *v;
-+ loff_t saved_pos;
-+ unsigned long addr;
-+
-+ /* Map i_size bytes from offset 0, shared and read-only. */
-+ addr = do_mmap_pgoff(file, 0, file->f_dentry->d_inode->i_size,
-+ PROT_READ, MAP_SHARED, 0);
-+ if (IS_ERR((void*)addr))
-+ return PTR_ERR((void*)addr);
-+
-+ cpt_push_object(&saved_pos, ctx);
-+ cpt_open_object(NULL, ctx);
-+ v = cpt_get_buf(ctx);
-+ v->cpt_next = CPT_NULL;
-+ v->cpt_object = CPT_OBJ_BITS;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_DATA;
-+ v->cpt_size = file->f_dentry->d_inode->i_size;
-+ ctx->write(v, sizeof(*v), ctx);
-+ cpt_release_buf(ctx);
-+ /* The payload is written straight from the mapping. */
-+ ctx->write((void*)addr, file->f_dentry->d_inode->i_size, ctx);
-+ ctx->align(ctx);
-+ do_munmap(current->mm, addr, file->f_dentry->d_inode->i_size);
-+
-+ cpt_close_object(ctx);
-+ cpt_pop_object(&saved_pos, ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Return 1 when the @len bytes at @addr are all zero, 0 otherwise.
-+ * The buffer is compared one unsigned long at a time; the remaining
-+ * tail bytes are compared with memcmp() against a zero word.
-+ */
-+static int data_is_zero(char *addr, int len)
-+{
-+	unsigned long *words = (unsigned long *)addr;
-+	unsigned long zero = 0;
-+	unsigned long nwords = len / sizeof(unsigned long);
-+	unsigned long k;
-+	int tail;
-+
-+	for (k = 0; k < nwords; k++) {
-+		if (words[k] != 0)
-+			return 0;
-+	}
-+
-+	tail = len % sizeof(unsigned long);
-+	if (tail == 0)
-+		return 1;
-+
-+	return memcmp(addr + len - tail, &zero, tail) == 0;
-+}
-+
-+
-+/*
-+ * Dump the data of a regular file as a sequence of CPT_OBJ_PAGES
-+ * objects.
-+ *
-+ * The file is read through its ->read method in PAGE_SIZE chunks into
-+ * ctx->tmpbuf.  Chunks that are entirely zero are not written; each
-+ * run of non-zero chunks becomes one cpt_page_block whose cpt_end field
-+ * is patched in with ctx->pwrite() when the run ends.
-+ *
-+ * A file without ->read is accepted only on TMPFS (SYSV SHM): it is
-+ * passed to cpt_dump_content_sysvshm() and then read through the
-+ * inode's i_fop->read, or through dump_content_shm() when that is
-+ * missing too.
-+ *
-+ * Files not opened for reading, or opened with O_DIRECT, are reopened
-+ * read-only for the duration of the dump.
-+ *
-+ * Returns 0 on success, -EINVAL for a file that cannot be read, the
-+ * dentry_open() error when the reopen fails, or the negative result of
-+ * the read method.
-+ */
-+static int dump_content_regular(struct file *file, struct cpt_context *ctx)
-+{
-+	loff_t saved_pos;
-+	loff_t pos = 0;
-+	loff_t obj_opened = CPT_NULL;
-+	struct cpt_page_block pgb;
-+	ssize_t (*do_read)(struct file *, char __user *, size_t, loff_t *);
-+
-+	if (file->f_op == NULL)
-+		return -EINVAL;
-+
-+	if ((do_read = file->f_op->read) == NULL) {
-+		if (file->f_op->mmap == NULL)
-+			return -EINVAL;
-+		if (file->f_dentry->d_inode->i_sb->s_magic != FSMAGIC_TMPFS) {
-+			eprintk_ctx("unreadable, but not SYSV SHM file\n");
-+			return -EINVAL;
-+		}
-+
-+		do_read = file->f_dentry->d_inode->i_fop->read;
-+		/* NOTE(review): the result of this call is not checked. */
-+		cpt_dump_content_sysvshm(file, ctx);
-+		if (!do_read) {
-+			wprintk_ctx("TMPFS is not configured?\n");
-+			return dump_content_shm(file, ctx);
-+		}
-+	}
-+
-+	if (!(file->f_mode & FMODE_READ) ||
-+	    (file->f_flags & O_DIRECT)) {
-+		file = dentry_open(dget(file->f_dentry),
-+				   mntget(file->f_vfsmnt), O_RDONLY);
-+		/* dentry_open() returns an ERR_PTR on failure; it must not
-+		 * reach do_read() or fput().  The dget/mntget references
-+		 * are expected to be dropped by dentry_open() itself in
-+		 * that case - TODO confirm for this kernel version. */
-+		if (IS_ERR(file)) {
-+			eprintk_ctx("dump_content_regular: dentry_open: %ld\n", PTR_ERR(file));
-+			return PTR_ERR(file);
-+		}
-+	} else {
-+		/* Balanced by the fput() at the end of the dump. */
-+		atomic_inc(&file->f_count);
-+	}
-+
-+	for (;;) {
-+		mm_segment_t oldfs;
-+		int err;
-+
-+		(void)cpt_get_buf(ctx);
-+
-+		/* ->read takes a user pointer; widen the address limit so
-+		 * that it accepts the kernel buffer. */
-+		oldfs = get_fs(); set_fs(KERNEL_DS);
-+		err = do_read(file, ctx->tmpbuf, PAGE_SIZE, &pos);
-+		set_fs(oldfs);
-+		if (err < 0) {
-+			eprintk_ctx("dump_content_regular: do_read: %d\n", err);
-+			fput(file);
-+			__cpt_release_buf(ctx);
-+			return err;
-+		}
-+		if (err == 0) {
-+			/* End of file. */
-+			__cpt_release_buf(ctx);
-+			break;
-+		}
-+		if (data_is_zero(ctx->tmpbuf, err)) {
-+			/* A zero chunk ends the current page block, if any. */
-+			if (obj_opened != CPT_NULL) {
-+				ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
-+				ctx->align(ctx);
-+				cpt_close_object(ctx);
-+				cpt_pop_object(&saved_pos, ctx);
-+				obj_opened = CPT_NULL;
-+			}
-+		} else {
-+			if (obj_opened == CPT_NULL) {
-+				/* Start a new block at the offset of this
-+				 * chunk; cpt_end is patched in later. */
-+				cpt_push_object(&saved_pos, ctx);
-+				cpt_open_object(NULL, ctx);
-+				obj_opened = ctx->file->f_pos;
-+				pgb.cpt_next = CPT_NULL;
-+				pgb.cpt_object = CPT_OBJ_PAGES;
-+				pgb.cpt_hdrlen = sizeof(pgb);
-+				pgb.cpt_content = CPT_CONTENT_DATA;
-+				pgb.cpt_start = pos - err;
-+				pgb.cpt_end = pgb.cpt_start;
-+				ctx->write(&pgb, sizeof(pgb), ctx);
-+			}
-+			ctx->write(ctx->tmpbuf, err, ctx);
-+			pgb.cpt_end += err;
-+		}
-+		__cpt_release_buf(ctx);
-+	}
-+
-+	fput(file);
-+
-+	/* Close the block that was still open at end of file. */
-+	if (obj_opened != CPT_NULL) {
-+		ctx->pwrite(&pgb.cpt_end, 8, ctx, obj_opened + offsetof(struct cpt_page_block, cpt_end));
-+		ctx->align(ctx);
-+		cpt_close_object(ctx);
-+		cpt_pop_object(&saved_pos, ctx);
-+		obj_opened = CPT_NULL;
-+	}
-+	return 0;
-+}
-+
-+
-+/*
-+ * Dump the content of a character device file.  MEM_MAJOR devices have
-+ * nothing to dump; pty and TTYAUX majors are passed to
-+ * cpt_dump_content_tty(); any other device returns -EINVAL.
-+ */
-+static int dump_content_chrdev(struct file *file, struct cpt_context *ctx)
-+{
-+	struct inode *ino = file->f_dentry->d_inode;
-+	int maj = imajor(ino);
-+	int is_tty;
-+
-+	if (maj == MEM_MAJOR) {
-+		/* Well, OK. */
-+		return 0;
-+	}
-+
-+	is_tty = maj == PTY_MASTER_MAJOR ||
-+		 (maj >= UNIX98_PTY_MASTER_MAJOR &&
-+		  maj < UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) ||
-+		 maj == PTY_SLAVE_MAJOR ||
-+		 maj == UNIX98_PTY_SLAVE_MAJOR ||
-+		 maj == TTYAUX_MAJOR;
-+	if (is_tty)
-+		return cpt_dump_content_tty(file, ctx);
-+
-+	eprintk_ctx("unsupported chrdev %d/%d\n", maj, iminor(ino));
-+	return -EINVAL;
-+}
-+
-+/* Block device content is never dumped: log the device, return -EINVAL. */
-+static int dump_content_blkdev(struct file *file, struct cpt_context *ctx)
-+{
-+	struct inode *bdev_ino = file->f_dentry->d_inode;
-+
-+	/* We are not going to transfer them. */
-+	eprintk_ctx("unsupported blkdev %d/%d\n", imajor(bdev_ino), iminor(bdev_ino));
-+
-+	return -EINVAL;
-+}
-+
-+/*
-+ * Save the data queued in a pipe/fifo.
-+ *
-+ * The inode's reader/writer counts are first reduced by every collected
-+ * CPT_OBJ_FILE that refers to this inode.  If anything is left over, the
-+ * pipe is also open by somebody outside the collected set: a pipe living
-+ * on "pipefs:" fails with -EBUSY, any other fifo is migrated without data.
-+ * Otherwise the buffered bytes are written as one CPT_OBJ_BITS object.
-+ *
-+ * Returns 0 on success (including "nothing to save"), -EBUSY or -EINVAL.
-+ */
-+static int dump_content_fifo(struct file *file, struct cpt_context *ctx)
-+{
-+	struct inode *ino = file->f_dentry->d_inode;
-+	cpt_object_t *obj;
-+	loff_t saved_pos;
-+	int readers;
-+	int writers;
-+	int anon = 0;
-+
-+	mutex_lock(PIPE_MUTEX(*ino));
-+	readers = PIPE_READERS(*ino);
-+	writers = PIPE_WRITERS(*ino);
-+	for_each_object(obj, CPT_OBJ_FILE) {
-+		struct file *file1 = obj->o_obj;
-+		if (file1->f_dentry->d_inode == ino) {
-+			if (file1->f_mode & FMODE_READ)
-+				readers--;
-+			if (file1->f_mode & FMODE_WRITE)
-+				writers--;
-+		}
-+	}
-+	mutex_unlock(PIPE_MUTEX(*ino));
-+	if (readers || writers) {
-+		struct dentry *dr = file->f_dentry->d_sb->s_root;
-+		if (dr->d_name.len == 7 && memcmp(dr->d_name.name,"pipefs:",7) == 0)
-+			anon = 1;
-+
-+		if (anon) {
-+			eprintk_ctx("pipe has %d/%d external readers/writers\n", readers, writers);
-+			return -EBUSY;
-+		}
-+		/* If fifo has external readers/writers, we are in trouble.
-+		 * If the buffer is not empty, we must move its content.
-+		 * But if the fifo is owned by a service, we cannot do
-+		 * this. See?
-+		 *
-+		 * For now we assume, that if fifo is opened by another
-+		 * process, we do not own it and, hence, migrate without
-+		 * data.
-+		 */
-+		return 0;
-+	}
-+
-+	/* OK, we must save fifo state. No semaphores required. */
-+
-+	if (ino->i_pipe->nrbufs) {
-+		struct cpt_obj_bits *v = cpt_get_buf(ctx);
-+		struct pipe_inode_info *info;
-+		int count, buf, nrbufs;
-+
-+		mutex_lock(PIPE_MUTEX(*ino));
-+		info = ino->i_pipe;
-+		count = 0;
-+		buf = info->curbuf;
-+		nrbufs = info->nrbufs;
-+		/* First pass: validate every buffer and sum up the payload. */
-+		while (--nrbufs >= 0) {
-+			if (!info->bufs[buf].ops->can_merge) {
-+				mutex_unlock(PIPE_MUTEX(*ino));
-+				/* The buffer taken by cpt_get_buf() must be
-+				 * given back on every exit path, not only
-+				 * after the header has been written. */
-+				cpt_release_buf(ctx);
-+				eprintk_ctx("unknown format of pipe buffer\n");
-+				return -EINVAL;
-+			}
-+			count += info->bufs[buf].len;
-+			buf = (buf+1) & (PIPE_BUFFERS-1);
-+		}
-+
-+		if (!count) {
-+			mutex_unlock(PIPE_MUTEX(*ino));
-+			cpt_release_buf(ctx);
-+			return 0;
-+		}
-+
-+		cpt_push_object(&saved_pos, ctx);
-+		cpt_open_object(NULL, ctx);
-+		v->cpt_next = CPT_NULL;
-+		v->cpt_object = CPT_OBJ_BITS;
-+		v->cpt_hdrlen = sizeof(*v);
-+		v->cpt_content = CPT_CONTENT_DATA;
-+		v->cpt_size = count;
-+		ctx->write(v, sizeof(*v), ctx);
-+		cpt_release_buf(ctx);
-+
-+		/* Second pass: copy the payload of each buffer to the image. */
-+		buf = info->curbuf;
-+		nrbufs = info->nrbufs;
-+		while (--nrbufs >= 0) {
-+			struct pipe_buffer *b = info->bufs + buf;
-+			void * addr = b->ops->map(file, info, b);
-+			ctx->write(addr + b->offset, b->len, ctx);
-+			b->ops->unmap(info, b);
-+			buf = (buf+1) & (PIPE_BUFFERS-1);
-+		}
-+
-+		mutex_unlock(PIPE_MUTEX(*ino));
-+
-+		ctx->align(ctx);
-+		cpt_close_object(ctx);
-+		cpt_pop_object(&saved_pos, ctx);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Socket inodes contribute no content here; always succeeds.
-+ * (cpt_dump_files() writes sockets into their own CPT_SECT_SOCKET section.)
-+ */
-+static int dump_content_socket(struct file *file, struct cpt_context *ctx)
-+{
-+	return 0;
-+}
-+
-+/*
-+ * Dump the inode behind dentry d (on mount mnt) if it needs to be saved.
-+ *
-+ * An inode is dumped when the dentry is unhashed (and not a root) or when
-+ * the inode is neither a regular file nor a directory.  Inodes already
-+ * written (o_pos >= 0) and epoll inodes with an empty mode are skipped.
-+ * For unhashed dentries either a reference to another file name is
-+ * written (via iobj->o_parent) or the content itself is dumped.
-+ *
-+ * file may be NULL: cpt_dump_files() passes NULL for fs_struct
-+ * root/pwd/altroot dentries.
-+ * NOTE(review): the content dumpers dereference file; this relies on
-+ * those dentries being directories - confirm.
-+ *
-+ * Returns 0 on success or when nothing had to be dumped, -EINVAL if the
-+ * inode was never collected, -EBUSY for an unhashed name of a still
-+ * linked inode, or the error of the content dumper.
-+ */
-+static int dump_one_inode(struct file *file, struct dentry *d,
-+ struct vfsmount *mnt, struct cpt_context *ctx)
-+{
-+ int err = 0;
-+ struct inode *ino = d->d_inode;
-+ cpt_object_t *iobj;
-+ int dump_it = 0;
-+
-+ iobj = lookup_cpt_object(CPT_OBJ_INODE, ino, ctx);
-+ if (!iobj)
-+ return -EINVAL;
-+
-+ /* Already has a position in the image: nothing more to do. */
-+ if (iobj->o_pos >= 0)
-+ return 0;
-+
-+ if (!IS_ROOT(d) && d_unhashed(d))
-+ dump_it = 1;
-+ if (!S_ISREG(ino->i_mode) && !S_ISDIR(ino->i_mode)) {
-+ /* One more bug in epoll: invalid inode mode.
-+ * What a load of crap...
-+ */
-+ if (ino->i_sb->s_magic == FSMAGIC_EPOLL &&
-+ (ino->i_mode & S_IFMT) == 0)
-+ return 0;
-+ dump_it = 1;
-+ }
-+
-+ if (!dump_it)
-+ return 0;
-+
-+ cpt_open_object(iobj, ctx);
-+ cpt_dump_inode(d, mnt, ctx);
-+
-+ if (!IS_ROOT(d) && d_unhashed(d)) {
-+ struct file *parent;
-+ parent = iobj->o_parent;
-+ if (!parent ||
-+ (!IS_ROOT(parent->f_dentry) && d_unhashed(parent->f_dentry))) {
-+ /* Inode is not deleted, but it does not
-+ * have references from inside checkpointed
-+ * process group. We have options:
-+ * A. Fail, abort checkpointing
-+ * B. Proceed. File will be cloned.
-+ * A is correct, B is more complicated */
-+ /* Just as a hint where to create deleted file */
-+ if (ino->i_nlink != 0) {
-+ eprintk_ctx("deleted reference to existing inode, checkpointing is impossible\n");
-+ /* NOTE(review): returns with the object opened above still open. */
-+ return -EBUSY;
-+ }
-+ } else {
-+ /* Refer to _another_ file name. */
-+ err = cpt_dump_filename(parent, ctx);
-+ if (err)
-+ return err;
-+ /* Regular files and directories reachable by another name need no content. */
-+ if (S_ISREG(ino->i_mode) || S_ISDIR(ino->i_mode))
-+ dump_it = 0;
-+ }
-+ }
-+ if (dump_it) {
-+ if (S_ISREG(ino->i_mode)) {
-+ if ((err = dump_content_regular(file, ctx)) != 0) {
-+ eprintk_ctx("dump_content_regular ");
-+ cpt_printk_dentry(d, mnt);
-+ }
-+ } else if (S_ISDIR(ino->i_mode)) {
-+ /* We cannot do anything. The directory should be
-+ * empty, so it is not a big deal.
-+ */
-+ } else if (S_ISCHR(ino->i_mode)) {
-+ err = dump_content_chrdev(file, ctx);
-+ } else if (S_ISBLK(ino->i_mode)) {
-+ err = dump_content_blkdev(file, ctx);
-+ } else if (S_ISFIFO(ino->i_mode)) {
-+ err = dump_content_fifo(file, ctx);
-+ } else if (S_ISSOCK(ino->i_mode)) {
-+ err = dump_content_socket(file, ctx);
-+ } else {
-+ eprintk_ctx("unknown inode mode %o\n", ino->i_mode & S_IFMT);
-+ err = -EINVAL;
-+ }
-+ }
-+ cpt_close_object(ctx);
-+
-+ return err;
-+}
-+
-+/*
-+ * Write the file-related sections of the image, in this order:
-+ * CPT_SECT_TTY, CPT_SECT_INODE (inodes of collected files and of each
-+ * fs_struct's root/pwd/altroot), CPT_SECT_FILES, CPT_SECT_EPOLL (only if
-+ * at least one epoll file was seen) and CPT_SECT_SOCKET.
-+ *
-+ * Returns 0, or the first error reported by a per-object dumper; on
-+ * error the section that was being written is left unclosed.
-+ */
-+int cpt_dump_files(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	int epoll_nr = 0;
-+	int err;
-+
-+	cpt_open_section(ctx, CPT_SECT_TTY);
-+	for_each_object(obj, CPT_OBJ_TTY) {
-+		err = cpt_dump_tty(obj, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+	cpt_close_section(ctx);
-+
-+	cpt_open_section(ctx, CPT_SECT_INODE);
-+	for_each_object(obj, CPT_OBJ_FILE) {
-+		struct file *filp = obj->o_obj;
-+
-+		err = dump_one_inode(filp, filp->f_dentry, filp->f_vfsmnt, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+	for_each_object(obj, CPT_OBJ_FS) {
-+		struct fs_struct *fs = obj->o_obj;
-+
-+		if (fs->root) {
-+			err = dump_one_inode(NULL, fs->root, fs->rootmnt, ctx);
-+			if (err != 0)
-+				return err;
-+		}
-+		if (fs->pwd) {
-+			err = dump_one_inode(NULL, fs->pwd, fs->pwdmnt, ctx);
-+			if (err != 0)
-+				return err;
-+		}
-+		if (fs->altroot) {
-+			err = dump_one_inode(NULL, fs->altroot, fs->altrootmnt, ctx);
-+			if (err != 0)
-+				return err;
-+		}
-+	}
-+	cpt_close_section(ctx);
-+
-+	cpt_open_section(ctx, CPT_SECT_FILES);
-+	for_each_object(obj, CPT_OBJ_FILE) {
-+		struct file *filp = obj->o_obj;
-+
-+		err = dump_one_file(obj, filp, ctx);
-+		if (err != 0)
-+			return err;
-+		/* Remember whether an epoll section is needed at all. */
-+		if (filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL)
-+			epoll_nr++;
-+	}
-+	cpt_close_section(ctx);
-+
-+	if (epoll_nr) {
-+		cpt_open_section(ctx, CPT_SECT_EPOLL);
-+		for_each_object(obj, CPT_OBJ_FILE) {
-+			struct file *filp = obj->o_obj;
-+
-+			if (filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_EPOLL) {
-+				err = cpt_dump_epolldev(obj, ctx);
-+				if (err != 0)
-+					return err;
-+			}
-+		}
-+		cpt_close_section(ctx);
-+	}
-+
-+	cpt_open_section(ctx, CPT_SECT_SOCKET);
-+	for_each_object(obj, CPT_OBJ_SOCKET) {
-+		err = cpt_dump_socket(obj, obj->o_obj, obj->o_index, -1, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+	cpt_close_section(ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Write one CPT_OBJ_FILEDESC record: the descriptor number, the image
-+ * position of the already collected file object and the close-on-exec
-+ * flag taken from files_struct f.  BUGs if the file was never collected.
-+ * Always returns 0.
-+ */
-+static int dump_filedesc(int fd, struct file *file,
-+			 struct files_struct *f, struct cpt_context *ctx)
-+{
-+	struct cpt_fd_image *img = cpt_get_buf(ctx);
-+	cpt_object_t *fobj;
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	img->cpt_next = CPT_NULL;
-+	img->cpt_object = CPT_OBJ_FILEDESC;
-+	img->cpt_hdrlen = sizeof(*img);
-+	img->cpt_content = CPT_CONTENT_VOID;
-+
-+	img->cpt_fd = fd;
-+	fobj = lookup_cpt_object(CPT_OBJ_FILE, file, ctx);
-+	if (!fobj) BUG();
-+	img->cpt_file = fobj->o_pos;
-+	img->cpt_flags = FD_ISSET(fd, f->fdt->close_on_exec) ?
-+			 CPT_FD_FLAG_CLOSEEXEC : 0;
-+
-+	ctx->write(img, sizeof(*img), ctx);
-+	cpt_release_buf(ctx);
-+	cpt_close_object(ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Write a CPT_OBJ_FILES header for one files_struct (index, max_fds,
-+ * next_fd) followed by a nested CPT_OBJ_FILEDESC record for every open
-+ * descriptor.  Always returns 0.
-+ */
-+static int dump_one_file_struct(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct files_struct *files = obj->o_obj;
-+	struct cpt_files_struct_image *hdr = cpt_get_buf(ctx);
-+	loff_t saved_obj;
-+	int fd;
-+
-+	cpt_open_object(obj, ctx);
-+
-+	hdr->cpt_next = CPT_NULL;
-+	hdr->cpt_object = CPT_OBJ_FILES;
-+	hdr->cpt_hdrlen = sizeof(*hdr);
-+	hdr->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	hdr->cpt_index = obj->o_index;
-+	hdr->cpt_max_fds = files->fdt->max_fds;
-+	hdr->cpt_next_fd = files->fdt->next_fd;
-+
-+	ctx->write(hdr, sizeof(*hdr), ctx);
-+	cpt_release_buf(ctx);
-+
-+	cpt_push_object(&saved_obj, ctx);
-+	fd = 0;
-+	while (fd < files->fdt->max_fds) {
-+		struct file *filp = fcheck_files(files, fd);
-+
-+		/* Unused slots yield NULL and are simply skipped. */
-+		if (filp != NULL)
-+			dump_filedesc(fd, filp, files, ctx);
-+		fd++;
-+	}
-+	cpt_pop_object(&saved_obj, ctx);
-+
-+	cpt_close_object(ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Write the CPT_SECT_FILES_STRUCT section: one record per collected
-+ * CPT_OBJ_FILES object.  Returns 0 or the first dumper error (the
-+ * section is then left unclosed).
-+ */
-+int cpt_dump_files_struct(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	int err;
-+
-+	cpt_open_section(ctx, CPT_SECT_FILES_STRUCT);
-+
-+	for_each_object(obj, CPT_OBJ_FILES) {
-+		err = dump_one_file_struct(obj, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * For every collected task with an fs_struct, register the fs_struct as
-+ * CPT_OBJ_FS and the inodes of its pwd, root and altroot dentries (when
-+ * set) as CPT_OBJ_INODE.  Returns 0, or -ENOMEM when an object cannot be
-+ * added.
-+ */
-+int cpt_collect_fs(cpt_context_t * ctx)
-+{
-+	cpt_object_t *obj;
-+
-+	for_each_object(obj, CPT_OBJ_TASK) {
-+		task_t *tsk = obj->o_obj;
-+		struct fs_struct *fs = tsk->fs;
-+
-+		if (fs) {
-+			if (!cpt_object_add(CPT_OBJ_FS, fs, ctx))
-+				return -ENOMEM;
-+			if (fs->pwd &&
-+			    !cpt_object_add(CPT_OBJ_INODE, fs->pwd->d_inode, ctx))
-+				return -ENOMEM;
-+			if (fs->root &&
-+			    !cpt_object_add(CPT_OBJ_INODE, fs->root->d_inode, ctx))
-+				return -ENOMEM;
-+			if (fs->altroot &&
-+			    !cpt_object_add(CPT_OBJ_INODE, fs->altroot->d_inode, ctx))
-+				return -ENOMEM;
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Dump a directory given only as (dentry, vfsmount): a zeroed struct file
-+ * on the stack is filled in with the dentry, the mount and a read-only
-+ * mode, and handed to dump_one_file() with no owning object.
-+ */
-+static int cpt_dump_dir(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
-+{
-+	struct file fake;
-+
-+	memset(&fake, 0, sizeof(fake));
-+
-+	fake.f_mode = FMODE_READ|FMODE_PREAD|FMODE_LSEEK;
-+	fake.f_vfsmnt = mnt;
-+	fake.f_dentry = d;
-+
-+	return dump_one_file(NULL, &fake, ctx);
-+}
-+
-+/*
-+ * Write a CPT_OBJ_FS record (umask) for one fs_struct, followed by nested
-+ * directory records for root, pwd and - if present - altroot, in that
-+ * order.  Dumping stops at the first failing directory; the object is
-+ * closed in any case.  Returns 0 or that error.
-+ */
-+static int dump_one_fs(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct fs_struct *fs = obj->o_obj;
-+	struct cpt_fs_struct_image *img = cpt_get_buf(ctx);
-+	loff_t saved_obj;
-+	int err;
-+
-+	cpt_open_object(obj, ctx);
-+
-+	img->cpt_next = CPT_NULL;
-+	img->cpt_object = CPT_OBJ_FS;
-+	img->cpt_hdrlen = sizeof(*img);
-+	img->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	img->cpt_umask = fs->umask;
-+
-+	ctx->write(img, sizeof(*img), ctx);
-+	cpt_release_buf(ctx);
-+
-+	cpt_push_object(&saved_obj, ctx);
-+	err = cpt_dump_dir(fs->root, fs->rootmnt, ctx);
-+	if (err == 0) {
-+		err = cpt_dump_dir(fs->pwd, fs->pwdmnt, ctx);
-+		if (err == 0 && fs->altroot)
-+			err = cpt_dump_dir(fs->altroot, fs->altrootmnt, ctx);
-+	}
-+	cpt_pop_object(&saved_obj, ctx);
-+
-+	cpt_close_object(ctx);
-+
-+	return err;
-+}
-+
-+/*
-+ * Write the CPT_SECT_FS section: one record per collected CPT_OBJ_FS
-+ * object.  Returns 0 or the first dumper error (the section is then left
-+ * unclosed).
-+ */
-+int cpt_dump_fs_struct(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	int err;
-+
-+	cpt_open_section(ctx, CPT_SECT_FS);
-+
-+	for_each_object(obj, CPT_OBJ_FS) {
-+		err = dump_one_fs(obj, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Verify that every mount of a namespace whose path d_path() can resolve
-+ * uses one of the supported filesystem types (rootfs, ext3, simfs, tmpfs,
-+ * devpts, proc, sysfs).  Mounts for which d_path() fails are skipped.
-+ * Returns 0, -ENOMEM if the scratch page cannot be allocated, or -EINVAL
-+ * on the first unsupported type.
-+ */
-+static int check_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct namespace *ns = obj->o_obj;
-+	struct list_head *pos;
-+	char *path_buf, *path;
-+	int err = 0;
-+
-+	path_buf = (char *) __get_free_page(GFP_KERNEL);
-+	if (path_buf == NULL)
-+		return -ENOMEM;
-+
-+	down_read(&namespace_sem);
-+	list_for_each(pos, &ns->list) {
-+		struct vfsmount *mnt = list_entry(pos, struct vfsmount, mnt_list);
-+		const char *fstype;
-+
-+		path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
-+		if (IS_ERR(path))
-+			continue;
-+
-+		fstype = mnt->mnt_sb->s_type->name;
-+		if (strcmp(fstype, "rootfs") == 0 ||
-+		    strcmp(fstype, "ext3") == 0 ||
-+		    strcmp(fstype, "simfs") == 0 ||
-+		    strcmp(fstype, "tmpfs") == 0 ||
-+		    strcmp(fstype, "devpts") == 0 ||
-+		    strcmp(fstype, "proc") == 0 ||
-+		    strcmp(fstype, "sysfs") == 0)
-+			continue;
-+
-+		eprintk_ctx("unsupported fs type %s\n", mnt->mnt_sb->s_type->name);
-+		err = -EINVAL;
-+		break;
-+	}
-+	up_read(&namespace_sem);
-+
-+	free_page((unsigned long) path_buf);
-+
-+	return err;
-+}
-+
-+/*
-+ * Register the namespace of every collected task as CPT_OBJ_NAMESPACE,
-+ * then validate each registered namespace with check_one_namespace().
-+ * Returns 0, -ENOMEM, or the first validation error.
-+ */
-+int cpt_collect_namespace(cpt_context_t * ctx)
-+{
-+	cpt_object_t *obj;
-+	int err;
-+
-+	for_each_object(obj, CPT_OBJ_TASK) {
-+		task_t *tsk = obj->o_obj;
-+
-+		if (tsk->namespace) {
-+			if (cpt_object_add(CPT_OBJ_NAMESPACE, tsk->namespace, ctx) == NULL)
-+				return -ENOMEM;
-+		}
-+	}
-+
-+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
-+		err = check_one_namespace(obj, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	return 0;
-+}
-+
-+/* Arguments handed from cpt_dump_tmpfs() to the dumptmpfs() thread. */
-+struct args_t
-+{
-+	int *pfd;	/* the two descriptors filled in by sc_pipe() */
-+	char *path;	/* path given to tar as the thing to archive */
-+};
-+
-+/*
-+ * Thread function started by cpt_dump_tmpfs(): enters the VE, makes
-+ * pfd[1] its descriptor 1, closes every other descriptor and execs
-+ * "/bin/tar -c -S --numeric-owner <path>".
-+ *
-+ * arg points to a struct args_t owned by the caller.  The module
-+ * reference taken by local_kernel_thread() is dropped here on both the
-+ * failure and the exec path.  Returns 1 if the VE cannot be entered and
-+ * -1 if the exec fails.
-+ */
-+static int dumptmpfs(void *arg)
-+{
-+ int i;
-+ struct args_t *args = arg;
-+ int *pfd = args->pfd;
-+ char *path = args->path;
-+ char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
-+
-+ i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
-+ if (i < 0) {
-+ eprintk("cannot enter ve to dump tmpfs\n");
-+ module_put(THIS_MODULE);
-+ return 1;
-+ }
-+
-+ /* Make the pipe descriptor pfd[1] this thread's descriptor 1. */
-+ if (pfd[1] != 1)
-+ sc_dup2(pfd[1], 1);
-+
-+ /* Close everything except descriptor 1. */
-+ for (i=0; i<current->files->fdt->max_fds; i++) {
-+ if (i != 1)
-+ sc_close(i);
-+ }
-+
-+ module_put(THIS_MODULE);
-+
-+ /* argv and the path are kernel pointers, hence KERNEL_DS for the exec. */
-+ set_fs(KERNEL_DS);
-+ i = sc_execve("/bin/tar", argv, NULL);
-+ eprintk("failed to exec /bin/tar: %d\n", i);
-+ return -1;
-+}
-+
-+/*
-+ * Save the content of the tmpfs mounted at path: a pipe is created, a
-+ * dumptmpfs() thread is started to run tar into it, and everything read
-+ * from the pipe is written into a CPT_OBJ_NAME object, terminated by a
-+ * single zero byte.
-+ *
-+ * Returns the result of the last pipe read (0 at end of stream, negative
-+ * on a read error), or a negative error if the pipe or the thread could
-+ * not be created.  A failing sc_waitx() is only logged.
-+ * NOTE(review): the exit status of tar is not examined, and the result
-+ * of fget() is used unchecked.
-+ */
-+static int cpt_dump_tmpfs(char *path, struct cpt_context *ctx)
-+{
-+ int err;
-+ int pid;
-+ int pfd[2];
-+ struct file *f;
-+ struct cpt_object_hdr v;
-+ char buf[16];
-+ int n;
-+ loff_t saved_obj;
-+ struct args_t args;
-+
-+ err = sc_pipe(pfd);
-+ if (err < 0)
-+ return err;
-+ args.pfd = pfd;
-+ args.path = path;
-+ err = pid = local_kernel_thread(dumptmpfs, (void*)&args, SIGCHLD, 0);
-+ if (err < 0)
-+ goto out;
-+ /* Keep a reference to pfd[0] as a struct file, then close both
-+ * descriptors in this task; the child uses pfd[1] as its output. */
-+ f = fget(pfd[0]);
-+ sc_close(pfd[1]);
-+ sc_close(pfd[0]);
-+
-+ cpt_push_object(&saved_obj, ctx);
-+ cpt_open_object(NULL, ctx);
-+ v.cpt_next = CPT_NULL;
-+ v.cpt_object = CPT_OBJ_NAME;
-+ v.cpt_hdrlen = sizeof(v);
-+ v.cpt_content = CPT_CONTENT_NAME;
-+
-+ ctx->write(&v, sizeof(v), ctx);
-+
-+ /* Copy the stream, sizeof(buf) bytes at a time, until read returns <= 0. */
-+ do {
-+ mm_segment_t oldfs;
-+
-+ oldfs = get_fs(); set_fs(KERNEL_DS);
-+ n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
-+ set_fs(oldfs);
-+ if (n > 0)
-+ ctx->write(buf, n, ctx);
-+ } while (n > 0);
-+
-+ fput(f);
-+
-+ if ((err = sc_waitx(pid, 0)) < 0)
-+ eprintk_ctx("wait4: %d\n", err);
-+
-+ /* Terminating zero byte after the copied stream. */
-+ buf[0] = 0;
-+ ctx->write(buf, 1, ctx);
-+ ctx->align(ctx);
-+ cpt_close_object(ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+ return n;
-+
-+out:
-+ if (pfd[1] >= 0)
-+ sc_close(pfd[1]);
-+ if (pfd[0] >= 0)
-+ sc_close(pfd[0]);
-+ return err;
-+}
-+
-+/*
-+ * Write one CPT_OBJ_VFSMOUNT record: mount flags, superblock flags and
-+ * three nested strings (device name or "none", mount path, filesystem
-+ * type).  The content of a tmpfs mount is appended via cpt_dump_tmpfs().
-+ *
-+ * A mount whose path d_path() rejects with -EINVAL is silently skipped
-+ * (returns 0).  Returns -ENOMEM if the scratch page cannot be allocated,
-+ * -EINVAL for a mount whose root is not the superblock root (bind mount),
-+ * or the error reported by cpt_dump_tmpfs().
-+ */
-+static int dump_vfsmount(struct vfsmount *mnt, struct cpt_context *ctx)
-+{
-+	int err = 0;
-+	struct cpt_vfsmount_image v;
-+	loff_t saved_obj;
-+	char *path_buf, *path;
-+
-+	path_buf = (char *) __get_free_page(GFP_KERNEL);
-+	if (!path_buf)
-+		return -ENOMEM;
-+
-+	path = d_path(mnt->mnt_root, mnt, path_buf, PAGE_SIZE);
-+	if (IS_ERR(path)) {
-+		free_page((unsigned long) path_buf);
-+		return PTR_ERR(path) == -EINVAL ? 0 : PTR_ERR(path);
-+	}
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	v.cpt_next = -1;
-+	v.cpt_object = CPT_OBJ_VFSMOUNT;
-+	v.cpt_hdrlen = sizeof(v);
-+	v.cpt_content = CPT_CONTENT_ARRAY;
-+
-+	v.cpt_mntflags = mnt->mnt_flags;
-+	v.cpt_flags = mnt->mnt_sb->s_flags;
-+
-+	ctx->write(&v, sizeof(v), ctx);
-+
-+	cpt_push_object(&saved_obj, ctx);
-+	cpt_dump_string(mnt->mnt_devname ? : "none", ctx);
-+	cpt_dump_string(path, ctx);
-+	cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
-+#if 0
-+	/* This is an evident crap. Ask Savochkin, he might know this.
-+	 * Goal is to get some path to mount --bind to.
-+	 */
-+	cpt_dump_dentry(mnt->mnt_root, mnt->mnt_parent, ctx);
-+#else
-+	/* For now we just bail, when some FS is mounted not at root. */
-+	if (mnt->mnt_root != mnt->mnt_sb->s_root) {
-+		eprintk_ctx("mount --bind prevents checkpointing\n");
-+		err = -EINVAL;
-+	}
-+#endif
-+
-+	/* Do not archive the tmpfs when the mount is already rejected, and
-+	 * do not lose a failure to save its content. */
-+	if (!err && strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0)
-+		err = cpt_dump_tmpfs(path, ctx);
-+
-+	cpt_pop_object(&saved_obj, ctx);
-+
-+	cpt_close_object(ctx);
-+
-+	free_page((unsigned long) path_buf);
-+
-+	return err;
-+}
-+
-+/*
-+ * Write a CPT_OBJ_NAMESPACE header followed by one nested record per
-+ * mount on the namespace's list.  The list is walked under namespace_sem
-+ * (read) and the walk stops at the first failing mount.  Returns 0 or
-+ * that error; the object is closed in either case.
-+ */
-+static int dump_one_namespace(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct namespace *ns = obj->o_obj;
-+	struct cpt_object_hdr hdr;
-+	struct list_head *pos;
-+	loff_t saved_obj;
-+	int err = 0;
-+
-+	cpt_open_object(obj, ctx);
-+
-+	hdr.cpt_next = -1;
-+	hdr.cpt_object = CPT_OBJ_NAMESPACE;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = CPT_CONTENT_ARRAY;
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+
-+	cpt_push_object(&saved_obj, ctx);
-+
-+	down_read(&namespace_sem);
-+	list_for_each(pos, &ns->list) {
-+		struct vfsmount *mnt = list_entry(pos, struct vfsmount, mnt_list);
-+
-+		err = dump_vfsmount(mnt, ctx);
-+		if (err != 0)
-+			break;
-+	}
-+	up_read(&namespace_sem);
-+
-+	cpt_pop_object(&saved_obj, ctx);
-+
-+	cpt_close_object(ctx);
-+
-+	return err;
-+}
-+
-+/*
-+ * Write the CPT_SECT_NAMESPACE section: one record per collected
-+ * CPT_OBJ_NAMESPACE object.  Returns 0 or the first dumper error (the
-+ * section is then left unclosed).
-+ */
-+int cpt_dump_namespace(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	int err;
-+
-+	cpt_open_section(ctx, CPT_SECT_NAMESPACE);
-+
-+	for_each_object(obj, CPT_OBJ_NAMESPACE) {
-+		err = dump_one_namespace(obj, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_files.h linux-2.6.16-026test009/kernel/cpt/cpt_files.h
---- linux-2.6.16.orig/kernel/cpt/cpt_files.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_files.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,46 @@
-+int cpt_collect_files(cpt_context_t *);
-+int cpt_collect_fs(cpt_context_t *);
-+int cpt_collect_namespace(cpt_context_t *);
-+int cpt_collect_sysvsem_undo(cpt_context_t *);
-+int cpt_collect_tty(struct file *, cpt_context_t *);
-+int cpt_dump_files(struct cpt_context *ctx);
-+int cpt_dump_files_struct(struct cpt_context *ctx);
-+int cpt_dump_fs_struct(struct cpt_context *ctx);
-+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx);
-+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx);
-+int cpt_dump_tty(cpt_object_t *, struct cpt_context *ctx);
-+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx);
-+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii, unsigned flags, struct cpt_context *ctx);
-+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx);
-+
-+int rst_posix_locks(struct cpt_context *ctx);
-+
-+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
-+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
-+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
-+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
-+int rst_restore_fs(struct cpt_context *ctx);
-+
-+int cpt_collect_sysv(cpt_context_t *);
-+int cpt_dump_sysvsem(struct cpt_context *ctx);
-+int rst_sysv_ipc(struct cpt_context *ctx);
-+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
-+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
-+
-+int cpt_dump_namespace(struct cpt_context *ctx);
-+int rst_root_namespace(struct cpt_context *ctx);
-+
-+int rst_stray_files(struct cpt_context *ctx);
-+int rst_tty_jobcontrol(struct cpt_context *ctx);
-+
-+void rst_flush_filejobs(struct cpt_context *);
-+int rst_do_filejobs(struct cpt_context *);
-+
-+int rst_eventpoll(struct cpt_context *);
-+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
-+ unsigned flags,
-+ struct cpt_context *ctx);
-+int cpt_dump_epolldev(cpt_object_t *obj, struct cpt_context *);
-+
-+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
-+ cpt_context_t *ctx);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_fsmagic.h linux-2.6.16-026test009/kernel/cpt/cpt_fsmagic.h
---- linux-2.6.16.orig/kernel/cpt/cpt_fsmagic.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_fsmagic.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,15 @@
-+/* Collected from kernel sources. */
-+
-+#define FSMAGIC_TMPFS 0x01021994
-+#define FSMAGIC_PIPEFS 0x50495045
-+#define FSMAGIC_SOCKFS 0x534F434B
-+#define FSMAGIC_PFMFS 0xa0b4d889
-+#define FSMAGIC_BDEV 0x62646576
-+#define FSMAGIC_EPOLL 0x03111965
-+#define FSMAGIC_FUTEX 0x0BAD1DEA
-+#define FSMAGIC_MQUEUE 0x19800202
-+#define FSMAGIC_PROC 0x9fa0
-+#define FSMAGIC_DEVPTS 0x1CD1
-+#define FSMAGIC_AUTOFS 0x0187
-+#define FSMAGIC_EXT2 0xEF53
-+#define FSMAGIC_REISER 0x52654973
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_kernel.c linux-2.6.16-026test009/kernel/cpt/cpt_kernel.c
---- linux-2.6.16.orig/kernel/cpt/cpt_kernel.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_kernel.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,124 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_kernel.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#define __KERNEL_SYSCALLS__ 1
-+
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/sched.h>
-+#include <linux/mm.h>
-+#include <linux/kernel.h>
-+#include <asm/cpufeature.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_kernel.h"
-+#include "cpt_syscalls.h"
-+
-+#ifndef CONFIG_X86_64
-+
-+/*
-+ * Entry stub for threads created by asm_kernel_thread() (32-bit x86 only).
-+ * asm_kernel_thread() puts the thread function in %ebx and its argument
-+ * in %edx.  The stub copies the argument to %eax and also pushes it,
-+ * calls the function, then pushes the function's return value and a
-+ * zero and calls complete_and_exit().
-+ */
-+extern void local_kernel_thread_helper(void);
-+__asm__(".section .text\n"
-+ ".align 4\n"
-+ "local_kernel_thread_helper:\n\t"
-+ "movl %edx,%eax\n\t"
-+ "pushl %edx\n\t"
-+ "call *%ebx\n\t"
-+ "pushl %eax\n\t"
-+ "pushl $0\n\t"
-+ "call complete_and_exit\n"
-+ ".previous");
-+
-+/*
-+ * Create a kernel thread that starts in local_kernel_thread_helper with
-+ * fn in %ebx and arg in %edx.  CLONE_UNTRACED is always added to flags;
-+ * pid is passed through to do_fork_pid().  Returns what do_fork_pid()
-+ * returns.
-+ */
-+int asm_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
-+{
-+	struct pt_regs regs;
-+	unsigned long clone_flags = flags | CLONE_UNTRACED;
-+
-+	memset(&regs, 0, sizeof(regs));
-+
-+	/* Where the new thread starts executing. */
-+	regs.eip = (unsigned long) local_kernel_thread_helper;
-+	regs.xcs = __KERNEL_CS;
-+	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
-+	regs.orig_eax = -1;
-+
-+	regs.xds = __USER_DS;
-+	regs.xes = __USER_DS;
-+
-+	/* Inputs consumed by the helper stub. */
-+	regs.ebx = (unsigned long) fn;
-+	regs.edx = (unsigned long) arg;
-+
-+	/* Ok, create the new process.. */
-+	return do_fork_pid(clone_flags, 0, &regs, 0, NULL, NULL, pid);
-+}
-+#endif
-+
-+/*
-+ * Start a kernel thread on behalf of this module.  A module reference is
-+ * taken first (-EBUSY if that fails) and dropped again only when thread
-+ * creation fails; on success it stays held.  Returns the result of
-+ * asm_kernel_thread().
-+ */
-+int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
-+{
-+	pid_t child;
-+
-+	if (try_module_get(THIS_MODULE) == 0)
-+		return -EBUSY;
-+
-+	child = asm_kernel_thread(fn, arg, flags, pid);
-+	if (child >= 0)
-+		return child;
-+
-+	module_put(THIS_MODULE);
-+	return child;
-+}
-+
-+#ifdef __i386__
-+static int errno;
-+#endif
-+
-+/*
-+ * execve() wrapper with kernel-style error reporting: on __i386__ a
-+ * negative result is replaced by -errno; otherwise the raw result of
-+ * execve() is returned.
-+ */
-+int sc_execve(char *cmd, char **argv, char **env)
-+{
-+	int rc = execve(cmd, argv, env);
-+
-+#ifdef __i386__
-+	if (rc < 0)
-+		return -errno;
-+#endif
-+	return rc;
-+}
-+
-+/*
-+ * Build a bitmask of CPT_CPU_X86_* bits describing the features of the
-+ * running CPU (CMOV, FXSR, SSE, SSE2, MMX, 3DNow!, 3DNow! ext, SEP).
-+ * On CONFIG_X86_64 the SSE2 bit is set unconditionally and the EMT64 bit
-+ * is added.
-+ */
-+unsigned int test_cpu_caps(void)
-+{
-+	unsigned int flags = 0;
-+	if (boot_cpu_has(X86_FEATURE_CMOV))
-+		flags |= 1 << CPT_CPU_X86_CMOV;
-+	if (cpu_has_fxsr)
-+		flags |= 1 << CPT_CPU_X86_FXSR;
-+	if (cpu_has_xmm)
-+		flags |= 1 << CPT_CPU_X86_SSE;
-+	/* On 64-bit the test is compiled out, so the bit is always set. */
-+#ifndef CONFIG_X86_64
-+	if (cpu_has_xmm2)
-+#endif
-+		flags |= 1 << CPT_CPU_X86_SSE2;
-+	if (cpu_has_mmx)
-+		flags |= 1 << CPT_CPU_X86_MMX;
-+	if (boot_cpu_has(X86_FEATURE_3DNOW))
-+		flags |= 1 << CPT_CPU_X86_3DNOW;
-+	if (boot_cpu_has(X86_FEATURE_3DNOWEXT))
-+		flags |= 1 << CPT_CPU_X86_3DNOW2;
-+	if (boot_cpu_has(X86_FEATURE_SEP))
-+		flags |= 1 << CPT_CPU_X86_SEP;
-+#ifdef CONFIG_X86_64
-+	flags |= 1 << CPT_CPU_X86_EMT64;
-+#endif
-+	return flags;
-+}
-+
-+/*
-+ * Build a bitmask of CPT_KERNEL_CONFIG_* bits for the running kernel.
-+ * Only CPT_KERNEL_CONFIG_PAE is reported, set when the kernel is built
-+ * with CONFIG_X86_PAE or CONFIG_X86_64.
-+ */
-+unsigned int test_kernel_config(void)
-+{
-+	unsigned int flags = 0;
-+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-+	flags |= 1 << CPT_KERNEL_CONFIG_PAE;
-+#endif
-+	return flags;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_kernel.h linux-2.6.16-026test009/kernel/cpt/cpt_kernel.h
---- linux-2.6.16.orig/kernel/cpt/cpt_kernel.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_kernel.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,74 @@
-+/* Interface to kernel vars which we had to _add_. */
-+
-+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-+
-+#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
-+#define TASK_TRACED TASK_STOPPED
-+#define unix_peer(sk) ((sk)->sk_pair)
-+#define page_mapcount(pg) ((pg)->mapcount)
-+#else
-+#define unix_peer(sk) (unix_sk(sk)->peer)
-+#endif
-+
-+#ifdef CONFIG_X86_64
-+#define cpu_has_fxsr 1
-+#endif
-+
-+/* Fill *ts from do_gettimeofday(), converting microseconds to nanoseconds. */
-+static inline void do_gettimespec(struct timespec *ts)
-+{
-+	struct timeval now;
-+
-+	do_gettimeofday(&now);
-+	ts->tv_nsec = now.tv_usec*1000;
-+	ts->tv_sec = now.tv_sec;
-+}
-+
-+int local_kernel_thread(int (*fn)(void *),
-+ void * arg,
-+ unsigned long flags,
-+ pid_t pid);
-+int asm_kernel_thread(int (*fn)(void *),
-+ void * arg,
-+ unsigned long flags,
-+ pid_t pid);
-+
-+unsigned int test_cpu_caps(void);
-+unsigned int test_kernel_config(void);
-+
-+/*
-+ * If bit `flag' is set in src but clear in dst, print a warning naming
-+ * `message' and set ret to 1.  `message' must be a string literal.
-+ *
-+ * NOTE(review): this expands to a bare nested `if', so it must not be
-+ * used as the unbraced body of an if/else.
-+ */
-+#define test_one_flag(src, dst, flag, message, ret) \
-+if ((src) & (1 << (flag))) \
-+	if (!((dst) & (1 << (flag)))) { \
-+		wprintk("Destination cpu does not have " message "\n"); \
-+		(ret) = 1; \
-+	}
-+
-+/*
-+ * Store (sec, nsec) in *ts after moving whole seconds between the two
-+ * until 0 <= nsec < NSEC_PER_SEC.
-+ */
-+static inline void
-+_set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
-+{
-+	for (; nsec >= NSEC_PER_SEC; nsec -= NSEC_PER_SEC)
-+		++sec;
-+	for (; nsec < 0; nsec += NSEC_PER_SEC)
-+		--sec;
-+
-+	ts->tv_sec = sec;
-+	ts->tv_nsec = nsec;
-+}
-+
-+/*
-+ * Convert a nanosecond count to a struct timespec.  Zero maps to {0, 0};
-+ * for negative input the quotient/remainder pair is re-normalized so that
-+ * tv_nsec ends up non-negative.
-+ */
-+static inline struct timespec
-+_ns_to_timespec(const nsec_t nsec)
-+{
-+	struct timespec ts = {0, 0};
-+
-+	if (nsec) {
-+		ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
-+		if (unlikely(nsec < 0))
-+			_set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
-+	}
-+
-+	return ts;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_mm.c linux-2.6.16-026test009/kernel/cpt/cpt_mm.c
---- linux-2.6.16.orig/kernel/cpt/cpt_mm.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_mm.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,826 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_mm.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/hugetlb.h>
-+#include <linux/errno.h>
-+#include <linux/ve.h>
-+#include <linux/pagemap.h>
-+#include <linux/rmap.h>
-+#include <asm/ldt.h>
-+#include <asm/mmu.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_kernel.h"
-+#include "cpt_fsmagic.h"
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+#include "cpt_pagein.h"
-+#endif
-+#include "cpt_ubc.h"
-+
-+/*
-+ * Quiesce one AIO context before checkpointing.  Fails with -EBUSY if the
-+ * run list is non-empty on entry, or if any request is still queued or
-+ * active after wait_for_all_aios().  The mm argument is not used.
-+ */
-+static int collect_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
-+			       cpt_context_t *ctx)
-+{
-+	int still_busy;
-+
-+	if (!list_empty(&aio_ctx->run_list)) {
-+		/* This is impossible at least with kernel 2.6.8.1 or 2.6.16 */
-+		eprintk_ctx("run list is not empty, cannot suspend AIO\n");
-+		return -EBUSY;
-+	}
-+
-+	/* Wait for pending IOCBs. Linux AIO is mostly _fake_.
-+	 * It is actually synchronous, except for direct IO and
-+	 * some funny raw USB things, which cannot happen inside VE.
-+	 * However, we do this for future.
-+	 *
-+	 * Later note: in 2.6.16 we may allow O_DIRECT, so that
-+	 * it is not meaningless code.
-+	 */
-+	wait_for_all_aios(aio_ctx);
-+
-+	still_busy = !list_empty(&aio_ctx->run_list) ||
-+		     !list_empty(&aio_ctx->active_reqs) ||
-+		     aio_ctx->reqs_active;
-+	if (!still_busy)
-+		return 0;
-+
-+	eprintk_ctx("were not able to suspend AIO\n");
-+	return -EBUSY;
-+}
-+
-+/*
-+ * Collect what one mm depends on: the file behind every file-backed vma
-+ * (as CPT_OBJ_FILE), the mm's beancounter, and each AIO context on
-+ * mm->ioctx_list (which must be quiescent).  Returns 0, -ENOMEM, or the
-+ * error from collect_one_aio_ctx().
-+ */
-+static int collect_one_mm(struct mm_struct *mm, cpt_context_t * ctx)
-+{
-+	struct vm_area_struct *vma;
-+	struct kioctx *aio_ctx;
-+	int err;
-+
-+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-+		if (!vma->vm_file)
-+			continue;
-+		if (cpt_object_add(CPT_OBJ_FILE, vma->vm_file, ctx) == NULL)
-+			return -ENOMEM;
-+	}
-+	if (cpt_add_ubc(mm->mm_ub, ctx) == NULL)
-+		return -ENOMEM;
-+
-+	/* An empty list simply makes the loop body never run. */
-+	for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next) {
-+		err = collect_one_aio_ctx(mm, aio_ctx, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Register the mm of every collected task as CPT_OBJ_MM, then for each
-+ * registered mm: refuse (-EBUSY) if its mm_users count differs from the
-+ * object's o_count, assign the next index (starting at 1) and collect
-+ * its dependencies with collect_one_mm().
-+ */
-+int cpt_collect_mm(cpt_context_t * ctx)
-+{
-+	cpt_object_t *obj;
-+	int index = 1;
-+	int err;
-+
-+	for_each_object(obj, CPT_OBJ_TASK) {
-+		task_t *tsk = obj->o_obj;
-+
-+		if (tsk->mm) {
-+			if (cpt_object_add(CPT_OBJ_MM, tsk->mm, ctx) == NULL)
-+				return -ENOMEM;
-+		}
-+	}
-+
-+	for_each_object(obj, CPT_OBJ_MM) {
-+		struct mm_struct *mm = obj->o_obj;
-+
-+		if (obj->o_count != atomic_read(&mm->mm_users)) {
-+			eprintk_ctx("mm_struct is referenced outside %d %d\n", obj->o_count, atomic_read(&mm->mm_users));
-+			return -EBUSY;
-+		}
-+		cpt_obj_setindex(obj, index, ctx);
-+		index++;
-+
-+		err = collect_one_mm(mm, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	return 0;
-+}
-+
-+static int zcnt, scnt, scnt0, ucnt;
-+
-+/* Function where_is_anon_page() returns the address of an anonymous page in
-+ * the mm of an already dumped process. This happens e.g. after fork(). We do
-+ * not use this right now, just keep statistics; it is difficult to restore
-+ * such state, but the most direct use is to save space in the dumped image. */
-+
-+
-+/*
-+ * Compute the virtual address at which page would sit inside vma, from
-+ * page->index and vma->vm_pgoff.  If the result falls outside
-+ * [vm_start, vm_end), bit 0 is set in the returned value.
-+ */
-+static inline unsigned long
-+vma_address0(struct page *page, struct vm_area_struct *vma)
-+{
-+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-+	unsigned long addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-+
-+	if (unlikely(addr < vma->vm_start || addr >= vma->vm_end))
-+		return addr | 1;
-+	return addr;
-+}
-+
-+/*
-+ * Check whether address in vma's mm is currently mapped to page.
-+ * Walks pgd/pud/pmd/pte and returns 1 only if the pte is present and
-+ * refers to the same page frame; returns 0 otherwise, including when the
-+ * page table lock cannot be taken without waiting.
-+ */
-+static int really_this_one(struct vm_area_struct *vma, unsigned long address,
-+ struct page *page)
-+{
-+ struct mm_struct *mm = vma->vm_mm;
-+ pgd_t *pgd;
-+ pud_t *pud;
-+ pmd_t *pmd;
-+ pte_t *pte;
-+ spinlock_t *ptl;
-+ int result;
-+
-+ pgd = pgd_offset(mm, address);
-+ if (unlikely(!pgd_present(*pgd)))
-+ return 0;
-+
-+ pud = pud_offset(pgd, address);
-+ if (!pud_present(*pud))
-+ return 0;
-+
-+ pmd = pmd_offset(pud, address);
-+ if (unlikely(!pmd_present(*pmd)))
-+ return 0;
-+
-+ result = 0;
-+ pte = pte_offset_map(pmd, address);
-+ /* Unlocked pre-check; the pte is tested again under the lock below. */
-+ if (!pte_present(*pte)) {
-+ pte_unmap(pte);
-+ return 0;
-+ }
-+
-+ ptl = pte_lockptr(mm, pmd);
-+ /* Only a trylock: on contention the answer is simply "no". */
-+ if (!spin_trylock(ptl)) {
-+ pte_unmap(pte);
-+ return 0;
-+ }
-+ if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte))
-+ result = 1;
-+ pte_unmap_unlock(pte, ptl);
-+ return result;
-+}
-+
-+/*
-+ * Look for another collected mm that maps the anonymous page at the same
-+ * address mapaddr.  Every vma on the page's anon_vma list is examined;
-+ * a candidate must belong to a different mm whose CPT_OBJ_MM object has
-+ * a position other than CPT_NULL and an index lower than the best found
-+ * so far (initially mmobj's own index), and really_this_one() must
-+ * confirm the mapping.
-+ *
-+ * Returns the image position (o_pos) of the accepted mm object with the
-+ * lowest index, or CPT_NULL if the page is not anonymous, its anon_vma
-+ * cannot be locked, or no candidate qualifies.
-+ */
-+static loff_t where_is_anon_page(cpt_object_t *mmobj, unsigned long mapaddr,
-+ struct page *page, cpt_context_t * ctx)
-+{
-+ loff_t mmptr = CPT_NULL;
-+ struct anon_vma *anon_vma;
-+ struct vm_area_struct *vma;
-+ int idx = mmobj->o_index;
-+
-+ if (!PageAnon(page))
-+ return CPT_NULL;
-+
-+ anon_vma = page_lock_anon_vma(page);
-+ if (!anon_vma)
-+ return CPT_NULL;
-+
-+ list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-+ unsigned long addr = vma_address0(page, vma);
-+ cpt_object_t *obj;
-+
-+ /* We do not try to support mremapped regions (addr != mapaddr),
-+ * only mmaps directly inherited via fork().
-+ * With this limitation we may check self-consistency of
-+ * vmas (vm_start, vm_pgoff, anon_vma) before
-+ * doing __copy_page_range() in rst_mm.
-+ */
-+ if (mmobj->o_obj != vma->vm_mm && addr == mapaddr) {
-+ obj = lookup_cpt_object(CPT_OBJ_MM, vma->vm_mm, ctx);
-+ if (obj && obj->o_pos != CPT_NULL && obj->o_index < idx) {
-+ if (really_this_one(vma, addr, page)) {
-+ mmptr = obj->o_pos;
-+ /* Tighten the bound so only lower indices can replace this hit. */
-+ idx = obj->o_index;
-+ }
-+ }
-+ }
-+ }
-+ /* Releases the lock taken by page_lock_anon_vma(). */
-+ spin_unlock(&anon_vma->lock);
-+
-+ return mmptr;
-+}
-+
-+/*
-+ * Describes an address range [start, end) together with a type, a page
-+ * offset and an mm image position.
-+ * NOTE(review): the users of this struct are outside this view;
-+ * presumably type holds a PD_* value - confirm.
-+ */
-+struct page_area
-+{
-+ int type;
-+ unsigned long start;
-+ unsigned long end;
-+ unsigned long pgoff;
-+ loff_t mm;
-+};
-+
-+/* Per-page classification filled in by page_get_desc(). */
-+struct page_desc
-+{
-+ int type; /* one of the PD_* values */
-+ int index; /* page offset; defaults to the linear index within the vma */
-+ loff_t mm; /* NOTE(review): presumably image position of another mm - confirm */
-+ int shared; /* cleared to 0 at the start of page_get_desc() */
-+};
-+
-+/* Page classification values stored in page_desc.type. */
-+enum {
-+ PD_ABSENT, /* 0: nothing to copy (backing store, or not mapped yet) */
-+ PD_COPY, /* 1: page requires copy */
-+ PD_ZERO, /* 2: page requires copy but its content is zero */
-+ PD_CLONE, /* 3: write-protected page shared after fork() */
-+ PD_FUNKEY, /* 4: something unsupported, copied right away */
-+ PD_LAZY /* NOTE(review): presumably a page that is in swap - confirm */
-+};
-+
-+/* 0: page can be obtained from backstore, or is a still unmapped anonymous
-+ page, or something else which does not require a copy.
-+ 1: page requires copy
-+ 2: page requires copy but its content is zero. Quite useless.
-+ 3: wp page is shared after fork(). It is to be COWed when modified.
-+ 4: page is something unsupported... We copy it right now.
-+ */
-+
-+
-+
-+/*
-+ * Classify the page mapped at @addr inside @vma and report it in @pdesc.
-+ *
-+ * @mmobj: checkpoint object of the mm being dumped (used for messages and
-+ *         for the where_is_anon_page() lookup).
-+ * @vma:   VMA containing @addr.
-+ * @addr:  user address of the page.
-+ * @pdesc: output; type, index, shared and mm are always written.
-+ * @ctx:   checkpoint context.
-+ *
-+ * Resulting pdesc->type:
-+ *   PD_ABSENT - VM_IO vma, missing page table / empty pte, nonlinear file
-+ *               pte, page belonging to the vma's file mapping, or a
-+ *               read-only ZERO_PAGE.
-+ *   PD_LAZY   - non-present pte of a private mapping (swap), or a present
-+ *               page whose pte is not young.
-+ *   PD_CLONE  - anonymous page mapped more than once for which
-+ *               where_is_anon_page() found a source mm.
-+ *   PD_FUNKEY - anything unsupported (ucnt is bumped).
-+ *   PD_COPY   - everything else.
-+ */
-+static void page_get_desc(cpt_object_t *mmobj,
-+		struct vm_area_struct *vma, unsigned long addr,
-+		struct page_desc *pdesc, cpt_context_t * ctx)
-+{
-+	struct mm_struct *mm = vma->vm_mm;
-+	pgd_t *pgd;
-+	pud_t *pud;
-+	pmd_t *pmd;
-+	pte_t *ptep, pte;
-+	spinlock_t *ptl;
-+	struct page *pg;
-+	int linear_index = (addr - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff;
-+
-+	pdesc->index = linear_index;
-+	pdesc->shared = 0;
-+	/* The caller copies ->mm unconditionally, so never leave it
-+	 * indeterminate. */
-+	pdesc->mm = CPT_NULL;
-+
-+	if (vma->vm_flags & VM_IO) {
-+		pdesc->type = PD_ABSENT;
-+		return;
-+	}
-+
-+	pgd = pgd_offset(mm, addr);
-+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-+		goto out_absent;
-+	pud = pud_offset(pgd, addr);
-+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-+		goto out_absent;
-+	pmd = pmd_offset(pud, addr);
-+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-+		goto out_absent;
-+	if (pmd_huge(*pmd)) {
-+		eprintk_ctx("page_huge\n");
-+		goto out_unsupported;
-+	}
-+
-+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
-+	if (!ptep)
-+		goto out_absent;
-+
-+	pte = *ptep;
-+	if (pte_none(pte))
-+		goto out_absent_unmap;
-+
-+	if (!pte_present(pte)) {
-+		if (pte_file(pte)) {
-+			/* Nonlinear mapping: the file offset lives in the pte. */
-+			pdesc->index = pte_to_pgoff(pte);
-+			goto out_absent_unmap;
-+		}
-+		if (vma->vm_flags & VM_SHARED) {
-+			/* It is impossible: shared mappings cannot be in swap */
-+			eprintk_ctx("shared mapping is not present: %08lx@%Ld\n", addr, mmobj->o_pos);
-+			goto out_unsupported_unmap;
-+		}
-+		/* Otherwise it is in swap. */
-+		goto out_lazy_unmap;
-+	} else if ((pg = vm_normal_page(vma, addr, pte)) != NULL) {
-+
-+		if (pg->mapping && !PageAnon(pg)) {
-+			if (vma->vm_file == NULL) {
-+				eprintk_ctx("pg->mapping!=NULL for fileless vma: %08lx\n", addr);
-+				goto out_unsupported_unmap;
-+			}
-+			if (vma->vm_file->f_mapping != pg->mapping) {
-+				eprintk_ctx("pg->mapping!=f_mapping: %08lx %p %p %Ld\n", addr, vma->vm_file->f_mapping, pg->mapping, mmobj->o_pos);
-+				goto out_unsupported_unmap;
-+			}
-+			pdesc->index = (pg->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
-+			/* Page is in backstore. For us it is like
-+			 * it is not present.
-+			 */
-+			goto out_absent_unmap;
-+		}
-+
-+		if (PageReserved(pg)) {
-+			/* Special case: ZERO_PAGE is used, when an
-+			 * anonymous page is accessed but not written. */
-+			if (pg == ZERO_PAGE(addr)) {
-+				if (pte_write(pte)) {
-+					eprintk_ctx("not funny already, writable ZERO_PAGE\n");
-+					goto out_unsupported_unmap;
-+				}
-+				zcnt++;
-+				goto out_absent_unmap;
-+			}
-+			eprintk_ctx("reserved page %lu at %08lx@%Ld\n", pg->index, addr, mmobj->o_pos);
-+			goto out_unsupported_unmap;
-+		}
-+
-+		if (pg == ZERO_PAGE(addr)) {
-+			wprintk_ctx("that's how it works now\n");
-+		}
-+
-+		if (!pg->mapping) {
-+			eprintk_ctx("page without mapping at %08lx@%Ld\n", addr, mmobj->o_pos);
-+			goto out_unsupported_unmap;
-+		}
-+
-+		if (pg->mapping && page_mapcount(pg) > 1) {
-+			pdesc->shared = 1;
-+			pdesc->mm = where_is_anon_page(mmobj, addr, pg, ctx);
-+			if (pdesc->mm != CPT_NULL) {
-+				scnt0++;
-+				goto out_clone_unmap;
-+			} else {
-+				scnt++;
-+			}
-+		}
-+
-+		/* Not referenced recently: candidate for lazy transfer. */
-+		if (!pte_young(pte))
-+			goto out_lazy_unmap;
-+	}
-+	pte_unmap_unlock(ptep, ptl);
-+	pdesc->type = PD_COPY;
-+	return;
-+
-+out_lazy_unmap:
-+	pte_unmap_unlock(ptep, ptl);
-+	pdesc->type = PD_LAZY;
-+	return;
-+
-+out_absent_unmap:
-+	pte_unmap_unlock(ptep, ptl);
-+out_absent:
-+	pdesc->type = PD_ABSENT;
-+	return;
-+
-+out_clone_unmap:
-+	pte_unmap_unlock(ptep, ptl);
-+	pdesc->type = PD_CLONE;
-+	return;
-+
-+out_unsupported_unmap:
-+	pte_unmap_unlock(ptep, ptl);
-+out_unsupported:
-+	ucnt++;
-+	pdesc->type = PD_FUNKEY;
-+	return;
-+}
-+
-+/*
-+ * Write the content of the pages in [start, end) of @vma to the image.
-+ *
-+ * Pages are pinned with get_user_pages() in batches of MAX_PAGE_BATCH and
-+ * written one PAGE_SIZE chunk at a time through ctx->write(). If a batch
-+ * cannot be pinned completely, nothing from that batch is written, an error
-+ * is logged and the function returns early (it has no way to report the
-+ * failure to its caller).
-+ *
-+ * ATTN: We give "current" to get_user_pages(). This is wrong, but get_user_pages()
-+ * does not really need this thing. It just stores some page fault stats there.
-+ *
-+ * BUG: some archs (f.e. sparc64, but not Intel*) require flush cache pages
-+ * before accessing vma.
-+ */
-+void dump_pages(struct vm_area_struct *vma, unsigned long start,
-+		unsigned long end, struct cpt_context *ctx)
-+{
-+#define MAX_PAGE_BATCH 16
-+	struct page *pg[MAX_PAGE_BATCH];
-+	int npages = (end - start)/PAGE_SIZE;
-+	int count = 0;
-+
-+	while (count < npages) {
-+		int copy = npages - count;
-+		int n;
-+		int i;
-+
-+		if (copy > MAX_PAGE_BATCH)
-+			copy = MAX_PAGE_BATCH;
-+		n = get_user_pages(current, vma->vm_mm, start, copy,
-+				   0, 1, pg, NULL);
-+		if (n == copy) {
-+			for (i = 0; i < n; i++) {
-+				char *maddr = kmap(pg[i]);
-+				ctx->write(maddr, PAGE_SIZE, ctx);
-+				kunmap(pg[i]);
-+			}
-+		} else {
-+			eprintk_ctx("get_user_pages fault\n");
-+		}
-+
-+		/* Drop the references taken by get_user_pages(); n may be
-+		 * negative on error, in which case nothing was pinned. */
-+		for (i = 0; i < n; i++)
-+			page_cache_release(pg[i]);
-+
-+		if (n != copy)
-+			return;
-+
-+		start += n*PAGE_SIZE;
-+		count += n;
-+	}
-+}
-+
-+/*
-+ * Write one page block header (@pgb, with cpt_start/cpt_end already set by
-+ * the caller) and, for PD_COPY and PD_LAZY, the page contents behind it.
-+ * @copy is a PD_* value. Always returns 0.
-+ */
-+int dump_page_block(struct vm_area_struct *vma, struct cpt_page_block *pgb,
-+		    int copy,
-+		    struct cpt_context *ctx)
-+{
-+	loff_t saved_object;
-+	int with_data = (copy == PD_COPY || copy == PD_LAZY);
-+
-+	cpt_push_object(&saved_object, ctx);
-+
-+	if (copy == PD_LAZY)
-+		pgb->cpt_object = CPT_OBJ_LAZYPAGES;
-+	else
-+		pgb->cpt_object = CPT_OBJ_PAGES;
-+	pgb->cpt_hdrlen = sizeof(*pgb);
-+	if (with_data)
-+		pgb->cpt_content = CPT_CONTENT_DATA;
-+	else
-+		pgb->cpt_content = CPT_CONTENT_VOID;
-+
-+	ctx->write(pgb, sizeof(*pgb), ctx);
-+	if (with_data)
-+		dump_pages(vma, pgb->cpt_start, pgb->cpt_end, ctx);
-+
-+	cpt_close_object(ctx);
-+	cpt_pop_object(&saved_object, ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Write a CPT_OBJ_REMAPPAGES record for the absent run described by @pa.
-+ * No page data follows the header. Always returns 0.
-+ */
-+int dump_remappage_block(struct vm_area_struct *vma, struct page_area *pa,
-+ struct cpt_context *ctx)
-+{
-+ struct cpt_remappage_block pgb;
-+ loff_t saved_object;
-+
-+ cpt_push_object(&saved_object, ctx);
-+
-+ pgb.cpt_object = CPT_OBJ_REMAPPAGES;
-+ pgb.cpt_hdrlen = sizeof(pgb);
-+ pgb.cpt_content = CPT_CONTENT_VOID;
-+ pgb.cpt_start = pa->start;
-+ pgb.cpt_end = pa->end;
-+ /* pa->pgoff is the index of the LAST page of the run; step back to the first. */
-+ pgb.cpt_pgoff = pa->pgoff - (pa->end-pa->start)/PAGE_SIZE + 1;
-+
-+ ctx->write(&pgb, sizeof(pgb), ctx);
-+ cpt_close_object(ctx);
-+ cpt_pop_object(&saved_object, ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Write a CPT_OBJ_COPYPAGES record for the run described by @pa; pa->mm is
-+ * stored as the source the pages are to be cloned from. No page data follows
-+ * the header. Always returns 0.
-+ */
-+int dump_copypage_block(struct vm_area_struct *vma, struct page_area *pa,
-+ struct cpt_context *ctx)
-+{
-+ struct cpt_copypage_block pgb;
-+ loff_t saved_object;
-+
-+ cpt_push_object(&saved_object, ctx);
-+
-+ pgb.cpt_object = CPT_OBJ_COPYPAGES;
-+ pgb.cpt_hdrlen = sizeof(pgb);
-+ pgb.cpt_content = CPT_CONTENT_VOID;
-+ pgb.cpt_start = pa->start;
-+ pgb.cpt_end = pa->end;
-+ pgb.cpt_source = pa->mm;
-+
-+ ctx->write(&pgb, sizeof(pgb), ctx);
-+ cpt_close_object(ctx);
-+ cpt_pop_object(&saved_object, ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Write a CPT_OBJ_LAZYPAGES record for the run described by @pa. No page
-+ * data follows the header. Always returns 0.
-+ */
-+int dump_lazypage_block(struct vm_area_struct *vma, struct page_area *pa,
-+ cpt_context_t *ctx)
-+{
-+ struct cpt_lazypage_block pgb;
-+ loff_t saved_object;
-+
-+ cpt_push_object(&saved_object, ctx);
-+
-+ pgb.cpt_object = CPT_OBJ_LAZYPAGES;
-+ pgb.cpt_hdrlen = sizeof(pgb);
-+ pgb.cpt_content = CPT_CONTENT_VOID;
-+ pgb.cpt_start = pa->start;
-+ pgb.cpt_end = pa->end;
-+ /* NOTE(review): without CONFIG_VZ_CHECKPOINT_LAZY nothing here assigns
-+  * cpt_index before pgb is written; dump_one_vma() converts PD_LAZY to
-+  * PD_COPY in that configuration, so this path looks unreachable then. */
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ pgb.cpt_index = cpt_alloc_pgin_index(vma, pa->start,
-+ (pa->end-pa->start)/PAGE_SIZE, ctx);
-+#endif
-+ ctx->write(&pgb, sizeof(pgb), ctx);
-+ cpt_close_object(ctx);
-+ cpt_pop_object(&saved_object, ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Decide whether the page described by @pd may be appended to the run @pa.
-+ * Returns 1 if it may, 0 if the run has to be flushed first.
-+ */
-+static int can_expand(struct page_area *pa, struct page_desc *pd)
-+{
-+	/* An empty run accepts any page. */
-+	if (pa->start == pa->end)
-+		return 1;
-+
-+	if (pa->type != pd->type)
-+		return 0;
-+
-+	switch (pa->type) {
-+	case PD_ABSENT:
-+		/* Absent runs must stay contiguous in page index. */
-+		return pd->index == pa->pgoff + 1;
-+	case PD_CLONE:
-+		/* Cloned runs must share one source. */
-+		return pd->mm == pa->mm;
-+	default:
-+		return 1;
-+	}
-+}
-+
-+/*
-+ * Write out the accumulated run @pa of @vma as the image block matching its
-+ * type. Returns 1 when a clone (COPYPAGES) block was written, 0 otherwise.
-+ */
-+static int dump_one_vma_flush_area(struct vm_area_struct *vma,
-+		struct page_area *pa, struct cpt_context *ctx)
-+{
-+	struct cpt_page_block pgb;
-+
-+	switch (pa->type) {
-+	case PD_COPY:
-+	case PD_ZERO:
-+		pgb.cpt_start = pa->start;
-+		pgb.cpt_end = pa->end;
-+		dump_page_block(vma, &pgb, pa->type, ctx);
-+		return 0;
-+	case PD_CLONE:
-+		dump_copypage_block(vma, pa, ctx);
-+		return 1;
-+	case PD_LAZY:
-+		dump_lazypage_block(vma, pa, ctx);
-+		return 0;
-+	case PD_ABSENT:
-+		/* Only absent runs whose page index differs from the linear
-+		 * one need a record. */
-+		if (pa->pgoff != (pa->end - vma->vm_start)/PAGE_SIZE + vma->vm_pgoff - 1)
-+			dump_remappage_block(vma, pa, ctx);
-+		return 0;
-+	default:
-+		return 0;
-+	}
-+}
-+
-+/*
-+ * Dump one VMA of the mm described by @mmobj: a cpt_vma_image header followed
-+ * by page blocks, one per run of consecutive pages of the same kind.
-+ *
-+ * Returns 0 on success, -EINVAL for huge TLB VMAs and for VMAs containing an
-+ * unsupported (PD_FUNKEY) page.
-+ */
-+static int dump_one_vma(cpt_object_t *mmobj,
-+		struct vm_area_struct *vma, struct cpt_context *ctx)
-+{
-+	struct cpt_vma_image *v;
-+	unsigned long addr;
-+	loff_t saved_object;
-+	struct page_area pa;
-+	int cloned_pages = 0;
-+
-+	/* Reject before taking the buffer or pushing the object, so that
-+	 * the error path has nothing to undo. */
-+	if (vma->vm_flags&VM_HUGETLB) {
-+		eprintk_ctx("huge TLB VMAs are still not supported\n");
-+		return -EINVAL;
-+	}
-+
-+	v = cpt_get_buf(ctx);
-+
-+	cpt_push_object(&saved_object, ctx);
-+
-+	v->cpt_object = CPT_OBJ_VMA;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	v->cpt_start = vma->vm_start;
-+	v->cpt_end = vma->vm_end;
-+	v->cpt_flags = vma->vm_flags;
-+	v->cpt_pgprot = vma->vm_page_prot.pgprot;
-+	v->cpt_pgoff = vma->vm_pgoff;
-+	v->cpt_file = CPT_NULL;
-+	v->cpt_type = CPT_VMA_TYPE_0;
-+	v->cpt_anonvma = 0;
-+
-+	/* We have to remember what VMAs are bound to one anon_vma.
-+	 * So, we store an identifier of group of VMAs. It is handy
-+	 * to use absolute address of anon_vma as this identifier. */
-+	v->cpt_anonvmaid = (unsigned long)vma->anon_vma;
-+
-+	if (vma->vm_file) {
-+		struct file *filp;
-+		cpt_object_t *obj = lookup_cpt_object(CPT_OBJ_FILE, vma->vm_file, ctx);
-+		if (obj == NULL) BUG();
-+		filp = obj->o_obj;
-+		if (filp->f_op &&
-+		    filp->f_op->read == NULL &&
-+		    filp->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_TMPFS)
-+			v->cpt_type = CPT_VMA_TYPE_SHM;
-+		v->cpt_file = obj->o_pos;
-+	}
-+
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_release_buf(ctx);
-+
-+	/* Start with an empty run at the beginning of the VMA. */
-+	pa.type = PD_ABSENT;
-+	pa.pgoff = vma->vm_pgoff;
-+	pa.mm = CPT_NULL;
-+	pa.start = vma->vm_start;
-+	pa.end = vma->vm_start;
-+
-+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
-+		struct page_desc pd;
-+
-+		/* page_get_desc() sets ->mm only for shared anon pages. */
-+		pd.mm = CPT_NULL;
-+		page_get_desc(mmobj, vma, addr, &pd, ctx);
-+		cloned_pages += pd.shared;
-+
-+		/* NOTE(review): this return leaves the object pushed above
-+		 * neither closed nor popped. */
-+		if (pd.type == PD_FUNKEY) {
-+			eprintk_ctx("dump_one_vma: funkey page\n");
-+			return -EINVAL;
-+		}
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+		if (pd.type == PD_LAZY &&
-+		    (ctx->lazy_vm == 0 || (vma->vm_flags&VM_LOCKED)))
-+			pd.type = PD_COPY;
-+#else
-+		if (pd.type == PD_LAZY)
-+			pd.type = PD_COPY;
-+#endif
-+
-+		if (!can_expand(&pa, &pd)) {
-+			cloned_pages += dump_one_vma_flush_area(vma, &pa, ctx);
-+			pa.start = addr;
-+		}
-+		pa.type = pd.type;
-+		pa.end = addr + PAGE_SIZE;
-+		pa.pgoff = pd.index;
-+		pa.mm = pd.mm;
-+	}
-+
-+	/* Flush the last, still open run. */
-+	if (pa.end > pa.start)
-+		cloned_pages += dump_one_vma_flush_area(vma, &pa, ctx);
-+
-+	if (cloned_pages) {
-+		/* Patch cpt_anonvma in the header that was already written. */
-+		__u32 anonvma = 1;
-+		loff_t anonpos = ctx->current_object + offsetof(struct cpt_vma_image, cpt_anonvma);
-+		ctx->pwrite(&anonvma, 4, ctx, anonpos);
-+	}
-+
-+	cpt_close_object(ctx);
-+
-+	cpt_pop_object(&saved_object, ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Write a cpt_aio_ctx_image record for @aio_ctx.
-+ * Returns 0 on success, -EBUSY if the context still has queued or active
-+ * requests.
-+ */
-+static int dump_one_aio_ctx(struct mm_struct *mm, struct kioctx *aio_ctx,
-+ cpt_context_t *ctx)
-+{
-+ loff_t saved_object;
-+ struct cpt_aio_ctx_image aimg;
-+
-+ if (!list_empty(&aio_ctx->run_list) ||
-+ !list_empty(&aio_ctx->active_reqs) ||
-+ aio_ctx->reqs_active) {
-+ eprintk_ctx("AIO is active after suspend\n");
-+ return -EBUSY;
-+ }
-+
-+ cpt_push_object(&saved_object, ctx);
-+
-+ /* cpt_next is filled in by hand; this function does not call
-+  * cpt_close_object(). */
-+ aimg.cpt_next = CPT_ALIGN(sizeof(aimg));
-+ aimg.cpt_object = CPT_OBJ_AIO_CONTEXT;
-+ aimg.cpt_hdrlen = sizeof(aimg);
-+ aimg.cpt_content = CPT_CONTENT_ARRAY;
-+
-+ aimg.cpt_max_reqs = aio_ctx->max_reqs;
-+ aimg.cpt_ring_pages = aio_ctx->ring_info.nr_pages;
-+ aimg.cpt_nr = aio_ctx->ring_info.nr;
-+ aimg.cpt_tail = aio_ctx->ring_info.tail;
-+ aimg.cpt_mmap_base = aio_ctx->ring_info.mmap_base;
-+
-+ ctx->write(&aimg, sizeof(aimg), ctx);
-+
-+ cpt_pop_object(&saved_object, ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Dump one mm: a cpt_mm_image header, the LDT (if the mm has one), every
-+ * VMA and every AIO context.
-+ *
-+ * @obj: checkpoint object whose o_obj is the mm_struct to dump.
-+ * Returns 0 on success or the first error reported by dump_one_vma() or
-+ * dump_one_aio_ctx().
-+ */
-+static int dump_one_mm(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct mm_struct *mm = obj->o_obj;
-+	struct vm_area_struct *vma;
-+	struct cpt_mm_image *v = cpt_get_buf(ctx);
-+
-+	cpt_open_object(obj, ctx);
-+
-+	v->cpt_next = -1;
-+	v->cpt_object = CPT_OBJ_MM;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	v->cpt_start_code = mm->start_code;
-+	v->cpt_end_code = mm->end_code;
-+	v->cpt_start_data = mm->start_data;
-+	v->cpt_end_data = mm->end_data;
-+	v->cpt_start_brk = mm->start_brk;
-+	v->cpt_brk = mm->brk;
-+	v->cpt_start_stack = mm->start_stack;
-+	v->cpt_start_arg = mm->arg_start;
-+	v->cpt_end_arg = mm->arg_end;
-+	v->cpt_start_env = mm->env_start;
-+	v->cpt_end_env = mm->env_end;
-+	v->cpt_def_flags = mm->def_flags;
-+	v->cpt_mmub = cpt_lookup_ubc(mm->mm_ub, ctx);
-+	v->cpt_dumpable = mm->dumpable;
-+	v->cpt_vps_dumpable = mm->vps_dumpable;
-+	v->cpt_used_hugetlb = 0;
-+#ifdef CONFIG_HUGETLB_PAGE
-+	v->cpt_used_hugetlb = mm->used_hugetlb;
-+#endif
-+
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_release_buf(ctx);
-+
-+	if (mm->context.size) {
-+		loff_t saved_object;
-+		struct cpt_obj_bits b;
-+		int size;
-+#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)
-+		/* Loop index for the page-wise LDT copy below. */
-+		int i;
-+#endif
-+
-+		dprintk_ctx("nontrivial LDT\n");
-+
-+		cpt_push_object(&saved_object, ctx);
-+
-+		cpt_open_object(NULL, ctx);
-+		b.cpt_next = CPT_NULL;
-+		b.cpt_object = CPT_OBJ_BITS;
-+		b.cpt_hdrlen = sizeof(b);
-+		b.cpt_content = CPT_CONTENT_MM_CONTEXT;
-+		b.cpt_size = mm->context.size*LDT_ENTRY_SIZE;
-+
-+		ctx->write(&b, sizeof(b), ctx);
-+
-+		size = mm->context.size*LDT_ENTRY_SIZE;
-+
-+#if defined(CONFIG_X86_64) || LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
-+		ctx->write(mm->context.ldt, size, ctx);
-+#else
-+		/* Older i386 kernels keep the LDT in separate pages. */
-+		for (i = 0; i < size; i += PAGE_SIZE) {
-+			int nr = i / PAGE_SIZE, bytes;
-+			char *kaddr = kmap(mm->context.ldt_pages[nr]);
-+
-+			bytes = size - i;
-+			if (bytes > PAGE_SIZE)
-+				bytes = PAGE_SIZE;
-+			ctx->write(kaddr, bytes, ctx);
-+			kunmap(mm->context.ldt_pages[nr]);
-+		}
-+#endif
-+
-+		cpt_close_object(ctx);
-+		cpt_pop_object(&saved_object, ctx);
-+	}
-+
-+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-+		int err;
-+
-+#ifdef CONFIG_X86_64
-+		/* This one fixed page-sized VMA is deliberately not dumped. */
-+		if (vma->vm_start == 0xFFFFE000 &&
-+		    vma->vm_end == 0xFFFFF000)
-+			continue;
-+#endif
-+
-+		/* NOTE(review): error returns below leave the object opened
-+		 * above unclosed. */
-+		if ((err = dump_one_vma(obj, vma, ctx)) != 0)
-+			return err;
-+	}
-+
-+	if (mm->ioctx_list) {
-+		struct kioctx *aio_ctx;
-+		int err;
-+
-+		for (aio_ctx = mm->ioctx_list; aio_ctx; aio_ctx = aio_ctx->next)
-+			if ((err = dump_one_aio_ctx(mm, aio_ctx, ctx)) != 0)
-+				return err;
-+	}
-+
-+	cpt_close_object(ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Dump every collected CPT_OBJ_MM object into the CPT_SECT_MM section and
-+ * print the page statistics gathered on the way.
-+ * Returns 0 on success or the first error from dump_one_mm().
-+ */
-+int cpt_dump_vm(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	int err;
-+
-+	/* Reset the statistics counters for this run. */
-+	zcnt = 0;
-+	scnt0 = 0;
-+	scnt = 0;
-+
-+	cpt_open_section(ctx, CPT_SECT_MM);
-+
-+	for_each_object(obj, CPT_OBJ_MM) {
-+		err = dump_one_mm(obj, ctx);
-+		if (err != 0)
-+			return err;
-+	}
-+
-+	cpt_close_section(ctx);
-+
-+	if (scnt)
-+		dprintk_ctx("cpt_dump_vm: %d shared private anon pages\n", scnt);
-+	if (scnt0)
-+		dprintk_ctx("cpt_dump_vm: %d anon pages are cloned\n", scnt0);
-+	if (zcnt)
-+		dprintk_ctx("cpt_dump_vm: %d silly pages canceled\n", zcnt);
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_mm.h linux-2.6.16-026test009/kernel/cpt/cpt_mm.h
---- linux-2.6.16.orig/kernel/cpt/cpt_mm.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_mm.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,16 @@
-+int cpt_collect_mm(cpt_context_t *);
-+
-+int cpt_dump_vm(struct cpt_context *ctx);
-+
-+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
-+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx);
-+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
-+
-+int cpt_mm_prepare(unsigned long veid);
-+
-+int cpt_free_pgin_dir(struct cpt_context *);
-+int cpt_start_pagein(struct cpt_context *);
-+int rst_setup_pagein(struct cpt_context *);
-+int rst_complete_pagein(struct cpt_context *, int);
-+int rst_pageind(struct cpt_context *);
-+int rst_swapoff(struct cpt_context *);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_net.c linux-2.6.16-026test009/kernel/cpt/cpt_net.c
---- linux-2.6.16.orig/kernel/cpt/cpt_net.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_net.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,363 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_net.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/netdevice.h>
-+#include <linux/inetdevice.h>
-+#include <linux/rtnetlink.h>
-+#include <linux/ve.h>
-+#include <linux/ve_proto.h>
-+#include <linux/vzcalluser.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_kernel.h"
-+#include "cpt_syscalls.h"
-+
-+/*
-+ * Write one CPT_OBJ_NET_DEVICE record per device on the dev_base list.
-+ * Only "lo" and "venet0" are supported: any other device aborts the dump
-+ * with -EBUSY (after its record has been written). Returns 0 otherwise.
-+ */
-+int cpt_dump_link(struct cpt_context * ctx)
-+{
-+	struct net_device *dev = dev_base;
-+
-+	cpt_open_section(ctx, CPT_SECT_NET_DEVICE);
-+	while (dev != NULL) {
-+		struct cpt_netdev_image v;
-+		int is_loopback;
-+		int is_venet;
-+
-+		cpt_open_object(NULL, ctx);
-+
-+		v.cpt_next = CPT_NULL;
-+		v.cpt_object = CPT_OBJ_NET_DEVICE;
-+		v.cpt_hdrlen = sizeof(v);
-+		v.cpt_content = CPT_CONTENT_VOID;
-+
-+		v.cpt_index = dev->ifindex;
-+		v.cpt_flags = dev->flags;
-+		memcpy(v.cpt_name, dev->name, IFNAMSIZ);
-+		ctx->write(&v, sizeof(v), ctx);
-+		cpt_close_object(ctx);
-+
-+		is_loopback = (strcmp(dev->name, "lo") == 0);
-+		is_venet = (strcmp(dev->name, "venet0") == 0);
-+		if (!is_loopback && !is_venet) {
-+			eprintk_ctx("unsupported netdevice %s\n", dev->name);
-+			cpt_close_section(ctx);
-+			return -EBUSY;
-+		}
-+
-+		dev = dev->next;
-+	}
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Set disable_net on the current execution environment and wait in
-+ * synchronize_net(). Always returns 0.
-+ */
-+int cpt_suspend_network(struct cpt_context *ctx)
-+{
-+	struct ve_struct *env = get_exec_env();
-+
-+	env->disable_net = 1;
-+	synchronize_net();
-+	return 0;
-+}
-+
-+/*
-+ * Clear disable_net on the environment identified by ctx->ve_id.
-+ * Returns 0 on success, -ESRCH if no such environment exists.
-+ */
-+int cpt_resume_network(struct cpt_context *ctx)
-+{
-+	struct ve_struct *env = get_ve_by_id(ctx->ve_id);
-+
-+	if (env == NULL)
-+		return -ESRCH;
-+
-+	env->disable_net = 0;
-+	put_ve(env);
-+	return 0;
-+}
-+
-+/*
-+ * Write one CPT_OBJ_NET_IFADDR record for every IPv4 address of every
-+ * device on the dev_base list. Always returns 0.
-+ */
-+int cpt_dump_ifaddr(struct cpt_context * ctx)
-+{
-+ struct net_device *dev;
-+
-+ cpt_open_section(ctx, CPT_SECT_NET_IFADDR);
-+ for (dev = dev_base; dev; dev = dev->next) {
-+ struct in_device *idev = in_dev_get(dev);
-+ struct in_ifaddr *ifa;
-+
-+ /* Device without IPv4 configuration. */
-+ if (!idev)
-+ continue;
-+
-+ for (ifa = idev->ifa_list; ifa; ifa = ifa->ifa_next) {
-+ struct cpt_ifaddr_image v;
-+ cpt_open_object(NULL, ctx);
-+
-+ v.cpt_next = CPT_NULL;
-+ v.cpt_object = CPT_OBJ_NET_IFADDR;
-+ v.cpt_hdrlen = sizeof(v);
-+ v.cpt_content = CPT_CONTENT_VOID;
-+
-+ v.cpt_index = dev->ifindex;
-+ v.cpt_family = AF_INET;
-+ v.cpt_masklen = ifa->ifa_prefixlen;
-+ v.cpt_flags = ifa->ifa_flags;
-+ v.cpt_scope = ifa->ifa_scope;
-+ /* Address arrays are zeroed first; only element 0 is used here. */
-+ memset(&v.cpt_address, 0, sizeof(v.cpt_address));
-+ memset(&v.cpt_peer, 0, sizeof(v.cpt_peer));
-+ memset(&v.cpt_broadcast, 0, sizeof(v.cpt_broadcast));
-+ v.cpt_address[0] = ifa->ifa_local;
-+ v.cpt_peer[0] = ifa->ifa_address;
-+ v.cpt_broadcast[0] = ifa->ifa_broadcast;
-+ memcpy(v.cpt_label, ifa->ifa_label, IFNAMSIZ);
-+ ctx->write(&v, sizeof(v), ctx);
-+ cpt_close_object(ctx);
-+ }
-+ /* Drop the reference taken by in_dev_get(). */
-+ in_dev_put(idev);
-+ }
-+ cpt_close_section(ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Dump the IPv4 routes into the CPT_SECT_NET_ROUTE section.
-+ *
-+ * An RTM_GETROUTE dump request is sent over an in-kernel NETLINK_ROUTE
-+ * socket and every RTM_NEWROUTE message received is appended verbatim
-+ * (aligned with NLMSG_ALIGN) to a single CPT_OBJ_NET_ROUTE object.
-+ *
-+ * Returns 0 on success or a negative error: socket/send/receive failure,
-+ * -ENOMEM, -ENOBUFS for a truncated datagram, the error carried by an
-+ * NLMSG_ERROR reply, or -EINVAL for an unexpected or malformed message.
-+ * Once the section has been opened it is closed on every exit path.
-+ */
-+static int cpt_dump_route(struct cpt_context * ctx)
-+{
-+	int err;
-+	struct socket *sock;
-+	struct msghdr msg;
-+	struct iovec iov;
-+	struct {
-+		struct nlmsghdr nlh;
-+		struct rtgenmsg g;
-+	} req;
-+	struct sockaddr_nl nladdr;
-+	struct cpt_object_hdr v;
-+	mm_segment_t oldfs;
-+	char *pg;
-+
-+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
-+	if (err)
-+		return err;
-+
-+	memset(&nladdr, 0, sizeof(nladdr));
-+	nladdr.nl_family = AF_NETLINK;
-+
-+	/* Zero the whole request so nlmsg_seq and padding are defined. */
-+	memset(&req, 0, sizeof(req));
-+	req.nlh.nlmsg_len = sizeof(req);
-+	req.nlh.nlmsg_type = RTM_GETROUTE;
-+	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
-+	req.nlh.nlmsg_pid = 0;
-+	req.g.rtgen_family = AF_INET;
-+
-+	iov.iov_base=&req;
-+	iov.iov_len=sizeof(req);
-+	msg.msg_name=&nladdr;
-+	msg.msg_namelen=sizeof(nladdr);
-+	msg.msg_iov=&iov;
-+	msg.msg_iovlen=1;
-+	msg.msg_control=NULL;
-+	msg.msg_controllen=0;
-+	msg.msg_flags=MSG_DONTWAIT;
-+
-+	/* The buffers are kernel memory, so lift the user-space limit. */
-+	oldfs = get_fs(); set_fs(KERNEL_DS);
-+	err = sock_sendmsg(sock, &msg, sizeof(req));
-+	set_fs(oldfs);
-+
-+	if (err < 0)
-+		goto out_sock;
-+
-+	pg = (char*)__get_free_page(GFP_KERNEL);
-+	if (pg == NULL) {
-+		err = -ENOMEM;
-+		goto out_sock;
-+	}
-+
-+	cpt_open_section(ctx, CPT_SECT_NET_ROUTE);
-+	cpt_open_object(NULL, ctx);
-+	v.cpt_next = CPT_NULL;
-+	v.cpt_object = CPT_OBJ_NET_ROUTE;
-+	v.cpt_hdrlen = sizeof(v);
-+	v.cpt_content = CPT_CONTENT_NLMARRAY;
-+
-+	ctx->write(&v, sizeof(v), ctx);
-+
-+	for (;;) {
-+		struct nlmsghdr *h;
-+
-+		iov.iov_base = pg;
-+		iov.iov_len = PAGE_SIZE;
-+
-+		oldfs = get_fs(); set_fs(KERNEL_DS);
-+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
-+		set_fs(oldfs);
-+
-+		if (err < 0)
-+			goto done;
-+		if (msg.msg_flags & MSG_TRUNC) {
-+			err = -ENOBUFS;
-+			goto done;
-+		}
-+
-+		/* err now holds the number of bytes still to be parsed. */
-+		h = (struct nlmsghdr*)pg;
-+		while (NLMSG_OK(h, err)) {
-+			if (h->nlmsg_type == NLMSG_DONE) {
-+				err = 0;
-+				goto done;
-+			}
-+			if (h->nlmsg_type == NLMSG_ERROR) {
-+				struct nlmsgerr *errm = (struct nlmsgerr*)NLMSG_DATA(h);
-+				err = errm->error;
-+				eprintk_ctx("NLMSG error: %d\n", errm->error);
-+				goto done;
-+			}
-+			if (h->nlmsg_type != RTM_NEWROUTE) {
-+				eprintk_ctx("NLMSG: %d\n", h->nlmsg_type);
-+				err = -EINVAL;
-+				goto done;
-+			}
-+			ctx->write(h, NLMSG_ALIGN(h->nlmsg_len), ctx);
-+			h = NLMSG_NEXT(h, err);
-+		}
-+		if (err) {
-+			eprintk_ctx("!!!Remnant of size %d %d %d\n", err, h->nlmsg_len, h->nlmsg_type);
-+			err = -EINVAL;
-+			break;
-+		}
-+	}
-+done:
-+	cpt_close_object(ctx);
-+	cpt_close_section(ctx);
-+
-+	free_page((unsigned long)pg);
-+out_sock:
-+	sock_release(sock);
-+	return err;
-+}
-+
-+/*
-+ * Thread body started by cpt_dump_iptables(): enters the VE, makes the
-+ * write end of the pipe (@arg is the int[2] pipe fd array) its descriptor 1,
-+ * closes every other descriptor and execs /sbin/iptables-save.
-+ * Only returns on failure.
-+ */
-+static int dumpfn(void *arg)
-+{
-+ int i;
-+ int *pfd = arg;
-+ char *argv[] = { "iptables-save", "-c", NULL };
-+
-+ i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
-+ if (i < 0) {
-+ eprintk("cannot enter ve to dump iptables\n");
-+ module_put(THIS_MODULE);
-+ return 1;
-+ }
-+
-+ if (pfd[1] != 1)
-+ sc_dup2(pfd[1], 1);
-+
-+ /* Keep only descriptor 1 open for the exec'ed program. */
-+ for (i=0; i<current->files->fdt->max_fds; i++) {
-+ if (i != 1)
-+ sc_close(i);
-+ }
-+
-+ module_put(THIS_MODULE);
-+
-+ set_fs(KERNEL_DS);
-+ i = sc_execve("/sbin/iptables-save", argv, NULL);
-+ /* Reached only when the exec failed. */
-+ eprintk("failed to exec /sbin/iptables-save: %d\n", i);
-+ return -1;
-+}
-+
-+
-+/*
-+ * Capture the output of "iptables-save -c" into the CPT_SECT_NET_IPTABLES
-+ * section as a NUL-terminated CPT_OBJ_NAME object. A helper thread (dumpfn)
-+ * writes into a pipe and this function copies the read end to the image.
-+ * If nothing was produced, the section is dropped and the file position is
-+ * rewound.
-+ *
-+ * Returns the result of the last pipe read (0 at end of file, negative on a
-+ * read error), or a negative error if the pipe or the thread could not be
-+ * created.
-+ */
-+static int cpt_dump_iptables(struct cpt_context * ctx)
-+{
-+ int err;
-+ int pid;
-+ int pfd[2];
-+ struct file *f;
-+ struct cpt_object_hdr v;
-+ char buf[16];
-+ loff_t pos;
-+ int n;
-+
-+ err = sc_pipe(pfd);
-+ if (err < 0) {
-+ eprintk_ctx("sc_pipe: %d\n", err);
-+ return err;
-+ }
-+ err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
-+ if (err < 0) {
-+ eprintk_ctx("local_kernel_thread: %d\n", err);
-+ goto out;
-+ }
-+ /* Hold the read end as a struct file, then drop both descriptors.
-+  * NOTE(review): the fget() result is not checked for NULL. */
-+ f = fget(pfd[0]);
-+ sc_close(pfd[1]);
-+ sc_close(pfd[0]);
-+
-+ cpt_open_section(ctx, CPT_SECT_NET_IPTABLES);
-+
-+ cpt_open_object(NULL, ctx);
-+ v.cpt_next = CPT_NULL;
-+ v.cpt_object = CPT_OBJ_NAME;
-+ v.cpt_hdrlen = sizeof(v);
-+ v.cpt_content = CPT_CONTENT_NAME;
-+
-+ ctx->write(&v, sizeof(v), ctx);
-+
-+ /* Remember where the payload starts to detect empty output later. */
-+ pos = ctx->file->f_pos;
-+ do {
-+ mm_segment_t oldfs;
-+
-+ oldfs = get_fs(); set_fs(KERNEL_DS);
-+ n = f->f_op->read(f, buf, sizeof(buf), &f->f_pos);
-+ set_fs(oldfs);
-+ if (n > 0)
-+ ctx->write(buf, n, ctx);
-+ } while (n > 0);
-+
-+ if (n < 0)
-+ eprintk_ctx("read: %d\n", n);
-+
-+ fput(f);
-+
-+ /* NOTE(review): a wait failure is only logged, not returned. */
-+ if ((err = sc_waitx(pid, 0)) < 0)
-+ eprintk_ctx("wait4: %d\n", err);
-+
-+ if (ctx->file->f_pos != pos) {
-+ /* Terminate the captured text and pad it to alignment. */
-+ buf[0] = 0;
-+ ctx->write(buf, 1, ctx);
-+ ctx->align(ctx);
-+ cpt_close_object(ctx);
-+ cpt_close_section(ctx);
-+ } else {
-+ /* Nothing captured: forget the section and rewind the file. */
-+ pos = ctx->current_section;
-+ cpt_close_object(ctx);
-+ cpt_close_section(ctx);
-+ ctx->sections[CPT_SECT_NET_IPTABLES] = CPT_NULL;
-+ ctx->file->f_pos = pos;
-+ }
-+ return n;
-+
-+out:
-+ if (pfd[1] >= 0)
-+ sc_close(pfd[1]);
-+ if (pfd[0] >= 0)
-+ sc_close(pfd[0]);
-+ return err;
-+}
-+
-+/*
-+ * Dump the network configuration: devices, addresses, routes and iptables,
-+ * in that order. Stops at the first step that returns non-zero and returns
-+ * that value; otherwise returns the result of the iptables step.
-+ */
-+int cpt_dump_ifinfo(struct cpt_context * ctx)
-+{
-+	int err = cpt_dump_link(ctx);
-+
-+	if (err)
-+		return err;
-+
-+	err = cpt_dump_ifaddr(ctx);
-+	if (err)
-+		return err;
-+
-+	err = cpt_dump_route(ctx);
-+	if (err)
-+		return err;
-+
-+	return cpt_dump_iptables(ctx);
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_net.h linux-2.6.16-026test009/kernel/cpt/cpt_net.h
---- linux-2.6.16.orig/kernel/cpt/cpt_net.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_net.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,7 @@
-+int cpt_dump_ifinfo(struct cpt_context *ctx);
-+int rst_restore_net(struct cpt_context *ctx);
-+int cpt_suspend_network(struct cpt_context *ctx);
-+int cpt_resume_network(struct cpt_context *ctx);
-+int rst_resume_network(struct cpt_context *ctx);
-+int cpt_dump_ip_conntrack(struct cpt_context *ctx);
-+int rst_restore_ip_conntrack(struct cpt_context * ctx);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_obj.c linux-2.6.16-026test009/kernel/cpt/cpt_obj.c
---- linux-2.6.16.orig/kernel/cpt/cpt_obj.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_obj.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,172 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_obj.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/*
-+ * Allocate a checkpoint object with allocation flags @gfp and initialise
-+ * every field: reference count 1, no position, no index, no attached
-+ * object or image. ctx->objcount is incremented on success.
-+ * Returns the new object, or NULL if the allocation failed.
-+ */
-+cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+
-+	obj = kmalloc(sizeof(cpt_object_t), gfp);
-+	if (obj) {
-+		INIT_LIST_HEAD(&obj->o_list);
-+		INIT_LIST_HEAD(&obj->o_hash);
-+		INIT_LIST_HEAD(&obj->o_alist);
-+		obj->o_count = 1;
-+		obj->o_pos = CPT_NULL;
-+		/* kmalloc() does not zero memory; do not leave o_ppos
-+		 * indeterminate. */
-+		obj->o_ppos = CPT_NULL;
-+		obj->o_lock = 0;
-+		obj->o_parent = NULL;
-+		obj->o_index = CPT_NOINDEX;
-+		obj->o_obj = NULL;
-+		obj->o_image = NULL;
-+		ctx->objcount++;
-+	}
-+	return obj;
-+}
-+// //EXPORT_SYMBOL(alloc_cpt_object);
-+
-+/*
-+ * Release @obj: unlink it from its o_alist list, free it and decrement
-+ * ctx->objcount. The o_list linkage and o_image are the caller's business.
-+ */
-+void free_cpt_object(cpt_object_t *obj, cpt_context_t *ctx)
-+{
-+	struct list_head *alink = &obj->o_alist;
-+
-+	list_del(alink);
-+	ctx->objcount--;
-+	kfree(obj);
-+}
-+
-+/* Append @obj to the tail of the context's object list for @type. */
-+void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_context_t *ctx)
-+{
-+ list_add_tail(&obj->o_list, &ctx->object_array[type]);
-+}
-+// //EXPORT_SYMBOL(intern_cpt_object);
-+
-+/* Link @obj into the object list right after @head. @type and @ctx are
-+ * not used by this function. */
-+void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj,
-+ cpt_object_t *head, cpt_context_t *ctx)
-+{
-+ list_add(&obj->o_list, &head->o_list);
-+}
-+// //EXPORT_SYMBOL(insert_cpt_object);
-+
-+/*
-+ * Find the checkpoint object of @type tracking @p, or create one.
-+ * An existing object gets its reference count bumped; a new one is
-+ * allocated with @gfp_mask, bound to @p (when non-NULL) and appended to the
-+ * list for @type. Returns the object, or NULL if allocation failed.
-+ */
-+cpt_object_t * __cpt_object_add(enum _cpt_object_type type, void *p,
-+		unsigned gfp_mask, cpt_context_t *ctx)
-+{
-+	cpt_object_t *obj = lookup_cpt_object(type, p, ctx);
-+
-+	if (obj != NULL) {
-+		obj->o_count++;
-+		return obj;
-+	}
-+
-+	obj = alloc_cpt_object(gfp_mask, ctx);
-+	if (obj == NULL)
-+		return NULL;
-+
-+	if (p)
-+		cpt_obj_setobj(obj, p, ctx);
-+	intern_cpt_object(type, obj, ctx);
-+	return obj;
-+}
-+// //EXPORT_SYMBOL(__cpt_object_add);
-+
-+/* Same as __cpt_object_add() with GFP_KERNEL as the allocation mask. */
-+cpt_object_t * cpt_object_add(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
-+{
-+ return __cpt_object_add(type, p, GFP_KERNEL, ctx);
-+}
-+// //EXPORT_SYMBOL(cpt_object_add);
-+
-+/*
-+ * Look up the object of @type tracking @p and take a reference on it.
-+ * Returns the object, or NULL if @p is not tracked.
-+ */
-+cpt_object_t * cpt_object_get(enum _cpt_object_type type, void *p, cpt_context_t *ctx)
-+{
-+	cpt_object_t *obj = lookup_cpt_object(type, p, ctx);
-+
-+	if (obj == NULL)
-+		return NULL;
-+
-+	obj->o_count++;
-+	return obj;
-+}
-+// //EXPORT_SYMBOL(cpt_object_get);
-+
-+/* Initialise every per-type object list of @ctx as empty. Returns 0. */
-+int cpt_object_init(cpt_context_t *ctx)
-+{
-+	int type = 0;
-+
-+	while (type < CPT_OBJ_MAX) {
-+		INIT_LIST_HEAD(&ctx->object_array[type]);
-+		type++;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Free every object on every per-type list of @ctx, together with its
-+ * o_image buffer, and complain if ctx->objcount does not drop to zero.
-+ * Always returns 0.
-+ */
-+int cpt_object_destroy(cpt_context_t *ctx)
-+{
-+	int type;
-+
-+	for (type = 0; type < CPT_OBJ_MAX; type++) {
-+		struct list_head *bucket = &ctx->object_array[type];
-+
-+		while (!list_empty(bucket)) {
-+			cpt_object_t *obj = list_entry(bucket->next, cpt_object_t, o_list);
-+
-+			list_del(&obj->o_list);
-+			/* kfree(NULL) is a no-op, so no guard is needed. */
-+			kfree(obj->o_image);
-+			free_cpt_object(obj, ctx);
-+		}
-+	}
-+	if (ctx->objcount != 0)
-+		eprintk_ctx("BUG: ctx->objcount=%d\n", ctx->objcount);
-+	return 0;
-+}
-+
-+/*
-+ * Linear search of the @type list for the object whose o_obj equals @p.
-+ * Returns the first match or NULL.
-+ */
-+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	cpt_object_t *found = NULL;
-+
-+	for_each_object(obj, type) {
-+		if (obj->o_obj == p) {
-+			found = obj;
-+			break;
-+		}
-+	}
-+	return found;
-+}
-+// //EXPORT_SYMBOL(lookup_cpt_object);
-+
-+/*
-+ * Linear search of the @type list for the object whose o_pos equals @pos.
-+ * Returns the first match or NULL.
-+ */
-+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	cpt_object_t *found = NULL;
-+
-+	for_each_object(obj, type) {
-+		if (obj->o_pos == pos) {
-+			found = obj;
-+			break;
-+		}
-+	}
-+	return found;
-+}
-+// //EXPORT_SYMBOL(lookup_cpt_obj_bypos);
-+
-+/*
-+ * Linear search of the @type list for the object whose o_index equals
-+ * @index. Returns the first match or NULL.
-+ */
-+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	cpt_object_t *found = NULL;
-+
-+	for_each_object(obj, type) {
-+		if (obj->o_index == index) {
-+			found = obj;
-+			break;
-+		}
-+	}
-+	return found;
-+}
-+// //EXPORT_SYMBOL(lookup_cpt_obj_byindex);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_obj.h linux-2.6.16-026test009/kernel/cpt/cpt_obj.h
---- linux-2.6.16.orig/kernel/cpt/cpt_obj.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_obj.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,62 @@
-+#ifndef __CPT_OBJ_H_
-+#define __CPT_OBJ_H_ 1
-+
-+#include <linux/list.h>
-+#include <linux/cpt_image.h>
-+
-+/* Bookkeeping record for one kernel object taking part in a checkpoint. */
-+typedef struct _cpt_object
-+{
-+ struct list_head o_list; /* link in ctx->object_array[type] */
-+ struct list_head o_hash; /* initialised on allocation */
-+ int o_count; /* starts at 1, bumped by repeated add/get */
-+ int o_index; /* CPT_NOINDEX until set with cpt_obj_setindex() */
-+ int o_lock;
-+ loff_t o_pos; /* CPT_NULL until set with cpt_obj_setpos() */
-+ loff_t o_ppos;
-+ void *o_obj; /* the tracked kernel object; lookup key */
-+ void *o_image; /* kfree()d by cpt_object_destroy() */
-+ void *o_parent;
-+ struct list_head o_alist; /* unlinked by free_cpt_object() */
-+} cpt_object_t;
-+
-+struct cpt_context;
-+
-+/* Iterate over all objects of @type. Relies on a variable named "ctx"
-+ * being in scope at the point of use. */
-+#define for_each_object(obj, type) list_for_each_entry(obj, &ctx->object_array[type], o_list)
-+
-+
-+extern cpt_object_t *alloc_cpt_object(int gfp, struct cpt_context *ctx);
-+extern void free_cpt_object(cpt_object_t *obj, struct cpt_context *ctx);
-+
-+cpt_object_t *lookup_cpt_object(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
-+cpt_object_t *lookup_cpt_obj_bypos(enum _cpt_object_type type, loff_t pos, struct cpt_context *ctx);
-+cpt_object_t *lookup_cpt_obj_byindex(enum _cpt_object_type type, __u32 index, struct cpt_context *ctx);
-+
-+/* Record @pos as the position of @cpt. @ctx is currently unused. */
-+static inline void cpt_obj_setpos(cpt_object_t *cpt, loff_t pos, struct cpt_context *ctx)
-+{
-+ cpt->o_pos = pos;
-+ /* Add to pos hash table */
-+}
-+
-+/* Bind @cpt to the kernel object @ptr. @ctx is currently unused. */
-+static inline void cpt_obj_setobj(cpt_object_t *cpt, void *ptr, struct cpt_context *ctx)
-+{
-+ cpt->o_obj = ptr;
-+ /* Add to hash table */
-+}
-+
-+/* Record @index as the index of @cpt. @ctx is currently unused. */
-+static inline void cpt_obj_setindex(cpt_object_t *cpt, __u32 index, struct cpt_context *ctx)
-+{
-+ cpt->o_index = index;
-+ /* Add to index hash table */
-+}
-+
-+
-+extern void intern_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, struct cpt_context *ctx);
-+extern void insert_cpt_object(enum _cpt_object_type type, cpt_object_t *obj, cpt_object_t *head, struct cpt_context *ctx);
-+extern cpt_object_t *cpt_object_add(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
-+extern cpt_object_t *__cpt_object_add(enum _cpt_object_type type, void *p, unsigned int gfp_mask, struct cpt_context *ctx);
-+extern cpt_object_t *cpt_object_get(enum _cpt_object_type type, void *p, struct cpt_context *ctx);
-+
-+extern int cpt_object_init(struct cpt_context *ctx);
-+extern int cpt_object_destroy(struct cpt_context *ctx);
-+
-+#endif /* __CPT_OBJ_H_ */
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_proc.c linux-2.6.16-026test009/kernel/cpt/cpt_proc.c
---- linux-2.6.16.orig/kernel/cpt/cpt_proc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_proc.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,577 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_proc.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/errno.h>
-+#include <linux/mm.h>
-+#include <linux/list.h>
-+#include <linux/proc_fs.h>
-+#include <linux/smp_lock.h>
-+#include <asm/uaccess.h>
-+#include <linux/cpt_ioctl.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_dump.h"
-+#include "cpt_mm.h"
-+#include "cpt_kernel.h"
-+
-+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
-+MODULE_LICENSE("GPL");
-+
-+/* List of contexts and lock protecting the list */
-+struct list_head cpt_context_list;
-+spinlock_t cpt_context_lock;
-+
-+static int proc_read(char *buffer, char **start, off_t offset,
-+		     int length, int *eof, void *data)
-+{
-+	/*
-+	 * read_proc callback: formats a header plus one line per context and
-+	 * reports, via *start and the return value, the bytes at and after offset.
-+	 */
-+	cpt_context_t *ctx;
-+	off_t begin = 0;	/* stream position that buffer[0] corresponds to */
-+	off_t pos;
-+	off_t skip;
-+	int len;
-+
-+	len = sprintf(buffer, "Ctx Id VE State\n");
-+
-+	spin_lock(&cpt_context_lock);
-+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
-+		len += sprintf(buffer+len,"%p %08x %-8u %d",
-+			       ctx, ctx->contextid, ctx->ve_id, ctx->ctx_state);
-+		buffer[len++] = '\n';
-+
-+		pos = begin + len;
-+		if (pos < offset) {
-+			/* Everything formatted so far precedes the window: drop it. */
-+			begin = pos;
-+			len = 0;
-+		}
-+		if (pos > offset + length)
-+			goto done;	/* window filled; *eof stays untouched */
-+	}
-+	*eof = 1;
-+
-+done:
-+	spin_unlock(&cpt_context_lock);
-+	/* Skip the part of the buffer that lies before the requested offset. */
-+	skip = offset - begin;
-+	*start = buffer + skip;
-+	len -= skip;
-+	if (len > length)
-+		len = length;
-+	return len < 0 ? 0 : len;
-+}
-+
-+void cpt_context_release(cpt_context_t *ctx)
-+{
-+	/* Entered with cpt_context_lock held: unlink, then tear down unlocked. */
-+	list_del(&ctx->ctx_list);
-+	spin_unlock(&cpt_context_lock);
-+
-+	if (ctx->ctx_state > 0)
-+		cpt_resume(ctx);
-+	ctx->ctx_state = CPT_CTX_ERROR;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+	if (ctx->pgin_task != NULL)
-+		put_task_struct(ctx->pgin_task);
-+	if (ctx->pgin_dir)
-+		cpt_free_pgin_dir(ctx);
-+	if (ctx->pagein_file_out != NULL)
-+		fput(ctx->pagein_file_out);
-+	if (ctx->pagein_file_in != NULL)
-+		fput(ctx->pagein_file_in);
-+#endif
-+	if (ctx->objcount != 0)
-+		eprintk_ctx("%d objects leaked\n", ctx->objcount);
-+	if (ctx->file != NULL)
-+		fput(ctx->file);
-+	cpt_flush_error(ctx);
-+	if (ctx->errorfile != NULL) {
-+		fput(ctx->errorfile);
-+		ctx->errorfile = NULL;
-+	}
-+	if (ctx->error_msg != NULL) {
-+		free_page((unsigned long)ctx->error_msg);
-+		ctx->error_msg = NULL;
-+	}
-+	if (ctx->statusfile != NULL)
-+		fput(ctx->statusfile);
-+	if (ctx->lockfile != NULL)
-+		fput(ctx->lockfile);
-+	kfree(ctx);
-+	/* The lock is re-taken so callers get it back in the state they left it. */
-+	spin_lock(&cpt_context_lock);
-+}
-+
-+static void __cpt_context_put(cpt_context_t *ctx)
-+{
-+	if (--ctx->refcount == 0)	/* last reference dropped: destroy the context */
-+		cpt_context_release(ctx);
-+}
-+
-+static void cpt_context_put(cpt_context_t *ctx)
-+{ /* Drops one reference on ctx, taking cpt_context_lock around __cpt_context_put(). */
-+ spin_lock(&cpt_context_lock);
-+ __cpt_context_put(ctx);
-+ spin_unlock(&cpt_context_lock);
-+}
-+
-+cpt_context_t * cpt_context_open(void)
-+{
-+	cpt_context_t *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-+	if (ctx == NULL)
-+		return NULL;
-+	/* Initialize, publish on the global list, then try to get an error page. */
-+	cpt_context_init(ctx);
-+	spin_lock(&cpt_context_lock);
-+	list_add_tail(&ctx->ctx_list, &cpt_context_list);
-+	spin_unlock(&cpt_context_lock);
-+	ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
-+	if (ctx->error_msg != NULL)
-+		ctx->error_msg[0] = 0;
-+	return ctx;
-+}
-+
-+cpt_context_t * cpt_context_lookup(unsigned int contextid)
-+{
-+	cpt_context_t *ctx, *found = NULL;
-+
-+	spin_lock(&cpt_context_lock);
-+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
-+		if (ctx->contextid != contextid)
-+			continue;
-+		ctx->refcount++;	/* the returned context carries a reference */
-+		found = ctx;
-+		break;
-+	}
-+	spin_unlock(&cpt_context_lock);
-+	return found;
-+}
-+
-+int cpt_context_lookup_veid(unsigned int veid)
-+{
-+	cpt_context_t *ctx;
-+	int busy = 0;	/* 1 once a context for veid with ctx_state > 0 is seen */
-+
-+	spin_lock(&cpt_context_lock);
-+	list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
-+		busy = (ctx->ve_id == veid && ctx->ctx_state > 0);
-+		if (busy)
-+			break;
-+	}
-+	spin_unlock(&cpt_context_lock);
-+	return busy;
-+}
-+
-+static int cpt_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
-+{
-+	/*
-+	 * ioctl handler of the "cpt" proc entry.  Context-less commands go first;
-+	 * the rest use the context in file->private_data, created on first use.
-+	 */
-+	int err = 0;
-+	cpt_context_t *ctx;
-+	struct file *dfile = NULL;
-+
-+	unlock_kernel();
-+
-+	if (cmd == CPT_VMPREP) {
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+		err = cpt_mm_prepare(arg);
-+#else
-+		err = -EINVAL;
-+#endif
-+		goto out_lock;
-+	}
-+
-+	if (cmd == CPT_TEST_CAPS) {
-+		unsigned int src_flags, dst_flags = arg;
-+
-+		err = 0;
-+		src_flags = test_cpu_caps();
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
-+		goto out_lock;
-+	}
-+
-+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
-+		cpt_context_t *old_ctx;
-+
-+		ctx = NULL;
-+		if (cmd == CPT_JOIN_CONTEXT) {
-+			err = -ENOENT;
-+			ctx = cpt_context_lookup(arg);
-+			if (!ctx)
-+				goto out_lock;
-+		}
-+		/* Swap the file's context under the lock and drop the old reference. */
-+		spin_lock(&cpt_context_lock);
-+		old_ctx = (cpt_context_t*)file->private_data;
-+		file->private_data = ctx;
-+
-+		if (old_ctx) {
-+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
-+				old_ctx->sticky = 0;
-+				old_ctx->refcount--;
-+			}
-+			__cpt_context_put(old_ctx);
-+		}
-+		spin_unlock(&cpt_context_lock);
-+		err = 0;
-+		goto out_lock;
-+	}
-+
-+	spin_lock(&cpt_context_lock);
-+	ctx = (cpt_context_t*)file->private_data;
-+	if (ctx)
-+		ctx->refcount++;
-+	spin_unlock(&cpt_context_lock);
-+	/* No context bound yet: open one, unless the file got one meanwhile. */
-+	if (!ctx) {
-+		cpt_context_t *old_ctx;
-+
-+		ctx = cpt_context_open();
-+		if (!ctx) {
-+			err = -ENOMEM;
-+			goto out_lock;
-+		}
-+
-+		spin_lock(&cpt_context_lock);
-+		old_ctx = (cpt_context_t*)file->private_data;
-+		if (!old_ctx) {
-+			ctx->refcount++;
-+			file->private_data = ctx;
-+		} else {
-+			old_ctx->refcount++;
-+			__cpt_context_put(ctx);
-+			ctx = old_ctx;
-+		}
-+		spin_unlock(&cpt_context_lock);
-+	}
-+
-+	if (cmd == CPT_GET_CONTEXT) {
-+		unsigned int contextid = (unsigned int)arg;
-+
-+		if (ctx->contextid && ctx->contextid != contextid) {
-+			err = -EINVAL;
-+			goto out_nosem;
-+		}
-+		if (!ctx->contextid) {
-+			cpt_context_t *c1 = cpt_context_lookup(contextid);
-+			if (c1) {
-+				cpt_context_put(c1);
-+				err = -EEXIST;
-+				goto out_nosem;
-+			}
-+			ctx->contextid = contextid;
-+		}
-+		spin_lock(&cpt_context_lock);
-+		if (!ctx->sticky) {
-+			ctx->sticky = 1;
-+			ctx->refcount++;
-+		}
-+		spin_unlock(&cpt_context_lock);
-+		goto out_nosem;
-+	}
-+	/* Every remaining command runs under the context's main_sem. */
-+	down(&ctx->main_sem);
-+
-+	err = -EBUSY;
-+	if (ctx->ctx_state < 0)
-+		goto out;
-+
-+	err = 0;
-+	switch (cmd) {
-+	case CPT_SET_DUMPFD:
-+		if (ctx->ctx_state == CPT_CTX_DUMPING) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		if ((int)arg >= 0) {	/* a negative fd just drops the current file */
-+			dfile = fget(arg);
-+			if (!dfile) {	/* fget() returns NULL, not ERR_PTR, on a bad fd */
-+				err = -EBADF;
-+				break;
-+			}
-+			if (dfile->f_op == NULL || dfile->f_op->write == NULL) {
-+				fput(dfile);
-+				err = -EBADF;
-+				break;
-+			}
-+		}
-+		if (ctx->file)
-+			fput(ctx->file);
-+		ctx->file = dfile;
-+		break;
-+	case CPT_SET_ERRORFD:
-+		if ((int)arg >= 0) {	/* a negative fd just drops the current file */
-+			dfile = fget(arg);
-+			if (!dfile) {
-+				err = -EBADF;
-+				break;
-+			}
-+		}
-+		if (ctx->errorfile)
-+			fput(ctx->errorfile);
-+		ctx->errorfile = dfile;
-+		break;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+	case CPT_SET_PAGEINFDIN:
-+		if ((int)arg >= 0) {	/* a negative fd just drops the current file */
-+			dfile = fget(arg);
-+			if (!dfile) {
-+				err = -EBADF;
-+				break;
-+			}
-+		}
-+		if (ctx->pagein_file_in)
-+			fput(ctx->pagein_file_in);
-+		ctx->pagein_file_in = dfile;
-+		break;
-+	case CPT_SET_PAGEINFDOUT:
-+		if ((int)arg >= 0) {	/* a negative fd just drops the current file */
-+			dfile = fget(arg);
-+			if (!dfile) {
-+				err = -EBADF;
-+				break;
-+			}
-+		}
-+		if (ctx->pagein_file_out)
-+			fput(ctx->pagein_file_out);
-+		ctx->pagein_file_out = dfile;
-+		break;
-+	case CPT_SET_LAZY:
-+		ctx->lazy_vm = arg;
-+		break;
-+	case CPT_PAGEIND:
-+		err = cpt_start_pagein(ctx);
-+		break;
-+#endif
-+	case CPT_SET_VEID:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		ctx->ve_id = arg;
-+		break;
-+	case CPT_SET_CPU_FLAGS:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		ctx->dst_cpu_flags = arg;
-+		ctx->src_cpu_flags = test_cpu_caps();
-+		break;
-+	case CPT_SUSPEND:
-+		if (cpt_context_lookup_veid(ctx->ve_id) || ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		ctx->ctx_state = CPT_CTX_SUSPENDING;
-+		err = cpt_vps_suspend(ctx);
-+		if (err) {
-+			if (cpt_resume(ctx) == 0)
-+				ctx->ctx_state = CPT_CTX_IDLE;
-+		} else {
-+			ctx->ctx_state = CPT_CTX_SUSPENDED;
-+		}
-+		break;
-+	case CPT_DUMP:
-+		if (!ctx->ctx_state) {
-+			err = -ENOENT;
-+			break;
-+		}
-+		err = cpt_dump(ctx);
-+		break;
-+	case CPT_RESUME:
-+		if (ctx->ctx_state == CPT_CTX_IDLE) {
-+			err = -ENOENT;
-+			break;
-+		}
-+		err = cpt_resume(ctx);
-+		if (!err)
-+			ctx->ctx_state = CPT_CTX_IDLE;
-+		break;
-+	case CPT_KILL:
-+		if (ctx->ctx_state == CPT_CTX_IDLE) {
-+			err = -ENOENT;
-+			break;
-+		}
-+		err = cpt_kill(ctx);
-+		if (!err)
-+			ctx->ctx_state = CPT_CTX_IDLE;
-+		break;
-+	case CPT_TEST_VECAPS:
-+	{
-+		__u32 src_flags, dst_flags = arg;
-+
-+		err = cpt_vps_caps(ctx, &src_flags);
-+		if (err)
-+			break;
-+
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SSE2, "sse2", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_MMX, "mmx", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW, "3dnow", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_3DNOW2, "3dnowext", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SEP, "sysenter", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_EMT64, "emt64", err);
-+		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
-+		break;
-+	}
-+	default:
-+		err = -EINVAL;
-+		break;
-+	}
-+
-+out:
-+	cpt_flush_error(ctx);
-+	up(&ctx->main_sem);
-+out_nosem:
-+	cpt_context_put(ctx);
-+out_lock:
-+	lock_kernel();
-+	return err;
-+}
-+
-+static int cpt_open(struct inode *inode, struct file *file)
-+{
-+	/*
-+	 * Pin the module while the file is open; cpt_release() undoes this.
-+	 */
-+	return try_module_get(THIS_MODULE) ? 0 : -EBUSY;
-+}
-+
-+static int cpt_release(struct inode * inode, struct file * file)
-+{
-+	cpt_context_t *bound;
-+
-+	/* Detach the context from the file and drop the file's reference. */
-+	spin_lock(&cpt_context_lock);
-+	bound = (cpt_context_t*)file->private_data;
-+	file->private_data = NULL;
-+	if (bound != NULL)
-+		__cpt_context_put(bound);
-+	spin_unlock(&cpt_context_lock);
-+
-+	module_put(THIS_MODULE);	/* pairs with try_module_get() in cpt_open() */
-+	return 0;
-+}
-+
-+
-+static struct file_operations cpt_fops = { /* read, write and llseek are filled in by init_cpt() */
-+ .owner = THIS_MODULE,
-+ .open = cpt_open,
-+ .release = cpt_release,
-+ .ioctl = cpt_ioctl,
-+};
-+
-+static struct proc_dir_entry *proc_ent;
-+
-+int debug_level = 1;
-+
-+static struct ctl_table_header *ctl_header;
-+
-+static ctl_table debug_table[] = { /* child table of root_table: one integer entry, "cpt" */
-+ {
-+ .ctl_name = 9475, /* NOTE(review): bare numeric sysctl id; confirm it cannot clash with other CTL_DEBUG children */
-+ .procname = "cpt",
-+ .data = &debug_level, /* read and written through proc_dointvec */
-+ .maxlen = sizeof(debug_level),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ { .ctl_name = 0 } /* terminator */
-+};
-+static ctl_table root_table[] = { /* registered by init_cpt(); hangs debug_table under the "debug" directory */
-+ {
-+ .ctl_name = CTL_DEBUG,
-+ .procname = "debug",
-+ .mode = 0555,
-+ .child = debug_table,
-+ },
-+ { .ctl_name = 0 } /* terminator */
-+};
-+
-+static int __init init_cpt(void)
-+{
-+	/*
-+	 * Register the debug sysctl first, then publish the "cpt" proc entry.
-+	 */
-+	ctl_header = register_sysctl_table(root_table, 0);
-+	if (ctl_header == NULL)
-+		return -ENOMEM;
-+
-+	spin_lock_init(&cpt_context_lock);
-+	INIT_LIST_HEAD(&cpt_context_list);
-+
-+	proc_ent = create_proc_entry("cpt", 0600, NULL);
-+	if (proc_ent == NULL) {
-+		unregister_sysctl_table(ctl_header);
-+		return -EINVAL;
-+	}
-+
-+	/*
-+	 * Reuse the read/write/llseek methods that create_proc_entry() set up,
-+	 * but route open, release and ioctl through cpt_fops.
-+	 */
-+	cpt_fops.read = proc_ent->proc_fops->read;
-+	cpt_fops.write = proc_ent->proc_fops->write;
-+	cpt_fops.llseek = proc_ent->proc_fops->llseek;
-+	proc_ent->proc_fops = &cpt_fops;
-+
-+	proc_ent->read_proc = proc_read;
-+	proc_ent->data = NULL;
-+	proc_ent->owner = THIS_MODULE;
-+	return 0;
-+}
-+module_init(init_cpt);
-+
-+static void __exit exit_cpt(void)
-+{
-+	remove_proc_entry("cpt", NULL);
-+	unregister_sysctl_table(ctl_header);
-+
-+	/* Release every context that is still on the global list. */
-+	spin_lock(&cpt_context_lock);
-+	while (!list_empty(&cpt_context_list)) {
-+		cpt_context_t *victim = list_entry(cpt_context_list.next,
-+						   cpt_context_t, ctx_list);
-+
-+		/* Exactly one reference must remain: the sticky one, or one taken here. */
-+		if (!victim->sticky)
-+			victim->refcount++;
-+		victim->sticky = 0;
-+		BUG_ON(victim->refcount != 1);
-+		__cpt_context_put(victim);
-+	}
-+	spin_unlock(&cpt_context_lock);
-+}
-+module_exit(exit_cpt);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_process.c linux-2.6.16-026test009/kernel/cpt/cpt_process.c
---- linux-2.6.16.orig/kernel/cpt/cpt_process.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_process.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,964 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_process.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/compat.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_ubc.h"
-+#include "cpt_process.h"
-+#include "cpt_kernel.h"
-+
-+#ifdef CONFIG_X86_32
-+#undef task_pt_regs
-+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
-+#endif
-+
-+static u32 encode_segment(u32 segreg)
-+{
-+	/* Map a raw selector to a CPT_SEG_* code; bad ones warn and give CPT_SEG_ZERO. */
-+	const u32 idx = (segreg & 0xFFFF) >> 3;	/* descriptor table index */
-+
-+	segreg &= 0xFFFF;
-+	if (segreg == 0)
-+		return CPT_SEG_ZERO;
-+	if ((segreg & 3) != 3) {
-+		wprintk("Invalid RPL of a segment reg %x\n", segreg);
-+		return CPT_SEG_ZERO;
-+	}
-+	/* Bit 2 set: LDT selector, encoded as its index added to CPT_SEG_LDT. */
-+	if (segreg & 4)
-+		return CPT_SEG_LDT + idx;
-+
-+	/* GDT selector whose index falls inside the TLS range. */
-+	if (idx >= GDT_ENTRY_TLS_MIN && idx <= GDT_ENTRY_TLS_MAX)
-+		return CPT_SEG_TLS1 + (idx - GDT_ENTRY_TLS_MIN);
-+
-+	/* Otherwise only the standard user descriptors are accepted. */
-+#ifdef CONFIG_X86_64
-+	if (segreg == __USER32_DS)
-+		return CPT_SEG_USER32_DS;
-+	if (segreg == __USER32_CS)
-+		return CPT_SEG_USER32_CS;
-+	if (segreg == __USER_DS)
-+		return CPT_SEG_USER64_DS;
-+	if (segreg == __USER_CS)
-+		return CPT_SEG_USER64_CS;
-+#else
-+	if (segreg == __USER_DS)
-+		return CPT_SEG_USER32_DS;
-+	if (segreg == __USER_CS)
-+		return CPT_SEG_USER32_CS;
-+#endif
-+	wprintk("Invalid segment reg %x\n", segreg);
-+	return CPT_SEG_ZERO;
-+}
-+
-+#ifdef CONFIG_X86_64
-+static void xlate_ptregs_64_to_32(struct cpt_x86_regs *d, struct pt_regs *s, task_t *tsk)
-+{
-+	d->cpt_eax = s->rax;	/* general-purpose registers */
-+	d->cpt_ebx = s->rbx;
-+	d->cpt_ecx = s->rcx;
-+	d->cpt_edx = s->rdx;
-+	d->cpt_esi = s->rsi;
-+	d->cpt_edi = s->rdi;
-+	d->cpt_ebp = s->rbp;
-+	d->cpt_esp = s->rsp;
-+	d->cpt_eip = s->rip;
-+	d->cpt_orig_eax = s->orig_rax;
-+	d->cpt_eflags = s->eflags;
-+	d->cpt_xcs = encode_segment(s->cs);	/* selectors go through encode_segment() */
-+	d->cpt_xss = encode_segment(s->ss);
-+	d->cpt_xds = encode_segment(tsk->thread.ds);	/* ds/es come from the thread struct */
-+	d->cpt_xes = encode_segment(tsk->thread.es);
-+}
-+
-+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
-+{
-+	struct pt_regs *regs = task_pt_regs(tsk);
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	if (tsk->thread_info->flags & _TIF_IA32) {
-+		/* Task flagged _TIF_IA32: write the 32-bit register record. */
-+		struct cpt_x86_regs r32;
-+		r32.cpt_next = sizeof(r32);
-+		r32.cpt_object = CPT_OBJ_X86_REGS;
-+		r32.cpt_hdrlen = sizeof(r32);
-+		r32.cpt_content = CPT_CONTENT_VOID;
-+
-+		r32.cpt_debugreg[0] = tsk->thread.debugreg0;
-+		r32.cpt_debugreg[1] = tsk->thread.debugreg1;
-+		r32.cpt_debugreg[2] = tsk->thread.debugreg2;
-+		r32.cpt_debugreg[3] = tsk->thread.debugreg3;
-+		r32.cpt_debugreg[4] = 0;
-+		r32.cpt_debugreg[5] = 0;
-+		r32.cpt_debugreg[6] = tsk->thread.debugreg6;
-+		r32.cpt_debugreg[7] = tsk->thread.debugreg7;
-+		r32.cpt_fs = encode_segment(tsk->thread.fsindex);
-+		r32.cpt_gs = encode_segment(tsk->thread.gsindex);
-+		xlate_ptregs_64_to_32(&r32, regs, tsk);
-+		ctx->write(&r32, sizeof(r32), ctx);
-+	} else {
-+		struct cpt_x86_64_regs r64;
-+		r64.cpt_next = sizeof(r64);
-+		r64.cpt_object = CPT_OBJ_X86_64_REGS;
-+		r64.cpt_hdrlen = sizeof(r64);
-+		r64.cpt_content = CPT_CONTENT_VOID;
-+
-+		r64.cpt_fsbase = tsk->thread.fs;
-+		r64.cpt_gsbase = tsk->thread.gs;
-+		r64.cpt_fsindex = encode_segment(tsk->thread.fsindex);
-+		r64.cpt_gsindex = encode_segment(tsk->thread.gsindex);
-+		r64.cpt_ds = encode_segment(tsk->thread.ds);
-+		r64.cpt_es = encode_segment(tsk->thread.es);
-+		r64.cpt_debugreg[0] = tsk->thread.debugreg0;
-+		r64.cpt_debugreg[1] = tsk->thread.debugreg1;
-+		r64.cpt_debugreg[2] = tsk->thread.debugreg2;
-+		r64.cpt_debugreg[3] = tsk->thread.debugreg3;
-+		r64.cpt_debugreg[4] = 0;
-+		r64.cpt_debugreg[5] = 0;
-+		r64.cpt_debugreg[6] = tsk->thread.debugreg6;
-+		r64.cpt_debugreg[7] = tsk->thread.debugreg7;
-+
-+		/* Bulk-copy pt_regs starting at cpt_r15, then re-encode cs and ss. */
-+		memcpy(&r64.cpt_r15, regs, sizeof(struct pt_regs));
-+		r64.cpt_cs = encode_segment(regs->cs);
-+		r64.cpt_ss = encode_segment(regs->ss);
-+
-+		ctx->write(&r64, sizeof(r64), ctx);
-+
-+#if 0
-+		if (r64.cpt_rip >= VSYSCALL_START && r64.cpt_rip < VSYSCALL_END) {
-+			eprintk_ctx(CPT_FID "cannot be checkpointied while vsyscall, try later\n", CPT_TID(tsk));
-+			return -EAGAIN;
-+		}
-+#endif
-+	}
-+	cpt_close_object(ctx);
-+	return 0;
-+}
-+
-+#else
-+
-+static int dump_registers(task_t *tsk, struct cpt_context *ctx)
-+{
-+	/*
-+	 * Emit one CPT_OBJ_X86_REGS record: debug registers, fs/gs and a copy
-+	 * of the task's pt_regs whose segment selectors are re-encoded.
-+	 */
-+	struct cpt_x86_regs image;
-+	struct pt_regs *regs = task_pt_regs(tsk);
-+	int i;
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	image.cpt_next = sizeof(image);
-+	image.cpt_object = CPT_OBJ_X86_REGS;
-+	image.cpt_hdrlen = sizeof(image);
-+	image.cpt_content = CPT_CONTENT_VOID;
-+
-+	for (i = 0; i < 8; i++)
-+		image.cpt_debugreg[i] = tsk->thread.debugreg[i];
-+	image.cpt_fs = encode_segment(tsk->thread.fs);
-+	image.cpt_gs = encode_segment(tsk->thread.gs);
-+
-+	/* Raw copy starting at cpt_ebx; the selectors are fixed up right after. */
-+	memcpy(&image.cpt_ebx, regs, sizeof(struct pt_regs));
-+	image.cpt_xcs = encode_segment(regs->xcs);
-+	image.cpt_xss = encode_segment(regs->xss);
-+	image.cpt_xds = encode_segment(regs->xds);
-+	image.cpt_xes = encode_segment(regs->xes);
-+
-+	ctx->write(&image, sizeof(image), ctx);
-+	cpt_close_object(ctx);
-+
-+	return 0;
-+}
-+#endif
-+
-+static int dump_kstack(task_t *tsk, struct cpt_context *ctx)
-+{
-+	struct cpt_obj_bits hdr;
-+	unsigned long nbytes;
-+	void *base;
-+
-+	cpt_open_object(NULL, ctx);
-+	/* Dump the bytes between the saved stack pointer and the thread's sp0. */
-+#ifdef CONFIG_X86_64
-+	base = (void*)tsk->thread.rsp;
-+	nbytes = tsk->thread.rsp0 - tsk->thread.rsp;
-+#else
-+	base = (void*)tsk->thread.esp;
-+	nbytes = tsk->thread.esp0 - tsk->thread.esp;
-+#endif
-+
-+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(nbytes);
-+	hdr.cpt_object = CPT_OBJ_BITS;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = CPT_CONTENT_STACK;
-+	hdr.cpt_size = nbytes;
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+	ctx->write(base, nbytes, ctx);
-+	ctx->align(ctx);
-+	cpt_close_object(ctx);
-+	return 0;
-+}
-+
-+/* Formats of i387_fxsave_struct are the same for x86_64
-+ * and i386. Plain luck. */
-+
-+static int dump_fpustate(task_t *tsk, struct cpt_context *ctx)
-+{
-+	struct cpt_obj_bits hdr;
-+	unsigned long len = sizeof(struct i387_fxsave_struct);
-+	int kind = CPT_CONTENT_X86_FPUSTATE;
-+
-+	cpt_open_object(NULL, ctx);
-+
-+#ifndef CONFIG_X86_64
-+	/* Without FXSR the fsave-sized structure is dumped and tagged as OLD. */
-+	if (!cpu_has_fxsr) {
-+		len = sizeof(struct i387_fsave_struct);
-+		kind = CPT_CONTENT_X86_FPUSTATE_OLD;
-+	}
-+#endif
-+
-+	hdr.cpt_next = sizeof(hdr) + CPT_ALIGN(len);
-+	hdr.cpt_object = CPT_OBJ_BITS;
-+	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_content = kind;
-+	hdr.cpt_size = len;
-+
-+	ctx->write(&hdr, sizeof(hdr), ctx);
-+	ctx->write(&tsk->thread.i387, len, ctx);
-+	ctx->align(ctx);
-+	cpt_close_object(ctx);
-+
-+	return 0;
-+}
-+
-+static int encode_siginfo(struct cpt_siginfo_image *si, siginfo_t *info)
-+{
-+	si->cpt_signo = info->si_signo;
-+	si->cpt_errno = info->si_errno;
-+	si->cpt_code = info->si_code;
-+	/* cpt_pid, cpt_uid, cpt_sigval and cpt_utime are reused per siginfo flavour. */
-+	switch(si->cpt_code & __SI_MASK) {
-+	case __SI_TIMER:
-+		si->cpt_pid = info->si_tid;
-+		si->cpt_uid = info->si_overrun;
-+		si->cpt_sigval = cpt_ptr_export(info->_sifields._timer._sigval.sival_ptr);
-+		si->cpt_utime = info->si_sys_private;
-+		break;
-+	case __SI_POLL:
-+		si->cpt_pid = info->si_band;
-+		si->cpt_uid = info->si_fd;
-+		break;
-+	case __SI_FAULT:
-+		si->cpt_sigval = cpt_ptr_export(info->si_addr);
-+#ifdef __ARCH_SI_TRAPNO
-+		si->cpt_pid = info->si_trapno;
-+#endif
-+		break;
-+	case __SI_CHLD:
-+		if (is_virtual_pid(info->si_pid))
-+			si->cpt_pid = info->si_pid;
-+		else
-+			si->cpt_pid = pid_type_to_vpid(PIDTYPE_PID, info->si_pid);
-+		si->cpt_uid = info->si_uid;
-+		si->cpt_sigval = info->si_status;
-+		si->cpt_stime = info->si_stime;
-+		si->cpt_utime = info->si_utime;
-+		break;
-+	default:	/* __SI_KILL, __SI_RT, __SI_MESGQ and anything else */
-+		si->cpt_pid = is_virtual_pid(info->si_pid) ? info->si_pid : pid_type_to_vpid(PIDTYPE_TGID, info->si_pid);
-+		si->cpt_uid = info->si_uid;
-+		si->cpt_sigval = cpt_ptr_export(info->si_ptr);
-+		break;
-+	}
-+	return 0;
-+}
-+
-+static int dump_sigqueue(struct sigpending *list, struct cpt_context *ctx)
-+{
-+	struct sigqueue *entry;
-+	loff_t saved_obj;
-+
-+	/* Writes one CPT_OBJ_SIGINFO record per queued signal; an empty queue emits nothing. */
-+	if (list_empty(&list->list))
-+		return 0;
-+
-+	cpt_push_object(&saved_obj, ctx);
-+	list_for_each_entry(entry, &list->list, list) {
-+		struct cpt_siginfo_image si;
-+
-+		si.cpt_next = sizeof(si);
-+		si.cpt_object = CPT_OBJ_SIGINFO;
-+		si.cpt_hdrlen = sizeof(si);
-+		si.cpt_content = CPT_CONTENT_VOID;
-+		si.cpt_qflags = entry->flags;
-+		si.cpt_user = entry->user->uid;
-+
-+		if (encode_siginfo(&si, &entry->info) != 0)
-+			return -EINVAL;	/* NOTE(review): leaves without cpt_pop_object() */
-+
-+		ctx->write(&si, sizeof(si), ctx);
-+	}
-+	cpt_pop_object(&saved_obj, ctx);
-+	return 0;
-+}
-+
-+
-+
-+static int dump_one_signal_struct(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+ struct signal_struct *sig = obj->o_obj;
-+ struct cpt_signal_image *v = cpt_get_buf(ctx);
-+ task_t *tsk;
-+ int i;
-+ /* Writes one CPT_OBJ_SIGNAL_STRUCT image for obj->o_obj, then its shared pending queue. */
-+ cpt_open_object(obj, ctx);
-+ /* NOTE(review): the -EINVAL exits below release the buffer but never call cpt_close_object() - confirm intended. */
-+ v->cpt_next = CPT_NULL;
-+ v->cpt_object = CPT_OBJ_SIGNAL_STRUCT;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+ if (sig->pgrp <= 0) {
-+ eprintk_ctx("bad pgid\n");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ v->cpt_pgrp_type = CPT_PGRP_NORMAL; /* becomes ORPHAN when no task is found with pgrp as its pid */
-+ read_lock(&tasklist_lock);
-+ tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->pgrp);
-+ if (tsk == NULL)
-+ v->cpt_pgrp_type = CPT_PGRP_ORPHAN;
-+ read_unlock(&tasklist_lock);
-+ v->cpt_pgrp = pid_type_to_vpid(PIDTYPE_PGID, sig->pgrp);
-+
-+ v->cpt_old_pgrp = 0;
-+ if (sig->tty_old_pgrp < 0) {
-+ eprintk_ctx("bad tty_old_pgrp\n");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ if (sig->tty_old_pgrp > 0) {
-+ v->cpt_old_pgrp_type = CPT_PGRP_NORMAL;
-+ read_lock(&tasklist_lock);
-+ tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->tty_old_pgrp);
-+ if (tsk == NULL) {
-+ v->cpt_old_pgrp_type = CPT_PGRP_ORPHAN;
-+ tsk = find_task_by_pid_type_ve(PIDTYPE_PGID, sig->tty_old_pgrp); /* retry as a group id */
-+ }
-+ read_unlock(&tasklist_lock);
-+ if (tsk == NULL) {
-+ eprintk_ctx("tty_old_pgrp does not exist anymore\n");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, sig->tty_old_pgrp);
-+ if ((int)v->cpt_old_pgrp < 0) { /* no usable virtual id: record it as a stray group */
-+ dprintk_ctx("stray tty_old_pgrp %d\n", sig->tty_old_pgrp);
-+ v->cpt_old_pgrp = -1;
-+ v->cpt_old_pgrp_type = CPT_PGRP_STRAY;
-+ }
-+ }
-+
-+ if (sig->session <= 0) {
-+ eprintk_ctx("bad session\n");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ v->cpt_session_type = CPT_PGRP_NORMAL; /* becomes ORPHAN when no task is found with session as its pid */
-+ read_lock(&tasklist_lock);
-+ tsk = find_task_by_pid_type_ve(PIDTYPE_PID, sig->session);
-+ if (tsk == NULL)
-+ v->cpt_session_type = CPT_PGRP_ORPHAN;
-+ read_unlock(&tasklist_lock);
-+ v->cpt_session = pid_type_to_vpid(PIDTYPE_SID, sig->session);
-+
-+ v->cpt_leader = sig->leader;
-+ v->cpt_ctty = CPT_NULL;
-+ if (sig->tty) { /* the controlling tty must already be a registered CPT_OBJ_TTY object */
-+ cpt_object_t *cobj = lookup_cpt_object(CPT_OBJ_TTY, sig->tty, ctx);
-+ if (cobj)
-+ v->cpt_ctty = cobj->o_pos;
-+ else {
-+ eprintk_ctx("controlling tty is not found\n");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ }
-+ memcpy(&v->cpt_sigpending, &sig->shared_pending.signal, 8); /* first 8 bytes of the shared pending set */
-+
-+ v->cpt_curr_target = 0;
-+ if (sig->curr_target)
-+ v->cpt_curr_target = virt_pid(sig->curr_target);
-+ v->cpt_group_exit = ((sig->flags & SIGNAL_GROUP_EXIT) != 0);
-+ v->cpt_group_exit_code = sig->group_exit_code;
-+ v->cpt_group_exit_task = 0;
-+ if (sig->group_exit_task)
-+ v->cpt_group_exit_task = virt_pid(sig->group_exit_task);
-+ v->cpt_notify_count = sig->notify_count;
-+ v->cpt_group_stop_count = sig->group_stop_count;
-+
-+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,8)
-+ v->cpt_utime = sig->utime;
-+ v->cpt_stime = sig->stime;
-+ v->cpt_cutime = sig->cutime;
-+ v->cpt_cstime = sig->cstime;
-+ v->cpt_nvcsw = sig->nvcsw;
-+ v->cpt_nivcsw = sig->nivcsw;
-+ v->cpt_cnvcsw = sig->cnvcsw;
-+ v->cpt_cnivcsw = sig->cnivcsw;
-+ v->cpt_min_flt = sig->min_flt;
-+ v->cpt_maj_flt = sig->maj_flt;
-+ v->cpt_cmin_flt = sig->cmin_flt;
-+ v->cpt_cmaj_flt = sig->cmaj_flt;
-+
-+ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS) /* NOTE(review): looks like a build-time guard via invalid asm - confirm */
-+ __asm__("undefined\n");
-+
-+ for (i=0; i<CPT_RLIM_NLIMITS; i++) {
-+ if (i < RLIM_NLIMITS) {
-+ v->cpt_rlim_cur[i] = sig->rlim[i].rlim_cur;
-+ v->cpt_rlim_max[i] = sig->rlim[i].rlim_max;
-+ } else { /* image slots beyond RLIM_NLIMITS are filled with CPT_NULL */
-+ v->cpt_rlim_cur[i] = CPT_NULL;
-+ v->cpt_rlim_max[i] = CPT_NULL;
-+ }
-+ }
-+#endif
-+
-+ ctx->write(v, sizeof(*v), ctx);
-+ cpt_release_buf(ctx);
-+ /* NOTE(review): the return value of dump_sigqueue() is ignored here. */
-+ dump_sigqueue(&sig->shared_pending, ctx);
-+
-+ cpt_close_object(ctx);
-+ return 0;
-+}
-+
-+
-+static int dump_one_process(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+ task_t *tsk = obj->o_obj;
-+ task_t *next_tsk;
-+ struct cpt_task_image *v = cpt_get_buf(ctx);
-+ cpt_object_t *tobj;
-+ cpt_object_t *tg_obj;
-+ loff_t saved_obj;
-+ int i;
-+ int err;
-+ struct timespec delta;
-+
-+ cpt_open_object(obj, ctx);
-+
-+ v->cpt_signal = CPT_NULL;
-+ tg_obj = lookup_cpt_object(CPT_OBJ_SIGNAL_STRUCT, tsk->signal, ctx);
-+ if (!tg_obj) BUG();
-+
-+ v->cpt_next = CPT_NULL;
-+ v->cpt_object = CPT_OBJ_TASK;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+ v->cpt_state = tsk->state;
-+ if (tsk->state == EXIT_ZOMBIE) {
-+ eprintk_ctx("invalid zombie state on" CPT_FID "\n", CPT_TID(tsk));
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ } else if (tsk->state == EXIT_DEAD) {
-+ if (tsk->exit_state != EXIT_DEAD &&
-+ tsk->exit_state != EXIT_ZOMBIE) {
-+ eprintk_ctx("invalid exit_state %ld on" CPT_FID "\n", tsk->exit_state, CPT_TID(tsk));
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ }
-+ if (tsk->exit_state) {
-+ v->cpt_state = tsk->exit_state;
-+ if (tsk->state != EXIT_DEAD) {
-+ eprintk_ctx("invalid tsk->state %ld/%ld on" CPT_FID "\n",
-+ tsk->state, tsk->exit_state, CPT_TID(tsk));
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ }
-+ v->cpt_flags = tsk->flags&~PF_FROZEN;
-+ v->cpt_ptrace = tsk->ptrace;
-+ v->cpt_prio = tsk->prio;
-+ v->cpt_exit_code = tsk->exit_code;
-+ v->cpt_exit_signal = tsk->exit_signal;
-+ v->cpt_pdeath_signal = tsk->pdeath_signal;
-+ v->cpt_static_prio = tsk->static_prio;
-+ v->cpt_rt_priority = tsk->rt_priority;
-+ v->cpt_policy = tsk->policy;
-+ if (v->cpt_policy != SCHED_NORMAL) {
-+ eprintk_ctx("scheduler policy is not supported %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+
-+ v->cpt_mm = CPT_NULL;
-+ if (tsk->mm) {
-+ tobj = lookup_cpt_object(CPT_OBJ_MM, tsk->mm, ctx);
-+ if (!tobj) BUG();
-+ v->cpt_mm = tobj->o_pos;
-+ }
-+ v->cpt_files = CPT_NULL;
-+ if (tsk->files) {
-+ tobj = lookup_cpt_object(CPT_OBJ_FILES, tsk->files, ctx);
-+ if (!tobj) BUG();
-+ v->cpt_files = tobj->o_pos;
-+ }
-+ v->cpt_fs = CPT_NULL;
-+ if (tsk->fs) {
-+ tobj = lookup_cpt_object(CPT_OBJ_FS, tsk->fs, ctx);
-+ if (!tobj) BUG();
-+ v->cpt_fs = tobj->o_pos;
-+ }
-+ v->cpt_namespace = CPT_NULL;
-+ if (tsk->namespace) {
-+ tobj = lookup_cpt_object(CPT_OBJ_NAMESPACE, tsk->namespace, ctx);
-+ if (!tobj) BUG();
-+ v->cpt_namespace = tobj->o_pos;
-+
-+ if (tsk->namespace != current->namespace)
-+ eprintk_ctx("namespaces are not supported: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
-+ }
-+ v->cpt_sysvsem_undo = CPT_NULL;
-+ if (tsk->sysvsem.undo_list && !tsk->exit_state) {
-+ tobj = lookup_cpt_object(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx);
-+ if (!tobj) BUG();
-+ v->cpt_sysvsem_undo = tobj->o_pos;
-+ }
-+ v->cpt_sighand = CPT_NULL;
-+ if (tsk->sighand) {
-+ tobj = lookup_cpt_object(CPT_OBJ_SIGHAND_STRUCT, tsk->sighand, ctx);
-+ if (!tobj) BUG();
-+ v->cpt_sighand = tobj->o_pos;
-+ }
-+ v->cpt_sigblocked = cpt_sigset_export(&tsk->blocked);
-+ v->cpt_sigrblocked = cpt_sigset_export(&tsk->real_blocked);
-+ v->cpt_sigsuspend_blocked = cpt_sigset_export(&tsk->saved_sigmask);
-+
-+ v->cpt_pid = virt_pid(tsk);
-+ v->cpt_tgid = virt_tgid(tsk);
-+ v->cpt_ppid = 0;
-+ if (tsk->parent) {
-+ if (tsk->parent != tsk->real_parent &&
-+ !lookup_cpt_object(CPT_OBJ_TASK, tsk->parent, ctx)) {
-+ eprintk_ctx("task %d/%d(%s) is ptraced from ve0\n", tsk->pid, virt_pid(tsk), tsk->comm);
-+ cpt_release_buf(ctx);
-+ return -EBUSY;
-+ }
-+ v->cpt_ppid = virt_pid(tsk->parent);
-+ }
-+ v->cpt_rppid = tsk->real_parent ? virt_pid(tsk->real_parent) : 0;
-+ v->cpt_pgrp = virt_pgid(tsk);
-+ v->cpt_session = virt_sid(tsk);
-+ v->cpt_old_pgrp = 0;
-+ if (tsk->signal->tty_old_pgrp)
-+ v->cpt_old_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tsk->signal->tty_old_pgrp);
-+ v->cpt_leader = tsk->group_leader ? virt_pid(tsk->group_leader) : 0;
-+ v->cpt_set_tid = (unsigned long)tsk->set_child_tid;
-+ v->cpt_clear_tid = (unsigned long)tsk->clear_child_tid;
-+ memcpy(v->cpt_comm, tsk->comm, 16);
-+ v->cpt_user = tsk->user->uid;
-+ v->cpt_uid = tsk->uid;
-+ v->cpt_euid = tsk->euid;
-+ v->cpt_suid = tsk->suid;
-+ v->cpt_fsuid = tsk->fsuid;
-+ v->cpt_gid = tsk->gid;
-+ v->cpt_egid = tsk->egid;
-+ v->cpt_sgid = tsk->sgid;
-+ v->cpt_fsgid = tsk->fsgid;
-+ v->cpt_ngids = 0;
-+ if (tsk->group_info && tsk->group_info->ngroups != 0) {
-+ int i = tsk->group_info->ngroups;
-+ if (i > 32) {
-+ /* Shame... I did a simplified version and _forgot_
-+ * about this. Later, later. */
-+ eprintk_ctx("too many of groups " CPT_FID "\n", CPT_TID(tsk));
-+ return -EINVAL;
-+ }
-+ v->cpt_ngids = i;
-+ for (i--; i>=0; i--)
-+ v->cpt_gids[i] = tsk->group_info->small_block[i];
-+ }
-+ memcpy(&v->cpt_ecap, &tsk->cap_effective, 8);
-+ memcpy(&v->cpt_icap, &tsk->cap_inheritable, 8);
-+ memcpy(&v->cpt_pcap, &tsk->cap_permitted, 8);
-+ v->cpt_keepcap = tsk->keep_capabilities;
-+
-+ v->cpt_did_exec = tsk->did_exec;
-+ v->cpt_exec_domain = -1;
-+ v->cpt_thrflags = tsk->thread_info->flags & ~(1<<TIF_FREEZE);
-+ v->cpt_64bit = 0;
-+#ifdef CONFIG_X86_64
-+ /* Clear x86_64 specific flags */
-+ v->cpt_thrflags &= ~(_TIF_FORK|_TIF_ABI_PENDING|_TIF_IA32);
-+ if (!(tsk->thread_info->flags & _TIF_IA32)) {
-+ ctx->tasks64++;
-+ v->cpt_64bit = 1;
-+ }
-+#endif
-+ v->cpt_thrstatus = tsk->thread_info->status;
-+ v->cpt_addr_limit = -1;
-+
-+ v->cpt_personality = tsk->personality;
-+
-+ for (i=0; i<GDT_ENTRY_TLS_ENTRIES; i++) {
-+ if (i>=3) {
-+ eprintk_ctx("too many tls descs\n");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+#ifndef CONFIG_X86_64
-+ v->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b)<<32) + tsk->thread.tls_array[i].a;
-+#else
-+ v->cpt_tls[i] = tsk->thread.tls_array[i];
-+#endif
-+ }
-+
-+ v->cpt_restart.fn = CPT_RBL_0;
-+ if (tsk->thread_info->restart_block.fn != current->thread_info->restart_block.fn) {
-+ if (tsk->thread_info->restart_block.fn != nanosleep_restart
-+#ifdef CONFIG_X86_64
-+ && tsk->thread_info->restart_block.fn != compat_nanosleep_restart
-+#endif
-+ ) {
-+ eprintk_ctx("unknown restart block %p\n", tsk->thread_info->restart_block.fn);
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ v->cpt_restart.fn = CPT_RBL_NANOSLEEP;
-+#ifdef CONFIG_X86_64
-+ if (tsk->thread_info->restart_block.fn == compat_nanosleep_restart)
-+ v->cpt_restart.fn = CPT_RBL_COMPAT_NANOSLEEP;
-+#endif
-+ v->cpt_restart.arg0 = tsk->thread_info->restart_block.arg0;
-+ v->cpt_restart.arg1 = tsk->thread_info->restart_block.arg1;
-+ v->cpt_restart.arg2 = tsk->thread_info->restart_block.arg2;
-+ v->cpt_restart.arg3 = tsk->thread_info->restart_block.arg3;
-+ if (debug_level > 2) {
-+ ktime_t e, e1;
-+ struct timespec now;
-+
-+ do_posix_clock_monotonic_gettime(&now);
-+ e = timespec_to_ktime(now);
-+ e1.tv64 = ((u64)tsk->thread_info->restart_block.arg1 << 32) | (u64) tsk->thread_info->restart_block.arg0;
-+ e = ktime_sub(e1, e);
-+ dprintk("cpt " CPT_FID " RBL %ld/%ld %Ld\n", CPT_TID(tsk),
-+ tsk->thread_info->restart_block.arg1,
-+ tsk->thread_info->restart_block.arg0, e.tv64);
-+ }
-+ }
-+
-+ v->cpt_it_real_incr = 0;
-+ v->cpt_it_prof_incr = 0;
-+ v->cpt_it_virt_incr = 0;
-+ v->cpt_it_real_value = 0;
-+ v->cpt_it_prof_value = 0;
-+ v->cpt_it_virt_value = 0;
-+ if (thread_group_leader(tsk) && tsk->exit_state == 0) {
-+ ktime_t rem;
-+
-+ v->cpt_it_real_incr = ktime_to_ns(tsk->signal->it_real_incr);
-+ v->cpt_it_prof_incr = tsk->signal->it_prof_incr;
-+ v->cpt_it_virt_incr = tsk->signal->it_virt_incr;
-+
-+ rem = hrtimer_get_remaining(&tsk->signal->real_timer);
-+
-+ if (hrtimer_active(&tsk->signal->real_timer)) {
-+ if (rem.tv64 <= 0)
-+ rem.tv64 = NSEC_PER_USEC;
-+ v->cpt_it_real_value = ktime_to_ns(rem);
-+ dprintk("cpt itimer " CPT_FID " %Lu\n", CPT_TID(tsk), v->cpt_it_real_value);
-+ }
-+ v->cpt_it_prof_value = tsk->signal->it_prof_expires;
-+ v->cpt_it_virt_value = tsk->signal->it_virt_expires;
-+ }
-+ v->cpt_used_math = (tsk_used_math(tsk) != 0);
-+
-+ if (tsk->notifier) {
-+ eprintk_ctx("task notifier is in use: process %d/%d(%s)\n", virt_pid(tsk), tsk->pid, tsk->comm);
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+
-+ v->cpt_utime = tsk->utime;
-+ v->cpt_stime = tsk->stime;
-+ delta = tsk->start_time;
-+ _set_normalized_timespec(&delta,
-+ delta.tv_sec - get_exec_env()->init_entry->start_time.tv_sec,
-+ delta.tv_nsec - get_exec_env()->init_entry->start_time.tv_nsec);
-+ v->cpt_starttime = cpt_timespec_export(&delta);
-+ v->cpt_nvcsw = tsk->nvcsw;
-+ v->cpt_nivcsw = tsk->nivcsw;
-+ v->cpt_min_flt = tsk->min_flt;
-+ v->cpt_maj_flt = tsk->maj_flt;
-+
-+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
-+ v->cpt_cutime = tsk->cutime;
-+ v->cpt_cstime = tsk->cstime;
-+ v->cpt_cnvcsw = tsk->cnvcsw;
-+ v->cpt_cnivcsw = tsk->cnivcsw;
-+ v->cpt_cmin_flt = tsk->cmin_flt;
-+ v->cpt_cmaj_flt = tsk->cmaj_flt;
-+
-+ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
-+ __asm__("undefined\n");
-+
-+ for (i=0; i<CPT_RLIM_NLIMITS; i++) {
-+ if (i < RLIM_NLIMITS) {
-+ v->cpt_rlim_cur[i] = tsk->rlim[i].rlim_cur;
-+ v->cpt_rlim_max[i] = tsk->rlim[i].rlim_max;
-+ } else {
-+ v->cpt_rlim_cur[i] = CPT_NULL;
-+ v->cpt_rlim_max[i] = CPT_NULL;
-+ }
-+ }
-+#else
-+ v->cpt_cutime = tsk->signal->cutime;
-+ v->cpt_cstime = tsk->signal->cstime;
-+ v->cpt_cnvcsw = tsk->signal->cnvcsw;
-+ v->cpt_cnivcsw = tsk->signal->cnivcsw;
-+ v->cpt_cmin_flt = tsk->signal->cmin_flt;
-+ v->cpt_cmaj_flt = tsk->signal->cmaj_flt;
-+
-+ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
-+ __asm__("undefined\n");
-+
-+ for (i=0; i<CPT_RLIM_NLIMITS; i++) {
-+ if (i < RLIM_NLIMITS) {
-+ v->cpt_rlim_cur[i] = tsk->signal->rlim[i].rlim_cur;
-+ v->cpt_rlim_max[i] = tsk->signal->rlim[i].rlim_max;
-+ } else {
-+ v->cpt_rlim_cur[i] = CPT_NULL;
-+ v->cpt_rlim_max[i] = CPT_NULL;
-+ }
-+ }
-+#endif
-+
-+ if (tsk->mm)
-+ v->cpt_mm_ub = cpt_lookup_ubc(tsk->mm->mm_ub, ctx);
-+ else
-+ v->cpt_mm_ub = CPT_NULL;
-+ v->cpt_task_ub = cpt_lookup_ubc(tsk->task_bc.task_ub, ctx);
-+ v->cpt_exec_ub = cpt_lookup_ubc(tsk->task_bc.exec_ub, ctx);
-+ v->cpt_fork_sub = cpt_lookup_ubc(tsk->task_bc.fork_sub, ctx);
-+
-+ v->cpt_ptrace_message = tsk->ptrace_message;
-+ v->cpt_pn_state = tsk->pn_state;
-+ v->cpt_stopped_state = tsk->stopped_state;
-+ v->cpt_sigsuspend_state = 0;
-+
-+#ifndef CONFIG_X86_64
-+ if (tsk->thread.vm86_info) {
-+ eprintk_ctx("vm86 task is running\n");
-+ cpt_release_buf(ctx);
-+ return -EBUSY;
-+ }
-+#endif
-+
-+ v->cpt_sigpending = cpt_sigset_export(&tsk->pending.signal);
-+
-+ ctx->write(v, sizeof(*v), ctx);
-+ cpt_release_buf(ctx);
-+
-+ cpt_push_object(&saved_obj, ctx);
-+ dump_kstack(tsk, ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+
-+ cpt_push_object(&saved_obj, ctx);
-+ err = dump_registers(tsk, ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+ if (err)
-+ return err;
-+
-+ if (tsk_used_math(tsk)) {
-+ cpt_push_object(&saved_obj, ctx);
-+ dump_fpustate(tsk, ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+ }
-+
-+ if (tsk->last_siginfo) {
-+ struct cpt_siginfo_image si;
-+ cpt_push_object(&saved_obj, ctx);
-+
-+ si.cpt_next = sizeof(si);
-+ si.cpt_object = CPT_OBJ_LASTSIGINFO;
-+ si.cpt_hdrlen = sizeof(si);
-+ si.cpt_content = CPT_CONTENT_VOID;
-+
-+ if (encode_siginfo(&si, tsk->last_siginfo))
-+ return -EINVAL;
-+
-+ ctx->write(&si, sizeof(si), ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+ }
-+
-+ dump_sigqueue(&tsk->pending, ctx);
-+
-+ next_tsk = NULL;
-+ if (obj->o_list.next != &ctx->object_array[CPT_OBJ_TASK]) {
-+ tobj = list_entry(obj->o_list.next, cpt_object_t, o_list);
-+ next_tsk = tobj->o_obj;
-+ if (next_tsk->tgid != tsk->tgid)
-+ next_tsk = NULL;
-+ }
-+
-+ if (next_tsk == NULL) {
-+ int err;
-+ loff_t pos = ctx->file->f_pos;
-+
-+ cpt_push_object(&saved_obj, ctx);
-+ err = dump_one_signal_struct(tg_obj, ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+ if (err)
-+ return err;
-+
-+ for (;;) {
-+ task_t *prev_tsk;
-+ loff_t tg_pos = obj->o_pos + offsetof(struct cpt_task_image, cpt_signal);
-+
-+ ctx->pwrite(&pos, sizeof(pos), ctx, tg_pos);
-+
-+ if (obj->o_list.prev == &ctx->object_array[CPT_OBJ_TASK])
-+ break;
-+
-+ tobj = list_entry(obj->o_list.prev, cpt_object_t, o_list);
-+ prev_tsk = tobj->o_obj;
-+ if (prev_tsk->tgid != tsk->tgid)
-+ break;
-+ obj = tobj;
-+ }
-+ }
-+
-+ cpt_close_object(ctx);
-+ return 0;
-+}
-+
-+/*
-+ * Emit the CPT_SECT_TASKS section: one dump_one_process() record for every
-+ * object on the CPT_OBJ_TASK list.  The first non-zero result stops the walk
-+ * and is returned unchanged; the section is closed only on success.
-+ */
-+int cpt_dump_tasks(struct cpt_context *ctx)
-+{
-+	cpt_object_t *task_obj;
-+	int rc;
-+
-+	cpt_open_section(ctx, CPT_SECT_TASKS);
-+
-+	for_each_object(task_obj, CPT_OBJ_TASK) {
-+		rc = dump_one_process(task_obj, ctx);
-+		if (rc)
-+			return rc;
-+	}
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Register the signal_struct and sighand_struct of every collected task in
-+ * the object table.
-+ *
-+ * Returns 0 on success, -EBUSY if a task has entries on its posix_timers
-+ * list, -ENOMEM if cpt_object_add() fails.
-+ */
-+int cpt_collect_signals(cpt_context_t *ctx)
-+{
-+	cpt_object_t *task_obj;
-+
-+	for_each_object(task_obj, CPT_OBJ_TASK) {
-+		task_t *task = task_obj->o_obj;
-+
-+		if (task->signal) {
-+			if (!list_empty(&task->signal->posix_timers)) {
-+				eprintk_ctx("task %d/%d(%s) uses posix timers\n", task->pid, virt_pid(task), task->comm);
-+				return -EBUSY;
-+			}
-+			if (!cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, task->signal, ctx))
-+				return -ENOMEM;
-+		}
-+
-+		if (task->sighand &&
-+		    !cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, task->sighand, ctx))
-+			return -ENOMEM;
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Write one sighand_struct as a CPT_OBJ_SIGHAND_STRUCT object: a bare header
-+ * followed by one nested CPT_OBJ_SIGHANDLER record for every signal slot
-+ * whose handler is not SIG_DFL.  Always returns 0.
-+ */
-+static int dump_one_sighand_struct(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct sighand_struct *sighand = obj->o_obj;
-+	struct cpt_sighand_image *hdr = cpt_get_buf(ctx);
-+	int slot;
-+
-+	cpt_open_object(obj, ctx);
-+
-+	hdr->cpt_next = CPT_NULL;
-+	hdr->cpt_object = CPT_OBJ_SIGHAND_STRUCT;
-+	hdr->cpt_hdrlen = sizeof(*hdr);
-+	hdr->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	ctx->write(hdr, sizeof(*hdr), ctx);
-+	cpt_release_buf(ctx);
-+
-+	for (slot = 0; slot < _NSIG; slot++) {
-+		struct cpt_sighandler_image *rec;
-+		loff_t saved_pos;
-+
-+		/* Default dispositions are not recorded. */
-+		if (sighand->action[slot].sa.sa_handler == SIG_DFL)
-+			continue;
-+
-+		rec = cpt_get_buf(ctx);
-+
-+		cpt_push_object(&saved_pos, ctx);
-+		cpt_open_object(NULL, ctx);
-+
-+		rec->cpt_next = CPT_NULL;
-+		rec->cpt_object = CPT_OBJ_SIGHANDLER;
-+		rec->cpt_hdrlen = sizeof(*rec);
-+		rec->cpt_content = CPT_CONTENT_VOID;
-+
-+		rec->cpt_signo = slot;
-+		rec->cpt_handler = (unsigned long)sighand->action[slot].sa.sa_handler;
-+		rec->cpt_restorer = (unsigned long)sighand->action[slot].sa.sa_restorer;
-+		rec->cpt_flags = sighand->action[slot].sa.sa_flags;
-+		/* Only the first 8 bytes of the mask are stored. */
-+		memcpy(&rec->cpt_mask, &sighand->action[slot].sa.sa_mask, 8);
-+
-+		ctx->write(rec, sizeof(*rec), ctx);
-+		cpt_release_buf(ctx);
-+		cpt_close_object(ctx);
-+		cpt_pop_object(&saved_pos, ctx);
-+	}
-+
-+	cpt_close_object(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Emit the CPT_SECT_SIGHAND_STRUCT section, one record per object on the
-+ * CPT_OBJ_SIGHAND_STRUCT list.  A non-zero result from the per-object dumper
-+ * is returned immediately, leaving the section unclosed.
-+ */
-+int cpt_dump_sighand(struct cpt_context *ctx)
-+{
-+	cpt_object_t *sighand_obj;
-+	int rc;
-+
-+	cpt_open_section(ctx, CPT_SECT_SIGHAND_STRUCT);
-+
-+	for_each_object(sighand_obj, CPT_OBJ_SIGHAND_STRUCT) {
-+		rc = dump_one_sighand_struct(sighand_obj, ctx);
-+		if (rc)
-+			return rc;
-+	}
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_process.h linux-2.6.16-026test009/kernel/cpt/cpt_process.h
---- linux-2.6.16.orig/kernel/cpt/cpt_process.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_process.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,10 @@
-+int cpt_collect_signals(cpt_context_t *);
-+int cpt_dump_signal(struct cpt_context *);
-+int cpt_dump_sighand(struct cpt_context *);
-+int cpt_dump_tasks(struct cpt_context *);
-+
-+int rst_signal_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
-+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
-+
-+int rst_restore_process(struct cpt_context *ctx);
-+int rst_process_linkage(struct cpt_context *ctx);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_socket.c linux-2.6.16-026test009/kernel/cpt/cpt_socket.c
---- linux-2.6.16.orig/kernel/cpt/cpt_socket.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_socket.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,757 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_socket.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/un.h>
-+#include <linux/tcp.h>
-+#include <net/sock.h>
-+#include <net/scm.h>
-+#include <net/af_unix.h>
-+#include <net/tcp.h>
-+#include <net/netlink_sock.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_socket.h"
-+#include "cpt_files.h"
-+#include "cpt_kernel.h"
-+
-+static int dump_rqueue(int owner, struct sock *sk, struct cpt_context *ctx);
-+
-+
-+/* Sockets are quite different from other kinds of files.
-+ * There is one simplification: only one struct file can refer to a socket,
-+ * so we could store information about socket directly in section FILES as
-+ * a description of a file and append f.e. array of not-yet-accepted
-+ * connections of listening socket as array of auxiliary data.
-+ *
-+ * Complications are:
-+ * 1. TCP sockets can be orphans. We have to relocate orphans as well,
-+ * so we have to create special section for orphans.
-+ * 2. AF_UNIX sockets are distinguished objects: set of links between
-+ * AF_UNIX sockets is quite arbitrary.
-+ * A. Each socket can refer to many files due to FD passing.
-+ * B. Each socket except for connected ones can have in queue skbs
-+ * sent by any of sockets.
-+ *
-+ * 2A is relatively easy: after our tasks are frozen we make an additional
-+ * recursive pass through the set of collected files and get references to
-+ * FD passed files. After end of recursion, all the files are treated
-+ * in the same way. All they will be stored in section FILES.
-+ *
-+ * 2B. We have to resolve all those references at some point.
-+ * It is the place where pipe-like approach to image fails.
-+ *
-+ * All this makes socket checkpointing quite cumbersome.
-+ * Right now we collect all the sockets and assign some numeric index value
-+ * to each of them. The socket section is separate and put after section FILES,
-+ * so section FILES refers to sockets by index, section SOCKET refers to FILES
-+ * as usual by position in image. All the refs inside socket section are
-+ * by index. When restoring we read socket section, create objects to hold
-+ * mappings index <-> pos. At the second pass we open sockets (simultaneously
-+ * with their pairs) and create FILE objects.
-+ */
-+
-+
-+/* ====== FD passing ====== */
-+
-+/* Almost nobody does FD passing via AF_UNIX sockets, nevertheless we
-+ * have to implement this. A problem is that in general case we receive
-+ * skbs from an unknown context, so new files can arrive to checkpointed
-+ * set of processes even after they are stopped. Well, we are going just
-+ * to ignore unknown fds while doing real checkpointing. It is fair because
-+ * links outside checkpointed set are going to fail anyway.
-+ *
-+ * ATTN: the procedure is recursive. We linearize the recursion adding
-+ * newly found files to the end of file list, so they will be analyzed
-+ * in the same loop.
-+ */
-+
-+/*
-+ * Scan the receive queue of one AF_UNIX socket file and add every file
-+ * passed in its queued skbs to the CPT_OBJ_FILE table.
-+ *
-+ * Returns -ENOTSOCK if @file is not a socket, 0 for non-AF_UNIX sockets
-+ * and on success, -ENOMEM if cpt_object_add() fails.
-+ */
-+static int collect_one_passedfd(struct file *file, cpt_context_t * ctx)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	struct sk_buff_head *rq;
-+	struct socket *sock;
-+	struct sk_buff *skb;
-+	struct sock *sk;
-+
-+	if (!S_ISSOCK(inode->i_mode))
-+		return -ENOTSOCK;
-+
-+	sock = &container_of(inode, struct socket_alloc, vfs_inode)->socket;
-+	if (sock->ops->family != AF_UNIX)
-+		return 0;
-+
-+	sk = sock->sk;
-+	rq = &sk->sk_receive_queue;
-+
-+	/*
-+	 * Locking note: all processes are stopped, so skbs cannot leave the
-+	 * queue while we walk it, but new ones may still be appended at the
-+	 * tail.  Taking the queue lock only around the skb->next read is
-+	 * therefore enough.  Skbs that arrive after checkpointing started
-+	 * are discussed in the comment at the top of this section.
-+	 */
-+	for (skb = skb_peek(rq); skb && skb != (struct sk_buff*)rq; ) {
-+		if (UNIXCB(skb).fp && skb->sk &&
-+		    (!sock_flag(skb->sk, SOCK_DEAD) || unix_peer(sk) == skb->sk)) {
-+			struct scm_fp_list *passed = UNIXCB(skb).fp;
-+			int n = passed->count;
-+
-+			/* Files are added from the last slot down to slot 0. */
-+			while (--n >= 0) {
-+				if (!cpt_object_add(CPT_OBJ_FILE, passed->fp[n], ctx))
-+					return -ENOMEM;
-+			}
-+		}
-+
-+		spin_lock_irq(&rq->lock);
-+		skb = skb->next;
-+		spin_unlock_irq(&rq->lock);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Run collect_one_passedfd() on every socket file in the CPT_OBJ_FILE table.
-+ * Files added by that call land on the same list and are therefore visited
-+ * by this very loop.  Returns 0, or the first negative result.
-+ */
-+int cpt_collect_passedfds(cpt_context_t * ctx)
-+{
-+	cpt_object_t *file_obj;
-+
-+	for_each_object(file_obj, CPT_OBJ_FILE) {
-+		struct file *f = file_obj->o_obj;
-+		int rc;
-+
-+		if (!S_ISSOCK(f->f_dentry->d_inode->i_mode))
-+			continue;
-+
-+		rc = collect_one_passedfd(f, ctx);
-+		if (rc < 0)
-+			return rc;
-+	}
-+
-+	return 0;
-+}
-+
-+/* ====== End of FD passing ====== */
-+
-+/*
-+ * Detach the whole backlog chain from @sk and free every skb on it.
-+ * Must be called under bh_lock_sock().
-+ */
-+
-+void clear_backlog(struct sock *sk)
-+{
-+	struct sk_buff *cur;
-+	struct sk_buff *following;
-+
-+	cur = sk->sk_backlog.head;
-+	sk->sk_backlog.tail = NULL;
-+	sk->sk_backlog.head = NULL;
-+
-+	for (; cur != NULL; cur = following) {
-+		following = cur->next;
-+		cur->next = NULL;
-+		kfree_skb(cur);
-+	}
-+}
-+
-+/*
-+ * Drop ownership of the socket lock, discarding the backlog with
-+ * clear_backlog(), and wake anybody sleeping on the lock's wait queue.
-+ */
-+void release_sock_nobacklog(struct sock *sk)
-+{
-+	spinlock_t *slock = &sk->sk_lock.slock;
-+	wait_queue_head_t *waiters = &sk->sk_lock.wq;
-+
-+	spin_lock_bh(slock);
-+
-+	clear_backlog(sk);
-+	sk->sk_lock.owner = NULL;
-+	if (waitqueue_active(waiters))
-+		wake_up(waiters);
-+
-+	spin_unlock_bh(slock);
-+}
-+
-+/*
-+ * Write one sk_buff to the image as a CPT_OBJ_SKB object.
-+ *
-+ * @type:  queue tag stored in cpt_queue
-+ * @owner: socket index stored in cpt_owner
-+ * @skb:   buffer to dump
-+ * @ctx:   checkpoint context
-+ *
-+ * The header is followed by a nested CPT_OBJ_BITS object holding the
-+ * headroom plus all skb data (linear part first, then the paged part copied
-+ * through the context buffer in PAGE_SIZE pieces).  If the skb's sender is
-+ * an AF_UNIX socket, one CPT_OBJ_FILEDESC record is added per passed file.
-+ *
-+ * Returns 0 on success, -EINVAL if the skb has a non-zero ufo_size or
-+ * carries a passed file that is not in the CPT_OBJ_FILE table.
-+ */
-+int cpt_dump_skb(int type, int owner, struct sk_buff *skb,
-+		 struct cpt_context *ctx)
-+{
-+	struct cpt_skb_image *v = cpt_get_buf(ctx);
-+	loff_t saved_obj;
-+	struct timeval tmptv;
-+
-+	cpt_push_object(&saved_obj, ctx);
-+	cpt_open_object(NULL, ctx);
-+
-+	v->cpt_next = CPT_NULL;
-+	v->cpt_object = CPT_OBJ_SKB;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	v->cpt_owner = owner;
-+	v->cpt_queue = type;
-+	skb_get_timestamp(skb, &tmptv);
-+	v->cpt_stamp = cpt_timeval_export(&tmptv);
-+	v->cpt_hspace = skb->data - skb->head;
-+	v->cpt_tspace = skb->end - skb->tail;
-+	v->cpt_h = skb->h.raw - skb->head;
-+	v->cpt_nh = skb->nh.raw - skb->head;
-+	v->cpt_mac = skb->mac.raw - skb->head;
-+	if (sizeof(skb->cb) < sizeof(v->cpt_cb)) BUG();
-+	memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
-+	if (sizeof(skb->cb) > sizeof(v->cpt_cb)) {
-+		int i;
-+
-+		/* Warn if control-block bytes that do not fit are in use. */
-+		for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) {
-+			if (skb->cb[i]) {
-+				wprintk_ctx("dirty skb cb");
-+				break;
-+			}
-+		}
-+	}
-+	v->cpt_len = skb->len;
-+	v->cpt_mac_len = skb->mac_len;
-+	v->cpt_csum = skb->csum;
-+	v->cpt_local_df = skb->local_df;
-+	v->cpt_pkt_type = skb->pkt_type;
-+	v->cpt_ip_summed = skb->ip_summed;
-+	v->cpt_priority = skb->priority;
-+	v->cpt_protocol = skb->protocol;
-+	v->cpt_security = 0;
-+	v->cpt_tso_segs = skb_shinfo(skb)->tso_segs;
-+	v->cpt_tso_size = skb_shinfo(skb)->tso_size;
-+	if (skb_shinfo(skb)->ufo_size) {
-+		eprintk_ctx("skb ufo is not supported\n");
-+		/* The image buffer is still held on this path: hand it back. */
-+		cpt_release_buf(ctx);
-+		return -EINVAL;
-+	}
-+
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_release_buf(ctx);
-+	/* v must not be touched past this point: its buffer is released. */
-+
-+	if (skb->len + (skb->data - skb->head) > 0) {
-+		struct cpt_obj_bits ob;
-+		loff_t saved_obj2;
-+
-+		cpt_push_object(&saved_obj2, ctx);
-+		cpt_open_object(NULL, ctx);
-+		ob.cpt_next = CPT_NULL;
-+		ob.cpt_object = CPT_OBJ_BITS;
-+		ob.cpt_hdrlen = sizeof(ob);
-+		ob.cpt_content = CPT_CONTENT_DATA;
-+		/* Headroom plus data, computed from the skb itself. */
-+		ob.cpt_size = skb->len + (skb->data - skb->head);
-+
-+		ctx->write(&ob, sizeof(ob), ctx);
-+
-+		/* Headroom and the linear part of the data. */
-+		ctx->write(skb->head, (skb->data-skb->head) + (skb->len-skb->data_len), ctx);
-+		if (skb->data_len) {
-+			int offset = skb->len - skb->data_len;
-+
-+			/* Paged part, at most PAGE_SIZE bytes per round. */
-+			while (offset < skb->len) {
-+				int copy = skb->len - offset;
-+				if (copy > PAGE_SIZE)
-+					copy = PAGE_SIZE;
-+				(void)cpt_get_buf(ctx);
-+				if (skb_copy_bits(skb, offset, ctx->tmpbuf, copy))
-+					BUG();
-+				ctx->write(ctx->tmpbuf, copy, ctx);
-+				__cpt_release_buf(ctx);
-+				offset += copy;
-+			}
-+		}
-+
-+		ctx->align(ctx);
-+		cpt_close_object(ctx);
-+		cpt_pop_object(&saved_obj2, ctx);
-+	}
-+
-+	if (skb->sk && skb->sk->sk_family == AF_UNIX) {
-+		struct scm_fp_list *fpl = UNIXCB(skb).fp;
-+
-+		if (fpl) {
-+			int i;
-+
-+			for (i = 0; i < fpl->count; i++) {
-+				struct cpt_fd_image fdi;
-+				cpt_object_t *obj;
-+				loff_t saved_obj2;
-+
-+				obj = lookup_cpt_object(CPT_OBJ_FILE, fpl->fp[i], ctx);
-+
-+				if (!obj) {
-+					eprintk_ctx("lost passed FD\n");
-+					return -EINVAL;
-+				}
-+
-+				cpt_push_object(&saved_obj2, ctx);
-+				cpt_open_object(NULL, ctx);
-+				fdi.cpt_next = CPT_NULL;
-+				fdi.cpt_object = CPT_OBJ_FILEDESC;
-+				fdi.cpt_hdrlen = sizeof(fdi);
-+				fdi.cpt_content = CPT_CONTENT_VOID;
-+
-+				/* cpt_fd is the slot in the passed-fd list. */
-+				fdi.cpt_fd = i;
-+				fdi.cpt_file = obj->o_pos;
-+				fdi.cpt_flags = 0;
-+				ctx->write(&fdi, sizeof(fdi), ctx);
-+				cpt_close_object(ctx);
-+				cpt_pop_object(&saved_obj2, ctx);
-+			}
-+		}
-+	}
-+
-+	cpt_close_object(ctx);
-+	cpt_pop_object(&saved_obj, ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Dump the receive queue of @sk with cpt_dump_skb(CPT_SKB_RQ, ...).
-+ *
-+ * For AF_UNIX sockets the owner index of each skb is the index of its
-+ * sending socket, or -1 when the sender is not in the CPT_OBJ_SOCKET table.
-+ * Skbs from such an unknown sender are dumped only if that sender is the
-+ * peer of @sk; all others are skipped.  The last successful lookup is cached.
-+ *
-+ * Returns 0 or the first error from cpt_dump_skb().
-+ */
-+static int dump_rqueue(int idx, struct sock *sk, struct cpt_context *ctx)
-+{
-+	struct sk_buff_head *rq = &sk->sk_receive_queue;
-+	struct sock *cached_sender = NULL;
-+	struct sk_buff *skb;
-+
-+	for (skb = skb_peek(rq); skb && skb != (struct sk_buff*)rq; ) {
-+		int dump_it = 1;
-+
-+		if (sk->sk_family == AF_UNIX && skb->sk != cached_sender) {
-+			cpt_object_t *sender_obj;
-+
-+			sender_obj = lookup_cpt_object(CPT_OBJ_SOCKET, skb->sk, ctx);
-+			if (sender_obj) {
-+				idx = sender_obj->o_index;
-+				cached_sender = skb->sk;
-+			} else {
-+				idx = -1;
-+				cached_sender = NULL;
-+				if (unix_peer(sk) != skb->sk)
-+					dump_it = 0;
-+			}
-+		}
-+
-+		if (dump_it) {
-+			int err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx);
-+
-+			if (err)
-+				return err;
-+		}
-+
-+		/* The queue lock is held only while reading skb->next. */
-+		spin_lock_irq(&rq->lock);
-+		skb = skb->next;
-+		spin_unlock_irq(&rq->lock);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Dump every skb on the write queue of @sk as CPT_SKB_WQ with owner @idx.
-+ * Returns 0 or the first error from cpt_dump_skb().
-+ */
-+static int dump_wqueue(int idx, struct sock *sk, struct cpt_context *ctx)
-+{
-+	struct sk_buff_head *wq = &sk->sk_write_queue;
-+	struct sk_buff *skb;
-+	int rc;
-+
-+	for (skb = skb_peek(wq); skb && skb != (struct sk_buff*)wq; ) {
-+		rc = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx);
-+		if (rc)
-+			return rc;
-+
-+		/* The queue lock is held only while reading skb->next. */
-+		spin_lock_irq(&wq->lock);
-+		skb = skb->next;
-+		spin_unlock_irq(&wq->lock);
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/* Dump socket content */
-+
-+/*
-+ * Write one struct sock to the image as a CPT_OBJ_SOCKET object, followed
-+ * by its queued skbs.
-+ *
-+ * @obj:    object-table entry of the socket; cpt_dump_orphaned_sockets()
-+ *          passes NULL
-+ * @sk:     socket to dump
-+ * @index:  stored in cpt_index and used as owner of the dumped skbs
-+ * @parent: stored in cpt_parent; an AF_UNIX socket sharing its address with
-+ *          a listening socket in the table gets that listener's index
-+ * @ctx:    checkpoint context
-+ *
-+ * An AF_INET/AF_INET6 socket that has a struct socket is locked with
-+ * lock_sock() and obj->o_lock is set; the lock is not dropped here.
-+ *
-+ * Returns 0 on success or a negative errno.  Failures while dumping the
-+ * receive, write or out-of-order queue are reported to the caller, because
-+ * the image is incomplete when they occur.
-+ */
-+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx)
-+{
-+	struct cpt_sock_image *v = cpt_get_buf(ctx);
-+	struct socket *sock;
-+	int is_inet = (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
-+	int err;
-+
-+	cpt_open_object(obj, ctx);
-+
-+	v->cpt_next = CPT_NULL;
-+	v->cpt_object = CPT_OBJ_SOCKET;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+	v->cpt_file = CPT_NULL;
-+	sock = sk->sk_socket;
-+	if (sock && sock->file) {
-+		cpt_object_t *tobj;
-+		tobj = lookup_cpt_object(CPT_OBJ_FILE, sock->file, ctx);
-+		if (tobj)
-+			v->cpt_file = tobj->o_pos;
-+	}
-+	v->cpt_index = index;
-+	v->cpt_parent = parent;
-+
-+	if (is_inet) {
-+		if (sock && !obj->o_lock) {
-+			lock_sock(sk);
-+			obj->o_lock = 1;
-+		}
-+	}
-+
-+	/* Some bits stored in inode */
-+	v->cpt_ssflags = sock ? sock->flags : 0;
-+	v->cpt_sstate = sock ? sock->state : 0;
-+	v->cpt_passcred = sock ? test_bit(SOCK_PASSCRED, &sock->flags) : 0;
-+
-+	/* Common data */
-+	v->cpt_family = sk->sk_family;
-+	v->cpt_type = sk->sk_type;
-+	v->cpt_state = sk->sk_state;
-+	v->cpt_reuse = sk->sk_reuse;
-+	v->cpt_zapped = sock_flag(sk, SOCK_ZAPPED);
-+	v->cpt_shutdown = sk->sk_shutdown;
-+	v->cpt_userlocks = sk->sk_userlocks;
-+	v->cpt_no_check = sk->sk_no_check;
-+	/*
-+	 * SOCK_DBG used to be stored into cpt_zapped, clobbering the
-+	 * SOCK_ZAPPED value written just above.
-+	 * NOTE(review): assumes struct cpt_sock_image has a cpt_debug
-+	 * field - confirm against cpt_image.h.
-+	 */
-+	v->cpt_debug = sock_flag(sk, SOCK_DBG);
-+	v->cpt_rcvtstamp = sock_flag(sk, SOCK_RCVTSTAMP);
-+	v->cpt_localroute = sock_flag(sk, SOCK_LOCALROUTE);
-+	v->cpt_protocol = sk->sk_protocol;
-+	v->cpt_err = sk->sk_err;
-+	v->cpt_err_soft = sk->sk_err_soft;
-+	v->cpt_max_ack_backlog = sk->sk_max_ack_backlog;
-+	v->cpt_priority = sk->sk_priority;
-+	v->cpt_rcvlowat = sk->sk_rcvlowat;
-+	/* Timeouts: CPT_NULL stands for MAX_SCHEDULE_TIMEOUT, others are clamped to INT_MAX. */
-+	v->cpt_rcvtimeo = CPT_NULL;
-+	if (sk->sk_rcvtimeo != MAX_SCHEDULE_TIMEOUT)
-+		v->cpt_rcvtimeo = sk->sk_rcvtimeo > INT_MAX ? INT_MAX : sk->sk_rcvtimeo;
-+	v->cpt_sndtimeo = CPT_NULL;
-+	if (sk->sk_sndtimeo != MAX_SCHEDULE_TIMEOUT)
-+		v->cpt_sndtimeo = sk->sk_sndtimeo > INT_MAX ? INT_MAX : sk->sk_sndtimeo;
-+	v->cpt_rcvbuf = sk->sk_rcvbuf;
-+	v->cpt_sndbuf = sk->sk_sndbuf;
-+	v->cpt_bound_dev_if = sk->sk_bound_dev_if;
-+	v->cpt_flags = sk->sk_flags;
-+	v->cpt_lingertime = CPT_NULL;
-+	if (sk->sk_lingertime != MAX_SCHEDULE_TIMEOUT)
-+		v->cpt_lingertime = sk->sk_lingertime > INT_MAX ? INT_MAX : sk->sk_lingertime;
-+	v->cpt_peer_pid = sk->sk_peercred.pid;
-+	v->cpt_peer_uid = sk->sk_peercred.uid;
-+	v->cpt_peer_gid = sk->sk_peercred.gid;
-+	v->cpt_stamp = cpt_timeval_export(&sk->sk_stamp);
-+
-+	if (sk->sk_filter) {
-+		eprintk_ctx("checkpointing sk_filter is not implemented\n");
-+		cpt_release_buf(ctx);
-+		return -EINVAL;
-+	}
-+
-+	v->cpt_peer = -1;
-+	v->cpt_socketpair = 0;
-+	v->cpt_deleted = 0;
-+
-+	/* Local address: a getname() failure is fatal. */
-+	v->cpt_laddrlen = 0;
-+	if (sock) {
-+		int alen = sizeof(v->cpt_laddr);
-+
-+		err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_laddr, &alen, 0);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		v->cpt_laddrlen = alen;
-+	}
-+	/* Remote address: a getname() failure just leaves the length at 0. */
-+	v->cpt_raddrlen = 0;
-+	if (sock) {
-+		int alen = sizeof(v->cpt_raddr);
-+
-+		err = sock->ops->getname(sock, (struct sockaddr*)&v->cpt_raddr, &alen, 2);
-+		if (!err)
-+			v->cpt_raddrlen = alen;
-+	}
-+
-+	if (sk->sk_family == AF_UNIX) {
-+		if (unix_sk(sk)->dentry) {
-+			struct dentry *d = unix_sk(sk)->dentry;
-+			v->cpt_deleted = !IS_ROOT(d) && d_unhashed(d);
-+			if (!v->cpt_deleted) {
-+				char *path;
-+				unsigned long pg = __get_free_page(GFP_KERNEL);
-+
-+				if (!pg) {
-+					cpt_release_buf(ctx);
-+					return -ENOMEM;
-+				}
-+
-+				err = 0;
-+				path = d_path(d, unix_sk(sk)->mnt, (char *)pg, PAGE_SIZE);
-+
-+				if (!IS_ERR(path)) {
-+					int len = strlen(path);
-+					/* Store the d_path() result after the first two bytes of cpt_laddr. */
-+					if (len < 126) {
-+						strcpy(((char*)v->cpt_laddr)+2, path);
-+						v->cpt_laddrlen = len + 2;
-+					} else {
-+						wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
-+					}
-+					err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx);
-+				} else {
-+					eprintk_ctx("cannot get path of an af_unix socket\n");
-+					err = PTR_ERR(path);
-+				}
-+				free_page(pg);
-+				if (err) {
-+					cpt_release_buf(ctx);
-+					return err;
-+				}
-+			}
-+		}
-+
-+		/* If the socket is connected, find its peer. If peer is not
-+		 * in our table, the socket is connected to external process
-+		 * and we consider it disconnected.
-+		 */
-+		if (unix_peer(sk)) {
-+			cpt_object_t *pobj;
-+			pobj = lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(sk), ctx);
-+			if (pobj)
-+				v->cpt_peer = pobj->o_index;
-+			else
-+				v->cpt_shutdown = SHUTDOWN_MASK;
-+
-+			if (unix_peer(unix_peer(sk)) == sk)
-+				v->cpt_socketpair = 1;
-+		}
-+
-+		/* If the socket shares address with another socket it is
-+		 * child of some listening socket. Find and record it. */
-+		if (unix_sk(sk)->addr &&
-+		    atomic_read(&unix_sk(sk)->addr->refcnt) > 1 &&
-+		    sk->sk_state != TCP_LISTEN) {
-+			cpt_object_t *pobj;
-+			for_each_object(pobj, CPT_OBJ_SOCKET) {
-+				struct sock *psk = pobj->o_obj;
-+				if (psk->sk_family == AF_UNIX &&
-+				    psk->sk_state == TCP_LISTEN &&
-+				    unix_sk(psk)->addr == unix_sk(sk)->addr) {
-+					v->cpt_parent = pobj->o_index;
-+					break;
-+				}
-+			}
-+		}
-+	}
-+
-+	if (is_inet)
-+		cpt_dump_socket_in(v, sk, ctx);
-+
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_release_buf(ctx);
-+
-+	/* Queue dumps can fail; do not report success for a partial image. */
-+	err = dump_rqueue(index, sk, ctx);
-+	if (err)
-+		return err;
-+	if (is_inet) {
-+		err = dump_wqueue(index, sk, ctx);
-+		if (err)
-+			return err;
-+		err = cpt_dump_ofo_queue(index, sk, ctx);
-+		if (err)
-+			return err;
-+	}
-+
-+	if (is_inet && sk->sk_state == TCP_LISTEN)
-+		cpt_dump_synwait_queue(sk, index, ctx);
-+
-+	cpt_close_object(ctx);
-+
-+	/* The accept queue is dumped after this socket's object is closed. */
-+	if (is_inet && sk->sk_state == TCP_LISTEN)
-+		cpt_dump_accept_queue(sk, index, ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Dump one orphaned TCP socket and then destroy it with tcp_done().
-+ * The caller holds a reference on @sk and no hash bucket lock.
-+ */
-+static void dump_and_kill_orphan(struct sock *sk, struct cpt_context *ctx)
-+{
-+	/* Install a fake lock owner for the duration of the dump. */
-+	local_bh_disable();
-+	bh_lock_sock(sk);
-+	if (sock_owned_by_user(sk))
-+		eprintk_ctx("BUG: sk locked by whom?\n");
-+	sk->sk_lock.owner = (void *)1;
-+	bh_unlock_sock(sk);
-+	local_bh_enable();
-+
-+	/* The result of the dump is not examined. */
-+	cpt_dump_socket(NULL, sk, -1, -1, ctx);
-+
-+	/* Drop the fake owner, discard the backlog and finish the socket. */
-+	local_bh_disable();
-+	bh_lock_sock(sk);
-+	sk->sk_lock.owner = NULL;
-+	clear_backlog(sk);
-+	tcp_done(sk);
-+	bh_unlock_sock(sk);
-+	local_bh_enable();
-+}
-+
-+/*
-+ * Emit the CPT_SECT_ORPHANS section.  Every bucket of tcp_hashinfo.ehash is
-+ * searched for sockets that belong to the current VE, have no struct socket,
-+ * are flagged SOCK_DEAD and are not in the CPT_OBJ_SOCKET table.  Each match
-+ * is dumped and killed with the bucket lock dropped, after which the bucket
-+ * is rescanned from the start.  Always returns 0.
-+ */
-+int cpt_dump_orphaned_sockets(struct cpt_context *ctx)
-+{
-+	int bucket;
-+
-+	cpt_open_section(ctx, CPT_SECT_ORPHANS);
-+
-+	for (bucket = 0; bucket < tcp_hashinfo.ehash_size; bucket++) {
-+		struct hlist_node *pos;
-+		struct sock *sk;
-+
-+rescan:
-+		read_lock_bh(&tcp_hashinfo.ehash[bucket].lock);
-+		sk_for_each(sk, pos, &tcp_hashinfo.ehash[bucket].chain) {
-+			if (VE_OWNER_SK(sk) != get_exec_env() ||
-+			    sk->sk_socket ||
-+			    !sock_flag(sk, SOCK_DEAD) ||
-+			    lookup_cpt_object(CPT_OBJ_SOCKET, sk, ctx))
-+				continue;
-+
-+			sock_hold(sk);
-+			read_unlock_bh(&tcp_hashinfo.ehash[bucket].lock);
-+
-+			dump_and_kill_orphan(sk, ctx);
-+			sock_put(sk);
-+
-+			goto rescan;
-+		}
-+		read_unlock_bh(&tcp_hashinfo.ehash[bucket].lock);
-+	}
-+
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Tell whether @sk can be checkpointed: returns 0 for an AF_NETLINK socket
-+ * whose cb pointer is set, 1 for everything else.
-+ */
-+static int can_dump(struct sock *sk, cpt_context_t *ctx)
-+{
-+	if (sk->sk_family == AF_NETLINK &&
-+	    ((struct netlink_sock *)sk)->cb) {
-+		eprintk_ctx("netlink socket has active callback\n");
-+		return 0;
-+	}
-+
-+	return 1;
-+}
-+
-+/* We are not going to block suspend when we have external AF_UNIX connections.
-+ * But we cannot stop feed of new packets/connections to our environment
-+ * from outside. Taking into account that it is intrinsically unreliable,
-+ * we collect some amount of data, but when checkpointing/restoring we
-+ * are going to drop everything, which does not make sense: skbs sent
-+ * by outside processes, connections from outside etc. etc.
-+ */
-+
-+/* The first pass. When we see socket referenced by a file, we just
-+ * add it to socket table */
-+/*
-+ * Add the struct sock behind @file to the CPT_OBJ_SOCKET table and record
-+ * @file as the object's o_parent.
-+ *
-+ * Returns 0 on success, -ENOTSOCK if @file is not a socket, -EBUSY if
-+ * can_dump() rejects the socket, -ENOMEM if cpt_object_add() fails.
-+ */
-+int cpt_collect_socket(struct file *file, cpt_context_t * ctx)
-+{
-+	struct inode *inode = file->f_dentry->d_inode;
-+	cpt_object_t *sk_obj;
-+	struct sock *sk;
-+
-+	if (!S_ISSOCK(inode->i_mode))
-+		return -ENOTSOCK;
-+
-+	sk = container_of(inode, struct socket_alloc, vfs_inode)->socket.sk;
-+	if (!can_dump(sk, ctx))
-+		return -EBUSY;
-+
-+	sk_obj = cpt_object_add(CPT_OBJ_SOCKET, sk, ctx);
-+	if (sk_obj == NULL)
-+		return -ENOMEM;
-+
-+	sk_obj->o_parent = file;
-+	return 0;
-+}
-+
-+/*
-+ * We should end with table containing:
-+ * * all sockets opened by our processes in the table.
-+ * * all the sockets queued in listening queues on _our_ listening sockets,
-+ * which are connected to our opened sockets.
-+ */
-+
-+/*
-+ * Walk the receive queue of the listening AF_UNIX socket in @obj.  Each
-+ * queued skb's socket whose peer is already in the CPT_OBJ_SOCKET table is
-+ * added to the table too, inheriting o_parent from @obj.
-+ *
-+ * Returns 0 on success, -ENOMEM if cpt_object_add() fails.
-+ */
-+static int collect_one_unix_listening_sock(cpt_object_t *obj, cpt_context_t * ctx)
-+{
-+	struct sock *listener = obj->o_obj;
-+	struct sk_buff_head *rq = &listener->sk_receive_queue;
-+	struct sk_buff *skb;
-+
-+	for (skb = skb_peek(rq); skb && skb != (struct sk_buff*)rq; ) {
-+		struct sock *child = skb->sk;
-+
-+		if (unix_peer(child) &&
-+		    lookup_cpt_object(CPT_OBJ_SOCKET, unix_peer(child), ctx)) {
-+			cpt_object_t *child_obj;
-+
-+			child_obj = cpt_object_add(CPT_OBJ_SOCKET, child, ctx);
-+			if (child_obj == NULL)
-+				return -ENOMEM;
-+			child_obj->o_parent = obj->o_parent;
-+		}
-+
-+		/* The queue lock is held only while reading skb->next. */
-+		spin_lock_irq(&rq->lock);
-+		skb = skb->next;
-+		spin_unlock_irq(&rq->lock);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Complete the socket table and number it.
-+ *
-+ * Pass 1 adds the not-yet-accepted children of listening AF_UNIX sockets.
-+ * Pass 2 gives every socket a consecutive index starting at 0 and copies
-+ * that index to the CPT_OBJ_FILE object of the socket's file, if any.
-+ *
-+ * Returns 0 on success, or the error from collecting the children
-+ * (-ENOMEM), which used to be silently ignored.
-+ */
-+int cpt_index_sockets(cpt_context_t * ctx)
-+{
-+	cpt_object_t *obj;
-+	unsigned long index = 0;
-+
-+	/* Collect not-yet-accepted children of listening sockets. */
-+	for_each_object(obj, CPT_OBJ_SOCKET) {
-+		struct sock *sk = obj->o_obj;
-+		int err;
-+
-+		if (sk->sk_state != TCP_LISTEN)
-+			continue;
-+
-+		if (sk->sk_family == AF_UNIX) {
-+			err = collect_one_unix_listening_sock(obj, ctx);
-+			if (err)
-+				return err;
-+		}
-+	}
-+
-+	/* Assign indices to all the sockets. */
-+	for_each_object(obj, CPT_OBJ_SOCKET) {
-+		struct sock *sk = obj->o_obj;
-+		cpt_obj_setindex(obj, index++, ctx);
-+
-+		if (sk->sk_socket && sk->sk_socket->file) {
-+			cpt_object_t *tobj;
-+			tobj = lookup_cpt_object(CPT_OBJ_FILE, sk->sk_socket->file, ctx);
-+			if (tobj)
-+				cpt_obj_setindex(tobj, obj->o_index, ctx);
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Call release_sock() on every socket in the table whose object has o_lock
-+ * set and which still has a struct socket.
-+ */
-+void cpt_unlock_sockets(cpt_context_t * ctx)
-+{
-+	cpt_object_t *sk_obj;
-+
-+	for_each_object(sk_obj, CPT_OBJ_SOCKET) {
-+		struct sock *sk = sk_obj->o_obj;
-+
-+		if (sk && sk_obj->o_lock && sk->sk_socket)
-+			release_sock(sk);
-+	}
-+}
-+
-+/*
-+ * For every socket in the table whose object has o_lock set: call
-+ * cpt_kill_socket() on it and, if it has a struct socket, release its lock
-+ * with release_sock_nobacklog().
-+ */
-+void cpt_kill_sockets(cpt_context_t * ctx)
-+{
-+	cpt_object_t *sk_obj;
-+
-+	for_each_object(sk_obj, CPT_OBJ_SOCKET) {
-+		struct sock *sk = sk_obj->o_obj;
-+
-+		if (!sk || !sk_obj->o_lock)
-+			continue;
-+
-+		cpt_kill_socket(sk, ctx);
-+		if (sk->sk_socket)
-+			release_sock_nobacklog(sk);
-+	}
-+}
-+
-+/*
-+ * Search the fasync list of the socket behind @file for an entry that
-+ * belongs to @file.  Returns that entry's fa_fd, or -1 (converted to __u32)
-+ * when there is none.
-+ */
-+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx)
-+{
-+	struct socket *sock;
-+	struct fasync_struct *entry;
-+
-+	sock = &container_of(file->f_dentry->d_inode, struct socket_alloc, vfs_inode)->socket;
-+
-+	entry = sock->fasync_list;
-+	while (entry) {
-+		if (entry->fa_file == file)
-+			return entry->fa_fd;
-+		entry = entry->fa_next;
-+	}
-+
-+	return -1;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_socket.h linux-2.6.16-026test009/kernel/cpt/cpt_socket.h
---- linux-2.6.16.orig/kernel/cpt/cpt_socket.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_socket.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,26 @@
-+struct sock;
-+
-+int cpt_collect_passedfds(cpt_context_t *);
-+int cpt_index_sockets(cpt_context_t *);
-+int cpt_collect_socket(struct file *, cpt_context_t *);
-+int cpt_dump_socket(cpt_object_t *obj, struct sock *sk, int index, int parent, struct cpt_context *ctx);
-+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx);
-+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx);
-+int rst_sockets(struct cpt_context *ctx);
-+int rst_sockets_complete(struct cpt_context *ctx);
-+int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
-+
-+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx);
-+
-+void cpt_unlock_sockets(cpt_context_t *);
-+void cpt_kill_sockets(cpt_context_t *);
-+
-+
-+int cpt_kill_socket(struct sock *, cpt_context_t *);
-+int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
-+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
-+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
-+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
-+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
-+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
-+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_socket_in.c linux-2.6.16-026test009/kernel/cpt/cpt_socket_in.c
---- linux-2.6.16.orig/kernel/cpt/cpt_socket_in.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_socket_in.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,372 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_socket_in.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/tcp.h>
-+#include <net/sock.h>
-+#include <net/tcp.h>
-+#include <linux/ipv6.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_socket.h"
-+#include "cpt_kernel.h"
-+
-+/*
-+ * Express an absolute jiffies value as a signed 32-bit offset from the
-+ * current jiffies counter; the offset is handed back in a __u32.
-+ */
-+static inline __u32 jiffies_export(unsigned long tmo)
-+{
-+	return (__s32)(long)(tmo - jiffies);
-+}
-+
-+/*
-+ * Same as jiffies_export(), but for 32-bit TCP timestamps: the result is
-+ * the signed distance of tmo from tcp_time_stamp.
-+ */
-+static inline __u32 tcp_jiffies_export(__u32 tmo)
-+{
-+	return (__s32)(tmo - tcp_time_stamp);
-+}
-+
-+/*
-+ * Dump every skb sitting on the TCP out-of-order queue of sk via
-+ * cpt_dump_skb(CPT_SKB_OFOQ, idx, ...).  Sockets that are not
-+ * SOCK_STREAM/IPPROTO_TCP are ignored.
-+ *
-+ * Returns 0 on success or the first non-zero cpt_dump_skb() result.
-+ */
-+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx)
-+{
-+	struct sk_buff_head *ofo;
-+	struct sk_buff *cur;
-+
-+	if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP)
-+		return 0;
-+
-+	ofo = &tcp_sk(sk)->out_of_order_queue;
-+
-+	/* The list head doubles as the end-of-list sentinel. */
-+	for (cur = skb_peek(ofo);
-+	     cur != NULL && cur != (struct sk_buff *)ofo; ) {
-+		int rc = cpt_dump_skb(CPT_SKB_OFOQ, idx, cur, ctx);
-+
-+		if (rc)
-+			return rc;
-+
-+		/* Only the step to the next element is done under the lock. */
-+		spin_lock_irq(&ofo->lock);
-+		cur = cur->next;
-+		spin_unlock_irq(&ofo->lock);
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Copy the TCP-specific state of sk (struct tcp_sock plus the embedded
-+ * inet_connection_sock) into the socket image si.  Timer-like values are
-+ * stored relative to "now" through jiffies_export()/tcp_jiffies_export().
-+ * ctx is not used here.  Always returns 0.
-+ */
-+static int cpt_dump_socket_tcp(struct cpt_sock_image *si, struct sock *sk,
-+			       struct cpt_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct inet_connection_sock *icsk = inet_csk(sk);
-+	struct tcp_options_received *rx = &tp->rx_opt;
-+	int i;
-+
-+	si->cpt_pred_flags = tp->pred_flags;
-+	si->cpt_rcv_nxt = tp->rcv_nxt;
-+	si->cpt_snd_nxt = tp->snd_nxt;
-+	si->cpt_snd_una = tp->snd_una;
-+	si->cpt_snd_sml = tp->snd_sml;
-+	si->cpt_rcv_tstamp = tcp_jiffies_export(tp->rcv_tstamp);
-+	si->cpt_lsndtime = tcp_jiffies_export(tp->lsndtime);
-+	si->cpt_tcp_header_len = tp->tcp_header_len;
-+
-+	/* Delayed-ACK bookkeeping. */
-+	si->cpt_ack_pending = icsk->icsk_ack.pending;
-+	si->cpt_quick = icsk->icsk_ack.quick;
-+	si->cpt_pingpong = icsk->icsk_ack.pingpong;
-+	si->cpt_blocked = icsk->icsk_ack.blocked;
-+	si->cpt_ato = icsk->icsk_ack.ato;
-+	si->cpt_ack_timeout = jiffies_export(icsk->icsk_ack.timeout);
-+	si->cpt_lrcvtime = tcp_jiffies_export(icsk->icsk_ack.lrcvtime);
-+	si->cpt_last_seg_size = icsk->icsk_ack.last_seg_size;
-+	si->cpt_rcv_mss = icsk->icsk_ack.rcv_mss;
-+
-+	si->cpt_snd_wl1 = tp->snd_wl1;
-+	si->cpt_snd_wnd = tp->snd_wnd;
-+	si->cpt_max_window = tp->max_window;
-+	si->cpt_pmtu_cookie = icsk->icsk_pmtu_cookie;
-+	si->cpt_mss_cache = tp->mss_cache;
-+	si->cpt_mss_cache_std = tp->mss_cache; /* FIXME was tp->mss_cache_std */
-+	si->cpt_mss_clamp = rx->mss_clamp;
-+	si->cpt_ext_header_len = icsk->icsk_ext_hdr_len;
-+	si->cpt_ext2_header_len = 0;
-+	si->cpt_ca_state = icsk->icsk_ca_state;
-+	si->cpt_retransmits = icsk->icsk_retransmits;
-+	si->cpt_reordering = tp->reordering;
-+	si->cpt_frto_counter = tp->frto_counter;
-+	si->cpt_frto_highmark = tp->frto_highmark;
-+	/* si->cpt_adv_cong is left untouched (was tp->adv_cong). */
-+	si->cpt_defer_accept = icsk->icsk_accept_queue.rskq_defer_accept;
-+	si->cpt_backoff = icsk->icsk_backoff;
-+
-+	/* RTT estimator and in-flight accounting. */
-+	si->cpt_srtt = tp->srtt;
-+	si->cpt_mdev = tp->mdev;
-+	si->cpt_mdev_max = tp->mdev_max;
-+	si->cpt_rttvar = tp->rttvar;
-+	si->cpt_rtt_seq = tp->rtt_seq;
-+	si->cpt_rto = icsk->icsk_rto;
-+	si->cpt_packets_out = tp->packets_out;
-+	si->cpt_left_out = tp->left_out;
-+	si->cpt_retrans_out = tp->retrans_out;
-+	si->cpt_lost_out = tp->lost_out;
-+	si->cpt_sacked_out = tp->sacked_out;
-+	si->cpt_fackets_out = tp->fackets_out;
-+
-+	/* Congestion window. */
-+	si->cpt_snd_ssthresh = tp->snd_ssthresh;
-+	si->cpt_snd_cwnd = tp->snd_cwnd;
-+	si->cpt_snd_cwnd_cnt = tp->snd_cwnd_cnt;
-+	si->cpt_snd_cwnd_clamp = tp->snd_cwnd_clamp;
-+	si->cpt_snd_cwnd_used = tp->snd_cwnd_used;
-+	si->cpt_snd_cwnd_stamp = tcp_jiffies_export(tp->snd_cwnd_stamp);
-+
-+	si->cpt_timeout = jiffies_export(icsk->icsk_timeout);
-+	si->cpt_ka_timeout = 0;	/* overridden below for keepalive sockets */
-+	si->cpt_rcv_wnd = tp->rcv_wnd;
-+	si->cpt_rcv_wup = tp->rcv_wup;
-+	si->cpt_write_seq = tp->write_seq;
-+	si->cpt_pushed_seq = tp->pushed_seq;
-+	si->cpt_copied_seq = tp->copied_seq;
-+
-+	/* Negotiated/received TCP options. */
-+	si->cpt_tstamp_ok = rx->tstamp_ok;
-+	si->cpt_wscale_ok = rx->wscale_ok;
-+	si->cpt_sack_ok = rx->sack_ok;
-+	si->cpt_saw_tstamp = rx->saw_tstamp;
-+	si->cpt_snd_wscale = rx->snd_wscale;
-+	si->cpt_rcv_wscale = rx->rcv_wscale;
-+	si->cpt_nonagle = tp->nonagle;
-+	si->cpt_keepalive_probes = tp->keepalive_probes;
-+	si->cpt_rcv_tsval = rx->rcv_tsval;
-+	si->cpt_rcv_tsecr = rx->rcv_tsecr;
-+	si->cpt_ts_recent = rx->ts_recent;
-+	si->cpt_ts_recent_stamp = rx->ts_recent_stamp;
-+	si->cpt_user_mss = rx->user_mss;
-+	si->cpt_dsack = rx->dsack;
-+	si->cpt_eff_sacks = rx->eff_sacks;
-+
-+	/* Slots 0-1: the D-SACK block; slots 2-9: four SACK blocks. */
-+	si->cpt_sack_array[0] = tp->duplicate_sack[0].start_seq;
-+	si->cpt_sack_array[1] = tp->duplicate_sack[0].end_seq;
-+	for (i = 0; i < 4; i++) {
-+		si->cpt_sack_array[2 + 2 * i] = tp->selective_acks[i].start_seq;
-+		si->cpt_sack_array[3 + 2 * i] = tp->selective_acks[i].end_seq;
-+	}
-+
-+	si->cpt_window_clamp = tp->window_clamp;
-+	si->cpt_rcv_ssthresh = tp->rcv_ssthresh;
-+	si->cpt_probes_out = icsk->icsk_probes_out;
-+	si->cpt_num_sacks = rx->num_sacks;
-+	si->cpt_advmss = tp->advmss;
-+	si->cpt_syn_retries = icsk->icsk_syn_retries;
-+	si->cpt_ecn_flags = tp->ecn_flags;
-+	si->cpt_prior_ssthresh = tp->prior_ssthresh;
-+	si->cpt_high_seq = tp->high_seq;
-+	si->cpt_retrans_stamp = tp->retrans_stamp;
-+	si->cpt_undo_marker = tp->undo_marker;
-+	si->cpt_undo_retrans = tp->undo_retrans;
-+	si->cpt_urg_seq = tp->urg_seq;
-+	si->cpt_urg_data = tp->urg_data;
-+	si->cpt_pending = icsk->icsk_pending;
-+	si->cpt_urg_mode = tp->urg_mode;
-+	si->cpt_snd_up = tp->snd_up;
-+	si->cpt_keepalive_time = tp->keepalive_time;
-+	si->cpt_keepalive_intvl = tp->keepalive_intvl;
-+	si->cpt_linger2 = tp->linger2;
-+
-+	/* The keepalive timer is only exported for open, non-listening
-+	 * sockets that have SOCK_KEEPOPEN set. */
-+	if (sk->sk_state != TCP_LISTEN &&
-+	    sk->sk_state != TCP_CLOSE &&
-+	    sock_flag(sk, SOCK_KEEPOPEN))
-+		si->cpt_ka_timeout = jiffies_export(sk->sk_timer.expires);
-+
-+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-+	if (sk->sk_family == AF_INET6 && tp->af_specific == &ipv6_mapped)
-+		si->cpt_mapped = 1;
-+#endif
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Fill the AF_INET/AF_INET6 part of the socket image si from sk: local
-+ * address, inet_sock options, cork state, UDP state for UDP sockets,
-+ * IPv6 per-socket state for AF_INET6, and finally the TCP state for TCP
-+ * stream sockets.  Always returns 0.
-+ */
-+int cpt_dump_socket_in(struct cpt_sock_image *si, struct sock *sk,
-+		       struct cpt_context *ctx)
-+{
-+	struct inet_sock *inet = inet_sk(sk);
-+	struct ipv6_pinfo *np = inet6_sk(sk);
-+
-+	/* Local address in sockaddr form. */
-+	switch (sk->sk_family) {
-+	case AF_INET: {
-+		struct sockaddr_in *a4 = (struct sockaddr_in *)si->cpt_laddr;
-+
-+		a4->sin_family = AF_INET;
-+		a4->sin_port = inet->sport;
-+		a4->sin_addr.s_addr = inet->rcv_saddr;
-+		si->cpt_laddrlen = sizeof(*a4);
-+		break;
-+	}
-+	case AF_INET6: {
-+		struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)si->cpt_laddr;
-+
-+		a6->sin6_family = AF_INET6;
-+		a6->sin6_port = inet->sport;
-+		memcpy(&a6->sin6_addr, &np->rcv_saddr, 16);
-+		si->cpt_laddrlen = sizeof(*a6);
-+		break;
-+	}
-+	default:
-+		break;
-+	}
-+	/* No local port number: report an empty local address. */
-+	if (!inet->num)
-+		si->cpt_laddrlen = 0;
-+
-+	si->cpt_daddr = inet->daddr;
-+	si->cpt_dport = inet->dport;
-+	si->cpt_saddr = inet->saddr;
-+	si->cpt_rcv_saddr = inet->rcv_saddr;
-+	si->cpt_sport = inet->sport;
-+	si->cpt_uc_ttl = inet->uc_ttl;
-+	si->cpt_tos = inet->tos;
-+	si->cpt_cmsg_flags = inet->cmsg_flags;
-+	si->cpt_mc_index = inet->mc_index;
-+	si->cpt_mc_addr = inet->mc_addr;
-+	si->cpt_hdrincl = inet->hdrincl;
-+	si->cpt_mc_ttl = inet->mc_ttl;
-+	si->cpt_mc_loop = inet->mc_loop;
-+	si->cpt_pmtudisc = inet->pmtudisc;
-+	si->cpt_recverr = inet->recverr;
-+	si->cpt_freebind = inet->freebind;
-+	si->cpt_idcounter = inet->id;
-+
-+	/* Cork state: when a route is attached its flow overrides cork.fl. */
-+	si->cpt_cork_flags = inet->cork.flags;
-+	si->cpt_cork_length = inet->cork.length;
-+	si->cpt_cork_addr = inet->cork.addr;
-+	if (inet->cork.rt) {
-+		si->cpt_cork_fragsize = inet->cork.fragsize;
-+		si->cpt_cork_saddr = inet->cork.rt->fl.fl4_src;
-+		si->cpt_cork_daddr = inet->cork.rt->fl.fl4_dst;
-+		si->cpt_cork_oif = inet->cork.rt->fl.oif;
-+	} else {
-+		si->cpt_cork_fragsize = 0;
-+		si->cpt_cork_saddr = inet->cork.fl.fl4_src;
-+		si->cpt_cork_daddr = inet->cork.fl.fl4_dst;
-+		si->cpt_cork_oif = inet->cork.fl.oif;
-+	}
-+
-+	if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
-+		struct udp_sock *udp = udp_sk(sk);
-+
-+		si->cpt_udp_pending = udp->pending;
-+		si->cpt_udp_corkflag = udp->corkflag;
-+		si->cpt_udp_encap = udp->encap_type;
-+		si->cpt_udp_len = udp->len;
-+	}
-+
-+	if (sk->sk_family == AF_INET6) {
-+		memcpy(si->cpt_saddr6, &np->saddr, 16);
-+		memcpy(si->cpt_rcv_saddr6, &np->rcv_saddr, 16);
-+		memcpy(si->cpt_daddr6, &np->daddr, 16);
-+		si->cpt_flow_label6 = np->flow_label;
-+		si->cpt_frag_size6 = np->frag_size;
-+		si->cpt_hop_limit6 = np->hop_limit;
-+		si->cpt_mcast_hops6 = np->mcast_hops;
-+		si->cpt_mcast_oif6 = np->mcast_oif;
-+		si->cpt_rxopt6 = np->rxopt.all;
-+		si->cpt_mc_loop6 = np->mc_loop;
-+		si->cpt_recverr6 = np->recverr;
-+		si->cpt_sndflow6 = np->sndflow;
-+		si->cpt_pmtudisc6 = np->pmtudisc;
-+		si->cpt_ipv6only6 = np->ipv6only;
-+		/* May be flipped to 1 by cpt_dump_socket_tcp() below. */
-+		si->cpt_mapped = 0;
-+	}
-+
-+	if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
-+		cpt_dump_socket_tcp(si, sk, ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Dump every socket queued on the accept queue of listening socket sk,
-+ * passing index as the parent.  The result of cpt_dump_socket() is not
-+ * inspected; the function always returns 0.
-+ */
-+int cpt_dump_accept_queue(struct sock *sk, int index, struct cpt_context *ctx)
-+{
-+	struct request_sock *pending;
-+
-+	pending = inet_csk(sk)->icsk_accept_queue.rskq_accept_head;
-+	while (pending != NULL) {
-+		cpt_dump_socket(NULL, pending->sk, -1, index, ctx);
-+		pending = pending->dl_next;
-+	}
-+	return 0;
-+}
-+
-+
-+/*
-+ * Write one CPT_OBJ_OPENREQ record describing the pending connection
-+ * request req.  sk and index are accepted for symmetry with the caller
-+ * but are not used here.  Always returns 0.
-+ */
-+static int dump_openreq(struct request_sock *req, struct sock *sk, int index,
-+			struct cpt_context *ctx)
-+{
-+	struct cpt_openreq_image *v = cpt_get_buf(ctx);
-+
-+	cpt_open_object(NULL, ctx);
-+
-+	v->cpt_next = CPT_NULL;
-+	v->cpt_object = CPT_OBJ_OPENREQ;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_VOID;
-+
-+	v->cpt_rcv_isn = tcp_rsk(req)->rcv_isn;
-+	v->cpt_snt_isn = tcp_rsk(req)->snt_isn;
-+	v->cpt_rmt_port = inet_rsk(req)->rmt_port;
-+	v->cpt_mss = req->mss;
-+	/*
-+	 * The family must be stored before it is tested below; it used to
-+	 * be left as whatever the buffer happened to contain.  The request
-+	 * ops carry the family of the protocol that created the request
-+	 * (replacement for the old "req->class == &or_ipv4" test).
-+	 * NOTE(review): relies on struct request_sock_ops::family -- confirm
-+	 * against this tree's <net/request_sock.h>.
-+	 */
-+	v->cpt_family = req->rsk_ops->family;
-+	v->cpt_retrans = req->retrans;
-+	v->cpt_snd_wscale = inet_rsk(req)->snd_wscale;
-+	v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale;
-+	v->cpt_tstamp_ok = inet_rsk(req)->tstamp_ok;
-+	v->cpt_sack_ok = inet_rsk(req)->sack_ok;
-+	v->cpt_wscale_ok = inet_rsk(req)->wscale_ok;
-+	v->cpt_ecn_ok = inet_rsk(req)->ecn_ok;
-+	v->cpt_acked = inet_rsk(req)->acked;
-+	v->cpt_window_clamp = req->window_clamp;
-+	v->cpt_rcv_wnd = req->rcv_wnd;
-+	v->cpt_ts_recent = req->ts_recent;
-+	v->cpt_expires = jiffies_export(req->expires);
-+
-+	if (v->cpt_family == AF_INET) {
-+		memcpy(v->cpt_loc_addr, &inet_rsk(req)->loc_addr, 4);
-+		memcpy(v->cpt_rmt_addr, &inet_rsk(req)->rmt_addr, 4);
-+	} else {
-+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-+		memcpy(v->cpt_loc_addr, &inet6_rsk(req)->loc_addr, 16);
-+		memcpy(v->cpt_rmt_addr, &inet6_rsk(req)->rmt_addr, 16);
-+		v->cpt_iif = inet6_rsk(req)->iif;
-+#endif
-+	}
-+
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_release_buf(ctx);
-+
-+	cpt_close_object(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Walk all TCP_SYNQ_HSIZE buckets of the listener's SYN table and dump
-+ * each pending request with dump_openreq(), bracketing every record with
-+ * cpt_push_object()/cpt_pop_object().  Always returns 0.
-+ */
-+int cpt_dump_synwait_queue(struct sock *sk, int index, struct cpt_context *ctx)
-+{
-+	struct listen_sock *lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
-+	int bucket;
-+
-+	for (bucket = 0; bucket < TCP_SYNQ_HSIZE; bucket++) {
-+		struct request_sock *req = lopt->syn_table[bucket];
-+
-+		while (req != NULL) {
-+			loff_t saved_obj;
-+
-+			cpt_push_object(&saved_obj, ctx);
-+			dump_openreq(req, sk, index, ctx);
-+			cpt_pop_object(&saved_obj, ctx);
-+
-+			req = req->dl_next;
-+		}
-+	}
-+	return 0;
-+}
-+
-+
-+/*
-+ * Shut down an IPv4/IPv6 TCP socket that is not already in TCP_CLOSE:
-+ * listeners are disconnected through sk_prot->disconnect(), every other
-+ * state is forced to TCP_CLOSE.  Other sockets are left alone.
-+ * ctx is unused.  Always returns 0.
-+ */
-+int cpt_kill_socket(struct sock *sk, cpt_context_t * ctx)
-+{
-+	if (sk->sk_state == TCP_CLOSE)
-+		return 0;
-+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
-+		return 0;
-+	if (sk->sk_protocol != IPPROTO_TCP)
-+		return 0;
-+
-+	if (sk->sk_state == TCP_LISTEN)
-+		sk->sk_prot->disconnect(sk, 0);
-+	else
-+		tcp_set_state(sk, TCP_CLOSE);
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_syscalls.h linux-2.6.16-026test009/kernel/cpt/cpt_syscalls.h
---- linux-2.6.16.orig/kernel/cpt/cpt_syscalls.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_syscalls.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,95 @@
-+#include <linux/unistd.h>
-+#include <linux/syscalls.h>
-+#include <asm/uaccess.h>
-+
-+/*
-+ * Helpers that turn a function body into a direct call of sys_<c>.
-+ * Both macros expand to statements ending in "return", so they must be
-+ * the whole body of the wrapper.
-+ *
-+ * WRAP:  plain "return sys_<c>(args)".
-+ * WRAP2: same call, but made with the address limit switched to
-+ *        KERNEL_DS and restored afterwards; declares the locals
-+ *        "err" and "oldfs" in the calling function.
-+ */
-+#define WRAP(c, args) return sys_##c args
-+#define WRAP2(c, args) int err; mm_segment_t oldfs; \
-+ oldfs = get_fs(); set_fs(KERNEL_DS); \
-+ err = sys_##c args ;\
-+ set_fs(oldfs); \
-+ return err
-+
-+/* Direct call of sys_close(). */
-+static inline int sc_close(int fd)
-+{
-+	return sys_close(fd);
-+}
-+
-+/* Direct call of sys_dup2(). */
-+static inline int sc_dup2(int fd1, int fd2)
-+{
-+	return sys_dup2(fd1, fd2);
-+}
-+
-+/* sys_unlink() invoked with the address limit set to KERNEL_DS. */
-+static inline int sc_unlink(char *name)
-+{
-+	mm_segment_t saved_fs = get_fs();
-+	int rc;
-+
-+	set_fs(KERNEL_DS);
-+	rc = sys_unlink(name);
-+	set_fs(saved_fs);
-+	return rc;
-+}
-+
-+/* Create a pipe through do_pipe(); pfd receives the two descriptors. */
-+static inline int sc_pipe(int *pfd)
-+{
-+	int rc = do_pipe(pfd);
-+
-+	return rc;
-+}
-+
-+/* sys_mknod() invoked with the address limit set to KERNEL_DS. */
-+static inline int sc_mknod(char *name, int mode, int dev)
-+{
-+	mm_segment_t saved_fs = get_fs();
-+	int rc;
-+
-+	set_fs(KERNEL_DS);
-+	rc = sys_mknod(name, mode, dev);
-+	set_fs(saved_fs);
-+	return rc;
-+}
-+
-+/*
-+ * Change the mode of the file at "name" via sys_chmod(), called with the
-+ * address limit set to KERNEL_DS (see WRAP2).  Returns the syscall result.
-+ *
-+ * This wrapper previously invoked sys_mkdir(), so it created a directory
-+ * (or failed with an mkdir error) instead of changing permissions.
-+ */
-+static inline int sc_chmod(char *name, int mode)
-+{
-+	WRAP2(chmod, (name, mode));
-+}
-+
-+/* sys_chown() invoked with the address limit set to KERNEL_DS. */
-+static inline int sc_chown(char *name, int uid, int gid)
-+{
-+	mm_segment_t saved_fs = get_fs();
-+	int rc;
-+
-+	set_fs(KERNEL_DS);
-+	rc = sys_chown(name, uid, gid);
-+	set_fs(saved_fs);
-+	return rc;
-+}
-+
-+/* sys_mkdir() invoked with the address limit set to KERNEL_DS. */
-+static inline int sc_mkdir(char *name, int mode)
-+{
-+	mm_segment_t saved_fs = get_fs();
-+	int rc;
-+
-+	set_fs(KERNEL_DS);
-+	rc = sys_mkdir(name, mode);
-+	set_fs(saved_fs);
-+	return rc;
-+}
-+
-+/* sys_rmdir() invoked with the address limit set to KERNEL_DS. */
-+static inline int sc_rmdir(char *name)
-+{
-+	mm_segment_t saved_fs = get_fs();
-+	int rc;
-+
-+	set_fs(KERNEL_DS);
-+	rc = sys_rmdir(name);
-+	set_fs(saved_fs);
-+	return rc;
-+}
-+
-+/*
-+ * sys_mount() invoked with the address limit set to KERNEL_DS.  A NULL
-+ * mntdev is replaced by the string "none"; the data argument is NULL.
-+ */
-+static inline int sc_mount(char *mntdev, char *mntpnt, char *type, unsigned long flags)
-+{
-+	mm_segment_t saved_fs = get_fs();
-+	int rc;
-+
-+	set_fs(KERNEL_DS);
-+	rc = sys_mount(mntdev ? mntdev : "none", mntpnt, type, flags, NULL);
-+	set_fs(saved_fs);
-+	return rc;
-+}
-+
-+/* Direct call of sys_mprotect(). */
-+static inline int sc_mprotect(unsigned long start, size_t len,
-+			      unsigned long prot)
-+{
-+	return sys_mprotect(start, len, prot);
-+}
-+
-+/* Direct call of sys_mlock(). */
-+static inline int sc_mlock(unsigned long start, size_t len)
-+{
-+	return sys_mlock(start, len);
-+}
-+
-+/* Direct call of sys_munlock(). */
-+static inline int sc_munlock(unsigned long start, size_t len)
-+{
-+	return sys_munlock(start, len);
-+}
-+
-+/* Direct call of sys_remap_file_pages(). */
-+static inline int sc_remap_file_pages(unsigned long start, size_t len,
-+				      unsigned long prot, unsigned long pgoff,
-+				      unsigned long flags)
-+{
-+	return sys_remap_file_pages(start, len, prot, pgoff, flags);
-+}
-+
-+/* sys_wait4() with no status and no rusage buffer (both NULL). */
-+static inline int sc_waitx(int pid, int opt)
-+{
-+	return sys_wait4(pid, NULL, opt, NULL);
-+}
-+
-+/* Direct call of sys_flock(). */
-+static inline int sc_flock(int fd, int flags)
-+{
-+	return sys_flock(fd, flags);
-+}
-+
-+extern int sc_execve(char *cms, char **argv, char **env);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_sysvipc.c linux-2.6.16-026test009/kernel/cpt/cpt_sysvipc.c
---- linux-2.6.16.orig/kernel/cpt/cpt_sysvipc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_sysvipc.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,317 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_sysvipc.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/shm.h>
-+#include <linux/sem.h>
-+#include <linux/msg.h>
-+#include <asm/uaccess.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_kernel.h"
-+
-+/* Argument block handed to dump_one_shm() through sysvipc_walk_shm(). */
-+struct _warg {
-+ struct file *file; /* shm_file of the segment being looked for */
-+ struct cpt_sysvshm_image *v; /* image filled in for the matching segment */
-+};
-+
-+/*
-+ * sysvipc_walk_shm() callback.  When shp is the segment backed by the
-+ * file named in the _warg argument, copy its attributes into the image
-+ * and return 1; return 0 for any other segment.
-+ */
-+static int dump_one_shm(struct shmid_kernel *shp, void *arg)
-+{
-+	struct _warg *w = arg;
-+	struct cpt_sysvshm_image *img = w->v;
-+
-+	if (shp->shm_file != w->file)
-+		return 0;
-+
-+	/* Segment identity and timing. */
-+	img->cpt_id = shp->id;
-+	img->cpt_segsz = shp->shm_segsz;
-+	img->cpt_atime = shp->shm_atim;
-+	img->cpt_ctime = shp->shm_ctim;
-+	img->cpt_dtime = shp->shm_dtim;
-+	img->cpt_creator = shp->shm_cprid;
-+	img->cpt_last = shp->shm_lprid;
-+
-+	/* IPC permission block. */
-+	img->cpt_key = shp->shm_perm.key;
-+	img->cpt_uid = shp->shm_perm.uid;
-+	img->cpt_gid = shp->shm_perm.gid;
-+	img->cpt_cuid = shp->shm_perm.cuid;
-+	img->cpt_cgid = shp->shm_perm.cgid;
-+	img->cpt_mode = shp->shm_perm.mode;
-+	img->cpt_seq = shp->shm_perm.seq;
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
-+	img->cpt_mlockuser = shp->mlock_user ? shp->mlock_user->uid : -1;
-+#else
-+	img->cpt_mlockuser = -1;
-+#endif
-+	return 1;
-+}
-+
-+/*
-+ * Write a CPT_OBJ_SYSV_SHM record for the SysV shm segment backed by
-+ * file.  Returns 0 on success, -ESRCH when the walk over the segments
-+ * finds none matching the file.
-+ */
-+int cpt_dump_content_sysvshm(struct file *file, struct cpt_context *ctx)
-+{
-+	struct cpt_sysvshm_image *img = cpt_get_buf(ctx);
-+	struct _warg warg;
-+	int err = 0;
-+
-+	img->cpt_next = sizeof(*img);
-+	img->cpt_object = CPT_OBJ_SYSV_SHM;
-+	img->cpt_hdrlen = sizeof(*img);
-+	img->cpt_content = CPT_CONTENT_VOID;
-+
-+	warg.file = file;
-+	warg.v = img;
-+	if (sysvipc_walk_shm(dump_one_shm, &warg) != 0)
-+		ctx->write(img, sizeof(*img), ctx);
-+	else
-+		err = -ESRCH;
-+
-+	cpt_release_buf(ctx);
-+	return err;
-+}
-+
-+
-+/*
-+ * sysvipc_walk_sem() callback: arg carries the wanted semaphore id.
-+ * Returns sem_nsems + 1 for the matching array (so that a match is
-+ * always non-zero) and 0 otherwise.
-+ */
-+int match_sem(int id, struct sem_array *sema, void *arg)
-+{
-+	if (id == (unsigned long)arg)
-+		return sema->sem_nsems + 1;
-+	return 0;
-+}
-+
-+/*
-+ * Return the number of semaphores in the array with the given id, or
-+ * -ESRCH (after logging) when no such array is found.
-+ */
-+static int get_sem_nsem(int id, cpt_context_t *ctx)
-+{
-+	int found = sysvipc_walk_sem(match_sem, (void*)(unsigned long)id);
-+
-+	if (found <= 0) {
-+		eprintk_ctx("get_sem_nsem: SYSV semaphore %d not found\n", id);
-+		return -ESRCH;
-+	}
-+	/* match_sem() reports nsems + 1. */
-+	return found - 1;
-+}
-+
-+/*
-+ * Write one CPT_OBJ_SYSVSEM_UNDO_REC object for the undo entry su: a
-+ * header followed by the su->semadj array (one short per semaphore of
-+ * the referenced array).  Returns 0 on success, -ESRCH when the
-+ * semaphore array su->semid cannot be found.
-+ */
-+static int dump_one_semundo(struct sem_undo *su, struct cpt_context *ctx)
-+{
-+ struct cpt_sysvsem_undo_image v;
-+ loff_t saved_obj;
-+
-+ cpt_open_object(NULL, ctx);
-+
-+ v.cpt_next = CPT_NULL;
-+ v.cpt_object = CPT_OBJ_SYSVSEM_UNDO_REC;
-+ v.cpt_hdrlen = sizeof(v);
-+ v.cpt_content = CPT_CONTENT_SEMUNDO;
-+ v.cpt_id = su->semid;
-+ /* get_sem_nsem() yields a negative errno on failure; the value is
-+ * checked after being stored in the image field. */
-+ v.cpt_nsem = get_sem_nsem(su->semid, ctx);
-+ if ((int)v.cpt_nsem < 0)
-+ return -ESRCH;
-+
-+ ctx->write(&v, sizeof(v), ctx);
-+
-+ /* Payload: the per-semaphore adjustment values. */
-+ cpt_push_object(&saved_obj, ctx);
-+ ctx->write(su->semadj, v.cpt_nsem*sizeof(short), ctx);
-+ cpt_pop_object(&saved_obj, ctx);
-+
-+ cpt_close_object(ctx);
-+ return 0;
-+}
-+
-+/* State shared between cpt_dump_sysvsem() and dump_one_sem(). */
-+struct sem_warg {
-+ int last_id; /* id % IPCMNI of the last array dumped, -1 before the first */
-+ struct cpt_sysvsem_image *v; /* buffer receiving the header and semaphore values */
-+};
-+
-+/*
-+ * sysvipc_walk_sem() callback that serialises one semaphore array into
-+ * warg->v.  Arrays whose slot (id % IPCMNI) is not above warg->last_id
-+ * are skipped with 0.  For the first array past that point the header
-+ * and one {semval, sempid} pair per semaphore are stored, last_id is
-+ * advanced and 1 is returned.  Returns -EINVAL when the
-+ * pairs would run past PAGE_SIZE.
-+ */
-+static int dump_one_sem(int id, struct sem_array *sma, void *arg)
-+{
-+ struct sem_warg * warg = (struct sem_warg *)arg;
-+ struct cpt_sysvsem_image *v = warg->v;
-+ int i;
-+
-+ if (warg->last_id != -1) {
-+ if ((id % IPCMNI) <= warg->last_id)
-+ return 0;
-+ }
-+
-+ /* cpt_next doubles as the running size of the record being built. */
-+ v->cpt_next = sizeof(*v);
-+ v->cpt_object = CPT_OBJ_SYSV_SEM;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_SEMARRAY;
-+
-+ v->cpt_key = sma->sem_perm.key;
-+ v->cpt_uid = sma->sem_perm.uid;
-+ v->cpt_gid = sma->sem_perm.gid;
-+ v->cpt_cuid = sma->sem_perm.cuid;
-+ v->cpt_cgid = sma->sem_perm.cgid;
-+ v->cpt_mode = sma->sem_perm.mode;
-+ v->cpt_seq = sma->sem_perm.seq;
-+
-+ v->cpt_id = id;
-+ v->cpt_ctime = sma->sem_ctime;
-+ v->cpt_otime = sma->sem_otime;
-+
-+ for (i=0; i<sma->sem_nsems; i++) {
-+ /* Next free slot directly behind what has been written so far. */
-+ struct {
-+ __u32 semval;
-+ __u32 sempid;
-+ } *s = (void*)v + v->cpt_next;
-+ /* presumably the buffer behind v is one page -- TODO confirm */
-+ if (v->cpt_next >= PAGE_SIZE - sizeof(*s))
-+ return -EINVAL;
-+ s->semval = sma->sem_base[i].semval;
-+ s->sempid = sma->sem_base[i].sempid;
-+ v->cpt_next += sizeof(*s);
-+ }
-+
-+ warg->last_id = id % IPCMNI;
-+ return 1;
-+}
-+
-+
-+/*
-+ * Emit the CPT_SECT_SYSV_SEM section (one record per semaphore array)
-+ * followed by the CPT_SECT_SYSVSEM_UNDO section (one object per collected
-+ * undo list, each containing its undo records).
-+ *
-+ * Returns 0 on success or a negative errno.  A negative result from the
-+ * semaphore walk (e.g. -EINVAL from dump_one_sem() when an array does not
-+ * fit into the buffer) is now reported to the caller; it used to be
-+ * treated like "no more arrays", producing a silently truncated section.
-+ */
-+int cpt_dump_sysvsem(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+	struct sem_warg warg;
-+	int err;
-+
-+	/* Dumping semaphores is quite tricky because we cannot
-+	 * write to dump file under lock inside sysvipc_walk_sem().
-+	 * Each walk therefore captures one array into the buffer and
-+	 * stops; the buffer is written out after the walk has returned.
-+	 */
-+	cpt_open_section(ctx, CPT_SECT_SYSV_SEM);
-+	warg.last_id = -1;
-+	warg.v = cpt_get_buf(ctx);
-+	for (;;) {
-+		err = sysvipc_walk_sem(dump_one_sem, &warg);
-+		if (err <= 0)
-+			break;
-+		ctx->write(warg.v, warg.v->cpt_next, ctx);
-+	}
-+	cpt_release_buf(ctx);
-+	cpt_close_section(ctx);
-+	if (err < 0)
-+		return err;
-+
-+	cpt_open_section(ctx, CPT_SECT_SYSVSEM_UNDO);
-+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
-+		struct sem_undo_list *semu = obj->o_obj;
-+		struct sem_undo *su;
-+		struct cpt_object_hdr v;
-+		loff_t saved_obj;
-+
-+		cpt_open_object(obj, ctx);
-+
-+		v.cpt_next = CPT_NULL;
-+		v.cpt_object = CPT_OBJ_SYSVSEM_UNDO;
-+		v.cpt_hdrlen = sizeof(v);
-+		v.cpt_content = CPT_CONTENT_ARRAY;
-+
-+		ctx->write(&v, sizeof(v), ctx);
-+
-+		cpt_push_object(&saved_obj, ctx);
-+		for (su = semu->proc_list; su; su = su->proc_next) {
-+			/* Entries with semid == -1 are skipped. */
-+			if (su->semid != -1) {
-+				err = dump_one_semundo(su, ctx);
-+				if (err < 0)
-+					return err;
-+			}
-+		}
-+		cpt_pop_object(&saved_obj, ctx);
-+
-+		cpt_close_object(ctx);
-+	}
-+	cpt_close_section(ctx);
-+	return 0;
-+}
-+
-+/* sysvipc_walk_msg() callback: bump the counter arg points to, return 0. */
-+static int collect_one_msg(int id, struct msg_queue *msq, void *arg)
-+{
-+	int *counter = arg;
-+
-+	*counter += 1;
-+	return 0;
-+}
-+
-+/*
-+ * Count the SysV message queues seen by sysvipc_walk_msg().  Any queue
-+ * makes the checkpoint fail with -EBUSY; returns 0 when there are none.
-+ */
-+int cpt_collect_sysvmsg(cpt_context_t * ctx)
-+{
-+	int nqueues = 0;
-+
-+	sysvipc_walk_msg(collect_one_msg, &nqueues);
-+	if (nqueues == 0)
-+		return 0;
-+
-+	eprintk_ctx("SYSV msgqueues are not supported, found %d\n", nqueues);
-+	return -EBUSY;
-+}
-+
-+/*
-+ * Register the semaphore undo list of every collected, non-exited task
-+ * as a CPT_OBJ_SYSVSEM_UNDO object, then check that each list's refcnt
-+ * equals the number of times it was collected.
-+ *
-+ * Returns 0, -ENOMEM when an object cannot be added, or -EBUSY when a
-+ * list has references that were not collected.
-+ */
-+static int cpt_collect_sysvsem_undo(cpt_context_t *ctx)
-+{
-+	cpt_object_t *obj;
-+
-+	for_each_object(obj, CPT_OBJ_TASK) {
-+		task_t *tsk = obj->o_obj;
-+
-+		/* ipc/sem.c forgets to clear tsk->sysvsem.undo_list
-+		 * on exit, so exited tasks are skipped. */
-+		if (tsk->exit_state)
-+			continue;
-+
-+		if (tsk->sysvsem.undo_list) {
-+			if (cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, tsk->sysvsem.undo_list, ctx) == NULL)
-+				return -ENOMEM;
-+		}
-+	}
-+
-+	for_each_object(obj, CPT_OBJ_SYSVSEM_UNDO) {
-+		struct sem_undo_list *ulist = obj->o_obj;
-+
-+		if (atomic_read(&ulist->refcnt) == obj->o_count)
-+			continue;
-+
-+		eprintk_ctx("sem_undo_list is referenced outside %d %d\n", obj->o_count, atomic_read(&ulist->refcnt));
-+		return -EBUSY;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * sysvipc_walk_shm() callback: add the segment's backing file to the
-+ * CPT_OBJ_FILE set using a GFP_ATOMIC allocation.  Returns 0 or -ENOMEM.
-+ */
-+static int collect_one_shm(struct shmid_kernel *shp, void *arg)
-+{
-+	cpt_context_t *ctx = arg;
-+	cpt_object_t *added;
-+
-+	added = __cpt_object_add(CPT_OBJ_FILE, shp->shm_file, GFP_ATOMIC, ctx);
-+	return added == NULL ? -ENOMEM : 0;
-+}
-+
-+/*
-+ * Collect the backing files of all SysV shm segments.  Negative results
-+ * of the walk are passed on; anything else is reported as 0.
-+ */
-+int cpt_collect_sysvshm(cpt_context_t * ctx)
-+{
-+	int rc = sysvipc_walk_shm(collect_one_shm, ctx);
-+
-+	if (rc < 0)
-+		return rc;
-+	return 0;
-+}
-+
-+/*
-+ * Run the SysV IPC collection steps in order -- semaphore undo lists,
-+ * message queues, shared memory -- stopping at the first one that
-+ * returns non-zero.  Returns that value, or 0 when all succeed.
-+ */
-+int cpt_collect_sysv(cpt_context_t * ctx)
-+{
-+	int err = cpt_collect_sysvsem_undo(ctx);
-+
-+	if (!err)
-+		err = cpt_collect_sysvmsg(ctx);
-+	if (!err)
-+		err = cpt_collect_sysvshm(ctx);
-+	return err;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_tty.c linux-2.6.16-026test009/kernel/cpt/cpt_tty.c
---- linux-2.6.16.orig/kernel/cpt/cpt_tty.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_tty.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,216 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_tty.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/tty.h>
-+#include <asm/uaccess.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/* We must support at least N_TTY. */
-+
-+/*
-+ * Write a CPT_OBJ_REF record pointing at the image position (o_pos) of
-+ * the tty attached to file.  Returns 0, or -EINVAL when that tty is not
-+ * a registered CPT_OBJ_TTY object.
-+ */
-+int cpt_dump_content_tty(struct file *file, struct cpt_context *ctx)
-+{
-+	struct tty_struct *tty = file->private_data;
-+	cpt_object_t *tty_obj = lookup_cpt_object(CPT_OBJ_TTY, tty, ctx);
-+	struct cpt_obj_ref ref;
-+	loff_t saved_pos;
-+
-+	if (tty_obj == NULL)
-+		return -EINVAL;
-+
-+	cpt_push_object(&saved_pos, ctx);
-+
-+	ref.cpt_next = sizeof(ref);
-+	ref.cpt_object = CPT_OBJ_REF;
-+	ref.cpt_hdrlen = sizeof(ref);
-+	ref.cpt_content = CPT_CONTENT_VOID;
-+	ref.cpt_pos = tty_obj->o_pos;
-+	ctx->write(&ref, sizeof(ref), ctx);
-+
-+	cpt_pop_object(&saved_pos, ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Register the tty behind file, and its link if it has one, as
-+ * CPT_OBJ_TTY objects.  Files without a tty are ignored.
-+ * Returns 0 or -ENOMEM.
-+ */
-+int cpt_collect_tty(struct file *file, cpt_context_t * ctx)
-+{
-+	struct tty_struct *tty = file->private_data;
-+	cpt_object_t *peer;
-+
-+	if (tty == NULL)
-+		return 0;
-+
-+	if (cpt_object_add(CPT_OBJ_TTY, tty, ctx) == NULL)
-+		return -ENOMEM;
-+
-+	if (tty->link == NULL)
-+		return 0;
-+
-+	peer = cpt_object_add(CPT_OBJ_TTY, tty->link, ctx);
-+	if (peer == NULL)
-+		return -ENOMEM;
-+
-+	/* Undo o_count, tty->link is not a reference */
-+	peer->o_count--;
-+	return 0;
-+}
-+
-+/*
-+ * Write the CPT_OBJ_TTY image for the tty behind obj, followed by a
-+ * CPT_OBJ_BITS object holding the unread input when there is any.
-+ *
-+ * Returns 0 on success, -EINVAL for an unregistered or inconsistent
-+ * link, an unmappable session or a NULL termios, and -EBUSY when
-+ * tty->count does not match the number of collected references.
-+ */
-+int cpt_dump_tty(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+ struct tty_struct *tty = obj->o_obj;
-+ struct cpt_tty_image *v;
-+
-+ if (tty->link) {
-+ /* The other end must have been collected too ... */
-+ if (lookup_cpt_object(CPT_OBJ_TTY, tty->link, ctx) == NULL) {
-+ eprintk_ctx("orphan pty %s %d\n", tty->name, tty->driver->subtype == PTY_TYPE_SLAVE);
-+ return -EINVAL;
-+ }
-+ /* ... and must point back at this tty. */
-+ if (tty->link->link != tty) {
-+ eprintk_ctx("bad pty pair\n");
-+ return -EINVAL;
-+ }
-+ /* A pty slave whose link has a non-zero count gets one extra
-+ * expected reference before the comparison below. */
-+ if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-+ tty->driver->subtype == PTY_TYPE_SLAVE &&
-+ tty->link->count)
-+ obj->o_count++;
-+ }
-+ /* Every reference to the tty must have been collected. */
-+ if (obj->o_count != tty->count) {
-+ eprintk_ctx("tty %s is referenced outside %d %d\n", tty->name, obj->o_count, tty->count);
-+ return -EBUSY;
-+ }
-+
-+ cpt_open_object(obj, ctx);
-+
-+ v = cpt_get_buf(ctx);
-+ v->cpt_next = -1;
-+ v->cpt_object = CPT_OBJ_TTY;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_ARRAY;
-+
-+ v->cpt_index = tty->index;
-+ /* -1 means "no link". */
-+ v->cpt_link = -1;
-+ if (tty->link)
-+ v->cpt_link = tty->link->index;
-+ v->cpt_drv_type = tty->driver->type;
-+ v->cpt_drv_subtype = tty->driver->subtype;
-+ v->cpt_drv_flags = tty->driver->flags;
-+ v->cpt_packet = tty->packet;
-+ v->cpt_stopped = tty->stopped;
-+ v->cpt_hw_stopped = tty->hw_stopped;
-+ v->cpt_flow_stopped = tty->flow_stopped;
-+ v->cpt_flags = tty->flags;
-+ v->cpt_ctrl_status = tty->ctrl_status;
-+ v->cpt_canon_data = tty->canon_data;
-+ /* Stored relative to read_tail, matching the order in which the
-+ * read buffer is written out below. */
-+ v->cpt_canon_head = tty->canon_head - tty->read_tail;
-+ v->cpt_canon_column = tty->canon_column;
-+ v->cpt_column = tty->column;
-+ v->cpt_erasing = tty->erasing;
-+ v->cpt_lnext = tty->lnext;
-+ v->cpt_icanon = tty->icanon;
-+ v->cpt_raw = tty->raw;
-+ v->cpt_real_raw = tty->real_raw;
-+ v->cpt_closing = tty->closing;
-+ v->cpt_minimum_to_wake = tty->minimum_to_wake;
-+ /* A process group that cannot be mapped is only logged at debug
-+ * level and stored as -1. */
-+ v->cpt_pgrp = 0;
-+ if (tty->pgrp > 0) {
-+ v->cpt_pgrp = _pid_type_to_vpid(PIDTYPE_PGID, tty->pgrp);
-+ if ((int)v->cpt_pgrp < 0) {
-+ dprintk_ctx("cannot map tty->pgrp %d -> %d\n", tty->pgrp, (int)v->cpt_pgrp);
-+ v->cpt_pgrp = -1;
-+ }
-+ }
-+ /* Unlike the process group, an unmappable session is fatal. */
-+ v->cpt_session = 0;
-+ if (tty->session > 0) {
-+ v->cpt_session = _pid_type_to_vpid(PIDTYPE_SID, tty->session);
-+ if ((int)v->cpt_session < 0) {
-+ eprintk_ctx("cannot map tty->session %d -> %d\n", tty->session, (int)v->cpt_session);
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ }
-+ memcpy(v->cpt_name, tty->name, 64);
-+ v->cpt_ws_row = tty->winsize.ws_row;
-+ v->cpt_ws_col = tty->winsize.ws_col;
-+ v->cpt_ws_prow = tty->winsize.ws_ypixel;
-+ v->cpt_ws_pcol = tty->winsize.ws_xpixel;
-+ if (tty->termios == NULL) {
-+ eprintk_ctx("NULL termios");
-+ cpt_release_buf(ctx);
-+ return -EINVAL;
-+ }
-+ v->cpt_c_line = tty->termios->c_line;
-+ v->cpt_c_iflag = tty->termios->c_iflag;
-+ v->cpt_c_oflag = tty->termios->c_oflag;
-+ v->cpt_c_cflag = tty->termios->c_cflag;
-+ v->cpt_c_lflag = tty->termios->c_lflag;
-+ /* The image reserves 32 control characters; unused ones are 255. */
-+ memcpy(v->cpt_c_cc, tty->termios->c_cc, NCCS);
-+ if (NCCS < 32)
-+ memset(v->cpt_c_cc + NCCS, 255, 32 - NCCS);
-+ memcpy(v->cpt_read_flags, tty->read_flags, sizeof(v->cpt_read_flags));
-+
-+ ctx->write(v, sizeof(*v), ctx);
-+ cpt_release_buf(ctx);
-+
-+ if (tty->read_buf && tty->read_cnt) {
-+ /* Note: this v shadows the tty image pointer declared above. */
-+ struct cpt_obj_bits *v = cpt_get_buf(ctx);
-+ loff_t saved_pos;
-+
-+ cpt_push_object(&saved_pos, ctx);
-+ cpt_open_object(NULL, ctx);
-+ v->cpt_next = CPT_NULL;
-+ v->cpt_object = CPT_OBJ_BITS;
-+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_DATA;
-+ v->cpt_size = tty->read_cnt;
-+ ctx->write(v, sizeof(*v), ctx);
-+ cpt_release_buf(ctx);
-+
-+ if (tty->read_cnt) {
-+ /* Pending input starts at read_tail; whatever does not fit
-+ * before N_TTY_BUF_SIZE continues at the start of read_buf. */
-+ int n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
-+ ctx->write(tty->read_buf + tty->read_tail, n, ctx);
-+ if (tty->read_cnt > n)
-+ ctx->write(tty->read_buf, tty->read_cnt-n, ctx);
-+ ctx->align(ctx);
-+ }
-+
-+ cpt_close_object(ctx);
-+ cpt_pop_object(&saved_pos, ctx);
-+ }
-+
-+ cpt_close_object(ctx);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Look through the fasync list of the tty behind file for the entry that
-+ * belongs to file and return its fa_fd; returns -1 (as __u32) when file
-+ * has no entry.  ctx is unused.
-+ */
-+__u32 cpt_tty_fasync(struct file *file, struct cpt_context *ctx)
-+{
-+	struct tty_struct *tty = (struct tty_struct *)file->private_data;
-+	struct fasync_struct *entry = tty->fasync;
-+
-+	while (entry != NULL) {
-+		if (entry->fa_file == file)
-+			return entry->fa_fd;
-+		entry = entry->fa_next;
-+	}
-+	return -1;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_ubc.c linux-2.6.16-026test009/kernel/cpt/cpt_ubc.c
---- linux-2.6.16.orig/kernel/cpt/cpt_ubc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_ubc.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,132 @@
-+/*
-+ *
-+ * kernel/cpt/cpt_ubc.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/types.h>
-+#include <ub/beancounter.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/*
-+ * Register beancounter bc as a CPT_OBJ_UBC object.  A reference is taken
-+ * on bc when the object's count is 1 after the add, and bc's parent is
-+ * registered recursively while the object has no o_parent yet.
-+ * Returns the object, or NULL when it could not be added.
-+ */
-+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
-+{
-+	cpt_object_t *ubc_obj = cpt_object_add(CPT_OBJ_UBC, bc, ctx);
-+
-+	if (ubc_obj == NULL)
-+		return NULL;
-+
-+	if (ubc_obj->o_count == 1)
-+		get_beancounter(bc);
-+
-+	if (bc->parent != NULL && ubc_obj->o_parent == NULL)
-+		ubc_obj->o_parent = cpt_add_ubc(bc->parent, ctx);
-+
-+	return ubc_obj;
-+}
-+
-+/*
-+ * Return the image position (o_pos) of the CPT_OBJ_UBC object registered
-+ * for bc.  An unregistered beancounter is logged together with a stack
-+ * dump and yields CPT_NULL.
-+ */
-+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx)
-+{
-+	cpt_object_t *ubc_obj = lookup_cpt_object(CPT_OBJ_UBC, bc, ctx);
-+
-+	if (ubc_obj != NULL)
-+		return ubc_obj->o_pos;
-+
-+	{
-+		char uid_str[48];
-+
-+		print_ub_uid(bc, uid_str, sizeof(uid_str));
-+		printk(KERN_ERR "CPT: unknown ub %s (%p)\n", uid_str, bc);
-+		dump_stack();
-+	}
-+	return CPT_NULL;
-+}
-+
-+/*
-+ * Store one beancounter parameter as six consecutive __u64 values at dmp:
-+ * barrier, limit, held, maxheld, minheld, failcnt.  A barrier or limit
-+ * not below UB_MAXVALUE is written as CPT_NULL, and so is "held" when
-+ * the held argument is zero.
-+ */
-+static void dump_one_bc_parm(__u64 *dmp, struct ubparm *prm, int held)
-+{
-+	*dmp++ = (prm->barrier < UB_MAXVALUE ? prm->barrier : CPT_NULL);
-+	*dmp++ = (prm->limit < UB_MAXVALUE ? prm->limit : CPT_NULL);
-+	*dmp++ = (held ? prm->held : CPT_NULL);
-+	*dmp++ = prm->maxheld;
-+	*dmp++ = prm->minheld;
-+	*dmp = prm->failcnt;
-+}
-+
-+/*
-+ * Write the CPT_OBJ_UBC image of the beancounter behind obj.
-+ *
-+ * cpt_parms holds six __u64 per resource: first UB_RESOURCES entries taken
-+ * from ub_parms (without "held"), then UB_RESOURCES entries taken from
-+ * ub_store (with "held"); the remainder of the array is zeroed.
-+ *
-+ * The loops used to ignore their index, so only resource 0 was dumped
-+ * (UB_RESOURCES times into the same slot) and every other slot kept
-+ * whatever the buffer contained.  Each resource now goes to its own slot.
-+ * NOTE(review): the restore side must read the same layout -- confirm.
-+ *
-+ * Always returns 0.
-+ */
-+static int dump_one_bc(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct user_beancounter *bc;
-+	struct cpt_beancounter_image *v;
-+	int i;
-+
-+	bc = obj->o_obj;
-+	v = cpt_get_buf(ctx);
-+
-+	v->cpt_next = CPT_NULL;
-+	v->cpt_object = CPT_OBJ_UBC;
-+	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_VOID;
-+
-+	if (obj->o_parent != NULL)
-+		v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos;
-+	else
-+		v->cpt_parent = CPT_NULL;
-+	/* Top-level beancounters are stored with id 0. */
-+	v->cpt_id = (obj->o_parent != NULL) ? bc->ub_uid : 0;
-+	for (i = 0; i < UB_RESOURCES; i++)
-+		dump_one_bc_parm(v->cpt_parms + i * 6,
-+				 bc->ub_parms + i, 0);
-+	for (i = 0; i < UB_RESOURCES; i++)
-+		dump_one_bc_parm(v->cpt_parms + (UB_RESOURCES + i) * 6,
-+				 bc->ub_store + i, 1);
-+	memset(v->cpt_parms + UB_RESOURCES * 12, 0,
-+	       sizeof(v->cpt_parms)
-+	       - UB_RESOURCES * 12 * sizeof(v->cpt_parms[0]));
-+
-+	cpt_open_object(obj, ctx);
-+	ctx->write(v, sizeof(*v), ctx);
-+	cpt_close_object(ctx);
-+
-+	cpt_release_buf(ctx);
-+	return 0;
-+}
-+
-+/*
-+ * Write the CPT_SECT_UBC section.  A beancounter is only dumped once its
-+ * parent has an image position, so the object list is scanned repeatedly
-+ * until a pass skips nothing.  Returns 0, or -EINVAL when more than one
-+ * object without a parent exists.
-+ */
-+int cpt_dump_ubc(struct cpt_context *ctx)
-+{
-+ cpt_object_t *obj;
-+ int skipped;
-+ int top;
-+
-+ cpt_open_section(ctx, CPT_SECT_UBC);
-+
-+ do {
-+ skipped = 0;
-+ top = 0;
-+ for_each_object(obj, CPT_OBJ_UBC) {
-+ if (obj->o_parent == NULL)
-+ top++;
-+ /* Already written in an earlier pass. */
-+ if (obj->o_pos != CPT_NULL)
-+ continue;
-+ /* Parent not written yet: retry in the next pass. */
-+ if (obj->o_parent != NULL &&
-+ ((cpt_object_t *)obj->o_parent)->o_pos == CPT_NULL)
-+ skipped++;
-+ else
-+ dump_one_bc(obj, ctx);
-+ }
-+ /* Stop retrying as soon as a second top-level object is seen. */
-+ } while (skipped && (top < 2));
-+
-+ cpt_close_section(ctx);
-+ if (top > 1) {
-+ eprintk_ctx("More than one top level ub exist");
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Call put_beancounter() on every beancounter registered as CPT_OBJ_UBC. */
-+void cpt_finish_ubc(struct cpt_context *ctx)
-+{
-+	cpt_object_t *ubc_obj;
-+
-+	for_each_object(ubc_obj, CPT_OBJ_UBC) {
-+		put_beancounter(ubc_obj->o_obj);
-+	}
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_ubc.h linux-2.6.16-026test009/kernel/cpt/cpt_ubc.h
---- linux-2.6.16.orig/kernel/cpt/cpt_ubc.h 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_ubc.h 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,9 @@
-+/* Checkpoint side of user beancounter (UBC) handling. */
-+cpt_object_t *cpt_add_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
-+__u64 cpt_lookup_ubc(struct user_beancounter *bc, struct cpt_context *ctx);
-+/* Writes the CPT_SECT_UBC section; returns 0 or -EINVAL. */
-+int cpt_dump_ubc(struct cpt_context *ctx);
-+
-+/* Restore side counterparts (defined outside this header). */
-+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx);
-+int rst_undump_ubc(struct cpt_context *ctx);
-+
-+/* Cleanup; cpt_finish_ubc() puts every beancounter held by CPT_OBJ_UBC objects. */
-+void cpt_finish_ubc(struct cpt_context *ctx);
-+void rst_finish_ubc(struct cpt_context *ctx);
-diff -upr linux-2.6.16.orig/kernel/cpt/cpt_x8664.S linux-2.6.16-026test009/kernel/cpt/cpt_x8664.S
---- linux-2.6.16.orig/kernel/cpt/cpt_x8664.S 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/cpt_x8664.S 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,61 @@
-+#define ASSEMBLY 1
-+#include <linux/config.h>
-+#include <linux/linkage.h>
-+#include <asm/segment.h>
-+#include <asm/smp.h>
-+#include <asm/cache.h>
-+#include <asm/errno.h>
-+#include <asm/dwarf2.h>
-+#include <asm/calling.h>
-+#include <asm/msr.h>
-+#include <asm/unistd.h>
-+#include <asm/thread_info.h>
-+#include <asm/hw_irq.h>
-+#include <asm/errno.h>
-+
-+ .code64
-+
-+ /*
-+  * FAKE_STACK_FRAME: push six quadwords (ss, rsp, eflags, cs, rip,
-+  * orig rax) onto the stack, with \child_rip as the rip slot.
-+  * Clobbers %rax (zeroed).
-+  */
-+ .macro FAKE_STACK_FRAME child_rip
-+ /* push in order ss, rsp, eflags, cs, rip */
-+ xorq %rax, %rax
-+ pushq %rax /* ss */
-+ pushq %rax /* rsp */
-+ pushq $(1<<9) /* eflags - interrupts on */
-+ pushq $__KERNEL_CS /* cs */
-+ pushq \child_rip /* rip */
-+ pushq %rax /* orig rax */
-+ .endm
-+
-+ /* UNFAKE_STACK_FRAME: drop the six quadwords pushed by FAKE_STACK_FRAME. */
-+ .macro UNFAKE_STACK_FRAME
-+ addq $8*6, %rsp
-+ .endm
-+
-+/*
-+ * asm_kernel_thread: build a fake frame whose rip slot is child_rip and
-+ * call do_fork_pid with
-+ *   rdi = incoming %rdx | 0x00800000,
-+ *   rsi = -1,
-+ *   rdx = %rsp after SAVE_ALL (the frame just built),
-+ *   r8 = r9 = 0,
-+ *   incoming %rcx pushed as a stack argument.
-+ * The value returned in %rax is stored into the frame's RAX slot before
-+ * RESTORE_ALL, presumably so it survives the register restore.
-+ */
-+ENTRY(asm_kernel_thread)
-+ FAKE_STACK_FRAME $child_rip
-+ SAVE_ALL
-+
-+ # rdi: flags, rsi: usp, rdx: will be &pt_regs
-+ movq %rdx,%rdi
-+ orq $0x00800000,%rdi
-+ movq $-1, %rsi
-+ movq %rsp, %rdx
-+
-+ xorl %r8d,%r8d
-+ xorl %r9d,%r9d
-+ pushq %rcx
-+ call do_fork_pid
-+ addq $8, %rsp /* drop the stack argument pushed above */
-+ /* call do_fork */
-+ movq %rax,RAX(%rsp)
-+ xorl %edi,%edi
-+ RESTORE_ALL
-+ UNFAKE_STACK_FRAME
-+ ret
-+
-+/*
-+ * child_rip: call the function whose address is in %rdi, passing %rsi in
-+ * %rdi, then call complete_and_exit with %rdi and %rsi zeroed.
-+ */
-+child_rip:
-+ movq %rdi, %rax
-+ movq %rsi, %rdi
-+ call *%rax
-+ xorq %rdi, %rdi
-+ xorq %rsi, %rsi
-+ call complete_and_exit
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_conntrack.c linux-2.6.16-026test009/kernel/cpt/rst_conntrack.c
---- linux-2.6.16.orig/kernel/cpt/rst_conntrack.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_conntrack.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,294 @@
-+/*
-+ *
-+ * kernel/cpt/rst_conntrack.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/netdevice.h>
-+#include <linux/inetdevice.h>
-+#include <linux/rtnetlink.h>
-+#include <linux/unistd.h>
-+#include <linux/ve.h>
-+#include <linux/vzcalluser.h>
-+#include <linux/cpt_image.h>
-+#include <linux/icmp.h>
-+#include <linux/ip.h>
-+
-+#if defined(CONFIG_VE_IPTABLES) && \
-+ (defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE))
-+
-+#include <linux/netfilter.h>
-+#include <linux/netfilter_ipv4/ip_conntrack.h>
-+#include <linux/netfilter_ipv4/ip_nat.h>
-+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-+#include <linux/netfilter_ipv4/ip_nat_helper.h>
-+#include <linux/netfilter_ipv4/ip_nat_core.h>
-+
-+#define ASSERT_READ_LOCK(x) do { } while (0)
-+#define ASSERT_WRITE_LOCK(x) do { } while (0)
-+
-+#include <linux/netfilter_ipv4/listhelp.h>
-+
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/*
-+ * Singly linked list node tying a restored conntrack to the index it had
-+ * in the image, so that later records can refer to it by index.
-+ */
-+struct ct_holder
-+{
-+ struct ct_holder *next; /* next node, NULL terminates the list */
-+ struct ip_conntrack *ct; /* conntrack allocated during restore */
-+ int index; /* cpt_index of that conntrack in the image */
-+};
-+
-+/*
-+ * Fill a kernel conntrack tuple from its image representation.
-+ * A warning is logged when the direction stored in the image differs
-+ * from the direction @dir expected by the caller.
-+ */
-+static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple, int dir)
-+{
-+	/* source half */
-+	tuple->src.ip = v->cpt_src;
-+	tuple->src.u.all = v->cpt_srcport;
-+
-+	/* destination half */
-+	tuple->dst.ip = v->cpt_dst;
-+	tuple->dst.u.all = v->cpt_dstport;
-+	tuple->dst.protonum = v->cpt_protonum;
-+	tuple->dst.dir = v->cpt_dir;
-+
-+	if (tuple->dst.dir != dir)
-+		wprintk("dir != tuple->dst.dir\n");
-+}
-+
-+
-+/*
-+ * Restore the expectation records that follow a conntrack image.
-+ *
-+ * @ct:      conntrack owning the expectations
-+ * @ci:      image header of that conntrack (gives the record extent)
-+ * @pos:     position of the conntrack image in the dump
-+ * @ct_list: conntracks restored so far, searched for the sibling index
-+ *
-+ * Records are walked from pos + cpt_hdrlen up to pos + cpt_next.  A record
-+ * that cannot be restored because the conntrack has no helper, or because
-+ * the expectation is found dying, is skipped.
-+ *
-+ * Returns 0 on success, a negative errno on read failure, -EINVAL when a
-+ * referenced sibling is unknown and -ENOMEM when allocation fails.
-+ */
-+static int undump_expect_list(struct ip_conntrack *ct,
-+			      struct cpt_ip_conntrack_image *ci,
-+			      loff_t pos, struct ct_holder *ct_list,
-+			      cpt_context_t *ctx)
-+{
-+	loff_t end;
-+	int err;
-+
-+	end = pos + ci->cpt_next;
-+	pos += ci->cpt_hdrlen;
-+	while (pos < end) {
-+		struct cpt_ip_connexpect_image v;
-+		struct ip_conntrack_expect *exp;
-+		struct ip_conntrack *sibling;
-+
-+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK_EXPECT, pos, &v, ctx);
-+		if (err)
-+			return err;
-+
-+		sibling = NULL;
-+		if (v.cpt_sibling_conntrack) {
-+			struct ct_holder *c;
-+
-+			for (c = ct_list; c; c = c->next) {
-+				if (c->index == v.cpt_sibling_conntrack) {
-+					sibling = c->ct;
-+					break;
-+				}
-+			}
-+			if (!sibling) {
-+				eprintk_ctx("lost sibling of expectation\n");
-+				return -EINVAL;
-+			}
-+		}
-+
-+		write_lock_bh(&ip_conntrack_lock);
-+
-+		/* It is possible. Helper module could be just unregistered,
-+		 * if expectation were on the list, it would be destroyed. */
-+		if (ct->helper == NULL) {
-+			write_unlock_bh(&ip_conntrack_lock);
-+			dprintk_ctx("conntrack: no helper and non-trivial expectation\n");
-+			/* step over the record; without this the loop never ends */
-+			pos += v.cpt_next;
-+			continue;
-+		}
-+
-+		exp = ip_conntrack_expect_alloc(NULL);
-+		if (exp == NULL) {
-+			write_unlock_bh(&ip_conntrack_lock);
-+			return -ENOMEM;
-+		}
-+
-+		if (ct->helper->timeout && !del_timer(&exp->timeout)) {
-+			/* Dying already. We can do nothing. */
-+			write_unlock_bh(&ip_conntrack_lock);
-+			dprintk_ctx("conntrack expectation is dying\n");
-+			/* NOTE(review): exp is not released on this path;
-+			 * confirm which put/free call is appropriate here. */
-+			pos += v.cpt_next;
-+			continue;
-+		}
-+
-+		decode_tuple(&v.cpt_tuple, &exp->tuple, 0);
-+		decode_tuple(&v.cpt_mask, &exp->mask, 0);
-+
-+		exp->master = ct;
-+		nf_conntrack_get(&ct->ct_general);
-+		ip_conntrack_expect_insert(exp);
-+#if 0
-+		if (sibling) {
-+			exp->sibling = sibling;
-+			sibling->master = exp;
-+			LIST_DELETE(&ve_ip_conntrack_expect_list, exp);
-+			ct->expecting--;
-+			nf_conntrack_get(&master_ct(sibling)->infos[0]);
-+		} else
-+#endif
-+		if (ct->helper->timeout) {
-+			exp->timeout.expires = jiffies + v.cpt_timeout;
-+			add_timer(&exp->timeout);
-+		}
-+		write_unlock_bh(&ip_conntrack_lock);
-+
-+		pos += v.cpt_next;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Restore one conntrack from its image.
-+ *
-+ * @ci:      conntrack image already read from the dump
-+ * @pos:     position of that image (used to locate trailing expectations)
-+ * @ct_list: list head; a holder for the new conntrack is pushed onto it
-+ *
-+ * Allocates the conntrack, copies tuples, status, protocol/helper data and
-+ * (when configured) NAT state from the image, inserts it into the hash
-+ * and then restores any expectation records following the header.
-+ * The conntrack timer is only given an expiry here; it is not started.
-+ *
-+ * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when the
-+ * image requires a helper that cannot be found, or the error returned by
-+ * undump_expect_list().
-+ */
-+static int undump_one_ct(struct cpt_ip_conntrack_image *ci, loff_t pos,
-+ struct ct_holder **ct_list, cpt_context_t *ctx)
-+{
-+ int err = 0;
-+ struct ip_conntrack *conntrack;
-+ struct ct_holder *c;
-+ struct ip_conntrack_tuple orig, repl;
-+
-+ c = kmalloc(sizeof(struct ct_holder), GFP_KERNEL);
-+ if (c == NULL)
-+ return -ENOMEM;
-+
-+ decode_tuple(&ci->cpt_tuple[0], &orig, 0);
-+ decode_tuple(&ci->cpt_tuple[1], &repl, 1);
-+
-+ conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub);
-+ if (!conntrack) {
-+ kfree(c);
-+ return -ENOMEM;
-+ }
-+
-+ /* from here on the holder is owned by *ct_list */
-+ c->ct = conntrack;
-+ c->next = *ct_list;
-+ *ct_list = c;
-+ c->index = ci->cpt_index;
-+
-+ decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple, 0);
-+ decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple, 1);
-+
-+ conntrack->status = ci->cpt_status;
-+
-+ memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
-+ memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));
-+
-+#ifdef CONFIG_IP_NF_NAT_NEEDED
-+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-+ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
-+ conntrack->nat.masq_index = ci->cpt_masq_index;
-+#endif
-+ if (ci->cpt_initialized) {
-+ conntrack->nat.info.seq[0].correction_pos = ci->cpt_nat_seq[0].cpt_correction_pos;
-+ conntrack->nat.info.seq[0].offset_before = ci->cpt_nat_seq[0].cpt_offset_before;
-+ conntrack->nat.info.seq[0].offset_after = ci->cpt_nat_seq[0].cpt_offset_after;
-+ conntrack->nat.info.seq[1].correction_pos = ci->cpt_nat_seq[1].cpt_correction_pos;
-+ conntrack->nat.info.seq[1].offset_before = ci->cpt_nat_seq[1].cpt_offset_before;
-+ conntrack->nat.info.seq[1].offset_after = ci->cpt_nat_seq[1].cpt_offset_after;
-+ }
-+ if (conntrack->status & IPS_NAT_DONE_MASK)
-+ ip_nat_hash_conntrack(conntrack);
-+#endif
-+
-+ write_lock_bh(&ip_conntrack_lock);
-+
-+ if (ci->cpt_ct_helper) {
-+ conntrack->helper = ip_conntrack_helper_find_get(&conntrack->tuplehash[1].tuple);
-+ if (conntrack->helper == NULL) {
-+ eprintk_ctx("conntrack: cannot find helper, some module is not loaded\n");
-+ err = -EINVAL;
-+ }
-+ }
-+
-+ /* NOTE(review): the conntrack is inserted even when the helper lookup
-+  * above failed and -EINVAL is about to be returned. */
-+ ip_conntrack_hash_insert(conntrack);
-+ conntrack->timeout.expires = jiffies + ci->cpt_timeout;
-+
-+ write_unlock_bh(&ip_conntrack_lock);
-+
-+ /* data beyond the header means expectation records follow */
-+ if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)
-+ err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
-+
-+ return err;
-+}
-+
-+/*
-+ * Restore all conntracks stored in the CPT_SECT_NET_CONNTRACK section.
-+ *
-+ * Nothing is done when the section is absent.  The image is rejected with
-+ * -EINVAL when the sizes of the stored protocol/helper blobs do not match
-+ * the running kernel's unions, or when the section header is malformed.
-+ * After the walk, successful or not, the timer of every conntrack
-+ * restored so far is started and the temporary holder list is freed.
-+ */
-+int rst_restore_ip_conntrack(struct cpt_context * ctx)
-+{
-+	struct ct_holder *holders = NULL;
-+	struct cpt_ip_conntrack_image ci;
-+	struct cpt_section_hdr h;
-+	loff_t pos = ctx->sections[CPT_SECT_NET_CONNTRACK];
-+	loff_t limit;
-+	int err;
-+
-+	if (pos == CPT_NULL)
-+		return 0;
-+
-+	if (sizeof(ci.cpt_proto_data) != sizeof(union ip_conntrack_proto)) {
-+		eprintk_ctx("conntrack module ct->proto version mismatch\n");
-+		return -EINVAL;
-+	}
-+	if (sizeof(ci.cpt_help_data) != sizeof(union ip_conntrack_help)) {
-+		eprintk_ctx("conntrack module ct->help version mismatch\n");
-+		return -EINVAL;
-+	}
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, pos);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_NET_CONNTRACK || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	limit = pos + h.cpt_next;
-+	for (pos += h.cpt_hdrlen; pos < limit; pos += ci.cpt_next) {
-+		err = rst_get_object(CPT_OBJ_NET_CONNTRACK, pos, &ci, ctx);
-+		if (err)
-+			break;
-+		err = undump_one_ct(&ci, pos, &holders, ctx);
-+		if (err)
-+			break;
-+	}
-+
-+	/* start the timers and release the bookkeeping nodes */
-+	while (holders != NULL) {
-+		struct ct_holder *c = holders;
-+
-+		holders = c->next;
-+		if (c->ct)
-+			add_timer(&c->ct->timeout);
-+		kfree(c);
-+	}
-+
-+	return err;
-+}
-+
-+#else
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/*
-+ * Variant built without conntrack support: an image that contains a
-+ * conntrack section cannot be restored and is rejected with -EINVAL.
-+ */
-+int rst_restore_ip_conntrack(struct cpt_context * ctx)
-+{
-+	if (ctx->sections[CPT_SECT_NET_CONNTRACK] == CPT_NULL)
-+		return 0;
-+	return -EINVAL;
-+}
-+
-+#endif
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_context.c linux-2.6.16-026test009/kernel/cpt/rst_context.c
---- linux-2.6.16.orig/kernel/cpt/rst_context.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_context.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,315 @@
-+/*
-+ *
-+ * kernel/cpt/rst_context.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/pagemap.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/*
-+ * Read exactly @count bytes from the dump file at its current position
-+ * into the kernel buffer @addr.
-+ *
-+ * Returns 0 when all bytes were read, -EBADF when the context has no
-+ * file, -EINVAL when the file has no read method, -EIO on a short read,
-+ * or the negative error returned by the read method.
-+ */
-+static ssize_t file_read(void *addr, size_t count, struct cpt_context *ctx)
-+{
-+	mm_segment_t oldfs;
-+	ssize_t err = -EBADF;
-+	struct file *file = ctx->file;
-+
-+	/* do not call through a missing method */
-+	if (file && (!file->f_op || !file->f_op->read))
-+		return -EINVAL;
-+
-+	/* @addr is a kernel pointer, so lift the user address limit */
-+	oldfs = get_fs(); set_fs(KERNEL_DS);
-+	if (file)
-+		err = file->f_op->read(file, addr, count, &file->f_pos);
-+	set_fs(oldfs);
-+	if (err != count)
-+		return err >= 0 ? -EIO : err;
-+	return 0;
-+}
-+
-+/*
-+ * Read exactly @count bytes from the dump file at offset @pos into the
-+ * kernel buffer @addr.  The file's own position is not used.
-+ *
-+ * Returns 0 when all bytes were read, -EBADF when the context has no
-+ * file, -EINVAL when the file has no read method, -EIO on a short read,
-+ * or the negative error returned by the read method.
-+ */
-+static ssize_t file_pread(void *addr, size_t count, struct cpt_context *ctx, loff_t pos)
-+{
-+	mm_segment_t oldfs;
-+	ssize_t err = -EBADF;
-+	struct file *file = ctx->file;
-+
-+	/* do not call through a missing method */
-+	if (file && (!file->f_op || !file->f_op->read))
-+		return -EINVAL;
-+
-+	/* @addr is a kernel pointer, so lift the user address limit */
-+	oldfs = get_fs(); set_fs(KERNEL_DS);
-+	if (file)
-+		err = file->f_op->read(file, addr, count, &pos);
-+	set_fs(oldfs);
-+	if (err != count)
-+		return err >= 0 ? -EIO : err;
-+	return 0;
-+}
-+
-+/*
-+ * Round the dump file position up with CPT_ALIGN().  Does nothing when
-+ * the context has no file.
-+ */
-+static void file_align(struct cpt_context *ctx)
-+{
-+	struct file *f = ctx->file;
-+
-+	if (f == NULL)
-+		return;
-+
-+	f->f_pos = CPT_ALIGN(f->f_pos);
-+}
-+
-+/*
-+ * Locate the payload of section @type.
-+ *
-+ * On return *start and *end delimit the section body.  When the section
-+ * is absent both are set to CPT_NULL and 0 is returned.  Returns the read
-+ * error when the header cannot be read and -EINVAL when the header does
-+ * not describe a section of the requested type.
-+ */
-+int rst_get_section(int type, struct cpt_context *ctx, loff_t *start, loff_t *end)
-+{
-+	struct cpt_section_hdr hdr;
-+	loff_t pos = ctx->sections[type];
-+	int err;
-+
-+	*start = pos;
-+	*end = pos;
-+
-+	if (pos == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
-+	if (err != 0)
-+		return err;
-+
-+	if (hdr.cpt_section != type || hdr.cpt_hdrlen < sizeof(hdr))
-+		return -EINVAL;
-+
-+	*start = pos + hdr.cpt_hdrlen;
-+	*end = pos + hdr.cpt_next;
-+	return 0;
-+}
-+EXPORT_SYMBOL(rst_get_section);
-+
-+/*
-+ * Bring a restore context to its initial state: zero it, install the
-+ * file based read/pread/align methods, mark every section as absent and
-+ * initialize the object table.
-+ */
-+void rst_context_init(struct cpt_context *ctx)
-+{
-+	int sect;
-+
-+	memset(ctx, 0, sizeof(*ctx));
-+
-+	ctx->refcount = 1;
-+	init_MUTEX(&ctx->main_sem);
-+
-+	ctx->pagesize = PAGE_SIZE;
-+	ctx->current_section = -1;
-+	ctx->current_object = -1;
-+
-+	/* I/O methods backed by ctx->file */
-+	ctx->read = file_read;
-+	ctx->pread = file_pread;
-+	ctx->align = file_align;
-+
-+	/* no section has been located yet */
-+	for (sect = 0; sect < CPT_SECT_MAX; sect++)
-+		ctx->sections[sect] = CPT_NULL;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+	init_completion(&ctx->pgin_notify);
-+#endif
-+	cpt_object_init(ctx);
-+}
-+
-+/*
-+ * Walk the section headers between @start and @end and remember the
-+ * position of every section in ctx->sections[].
-+ *
-+ * Returns 0 on success, the read error when a header cannot be read, or
-+ * -EINVAL when a header is shorter than the generic section header, has
-+ * cpt_next smaller than cpt_hdrlen, runs past @end or carries a section
-+ * number outside the table.
-+ */
-+static int parse_sections(loff_t start, loff_t end, cpt_context_t *ctx)
-+{
-+	struct cpt_section_hdr h;
-+	int err;
-+
-+	for (; start < end; start += h.cpt_next) {
-+		err = ctx->pread(&h, sizeof(h), ctx, start);
-+		if (err)
-+			return err;
-+
-+		if (h.cpt_hdrlen < sizeof(h) ||
-+		    h.cpt_next < h.cpt_hdrlen ||
-+		    start + h.cpt_next > end ||
-+		    h.cpt_section >= CPT_SECT_MAX)
-+			return -EINVAL;
-+
-+		ctx->sections[h.cpt_section] = start;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Validate the dump file attached to the context and index its sections.
-+ *
-+ * Allocates the one-page temporary buffer, checks that the file size is a
-+ * multiple of 8 and large enough for a major header plus major tail,
-+ * verifies both signatures and the HZ value, copies global parameters
-+ * from the header into the context and finally parses the section table.
-+ *
-+ * Returns 0 on success.  On failure returns -EBADF (no file), -ENOMEM,
-+ * -EINVAL or a read error, and the temporary buffer is freed again.
-+ */
-+int rst_open_dumpfile(struct cpt_context *ctx)
-+{
-+ int err;
-+ struct cpt_major_tail *v;
-+ struct cpt_major_hdr h;
-+ unsigned long size;
-+
-+ err = -EBADF;
-+ if (!ctx->file)
-+ goto err_out;
-+
-+ err = -ENOMEM;
-+ ctx->tmpbuf = (char*)__get_free_page(GFP_KERNEL);
-+ if (ctx->tmpbuf == NULL)
-+ goto err_out;
-+ __cpt_release_buf(ctx);
-+
-+ /* NOTE(review): size is unsigned long; if i_size is wider than that on
-+  * some builds, large images would be truncated here - confirm. */
-+ size = ctx->file->f_dentry->d_inode->i_size;
-+
-+ if (size & 7) {
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+ if (size < sizeof(struct cpt_major_hdr) +
-+ sizeof(struct cpt_major_tail)) {
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+ /* major header lives at offset 0 */
-+ err = ctx->pread(&h, sizeof(h), ctx, 0);
-+ if (err) {
-+ eprintk_ctx("too short image 1 %d\n", err);
-+ goto err_out;
-+ }
-+ if (h.cpt_signature[0] != CPT_SIGNATURE0 ||
-+ h.cpt_signature[1] != CPT_SIGNATURE1 ||
-+ h.cpt_signature[2] != CPT_SIGNATURE2 ||
-+ h.cpt_signature[3] != CPT_SIGNATURE3) {
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+ if (h.cpt_hz != HZ) {
-+ err = -EINVAL;
-+ eprintk_ctx("HZ mismatch: %d != %d\n", h.cpt_hz, HZ);
-+ goto err_out;
-+ }
-+ ctx->virt_jiffies64 = h.cpt_start_jiffies64;
-+ ctx->start_time.tv_sec = h.cpt_start_sec;
-+ ctx->start_time.tv_nsec = h.cpt_start_nsec;
-+ ctx->kernel_config_flags = h.cpt_kernel_config[0];
-+ ctx->iptables_mask = h.cpt_iptables_mask;
-+ ctx->image_version = h.cpt_image_version;
-+
-+ /* major tail occupies the last sizeof(*v) bytes of the file */
-+ v = cpt_get_buf(ctx);
-+ err = ctx->pread(v, sizeof(*v), ctx, size - sizeof(*v));
-+ if (err) {
-+ eprintk_ctx("too short image 2 %d\n", err);
-+ cpt_release_buf(ctx);
-+ goto err_out;
-+ }
-+ if (v->cpt_signature[0] != CPT_SIGNATURE0 ||
-+ v->cpt_signature[1] != CPT_SIGNATURE1 ||
-+ v->cpt_signature[2] != CPT_SIGNATURE2 ||
-+ v->cpt_signature[3] != CPT_SIGNATURE3 ||
-+ v->cpt_nsect != CPT_SECT_MAX_INDEX) {
-+ err = -EINVAL;
-+ cpt_release_buf(ctx);
-+ goto err_out;
-+ }
-+ /* sections are parsed from the end of the major header up to the tail,
-+  * less one section header */
-+ if ((err = parse_sections(h.cpt_hdrlen, size - sizeof(*v) - sizeof(struct cpt_section_hdr), ctx)) < 0) {
-+ cpt_release_buf(ctx);
-+ goto err_out;
-+ }
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ ctx->lazypages = v->cpt_lazypages;
-+#endif
-+ ctx->tasks64 = v->cpt_64bit;
-+ cpt_release_buf(ctx);
-+ return 0;
-+
-+err_out:
-+ /* common failure path: give the temporary page back */
-+ if (ctx->tmpbuf) {
-+ free_page((unsigned long)ctx->tmpbuf);
-+ ctx->tmpbuf = NULL;
-+ }
-+ return err;
-+}
-+
-+/*
-+ * Release what the context holds for the dump file: the file reference
-+ * and the temporary page.  Both pointers are cleared, so calling this
-+ * again is harmless.
-+ */
-+void rst_close_dumpfile(struct cpt_context *ctx)
-+{
-+	struct file *dump = ctx->file;
-+	char *page = ctx->tmpbuf;
-+
-+	if (dump != NULL) {
-+		fput(dump);
-+		ctx->file = NULL;
-+	}
-+	if (page != NULL) {
-+		free_page((unsigned long)page);
-+		ctx->tmpbuf = NULL;
-+	}
-+}
-+
-+/*
-+ * Read the object header at @pos into @tmp, followed by as much of the
-+ * type specific header as fits.
-+ *
-+ * @type: expected object type, or a value <= 0 to accept any type
-+ * @tmp:  caller buffer of @size bytes; must hold at least a
-+ *        struct cpt_object_hdr
-+ * @size: capacity of @tmp; at most min(@size, cpt_hdrlen) bytes are read
-+ *
-+ * Returns 0 on success, a read error, or -EINVAL when the buffer is too
-+ * small, the type does not match or the header lengths are inconsistent.
-+ */
-+int _rst_get_object(int type, loff_t pos, void *tmp, int size, struct cpt_context *ctx)
-+{
-+	int err;
-+	struct cpt_object_hdr *hdr = tmp;
-+
-+	/*
-+	 * Validate the buffer before anything is written into it: the
-+	 * generic header is read unconditionally below.
-+	 */
-+	if (size < 0 || size < sizeof(*hdr))
-+		return -EINVAL;
-+
-+	err = ctx->pread(hdr, sizeof(struct cpt_object_hdr), ctx, pos);
-+	if (err)
-+		return err;
-+	if (type > 0 && type != hdr->cpt_object)
-+		return -EINVAL;
-+	if (hdr->cpt_hdrlen > hdr->cpt_next)
-+		return -EINVAL;
-+	if (hdr->cpt_hdrlen < sizeof(struct cpt_object_hdr))
-+		return -EINVAL;
-+	/* never read past the header the image actually contains */
-+	if (size > hdr->cpt_hdrlen)
-+		size = hdr->cpt_hdrlen;
-+	if (size > sizeof(*hdr))
-+		err = ctx->pread(hdr+1, size - sizeof(*hdr),
-+				 ctx, pos + sizeof(*hdr));
-+	return err;
-+}
-+EXPORT_SYMBOL(_rst_get_object);
-+
-+/*
-+ * Read the complete header (cpt_hdrlen bytes) of the object at @pos into
-+ * a freshly kmalloc'ed buffer.
-+ *
-+ * @type: expected object type, or a value <= 0 to accept any type
-+ *
-+ * Returns the buffer, which the caller must kfree(), or NULL on any
-+ * failure (read error, type mismatch, inconsistent lengths, no memory).
-+ */
-+void * __rst_get_object(int type, loff_t pos, struct cpt_context *ctx)
-+{
-+	struct cpt_object_hdr hdr;
-+	void *image;
-+
-+	if (ctx->pread(&hdr, sizeof(hdr), ctx, pos))
-+		return NULL;
-+	if (type > 0 && type != hdr.cpt_object)
-+		return NULL;
-+	if (hdr.cpt_hdrlen > hdr.cpt_next ||
-+	    hdr.cpt_hdrlen < sizeof(struct cpt_object_hdr))
-+		return NULL;
-+
-+	image = kmalloc(hdr.cpt_hdrlen, GFP_KERNEL);
-+	if (image == NULL)
-+		return NULL;
-+
-+	if (ctx->pread(image, hdr.cpt_hdrlen, ctx, pos)) {
-+		kfree(image);
-+		return NULL;
-+	}
-+	return image;
-+}
-+EXPORT_SYMBOL(__rst_get_object);
-+
-+/*
-+ * Read the payload of the CPT_OBJ_NAME object at *pos_p into a newly
-+ * allocated page and advance *pos_p past the object.
-+ *
-+ * Returns the page (to be released with rst_put_name()) or NULL when the
-+ * object cannot be read, its payload is larger than PAGE_SIZE or no page
-+ * is available.  *pos_p is left untouched on failure.
-+ *
-+ * NOTE(review): nothing here terminates the buffer; a payload of exactly
-+ * PAGE_SIZE bytes without a trailing NUL would presumably yield an
-+ * unterminated string - confirm against the dump side.
-+ */
-+__u8 *__rst_get_name(loff_t *pos_p, struct cpt_context *ctx)
-+{
-+ int err;
-+ struct cpt_object_hdr hdr;
-+ __u8 *name;
-+
-+ err = rst_get_object(CPT_OBJ_NAME, *pos_p, &hdr, ctx);
-+ if (err)
-+ return NULL;
-+ /* payload is whatever follows the header inside the object */
-+ if (hdr.cpt_next - hdr.cpt_hdrlen > PAGE_SIZE)
-+ return NULL;
-+ name = (void*)__get_free_page(GFP_KERNEL);
-+ if (!name)
-+ return NULL;
-+ err = ctx->pread(name, hdr.cpt_next - hdr.cpt_hdrlen,
-+ ctx, *pos_p + hdr.cpt_hdrlen);
-+ if (err) {
-+ free_page((unsigned long)name);
-+ return NULL;
-+ }
-+ *pos_p += hdr.cpt_next;
-+ return name;
-+}
-+
-+/*
-+ * Same as __rst_get_name() for callers that do not need the position
-+ * advanced: @pos is passed by value and the update is discarded.
-+ */
-+__u8 *rst_get_name(loff_t pos, struct cpt_context *ctx)
-+{
-+	loff_t cursor = pos;
-+
-+	return __rst_get_name(&cursor, ctx);
-+}
-+
-+/*
-+ * Free the page containing @name.  The address is rounded down to a page
-+ * boundary first, so any pointer into the page is accepted.  A NULL
-+ * @name is ignored.  @ctx is not used.
-+ */
-+void rst_put_name(__u8 *name, struct cpt_context *ctx)
-+{
-+	unsigned long addr = (unsigned long)name;
-+
-+	if (!addr)
-+		return;
-+
-+	free_page(addr&~(PAGE_SIZE-1));
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_epoll.c linux-2.6.16-026test009/kernel/cpt/rst_epoll.c
---- linux-2.6.16.orig/kernel/cpt/rst_epoll.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_epoll.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,173 @@
-+/*
-+ *
-+ * kernel/cpt/rst_epoll.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/namespace.h>
-+#include <linux/mount.h>
-+#include <linux/namei.h>
-+#include <linux/smp_lock.h>
-+#include <asm/uaccess.h>
-+#include <linux/vzcalluser.h>
-+#include <linux/eventpoll.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_files.h"
-+#include "cpt_kernel.h"
-+#include "cpt_fsmagic.h"
-+#include "cpt_syscalls.h"
-+
-+/* These functions are static in fs/eventpoll.c */
-+extern struct file_operations eventpoll_fops;
-+extern int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-+ struct file *tfile, int fd);
-+extern struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
-+extern void ep_release_epitem(struct epitem *epi);
-+
-+
-+/*
-+ * Create a new epoll instance and return its struct file.
-+ *
-+ * A descriptor is obtained from sys_epoll_create(), a reference to its
-+ * file is taken with fget() and the descriptor is closed again, so only
-+ * the file reference is handed to the caller.  @fi, @flags and @ctx are
-+ * not used.
-+ *
-+ * Returns the file, or ERR_PTR() of the sys_epoll_create() error.
-+ */
-+struct file *cpt_open_epolldev(struct cpt_file_image *fi,
-+			       unsigned flags,
-+			       struct cpt_context *ctx)
-+{
-+	struct file *epfile;
-+	int efd = sys_epoll_create(1);	/* "size" is ignored, use just 1 */
-+
-+	if (efd >= 0) {
-+		epfile = fget(efd);
-+		sys_close(efd);
-+		return epfile;
-+	}
-+	return ERR_PTR(efd);
-+}
-+
-+/*
-+ * Re-populate one restored epoll file with its watched files.
-+ *
-+ * @obj:  object whose o_obj is the epoll struct file
-+ * @pos:  position of the epoll image in the dump
-+ * @ebuf: the epoll image header (gives the extent of the item records)
-+ *
-+ * Every CPT_OBJ_EPOLL_FILE record between pos + cpt_hdrlen and
-+ * pos + cpt_next is inserted with ep_insert(); the stored revents are
-+ * written back and items flagged ready are appended to the ready list.
-+ *
-+ * Returns 0 on success, -EINVAL for a non-epoll file, a missing eventpoll
-+ * or an unknown target file, or the error from reading/inserting an item.
-+ */
-+static int restore_one_epoll(cpt_object_t *obj,
-+ loff_t pos,
-+ struct cpt_epoll_image *ebuf,
-+ cpt_context_t *ctx)
-+{
-+ int err = 0;
-+ loff_t endpos;
-+ struct file *file = obj->o_obj;
-+ struct eventpoll *ep;
-+
-+ if (file->f_op != &eventpoll_fops) {
-+ eprintk_ctx("bad epoll file\n");
-+ return -EINVAL;
-+ }
-+
-+ ep = file->private_data;
-+
-+ if (unlikely(ep == NULL)) {
-+ eprintk_ctx("bad epoll device\n");
-+ return -EINVAL;
-+ }
-+
-+ endpos = pos + ebuf->cpt_next;
-+ pos += ebuf->cpt_hdrlen;
-+ while (pos < endpos) {
-+ struct cpt_epoll_file_image efi;
-+ struct epoll_event epds;
-+
-+ cpt_object_t *tobj;
-+
-+ err = rst_get_object(CPT_OBJ_EPOLL_FILE, pos, &efi, ctx);
-+ if (err)
-+ return err;
-+ /* the watched file is referenced by its position in the image */
-+ tobj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, efi.cpt_file, ctx);
-+ if (!tobj) {
-+ eprintk_ctx("epoll file not found\n");
-+ return -EINVAL;
-+ }
-+ epds.events = efi.cpt_events;
-+ epds.data = efi.cpt_data;
-+ down_write(&ep->sem);
-+ err = ep_insert(ep, &epds, tobj->o_obj, efi.cpt_fd);
-+ if (!err) {
-+ struct epitem *epi;
-+ /* look the new item up again to restore its saved state */
-+ epi = ep_find(ep, tobj->o_obj, efi.cpt_fd);
-+ if (epi) {
-+ epi->revents = efi.cpt_revents;
-+ if (efi.cpt_ready) {
-+ unsigned long flags;
-+ write_lock_irqsave(&ep->lock, flags);
-+ /* queue only if not already on the ready list */
-+ if (list_empty(&epi->rdllink))
-+ list_add_tail(&epi->rdllink, &ep->rdllist);
-+ write_unlock_irqrestore(&ep->lock, flags);
-+ }
-+ ep_release_epitem(epi);
-+ }
-+ }
-+ up_write(&ep->sem);
-+ if (err)
-+ break;
-+ pos += efi.cpt_next;
-+ }
-+ return err;
-+}
-+
-+/*
-+ * Restore the contents of every epoll file recorded in the
-+ * CPT_SECT_EPOLL section.
-+ *
-+ * Returns 0 when the section is absent or fully processed, a read error,
-+ * -EINVAL when the section header is malformed or an epoll file object
-+ * is unknown, or the error returned by restore_one_epoll().
-+ */
-+int rst_eventpoll(cpt_context_t *ctx)
-+{
-+	int err;
-+	loff_t sec = ctx->sections[CPT_SECT_EPOLL];
-+	loff_t endsec;
-+	struct cpt_section_hdr h;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_EPOLL || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	endsec = sec + h.cpt_next;
-+	sec += h.cpt_hdrlen;
-+	while (sec < endsec) {
-+		cpt_object_t *obj;
-+		loff_t next;
-+		struct cpt_epoll_image *ebuf = cpt_get_buf(ctx);
-+
-+		err = rst_get_object(CPT_OBJ_EPOLL, sec, ebuf, ctx);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, ebuf->cpt_file, ctx);
-+		if (obj == NULL) {
-+			eprintk_ctx("cannot find epoll file object\n");
-+			cpt_release_buf(ctx);
-+			return -EINVAL;
-+		}
-+		err = restore_one_epoll(obj, sec, ebuf, ctx);
-+		/*
-+		 * Take the object length while the buffer is still ours;
-+		 * ebuf must not be dereferenced after cpt_release_buf().
-+		 */
-+		next = ebuf->cpt_next;
-+		cpt_release_buf(ctx);
-+		if (err)
-+			return err;
-+		sec += next;
-+	}
-+
-+	return 0;
-+
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_files.c linux-2.6.16-026test009/kernel/cpt/rst_files.c
---- linux-2.6.16.orig/kernel/cpt/rst_files.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_files.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,1447 @@
-+/*
-+ *
-+ * kernel/cpt/rst_files.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/mount.h>
-+#include <linux/tty.h>
-+#include <linux/namei.h>
-+#include <linux/vmalloc.h>
-+#include <linux/smp_lock.h>
-+#include <linux/vmalloc.h>
-+#include <linux/pagemap.h>
-+#include <asm/uaccess.h>
-+#include <ub/ub_mem.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_files.h"
-+#include "cpt_kernel.h"
-+#include "cpt_fsmagic.h"
-+
-+#include "cpt_syscalls.h"
-+
-+
-+/* One queued file job; jobs are chained from ctx->filejob_queue. */
-+struct filejob {
-+ struct filejob *next; /* previously queued job, NULL for the first */
-+ int pid; /* current->pid at the time the job was queued */
-+ loff_t fdi; /* image position passed to rst_filejob_queue() */
-+};
-+
-+/*
-+ * Queue a file job for the current task: a node recording current->pid
-+ * and the image position @pos is pushed onto the head of
-+ * ctx->filejob_queue.
-+ *
-+ * Returns 0 on success or -ENOMEM.
-+ */
-+static int rst_filejob_queue(loff_t pos, cpt_context_t *ctx)
-+{
-+	struct filejob *job = kmalloc(sizeof(*job), GFP_KERNEL);
-+
-+	if (!job)
-+		return -ENOMEM;
-+
-+	job->fdi = pos;
-+	job->pid = current->pid;
-+
-+	/* push onto the head of the list */
-+	job->next = ctx->filejob_queue;
-+	ctx->filejob_queue = job;
-+	return 0;
-+}
-+
-+/*
-+ * Release callback for pipe buffers created by this module.  The page is
-+ * parked in info->tmp_page when that slot is empty and freed otherwise;
-+ * in both cases one module reference is dropped.
-+ */
-+static void _anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
-+{
-+	if (info->tmp_page == NULL)
-+		info->tmp_page = buf->page;
-+	else
-+		__free_page(buf->page);
-+
-+	module_put(THIS_MODULE);
-+}
-+
-+/* Map callback: returns a kernel mapping of the buffer page via kmap(). */
-+static void *_anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
-+{
-+ return kmap(buf->page);
-+}
-+
-+/* Unmap callback: undoes the kmap() done by _anon_pipe_buf_map(). */
-+static void _anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
-+{
-+ kunmap(buf->page);
-+}
-+
-+/* Operations installed on every pipe buffer filled by fixup_pipe_data(). */
-+static struct pipe_buf_operations _anon_pipe_buf_ops = {
-+ .can_merge = 1,
-+ .map = _anon_pipe_buf_map,
-+ .unmap = _anon_pipe_buf_unmap,
-+ .release = _anon_pipe_buf_release,
-+};
-+
-+/* Sorta ugly... Multiple readers/writers of named pipe rewrite buffer
-+ * many times. We need to mark it in CPT_OBJ_INODE table in some way.
-+ */
-+/*
-+ * Refill a restored pipe with the data saved in the image.
-+ *
-+ * The saved bytes live in a CPT_OBJ_BITS object directly after the inode
-+ * image.  They are copied page by page into freshly allocated pipe
-+ * buffers; each buffer holds one module reference that is dropped by
-+ * _anon_pipe_buf_release().
-+ *
-+ * Returns 0 on success or when there is nothing to restore, -EINVAL when
-+ * the file is not a FIFO, the pipe already holds buffers or the image
-+ * holds more data than the pipe has buffers for, -EBUSY/-ENOMEM on
-+ * resource failure, or a read error.
-+ */
-+static int fixup_pipe_data(struct file *file, struct cpt_file_image *fi,
-+			   struct cpt_context *ctx)
-+{
-+	struct inode *ino = file->f_dentry->d_inode;
-+	struct cpt_inode_image ii;
-+	struct cpt_obj_bits b;
-+	struct pipe_inode_info *info;
-+	int err;
-+	int count;
-+
-+	if (!S_ISFIFO(ino->i_mode)) {
-+		eprintk_ctx("fixup_pipe_data: not a pipe %Ld\n", fi->cpt_inode);
-+		return -EINVAL;
-+	}
-+	if (fi->cpt_inode == CPT_NULL)
-+		return 0;
-+
-+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
-+	if (err)
-+		return err;
-+
-+	/* nothing stored after the inode header */
-+	if (ii.cpt_next <= ii.cpt_hdrlen)
-+		return 0;
-+
-+	err = rst_get_object(CPT_OBJ_BITS, fi->cpt_inode + ii.cpt_hdrlen, &b, ctx);
-+	if (err)
-+		return err;
-+
-+	if (b.cpt_size == 0)
-+		return 0;
-+
-+	mutex_lock(PIPE_MUTEX(*ino));
-+	info = ino->i_pipe;
-+	if (info->nrbufs) {
-+		mutex_unlock(PIPE_MUTEX(*ino));
-+		eprintk("pipe buffer is restored already\n");
-+		return -EINVAL;
-+	}
-+	info->curbuf = 0;
-+	count = 0;
-+	while (count < b.cpt_size) {
-+		struct pipe_buffer *buf;
-+		void * addr;
-+		int chars;
-+
-+		/* the size comes from the image: never index past bufs[] */
-+		if (info->nrbufs >= PIPE_BUFFERS) {
-+			err = -EINVAL;
-+			break;
-+		}
-+		buf = info->bufs + info->nrbufs;
-+
-+		chars = b.cpt_size - count;
-+		if (chars > PAGE_SIZE)
-+			chars = PAGE_SIZE;
-+		if (!try_module_get(THIS_MODULE)) {
-+			err = -EBUSY;
-+			break;
-+		}
-+
-+		buf->page = alloc_page(GFP_HIGHUSER);
-+		if (buf->page == NULL) {
-+			/* the buffer was never published, so drop its reference here */
-+			module_put(THIS_MODULE);
-+			err = -ENOMEM;
-+			break;
-+		}
-+		buf->ops = &_anon_pipe_buf_ops;
-+		buf->offset = 0;
-+		buf->len = chars;
-+		info->nrbufs++;
-+		addr = kmap(buf->page);
-+		err = ctx->pread(addr, chars, ctx,
-+				 fi->cpt_inode + ii.cpt_hdrlen + b.cpt_hdrlen + count);
-+		/* every kmap() needs its kunmap(), also on the error path */
-+		kunmap(buf->page);
-+		if (err)
-+			break;
-+		count += chars;
-+	}
-+	mutex_unlock(PIPE_MUTEX(*ino));
-+
-+	return err;
-+}
-+
-+/*
-+ * Build the open(2) flags used to reopen a file from its image.
-+ *
-+ * The access mode is derived from the FMODE_READ/FMODE_WRITE bits of
-+ * cpt_mode.  The saved cpt_flags are carried over except for the access
-+ * mode, O_CREAT, O_TRUNC, O_EXCL and FASYNC.  O_NOFOLLOW, O_NONBLOCK and
-+ * O_NOCTTY are always set.
-+ */
-+static int make_flags(struct cpt_file_image *fi)
-+{
-+	int mode = fi->cpt_mode&(FMODE_READ|FMODE_WRITE);
-+	int flags = O_NOFOLLOW;
-+
-+	if (mode == (FMODE_READ|FMODE_WRITE))
-+		flags |= O_RDWR;
-+	else if (mode == FMODE_WRITE)
-+		flags |= O_WRONLY;
-+	else if (mode == FMODE_READ)
-+		flags |= O_RDONLY;
-+
-+	flags |= fi->cpt_flags&~(O_ACCMODE|O_CREAT|O_TRUNC|O_EXCL|FASYNC);
-+	flags |= O_NONBLOCK|O_NOCTTY;
-+	return flags;
-+}
-+
-+/*
-+ * Open the first end of a pipe or FIFO described by @fi.
-+ *
-+ * For an anonymous pipe (inode on pipefs) a new pipe is created and both
-+ * ends are kept: the end matching cpt_mode is returned, the other one is
-+ * remembered in the object table.  For a named FIFO opened for reading
-+ * the file is simply opened by @name and returned.  For a FIFO opened for
-+ * writing a helper O_RDWR file is opened first, then the requested file
-+ * is opened on the same dentry.
-+ *
-+ * Returns the file or an ERR_PTR() value.
-+ */
-+static struct file *open_pipe(char *name,
-+			      struct cpt_file_image *fi,
-+			      unsigned flags,
-+			      struct cpt_context *ctx)
-+{
-+	int err;
-+	cpt_object_t *obj;
-+	struct cpt_inode_image ii;
-+	struct file *rf, *wf;
-+
-+	err = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &ii, ctx);
-+	if (err)
-+		return ERR_PTR(err);
-+
-+	if (ii.cpt_sb == FSMAGIC_PIPEFS) {
-+		int pfd[2];
-+
-+		if ((err = sc_pipe(pfd)) < 0)
-+			return ERR_PTR(err);
-+
-+		/* keep file references, give the descriptors back */
-+		rf = fcheck(pfd[0]);
-+		wf = fcheck(pfd[1]);
-+		get_file(rf);
-+		get_file(wf);
-+		sc_close(pfd[0]);
-+		sc_close(pfd[1]);
-+
-+		/* wf is the end that gets returned: make it the read end
-+		 * when the image file was open for reading */
-+		if (fi->cpt_mode&FMODE_READ) {
-+			struct file *tf;
-+			tf = wf; wf = rf; rf = tf;
-+		}
-+	} else {
-+		if (fi->cpt_mode&FMODE_READ) {
-+			rf = filp_open(name, flags, 0);
-+			if (IS_ERR(rf)) {
-+				dprintk_ctx("filp_open\n");
-+				return rf;
-+			}
-+			dprintk_ctx(CPT_FID "open RDONLY fifo ino %Ld %p %x\n", CPT_TID(current), fi->cpt_inode, rf, rf->f_dentry->d_inode->i_mode);
-+			return rf;
-+		}
-+
-+		dprintk_ctx(CPT_FID "open WRONLY fifo ino %Ld\n", CPT_TID(current), fi->cpt_inode);
-+
-+		rf = filp_open(name, O_RDWR|O_NONBLOCK, 0);
-+		if (IS_ERR(rf))
-+			return rf;
-+		wf = dentry_open(dget(rf->f_dentry),
-+				 mntget(rf->f_vfsmnt), flags);
-+		/* wf is dereferenced below, so an error must stop here;
-+		 * the helper file is no longer needed either */
-+		if (IS_ERR(wf)) {
-+			fput(rf);
-+			return wf;
-+		}
-+	}
-+
-+	/* Add pipe inode to obj table. */
-+	obj = cpt_object_add(CPT_OBJ_INODE, wf->f_dentry->d_inode, ctx);
-+	if (obj == NULL) {
-+		fput(rf); fput(wf);
-+		return ERR_PTR(-ENOMEM);
-+	}
-+	cpt_obj_setpos(obj, fi->cpt_inode, ctx);
-+	obj->o_parent = rf;
-+
-+	/* Add the other side of the pipe to the obj table; it will not be
-+	 * used (o_pos = CPT_NULL), other processes opening the pipe will
-+	 * find the inode and open it with dentry_open(). */
-+	obj = cpt_object_add(CPT_OBJ_FILE, rf, ctx);
-+	if (obj == NULL) {
-+		fput(wf);
-+		return ERR_PTR(-ENOMEM);
-+	}
-+	return wf;
-+}
-+
-+/*
-+ * Open a file that needs special treatment during restore.
-+ *
-+ * Returns NULL when the file is not special after all (directory, FIFO,
-+ * or a character device with MEM_MAJOR), an ERR_PTR() for unsupported
-+ * kinds (block devices, sockets, anything that is not a character
-+ * device) or when the inode image cannot be loaded, and otherwise the
-+ * result of rst_open_tty().  @deleted is not used.
-+ */
-+static struct file *open_special(struct cpt_file_image *fi,
-+				 unsigned flags,
-+				 int deleted,
-+				 struct cpt_context *ctx)
-+{
-+	struct cpt_inode_image *inode_img;
-+	struct file *result;
-+
-+	/* Directories and named pipes are not special actually */
-+	if (S_ISDIR(fi->cpt_i_mode) || S_ISFIFO(fi->cpt_i_mode))
-+		return NULL;
-+
-+	/* No support for block devices at the moment. */
-+	if (S_ISBLK(fi->cpt_i_mode))
-+		return ERR_PTR(-EINVAL);
-+
-+	if (S_ISSOCK(fi->cpt_i_mode)) {
-+		eprintk_ctx("bug: socket is not open\n");
-+		return ERR_PTR(-EINVAL);
-+	}
-+
-+	/* Support only (some) character devices at the moment. */
-+	if (!S_ISCHR(fi->cpt_i_mode))
-+		return ERR_PTR(-EINVAL);
-+
-+	inode_img = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
-+	if (inode_img == NULL)
-+		return ERR_PTR(-ENOMEM);
-+
-+	/* Do not worry about this right now. /dev/null,zero,*random are here.
-+	 * To prohibit at least /dev/mem?
-+	 */
-+	if (MAJOR(inode_img->cpt_rdev) == MEM_MAJOR)
-+		result = NULL;
-+	else
-+		result = rst_open_tty(fi, inode_img, flags, ctx);
-+
-+	kfree(inode_img);
-+	return result;
-+}
-+
-+/*
-+ * Re-apply one POSIX lock from its image to @file.
-+ *
-+ * The lock owner is the object found under index cpt_owner in the
-+ * CPT_OBJ_FILES table, the pid is translated with vpid_to_pid() and
-+ * FL_SLEEP is stripped from the saved flags.
-+ *
-+ * Returns -EINVAL when the owner index or the pid is unknown, otherwise
-+ * the result of posix_lock_file().
-+ */
-+static int restore_posix_lock(struct file *file, struct cpt_flock_image *fli, cpt_context_t *ctx)
-+{
-+	struct file_lock lock;
-+	cpt_object_t *owner;
-+
-+	owner = lookup_cpt_obj_byindex(CPT_OBJ_FILES, fli->cpt_owner, ctx);
-+	if (owner == NULL) {
-+		eprintk_ctx("unknown lock owner %d\n", (int)fli->cpt_owner);
-+		return -EINVAL;
-+	}
-+
-+	memset(&lock, 0, sizeof(lock));
-+	lock.fl_file = file;
-+	lock.fl_type = fli->cpt_type;
-+	lock.fl_flags = fli->cpt_flags & ~FL_SLEEP;
-+	lock.fl_start = fli->cpt_start;
-+	lock.fl_end = fli->cpt_end;
-+	lock.fl_owner = owner->o_obj;
-+
-+	lock.fl_pid = vpid_to_pid(fli->cpt_pid);
-+	if (lock.fl_pid < 0) {
-+		eprintk_ctx("unknown lock pid %d\n", lock.fl_pid);
-+		return -EINVAL;
-+	}
-+
-+	/* only reported; the lock is still attempted */
-+	if (lock.fl_owner == NULL)
-+		eprintk_ctx("no lock owner\n");
-+	return posix_lock_file(file, &lock);
-+}
-+
-+/*
-+ * Re-apply one BSD flock from its image to @file.
-+ *
-+ * The file is temporarily installed under a fresh descriptor so the lock
-+ * can be taken through sc_flock() in non-blocking mode; the descriptor is
-+ * closed again before returning.
-+ *
-+ * Returns the get_unused_fd() error, -EINVAL for an unknown lock type, or
-+ * the result of sc_flock().
-+ */
-+static int restore_flock(struct file *file, struct cpt_flock_image *fli,
-+			 cpt_context_t *ctx)
-+{
-+	int cmd, err, fd;
-+
-+	fd = get_unused_fd();
-+	if (fd < 0) {
-+		eprintk_ctx("BSD flock cannot be restored\n");
-+		return fd;
-+	}
-+	/* the descriptor gets its own reference to the file */
-+	get_file(file);
-+	fd_install(fd, file);
-+
-+	switch (fli->cpt_type) {
-+	case F_RDLCK:
-+		cmd = LOCK_SH;
-+		break;
-+	case F_WRLCK:
-+		cmd = LOCK_EX;
-+		break;
-+	default:
-+		eprintk_ctx("flock flavor is unknown: %u\n", fli->cpt_type);
-+		sc_close(fd);
-+		return -EINVAL;
-+	}
-+
-+	err = sc_flock(fd, LOCK_NB | cmd);
-+	sc_close(fd);
-+	return err;
-+}
-+
-+
-+/*
-+ * Walk the objects stored after a file image and restore every
-+ * CPT_OBJ_FLOCK record that carries FL_POSIX.  Other records are skipped.
-+ *
-+ * Returns 0 on success or the first read/restore error.
-+ */
-+static int fixup_posix_locks(struct file *file,
-+			     struct cpt_file_image *fi,
-+			     loff_t pos, struct cpt_context *ctx)
-+{
-+	struct cpt_flock_image fli;
-+	loff_t end = pos + fi->cpt_next;
-+	int err;
-+
-+	for (pos += fi->cpt_hdrlen; pos < end; pos += fli.cpt_next) {
-+		/* any object type is accepted here and filtered below */
-+		err = rst_get_object(-1, pos, &fli, ctx);
-+		if (err)
-+			return err;
-+
-+		if (fli.cpt_object == CPT_OBJ_FLOCK &&
-+		    (fli.cpt_flags&FL_POSIX)) {
-+			err = restore_posix_lock(file, &fli, ctx);
-+			if (err)
-+				return err;
-+			dprintk_ctx("posix lock restored\n");
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Restore POSIX locks for every file object known to the context.
-+ *
-+ * Objects without an image position (o_pos == CPT_NULL) are skipped, as are
-+ * file images that carry nothing after their header.  A failure to read a
-+ * file image or to restore one of its locks aborts the walk and is returned
-+ * to the caller instead of being silently dropped.
-+ */
-+int rst_posix_locks(struct cpt_context *ctx)
-+{
-+	int err;
-+	cpt_object_t *obj;
-+
-+	for_each_object(obj, CPT_OBJ_FILE) {
-+		struct file *file = obj->o_obj;
-+		struct cpt_file_image fi;
-+
-+		if (obj->o_pos == CPT_NULL)
-+			continue;
-+
-+		err = rst_get_object(CPT_OBJ_FILE, obj->o_pos, &fi, ctx);
-+		if (err < 0)
-+			return err;
-+
-+		/* Nothing follows the header: no lock records to restore. */
-+		if (fi.cpt_next <= fi.cpt_hdrlen)
-+			continue;
-+
-+		err = fixup_posix_locks(file, &fi, obj->o_pos, ctx);
-+		if (err)
-+			return err;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Walk the objects stored after the header of the file image at @pos and
-+ * restore every CPT_OBJ_FLOCK record that carries FL_FLOCK (BSD locks).
-+ * Stops at the first failure and returns its error code; 0 otherwise.
-+ */
-+static int fixup_flocks(struct file *file,
-+			struct cpt_file_image *fi,
-+			loff_t pos, struct cpt_context *ctx)
-+{
-+	struct cpt_flock_image fli;
-+	loff_t limit = pos + fi->cpt_next;
-+	loff_t cur;
-+	int rc;
-+
-+	for (cur = pos + fi->cpt_hdrlen; cur < limit; cur += fli.cpt_next) {
-+		rc = rst_get_object(-1, cur, &fli, ctx);
-+		if (rc)
-+			return rc;
-+
-+		if (fli.cpt_object != CPT_OBJ_FLOCK)
-+			continue;
-+		if (!(fli.cpt_flags&FL_FLOCK))
-+			continue;
-+
-+		rc = restore_flock(file, &fli, ctx);
-+		if (rc)
-+			return rc;
-+		dprintk_ctx("bsd lock restored\n");
-+	}
-+	return 0;
-+}
-+
-+
-+/*
-+ * Copy saved file contents from the image back into @file.
-+ *
-+ * The image range [@pos, @end) is read as a sequence of CPT_OBJ_PAGES
-+ * blocks; each block's payload is written to the file offsets
-+ * [cpt_start, cpt_end) in chunks of at most PAGE_SIZE through ctx->tmpbuf.
-+ *
-+ * Returns 0 on success or a negative error code.  A short write is
-+ * reported as -EIO.  -EINVAL is returned when the file has no write method.
-+ */
-+static int fixup_reg_data(struct file *file, loff_t pos, loff_t end,
-+ struct cpt_context *ctx)
-+{
-+ int err;
-+ struct cpt_page_block pgb;
-+ ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
-+
-+ do_write = file->f_op->write;
-+ if (do_write == NULL) {
-+ eprintk_ctx("no write method. Cannot restore contents of the file.\n");
-+ return -EINVAL;
-+ }
-+
-+ /* Extra reference held for the duration of the copy; dropped at "out". */
-+ atomic_inc(&file->f_count);
-+
-+ while (pos < end) {
-+ loff_t opos;
-+ loff_t ipos;
-+ int count;
-+
-+ err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
-+ if (err)
-+ goto out;
-+ dprintk_ctx("restoring file data block: %08x-%08x\n",
-+ (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
-+ /* ipos: read cursor in the image; opos: write cursor in the file. */
-+ ipos = pos + pgb.cpt_hdrlen;
-+ opos = pgb.cpt_start;
-+ count = pgb.cpt_end-pgb.cpt_start;
-+ while (count > 0) {
-+ mm_segment_t oldfs;
-+ int copy = count;
-+
-+ if (copy > PAGE_SIZE)
-+ copy = PAGE_SIZE;
-+ (void)cpt_get_buf(ctx);
-+ oldfs = get_fs(); set_fs(KERNEL_DS);
-+ err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
-+ set_fs(oldfs);
-+ if (err) {
-+ __cpt_release_buf(ctx);
-+ goto out;
-+ }
-+ /*
-+ * The file cannot be written through as opened (not writable, or
-+ * O_DIRECT): drop our extra reference and continue with a fresh
-+ * O_WRONLY open of the same dentry/vfsmount.  The old struct file
-+ * is still dereferenced after fput(), which relies on the caller
-+ * holding its own reference.  On failure nothing is left to put,
-+ * hence the direct return.
-+ */
-+ if (!(file->f_mode & FMODE_WRITE) ||
-+ (file->f_flags&O_DIRECT)) {
-+ fput(file);
-+ file = dentry_open(dget(file->f_dentry),
-+ mntget(file->f_vfsmnt), O_WRONLY);
-+ if (IS_ERR(file)) {
-+ __cpt_release_buf(ctx);
-+ return PTR_ERR(file);
-+ }
-+ }
-+ /* tmpbuf is a kernel buffer, so the write runs under KERNEL_DS. */
-+ oldfs = get_fs(); set_fs(KERNEL_DS);
-+ ipos += copy;
-+ err = do_write(file, ctx->tmpbuf, copy, &opos);
-+ set_fs(oldfs);
-+ __cpt_release_buf(ctx);
-+ if (err != copy) {
-+ /* Short write: turn a non-negative count into -EIO. */
-+ if (err >= 0)
-+ err = -EIO;
-+ goto out;
-+ }
-+ count -= copy;
-+ }
-+ pos += pgb.cpt_next;
-+ }
-+ err = 0;
-+
-+out:
-+ fput(file);
-+ return err;
-+}
-+
-+
-+/*
-+ * Restore contents, size and timestamps of a regular file from its inode
-+ * image.  Non-regular files are left alone (returns 0).
-+ *
-+ * When *@file_p is NULL a shmem file of the saved size is created and
-+ * stored back through @file_p.  Data blocks following the inode header are
-+ * replayed with fixup_reg_data(); afterwards size/ctime and then
-+ * atime/mtime are applied through notify_change() under i_mutex.
-+ */
-+static int fixup_file_content(struct file **file_p, struct cpt_file_image *fi,
-+			      struct cpt_context *ctx)
-+{
-+	struct cpt_inode_image inode_img;
-+	struct iattr attr;
-+	struct file *filp = *file_p;
-+	struct inode *inode;
-+	int rc;
-+
-+	if (!S_ISREG(fi->cpt_i_mode))
-+		return 0;
-+
-+	rc = rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, &inode_img, ctx);
-+	if (rc)
-+		return rc;
-+
-+	if (filp == NULL) {
-+		filp = shmem_file_setup("dev/zero", inode_img.cpt_size, 0);
-+		if (IS_ERR(filp))
-+			return PTR_ERR(filp);
-+		*file_p = filp;
-+	}
-+
-+	if (inode_img.cpt_next > inode_img.cpt_hdrlen) {
-+		rc = fixup_reg_data(filp,
-+				    fi->cpt_inode+inode_img.cpt_hdrlen,
-+				    fi->cpt_inode+inode_img.cpt_next, ctx);
-+		if (rc)
-+			return rc;
-+	}
-+
-+	inode = filp->f_dentry->d_inode;
-+	mutex_lock(&inode->i_mutex);
-+
-+	/* First pass: size and ctime, the way do_truncate does it. */
-+	attr.ia_valid = ATTR_SIZE | ATTR_CTIME;
-+	attr.ia_size = inode_img.cpt_size;
-+	cpt_timespec_import(&attr.ia_ctime, inode_img.cpt_ctime);
-+	rc = notify_change(filp->f_dentry, &attr);
-+
-+	if (rc == 0) {
-+		/* Second pass: access and modification times. */
-+		attr.ia_valid = ATTR_MTIME | ATTR_ATIME |
-+				ATTR_ATIME_SET | ATTR_MTIME_SET;
-+		cpt_timespec_import(&attr.ia_atime, inode_img.cpt_atime);
-+		cpt_timespec_import(&attr.ia_mtime, inode_img.cpt_mtime);
-+		rc = notify_change(filp->f_dentry, &attr);
-+	}
-+
-+	mutex_unlock(&inode->i_mutex);
-+	return rc;
-+}
-+
-+/*
-+ * Bring position, ownership, f_mode and f_flags of a freshly opened @file
-+ * in line with the saved image @fi.
-+ *
-+ * @was_dentry_open: non-zero when the file was obtained with dentry_open();
-+ *                   only then FMODE_PREAD/FMODE_LSEEK are copied from the image.
-+ * @pos:             image position of the file, used in diagnostics only.
-+ *
-+ * Returns 0 on success, -EINVAL when the SIGIO owner no longer exists,
-+ * FASYNC cannot be re-armed, or the flags still differ after fixing up.
-+ * A remaining f_mode mismatch is only warned about.
-+ */
-+static int fixup_file_flags(struct file *file, struct cpt_file_image *fi,
-+			    int was_dentry_open, loff_t pos,
-+			    cpt_context_t *ctx)
-+{
-+	if (fi->cpt_pos != file->f_pos) {
-+		int err = -ESPIPE;
-+		if (file->f_op->llseek)
-+			err = file->f_op->llseek(file, fi->cpt_pos, 0);
-+		if (err < 0) {
-+			/* Seeking failed or is unsupported: force the offset. */
-+			dprintk_ctx("file %Ld lseek %Ld - %Ld\n", pos, file->f_pos, fi->cpt_pos);
-+			file->f_pos = fi->cpt_pos;
-+		}
-+	}
-+	file->f_uid = fi->cpt_uid;
-+	file->f_gid = fi->cpt_gid;
-+	file->f_owner.pid = 0;
-+	if (fi->cpt_fown_pid) {
-+		file->f_owner.pid = comb_vpid_to_pid(fi->cpt_fown_pid);
-+		if (file->f_owner.pid == 0) {
-+			/*
-+			 * Report the saved pid: f_owner.pid is known to be 0
-+			 * here and would make the message useless.
-+			 */
-+			wprintk_ctx("fixup_file_flags: owner %d does not exist anymore\n", (int)fi->cpt_fown_pid);
-+			return -EINVAL;
-+		}
-+	}
-+	file->f_owner.uid = fi->cpt_fown_uid;
-+	file->f_owner.euid = fi->cpt_fown_euid;
-+	file->f_owner.signum = fi->cpt_fown_signo;
-+
-+	if (file->f_mode != fi->cpt_mode) {
-+		if (was_dentry_open &&
-+		    ((file->f_mode^fi->cpt_mode)&(FMODE_PREAD|FMODE_LSEEK))) {
-+			file->f_mode &= ~(FMODE_PREAD|FMODE_LSEEK);
-+			file->f_mode |= fi->cpt_mode&(FMODE_PREAD|FMODE_LSEEK);
-+		}
-+		if (file->f_mode != fi->cpt_mode)
-+			wprintk_ctx("file %ld mode mismatch %08x %08x\n", (long)pos, file->f_mode, fi->cpt_mode);
-+	}
-+	if (file->f_flags != fi->cpt_flags) {
-+		if (!(fi->cpt_flags&O_NOFOLLOW))
-+			file->f_flags &= ~O_NOFOLLOW;
-+		if ((file->f_flags^fi->cpt_flags)&O_NONBLOCK) {
-+			file->f_flags &= ~O_NONBLOCK;
-+			file->f_flags |= fi->cpt_flags&O_NONBLOCK;
-+		}
-+		if (fi->cpt_flags&FASYNC) {
-+			if (fi->cpt_fown_fd == -1) {
-+				wprintk_ctx("No fd for FASYNC\n");
-+				return -EINVAL;
-+			} else if (file->f_op && file->f_op->fasync) {
-+				if (file->f_op->fasync(fi->cpt_fown_fd, file, 1) < 0) {
-+					wprintk_ctx("FASYNC problem\n");
-+					return -EINVAL;
-+				} else {
-+					file->f_flags |= FASYNC;
-+				}
-+			}
-+		}
-+		/* Anything still different could not be reconciled. */
-+		if (file->f_flags != fi->cpt_flags) {
-+			eprintk_ctx("file %ld flags mismatch %08x %08x\n", (long)pos, file->f_flags, fi->cpt_flags);
-+			return -EINVAL;
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Recreate a file that was open but already unlinked at checkpoint time.
-+ *
-+ * The " (deleted)" suffix (or "(deleted) " prefix) is stripped from @name
-+ * in place, an object of the saved type (fifo, char device, directory or
-+ * regular file) is created under that name, opened, and unlinked again.
-+ * On -EEXIST a ".%08x" suffix is appended and the creation is retried, up
-+ * to 1000 attempts.  If creation fails for another reason the whole
-+ * procedure is repeated once with a "/tmp/rst<pid>" base name.
-+ *
-+ * @name is modified and must be a writable buffer.  A NULL @name is
-+ * rejected with -EINVAL.
-+ *
-+ * Returns the opened file or an ERR_PTR().
-+ */
-+static struct file *
-+open_deleted(char *name, unsigned flags, struct cpt_file_image *fi,
-+	     cpt_context_t *ctx)
-+{
-+	struct file * file;
-+	char *suffix = NULL;
-+	int attempt = 0;
-+	int tmp_pass = 0;
-+	mode_t mode = fi->cpt_i_mode;
-+
-+	/* The image may carry no name at all; strlen(NULL) would oops. */
-+	if (name == NULL) {
-+		eprintk_ctx("open_deleted: no name\n");
-+		return ERR_PTR(-EINVAL);
-+	}
-+
-+	/* Strip (deleted) part... */
-+	if (strlen(name) > strlen(" (deleted)")) {
-+		if (strcmp(name + strlen(name) - strlen(" (deleted)"), " (deleted)") == 0) {
-+			suffix = &name[strlen(name) - strlen(" (deleted)")];
-+			*suffix = 0;
-+		} else if (memcmp(name, "(deleted) ", strlen("(deleted) ")) == 0) {
-+			memmove(name, name + strlen("(deleted) "), strlen(name) - strlen(" (deleted)") + 1);
-+			suffix = name + strlen(name);
-+		}
-+	}
-+
-+try_again:
-+	for (;;) {
-+		if (attempt) {
-+			if (attempt > 1000) {
-+				eprintk_ctx("open_deleted: failed after %d attempts\n", attempt);
-+				return ERR_PTR(-EEXIST);
-+			}
-+			if (suffix == NULL) {
-+				eprintk_ctx("open_deleted: no suffix\n");
-+				return ERR_PTR(-EEXIST);
-+			}
-+			/* Retry under a different name. */
-+			sprintf(suffix, ".%08x", (unsigned)((xtime.tv_nsec>>10)+attempt));
-+		}
-+		attempt++;
-+
-+		if (S_ISFIFO(mode)) {
-+			int err;
-+			err = sc_mknod(name, S_IFIFO|(mode&017777), 0);
-+			if (err == -EEXIST)
-+				continue;
-+			if (err < 0 && !tmp_pass)
-+				goto change_dir;
-+			if (err < 0)
-+				return ERR_PTR(err);
-+			file = open_pipe(name, fi, flags, ctx);
-+			sc_unlink(name);
-+		} else if (S_ISCHR(mode)) {
-+			int err;
-+			struct cpt_inode_image *ii;
-+
-+			ii = __rst_get_object(CPT_OBJ_INODE, fi->cpt_inode, ctx);
-+			if (ii == NULL)
-+				return ERR_PTR(-ENOMEM);
-+			err = sc_mknod(name, S_IFCHR|(mode&017777), new_encode_dev(ii->cpt_rdev));
-+			kfree(ii);
-+			if (err == -EEXIST)
-+				continue;
-+			if (err < 0 && !tmp_pass)
-+				goto change_dir;
-+			if (err < 0)
-+				return ERR_PTR(err);
-+			file = filp_open(name, flags, mode&017777);
-+			sc_unlink(name);
-+		} else if (S_ISDIR(mode)) {
-+			int err;
-+			err = sc_mkdir(name, mode&017777);
-+			if (err == -EEXIST)
-+				continue;
-+			if (err < 0 && !tmp_pass)
-+				goto change_dir;
-+			if (err < 0)
-+				return ERR_PTR(err);
-+			file = filp_open(name, flags, mode&017777);
-+			sc_rmdir(name);
-+		} else {
-+			file = filp_open(name, O_CREAT|O_EXCL|flags, mode&017777);
-+			if (IS_ERR(file)) {
-+				if (PTR_ERR(file) == -EEXIST)
-+					continue;
-+				if (!tmp_pass)
-+					goto change_dir;
-+			} else {
-+				sc_unlink(name);
-+			}
-+		}
-+		break;
-+	}
-+
-+	if (IS_ERR(file)) {
-+		eprintk_ctx("filp_open %s: %ld\n", name, PTR_ERR(file));
-+		return file;
-+	} else {
-+		dprintk_ctx("deleted file created as %s, %p, %x\n", name, file, file->f_dentry->d_inode->i_mode);
-+	}
-+	return file;
-+
-+change_dir:
-+	/* Second and last pass: retry under /tmp with a pid-based name. */
-+	sprintf(name, "/tmp/rst%u", current->pid);
-+	suffix = name + strlen(name);
-+	attempt = 1;
-+	tmp_pass = 1;
-+	goto try_again;
-+}
-+
-+/*
-+ * Return a referenced struct file for the file image stored at @pos.
-+ *
-+ * If the image position is already bound to a file object, that file is
-+ * returned with an extra reference.  Otherwise the file is reopened by one
-+ * of several strategies (dentry_open() on an already restored inode,
-+ * recreation of a deleted file, epoll/pipe/special openers, or a plain
-+ * filp_open() by name), fixed up, and registered in the object tables.
-+ *
-+ * Returns the file, an ERR_PTR() on failure, or NULL when opening failed
-+ * for a CPT_DENTRY_PROC file ("delayed").  @fd is not used in this
-+ * function body.
-+ */
-+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx)
-+{
-+ int err;
-+ int was_dentry_open = 0;
-+ cpt_object_t *obj;
-+ cpt_object_t *iobj;
-+ struct cpt_file_image fi;
-+ __u8 *name = NULL;
-+ struct file *file;
-+ int flags;
-+
-+ /* Already restored: hand out another reference. */
-+ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
-+ if (obj) {
-+ file = obj->o_obj;
-+ if (obj->o_index >= 0) {
-+ dprintk_ctx("file is attached to a socket\n");
-+ err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
-+ if (err < 0)
-+ goto err_out;
-+ /* Return value is not checked here. */
-+ fixup_file_flags(file, &fi, 0, pos, ctx);
-+ }
-+ get_file(file);
-+ return file;
-+ }
-+
-+ err = rst_get_object(CPT_OBJ_FILE, pos, &fi, ctx);
-+ if (err < 0)
-+ goto err_out;
-+
-+ flags = make_flags(&fi);
-+
-+ /* Easy way, inode has been already open. */
-+ if (fi.cpt_inode != CPT_NULL &&
-+ !(fi.cpt_lflags & CPT_DENTRY_CLONING) &&
-+ (iobj = lookup_cpt_obj_bypos(CPT_OBJ_INODE, fi.cpt_inode, ctx)) != NULL &&
-+ iobj->o_parent) {
-+ struct file *filp = iobj->o_parent;
-+ file = dentry_open(dget(filp->f_dentry),
-+ mntget(filp->f_vfsmnt), flags);
-+ dprintk_ctx("rst_file: file obtained by dentry_open\n");
-+ was_dentry_open = 1;
-+ goto map_file;
-+ }
-+
-+ /* A name is read only when something follows the fixed header. */
-+ if (fi.cpt_next > fi.cpt_hdrlen)
-+ name = rst_get_name(pos + sizeof(fi), ctx);
-+
-+ if (fi.cpt_lflags == CPT_DENTRY_DELETED) {
-+ if (fi.cpt_inode == CPT_NULL) {
-+ eprintk_ctx("deleted file and no inode.\n");
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+
-+ /* One very special case... */
-+ if (S_ISREG(fi.cpt_i_mode) &&
-+ (!name || !name[0] || strcmp(name, "/dev/zero (deleted)") == 0)) {
-+ /* MAP_ANON|MAP_SHARED mapping.
-+ * kernel makes this damn ugly way, when file which
-+ * is passed to mmap by user does not match
-+ * file finally attached to VMA. Ok, rst_mm
-+ * has to take care of this. Otherwise, it will fail.
-+ */
-+ /* fixup_file_content() below creates the shmem file for this case. */
-+ file = NULL;
-+ } else if (S_ISREG(fi.cpt_i_mode) ||
-+ S_ISCHR(fi.cpt_i_mode) ||
-+ S_ISFIFO(fi.cpt_i_mode) ||
-+ S_ISDIR(fi.cpt_i_mode)) {
-+ if (S_ISCHR(fi.cpt_i_mode)) {
-+ file = open_special(&fi, flags, 1, ctx);
-+ if (file != NULL)
-+ goto map_file;
-+ }
-+ /* NOTE(review): name may be NULL here if the image carried none. */
-+ file = open_deleted(name, flags, &fi, ctx);
-+ if (IS_ERR(file))
-+ goto out;
-+ } else {
-+ eprintk_ctx("not a regular deleted file.\n");
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+
-+ err = fixup_file_content(&file, &fi, ctx);
-+ if (err)
-+ goto err_put;
-+ goto map_file;
-+ } else {
-+ if (!name || !name[0]) {
-+ eprintk_ctx("no name for file?\n");
-+ err = -EINVAL;
-+ goto err_out;
-+ }
-+ /* Each specialised opener returns NULL to mean "not mine, try the next". */
-+ if ((fi.cpt_lflags & CPT_DENTRY_EPOLL) &&
-+ (file = cpt_open_epolldev(&fi, flags, ctx)) != NULL)
-+ goto map_file;
-+ if (S_ISFIFO(fi.cpt_i_mode) &&
-+ (file = open_pipe(name, &fi, flags, ctx)) != NULL)
-+ goto map_file;
-+ if (!S_ISREG(fi.cpt_i_mode) &&
-+ (file = open_special(&fi, flags, 0, ctx)) != NULL)
-+ goto map_file;
-+ }
-+
-+ file = filp_open(name, flags, 0);
-+
-+map_file:
-+ if (!IS_ERR(file)) {
-+ /* Return value is not checked here. */
-+ fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
-+
-+ if (S_ISFIFO(fi.cpt_i_mode) && !was_dentry_open) {
-+ err = fixup_pipe_data(file, &fi, ctx);
-+ if (err)
-+ goto err_put;
-+ }
-+
-+ /* Bind the file to its image position; a new table entry pins the file. */
-+ obj = cpt_object_get(CPT_OBJ_FILE, file, ctx);
-+ if (!obj) {
-+ obj = cpt_object_add(CPT_OBJ_FILE, file, ctx);
-+ if (obj)
-+ get_file(file);
-+ }
-+ if (obj)
-+ cpt_obj_setpos(obj, pos, ctx);
-+
-+ /* Remember a file for this inode so later opens can use dentry_open(). */
-+ obj = cpt_object_add(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
-+ if (obj) {
-+ cpt_obj_setpos(obj, fi.cpt_inode, ctx);
-+ if (!obj->o_parent || fi.cpt_lflags != CPT_DENTRY_DELETED)
-+ obj->o_parent = file;
-+ }
-+
-+ if (fi.cpt_next > fi.cpt_hdrlen) {
-+ err = fixup_flocks(file, &fi, pos, ctx);
-+ if (err)
-+ goto err_put;
-+ }
-+ } else {
-+ /* /proc files that fail to open are reported as NULL ("delayed"). */
-+ if (fi.cpt_lflags & CPT_DENTRY_PROC) {
-+ dprintk_ctx("rst_file /proc delayed\n");
-+ file = NULL;
-+ }
-+ }
-+
-+out:
-+ if (name)
-+ rst_put_name(name, ctx);
-+ return file;
-+
-+err_put:
-+ if (file)
-+ fput(file);
-+err_out:
-+ if (name)
-+ rst_put_name(name, ctx);
-+ return ERR_PTR(err);
-+}
-+
-+
-+/*
-+ * Compute which of CLONE_FILES / CLONE_FS apply to the task image @ti.
-+ * A flag is set when the task has no such structure in the image
-+ * (CPT_NULL) or when the structure at that position is already known to
-+ * the context.
-+ */
-+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	int share_files, share_fs;
-+
-+	share_files = ti->cpt_files == CPT_NULL ||
-+		lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx) != NULL;
-+	share_fs = ti->cpt_fs == CPT_NULL ||
-+		lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx) != NULL;
-+
-+	return (share_files ? CLONE_FILES : 0) | (share_fs ? CLONE_FS : 0);
-+}
-+
-+/*
-+ * Close every descriptor marked open in @files and clear the open and
-+ * close-on-exec bitmaps, one bitmap word at a time.  The walk stops at the
-+ * first word whose base descriptor reaches max_fdset or max_fds.
-+ */
-+static void local_close_files(struct files_struct * files)
-+{
-+	int word;
-+
-+	for (word = 0; ; word++) {
-+		unsigned long bits;
-+		int fd = word * __NFDBITS;
-+
-+		if (fd >= files->fdt->max_fdset || fd >= files->fdt->max_fds)
-+			break;
-+
-+		for (bits = files->fdt->open_fds->fds_bits[word];
-+		     bits != 0;
-+		     bits >>= 1, fd++) {
-+			struct file *victim;
-+
-+			if (!(bits & 1))
-+				continue;
-+			/* Detach the slot first, then close what was in it. */
-+			victim = xchg(&files->fdt->fd[fd], NULL);
-+			if (victim)
-+				filp_close(victim, files);
-+		}
-+
-+		files->fdt->open_fds->fds_bits[word] = 0;
-+		files->fdt->close_on_exec->fds_bits[word] = 0;
-+	}
-+}
-+
-+extern int expand_fdtable(struct files_struct *files, int nr);
-+
-+
-+/*
-+ * Restore the descriptor table of the current task from task image @ti.
-+ *
-+ * Three cases:
-+ *  - the image has no files structure: current->files is dropped;
-+ *  - the structure at that image position is already restored: the
-+ *    current task is switched to share it;
-+ *  - otherwise all current descriptors are closed and the table is
-+ *    repopulated from the CPT_OBJ_FILEDESC records, then registered as a
-+ *    CPT_OBJ_FILES object.
-+ *
-+ * Descriptors whose file comes back as NULL from rst_file() are queued
-+ * with rst_filejob_queue().  Returns 0 or a negative error code.
-+ */
-+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+ struct cpt_files_struct_image fi;
-+ struct files_struct *f = current->files;
-+ cpt_object_t *obj;
-+ loff_t pos, endpos;
-+ int err;
-+
-+ if (ti->cpt_files == CPT_NULL) {
-+ current->files = NULL;
-+ if (f)
-+ put_files_struct(f);
-+ return 0;
-+ }
-+
-+ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
-+ if (obj) {
-+ /* Already restored by another task: share it. */
-+ if (obj->o_obj != f) {
-+ put_files_struct(f);
-+ f = obj->o_obj;
-+ atomic_inc(&f->count);
-+ current->files = f;
-+ }
-+ return 0;
-+ }
-+
-+ err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
-+ if (err)
-+ return err;
-+
-+ local_close_files(f);
-+
-+ /* Grow the table so that every saved descriptor number fits. */
-+ if (fi.cpt_max_fds > f->fdt->max_fds) {
-+ spin_lock(&f->file_lock);
-+ err = expand_fdtable(f, fi.cpt_max_fds-1);
-+ spin_unlock(&f->file_lock);
-+ if (err)
-+ return err;
-+ }
-+
-+ pos = ti->cpt_files + fi.cpt_hdrlen;
-+ endpos = ti->cpt_files + fi.cpt_next;
-+ while (pos < endpos) {
-+ struct cpt_fd_image fdi;
-+ struct file *filp;
-+
-+ err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
-+ if (err)
-+ return err;
-+ filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
-+ if (IS_ERR(filp)) {
-+ eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file);
-+ return PTR_ERR(filp);
-+ }
-+ if (filp == NULL) {
-+ /* File not available yet: queue the descriptor for later. */
-+ int err = rst_filejob_queue(pos, ctx);
-+ if (err)
-+ return err;
-+ } else {
-+ /* NOTE(review): a descriptor number from the image beyond max_fds hits BUG(). */
-+ if (fdi.cpt_fd >= f->fdt->max_fds) BUG();
-+ f->fdt->fd[fdi.cpt_fd] = filp;
-+ FD_SET(fdi.cpt_fd, f->fdt->open_fds);
-+ if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
-+ FD_SET(fdi.cpt_fd, f->fdt->close_on_exec);
-+ }
-+ pos += fdi.cpt_next;
-+ }
-+ f->fdt->next_fd = fi.cpt_next_fd;
-+
-+ obj = cpt_object_add(CPT_OBJ_FILES, f, ctx);
-+ if (obj) {
-+ cpt_obj_setpos(obj, ti->cpt_files, ctx);
-+ cpt_obj_setindex(obj, fi.cpt_index, ctx);
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Process the queue of postponed descriptors (ctx->filejob_queue).
-+ *
-+ * For each job the target task is looked up by pid, the descriptor image
-+ * is re-read, the file is opened with rst_file() and installed into the
-+ * task's descriptor table.  A completed job is unlinked and freed.
-+ *
-+ * On the first failure the function returns the error and leaves the
-+ * failing job and everything behind it on the queue.
-+ */
-+int rst_do_filejobs(cpt_context_t *ctx)
-+{
-+ struct filejob *j;
-+
-+ while ((j = ctx->filejob_queue) != NULL) {
-+ int err;
-+ task_t *tsk;
-+ struct cpt_fd_image fdi;
-+ struct file *filp;
-+
-+ /* Pin the task while its descriptor table is being modified. */
-+ read_lock(&tasklist_lock);
-+ tsk = find_task_by_pid_ve(j->pid);
-+ if (tsk)
-+ get_task_struct(tsk);
-+ read_unlock(&tasklist_lock);
-+ if (!tsk)
-+ return -EINVAL;
-+
-+ err = rst_get_object(CPT_OBJ_FILEDESC, j->fdi, &fdi, ctx);
-+ if (err) {
-+ put_task_struct(tsk);
-+ return err;
-+ }
-+
-+ if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
-+ /* The slot must still be free. */
-+ if (tsk->files->fdt->fd[fdi.cpt_fd] ||
-+ FD_ISSET(fdi.cpt_fd, tsk->files->fdt->open_fds)) {
-+ eprintk_ctx("doing filejob %Ld: fd is busy\n", j->fdi);
-+ put_task_struct(tsk);
-+ return -EBUSY;
-+ }
-+
-+ filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
-+ if (IS_ERR(filp)) {
-+ eprintk_ctx("rst_do_filejobs: 1: %ld %Lu\n", PTR_ERR(filp), fdi.cpt_file);
-+ put_task_struct(tsk);
-+ return PTR_ERR(filp);
-+ }
-+ /* NOTE(review): a NULL result from rst_file() is installed as is. */
-+ if (fdi.cpt_fd >= tsk->files->fdt->max_fds) BUG();
-+ tsk->files->fdt->fd[fdi.cpt_fd] = filp;
-+ FD_SET(fdi.cpt_fd, tsk->files->fdt->open_fds);
-+ if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
-+ FD_SET(fdi.cpt_fd, tsk->files->fdt->close_on_exec);
-+
-+ dprintk_ctx("filejob %Ld done\n", j->fdi);
-+
-+ put_task_struct(tsk);
-+ ctx->filejob_queue = j->next;
-+ kfree(j);
-+ }
-+ return 0;
-+}
-+
-+/* Discard every job still pending on ctx->filejob_queue. */
-+void rst_flush_filejobs(cpt_context_t *ctx)
-+{
-+	struct filejob *job = ctx->filejob_queue;
-+
-+	while (job != NULL) {
-+		struct filejob *next = job->next;
-+
-+		ctx->filejob_queue = next;
-+		kfree(job);
-+		job = next;
-+	}
-+}
-+
-+/*
-+ * Attach the proper fs_struct to the current task for task image @ti.
-+ *
-+ * With no fs in the image the current fs is released.  If the fs at that
-+ * image position is already registered the task is switched to share it.
-+ * Otherwise the task's current fs_struct is registered for that position.
-+ * Always returns 0.
-+ */
-+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	struct fs_struct *cur_fs = current->fs;
-+	cpt_object_t *obj;
-+
-+	if (ti->cpt_fs == CPT_NULL) {
-+		exit_fs(current);
-+		return 0;
-+	}
-+
-+	obj = lookup_cpt_obj_bypos(CPT_OBJ_FS, ti->cpt_fs, ctx);
-+	if (obj == NULL) {
-+		/* Do _not_ restore root. Image contains absolute pathnames.
-+		 * So, we fix it in context of rst process.
-+		 */
-+		obj = cpt_object_add(CPT_OBJ_FS, cur_fs, ctx);
-+		if (obj)
-+			cpt_obj_setpos(obj, ti->cpt_fs, ctx);
-+		return 0;
-+	}
-+
-+	if (obj->o_obj != cur_fs) {
-+		struct fs_struct *shared;
-+
-+		exit_fs(current);
-+		shared = obj->o_obj;
-+		atomic_inc(&shared->count);
-+		current->fs = shared;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Open the directory whose file image sits at *@pos and hand back
-+ * referenced dentry/vfsmount pointers through @dp and @mp.
-+ *
-+ * On success *@pos is advanced past the file image and 0 is returned.
-+ * On failure a negative error code is returned and *@pos, *@dp and *@mp
-+ * are left untouched.  rst_file() may legitimately return NULL (a file
-+ * whose restore is delayed); that cannot be used as a directory and is
-+ * reported as -EINVAL instead of being dereferenced.
-+ */
-+static int get_dir(struct dentry **dp, struct vfsmount **mp,
-+		   loff_t *pos, struct cpt_context *ctx)
-+{
-+	struct cpt_file_image fi;
-+	struct file * file;
-+	int err;
-+
-+	err = rst_get_object(CPT_OBJ_FILE, *pos, &fi, ctx);
-+	if (err)
-+		return err;
-+
-+	file = rst_file(*pos, -1, ctx);
-+	if (IS_ERR(file))
-+		return PTR_ERR(file);
-+	if (file == NULL) {
-+		eprintk_ctx("get_dir: file is not available\n");
-+		return -EINVAL;
-+	}
-+
-+	/* Keep our own references; the struct file itself is not needed. */
-+	*dp = dget(file->f_dentry);
-+	*mp = mntget(file->f_vfsmnt);
-+	*pos += fi.cpt_next;
-+	fput(file);
-+	return 0;
-+}
-+
-+/*
-+ * Install @mnt/@dentry as the root of @fs under fs->lock and release the
-+ * previous root, if there was one.  The references passed in are taken
-+ * over by @fs.
-+ */
-+static void __set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
-+			  struct dentry *dentry)
-+{
-+	struct dentry *prev_dentry;
-+	struct vfsmount *prev_mnt;
-+
-+	write_lock(&fs->lock);
-+	prev_dentry = fs->root;
-+	prev_mnt = fs->rootmnt;
-+	fs->rootmnt = mnt;
-+	fs->root = dentry;
-+	write_unlock(&fs->lock);
-+
-+	if (!prev_dentry)
-+		return;
-+	dput(prev_dentry);
-+	mntput(prev_mnt);
-+}
-+
-+/*
-+ * Install @mnt/@dentry as the working directory of @fs under fs->lock and
-+ * release the previous one, if there was one.  The references passed in
-+ * are taken over by @fs.
-+ */
-+static void __set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+			 struct dentry *dentry)
-+{
-+	struct dentry *prev_dentry;
-+	struct vfsmount *prev_mnt;
-+
-+	write_lock(&fs->lock);
-+	prev_dentry = fs->pwd;
-+	prev_mnt = fs->pwdmnt;
-+	fs->pwdmnt = mnt;
-+	fs->pwd = dentry;
-+	write_unlock(&fs->lock);
-+
-+	if (!prev_dentry)
-+		return;
-+	dput(prev_dentry);
-+	mntput(prev_mnt);
-+}
-+
-+
-+/*
-+ * For every registered fs_struct restore umask and up to three directories
-+ * stored after the fs image header: root, pwd and altroot, in that order.
-+ *
-+ * A failure to read an fs image returns immediately.  A get_dir() failure
-+ * only stops reading further directories of that fs; directories obtained
-+ * so far are still installed and the walk continues with the next object.
-+ * The value returned is the last error code recorded.
-+ */
-+int rst_restore_fs(struct cpt_context *ctx)
-+{
-+	loff_t pos;
-+	cpt_object_t *obj;
-+	int err = 0;
-+
-+	for_each_object(obj, CPT_OBJ_FS) {
-+		struct cpt_fs_struct_image fi;
-+		struct fs_struct *fs = obj->o_obj;
-+		struct dentry *dir[3] = { NULL, NULL, NULL };
-+		struct vfsmount *mnt[3] = { NULL, NULL, NULL };
-+		int n;
-+
-+		err = rst_get_object(CPT_OBJ_FS, obj->o_pos, &fi, ctx);
-+		if (err)
-+			return err;
-+
-+		fs->umask = fi.cpt_umask;
-+
-+		pos = obj->o_pos + fi.cpt_hdrlen;
-+		for (n = 0; pos < obj->o_pos + fi.cpt_next && n < 3; n++) {
-+			err = get_dir(&dir[n], &mnt[n], &pos, ctx);
-+			if (err) {
-+				eprintk_ctx("cannot get_dir: %d", err);
-+				break;
-+			}
-+		}
-+
-+		if (dir[0])
-+			__set_fs_root(fs, mnt[0], dir[0]);
-+		if (dir[1])
-+			__set_fs_pwd(fs, mnt[1], dir[1]);
-+		if (dir[2]) {
-+			struct dentry *prev_dentry;
-+			struct vfsmount *prev_mnt;
-+
-+			/* Same swap-and-release sequence as for root and pwd. */
-+			write_lock(&fs->lock);
-+			prev_mnt = fs->altrootmnt;
-+			prev_dentry = fs->altroot;
-+			fs->altrootmnt = mnt[2];
-+			fs->altroot = dir[2];
-+			write_unlock(&fs->lock);
-+
-+			if (prev_dentry) {
-+				dput(prev_dentry);
-+				mntput(prev_mnt);
-+			}
-+		}
-+	}
-+	return err;
-+}
-+
-+/*
-+ * Perform one mount through sc_mount().
-+ *
-+ * A bind source of "/" or "" is treated as "no bind source"; any other
-+ * non-NULL @mntbind turns the request into an MS_BIND mount.
-+ * Returns 0 on success or the negative error from sc_mount().
-+ */
-+int do_one_mount(char *mntpnt, char *mnttype, char *mntbind, unsigned long flags, struct cpt_context *ctx)
-+{
-+	int rc;
-+
-+	if (mntbind != NULL) {
-+		if (strcmp(mntbind, "/") == 0 || strcmp(mntbind, "") == 0)
-+			mntbind = NULL;
-+		else
-+			flags |= MS_BIND;
-+	}
-+
-+	rc = sc_mount(mntbind, mntpnt, mnttype, flags);
-+	if (rc >= 0)
-+		return 0;
-+
-+	eprintk_ctx("%d mounting %s %s %08lx\n", rc, mntpnt, mnttype, flags);
-+	return rc;
-+}
-+
-+/*
-+ * Thread body that turns into "/bin/tar x -C / -S".
-+ *
-+ * @arg points to an int[2] pipe pair; the read end (element 0) is made
-+ * descriptor 0 and every descriptor from 1 up to max_fds is closed before
-+ * the exec.  Only returns (with -1) when the exec fails.
-+ */
-+static int undumptmpfs(void *arg)
-+{
-+	int *pipefd = arg;
-+	char *tar_argv[] = { "tar", "x", "-C", "/", "-S", NULL };
-+	int fd, rc;
-+
-+	if (pipefd[0] != 0)
-+		sc_dup2(pipefd[0], 0);
-+
-+	fd = 1;
-+	while (fd < current->files->fdt->max_fds) {
-+		sc_close(fd);
-+		fd++;
-+	}
-+
-+	module_put(THIS_MODULE);
-+
-+	set_fs(KERNEL_DS);
-+	rc = sc_execve("/bin/tar", tar_argv, NULL);
-+	eprintk("failed to exec /bin/tar: %d\n", rc);
-+	return -1;
-+}
-+
-+/*
-+ * Feed the tmpfs content object stored at *@pos to a tar process.
-+ *
-+ * A pipe is created, a thread running undumptmpfs() is started on its read
-+ * end, and the payload of the object is copied from the image into the
-+ * write end.  *@pos is advanced past the object once the thread has been
-+ * started.  The function then waits for the thread.
-+ *
-+ * Returns a negative error code when the object header cannot be read, the
-+ * pipe or the thread cannot be created (in these cases *@pos is not
-+ * advanced), or when the write end of the pipe cannot be obtained.
-+ * Returns 0 otherwise; errors from individual reads or writes and from the
-+ * wait are not propagated.
-+ */
-+static int rst_restore_tmpfs(loff_t *pos, struct cpt_context * ctx)
-+{
-+	int err;
-+	int pfd[2];
-+	struct file *f;
-+	struct cpt_object_hdr v;
-+	int n;
-+	loff_t end;
-+	int pid;
-+	int ret = 0;
-+
-+	err = rst_get_object(CPT_OBJ_NAME, *pos, &v, ctx);
-+	if (err < 0)
-+		return err;
-+
-+	err = sc_pipe(pfd);
-+	if (err < 0)
-+		return err;
-+	pid = err = local_kernel_thread(undumptmpfs, (void*)pfd, SIGCHLD, 0);
-+	if (err < 0)
-+		goto out;
-+	f = fget(pfd[1]);
-+	sc_close(pfd[1]);
-+	sc_close(pfd[0]);
-+
-+	ctx->file->f_pos = *pos + v.cpt_hdrlen;
-+	end = *pos + v.cpt_next;
-+	*pos += v.cpt_next;
-+
-+	if (f == NULL) {
-+		/*
-+		 * No write end to feed.  Both pipe descriptors are closed
-+		 * here already, so only reap the thread.
-+		 */
-+		ret = -EBADF;
-+		goto wait;
-+	}
-+
-+	/*
-+	 * Test before copying: an empty or malformed payload must not
-+	 * trigger a read.  n is at most sizeof(buf) after clamping.
-+	 */
-+	while (ctx->file->f_pos < end) {
-+		char buf[16];
-+		mm_segment_t oldfs;
-+
-+		n = sizeof(buf);
-+		if (end - ctx->file->f_pos < n)
-+			n = end - ctx->file->f_pos;
-+
-+		if (ctx->read(buf, n, ctx))
-+			break;
-+		/* buf is a kernel buffer, so the write runs under KERNEL_DS. */
-+		oldfs = get_fs(); set_fs(KERNEL_DS);
-+		f->f_op->write(f, buf, n, &f->f_pos);
-+		set_fs(oldfs);
-+	}
-+
-+	/* Dropping the last writer lets the reader see end of file. */
-+	fput(f);
-+
-+wait:
-+	clear_tsk_thread_flag(current,TIF_SIGPENDING);
-+
-+	if ((err = sc_waitx(pid, 0)) < 0)
-+		eprintk_ctx("wait4: %d\n", err);
-+
-+	return ret;
-+
-+out:
-+	if (pfd[1] >= 0)
-+		sc_close(pfd[1]);
-+	if (pfd[0] >= 0)
-+		sc_close(pfd[0]);
-+	return err;
-+}
-+
-+/*
-+ * Restore the mounts described after the header of vfsmount image @mi.
-+ *
-+ * Each entry consists of four names read in order: device, mount point,
-+ * filesystem type and bind source.  Every mount point other than "/" is
-+ * mounted with do_one_mount().  For type "tmpfs" the saved content that
-+ * follows is unpacked, but only when the mount itself succeeded, so that
-+ * nothing is extracted onto the wrong filesystem.
-+ *
-+ * Returns 0 on success; -EINVAL when the type or mount point is missing;
-+ * otherwise the first error from mounting or from unpacking tmpfs content.
-+ */
-+int restore_one_vfsmount(struct cpt_vfsmount_image *mi, loff_t pos, struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t endpos;
-+
-+	endpos = pos + mi->cpt_next;
-+	pos += mi->cpt_hdrlen;
-+
-+	while (pos < endpos) {
-+		char *mntdev;
-+		char *mntpnt;
-+		char *mnttype;
-+		char *mntbind;
-+
-+		mntdev = __rst_get_name(&pos, ctx);
-+		mntpnt = __rst_get_name(&pos, ctx);
-+		mnttype = __rst_get_name(&pos, ctx);
-+		mntbind = __rst_get_name(&pos, ctx);
-+		err = -EINVAL;
-+		if (mnttype && mntpnt) {
-+			err = 0;
-+			if (strcmp(mntpnt, "/"))
-+				err = do_one_mount(mntpnt, mnttype, mntbind, mi->cpt_flags, ctx);
-+			/*
-+			 * Propagate the result: after an early failure pos
-+			 * has not moved past the content object, so parsing
-+			 * must not continue.
-+			 */
-+			if (!err && strcmp(mnttype, "tmpfs") == 0)
-+				err = rst_restore_tmpfs(&pos, ctx);
-+		}
-+		if (mntdev)
-+			rst_put_name(mntdev, ctx);
-+		if (mntpnt)
-+			rst_put_name(mntpnt, ctx);
-+		if (mnttype)
-+			rst_put_name(mnttype, ctx);
-+		if (mntbind)
-+			rst_put_name(mntbind, ctx);
-+		if (err)
-+			return err;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Restore every vfsmount image found in the image range [@pos, @endpos).
-+ * Returns 0, or the first error from reading or restoring a mount.
-+ */
-+int restore_one_namespace(loff_t pos, loff_t endpos, struct cpt_context *ctx)
-+{
-+	struct cpt_vfsmount_image mi;
-+	loff_t cur;
-+	int rc;
-+
-+	for (cur = pos; cur < endpos; cur += mi.cpt_next) {
-+		rc = rst_get_object(CPT_OBJ_VFSMOUNT, cur, &mi, ctx);
-+		if (rc)
-+			return rc;
-+
-+		rc = restore_one_vfsmount(&mi, cur, ctx);
-+		if (rc)
-+			return rc;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Restore the namespace stored in the CPT_SECT_NAMESPACE section.
-+ *
-+ * Only the first namespace object is restored; encountering a second one
-+ * is logged and ends the walk without an error.  A missing section is not
-+ * an error either.  Returns -EINVAL for a malformed section header.
-+ */
-+int rst_root_namespace(struct cpt_context *ctx)
-+{
-+	struct cpt_section_hdr h;
-+	struct cpt_object_hdr ns;
-+	loff_t cur = ctx->sections[CPT_SECT_NAMESPACE];
-+	loff_t limit;
-+	int restored = 0;
-+	int rc;
-+
-+	if (cur == CPT_NULL)
-+		return 0;
-+
-+	rc = ctx->pread(&h, sizeof(h), ctx, cur);
-+	if (rc)
-+		return rc;
-+	if (h.cpt_section != CPT_SECT_NAMESPACE || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	limit = cur + h.cpt_next;
-+	for (cur += h.cpt_hdrlen; cur < limit; cur += ns.cpt_next) {
-+		rc = rst_get_object(CPT_OBJ_NAMESPACE, cur, &ns, ctx);
-+		if (rc)
-+			return rc;
-+
-+		if (restored) {
-+			eprintk_ctx("multiple namespaces are not supported\n");
-+			break;
-+		}
-+		restored++;
-+
-+		rc = restore_one_namespace(cur+ns.cpt_hdrlen, cur+ns.cpt_next, ctx);
-+		if (rc)
-+			return rc;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Walk the CPT_SECT_FILES section and restore, via rst_sysv_shm(), every
-+ * file image that has not been bound to a file object yet.  The reference
-+ * returned for such a file is dropped right away.
-+ *
-+ * Returns 0 on success, -EINVAL for a malformed section header, or the
-+ * first error from reading an object or restoring a file.
-+ */
-+int rst_stray_files(struct cpt_context *ctx)
-+{
-+	struct cpt_section_hdr h;
-+	loff_t cur = ctx->sections[CPT_SECT_FILES];
-+	loff_t limit;
-+	int rc = 0;
-+
-+	if (cur == CPT_NULL)
-+		return 0;
-+
-+	rc = ctx->pread(&h, sizeof(h), ctx, cur);
-+	if (rc)
-+		return rc;
-+	if (h.cpt_section != CPT_SECT_FILES || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	limit = cur + h.cpt_next;
-+	cur += h.cpt_hdrlen;
-+	while (cur < limit) {
-+		struct cpt_object_hdr hdr;
-+		struct file *stray;
-+
-+		rc = _rst_get_object(CPT_OBJ_FILE, cur, &hdr, sizeof(hdr), ctx);
-+		if (rc)
-+			return rc;
-+
-+		if (lookup_cpt_obj_bypos(CPT_OBJ_FILE, cur, ctx) == NULL) {
-+			dprintk_ctx("stray file %Ld\n", cur);
-+
-+			stray = rst_sysv_shm(cur, ctx);
-+			if (IS_ERR(stray)) {
-+				eprintk_ctx("rst_stray_files: %ld\n", PTR_ERR(stray));
-+				return PTR_ERR(stray);
-+			}
-+			fput(stray);
-+		}
-+		cur += hdr.cpt_next;
-+	}
-+
-+	return rc;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_i386.S linux-2.6.16-026test009/kernel/cpt/rst_i386.S
---- linux-2.6.16.orig/kernel/cpt/rst_i386.S 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_i386.S 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,40 @@
-+#define ASSEMBLY 1
-+
-+#include <linux/config.h>
-+#include <linux/linkage.h>
-+#include <asm/thread_info.h>
-+#include <asm/errno.h>
-+#include <asm/segment.h>
-+#include <asm/page.h>
-+#include <asm/smp.h>
-+#include <asm/page.h>
-+
-+ .section .text
-+ .align 4
-+ .global ret_last_siginfo
-+/*
-+ * Calls rlsi (defined elsewhere), loads its result from %eax into %esp
-+ * and returns on that stack.
-+ */
-+ret_last_siginfo:
-+ call rlsi
-+ movl %eax,%esp
-+ ret
-+
-+ .align 8
-+ .global ret_child_tid
-+/*
-+ * Same pattern as ret_last_siginfo, but the current stack pointer is
-+ * pushed first, so rct (defined elsewhere) receives it as an on-stack
-+ * argument.
-+ */
-+ret_child_tid:
-+ push %esp
-+ call rct
-+ movl %eax,%esp
-+ ret
-+
-+ .align 4
-+ .global ret_from_rst
-+/*
-+ * Pushes %eax and jumps 6 bytes into ret_from_fork.
-+ * NOTE(review): the +6 offset presumably skips the leading instructions
-+ * of ret_from_fork (the schedule_tail call sequence) and depends on the
-+ * exact encoding in entry.S -- confirm against the target kernel.
-+ */
-+ret_from_rst:
-+ pushl %eax
-+ jmp ret_from_fork+6
-+
-+ .align 4
-+ .global pre_ret_from_fork
-+/*
-+ * Calls schedule_tail with %eax pushed as its on-stack argument; the
-+ * saved value is then popped back into %eax, so the caller sees the %eax
-+ * it passed in.
-+ */
-+pre_ret_from_fork:
-+ pushl %eax
-+ call schedule_tail
-+ popl %eax
-+ ret
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_mm.c linux-2.6.16-026test009/kernel/cpt/rst_mm.c
---- linux-2.6.16.orig/kernel/cpt/rst_mm.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_mm.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,985 @@
-+/*
-+ *
-+ * kernel/cpt/rst_mm.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/virtinfo.h>
-+#include <linux/hugetlb.h>
-+#include <linux/errno.h>
-+#include <linux/errno.h>
-+#include <linux/pagemap.h>
-+#include <linux/mman.h>
-+#include <linux/vmalloc.h>
-+#include <linux/rmap.h>
-+#include <linux/hash.h>
-+#include <asm/pgalloc.h>
-+#include <asm/tlb.h>
-+#include <asm/tlbflush.h>
-+#include <asm/pgtable.h>
-+#include <asm/mmu.h>
-+#include <asm/ldt.h>
-+#include <asm/desc.h>
-+#include <asm/mmu_context.h>
-+#include <linux/swapops.h>
-+#include <linux/cpt_image.h>
-+
-+#ifdef CONFIG_VE
-+#include <ub/beancounter.h>
-+#include <ub/ub_vmpages.h>
-+#endif
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_files.h"
-+#include "cpt_ubc.h"
-+#include "cpt_mm.h"
-+#include "cpt_kernel.h"
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+#include "cpt_pagein.h"
-+#endif
-+
-+#include "cpt_syscalls.h"
-+
-+#define __PAGE_NX (1ULL<<63)
-+
-+/*
-+ * Translate the VM_* bits saved in a VMA image into the PROT_* bits to be
-+ * used when the mapping is recreated.
-+ */
-+static unsigned long make_prot(struct cpt_vma_image *vmai)
-+{
-+	static const struct {
-+		unsigned long vm_bit;
-+		unsigned long prot_bit;
-+	} map[] = {
-+		{ VM_READ,	PROT_READ },
-+		{ VM_WRITE,	PROT_WRITE },
-+		{ VM_EXEC,	PROT_EXEC },
-+		{ VM_GROWSDOWN,	PROT_GROWSDOWN },
-+		{ VM_GROWSUP,	PROT_GROWSUP },
-+	};
-+	unsigned long prot = 0;
-+	unsigned int k;
-+
-+	for (k = 0; k < ARRAY_SIZE(map); k++) {
-+		if (vmai->cpt_flags&map[k].vm_bit)
-+			prot |= map[k].prot_bit;
-+	}
-+	return prot;
-+}
-+
-+/*
-+ * Build the MAP_* flags for recreating the mapping described by @vmai.
-+ * MAP_FIXED is always set; the mapping is anonymous when the image has no
-+ * backing file and MAP_NORESERVE is used when VM_ACCOUNT was not set.
-+ */
-+static unsigned long make_flags(struct cpt_vma_image *vmai)
-+{
-+	unsigned long flags = MAP_FIXED;
-+
-+	flags |= (vmai->cpt_flags&(VM_SHARED|VM_MAYSHARE)) ?
-+			MAP_SHARED : MAP_PRIVATE;
-+
-+	if (vmai->cpt_file == CPT_NULL)
-+		flags |= MAP_ANONYMOUS;
-+
-+	flags |= (vmai->cpt_flags&VM_GROWSDOWN) ? MAP_GROWSDOWN : 0;
-+	flags |= (vmai->cpt_flags&VM_DENYWRITE) ? MAP_DENYWRITE : 0;
-+	flags |= (vmai->cpt_flags&VM_EXECUTABLE) ? MAP_EXECUTABLE : 0;
-+	flags |= (vmai->cpt_flags&VM_ACCOUNT) ? 0 : MAP_NORESERVE;
-+
-+	return flags;
-+}
-+
-+
-+#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)
-+/*
-+ * Make sure the page-based LDT of @pc can hold at least @mincount entries.
-+ *
-+ * The count is rounded up to a multiple of 512 and any missing pages are
-+ * allocated and zeroed.  Returns 0 on success (including when the LDT is
-+ * already large enough) or -ENOMEM; pc->size is updated only on success.
-+ */
-+static int __alloc_ldt(mm_context_t *pc, int mincount)
-+{
-+	int bytes, off;
-+
-+	if (mincount <= pc->size)
-+		return 0;
-+
-+	/*
-+	 * LDT got larger - reallocate if necessary.
-+	 */
-+	mincount = (mincount+511)&(~511);
-+	bytes = mincount*LDT_ENTRY_SIZE;
-+
-+	for (off = 0; off < bytes; off += PAGE_SIZE) {
-+		int nr = off/PAGE_SIZE;
-+
-+		BUG_ON(off >= 64*1024);
-+		if (pc->ldt_pages[nr])
-+			continue;
-+
-+		pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC);
-+		if (!pc->ldt_pages[nr])
-+			return -ENOMEM;
-+		clear_highpage(pc->ldt_pages[nr]);
-+	}
-+
-+	pc->size = mincount;
-+	return 0;
-+}
-+
-+/*
-+ * Restore the LDT of the current mm from the image (page-array variant).
-+ *
-+ * The LDT is grown to the saved size, each LDT page is filled from the
-+ * image data that follows the object header at @pos, and the result is
-+ * loaded with load_LDT().  Returns 0 or the first error encountered.
-+ */
-+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
-+{
-+	struct mm_struct *mm = current->mm;
-+	int total, off;
-+	int rc;
-+
-+	rc = __alloc_ldt(&mm->context, li->cpt_size/LDT_ENTRY_SIZE);
-+	if (rc)
-+		return rc;
-+
-+	total = mm->context.size*LDT_ENTRY_SIZE;
-+
-+	for (off = 0; off < total; off += PAGE_SIZE) {
-+		int nr = off / PAGE_SIZE;
-+		int chunk = total - off;
-+		char *kaddr;
-+
-+		if (chunk > PAGE_SIZE)
-+			chunk = PAGE_SIZE;
-+
-+		kaddr = kmap(mm->context.ldt_pages[nr]);
-+		rc = ctx->pread(kaddr, chunk, ctx, pos + li->cpt_hdrlen + off);
-+		kunmap(mm->context.ldt_pages[nr]);
-+		if (rc)
-+			return rc;
-+	}
-+
-+	load_LDT(&mm->context);
-+	return 0;
-+}
-+
-+#else
-+
-+/*
-+ * Restore the LDT of the current mm from the image (contiguous variant).
-+ *
-+ * A new table of li->cpt_size bytes is allocated (vmalloc above one page,
-+ * kmalloc otherwise), filled from the image data that follows the object
-+ * header at @pos, installed and loaded.  The previous table, if any, is
-+ * freed with the allocator matching its size.
-+ *
-+ * Returns 0 on success, -ENOMEM when the table cannot be allocated, or
-+ * the error from ctx->pread(); on failure the mm is left untouched and the
-+ * new table is released.
-+ */
-+static int do_rst_ldt(struct cpt_obj_bits *li, loff_t pos, struct cpt_context *ctx)
-+{
-+	struct mm_struct *mm = current->mm;
-+	int oldsize = mm->context.size;
-+	void *oldldt;
-+	void *newldt;
-+	int err;
-+
-+	if (li->cpt_size > PAGE_SIZE)
-+		newldt = vmalloc(li->cpt_size);
-+	else
-+		newldt = kmalloc(li->cpt_size, GFP_KERNEL);
-+
-+	if (!newldt)
-+		return -ENOMEM;
-+
-+	err = ctx->pread(newldt, li->cpt_size, ctx, pos + li->cpt_hdrlen);
-+	if (err) {
-+		/* Not installed yet: release it with the matching allocator. */
-+		if (li->cpt_size > PAGE_SIZE)
-+			vfree(newldt);
-+		else
-+			kfree(newldt);
-+		return err;
-+	}
-+
-+	oldldt = mm->context.ldt;
-+	mm->context.ldt = newldt;
-+	mm->context.size = li->cpt_size/LDT_ENTRY_SIZE;
-+
-+	load_LDT(&mm->context);
-+
-+	if (oldsize) {
-+		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
-+			vfree(oldldt);
-+		else
-+			kfree(oldldt);
-+	}
-+	return 0;
-+}
-+#endif
-+
-+/*
-+ * Rebuild the ring bookkeeping of @aio_ctx from the saved image @aimg.
-+ *
-+ * The ring geometry is recomputed from aio_ctx->max_reqs and must match
-+ * the page and event counts stored in the image, otherwise -EINVAL is
-+ * returned.  The ring pages are then taken from the current process'
-+ * address space at the saved mmap base with get_user_pages().
-+ *
-+ * Returns 0 on success, -EINVAL on geometry mismatch, -ENOMEM when the
-+ * page array cannot be allocated, or -EFAULT when not all pages could be
-+ * pinned.
-+ */
-+static int
-+restore_aio_ring(struct kioctx *aio_ctx, struct cpt_aio_ctx_image *aimg)
-+{
-+ struct aio_ring_info *info = &aio_ctx->ring_info;
-+ unsigned nr_events = aio_ctx->max_reqs;
-+ unsigned long size;
-+ int nr_pages;
-+
-+ /* We recalculate parameters of the ring exactly like
-+ * fs/aio.c does and then compare calculated values
-+ * with ones, stored in dump. They must be the same. */
-+
-+ nr_events += 2;
-+
-+ size = sizeof(struct aio_ring);
-+ size += sizeof(struct io_event) * nr_events;
-+ nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
-+
-+ if (nr_pages != aimg->cpt_ring_pages)
-+ return -EINVAL;
-+
-+ info->nr_pages = nr_pages;
-+
-+ /* Number of events that actually fit into the whole pages. */
-+ nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
-+
-+ if (nr_events != aimg->cpt_nr)
-+ return -EINVAL;
-+
-+ info->nr = 0;
-+ /* Use the embedded array unless more than AIO_RING_PAGES are needed. */
-+ info->ring_pages = info->internal_pages;
-+ if (nr_pages > AIO_RING_PAGES) {
-+ info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
-+ if (!info->ring_pages)
-+ return -ENOMEM;
-+ memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
-+ }
-+
-+ info->mmap_size = nr_pages * PAGE_SIZE;
-+
-+ /* This piece of shit is not entirely my fault. Kernel aio.c makes
-+ * something odd mmap()ping some pages and then pinning them.
-+ * I guess it is just some mud remained of failed attempt to show ring
-+ * to user space. The result is odd. :-) Immediately after
-+ * creation of AIO context, kernel shares those pages with user
-+ * and user can read and even write there. But after the first
-+ * fork, pages are marked COW with evident consequences.
-+ * I remember, I did the same mistake in the first version
-+ * of mmapped packet socket, luckily that crap never reached
-+ * mainstream.
-+ *
-+ * So, what are we going to do? I can simulate this odd behaviour
-+ * exactly, but I am not insane yet. For now just take the pages
-+ * from user space. Alternatively, we could keep kernel copy
-+ * in AIO context image, which would be more correct.
-+ *
-+ * What is wrong now? If the pages are COWed, ring is transferred
-+ * incorrectly.
-+ */
-+ down_read(&current->mm->mmap_sem);
-+ info->mmap_base = aimg->cpt_mmap_base;
-+ info->nr_pages = get_user_pages(current, current->mm,
-+ info->mmap_base, nr_pages,
-+ 1, 0, info->ring_pages, NULL);
-+ up_read(&current->mm->mmap_sem);
-+
-+ /* Partial pin: release what was obtained and give up. */
-+ if (unlikely(info->nr_pages != nr_pages)) {
-+ int i;
-+
-+ for (i=0; i<info->nr_pages; i++)
-+ put_page(info->ring_pages[i]);
-+ /* NOTE(review): info->ring_pages still points at the freed array after this. */
-+ if (info->ring_pages && info->ring_pages != info->internal_pages)
-+ kfree(info->ring_pages);
-+ return -EFAULT;
-+ }
-+
-+ aio_ctx->user_id = info->mmap_base;
-+
-+ info->nr = nr_events;
-+ info->tail = aimg->cpt_tail;
-+
-+ return 0;
-+}
-+
-+/*
-+ * Recreate one AIO context described by @aimg and attach it to current->mm.
-+ *
-+ * @aimg: AIO context image (max_reqs, ring geometry, ring address, tail)
-+ * @pos:  position of the image in the dump, used only in the error message
-+ * @ctx:  restore context (used for error reporting)
-+ *
-+ * The kioctx is allocated and zeroed, its ring is restored with
-+ * restore_aio_ring(), the remaining fields are initialised by hand, the
-+ * global aio_nr counter is charged and the context is pushed on the
-+ * mm's ioctx_list.
-+ *
-+ * Returns 0 on success, -ENOMEM if the kioctx cannot be allocated, or the
-+ * error from restore_aio_ring().
-+ */
-+static int do_rst_aio(struct cpt_aio_ctx_image *aimg, loff_t pos, cpt_context_t *ctx)
-+{
-+	int err;
-+	struct kioctx *aio_ctx;
-+	extern spinlock_t aio_nr_lock;
-+
-+	aio_ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
-+	if (!aio_ctx)
-+		return -ENOMEM;
-+
-+	memset(aio_ctx, 0, sizeof(*aio_ctx));
-+	aio_ctx->max_reqs = aimg->cpt_max_reqs;
-+
-+	if ((err = restore_aio_ring(aio_ctx, aimg)) < 0) {
-+		kmem_cache_free(kioctx_cachep, aio_ctx);
-+		eprintk_ctx("AIO %Ld restore_aio_ring: %d\n", pos, err);
-+		return err;
-+	}
-+
-+	aio_ctx->mm = current->mm;
-+	atomic_inc(&aio_ctx->mm->mm_count);
-+	atomic_set(&aio_ctx->users, 1);
-+	spin_lock_init(&aio_ctx->ctx_lock);
-+	spin_lock_init(&aio_ctx->ring_info.ring_lock);
-+	init_waitqueue_head(&aio_ctx->wait);
-+	INIT_LIST_HEAD(&aio_ctx->active_reqs);
-+	INIT_LIST_HEAD(&aio_ctx->run_list);
-+	/* The work data must be the kioctx itself: aio_kick_handler() in
-+	 * fs/aio.c interprets its argument as a struct kioctx pointer, so
-+	 * handing it the checkpoint context would make it operate on
-+	 * unrelated memory. */
-+	INIT_WORK(&aio_ctx->wq, aio_kick_handler, aio_ctx);
-+
-+	spin_lock(&aio_nr_lock);
-+	aio_nr += aio_ctx->max_reqs;
-+	spin_unlock(&aio_nr_lock);
-+
-+	write_lock(&aio_ctx->mm->ioctx_list_lock);
-+	aio_ctx->next = aio_ctx->mm->ioctx_list;
-+	aio_ctx->mm->ioctx_list = aio_ctx;
-+	write_unlock(&aio_ctx->mm->ioctx_list_lock);
-+
-+	return 0;
-+}
-+
-+struct anonvma_map /* one cpt_anonvmaid -> anon_vma association, chained in ctx->anonvmas[] */
-+{
-+ struct hlist_node list; /* hash chain link; must stay the first member: rst_context_release() kfree()s the hlist_node pointer */
-+ struct anon_vma *avma; /* anon_vma recorded for this id by verify_create_anonvma() */
-+ __u64 id; /* cpt_anonvmaid taken from the vma image */
-+};
-+
-+/*
-+ * Make sure the vma starting at vmai->cpt_start is attached to the anon_vma
-+ * that belongs to vmai->cpt_anonvmaid, creating and registering the
-+ * association in ctx->anonvmas when the id is seen for the first time.
-+ *
-+ * Returns 0 on success; -EINVAL/-ENOMEM on hash table or map allocation
-+ * problems, -ESRCH when the vma is missing or already bound to a different
-+ * anon_vma, -EINVAL on start/pgoff mismatch that cannot be fixed, or the
-+ * error from anon_vma_prepare().
-+ */
-+static int verify_create_anonvma(struct mm_struct *mm,
-+				 struct cpt_vma_image *vmai,
-+				 cpt_context_t *ctx)
-+{
-+	struct anon_vma *known = NULL;	/* anon_vma already registered for the id */
-+	struct anon_vma *attached;	/* anon_vma the vma ends up with */
-+	struct vm_area_struct *vma;
-+	struct anonvma_map *map;
-+	struct hlist_node *pos;
-+	int h;
-+
-+	if (ctx->anonvmas) {
-+		/* Has this id been seen before? */
-+		h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
-+		hlist_for_each_entry(map, pos, &ctx->anonvmas[h], list) {
-+			if (map->id != vmai->cpt_anonvmaid)
-+				continue;
-+			known = map->avma;
-+			break;
-+		}
-+	} else {
-+		/* First use: the whole hash table lives in a single page. */
-+		if (CPT_ANONVMA_HSIZE*sizeof(struct hlist_head) > PAGE_SIZE)
-+			return -EINVAL;
-+		ctx->anonvmas = (void*)__get_free_page(GFP_KERNEL);
-+		if (ctx->anonvmas == NULL)
-+			return -ENOMEM;
-+		for (h = 0; h < CPT_ANONVMA_HSIZE; h++)
-+			INIT_HLIST_HEAD(&ctx->anonvmas[h]);
-+	}
-+
-+	down_read(&mm->mmap_sem);
-+	vma = find_vma(mm, vmai->cpt_start);
-+	if (vma == NULL) {
-+		up_read(&mm->mmap_sem);
-+		return -ESRCH;
-+	}
-+	if (vma->vm_start != vmai->cpt_start) {
-+		up_read(&mm->mmap_sem);
-+		eprintk_ctx("vma start mismatch\n");
-+		return -EINVAL;
-+	}
-+	if (vma->vm_pgoff != vmai->cpt_pgoff) {
-+		dprintk_ctx("vma pgoff mismatch, fixing\n");
-+		/* Only a private anonymous area may have its offset patched. */
-+		if (vma->vm_file || (vma->vm_flags&(VM_SHARED|VM_MAYSHARE))) {
-+			eprintk_ctx("cannot fixup vma pgoff\n");
-+			up_read(&mm->mmap_sem);
-+			return -EINVAL;
-+		}
-+		vma->vm_pgoff = vmai->cpt_pgoff;
-+	}
-+
-+	if (vma->anon_vma) {
-+		/*
-+		 * Two different ids ending up on one anon_vma is tolerated:
-+		 * it happens e.g. when mmap() merged the new area into the
-+		 * previous one, and only makes rmap scan a longer vma list.
-+		 *
-+		 * What cannot be accepted is a vma that already carries an
-+		 * anon_vma other than the one registered for its id. That is
-+		 * reported as an error here; per the original author's note
-+		 * the restore then falls back to copying memory.
-+		 */
-+		if (known && vma->anon_vma != known) {
-+			up_read(&mm->mmap_sem);
-+			eprintk_ctx("anon_vma mismatch\n");
-+			return -ESRCH;
-+		}
-+	} else if (known) {
-+		vma->anon_vma = known;
-+		anon_vma_link(vma);
-+	} else {
-+		int err = anon_vma_prepare(vma);
-+
-+		if (err) {
-+			up_read(&mm->mmap_sem);
-+			return err;
-+		}
-+	}
-+
-+	attached = vma->anon_vma;
-+	up_read(&mm->mmap_sem);
-+
-+	if (known)
-+		return 0;
-+
-+	/* New id: remember which anon_vma it maps to. */
-+	if (!attached)
-+		return -EINVAL;
-+
-+	map = kmalloc(sizeof(*map), GFP_KERNEL);
-+	if (map == NULL)
-+		return -ENOMEM;
-+
-+	map->id = vmai->cpt_anonvmaid;
-+	map->avma = attached;
-+	h = hash_long((unsigned long)vmai->cpt_anonvmaid, CPT_ANONVMA_HBITS);
-+	hlist_add_head(&map->list, &ctx->anonvmas[h]);
-+	return 0;
-+}
-+
-+/*
-+ * Copy the pages in [start, end) from address space @src into the same
-+ * addresses of current->mm, one page at a time.
-+ *
-+ * Returns 0 on success, the negative get_user_pages() error, or -EFAULT
-+ * when get_user_pages() pins no page.
-+ */
-+static int copy_mm_pages(struct mm_struct *src, unsigned long start,
-+			 unsigned long end)
-+{
-+	unsigned long addr;
-+
-+	for (addr = start; addr < end; addr += PAGE_SIZE) {
-+		struct page *dst_page;
-+		struct page *src_page;
-+		void *dst_kaddr, *src_kaddr;
-+		int got;
-+
-+		/* Destination: forced write access in our own mm. */
-+		got = get_user_pages(current, current->mm,
-+				     addr, 1, 1, 1, &dst_page, NULL);
-+		if (got < 0)
-+			return got;
-+		if (got == 0)
-+			return -EFAULT;
-+
-+		/* Source: forced read access in the donor mm. */
-+		got = get_user_pages(current, src,
-+				     addr, 1, 0, 1, &src_page, NULL);
-+		if (got <= 0) {
-+			page_cache_release(dst_page);
-+			return got ? got : -EFAULT;
-+		}
-+
-+		src_kaddr = kmap(src_page);
-+		dst_kaddr = kmap(dst_page);
-+		memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);
-+		set_page_dirty_lock(dst_page);
-+		kunmap(dst_page);
-+		kunmap(src_page);
-+		page_cache_release(dst_page);
-+		page_cache_release(src_page);
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Recreate one VMA of current->mm from its image and refill its contents.
-+ *
-+ * @vmai:   VMA image (range, protection, flags, backing file, anon_vma id)
-+ * @vmapos: position of the VMA image in the dump file
-+ * @mmpos:  position of the owning mm image (not used by this function)
-+ * @ctx:    restore context
-+ *
-+ * Steps:
-+ *  1. restore the backing file, if any, and mmap the range at its original
-+ *     address, splitting the area off if mmap merged it with its neighbour;
-+ *  2. bind the area to the right anon_vma (verify_create_anonvma());
-+ *  3. walk the sub-objects stored after the VMA header:
-+ *     CPT_OBJ_REMAPPAGES, CPT_OBJ_LAZYPAGES, CPT_OBJ_COPYPAGES and
-+ *     CPT_OBJ_PAGES;
-+ *  4. compare the resulting vm_flags/pgprot with the image and fix up what
-+ *     can be fixed (read hints, VM_LOCKED, VM_EXEC, VM_ACCOUNT).
-+ *
-+ * Returns 0 on success or a negative errno. The file reference taken in
-+ * step 1 is dropped on every path that reaches the "out" label.
-+ */
-+static int do_rst_vma(struct cpt_vma_image *vmai, loff_t vmapos, loff_t mmpos, struct cpt_context *ctx)
-+{
-+	int err = 0;
-+	unsigned long addr;
-+	struct mm_struct *mm = current->mm;
-+	struct vm_area_struct *vma;
-+	struct file *file = NULL;
-+	unsigned long prot;
-+	int checked = 0;
-+
-+	prot = make_prot(vmai);
-+
-+	if (vmai->cpt_file != CPT_NULL) {
-+		if (vmai->cpt_type == CPT_VMA_TYPE_0) {
-+			file = rst_file(vmai->cpt_file, -1, ctx);
-+			if (IS_ERR(file)) {
-+				eprintk_ctx("do_rst_vma: rst_file: %Ld\n", vmai->cpt_file);
-+				return PTR_ERR(file);
-+			}
-+		} else if (vmai->cpt_type == CPT_VMA_TYPE_SHM) {
-+			file = rst_sysv_shm(vmai->cpt_file, ctx);
-+			if (IS_ERR(file))
-+				return PTR_ERR(file);
-+		}
-+	}
-+
-+	down_write(&mm->mmap_sem);
-+	addr = do_mmap_pgoff(file, vmai->cpt_start,
-+			     vmai->cpt_end-vmai->cpt_start,
-+			     prot, make_flags(vmai),
-+			     vmai->cpt_pgoff);
-+
-+	if (addr != vmai->cpt_start) {
-+		up_write(&mm->mmap_sem);
-+
-+		err = -EINVAL;
-+		if (IS_ERR((void*)addr))
-+			err = addr;
-+		goto out;
-+	}
-+
-+	vma = find_vma(mm, vmai->cpt_start);
-+	if (vma == NULL) {
-+		up_write(&mm->mmap_sem);
-+		eprintk_ctx("cannot find mmapped vma\n");
-+		err = -ESRCH;
-+		goto out;
-+	}
-+
-+	/* do_mmap_pgoff() can merge new area to previous one (not to the next,
-+	 * we mmap in order, the rest of mm is still unmapped). This can happen
-+	 * f.e. if flags are to be adjusted later, or if we had different
-+	 * anon_vma on two adjacent regions. Split it by brute force. */
-+	if (vma->vm_start != vmai->cpt_start) {
-+		dprintk_ctx("vma %Ld merged, split\n", vmapos);
-+		err = split_vma(mm, vma, (unsigned long)vmai->cpt_start, 0);
-+		if (err) {
-+			up_write(&mm->mmap_sem);
-+			eprintk_ctx("cannot split vma\n");
-+			goto out;
-+		}
-+	}
-+	up_write(&mm->mmap_sem);
-+
-+	if (vmai->cpt_anonvma && vmai->cpt_anonvmaid) {
-+		err = verify_create_anonvma(mm, vmai, ctx);
-+		if (err) {
-+			eprintk_ctx("cannot verify_create_anonvma\n");
-+			goto out;
-+		}
-+	}
-+
-+	if (vmai->cpt_next > vmai->cpt_hdrlen) {
-+		loff_t offset = vmapos + vmai->cpt_hdrlen;
-+
-+		do {
-+			union {
-+				struct cpt_page_block pb;
-+				struct cpt_remappage_block rpb;
-+				struct cpt_copypage_block cpb;
-+				struct cpt_lazypage_block lpb;
-+			} u;
-+			loff_t pos;
-+
-+			err = rst_get_object(-1, offset, &u, ctx);
-+			if (err) {
-+				eprintk_ctx("vma fix object: %d\n", err);
-+				goto out;
-+			}
-+			if (u.rpb.cpt_object == CPT_OBJ_REMAPPAGES) {
-+				/* Non-linear file mapping. */
-+				err = sc_remap_file_pages(u.rpb.cpt_start,
-+							  u.rpb.cpt_end-u.rpb.cpt_start,
-+							  0, u.rpb.cpt_pgoff, 0);
-+				if (err < 0) {
-+					eprintk_ctx("remap_file_pages: %d (%08x,%u,%u)\n", err,
-+						    (__u32)u.rpb.cpt_start, (__u32)(u.rpb.cpt_end-u.rpb.cpt_start),
-+						    (__u32)u.rpb.cpt_pgoff);
-+					goto out;
-+				}
-+				offset += u.rpb.cpt_next;
-+				continue;
-+			} else if (u.cpb.cpt_object == CPT_OBJ_LAZYPAGES) {
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+				unsigned long addr = u.lpb.cpt_start;
-+
-+				down_read(&mm->mmap_sem);
-+				if ((vma = find_vma(mm, u.lpb.cpt_start)) == NULL) {
-+					/* Do not leave with mmap_sem still held. */
-+					up_read(&mm->mmap_sem);
-+					eprintk_ctx("lost vm_area_struct\n");
-+					err = -ESRCH;
-+					goto out;
-+				}
-+				err = anon_vma_prepare(vma);
-+				if (err) {
-+					up_read(&mm->mmap_sem);
-+					goto out;
-+				}
-+				while (addr < u.lpb.cpt_end) {
-+					err = rst_pagein(vma, u.lpb.cpt_index + (addr-u.lpb.cpt_start)/PAGE_SIZE,
-+							 addr, ctx);
-+					if (err)
-+						break;
-+					addr += PAGE_SIZE;
-+				}
-+				up_read(&mm->mmap_sem);
-+#else
-+				err = -EINVAL;
-+#endif
-+				if (err)
-+					goto out;
-+				offset += u.cpb.cpt_next;
-+				continue;
-+			} else if (u.cpb.cpt_object == CPT_OBJ_COPYPAGES) {
-+				/* Pages shared with an mm restored earlier. */
-+				struct vm_area_struct *vma, *vma1;
-+				struct mm_struct *src;
-+				struct anon_vma *src_anon;
-+				cpt_object_t *mobj;
-+
-+				if (!vmai->cpt_anonvmaid) {
-+					err = -EINVAL;
-+					eprintk_ctx("CPT_OBJ_COPYPAGES in !anonvma\n");
-+					goto out;
-+				}
-+
-+				mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, u.cpb.cpt_source, ctx);
-+				if (!mobj) {
-+					eprintk_ctx("lost mm_struct to clone pages from\n");
-+					err = -ESRCH;
-+					goto out;
-+				}
-+				src = mobj->o_obj;
-+
-+				down_read(&src->mmap_sem);
-+				src_anon = NULL;
-+				vma1 = find_vma(src, u.cpb.cpt_start);
-+				if (vma1)
-+					src_anon = vma1->anon_vma;
-+				up_read(&src->mmap_sem);
-+
-+				if (!vma1) {
-+					eprintk_ctx("lost src vm_area_struct\n");
-+					err = -ESRCH;
-+					goto out;
-+				}
-+
-+				down_read(&mm->mmap_sem);
-+				if ((vma = find_vma(mm, u.cpb.cpt_start)) == NULL) {
-+					up_read(&mm->mmap_sem);
-+					eprintk_ctx("lost vm_area_struct\n");
-+					err = -ESRCH;
-+					goto out;
-+				}
-+
-+				/* Page tables can only be cloned when both areas
-+				 * share the anon_vma and have the same offset
-+				 * inside it; otherwise copy the data. */
-+				if (!src_anon ||
-+				    !vma->anon_vma ||
-+				    vma->anon_vma != src_anon ||
-+				    vma->vm_start - vma1->vm_start !=
-+				    (vma->vm_pgoff - vma1->vm_pgoff) << PAGE_SHIFT) {
-+					up_read(&mm->mmap_sem);
-+					wprintk_ctx("anon_vma mismatch in vm_area_struct %Ld\n", vmapos);
-+					err = copy_mm_pages(mobj->o_obj,
-+							    u.cpb.cpt_start,
-+							    u.cpb.cpt_end);
-+				} else {
-+					err = __copy_page_range(vma, vma1,
-+								u.cpb.cpt_start,
-+								u.cpb.cpt_end-u.cpb.cpt_start);
-+					up_read(&mm->mmap_sem);
-+				}
-+				if (err) {
-+					eprintk_ctx("clone_page_range: %d (%08x,%u,%ld)\n", err,
-+						    (__u32)u.cpb.cpt_start, (__u32)(u.cpb.cpt_end-u.cpb.cpt_start),
-+						    (long)u.cpb.cpt_source);
-+					goto out;
-+				}
-+
-+				offset += u.cpb.cpt_next;
-+				continue;
-+			}
-+			if (u.pb.cpt_object != CPT_OBJ_PAGES) {
-+				eprintk_ctx("unknown vma fix object %d\n", u.pb.cpt_object);
-+				err = -EINVAL;
-+				goto out;
-+			}
-+			pos = offset + sizeof(u.pb);
-+			if (!(vmai->cpt_flags&VM_ACCOUNT) && !(prot&PROT_WRITE)) {
-+				/* I guess this is get_user_pages() messed things,
-+				 * this happens f.e. when gdb inserts breakpoints.
-+				 */
-+				int i;
-+				for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/PAGE_SIZE; i++) {
-+					struct page *page;
-+					void *maddr;
-+					err = get_user_pages(current, current->mm,
-+							     (unsigned long)u.pb.cpt_start + i*PAGE_SIZE,
-+							     1, 1, 1, &page, NULL);
-+					if (err == 0)
-+						err = -EFAULT;
-+					if (err < 0) {
-+						eprintk_ctx("get_user_pages: %d\n", err);
-+						goto out;
-+					}
-+					err = 0;
-+					maddr = kmap(page);
-+					if (u.pb.cpt_content == CPT_CONTENT_VOID) {
-+						memset(maddr, 0, PAGE_SIZE);
-+					} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
-+						err = ctx->pread(maddr, PAGE_SIZE,
-+								 ctx, pos + i*PAGE_SIZE);
-+						if (err) {
-+							kunmap(page);
-+							/* Drop the reference taken by
-+							 * get_user_pages(). */
-+							page_cache_release(page);
-+							goto out;
-+						}
-+					} else {
-+						err = -EINVAL;
-+						kunmap(page);
-+						/* Same here: do not leak the page. */
-+						page_cache_release(page);
-+						goto out;
-+					}
-+					set_page_dirty_lock(page);
-+					kunmap(page);
-+					page_cache_release(page);
-+				}
-+			} else {
-+				/* Write through the user mapping; a read-only area
-+				 * is made writable for the duration. */
-+				if (!(prot&PROT_WRITE))
-+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
-+				if (u.pb.cpt_content == CPT_CONTENT_VOID) {
-+					int i;
-+					for (i=0; i<(u.pb.cpt_end-u.pb.cpt_start)/sizeof(unsigned long); i++) {
-+						err = __put_user(0UL, ((unsigned long __user*)(unsigned long)u.pb.cpt_start) + i);
-+						if (err) {
-+							eprintk_ctx("__put_user 2 %d\n", err);
-+							goto out;
-+						}
-+					}
-+				} else if (u.pb.cpt_content == CPT_CONTENT_DATA) {
-+					loff_t tpos = pos;
-+					err = ctx->file->f_op->read(ctx->file, cpt_ptr_import(u.pb.cpt_start),
-+								    u.pb.cpt_end-u.pb.cpt_start,
-+								    &tpos);
-+					if (err != u.pb.cpt_end-u.pb.cpt_start) {
-+						if (err >= 0)
-+							err = -EIO;
-+						goto out;
-+					}
-+				} else {
-+					err = -EINVAL;
-+					goto out;
-+				}
-+				if (!(prot&PROT_WRITE))
-+					sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
-+			}
-+			err = 0;
-+			offset += u.pb.cpt_next;
-+		} while (offset < vmapos + vmai->cpt_next);
-+	}
-+
-+	/* Compare what we got with the image; some fixups restart the check. */
-+check:
-+	do {
-+		struct vm_area_struct *vma;
-+		down_read(&mm->mmap_sem);
-+		vma = find_vma(mm, addr);
-+		if (vma) {
-+			if ((vma->vm_flags^vmai->cpt_flags)&VM_READHINTMASK) {
-+				VM_ClearReadHint(vma);
-+				vma->vm_flags |= vmai->cpt_flags&VM_READHINTMASK;
-+			}
-+			if ((vma->vm_flags^vmai->cpt_flags)&VM_LOCKED) {
-+				dprintk_ctx("fixing up VM_LOCKED %Ld\n", vmapos);
-+				up_read(&mm->mmap_sem);
-+				if (vma->vm_flags&VM_LOCKED)
-+					err = sc_munlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
-+				else
-+					err = sc_mlock(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start);
-+				if (err)
-+					goto out;
-+				goto check;
-+			}
-+			if ((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&~__PAGE_NX)
-+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
-+					    (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
-+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-+			if (((vma->vm_page_prot.pgprot^vmai->cpt_pgprot)&__PAGE_NX) &&
-+			    (ctx->kernel_config_flags&CPT_KERNEL_CONFIG_PAE))
-+				wprintk_ctx("VMA %08lx@%ld pgprot mismatch %08Lx %08Lx\n", addr, (long)vmapos,
-+					    (__u64)vma->vm_page_prot.pgprot, (__u64)vmai->cpt_pgprot);
-+#endif
-+			if (vma->vm_flags != vmai->cpt_flags) {
-+				unsigned long x = vma->vm_flags ^ vmai->cpt_flags;
-+				if (x & VM_EXEC) {
-+					/* Crap. On i386 this is OK.
-+					 * It is impossible to make via mmap/mprotect
-+					 * exec.c clears VM_EXEC on stack. */
-+					vma->vm_flags &= ~VM_EXEC;
-+				} else if ((x & VM_ACCOUNT) && !checked) {
-+					/* Tried once only: toggle write permission
-+					 * and look at the flags again. */
-+					checked = 1;
-+					if (!(prot&PROT_WRITE)) {
-+						up_read(&mm->mmap_sem);
-+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot | PROT_WRITE);
-+						sc_mprotect(vmai->cpt_start, vmai->cpt_end-vmai->cpt_start, prot);
-+						goto check;
-+					}
-+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
-+						    (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
-+				} else {
-+					wprintk_ctx("VMA %08lx@%ld flag mismatch %08x %08x\n", addr, (long)vmapos,
-+						    (__u32)vma->vm_flags, (__u32)vmai->cpt_flags);
-+				}
-+			}
-+		} else {
-+			wprintk_ctx("no VMA for %08lx@%ld\n", addr, (long)vmapos);
-+		}
-+		up_read(&mm->mmap_sem);
-+	} while (0);
-+
-+out:
-+	if (file)
-+		fput(file);
-+	return err;
-+}
-+
-+/*
-+ * Rebuild the address space of current->mm from the mm image @vmi located
-+ * at @pos in the dump.
-+ *
-+ * Everything below TASK_SIZE is unmapped, the layout fields of the mm are
-+ * loaded from the image, and then each sub-object following the header is
-+ * restored: VMAs (do_rst_vma), the LDT (do_rst_ldt) and AIO contexts
-+ * (do_rst_aio). mm->def_flags is kept at 0 while the sub-objects are
-+ * restored and set from the image only at the very end.
-+ *
-+ * Returns 0 on success, -EINVAL for an unknown sub-object, or the error of
-+ * the failing helper.
-+ */
-+static int do_rst_mm(struct cpt_mm_image *vmi, loff_t pos, struct cpt_context *ctx)
-+{
-+	struct mm_struct *mm = current->mm;
-+	unsigned int saved_def_flags;
-+	int err = 0;
-+
-+	down_write(&mm->mmap_sem);
-+	do_munmap(mm, 0, TASK_SIZE);
-+
-+	mm->start_code = vmi->cpt_start_code;
-+	mm->end_code = vmi->cpt_end_code;
-+	mm->start_data = vmi->cpt_start_data;
-+	mm->end_data = vmi->cpt_end_data;
-+	mm->start_brk = vmi->cpt_start_brk;
-+	mm->brk = vmi->cpt_brk;
-+	mm->start_stack = vmi->cpt_start_stack;
-+	mm->arg_start = vmi->cpt_start_arg;
-+	mm->arg_end = vmi->cpt_end_arg;
-+	mm->env_start = vmi->cpt_start_env;
-+	mm->env_end = vmi->cpt_end_env;
-+	mm->def_flags = 0;
-+	saved_def_flags = vmi->cpt_def_flags;
-+
-+	mm->dumpable = (vmi->cpt_dumpable != 0);
-+	mm->vps_dumpable = (vmi->cpt_vps_dumpable != 0);
-+
-+#if 0 /* def CONFIG_HUGETLB_PAGE*/
-+/* NB: ? */
-+	int used_hugetlb;
-+#endif
-+	up_write(&mm->mmap_sem);
-+
-+	if (vmi->cpt_next > vmi->cpt_hdrlen) {
-+		loff_t offset = pos + vmi->cpt_hdrlen;
-+
-+		do {
-+			union {
-+				struct cpt_vma_image vmai;
-+				struct cpt_aio_ctx_image aioi;
-+				struct cpt_obj_bits bits;
-+			} u;
-+
-+			err = rst_get_object(-1, offset, &u, ctx);
-+			if (err)
-+				return err;
-+
-+			/* Dispatch on the object type found in the header. */
-+			if (u.vmai.cpt_object == CPT_OBJ_VMA) {
-+				err = do_rst_vma(&u.vmai, offset, pos, ctx);
-+			} else if (u.bits.cpt_object == CPT_OBJ_BITS &&
-+				   u.bits.cpt_content == CPT_CONTENT_MM_CONTEXT) {
-+				err = do_rst_ldt(&u.bits, offset, ctx);
-+			} else if (u.aioi.cpt_object == CPT_OBJ_AIO_CONTEXT) {
-+				err = do_rst_aio(&u.aioi, offset, ctx);
-+			} else {
-+				eprintk_ctx("unknown object %u in mm image\n", u.vmai.cpt_object);
-+				err = -EINVAL;
-+			}
-+			if (err)
-+				return err;
-+
-+			offset += u.vmai.cpt_next;
-+		} while (offset < pos + vmi->cpt_next);
-+	}
-+
-+	down_write(&mm->mmap_sem);
-+	mm->def_flags = saved_def_flags;
-+	up_write(&mm->mmap_sem);
-+
-+	return err;
-+}
-+
-+extern void exit_mm(struct task_struct * tsk);
-+
-+/*
-+ * Finish the mm setup of the current task from its task image.
-+ *
-+ *  - image has no mm: drop the mm of the current task, if it has one;
-+ *  - the mm at ti->cpt_mm is already registered in @ctx: nothing to do
-+ *    (current->mm must be that very mm, otherwise BUG());
-+ *  - otherwise: allocate an mm if the task has none, restore it with
-+ *    do_rst_mm() and register it under position ti->cpt_mm.
-+ *
-+ * Returns 0 on success or a negative errno.
-+ */
-+int rst_mm_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	struct cpt_mm_image *vmi;
-+	cpt_object_t *mobj;
-+	int err = 0;
-+
-+	/* Scratch page that receives the mm image header. */
-+	vmi = (struct cpt_mm_image *)__get_free_page(GFP_KERNEL);
-+	if (vmi == NULL)
-+		return -ENOMEM;
-+
-+	if (ti->cpt_mm == CPT_NULL) {
-+		if (current->mm)
-+			exit_mm(current);
-+		goto done;
-+	}
-+
-+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
-+	if (mobj) {
-+		if (current->mm != mobj->o_obj) BUG();
-+		goto done;
-+	}
-+
-+	if (current->mm == NULL) {
-+		struct mm_struct *fresh = mm_alloc();
-+
-+		if (fresh == NULL) {
-+			err = -ENOMEM;
-+			goto done;
-+		}
-+		err = init_new_context(current, fresh);
-+		if (err) {
-+			mmdrop(fresh);
-+			goto done;
-+		}
-+		current->mm = fresh;
-+	}
-+
-+	err = rst_get_object(CPT_OBJ_MM, ti->cpt_mm, vmi, ctx);
-+	if (err != 0)
-+		goto done;
-+
-+	err = do_rst_mm(vmi, ti->cpt_mm, ctx);
-+	if (err != 0) {
-+		eprintk_ctx("do_rst_mm %Ld\n", ti->cpt_mm);
-+		goto done;
-+	}
-+
-+	/* Register the mm so that later tasks can find it by position. */
-+	mobj = cpt_object_add(CPT_OBJ_MM, current->mm, ctx);
-+	if (mobj == NULL) {
-+		err = -ENOMEM;
-+	} else {
-+		err = 0;
-+		cpt_obj_setpos(mobj, ti->cpt_mm, ctx);
-+	}
-+
-+done:
-+	free_page((unsigned long)vmi);
-+	return err;
-+}
-+
-+/* This is part of mm setup, made in parent context. Mostly, it is the place,
-+ * where we graft mm of another process to child.
-+ */
-+
-+/*
-+ * Give the task behind @obj the mm its image asks for.
-+ *
-+ *  - image without mm: drop whatever mm the task has;
-+ *  - image mm already restored (found in @ctx by position): switch the
-+ *    task to it, unless the task already uses it;
-+ *  - image mm not restored yet: leave the task alone.
-+ *
-+ * Always returns 0.
-+ */
-+int rst_mm_basic(cpt_object_t *obj, struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	task_t *tsk = obj->o_obj;
-+	cpt_object_t *mobj;
-+
-+	/* Task without mm. Just get rid of this. */
-+	if (ti->cpt_mm == CPT_NULL) {
-+		if (tsk->mm) {
-+			mmput(tsk->mm);
-+			tsk->mm = NULL;
-+		}
-+		return 0;
-+	}
-+
-+	mobj = lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx);
-+	if (mobj) {
-+		struct mm_struct *newmm = mobj->o_obj;
-+		/* Good, the MM is already created. */
-+		if (newmm == tsk->mm) {
-+			/* Already done by clone(). */
-+			return 0;
-+		}
-+		/* The task may have no mm at all (the branch above allows for
-+		 * that as well); mmput() must not be handed NULL. */
-+		if (tsk->mm)
-+			mmput(tsk->mm);
-+		atomic_inc(&newmm->mm_users);
-+		tsk->mm = newmm;
-+		tsk->active_mm = newmm;
-+	}
-+	return 0;
-+}
-+
-+/* We use CLONE_VM when mm of child is going to be shared with parent.
-+ * Otherwise mm is copied.
-+ */
-+
-+/*
-+ * Clone flag to use when creating the task described by @ti: CLONE_VM when
-+ * the image has no mm or its mm is already registered in @ctx, 0 otherwise.
-+ */
-+__u32 rst_mm_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	if (ti->cpt_mm != CPT_NULL &&
-+	    !lookup_cpt_obj_bypos(CPT_OBJ_MM, ti->cpt_mm, ctx))
-+		return 0;
-+	return CLONE_VM;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_net.c linux-2.6.16-026test009/kernel/cpt/rst_net.c
---- linux-2.6.16.orig/kernel/cpt/rst_net.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_net.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,444 @@
-+/*
-+ *
-+ * kernel/cpt/rst_net.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/netdevice.h>
-+#include <linux/inetdevice.h>
-+#include <linux/rtnetlink.h>
-+#include <linux/ve.h>
-+#include <linux/ve_proto.h>
-+#include <net/route.h>
-+#include <net/ip_fib.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_kernel.h"
-+#include "cpt_net.h"
-+
-+#include "cpt_syscalls.h"
-+
-+extern struct in_ifaddr *inet_alloc_ifa(void);
-+extern int inet_insert_ifa(struct in_ifaddr *ifa);
-+
-+/*
-+ * Restore the interface addresses stored in the CPT_SECT_NET_IFADDR section.
-+ *
-+ * Each record is matched to the local "lo" or "venet0" device through the
-+ * old interface indices collected in @ctx (lo_index_old, venet_index_old).
-+ * Only AF_INET addresses on a known device are accepted. An address that
-+ * is already present (-EEXIST from inet_insert_ifa()) is not an error.
-+ *
-+ * Returns 0 on success (or when the section is absent), -EINVAL for a bad
-+ * section header, an unknown device or an unsupported family, -ENOBUFS when
-+ * the in_device or the address cannot be allocated, or the error returned
-+ * by ctx->pread(), rst_get_object() or inet_insert_ifa().
-+ */
-+int rst_restore_ifaddr(struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t sec = ctx->sections[CPT_SECT_NET_IFADDR];
-+	loff_t endsec;
-+	struct cpt_section_hdr h;
-+	struct cpt_ifaddr_image di;
-+	struct net_device *dev;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_NET_IFADDR || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	endsec = sec + h.cpt_next;
-+	sec += h.cpt_hdrlen;
-+	while (sec < endsec) {
-+		int cindex = -1;
-+		struct in_device *in_dev;
-+		struct in_ifaddr *ifa;
-+
-+		err = rst_get_object(CPT_OBJ_NET_IFADDR, sec, &di, ctx);
-+		if (err)
-+			return err;
-+
-+		/* Translate the interface index saved in the image. */
-+		if (di.cpt_index == ctx->lo_index_old)
-+			cindex = ctx->lo_index;
-+		else if (di.cpt_index == ctx->venet_index_old)
-+			cindex = ctx->venet_index;
-+		if (cindex <= 0)
-+			eprintk_ctx("unknown ifaddr for %d\n", di.cpt_index);
-+
-+		rtnl_lock();
-+		dev = __dev_get_by_index(cindex);
-+		if (!dev || di.cpt_family != AF_INET) {
-+			rtnl_unlock();
-+			eprintk_ctx("unknown ifaddr 2 for %d\n", di.cpt_index);
-+			return -EINVAL;
-+		}
-+
-+		in_dev = __in_dev_get_rtnl(dev);
-+		if (in_dev == NULL)
-+			in_dev = inetdev_init(dev);
-+		if (in_dev == NULL) {
-+			/* inetdev_init() failed; in_dev_hold() below must not
-+			 * be handed a NULL pointer. */
-+			rtnl_unlock();
-+			eprintk_ctx("cannot init in_device for %d\n", di.cpt_index);
-+			return -ENOBUFS;
-+		}
-+
-+		ifa = inet_alloc_ifa();
-+		if (ifa == NULL) {
-+			/* Report the failure instead of silently restoring
-+			 * the container without this address. */
-+			rtnl_unlock();
-+			eprintk_ctx("cannot allocate ifaddr for %d\n", di.cpt_index);
-+			return -ENOBUFS;
-+		}
-+
-+		ifa->ifa_local = di.cpt_address[0];
-+		ifa->ifa_address = di.cpt_peer[0];
-+		ifa->ifa_broadcast = di.cpt_broadcast[0];
-+		ifa->ifa_prefixlen = di.cpt_masklen;
-+		ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
-+		ifa->ifa_flags = di.cpt_flags;
-+		ifa->ifa_scope = di.cpt_scope;
-+		memcpy(ifa->ifa_label, di.cpt_label, IFNAMSIZ);
-+		in_dev_hold(in_dev);
-+		ifa->ifa_dev = in_dev;
-+		err = inet_insert_ifa(ifa);
-+		rtnl_unlock();
-+
-+		if (err && err != -EEXIST) {
-+			eprintk_ctx("add ifaddr err %d for %d %s\n", err, di.cpt_index, di.cpt_label);
-+			return err;
-+		}
-+		sec += di.cpt_next;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Patch a saved rtnetlink route message for replay on the restored host:
-+ * every RTA_OIF attribute is translated from the old interface index to
-+ * the current index of "lo" or "venet0".
-+ *
-+ * Returns a negative errno (-ENODEV) when an output interface is unknown,
-+ * otherwise 1 if the route has protocol RTPROT_KERNEL and 0 if not.
-+ */
-+static int rewrite_rtmsg(struct nlmsghdr *nlh, struct cpt_context *ctx)
-+{
-+	int min_len = NLMSG_LENGTH(sizeof(struct rtmsg));
-+	struct rtmsg *rtm = NLMSG_DATA(nlh);
-+	struct rtattr *rta;
-+	int attrlen;
-+
-+	/* No room for attributes: nothing to translate. */
-+	if (nlh->nlmsg_len <= min_len)
-+		return rtm->rtm_protocol == RTPROT_KERNEL;
-+
-+	attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-+	for (rta = (void*)nlh + NLMSG_ALIGN(min_len);
-+	     RTA_OK(rta, attrlen);
-+	     rta = RTA_NEXT(rta, attrlen)) {
-+		int *oif;
-+
-+		if (rta->rta_type != RTA_OIF)
-+			continue;
-+
-+		oif = (int*)RTA_DATA(rta);
-+		if (*oif == ctx->lo_index_old) {
-+			*oif = ctx->lo_index;
-+		} else if (*oif == ctx->venet_index_old) {
-+			*oif = ctx->venet_index;
-+		} else {
-+			eprintk_ctx("unknown iface %d\n", *oif);
-+			return -ENODEV;
-+		}
-+	}
-+	return rtm->rtm_protocol == RTPROT_KERNEL;
-+}
-+
-+/*
-+ * Replay the routes saved in the CPT_SECT_NET_ROUTE section.
-+ *
-+ * The section holds one CPT_OBJ_NET_ROUTE object whose payload is a
-+ * sequence of rtnetlink messages. Each message is read into a page, its
-+ * flags are reset to a create request, its interface indices are fixed
-+ * by rewrite_rtmsg(), and it is sent through a kernel NETLINK_ROUTE
-+ * socket. A netlink error reply is only logged (and not even that for
-+ * -EEXIST on RTPROT_KERNEL routes); it does not abort the restore.
-+ *
-+ * Returns 0 on success or when there is nothing to restore, -EINVAL for a
-+ * malformed section or message length, -ENOMEM, or the error of the
-+ * failing read/rewrite/send step.
-+ */
-+int rst_restore_route(struct cpt_context *ctx)
-+{
-+	int err;
-+	struct socket *sock;
-+	struct msghdr msg;
-+	struct iovec iov;
-+	struct sockaddr_nl nladdr;
-+	mm_segment_t oldfs;
-+	loff_t sec = ctx->sections[CPT_SECT_NET_ROUTE];
-+	loff_t endsec;
-+	struct cpt_section_hdr h;
-+	struct cpt_object_hdr v;
-+	char *pg;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_NET_ROUTE || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	if (h.cpt_hdrlen >= h.cpt_next)
-+		return 0;
-+
-+	sec += h.cpt_hdrlen;
-+	err = rst_get_object(CPT_OBJ_NET_ROUTE, sec, &v, ctx);
-+	if (err < 0)
-+		return err;
-+
-+	err = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE, &sock);
-+	if (err)
-+		return err;
-+
-+	pg = (char*)__get_free_page(GFP_KERNEL);
-+	if (pg == NULL) {
-+		err = -ENOMEM;
-+		goto out_sock;
-+	}
-+
-+	memset(&nladdr, 0, sizeof(nladdr));
-+	nladdr.nl_family = AF_NETLINK;
-+
-+	endsec = sec + v.cpt_next;
-+	sec += v.cpt_hdrlen;
-+
-+	while (sec < endsec) {
-+		struct nlmsghdr *n;
-+		struct nlmsghdr nh;
-+		int kernel_flag;
-+
-+		err = ctx->pread(&nh, sizeof(nh), ctx, sec);
-+		if (err)
-+			goto out_sock_pg;
-+		/* A message is at least its own header and has to fit into
-+		 * the page buffer. Without the lower bound a zero length
-+		 * would never advance "sec" and the loop would not end. */
-+		if (nh.nlmsg_len < sizeof(nh) || nh.nlmsg_len > PAGE_SIZE) {
-+			err = -EINVAL;
-+			goto out_sock_pg;
-+		}
-+		err = ctx->pread(pg, nh.nlmsg_len, ctx, sec);
-+		if (err)
-+			goto out_sock_pg;
-+
-+		n = (struct nlmsghdr*)pg;
-+		n->nlmsg_flags = NLM_F_REQUEST|NLM_F_APPEND|NLM_F_CREATE;
-+
-+		err = rewrite_rtmsg(n, ctx);
-+		if (err < 0)
-+			goto out_sock_pg;
-+		kernel_flag = err;
-+
-+		iov.iov_base=n;
-+		iov.iov_len=nh.nlmsg_len;
-+		msg.msg_name=&nladdr;
-+		msg.msg_namelen=sizeof(nladdr);
-+		msg.msg_iov=&iov;
-+		msg.msg_iovlen=1;
-+		msg.msg_control=NULL;
-+		msg.msg_controllen=0;
-+		msg.msg_flags=MSG_DONTWAIT;
-+
-+		/* The buffers are kernel memory, hence KERNEL_DS. */
-+		oldfs = get_fs(); set_fs(KERNEL_DS);
-+		err = sock_sendmsg(sock, &msg, nh.nlmsg_len);
-+		set_fs(oldfs);
-+
-+		if (err < 0)
-+			goto out_sock_pg;
-+		err = 0;
-+
-+		/* Reuse the page for the (optional) reply. */
-+		iov.iov_base=pg;
-+		iov.iov_len=PAGE_SIZE;
-+
-+		oldfs = get_fs(); set_fs(KERNEL_DS);
-+		err = sock_recvmsg(sock, &msg, PAGE_SIZE, MSG_DONTWAIT);
-+		set_fs(oldfs);
-+		if (err != -EAGAIN) {
-+			if (err == NLMSG_LENGTH(sizeof(struct nlmsgerr)) &&
-+			    n->nlmsg_type == NLMSG_ERROR) {
-+				struct nlmsgerr *e = NLMSG_DATA(n);
-+				if (e->error != -EEXIST || !kernel_flag)
-+					eprintk_ctx("NLMERR: %d\n", e->error);
-+			} else {
-+				eprintk_ctx("Res: %d %d\n", err, n->nlmsg_type);
-+			}
-+		}
-+		err = 0;
-+		sec += NLMSG_ALIGN(nh.nlmsg_len);
-+	}
-+
-+out_sock_pg:
-+	free_page((unsigned long)pg);
-+out_sock:
-+	sock_release(sock);
-+	return err;
-+}
-+
-+/*
-+ * Clear the disable_net flag of the VE identified by ctx->ve_id.
-+ * Returns 0, or -ESRCH when no such VE is found.
-+ */
-+int rst_resume_network(struct cpt_context *ctx)
-+{
-+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
-+
-+	if (ve == NULL)
-+		return -ESRCH;
-+
-+	ve->disable_net = 0;
-+	put_ve(ve);
-+	return 0;
-+}
-+
-+/*
-+ * Restore network device state from the CPT_SECT_NET_DEVICE section.
-+ *
-+ * Sets disable_net in the current VE, records the current interface
-+ * indices of "lo" and "venet0" in @ctx, then for every device record
-+ * remembers its old index (for "lo"/"venet0") and applies the saved
-+ * device flags when they differ. A failing dev_change_flags() and unknown
-+ * device names are only logged.
-+ *
-+ * Returns 0 on success, -EINVAL if "lo" or "venet0" is missing or the
-+ * section header is bad, or the error from ctx->pread()/rst_get_object().
-+ */
-+int rst_restore_netdev(struct cpt_context *ctx)
-+{
-+	struct cpt_section_hdr h;
-+	struct cpt_netdev_image di;
-+	struct net_device *dev;
-+	loff_t sec = ctx->sections[CPT_SECT_NET_DEVICE];
-+	loff_t endsec;
-+	int err;
-+
-+	get_exec_env()->disable_net = 1;
-+
-+	dev = __dev_get_by_name("lo");
-+	if (dev == NULL) {
-+		eprintk_ctx("cannot find loopback netdevice\n");
-+		return -EINVAL;
-+	}
-+	ctx->lo_index = dev->ifindex;
-+	ctx->lo_index_old = -1;
-+
-+	dev = __dev_get_by_name("venet0");
-+	if (dev == NULL) {
-+		eprintk_ctx("cannot find venet0 netdevice\n");
-+		return -EINVAL;
-+	}
-+	ctx->venet_index = dev->ifindex;
-+	ctx->venet_index_old = -1;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_NET_DEVICE || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	endsec = sec + h.cpt_next;
-+	for (sec += h.cpt_hdrlen; sec < endsec; sec += di.cpt_next) {
-+		err = rst_get_object(CPT_OBJ_NET_DEVICE, sec, &di, ctx);
-+		if (err)
-+			return err;
-+
-+		/* Remember the index the device had when it was dumped. */
-+		if (!strcmp(di.cpt_name, "lo"))
-+			ctx->lo_index_old = di.cpt_index;
-+		else if (!strcmp(di.cpt_name, "venet0"))
-+			ctx->venet_index_old = di.cpt_index;
-+		else
-+			eprintk_ctx("unknown interface %s\n", di.cpt_name);
-+
-+		dev = __dev_get_by_name(di.cpt_name);
-+		if (dev == NULL) {
-+			eprintk_ctx("unknown interface 2 %s\n", di.cpt_name);
-+			continue;
-+		}
-+		if (di.cpt_flags != dev->flags) {
-+			rtnl_lock();
-+			err = dev_change_flags(dev, di.cpt_flags);
-+			rtnl_unlock();
-+			if (err)
-+				eprintk_ctx("dev_change_flags err: %d\n", err);
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Body of the helper thread started by rst_restore_iptables(): make
-+ * pfd[0] (passed through @arg) its stdin, close every other descriptor
-+ * from 1 upwards and exec /sbin/iptables-restore -c.
-+ * Only returns (-1) if the exec fails.
-+ */
-+static int dumpfn(void *arg)
-+{
-+	int *pfd = arg;
-+	char *argv[] = { "iptables-restore", "-c", NULL };
-+	int fd;
-+	int ret;
-+
-+	if (pfd[0] != 0)
-+		sc_dup2(pfd[0], 0);
-+
-+	fd = 1;
-+	while (fd < current->files->fdt->max_fds) {
-+		sc_close(fd);
-+		fd++;
-+	}
-+
-+	module_put(THIS_MODULE);
-+
-+	set_fs(KERNEL_DS);
-+	ret = sc_execve("/sbin/iptables-restore", argv, NULL);
-+	eprintk("failed to exec /sbin/iptables-restore: %d\n", ret);
-+	return -1;
-+}
-+
-+/*
-+ * Restore iptables from the CPT_SECT_NET_IPTABLES section by piping the
-+ * saved text into a helper thread that execs iptables-restore (dumpfn()).
-+ *
-+ * The payload of the CPT_OBJ_NAME object is copied to the write end of
-+ * the pipe in small chunks; a chunk is cut at its first NUL byte. The
-+ * helper is then waited for. Read errors end the copy, and a failing wait
-+ * is only logged.
-+ *
-+ * Returns 0 when the helper was started (or nothing had to be restored),
-+ * -EINVAL for a bad section header, or the error from ctx->pread(),
-+ * rst_get_object(), sc_pipe() or local_kernel_thread().
-+ */
-+static int rst_restore_iptables(struct cpt_context * ctx)
-+{
-+	struct cpt_section_hdr h;
-+	struct cpt_object_hdr v;
-+	loff_t sec = ctx->sections[CPT_SECT_NET_IPTABLES];
-+	loff_t end;
-+	struct file *wpipe;
-+	int pfd[2];
-+	int child;
-+	int err;
-+	int n;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_NET_IPTABLES || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	/* Empty section is fine, a header longer than the section is not. */
-+	if (h.cpt_hdrlen == h.cpt_next)
-+		return 0;
-+	if (h.cpt_hdrlen > h.cpt_next)
-+		return -EINVAL;
-+
-+	sec += h.cpt_hdrlen;
-+	err = rst_get_object(CPT_OBJ_NAME, sec, &v, ctx);
-+	if (err < 0)
-+		return err;
-+
-+	err = sc_pipe(pfd);
-+	if (err < 0)
-+		return err;
-+
-+	child = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
-+	if (child < 0) {
-+		if (pfd[1] >= 0)
-+			sc_close(pfd[1]);
-+		if (pfd[0] >= 0)
-+			sc_close(pfd[0]);
-+		return child;
-+	}
-+
-+	/* Keep only a file reference to the write end. */
-+	wpipe = fget(pfd[1]);
-+	sc_close(pfd[1]);
-+	sc_close(pfd[0]);
-+
-+	ctx->file->f_pos = sec + v.cpt_hdrlen;
-+	end = sec + v.cpt_next;
-+	do {
-+		char buf[16];
-+		char *nul;
-+		mm_segment_t oldfs;
-+
-+		n = end - ctx->file->f_pos;
-+		if (n > sizeof(buf))
-+			n = sizeof(buf);
-+
-+		if (ctx->read(buf, n, ctx))
-+			break;
-+		nul = memchr(buf, 0, n);
-+		if (nul != NULL)
-+			n = nul - buf;
-+		oldfs = get_fs(); set_fs(KERNEL_DS);
-+		wpipe->f_op->write(wpipe, buf, n, &wpipe->f_pos);
-+		set_fs(oldfs);
-+	} while (ctx->file->f_pos < end);
-+
-+	fput(wpipe);
-+
-+	clear_tsk_thread_flag(current,TIF_SIGPENDING);
-+
-+	err = sc_waitx(child, 0);
-+	if (err < 0)
-+		eprintk_ctx("wait4: %d\n", err);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Restore the network state in order: devices, interface addresses,
-+ * routes, iptables, connection tracking. Stops at the first failing step
-+ * and returns its error; returns 0 when every step succeeds.
-+ */
-+int rst_restore_net(struct cpt_context *ctx)
-+{
-+	int err;
-+
-+	err = rst_restore_netdev(ctx);
-+	if (err)
-+		return err;
-+
-+	err = rst_restore_ifaddr(ctx);
-+	if (err)
-+		return err;
-+
-+	err = rst_restore_route(ctx);
-+	if (err)
-+		return err;
-+
-+	err = rst_restore_iptables(ctx);
-+	if (err)
-+		return err;
-+
-+	return rst_restore_ip_conntrack(ctx);
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_proc.c linux-2.6.16-026test009/kernel/cpt/rst_proc.c
---- linux-2.6.16.orig/kernel/cpt/rst_proc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_proc.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,606 @@
-+/*
-+ *
-+ * kernel/cpt/rst_proc.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/errno.h>
-+#include <linux/mm.h>
-+#include <linux/proc_fs.h>
-+#include <linux/smp_lock.h>
-+#include <asm/uaccess.h>
-+#include <linux/cpt_ioctl.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_dump.h"
-+#include "cpt_files.h"
-+#include "cpt_mm.h"
-+#include "cpt_kernel.h"
-+
-+MODULE_AUTHOR("Alexey Kuznetsov <alexey@sw.ru>");
-+MODULE_LICENSE("GPL");
-+
-+/* List of contexts and lock protecting the list */
-+struct list_head cpt_context_list;
-+spinlock_t cpt_context_lock;
-+
-+static int proc_read(char *buffer, char **start, off_t offset,
-+ int length, int *eof, void *data)
-+{ /*
-+ * read_proc handler installed by init_rst(): prints a header line and
-+ * then one line per context on cpt_context_list (pointer, context id,
-+ * VE id, state).
-+ * `begin` is the file offset that buffer[0] currently corresponds to,
-+ * `pos` the file offset just past the text produced so far.
-+ * Returns the number of bytes available at *start, clamped to
-+ * the range [0, length].
-+ */
-+ off_t pos = 0;
-+ off_t begin = 0;
-+ int len = 0;
-+ cpt_context_t *ctx;
-+
-+ len += sprintf(buffer, "Ctx Id VE State\n");
-+
-+ spin_lock(&cpt_context_lock);
-+
-+ list_for_each_entry(ctx, &cpt_context_list, ctx_list) {
-+ len += sprintf(buffer+len,"%p %08x %-8u %d",
-+ ctx,
-+ ctx->contextid,
-+ ctx->ve_id,
-+ ctx->ctx_state
-+ );
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ len += pagein_info_printf(buffer+len, ctx);
-+#endif
-+
-+ buffer[len++] = '\n';
-+
-+ pos = begin+len;
-+ if (pos < offset) { /* all text so far lies before the requested offset: discard it */
-+ len = 0;
-+ begin = pos;
-+ }
-+ if (pos > offset+length) /* enough text for this read; *eof is left untouched */
-+ goto done;
-+ }
-+ *eof = 1; /* the whole list was walked */
-+
-+done:
-+ spin_unlock(&cpt_context_lock);
-+ *start = buffer + (offset - begin); /* skip the part of the buffer that precedes offset */
-+ len -= (offset - begin);
-+ if(len > length)
-+ len = length;
-+ if(len < 0)
-+ len = 0;
-+ return len;
-+}
-+
-+void rst_context_release(cpt_context_t *ctx)
-+{ /*
-+ * Unlink ctx from the context list and free what it owns.
-+ * Entered with cpt_context_lock held: the lock is dropped right after
-+ * the list removal and taken again just before returning.
-+ * NOTE(review): ctx->lockfile and ctx->statusfile are not released in
-+ * this function; confirm that rst_close_dumpfile() or another path
-+ * drops them.
-+ */
-+ list_del(&ctx->ctx_list);
-+ spin_unlock(&cpt_context_lock);
-+
-+ if (ctx->ctx_state > 0) /* positive state: call rst_resume() before tearing down */
-+ rst_resume(ctx);
-+ ctx->ctx_state = CPT_CTX_ERROR;
-+
-+ rst_close_dumpfile(ctx);
-+
-+ if (ctx->anonvmas) { /* hash table of CPT_ANONVMA_HSIZE buckets stored in one page */
-+ int h;
-+ for (h = 0; h < CPT_ANONVMA_HSIZE; h++) {
-+ while (!hlist_empty(&ctx->anonvmas[h])) {
-+ struct hlist_node *elem = ctx->anonvmas[h].first;
-+ hlist_del(elem);
-+ kfree(elem); /* NOTE(review): frees the node pointer itself - presumably the node is the first member of its container; confirm */
-+ }
-+ }
-+ free_page((unsigned long)ctx->anonvmas);
-+ }
-+ cpt_flush_error(ctx); /* called while errorfile and error_msg are still present */
-+ if (ctx->errorfile) {
-+ fput(ctx->errorfile);
-+ ctx->errorfile = NULL;
-+ }
-+ if (ctx->error_msg) {
-+ free_page((unsigned long)ctx->error_msg);
-+ ctx->error_msg = NULL;
-+ }
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ if (ctx->pagein_file_out)
-+ fput(ctx->pagein_file_out);
-+ if (ctx->pagein_file_in)
-+ fput(ctx->pagein_file_in);
-+ if (ctx->pgin_task)
-+ put_task_struct(ctx->pgin_task);
-+#endif
-+ if (ctx->filejob_queue)
-+ rst_flush_filejobs(ctx);
-+ if (ctx->objcount) /* a non-zero object count at this point is only reported, not repaired */
-+ eprintk_ctx("%d objects leaked\n", ctx->objcount);
-+ kfree(ctx);
-+
-+ spin_lock(&cpt_context_lock); /* hand the lock back to the caller */
-+}
-+
-+/*
-+ * Drop one reference on ctx. Called with cpt_context_lock held; when the
-+ * count reaches zero the context is released.
-+ */
-+static void __cpt_context_put(cpt_context_t *ctx)
-+{
-+	ctx->refcount--;
-+	if (ctx->refcount == 0)
-+		rst_context_release(ctx);
-+}
-+
-+/*
-+ * Locked variant of the reference drop: takes cpt_context_lock, drops one
-+ * reference and releases the context if that was the last one.
-+ */
-+static void cpt_context_put(cpt_context_t *ctx)
-+{
-+	spin_lock(&cpt_context_lock);
-+	if (--ctx->refcount == 0)
-+		rst_context_release(ctx);
-+	spin_unlock(&cpt_context_lock);
-+}
-+
-+/*
-+ * Allocate and initialise a new restore context and append it to
-+ * cpt_context_list. A page for error messages is allocated as well; if
-+ * that allocation fails the context is still returned, with error_msg
-+ * left NULL. Returns NULL only when the context itself cannot be
-+ * allocated.
-+ */
-+cpt_context_t * rst_context_open(void)
-+{
-+	cpt_context_t *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-+
-+	if (ctx == NULL)
-+		return NULL;
-+
-+	rst_context_init(ctx);
-+
-+	spin_lock(&cpt_context_lock);
-+	list_add_tail(&ctx->ctx_list, &cpt_context_list);
-+	spin_unlock(&cpt_context_lock);
-+
-+	ctx->error_msg = (char*)__get_free_page(GFP_KERNEL);
-+	if (ctx->error_msg != NULL)
-+		ctx->error_msg[0] = 0;
-+
-+	return ctx;
-+}
-+
-+/*
-+ * Report a failed restore through the status file, if one is attached:
-+ * the fixed status code 7 is written to it, then the file reference is
-+ * dropped and ctx->statusfile cleared. The err argument is not used.
-+ */
-+void rst_report_error(int err, cpt_context_t *ctx)
-+{
-+	struct file *sf = ctx->statusfile;
-+	mm_segment_t saved_fs;
-+	int status = 7 /* VZ_ENVCREATE_ERROR */;
-+
-+	if (sf == NULL)
-+		return;
-+
-+	/* The buffer lives in kernel space, so widen the address limit. */
-+	saved_fs = get_fs();
-+	set_fs(KERNEL_DS);
-+	if (sf->f_op && sf->f_op->write)
-+		sf->f_op->write(sf, (char*)&status, sizeof(status), &sf->f_pos);
-+	set_fs(saved_fs);
-+
-+	fput(sf);
-+	ctx->statusfile = NULL;
-+}
-+
-+
-+/*
-+ * Find the context whose contextid equals ctxid. On a hit the context's
-+ * refcount is incremented under cpt_context_lock and the context is
-+ * returned; NULL is returned when no context matches.
-+ */
-+cpt_context_t * cpt_context_lookup(unsigned int ctxid)
-+{
-+	cpt_context_t *pos;
-+	cpt_context_t *found = NULL;
-+
-+	spin_lock(&cpt_context_lock);
-+	list_for_each_entry(pos, &cpt_context_list, ctx_list) {
-+		if (pos->contextid != ctxid)
-+			continue;
-+		pos->refcount++;
-+		found = pos;
-+		break;
-+	}
-+	spin_unlock(&cpt_context_lock);
-+
-+	return found;
-+}
-+
-+/*
-+ * Translate the fd argument of a CPT_SET_*FD command into a file reference.
-+ * A negative descriptor means "detach": *filp is left NULL and 0 is
-+ * returned. Otherwise a reference is taken with fget(); -EBADF is returned
-+ * when the descriptor is not open.
-+ */
-+static int rst_fd_arg_to_file(unsigned long arg, struct file **filp)
-+{
-+	/* arg is unsigned; the sign of the descriptor is only visible as int. */
-+	int fd = (int)arg;
-+
-+	*filp = NULL;
-+	if (fd < 0)
-+		return 0;
-+
-+	/* fget() reports failure with NULL, never with an ERR_PTR value. */
-+	*filp = fget(fd);
-+	return *filp ? 0 : -EBADF;
-+}
-+
-+/* Store newf in *slot, dropping the reference held on the previous file. */
-+static void rst_swap_file(struct file **slot, struct file *newf)
-+{
-+	if (*slot)
-+		fput(*slot);
-+	*slot = newf;
-+}
-+
-+/*
-+ * ioctl entry point of the "rst" proc file.
-+ *
-+ * CPT_TEST_CAPS is answered without a context. CPT_JOIN_CONTEXT and
-+ * CPT_PUT_CONTEXT replace the context bound to this file. Every other
-+ * command operates on the bound context, creating one on first use:
-+ * CPT_GET_CONTEXT assigns/pins a context id, the remaining commands run
-+ * under ctx->main_sem.
-+ *
-+ * The CPT_SET_*FD commands take a file descriptor in arg; a negative
-+ * descriptor detaches the current file, an invalid one yields -EBADF and
-+ * leaves the current file in place.
-+ *
-+ * The big kernel lock is dropped for the duration of the call and
-+ * re-taken before returning. Returns 0 or a negative errno.
-+ */
-+static int rst_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
-+{
-+	int err = 0;
-+	cpt_context_t *ctx;
-+	struct file *dfile = NULL;
-+
-+	unlock_kernel();
-+
-+	if (cmd == CPT_TEST_CAPS) {
-+		err = test_cpu_caps();
-+		goto out_lock;
-+	}
-+
-+	if (cmd == CPT_JOIN_CONTEXT || cmd == CPT_PUT_CONTEXT) {
-+		cpt_context_t *old_ctx;
-+
-+		ctx = NULL;
-+		if (cmd == CPT_JOIN_CONTEXT) {
-+			err = -ENOENT;
-+			ctx = cpt_context_lookup(arg);
-+			if (!ctx)
-+				goto out_lock;
-+		}
-+
-+		spin_lock(&cpt_context_lock);
-+		old_ctx = (cpt_context_t*)file->private_data;
-+		file->private_data = ctx;
-+
-+		if (old_ctx) {
-+			/* PUT also gives up the reference pinned by CPT_GET_CONTEXT. */
-+			if (cmd == CPT_PUT_CONTEXT && old_ctx->sticky) {
-+				old_ctx->sticky = 0;
-+				old_ctx->refcount--;
-+			}
-+			__cpt_context_put(old_ctx);
-+		}
-+		spin_unlock(&cpt_context_lock);
-+		err = 0;
-+		goto out_lock;
-+	}
-+
-+	spin_lock(&cpt_context_lock);
-+	ctx = (cpt_context_t*)file->private_data;
-+	if (ctx)
-+		ctx->refcount++;
-+	spin_unlock(&cpt_context_lock);
-+
-+	if (!ctx) {
-+		cpt_context_t *old_ctx;
-+
-+		err = -ENOMEM;
-+		ctx = rst_context_open();
-+		if (!ctx)
-+			goto out_lock;
-+
-+		/* Somebody may have bound a context while we were allocating. */
-+		spin_lock(&cpt_context_lock);
-+		old_ctx = (cpt_context_t*)file->private_data;
-+		if (!old_ctx) {
-+			ctx->refcount++;
-+			file->private_data = ctx;
-+		} else {
-+			old_ctx->refcount++;
-+			__cpt_context_put(ctx);
-+			ctx = old_ctx;
-+		}
-+		spin_unlock(&cpt_context_lock);
-+	}
-+
-+	if (cmd == CPT_GET_CONTEXT) {
-+		unsigned int contextid = (unsigned int)arg;
-+
-+		err = -EINVAL;
-+		if (ctx->contextid && ctx->contextid != contextid)
-+			goto out_nosem;
-+		if (!ctx->contextid) {
-+			cpt_context_t *c1 = cpt_context_lookup(contextid);
-+			if (c1) {
-+				cpt_context_put(c1);
-+				err = -EEXIST;
-+				goto out_nosem;
-+			}
-+			ctx->contextid = contextid;
-+		}
-+		spin_lock(&cpt_context_lock);
-+		if (!ctx->sticky) {
-+			ctx->sticky = 1;
-+			ctx->refcount++;
-+		}
-+		spin_unlock(&cpt_context_lock);
-+		err = 0;
-+		goto out_nosem;
-+	}
-+
-+	down(&ctx->main_sem);
-+
-+	err = -EBUSY;
-+	if (ctx->ctx_state < 0)
-+		goto out;
-+
-+	err = 0;
-+	switch (cmd) {
-+	case CPT_SET_DUMPFD:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		err = rst_fd_arg_to_file(arg, &dfile);
-+		if (err)
-+			break;
-+		/* A dump file must be readable. */
-+		if (dfile && (dfile->f_op == NULL ||
-+			      dfile->f_op->read == NULL)) {
-+			fput(dfile);
-+			err = -EBADF;
-+			break;
-+		}
-+		rst_swap_file(&ctx->file, dfile);
-+		break;
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+	case CPT_SET_PAGEINFDIN:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		err = rst_fd_arg_to_file(arg, &dfile);
-+		if (err)
-+			break;
-+		rst_swap_file(&ctx->pagein_file_in, dfile);
-+		break;
-+	case CPT_SET_PAGEINFDOUT:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		err = rst_fd_arg_to_file(arg, &dfile);
-+		if (err)
-+			break;
-+		rst_swap_file(&ctx->pagein_file_out, dfile);
-+		break;
-+	case CPT_PAGEIND:
-+		err = rst_pageind(ctx);
-+		break;
-+#endif
-+	case CPT_SET_LOCKFD:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		err = rst_fd_arg_to_file(arg, &dfile);
-+		if (err)
-+			break;
-+		rst_swap_file(&ctx->lockfile, dfile);
-+		break;
-+	case CPT_SET_STATUSFD:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		err = rst_fd_arg_to_file(arg, &dfile);
-+		if (err)
-+			break;
-+		rst_swap_file(&ctx->statusfile, dfile);
-+		break;
-+	case CPT_SET_ERRORFD:
-+		/* Unlike the other SET_*FD commands this one is allowed in any state. */
-+		err = rst_fd_arg_to_file(arg, &dfile);
-+		if (err)
-+			break;
-+		rst_swap_file(&ctx->errorfile, dfile);
-+		break;
-+	case CPT_SET_VEID:
-+		if (ctx->ctx_state > 0) {
-+			err = -EBUSY;
-+			break;
-+		}
-+		ctx->ve_id = arg;
-+		break;
-+	case CPT_UNDUMP:
-+		if (ctx->ctx_state > 0) {
-+			err = -ENOENT;
-+			break;
-+		}
-+		ctx->ctx_state = CPT_CTX_UNDUMPING;
-+		err = vps_rst_undump(ctx);
-+		if (err) {
-+			rst_report_error(err, ctx);
-+			/* Go back to idle only if the partial restore was killed. */
-+			if (rst_kill(ctx) == 0)
-+				ctx->ctx_state = CPT_CTX_IDLE;
-+		} else {
-+			ctx->ctx_state = CPT_CTX_UNDUMPED;
-+		}
-+		break;
-+	case CPT_RESUME:
-+		if (!ctx->ctx_state) {
-+			err = -ENOENT;
-+			break;
-+		}
-+		err = rst_resume(ctx);
-+		if (!err)
-+			ctx->ctx_state = CPT_CTX_IDLE;
-+		break;
-+	case CPT_KILL:
-+		if (!ctx->ctx_state) {
-+			err = -ENOENT;
-+			break;
-+		}
-+		err = rst_kill(ctx);
-+		if (!err)
-+			ctx->ctx_state = CPT_CTX_IDLE;
-+		break;
-+	default:
-+		err = -EINVAL;
-+		break;
-+	}
-+
-+out:
-+	cpt_flush_error(ctx);
-+	up(&ctx->main_sem);
-+out_nosem:
-+	cpt_context_put(ctx);
-+out_lock:
-+	lock_kernel();
-+	return err;
-+}
-+
-+/*
-+ * open() handler: pins the module for as long as the file is open.
-+ * Returns -EBUSY when the module reference cannot be taken.
-+ */
-+static int rst_open(struct inode * inode, struct file * file)
-+{
-+	return try_module_get(THIS_MODULE) ? 0 : -EBUSY;
-+}
-+
-+/*
-+ * release() handler: detaches the context bound to the file, drops the
-+ * file's reference on it and gives back the module reference taken in
-+ * rst_open(). Always returns 0.
-+ */
-+static int rst_release(struct inode * inode, struct file * file)
-+{
-+	cpt_context_t *bound;
-+
-+	spin_lock(&cpt_context_lock);
-+	bound = (cpt_context_t*)file->private_data;
-+	file->private_data = NULL;
-+	if (bound != NULL)
-+		__cpt_context_put(bound);
-+	spin_unlock(&cpt_context_lock);
-+
-+	module_put(THIS_MODULE);
-+	return 0;
-+}
-+
-+static struct file_operations rst_fops =
-+{ /* file operations of the "rst" proc entry; read/write/llseek are copied in by init_rst() from the fops the proc entry was created with */
-+ .owner = THIS_MODULE,
-+ .ioctl = rst_ioctl,
-+ .open = rst_open,
-+ .release = rst_release,
-+};
-+
-+
-+static struct proc_dir_entry *proc_ent;
-+extern void *schedule_tail_p;
-+extern void schedule_tail_hook(void);
-+
-+int debug_level = 1;
-+
-+static struct ctl_table_header *ctl_header;
-+
-+static ctl_table debug_table[] = { /* leaf entry "rst": exposes the integer debug_level through proc_dointvec, mode 0644 */
-+ {
-+ .ctl_name = 9476,
-+ .procname = "rst",
-+ .data = &debug_level,
-+ .maxlen = sizeof(debug_level),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ { .ctl_name = 0 } /* terminator */
-+};
-+static ctl_table root_table[] = { /* directory entry "debug" (CTL_DEBUG) whose child is debug_table; registered by init_rst() */
-+ {
-+ .ctl_name = CTL_DEBUG,
-+ .procname = "debug",
-+ .mode = 0555,
-+ .child = debug_table,
-+ },
-+ { .ctl_name = 0 } /* terminator */
-+};
-+
-+#ifdef CONFIG_X86_64
-+
-+/*
-+ * Return the address currently targeted by the instruction at
-+ * ret_from_fork. The code treats that instruction as 5 bytes long with a
-+ * 32-bit relative operand starting at byte 1, so the target is the
-+ * address of the following instruction (p + 5) plus the operand.
-+ */
-+static void *vzentry_forkret_get(void)
-+{
-+	unsigned char *p;
-+
-+	p = (unsigned char *)ret_from_fork;
-+	/*
-+	 * The operand is a signed displacement (vzentry_forkret_set() stores
-+	 * it as one). Reading it as u32 would zero-extend a backward
-+	 * displacement on a 64-bit kernel and yield an address 4GB too high.
-+	 */
-+	return (void *)(p + 5 + *(s32 *)(p + 1));
-+}
-+
-+/*
-+ * Redirect the instruction at ret_from_fork to `data` by rewriting its
-+ * 32-bit relative operand (bytes 1..4). The displacement is measured from
-+ * the end of the 5-byte instruction; a target that does not fit into a
-+ * signed 32-bit displacement is fatal.
-+ */
-+static void vzentry_forkret_set(void *data)
-+{
-+	unsigned char *insn = (unsigned char *)ret_from_fork;
-+	long rel = (unsigned long)data - (unsigned long)(insn + 5);
-+
-+	if (rel != (long)(s32)rel) {
-+		printk("vzentry_forkret_set: too long hook offset\n");
-+		BUG();
-+	}
-+
-+	*(u32 *)(insn + 1) = rel;
-+}
-+#endif
-+
-+static int __init init_rst(void)
-+{ /*
-+ * Module init: registers the sysctl table, initialises the context
-+ * list and its lock, creates the "rst" proc entry and routes its file
-+ * operations through rst_fops. On x86_64 the current target found at
-+ * ret_from_fork is saved in schedule_tail_p and replaced with
-+ * schedule_tail_hook.
-+ * Returns 0 on success, -ENOMEM if the sysctl table cannot be
-+ * registered, -EINVAL if the proc entry cannot be created.
-+ */
-+ int err;
-+
-+ err = -ENOMEM;
-+ ctl_header = register_sysctl_table(root_table, 0);
-+ if (!ctl_header)
-+ goto err_mon;
-+
-+ spin_lock_init(&cpt_context_lock);
-+ INIT_LIST_HEAD(&cpt_context_list);
-+
-+ err = -EINVAL;
-+ proc_ent = create_proc_entry("rst", 0600, NULL);
-+ if (!proc_ent)
-+ goto err_out;
-+
-+ rst_fops.read = proc_ent->proc_fops->read; /* keep the entry's own read/write/llseek ... */
-+ rst_fops.write = proc_ent->proc_fops->write;
-+ rst_fops.llseek = proc_ent->proc_fops->llseek;
-+ proc_ent->proc_fops = &rst_fops; /* ... while adding ioctl/open/release from rst_fops */
-+
-+ proc_ent->read_proc = proc_read;
-+ proc_ent->data = NULL;
-+ proc_ent->owner = THIS_MODULE;
-+#ifdef CONFIG_X86_64
-+ schedule_tail_p = vzentry_forkret_get(); /* remember the original target; exit_rst() writes it back */
-+ vzentry_forkret_set(&schedule_tail_hook);
-+#endif
-+ return 0;
-+
-+err_out:
-+ unregister_sysctl_table(ctl_header);
-+err_mon:
-+ return err;
-+}
-+module_init(init_rst);
-+
-+static void __exit exit_rst(void)
-+{ /*
-+ * Module exit: undoes init_rst() (ret_from_fork target on x86_64,
-+ * proc entry, sysctl table) and then releases every context that is
-+ * still on cpt_context_list.
-+ */
-+#ifdef CONFIG_X86_64
-+ /* This is wrong, of course, but it is still the best we can do. */
-+ vzentry_forkret_set(schedule_tail_p);
-+#endif
-+
-+ remove_proc_entry("rst", NULL);
-+ unregister_sysctl_table(ctl_header);
-+
-+ spin_lock(&cpt_context_lock);
-+ while (!list_empty(&cpt_context_list)) {
-+ cpt_context_t *ctx;
-+ ctx = list_entry(cpt_context_list.next, cpt_context_t, ctx_list);
-+
-+ if (!ctx->sticky) /* a sticky context already carries the extra reference taken by CPT_GET_CONTEXT */
-+ ctx->refcount++;
-+ ctx->sticky = 0;
-+
-+ BUG_ON(ctx->refcount != 1); /* any other count means somebody still uses the context */
-+
-+ __cpt_context_put(ctx); /* last reference: releases ctx and unlinks it from the list */
-+ }
-+ spin_unlock(&cpt_context_lock);
-+}
-+module_exit(exit_rst);
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_process.c linux-2.6.16-026test009/kernel/cpt/rst_process.c
---- linux-2.6.16.orig/kernel/cpt/rst_process.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_process.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,1254 @@
-+/*
-+ *
-+ * kernel/cpt/rst_process.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/virtinfo.h>
-+#include <linux/kmem_cache.h>
-+#include <linux/errno.h>
-+#include <linux/pagemap.h>
-+#include <linux/ptrace.h>
-+#include <linux/tty.h>
-+#include <asm/desc.h>
-+#include <asm/unistd.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_misc.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_files.h"
-+#include "cpt_mm.h"
-+#include "cpt_ubc.h"
-+#include "cpt_process.h"
-+#include "cpt_kernel.h"
-+
-+#ifdef CONFIG_X86_64 /* register accessors and syscall-number helpers for x86_64 */
-+
-+#define _TIF_RESUME (1<<22)
-+
-+#define SYSCALL_NR(regs) ((regs)->orig_rax) /* number of the syscall being executed */
-+#define SYSCALL_RETVAL(regs) ((regs)->rax) /* register holding the syscall return value */
-+#define SYSCALL_PC(regs) ((regs)->rip) /* user-space instruction pointer */
-+
-+#define ESP(tsk) (tsk)->thread.rsp /* saved kernel stack pointer of the task; used as an lvalue */
-+
-+#define __NR32_restart_syscall 0 /* __NR32_*: numbers compared against when _TIF_IA32 is set (see syscall_is) */
-+#define __NR32_rt_sigtimedwait 177
-+#define __NR32_pause 29
-+#define __NR32_futex 240
-+
-+#define syscall_is(tsk,regs,name) ((!((tsk)->thread_info->flags&_TIF_IA32) && \
-+ SYSCALL_NR(regs) == __NR_##name) || \
-+ (((tsk)->thread_info->flags&_TIF_IA32) && \
-+ SYSCALL_NR(regs) == __NR32_##name))
-+#else /* !CONFIG_X86_64: the same accessors for i386 */
-+
-+#define SYSCALL_NR(regs) ((regs)->orig_eax)
-+#define SYSCALL_RETVAL(regs) ((regs)->eax)
-+#define SYSCALL_PC(regs) ((regs)->eip)
-+
-+#define ESP(tsk) (tsk)->thread.esp
-+
-+#define syscall_is(tsk,regs,name) (SYSCALL_NR(regs) == __NR_##name)
-+
-+#undef task_pt_regs /* replace any earlier definition with one based on thread.esp0 */
-+#define task_pt_regs(t) ((struct pt_regs *)((t)->thread.esp0) - 1)
-+
-+#endif
-+
-+/*
-+ * Convert a checkpointed siginfo image into a kernel siginfo_t.
-+ * The destination is cleared first; signo/errno/code are copied as is,
-+ * and the class bits of cpt_code (masked with __SI_MASK) select how the
-+ * generic image fields (cpt_pid, cpt_uid, cpt_sigval, cpt_utime,
-+ * cpt_stime) map onto the class-specific members.
-+ */
-+static void decode_siginfo(siginfo_t *info, struct cpt_siginfo_image *si)
-+{
-+	memset(info, 0, sizeof(*info));
-+
-+	/* Fields shared by every siginfo class. */
-+	info->si_signo = si->cpt_signo;
-+	info->si_errno = si->cpt_errno;
-+	info->si_code = si->cpt_code;
-+
-+	switch (si->cpt_code & __SI_MASK) {
-+	case __SI_TIMER:
-+		info->si_tid = si->cpt_pid;
-+		info->si_overrun = si->cpt_uid;
-+		info->_sifields._timer._sigval.sival_ptr = cpt_ptr_import(si->cpt_sigval);
-+		info->si_sys_private = si->cpt_utime;
-+		break;
-+
-+	case __SI_POLL:
-+		info->si_band = si->cpt_pid;
-+		info->si_fd = si->cpt_uid;
-+		break;
-+
-+	case __SI_FAULT:
-+		info->si_addr = cpt_ptr_import(si->cpt_sigval);
-+#ifdef __ARCH_SI_TRAPNO
-+		info->si_trapno = si->cpt_pid;
-+#endif
-+		break;
-+
-+	case __SI_CHLD:
-+		info->si_pid = si->cpt_pid;
-+		info->si_uid = si->cpt_uid;
-+		info->si_status = si->cpt_sigval;
-+		info->si_stime = si->cpt_stime;
-+		info->si_utime = si->cpt_utime;
-+		break;
-+
-+	case __SI_KILL:
-+	case __SI_RT:
-+	case __SI_MESGQ:
-+	default:
-+		/* Sender pid/uid plus an opaque value. */
-+		info->si_pid = si->cpt_pid;
-+		info->si_uid = si->cpt_uid;
-+		info->si_ptr = cpt_ptr_import(si->cpt_sigval);
-+		break;
-+	}
-+}
-+
-+/*
-+ * Rebuild a pending-signal queue from checkpoint records.
-+ *
-+ * [start, end) is a kernel buffer holding a sequence of image records,
-+ * each advancing by its own cpt_next. Every CPT_OBJ_SIGINFO record is
-+ * turned into a freshly allocated sigqueue entry (charged to the current
-+ * beancounter and to the user recorded in the image) and appended to
-+ * queue->list; records of other types are skipped. The tsk argument is
-+ * not used.
-+ *
-+ * Returns 0 on success, -ENOMEM when an allocation or charge fails and
-+ * -EINVAL for a record of zero length. Entries queued before a failure
-+ * stay on the queue.
-+ */
-+static int restore_sigqueue(task_t *tsk,
-+			    struct sigpending *queue, unsigned long start,
-+			    unsigned long end)
-+{
-+	while (start < end) {
-+		struct cpt_siginfo_image *si = (struct cpt_siginfo_image *)start;
-+
-+		/* A zero-length record would never advance: refuse it
-+		 * instead of spinning forever on a damaged image. */
-+		if (si->cpt_next == 0)
-+			return -EINVAL;
-+
-+		if (si->cpt_object == CPT_OBJ_SIGINFO) {
-+			struct user_beancounter *ub;
-+			struct sigqueue *q = NULL;
-+			struct user_struct *up;
-+
-+			up = alloc_uid(si->cpt_user);
-+			if (!up)
-+				return -ENOMEM;
-+			q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
-+			if (!q) {
-+				free_uid(up);
-+				return -ENOMEM;
-+			}
-+			ub = get_beancounter(get_exec_ub());
-+			if (ub_siginfo_charge(q, ub)) {
-+				put_beancounter(ub);
-+				kmem_cache_free(sigqueue_cachep, q);
-+				free_uid(up);
-+				return -ENOMEM;
-+			}
-+
-+			INIT_LIST_HEAD(&q->list);
-+			/* Preallocated elements (posix timers) are not
-+			 * supported yet. It is safe to replace them with
-+			 * a private one. */
-+			q->flags = 0;
-+			q->user = up;
-+			atomic_inc(&q->user->sigpending);
-+			q->sig_ub = ub;
-+
-+			decode_siginfo(&q->info, si);
-+			list_add_tail(&q->list, &queue->list);
-+		}
-+		start += si->cpt_next;
-+	}
-+	return 0;
-+}
-+
-+int rst_process_linkage(cpt_context_t *ctx)
-+{ /*
-+ * For every CPT_OBJ_TASK object, bring the task's process group,
-+ * session and old tty process group in line with the values recorded
-+ * in its task image. Returns 0 on success, -EINVAL when a task object
-+ * has no task attached or a virtual pid from the image cannot be
-+ * translated by vpid_to_pid().
-+ */
-+ cpt_object_t *obj;
-+
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+ struct cpt_task_image *ti = obj->o_image;
-+
-+ if (tsk == NULL) {
-+ eprintk_ctx("task %u(%s) is missing\n", ti->cpt_pid, ti->cpt_comm);
-+ return -EINVAL;
-+ }
-+
-+ if (virt_pgid(tsk) != ti->cpt_pgrp) { /* process group differs from the image */
-+ int pid;
-+
-+ if ((pid = vpid_to_pid(ti->cpt_pgrp)) < 0) {
-+ eprintk_ctx("illegal PGRP " CPT_FID "\n", CPT_TID(tsk));
-+ return -EINVAL;
-+ }
-+
-+ write_lock_irq(&tasklist_lock);
-+ detach_pid(tsk, PIDTYPE_PGID);
-+ tsk->signal->pgrp = pid;
-+ set_virt_pgid(tsk, ti->cpt_pgrp);
-+ if (thread_group_leader(tsk)) /* only group leaders are re-attached */
-+ attach_pid(tsk, PIDTYPE_PGID, pid);
-+ write_unlock_irq(&tasklist_lock);
-+ }
-+ if (virt_sid(tsk) != ti->cpt_session) { /* session differs from the image */
-+ int pid;
-+
-+ if ((pid = vpid_to_pid(ti->cpt_session)) < 0) {
-+ eprintk_ctx("illegal SID " CPT_FID "\n", CPT_TID(tsk));
-+ return -EINVAL;
-+ }
-+
-+ write_lock_irq(&tasklist_lock);
-+ detach_pid(tsk, PIDTYPE_SID);
-+ tsk->signal->session = pid;
-+ set_virt_sid(tsk, ti->cpt_session);
-+ if (thread_group_leader(tsk))
-+ attach_pid(tsk, PIDTYPE_SID, pid);
-+ write_unlock_irq(&tasklist_lock);
-+ }
-+ if (ti->cpt_old_pgrp > 0 && tsk->signal->tty_old_pgrp == 0) { /* fill in tty_old_pgrp only if it is still unset */
-+ int pid;
-+
-+ if ((pid = vpid_to_pid(ti->cpt_old_pgrp)) < 0) {
-+ eprintk_ctx("illegal OLD_PGRP " CPT_FID "\n", CPT_TID(tsk));
-+ return -EINVAL;
-+ }
-+
-+ tsk->signal->tty_old_pgrp = pid;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Restore current->signal from the CPT_OBJ_SIGNAL_STRUCT image located at
-+ * ti->cpt_signal: process group, old tty process group, session,
-+ * controlling tty, shared pending set, group-exit/stop state and, when the
-+ * image carries a payload after its header, the shared pending queue.
-+ *
-+ * The context's scratch buffer is held for the duration of the call and
-+ * released on every exit path. Returns 0 on success or a negative errno;
-+ * a failure to rebuild the shared pending queue is reported to the
-+ * caller.
-+ */
-+static int restore_one_signal_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	int err;
-+	struct cpt_signal_image *si = cpt_get_buf(ctx);
-+
-+	current->signal->tty = NULL;
-+
-+	err = rst_get_object(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, si, ctx);
-+	if (err)
-+		goto out;
-+
-+	if (virt_pgid(current) != si->cpt_pgrp) {
-+		int pid = 0;
-+
-+		if (si->cpt_pgrp_type == CPT_PGRP_ORPHAN) {
-+			pid = alloc_pidmap();
-+			if (pid < 0) {
-+				err = -EINVAL;
-+				goto out;
-+			}
-+			if ((err = alloc_vpid(pid, si->cpt_pgrp)) < 0) {
-+				free_pidmap(pid);
-+				pid = 0;
-+				/* -EEXIST is tolerated: the existing
-+				 * mapping is looked up just below. */
-+				if (err != -EEXIST)
-+					goto out;
-+			}
-+		}
-+		if (pid ||
-+		    (pid = vpid_to_pid(si->cpt_pgrp)) > 0) {
-+			write_lock_irq(&tasklist_lock);
-+			detach_pid(current, PIDTYPE_PGID);
-+			current->signal->pgrp = pid;
-+			set_virt_pgid(current, si->cpt_pgrp);
-+			if (thread_group_leader(current))
-+				attach_pid(current, PIDTYPE_PGID, pid);
-+			write_unlock_irq(&tasklist_lock);
-+		}
-+	}
-+
-+	current->signal->tty_old_pgrp = 0;
-+	if ((int)si->cpt_old_pgrp > 0) {
-+		if (si->cpt_old_pgrp_type == CPT_PGRP_STRAY) {
-+			/* Only the number is kept: the pid is allocated
-+			 * and handed back to the pidmap right away. */
-+			current->signal->tty_old_pgrp = alloc_pidmap();
-+			if (current->signal->tty_old_pgrp < 0) {
-+				eprintk_ctx("failed to allocate stray tty_old_pgrp\n");
-+				err = -EINVAL;
-+				goto out;
-+			}
-+			free_pidmap(current->signal->tty_old_pgrp);
-+		} else {
-+			current->signal->tty_old_pgrp = vpid_to_pid(si->cpt_old_pgrp);
-+			if (current->signal->tty_old_pgrp < 0) {
-+				dprintk_ctx("forward old tty PGID\n");
-+				current->signal->tty_old_pgrp = 0;
-+			}
-+		}
-+	}
-+
-+	if (virt_sid(current) != si->cpt_session) {
-+		int pid = 0;
-+
-+		if (si->cpt_session_type == CPT_PGRP_ORPHAN) {
-+			pid = alloc_pidmap();
-+			if (pid < 0) {
-+				err = -EINVAL;
-+				goto out;
-+			}
-+			if ((err = alloc_vpid(pid, si->cpt_session)) < 0) {
-+				free_pidmap(pid);
-+				pid = 0;
-+				if (err != -EEXIST)
-+					goto out;
-+			}
-+		}
-+		if (pid ||
-+		    (pid = vpid_to_pid(si->cpt_session)) > 0) {
-+			write_lock_irq(&tasklist_lock);
-+			detach_pid(current, PIDTYPE_SID);
-+			set_virt_sid(current, si->cpt_session);
-+			current->signal->session = pid;
-+			if (thread_group_leader(current))
-+				attach_pid(current, PIDTYPE_SID, pid);
-+			write_unlock_irq(&tasklist_lock);
-+		}
-+	}
-+
-+	cpt_sigset_import(&current->signal->shared_pending.signal, si->cpt_sigpending);
-+	current->signal->leader = si->cpt_leader;
-+	if (si->cpt_ctty != CPT_NULL) {
-+		cpt_object_t *obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, si->cpt_ctty, ctx);
-+		if (obj) {
-+			struct tty_struct *tty = obj->o_obj;
-+			/* Attach only to a tty that has no session yet or
-+			 * already belongs to ours. */
-+			if (tty->session == 0 || tty->session == current->signal->session) {
-+				tty->session = current->signal->session;
-+				current->signal->tty = tty;
-+			} else {
-+				wprintk_ctx("tty session mismatch\n");
-+			}
-+		}
-+	}
-+
-+	if (si->cpt_curr_target)
-+		current->signal->curr_target = find_task_by_pid_ve(si->cpt_curr_target);
-+	current->signal->flags = 0;
-+	if (si->cpt_group_exit)
-+		current->signal->flags |= SIGNAL_GROUP_EXIT;
-+	current->signal->group_exit_code = si->cpt_group_exit_code;
-+	if (si->cpt_group_exit_task) {
-+		current->signal->group_exit_task = find_task_by_pid_ve(si->cpt_group_exit_task);
-+		if (current->signal->group_exit_task == NULL) {
-+			eprintk_ctx("oops, group_exit_task=NULL, pid=%u\n", si->cpt_group_exit_task);
-+			err = -EINVAL;
-+			goto out;
-+		}
-+	}
-+	current->signal->notify_count = si->cpt_notify_count;
-+	current->signal->group_stop_count = si->cpt_group_stop_count;
-+
-+	/* err may still hold a tolerated -EEXIST from above. */
-+	err = 0;
-+	if (si->cpt_next > si->cpt_hdrlen) {
-+		/* The payload after the header holds the queued siginfo records. */
-+		char *buf = kmalloc(si->cpt_next - si->cpt_hdrlen, GFP_KERNEL);
-+		if (buf == NULL) {
-+			err = -ENOMEM;
-+			goto out;
-+		}
-+		err = ctx->pread(buf, si->cpt_next - si->cpt_hdrlen, ctx,
-+				 ti->cpt_signal + si->cpt_hdrlen);
-+		if (!err)
-+			err = restore_sigqueue(current,
-+					       &current->signal->shared_pending, (unsigned long)buf,
-+					       (unsigned long)buf + si->cpt_next - si->cpt_hdrlen);
-+		kfree(buf);
-+	}
-+
-+out:
-+	cpt_release_buf(ctx);
-+	return err;
-+}
-+
-+/*
-+ * Restore current->sighand from the CPT_OBJ_SIGHAND_STRUCT image located
-+ * at ti->cpt_sighand. All _NSIG actions are first reset to SIG_DFL with
-+ * SA_ONESHOT | SA_NOMASK and an empty mask; then every
-+ * CPT_OBJ_SIGHANDLER record between the image header and its end
-+ * overrides the action slot named by its cpt_signo.
-+ *
-+ * Returns 0 on success, the error of rst_get_object() when a record
-+ * cannot be read, or -EINVAL when a record names a slot outside the
-+ * action table or has zero length.
-+ */
-+int restore_one_sighand_struct(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	int err;
-+	struct cpt_sighand_image si;
-+	int i;
-+	loff_t pos, endpos;
-+
-+	err = rst_get_object(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, &si, ctx);
-+	if (err)
-+		return err;
-+
-+	for (i=0; i<_NSIG; i++) {
-+		current->sighand->action[i].sa.sa_handler = SIG_DFL;
-+		current->sighand->action[i].sa.sa_restorer = 0;
-+		current->sighand->action[i].sa.sa_flags = SA_ONESHOT | SA_NOMASK;
-+		memset(&current->sighand->action[i].sa.sa_mask, 0, sizeof(sigset_t));
-+	}
-+
-+	pos = ti->cpt_sighand + si.cpt_hdrlen;
-+	endpos = ti->cpt_sighand + si.cpt_next;
-+	while (pos < endpos) {
-+		struct cpt_sighandler_image shi;
-+		struct k_sigaction *ka;
-+
-+		err = rst_get_object(CPT_OBJ_SIGHANDLER, pos, &shi, ctx);
-+		if (err)
-+			return err;
-+
-+		/* The slot index comes from the dump file: never let it
-+		 * address memory outside action[0.._NSIG-1]. */
-+		if ((unsigned long)shi.cpt_signo >= _NSIG)
-+			return -EINVAL;
-+		/* A record that does not advance would loop forever. */
-+		if (shi.cpt_next == 0)
-+			return -EINVAL;
-+
-+		ka = &current->sighand->action[shi.cpt_signo];
-+		ka->sa.sa_handler = (void*)(unsigned long)shi.cpt_handler;
-+		ka->sa.sa_restorer = (void*)(unsigned long)shi.cpt_restorer;
-+		ka->sa.sa_flags = shi.cpt_flags;
-+		cpt_sigset_import(&ka->sa.sa_mask, shi.cpt_mask);
-+		pos += shi.cpt_next;
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Compute the CLONE_* sharing flags for the task described by ti:
-+ * CLONE_THREAD when its signal_struct image has already been registered
-+ * in the context, CLONE_SIGHAND when it has no sighand image at all or
-+ * that image has already been registered.
-+ */
-+__u32 rst_signal_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	__u32 clone_flags = 0;
-+
-+	if (lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx) != NULL)
-+		clone_flags |= CLONE_THREAD;
-+
-+	if (ti->cpt_sighand == CPT_NULL)
-+		clone_flags |= CLONE_SIGHAND;
-+	else if (lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx) != NULL)
-+		clone_flags |= CLONE_SIGHAND;
-+
-+	return clone_flags;
-+}
-+
-+/*
-+ * Finish signal setup for the current task.
-+ *
-+ * For both the sighand and the signal image referenced by ti: if the
-+ * image is already registered in the context, the registered object must
-+ * be the one current uses (otherwise -EINVAL); if it is not registered
-+ * yet, current's structure is registered at the image position and
-+ * restored from the image. For an already registered signal_struct the
-+ * virtual pgid/sid of current are refreshed from it.
-+ *
-+ * Returns 0 on success, -EINVAL when either image is missing or does not
-+ * match, -ENOMEM when an object cannot be registered, or the error of
-+ * the restore helper.
-+ */
-+int rst_signal_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	int err;
-+	cpt_object_t *obj;
-+
-+	if (ti->cpt_signal == CPT_NULL || ti->cpt_sighand == CPT_NULL)
-+		return -EINVAL;
-+
-+	/* Signal handlers. */
-+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGHAND_STRUCT, ti->cpt_sighand, ctx);
-+	if (obj == NULL) {
-+		obj = cpt_object_add(CPT_OBJ_SIGHAND_STRUCT, current->sighand, ctx);
-+		if (obj == NULL)
-+			return -ENOMEM;
-+		cpt_obj_setpos(obj, ti->cpt_sighand, ctx);
-+		err = restore_one_sighand_struct(ti, ctx);
-+		if (err)
-+			return err;
-+	} else if (obj->o_obj != current->sighand) {
-+		return -EINVAL;
-+	}
-+
-+	/* Signal struct. */
-+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SIGNAL_STRUCT, ti->cpt_signal, ctx);
-+	if (obj == NULL) {
-+		obj = cpt_object_add(CPT_OBJ_SIGNAL_STRUCT, current->signal, ctx);
-+		if (obj == NULL)
-+			return -ENOMEM;
-+		cpt_obj_setpos(obj, ti->cpt_signal, ctx);
-+		err = restore_one_signal_struct(ti, ctx);
-+		if (err)
-+			return err;
-+	} else {
-+		if (obj->o_obj != current->signal)
-+			return -EINVAL;
-+		if (current->signal) {
-+			set_virt_pgid(current, pid_type_to_vpid(PIDTYPE_PGID, current->signal->pgrp));
-+			set_virt_sid(current, pid_type_to_vpid(PIDTYPE_SID, current->signal->session));
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Map a segment id stored in the dump to a segment selector value.
-+ * CPT_SEG_ZERO maps to 0; ids up to CPT_SEG_TLS3 map to the TLS slots
-+ * starting at GDT_ENTRY_TLS_MIN (low bits 3); ids from CPT_SEG_LDT upward
-+ * are indices of LDT entries (low bits 7); the remaining ids name the
-+ * standard user code/data selectors. An unknown id is logged and
-+ * mapped to 0.
-+ */
-+static u32 decode_segment(u32 segid)
-+{
-+	if (segid == CPT_SEG_ZERO)
-+		return 0;
-+
-+	/* TLS descriptors */
-+	if (segid <= CPT_SEG_TLS3) {
-+		u32 gdt_slot = GDT_ENTRY_TLS_MIN + segid-CPT_SEG_TLS1;
-+
-+		return (gdt_slot << 3) + 3;
-+	}
-+
-+	/* LDT descriptor, it is just an index to LDT array */
-+	if (segid >= CPT_SEG_LDT) {
-+		u32 ldt_slot = segid - CPT_SEG_LDT;
-+
-+		return (ldt_slot << 3) | 7;
-+	}
-+
-+	/* Check for one of standard descriptors */
-+#ifdef CONFIG_X86_64
-+	if (segid == CPT_SEG_USER32_DS)
-+		return __USER32_DS;
-+	else if (segid == CPT_SEG_USER32_CS)
-+		return __USER32_CS;
-+	else if (segid == CPT_SEG_USER64_DS)
-+		return __USER_DS;
-+	else if (segid == CPT_SEG_USER64_CS)
-+		return __USER_CS;
-+#else
-+	if (segid == CPT_SEG_USER32_DS)
-+		return __USER_DS;
-+	else if (segid == CPT_SEG_USER32_CS)
-+		return __USER_CS;
-+#endif
-+
-+	wprintk("Invalid segment reg %d\n", segid);
-+	return 0;
-+}
-+
-+/*
-+ * Restore hook ("rct"): reinstates the clear_child_tid and set_child_tid
-+ * pointers of the current task from the two-word argument block, drops
-+ * one module reference and returns the address just past that block.
-+ */
-+unsigned long rct(unsigned long *child_tids)
-+{
-+	unsigned long *past_args = child_tids + 2;
-+
-+	dprintk("rct: " CPT_FID "\n", CPT_TID(current));
-+
-+	current->clear_child_tid = (void*)child_tids[0];
-+	current->set_child_tid = (void*)child_tids[1];
-+
-+	module_put(THIS_MODULE);
-+	return (unsigned long)past_args;
-+}
-+
-+unsigned long rlsi(void)
-+{ /*
-+ * Restore hook ("rlsi") for a task that was checkpointed inside a
-+ * ptrace stop. current->pn_state tells which kind of stop it was and
-+ * current->last_siginfo holds the siginfo of that stop. Depending on
-+ * the kind of stop the syscall registers are adjusted, the task
-+ * exits, or the signal is discarded or sent to the task again.
-+ * Every returning path drops one module reference and returns the
-+ * address just past the siginfo (info+1).
-+ */
-+ int signr;
-+ siginfo_t *info = current->last_siginfo;
-+ struct pt_regs *regs = task_pt_regs(current);
-+ struct k_sigaction *ka;
-+ int ptrace_id;
-+
-+ dprintk("rlsi: " CPT_FID "\n", CPT_TID(current));
-+
-+ spin_lock_irq(&current->sighand->siglock);
-+ current->last_siginfo = NULL;
-+ recalc_sigpending();
-+
-+ ptrace_id = current->pn_state;
-+ clear_pn_state(current);
-+
-+ switch (ptrace_id) {
-+ case PN_STOP_TF:
-+ case PN_STOP_TF_RT:
-+ /* frame_*signal */
-+ dprintk("SIGTRAP %u/%u(%s) %u/%u %u %ld %lu %lu\n",
-+ virt_pid(current), current->pid, current->comm,
-+ info->si_signo, info->si_code,
-+ current->exit_code, SYSCALL_NR(regs),
-+ current->ptrace, current->ptrace_message);
-+ goto out;
-+ case PN_STOP_ENTRY:
-+ case PN_STOP_LEAVE:
-+ /* do_syscall_trace */
-+ spin_unlock_irq(&current->sighand->siglock); /* this path leaves through out_nolock */
-+ dprintk("ptrace do_syscall_trace: %d %d\n", ptrace_id, current->exit_code);
-+ if (current->exit_code) { /* a non-zero exit_code is sent to the task as a signal */
-+ send_sig(current->exit_code, current, 1);
-+ current->exit_code = 0;
-+ }
-+ if (ptrace_id == PN_STOP_ENTRY && SYSCALL_RETVAL(regs) == -ENOSYS) {
-+ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs); /* put the syscall number back and step the PC back by 2 - presumably to re-execute the syscall instruction; confirm */
-+ SYSCALL_PC(regs) -= 2;
-+ } else if (syscall_is(current, regs, rt_sigtimedwait)) {
-+ if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR) {
-+ SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
-+ SYSCALL_PC(regs) -= 2;
-+ }
-+ }
-+ goto out_nolock;
-+ case PN_STOP_FORK:
-+ /* fork */
-+ SYSCALL_RETVAL(regs) = current->ptrace_message;
-+ dprintk("ptrace fork returns pid %ld\n", SYSCALL_RETVAL(regs));
-+ goto out;
-+ case PN_STOP_VFORK:
-+ /* after vfork */
-+ SYSCALL_RETVAL(regs) = current->ptrace_message;
-+ dprintk("ptrace after vfork returns pid %ld\n", SYSCALL_RETVAL(regs));
-+ goto out;
-+ case PN_STOP_SIGNAL:
-+ /* normal case : dequeue signal */
-+ break;
-+ case PN_STOP_EXIT:
-+ dprintk("ptrace exit caught\n");
-+ current->ptrace &= ~PT_TRACE_EXIT;
-+ spin_unlock_irq(&current->sighand->siglock);
-+ module_put(THIS_MODULE);
-+ complete_and_exit(NULL, current->ptrace_message); /* the task exits here with ptrace_message as its code */
-+ BUG();
-+ case PN_STOP_EXEC:
-+ eprintk("ptrace after exec caught: must not happen\n");
-+ BUG();
-+ default:
-+ eprintk("ptrace with unknown identity %d\n", ptrace_id);
-+ BUG();
-+ }
-+
-+ signr = current->exit_code; /* only PN_STOP_SIGNAL reaches this point */
-+ if (signr == 0) {
-+ dprintk("rlsi: canceled signal %d\n", info->si_signo);
-+ goto out;
-+ }
-+ current->exit_code = 0;
-+
-+ if (signr != info->si_signo) { /* signal number was changed: rewrite the siginfo as a SI_USER signal from the parent */
-+ info->si_signo = signr;
-+ info->si_errno = 0;
-+ info->si_code = SI_USER;
-+ info->si_pid = virt_pid(current->parent);
-+ info->si_uid = current->parent->uid;
-+ }
-+
-+ /* If the (new) signal is now blocked, requeue it. */
-+ if (sigismember(&current->blocked, signr)) {
-+ dprintk("going to requeue signal %d\n", signr);
-+ goto out_resend_sig;
-+ }
-+
-+ ka = &current->sighand->action[signr-1]; /* action[] is indexed by signal number minus one */
-+ if (ka->sa.sa_handler == SIG_IGN) { /* ignored signals are dropped, not sent again */
-+ dprintk("going to resend signal %d (ignored)\n", signr);
-+ goto out;
-+ }
-+ if (ka->sa.sa_handler != SIG_DFL) {
-+ dprintk("going to resend signal %d (not SIG_DFL)\n", signr);
-+ goto out_resend_sig;
-+ }
-+ if (signr == SIGCONT || /* with SIG_DFL these signals, and any signal for pid 1, are dropped */
-+ signr == SIGCHLD ||
-+ signr == SIGWINCH ||
-+ signr == SIGURG ||
-+ current->pid == 1)
-+ goto out;
-+
-+ /* All the rest, which we cannot handle are requeued. */
-+ dprintk("going to resend signal %d (sigh)\n", signr);
-+out_resend_sig:
-+ spin_unlock_irq(&current->sighand->siglock);
-+ send_sig_info(signr, info, current);
-+ module_put(THIS_MODULE);
-+ return (unsigned long)(info+1);
-+
-+out:
-+ spin_unlock_irq(&current->sighand->siglock);
-+out_nolock:
-+ module_put(THIS_MODULE);
-+ return (unsigned long)(info+1);
-+}
-+
-+static void ret_finish_stop(void)
-+{ /* Restore hook ("rfs"): clears the stop state and exit_code of the current task, then drops one module reference. */
-+ /* ...
-+ * do_signal() ->
-+ * get_signal_to_deliver() ->
-+ * do_signal_stop() ->
-+ * finish_stop()
-+ *
-+ * Normally after SIGCONT it will dequeue the next signal. If no signal
-+ * is found, do_signal restarts syscall unconditionally.
-+ * Otherwise signal handler is pushed on user stack.
-+ */
-+
-+ dprintk("rfs: " CPT_FID "\n", CPT_TID(current));
-+
-+ clear_stop_state(current);
-+ current->exit_code = 0;
-+
-+ module_put(THIS_MODULE);
-+}
-+
-+/*
-+ * Arrange for the interrupted syscall to be issued again: the syscall
-+ * number is put back into the return-value register and the user PC is
-+ * moved back by two bytes.
-+ */
-+static inline void rrs_reissue_syscall(struct pt_regs *regs)
-+{
-+	SYSCALL_RETVAL(regs) = SYSCALL_NR(regs);
-+	SYSCALL_PC(regs) -= 2;
-+}
-+
-+/*
-+ * Restore hook ("rrs") run when an interrupted syscall has to be
-+ * completed. pause() that returned -ERESTARTNOHAND goes back to sleep;
-+ * rt_sigtimedwait() and futex() are reissued for their interrupted
-+ * return codes. Afterwards, if no signal is pending, the generic
-+ * -ERESTART* codes are handled: the syscall is reissued, or
-+ * restart_syscall is selected for -ERESTART_RESTARTBLOCK. Drops one
-+ * module reference before returning.
-+ */
-+static void ret_restart_sys(void)
-+{
-+	struct pt_regs *regs = task_pt_regs(current);
-+
-+	dprintk("rrs: " CPT_FID "\n", CPT_TID(current));
-+
-+	if (syscall_is(current,regs,pause)) {
-+		if (SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) {
-+			current->state = TASK_INTERRUPTIBLE;
-+			schedule();
-+		}
-+	} else if (syscall_is(current,regs,rt_sigtimedwait)) {
-+		if (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR)
-+			rrs_reissue_syscall(regs);
-+	} else if (syscall_is(current,regs,futex)) {
-+		if (SYSCALL_RETVAL(regs) == -EINTR)
-+			rrs_reissue_syscall(regs);
-+	}
-+
-+	if (signal_pending(current))
-+		goto done;
-+
-+	if (SYSCALL_RETVAL(regs) == -ERESTARTSYS ||
-+	    SYSCALL_RETVAL(regs) == -ERESTARTNOINTR ||
-+	    SYSCALL_RETVAL(regs) == -ERESTARTNOHAND) {
-+		rrs_reissue_syscall(regs);
-+	} else if (SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK) {
-+		/* Continue through restart_syscall instead of the original call. */
-+		SYSCALL_RETVAL(regs) = __NR_restart_syscall;
-+#ifdef CONFIG_X86_64
-+		if (current->thread_info->flags&_TIF_IA32)
-+			SYSCALL_RETVAL(regs) = __NR32_restart_syscall;
-+#endif
-+		SYSCALL_PC(regs) -= 2;
-+	}
-+
-+done:
-+	module_put(THIS_MODULE);
-+}
-+
-+extern void ret_last_siginfo(void);
-+extern void ret_child_tid(void);
-+extern void ret_from_rst(void);
-+extern void pre_ret_from_fork(void);
-+
-+#ifndef CONFIG_X86_64
-+
-+/* tsk->thread.eip points to pre_ret_from_fork
-+ * Stack layout:
-+ * [eip of the last hook]
-+ * [args of the last hook]
-+ * [eip of previous hook]
-+ * [args of previous hook]
-+ * ...
-+ * [eip of the first hook]
-+ * [args of the first hook]
-+ * [ret_from_rst]
-+ */
-+
-+/*
-+ * Push one restore hook onto the kernel stack of tsk (i386 variant).
-+ * The previous thread.eip is saved on the stack, argsize bytes are
-+ * reserved below it for the hook's arguments and thread.eip is pointed at
-+ * the hook. A module reference is taken for every hook and *hooks is
-+ * incremented. Returns the address of the reserved argument area.
-+ */
-+static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks)
-+{
-+	unsigned long *saved_eip;
-+
-+	/* Slot holding where to continue once this hook has run. */
-+	ESP(tsk) -= sizeof(unsigned long);
-+	saved_eip = (unsigned long *)ESP(tsk);
-+	*saved_eip = tsk->thread.eip;
-+
-+	/* Argument area of the hook. */
-+	ESP(tsk) -= argsize;
-+	tsk->thread.eip = (unsigned long)hook;
-+
-+	if (!try_module_get(THIS_MODULE))
-+		BUG();
-+	*hooks += 1;
-+
-+	return (void*)ESP(tsk);
-+}
-+
-+/*
-+ * Load the saved user register frame and thread state of @tsk from an
-+ * i386 register image (i386 variant).
-+ *
-+ * @tsk:  task being restored
-+ * @regs: pt_regs area of @tsk that receives the register values
-+ * @ti:   task image (not used by this variant)
-+ * @b:    register image; must be of type CPT_OBJ_X86_REGS
-+ *
-+ * Returns 0 on success, -EINVAL if @b is not a CPT_OBJ_X86_REGS object.
-+ */
-+static int restore_registers(task_t *tsk, struct pt_regs *regs,
-+ struct cpt_task_image *ti, struct cpt_x86_regs *b)
-+{
-+ if (b->cpt_object != CPT_OBJ_X86_REGS)
-+ return -EINVAL;
-+
-+ /* Saved kernel stack starts at the register frame; the task resumes
-+  * at ret_from_rst.
-+  */
-+ tsk->thread.esp = (unsigned long) regs;
-+ tsk->thread.esp0 = (unsigned long) (regs+1);
-+ tsk->thread.eip = (unsigned long) ret_from_rst;
-+
-+ tsk->thread.fs = decode_segment(b->cpt_fs);
-+ tsk->thread.gs = decode_segment(b->cpt_gs);
-+ tsk->thread.debugreg[0] = b->cpt_debugreg[0];
-+ tsk->thread.debugreg[1] = b->cpt_debugreg[1];
-+ tsk->thread.debugreg[2] = b->cpt_debugreg[2];
-+ tsk->thread.debugreg[3] = b->cpt_debugreg[3];
-+ tsk->thread.debugreg[4] = b->cpt_debugreg[4];
-+ tsk->thread.debugreg[5] = b->cpt_debugreg[5];
-+ tsk->thread.debugreg[6] = b->cpt_debugreg[6];
-+ tsk->thread.debugreg[7] = b->cpt_debugreg[7];
-+
-+ /* NOTE(review): assumes the image fields starting at cpt_ebx are laid
-+  * out exactly like struct pt_regs -- confirm against the image format.
-+  */
-+ memcpy(regs, &b->cpt_ebx, sizeof(struct pt_regs));
-+
-+ /* Segment registers are stored encoded and are decoded after the
-+  * bulk copy.
-+  */
-+ regs->xcs = decode_segment(b->cpt_xcs);
-+ regs->xss = decode_segment(b->cpt_xss);
-+ regs->xds = decode_segment(b->cpt_xds);
-+ regs->xes = decode_segment(b->cpt_xes);
-+
-+ return 0;
-+}
-+
-+#else
-+
-+/* Stack layout:
-+ *
-+ * [eip of the last hook]
-+ * [args of the last hook]
-+ * ...
-+ * [eip of the first hook]
-+ * [args of the first hook]
-+ * [ret_from_fork+5]
-+ */
-+
-+/*
-+ * Queue a restore hook on the saved kernel stack of @tsk (x86_64 variant).
-+ *
-+ * @tsk:     task whose saved stack is modified
-+ * @hook:    function whose address is pushed on the stack
-+ * @argsize: number of bytes reserved above the hook address for arguments
-+ * @hooks:   counter of queued hooks, incremented on each call
-+ *
-+ * When the first hook is queued (*hooks == 0) the address of
-+ * ret_from_fork2 is pushed first and _TIF_RESUME is set.  Returns a
-+ * pointer to the reserved argument area, which sits just above the pushed
-+ * hook address.  Takes a reference on this module; BUG()s if that fails.
-+ */
-+static void * add_hook(task_t *tsk, void (*hook)(void), int argsize, int *hooks)
-+{
-+ if (!*hooks) {
-+ extern void ret_from_fork2(void);
-+ ESP(tsk) -= sizeof(unsigned long);
-+ *(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2;
-+ tsk->thread_info->flags |= _TIF_RESUME;
-+ }
-+ /* One slot for the hook address plus the argument area */
-+ ESP(tsk) -= argsize + sizeof(unsigned long);
-+ *(unsigned long*)ESP(tsk) = (unsigned long)hook;
-+ if (!try_module_get(THIS_MODULE)) BUG();
-+ (*hooks)++;
-+ return (void*)(ESP(tsk) + sizeof(unsigned long));
-+}
-+
-+/*
-+ * Fill a 64-bit register frame @d from a 32-bit register image @s.
-+ * Every field of @d that has no source in @s is left zeroed.
-+ */
-+static void xlate_ptregs_32_to_64(struct pt_regs *d, struct cpt_x86_regs *s)
-+{
-+	memset(d, 0, sizeof(*d));
-+
-+	/* eax/orig_eax go through s32 so that negative values keep their
-+	 * sign when widened.
-+	 */
-+	d->rax      = (s32)s->cpt_eax;
-+	d->orig_rax = (s32)s->cpt_orig_eax;
-+
-+	/* General purpose registers */
-+	d->rbx = s->cpt_ebx;
-+	d->rcx = s->cpt_ecx;
-+	d->rdx = s->cpt_edx;
-+	d->rsi = s->cpt_esi;
-+	d->rdi = s->cpt_edi;
-+	d->rbp = s->cpt_ebp;
-+
-+	/* Instruction pointer, stack pointer, flags and their segments */
-+	d->rip    = s->cpt_eip;
-+	d->cs     = s->cpt_xcs;
-+	d->eflags = s->cpt_eflags;
-+	d->rsp    = s->cpt_esp;
-+	d->ss     = s->cpt_xss;
-+}
-+
-+/*
-+ * Load the saved user register frame and thread state of @tsk from a
-+ * register image (x86_64 variant).
-+ *
-+ * @tsk:  task being restored
-+ * @regs: pt_regs area of @tsk that receives the register values
-+ * @ti:   task image (not used by this variant)
-+ * @hdr:  register image; either CPT_OBJ_X86_64_REGS (native 64-bit image)
-+ *        or CPT_OBJ_X86_REGS (32-bit image translated to a 64-bit frame)
-+ *
-+ * Returns 0 on success, -EINVAL for any other object type.
-+ */
-+static int restore_registers(task_t *tsk, struct pt_regs *regs,
-+ struct cpt_task_image *ti, struct cpt_obj_bits *hdr)
-+{
-+ if (hdr->cpt_object == CPT_OBJ_X86_64_REGS) {
-+ struct cpt_x86_64_regs *b = (void*)hdr;
-+
-+ tsk->thread.rsp = (unsigned long) regs;
-+ tsk->thread.rsp0 = (unsigned long) (regs+1);
-+
-+ tsk->thread.fs = b->cpt_fsbase;
-+ tsk->thread.gs = b->cpt_gsbase;
-+ tsk->thread.fsindex = decode_segment(b->cpt_fsindex);
-+ tsk->thread.gsindex = decode_segment(b->cpt_gsindex);
-+ tsk->thread.ds = decode_segment(b->cpt_ds);
-+ tsk->thread.es = decode_segment(b->cpt_es);
-+ tsk->thread.debugreg0 = b->cpt_debugreg[0];
-+ tsk->thread.debugreg1 = b->cpt_debugreg[1];
-+ tsk->thread.debugreg2 = b->cpt_debugreg[2];
-+ tsk->thread.debugreg3 = b->cpt_debugreg[3];
-+ tsk->thread.debugreg6 = b->cpt_debugreg[6];
-+ tsk->thread.debugreg7 = b->cpt_debugreg[7];
-+
-+ /* NOTE(review): assumes the image fields starting at cpt_r15 are
-+  * laid out exactly like struct pt_regs -- confirm.
-+  */
-+ memcpy(regs, &b->cpt_r15, sizeof(struct pt_regs));
-+
-+ tsk->thread.userrsp = regs->rsp;
-+ regs->cs = decode_segment(b->cpt_cs);
-+ regs->ss = decode_segment(b->cpt_ss);
-+ } else if (hdr->cpt_object == CPT_OBJ_X86_REGS) {
-+ struct cpt_x86_regs *b = (void*)hdr;
-+
-+ tsk->thread.rsp = (unsigned long) regs;
-+ tsk->thread.rsp0 = (unsigned long) (regs+1);
-+
-+ /* A 32-bit image carries no fs/gs base values, only selectors */
-+ tsk->thread.fs = 0;
-+ tsk->thread.gs = 0;
-+ tsk->thread.fsindex = decode_segment(b->cpt_fs);
-+ tsk->thread.gsindex = decode_segment(b->cpt_gs);
-+ tsk->thread.debugreg0 = b->cpt_debugreg[0];
-+ tsk->thread.debugreg1 = b->cpt_debugreg[1];
-+ tsk->thread.debugreg2 = b->cpt_debugreg[2];
-+ tsk->thread.debugreg3 = b->cpt_debugreg[3];
-+ tsk->thread.debugreg6 = b->cpt_debugreg[6];
-+ tsk->thread.debugreg7 = b->cpt_debugreg[7];
-+
-+ /* Field-by-field translation instead of a bulk copy */
-+ xlate_ptregs_32_to_64(regs, b);
-+
-+ tsk->thread.userrsp = regs->rsp;
-+ regs->cs = decode_segment(b->cpt_xcs);
-+ regs->ss = decode_segment(b->cpt_xss);
-+ tsk->thread.ds = decode_segment(b->cpt_xds);
-+ tsk->thread.es = decode_segment(b->cpt_xes);
-+ } else {
-+ return -EINVAL;
-+ }
-+ return 0;
-+}
-+
-+#endif
-+
-+/*
-+ * Restore the per-task state of every CPT_OBJ_TASK object in @ctx.
-+ *
-+ * For each task this fills in priority, signal masks, credentials,
-+ * capabilities, supplementary groups, accounting counters, resource
-+ * limits, TLS descriptors, FPU state, registers and queued signals from
-+ * the task image, fixes up the parent link, queues the restore hooks the
-+ * task has to run when it resumes, and re-arms the real interval timer.
-+ * One module reference is dropped per successfully processed task.
-+ *
-+ * Returns 0 on success, -EFAULT if a task object has no task attached,
-+ * -EINVAL if the register image cannot be restored.
-+ *
-+ * NOTE(review): the two early error returns leave the loop without the
-+ * module_put() done at the end of an iteration -- confirm whether the
-+ * reference is released elsewhere on failure.
-+ */
-+int rst_restore_process(struct cpt_context *ctx)
-+{
-+ cpt_object_t *obj;
-+
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+ struct cpt_task_image *ti = obj->o_image;
-+ struct pt_regs * regs;
-+ struct cpt_object_hdr *b;
-+ struct cpt_siginfo_image *lsi = NULL;
-+ struct group_info *gids, *ogids;
-+ int hooks = 0;
-+ int i;
-+
-+ if (tsk == NULL) {
-+ eprintk_ctx("oops, task %d/%s is missing\n", ti->cpt_pid, ti->cpt_comm);
-+ return -EFAULT;
-+ }
-+
-+ wait_task_inactive(tsk);
-+ regs = task_pt_regs(tsk);
-+
-+ if (!tsk->exit_state) {
-+ tsk->lock_depth = -1;
-+#ifdef CONFIG_PREEMPT
-+ tsk->thread_info->preempt_count--;
-+#endif
-+ }
-+
-+ if (tsk->static_prio != ti->cpt_static_prio)
-+ set_user_nice(tsk, PRIO_TO_NICE(ti->cpt_static_prio));
-+
-+ /* Signal masks and the private pending set */
-+ cpt_sigset_import(&tsk->blocked, ti->cpt_sigblocked);
-+ cpt_sigset_import(&tsk->real_blocked, ti->cpt_sigrblocked);
-+ cpt_sigset_import(&tsk->saved_sigmask, ti->cpt_sigsuspend_blocked);
-+ cpt_sigset_import(&tsk->pending.signal, ti->cpt_sigpending);
-+
-+ /* Credentials and capabilities */
-+ tsk->uid = ti->cpt_uid;
-+ tsk->euid = ti->cpt_euid;
-+ tsk->suid = ti->cpt_suid;
-+ tsk->fsuid = ti->cpt_fsuid;
-+ tsk->gid = ti->cpt_gid;
-+ tsk->egid = ti->cpt_egid;
-+ tsk->sgid = ti->cpt_sgid;
-+ tsk->fsgid = ti->cpt_fsgid;
-+ memcpy(&tsk->cap_effective, &ti->cpt_ecap, sizeof(tsk->cap_effective));
-+ memcpy(&tsk->cap_inheritable, &ti->cpt_icap, sizeof(tsk->cap_inheritable));
-+ memcpy(&tsk->cap_permitted, &ti->cpt_pcap, sizeof(tsk->cap_permitted));
-+ tsk->keep_capabilities = (ti->cpt_keepcap != 0);
-+ tsk->did_exec = (ti->cpt_did_exec != 0);
-+ /* Supplementary groups.
-+  * NOTE(review): the copy below always transfers 32 entries into
-+  * small_block regardless of cpt_ngids, and when groups_alloc()
-+  * fails the old group_info is still put while tsk->group_info
-+  * keeps pointing at it -- confirm both are intended.
-+  */
-+ gids = groups_alloc(ti->cpt_ngids);
-+ ogids = tsk->group_info;
-+ if (gids) {
-+ int i;
-+ for (i=0; i<32; i++)
-+ gids->small_block[i] = ti->cpt_gids[i];
-+ tsk->group_info = gids;
-+ }
-+ if (ogids)
-+ put_group_info(ogids);
-+ tsk->utime = ti->cpt_utime;
-+ tsk->stime = ti->cpt_stime;
-+ /* Version 0 images store the start time in ticks, later ones as
-+  * an encoded timespec.
-+  */
-+ if (ctx->image_version == 0) {
-+ tsk->start_time = _ns_to_timespec(ti->cpt_starttime*TICK_NSEC);
-+ } else {
-+ cpt_timespec_import(&tsk->start_time, ti->cpt_starttime);
-+ }
-+ /* Rebase the start time by the start time of the environment's
-+  * init task.
-+  */
-+ _set_normalized_timespec(&tsk->start_time,
-+ tsk->start_time.tv_sec -
-+ get_exec_env()->init_entry->start_time.tv_sec,
-+ tsk->start_time.tv_nsec -
-+ get_exec_env()->init_entry->start_time.tv_nsec);
-+
-+ tsk->nvcsw = ti->cpt_nvcsw;
-+ tsk->nivcsw = ti->cpt_nivcsw;
-+ tsk->min_flt = ti->cpt_min_flt;
-+ tsk->maj_flt = ti->cpt_maj_flt;
-+
-+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,8)
-+ tsk->cutime = ti->cpt_cutime;
-+ tsk->cstime = ti->cpt_cstime;
-+ tsk->cnvcsw = ti->cpt_cnvcsw;
-+ tsk->cnivcsw = ti->cpt_cnivcsw;
-+ tsk->cmin_flt = ti->cpt_cmin_flt;
-+ tsk->cmaj_flt = ti->cpt_cmaj_flt;
-+
-+ /* Deliberately emits invalid assembly so the build breaks if the
-+  * kernel has more rlimits than the image format can hold.
-+  */
-+ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
-+ __asm__("undefined\n");
-+
-+ for (i=0; i<RLIM_NLIMITS; i++) {
-+ tsk->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
-+ tsk->rlim[i].rlim_max = ti->cpt_rlim_max[i];
-+ }
-+#else
-+ /* On newer kernels these counters and the rlimits live in the
-+  * shared signal struct; only the group leader restores them.
-+  */
-+ if (thread_group_leader(tsk) && tsk->signal) {
-+ tsk->signal->utime = ti->cpt_utime;
-+ tsk->signal->stime = ti->cpt_stime;
-+ tsk->signal->cutime = ti->cpt_cutime;
-+ tsk->signal->cstime = ti->cpt_cstime;
-+ tsk->signal->nvcsw = ti->cpt_nvcsw;
-+ tsk->signal->nivcsw = ti->cpt_nivcsw;
-+ tsk->signal->cnvcsw = ti->cpt_cnvcsw;
-+ tsk->signal->cnivcsw = ti->cpt_cnivcsw;
-+ tsk->signal->min_flt = ti->cpt_min_flt;
-+ tsk->signal->maj_flt = ti->cpt_maj_flt;
-+ tsk->signal->cmin_flt = ti->cpt_cmin_flt;
-+ tsk->signal->cmaj_flt = ti->cpt_cmaj_flt;
-+
-+ /* Same build-time guard as in the branch above */
-+ if (RLIM_NLIMITS > CPT_RLIM_NLIMITS)
-+ __asm__("undefined\n");
-+
-+ for (i=0; i<RLIM_NLIMITS; i++) {
-+ tsk->signal->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
-+ tsk->signal->rlim[i].rlim_max = ti->cpt_rlim_max[i];
-+ }
-+ }
-+#endif
-+
-+ /* TLS descriptors: the image carries three slots */
-+ for (i=0; i<3; i++) {
-+ if (i >= GDT_ENTRY_TLS_ENTRIES) {
-+ eprintk_ctx("too many tls descs\n");
-+ } else {
-+#ifndef CONFIG_X86_64
-+ tsk->thread.tls_array[i].a = ti->cpt_tls[i]&0xFFFFFFFF;
-+ tsk->thread.tls_array[i].b = ti->cpt_tls[i]>>32;
-+#else
-+ tsk->thread.tls_array[i] = ti->cpt_tls[i];
-+#endif
-+ }
-+ }
-+
-+ clear_stopped_child_used_math(tsk);
-+
-+ /* Walk the sub-objects that follow the task image header */
-+ b = (void *)(ti+1);
-+ while ((void*)b < ((void*)ti) + ti->cpt_next) {
-+ /* Siginfo objects are at the end of obj array */
-+ if (b->cpt_object == CPT_OBJ_SIGINFO) {
-+ struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
-+ restore_sigqueue(tsk, &tsk->pending, (unsigned long)b, (unsigned long)ti + ti->cpt_next);
-+ set_exec_env(env);
-+ break;
-+ }
-+
-+ switch (b->cpt_object) {
-+ case CPT_OBJ_BITS:
-+ /* FPU state is only taken when its format matches what
-+  * this CPU uses (fxsave vs. legacy fsave).
-+  */
-+ if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE &&
-+ cpu_has_fxsr) {
-+ memcpy(&tsk->thread.i387,
-+ (void*)b + b->cpt_hdrlen,
-+ sizeof(struct i387_fxsave_struct));
-+ if (ti->cpt_used_math)
-+ set_stopped_child_used_math(tsk);
-+ }
-+#ifdef CONFIG_X86_32
-+ else if (b->cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD &&
-+ !cpu_has_fxsr) {
-+ memcpy(&tsk->thread.i387,
-+ (void*)b + b->cpt_hdrlen,
-+ sizeof(struct i387_fsave_struct));
-+ if (ti->cpt_used_math)
-+ set_stopped_child_used_math(tsk);
-+ }
-+#endif
-+ break;
-+ case CPT_OBJ_LASTSIGINFO:
-+ /* Remembered here, decoded later into last_siginfo */
-+ lsi = (void*)b;
-+ break;
-+ case CPT_OBJ_X86_REGS:
-+ case CPT_OBJ_X86_64_REGS:
-+ if (restore_registers(tsk, regs, ti, (void*)b)) {
-+ eprintk_ctx("cannot restore registers: image is corrupted\n");
-+ return -EINVAL;
-+ }
-+ break;
-+ }
-+ b = ((void*)b) + b->cpt_next;
-+ }
-+
-+ /* Parent and real parent differ in the image: relink the task
-+  * under the task found by cpt_ppid and put it on the ptrace list
-+  * of its current parent.
-+  */
-+ if (ti->cpt_ppid != ti->cpt_rppid) {
-+ task_t *parent;
-+ struct ve_struct *env = set_exec_env(VE_TASK_INFO(tsk)->owner_env);
-+ write_lock_irq(&tasklist_lock);
-+ parent = find_task_by_pid_ve(ti->cpt_ppid);
-+ if (parent && parent != tsk->parent) {
-+ list_add(&tsk->ptrace_list, &tsk->parent->ptrace_children);
-+ REMOVE_LINKS(tsk);
-+ tsk->parent = parent;
-+ SET_LINKS(tsk);
-+ }
-+ write_unlock_irq(&tasklist_lock);
-+ set_exec_env(env);
-+ }
-+
-+ tsk->ptrace_message = ti->cpt_ptrace_message;
-+ tsk->pn_state = ti->cpt_pn_state;
-+ tsk->stopped_state = ti->cpt_stopped_state;
-+ tsk->thread_info->flags = ti->cpt_thrflags;
-+
-+ /* The image was created with kernel < 2.6.16, while
-+ * task hanged in sigsuspend -> do_signal.
-+ *
-+ * FIXME! This needs more brain efforts...
-+ */
-+ if (ti->cpt_sigsuspend_state) {
-+ tsk->thread_info->flags |= _TIF_RESTORE_SIGMASK;
-+ }
-+
-+#ifdef CONFIG_X86_64
-+ tsk->thread_info->flags |= _TIF_FORK;
-+ if (!ti->cpt_64bit)
-+ tsk->thread_info->flags |= _TIF_IA32;
-+#endif
-+
-+#ifndef CONFIG_X86_64
-+ /* A task caught inside select()/_newselect() has its user-space
-+  * timeout reduced by ctx->delta_time, clamped at zero.
-+  */
-+ do {
-+ if (regs->orig_eax == __NR__newselect && regs->edi) {
-+ struct timeval tv;
-+ if (access_process_vm(tsk, regs->edi, &tv,
-+ sizeof(tv), 0) != sizeof(tv)) {
-+ wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm: edi %ld\n",
-+ virt_pid(tsk), tsk->pid, tsk->comm,
-+ regs->edi);
-+ break;
-+ }
-+ dprintk_ctx("task %d/%d(%s): Old timeval in newselect: %ld.%ld\n",
-+ virt_pid(tsk), tsk->pid, tsk->comm,
-+ tv.tv_sec, tv.tv_usec);
-+ tv.tv_sec -= ctx->delta_time.tv_sec;
-+ if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
-+ tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
-+ tv.tv_sec--;
-+ } else {
-+ tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
-+ }
-+ if (tv.tv_sec < 0) {
-+ tv.tv_sec = 0;
-+ tv.tv_usec = 0;
-+ }
-+ dprintk_ctx("task %d/%d(%s): New timeval in newselect: %ld.%ld\n",
-+ virt_pid(tsk), tsk->pid, tsk->comm,
-+ tv.tv_sec, tv.tv_usec);
-+ if (access_process_vm(tsk, regs->edi, &tv,
-+ sizeof(tv), 1) != sizeof(tv)) {
-+ wprintk_ctx("task %d/%d(%s): Error 1 in access_process_vm write: edi %ld\n",
-+ virt_pid(tsk), tsk->pid, tsk->comm, regs->edi);
-+ }
-+
-+ /* NOTE(review): this branch is gated on regs->edi but reads
-+  * the argument block from regs->ebx, and a.tvp is not
-+  * checked for NULL before use -- confirm.
-+  */
-+ } else if (regs->orig_eax == __NR_select && regs->edi) {
-+ struct {
-+ unsigned long n;
-+ fd_set __user *inp, *outp, *exp;
-+ struct timeval __user *tvp;
-+ } a;
-+ struct timeval tv;
-+ if (access_process_vm(tsk, regs->ebx, &a,
-+ sizeof(a), 0) != sizeof(a)) {
-+ wprintk_ctx("task %d: Error 2 in access_process_vm\n", tsk->pid);
-+ break;
-+ }
-+ if (access_process_vm(tsk, (unsigned long)a.tvp,
-+ &tv, sizeof(tv), 0) != sizeof(tv)) {
-+ wprintk_ctx("task %d: Error 3 in access_process_vm\n", tsk->pid);
-+ break;
-+ }
-+ dprintk_ctx("task %d: Old timeval in select: %ld.%ld\n",
-+ tsk->pid, tv.tv_sec, tv.tv_usec);
-+ tv.tv_sec -= ctx->delta_time.tv_sec;
-+ if (tv.tv_usec < ctx->delta_time.tv_nsec / 1000) {
-+ tv.tv_usec += 1000000 - ctx->delta_time.tv_nsec / 1000;
-+ tv.tv_sec--;
-+ } else {
-+ tv.tv_usec -= ctx->delta_time.tv_nsec / 1000;
-+ }
-+ if (tv.tv_sec < 0) {
-+ tv.tv_sec = 0;
-+ tv.tv_usec = 0;
-+ }
-+ dprintk_ctx("task %d: New timeval in select: %ld.%ld\n",
-+ tsk->pid, tv.tv_sec, tv.tv_usec);
-+ if (access_process_vm(tsk, (unsigned long)a.tvp,
-+ &tv, sizeof(tv), 1) != sizeof(tv)) {
-+ wprintk_ctx("task %d: Error 3 in access_process_vm write\n", tsk->pid);
-+ }
-+ }
-+ } while (0);
-+#endif
-+
-+ /* Queue ret_restart_sys for a live task that was inside a syscall
-+  * and whose saved return value asks for a restart.  The condition
-+  * covers the same cases that ret_restart_sys() handles.
-+  */
-+ if (!tsk->exit_state && (long)SYSCALL_NR(regs) >= 0) {
-+ if (SYSCALL_RETVAL(regs) == -ERESTARTSYS ||
-+ SYSCALL_RETVAL(regs) == -ERESTARTNOINTR ||
-+ SYSCALL_RETVAL(regs) == -ERESTARTNOHAND ||
-+ SYSCALL_RETVAL(regs) == -ERESTART_RESTARTBLOCK ||
-+ syscall_is(tsk,regs,pause) ||
-+ (syscall_is(tsk,regs,rt_sigtimedwait) &&
-+ (SYSCALL_RETVAL(regs) == -EAGAIN || SYSCALL_RETVAL(regs) == -EINTR)) ||
-+ (syscall_is(tsk,regs,futex) &&
-+ (SYSCALL_RETVAL(regs) == -EINTR)))
-+ add_hook(tsk, ret_restart_sys, 0, &hooks);
-+ }
-+
-+ if (lsi || tsk->pn_state) {
-+ /* ... -> ptrace_notify()
-+ * or
-+ * ... -> do_signal() -> get_signal_to_deliver() ->
-+ * ptrace stop
-+ */
-+ /* The siginfo lives in the hook's argument area on the task's
-+  * kernel stack.
-+  */
-+ tsk->last_siginfo = add_hook(tsk, ret_last_siginfo, sizeof(siginfo_t), &hooks);
-+ memset(tsk->last_siginfo, 0, sizeof(siginfo_t));
-+ if (lsi)
-+ decode_siginfo(tsk->last_siginfo, lsi);
-+ }
-+
-+ tsk->ptrace = ti->cpt_ptrace;
-+ tsk->flags = ti->cpt_flags & ~PF_FROZEN;
-+ clear_tsk_thread_flag(tsk, TIF_FREEZE);
-+ tsk->exit_signal = ti->cpt_exit_signal;
-+
-+ if (tsk->stopped_state) {
-+ dprintk_ctx("finish_stop\n");
-+ if (ti->cpt_state != TASK_STOPPED)
-+ eprintk_ctx("Hellooo, state is %u\n", (unsigned)ti->cpt_state);
-+ add_hook(tsk, ret_finish_stop, 0, &hooks);
-+ }
-+
-+ if (!tsk->exit_state &&
-+ (ti->cpt_set_tid || ti->cpt_clear_tid)) {
-+ /* Two-word argument area: [0] clear_tid, [1] set_tid */
-+ unsigned long *ptr = add_hook(tsk, ret_child_tid, sizeof(unsigned long)*2, &hooks);
-+ ptr[0] = ti->cpt_clear_tid;
-+ ptr[1] = ti->cpt_set_tid;
-+ dprintk_ctx("settids\n");
-+ }
-+
-+#ifdef CONFIG_X86_64
-+ /* No hook queued and the task was not in a syscall: still push
-+  * ret_from_fork2 and set _TIF_RESUME, as add_hook() does for the
-+  * first hook.
-+  */
-+ if (!hooks && (long)SYSCALL_NR(regs) < 0) {
-+ extern void ret_from_fork2(void);
-+ ESP(tsk) -= sizeof(unsigned long);
-+ *(unsigned long*)ESP(tsk) = (unsigned long)ret_from_fork2;
-+ tsk->thread_info->flags |= _TIF_RESUME;
-+ }
-+#else
-+ /* Push the last saved eip and make pre_ret_from_fork the entry
-+  * point of the resumed task.
-+  */
-+ tsk->thread.esp -= 4;
-+ *(__u32*)tsk->thread.esp = tsk->thread.eip;
-+ tsk->thread.eip = (unsigned long)pre_ret_from_fork;
-+#endif
-+
-+ if (ti->cpt_state == TASK_TRACED)
-+ tsk->state = TASK_TRACED;
-+ else if (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD)) {
-+ tsk->signal->it_virt_expires = 0;
-+ tsk->signal->it_prof_expires = 0;
-+ if (tsk->state != EXIT_DEAD)
-+ eprintk_ctx("oops, schedule() did not make us dead\n");
-+ }
-+
-+ /* Re-arm the real interval timer of a live group leader, reduced
-+  * by ctx->delta_time and never less than a minimal positive value.
-+  */
-+ if (thread_group_leader(tsk) &&
-+ ti->cpt_it_real_value &&
-+ !(ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
-+ DEFINE_KTIME(val);
-+
-+ if (ctx->image_version != 0) {
-+ ktime_t delta;
-+
-+ /* Newer images: value is in nanoseconds */
-+ val = ktime_add_ns(val, ti->cpt_it_real_value);
-+ delta = timespec_to_ktime(ctx->delta_time);
-+ val = ktime_sub(val, delta);
-+ if (val.tv64 <= 0)
-+ val.tv64 = NSEC_PER_USEC;
-+ dprintk("rst itimer " CPT_FID " +%Ld %Ld %Lu\n", CPT_TID(tsk), val.tv64, delta.tv64, ti->cpt_it_real_value);
-+ } else {
-+ /* Version 0 images: value is in jiffies */
-+ unsigned long jif = ti->cpt_it_real_value -
-+ timespec_to_jiffies(&ctx->delta_time);
-+ if ((long)jif <= 0)
-+ jif = 1;
-+ val = ktime_add_ns(val, (u64)jif*TICK_NSEC);
-+ }
-+ spin_lock_irq(&tsk->sighand->siglock);
-+ if (hrtimer_try_to_cancel(&tsk->signal->real_timer) >= 0) {
-+ /* FIXME. Check!!!! */
-+ hrtimer_start(&tsk->signal->real_timer, val, HRTIMER_REL);
-+ } else {
-+ wprintk_ctx("Timer clash. Impossible?\n");
-+ }
-+ spin_unlock_irq(&tsk->sighand->siglock);
-+
-+ dprintk_ctx("itimer " CPT_FID " +%Lu\n", CPT_TID(tsk), val.tv64);
-+ }
-+
-+ module_put(THIS_MODULE);
-+ }
-+ return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_socket.c linux-2.6.16-026test009/kernel/cpt/rst_socket.c
---- linux-2.6.16.orig/kernel/cpt/rst_socket.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_socket.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,797 @@
-+/*
-+ *
-+ * kernel/cpt/rst_socket.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/namei.h>
-+#include <linux/socket.h>
-+#include <linux/un.h>
-+#include <net/tcp.h>
-+#include <net/sock.h>
-+#include <net/scm.h>
-+#include <net/af_unix.h>
-+
-+#include <ub/ub_mem.h>
-+#include <ub/ub_orphan.h>
-+#include <ub/ub_orphan.h>
-+#include <ub/ub_net.h>
-+#include <ub/ub_tcp.h>
-+
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_files.h"
-+#include "cpt_socket.h"
-+#include "cpt_kernel.h"
-+
-+#include "cpt_syscalls.h"
-+
-+
-+/*
-+ * Copy the protocol-independent socket settings from image @si into @sk.
-+ *
-+ * @pos and @ctx are accepted for symmetry with the other restore helpers
-+ * and are not used here.  Always returns 0.
-+ */
-+static int setup_sock_common(struct sock *sk, struct cpt_sock_image *si,
-+			     loff_t pos, struct cpt_context *ctx)
-+{
-+	struct socket *sock = sk->sk_socket;
-+
-+	/* State of the attached struct socket, if there is one */
-+	if (sock) {
-+		sock->flags = si->cpt_ssflags;
-+		sock->state = si->cpt_sstate;
-+	}
-+
-+	sk->sk_reuse = si->cpt_reuse;
-+	sk->sk_shutdown = si->cpt_shutdown;
-+	sk->sk_userlocks = si->cpt_userlocks;
-+	sk->sk_no_check = si->cpt_no_check;
-+
-+	/* Individual flag bits; note that sk_flags is overwritten as a
-+	 * whole further down.
-+	 */
-+	if (si->cpt_debug)
-+		sock_set_flag(sk, SOCK_DBG);
-+	else
-+		sock_reset_flag(sk, SOCK_DBG);
-+
-+	if (si->cpt_rcvtstamp)
-+		sock_set_flag(sk, SOCK_RCVTSTAMP);
-+	else
-+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
-+
-+	if (si->cpt_localroute)
-+		sock_set_flag(sk, SOCK_LOCALROUTE);
-+	else
-+		sock_reset_flag(sk, SOCK_LOCALROUTE);
-+
-+	sk->sk_protocol = si->cpt_protocol;
-+	sk->sk_err = si->cpt_err;
-+	sk->sk_err_soft = si->cpt_err_soft;
-+	sk->sk_priority = si->cpt_priority;
-+	sk->sk_rcvlowat = si->cpt_rcvlowat;
-+
-+	/* CPT_NULL in a timeout field stands for "no timeout" */
-+	if (si->cpt_rcvtimeo == CPT_NULL)
-+		sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
-+	else
-+		sk->sk_rcvtimeo = si->cpt_rcvtimeo;
-+
-+	if (si->cpt_sndtimeo == CPT_NULL)
-+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
-+	else
-+		sk->sk_sndtimeo = si->cpt_sndtimeo;
-+
-+	sk->sk_rcvbuf = si->cpt_rcvbuf;
-+	sk->sk_sndbuf = si->cpt_sndbuf;
-+	sk->sk_bound_dev_if = si->cpt_bound_dev_if;
-+	sk->sk_flags = si->cpt_flags;
-+
-+	if (si->cpt_lingertime == CPT_NULL)
-+		sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
-+	else
-+		sk->sk_lingertime = si->cpt_lingertime;
-+
-+	/* Peer credentials and last packet timestamp */
-+	sk->sk_peercred.pid = si->cpt_peer_pid;
-+	sk->sk_peercred.uid = si->cpt_peer_uid;
-+	sk->sk_peercred.gid = si->cpt_peer_gid;
-+	cpt_timeval_import(&sk->sk_stamp, si->cpt_stamp);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Obtain a struct file for @sock without leaving a descriptor behind.
-+ *
-+ * The socket is mapped to a temporary fd, an extra reference is taken on
-+ * the resulting file and the fd is closed again.  Returns the file, or an
-+ * ERR_PTR() carrying the error from sock_map_fd().
-+ */
-+static struct file *sock_mapfile(struct socket *sock)
-+{
-+	struct file *filp;
-+	int fd;
-+
-+	fd = sock_map_fd(sock);
-+	if (fd < 0)
-+		return ERR_PTR(fd);
-+
-+	/* Pin the file before the descriptor that owns it goes away */
-+	filp = sock->file;
-+	get_file(filp);
-+	sc_close(fd);
-+
-+	return filp;
-+}
-+
-+/* The assumption is that /tmp exists and is writable.
-+ * In previous versions we assumed that listen() would autobind
-+ * the socket. It does not do this for AF_UNIX for an evident reason:
-+ * a socket in the abstract namespace is accessible, unlike a socket
-+ * bound to a deleted FS object.
-+ */
-+
-+/*
-+ * Pick a path of the form /tmp/SOCK.<8 hex digits> that does not exist yet.
-+ *
-+ * @name receives the generated path.  Up to 100 random candidates are
-+ * tried.  Returns 0 once path_lookup() fails for a candidate, -ELOOP if
-+ * every candidate could be looked up.
-+ */
-+static int
-+select_deleted_name(char * name, cpt_context_t *ctx)
-+{
-+	int attempt = 0;
-+
-+	while (attempt < 100) {
-+		struct nameidata nd;
-+		unsigned int rnd = net_random();
-+
-+		sprintf(name, "/tmp/SOCK.%08x", rnd);
-+
-+		/* A failed lookup means the name is free to use */
-+		if (path_lookup(name, 0, &nd) != 0)
-+			return 0;
-+
-+		path_release(&nd);
-+		attempt++;
-+	}
-+
-+	eprintk_ctx("failed to allocate deleted socket inode\n");
-+	return -ELOOP;
-+}
-+
-+/*
-+ * Bind an AF_UNIX socket to the local address recorded in image @si.
-+ *
-+ * Nothing is done (and 0 is returned) when the image holds no address
-+ * beyond the two-byte family field.  For a filesystem address:
-+ *  - if the path exists and the socket was marked deleted in the image,
-+ *    a temporary /tmp/SOCK.* name is bound instead;
-+ *  - if the path exists and is a socket, it is unlinked, re-created by
-+ *    bind() and given the owner and mode of the old inode;
-+ *  - if the path exists but is not a socket, -EINVAL is returned.
-+ * For a socket marked deleted the bound name is unlinked again after a
-+ * successful bind.
-+ *
-+ * Returns 0 or a negative error from name selection or bind().
-+ */
-+static int
-+bind_unix_socket(struct socket *sock, struct cpt_sock_image *si,
-+ cpt_context_t *ctx)
-+{
-+ int err;
-+ char *name;
-+ struct sockaddr* addr;
-+ int addrlen;
-+ struct sockaddr_un sun;
-+ struct nameidata nd;
-+
-+ /* Address length <= 2 means only sa_family is present: unbound */
-+ if ((addrlen = si->cpt_laddrlen) <= 2)
-+ return 0;
-+
-+ nd.dentry = NULL;
-+ /* The path starts right after the two-byte family field */
-+ name = ((char*)si->cpt_laddr) + 2;
-+ addr = (struct sockaddr *)si->cpt_laddr;
-+
-+ /* name[0] == 0 denotes an abstract address: no filesystem work */
-+ if (name[0]) {
-+ err = path_lookup(name, 0, &nd);
-+ if (err) {
-+ nd.dentry = NULL;
-+ } else {
-+ if (si->cpt_deleted) {
-+ /* The original name is taken by something else:
-+  * bind to a scratch name built in the local sun.
-+  */
-+ path_release(&nd);
-+ nd.dentry = NULL;
-+ addr = (struct sockaddr*)&sun;
-+ addr->sa_family = AF_UNIX;
-+ name = ((char*)addr) + 2;
-+ err = select_deleted_name(name, ctx);
-+ if (err)
-+ return err;
-+ addrlen = 2 + strlen(name);
-+ } else if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) {
-+ eprintk_ctx("bind_unix_socket: not a socket dentry\n");
-+ path_release(&nd);
-+ return -EINVAL;
-+ }
-+ }
-+ /* An existing socket inode is removed so bind() can re-create
-+  * it; nd keeps the old inode alive for the chown/chmod below.
-+  */
-+ if (nd.dentry)
-+ sc_unlink(name);
-+ }
-+
-+ err = sock->ops->bind(sock, addr, addrlen);
-+
-+ if (!err) {
-+ if (nd.dentry) {
-+ sc_chown(name, nd.dentry->d_inode->i_uid,
-+ nd.dentry->d_inode->i_gid);
-+ sc_chmod(name, nd.dentry->d_inode->i_mode);
-+ }
-+ if (si->cpt_deleted && name[0])
-+ sc_unlink(name);
-+ }
-+ if (nd.dentry)
-+ path_release(&nd);
-+ return err;
-+}
-+
-+/*
-+ * Give a restored, non-listening AF_UNIX socket its local address.
-+ *
-+ * A socket without a parent (cpt_parent == -1) is bound through
-+ * bind_unix_socket().  A socket with a parent shares the parent's address
-+ * object instead: the socket's own address reference, if any, is dropped
-+ * and a reference on the parent's address is taken.
-+ *
-+ * Returns 0, or the result of bind_unix_socket() for parentless sockets.
-+ * Non-AF_UNIX and listening sockets are left untouched.
-+ */
-+static int fixup_unix_address(struct socket *sock, struct cpt_sock_image *si,
-+ struct cpt_context *ctx)
-+{
-+ struct sock *sk = sock->sk;
-+ cpt_object_t *obj;
-+ struct sock *parent;
-+
-+ if (sk->sk_family != AF_UNIX || sk->sk_state == TCP_LISTEN)
-+ return 0;
-+
-+ if (si->cpt_parent == -1)
-+ return bind_unix_socket(sock, si, ctx);
-+
-+ obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
-+ if (!obj)
-+ return 0;
-+
-+ /* NOTE(review): obj->o_obj is dereferenced without a NULL check;
-+  * presumably the parent has always been created by the time this
-+  * runs -- confirm.
-+  */
-+ parent = obj->o_obj;
-+ if (unix_sk(parent)->addr) {
-+ /* Drop our own address (freeing it on last reference) ... */
-+ if (unix_sk(sk)->addr &&
-+ atomic_dec_and_test(&unix_sk(sk)->addr->refcnt))
-+ kfree(unix_sk(sk)->addr);
-+ /* ... and share the parent's one */
-+ atomic_inc(&unix_sk(parent)->addr->refcnt);
-+ unix_sk(sk)->addr = unix_sk(parent)->addr;
-+ }
-+ return 0;
-+}
-+
-+
-+/*
-+ * Create the socket described by image @si and attach it to object @obj.
-+ *
-+ * For a socketpair image the peer socket is created as well and attached
-+ * to the peer's object, unless the peer is outside the environment
-+ * (cpt_peer == -1), in which case the second end is released right away.
-+ * Sockets that had a file in the image get a struct file registered as a
-+ * CPT_OBJ_FILE object.  The common socket settings are restored, INET
-+ * and NETLINK sockets are bound/connected, AF_UNIX addresses are fixed
-+ * up, accepted-but-not-yet-returned INET sockets are attached to their
-+ * listening parent and fileless AF_INET sockets are orphaned.
-+ *
-+ * Returns 0 on success or a negative error code.
-+ *
-+ * NOTE(review): the err_out path calls sock_release() even after
-+ * sock_mapfile() has attached a file to the socket, and the
-+ * rst_get_object() failure in the peer branch returns without any
-+ * cleanup -- confirm that both are acceptable.
-+ */
-+static int open_socket(cpt_object_t *obj, struct cpt_sock_image *si,
-+ struct cpt_context *ctx)
-+{
-+ int err;
-+ struct socket *sock;
-+ struct socket *sock2 = NULL;
-+ struct file *file;
-+ cpt_object_t *fobj;
-+ cpt_object_t *pobj = NULL;
-+
-+ err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
-+ &sock);
-+ if (err)
-+ return err;
-+
-+ if (si->cpt_socketpair) {
-+ err = sock_create_kern(si->cpt_family, si->cpt_type,
-+ si->cpt_protocol, &sock2);
-+ if (err)
-+ goto err_out;
-+
-+ err = sock->ops->socketpair(sock, sock2);
-+ if (err < 0)
-+ goto err_out;
-+
-+ /* Socketpair with a peer outside our environment.
-+ * So, we create real half-open pipe and do not worry
-+ * about dead end anymore. */
-+ if (si->cpt_peer == -1) {
-+ sock_release(sock2);
-+ sock2 = NULL;
-+ }
-+ }
-+
-+ cpt_obj_setobj(obj, sock->sk, ctx);
-+
-+ /* The socket had a file in the image: create one and register it */
-+ if (si->cpt_file != CPT_NULL) {
-+ file = sock_mapfile(sock);
-+ err = PTR_ERR(file);
-+ if (IS_ERR(file))
-+ goto err_out;
-+
-+ err = -ENOMEM;
-+
-+ obj->o_parent = file;
-+
-+ if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
-+ goto err_out;
-+ cpt_obj_setpos(fobj, si->cpt_file, ctx);
-+ cpt_obj_setindex(fobj, si->cpt_index, ctx);
-+ }
-+
-+ /* Same for the other end of a socketpair */
-+ if (sock2) {
-+ struct file *file2;
-+
-+ pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_peer, ctx);
-+ if (!pobj) BUG();
-+ if (pobj->o_obj) BUG();
-+ cpt_obj_setobj(pobj, sock2->sk, ctx);
-+
-+ if (pobj->o_ppos != CPT_NULL) {
-+ file2 = sock_mapfile(sock2);
-+ err = PTR_ERR(file2);
-+ if (IS_ERR(file2))
-+ goto err_out;
-+
-+ err = -ENOMEM;
-+ if ((fobj = cpt_object_add(CPT_OBJ_FILE, file2, ctx)) == NULL)
-+ goto err_out;
-+ cpt_obj_setpos(fobj, pobj->o_ppos, ctx);
-+ cpt_obj_setindex(fobj, si->cpt_peer, ctx);
-+
-+ pobj->o_parent = file2;
-+ }
-+ }
-+
-+ setup_sock_common(sock->sk, si, obj->o_pos, ctx);
-+ if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6) {
-+ /* freebind is set before binding to the saved address; a bind
-+  * failure is only logged at debug level.
-+  */
-+ inet_sk(sock->sk)->freebind = 1;
-+ if (si->cpt_laddrlen) {
-+ err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
-+ if (err) {
-+ dprintk_ctx("binding failed: %d, do not worry\n", err);
-+ }
-+ }
-+ rst_socket_in(si, obj->o_pos, sock->sk, ctx);
-+ } else if (sock->sk->sk_family == AF_NETLINK) {
-+ /* Netlink bind/connect failures are logged but not fatal */
-+ err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
-+ if (err) {
-+ eprintk_ctx("AF_NETLINK binding failed: %d\n", err);
-+ }
-+ if (si->cpt_raddrlen) {
-+ err = sock->ops->connect(sock, (struct sockaddr *)&si->cpt_raddr, si->cpt_raddrlen, O_NONBLOCK);
-+ if (err) {
-+ eprintk_ctx("oops, AF_NETLINK connect failed: %d\n", err);
-+ }
-+ }
-+ }
-+ fixup_unix_address(sock, si, ctx);
-+
-+ if (sock2) {
-+ /* From here on *si holds the PEER's image: the buffer is
-+  * reloaded from the peer's position.
-+  */
-+ err = rst_get_object(CPT_OBJ_SOCKET, pobj->o_pos, si, ctx);
-+ if (err)
-+ return err;
-+ setup_sock_common(sock2->sk, si, pobj->o_pos, ctx);
-+ fixup_unix_address(sock2, si, ctx);
-+ }
-+
-+ /* An INET socket with a parent was sitting in a listener's accept
-+  * queue: hand it over; on success the struct socket no longer owns
-+  * the sock.
-+  */
-+ if ((sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
-+ && (int)si->cpt_parent != -1) {
-+ cpt_object_t *lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
-+ if (lobj && cpt_attach_accept(lobj->o_obj, sock->sk, ctx) == 0)
-+ sock->sk = NULL;
-+ }
-+
-+
-+ /* A remaining AF_INET socket without a file is detached from its
-+  * struct socket and orphaned.
-+  */
-+ if (si->cpt_file == CPT_NULL && sock->sk &&
-+ sock->sk->sk_family == AF_INET) {
-+ struct sock *sk = sock->sk;
-+
-+ if (sk) {
-+ sock->sk = NULL;
-+
-+ local_bh_disable();
-+ bh_lock_sock(sk);
-+ if (sock_owned_by_user(sk))
-+ eprintk_ctx("oops, sock is locked by user\n");
-+
-+ sock_hold(sk);
-+ sock_orphan(sk);
-+ ub_inc_orphan_count(sk);
-+ bh_unlock_sock(sk);
-+ local_bh_enable();
-+ sock_put(sk);
-+ dprintk_ctx("orphaning socket %p\n", sk);
-+ }
-+ }
-+
-+ /* The struct socket shell is no longer needed once its sock has been
-+  * handed off and there is no file referring to it.
-+  */
-+ if (si->cpt_file == CPT_NULL && sock->sk == NULL)
-+ sock_release(sock);
-+
-+ return 0;
-+
-+err_out:
-+ if (sock2)
-+ sock_release(sock2);
-+ sock_release(sock);
-+ return err;
-+}
-+
-+/*
-+ * Create, bind and put into listening state the socket described by @si,
-+ * then register it (and its file) in the restore context.
-+ *
-+ * @pos: position of the socket image in the dump, stored in the object
-+ * @si:  socket image
-+ * @ctx: restore context
-+ *
-+ * Returns 0 on success or a negative error code; on failure the socket is
-+ * released.
-+ *
-+ * A failure of bind_unix_socket() is now reported and aborts the restore
-+ * of this socket.  Previously the error was overwritten by the result of
-+ * listen(), hiding the real cause behind whatever listen() returned for
-+ * the unbound socket.
-+ *
-+ * NOTE(review): err_out calls sock_release() even after sock_mapfile()
-+ * has attached a file to the socket -- confirm that this cleanup is
-+ * correct for the late failure paths.
-+ */
-+static int open_listening_socket(loff_t pos, struct cpt_sock_image *si,
-+				 struct cpt_context *ctx)
-+{
-+	int err;
-+	struct socket *sock;
-+	struct file *file;
-+	cpt_object_t *obj, *fobj;
-+
-+	err = sock_create_kern(si->cpt_family, si->cpt_type, si->cpt_protocol,
-+			       &sock);
-+	if (err) {
-+		eprintk_ctx("open_listening_socket: sock_create_kern: %d\n", err);
-+		return err;
-+	}
-+
-+	sock->sk->sk_reuse = 2;
-+	sock->sk->sk_bound_dev_if = si->cpt_bound_dev_if;
-+
-+	if (sock->sk->sk_family == AF_UNIX) {
-+		err = bind_unix_socket(sock, si, ctx);
-+		if (err) {
-+			eprintk_ctx("open_listening_socket: bind_unix_socket: %d\n", err);
-+			goto err_out;
-+		}
-+	} else if (si->cpt_laddrlen) {
-+		if (sock->sk->sk_family == AF_INET || sock->sk->sk_family == AF_INET6)
-+			inet_sk(sock->sk)->freebind = 1;
-+
-+		err = sock->ops->bind(sock, (struct sockaddr *)&si->cpt_laddr, si->cpt_laddrlen);
-+
-+		if (err) {
-+			eprintk_ctx("open_listening_socket: bind: %d\n", err);
-+			goto err_out;
-+		}
-+	}
-+
-+	err = sock->ops->listen(sock, si->cpt_max_ack_backlog);
-+	if (err) {
-+		eprintk_ctx("open_listening_socket: listen: %d, %Ld, %d\n", err, pos, si->cpt_deleted);
-+		goto err_out;
-+	}
-+
-+	/* Now we may access socket body directly and fixup all the things. */
-+
-+	file = sock_mapfile(sock);
-+	err = PTR_ERR(file);
-+	if (IS_ERR(file)) {
-+		eprintk_ctx("open_listening_socket: map: %d\n", err);
-+		goto err_out;
-+	}
-+
-+	/* Register both the file and the socket so later passes find them */
-+	err = -ENOMEM;
-+	if ((fobj = cpt_object_add(CPT_OBJ_FILE, file, ctx)) == NULL)
-+		goto err_out;
-+	if ((obj = cpt_object_add(CPT_OBJ_SOCKET, sock->sk, ctx)) == NULL)
-+		goto err_out;
-+	cpt_obj_setpos(obj, pos, ctx);
-+	cpt_obj_setindex(obj, si->cpt_index, ctx);
-+	obj->o_parent = file;
-+	cpt_obj_setpos(fobj, si->cpt_file, ctx);
-+	cpt_obj_setindex(fobj, si->cpt_index, ctx);
-+
-+	setup_sock_common(sock->sk, si, pos, ctx);
-+
-+	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6)
-+		rst_restore_synwait_queue(sock->sk, si, pos, ctx);
-+
-+	return 0;
-+
-+err_out:
-+	sock_release(sock);
-+	return err;
-+}
-+
-+
-+/*
-+ * Rebuild one sk_buff from the CPT_OBJ_SKB image located at *@pos_p.
-+ *
-+ * @pos_p: in: position of the skb image; out: position just past it
-+ * @owner: if not NULL, receives the owner index stored in the image
-+ * @queue: if not NULL, receives the queue id stored in the image
-+ * @ctx:   restore context
-+ *
-+ * The skb header fields are taken from the image; nested objects supply
-+ * the packet data (CPT_OBJ_BITS) and files passed over an AF_UNIX socket
-+ * (CPT_OBJ_FILEDESC), which are collected in a scm_fp_list hung off
-+ * UNIXCB(skb).fp.  A file that cannot be restored is silently left out.
-+ *
-+ * Returns the skb or an ERR_PTR(): the error from reading the image,
-+ * -ENOMEM on allocation failure, -EINVAL for an inconsistent image.
-+ *
-+ * The number of passed files is bounded by SCM_MAX_FD, the capacity of
-+ * scm_fp_list.fp[]; an image that carries more descriptors is rejected
-+ * instead of writing past the end of the array.
-+ *
-+ * NOTE(review): the error paths taken after a file list has been attached
-+ * free only the skb; the list and the files collected so far appear to
-+ * stay allocated -- confirm.
-+ */
-+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx)
-+{
-+	int err;
-+	struct sk_buff *skb;
-+	struct cpt_skb_image v;
-+	loff_t pos = *pos_p;
-+	struct scm_fp_list *fpl = NULL;
-+	struct timeval tmptv;
-+
-+	err = rst_get_object(CPT_OBJ_SKB, pos, &v, ctx);
-+	if (err)
-+		return ERR_PTR(err);
-+	/* Tell the caller where the next object starts */
-+	*pos_p = pos + v.cpt_next;
-+
-+	if (owner)
-+		*owner = v.cpt_owner;
-+	if (queue)
-+		*queue = v.cpt_queue;
-+
-+	skb = alloc_skb(v.cpt_len + v.cpt_hspace + v.cpt_tspace, GFP_KERNEL);
-+	if (skb == NULL)
-+		return ERR_PTR(-ENOMEM);
-+	skb_reserve(skb, v.cpt_hspace);
-+	skb_put(skb, v.cpt_len);
-+	/* Header pointers are stored as offsets from skb->head */
-+	skb->h.raw = skb->head + v.cpt_h;
-+	skb->nh.raw = skb->head + v.cpt_nh;
-+	skb->mac.raw = skb->head + v.cpt_mac;
-+	if (sizeof(skb->cb) < sizeof(v.cpt_cb)) BUG();
-+	memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
-+	skb->mac_len = v.cpt_mac_len;
-+
-+	skb->csum = v.cpt_csum;
-+	skb->local_df = v.cpt_local_df;
-+	skb->pkt_type = v.cpt_pkt_type;
-+	skb->ip_summed = v.cpt_ip_summed;
-+	skb->priority = v.cpt_priority;
-+	skb->protocol = v.cpt_protocol;
-+	cpt_timeval_import(&tmptv, v.cpt_stamp);
-+	skb_set_timestamp(skb, &tmptv);
-+
-+	skb_shinfo(skb)->tso_segs = v.cpt_tso_segs;
-+	skb_shinfo(skb)->tso_size = v.cpt_tso_size;
-+	/* Version 0 images carry no usable TSO information */
-+	if (ctx->image_version == 0) {
-+		skb_shinfo(skb)->tso_segs = 1;
-+		skb_shinfo(skb)->tso_size = 0;
-+	}
-+
-+	/* Nested objects follow the skb header */
-+	if (v.cpt_next > v.cpt_hdrlen) {
-+		pos = pos + v.cpt_hdrlen;
-+		while (pos < *pos_p) {
-+			union {
-+				struct cpt_obj_bits b;
-+				struct cpt_fd_image f;
-+			} u;
-+
-+			err = rst_get_object(-1, pos, &u, ctx);
-+			if (err) {
-+				kfree_skb(skb);
-+				return ERR_PTR(err);
-+			}
-+			if (u.b.cpt_object == CPT_OBJ_BITS) {
-+				/* The data blob covers headroom plus payload
-+				 * and is read straight to skb->head.
-+				 */
-+				if (u.b.cpt_size != v.cpt_hspace + skb->len) {
-+					eprintk_ctx("invalid skb image %u != %u + %u\n", u.b.cpt_size, v.cpt_hspace, skb->len);
-+					kfree_skb(skb);
-+					return ERR_PTR(-EINVAL);
-+				}
-+
-+				err = ctx->pread(skb->head, u.b.cpt_size, ctx, pos+u.b.cpt_hdrlen);
-+				if (err) {
-+					kfree_skb(skb);
-+					return ERR_PTR(err);
-+				}
-+			} else if (u.f.cpt_object == CPT_OBJ_FILEDESC) {
-+				if (!fpl) {
-+					fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
-+					if (!fpl) {
-+						kfree_skb(skb);
-+						return ERR_PTR(-ENOMEM);
-+					}
-+					fpl->count = 0;
-+					UNIXCB(skb).fp = fpl;
-+				}
-+				/* fp[] holds SCM_MAX_FD entries; refuse an
-+				 * image that would overflow it.
-+				 */
-+				if (fpl->count >= SCM_MAX_FD) {
-+					eprintk_ctx("invalid skb image: too many passed files\n");
-+					kfree_skb(skb);
-+					return ERR_PTR(-EINVAL);
-+				}
-+				fpl->fp[fpl->count] = rst_file(u.f.cpt_file, -1, ctx);
-+				if (!IS_ERR(fpl->fp[fpl->count]))
-+					fpl->count++;
-+			}
-+			pos += u.b.cpt_next;
-+		}
-+	}
-+
-+	return skb;
-+}
-+
-+/*
-+ * Move the passed-file list from @skb into @scm and mark each file as no
-+ * longer in flight.  The skb's destructor is reset to sock_wfree.
-+ */
-+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-+{
-+	int idx;
-+
-+	scm->fp = UNIXCB(skb).fp;
-+	skb->destructor = sock_wfree;
-+	UNIXCB(skb).fp = NULL;
-+
-+	/* Walk the list from the last file down to the first */
-+	idx = scm->fp->count;
-+	while (--idx >= 0)
-+		unix_notinflight(scm->fp->fp[idx]);
-+}
-+
-+/*
-+ * skb destructor for restored AF_UNIX skbs that carry passed files:
-+ * detaches and destroys the file list, performs the regular write-side
-+ * accounting via sock_wfree() and drops one reference on this module.
-+ */
-+static void unix_destruct_fds(struct sk_buff *skb)
-+{
-+	struct scm_cookie cookie;
-+
-+	memset(&cookie, 0, sizeof(cookie));
-+
-+	/* Take the files off the skb, then release them */
-+	unix_detach_fds(&cookie, skb);
-+	scm_destroy(&cookie);
-+
-+	sock_wfree(skb);
-+	module_put(THIS_MODULE);
-+}
-+
-+
-+/*
-+ * Refill the receive queue of AF_UNIX socket @sk from the skb images
-+ * nested in its socket image.
-+ *
-+ * @sk:  socket whose receive queue is restored
-+ * @si:  image of @sk
-+ * @pos: position of the socket image in the dump
-+ * @ctx: restore context
-+ *
-+ * Each skb is charged to its sending socket: the one named by the owner
-+ * index stored with the skb, or the peer of @sk when no owner is
-+ * recorded.  An skb whose sender cannot be found is dropped.  For a
-+ * listening @sk the queued skbs stand for not-yet-accepted connections,
-+ * so the sender's struct socket shell is released and only the sock is
-+ * kept.
-+ *
-+ * Returns 0 on success or the error reported by rst_skb().
-+ *
-+ * The end of the image is computed from the start of the object, before
-+ * the header is skipped.  It used to be computed afterwards, which put
-+ * the end cpt_hdrlen bytes past the object and made the loop try to parse
-+ * whatever follows the socket image as one more skb.
-+ */
-+static int restore_unix_rqueue(struct sock *sk, struct cpt_sock_image *si,
-+			       loff_t pos, struct cpt_context *ctx)
-+{
-+	loff_t endpos;
-+
-+	/* cpt_next spans the whole object, header included */
-+	endpos = pos + si->cpt_next;
-+	pos = pos + si->cpt_hdrlen;
-+	while (pos < endpos) {
-+		struct sk_buff *skb;
-+		struct sock *owner_sk;
-+		__u32 owner;
-+
-+		/* rst_skb() advances pos past the skb image */
-+		skb = rst_skb(&pos, &owner, NULL, ctx);
-+		if (IS_ERR(skb))
-+			return PTR_ERR(skb);
-+
-+		owner_sk = unix_peer(sk);
-+		if (owner != -1) {
-+			cpt_object_t *pobj;
-+			pobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, owner, ctx);
-+			if (pobj == NULL) {
-+				eprintk_ctx("orphan af_unix skb?\n");
-+				kfree_skb(skb);
-+				continue;
-+			}
-+			owner_sk = pobj->o_obj;
-+		}
-+		if (owner_sk == NULL) {
-+			dprintk_ctx("orphan af_unix skb 2?\n");
-+			kfree_skb(skb);
-+			continue;
-+		}
-+		skb_set_owner_w(skb, owner_sk);
-+		if (UNIXCB(skb).fp) {
-+			/* The destructor lives in this module: pin it for
-+			 * as long as the skb exists.
-+			 */
-+			skb->destructor = unix_destruct_fds;
-+			if (!try_module_get(THIS_MODULE)) BUG();
-+		}
-+		skb_queue_tail(&sk->sk_receive_queue, skb);
-+		if (sk->sk_state == TCP_LISTEN) {
-+			/* Detach the sender's sock from its struct socket
-+			 * and free the latter.
-+			 */
-+			struct socket *sock = skb->sk->sk_socket;
-+			if (sock == NULL) BUG();
-+			if (sock->file) BUG();
-+			skb->sk->sk_socket = NULL;
-+			skb->sk->sk_sleep = NULL;
-+			sock->sk = NULL;
-+			sock_release(sock);
-+		}
-+	}
-+	return 0;
-+}
-+
-+
-+/* All the sockets are created before we start to open files */
-+
-+/*
-+ * Restore all sockets recorded in the CPT_SECT_SOCKET section.
-+ *
-+ * Pass 1 walks the section: listening sockets are opened immediately,
-+ * every other socket only gets an index object remembering its position.
-+ * Pass 2 opens the sockets that still have no kernel object attached.
-+ *
-+ * Returns 0 on success (or when the section is absent), a negative errno
-+ * otherwise.
-+ */
-+int rst_sockets(struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t sec = ctx->sections[CPT_SECT_SOCKET];
-+	loff_t endsec;
-+	cpt_object_t *obj;
-+	struct cpt_section_hdr h;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err) {
-+		eprintk_ctx("rst_sockets: ctx->pread: %d\n", err);
-+		return err;
-+	}
-+	if (h.cpt_section != CPT_SECT_SOCKET || h.cpt_hdrlen < sizeof(h)) {
-+		eprintk_ctx("rst_sockets: hdr err\n");
-+		return -EINVAL;
-+	}
-+
-+	/* The first pass: we create socket index and open listening sockets. */
-+	endsec = sec + h.cpt_next;
-+	sec += h.cpt_hdrlen;
-+	while (sec < endsec) {
-+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
-+		loff_t next;
-+
-+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
-+		if (err) {
-+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		/*
-+		 * Everything needed from the image is consumed while the
-+		 * temporary buffer is still held; its contents must not be
-+		 * relied upon after cpt_release_buf().
-+		 */
-+		next = sbuf->cpt_next;
-+		if (sbuf->cpt_state == TCP_LISTEN) {
-+			err = open_listening_socket(sec, sbuf, ctx);
-+			cpt_release_buf(ctx);
-+			if (err) {
-+				eprintk_ctx("rst_sockets: open_listening_socket: %d\n", err);
-+				return err;
-+			}
-+		} else {
-+			obj = alloc_cpt_object(GFP_KERNEL, ctx);
-+			if (obj == NULL) {
-+				cpt_release_buf(ctx);
-+				return -ENOMEM;
-+			}
-+			cpt_obj_setindex(obj, sbuf->cpt_index, ctx);
-+			cpt_obj_setpos(obj, sec, ctx);
-+			obj->o_ppos = sbuf->cpt_file;
-+			intern_cpt_object(CPT_OBJ_SOCKET, obj, ctx);
-+			cpt_release_buf(ctx);
-+		}
-+		sec += next;
-+	}
-+
-+	/* Pass 2: really restore sockets */
-+	for_each_object(obj, CPT_OBJ_SOCKET) {
-+		struct cpt_sock_image *sbuf;
-+		/* Already opened (listening sockets from pass 1). */
-+		if (obj->o_obj != NULL)
-+			continue;
-+		sbuf = cpt_get_buf(ctx);
-+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, sbuf, ctx);
-+		if (err) {
-+			eprintk_ctx("rst_sockets: rst_get_object: %d\n", err);
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		if (sbuf->cpt_state == TCP_LISTEN) BUG();
-+		err = open_socket(obj, sbuf, ctx);
-+		cpt_release_buf(ctx);
-+		if (err) {
-+			eprintk_ctx("rst_sockets: open_socket: %d\n", err);
-+			return err;
-+		}
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Restore the sockets stored in the CPT_SECT_ORPHANS section.
-+ *
-+ * Each socket is opened through a temporary cpt object that is freed
-+ * again right after open_socket() returns.
-+ *
-+ * Returns 0 on success (or when the section is absent), a negative errno
-+ * otherwise.
-+ */
-+int rst_orphans(struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t sec = ctx->sections[CPT_SECT_ORPHANS];
-+	loff_t endsec;
-+	cpt_object_t *obj;
-+	struct cpt_section_hdr h;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_ORPHANS || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	endsec = sec + h.cpt_next;
-+	sec += h.cpt_hdrlen;
-+	while (sec < endsec) {
-+		struct cpt_sock_image *sbuf = cpt_get_buf(ctx);
-+		loff_t next;
-+
-+		err = rst_get_object(CPT_OBJ_SOCKET, sec, sbuf, ctx);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
-+		if (obj == NULL) {
-+			cpt_release_buf(ctx);
-+			return -ENOMEM;
-+		}
-+		obj->o_pos = sec;
-+		obj->o_ppos = sbuf->cpt_file;
-+		err = open_socket(obj, sbuf, ctx);
-+		dprintk_ctx("Restoring orphan: %d\n", err);
-+		free_cpt_object(obj, ctx);
-+		/*
-+		 * Remember the object length before the temporary buffer is
-+		 * handed back; it must not be read after cpt_release_buf().
-+		 */
-+		next = sbuf->cpt_next;
-+		cpt_release_buf(ctx);
-+		if (err)
-+			return err;
-+		sec += next;
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/* Pass 3: yes, one more pass :-). It is required to establish the links
-+ * between unpaired AF_UNIX SOCK_DGRAM sockets and to restore AF_UNIX
-+ * skb queues with proper skb->sk links.
-+ *
-+ * This could be done at the end of rst_sockets(), but restoring the
-+ * af_unix queues is deferred until all files have been restored, which
-+ * makes restoring passed FDs cleaner.
-+ */
-+
-+/*
-+ * Final socket pass: for every restored AF_UNIX socket, refill its
-+ * receive queue from the image and, for datagram sockets that still have
-+ * no peer, attach the peer recorded in the image.  Orphan sockets are
-+ * restored last.
-+ *
-+ * Only failures to read a socket image are reported; the results of
-+ * restore_unix_rqueue() and rst_orphans() are not checked.
-+ */
-+int rst_sockets_complete(struct cpt_context *ctx)
-+{
-+	int err;
-+	cpt_object_t *obj;
-+
-+	for_each_object(obj, CPT_OBJ_SOCKET) {
-+		struct cpt_sock_image *img;
-+		struct sock *sk = obj->o_obj;
-+		cpt_object_t *peer_obj;
-+
-+		if (!sk) BUG();
-+
-+		if (sk->sk_family != AF_UNIX)
-+			continue;
-+
-+		/* Step 1: queued skbs, present only if the object has a body. */
-+		img = cpt_get_buf(ctx);
-+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, img, ctx);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		if (img->cpt_next > img->cpt_hdrlen)
-+			restore_unix_rqueue(sk, img, obj->o_pos, ctx);
-+		cpt_release_buf(ctx);
-+
-+		/* Step 2: only unconnected datagram sockets need a peer link. */
-+		if (sk->sk_type != SOCK_DGRAM || unix_peer(sk) != NULL)
-+			continue;
-+
-+		img = cpt_get_buf(ctx);
-+		err = rst_get_object(CPT_OBJ_SOCKET, obj->o_pos, img, ctx);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+
-+		peer_obj = NULL;
-+		if (img->cpt_peer != -1)
-+			peer_obj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, img->cpt_peer, ctx);
-+		if (peer_obj) {
-+			struct sock *peer = peer_obj->o_obj;
-+
-+			sock_hold(peer);
-+			unix_peer(sk) = peer;
-+		}
-+		cpt_release_buf(ctx);
-+	}
-+
-+	rst_orphans(ctx);
-+
-+	return 0;
-+}
-+
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_socket_in.c linux-2.6.16-026test009/kernel/cpt/rst_socket_in.c
---- linux-2.6.16.orig/kernel/cpt/rst_socket_in.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_socket_in.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,449 @@
-+/*
-+ *
-+ * kernel/cpt/rst_socket_in.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/socket.h>
-+#include <linux/tcp.h>
-+#include <linux/jhash.h>
-+#include <net/sock.h>
-+#include <net/tcp.h>
-+#include <linux/ipv6.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_socket.h"
-+#include "cpt_kernel.h"
-+
-+/*
-+ * Convert a timeout saved in the image into an absolute jiffies value.
-+ * The 32-bit value is reinterpreted as signed before being added to the
-+ * current jiffies.
-+ */
-+static inline unsigned long jiffies_import(__u32 tmo)
-+{
-+	return jiffies + (long)(__s32)tmo;
-+}
-+
-+/*
-+ * Same conversion for 32-bit TCP timestamps: add the saved value to the
-+ * low 32 bits of the current jiffies.
-+ */
-+static inline __u32 tcp_jiffies_import(__u32 tmo)
-+{
-+	__u32 now = (__u32)jiffies;
-+
-+	return now + tmo;
-+}
-+
-+
-+/*
-+ * Restore the skb queues of inet socket @sk.
-+ *
-+ * @si is the socket image header read from offset @pos; the skbs are
-+ * stored as child objects after the header, each tagged with the queue
-+ * it belongs to (receive, out-of-order or write).  Skbs with an
-+ * unexpected queue type are dropped with a warning.
-+ *
-+ * Returns 0 on success or the negative error reported by rst_skb().
-+ */
-+static int restore_queues(struct sock *sk, struct cpt_sock_image *si,
-+			  loff_t pos, struct cpt_context *ctx)
-+{
-+	loff_t endpos;
-+
-+	/*
-+	 * cpt_next is counted from the start of the object, so the end has
-+	 * to be taken before the header is skipped.  Computing it afterwards
-+	 * overshoots by cpt_hdrlen and walks into the following object.
-+	 */
-+	endpos = pos + si->cpt_next;
-+	pos = pos + si->cpt_hdrlen;
-+	while (pos < endpos) {
-+		struct sk_buff *skb;
-+		__u32 type;
-+
-+		/* rst_skb() advances pos past the object it consumed. */
-+		skb = rst_skb(&pos, NULL, &type, ctx);
-+		if (IS_ERR(skb))
-+			return PTR_ERR(skb);
-+
-+		if (sk->sk_type == SOCK_STREAM) {
-+			if (type == CPT_SKB_RQ) {
-+				sk_stream_set_owner_r(skb, sk);
-+				ub_tcprcvbuf_charge_forced(sk, skb);
-+				skb_queue_tail(&sk->sk_receive_queue, skb);
-+			} else if (type == CPT_SKB_OFOQ) {
-+				struct tcp_sock *tp = tcp_sk(sk);
-+				sk_stream_set_owner_r(skb, sk);
-+				ub_tcprcvbuf_charge_forced(sk, skb);
-+				skb_queue_tail(&tp->out_of_order_queue, skb);
-+			} else if (type == CPT_SKB_WQ) {
-+				/* Account the skb to the send buffer by hand. */
-+				sk->sk_wmem_queued += skb->truesize;
-+				sk->sk_forward_alloc -= skb->truesize;
-+				ub_tcpsndbuf_charge_forced(sk, skb);
-+				skb_queue_tail(&sk->sk_write_queue, skb);
-+			} else {
-+				wprintk_ctx("strange stream queue type %u\n", type);
-+				kfree_skb(skb);
-+			}
-+		} else {
-+			if (type == CPT_SKB_RQ) {
-+				skb_set_owner_r(skb, sk);
-+				skb_queue_tail(&sk->sk_receive_queue, skb);
-+			} else if (type == CPT_SKB_WQ) {
-+				struct inet_sock *inet = inet_sk(sk);
-+				/*
-+				 * Write-queue skbs are kept only while the
-+				 * cork state is set up (fragsize non-zero).
-+				 */
-+				if (inet->cork.fragsize) {
-+					skb_set_owner_w(skb, sk);
-+					skb_queue_tail(&sk->sk_write_queue, skb);
-+				} else {
-+					eprintk_ctx("cork skb is dropped\n");
-+					kfree_skb(skb);
-+				}
-+			} else {
-+				wprintk_ctx("strange dgram queue type %u\n", type);
-+				kfree_skb(skb);
-+			}
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Look through the restored sockets for a listening AF_INET/AF_INET6
-+ * socket bound to source port @sport.  Returns it, or NULL if none.
-+ */
-+static struct sock *find_parent(__u16 sport, cpt_context_t *ctx)
-+{
-+	cpt_object_t *obj;
-+
-+	for_each_object(obj, CPT_OBJ_SOCKET) {
-+		struct sock *cand = obj->o_obj;
-+
-+		if (cand == NULL || cand->sk_state != TCP_LISTEN)
-+			continue;
-+		if (cand->sk_family != AF_INET && cand->sk_family != AF_INET6)
-+			continue;
-+		if (inet_sk(cand)->sport == sport)
-+			return cand;
-+	}
-+	return NULL;
-+}
-+
-+/*
-+ * Restore the TCP-specific state of @sk from socket image @si.
-+ *
-+ * Copies the saved protocol variables into the tcp_sock and the
-+ * connection sock, recomputes sk_send_head from the restored write queue
-+ * and, unless the socket is closed or listening, makes sure it has a
-+ * local port, hashes it and re-arms the pending timers.
-+ *
-+ * Always returns 0.
-+ */
-+static int rst_socket_tcp(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
-+			  struct cpt_context *ctx)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct inet_connection_sock *icsk = inet_csk(sk);
-+	struct inet_sock *inet;
-+	struct sk_buff *skb;
-+
-+	tp->pred_flags = si->cpt_pred_flags;
-+	tp->rcv_nxt = si->cpt_rcv_nxt;
-+	tp->snd_nxt = si->cpt_snd_nxt;
-+	tp->snd_una = si->cpt_snd_una;
-+	tp->snd_sml = si->cpt_snd_sml;
-+	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
-+	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
-+	tp->tcp_header_len = si->cpt_tcp_header_len;
-+	icsk->icsk_ack.pending = si->cpt_ack_pending;
-+	icsk->icsk_ack.quick = si->cpt_quick;
-+	icsk->icsk_ack.pingpong = si->cpt_pingpong;
-+	icsk->icsk_ack.blocked = si->cpt_blocked;
-+	icsk->icsk_ack.ato = si->cpt_ato;
-+	icsk->icsk_ack.timeout = jiffies_import(si->cpt_ack_timeout);
-+	icsk->icsk_ack.lrcvtime = tcp_jiffies_import(si->cpt_lrcvtime);
-+	icsk->icsk_ack.last_seg_size = si->cpt_last_seg_size;
-+	icsk->icsk_ack.rcv_mss = si->cpt_rcv_mss;
-+	tp->snd_wl1 = si->cpt_snd_wl1;
-+	tp->snd_wnd = si->cpt_snd_wnd;
-+	tp->max_window = si->cpt_max_window;
-+	icsk->icsk_pmtu_cookie = si->cpt_pmtu_cookie;
-+	tp->mss_cache = si->cpt_mss_cache;
-+	tp->rx_opt.mss_clamp = si->cpt_mss_clamp;
-+	icsk->icsk_ext_hdr_len = si->cpt_ext_header_len;
-+	icsk->icsk_ca_state = si->cpt_ca_state;
-+	icsk->icsk_retransmits = si->cpt_retransmits;
-+	tp->reordering = si->cpt_reordering;
-+	tp->frto_counter = si->cpt_frto_counter;
-+	tp->frto_highmark = si->cpt_frto_highmark;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
-+	// // tp->adv_cong = si->cpt_adv_cong;
-+#endif
-+	icsk->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
-+	icsk->icsk_backoff = si->cpt_backoff;
-+	tp->srtt = si->cpt_srtt;
-+	tp->mdev = si->cpt_mdev;
-+	tp->mdev_max = si->cpt_mdev_max;
-+	tp->rttvar = si->cpt_rttvar;
-+	tp->rtt_seq = si->cpt_rtt_seq;
-+	icsk->icsk_rto = si->cpt_rto;
-+	tp->packets_out = si->cpt_packets_out;
-+	tp->left_out = si->cpt_left_out;
-+	tp->retrans_out = si->cpt_retrans_out;
-+	tp->lost_out = si->cpt_lost_out;
-+	tp->sacked_out = si->cpt_sacked_out;
-+	tp->fackets_out = si->cpt_fackets_out;
-+	tp->snd_ssthresh = si->cpt_snd_ssthresh;
-+	tp->snd_cwnd = si->cpt_snd_cwnd;
-+	tp->snd_cwnd_cnt = si->cpt_snd_cwnd_cnt;
-+	tp->snd_cwnd_clamp = si->cpt_snd_cwnd_clamp;
-+	tp->snd_cwnd_used = si->cpt_snd_cwnd_used;
-+	tp->snd_cwnd_stamp = tcp_jiffies_import(si->cpt_snd_cwnd_stamp);
-+	icsk->icsk_timeout = tcp_jiffies_import(si->cpt_timeout);
-+	tp->rcv_wnd = si->cpt_rcv_wnd;
-+	tp->rcv_wup = si->cpt_rcv_wup;
-+	tp->write_seq = si->cpt_write_seq;
-+	tp->pushed_seq = si->cpt_pushed_seq;
-+	tp->copied_seq = si->cpt_copied_seq;
-+	tp->rx_opt.tstamp_ok = si->cpt_tstamp_ok;
-+	tp->rx_opt.wscale_ok = si->cpt_wscale_ok;
-+	tp->rx_opt.sack_ok = si->cpt_sack_ok;
-+	tp->rx_opt.saw_tstamp = si->cpt_saw_tstamp;
-+	tp->rx_opt.snd_wscale = si->cpt_snd_wscale;
-+	tp->rx_opt.rcv_wscale = si->cpt_rcv_wscale;
-+	tp->nonagle = si->cpt_nonagle;
-+	tp->keepalive_probes = si->cpt_keepalive_probes;
-+	tp->rx_opt.rcv_tsval = si->cpt_rcv_tsval;
-+	tp->rx_opt.rcv_tsecr = si->cpt_rcv_tsecr;
-+	tp->rx_opt.ts_recent = si->cpt_ts_recent;
-+	tp->rx_opt.ts_recent_stamp = si->cpt_ts_recent_stamp;
-+	tp->rx_opt.user_mss = si->cpt_user_mss;
-+	tp->rx_opt.dsack = si->cpt_dsack;
-+	tp->rx_opt.eff_sacks = si->cpt_num_sacks;
-+	/* cpt_sack_array: one D-SACK block followed by four SACK blocks. */
-+	tp->duplicate_sack[0].start_seq = si->cpt_sack_array[0];
-+	tp->duplicate_sack[0].end_seq = si->cpt_sack_array[1];
-+	tp->selective_acks[0].start_seq = si->cpt_sack_array[2];
-+	tp->selective_acks[0].end_seq = si->cpt_sack_array[3];
-+	tp->selective_acks[1].start_seq = si->cpt_sack_array[4];
-+	tp->selective_acks[1].end_seq = si->cpt_sack_array[5];
-+	tp->selective_acks[2].start_seq = si->cpt_sack_array[6];
-+	tp->selective_acks[2].end_seq = si->cpt_sack_array[7];
-+	tp->selective_acks[3].start_seq = si->cpt_sack_array[8];
-+	tp->selective_acks[3].end_seq = si->cpt_sack_array[9];
-+
-+	tp->window_clamp = si->cpt_window_clamp;
-+	tp->rcv_ssthresh = si->cpt_rcv_ssthresh;
-+	icsk->icsk_probes_out = si->cpt_probes_out;
-+	tp->rx_opt.num_sacks = si->cpt_num_sacks;
-+	tp->advmss = si->cpt_advmss;
-+	icsk->icsk_syn_retries = si->cpt_syn_retries;
-+	tp->ecn_flags = si->cpt_ecn_flags;
-+	tp->prior_ssthresh = si->cpt_prior_ssthresh;
-+	tp->high_seq = si->cpt_high_seq;
-+	tp->retrans_stamp = si->cpt_retrans_stamp;
-+	tp->undo_marker = si->cpt_undo_marker;
-+	tp->undo_retrans = si->cpt_undo_retrans;
-+	tp->urg_seq = si->cpt_urg_seq;
-+	tp->urg_data = si->cpt_urg_data;
-+	icsk->icsk_pending = si->cpt_pending;
-+	tp->urg_mode = si->cpt_urg_mode;
-+	tp->snd_up = si->cpt_snd_up;
-+	tp->keepalive_time = si->cpt_keepalive_time;
-+	tp->keepalive_intvl = si->cpt_keepalive_intvl;
-+	tp->linger2 = si->cpt_linger2;
-+
-+	/* The send head is the first queued skb not yet covered by snd_nxt. */
-+	sk->sk_send_head = NULL;
-+	for (skb = skb_peek(&sk->sk_write_queue);
-+	     skb && skb != (struct sk_buff*)&sk->sk_write_queue;
-+	     skb = skb->next) {
-+		if (!after(tp->snd_nxt, TCP_SKB_CB(skb)->seq)) {
-+			sk->sk_send_head = skb;
-+			break;
-+		}
-+	}
-+
-+	/* Closed and listening sockets need neither hashing nor timers here. */
-+	if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_LISTEN)
-+		return 0;
-+
-+	inet = inet_sk(sk);
-+	if (inet->num == 0) {
-+		/*
-+		 * No local port yet: inherit it from the listening socket
-+		 * recorded as parent in the image or, failing that, from any
-+		 * restored listener on the same source port.
-+		 */
-+		cpt_object_t *lobj = NULL;
-+
-+		if ((int)si->cpt_parent != -1)
-+			lobj = lookup_cpt_obj_byindex(CPT_OBJ_SOCKET, si->cpt_parent, ctx);
-+
-+		if (lobj && lobj->o_obj) {
-+			inet->num = ntohs(inet->sport);
-+			local_bh_disable();
-+			__inet_inherit_port(&tcp_hashinfo, lobj->o_obj, sk);
-+			local_bh_enable();
-+			dprintk_ctx("port inherited from parent\n");
-+		} else {
-+			struct sock *listener = find_parent(inet->sport, ctx);
-+
-+			if (listener) {
-+				inet->num = ntohs(inet->sport);
-+				local_bh_disable();
-+				__inet_inherit_port(&tcp_hashinfo, listener, sk);
-+				local_bh_enable();
-+				dprintk_ctx("port inherited\n");
-+			} else {
-+				eprintk_ctx("we are kinda lost...\n");
-+			}
-+		}
-+	}
-+
-+	sk->sk_prot->hash(sk);
-+
-+	if (icsk->icsk_ack.pending&ICSK_ACK_TIMER)
-+		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
-+	if (icsk->icsk_pending)
-+		sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
-+			       icsk->icsk_timeout);
-+	if (sock_flag(sk, SOCK_KEEPOPEN)) {
-+		unsigned long when = jiffies_import(si->cpt_ka_timeout);
-+
-+		/* A keepalive deadline already in the past fires in one second. */
-+		if (time_after(jiffies, when))
-+			when = jiffies + HZ;
-+		sk_reset_timer(sk, &sk->sk_timer, when);
-+	}
-+
-+	return 0;
-+}
-+
-+
-+/*
-+ * Restore the inet-level state of @sk from socket image @si located at
-+ * image offset @pos.  Runs with the socket locked.
-+ *
-+ * Restores addresses/ports and IP options, the cork state, UDP and IPv6
-+ * specific fields, then the skb queues and, for TCP sockets, the TCP
-+ * state.  Always returns 0: the results of restore_queues() and
-+ * rst_socket_tcp() are not checked.
-+ */
-+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
-+ struct cpt_context *ctx)
-+{
-+ struct inet_sock *inet = inet_sk(sk);
-+
-+ lock_sock(sk);
-+
-+ sk->sk_state = si->cpt_state;
-+
-+ inet->daddr = si->cpt_daddr;
-+ inet->dport = si->cpt_dport;
-+ inet->saddr = si->cpt_saddr;
-+ inet->rcv_saddr = si->cpt_rcv_saddr;
-+ inet->sport = si->cpt_sport;
-+ inet->uc_ttl = si->cpt_uc_ttl;
-+ inet->tos = si->cpt_tos;
-+ inet->cmsg_flags = si->cpt_cmsg_flags;
-+ inet->mc_index = si->cpt_mc_index;
-+ inet->mc_addr = si->cpt_mc_addr;
-+ inet->hdrincl = si->cpt_hdrincl;
-+ inet->mc_ttl = si->cpt_mc_ttl;
-+ inet->mc_loop = si->cpt_mc_loop;
-+ inet->pmtudisc = si->cpt_pmtudisc;
-+ inet->recverr = si->cpt_recverr;
-+ inet->freebind = si->cpt_freebind;
-+ inet->id = si->cpt_idcounter;
-+
-+ inet->cork.flags = si->cpt_cork_flags;
-+ inet->cork.fragsize = si->cpt_cork_fragsize;
-+ inet->cork.length = si->cpt_cork_length;
-+ inet->cork.addr = si->cpt_cork_addr;
-+ inet->cork.fl.fl4_src = si->cpt_cork_saddr;
-+ inet->cork.fl.fl4_dst = si->cpt_cork_daddr;
-+ inet->cork.fl.oif = si->cpt_cork_oif;
-+ /* A corked socket needs its route again; without one the cork state
-+ * is cleared (restore_queues() then drops the corked skbs). */
-+ if (inet->cork.fragsize) {
-+ if (ip_route_output_key(&inet->cork.rt, &inet->cork.fl)) {
-+ eprintk_ctx("failed to restore cork route\n");
-+ inet->cork.fragsize = 0;
-+ }
-+ }
-+
-+ if (sk->sk_type == SOCK_DGRAM && sk->sk_protocol == IPPROTO_UDP) {
-+ struct udp_sock *up = udp_sk(sk);
-+ up->pending = si->cpt_udp_pending;
-+ up->corkflag = si->cpt_udp_corkflag;
-+ up->encap_type = si->cpt_udp_encap;
-+ up->len = si->cpt_udp_len;
-+ }
-+
-+ if (sk->sk_family == AF_INET6) {
-+ struct ipv6_pinfo *np = inet6_sk(sk);
-+
-+ /* IPv6 addresses are stored in the image as 16-byte arrays. */
-+ memcpy(&np->saddr, si->cpt_saddr6, 16);
-+ memcpy(&np->rcv_saddr, si->cpt_rcv_saddr6, 16);
-+ memcpy(&np->daddr, si->cpt_daddr6, 16);
-+ np->flow_label = si->cpt_flow_label6;
-+ np->frag_size = si->cpt_frag_size6;
-+ np->hop_limit = si->cpt_hop_limit6;
-+ np->mcast_hops = si->cpt_mcast_hops6;
-+ np->mcast_oif = si->cpt_mcast_oif6;
-+ np->rxopt.all = si->cpt_rxopt6;
-+ np->mc_loop = si->cpt_mc_loop6;
-+ np->recverr = si->cpt_recverr6;
-+ np->sndflow = si->cpt_sndflow6;
-+ np->pmtudisc = si->cpt_pmtudisc6;
-+ np->ipv6only = si->cpt_ipv6only6;
-+
-+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-+ /* A TCP socket flagged as mapped in the image is switched to the
-+ * ipv6_mapped operations and the IPv4 backlog handler. */
-+ if (si->cpt_mapped) {
-+ if (sk->sk_type == SOCK_STREAM &&
-+ sk->sk_protocol == IPPROTO_TCP) {
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ tp->af_specific = &ipv6_mapped;
-+ sk->sk_backlog_rcv = tcp_v4_do_rcv;
-+ }
-+ }
-+#endif
-+ }
-+
-+ /* Queues go first: rst_socket_tcp() derives sk_send_head from the
-+ * restored write queue. */
-+ restore_queues(sk, si, pos, ctx);
-+
-+ if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP)
-+ rst_socket_tcp(si, pos, sk, ctx);
-+
-+ release_sock(sk);
-+ return 0;
-+}
-+
-+/*
-+ * Put the restored socket @sk on the accept queue of listening socket
-+ * @lsk.  @sk is detached from any struct socket first.
-+ *
-+ * Returns 0 on success, -EINVAL if @lsk is not listening, -ENOMEM if no
-+ * request sock could be allocated.
-+ */
-+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *ctx)
-+{
-+	struct request_sock *rsk;
-+
-+	if (lsk->sk_state == TCP_LISTEN) {
-+		rsk = reqsk_alloc(&tcp_request_sock_ops);
-+		if (rsk == NULL)
-+			return -ENOMEM;
-+
-+		sk->sk_socket = NULL;
-+		sk->sk_sleep = NULL;
-+		inet_csk_reqsk_queue_add(lsk, rsk, sk);
-+		return 0;
-+	}
-+
-+	return -EINVAL;
-+}
-+
-+/* Hash a remote address/port pair into a SYN queue bucket index. */
-+static __inline__ u32 __tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
-+{
-+	u32 hash = jhash_2words(raddr, (u32) rport, rnd);
-+
-+	return hash & (TCP_SYNQ_HSIZE - 1);
-+}
-+
-+/*
-+ * Restore the queue of pending connection requests of listening socket
-+ * @sk.
-+ *
-+ * @si is the socket image header read from offset @pos; the open
-+ * requests are stored as CPT_OBJ_OPENREQ child objects after the header.
-+ *
-+ * Returns 0 on success, a negative errno if an object cannot be read or
-+ * a request sock cannot be allocated.
-+ */
-+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si,
-+			      loff_t pos, struct cpt_context *ctx)
-+{
-+	int err;
-+	/*
-+	 * cpt_next is the object length counted from its start, while pos is
-+	 * an absolute image offset: the end of the object is pos + cpt_next.
-+	 * Using the bare length made the loop below effectively dead.
-+	 */
-+	loff_t end = pos + si->cpt_next;
-+
-+	pos += si->cpt_hdrlen;
-+	while (pos < end) {
-+		struct cpt_openreq_image oi;
-+
-+		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
-+		if (err)
-+			return err;
-+
-+		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
-+			struct request_sock *req = reqsk_alloc(&tcp_request_sock_ops);
-+			if (req == NULL)
-+				return -ENOMEM;
-+
-+			memset(req, 0, sizeof(*req));
-+			/*
-+			 * The memset also wiped the ops pointer installed by
-+			 * reqsk_alloc(); put it back, the request cannot be
-+			 * freed without it.
-+			 */
-+			req->rsk_ops = &tcp_request_sock_ops;
-+			/* Not covered by the memset above; no IP options saved. */
-+			inet_rsk(req)->opt = NULL;
-+			tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
-+			tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
-+			inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
-+			req->mss = oi.cpt_mss;
-+			req->retrans = oi.cpt_retrans;
-+			inet_rsk(req)->snd_wscale = oi.cpt_snd_wscale;
-+			inet_rsk(req)->rcv_wscale = oi.cpt_rcv_wscale;
-+			inet_rsk(req)->tstamp_ok = oi.cpt_tstamp_ok;
-+			inet_rsk(req)->sack_ok = oi.cpt_sack_ok;
-+			inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
-+			inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
-+			inet_rsk(req)->acked = oi.cpt_acked;
-+			req->window_clamp = oi.cpt_window_clamp;
-+			req->rcv_wnd = oi.cpt_rcv_wnd;
-+			req->ts_recent = oi.cpt_ts_recent;
-+			req->expires = jiffies_import(oi.cpt_expires);
-+
-+			if (oi.cpt_family == AF_INET) {
-+				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
-+				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
-+				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-+			} else {
-+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-+				/*
-+				 * NOTE(review): this branch uses fields and helpers
-+				 * (req->af, req->class, or_ipv6, tcp_v6_synq_add)
-+				 * not used anywhere else in this file -- confirm
-+				 * they exist in the target tree.
-+				 */
-+				memcpy(&req->af.v6_req.loc_addr, oi.cpt_loc_addr, 16);
-+				memcpy(&req->af.v6_req.rmt_addr, oi.cpt_rmt_addr, 16);
-+				req->af.v6_req.iif = oi.cpt_iif;
-+				req->class = &or_ipv6;
-+				tcp_v6_synq_add(sk, req);
-+#else
-+				/* Nobody takes ownership of the request: free it. */
-+				__reqsk_free(req);
-+#endif
-+			}
-+		}
-+		pos += oi.cpt_next;
-+	}
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_sysvipc.c linux-2.6.16-026test009/kernel/cpt/rst_sysvipc.c
---- linux-2.6.16.orig/kernel/cpt/rst_sysvipc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_sysvipc.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,409 @@
-+/*
-+ *
-+ * kernel/cpt/rst_sysvipc.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/shm.h>
-+/* FIXME. x86_64 has asm/ipc.h forgotten? */
-+#include <asm-generic/ipc.h>
-+#include <asm/uaccess.h>
-+#include <asm/unistd.h>
-+#include <ub/ub_mem.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_kernel.h"
-+
-+/* Argument passed to fixup_one_shm() through sysvipc_walk_shm(). */
-+struct _warg {
-+ struct file *file; /* file of the segment to fix up */
-+ struct cpt_sysvshm_image *v; /* saved attributes to apply to it */
-+};
-+
-+/*
-+ * sysvipc_walk_shm() callback: if @shp is the segment backed by the file
-+ * in @arg, overwrite its ownership, mode, timestamps and creator/last
-+ * pids with the values saved in the image.
-+ *
-+ * Returns 0 for a non-matching segment, 1 after a successful fixup and
-+ * -EEXIST when the matching segment is already attached.
-+ */
-+static int fixup_one_shm(struct shmid_kernel *shp, void *arg)
-+{
-+	struct _warg *warg = arg;
-+	struct cpt_sysvshm_image *img;
-+
-+	if (shp->shm_file != warg->file)
-+		return 0;
-+	if (shp->shm_nattch)
-+		return -EEXIST;
-+
-+	img = warg->v;
-+
-+	shp->shm_perm.uid = img->cpt_uid;
-+	shp->shm_perm.gid = img->cpt_gid;
-+	shp->shm_perm.cuid = img->cpt_cuid;
-+	shp->shm_perm.cgid = img->cpt_cgid;
-+	shp->shm_perm.mode = img->cpt_mode;
-+
-+	shp->shm_atim = img->cpt_atime;
-+	shp->shm_dtim = img->cpt_dtime;
-+	shp->shm_ctim = img->cpt_ctime;
-+	shp->shm_cprid = img->cpt_creator;
-+	shp->shm_lprid = img->cpt_last;
-+
-+	/* TODO: fix shp->mlock_user? */
-+	return 1;
-+}
-+
-+/*
-+ * Apply the attributes saved in @v to the SysV shm segment backed by
-+ * @file.  Returns whatever sysvipc_walk_shm() returns for the walk with
-+ * fixup_one_shm().
-+ */
-+static int fixup_shm(struct file *file, struct cpt_sysvshm_image *v)
-+{
-+	struct _warg warg = {
-+		.file = file,
-+		.v = v,
-+	};
-+
-+	return sysvipc_walk_shm(fixup_one_shm, &warg);
-+}
-+
-+/*
-+ * Restore the contents of a SysV shm segment.
-+ *
-+ * Walks the CPT_OBJ_PAGES blocks stored in the image between @pos and
-+ * @end and copies each one into @file through the inode's ->write
-+ * operation, at most PAGE_SIZE bytes at a time, using ctx->tmpbuf as the
-+ * bounce buffer.
-+ *
-+ * Returns 0 on success, -EINVAL if the file has no ->write operation,
-+ * -EIO on a short write, or the error from reading the image / writing
-+ * the file.
-+ */
-+static int fixup_shm_data(struct file *file, loff_t pos, loff_t end,
-+ struct cpt_context *ctx)
-+{
-+ struct cpt_page_block pgb;
-+ ssize_t (*do_write)(struct file *, const char __user *, size_t, loff_t *ppos);
-+
-+ do_write = file->f_dentry->d_inode->i_fop->write;
-+ if (do_write == NULL) {
-+ eprintk_ctx("No TMPFS? Cannot restore content of SYSV SHM\n");
-+ return -EINVAL;
-+ }
-+
-+ while (pos < end) {
-+ loff_t opos; /* write offset inside the segment */
-+ loff_t ipos; /* read offset inside the image */
-+ int count; /* bytes of this block still to copy */
-+ int err;
-+
-+ err = rst_get_object(CPT_OBJ_PAGES, pos, &pgb, ctx);
-+ if (err)
-+ return err;
-+ dprintk_ctx("restoring SHM block: %08x-%08x\n",
-+ (__u32)pgb.cpt_start, (__u32)pgb.cpt_end);
-+ ipos = pos + pgb.cpt_hdrlen;
-+ opos = pgb.cpt_start;
-+ count = pgb.cpt_end-pgb.cpt_start;
-+ while (count > 0) {
-+ mm_segment_t oldfs;
-+ int copy = count;
-+
-+ if (copy > PAGE_SIZE)
-+ copy = PAGE_SIZE;
-+ /* Claim the context's temporary buffer; it is used below
-+ * as ctx->tmpbuf. */
-+ (void)cpt_get_buf(ctx);
-+ /* KERNEL_DS lets the kernel buffer pass user-pointer checks. */
-+ oldfs = get_fs(); set_fs(KERNEL_DS);
-+ err = ctx->pread(ctx->tmpbuf, copy, ctx, ipos);
-+ set_fs(oldfs);
-+ if (err) {
-+ __cpt_release_buf(ctx);
-+ return err;
-+ }
-+ oldfs = get_fs(); set_fs(KERNEL_DS);
-+ ipos += copy;
-+ err = do_write(file, ctx->tmpbuf, copy, &opos);
-+ set_fs(oldfs);
-+ __cpt_release_buf(ctx);
-+ /* Anything but a full write is a failure. */
-+ if (err != copy) {
-+ eprintk_ctx("write() failure\n");
-+ if (err >= 0)
-+ err = -EIO;
-+ return err;
-+ }
-+ count -= copy;
-+ }
-+ pos += pgb.cpt_next;
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Restore the SysV shm segment behind the file image at @pos.
-+ *
-+ * Reads the file image, its inode image and the CPT_OBJ_SYSV_SHM record
-+ * stored after the inode header, recreates the segment, applies the saved
-+ * attributes and, when the inode carries page blocks, restores the
-+ * contents.  A segment that is already attached (fixup_shm() returning
-+ * -EEXIST) is left untouched.
-+ *
-+ * Returns the segment's file, the ERR_PTR from sysvipc_setup_shm(), or an
-+ * ERR_PTR with the error that prevented reading the image or restoring
-+ * the contents.
-+ */
-+struct file * rst_sysv_shm(loff_t pos, struct cpt_context *ctx)
-+{
-+	struct file *file;
-+	int err;
-+	loff_t dpos, epos;
-+	union {
-+		struct cpt_file_image		fi;
-+		struct cpt_sysvshm_image	shmi;
-+		struct cpt_inode_image		ii;
-+	} u;
-+
-+	err = rst_get_object(CPT_OBJ_FILE, pos, &u.fi, ctx);
-+	if (err < 0)
-+		goto err_out;
-+	pos = u.fi.cpt_inode;
-+	err = rst_get_object(CPT_OBJ_INODE, pos, &u.ii, ctx);
-+	if (err < 0)
-+		goto err_out;
-+	/* The union is about to be reused: take the inode bounds first. */
-+	dpos = pos + u.ii.cpt_hdrlen;
-+	epos = pos + u.ii.cpt_next;
-+	err = rst_get_object(CPT_OBJ_SYSV_SHM, pos + u.ii.cpt_hdrlen, &u.shmi, ctx);
-+	if (err < 0)
-+		goto err_out;
-+	/* Page blocks, if any, follow the shm record. */
-+	dpos += u.shmi.cpt_next;
-+
-+	file = sysvipc_setup_shm(u.shmi.cpt_key, u.shmi.cpt_id,
-+				 u.shmi.cpt_segsz, u.shmi.cpt_mode);
-+	if (!IS_ERR(file)) {
-+		err = fixup_shm(file, &u.shmi);
-+		if (err != -EEXIST && dpos < epos) {
-+			err = fixup_shm_data(file, dpos, epos, ctx);
-+			/*
-+			 * Do not hand out a segment whose contents could not
-+			 * be restored; this error used to be dropped.
-+			 */
-+			if (err)
-+				goto err_put;
-+		}
-+	}
-+
-+	return file;
-+
-+err_put:
-+	fput(file);
-+err_out:
-+	return ERR_PTR(err);
-+}
-+
-+/*
-+ * sysvipc_walk_sem() callback: when @semid is the semaphore set the undo
-+ * record in @arg belongs to, push the record onto both the set's undo
-+ * chain and the current task's undo list.
-+ *
-+ * Returns 1 when the record was attached, 0 for any other set.
-+ */
-+static int attach_one_undo(int semid, struct sem_array *sma, void *arg)
-+{
-+	struct sem_undo *undo = arg;
-+	struct sem_undo_list *ulist;
-+
-+	if (undo->semid != semid)
-+		return 0;
-+
-+	/* Per-set chain. */
-+	undo->id_next = sma->undo;
-+	sma->undo = undo;
-+
-+	/* Per-task chain. */
-+	ulist = current->sysvsem.undo_list;
-+	undo->proc_next = ulist->proc_list;
-+	ulist->proc_list = undo;
-+
-+	return 1;
-+}
-+
-+/*
-+ * Attach undo record @su to its semaphore set by walking all sets with
-+ * attach_one_undo(); returns the result of that walk.
-+ */
-+static int attach_undo(struct sem_undo *su)
-+{
-+	int res = sysvipc_walk_sem(attach_one_undo, su);
-+
-+	return res;
-+}
-+
-+/*
-+ * Build a fresh semaphore undo list for the current task from the undo
-+ * object @sui located at image offset @pos.
-+ *
-+ * Each CPT_OBJ_SYSVSEM_UNDO_REC child becomes one sem_undo with its
-+ * adjustment array allocated right behind it, attached via attach_undo().
-+ * If the task already has an undo list, nothing is done.
-+ *
-+ * Returns 0 on success, -ENOMEM on allocation failure, -ENOENT when a
-+ * record matches no semaphore set, or the error from reading the image.
-+ */
-+static int do_rst_semundo(struct cpt_object_hdr *sui, loff_t pos, struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t offset;
-+	struct sem_undo_list *ulist;
-+
-+	if (current->sysvsem.undo_list) {
-+		eprintk_ctx("Funny undo_list\n");
-+		return 0;
-+	}
-+
-+	ulist = ub_kmalloc(sizeof(struct sem_undo_list), GFP_KERNEL);
-+	if (ulist == NULL)
-+		return -ENOMEM;
-+	memset(ulist, 0, sizeof(struct sem_undo_list));
-+	atomic_set(&ulist->refcnt, 1);
-+	spin_lock_init(&ulist->lock);
-+	current->sysvsem.undo_list = ulist;
-+
-+	/* Child records, if any, start right after the header. */
-+	offset = pos + sui->cpt_hdrlen;
-+	while (offset < pos + sui->cpt_next) {
-+		struct sem_undo *rec;
-+		struct cpt_sysvsem_undo_image spi;
-+
-+		err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO_REC, offset, &spi, ctx);
-+		if (err)
-+			return err;
-+
-+		rec = ub_kmalloc(sizeof(struct sem_undo) +
-+				 sizeof(short)*spi.cpt_nsem, GFP_KERNEL);
-+		if (rec == NULL)
-+			return -ENOMEM;
-+
-+		memset(rec, 0, sizeof(struct sem_undo) + sizeof(short)*spi.cpt_nsem);
-+		/* The adjustment array lives directly behind the record. */
-+		rec->semadj = (short *) &rec[1];
-+		rec->semid = spi.cpt_id;
-+		err = ctx->pread(rec->semadj, spi.cpt_nsem*sizeof(short), ctx, offset + spi.cpt_hdrlen);
-+		if (err) {
-+			kfree(rec);
-+			return err;
-+		}
-+
-+		err = attach_undo(rec);
-+		if (err <= 0) {
-+			/* 0 means no semaphore set with this id was found. */
-+			if (err == 0)
-+				err = -ENOENT;
-+			kfree(rec);
-+			return err;
-+		}
-+		offset += spi.cpt_next;
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Return the clone flags needed to restore the task's semaphore undo
-+ * state.  Currently always 0: the code that would request CLONE_SYSVSEM
-+ * is compiled out.
-+ */
-+__u32 rst_semundo_flag(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+ __u32 flag = 0;
-+
-+#if 0
-+ if (ti->cpt_sysvsem_undo == CPT_NULL ||
-+ lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo))
-+ flag |= CLONE_SYSVSEM;
-+#endif
-+ return flag;
-+}
-+
-+/*
-+ * Give the current task the semaphore undo list described by its task
-+ * image @ti.
-+ *
-+ * - No undo list in the image: drop whatever the task has.
-+ * - Undo list already restored for another task: share it.
-+ * - Otherwise: build it from the image and register it under its image
-+ *   position so that later tasks can find and share it.
-+ *
-+ * Returns 0 on success or a negative errno.
-+ */
-+int rst_semundo_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+	int err;
-+	struct sem_undo_list *f = current->sysvsem.undo_list;
-+	cpt_object_t *obj;
-+	struct cpt_object_hdr sui;
-+
-+	if (ti->cpt_sysvsem_undo == CPT_NULL) {
-+		exit_sem(current);
-+		return 0;
-+	}
-+
-+	obj = lookup_cpt_obj_bypos(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, ctx);
-+	if (obj) {
-+		if (obj->o_obj != f) {
-+			exit_sem(current);
-+			f = obj->o_obj;
-+			atomic_inc(&f->refcnt);
-+			current->sysvsem.undo_list = f;
-+		}
-+		return 0;
-+	}
-+
-+	err = rst_get_object(CPT_OBJ_SYSVSEM_UNDO, ti->cpt_sysvsem_undo, &sui, ctx);
-+	if (err != 0)
-+		return err;
-+
-+	err = do_rst_semundo(&sui, ti->cpt_sysvsem_undo, ctx);
-+	if (err != 0)
-+		return err;
-+
-+	/*
-+	 * do_rst_semundo() installs a newly allocated list when the task had
-+	 * none, so the pointer sampled on entry is stale (typically NULL).
-+	 * Register the list the task owns now; otherwise a task sharing it
-+	 * would later pick up the stale pointer from the object table.
-+	 */
-+	f = current->sysvsem.undo_list;
-+
-+	obj = cpt_object_add(CPT_OBJ_SYSVSEM_UNDO, f, ctx);
-+	if (obj == NULL)
-+		return -ENOMEM;	/* used to be overwritten by "return 0" */
-+
-+	cpt_obj_setpos(obj, ti->cpt_sysvsem_undo, ctx);
-+	return 0;
-+}
-+
-+/* Argument passed to fixup_one_sem() through sysvipc_walk_sem(). */
-+struct _sarg {
-+ int semid; /* id of the semaphore set to fix up */
-+ struct cpt_sysvsem_image *v; /* saved attributes to apply to it */
-+ __u32 *arr; /* saved semaphore array, copied into sem_base */
-+};
-+
-+/*
-+ * sysvipc_walk_sem() callback: if @semid is the set named in @arg,
-+ * overwrite its permissions, sequence number and timestamps with the
-+ * saved values and copy the saved semaphore array (8 bytes per
-+ * semaphore) into sem_base.
-+ *
-+ * Returns 1 when the set was fixed up, 0 for any other set.
-+ */
-+static int fixup_one_sem(int semid, struct sem_array *sma, void *arg)
-+{
-+	struct _sarg *sarg = arg;
-+	struct cpt_sysvsem_image *img;
-+
-+	if (sarg->semid != semid)
-+		return 0;
-+
-+	img = sarg->v;
-+
-+	sma->sem_perm.uid = img->cpt_uid;
-+	sma->sem_perm.gid = img->cpt_gid;
-+	sma->sem_perm.cuid = img->cpt_cuid;
-+	sma->sem_perm.cgid = img->cpt_cgid;
-+	sma->sem_perm.mode = img->cpt_mode;
-+	sma->sem_perm.seq = img->cpt_seq;
-+
-+	sma->sem_ctime = img->cpt_ctime;
-+	sma->sem_otime = img->cpt_otime;
-+
-+	memcpy(sma->sem_base, sarg->arr, sma->sem_nsems*8);
-+	return 1;
-+}
-+
-+/*
-+ * Apply the attributes in @v and the semaphore array @arr to the set
-+ * with id @semid.  Returns the result of the sysvipc_walk_sem() walk with
-+ * fixup_one_sem().
-+ */
-+static int fixup_sem(int semid, struct cpt_sysvsem_image *v, __u32 *arr)
-+{
-+	struct _sarg sarg = {
-+		.semid = semid,
-+		.v = v,
-+		.arr = arr,
-+	};
-+
-+	return sysvipc_walk_sem(fixup_one_sem, &sarg);
-+}
-+
-+
-+/*
-+ * Restore one SysV semaphore set from image object @si located at @pos.
-+ *
-+ * The semaphore array follows the header, 8 bytes per semaphore.  The set
-+ * is recreated with sysvipc_setup_sem() and then patched by fixup_sem().
-+ *
-+ * Returns 0 on success, -ENOMEM, -ESRCH when the new set cannot be found
-+ * for fixup, or the error from reading the image / creating the set.
-+ */
-+static int restore_sem(loff_t pos, struct cpt_sysvsem_image *si,
-+		       struct cpt_context *ctx)
-+{
-+	int err;
-+	int nsems = (si->cpt_next - si->cpt_hdrlen)/8;
-+	__u32 *vals = kmalloc(nsems*8, GFP_KERNEL);
-+
-+	if (vals == NULL)
-+		return -ENOMEM;
-+
-+	err = ctx->pread(vals, nsems*8, ctx, pos+si->cpt_hdrlen);
-+	if (!err) {
-+		err = sysvipc_setup_sem(si->cpt_key, si->cpt_id, nsems, si->cpt_mode);
-+		if (err < 0) {
-+			eprintk_ctx("SEM 3\n");
-+		} else {
-+			/* The walk returns 1 on a hit, 0 when nothing matched. */
-+			err = fixup_sem(si->cpt_id, si, vals);
-+			if (err == 0)
-+				err = -ESRCH;
-+			else if (err > 0)
-+				err = 0;
-+		}
-+	}
-+
-+	kfree(vals);
-+	return err;
-+}
-+
-+/*
-+ * Restore every semaphore set recorded in the CPT_SECT_SYSV_SEM section.
-+ * Returns 0 on success (or when the section is absent), -EINVAL for a bad
-+ * section header, or the first error hit while restoring a set.
-+ */
-+static int rst_sysv_sem(struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t cur = ctx->sections[CPT_SECT_SYSV_SEM];
-+	loff_t limit;
-+	struct cpt_section_hdr h;
-+	struct cpt_sysvsem_image img;
-+
-+	if (cur == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, cur);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_SYSV_SEM || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	limit = cur + h.cpt_next;
-+	for (cur += h.cpt_hdrlen; cur < limit; cur += img.cpt_next) {
-+		err = rst_get_object(CPT_OBJ_SYSV_SEM, cur, &img, ctx);
-+		if (err)
-+			return err;
-+		err = restore_sem(cur, &img, ctx);
-+		if (err)
-+			return err;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Entry point for restoring SysV IPC objects.  Only semaphore sets are
-+ * handled here; returns the result of rst_sysv_sem().
-+ */
-+int rst_sysv_ipc(struct cpt_context *ctx)
-+{
-+ return rst_sysv_sem(ctx);
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_tty.c linux-2.6.16-026test009/kernel/cpt/rst_tty.c
---- linux-2.6.16.orig/kernel/cpt/rst_tty.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_tty.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,380 @@
-+/*
-+ *
-+ * kernel/cpt/rst_tty.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/pipe_fs_i.h>
-+#include <linux/mman.h>
-+#include <linux/mount.h>
-+#include <linux/tty.h>
-+#include <linux/vmalloc.h>
-+#include <asm/unistd.h>
-+#include <asm/uaccess.h>
-+#include <linux/cpt_image.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_mm.h"
-+#include "cpt_files.h"
-+#include "cpt_kernel.h"
-+
-+/*
-+ * Load the saved state of one pty end into @stty.
-+ *
-+ * @pi is the tty image header read from offset @pos.  Flags, window size,
-+ * line discipline state and termios are copied from it; if the object has
-+ * a body, the saved CPT_OBJ_BITS payload is read into the tty read buffer
-+ * and the read pointers are set up accordingly.
-+ *
-+ * Returns 0 on success, -EINVAL if the saved data does not fit into the
-+ * read buffer, or the error from reading the image.
-+ */
-+static int pty_setup(struct tty_struct *stty, loff_t pos,
-+		     struct cpt_tty_image *pi, struct cpt_context *ctx)
-+{
-+	unsigned long flags;
-+
-+	stty->pgrp = -1;
-+	stty->session = 0;
-+	stty->packet = pi->cpt_packet;
-+	stty->stopped = pi->cpt_stopped;
-+	stty->hw_stopped = pi->cpt_hw_stopped;
-+	stty->flow_stopped = pi->cpt_flow_stopped;
-+	/* These two bits keep their current value; the rest comes from the image. */
-+#define DONOT_CHANGE ((1<<TTY_CHARGED)|(1<<TTY_CLOSING))
-+	flags = stty->flags & DONOT_CHANGE;
-+	stty->flags = flags | (pi->cpt_flags & ~DONOT_CHANGE);
-+	stty->ctrl_status = pi->cpt_ctrl_status;
-+	stty->winsize.ws_row = pi->cpt_ws_row;
-+	stty->winsize.ws_col = pi->cpt_ws_col;
-+	stty->winsize.ws_ypixel = pi->cpt_ws_prow;
-+	stty->winsize.ws_xpixel = pi->cpt_ws_pcol;
-+	stty->canon_column = pi->cpt_canon_column;
-+	stty->column = pi->cpt_column;
-+	stty->raw = pi->cpt_raw;
-+	stty->real_raw = pi->cpt_real_raw;
-+	stty->erasing = pi->cpt_erasing;
-+	stty->lnext = pi->cpt_lnext;
-+	stty->icanon = pi->cpt_icanon;
-+	stty->closing = pi->cpt_closing;
-+	stty->minimum_to_wake = pi->cpt_minimum_to_wake;
-+
-+	stty->termios->c_iflag = pi->cpt_c_iflag;
-+	stty->termios->c_oflag = pi->cpt_c_oflag;
-+	stty->termios->c_lflag = pi->cpt_c_lflag;
-+	stty->termios->c_cflag = pi->cpt_c_cflag;
-+	memcpy(&stty->termios->c_cc, &pi->cpt_c_cc, NCCS);
-+	memcpy(stty->read_flags, pi->cpt_read_flags, sizeof(stty->read_flags));
-+
-+	if (pi->cpt_next > pi->cpt_hdrlen) {
-+		int err;
-+		struct cpt_obj_bits b;
-+		err = rst_get_object(CPT_OBJ_BITS, pos + pi->cpt_hdrlen, &b, ctx);
-+		if (err)
-+			return err;
-+		if (b.cpt_size == 0)
-+			return 0;
-+		/*
-+		 * The size comes from the image: never copy more than the
-+		 * read buffer holds (N_TTY_BUF_SIZE bytes).
-+		 */
-+		if (b.cpt_size > N_TTY_BUF_SIZE)
-+			return -EINVAL;
-+		err = ctx->pread(stty->read_buf, b.cpt_size, ctx, pos + pi->cpt_hdrlen + b.cpt_hdrlen);
-+		if (err)
-+			return err;
-+
-+		/* The data was loaded at the start of the buffer. */
-+		spin_lock_irq(&stty->read_lock);
-+		stty->read_tail = 0;
-+		stty->read_cnt = b.cpt_size;
-+		stty->read_head = b.cpt_size;
-+		stty->canon_head = stty->read_tail + pi->cpt_canon_head;
-+		stty->canon_data = pi->cpt_canon_data;
-+		spin_unlock_irq(&stty->read_lock);
-+	}
-+
-+	return 0;
-+}
-+
-+/*
-+ * Find slave/master tty in image, when we already know master/slave.
-+ * It might be optimized, of course.
-+ *
-+ * Scans the CPT_SECT_TTY section for a record other than the one at @pos
-+ * that has the same index and the same TTY_DRIVER_DEVPTS_MEM setting as @pi,
-+ * and applies it to @stty with pty_setup().
-+ *
-+ * Returns the image offset of the peer record, or CPT_NULL when the section
-+ * cannot be read, memory cannot be allocated or no peer is found. The
-+ * temporary record buffer is freed on every path.
-+ */
-+static loff_t find_pty_pair(struct tty_struct *stty, loff_t pos, struct cpt_tty_image *pi, struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t sec = ctx->sections[CPT_SECT_TTY];
-+	loff_t endsec;
-+	loff_t found = CPT_NULL;
-+	struct cpt_section_hdr h;
-+	struct cpt_tty_image *pibuf;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return CPT_NULL;
-+	if (h.cpt_section != CPT_SECT_TTY || h.cpt_hdrlen < sizeof(h))
-+		return CPT_NULL;
-+	pibuf = kmalloc(sizeof(*pibuf), GFP_KERNEL);
-+	if (pibuf == NULL) {
-+		eprintk_ctx("cannot allocate buffer\n");
-+		return CPT_NULL;
-+	}
-+	endsec = sec + h.cpt_next;
-+	sec += h.cpt_hdrlen;
-+	while (sec < endsec) {
-+		if (rst_get_object(CPT_OBJ_TTY, sec, pibuf, ctx))
-+			break;
-+		if (pibuf->cpt_index == pi->cpt_index &&
-+		    !((pi->cpt_drv_flags^pibuf->cpt_drv_flags)&TTY_DRIVER_DEVPTS_MEM) &&
-+		    pos != sec) {
-+			/* The result of pty_setup() is not checked, as before. */
-+			pty_setup(stty, sec, pibuf, ctx);
-+			found = sec;
-+			break;
-+		}
-+		sec += pibuf->cpt_next;
-+	}
-+	kfree(pibuf);
-+	return found;
-+}
-+
-+/*
-+ * Apply owner, group and mode saved in inode image @ii to the inode behind
-+ * @master. The change is made with notify_change() under the inode's
-+ * i_mutex. @ctx is not used. Returns the result of notify_change().
-+ */
-+static int fixup_tty_attrs(struct cpt_inode_image *ii, struct file *master,
-+			   struct cpt_context *ctx)
-+{
-+	struct dentry *dentry = master->f_dentry;
-+	struct inode *inode = dentry->d_inode;
-+	struct iattr attrs;
-+	int ret;
-+
-+	attrs.ia_mode = ii->cpt_mode;
-+	attrs.ia_gid = ii->cpt_gid;
-+	attrs.ia_uid = ii->cpt_uid;
-+	attrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE;
-+
-+	mutex_lock(&inode->i_mutex);
-+	ret = notify_change(dentry, &attrs);
-+	mutex_unlock(&inode->i_mutex);
-+
-+	return ret;
-+}
-+
-+/* NOTE: "portable", but ugly thing. To allocate /dev/pts/N, we open
-+ * /dev/ptmx until we get pty with desired index.
-+ *
-+ * Every opened file with a different index is kept on a one-page array so
-+ * that its index stays busy; all of them are released before returning.
-+ * Returns the matching file, the error from filp_open(), ERR_PTR(-EBUSY)
-+ * when the page of kept files is full, or ERR_PTR(-ENOMEM) when the page
-+ * cannot be allocated.
-+ */
-+
-+struct file *ptmx_open(int index, unsigned int flags)
-+{
-+	const int max_held = PAGE_SIZE/sizeof(struct file *);
-+	struct file **held = NULL;
-+	struct file *file;
-+	int nheld = 0;
-+
-+	while (1) {
-+		struct tty_struct *tty;
-+
-+		file = filp_open("/dev/ptmx", flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
-+		if (IS_ERR(file))
-+			break;
-+		tty = file->private_data;
-+		if (tty->index == index)
-+			break;
-+
-+		/* Wrong index: park the file so the next open gets another one. */
-+		if (nheld == max_held) {
-+			fput(file);
-+			file = ERR_PTR(-EBUSY);
-+			break;
-+		}
-+		if (held == NULL) {
-+			held = (struct file **)__get_free_page(GFP_KERNEL);
-+			if (held == NULL) {
-+				fput(file);
-+				file = ERR_PTR(-ENOMEM);
-+				break;
-+			}
-+		}
-+		held[nheld++] = file;
-+	}
-+
-+	while (nheld > 0)
-+		fput(held[--nheld]);
-+	if (held != NULL)
-+		free_page((unsigned long)held);
-+	return file;
-+}
-+
-+
-+/*
-+ * rst_open_tty - reopen a pty file described by the image.
-+ * @fi:    file image; fi->cpt_priv is the offset of the tty record
-+ * @ii:    inode image, source of owner/mode for a slave node
-+ * @flags: open flags for the end being restored
-+ * @ctx:   restore context
-+ *
-+ * If the tty at fi->cpt_priv already has a restored file, a new reference
-+ * (for a devpts master) or a new open of the same dentry is returned.
-+ * Otherwise both ends of the pair are opened, registered as CPT_OBJ_TTY and
-+ * CPT_OBJ_FILE objects, and the state of both ends is loaded with
-+ * pty_setup()/find_pty_pair().
-+ *
-+ * The pty index comes from the image. For legacy ptys it selects characters
-+ * from two 16-entry tables, so it is rejected unless it is below 256. Device
-+ * names are formatted with snprintf() into a buffer large enough for any
-+ * "/dev/pts/%d".
-+ *
-+ * Returns the file for the requested end, or an ERR_PTR() value.
-+ */
-+struct file * rst_open_tty(struct cpt_file_image *fi, struct cpt_inode_image *ii,
-+			   unsigned flags, struct cpt_context *ctx)
-+{
-+	int err;
-+	cpt_object_t *obj;
-+	struct file *master, *slave;
-+	struct tty_struct *stty;
-+	struct cpt_tty_image *pi;
-+	static char *a = "pqrstuvwxyzabcde";
-+	static char *b = "0123456789abcdef";
-+	char pairname[32];
-+	unsigned master_flags, slave_flags;
-+
-+	if (fi->cpt_priv == CPT_NULL)
-+		return ERR_PTR(-EINVAL);
-+
-+	obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, fi->cpt_priv, ctx);
-+	if (obj && obj->o_parent) {
-+		dprintk_ctx("obtained pty as pair to existing\n");
-+		master = obj->o_parent;
-+		stty = master->private_data;
-+
-+		if (stty->driver->subtype == PTY_TYPE_MASTER &&
-+		    (stty->driver->flags&TTY_DRIVER_DEVPTS_MEM)) {
-+			wprintk_ctx("cloning ptmx\n");
-+			get_file(master);
-+			return master;
-+		}
-+
-+		master = dentry_open(dget(master->f_dentry),
-+				     mntget(master->f_vfsmnt), flags);
-+		if (!IS_ERR(master)) {
-+			stty = master->private_data;
-+			if (stty->driver->subtype != PTY_TYPE_MASTER)
-+				fixup_tty_attrs(ii, master, ctx);
-+		}
-+		return master;
-+	}
-+
-+	pi = cpt_get_buf(ctx);
-+	err = rst_get_object(CPT_OBJ_TTY, fi->cpt_priv, pi, ctx);
-+	if (err) {
-+		cpt_release_buf(ctx);
-+		return ERR_PTR(err);
-+	}
-+
-+	/* The caller's flags apply only to the end being restored. */
-+	master_flags = slave_flags = 0;
-+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER)
-+		master_flags = flags;
-+	else
-+		slave_flags = flags;
-+
-+	/*
-+	 * Open pair master/slave.
-+	 */
-+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM) {
-+		master = ptmx_open(pi->cpt_index, master_flags);
-+	} else {
-+		/* a[] and b[] hold 16 characters each: 16*16 names. */
-+		if ((unsigned int)pi->cpt_index >= 256) {
-+			eprintk_ctx("bad pty index in image\n");
-+			cpt_release_buf(ctx);
-+			return ERR_PTR(-EINVAL);
-+		}
-+		snprintf(pairname, sizeof(pairname), "/dev/pty%c%c", a[pi->cpt_index/16], b[pi->cpt_index%16]);
-+		master = filp_open(pairname, master_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
-+	}
-+	if (IS_ERR(master)) {
-+		eprintk_ctx("filp_open master: %Ld %ld\n", fi->cpt_priv, PTR_ERR(master));
-+		cpt_release_buf(ctx);
-+		return master;
-+	}
-+	stty = master->private_data;
-+	clear_bit(TTY_PTY_LOCK, &stty->flags);
-+	if (pi->cpt_drv_flags&TTY_DRIVER_DEVPTS_MEM)
-+		snprintf(pairname, sizeof(pairname), "/dev/pts/%d", stty->index);
-+	else
-+		snprintf(pairname, sizeof(pairname), "/dev/tty%c%c", a[stty->index/16], b[stty->index%16]);
-+	slave = filp_open(pairname, slave_flags|O_NONBLOCK|O_NOCTTY|O_RDWR, 0);
-+	if (IS_ERR(slave)) {
-+		eprintk_ctx("filp_open slave %s: %ld\n", pairname, PTR_ERR(slave));
-+		fput(master);
-+		cpt_release_buf(ctx);
-+		return slave;
-+	}
-+
-+	if (pi->cpt_drv_subtype != PTY_TYPE_MASTER)
-+		fixup_tty_attrs(ii, slave, ctx);
-+
-+	cpt_object_add(CPT_OBJ_TTY, master->private_data, ctx);
-+	cpt_object_add(CPT_OBJ_TTY, slave->private_data, ctx);
-+	cpt_object_add(CPT_OBJ_FILE, master, ctx);
-+	cpt_object_add(CPT_OBJ_FILE, slave, ctx);
-+
-+	if (pi->cpt_drv_subtype == PTY_TYPE_MASTER) {
-+		loff_t pos;
-+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
-+		obj->o_parent = master;
-+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
-+		pty_setup(stty, fi->cpt_priv, pi, ctx);
-+
-+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
-+		obj->o_parent = slave;
-+		pos = find_pty_pair(stty->link, fi->cpt_priv, pi, ctx);
-+		cpt_obj_setpos(obj, pos, ctx);
-+
-+		/* The peer file has no image position of its own. */
-+		obj = lookup_cpt_object(CPT_OBJ_FILE, slave, ctx);
-+		cpt_obj_setpos(obj, CPT_NULL, ctx);
-+		get_file(master);
-+		cpt_release_buf(ctx);
-+		return master;
-+	} else {
-+		loff_t pos;
-+		obj = lookup_cpt_object(CPT_OBJ_TTY, slave->private_data, ctx);
-+		obj->o_parent = slave;
-+		cpt_obj_setpos(obj, fi->cpt_priv, ctx);
-+		pty_setup(stty->link, fi->cpt_priv, pi, ctx);
-+
-+		obj = lookup_cpt_object(CPT_OBJ_TTY, master->private_data, ctx);
-+		obj->o_parent = master;
-+		pos = find_pty_pair(stty, fi->cpt_priv, pi, ctx);
-+		cpt_obj_setpos(obj, pos, ctx);
-+
-+		/* The peer file has no image position of its own. */
-+		obj = lookup_cpt_object(CPT_OBJ_FILE, master, ctx);
-+		cpt_obj_setpos(obj, CPT_NULL, ctx);
-+		get_file(slave);
-+		cpt_release_buf(ctx);
-+		return slave;
-+	}
-+}
-+
-+/*
-+ * Walk the CPT_SECT_TTY section and set pgrp/session on every tty that has
-+ * been restored at the position of its record.
-+ *
-+ * A positive saved pgrp or session is translated with vpid_to_pid(). A
-+ * non-zero, non-positive saved pgrp gets a pid number that is allocated from
-+ * the pidmap and freed again immediately. Returns 0, the error from reading
-+ * the section header, or -EINVAL.
-+ */
-+int rst_tty_jobcontrol(struct cpt_context *ctx)
-+{
-+	struct cpt_section_hdr hdr;
-+	loff_t pos = ctx->sections[CPT_SECT_TTY];
-+	loff_t limit;
-+	int err;
-+
-+	err = ctx->pread(&hdr, sizeof(hdr), ctx, pos);
-+	if (err)
-+		return err;
-+	if (hdr.cpt_section != CPT_SECT_TTY || hdr.cpt_hdrlen < sizeof(hdr))
-+		return -EINVAL;
-+
-+	limit = pos + hdr.cpt_next;
-+	pos += hdr.cpt_hdrlen;
-+
-+	while (pos < limit) {
-+		struct cpt_tty_image *img = cpt_get_buf(ctx);
-+		cpt_object_t *obj;
-+
-+		if (rst_get_object(CPT_OBJ_TTY, pos, img, ctx)) {
-+			cpt_release_buf(ctx);
-+			return -EINVAL;
-+		}
-+
-+		obj = lookup_cpt_obj_bypos(CPT_OBJ_TTY, pos, ctx);
-+		if (obj != NULL) {
-+			struct tty_struct *tty = obj->o_obj;
-+
-+			if ((int)img->cpt_pgrp > 0) {
-+				tty->pgrp = vpid_to_pid(img->cpt_pgrp);
-+				if (tty->pgrp == -1) {
-+					dprintk_ctx("unknown tty pgrp %d\n", img->cpt_pgrp);
-+				}
-+			} else if (img->cpt_pgrp) {
-+				/* Take a pid number from the pidmap and give
-+				 * it straight back; only the number is kept. */
-+				tty->pgrp = alloc_pidmap();
-+				if (tty->pgrp < 0) {
-+					eprintk_ctx("cannot allocate stray tty->pgrp");
-+					cpt_release_buf(ctx);
-+					return -EINVAL;
-+				}
-+				free_pidmap(tty->pgrp);
-+			}
-+
-+			if ((int)img->cpt_session > 0) {
-+				int sid = vpid_to_pid(img->cpt_session);
-+
-+				if (sid == -1) {
-+					dprintk_ctx("unknown tty session %d\n", img->cpt_session);
-+				} else if (tty->session <= 0) {
-+					tty->session = sid;
-+				} else if (tty->session != sid) {
-+					wprintk_ctx("tty session mismatch 2\n");
-+				}
-+			}
-+		}
-+
-+		/* Advance before the buffer holding the record is released. */
-+		pos += img->cpt_next;
-+		cpt_release_buf(ctx);
-+	}
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_ubc.c linux-2.6.16-026test009/kernel/cpt/rst_ubc.c
---- linux-2.6.16.orig/kernel/cpt/rst_ubc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_ubc.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,108 @@
-+/*
-+ *
-+ * kernel/cpt/rst_ubc.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/types.h>
-+#include <ub/beancounter.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+
-+/*
-+ * Return a referenced beancounter for the CPT_OBJ_UBC object at image
-+ * position @pos. When no such object is known, an error is logged and a
-+ * reference to the current exec beancounter is returned instead.
-+ */
-+struct user_beancounter *rst_lookup_ubc(__u64 pos, struct cpt_context *ctx)
-+{
-+	cpt_object_t *found = lookup_cpt_obj_bypos(CPT_OBJ_UBC, pos, ctx);
-+
-+	if (found != NULL)
-+		return get_beancounter(found->o_obj);
-+
-+	printk(KERN_ERR "RST: unknown ub @%Lu\n", pos);
-+	return get_beancounter(get_exec_ub());
-+}
-+
-+/*
-+ * Fill one ubparm from six consecutive dumped words:
-+ * barrier, limit, held, maxheld, minheld, failcnt.
-+ * CPT_NULL in barrier or limit stands for UB_MAXVALUE. The held value is
-+ * copied only when @held is non-zero.
-+ */
-+static void restore_one_bc_parm(__u64 *dmp, struct ubparm *prm, int held)
-+{
-+	if (dmp[0] == CPT_NULL)
-+		prm->barrier = UB_MAXVALUE;
-+	else
-+		prm->barrier = dmp[0];
-+
-+	if (dmp[1] == CPT_NULL)
-+		prm->limit = UB_MAXVALUE;
-+	else
-+		prm->limit = dmp[1];
-+
-+	if (held != 0)
-+		prm->held = dmp[2];
-+
-+	prm->maxheld = dmp[3];
-+	prm->minheld = dmp[4];
-+	prm->failcnt = dmp[5];
-+}
-+
-+/*
-+ * restore_one_bc - attach a beancounter to @obj and load its parameters.
-+ * @v:   beancounter image
-+ * @obj: restore object that receives the beancounter in o_obj
-+ * @ctx: restore context
-+ *
-+ * With a parent in the image, the sub-beancounter with id v->cpt_id is
-+ * obtained from the already restored parent. Otherwise the top-level
-+ * ancestor of the current exec beancounter is used and referenced.
-+ *
-+ * Each resource is restored from its own slot of v->cpt_parms. A slot is six
-+ * words, as consumed by restore_one_bc_parm(); the ub_store slots start
-+ * after the UB_RESOURCES slots of ub_parms, the offset this function has
-+ * always used for the stored set.
-+ * NOTE(review): confirm the slot layout against the dump side.
-+ *
-+ * Returns 0, -ESRCH when the parent is unknown, or -ENOMEM.
-+ */
-+static int restore_one_bc(struct cpt_beancounter_image *v,
-+			  cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct user_beancounter *bc;
-+	cpt_object_t *pobj;
-+	int i;
-+
-+	if (v->cpt_parent != CPT_NULL) {
-+		pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
-+		if (pobj == NULL)
-+			return -ESRCH;
-+		bc = get_subbeancounter_byid(pobj->o_obj, v->cpt_id, 1);
-+	} else {
-+		bc = get_exec_ub();
-+		while (bc->parent)
-+			bc = bc->parent;
-+		get_beancounter(bc);
-+	}
-+	if (bc == NULL)
-+		return -ENOMEM;
-+	obj->o_obj = bc;
-+
-+	for (i = 0; i < UB_RESOURCES; i++) {
-+		/* Index both sides by i so every resource is restored,
-+		 * not only the first one. */
-+		restore_one_bc_parm(v->cpt_parms + i * 6,
-+				    bc->ub_parms + i, 0);
-+		restore_one_bc_parm(v->cpt_parms + (UB_RESOURCES + i) * 6,
-+				    bc->ub_store + i, 1);
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * rst_undump_ubc - restore every beancounter in the CPT_SECT_UBC section.
-+ * @ctx: restore context
-+ *
-+ * For each record an object is created at the record's position, interned
-+ * as CPT_OBJ_UBC and filled in by restore_one_bc().
-+ *
-+ * Returns 0 on success, or the first error from rst_get_section(),
-+ * rst_get_object() or restore_one_bc(); -ENOMEM when no object can be
-+ * allocated.
-+ */
-+int rst_undump_ubc(struct cpt_context *ctx)
-+{
-+	loff_t start, end;
-+	struct cpt_beancounter_image *v;
-+	cpt_object_t *obj;
-+	int err;
-+
-+	err = rst_get_section(CPT_SECT_UBC, ctx, &start, &end);
-+	if (err)
-+		return err;
-+
-+	while (start < end) {
-+		loff_t next;
-+
-+		v = cpt_get_buf(ctx);
-+		err = rst_get_object(CPT_OBJ_UBC, start, v, ctx);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+
-+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
-+		if (obj == NULL) {
-+			/* presumably alloc_cpt_object() returns NULL when
-+			 * out of memory */
-+			cpt_release_buf(ctx);
-+			return -ENOMEM;
-+		}
-+		cpt_obj_setpos(obj, start, ctx);
-+		intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
-+
-+		err = restore_one_bc(v, obj, ctx);
-+
-+		/* Take the record length while the buffer is still ours. */
-+		next = start + v->cpt_next;
-+		cpt_release_buf(ctx);
-+		if (err)
-+			return err;
-+		start = next;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Drop one beancounter reference for every CPT_OBJ_UBC object.
-+ */
-+void rst_finish_ubc(struct cpt_context *ctx)
-+{
-+	cpt_object_t *obj;
-+
-+	for_each_object(obj, CPT_OBJ_UBC) {
-+		put_beancounter(obj->o_obj);
-+	}
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_undump.c linux-2.6.16-026test009/kernel/cpt/rst_undump.c
---- linux-2.6.16.orig/kernel/cpt/rst_undump.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_undump.c 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,824 @@
-+/*
-+ *
-+ * kernel/cpt/rst_undump.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/errno.h>
-+#include <linux/pagemap.h>
-+#include <linux/namespace.h>
-+#include <linux/personality.h>
-+#include <linux/binfmts.h>
-+#include <linux/smp_lock.h>
-+#include <linux/ve_proto.h>
-+#include <linux/virtinfo.h>
-+#include <linux/compat.h>
-+#include <linux/vzcalluser.h>
-+#include <ub/beancounter.h>
-+#include <asm/desc.h>
-+#include <asm/unistd.h>
-+
-+#include "cpt_obj.h"
-+#include "cpt_context.h"
-+#include "cpt_files.h"
-+#include "cpt_mm.h"
-+#include "cpt_process.h"
-+#include "cpt_socket.h"
-+#include "cpt_net.h"
-+#include "cpt_ubc.h"
-+#include "cpt_kernel.h"
-+
-+static int rst_utsname(cpt_context_t *ctx);
-+
-+
-+/*
-+ * Handshake area shared between a task being restored (running hook()) and
-+ * the thread that created it.
-+ */
-+struct thr_context {
-+ /* completed by hook() as soon as the new thread starts running */
-+ struct completion init_complete;
-+ /* completed by hook() when its restore work has finished or failed */
-+ struct completion task_done;
-+ /* result of hook(): 0 or a negative error code */
-+ int error;
-+ /* restore context handed to the new thread */
-+ struct cpt_context *ctx;
-+ /* task object the new thread restores itself from */
-+ cpt_object_t *tobj;
-+};
-+
-+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx);
-+
-+/*
-+ * Definition of __put_namespace() local to this file: it only logs a message
-+ * and releases nothing.
-+ * NOTE(review): presumably here because the kernel's own function is not
-+ * available to this code - confirm.
-+ */
-+void __put_namespace(struct namespace *namespace)
-+{
-+ eprintk("orphan namespace is lost\n");
-+}
-+
-+/*
-+ * Load the CPT_OBJ_VEINFO record into the current VE: SysV shm/msg/sem
-+ * limits, and the VE start time and start jiffies shifted back by the deltas
-+ * saved in the image. Returns 0 or the error from reading the image.
-+ */
-+static int vps_rst_veinfo(struct cpt_context *ctx)
-+{
-+	struct cpt_veinfo_image *img;
-+	struct ve_struct *env;
-+	struct timespec shift;
-+	loff_t start, end;
-+	int err;
-+	int n;
-+
-+	err = rst_get_section(CPT_SECT_VEINFO, ctx, &start, &end);
-+	if (err)
-+		return err;
-+
-+	img = cpt_get_buf(ctx);
-+	err = rst_get_object(CPT_OBJ_VEINFO, start, img, ctx);
-+	if (err == 0) {
-+		env = get_exec_env();
-+
-+		env->_shm_ctlall = img->shm_ctl_all;
-+		env->_shm_ctlmax = img->shm_ctl_max;
-+		env->_shm_ctlmni = img->shm_ctl_mni;
-+
-+		env->_msg_ctlmax = img->msg_ctl_max;
-+		env->_msg_ctlmni = img->msg_ctl_mni;
-+		env->_msg_ctlmnb = img->msg_ctl_mnb;
-+
-+		BUG_ON(sizeof(env->_sem_ctls) != sizeof(img->sem_ctl_arr));
-+		for (n = 0; n < 4; n++)
-+			env->_sem_ctls[n] = img->sem_ctl_arr[n];
-+
-+		cpt_timespec_import(&shift, img->start_timespec_delta);
-+		_set_normalized_timespec(&env->start_timespec,
-+				env->start_timespec.tv_sec - shift.tv_sec,
-+				env->start_timespec.tv_nsec - shift.tv_nsec);
-+		env->start_jiffies -= img->start_jiffies_delta;
-+		/* FIXME: what??? start_cycles is not adjusted here. */
-+	}
-+
-+	cpt_release_buf(ctx);
-+	return err;
-+}
-+
-+/*
-+ * vps_rst_reparent_root - create the VE and move the calling task into it.
-+ * @obj: task object of the root task (not used here)
-+ * @ctx: restore context
-+ *
-+ * Records the current jiffies and the wall-clock time elapsed since
-+ * ctx->start_time in @ctx, creates the environment ctx->ve_id with
-+ * real_env_create(), and sets the jiffies fixup of the new environment.
-+ *
-+ * Returns 0 on success or the negative error from real_env_create(). On
-+ * failure the current environment is left untouched.
-+ */
-+static int vps_rst_reparent_root(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	int err;
-+	struct env_create_param2 param;
-+
-+	ctx->cpt_jiffies64 = get_jiffies_64();
-+	do_gettimespec(&ctx->delta_time);
-+
-+	/* delta_time = now - start_time, borrowing a second if needed */
-+	ctx->delta_time.tv_sec -= ctx->start_time.tv_sec;
-+	if (ctx->start_time.tv_nsec > ctx->delta_time.tv_nsec) {
-+		ctx->delta_time.tv_sec--;
-+		ctx->delta_time.tv_nsec = 1000000000 - (ctx->start_time.tv_nsec - ctx->delta_time.tv_nsec);
-+	} else {
-+		ctx->delta_time.tv_nsec -= ctx->start_time.tv_nsec;
-+	}
-+
-+	memset(&param, 0, sizeof(param));
-+	param.iptables_mask = ctx->iptables_mask;
-+
-+	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2, &param, sizeof(param));
-+	if (err < 0) {
-+		eprintk_ctx("real_env_create: %d\n", err);
-+		/* No new environment: do not modify the one we are in. */
-+		return err;
-+	}
-+
-+	get_exec_env()->jiffies_fixup = ((ctx->delta_time.tv_sec < 0) ?
-+		0 : timespec_to_jiffies(&ctx->delta_time)) -
-+		(unsigned long)(ctx->cpt_jiffies64 - ctx->virt_jiffies64);
-+	return 0;
-+}
-+
-+
-+/*
-+ * hook - body of every kernel thread that becomes a restored task.
-+ * @arg: struct thr_context shared with the creator
-+ *
-+ * The thread signals init_complete and sleeps until the creator wakes it.
-+ * It then restores its own state from the task image in thr_ctx->tobj. The
-+ * task with cpt_pid 1 additionally creates the VE and restores VE-wide state
-+ * before, and cross-task state after, its children are cloned.
-+ *
-+ * The outcome is stored in thr_ctx->error and task_done is completed. The
-+ * task then either turns itself into a zombie (when the image says so) or
-+ * sleeps uninterruptibly until it is resumed or killed.
-+ *
-+ * The real interval timer increment is stored from the value returned by
-+ * ktime_add_ns(), which does not modify its argument.
-+ */
-+static int hook(void *arg)
-+{
-+	struct thr_context *thr_ctx = arg;
-+	struct cpt_context *ctx;
-+	cpt_object_t *tobj;
-+	struct cpt_task_image *ti;
-+	int err = 0;
-+
-+	/* Tell the creator we exist, then wait to be woken by it. */
-+	current->state = TASK_UNINTERRUPTIBLE;
-+	complete(&thr_ctx->init_complete);
-+	schedule();
-+
-+	ctx = thr_ctx->ctx;
-+	tobj = thr_ctx->tobj;
-+	ti = tobj->o_image;
-+
-+	current->fs->umask = 0;
-+
-+	if (ti->cpt_pid == 1) {
-+		err = vps_rst_reparent_root(tobj, ctx);
-+
-+		if (err) {
-+			rst_report_error(err, ctx);
-+			goto out;
-+		}
-+
-+		memcpy(&get_exec_env()->cap_default, &ti->cpt_ecap, sizeof(kernel_cap_t));
-+
-+		if (ctx->statusfile) {
-+			fput(ctx->statusfile);
-+			ctx->statusfile = NULL;
-+		}
-+
-+		if (ctx->lockfile) {
-+			mm_segment_t oldfs;
-+			/* result of the one-byte read; not examined */
-+			ssize_t nread = -EINVAL;
-+			char b;
-+
-+			oldfs = get_fs(); set_fs(KERNEL_DS);
-+			if (ctx->lockfile->f_op && ctx->lockfile->f_op->read)
-+				nread = ctx->lockfile->f_op->read(ctx->lockfile, &b, 1, &ctx->lockfile->f_pos);
-+			set_fs(oldfs);
-+			fput(ctx->lockfile);
-+			ctx->lockfile = NULL;
-+		}
-+
-+		err = vps_rst_veinfo(ctx);
-+		if (err) {
-+			eprintk_ctx("rst_veinfo: %d\n", err);
-+			goto out;
-+		}
-+
-+		err = rst_utsname(ctx);
-+		if (err) {
-+			eprintk_ctx("rst_utsname: %d\n", err);
-+			goto out;
-+		}
-+
-+		err = rst_root_namespace(ctx);
-+		if (err) {
-+			eprintk_ctx("rst_namespace: %d\n", err);
-+			goto out;
-+		}
-+
-+		if ((err = rst_restore_net(ctx)) != 0) {
-+			eprintk_ctx("rst_restore_net: %d\n", err);
-+			goto out;
-+		}
-+
-+		err = rst_sockets(ctx);
-+		if (err) {
-+			eprintk_ctx("rst_sockets: %d\n", err);
-+			goto out;
-+		}
-+		err = rst_sysv_ipc(ctx);
-+		if (err) {
-+			eprintk_ctx("rst_sysv_ipc: %d\n", err);
-+			goto out;
-+		}
-+	}
-+
-+	do {
-+		if (current->user->uid != ti->cpt_user) {
-+			struct user_struct *u = alloc_uid(ti->cpt_user);
-+			if (!u) {
-+				eprintk_ctx("alloc_user\n");
-+			} else {
-+				switch_uid(u);
-+			}
-+		}
-+	} while (0);
-+
-+	if ((err = rst_mm_complete(ti, ctx)) != 0) {
-+		eprintk_ctx("rst_mm: %d\n", err);
-+		goto out;
-+	}
-+
-+	if ((err = rst_files_complete(ti, ctx)) != 0) {
-+		eprintk_ctx("rst_files: %d\n", err);
-+		goto out;
-+	}
-+
-+	if ((err = rst_fs_complete(ti, ctx)) != 0) {
-+		eprintk_ctx("rst_fs: %d\n", err);
-+		goto out;
-+	}
-+
-+	if ((err = rst_semundo_complete(ti, ctx)) != 0) {
-+		eprintk_ctx("rst_semundo: %d\n", err);
-+		goto out;
-+	}
-+
-+	if ((err = rst_signal_complete(ti, ctx)) != 0) {
-+		eprintk_ctx("rst_signal: %d\n", err);
-+		goto out;
-+	}
-+
-+	if (ti->cpt_namespace == CPT_NULL)
-+		exit_namespace(current);
-+
-+	if (ti->cpt_personality != 0)
-+		__set_personality(ti->cpt_personality);
-+
-+	current->set_child_tid = NULL;
-+	current->clear_child_tid = NULL;
-+	current->flags &= ~(PF_FORKNOEXEC|PF_SUPERPRIV);
-+	current->flags |= ti->cpt_flags&(PF_FORKNOEXEC|PF_SUPERPRIV);
-+	current->exit_code = ti->cpt_exit_code;
-+	current->pdeath_signal = ti->cpt_pdeath_signal;
-+
-+	/* Only nanosleep restart blocks are understood. */
-+	if (ti->cpt_restart.fn != CPT_RBL_0) {
-+		if (ti->cpt_restart.fn != CPT_RBL_NANOSLEEP
-+		    && ti->cpt_restart.fn != CPT_RBL_COMPAT_NANOSLEEP
-+		    ) {
-+			eprintk_ctx("unknown restart block\n");
-+		} else {
-+			current->thread_info->restart_block.fn = nanosleep_restart;
-+#ifdef CONFIG_X86_64
-+			if (!ti->cpt_64bit)
-+				current->thread_info->restart_block.fn = compat_nanosleep_restart;
-+#endif
-+			if (ctx->image_version != 0) {
-+				current->thread_info->restart_block.arg0 = ti->cpt_restart.arg0;
-+				current->thread_info->restart_block.arg1 = ti->cpt_restart.arg1;
-+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg2;
-+				current->thread_info->restart_block.arg3 = ti->cpt_restart.arg3;
-+				if (debug_level > 2) {
-+					ktime_t e, e1;
-+					struct timespec now;
-+
-+					do_posix_clock_monotonic_gettime(&now);
-+					e = timespec_to_ktime(now);
-+					e1.tv64 = ((u64)current->thread_info->restart_block.arg1 << 32) | (u64) current->thread_info->restart_block.arg0;
-+					e = ktime_sub(e1, e);
-+					dprintk("rst " CPT_FID " RBL %ld/%ld %Ld\n", CPT_TID(current),
-+						current->thread_info->restart_block.arg1,
-+						current->thread_info->restart_block.arg0, e.tv64);
-+				}
-+			} else {
-+				/* Version 0 images: arg0 is treated as a
-+				 * jiffies count; convert it to an expiry on
-+				 * the monotonic clock. */
-+				struct timespec now;
-+				ktime_t expire;
-+				unsigned long val = ti->cpt_restart.arg0 -
-+					timespec_to_jiffies(&ctx->delta_time);
-+				if ((long)val <= 0)
-+					val = 1;
-+				do_posix_clock_monotonic_gettime(&now);
-+				expire = ktime_add_ns(timespec_to_ktime(now), (u64)val*TICK_NSEC);
-+				current->thread_info->restart_block.arg0 = expire.tv64 & 0xFFFFFFFF;
-+				current->thread_info->restart_block.arg1 = expire.tv64 >> 32;
-+				current->thread_info->restart_block.arg2 = ti->cpt_restart.arg1;
-+				current->thread_info->restart_block.arg3 = CLOCK_MONOTONIC;
-+			}
-+		}
-+	}
-+
-+	if (thread_group_leader(current)) {
-+		current->signal->it_real_incr.tv64 = 0;
-+		/* ktime_add_ns() returns the sum; store it. */
-+		if (ctx->image_version != 0) {
-+			current->signal->it_real_incr =
-+				ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr);
-+		} else {
-+			current->signal->it_real_incr =
-+				ktime_add_ns(current->signal->it_real_incr, ti->cpt_it_real_incr*TICK_NSEC);
-+		}
-+		current->signal->it_prof_incr = ti->cpt_it_prof_incr;
-+		current->signal->it_virt_incr = ti->cpt_it_virt_incr;
-+		current->signal->it_prof_expires = ti->cpt_it_prof_value;
-+		current->signal->it_virt_expires = ti->cpt_it_virt_value;
-+	}
-+
-+	err = rst_clone_children(tobj, ctx);
-+	if (err) {
-+		eprintk_ctx("rst_clone_children\n");
-+		goto out;
-+	}
-+
-+	if (ti->cpt_pid == 1) {
-+		if ((err = rst_process_linkage(ctx)) != 0) {
-+			eprintk_ctx("rst_process_linkage: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_do_filejobs(ctx)) != 0) {
-+			eprintk_ctx("rst_do_filejobs: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_eventpoll(ctx)) != 0) {
-+			eprintk_ctx("rst_eventpoll: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_sockets_complete(ctx)) != 0) {
-+			eprintk_ctx("rst_sockets_complete: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_stray_files(ctx)) != 0) {
-+			eprintk_ctx("rst_stray_files: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_posix_locks(ctx)) != 0) {
-+			eprintk_ctx("rst_posix_locks: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_tty_jobcontrol(ctx)) != 0) {
-+			eprintk_ctx("rst_tty_jobcontrol: %d\n", err);
-+			goto out;
-+		}
-+		if ((err = rst_restore_fs(ctx)) != 0) {
-+			eprintk_ctx("rst_restore_fs: %d\n", err);
-+			goto out;
-+		}
-+	}
-+
-+out:
-+	thr_ctx->error = err;
-+	lock_kernel();
-+	complete(&thr_ctx->task_done);
-+
-+	if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
-+		/* The image says this task had already exited. */
-+		preempt_disable();
-+		current->exit_state = EXIT_ZOMBIE;
-+		write_lock_irq(&tasklist_lock);
-+		nr_zombie++;
-+		write_unlock_irq(&tasklist_lock);
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
-+		atomic_dec(&current->signal->live);
-+#endif
-+		current->flags |= PF_DEAD;
-+		if (!(ti->cpt_flags&PF_DEAD))
-+			wprintk_ctx("zombie %d,%d(%s) is not pf_dead\n", current->pid, virt_pid(current), current->comm);
-+		module_put(current->thread_info->exec_domain->module);
-+		if (current->binfmt)
-+			module_put(current->binfmt->module);
-+	} else {
-+		__set_current_state(TASK_UNINTERRUPTIBLE);
-+	}
-+
-+	schedule();
-+
-+	dprintk_ctx("leaked through %d/%d %p\n", current->pid, virt_pid(current), current->mm);
-+
-+	module_put(THIS_MODULE);
-+	complete_and_exit(NULL, 0);
-+	return 0;
-+}
-+
-+/*
-+ * Compiled out. When enabled, replaces the fork_sub beancounter of the
-+ * current task (and the exec_ub one when the image has cpt_mm_ub) with the
-+ * beancounters looked up from the image via rst_lookup_ubc().
-+ */
-+#if 0
-+static void set_task_ubs(struct cpt_task_image *ti, struct cpt_context *ctx)
-+{
-+ struct task_beancounter *tbc;
-+
-+ tbc = task_bc(current);
-+
-+ put_beancounter(tbc->fork_sub);
-+ tbc->fork_sub = rst_lookup_ubc(ti->cpt_task_ub, ctx);
-+ if (ti->cpt_mm_ub != CPT_NULL) {
-+ put_beancounter(tbc->exec_ub);
-+ tbc->exec_ub = rst_lookup_ubc(ti->cpt_mm_ub, ctx);
-+ }
-+}
-+#endif
-+
-+/*
-+ * Start the kernel thread that will become the root task of the restored
-+ * tree. @thr_ctx is initialised for the handshake with hook(), the new task
-+ * is looked up by pid and referenced, and it is attached to @obj.
-+ * Returns 0, the error from local_kernel_thread(), or -ESRCH when the new
-+ * task cannot be found.
-+ */
-+static int create_root_task(cpt_object_t *obj, struct cpt_context *ctx,
-+			    struct thr_context *thr_ctx)
-+{
-+	task_t *task;
-+	int tid;
-+
-+	thr_ctx->ctx = ctx;
-+	thr_ctx->error = 0;
-+	init_completion(&thr_ctx->init_complete);
-+	init_completion(&thr_ctx->task_done);
-+#if 0
-+	set_task_ubs(obj->o_image, ctx);
-+#endif
-+
-+	tid = local_kernel_thread(hook, thr_ctx, 0, 0);
-+	if (tid < 0)
-+		return tid;
-+
-+	/* Pin the task while tasklist_lock still guarantees it exists. */
-+	read_lock(&tasklist_lock);
-+	task = find_task_by_pid_ve(tid);
-+	if (task != NULL)
-+		get_task_struct(task);
-+	read_unlock(&tasklist_lock);
-+
-+	if (task == NULL)
-+		return -ESRCH;
-+
-+	cpt_obj_setobj(obj, task, ctx);
-+	thr_ctx->tobj = obj;
-+	return 0;
-+}
-+
-+/*
-+ * First-stage setup of a freshly created task, done by its creator: copy the
-+ * command name from the image and call rst_mm_basic(). Always returns 0.
-+ */
-+static int rst_basic_init_task(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct cpt_task_image *image = obj->o_image;
-+	task_t *task = obj->o_obj;
-+
-+	memcpy(task->comm, image->cpt_comm, sizeof(task->comm));
-+	rst_mm_basic(obj, image, ctx);
-+
-+	return 0;
-+}
-+
-+/*
-+ * make_baby - create one child task or thread of the current task.
-+ * @cobj: object of the task to create; its image is cobj->o_image
-+ * @pi:   image of the task that is creating it (the caller)
-+ * @ctx:  restore context
-+ *
-+ * Starts a kernel thread running hook(), performs the basic setup on its
-+ * behalf while it sleeps, wakes it and waits until it reports completion.
-+ * Returns the error reported by the new task, -EINVAL for an inconsistent
-+ * thread description, the error from local_kernel_thread(), or -ESRCH.
-+ */
-+static int make_baby(cpt_object_t *cobj,
-+ struct cpt_task_image *pi,
-+ struct cpt_context *ctx)
-+{
-+ unsigned long flags;
-+ struct cpt_task_image *ci = cobj->o_image;
-+ struct thr_context thr_ctx;
-+ task_t *tsk;
-+ pid_t pid;
-+
-+ /* clone flags are collected from the per-subsystem helpers */
-+ flags = rst_mm_flag(ci, ctx) | rst_files_flag(ci, ctx)
-+ | rst_signal_flag(ci, ctx) | rst_semundo_flag(ci, ctx);
-+ /* not our child by parent pid: it has to be a thread of our group */
-+ if (ci->cpt_rppid != pi->cpt_pid) {
-+ flags |= CLONE_THREAD|CLONE_PARENT;
-+ if (ci->cpt_signal != pi->cpt_signal ||
-+ !(flags&CLONE_SIGHAND) ||
-+ (!(flags&CLONE_VM) && pi->cpt_mm != CPT_NULL)) {
-+ eprintk_ctx("something is wrong with threads: %d %d %d %Ld %Ld %08lx\n",
-+ (int)ci->cpt_pid, (int)ci->cpt_rppid, (int)pi->cpt_pid,
-+ ci->cpt_signal, pi->cpt_signal, flags
-+ );
-+ return -EINVAL;
-+ }
-+ }
-+
-+ thr_ctx.ctx = ctx;
-+ thr_ctx.error = 0;
-+ init_completion(&thr_ctx.init_complete);
-+ init_completion(&thr_ctx.task_done);
-+ thr_ctx.tobj = cobj;
-+
-+#if 0
-+ set_task_ubs(ci, ctx);
-+#endif
-+
-+ pid = local_kernel_thread(hook, &thr_ctx, flags, ci->cpt_pid);
-+ if (pid < 0)
-+ return pid;
-+
-+ /* take a reference on the new task while tasklist_lock is held */
-+ read_lock(&tasklist_lock);
-+ tsk = find_task_by_pid_ve(pid);
-+ if (tsk)
-+ get_task_struct(tsk);
-+ read_unlock(&tasklist_lock);
-+ if (tsk == NULL)
-+ return -ESRCH;
-+ cpt_obj_setobj(cobj, tsk, ctx);
-+ thr_ctx.tobj = cobj;
-+ /* hook() completes init_complete and then sleeps in schedule() */
-+ wait_for_completion(&thr_ctx.init_complete);
-+#ifdef CONFIG_SMP
-+ wait_task_inactive(cobj->o_obj);
-+#endif
-+ rst_basic_init_task(cobj, ctx);
-+
-+ /* clone() increases group_stop_count if it was not zero and
-+ * CLONE_THREAD was asked. Undo.
-+ */
-+ if (current->signal->group_stop_count && (flags & CLONE_THREAD)) {
-+ if (tsk->signal != current->signal) BUG();
-+ current->signal->group_stop_count--;
-+ }
-+
-+ /* let the child run its part of hook() and wait for the result */
-+ wake_up_process(tsk);
-+ wait_for_completion(&thr_ctx.task_done);
-+ wait_task_inactive(tsk);
-+
-+ return thr_ctx.error;
-+}
-+
-+/*
-+ * Create every task from the image that belongs under @obj:
-+ *  - thread group leaders whose real parent is this task, and
-+ *  - non-leader threads (other than pid 1) whose leader is this task.
-+ * Stops at the first failure of make_baby() and returns its error.
-+ */
-+static int rst_clone_children(cpt_object_t *obj, struct cpt_context *ctx)
-+{
-+	struct cpt_task_image *ti = obj->o_image;
-+	cpt_object_t *cobj;
-+	int err = 0;
-+
-+	for_each_object(cobj, CPT_OBJ_TASK) {
-+		struct cpt_task_image *ci = cobj->o_image;
-+		int is_child, is_thread;
-+
-+		if (cobj == obj)
-+			continue;
-+
-+		is_child = ci->cpt_rppid == ti->cpt_pid &&
-+			   ci->cpt_tgid == ci->cpt_pid;
-+		is_thread = ci->cpt_leader == ti->cpt_pid &&
-+			    ci->cpt_tgid != ci->cpt_pid && ci->cpt_pid != 1;
-+		if (!is_child && !is_thread)
-+			continue;
-+
-+		err = make_baby(cobj, ti, ctx);
-+		if (err) {
-+			eprintk_ctx("make_baby: %d\n", err);
-+			return err;
-+		}
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * read_task_images - load every task record of CPT_SECT_TASKS into memory.
-+ * @ctx: restore context
-+ *
-+ * For each record an object is created at the record's position, interned
-+ * as CPT_OBJ_TASK, and given a kmalloc'ed copy of the whole record
-+ * (cpt_next bytes) in o_image.
-+ *
-+ * Returns 0 on success or a negative error: from reading the image, -EINVAL
-+ * for a non-virtual pid or a record shorter than its header structure,
-+ * -ENOMEM when allocation fails.
-+ */
-+static int read_task_images(struct cpt_context *ctx)
-+{
-+	int err;
-+	loff_t start, end;
-+
-+	err = rst_get_section(CPT_SECT_TASKS, ctx, &start, &end);
-+	if (err)
-+		return err;
-+
-+	while (start < end) {
-+		cpt_object_t *obj;
-+		struct cpt_task_image *ti = cpt_get_buf(ctx);
-+		__u64 len;
-+
-+		err = rst_get_object(CPT_OBJ_TASK, start, ti, ctx);
-+		if (err) {
-+			cpt_release_buf(ctx);
-+			return err;
-+		}
-+		if (ti->cpt_pid != 1 && !__is_virtual_pid(ti->cpt_pid)) {
-+			eprintk_ctx("BUG: pid %d is not virtual\n", ti->cpt_pid);
-+			cpt_release_buf(ctx);
-+			return -EINVAL;
-+		}
-+		/* The record length comes from the image. It must cover the
-+		 * header copied below, and it is kept in a local because the
-+		 * buffer is released before the position is advanced. */
-+		len = ti->cpt_next;
-+		if (len < sizeof(*ti)) {
-+			eprintk_ctx("task record is too short\n");
-+			cpt_release_buf(ctx);
-+			return -EINVAL;
-+		}
-+		obj = alloc_cpt_object(GFP_KERNEL, ctx);
-+		if (obj == NULL) {
-+			/* presumably alloc_cpt_object() returns NULL when
-+			 * out of memory */
-+			cpt_release_buf(ctx);
-+			return -ENOMEM;
-+		}
-+		cpt_obj_setpos(obj, start, ctx);
-+		intern_cpt_object(CPT_OBJ_TASK, obj, ctx);
-+		obj->o_image = kmalloc(len, GFP_KERNEL);
-+		if (obj->o_image == NULL) {
-+			cpt_release_buf(ctx);
-+			return -ENOMEM;
-+		}
-+		memcpy(obj->o_image, ti, sizeof(*ti));
-+		err = ctx->pread(obj->o_image + sizeof(*ti),
-+				 len - sizeof(*ti), ctx, start + sizeof(*ti));
-+		cpt_release_buf(ctx);
-+		if (err)
-+			return err;
-+		start += len;
-+	}
-+	return 0;
-+}
-+
-+
-+/*
-+ * Read the task images and beancounters, then start the first CPT_OBJ_TASK
-+ * object as the root task and wait until it reports the result of restoring
-+ * itself. Only the first task object is started here; the loop is left
-+ * after it. Returns 0 or the first error encountered.
-+ */
-+static int vps_rst_restore_tree(struct cpt_context *ctx)
-+{
-+	struct thr_context root_thr;
-+	cpt_object_t *obj;
-+	int err;
-+
-+	err = read_task_images(ctx);
-+	if (err == 0)
-+		err = rst_undump_ubc(ctx);
-+	if (err)
-+		return err;
-+
-+	for_each_object(obj, CPT_OBJ_TASK) {
-+		err = create_root_task(obj, ctx, &root_thr);
-+		if (err)
-+			break;
-+
-+		/* The new thread sleeps after signalling init_complete. */
-+		wait_for_completion(&root_thr.init_complete);
-+#ifdef CONFIG_SMP
-+		wait_task_inactive(obj->o_obj);
-+#endif
-+		rst_basic_init_task(obj, ctx);
-+
-+		wake_up_process(obj->o_obj);
-+		wait_for_completion(&root_thr.task_done);
-+		wait_task_inactive(obj->o_obj);
-+
-+		err = root_thr.error;
-+		break;
-+	}
-+
-+	return err;
-+}
-+
-+
-+/*
-+ * Entry point of the restore: open the dump file, then restore the task
-+ * tree and the processes with the caller's umask temporarily cleared.
-+ * On kernels without CONFIG_X86_64 an image containing 64-bit tasks is
-+ * refused with -EINVAL. Returns 0 or the first error.
-+ */
-+int vps_rst_undump(struct cpt_context *ctx)
-+{
-+	unsigned long saved_umask;
-+	int err;
-+
-+	err = rst_open_dumpfile(ctx);
-+	if (err)
-+		return err;
-+
-+#ifndef CONFIG_X86_64
-+	if (ctx->tasks64) {
-+		eprintk_ctx("Cannot restore 64 bit VE on this architecture\n");
-+		return -EINVAL;
-+	}
-+#endif
-+
-+	saved_umask = current->fs->umask;
-+	current->fs->umask = 0;
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+	err = rst_setup_pagein(ctx);
-+	if (err)
-+		goto restore_umask;
-+#endif
-+
-+	err = vps_rst_restore_tree(ctx);
-+	if (err)
-+		goto restore_umask;
-+
-+	err = rst_restore_process(ctx);
-+
-+restore_umask:
-+	current->fs->umask = saved_umask;
-+	return err;
-+}
-+
-+/*
-+ * Clear the is_locked flag of VE ctx->ve_id under its op_sem.
-+ * Returns 0, or -ESRCH when the VE does not exist.
-+ */
-+static int rst_unlock_ve(struct cpt_context *ctx)
-+{
-+	struct ve_struct *ve = get_ve_by_id(ctx->ve_id);
-+
-+	if (ve == NULL)
-+		return -ESRCH;
-+
-+	down_write(&ve->op_sem);
-+	ve->is_locked = 0;
-+	up_write(&ve->op_sem);
-+
-+	put_ve(ve);
-+	return 0;
-+}
-+
-+/*
-+ * rst_resume - let the restored VE run.
-+ *
-+ * Drops the file references held by CPT_OBJ_FILE objects, resumes the
-+ * network, puts every restored task into the state recorded in its image
-+ * (waking those saved as TASK_UNINTERRUPTIBLE), unlocks the VE and destroys
-+ * the restore objects. err is never changed, so this always returns 0.
-+ */
-+int rst_resume(struct cpt_context *ctx)
-+{
-+ cpt_object_t *obj;
-+ int err = 0;
-+
-+ for_each_object(obj, CPT_OBJ_FILE) {
-+ struct file *file = obj->o_obj;
-+
-+ fput(file);
-+ }
-+
-+ rst_resume_network(ctx);
-+
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+ struct cpt_task_image *ti = obj->o_image;
-+
-+ /* task objects without a created task are skipped */
-+ if (!tsk)
-+ continue;
-+
-+ if (ti->cpt_state == TASK_UNINTERRUPTIBLE) {
-+ dprintk_ctx("task %d/%d(%s) is started\n", virt_pid(tsk), tsk->pid, tsk->comm);
-+
-+ /* Weird... If a signal is sent to stopped task,
-+ * nobody makes recalc_sigpending(). We have to do
-+ * this by hands after wake_up_process().
-+ * if we did this before a signal could arrive before
-+ * wake_up_process() and stall.
-+ */
-+ spin_lock_irq(&tsk->sighand->siglock);
-+ if (!signal_pending(tsk))
-+ recalc_sigpending_tsk(tsk);
-+ spin_unlock_irq(&tsk->sighand->siglock);
-+
-+ wake_up_process(tsk);
-+ } else {
-+ /* stopped and traced tasks get their saved state back, not woken */
-+ if (ti->cpt_state == TASK_STOPPED ||
-+ ti->cpt_state == TASK_TRACED) {
-+ set_task_state(tsk, ti->cpt_state);
-+ }
-+ }
-+ /* drop the reference taken when the task was created */
-+ put_task_struct(tsk);
-+ }
-+
-+ /* the result of rst_unlock_ve() is not checked */
-+ rst_unlock_ve(ctx);
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ rst_complete_pagein(ctx, 0);
-+#endif
-+
-+ rst_finish_ubc(ctx);
-+ cpt_object_destroy(ctx);
-+
-+ return err;
-+}
-+
-+/*
-+ * rst_kill - abandon a restore and kill everything created so far.
-+ *
-+ * Drops the file references held by CPT_OBJ_FILE objects, sends SIGKILL to
-+ * every created task that has not exited, makes sure the signal can be
-+ * delivered, wakes the task, and then destroys the restore objects.
-+ * err is never changed, so this always returns 0.
-+ */
-+int rst_kill(struct cpt_context *ctx)
-+{
-+ cpt_object_t *obj;
-+ int err = 0;
-+
-+ for_each_object(obj, CPT_OBJ_FILE) {
-+ struct file *file = obj->o_obj;
-+
-+ fput(file);
-+ }
-+
-+ for_each_object(obj, CPT_OBJ_TASK) {
-+ task_t *tsk = obj->o_obj;
-+
-+ /* task objects without a created task are skipped */
-+ if (tsk == NULL)
-+ continue;
-+
-+ if (tsk->exit_state == 0) {
-+ send_sig(SIGKILL, tsk, 1);
-+
-+ /* block everything except SIGKILL, mark a signal as pending
-+ * and clear the freeze flags before waking the task */
-+ spin_lock_irq(&tsk->sighand->siglock);
-+ sigfillset(&tsk->blocked);
-+ sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
-+ set_tsk_thread_flag(tsk, TIF_SIGPENDING);
-+ clear_tsk_thread_flag(tsk, TIF_FREEZE);
-+ if (tsk->flags & PF_FROZEN)
-+ tsk->flags &= ~PF_FROZEN;
-+ spin_unlock_irq(&tsk->sighand->siglock);
-+
-+ wake_up_process(tsk);
-+ }
-+
-+ /* drop the reference taken when the task was created */
-+ put_task_struct(tsk);
-+ }
-+
-+#ifdef CONFIG_VZ_CHECKPOINT_LAZY
-+ rst_complete_pagein(ctx, 1);
-+#endif
-+
-+ rst_finish_ubc(ctx);
-+ cpt_object_destroy(ctx);
-+
-+ return err;
-+}
-+
-+/*
-+ * rst_utsname - restore the VE node name and domain name.
-+ * @ctx: restore context
-+ *
-+ * The CPT_SECT_UTSNAME section holds up to two CPT_OBJ_NAME records: the
-+ * first is read into ve_utsname.nodename, the second into
-+ * ve_utsname.domainname. A missing section is not an error.
-+ *
-+ * Record lengths come from the image, so a record shorter than its own
-+ * header is rejected, and the restored string is always NUL-terminated
-+ * within __NEW_UTS_LEN+1 bytes.
-+ *
-+ * Returns 0, the error from reading the image, -EINVAL for a malformed
-+ * section or more than two names, or -ENAMETOOLONG.
-+ */
-+static int rst_utsname(cpt_context_t *ctx)
-+{
-+	int err;
-+	loff_t sec = ctx->sections[CPT_SECT_UTSNAME];
-+	loff_t endsec;
-+	struct cpt_section_hdr h;
-+	struct cpt_object_hdr o;
-+	int i;
-+
-+	if (sec == CPT_NULL)
-+		return 0;
-+
-+	err = ctx->pread(&h, sizeof(h), ctx, sec);
-+	if (err)
-+		return err;
-+	if (h.cpt_section != CPT_SECT_UTSNAME || h.cpt_hdrlen < sizeof(h))
-+		return -EINVAL;
-+
-+	i = 0;
-+	endsec = sec + h.cpt_next;
-+	sec += h.cpt_hdrlen;
-+	while (sec < endsec) {
-+		int len;
-+		char *ptr;
-+		err = rst_get_object(CPT_OBJ_NAME, sec, &o, ctx);
-+		if (err)
-+			return err;
-+		/* Validate before narrowing to int: a negative or wrapped
-+		 * length must never reach pread(). */
-+		if (o.cpt_next < o.cpt_hdrlen)
-+			return -EINVAL;
-+		if (o.cpt_next - o.cpt_hdrlen > __NEW_UTS_LEN+1)
-+			return -ENAMETOOLONG;
-+		len = o.cpt_next - o.cpt_hdrlen;
-+		switch (i) {
-+		case 0:
-+			ptr = ve_utsname.nodename; break;
-+		case 1:
-+			ptr = ve_utsname.domainname; break;
-+		default:
-+			return -EINVAL;
-+		}
-+		err = ctx->pread(ptr, len, ctx, sec+o.cpt_hdrlen);
-+		if (err)
-+			return err;
-+		/* Terminate right after the data, or in the last byte of
-+		 * the field when the data fills it. */
-+		ptr[len > __NEW_UTS_LEN ? __NEW_UTS_LEN : len] = '\0';
-+		i++;
-+		sec += o.cpt_next;
-+	}
-+
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/cpt/rst_x8664.S linux-2.6.16-026test009/kernel/cpt/rst_x8664.S
---- linux-2.6.16.orig/kernel/cpt/rst_x8664.S 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpt/rst_x8664.S 2006-04-19 15:02:13.000000000 +0400
-@@ -0,0 +1,61 @@
-+#define ASSEMBLY 1
-+
-+#include <linux/config.h>
-+#include <linux/linkage.h>
-+#include <asm/segment.h>
-+#include <asm/smp.h>
-+#include <asm/cache.h>
-+#include <asm/errno.h>
-+#include <asm/dwarf2.h>
-+#include <asm/calling.h>
-+#include <asm/msr.h>
-+#include <asm/unistd.h>
-+#include <asm/thread_info.h>
-+#include <asm/hw_irq.h>
-+#include <asm/errno.h>
-+#include <asm/asm-offsets.h>
-+
-+ .code64
-+ .global schedule_tail_hook, schedule_tail_p
-+ .align 8
-+schedule_tail_hook:
-+ movq schedule_tail_p(%rip),%r11
-+ call *%r11
-+ GET_THREAD_INFO(%rcx)
-+ btr $22,threadinfo_flags(%rcx) /* TIF_RESUME */
-+ jc 1f
-+ retq
-+
-+ /* If TIF_RESUME is set, (%rsp) is pointer to hook function
-+ * the hook will do the work and jump to the next hook,
-+ * everything should end at ret_from_fork+5.
-+ */
-+1: addq $8,%rsp
-+ retq
-+
-+ .align 8
-+ .global ret_from_fork2
-+ret_from_fork2:
-+ cmpq $0,ORIG_RAX(%rsp)
-+ jge ret_from_fork+5
-+ RESTORE_REST
-+ jmp int_ret_from_sys_call
-+
-+ .align 8
-+ .global ret_last_siginfo
-+ret_last_siginfo:
-+ call rlsi
-+ movq %rax,%rsp
-+ retq
-+
-+ .align 8
-+ .global ret_child_tid
-+ret_child_tid:
-+ movq %rsp,%rdi
-+ call rct
-+ movq %rax,%rsp
-+ retq
-+
-+ .data
-+schedule_tail_p:
-+ .quad 0
-diff -upr linux-2.6.16.orig/kernel/cpu.c linux-2.6.16-026test009/kernel/cpu.c
---- linux-2.6.16.orig/kernel/cpu.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpu.c 2006-04-19 15:02:12.000000000 +0400
-@@ -95,7 +95,7 @@ static inline void check_for_tasks(int c
- struct task_struct *p;
-
- write_lock_irq(&tasklist_lock);
-- for_each_process(p) {
-+ for_each_process_all(p) {
- if (task_cpu(p) == cpu &&
- (!cputime_eq(p->utime, cputime_zero) ||
- !cputime_eq(p->stime, cputime_zero)))
-diff -upr linux-2.6.16.orig/kernel/cpuset.c linux-2.6.16-026test009/kernel/cpuset.c
---- linux-2.6.16.orig/kernel/cpuset.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/cpuset.c 2006-04-19 15:02:12.000000000 +0400
-@@ -897,7 +897,7 @@ static int update_nodemask(struct cpuset
- n = 0;
-
- /* Load up mmarray[] with mm reference for each task in cpuset. */
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- struct mm_struct *mm;
-
- if (n >= ntasks) {
-@@ -911,7 +911,7 @@ static int update_nodemask(struct cpuset
- if (!mm)
- continue;
- mmarray[n++] = mm;
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
- write_unlock_irq(&tasklist_lock);
-
- /*
-@@ -1125,7 +1125,7 @@ static int attach_task(struct cpuset *cs
- if (pid) {
- read_lock(&tasklist_lock);
-
-- tsk = find_task_by_pid(pid);
-+ tsk = find_task_by_pid_all(pid);
- if (!tsk || tsk->flags & PF_EXITING) {
- read_unlock(&tasklist_lock);
- return -ESRCH;
-@@ -1561,13 +1561,13 @@ static int pid_array_load(pid_t *pidarra
-
- read_lock(&tasklist_lock);
-
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- if (p->cpuset == cs) {
- pidarray[n++] = p->pid;
- if (unlikely(n == npids))
- goto array_full;
- }
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
-
- array_full:
- read_unlock(&tasklist_lock);
-diff -upr linux-2.6.16.orig/kernel/exec_domain.c linux-2.6.16-026test009/kernel/exec_domain.c
---- linux-2.6.16.orig/kernel/exec_domain.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/exec_domain.c 2006-04-19 15:02:11.000000000 +0400
-@@ -140,6 +140,7 @@ __set_personality(u_long personality)
- ep = lookup_exec_domain(personality);
- if (ep == current_thread_info()->exec_domain) {
- current->personality = personality;
-+ module_put(ep->module);
- return 0;
- }
-
-diff -upr linux-2.6.16.orig/kernel/exit.c linux-2.6.16-026test009/kernel/exit.c
---- linux-2.6.16.orig/kernel/exit.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/exit.c 2006-04-19 15:02:12.000000000 +0400
-@@ -42,7 +42,7 @@ extern struct task_struct *child_reaper;
-
- int getrusage(struct task_struct *, int, struct rusage __user *);
-
--static void exit_mm(struct task_struct * tsk);
-+void exit_mm(struct task_struct * tsk);
-
- static void __unhash_process(struct task_struct *p)
- {
-@@ -57,18 +57,19 @@ static void __unhash_process(struct task
- }
-
- REMOVE_LINKS(p);
-+ REMOVE_VE_LINKS(p);
- }
-
- void release_task(struct task_struct * p)
- {
- int zap_leader;
- task_t *leader;
-- struct dentry *proc_dentry;
-+ struct dentry *proc_dentry[2];
-
- repeat:
- atomic_dec(&p->user->processes);
- spin_lock(&p->proc_lock);
-- proc_dentry = proc_pid_unhash(p);
-+ proc_pid_unhash(p, proc_dentry);
- write_lock_irq(&tasklist_lock);
- if (unlikely(p->ptrace))
- __ptrace_unlink(p);
-@@ -80,6 +81,8 @@ repeat:
- * the process by __unhash_process.
- */
- __unhash_process(p);
-+ nr_zombie--;
-+ atomic_inc(&nr_dead);
-
- /*
- * If we are the last non-leader member of the thread
-@@ -107,6 +110,10 @@ repeat:
- spin_unlock(&p->proc_lock);
- proc_pid_flush(proc_dentry);
- release_thread(p);
-+#ifdef CONFIG_VE
-+ if (atomic_dec_and_test(&VE_TASK_INFO(p)->owner_env->pcounter))
-+ do_env_cleanup(VE_TASK_INFO(p)->owner_env);
-+#endif
- put_task_struct(p);
-
- p = leader;
-@@ -118,10 +125,10 @@ repeat:
-
- void unhash_process(struct task_struct *p)
- {
-- struct dentry *proc_dentry;
-+ struct dentry *proc_dentry[2];
-
- spin_lock(&p->proc_lock);
-- proc_dentry = proc_pid_unhash(p);
-+ proc_pid_unhash(p, proc_dentry);
- write_lock_irq(&tasklist_lock);
- __unhash_process(p);
- write_unlock_irq(&tasklist_lock);
-@@ -139,14 +146,16 @@ int session_of_pgrp(int pgrp)
- struct task_struct *p;
- int sid = -1;
-
-+ WARN_ON(is_virtual_pid(pgrp));
-+
- read_lock(&tasklist_lock);
-- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-+ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
- if (p->signal->session > 0) {
- sid = p->signal->session;
- goto out;
- }
-- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-- p = find_task_by_pid(pgrp);
-+ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
-+ p = find_task_by_pid_ve(pgrp);
- if (p)
- sid = p->signal->session;
- out:
-@@ -168,17 +177,19 @@ static int will_become_orphaned_pgrp(int
- struct task_struct *p;
- int ret = 1;
-
-- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-+ WARN_ON(is_virtual_pid(pgrp));
-+
-+ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
- if (p == ignored_task
- || p->exit_state
-- || p->real_parent->pid == 1)
-+ || virt_pid(p->real_parent) == 1)
- continue;
- if (process_group(p->real_parent) != pgrp
- && p->real_parent->signal->session == p->signal->session) {
- ret = 0;
- break;
- }
-- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-+ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
- return ret; /* (sighing) "Often!" */
- }
-
-@@ -186,6 +197,8 @@ int is_orphaned_pgrp(int pgrp)
- {
- int retval;
-
-+ WARN_ON(is_virtual_pid(pgrp));
-+
- read_lock(&tasklist_lock);
- retval = will_become_orphaned_pgrp(pgrp, NULL);
- read_unlock(&tasklist_lock);
-@@ -198,7 +211,7 @@ static int has_stopped_jobs(int pgrp)
- int retval = 0;
- struct task_struct *p;
-
-- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-+ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
- if (p->state != TASK_STOPPED)
- continue;
-
-@@ -214,7 +227,7 @@ static int has_stopped_jobs(int pgrp)
-
- retval = 1;
- break;
-- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-+ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
- return retval;
- }
-
-@@ -263,6 +276,9 @@ void __set_special_pids(pid_t session, p
- {
- struct task_struct *curr = current->group_leader;
-
-+ WARN_ON(is_virtual_pid(pgrp));
-+ WARN_ON(is_virtual_pid(session));
-+
- if (curr->signal->session != session) {
- detach_pid(curr, PIDTYPE_SID);
- curr->signal->session = session;
-@@ -281,6 +297,7 @@ void set_special_pids(pid_t session, pid
- __set_special_pids(session, pgrp);
- write_unlock_irq(&tasklist_lock);
- }
-+EXPORT_SYMBOL(set_special_pids);
-
- /*
- * Let kernel threads use this to say that they
-@@ -500,7 +517,7 @@ EXPORT_SYMBOL_GPL(exit_fs);
- * Turn us into a lazy TLB process if we
- * aren't already..
- */
--static void exit_mm(struct task_struct * tsk)
-+void exit_mm(struct task_struct * tsk)
- {
- struct mm_struct *mm = tsk->mm;
-
-@@ -535,6 +552,7 @@ static void exit_mm(struct task_struct *
- task_unlock(tsk);
- mmput(mm);
- }
-+EXPORT_SYMBOL_GPL(exit_mm);
-
- static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
- {
-@@ -613,13 +631,12 @@ static void reparent_thread(task_t *p, t
- static void forget_original_parent(struct task_struct * father,
- struct list_head *to_release)
- {
-- struct task_struct *p, *reaper = father;
-+ struct task_struct *p, *tsk_reaper, *reaper = father;
- struct list_head *_p, *_n;
-
- do {
- reaper = next_thread(reaper);
- if (reaper == father) {
-- reaper = child_reaper;
- break;
- }
- } while (reaper->exit_state);
-@@ -641,9 +658,16 @@ static void forget_original_parent(struc
- /* if father isn't the real parent, then ptrace must be enabled */
- BUG_ON(father != p->real_parent && !ptrace);
-
-+ tsk_reaper = reaper;
-+ if (tsk_reaper == father)
-+#ifdef CONFIG_VE
-+ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
-+ if (tsk_reaper == p)
-+#endif
-+ tsk_reaper = child_reaper;
- if (father == p->real_parent) {
-- /* reparent with a reaper, real father it's us */
-- choose_new_parent(p, reaper, child_reaper);
-+ /* reparent with a tsk_reaper, real father it's us */
-+ choose_new_parent(p, tsk_reaper, child_reaper);
- reparent_thread(p, father, 0);
- } else {
- /* reparent ptraced task to its real parent */
-@@ -664,7 +688,15 @@ static void forget_original_parent(struc
- }
- list_for_each_safe(_p, _n, &father->ptrace_children) {
- p = list_entry(_p,struct task_struct,ptrace_list);
-- choose_new_parent(p, reaper, child_reaper);
-+
-+ tsk_reaper = reaper;
-+ if (tsk_reaper == father)
-+#ifdef CONFIG_VE
-+ tsk_reaper = VE_TASK_INFO(p)->owner_env->init_entry;
-+ if (tsk_reaper == p)
-+#endif
-+ tsk_reaper = child_reaper;
-+ choose_new_parent(p, tsk_reaper, child_reaper);
- reparent_thread(p, father, 1);
- }
- }
-@@ -760,6 +792,9 @@ static void exit_notify(struct task_stru
- && !capable(CAP_KILL))
- tsk->exit_signal = SIGCHLD;
-
-+ if (tsk->exit_signal != -1 && t == child_reaper)
-+ /* We dont want people slaying init. */
-+ tsk->exit_signal = SIGCHLD;
-
- /* If something other than our normal parent is ptracing us, then
- * send it a SIGCHLD instead of honoring exit_signal. exit_signal
-@@ -778,6 +813,7 @@ static void exit_notify(struct task_stru
- unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
- state = EXIT_DEAD;
- tsk->exit_state = state;
-+ nr_zombie++;
-
- write_unlock_irq(&tasklist_lock);
-
-@@ -792,6 +828,82 @@ static void exit_notify(struct task_stru
- release_task(tsk);
- }
-
-+#ifdef CONFIG_VE
-+/*
-+ * Handle exitting of init process, it's a special case for VE.
-+ */
-+static void do_initproc_exit(void)
-+{
-+ struct task_struct *tsk;
-+ struct ve_struct *env;
-+ struct siginfo info;
-+ struct task_struct *g, *p;
-+ long delay = 1L;
-+
-+ tsk = current;
-+ env = VE_TASK_INFO(current)->owner_env;
-+ if (env->init_entry != tsk)
-+ return;
-+
-+ if (ve_is_super(env) && tsk->pid == 1)
-+ panic("Attempted to kill init!");
-+
-+ memset(&info, 0, sizeof(info));
-+ info.si_errno = 0;
-+ info.si_code = SI_KERNEL;
-+ info.si_pid = virt_pid(tsk);
-+ info.si_uid = current->uid;
-+ info.si_signo = SIGKILL;
-+
-+ /*
-+ * Here the VE changes its state into "not running".
-+ * op_sem taken for write is a barrier to all VE manipulations from
-+ * ioctl: it waits for operations currently in progress and blocks all
-+ * subsequent operations until is_running is set to 0 and op_sem is
-+ * released.
-+ */
-+ down_write(&env->op_sem);
-+ env->is_running = 0;
-+ up_write(&env->op_sem);
-+
-+ /* send kill to all processes of VE */
-+ read_lock(&tasklist_lock);
-+ do_each_thread_ve(g, p) {
-+ force_sig_info(SIGKILL, &info, p);
-+ } while_each_thread_ve(g, p);
-+ read_unlock(&tasklist_lock);
-+
-+ /* wait for all init childs exit */
-+ while (atomic_read(&env->pcounter) > 1) {
-+ if (sys_wait4(-1, NULL, __WALL | WNOHANG, NULL) > 0)
-+ continue;
-+ /* it was ENOCHLD or no more children somehow */
-+ if (atomic_read(&env->pcounter) == 1)
-+ break;
-+
-+ /* clear all signals to avoid wakeups */
-+ if (signal_pending(tsk))
-+ flush_signals(tsk);
-+ /* we have child without signal sent */
-+ __set_current_state(TASK_INTERRUPTIBLE);
-+ schedule_timeout(delay);
-+ delay = (delay < HZ) ? (delay << 1) : HZ;
-+ read_lock(&tasklist_lock);
-+ do_each_thread_ve(g, p) {
-+ if (p != tsk)
-+ force_sig_info(SIGKILL, &info, p);
-+ } while_each_thread_ve(g, p);
-+ read_unlock(&tasklist_lock);
-+ }
-+ env->init_entry = child_reaper;
-+ write_lock_irq(&tasklist_lock);
-+ REMOVE_LINKS(tsk);
-+ tsk->parent = tsk->real_parent = child_reaper;
-+ SET_LINKS(tsk);
-+ write_unlock_irq(&tasklist_lock);
-+}
-+#endif
-+
- fastcall NORET_TYPE void do_exit(long code)
- {
- struct task_struct *tsk = current;
-@@ -805,14 +917,20 @@ fastcall NORET_TYPE void do_exit(long co
- panic("Aiee, killing interrupt handler!");
- if (unlikely(!tsk->pid))
- panic("Attempted to kill the idle task!");
-+#ifdef CONFIG_VE
-+ do_initproc_exit();
-+#else
- if (unlikely(tsk->pid == 1))
- panic("Attempted to kill init!");
-+#endif
- if (tsk->io_context)
- exit_io_context();
-
- if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
- current->ptrace_message = code;
-+ set_pn_state(current, PN_STOP_EXIT);
- ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
-+ clear_pn_state(current);
- }
-
- /*
-@@ -911,7 +1029,14 @@ asmlinkage long sys_exit(int error_code)
-
- task_t fastcall *next_thread(const task_t *p)
- {
-- return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
-+ task_t *tsk;
-+
-+ tsk = pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
-+#ifdef CONFIG_VE
-+ /* all threads should belong to ONE ve! */
-+ BUG_ON(VE_TASK_INFO(tsk)->owner_env != VE_TASK_INFO(p)->owner_env);
-+#endif
-+ return tsk;
- }
-
- EXPORT_SYMBOL(next_thread);
-@@ -960,14 +1085,19 @@ asmlinkage void sys_exit_group(int error
- static int eligible_child(pid_t pid, int options, task_t *p)
- {
- if (pid > 0) {
-- if (p->pid != pid)
-+ if ((is_virtual_pid(pid) ? virt_pid(p) : p->pid) != pid)
- return 0;
- } else if (!pid) {
- if (process_group(p) != process_group(current))
- return 0;
- } else if (pid != -1) {
-- if (process_group(p) != -pid)
-- return 0;
-+ if (__is_virtual_pid(-pid)) {
-+ if (virt_pgid(p) != -pid)
-+ return 0;
-+ } else {
-+ if (process_group(p) != -pid)
-+ return 0;
-+ }
- }
-
- /*
-@@ -1157,7 +1287,7 @@ static int wait_task_zombie(task_t *p, i
- p->exit_state = EXIT_ZOMBIE;
- return retval;
- }
-- retval = p->pid;
-+ retval = get_task_pid(p);
- if (p->real_parent != p->parent) {
- write_lock_irq(&tasklist_lock);
- /* Double-check with lock held. */
-@@ -1292,7 +1422,7 @@ bail_ref:
- if (!retval && infop)
- retval = put_user(p->uid, &infop->si_uid);
- if (!retval)
-- retval = p->pid;
-+ retval = get_task_pid(p);
- put_task_struct(p);
-
- BUG_ON(!retval);
-@@ -1574,6 +1704,7 @@ asmlinkage long sys_wait4(pid_t pid, int
- prevent_tail_call(ret);
- return ret;
- }
-+EXPORT_SYMBOL_GPL(sys_wait4);
-
- #ifdef __ARCH_WANT_SYS_WAITPID
-
-diff -upr linux-2.6.16.orig/kernel/fork.c linux-2.6.16-026test009/kernel/fork.c
---- linux-2.6.16.orig/kernel/fork.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/fork.c 2006-04-19 15:02:12.000000000 +0400
-@@ -20,6 +20,7 @@
- #include <linux/vmalloc.h>
- #include <linux/completion.h>
- #include <linux/namespace.h>
-+#include <linux/file.h>
- #include <linux/personality.h>
- #include <linux/mempolicy.h>
- #include <linux/sem.h>
-@@ -52,11 +53,15 @@
- #include <asm/cacheflush.h>
- #include <asm/tlbflush.h>
-
-+#include <ub/ub_vmpages.h>
-+#include <ub/ub_misc.h>
-+
- /*
- * Protected counters by write_lock_irq(&tasklist_lock)
- */
- unsigned long total_forks; /* Handle normal Linux uptimes. */
- int nr_threads; /* The idle threads do not count.. */
-+EXPORT_SYMBOL(nr_threads);
-
- int max_threads; /* tunable limit on nr_threads */
-
-@@ -103,6 +108,7 @@ static kmem_cache_t *mm_cachep;
-
- void free_task(struct task_struct *tsk)
- {
-+ ub_task_uncharge(tsk);
- free_thread_info(tsk->thread_info);
- free_task_struct(tsk);
- }
-@@ -122,9 +128,14 @@ void __put_task_struct_cb(struct rcu_hea
- free_uid(tsk->user);
- put_group_info(tsk->group_info);
-
-+#ifdef CONFIG_VE
-+ put_ve(VE_TASK_INFO(tsk)->owner_env);
-+ atomic_dec(&nr_dead);
-+#endif
- if (!profile_handoff_task(tsk))
- free_task(tsk);
- }
-+EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-
- void __init fork_init(unsigned long mempages)
- {
-@@ -135,7 +146,7 @@ void __init fork_init(unsigned long memp
- /* create a slab on which task_structs can be allocated */
- task_struct_cachep =
- kmem_cache_create("task_struct", sizeof(struct task_struct),
-- ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
-+ ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_UBC, NULL, NULL);
- #endif
-
- /*
-@@ -166,22 +177,30 @@ static struct task_struct *dup_task_stru
-
- tsk = alloc_task_struct();
- if (!tsk)
-- return NULL;
-+ goto out;
-
- ti = alloc_thread_info(tsk);
-- if (!ti) {
-- free_task_struct(tsk);
-- return NULL;
-- }
-+ if (!ti)
-+ goto out_tsk;
-
- *tsk = *orig;
- tsk->thread_info = ti;
- setup_thread_stack(tsk, orig);
-
-+ if (ub_task_charge(orig, tsk))
-+ goto out_ti;
-+
- /* One for us, one for whoever does the "release_task()" (usually parent) */
- atomic_set(&tsk->usage,2);
- atomic_set(&tsk->fs_excl, 0);
- return tsk;
-+
-+out_ti:
-+ free_thread_info(ti);
-+out_tsk:
-+ free_task_struct(tsk);
-+out:
-+ return NULL;
- }
-
- #ifdef CONFIG_MMU
-@@ -219,7 +238,12 @@ static inline int dup_mmap(struct mm_str
- -pages);
- continue;
- }
-+
- charge = 0;
-+ if (ub_memory_charge(mm, mpnt->vm_end - mpnt->vm_start,
-+ mpnt->vm_flags & ~VM_LOCKED,
-+ mpnt->vm_file, UB_HARD))
-+ goto fail_noch;
- if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
- if (security_vm_enough_memory(len))
-@@ -238,6 +262,7 @@ static inline int dup_mmap(struct mm_str
- tmp->vm_flags &= ~VM_LOCKED;
- tmp->vm_mm = mm;
- tmp->vm_next = NULL;
-+ set_vma_rss(tmp, 0);
- anon_vma_link(tmp);
- file = tmp->vm_file;
- if (file) {
-@@ -266,7 +291,7 @@ static inline int dup_mmap(struct mm_str
- rb_parent = &tmp->vm_rb;
-
- mm->map_count++;
-- retval = copy_page_range(mm, oldmm, mpnt);
-+ retval = copy_page_range(mm, oldmm, tmp, mpnt);
-
- if (tmp->vm_ops && tmp->vm_ops->open)
- tmp->vm_ops->open(tmp);
-@@ -283,6 +308,9 @@ out:
- fail_nomem_policy:
- kmem_cache_free(vm_area_cachep, tmp);
- fail_nomem:
-+ ub_memory_uncharge(mm, mpnt->vm_end - mpnt->vm_start,
-+ mpnt->vm_flags & ~VM_LOCKED, mpnt->vm_file);
-+fail_noch:
- retval = -ENOMEM;
- vm_unacct_memory(charge);
- goto out;
-@@ -313,7 +341,8 @@ static inline void mm_free_pgd(struct mm
-
- #include <linux/init_task.h>
-
--static struct mm_struct * mm_init(struct mm_struct * mm)
-+static struct mm_struct * mm_init(struct mm_struct * mm,
-+ struct task_struct *tsk)
- {
- atomic_set(&mm->mm_users, 1);
- atomic_set(&mm->mm_count, 1);
-@@ -328,11 +357,14 @@ static struct mm_struct * mm_init(struct
- mm->ioctx_list = NULL;
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
-+ set_mm_ub(mm, tsk);
-
- if (likely(!mm_alloc_pgd(mm))) {
- mm->def_flags = 0;
- return mm;
- }
-+
-+ put_mm_ub(mm);
- free_mm(mm);
- return NULL;
- }
-@@ -347,10 +379,11 @@ struct mm_struct * mm_alloc(void)
- mm = allocate_mm();
- if (mm) {
- memset(mm, 0, sizeof(*mm));
-- mm = mm_init(mm);
-+ mm = mm_init(mm, NULL);
- }
- return mm;
- }
-+EXPORT_SYMBOL_GPL(mm_alloc);
-
- /*
- * Called when the last reference to the mm
-@@ -362,8 +395,10 @@ void fastcall __mmdrop(struct mm_struct
- BUG_ON(mm == &init_mm);
- mm_free_pgd(mm);
- destroy_context(mm);
-+ put_mm_ub(mm);
- free_mm(mm);
- }
-+EXPORT_SYMBOL_GPL(__mmdrop);
-
- /*
- * Decrement the use count and release all resources for an mm.
-@@ -466,7 +501,7 @@ static struct mm_struct *dup_mm(struct t
-
- memcpy(mm, oldmm, sizeof(*mm));
-
-- if (!mm_init(mm))
-+ if (!mm_init(mm, tsk))
- goto fail_nomem;
-
- if (init_new_context(tsk, mm))
-@@ -720,7 +755,7 @@ out_release:
- free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
- free_fd_array(new_fdt->fd, new_fdt->max_fds);
- kmem_cache_free(files_cachep, newf);
-- goto out;
-+ return NULL;
- }
-
- static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
-@@ -896,7 +931,7 @@ asmlinkage long sys_set_tid_address(int
- {
- current->clear_child_tid = tidptr;
-
-- return current->pid;
-+ return virt_pid(current);
- }
-
- /*
-@@ -913,7 +948,7 @@ static task_t *copy_process(unsigned lon
- unsigned long stack_size,
- int __user *parent_tidptr,
- int __user *child_tidptr,
-- int pid)
-+ int pid, long pid0)
- {
- int retval;
- struct task_struct *p = NULL;
-@@ -974,12 +1009,20 @@ static task_t *copy_process(unsigned lon
- p->did_exec = 0;
- copy_flags(clone_flags, p);
- p->pid = pid;
-+#ifdef CONFIG_VE
-+ set_virt_pid(p, alloc_vpid(p->pid, pid0 ? : -1));
-+ if (virt_pid(p) < 0)
-+ goto bad_fork_cleanup_module;
-+#endif
- retval = -EFAULT;
- if (clone_flags & CLONE_PARENT_SETTID)
-- if (put_user(p->pid, parent_tidptr))
-+ if (put_user(virt_pid(p), parent_tidptr))
- goto bad_fork_cleanup;
-
- p->proc_dentry = NULL;
-+#ifdef CONFIG_VE
-+ p->ve_task_info.glob_proc_dentry = NULL;
-+#endif
-
- INIT_LIST_HEAD(&p->children);
- INIT_LIST_HEAD(&p->sibling);
-@@ -1027,8 +1070,13 @@ static task_t *copy_process(unsigned lon
- #endif
-
- p->tgid = p->pid;
-- if (clone_flags & CLONE_THREAD)
-+ set_virt_tgid(p, virt_pid(p));
-+ set_virt_pgid(p, virt_pgid(current));
-+ set_virt_sid(p, virt_sid(current));
-+ if (clone_flags & CLONE_THREAD) {
- p->tgid = current->tgid;
-+ set_virt_tgid(p, virt_tgid(current));
-+ }
-
- if ((retval = security_task_alloc(p)))
- goto bad_fork_cleanup_policy;
-@@ -1181,6 +1229,12 @@ static task_t *copy_process(unsigned lon
- if (unlikely(p->ptrace & PT_PTRACED))
- __ptrace_link(p, current->parent);
-
-+#ifdef CONFIG_VE
-+ SET_VE_LINKS(p);
-+ atomic_inc(&p->ve_task_info.owner_env->pcounter);
-+ get_ve(p->ve_task_info.owner_env);
-+ seqcount_init(&p->ve_task_info.wakeup_lock);
-+#endif
- if (thread_group_leader(p)) {
- p->signal->tty = current->signal->tty;
- p->signal->pgrp = process_group(current);
-@@ -1228,6 +1282,11 @@ bad_fork_cleanup_cpuset:
- #endif
- cpuset_exit(p);
- bad_fork_cleanup:
-+#ifdef CONFIG_VE
-+ if (virt_pid(p) != p->pid && virt_pid(p) > 0)
-+ free_vpid(virt_pid(p), get_exec_env());
-+bad_fork_cleanup_module:
-+#endif
- if (p->binfmt)
- module_put(p->binfmt->module);
- bad_fork_cleanup_put_domain:
-@@ -1253,7 +1312,7 @@ task_t * __devinit fork_idle(int cpu)
- task_t *task;
- struct pt_regs regs;
-
-- task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
-+ task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0, 0);
- if (!task)
- return ERR_PTR(-ENOMEM);
- init_idle(task, cpu);
-@@ -1283,12 +1342,13 @@ static inline int fork_traceflag (unsign
- * It copies the process, and if successful kick-starts
- * it and waits for it to finish using the VM if required.
- */
--long do_fork(unsigned long clone_flags,
-+long do_fork_pid(unsigned long clone_flags,
- unsigned long stack_start,
- struct pt_regs *regs,
- unsigned long stack_size,
- int __user *parent_tidptr,
-- int __user *child_tidptr)
-+ int __user *child_tidptr,
-+ long pid0)
- {
- struct task_struct *p;
- int trace = 0;
-@@ -1302,7 +1362,8 @@ long do_fork(unsigned long clone_flags,
- clone_flags |= CLONE_PTRACE;
- }
-
-- p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
-+ p = copy_process(clone_flags, stack_start, regs, stack_size,
-+ parent_tidptr, child_tidptr, pid, pid0);
- /*
- * Do this prior waking up the new thread - the thread pointer
- * might get invalid after that point, if the thread exits quickly.
-@@ -1310,6 +1371,7 @@ long do_fork(unsigned long clone_flags,
- if (!IS_ERR(p)) {
- struct completion vfork;
-
-+ pid = virt_pid(p);
- if (clone_flags & CLONE_VFORK) {
- p->vfork_done = &vfork;
- init_completion(&vfork);
-@@ -1330,13 +1392,18 @@ long do_fork(unsigned long clone_flags,
-
- if (unlikely (trace)) {
- current->ptrace_message = pid;
-+ set_pn_state(current, PN_STOP_FORK);
- ptrace_notify ((trace << 8) | SIGTRAP);
-+ clear_pn_state(current);
- }
-
- if (clone_flags & CLONE_VFORK) {
- wait_for_completion(&vfork);
-- if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
-+ if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
-+ set_pn_state(current, PN_STOP_VFORK);
- ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
-+ clear_pn_state(current);
-+ }
- }
- } else {
- free_pidmap(pid);
-@@ -1349,26 +1416,39 @@ long do_fork(unsigned long clone_flags,
- #define ARCH_MIN_MMSTRUCT_ALIGN 0
- #endif
-
-+EXPORT_SYMBOL(do_fork_pid);
-+
-+long do_fork(unsigned long clone_flags,
-+ unsigned long stack_start,
-+ struct pt_regs *regs,
-+ unsigned long stack_size,
-+ int __user *parent_tidptr,
-+ int __user *child_tidptr)
-+{
-+ return do_fork_pid(clone_flags, stack_start, regs, stack_size,
-+ parent_tidptr, child_tidptr, 0);
-+}
-+
- void __init proc_caches_init(void)
- {
- sighand_cachep = kmem_cache_create("sighand_cache",
- sizeof(struct sighand_struct), 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
- signal_cachep = kmem_cache_create("signal_cache",
- sizeof(struct signal_struct), 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
- files_cachep = kmem_cache_create("files_cache",
- sizeof(struct files_struct), 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
- fs_cachep = kmem_cache_create("fs_cache",
- sizeof(struct fs_struct), 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
- vm_area_cachep = kmem_cache_create("vm_area_struct",
- sizeof(struct vm_area_struct), 0,
- SLAB_PANIC, NULL, NULL);
- mm_cachep = kmem_cache_create("mm_struct",
- sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
- }
-
-
-diff -upr linux-2.6.16.orig/kernel/hrtimer.c linux-2.6.16-026test009/kernel/hrtimer.c
---- linux-2.6.16.orig/kernel/hrtimer.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/hrtimer.c 2006-04-19 15:02:12.000000000 +0400
-@@ -439,6 +439,7 @@ hrtimer_start(struct hrtimer *timer, kti
-
- return ret;
- }
-+EXPORT_SYMBOL_GPL(hrtimer_start);
-
- /**
- * hrtimer_try_to_cancel - try to deactivate a timer
-@@ -467,6 +468,7 @@ int hrtimer_try_to_cancel(struct hrtimer
- return ret;
-
- }
-+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
-
- /**
- * hrtimer_cancel - cancel a timer and wait for the handler to finish.
-@@ -504,6 +506,7 @@ ktime_t hrtimer_get_remaining(const stru
-
- return rem;
- }
-+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
-
- #ifdef CONFIG_NO_IDLE_HZ
- /**
-@@ -670,7 +673,7 @@ void hrtimer_run_queues(void)
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- */
--static ktime_t __sched
-+ktime_t __sched
- schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode)
- {
- /* fn stays NULL, meaning single-shot wakeup: */
-@@ -697,7 +700,7 @@ schedule_hrtimer_interruptible(struct hr
- return schedule_hrtimer(timer, mode);
- }
-
--static long __sched nanosleep_restart(struct restart_block *restart)
-+long __sched nanosleep_restart(struct restart_block *restart)
- {
- struct timespec __user *rmtp;
- struct timespec tu;
-@@ -726,6 +729,7 @@ static long __sched nanosleep_restart(st
- /* The other values in restart are already filled in */
- return -ERESTART_RESTARTBLOCK;
- }
-+EXPORT_SYMBOL_GPL(nanosleep_restart);
-
- long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
- const enum hrtimer_mode mode, const clockid_t clockid)
-diff -upr linux-2.6.16.orig/kernel/irq/handle.c linux-2.6.16-026test009/kernel/irq/handle.c
---- linux-2.6.16.orig/kernel/irq/handle.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/irq/handle.c 2006-04-19 15:02:12.000000000 +0400
-@@ -14,6 +14,8 @@
-
- #include "internals.h"
-
-+#include <ub/beancounter.h>
-+
- /*
- * Linux has a controller-independent interrupt architecture.
- * Every controller has a 'controller-template', that is used
-@@ -80,10 +82,12 @@ fastcall int handle_IRQ_event(unsigned i
- struct irqaction *action)
- {
- int ret, retval = 0, status = 0;
-+ struct user_beancounter *ub;
-
- if (!(action->flags & SA_INTERRUPT))
- local_irq_enable();
-
-+ ub = set_exec_ub(get_ub0());
- do {
- ret = action->handler(irq, action->dev_id, regs);
- if (ret == IRQ_HANDLED)
-@@ -91,6 +95,7 @@ fastcall int handle_IRQ_event(unsigned i
- retval |= ret;
- action = action->next;
- } while (action);
-+ (void)set_exec_ub(ub);
-
- if (status & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
-diff -upr linux-2.6.16.orig/kernel/kmod.c linux-2.6.16-026test009/kernel/kmod.c
---- linux-2.6.16.orig/kernel/kmod.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/kmod.c 2006-04-19 15:02:12.000000000 +0400
-@@ -78,6 +78,10 @@ int request_module(const char *fmt, ...)
- #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
- static int kmod_loop_msg;
-
-+ /* Don't allow request_module() inside VE. */
-+ if (!ve_is_super(get_exec_env()))
-+ return -EPERM;
-+
- va_start(args, fmt);
- ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
- va_end(args);
-@@ -246,6 +250,9 @@ int call_usermodehelper_keys(char *path,
- };
- DECLARE_WORK(work, __call_usermodehelper, &sub_info);
-
-+ if (!ve_is_super(get_exec_env()))
-+ return -EPERM;
-+
- if (!khelper_wq)
- return -EBUSY;
-
-diff -upr linux-2.6.16.orig/kernel/kthread.c linux-2.6.16-026test009/kernel/kthread.c
---- linux-2.6.16.orig/kernel/kthread.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/kthread.c 2006-04-19 15:02:12.000000000 +0400
-@@ -114,7 +114,7 @@ static void keventd_create_kthread(void
- create->result = ERR_PTR(pid);
- } else {
- wait_for_completion(&create->started);
-- create->result = find_task_by_pid(pid);
-+ create->result = find_task_by_pid_all(pid);
- }
- complete(&create->done);
- }
-diff -upr linux-2.6.16.orig/kernel/module.c linux-2.6.16-026test009/kernel/module.c
---- linux-2.6.16.orig/kernel/module.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/module.c 2006-04-19 15:02:12.000000000 +0400
-@@ -2130,6 +2130,8 @@ static void *m_start(struct seq_file *m,
- loff_t n = 0;
-
- down(&module_mutex);
-+ if (!ve_is_super(get_exec_env()))
-+ return NULL;
- list_for_each(i, &modules) {
- if (n++ == *pos)
- break;
-diff -upr linux-2.6.16.orig/kernel/mutex-debug.c linux-2.6.16-026test009/kernel/mutex-debug.c
---- linux-2.6.16.orig/kernel/mutex-debug.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/mutex-debug.c 2006-04-19 15:02:12.000000000 +0400
-@@ -193,12 +193,12 @@ retry:
- if (count != 10)
- printk(" locked it.\n");
-
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- show_task_locks(p);
- if (!unlock)
- if (read_trylock(&tasklist_lock))
- unlock = 1;
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
-
- printk("\n");
- show_held_locks(NULL);
-diff -upr linux-2.6.16.orig/kernel/panic.c linux-2.6.16-026test009/kernel/panic.c
---- linux-2.6.16.orig/kernel/panic.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/panic.c 2006-04-19 15:02:12.000000000 +0400
-@@ -23,6 +23,8 @@
- int panic_timeout;
- int panic_on_oops;
- int tainted;
-+int kernel_text_csum_broken;
-+EXPORT_SYMBOL(kernel_text_csum_broken);
-
- EXPORT_SYMBOL(panic_timeout);
-
-@@ -156,7 +158,8 @@ const char *print_tainted(void)
- {
- static char buf[20];
- if (tainted) {
-- snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c",
-+ snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c",
-+ kernel_text_csum_broken ? 'B' : ' ',
- tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
- tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
- tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
-diff -upr linux-2.6.16.orig/kernel/pid.c linux-2.6.16-026test009/kernel/pid.c
---- linux-2.6.16.orig/kernel/pid.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/pid.c 2006-04-19 15:02:12.000000000 +0400
-@@ -27,6 +27,10 @@
- #include <linux/bootmem.h>
- #include <linux/hash.h>
-
-+#ifdef CONFIG_VE
-+static void __free_vpid(int vpid, struct ve_struct *ve);
-+#endif
-+
- #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
- static struct hlist_head *pid_hash[PIDTYPE_MAX];
- static int pidhash_shift;
-@@ -57,8 +61,14 @@ typedef struct pidmap {
- void *page;
- } pidmap_t;
-
-+#ifdef CONFIG_VE
-+#define PIDMAP_NRFREE (BITS_PER_PAGE/2)
-+#else
-+#define PIDMAP_NRFREE BITS_PER_PAGE
-+#endif
-+
- static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
-- { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
-+ { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(PIDMAP_NRFREE), NULL } };
-
- static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
-
-@@ -67,9 +77,12 @@ fastcall void free_pidmap(int pid)
- pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
- int offset = pid & BITS_PER_PAGE_MASK;
-
-+ BUG_ON(__is_virtual_pid(pid) || pid == 1);
-+
- clear_bit(offset, map->page);
- atomic_inc(&map->nr_free);
- }
-+EXPORT_SYMBOL_GPL(free_pidmap);
-
- int alloc_pidmap(void)
- {
-@@ -77,6 +90,8 @@ int alloc_pidmap(void)
- pidmap_t *map;
-
- pid = last + 1;
-+ if (__is_virtual_pid(pid))
-+ pid += VPID_DIV;
- if (pid >= pid_max)
- pid = RESERVED_PIDS;
- offset = pid & BITS_PER_PAGE_MASK;
-@@ -106,6 +121,8 @@ int alloc_pidmap(void)
- return pid;
- }
- offset = find_next_offset(map, offset);
-+ if (__is_virtual_pid(offset))
-+ offset += VPID_DIV;
- pid = mk_pid(map, offset);
- /*
- * find_next_offset() found a bit, the pid from it
-@@ -130,6 +147,7 @@ int alloc_pidmap(void)
- }
- return -1;
- }
-+EXPORT_SYMBOL_GPL(alloc_pidmap);
-
- struct pid * fastcall find_pid(enum pid_type type, int nr)
- {
-@@ -143,6 +161,7 @@ struct pid * fastcall find_pid(enum pid_
- }
- return NULL;
- }
-+EXPORT_SYMBOL(find_pid);
-
- int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
- {
-@@ -162,6 +181,7 @@ int fastcall attach_pid(task_t *task, en
-
- return 0;
- }
-+EXPORT_SYMBOL_GPL(attach_pid);
-
- static fastcall int __detach_pid(task_t *task, enum pid_type type)
- {
-@@ -201,13 +221,27 @@ void fastcall detach_pid(task_t *task, e
- if (tmp != type && find_pid(tmp, nr))
- return;
-
-+#ifdef CONFIG_VE
-+ __free_vpid(task->pids[type].vnr, VE_TASK_INFO(task)->owner_env);
-+#endif
- free_pidmap(nr);
- }
-+EXPORT_SYMBOL_GPL(detach_pid);
-
- task_t *find_task_by_pid_type(int type, int nr)
- {
-+ BUG();
-+ return NULL;
-+}
-+
-+EXPORT_SYMBOL(find_task_by_pid_type);
-+
-+task_t *find_task_by_pid_type_all(int type, int nr)
-+{
- struct pid *pid;
-
-+ BUG_ON(nr != -1 && is_virtual_pid(nr));
-+
- pid = find_pid(type, nr);
- if (!pid)
- return NULL;
-@@ -215,7 +249,35 @@ task_t *find_task_by_pid_type(int type,
- return pid_task(&pid->pid_list, type);
- }
-
--EXPORT_SYMBOL(find_task_by_pid_type);
-+EXPORT_SYMBOL(find_task_by_pid_type_all);
-+
-+#ifdef CONFIG_VE
-+
-+task_t *find_task_by_pid_type_ve(int type, int nr)
-+{
-+ task_t *tsk;
-+ int gnr = nr;
-+ struct pid *pid;
-+
-+ if (is_virtual_pid(nr)) {
-+ gnr = __vpid_to_pid(nr);
-+ if (unlikely(gnr == -1))
-+ return NULL;
-+ }
-+
-+ pid = find_pid(type, gnr);
-+ if (!pid)
-+ return NULL;
-+
-+ tsk = pid_task(&pid->pid_list, type);
-+ if (!ve_accessible(VE_TASK_INFO(tsk)->owner_env, get_exec_env()))
-+ return NULL;
-+ return tsk;
-+}
-+
-+EXPORT_SYMBOL(find_task_by_pid_type_ve);
-+
-+#endif
-
- /*
- * This function switches the PIDs if a non-leader thread calls
-@@ -234,12 +296,16 @@ void switch_exec_pids(task_t *leader, ta
-
- leader->pid = leader->tgid = thread->pid;
- thread->pid = thread->tgid;
-+ set_virt_tgid(leader, virt_pid(thread));
-+ set_virt_pid(leader, virt_pid(thread));
-+ set_virt_pid(thread, virt_tgid(thread));
-
- attach_pid(thread, PIDTYPE_PID, thread->pid);
- attach_pid(thread, PIDTYPE_TGID, thread->tgid);
- attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
- attach_pid(thread, PIDTYPE_SID, thread->signal->session);
- list_add_tail(&thread->tasks, &init_task.tasks);
-+ SET_VE_LINKS(thread);
-
- attach_pid(leader, PIDTYPE_PID, leader->pid);
- attach_pid(leader, PIDTYPE_TGID, leader->tgid);
-@@ -247,6 +313,342 @@ void switch_exec_pids(task_t *leader, ta
- attach_pid(leader, PIDTYPE_SID, leader->signal->session);
- }
-
-+#ifdef CONFIG_VE
-+
-+/* Virtual PID bits.
-+ *
-+ * At the moment all internal structures in kernel store real global pid.
-+ * The only place, where virtual PID is used, is at user frontend. We
-+ * remap virtual pids obtained from user to global ones (vpid_to_pid) and
-+ * map globals to virtuals before showing them to user (virt_pid_type).
-+ *
-+ * We hold virtual PIDs inside struct pid, so map global -> virtual is easy.
-+ */
-+
-+pid_t _pid_type_to_vpid(int type, pid_t pid)
-+{
-+ struct pid * p;
-+
-+ if (unlikely(is_virtual_pid(pid)))
-+ return -1;
-+
-+ read_lock(&tasklist_lock);
-+ p = find_pid(type, pid);
-+ if (p) {
-+ pid = p->vnr;
-+ } else {
-+ pid = -1;
-+ }
-+ read_unlock(&tasklist_lock);
-+ return pid;
-+}
-+EXPORT_SYMBOL_GPL(_pid_type_to_vpid);
-+
-+pid_t pid_type_to_vpid(int type, pid_t pid)
-+{
-+ int vpid;
-+
-+ if (unlikely(pid <= 0))
-+ return pid;
-+
-+ BUG_ON(is_virtual_pid(pid));
-+
-+ if (ve_is_super(get_exec_env()))
-+ return pid;
-+
-+ vpid = _pid_type_to_vpid(type, pid);
-+ if (unlikely(vpid == -1)) {
-+ /* It is allowed: global pid can be used everywhere.
-+ * This can happen, when kernel remembers stray pids:
-+ * signal queues, locks etc.
-+ */
-+ vpid = pid;
-+ }
-+ return vpid;
-+}
-+EXPORT_SYMBOL_GPL(pid_type_to_vpid);
-+
-+/* To map virtual pids to global we maintain special hash table.
-+ *
-+ * Mapping entries are allocated when a process with non-trivial
-+ * mapping is forked, which is possible only after VE migrated.
-+ * Mappings are destroyed, when a global pid is removed from global
-+ * pidmap, which means we do not need to refcount mappings.
-+ */
-+
-+static struct hlist_head *vpid_hash;
-+
-+struct vpid_mapping
-+{
-+ int vpid;
-+ int veid;
-+ int pid;
-+ struct hlist_node link;
-+};
-+
-+static kmem_cache_t *vpid_mapping_cachep;
-+
-+static inline int vpid_hashfn(int vnr, int veid)
-+{
-+ return hash_long((unsigned long)(vnr+(veid<<16)), pidhash_shift);
-+}
-+
-+struct vpid_mapping *__lookup_vpid_mapping(int vnr, int veid)
-+{
-+ struct hlist_node *elem;
-+ struct vpid_mapping *map;
-+
-+ hlist_for_each_entry(map, elem,
-+ &vpid_hash[vpid_hashfn(vnr, veid)], link) {
-+ if (map->vpid == vnr && map->veid == veid)
-+ return map;
-+ }
-+ return NULL;
-+}
-+
-+/* __vpid_to_pid() is raw version of vpid_to_pid(). It is to be used
-+ * only under tasklist_lock. In some places we must use only this version
-+ * (f.e. __kill_pg_info is called under write lock!)
-+ *
-+ * Caller should pass virtual pid. This function returns an error, when
-+ * seeing a global pid.
-+ */
-+int __vpid_to_pid(int pid)
-+{
-+ struct vpid_mapping *map;
-+
-+ if (unlikely(!is_virtual_pid(pid) || ve_is_super(get_exec_env())))
-+ return -1;
-+
-+ if (!get_exec_env()->sparse_vpid) {
-+ if (pid != 1)
-+ return pid - VPID_DIV;
-+ return get_exec_env()->init_entry->pid;
-+ }
-+
-+ map = __lookup_vpid_mapping(pid, VEID(get_exec_env()));
-+ if (map)
-+ return map->pid;
-+ return -1;
-+}
-+EXPORT_SYMBOL_GPL(__vpid_to_pid);
-+
-+int vpid_to_pid(int pid)
-+{
-+ /* User gave bad pid. It is his problem. */
-+ if (unlikely(pid <= 0))
-+ return pid;
-+
-+ if (!is_virtual_pid(pid))
-+ return pid;
-+
-+ read_lock(&tasklist_lock);
-+ pid = __vpid_to_pid(pid);
-+ read_unlock(&tasklist_lock);
-+ return pid;
-+}
-+EXPORT_SYMBOL_GPL(vpid_to_pid);
-+
-+/* VEs which never migrated have trivial "arithmetic" mapping pid <-> vpid:
-+ *
-+ * vpid == 1 -> ve->init_task->pid
-+ * else pid & ~VPID_DIV
-+ *
-+ * In this case VE has ve->sparse_vpid = 0 and we do not use vpid hash table.
-+ *
-+ * When VE migrates and we see non-trivial mapping the first time, we
-+ * scan process table and populate mapping hash table.
-+ */
-+
-+static int add_mapping(int pid, int vpid, int veid, struct hlist_head *cache)
-+{
-+ if (pid > 0 && vpid > 0 && !__lookup_vpid_mapping(vpid, veid)) {
-+ struct vpid_mapping *m;
-+ if (hlist_empty(cache)) {
-+ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_ATOMIC);
-+ if (unlikely(m == NULL))
-+ return -ENOMEM;
-+ } else {
-+ m = hlist_entry(cache->first, struct vpid_mapping, link);
-+ hlist_del(&m->link);
-+ }
-+ m->pid = pid;
-+ m->vpid = vpid;
-+ m->veid = veid;
-+ hlist_add_head(&m->link,
-+ &vpid_hash[vpid_hashfn(vpid, veid)]);
-+ }
-+ return 0;
-+}
-+
-+static int switch_to_sparse_mapping(int pid)
-+{
-+ struct ve_struct *env = get_exec_env();
-+ struct hlist_head cache;
-+ task_t *g, *t;
-+ int pcount;
-+ int err;
-+
-+ /* Transition happens under write_lock_irq, so we try to make
-+ * it more reliable and fast preallocating mapping entries.
-+ * pcounter may be not enough, we could have lots of orphaned
-+ * process groups and sessions, which also require mappings.
-+ */
-+ INIT_HLIST_HEAD(&cache);
-+ pcount = atomic_read(&env->pcounter);
-+ err = -ENOMEM;
-+ while (pcount > 0) {
-+ struct vpid_mapping *m;
-+ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
-+ if (!m)
-+ goto out;
-+ hlist_add_head(&m->link, &cache);
-+ pcount--;
-+ }
-+
-+ write_lock_irq(&tasklist_lock);
-+ err = 0;
-+ if (env->sparse_vpid)
-+ goto out_unlock;
-+
-+ err = -ENOMEM;
-+ do_each_thread_ve(g, t) {
-+ if (t->pid == pid)
-+ continue;
-+ if (add_mapping(t->pid, virt_pid(t), VEID(env), &cache))
-+ goto out_unlock;
-+ } while_each_thread_ve(g, t);
-+
-+ for_each_process_ve(t) {
-+ if (t->pid == pid)
-+ continue;
-+
-+ if (add_mapping(t->tgid, virt_tgid(t), VEID(env), &cache))
-+ goto out_unlock;
-+ if (add_mapping(t->signal->pgrp, virt_pgid(t), VEID(env), &cache))
-+ goto out_unlock;
-+ if (add_mapping(t->signal->session, virt_sid(t), VEID(env), &cache))
-+ goto out_unlock;
-+ }
-+ env->sparse_vpid = 1;
-+ err = 0;
-+
-+out_unlock:
-+ if (err) {
-+ int i;
-+
-+ for (i=0; i<(1<<pidhash_shift); i++) {
-+ struct hlist_node *elem, *next;
-+ struct vpid_mapping *map;
-+
-+ hlist_for_each_entry_safe(map, elem, next, &vpid_hash[i], link) {
-+ if (map->veid == VEID(env)) {
-+ hlist_del(elem);
-+ hlist_add_head(elem, &cache);
-+ }
-+ }
-+ }
-+ }
-+ write_unlock_irq(&tasklist_lock);
-+
-+out:
-+ while (!hlist_empty(&cache)) {
-+ struct vpid_mapping *m;
-+ m = hlist_entry(cache.first, struct vpid_mapping, link);
-+ hlist_del(&m->link);
-+ kmem_cache_free(vpid_mapping_cachep, m);
-+ }
-+ return err;
-+}
-+
-+int alloc_vpid(int pid, int virt_pid)
-+{
-+ int result;
-+ struct vpid_mapping *m;
-+ struct ve_struct *env = get_exec_env();
-+
-+ if (ve_is_super(env) || !env->virt_pids)
-+ return pid;
-+
-+ if (!env->sparse_vpid) {
-+ if (virt_pid == -1)
-+ return pid + VPID_DIV;
-+
-+ if (virt_pid == 1 || virt_pid == pid + VPID_DIV)
-+ return virt_pid;
-+
-+ if ((result = switch_to_sparse_mapping(pid)) < 0)
-+ return result;
-+ }
-+
-+ m = kmem_cache_alloc(vpid_mapping_cachep, GFP_KERNEL);
-+ if (!m)
-+ return -ENOMEM;
-+
-+ m->pid = pid;
-+ m->veid = VEID(env);
-+
-+ result = (virt_pid == -1) ? pid + VPID_DIV : virt_pid;
-+
-+ write_lock_irq(&tasklist_lock);
-+ if (unlikely(__lookup_vpid_mapping(result, m->veid))) {
-+ if (virt_pid > 0) {
-+ result = -EEXIST;
-+ goto out;
-+ }
-+
-+ /* No luck. Now we search for some not-existing vpid.
-+ * It is weak place. We do linear search. */
-+ do {
-+ result++;
-+ if (!__is_virtual_pid(result))
-+ result += VPID_DIV;
-+ if (result >= pid_max)
-+ result = RESERVED_PIDS + VPID_DIV;
-+ } while (__lookup_vpid_mapping(result, m->veid) != NULL);
-+
-+ /* And set last_pid in hope future alloc_pidmap to avoid
-+ * collisions after future alloc_pidmap() */
-+ last_pid = result - VPID_DIV;
-+ }
-+ if (result > 0) {
-+ m->vpid = result;
-+ hlist_add_head(&m->link,
-+ &vpid_hash[vpid_hashfn(result, m->veid)]);
-+ }
-+out:
-+ write_unlock_irq(&tasklist_lock);
-+ if (result < 0)
-+ kmem_cache_free(vpid_mapping_cachep, m);
-+ return result;
-+}
-+EXPORT_SYMBOL(alloc_vpid);
-+
-+static void __free_vpid(int vpid, struct ve_struct *ve)
-+{
-+ struct vpid_mapping *m;
-+
-+ if (!ve->sparse_vpid)
-+ return;
-+
-+ if (!__is_virtual_pid(vpid) && (vpid != 1 || ve_is_super(ve)))
-+ return;
-+
-+ m = __lookup_vpid_mapping(vpid, ve->veid);
-+ BUG_ON(m == NULL);
-+ hlist_del(&m->link);
-+ kmem_cache_free(vpid_mapping_cachep, m);
-+}
-+
-+void free_vpid(int vpid, struct ve_struct *ve)
-+{
-+ write_lock_irq(&tasklist_lock);
-+ __free_vpid(vpid, ve);
-+ write_unlock_irq(&tasklist_lock);
-+}
-+EXPORT_SYMBOL(free_vpid);
-+#endif
-+
- /*
- * The pid hash table is scaled according to the amount of memory in the
- * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
-@@ -273,6 +675,14 @@ void __init pidhash_init(void)
- for (j = 0; j < pidhash_size; j++)
- INIT_HLIST_HEAD(&pid_hash[i][j]);
- }
-+
-+#ifdef CONFIG_VE
-+ vpid_hash = alloc_bootmem(pidhash_size * sizeof(struct hlist_head));
-+ if (!vpid_hash)
-+ panic("Could not alloc vpid_hash!\n");
-+ for (j = 0; j < pidhash_size; j++)
-+ INIT_HLIST_HEAD(&vpid_hash[j]);
-+#endif
- }
-
- void __init pidmap_init(void)
-@@ -289,4 +699,12 @@ void __init pidmap_init(void)
-
- for (i = 0; i < PIDTYPE_MAX; i++)
- attach_pid(current, i, 0);
-+
-+#ifdef CONFIG_VE
-+ vpid_mapping_cachep =
-+ kmem_cache_create("vpid_mapping",
-+ sizeof(struct vpid_mapping),
-+ __alignof__(struct vpid_mapping),
-+ SLAB_PANIC|SLAB_UBC, NULL, NULL);
-+#endif
- }
-diff -upr linux-2.6.16.orig/kernel/posix-cpu-timers.c linux-2.6.16-026test009/kernel/posix-cpu-timers.c
---- linux-2.6.16.orig/kernel/posix-cpu-timers.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/posix-cpu-timers.c 2006-04-19 15:02:12.000000000 +0400
-@@ -20,7 +20,7 @@ static int check_clock(const clockid_t w
- return 0;
-
- read_lock(&tasklist_lock);
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
- p->tgid != current->tgid : p->tgid != pid)) {
- error = -EINVAL;
-@@ -292,7 +292,7 @@ int posix_cpu_clock_get(const clockid_t
- */
- struct task_struct *p;
- read_lock(&tasklist_lock);
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- if (p) {
- if (CPUCLOCK_PERTHREAD(which_clock)) {
- if (p->tgid == current->tgid) {
-@@ -336,7 +336,7 @@ int posix_cpu_timer_create(struct k_itim
- if (pid == 0) {
- p = current;
- } else {
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- if (p && p->tgid != current->tgid)
- p = NULL;
- }
-@@ -344,7 +344,7 @@ int posix_cpu_timer_create(struct k_itim
- if (pid == 0) {
- p = current->group_leader;
- } else {
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- if (p && p->tgid != pid)
- p = NULL;
- }
-diff -upr linux-2.6.16.orig/kernel/posix-timers.c linux-2.6.16-026test009/kernel/posix-timers.c
---- linux-2.6.16.orig/kernel/posix-timers.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/posix-timers.c 2006-04-19 15:02:12.000000000 +0400
-@@ -31,6 +31,7 @@
- * POSIX clocks & timers
- */
- #include <linux/mm.h>
-+#include <linux/module.h>
- #include <linux/smp_lock.h>
- #include <linux/interrupt.h>
- #include <linux/slab.h>
-@@ -48,6 +49,8 @@
- #include <linux/workqueue.h>
- #include <linux/module.h>
-
-+#include <ub/beancounter.h>
-+
- /*
- * Management arrays for POSIX timers. Timers are kept in slab memory
- * Timer ids are allocated by an external routine that keeps track of the
-@@ -241,7 +244,8 @@ static __init int init_posix_timers(void
- register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
-
- posix_timers_cache = kmem_cache_create("posix_timers_cache",
-- sizeof (struct k_itimer), 0, 0, NULL, NULL);
-+ sizeof (struct k_itimer), 0,
-+ SLAB_UBC, NULL, NULL);
- idr_init(&posix_timers_id);
- return 0;
- }
-@@ -294,6 +298,13 @@ void do_schedule_next_timer(struct sigin
-
- int posix_timer_event(struct k_itimer *timr,int si_private)
- {
-+ int ret;
-+ struct ve_struct *ve;
-+ struct user_beancounter *ub;
-+
-+ ve = set_exec_env(timr->it_process->ve_task_info.owner_env);
-+ ub = set_exec_ub(timr->it_process->task_bc.task_ub);
-+
- memset(&timr->sigq->info, 0, sizeof(siginfo_t));
- timr->sigq->info.si_sys_private = si_private;
- /* Send signal to the process that owns this timer.*/
-@@ -306,11 +317,11 @@ int posix_timer_event(struct k_itimer *t
-
- if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
- struct task_struct *leader;
-- int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
-+ ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
- timr->it_process);
-
- if (likely(ret >= 0))
-- return ret;
-+ goto out;
-
- timr->it_sigev_notify = SIGEV_SIGNAL;
- leader = timr->it_process->group_leader;
-@@ -318,8 +329,12 @@ int posix_timer_event(struct k_itimer *t
- timr->it_process = leader;
- }
-
-- return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
-+ ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
- timr->it_process);
-+out:
-+ (void)set_exec_ub(ub);
-+ (void)set_exec_env(ve);
-+ return ret;
- }
- EXPORT_SYMBOL_GPL(posix_timer_event);
-
-@@ -366,7 +381,7 @@ static struct task_struct * good_sigeven
- struct task_struct *rtn = current->group_leader;
-
- if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-- (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
-+ (!(rtn = find_task_by_pid_ve(event->sigev_notify_thread_id)) ||
- rtn->tgid != current->tgid ||
- (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
- return NULL;
-diff -upr linux-2.6.16.orig/kernel/power/Kconfig linux-2.6.16-026test009/kernel/power/Kconfig
---- linux-2.6.16.orig/kernel/power/Kconfig 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/power/Kconfig 2006-04-19 15:02:13.000000000 +0400
-@@ -38,7 +38,7 @@ config PM_DEBUG
-
- config SOFTWARE_SUSPEND
- bool "Software Suspend"
-- depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)
-+ depends on PM && SWAP && X86 || ((FRV || PPC32) && !SMP)
- ---help---
- Enable the possibility of suspending the machine.
- It doesn't need APM.
-diff -upr linux-2.6.16.orig/kernel/power/process.c linux-2.6.16-026test009/kernel/power/process.c
---- linux-2.6.16.orig/kernel/power/process.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/power/process.c 2006-04-19 15:02:12.000000000 +0400
-@@ -38,18 +38,23 @@ void refrigerator(void)
- processes around? */
- long save;
- save = current->state;
-+ current->state = TASK_UNINTERRUPTIBLE;
- pr_debug("%s entered refrigerator\n", current->comm);
-- printk("=");
-+ /* printk("="); */
-
-- frozen_process(current);
- spin_lock_irq(&current->sighand->siglock);
-- recalc_sigpending(); /* We sent fake signal, clean it up */
-+ if (test_and_clear_thread_flag(TIF_FREEZE)) {
-+ recalc_sigpending(); /* We sent fake signal, clean it up */
-+ current->flags |= PF_FROZEN;
-+ } else {
-+ /* Freeze request could be canceled before we entered
-+ * refrigerator(). In this case we do nothing. */
-+ current->state = save;
-+ }
- spin_unlock_irq(&current->sighand->siglock);
-
-- while (frozen(current)) {
-- current->state = TASK_UNINTERRUPTIBLE;
-+ while (current->flags & PF_FROZEN)
- schedule();
-- }
- pr_debug("%s left refrigerator\n", current->comm);
- current->state = save;
- }
-@@ -67,7 +72,7 @@ int freeze_processes(void)
- do {
- todo = 0;
- read_lock(&tasklist_lock);
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- if (!freezeable(p))
- continue;
- if (frozen(p))
-@@ -78,7 +83,7 @@ int freeze_processes(void)
- signal_wake_up(p, 0);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- todo++;
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
- read_unlock(&tasklist_lock);
- yield(); /* Yield is okay here */
- if (todo && time_after(jiffies, start_time + TIMEOUT)) {
-@@ -95,15 +100,15 @@ int freeze_processes(void)
- */
- if (todo) {
- read_lock(&tasklist_lock);
-- do_each_thread(g, p)
-+ do_each_thread_all(g, p)
- if (freezing(p)) {
- pr_debug(" clean up: %s\n", p->comm);
-- p->flags &= ~PF_FREEZE;
- spin_lock_irqsave(&p->sighand->siglock, flags);
-+ clear_tsk_thread_flag(p, TIF_FREEZE);
- recalc_sigpending_tsk(p);
- spin_unlock_irqrestore(&p->sighand->siglock, flags);
- }
-- while_each_thread(g, p);
-+ while_each_thread_all(g, p);
- read_unlock(&tasklist_lock);
- return todo;
- }
-@@ -119,12 +124,12 @@ void thaw_processes(void)
-
- printk( "Restarting tasks..." );
- read_lock(&tasklist_lock);
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- if (!freezeable(p))
- continue;
- if (!thaw_process(p))
- printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
-
- read_unlock(&tasklist_lock);
- schedule();
-diff -upr linux-2.6.16.orig/kernel/printk.c linux-2.6.16-026test009/kernel/printk.c
---- linux-2.6.16.orig/kernel/printk.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/printk.c 2006-04-19 15:02:12.000000000 +0400
-@@ -30,7 +30,9 @@
- #include <linux/smp.h>
- #include <linux/security.h>
- #include <linux/bootmem.h>
-+#include <linux/vzratelimit.h>
- #include <linux/syscalls.h>
-+#include <linux/veprintk.h>
-
- #include <asm/uaccess.h>
-
-@@ -83,7 +85,7 @@ static int console_locked;
- * It is also used in interesting ways to provide interlocking in
- * release_console_sem().
- */
--static DEFINE_SPINLOCK(logbuf_lock);
-+DEFINE_SPINLOCK(logbuf_lock);
-
- #define LOG_BUF_MASK (log_buf_len-1)
- #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
-@@ -179,18 +181,18 @@ static int __init log_buf_len_setup(char
-
- spin_lock_irqsave(&logbuf_lock, flags);
- log_buf_len = size;
-- log_buf = new_log_buf;
-+ ve_log_buf = new_log_buf;
-
-- offset = start = min(con_start, log_start);
-+ offset = start = min(con_start, ve_log_start);
- dest_idx = 0;
-- while (start != log_end) {
-- log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
-+ while (start != ve_log_end) {
-+ ve_log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)];
- start++;
- dest_idx++;
- }
-- log_start -= offset;
-+ ve_log_start -= offset;
- con_start -= offset;
-- log_end -= offset;
-+ ve_log_end -= offset;
- spin_unlock_irqrestore(&logbuf_lock, flags);
-
- printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
-@@ -223,6 +225,10 @@ int do_syslog(int type, char __user *buf
- char c;
- int error = 0;
-
-+ if (!ve_is_super(get_exec_env()) &&
-+ (type == 6 || type == 7 || type == 8))
-+ goto out;
-+
- error = security_syslog(type);
- if (error)
- return error;
-@@ -243,15 +249,15 @@ int do_syslog(int type, char __user *buf
- error = -EFAULT;
- goto out;
- }
-- error = wait_event_interruptible(log_wait,
-- (log_start - log_end));
-+ error = wait_event_interruptible(ve_log_wait,
-+ (ve_log_start - ve_log_end));
- if (error)
- goto out;
- i = 0;
- spin_lock_irq(&logbuf_lock);
-- while (!error && (log_start != log_end) && i < len) {
-- c = LOG_BUF(log_start);
-- log_start++;
-+ while (!error && (ve_log_start != ve_log_end) && i < len) {
-+ c = VE_LOG_BUF(ve_log_start);
-+ ve_log_start++;
- spin_unlock_irq(&logbuf_lock);
- error = __put_user(c,buf);
- buf++;
-@@ -277,15 +283,17 @@ int do_syslog(int type, char __user *buf
- error = -EFAULT;
- goto out;
- }
-+ if (ve_log_buf == NULL)
-+ goto out;
- count = len;
-- if (count > log_buf_len)
-- count = log_buf_len;
-+ if (count > ve_log_buf_len)
-+ count = ve_log_buf_len;
- spin_lock_irq(&logbuf_lock);
-- if (count > logged_chars)
-- count = logged_chars;
-+ if (count > ve_logged_chars)
-+ count = ve_logged_chars;
- if (do_clear)
-- logged_chars = 0;
-- limit = log_end;
-+ ve_logged_chars = 0;
-+ limit = ve_log_end;
- /*
- * __put_user() could sleep, and while we sleep
- * printk() could overwrite the messages
-@@ -294,9 +302,9 @@ int do_syslog(int type, char __user *buf
- */
- for (i = 0; i < count && !error; i++) {
- j = limit-1-i;
-- if (j + log_buf_len < log_end)
-+ if (j + ve_log_buf_len < ve_log_end)
- break;
-- c = LOG_BUF(j);
-+ c = VE_LOG_BUF(j);
- spin_unlock_irq(&logbuf_lock);
- error = __put_user(c,&buf[count-1-i]);
- cond_resched();
-@@ -320,7 +328,7 @@ int do_syslog(int type, char __user *buf
- }
- break;
- case 5: /* Clear ring buffer */
-- logged_chars = 0;
-+ ve_logged_chars = 0;
- break;
- case 6: /* Disable logging to console */
- console_loglevel = minimum_console_loglevel;
-@@ -338,10 +346,10 @@ int do_syslog(int type, char __user *buf
- error = 0;
- break;
- case 9: /* Number of chars in the log buffer */
-- error = log_end - log_start;
-+ error = ve_log_end - ve_log_start;
- break;
- case 10: /* Size of the log buffer */
-- error = log_buf_len;
-+ error = ve_log_buf_len;
- break;
- default:
- error = -EINVAL;
-@@ -365,7 +373,7 @@ static void __call_console_drivers(unsig
-
- for (con = console_drivers; con; con = con->next) {
- if ((con->flags & CON_ENABLED) && con->write)
-- con->write(con, &LOG_BUF(start), end - start);
-+ con->write(con, &VE_LOG_BUF(start), end - start);
- }
- }
-
-@@ -377,11 +385,11 @@ static void _call_console_drivers(unsign
- {
- if (msg_log_level < console_loglevel &&
- console_drivers && start != end) {
-- if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) {
-+ if ((start & VE_LOG_BUF_MASK) > (end & VE_LOG_BUF_MASK)) {
- /* wrapped write */
-- __call_console_drivers(start & LOG_BUF_MASK,
-- log_buf_len);
-- __call_console_drivers(0, end & LOG_BUF_MASK);
-+ __call_console_drivers(start & VE_LOG_BUF_MASK,
-+ ve_log_buf_len);
-+ __call_console_drivers(0, end & VE_LOG_BUF_MASK);
- } else {
- __call_console_drivers(start, end);
- }
-@@ -405,16 +413,16 @@ static void call_console_drivers(unsigne
- start_print = start;
- while (cur_index != end) {
- if (msg_level < 0 && ((end - cur_index) > 2) &&
-- LOG_BUF(cur_index + 0) == '<' &&
-- LOG_BUF(cur_index + 1) >= '0' &&
-- LOG_BUF(cur_index + 1) <= '7' &&
-- LOG_BUF(cur_index + 2) == '>') {
-- msg_level = LOG_BUF(cur_index + 1) - '0';
-+ VE_LOG_BUF(cur_index + 0) == '<' &&
-+ VE_LOG_BUF(cur_index + 1) >= '0' &&
-+ VE_LOG_BUF(cur_index + 1) <= '7' &&
-+ VE_LOG_BUF(cur_index + 2) == '>') {
-+ msg_level = VE_LOG_BUF(cur_index + 1) - '0';
- cur_index += 3;
- start_print = cur_index;
- }
- while (cur_index != end) {
-- char c = LOG_BUF(cur_index);
-+ char c = VE_LOG_BUF(cur_index);
-
- cur_index++;
- if (c == '\n') {
-@@ -439,14 +447,14 @@ static void call_console_drivers(unsigne
-
- static void emit_log_char(char c)
- {
-- LOG_BUF(log_end) = c;
-- log_end++;
-- if (log_end - log_start > log_buf_len)
-- log_start = log_end - log_buf_len;
-- if (log_end - con_start > log_buf_len)
-- con_start = log_end - log_buf_len;
-- if (logged_chars < log_buf_len)
-- logged_chars++;
-+ VE_LOG_BUF(ve_log_end) = c;
-+ ve_log_end++;
-+ if (ve_log_end - ve_log_start > ve_log_buf_len)
-+ ve_log_start = ve_log_end - ve_log_buf_len;
-+ if (ve_is_super(get_exec_env()) && ve_log_end - con_start > ve_log_buf_len)
-+ con_start = ve_log_end - ve_log_buf_len;
-+ if (ve_logged_chars < ve_log_buf_len)
-+ ve_logged_chars++;
- }
-
- /*
-@@ -511,18 +519,68 @@ __attribute__((weak)) unsigned long long
- * printf(3)
- */
-
-+static inline int ve_log_init(void)
-+{
-+#ifdef CONFIG_VE
-+ if (ve_log_buf != NULL)
-+ return 0;
-+
-+ if (ve_is_super(get_exec_env())) {
-+ ve0._log_wait = &log_wait;
-+ ve0._log_start = &log_start;
-+ ve0._log_end = &log_end;
-+ ve0._logged_chars = &logged_chars;
-+ ve0.log_buf = log_buf;
-+ return 0;
-+ }
-+
-+ ve_log_buf = kmalloc(ve_log_buf_len, GFP_ATOMIC);
-+ if (!ve_log_buf)
-+ return -ENOMEM;
-+
-+ memset(ve_log_buf, 0, ve_log_buf_len);
-+#endif
-+ return 0;
-+}
-+
- asmlinkage int printk(const char *fmt, ...)
- {
- va_list args;
- int r;
-+ struct ve_struct *ve;
-
- va_start(args, fmt);
-+ ve = set_exec_env(get_ve0());
- r = vprintk(fmt, args);
-+ (void)set_exec_env(ve);
- va_end(args);
-
- return r;
- }
-
-+asmlinkage int ve_printk(int dst, const char *fmt, ...)
-+{
-+ va_list args;
-+ int printed_len;
-+
-+ printed_len = 0;
-+ if (ve_is_super(get_exec_env()) || (dst & VE0_LOG)) {
-+ struct ve_struct *env;
-+ va_start(args, fmt);
-+ env = set_exec_env(get_ve0());
-+ printed_len = vprintk(fmt, args);
-+ (void)set_exec_env(env);
-+ va_end(args);
-+ }
-+ if (!ve_is_super(get_exec_env()) && (dst & VE_LOG)) {
-+ va_start(args, fmt);
-+ printed_len = vprintk(fmt, args);
-+ va_end(args);
-+ }
-+ return printed_len;
-+}
-+EXPORT_SYMBOL(ve_printk);
-+
- /* cpu currently holding logbuf_lock */
- static volatile unsigned int printk_cpu = UINT_MAX;
-
-@@ -533,6 +591,7 @@ asmlinkage int vprintk(const char *fmt,
- char *p;
- static char printk_buf[1024];
- static int log_level_unknown = 1;
-+ int err, need_wake;
-
- preempt_disable();
- if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
-@@ -544,6 +603,12 @@ asmlinkage int vprintk(const char *fmt,
- spin_lock_irqsave(&logbuf_lock, flags);
- printk_cpu = smp_processor_id();
-
-+ err = ve_log_init();
-+ if (err) {
-+ spin_unlock_irqrestore(&logbuf_lock, flags);
-+ return err;
-+ }
-+
- /* Emit the output into the temporary buffer */
- printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
-
-@@ -615,7 +680,12 @@ asmlinkage int vprintk(const char *fmt,
- spin_unlock_irqrestore(&logbuf_lock, flags);
- goto out;
- }
-- if (!down_trylock(&console_sem)) {
-+ if (!ve_is_super(get_exec_env())) {
-+ need_wake = (ve_log_start != ve_log_end);
-+ spin_unlock_irqrestore(&logbuf_lock, flags);
-+ if (!oops_in_progress && need_wake)
-+ wake_up_interruptible(&ve_log_wait);
-+ } else if (!down_trylock(&console_sem)) {
- console_locked = 1;
- /*
- * We own the drivers. We can drop the spinlock and let
-@@ -732,6 +802,12 @@ int is_console_locked(void)
- }
- EXPORT_SYMBOL(is_console_locked);
-
-+void wake_up_klogd(void)
-+{
-+ if (!oops_in_progress && waitqueue_active(&ve_log_wait))
-+ wake_up_interruptible(&ve_log_wait);
-+}
-+
- /**
- * release_console_sem - unlock the console system
- *
-@@ -754,12 +830,12 @@ void release_console_sem(void)
-
- for ( ; ; ) {
- spin_lock_irqsave(&logbuf_lock, flags);
-- wake_klogd |= log_start - log_end;
-- if (con_start == log_end)
-+ wake_klogd |= ve_log_start - ve_log_end;
-+ if (con_start == ve_log_end)
- break; /* Nothing to print */
- _con_start = con_start;
-- _log_end = log_end;
-- con_start = log_end; /* Flush */
-+ _log_end = ve_log_end;
-+ con_start = ve_log_end; /* Flush */
- spin_unlock(&logbuf_lock);
- call_console_drivers(_con_start, _log_end);
- local_irq_restore(flags);
-@@ -768,8 +844,8 @@ void release_console_sem(void)
- console_may_schedule = 0;
- up(&console_sem);
- spin_unlock_irqrestore(&logbuf_lock, flags);
-- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
-- wake_up_interruptible(&log_wait);
-+ if (wake_klogd)
-+ wake_up_klogd();
- }
- EXPORT_SYMBOL(release_console_sem);
-
-@@ -940,7 +1016,7 @@ void register_console(struct console *co
- * for us.
- */
- spin_lock_irqsave(&logbuf_lock, flags);
-- con_start = log_start;
-+ con_start = ve_log_start;
- spin_unlock_irqrestore(&logbuf_lock, flags);
- }
- release_console_sem();
-@@ -1049,3 +1125,33 @@ int printk_ratelimit(void)
- printk_ratelimit_burst);
- }
- EXPORT_SYMBOL(printk_ratelimit);
-+
-+/*
-+ * Rate limiting stuff.
-+ */
-+int vz_ratelimit(struct vz_rate_info *p)
-+{
-+ unsigned long cjif, djif;
-+ unsigned long flags;
-+ static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
-+ long new_bucket;
-+
-+ spin_lock_irqsave(&ratelimit_lock, flags);
-+ cjif = jiffies;
-+ djif = cjif - p->last;
-+ if (djif < p->interval) {
-+ if (p->bucket >= p->burst) {
-+ spin_unlock_irqrestore(&ratelimit_lock, flags);
-+ return 0;
-+ }
-+ p->bucket++;
-+ } else {
-+ new_bucket = p->bucket - (djif / (unsigned)p->interval);
-+ if (new_bucket < 0)
-+ new_bucket = 0;
-+ p->bucket = new_bucket + 1;
-+ }
-+ p->last = cjif;
-+ spin_unlock_irqrestore(&ratelimit_lock, flags);
-+ return 1;
-+}
-diff -upr linux-2.6.16.orig/kernel/ptrace.c linux-2.6.16-026test009/kernel/ptrace.c
---- linux-2.6.16.orig/kernel/ptrace.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ptrace.c 2006-04-19 15:02:12.000000000 +0400
-@@ -57,10 +57,6 @@ void ptrace_untrace(task_t *child)
- signal_wake_up(child, 1);
- }
- }
-- if (child->signal->flags & SIGNAL_GROUP_EXIT) {
-- sigaddset(&child->pending.signal, SIGKILL);
-- signal_wake_up(child, 1);
-- }
- spin_unlock(&child->sighand->siglock);
- }
-
-@@ -82,7 +78,8 @@ void __ptrace_unlink(task_t *child)
- SET_LINKS(child);
- }
-
-- ptrace_untrace(child);
-+ if (child->state == TASK_TRACED)
-+ ptrace_untrace(child);
- }
-
- /*
-@@ -136,7 +133,10 @@ static int may_attach(struct task_struct
- smp_rmb();
- if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
- return -EPERM;
--
-+ if (!task->mm->vps_dumpable && !ve_is_super(get_exec_env()))
-+ return -EPERM;
-+ if (!ve_accessible(VE_TASK_INFO(task)->owner_env, get_exec_env()))
-+ return -EPERM;
- return security_ptrace(current, task);
- }
-
-@@ -263,6 +263,7 @@ int access_process_vm(struct task_struct
-
- return buf - old_buf;
- }
-+EXPORT_SYMBOL_GPL(access_process_vm);
-
- int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
- {
-@@ -459,7 +460,7 @@ struct task_struct *ptrace_get_task_stru
- return ERR_PTR(-EPERM);
-
- read_lock(&tasklist_lock);
-- child = find_task_by_pid(pid);
-+ child = find_task_by_pid_ve(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
-diff -upr linux-2.6.16.orig/kernel/sched.c linux-2.6.16-026test009/kernel/sched.c
---- linux-2.6.16.orig/kernel/sched.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/sched.c 2006-04-19 15:02:12.000000000 +0400
-@@ -220,6 +220,9 @@ struct runqueue {
- */
- unsigned long nr_uninterruptible;
-
-+ unsigned long nr_sleeping;
-+ unsigned long nr_stopped;
-+
- unsigned long expired_timestamp;
- unsigned long long timestamp_last_tick;
- task_t *curr, *idle;
-@@ -237,6 +240,7 @@ struct runqueue {
-
- task_t *migration_thread;
- struct list_head migration_queue;
-+ int cpu;
- #endif
-
- #ifdef CONFIG_SCHEDSTATS
-@@ -284,6 +288,11 @@ for (domain = rcu_dereference(cpu_rq(cpu
- # define finish_arch_switch(prev) do { } while (0)
- #endif
-
-+struct kernel_stat_glob kstat_glob;
-+spinlock_t kstat_glb_lock = SPIN_LOCK_UNLOCKED;
-+EXPORT_SYMBOL(kstat_glob);
-+EXPORT_SYMBOL(kstat_glb_lock);
-+
- #ifndef __ARCH_WANT_UNLOCKED_CTXSW
- static inline int task_running(runqueue_t *rq, task_t *p)
- {
-@@ -374,6 +383,186 @@ static inline void task_rq_unlock(runque
- spin_unlock_irqrestore(&rq->lock, *flags);
- }
-
-+#ifdef CONFIG_VE
-+#define ve_nr_iowait_inc(env, cpu) /* nr_iowait of env on cpu: +1 */ \
-+ do { \
-+ VE_CPU_STATS((env), (cpu))->nr_iowait++; \
-+ } while(0)
-+#define ve_nr_iowait_dec(env, cpu) /* nr_iowait of env on cpu: -1 */ \
-+ do { \
-+ VE_CPU_STATS((env), (cpu))->nr_iowait--; \
-+ } while(0)
-+#define ve_nr_unint_inc(env, cpu) /* nr_unint of env on cpu: +1 */ \
-+ do { \
-+ VE_CPU_STATS((env), (cpu))->nr_unint++; \
-+ } while(0)
-+#define ve_nr_unint_dec(env, cpu) /* nr_unint of env on cpu: -1 */ \
-+ do { \
-+ VE_CPU_STATS((env), (cpu))->nr_unint--; \
-+ } while(0)
-+
-+#define cycles_after(a, b) ((long long)(b) - (long long)(a) < 0) /* true when a is later than b, judged by the signed difference */
-+
-+cycles_t ve_sched_get_idle_time(struct ve_struct *ve, int cpu) /* idle_time of ve on cpu, plus the idle period still open (if any) */
-+{
-+ struct ve_cpu_stats *ve_stat;
-+ unsigned v;
-+ cycles_t strt, ret, cycles;
-+
-+ ve_stat = VE_CPU_STATS(ve, cpu);
-+ do { /* seqcount read side: the body is repeated while read_seqcount_retry() below reports a change */
-+ v = read_seqcount_begin(&ve_stat->stat_lock);
-+ ret = ve_stat->idle_time;
-+ strt = ve_stat->strt_idle_time;
-+ if (strt && nr_uninterruptible_ve(ve) == 0) { /* an open period is counted as idle only when ve has no uninterruptible tasks */
-+ cycles = get_cycles();
-+ if (cycles_after(cycles, strt)) /* skip the open period if the current reading is not past its start */
-+ ret += cycles - strt;
-+ }
-+ } while (read_seqcount_retry(&ve_stat->stat_lock, v));
-+ return ret;
-+}
-+EXPORT_SYMBOL(ve_sched_get_idle_time);
-+
-+cycles_t ve_sched_get_iowait_time(struct ve_struct *ve, int cpu) /* iowait_time of ve on cpu, plus the open period when it counts as iowait */
-+{
-+ struct ve_cpu_stats *ve_stat;
-+ unsigned v;
-+ cycles_t strt, ret, cycles;
-+
-+ ve_stat = VE_CPU_STATS(ve, cpu);
-+ do { /* seqcount read side: the body is repeated while read_seqcount_retry() below reports a change */
-+ v = read_seqcount_begin(&ve_stat->stat_lock);
-+ ret = ve_stat->iowait_time;
-+ strt = ve_stat->strt_idle_time; /* the same start stamp serves idle and iowait accounting */
-+ if (strt && nr_uninterruptible_ve(ve) > 0) { /* an open period is counted as iowait when ve has uninterruptible tasks */
-+ cycles = get_cycles();
-+ if (cycles_after(cycles, strt))
-+ ret += cycles - strt;
-+ }
-+ } while (read_seqcount_retry(&ve_stat->stat_lock, v));
-+ return ret;
-+}
-+
-+EXPORT_SYMBOL(ve_sched_get_iowait_time);
-+
-+static inline void ve_stop_idle(struct ve_struct *ve, /* close the open idle period of ve on cpu at time cycles */
-+ unsigned int cpu, cycles_t cycles)
-+{
-+ struct ve_cpu_stats *ve_stat;
-+
-+ ve_stat = VE_CPU_STATS(ve, cpu);
-+
-+ write_seqcount_begin(&ve_stat->stat_lock);
-+ if (ve_stat->strt_idle_time) { /* zero means no period is open */
-+ if (cycles_after(cycles, ve_stat->strt_idle_time)) {
-+ if (nr_uninterruptible_ve(ve) == 0) /* no uninterruptible tasks: the period was idle, otherwise iowait */
-+ ve_stat->idle_time += cycles -
-+ ve_stat->strt_idle_time;
-+ else
-+ ve_stat->iowait_time += cycles -
-+ ve_stat->strt_idle_time;
-+ }
-+ ve_stat->strt_idle_time = 0; /* mark the period closed even when nothing was added */
-+ }
-+ write_seqcount_end(&ve_stat->stat_lock);
-+}
-+
-+static inline void ve_strt_idle(struct ve_struct *ve, /* open an idle period for ve on cpu, starting at cycles */
-+ unsigned int cpu, cycles_t cycles)
-+{
-+ struct ve_cpu_stats *ve_stat;
-+
-+ ve_stat = VE_CPU_STATS(ve, cpu);
-+
-+ write_seqcount_begin(&ve_stat->stat_lock);
-+ ve_stat->strt_idle_time = cycles; /* a stamp of 0 would read as "no period open" in ve_stop_idle() */
-+ write_seqcount_end(&ve_stat->stat_lock);
-+}
-+
-+#define ve_nr_running_inc(env, cpu, cycles) do { /* nr_running of env on cpu: +1 */ \
-+ if (++VE_CPU_STATS((env), (cpu))->nr_running == 1) /* count went 0 -> 1: the idle period ends at cycles */ \
-+ ve_stop_idle(env, cpu, cycles); \
-+ } while (0)
-+#define ve_nr_running_dec(env, cpu, cycles) do { /* nr_running of env on cpu: -1 */ \
-+ if (--VE_CPU_STATS((env), (cpu))->nr_running == 0) /* count reached 0: an idle period starts at cycles */ \
-+ ve_strt_idle(env, cpu, cycles); \
-+ } while (0)
-+
-+void ve_sched_attach(struct ve_struct *envid) /* move current's nr_running accounting from its owner_env to envid */
-+{
-+ struct task_struct *tsk;
-+ unsigned int cpu;
-+ cycles_t cycles;
-+
-+ tsk = current;
-+ preempt_disable();
-+ cycles = get_cycles(); /* one stamp is used for both the decrement and the increment */
-+ cpu = task_cpu(tsk);
-+ ve_nr_running_dec(VE_TASK_INFO(tsk)->owner_env, cpu, cycles);
-+ ve_nr_running_inc(envid, cpu, cycles); /* owner_env itself is not changed here */
-+ preempt_enable();
-+}
-+EXPORT_SYMBOL(ve_sched_attach);
-+
-+static inline void write_wakeup_stamp(struct task_struct *p, cycles_t cyc) /* store cyc as p's wakeup_stamp inside a wakeup_lock seqcount write section */
-+{
-+ struct ve_task_info *ti;
-+
-+ ti = VE_TASK_INFO(p);
-+ write_seqcount_begin(&ti->wakeup_lock);
-+ ti->wakeup_stamp = cyc; /* update_sched_lat() treats a stamp of 0 as "nothing to account" */
-+ write_seqcount_end(&ti->wakeup_lock);
-+}
-+
-+static inline void update_sched_lat(struct task_struct *t, cycles_t cycles) /* add (cycles - t's wakeup_stamp) to the global and per-VE sched_lat statistics */
-+{
-+ int cpu;
-+ cycles_t ve_wstamp;
-+
-+ /* safe due to runqueue lock */
-+ cpu = smp_processor_id();
-+ ve_wstamp = t->ve_task_info.wakeup_stamp;
-+
-+ if (ve_wstamp && cycles > ve_wstamp) { /* skipped when no stamp is set or cycles is not past it */
-+ KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat,
-+ cpu, cycles - ve_wstamp);
-+ KSTAT_LAT_PCPU_ADD(&t->ve_task_info.exec_env->sched_lat_ve, /* NOTE(review): exec_env here, owner_env in update_ve_task_info() - confirm intended */
-+ cpu, cycles - ve_wstamp);
-+ }
-+}
-+
-+static inline void update_ve_task_info(task_t *prev, cycles_t cycles) /* charge prev's run since sched_time to its owner VE's used_time on this CPU */
-+{
-+#ifdef CONFIG_FAIRSCHED
-+ if (prev != this_pcpu()->idle) { /* the idle task is not charged; only where the idle task is looked up differs per config */
-+#else
-+ if (prev != this_rq()->idle) {
-+#endif
-+ VE_CPU_STATS(prev->ve_task_info.owner_env,
-+ smp_processor_id())->used_time +=
-+ cycles - prev->ve_task_info.sched_time;
-+
-+ prev->ve_task_info.sched_time = cycles; /* restart the interval so the same cycles are not charged twice */
-+ }
-+}
-+
-+#else
-+#define ve_nr_running_inc(env, cpu, cycles) do { } while(0) /* !CONFIG_VE: the accounting hooks expand to nothing and their arguments are not evaluated */
-+#define ve_nr_running_dec(env, cpu, cycles) do { } while(0)
-+#define ve_nr_iowait_inc(env, cpu) do { } while(0)
-+#define ve_nr_iowait_dec(env, cpu) do { } while(0)
-+#define ve_nr_unint_inc(env, cpu) do { } while(0)
-+#define ve_nr_unint_dec(env, cpu) do { } while(0)
-+#define update_ve_task_info(prev, cycles) do { } while (0)
-+#endif
-+
-+unsigned long nr_zombie = 0; /* protected by tasklist_lock */
-+EXPORT_SYMBOL(nr_zombie);
-+
-+atomic_t nr_dead = ATOMIC_INIT(0);
-+EXPORT_SYMBOL(nr_dead);
-+
- #ifdef CONFIG_SCHEDSTATS
- /*
- * bump this up when changing the output format or the meaning of an existing
-@@ -666,8 +855,16 @@ static int effective_prio(task_t *p)
- */
- static inline void __activate_task(task_t *p, runqueue_t *rq)
- {
-+ cycles_t cycles;
-+
-+#ifdef CONFIG_VE
-+ cycles = get_cycles();
-+ write_wakeup_stamp(p, cycles);
-+ p->ve_task_info.sleep_time += cycles;
-+#endif
- enqueue_task(p, rq->active);
- rq->nr_running++;
-+ ve_nr_running_inc(VE_TASK_INFO(p)->owner_env, task_cpu(p), cycles);
- }
-
- /*
-@@ -800,6 +997,25 @@ static void activate_task(task_t *p, run
- */
- static void deactivate_task(struct task_struct *p, runqueue_t *rq)
- {
-+ cycles_t cycles;
-+#ifdef CONFIG_VE
-+ unsigned int cpu;
-+ struct ve_struct *ve;
-+
-+ cycles = get_cycles();
-+ cpu = task_cpu(p);
-+ ve = p->ve_task_info.owner_env;
-+
-+ p->ve_task_info.sleep_time -= cycles;
-+#endif
-+ if (p->state == TASK_UNINTERRUPTIBLE)
-+ ve_nr_unint_inc(ve, cpu);
-+ if (p->state == TASK_INTERRUPTIBLE)
-+ rq->nr_sleeping++;
-+ if (p->state == TASK_STOPPED)
-+ rq->nr_stopped++;
-+
-+ ve_nr_running_dec(VE_TASK_INFO(p)->owner_env, task_cpu(p), cycles);
- rq->nr_running--;
- dequeue_task(p, p->array);
- p->array = NULL;
-@@ -913,6 +1129,7 @@ repeat:
- }
- task_rq_unlock(rq, &flags);
- }
-+EXPORT_SYMBOL_GPL(wait_task_inactive);
-
- /***
- * kick_process - kick a running thread to enter/exit the kernel
-@@ -1269,7 +1486,13 @@ out_set_cpu:
-
- out_activate:
- #endif /* CONFIG_SMP */
-- if (old_state == TASK_UNINTERRUPTIBLE) {
-+ if (old_state == TASK_INTERRUPTIBLE)
-+ rq->nr_sleeping--;
-+ else if (old_state == TASK_STOPPED)
-+ rq->nr_stopped--;
-+ else if (old_state == TASK_UNINTERRUPTIBLE) {
-+ ve_nr_unint_dec(p->ve_task_info.owner_env,
-+ smp_processor_id());
- rq->nr_uninterruptible--;
- /*
- * Tasks on involuntary sleep don't earn
-@@ -1369,6 +1592,10 @@ void fastcall sched_fork(task_t *p, int
- p->first_time_slice = 1;
- current->time_slice >>= 1;
- p->timestamp = sched_clock();
-+#ifdef CONFIG_VE
-+ /*cosmetic: sleep till wakeup below*/
-+ p->ve_task_info.sleep_time -= get_cycles();
-+#endif
- if (unlikely(!current->time_slice)) {
- /*
- * This case is rare, it happens when the parent has only
-@@ -1426,6 +1653,8 @@ void fastcall wake_up_new_task(task_t *p
- p->array = current->array;
- p->array->nr_active++;
- rq->nr_running++;
-+ ve_nr_running_inc(VE_TASK_INFO(p)->owner_env,
-+ task_cpu(p), get_cycles());
- }
- set_need_resched();
- } else
-@@ -1569,8 +1798,9 @@ asmlinkage void schedule_tail(task_t *pr
- preempt_enable();
- #endif
- if (current->set_child_tid)
-- put_user(current->pid, current->set_child_tid);
-+ put_user(virt_pid(current), current->set_child_tid);
- }
-+EXPORT_SYMBOL_GPL(schedule_tail);
-
- /*
- * context_switch - switch to the new MM and the new
-@@ -1617,6 +1847,7 @@ unsigned long nr_running(void)
-
- return sum;
- }
-+EXPORT_SYMBOL(nr_running);
-
- unsigned long nr_uninterruptible(void)
- {
-@@ -1635,6 +1866,8 @@ unsigned long nr_uninterruptible(void)
- return sum;
- }
-
-+EXPORT_SYMBOL(nr_uninterruptible);
-+
- unsigned long long nr_context_switches(void)
- {
- unsigned long long i, sum = 0;
-@@ -1645,6 +1878,8 @@ unsigned long long nr_context_switches(v
- return sum;
- }
-
-+EXPORT_SYMBOL(nr_context_switches);
-+
- unsigned long nr_iowait(void)
- {
- unsigned long i, sum = 0;
-@@ -1655,11 +1890,87 @@ unsigned long nr_iowait(void)
- return sum;
- }
-
-+EXPORT_SYMBOL(nr_iowait);
-+
-+unsigned long nr_stopped(void) /* sum of the per-runqueue nr_stopped counters */
-+{
-+ unsigned long i, sum = 0;
-+
-+ for_each_cpu(i)
-+ sum += cpu_rq(i)->nr_stopped;
-+
-+ return sum;
-+}
-+
-+EXPORT_SYMBOL(nr_stopped);
-+
-+unsigned long nr_sleeping(void) /* sum of the per-runqueue nr_sleeping counters */
-+{
-+ unsigned long i, sum = 0;
-+
-+ for_each_cpu(i)
-+ sum += cpu_rq(i)->nr_sleeping;
-+
-+ return sum;
-+}
-+
-+EXPORT_SYMBOL(nr_sleeping);
-+
-+#ifdef CONFIG_VE
-+unsigned long nr_running_ve(struct ve_struct *ve) /* nr_running of ve summed over the CPUs in its ve_cpu_online_map() mask */
-+{
-+ int i;
-+ long sum;
-+ cpumask_t ve_cpus;
-+
-+ sum = 0;
-+ ve_cpu_online_map(ve, &ve_cpus);
-+ for_each_cpu_mask(i, ve_cpus)
-+ sum += VE_CPU_STATS(ve, i)->nr_running;
-+ return (unsigned long)(sum < 0 ? 0 : sum); /* a negative total is reported as 0 */
-+}
-+
-+EXPORT_SYMBOL(nr_running_ve);
-+
-+unsigned long nr_uninterruptible_ve(struct ve_struct *ve) /* nr_unint of ve summed over the CPUs in its ve_cpu_online_map() mask */
-+{
-+ int i;
-+ long sum;
-+ cpumask_t ve_cpus;
-+
-+ sum = 0;
-+ ve_cpu_online_map(ve, &ve_cpus);
-+ for_each_cpu_mask(i, ve_cpus)
-+ sum += VE_CPU_STATS(ve, i)->nr_unint;
-+ return (unsigned long)(sum < 0 ? 0 : sum); /* a negative total is reported as 0 */
-+}
-+
-+EXPORT_SYMBOL(nr_uninterruptible_ve);
-+
-+unsigned long nr_iowait_ve(struct ve_struct *ve) /* nr_iowait of ve summed over the CPUs in its ve_cpu_online_map() mask */
-+{
-+ int i;
-+ long sum;
-+ cpumask_t ve_cpus;
-+
-+ sum = 0;
-+ ve_cpu_online_map(ve, &ve_cpus);
-+ for_each_cpu_mask(i, ve_cpus)
-+ sum += VE_CPU_STATS(ve, i)->nr_iowait;
-+ return (unsigned long)(sum < 0 ? 0 : sum); /* a negative total is reported as 0 */
-+}
-+
-+EXPORT_SYMBOL(nr_iowait_ve);
-+#endif
-+
- #ifdef CONFIG_SMP
-
- /*
- * double_rq_lock - safely lock two runqueues
- *
-+ * We must take them in cpu order to match code in
-+ * dependent_sleeper and wake_dependent_sleeper.
-+ *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-@@ -1671,7 +1982,7 @@ static void double_rq_lock(runqueue_t *r
- spin_lock(&rq1->lock);
- __acquire(rq2->lock); /* Fake it out ;) */
- } else {
-- if (rq1 < rq2) {
-+ if (rq1->cpu < rq2->cpu) {
- spin_lock(&rq1->lock);
- spin_lock(&rq2->lock);
- } else {
-@@ -1707,7 +2018,7 @@ static void double_lock_balance(runqueue
- __acquires(this_rq->lock)
- {
- if (unlikely(!spin_trylock(&busiest->lock))) {
-- if (busiest < this_rq) {
-+ if (busiest->cpu < this_rq->cpu) {
- spin_unlock(&this_rq->lock);
- spin_lock(&busiest->lock);
- spin_lock(&this_rq->lock);
-@@ -1769,10 +2080,18 @@ static
- void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
- runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
- {
-+ struct ve_struct *ve;
-+ cycles_t cycles;
-+
-+ cycles = get_cycles();
-+ ve = VE_TASK_INFO(p)->owner_env;
-+
- dequeue_task(p, src_array);
- src_rq->nr_running--;
-+ ve_nr_running_dec(ve, task_cpu(p), cycles);
- set_task_cpu(p, this_cpu);
- this_rq->nr_running++;
-+ ve_nr_running_inc(ve, task_cpu(p), cycles);
- enqueue_task(p, this_array);
- p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
- + this_rq->timestamp_last_tick;
-@@ -2476,6 +2795,15 @@ unsigned long long current_sched_time(co
- STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
- ((rq)->curr->static_prio > (rq)->best_expired_prio))
-
-+#ifdef CONFIG_VE
-+#define update_ve_cpu_time(p, time, tick) do { /* time is a field name of the per-VE CPU stats (callers pass user, nice, system) */ \
-+ VE_CPU_STATS((p)->ve_task_info.owner_env, \
-+ task_cpu(p))->time += tick; \
-+ } while (0)
-+#else
-+#define update_ve_cpu_time(p, time, tick) do { } while (0) /* !CONFIG_VE: expands to nothing */
-+#endif
-+
- /*
- * Account user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
-@@ -2491,10 +2819,13 @@ void account_user_time(struct task_struc
-
- /* Add user time to cpustat. */
- tmp = cputime_to_cputime64(cputime);
-- if (TASK_NICE(p) > 0)
-+ if (TASK_NICE(p) > 0) {
- cpustat->nice = cputime64_add(cpustat->nice, tmp);
-- else
-+ update_ve_cpu_time(p, nice, tmp);
-+ } else {
- cpustat->user = cputime64_add(cpustat->user, tmp);
-+ update_ve_cpu_time(p, user, tmp);
-+ }
- }
-
- /*
-@@ -2511,9 +2842,11 @@ void account_system_time(struct task_str
- cputime64_t tmp;
-
- p->stime = cputime_add(p->stime, cputime);
-+ tmp = cputime_to_cputime64(cputime);
-+
-+ update_ve_cpu_time(p, system, tmp);
-
- /* Add system time to cpustat. */
-- tmp = cputime_to_cputime64(cputime);
- if (hardirq_count() - hardirq_offset)
- cpustat->irq = cputime64_add(cpustat->irq, tmp);
- else if (softirq_count())
-@@ -3015,11 +3348,30 @@ switch_tasks:
-
- sched_info_switch(prev, next);
- if (likely(prev != next)) {
-+ cycles_t cycles;
-+
-+ cycles = get_cycles();
- next->timestamp = now;
- rq->nr_switches++;
- rq->curr = next;
- ++*switch_count;
-
-+#ifdef CONFIG_VE
-+ prev->ve_task_info.sleep_stamp = cycles;
-+ if (prev->state == TASK_RUNNING && prev != this_rq()->idle)
-+ write_wakeup_stamp(prev, cycles);
-+ update_sched_lat(next, cycles);
-+
-+ /* because next & prev are protected by the
-+ * runqueue lock we need not worry about
-+ * wakeup_stamp and sched_time protection
-+ * (same thing in 'else' branch below)
-+ */
-+ update_ve_task_info(prev, cycles);
-+ next->ve_task_info.sched_time = cycles;
-+ write_wakeup_stamp(next, 0);
-+#endif
-+
- prepare_task_switch(rq, next);
- prev = context_switch(rq, prev, next);
- barrier();
-@@ -3029,8 +3381,10 @@ switch_tasks:
- * frame will be invalid.
- */
- finish_task_switch(this_rq(), prev);
-- } else
-+ } else {
-+ update_ve_task_info(prev, get_cycles());
- spin_unlock_irq(&rq->lock);
-+ }
-
- prev = current;
- if (unlikely(reacquire_kernel_lock(prev) < 0))
-@@ -3593,7 +3947,7 @@ task_t *idle_task(int cpu)
- */
- static inline task_t *find_process_by_pid(pid_t pid)
- {
-- return pid ? find_task_by_pid(pid) : current;
-+ return pid ? find_task_by_pid_ve(pid) : current;
- }
-
- /* Actually do priority change: must hold rq lock. */
-@@ -3653,7 +4007,7 @@ recheck:
- /*
- * Allow unprivileged RT tasks to decrease priority:
- */
-- if (!capable(CAP_SYS_NICE)) {
-+ if (!capable(CAP_SYS_ADMIN)) {
- /*
- * can't change policy, except between SCHED_NORMAL
- * and SCHED_BATCH:
-@@ -4112,8 +4466,15 @@ void __sched io_schedule(void)
- {
- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
-
-+#ifdef CONFIG_VE
-+ struct ve_struct *ve;
-+ ve = current->ve_task_info.owner_env;
-+#endif
-+
- atomic_inc(&rq->nr_iowait);
-+ ve_nr_iowait_inc(ve, smp_processor_id());
- schedule();
-+ ve_nr_iowait_dec(ve, smp_processor_id());
- atomic_dec(&rq->nr_iowait);
- }
-
-@@ -4124,8 +4485,15 @@ long __sched io_schedule_timeout(long ti
- struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
- long ret;
-
-+#ifdef CONFIG_VE
-+ struct ve_struct *ve;
-+ ve = current->ve_task_info.owner_env;
-+#endif
-+
- atomic_inc(&rq->nr_iowait);
-+ ve_nr_iowait_inc(ve, smp_processor_id());
- ret = schedule_timeout(timeout);
-+ ve_nr_iowait_dec(ve, smp_processor_id());
- atomic_dec(&rq->nr_iowait);
- return ret;
- }
-@@ -4248,15 +4616,9 @@ static void show_task(task_t *p)
- else
- printk("?");
- #if (BITS_PER_LONG == 32)
-- if (state == TASK_RUNNING)
-- printk(" running ");
-- else
-- printk(" %08lX ", thread_saved_pc(p));
-+ printk(" %08lX ", (unsigned long)p);
- #else
-- if (state == TASK_RUNNING)
-- printk(" running task ");
-- else
-- printk(" %016lx ", thread_saved_pc(p));
-+ printk(" %016lx ", (unsigned long)p);
- #endif
- #ifdef CONFIG_DEBUG_STACK_USAGE
- {
-@@ -4295,21 +4657,21 @@ void show_state(void)
- #if (BITS_PER_LONG == 32)
- printk("\n"
- " sibling\n");
-- printk(" task PC pid father child younger older\n");
-+ printk(" task taskaddr pid father child younger older\n");
- #else
- printk("\n"
- " sibling\n");
-- printk(" task PC pid father child younger older\n");
-+ printk(" task taskaddr pid father child younger older\n");
- #endif
- read_lock(&tasklist_lock);
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- /*
- * reset the NMI-timeout, listing all files on a slow
- * console might take alot of time:
- */
- touch_nmi_watchdog();
- show_task(p);
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
-
- read_unlock(&tasklist_lock);
- mutex_debug_show_all_locks();
-@@ -4590,13 +4952,13 @@ static void migrate_live_tasks(int src_c
-
- write_lock_irq(&tasklist_lock);
-
-- do_each_thread(t, tsk) {
-+ do_each_thread_all(t, tsk) {
- if (tsk == current)
- continue;
-
- if (task_cpu(tsk) == src_cpu)
- move_task_off_dead_cpu(src_cpu, tsk);
-- } while_each_thread(t, tsk);
-+ } while_each_thread_all(t, tsk);
-
- write_unlock_irq(&tasklist_lock);
- }
-@@ -6035,6 +6397,7 @@ void __init sched_init(void)
- rq->push_cpu = 0;
- rq->migration_thread = NULL;
- INIT_LIST_HEAD(&rq->migration_queue);
-+ rq->cpu = i;
- #endif
- atomic_set(&rq->nr_iowait, 0);
-
-@@ -6095,7 +6458,7 @@ void normalize_rt_tasks(void)
- runqueue_t *rq;
-
- read_lock_irq(&tasklist_lock);
-- for_each_process (p) {
-+ for_each_process_all (p) {
- if (!rt_task(p))
- continue;
-
-diff -upr linux-2.6.16.orig/kernel/signal.c linux-2.6.16-026test009/kernel/signal.c
---- linux-2.6.16.orig/kernel/signal.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/signal.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,17 +25,20 @@
- #include <linux/posix-timers.h>
- #include <linux/signal.h>
- #include <linux/audit.h>
-+#include <linux/kmem_cache.h>
- #include <linux/capability.h>
- #include <asm/param.h>
- #include <asm/uaccess.h>
- #include <asm/unistd.h>
- #include <asm/siginfo.h>
-+#include <ub/ub_misc.h>
-
- /*
- * SLAB caches for signal bits.
- */
-
--static kmem_cache_t *sigqueue_cachep;
-+kmem_cache_t *sigqueue_cachep;
-+EXPORT_SYMBOL_GPL(sigqueue_cachep);
-
- /*
- * In POSIX a signal is sent either to a specific thread (Linux task)
-@@ -221,6 +224,7 @@ fastcall void recalc_sigpending_tsk(stru
- else
- clear_tsk_thread_flag(t, TIF_SIGPENDING);
- }
-+EXPORT_SYMBOL_GPL(recalc_sigpending_tsk);
-
- void recalc_sigpending(void)
- {
-@@ -271,8 +275,13 @@ static struct sigqueue *__sigqueue_alloc
- atomic_inc(&t->user->sigpending);
- if (override_rlimit ||
- atomic_read(&t->user->sigpending) <=
-- t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
-+ t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
- q = kmem_cache_alloc(sigqueue_cachep, flags);
-+ if (q && ub_siginfo_charge(q, get_task_ub(t))) {
-+ kmem_cache_free(sigqueue_cachep, q);
-+ q = NULL;
-+ }
-+ }
- if (unlikely(q == NULL)) {
- atomic_dec(&t->user->sigpending);
- } else {
-@@ -289,6 +298,7 @@ static void __sigqueue_free(struct sigqu
- return;
- atomic_dec(&q->user->sigpending);
- free_uid(q->user);
-+ ub_siginfo_uncharge(q);
- kmem_cache_free(sigqueue_cachep, q);
- }
-
-@@ -524,7 +534,16 @@ static int __dequeue_signal(struct sigpe
- {
- int sig = 0;
-
-- sig = next_signal(pending, mask);
-+ /* SIGKILL must have priority, otherwise it is quite easy
-+ * to create an unkillable process, sending sig < SIGKILL
-+ * to self */
-+ if (unlikely(sigismember(&pending->signal, SIGKILL))) {
-+ if (!sigismember(mask, SIGKILL))
-+ sig = SIGKILL;
-+ }
-+
-+ if (likely(!sig))
-+ sig = next_signal(pending, mask);
- if (sig) {
- if (current->notifier) {
- if (sigismember(current->notifier_mask, sig)) {
-@@ -618,6 +637,7 @@ void signal_wake_up(struct task_struct *
- if (!wake_up_state(t, mask))
- kick_process(t);
- }
-+EXPORT_SYMBOL_GPL(signal_wake_up);
-
- /*
- * Remove signals in mask from the pending set and queue.
-@@ -838,7 +858,7 @@ static int send_signal(int sig, struct s
- q->info.si_signo = sig;
- q->info.si_errno = 0;
- q->info.si_code = SI_USER;
-- q->info.si_pid = current->pid;
-+ q->info.si_pid = virt_pid(current);
- q->info.si_uid = current->uid;
- break;
- case (unsigned long) SEND_SIG_PRIV:
-@@ -975,7 +995,6 @@ __group_complete_signal(int sig, struct
- if (t == NULL)
- /* restart balancing at this thread */
- t = p->signal->curr_target = p;
-- BUG_ON(t->tgid != p->tgid);
-
- while (!wants_signal(sig, t)) {
- t = next_thread(t);
-@@ -1159,13 +1178,18 @@ int __kill_pg_info(int sig, struct sigin
- if (pgrp <= 0)
- return -EINVAL;
-
-+ /* Use __vpid_to_pid(). This function is used under write_lock
-+ * tasklist_lock. */
-+ if (is_virtual_pid(pgrp))
-+ pgrp = __vpid_to_pid(pgrp);
-+
- success = 0;
- retval = -ESRCH;
-- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
-+ do_each_task_pid_ve(pgrp, PIDTYPE_PGID, p) {
- int err = group_send_sig_info(sig, info, p);
- success |= !err;
- retval = err;
-- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
-+ } while_each_task_pid_ve(pgrp, PIDTYPE_PGID, p);
- return success ? 0 : retval;
- }
-
-@@ -1193,7 +1217,7 @@ kill_proc_info(int sig, struct siginfo *
- read_lock(&tasklist_lock);
- acquired_tasklist_lock = 1;
- }
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- error = -ESRCH;
- if (p)
- error = group_send_sig_info(sig, info, p);
-@@ -1214,7 +1238,7 @@ int kill_proc_info_as_uid(int sig, struc
- return ret;
-
- read_lock(&tasklist_lock);
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- if (!p) {
- ret = -ESRCH;
- goto out_unlock;
-@@ -1253,8 +1277,8 @@ static int kill_something_info(int sig,
- struct task_struct * p;
-
- read_lock(&tasklist_lock);
-- for_each_process(p) {
-- if (p->pid > 1 && p->tgid != current->tgid) {
-+ for_each_process_ve(p) {
-+ if (virt_pid(p) > 1 && p->tgid != current->tgid) {
- int err = group_send_sig_info(sig, info, p);
- ++count;
- if (err != -EPERM)
-@@ -1562,9 +1586,17 @@ void do_notify_parent(struct task_struct
- BUG_ON(!tsk->ptrace &&
- (tsk->group_leader != tsk || !thread_group_empty(tsk)));
-
-+#ifdef CONFIG_VE
-+ /* A parent in a different VE is only ever sent SIGCHLD */
-+ if (sig != SIGCHLD &&
-+ tsk->ve_task_info.owner_env !=
-+ tsk->parent->ve_task_info.owner_env)
-+ sig = SIGCHLD;
-+#endif
-+
- info.si_signo = sig;
- info.si_errno = 0;
-- info.si_pid = tsk->pid;
-+ info.si_pid = get_task_pid_ve(tsk, tsk->parent->ve_task_info.owner_env);
- info.si_uid = tsk->uid;
-
- /* FIXME: find out whether or not this is supposed to be c*time. */
-@@ -1629,7 +1661,7 @@ static void do_notify_parent_cldstop(str
-
- info.si_signo = SIGCHLD;
- info.si_errno = 0;
-- info.si_pid = tsk->pid;
-+ info.si_pid = get_task_pid_ve(tsk, VE_TASK_INFO(parent)->owner_env);
- info.si_uid = tsk->uid;
-
- /* FIXME: find out whether or not this is supposed to be c*time. */
-@@ -1763,7 +1795,9 @@ finish_stop(int stop_count)
- read_unlock(&tasklist_lock);
-
- out:
-+ set_stop_state(current);
- schedule();
-+ clear_stop_state(current);
- /*
- * Now we don't run again until continued.
- */
-@@ -1940,11 +1974,13 @@ relock:
- ptrace_signal_deliver(regs, cookie);
-
- /* Let the debugger run. */
-+ set_pn_state(current, PN_STOP_SIGNAL);
- ptrace_stop(signr, signr, info);
-+ clear_pn_state(current);
-
-- /* We're back. Did the debugger cancel the sig or group_exit? */
-+ /* We're back. Did the debugger cancel the sig? */
- signr = current->exit_code;
-- if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT)
-+ if (signr == 0)
- continue;
-
- current->exit_code = 0;
-@@ -1957,7 +1993,7 @@ relock:
- info->si_signo = signr;
- info->si_errno = 0;
- info->si_code = SI_USER;
-- info->si_pid = current->parent->pid;
-+ info->si_pid = virt_pid(current->parent);
- info->si_uid = current->parent->uid;
- }
-
-@@ -1988,8 +2024,14 @@ relock:
- continue;
-
- /* Init gets no signals it doesn't want. */
-- if (current->pid == 1)
-+ if (virt_pid(current) == 1) {
-+ /* Allow SIGKILL for non-root VE */
-+#ifdef CONFIG_VE
-+ if (current->pid == 1 ||
-+ signr != SIGKILL)
-+#endif
- continue;
-+ }
-
- if (sig_kernel_stop(signr)) {
- /*
-@@ -2307,7 +2349,6 @@ sys_rt_sigtimedwait(const sigset_t __use
-
- timeout = schedule_timeout_interruptible(timeout);
-
-- try_to_freeze();
- spin_lock_irq(&current->sighand->siglock);
- sig = dequeue_signal(current, &these, &info);
- current->blocked = current->real_blocked;
-@@ -2340,7 +2381,7 @@ sys_kill(int pid, int sig)
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_USER;
-- info.si_pid = current->tgid;
-+ info.si_pid = virt_tgid(current);
- info.si_uid = current->uid;
-
- return kill_something_info(sig, &info, pid);
-@@ -2356,12 +2397,12 @@ static int do_tkill(int tgid, int pid, i
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_TKILL;
-- info.si_pid = current->tgid;
-+ info.si_pid = virt_tgid(current);
- info.si_uid = current->uid;
-
- read_lock(&tasklist_lock);
-- p = find_task_by_pid(pid);
-- if (p && (tgid <= 0 || p->tgid == tgid)) {
-+ p = find_task_by_pid_ve(pid);
-+ if (p && (tgid <= 0 || virt_tgid(p) == tgid)) {
- error = check_kill_permission(sig, &info, p);
- /*
- * The null signal is a permissions and process existence
-diff -upr linux-2.6.16.orig/kernel/softirq.c linux-2.6.16-026test009/kernel/softirq.c
---- linux-2.6.16.orig/kernel/softirq.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/softirq.c 2006-04-19 15:02:12.000000000 +0400
-@@ -17,6 +17,8 @@
- #include <linux/kthread.h>
- #include <linux/rcupdate.h>
-
-+#include <ub/beancounter.h>
-+
- #include <asm/irq.h>
- /*
- - No shared variables, all the data are CPU local.
-@@ -73,10 +75,14 @@ static inline void wakeup_softirqd(void)
-
- asmlinkage void __do_softirq(void)
- {
-+ struct user_beancounter *ub;
- struct softirq_action *h;
- __u32 pending;
- int max_restart = MAX_SOFTIRQ_RESTART;
- int cpu;
-+ struct ve_struct *envid;
-+
-+ envid = set_exec_env(get_ve0());
-
- pending = local_softirq_pending();
-
-@@ -90,6 +96,7 @@ restart:
-
- h = softirq_vec;
-
-+ ub = set_exec_ub(get_ub0());
- do {
- if (pending & 1) {
- h->action(h);
-@@ -98,6 +105,7 @@ restart:
- h++;
- pending >>= 1;
- } while (pending);
-+ (void)set_exec_ub(ub);
-
- local_irq_disable();
-
-@@ -108,6 +116,7 @@ restart:
- if (pending)
- wakeup_softirqd();
-
-+ (void)set_exec_env(envid);
- __local_bh_enable();
- }
-
-diff -upr linux-2.6.16.orig/kernel/sys.c linux-2.6.16-026test009/kernel/sys.c
---- linux-2.6.16.orig/kernel/sys.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/sys.c 2006-04-19 15:02:12.000000000 +0400
-@@ -11,6 +11,7 @@
- #include <linux/mman.h>
- #include <linux/smp_lock.h>
- #include <linux/notifier.h>
-+#include <linux/virtinfo.h>
- #include <linux/reboot.h>
- #include <linux/prctl.h>
- #include <linux/init.h>
-@@ -236,6 +237,94 @@ int capable(int cap)
- EXPORT_SYMBOL(capable);
- #endif
-
-+static DECLARE_MUTEX(virtinfo_sem);
-+static struct vnotifier_block *virtinfo_chain[VIRT_TYPES];
-+
-+void virtinfo_notifier_register(int type, struct vnotifier_block *nb) /* link nb into virtinfo_chain[type], ordered by priority */
-+{
-+ struct vnotifier_block **p;
-+
-+ down(&virtinfo_sem);
-+ for (p = &virtinfo_chain[type]; /* empty-bodied loop: step past every entry whose priority is greater than nb's */
-+ *p != NULL && nb->priority < (*p)->priority;
-+ p = &(*p)->next);
-+ nb->next = *p;
-+ smp_wmb(); /* nb->next is written before nb is linked in; virtinfo_notifier_call() walks the chain without taking virtinfo_sem */
-+ *p = nb;
-+ up(&virtinfo_sem);
-+}
-+
-+EXPORT_SYMBOL(virtinfo_notifier_register);
-+
-+struct virtinfo_cnt_struct { /* per-CPU entry/exit counters for virtinfo_notifier_call() */
-+ volatile unsigned long exit[NR_CPUS]; /* exit[c]: calls that entered on this CPU and left on CPU c */
-+ volatile unsigned long entry; /* calls that entered on this CPU */
-+};
-+static DEFINE_PER_CPU(struct virtinfo_cnt_struct, virtcnt);
-+
-+void virtinfo_notifier_unregister(int type, struct vnotifier_block *nb) /* unlink nb from virtinfo_chain[type], then wait for the call counters to balance */
-+{
-+ struct vnotifier_block **p;
-+ int entry_cpu, exit_cpu;
-+ unsigned long cnt, ent;
-+
-+ down(&virtinfo_sem);
-+ for (p = &virtinfo_chain[type]; *p != nb; p = &(*p)->next); /* NOTE(review): no NULL check - nb must currently be on this chain */
-+ *p = nb->next;
-+ smp_mb();
-+
-+ for_each_cpu_mask(entry_cpu, cpu_possible_map) { /* for each CPU a call may have entered on ... */
-+ while (1) {
-+ cnt = 0;
-+ for_each_cpu_mask(exit_cpu, cpu_possible_map) /* ... total the exits recorded against that entry CPU */
-+ cnt +=
-+ per_cpu(virtcnt, entry_cpu).exit[exit_cpu];
-+ smp_rmb();
-+ ent = per_cpu(virtcnt, entry_cpu).entry;
-+ if (cnt == ent) /* exits match entries: nothing that entered on this CPU is still in flight */
-+ break;
-+ __set_current_state(TASK_UNINTERRUPTIBLE);
-+ schedule_timeout(HZ / 100); /* otherwise sleep HZ/100 jiffies and re-check */
-+ }
-+ }
-+ up(&virtinfo_sem);
-+}
-+
-+EXPORT_SYMBOL(virtinfo_notifier_unregister);
-+
-+int virtinfo_notifier_call(int type, unsigned long n, void *data) /* run the virtinfo_chain[type] callbacks in order; returns the last callback's result with NOTIFY_STOP_MASK cleared */
-+{
-+ int ret;
-+ int entry_cpu, exit_cpu;
-+ struct vnotifier_block *nb;
-+
-+ entry_cpu = get_cpu();
-+ per_cpu(virtcnt, entry_cpu).entry++; /* record the entry; virtinfo_notifier_unregister() compares it with the exit counts */
-+ smp_wmb();
-+ put_cpu();
-+
-+ nb = virtinfo_chain[type]; /* the chain is read without virtinfo_sem */
-+ ret = NOTIFY_DONE;
-+ while (nb)
-+ {
-+ ret = nb->notifier_call(nb, n, data, ret); /* each callback receives the previous callback's result */
-+ if(ret & NOTIFY_STOP_MASK) { /* a callback asked to stop: strip the flag and leave the chain */
-+ ret &= ~NOTIFY_STOP_MASK;
-+ break;
-+ }
-+ nb = nb->next;
-+ }
-+
-+ exit_cpu = get_cpu(); /* may differ from entry_cpu, hence the exit[] array indexed by exit CPU */
-+ smp_wmb();
-+ per_cpu(virtcnt, entry_cpu).exit[exit_cpu]++;
-+ put_cpu();
-+
-+ return ret;
-+}
-+
-+EXPORT_SYMBOL(virtinfo_notifier_call);
-+
- static int set_one_prio(struct task_struct *p, int niceval, int error)
- {
- int no_nice;
-@@ -281,17 +370,19 @@ asmlinkage long sys_setpriority(int whic
- switch (which) {
- case PRIO_PROCESS:
- if (!who)
-- who = current->pid;
-- p = find_task_by_pid(who);
-+ who = virt_pid(current);
-+ p = find_task_by_pid_ve(who);
- if (p)
- error = set_one_prio(p, niceval, error);
- break;
- case PRIO_PGRP:
- if (!who)
- who = process_group(current);
-- do_each_task_pid(who, PIDTYPE_PGID, p) {
-+ else
-+ who = vpid_to_pid(who);
-+ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
- error = set_one_prio(p, niceval, error);
-- } while_each_task_pid(who, PIDTYPE_PGID, p);
-+ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
- break;
- case PRIO_USER:
- user = current->user;
-@@ -301,10 +392,10 @@ asmlinkage long sys_setpriority(int whic
- if ((who != current->uid) && !(user = find_user(who)))
- goto out_unlock; /* No processes for this user */
-
-- do_each_thread(g, p)
-+ do_each_thread_ve(g, p)
- if (p->uid == who)
- error = set_one_prio(p, niceval, error);
-- while_each_thread(g, p);
-+ while_each_thread_ve(g, p);
- if (who != current->uid)
- free_uid(user); /* For find_user() */
- break;
-@@ -334,8 +425,8 @@ asmlinkage long sys_getpriority(int whic
- switch (which) {
- case PRIO_PROCESS:
- if (!who)
-- who = current->pid;
-- p = find_task_by_pid(who);
-+ who = virt_pid(current);
-+ p = find_task_by_pid_ve(who);
- if (p) {
- niceval = 20 - task_nice(p);
- if (niceval > retval)
-@@ -345,11 +436,13 @@ asmlinkage long sys_getpriority(int whic
- case PRIO_PGRP:
- if (!who)
- who = process_group(current);
-- do_each_task_pid(who, PIDTYPE_PGID, p) {
-+ else
-+ who = vpid_to_pid(who);
-+ do_each_task_pid_ve(who, PIDTYPE_PGID, p) {
- niceval = 20 - task_nice(p);
- if (niceval > retval)
- retval = niceval;
-- } while_each_task_pid(who, PIDTYPE_PGID, p);
-+ } while_each_task_pid_ve(who, PIDTYPE_PGID, p);
- break;
- case PRIO_USER:
- user = current->user;
-@@ -359,13 +452,13 @@ asmlinkage long sys_getpriority(int whic
- if ((who != current->uid) && !(user = find_user(who)))
- goto out_unlock; /* No processes for this user */
-
-- do_each_thread(g, p)
-+ do_each_thread_ve(g, p)
- if (p->uid == who) {
- niceval = 20 - task_nice(p);
- if (niceval > retval)
- retval = niceval;
- }
-- while_each_thread(g, p);
-+ while_each_thread_ve(g, p);
- if (who != current->uid)
- free_uid(user); /* for find_user() */
- break;
-@@ -497,6 +590,35 @@ asmlinkage long sys_reboot(int magic1, i
- magic2 != LINUX_REBOOT_MAGIC2C))
- return -EINVAL;
-
-+#ifdef CONFIG_VE
-+ if (!ve_is_super(get_exec_env()))
-+ switch (cmd) {
-+ case LINUX_REBOOT_CMD_RESTART:
-+ case LINUX_REBOOT_CMD_HALT:
-+ case LINUX_REBOOT_CMD_POWER_OFF:
-+ case LINUX_REBOOT_CMD_RESTART2: {
-+ struct siginfo info;
-+
-+ info.si_errno = 0;
-+ info.si_code = SI_KERNEL;
-+ info.si_pid = virt_pid(current);
-+ info.si_uid = current->uid;
-+ info.si_signo = SIGKILL;
-+
-+ /* Sending to real init is safe */
-+ send_sig_info(SIGKILL, &info,
-+ get_exec_env()->init_entry);
-+ }
-+
-+ case LINUX_REBOOT_CMD_CAD_ON:
-+ case LINUX_REBOOT_CMD_CAD_OFF:
-+ return 0;
-+
-+ default:
-+ return -EINVAL;
-+ }
-+#endif
-+
- /* Instead of trying to make the power_off code look like
- * halt when pm_power_off is not set do it the easy way.
- */
-@@ -686,7 +808,7 @@ asmlinkage long sys_setgid(gid_t gid)
- return 0;
- }
-
--static int set_user(uid_t new_ruid, int dumpclear)
-+int set_user(uid_t new_ruid, int dumpclear)
- {
- struct user_struct *new_user;
-
-@@ -711,6 +833,7 @@ static int set_user(uid_t new_ruid, int
- current->uid = new_ruid;
- return 0;
- }
-+EXPORT_SYMBOL(set_user);
-
- /*
- * Unprivileged users may change the real uid to the effective uid
-@@ -1079,7 +1202,12 @@ asmlinkage long sys_times(struct tms __u
- if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
- return -EFAULT;
- }
-+#ifndef CONFIG_VE
- return (long) jiffies_64_to_clock_t(get_jiffies_64());
-+#else
-+ return (long) jiffies_64_to_clock_t(get_jiffies_64() -
-+ get_exec_env()->start_jiffies);
-+#endif
- }
-
- /*
-@@ -1100,21 +1228,24 @@ asmlinkage long sys_setpgid(pid_t pid, p
- struct task_struct *p;
- struct task_struct *group_leader = current->group_leader;
- int err = -EINVAL;
-+ int _pgid;
-
- if (!pid)
-- pid = group_leader->pid;
-+ pid = virt_pid(group_leader);
- if (!pgid)
- pgid = pid;
- if (pgid < 0)
- return -EINVAL;
-
-+ _pgid = vpid_to_pid(pgid);
-+
- /* From this point forward we keep holding onto the tasklist lock
- * so that our parent does not change from under us. -DaveM
- */
- write_lock_irq(&tasklist_lock);
-
- err = -ESRCH;
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
- if (!p)
- goto out;
-
-@@ -1139,25 +1270,35 @@ asmlinkage long sys_setpgid(pid_t pid, p
- if (p->signal->leader)
- goto out;
-
-- if (pgid != pid) {
-+ pgid = virt_pid(p);
-+ if (_pgid != p->pid) {
- struct task_struct *p;
-
-- do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-- if (p->signal->session == group_leader->signal->session)
-+ do_each_task_pid_ve(_pgid, PIDTYPE_PGID, p) {
-+ if (p->signal->session == group_leader->signal->session) {
-+ pgid = virt_pgid(p);
- goto ok_pgid;
-- } while_each_task_pid(pgid, PIDTYPE_PGID, p);
-+ }
-+ } while_each_task_pid_ve(_pgid, PIDTYPE_PGID, p);
- goto out;
- }
-
- ok_pgid:
-- err = security_task_setpgid(p, pgid);
-+ err = security_task_setpgid(p, _pgid);
- if (err)
- goto out;
-
- if (process_group(p) != pgid) {
- detach_pid(p, PIDTYPE_PGID);
-- p->signal->pgrp = pgid;
-- attach_pid(p, PIDTYPE_PGID, pgid);
-+ p->signal->pgrp = _pgid;
-+ set_virt_pgid(p, pgid);
-+ attach_pid(p, PIDTYPE_PGID, _pgid);
-+ if (atomic_read(&p->signal->count) != 1) {
-+ task_t *t;
-+ for (t = next_thread(p); t != p; t = next_thread(t)) {
-+ set_virt_pgid(t, pgid);
-+ }
-+ }
- }
-
- err = 0;
-@@ -1170,19 +1311,19 @@ out:
- asmlinkage long sys_getpgid(pid_t pid)
- {
- if (!pid) {
-- return process_group(current);
-+ return virt_pgid(current);
- } else {
- int retval;
- struct task_struct *p;
-
- read_lock(&tasklist_lock);
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
-
- retval = -ESRCH;
- if (p) {
- retval = security_task_getpgid(p);
- if (!retval)
-- retval = process_group(p);
-+ retval = virt_pgid(p);
- }
- read_unlock(&tasklist_lock);
- return retval;
-@@ -1194,7 +1335,7 @@ asmlinkage long sys_getpgid(pid_t pid)
- asmlinkage long sys_getpgrp(void)
- {
- /* SMP - assuming writes are word atomic this is fine */
-- return process_group(current);
-+ return virt_pgid(current);
- }
-
- #endif
-@@ -1202,19 +1343,19 @@ asmlinkage long sys_getpgrp(void)
- asmlinkage long sys_getsid(pid_t pid)
- {
- if (!pid) {
-- return current->signal->session;
-+ return virt_sid(current);
- } else {
- int retval;
- struct task_struct *p;
-
- read_lock(&tasklist_lock);
-- p = find_task_by_pid(pid);
-+ p = find_task_by_pid_ve(pid);
-
- retval = -ESRCH;
- if(p) {
- retval = security_task_getsid(p);
- if (!retval)
-- retval = p->signal->session;
-+ retval = virt_sid(p);
- }
- read_unlock(&tasklist_lock);
- return retval;
-@@ -1236,9 +1377,20 @@ asmlinkage long sys_setsid(void)
-
- group_leader->signal->leader = 1;
- __set_special_pids(group_leader->pid, group_leader->pid);
-+ set_virt_pgid(group_leader, virt_pid(group_leader));
-+ set_virt_sid(group_leader, virt_pid(group_leader));
- group_leader->signal->tty = NULL;
- group_leader->signal->tty_old_pgrp = 0;
-- err = process_group(group_leader);
-+ if (atomic_read(&group_leader->signal->count) != 1) {
-+ task_t *t;
-+ for (t = next_thread(group_leader); t != group_leader;
-+ t = next_thread(t)) {
-+ set_virt_pgid(t, virt_pid(group_leader));
-+ set_virt_sid(t, virt_pid(group_leader));
-+ }
-+ }
-+
-+ err = virt_pgid(group_leader);
- out:
- write_unlock_irq(&tasklist_lock);
- up(&tty_sem);
-@@ -1518,7 +1670,7 @@ asmlinkage long sys_newuname(struct new_
- int errno = 0;
-
- down_read(&uts_sem);
-- if (copy_to_user(name,&system_utsname,sizeof *name))
-+ if (copy_to_user(name,&ve_utsname,sizeof *name))
- errno = -EFAULT;
- up_read(&uts_sem);
- return errno;
-@@ -1529,15 +1681,15 @@ asmlinkage long sys_sethostname(char __u
- int errno;
- char tmp[__NEW_UTS_LEN];
-
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- if (len < 0 || len > __NEW_UTS_LEN)
- return -EINVAL;
- down_write(&uts_sem);
- errno = -EFAULT;
- if (!copy_from_user(tmp, name, len)) {
-- memcpy(system_utsname.nodename, tmp, len);
-- system_utsname.nodename[len] = 0;
-+ memcpy(ve_utsname.nodename, tmp, len);
-+ ve_utsname.nodename[len] = 0;
- errno = 0;
- }
- up_write(&uts_sem);
-@@ -1553,11 +1705,11 @@ asmlinkage long sys_gethostname(char __u
- if (len < 0)
- return -EINVAL;
- down_read(&uts_sem);
-- i = 1 + strlen(system_utsname.nodename);
-+ i = 1 + strlen(ve_utsname.nodename);
- if (i > len)
- i = len;
- errno = 0;
-- if (copy_to_user(name, system_utsname.nodename, i))
-+ if (copy_to_user(name, ve_utsname.nodename, i))
- errno = -EFAULT;
- up_read(&uts_sem);
- return errno;
-@@ -1574,7 +1726,7 @@ asmlinkage long sys_setdomainname(char _
- int errno;
- char tmp[__NEW_UTS_LEN];
-
-- if (!capable(CAP_SYS_ADMIN))
-+ if (!capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- if (len < 0 || len > __NEW_UTS_LEN)
- return -EINVAL;
-@@ -1582,8 +1734,8 @@ asmlinkage long sys_setdomainname(char _
- down_write(&uts_sem);
- errno = -EFAULT;
- if (!copy_from_user(tmp, name, len)) {
-- memcpy(system_utsname.domainname, tmp, len);
-- system_utsname.domainname[len] = 0;
-+ memcpy(ve_utsname.domainname, tmp, len);
-+ ve_utsname.domainname[len] = 0;
- errno = 0;
- }
- up_write(&uts_sem);
-@@ -1657,7 +1809,19 @@ asmlinkage long sys_setrlimit(unsigned i
- (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
- new_rlim.rlim_cur <= cputime_to_secs(
- current->signal->it_prof_expires))) {
-- cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
-+ unsigned long rlim_cur = new_rlim.rlim_cur;
-+ cputime_t cputime;
-+
-+ if (rlim_cur == 0) {
-+ /*
-+ * The caller is asking for an immediate RLIMIT_CPU
-+ * expiry. But we use the zero value to mean "it was
-+ * never set". So let's cheat and make it one second
-+ * instead
-+ */
-+ rlim_cur = 1;
-+ }
-+ cputime = secs_to_cputime(rlim_cur);
- read_lock(&tasklist_lock);
- spin_lock_irq(&current->sighand->siglock);
- set_process_cpu_timer(current, CPUCLOCK_PROF,
-diff -upr linux-2.6.16.orig/kernel/sysctl.c linux-2.6.16-026test009/kernel/sysctl.c
---- linux-2.6.16.orig/kernel/sysctl.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/sysctl.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,6 +25,8 @@
- #include <linux/slab.h>
- #include <linux/sysctl.h>
- #include <linux/proc_fs.h>
-+#include <linux/ve_owner.h>
-+#include <linux/ve.h>
- #include <linux/capability.h>
- #include <linux/ctype.h>
- #include <linux/utsname.h>
-@@ -63,6 +65,7 @@ extern int max_threads;
- extern int sysrq_enabled;
- extern int core_uses_pid;
- extern int suid_dumpable;
-+extern int sysctl_at_vsyscall;
- extern char core_pattern[];
- extern int cad_pid;
- extern int pid_max;
-@@ -72,6 +75,12 @@ extern int printk_ratelimit_burst;
- extern int pid_max_min, pid_max_max;
- extern int sysctl_drop_caches;
- extern int percpu_pagelist_fraction;
-+#ifdef CONFIG_VE
-+int glob_virt_pids = 1;
-+EXPORT_SYMBOL(glob_virt_pids);
-+#endif
-+
-+extern int ve_area_access_check; /* fs/namei.c */
-
- #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
- int unknown_nmi_panic;
-@@ -122,6 +131,7 @@ extern int spin_retry;
- #endif
-
- extern int sysctl_hz_timer;
-+int decode_call_traces;
-
- #ifdef CONFIG_BSD_PROCESS_ACCT
- extern int acct_parm[];
-@@ -133,8 +143,6 @@ extern int no_unaligned_warning;
-
- static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
- ctl_table *, void **);
--static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-- void __user *buffer, size_t *lenp, loff_t *ppos);
-
- static ctl_table root_table[];
- static struct ctl_table_header root_table_header =
-@@ -178,6 +186,8 @@ static void register_proc_table(ctl_tabl
- static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
- #endif
-
-+extern struct new_utsname virt_utsname;
-+
- /* The default sysctl tables: */
-
- static ctl_table root_table[] = {
-@@ -276,6 +286,15 @@ static ctl_table kern_table[] = {
- .strategy = &sysctl_string,
- },
- {
-+ .ctl_name = KERN_VIRT_OSRELEASE,
-+ .procname = "virt_osrelease",
-+ .data = virt_utsname.release,
-+ .maxlen = sizeof(virt_utsname.release),
-+ .mode = 0644,
-+ .proc_handler = &proc_doutsstring,
-+ .strategy = &sysctl_string,
-+ },
-+ {
- .ctl_name = KERN_PANIC,
- .procname = "panic",
- .data = &panic_timeout,
-@@ -590,6 +609,16 @@ static ctl_table kern_table[] = {
- .extra1 = &pid_max_min,
- .extra2 = &pid_max_max,
- },
-+#ifdef CONFIG_VE
-+ {
-+ .ctl_name = KERN_VIRT_PIDS,
-+ .procname = "virt_pids",
-+ .data = &glob_virt_pids,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+#endif
- {
- .ctl_name = KERN_PANIC_ON_OOPS,
- .procname = "panic_on_oops",
-@@ -1046,10 +1075,26 @@ static ctl_table fs_table[] = {
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
-+ {
-+ .ctl_name = FS_AT_VSYSCALL,
-+ .procname = "vsyscall",
-+ .data = &sysctl_at_vsyscall,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
- { .ctl_name = 0 }
- };
-
- static ctl_table debug_table[] = {
-+ {
-+ .ctl_name = DBG_DECODE_CALLTRACES,
-+ .procname = "decode_call_traces",
-+ .data = &decode_call_traces,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
- { .ctl_name = 0 }
- };
-
-@@ -1113,6 +1158,7 @@ int do_sysctl(int __user *name, int nlen
- {
- struct list_head *tmp;
- int error = -ENOTDIR;
-+ struct ve_struct *ve;
-
- if (nlen <= 0 || nlen >= CTL_MAXNAME)
- return -ENOTDIR;
-@@ -1121,13 +1167,24 @@ int do_sysctl(int __user *name, int nlen
- if (!oldlenp || get_user(old_len, oldlenp))
- return -EFAULT;
- }
-+ ve = get_exec_env();
- spin_lock(&sysctl_lock);
-+#ifdef CONFIG_VE
-+ tmp = ve->sysctl_lh.next;
-+#else
- tmp = &root_table_header.ctl_entry;
-+#endif
- do {
-- struct ctl_table_header *head =
-- list_entry(tmp, struct ctl_table_header, ctl_entry);
-+ struct ctl_table_header *head;
- void *context = NULL;
-
-+#ifdef CONFIG_VE
-+ if (tmp == &ve->sysctl_lh)
-+ /* second pass over global variables */
-+ tmp = &root_table_header.ctl_entry;
-+#endif
-+
-+ head = list_entry(tmp, struct ctl_table_header, ctl_entry);
- if (!use_table(head))
- continue;
-
-@@ -1181,10 +1238,14 @@ static int test_perm(int mode, int op)
- static inline int ctl_perm(ctl_table *table, int op)
- {
- int error;
-+ int mode = table->mode;
-+
- error = security_sysctl(table, op);
- if (error)
- return error;
-- return test_perm(table->mode, op);
-+ if (!ve_accessible(table->owner_env, get_exec_env()))
-+ mode &= ~0222; /* disable write access */
-+ return test_perm(mode, op);
- }
-
- static int parse_table(int __user *name, int nlen,
-@@ -1350,6 +1411,8 @@ struct ctl_table_header *register_sysctl
- int insert_at_head)
- {
- struct ctl_table_header *tmp;
-+ struct list_head *lh;
-+
- tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
- if (!tmp)
- return NULL;
-@@ -1358,17 +1421,52 @@ struct ctl_table_header *register_sysctl
- tmp->used = 0;
- tmp->unregistering = NULL;
- spin_lock(&sysctl_lock);
-+#ifdef CONFIG_VE
-+ lh = &get_exec_env()->sysctl_lh;
-+#else
-+ lh = &root_table_header.ctl_entry;
-+#endif
- if (insert_at_head)
-- list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
-+ list_add(&tmp->ctl_entry, lh);
- else
-- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
-+ list_add_tail(&tmp->ctl_entry, lh);
- spin_unlock(&sysctl_lock);
- #ifdef CONFIG_PROC_FS
-+#ifdef CONFIG_VE
-+ register_proc_table(table, get_exec_env()->proc_sys_root, tmp);
-+#else
- register_proc_table(table, proc_sys_root, tmp);
- #endif
-+#endif
- return tmp;
- }
-
-+void free_sysctl_clone(ctl_table *clone)
-+{
-+ kfree(clone);
-+}
-+
-+ctl_table *clone_sysctl_template(ctl_table *tmpl, int nr)
-+{
-+ int i;
-+ ctl_table *clone;
-+
-+ clone = kmalloc(nr * sizeof(ctl_table), GFP_KERNEL);
-+ if (clone == NULL)
-+ return NULL;
-+
-+ memcpy(clone, tmpl, nr * sizeof(ctl_table));
-+ for (i = 0; i < nr; i++) {
-+ if (tmpl[i].ctl_name == 0)
-+ continue;
-+ clone[i].owner_env = get_exec_env();
-+ if (tmpl[i].child == NULL)
-+ continue;
-+ clone[i].child = clone + (tmpl[i].child - tmpl);
-+ }
-+ return clone;
-+}
-+
- /**
- * unregister_sysctl_table - unregister a sysctl table hierarchy
- * @header: the header returned from register_sysctl_table
-@@ -1382,8 +1480,12 @@ void unregister_sysctl_table(struct ctl_
- spin_lock(&sysctl_lock);
- start_unregistering(header);
- #ifdef CONFIG_PROC_FS
-+#ifdef CONFIG_VE
-+ unregister_proc_table(header->ctl_table, get_exec_env()->proc_sys_root);
-+#else
- unregister_proc_table(header->ctl_table, proc_sys_root);
- #endif
-+#endif
- spin_unlock(&sysctl_lock);
- kfree(header);
- }
-@@ -1469,11 +1571,6 @@ static void unregister_proc_table(ctl_ta
- * its fields. We are under sysctl_lock here.
- */
- de->data = NULL;
--
-- /* Don't unregister proc entries that are still being used.. */
-- if (atomic_read(&de->count))
-- continue;
--
- table->de = NULL;
- remove_proc_entry(table->procname, root);
- }
-@@ -1615,7 +1712,7 @@ int proc_dostring(ctl_table *table, int
- * to observe. Should this be in kernel/sys.c ????
- */
-
--static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
- int r;
-@@ -2190,7 +2287,7 @@ int proc_dostring(ctl_table *table, int
- return -ENOSYS;
- }
-
--static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-+int proc_doutsstring(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
- return -ENOSYS;
-@@ -2494,6 +2591,14 @@ void unregister_sysctl_table(struct ctl_
- {
- }
-
-+ctl_table * clone_sysctl_template(ctl_table *tmpl, int nr)
-+{
-+ return NULL;
-+}
-+
-+void free_sysctl_clone(ctl_table *tmpl)
-+{
-+}
- #endif /* CONFIG_SYSCTL */
-
- /*
-@@ -2506,6 +2611,7 @@ EXPORT_SYMBOL(proc_dointvec_minmax);
- EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
- EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
- EXPORT_SYMBOL(proc_dostring);
-+EXPORT_SYMBOL(proc_doutsstring);
- EXPORT_SYMBOL(proc_doulongvec_minmax);
- EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
- EXPORT_SYMBOL(register_sysctl_table);
-@@ -2514,3 +2620,5 @@ EXPORT_SYMBOL(sysctl_jiffies);
- EXPORT_SYMBOL(sysctl_ms_jiffies);
- EXPORT_SYMBOL(sysctl_string);
- EXPORT_SYMBOL(unregister_sysctl_table);
-+EXPORT_SYMBOL(clone_sysctl_template);
-+EXPORT_SYMBOL(free_sysctl_clone);
-diff -upr linux-2.6.16.orig/kernel/timer.c linux-2.6.16-026test009/kernel/timer.c
---- linux-2.6.16.orig/kernel/timer.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/timer.c 2006-04-19 15:02:12.000000000 +0400
-@@ -460,7 +460,11 @@ static inline void __run_timers(tvec_bas
- spin_unlock_irq(&base->t_base.lock);
- {
- int preempt_count = preempt_count();
-+ struct ve_struct *ve;
-+
-+ ve = set_exec_env(get_ve0());
- fn(data);
-+ (void)set_exec_env(ve);
- if (preempt_count != preempt_count()) {
- printk(KERN_WARNING "huh, entered %p "
- "with preempt_count %08x, exited"
-@@ -868,6 +872,23 @@ EXPORT_SYMBOL(avenrun);
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-+
-+static void calc_load_ve(void)
-+{
-+ unsigned long flags, nr_unint;
-+
-+ nr_unint = nr_uninterruptible() * FIXED_1;
-+ spin_lock_irqsave(&kstat_glb_lock, flags);
-+ CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
-+ CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
-+ CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
-+ spin_unlock_irqrestore(&kstat_glb_lock, flags);
-+
-+#ifdef CONFIG_VE
-+ do_update_load_avg_ve();
-+#endif
-+}
-+
- static inline void calc_load(unsigned long ticks)
- {
- unsigned long active_tasks; /* fixed-point */
-@@ -880,6 +901,7 @@ static inline void calc_load(unsigned lo
- CALC_LOAD(avenrun[0], EXP_1, active_tasks);
- CALC_LOAD(avenrun[1], EXP_5, active_tasks);
- CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-+ calc_load_ve();
- }
- }
-
-@@ -990,7 +1012,7 @@ asmlinkage unsigned long sys_alarm(unsig
- */
- asmlinkage long sys_getpid(void)
- {
-- return current->tgid;
-+ return virt_tgid(current);
- }
-
- /*
-@@ -1012,12 +1034,13 @@ asmlinkage long sys_getpid(void)
- asmlinkage long sys_getppid(void)
- {
- int pid;
-+#ifndef CONFIG_DEBUG_SLAB
- struct task_struct *me = current;
- struct task_struct *parent;
-
- parent = me->group_leader->real_parent;
- for (;;) {
-- pid = parent->tgid;
-+ pid = virt_tgid(parent);
- #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
- {
- struct task_struct *old = parent;
-@@ -1034,6 +1057,16 @@ asmlinkage long sys_getppid(void)
- #endif
- break;
- }
-+#else
-+ /*
-+ * ->real_parent could be released before dereference and
-+ * we accessed freed kernel memory, which faults with debugging on.
-+ * Keep it simple and stupid.
-+ */
-+ read_lock(&tasklist_lock);
-+ pid = virt_tgid(current->group_leader->real_parent);
-+ read_unlock(&tasklist_lock);
-+#endif
- return pid;
- }
-
-@@ -1164,7 +1197,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterru
- /* Thread ID - the internal kernel "pid" */
- asmlinkage long sys_gettid(void)
- {
-- return current->pid;
-+ return virt_pid(current);
- }
-
- /*
-@@ -1176,11 +1209,12 @@ asmlinkage long sys_sysinfo(struct sysin
- unsigned long mem_total, sav_total;
- unsigned int mem_unit, bitcount;
- unsigned long seq;
-+ unsigned long *__avenrun;
-+ struct timespec tp;
-
- memset((char *)&val, 0, sizeof(struct sysinfo));
-
- do {
-- struct timespec tp;
- seq = read_seqbegin(&xtime_lock);
-
- /*
-@@ -1197,14 +1231,25 @@ asmlinkage long sys_sysinfo(struct sysin
- tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
- tp.tv_sec++;
- }
-- val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
--
-- val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-- val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-- val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
-+ } while (read_seqretry(&xtime_lock, seq));
-
-+ if (ve_is_super(get_exec_env())) {
-+ val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-+ __avenrun = &avenrun[0];
- val.procs = nr_threads;
-- } while (read_seqretry(&xtime_lock, seq));
-+ }
-+#ifdef CONFIG_VE
-+ else {
-+ struct ve_struct *ve;
-+ ve = get_exec_env();
-+ __avenrun = &ve->avenrun[0];
-+ val.procs = atomic_read(&ve->pcounter);
-+ val.uptime = tp.tv_sec - ve->start_timespec.tv_sec;
-+ }
-+#endif
-+ val.loads[0] = __avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-+ val.loads[1] = __avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-+ val.loads[2] = __avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
-
- si_meminfo(&val);
- si_swapinfo(&val);
-diff -upr linux-2.6.16.orig/kernel/ub/Kconfig linux-2.6.16-026test009/kernel/ub/Kconfig
---- linux-2.6.16.orig/kernel/ub/Kconfig 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/Kconfig 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,89 @@
-+#
-+# User resources part (UBC)
-+#
-+# Copyright (C) 2005 SWsoft
-+# All rights reserved.
-+#
-+# Licensing governed by "linux/COPYING.SWsoft" file.
-+
-+menu "User resources"
-+
-+config USER_RESOURCE
-+ bool "Enable user resource accounting"
-+ default y
-+ help
-+ This patch provides accounting and allows configuring
-+ limits for user's consumption of exhaustible system resources.
-+ The most important resource controlled by this patch is unswappable
-+ memory (either mlock'ed or used by internal kernel structures and
-+ buffers). The main goal of this patch is to protect processes
-+ from running short of important resources because of an accidental
-+ misbehavior of processes or malicious activity aiming to ``kill''
-+ the system. It's worth mentioning that resource limits configured
-+ by setrlimit(2) do not give an acceptable level of protection
-+ because they cover only a small fraction of resources and work on a
-+ per-process basis. Per-process accounting doesn't prevent malicious
-+ users from spawning a lot of resource-consuming processes.
-+
-+config USER_RSS_ACCOUNTING
-+ bool "Account physical memory usage"
-+ default y
-+ depends on USER_RESOURCE
-+ help
-+ This allows estimating per-beancounter physical memory usage.
-+ The implemented algorithm accounts shared pages of memory as well,
-+ dividing them by the number of beancounters which use the page.
-+
-+config USER_SWAP_ACCOUNTING
-+ bool "Account swap usage"
-+ default y
-+ depends on USER_RESOURCE
-+ help
-+ This allows accounting of swap usage.
-+
-+config USER_RESOURCE_PROC
-+ bool "Report resource usage in /proc"
-+ default y
-+ depends on USER_RESOURCE
-+ help
-+ Allows a system administrator to inspect resource accounts and limits.
-+
-+config UBC_DEBUG
-+ bool "User resources debug features"
-+ default n
-+ depends on USER_RESOURCE
-+ help
-+ Enables setting up debug features for user resource accounting.
-+
-+config UBC_DEBUG_KMEM
-+ bool "Debug kmemsize with cache counters"
-+ default n
-+ depends on UBC_DEBUG
-+ help
-+ Adds /proc/user_beancounters_debug entry to get statistics
-+ about cache usage of each beancounter
-+
-+config UBC_KEEP_UNUSED
-+ bool "Keep unused beancounter alive"
-+ default y
-+ depends on UBC_DEBUG
-+ help
-+ If on, unused beancounters are kept in the hash so that their
-+ maxheld values can still be inspected.
-+
-+config UBC_DEBUG_ITEMS
-+ bool "Account resources in items rather than in bytes"
-+ default y
-+ depends on UBC_DEBUG
-+ help
-+ When true some of the resources (e.g. kmemsize) are accounted
-+ in items instead of bytes.
-+
-+config UBC_UNLIMITED
-+ bool "Use unlimited ubc settings"
-+ default y
-+ depends on UBC_DEBUG
-+ help
-+ When ON all limits and barriers are set to max values.
-+
-+endmenu
-diff -upr linux-2.6.16.orig/kernel/ub/Makefile linux-2.6.16-026test009/kernel/ub/Makefile
---- linux-2.6.16.orig/kernel/ub/Makefile 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/Makefile 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,20 @@
-+#
-+# User resources part (UBC)
-+#
-+# Copyright (C) 2005 SWsoft
-+# All rights reserved.
-+#
-+# Licensing governed by "linux/COPYING.SWsoft" file.
-+
-+obj-y := ub_sys.o
-+obj-$(CONFIG_USER_RESOURCE) += beancounter.o
-+obj-$(CONFIG_USER_RESOURCE) += ub_dcache.o
-+obj-$(CONFIG_USER_RESOURCE) += ub_mem.o
-+obj-$(CONFIG_USER_RESOURCE) += ub_misc.o
-+obj-$(CONFIG_USER_RESOURCE) += ub_net.o
-+obj-$(CONFIG_USER_RESOURCE) += ub_pages.o
-+obj-$(CONFIG_USER_RESOURCE) += ub_stat.o
-+# obj-$(CONFIG_USER_RESOURCE) += ub_oom.o
-+
-+obj-$(CONFIG_USER_RSS_ACCOUNTING) += ub_page_bc.o
-+obj-$(CONFIG_USER_RESOURCE_PROC) += ub_proc.o
-diff -upr linux-2.6.16.orig/kernel/ub/beancounter.c linux-2.6.16-026test009/kernel/ub/beancounter.c
---- linux-2.6.16.orig/kernel/ub/beancounter.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/beancounter.c 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,675 @@
-+/*
-+ * linux/kernel/ub/beancounter.c
-+ *
-+ * Copyright (C) 1998 Alan Cox
-+ * 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg>
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * TODO:
-+ * - more intelligent limit check in mremap(): currently the new size is
-+ * charged and _then_ old size is uncharged
-+ * (almost done: !move_vma case is completely done,
-+ * move_vma in its current implementation requires too many conditions to
-+ * do things right, because it may be not only expansion, but shrinking
-+ * also, plus do_munmap will require an additional parameter...)
-+ * - problem: bad pmd page handling
-+ * - consider /proc redesign
-+ * - TCP/UDP ports
-+ * + consider whether __charge_beancounter_locked should be inline
-+ *
-+ * Changes:
-+ * 1999/08/17 Marcelo Tosatti <marcelo@conectiva.com.br>
-+ * - Set "barrier" and "limit" parts of limits atomically.
-+ * 1999/10/06 Marcelo Tosatti <marcelo@conectiva.com.br>
-+ * - setublimit system call.
-+ */
-+
-+#include <linux/slab.h>
-+#include <linux/module.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_hash.h>
-+#include <ub/ub_vmpages.h>
-+
-+static kmem_cache_t *ub_cachep;
-+static struct user_beancounter default_beancounter;
-+struct user_beancounter ub0;
-+
-+const char *ub_rnames[] = {
-+ "kmemsize", /* 0 */
-+ "lockedpages",
-+ "privvmpages",
-+ "shmpages",
-+ "dummy",
-+ "numproc", /* 5 */
-+ "physpages",
-+ "vmguarpages",
-+ "oomguarpages",
-+ "numtcpsock",
-+ "numflock", /* 10 */
-+ "numpty",
-+ "numsiginfo",
-+ "tcpsndbuf",
-+ "tcprcvbuf",
-+ "othersockbuf", /* 15 */
-+ "dgramrcvbuf",
-+ "numothersock",
-+ "dcachesize",
-+ "numfile",
-+ "dummy", /* 20 */
-+ "dummy",
-+ "dummy",
-+ "numiptent",
-+ "unused_privvmpages", /* UB_RESOURCES */
-+ "tmpfs_respages",
-+ "swap_pages",
-+ "held_pages",
-+};
-+
-+static void init_beancounter_struct(struct user_beancounter *ub);
-+static void init_beancounter_store(struct user_beancounter *ub);
-+static void init_beancounter_nolimits(struct user_beancounter *ub);
-+
-+void print_ub_uid(struct user_beancounter *ub, char *buf, int size)
-+{
-+ if (ub->parent != NULL)
-+ snprintf(buf, size, "%u.%u", ub->parent->ub_uid, ub->ub_uid);
-+ else
-+ snprintf(buf, size, "%u", ub->ub_uid);
-+}
-+EXPORT_SYMBOL(print_ub_uid);
-+
-+#define ub_hash_fun(x) ((((x) >> 8) ^ (x)) & (UB_HASH_SIZE - 1))
-+#define ub_subhash_fun(p, id) ub_hash_fun((p)->ub_uid + (id) * 17)
-+struct ub_hash_slot ub_hash[UB_HASH_SIZE];
-+spinlock_t ub_hash_lock;
-+EXPORT_SYMBOL(ub_hash);
-+EXPORT_SYMBOL(ub_hash_lock);
-+
-+/*
-+ * Per user resource beancounting. Resources are tied to their luid.
-+ * The resource structure itself is tagged both to the process and
-+ * the charging resources (a socket doesn't want to have to search for
-+ * things at irq time for example). Reference counters keep things in
-+ * hand.
-+ *
-+ * The case where a user creates resource, kills all his processes and
-+ * then starts new ones is correctly handled this way. The refcounters
-+ * will mean the old entry is still around with resource tied to it.
-+ */
-+struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
-+{
-+ struct user_beancounter *new_ub, *ub;
-+ unsigned long flags;
-+ struct ub_hash_slot *slot;
-+
-+ slot = &ub_hash[ub_hash_fun(uid)];
-+ new_ub = NULL;
-+
-+retry:
-+ spin_lock_irqsave(&ub_hash_lock, flags);
-+ ub = slot->ubh_beans;
-+ while (ub != NULL && (ub->ub_uid != uid || ub->parent != NULL))
-+ ub = ub->ub_next;
-+
-+ if (ub != NULL) {
-+ /* found */
-+ get_beancounter(ub);
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ if (new_ub != NULL)
-+ kmem_cache_free(ub_cachep, new_ub);
-+ return ub;
-+ }
-+
-+ if (!create) {
-+ /* no ub found */
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ return NULL;
-+ }
-+
-+ if (new_ub != NULL) {
-+ /* install new ub */
-+ new_ub->ub_next = slot->ubh_beans;
-+ slot->ubh_beans = new_ub;
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ return new_ub;
-+ }
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+
-+ /* alloc new ub */
-+ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
-+ GFP_KERNEL);
-+ if (new_ub == NULL)
-+ return NULL;
-+
-+ ub_debug(UBD_ALLOC, "Creating ub %p in slot %p\n", new_ub, slot);
-+ memcpy(new_ub, &default_beancounter, sizeof(*new_ub));
-+ init_beancounter_struct(new_ub);
-+ new_ub->ub_uid = uid;
-+ goto retry;
-+}
-+EXPORT_SYMBOL(get_beancounter_byuid);
-+
-+/*
-+ * Find the sub-beancounter of @p identified by @id in ub_hash, creating it
-+ * when @create is non-zero and it does not exist yet.
-+ *
-+ * Returns the beancounter with a reference taken for the caller, or NULL if
-+ * it was not found and @create is zero, or if the allocation failed.
-+ *
-+ * The GFP_KERNEL allocation is done with ub_hash_lock released, so after
-+ * allocating the lookup is redone from "retry" before the new object is
-+ * inserted.
-+ */
-+struct user_beancounter *get_subbeancounter_byid(struct user_beancounter *p,
-+ int id, int create)
-+{
-+ struct user_beancounter *new_ub, *ub;
-+ unsigned long flags;
-+ struct ub_hash_slot *slot;
-+
-+ slot = &ub_hash[ub_subhash_fun(p, id)];
-+ new_ub = NULL;
-+
-+retry:
-+ spin_lock_irqsave(&ub_hash_lock, flags);
-+ ub = slot->ubh_beans;
-+ /* a sub-beancounter is identified by the (parent, id) pair */
-+ while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
-+ ub = ub->ub_next;
-+
-+ if (ub != NULL) {
-+ /* found */
-+ get_beancounter(ub);
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ if (new_ub != NULL) {
-+ /*
-+ * The entry appeared while the lock was dropped: release the
-+ * parent reference taken for the unused object and free it.
-+ */
-+ put_beancounter(new_ub->parent);
-+ kmem_cache_free(ub_cachep, new_ub);
-+ }
-+ return ub;
-+ }
-+
-+ if (!create) {
-+ /* no ub found */
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ return NULL;
-+ }
-+
-+ if (new_ub != NULL) {
-+ /* install new ub */
-+ /* this reference is the one handed back to the caller */
-+ get_beancounter(new_ub);
-+ new_ub->ub_next = slot->ubh_beans;
-+ slot->ubh_beans = new_ub;
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ return new_ub;
-+ }
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+
-+ /* alloc new ub */
-+ new_ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep,
-+ GFP_KERNEL);
-+ if (new_ub == NULL)
-+ return NULL;
-+
-+ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", new_ub, slot);
-+ memset(new_ub, 0, sizeof(*new_ub));
-+ init_beancounter_nolimits(new_ub);
-+ init_beancounter_store(new_ub);
-+ init_beancounter_struct(new_ub);
-+ /*
-+ * init_beancounter_struct() sets the refcount to 1; restart from 0 so
-+ * that the get_beancounter() on the install path is the only reference.
-+ */
-+ atomic_set(&new_ub->ub_refcount, 0);
-+ new_ub->ub_uid = id;
-+ /* the child pins its parent */
-+ new_ub->parent = get_beancounter(p);
-+ goto retry;
-+}
-+EXPORT_SYMBOL(get_subbeancounter_byid);
-+
-+/*
-+ * Find the sub-beancounter of @p identified by @id, creating it if it is
-+ * missing.  Unlike get_subbeancounter_byid() the whole operation, including
-+ * the allocation, runs under ub_hash_lock, hence the GFP_ATOMIC allocation.
-+ *
-+ * Returns the beancounter with a reference taken for the caller, or NULL if
-+ * the allocation failed.
-+ */
-+struct user_beancounter *subbeancounter_findcreate(struct user_beancounter *p,
-+ int id)
-+{
-+ struct user_beancounter *ub;
-+ unsigned long flags;
-+ struct ub_hash_slot *slot;
-+
-+ slot = &ub_hash[ub_subhash_fun(p, id)];
-+
-+ spin_lock_irqsave(&ub_hash_lock, flags);
-+ ub = slot->ubh_beans;
-+ while (ub != NULL && (ub->parent != p || ub->ub_uid != id))
-+ ub = ub->ub_next;
-+
-+ if (ub != NULL) {
-+ /* found */
-+ get_beancounter(ub);
-+ goto done;
-+ }
-+
-+ /* alloc new ub */
-+ /* Can be called from non-atomic contexts. Den */
-+ /*
-+ * NOTE(review): the allocation is done with ub_hash_lock held and
-+ * interrupts disabled, so it must not sleep regardless of the caller's
-+ * context; the note above possibly meant "atomic contexts" - confirm.
-+ */
-+ ub = (struct user_beancounter *)kmem_cache_alloc(ub_cachep, GFP_ATOMIC);
-+ if (ub == NULL)
-+ goto done;
-+
-+ ub_debug(UBD_ALLOC, "Creating sub %p in slot %p\n", ub, slot);
-+ memset(ub, 0, sizeof(*ub));
-+ init_beancounter_nolimits(ub);
-+ init_beancounter_store(ub);
-+ init_beancounter_struct(ub);
-+ /* drop the initial count of 1; the reference is taken on install below */
-+ atomic_set(&ub->ub_refcount, 0);
-+ ub->ub_uid = id;
-+ ub->parent = get_beancounter(p);
-+
-+ /* install new ub */
-+ get_beancounter(ub);
-+ ub->ub_next = slot->ubh_beans;
-+ slot->ubh_beans = ub;
-+
-+done:
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ return ub;
-+}
-+EXPORT_SYMBOL(subbeancounter_findcreate);
-+#ifndef CONFIG_UBC_KEEP_UNUSED
-+
-+/*
-+ * Check that nothing is left held in @resource of @ub.
-+ *
-+ * Returns 1 when @held is zero.  Otherwise prints a warning naming the
-+ * beancounter and the resource and returns 0.
-+ */
-+static int verify_res(struct user_beancounter *ub, int resource,
-+		unsigned long held)
-+{
-+	char name[64];
-+
-+	if (unlikely(held != 0)) {
-+		print_ub_uid(ub, name, sizeof(name));
-+		printk(KERN_WARNING "Ub %s helds %lu in %s on put\n",
-+				name, held, ub_rnames[resource]);
-+		return 0;
-+	}
-+
-+	return 1;
-+}
-+
-+/*
-+ * Run verify_res() over every resource of @ub and over the extra page
-+ * counters kept outside ub_parms, then pass the combined result to
-+ * ub_debug_trace() (its first argument is non-zero if anything was held).
-+ */
-+static inline void verify_held(struct user_beancounter *ub)
-+{
-+	int res;
-+	int ok = 1;
-+
-+	res = 0;
-+	while (res < UB_RESOURCES) {
-+		ok &= verify_res(ub, res, ub->ub_parms[res].held);
-+		res++;
-+	}
-+
-+	ok &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
-+	ok &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
-+	ok &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
-+	ok &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
-+
-+	ub_debug_trace(!ok, 5, 60*HZ);
-+}
-+
-+/*
-+ * Unlink @ub from its ub_hash chain.  Sub-beancounters (non-NULL parent) are
-+ * hashed by (parent, uid), top-level ones by uid alone.  The held counters
-+ * are verified just before the entry is unlinked.  An error is logged if
-+ * @ub is not on the chain.
-+ *
-+ * __put_beancounter() calls this with ub_hash_lock held.
-+ */
-+static void __unhash_beancounter(struct user_beancounter *ub)
-+{
-+	struct user_beancounter **link;
-+	struct ub_hash_slot *slot;
-+
-+	if (ub->parent == NULL)
-+		slot = &ub_hash[ub_hash_fun(ub->ub_uid)];
-+	else
-+		slot = &ub_hash[ub_subhash_fun(ub->parent, ub->ub_uid)];
-+
-+	for (link = &slot->ubh_beans; *link != NULL;
-+			link = &((*link)->ub_next)) {
-+		if (*link != ub)
-+			continue;
-+
-+		verify_held(ub);
-+		*link = ub->ub_next;
-+		return;
-+	}
-+
-+	printk(KERN_ERR "Invalid beancounter %p, luid=%d on free, slot %p\n",
-+			ub, ub->ub_uid, slot);
-+}
-+#endif
-+
-+/*
-+ * Drop one reference on @ub.
-+ *
-+ * When the count reaches zero the beancounter is removed from the hash and
-+ * freed (unless CONFIG_UBC_KEEP_UNUSED is set), and the same is then done
-+ * for its parent, walking up until a beancounter with remaining references
-+ * or the top of the chain is reached.  ub0 is never released; an attempt to
-+ * do so is only logged.
-+ */
-+void __put_beancounter(struct user_beancounter *ub)
-+{
-+ unsigned long flags;
-+ struct user_beancounter *parent;
-+
-+again:
-+ /* read the parent now: ub may be freed further down */
-+ parent = ub->parent;
-+ ub_debug(UBD_ALLOC, "__put bc %p (cnt %d) for %.20s pid %d "
-+ "cur %08lx cpu %d.\n",
-+ ub, atomic_read(&ub->ub_refcount),
-+ current->comm, current->pid,
-+ (unsigned long)current, smp_processor_id());
-+
-+ /* equivalent to atomic_dec_and_lock_irqsave() */
-+ local_irq_save(flags);
-+ if (likely(!atomic_dec_and_lock(&ub->ub_refcount, &ub_hash_lock))) {
-+ /* references remain; ub_hash_lock was not taken */
-+ if (unlikely(atomic_read(&ub->ub_refcount) < 0))
-+ printk(KERN_ERR "UB: Bad ub refcount: ub=%p, "
-+ "luid=%d, ref=%d\n",
-+ ub, ub->ub_uid,
-+ atomic_read(&ub->ub_refcount));
-+ local_irq_restore(flags);
-+ return;
-+ }
-+
-+ /* the count dropped to zero and ub_hash_lock is held from here on */
-+ if (unlikely(ub == get_ub0())) {
-+ printk(KERN_ERR "Trying to put ub0\n");
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ return;
-+ }
-+
-+#ifndef CONFIG_UBC_KEEP_UNUSED
-+ __unhash_beancounter(ub);
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ ub_free_counters(ub);
-+ kmem_cache_free(ub_cachep, ub);
-+#else
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+#endif
-+ /* continue with the parent saved before the drop */
-+ ub = parent;
-+ if (ub != NULL)
-+ goto again;
-+}
-+EXPORT_SYMBOL(__put_beancounter);
-+
-+/*
-+ * Generic resource charging stuff
-+ */
-+
-+/*
-+ * Charge @val units of @resource to @ub (this level only, parents are not
-+ * touched).
-+ *
-+ * @strict selects which thresholds are enforced:
-+ *   UB_HARD  - fails if held exceeds the barrier or the limit;
-+ *   UB_SOFT  - fails only if held exceeds the limit;
-+ *   UB_FORCE - never fails.
-+ *
-+ * Returns 0 on success.  On failure the charge is undone, failcnt is
-+ * incremented and -ENOMEM is returned.
-+ *
-+ * Callers in this file invoke it with ub->ub_lock held.
-+ */
-+int __charge_beancounter_locked(struct user_beancounter *ub,
-+ int resource, unsigned long val, enum severity strict)
-+{
-+ ub_debug_resource(resource, "Charging %lu for %d of %p with %lu\n",
-+ val, resource, ub, ub->ub_parms[resource].held);
-+ /*
-+ * ub_value <= UB_MAXVALUE, value <= UB_MAXVALUE, and only one addition
-+ * at the moment is possible so an overflow is impossible.
-+ */
-+ ub->ub_parms[resource].held += val;
-+
-+ /* "break" means the charge is refused; the cases cascade on purpose */
-+ switch (strict) {
-+ case UB_HARD:
-+ if (ub->ub_parms[resource].held >
-+ ub->ub_parms[resource].barrier)
-+ break;
-+ /* fall through: a hard charge must also respect the limit */
-+ case UB_SOFT:
-+ if (ub->ub_parms[resource].held >
-+ ub->ub_parms[resource].limit)
-+ break;
-+ /* fall through: all checks passed */
-+ case UB_FORCE:
-+ ub_adjust_maxheld(ub, resource);
-+ return 0;
-+ default:
-+ BUG();
-+ }
-+
-+ /* only refused soft charges are reported, and only at a limited rate */
-+ if (strict == UB_SOFT && ub_ratelimit(&ub->ub_limit_rl))
-+ printk(KERN_INFO "Fatal resource shortage: %s, UB %d.\n",
-+ ub_rnames[resource], ub->ub_uid);
-+ ub->ub_parms[resource].failcnt++;
-+ ub->ub_parms[resource].held -= val;
-+ return -ENOMEM;
-+}
-+
-+/*
-+ * Charge @val units of @resource to @ub and to every ancestor of @ub.
-+ *
-+ * If any level refuses the charge, the levels already charged (from @ub up
-+ * to, but not including, the refusing one) are uncharged again.
-+ *
-+ * Returns 0 on success, -EINVAL if @val exceeds UB_MAXVALUE (also returned
-+ * when @ub is NULL, since no level is visited), or the error returned by
-+ * __charge_beancounter_locked().
-+ */
-+int charge_beancounter(struct user_beancounter *ub,
-+		int resource, unsigned long val, enum severity strict)
-+{
-+	struct user_beancounter *cur, *undo;
-+	unsigned long flags;
-+	int retval = -EINVAL;
-+
-+	if (val > UB_MAXVALUE)
-+		return retval;
-+
-+	local_irq_save(flags);
-+	for (cur = ub; cur != NULL; cur = cur->parent) {
-+		spin_lock(&cur->ub_lock);
-+		retval = __charge_beancounter_locked(cur, resource, val, strict);
-+		spin_unlock(&cur->ub_lock);
-+		if (!retval)
-+			continue;
-+
-+		/* refused at "cur": roll back everything below it */
-+		for (undo = ub; undo != cur; undo = undo->parent) {
-+			spin_lock(&undo->ub_lock);
-+			__uncharge_beancounter_locked(undo, resource, val);
-+			spin_unlock(&undo->ub_lock);
-+		}
-+		break;
-+	}
-+	local_irq_restore(flags);
-+
-+	return retval;
-+}
-+
-+EXPORT_SYMBOL(charge_beancounter);
-+
-+/*
-+ * Force-charge (UB_FORCE, cannot fail) @val units of @resource to @ub and
-+ * its ancestors, stopping before the topmost beancounter, i.e. the one whose
-+ * parent is NULL is left untouched.  @ub must not be NULL.
-+ */
-+void charge_beancounter_notop(struct user_beancounter *ub,
-+		int resource, unsigned long val)
-+{
-+	struct user_beancounter *cur = ub;
-+	unsigned long flags;
-+
-+	local_irq_save(flags);
-+	while (cur->parent != NULL) {
-+		spin_lock(&cur->ub_lock);
-+		__charge_beancounter_locked(cur, resource, val, UB_FORCE);
-+		spin_unlock(&cur->ub_lock);
-+		cur = cur->parent;
-+	}
-+	local_irq_restore(flags);
-+}
-+
-+EXPORT_SYMBOL(charge_beancounter_notop);
-+
-+/*
-+ * Report an attempt to uncharge @val units of @resource from @ub while only
-+ * @held units are charged, and request a debug trace.
-+ */
-+void uncharge_warn(struct user_beancounter *ub, int resource,
-+		unsigned long val, unsigned long held)
-+{
-+	char name[64];
-+
-+	print_ub_uid(ub, name, sizeof(name));
-+	printk(KERN_ERR "Uncharging too much %lu h %lu, res %s ub %s\n",
-+			val, held, ub_rnames[resource], name);
-+	ub_debug_trace(1, 10, 10*HZ);
-+}
-+
-+/*
-+ * Uncharge @val units of @resource from @ub (this level only).
-+ *
-+ * If more is uncharged than is currently held, a warning is emitted and the
-+ * held value is clamped to zero instead of wrapping around.
-+ *
-+ * Callers in this file invoke it with ub->ub_lock held.
-+ */
-+void __uncharge_beancounter_locked(struct user_beancounter *ub,
-+		int resource, unsigned long val)
-+{
-+	ub_debug_resource(resource, "Uncharging %lu for %d of %p with %lu\n",
-+			val, resource, ub, ub->ub_parms[resource].held);
-+
-+	if (ub->ub_parms[resource].held >= val) {
-+		ub->ub_parms[resource].held -= val;
-+		return;
-+	}
-+
-+	/* over-uncharge: complain and drop whatever is left */
-+	uncharge_warn(ub, resource, val, ub->ub_parms[resource].held);
-+	ub->ub_parms[resource].held -= ub->ub_parms[resource].held;
-+}
-+
-+/*
-+ * Uncharge @val units of @resource from @ub and from every ancestor of @ub.
-+ * Each level is updated under its own ub_lock with interrupts disabled.
-+ */
-+void uncharge_beancounter(struct user_beancounter *ub,
-+		int resource, unsigned long val)
-+{
-+	struct user_beancounter *cur = ub;
-+	unsigned long flags;
-+
-+	while (cur != NULL) {
-+		spin_lock_irqsave(&cur->ub_lock, flags);
-+		__uncharge_beancounter_locked(cur, resource, val);
-+		spin_unlock_irqrestore(&cur->ub_lock, flags);
-+		cur = cur->parent;
-+	}
-+}
-+
-+EXPORT_SYMBOL(uncharge_beancounter);
-+
-+/*
-+ * Uncharge @val units of @resource from @ub and its ancestors, stopping
-+ * before the topmost beancounter (the one whose parent is NULL).  This is
-+ * the counterpart of charge_beancounter_notop().  @ub must not be NULL.
-+ */
-+void uncharge_beancounter_notop(struct user_beancounter *ub,
-+		int resource, unsigned long val)
-+{
-+	struct user_beancounter *cur = ub;
-+	unsigned long flags;
-+
-+	local_irq_save(flags);
-+	while (cur->parent != NULL) {
-+		spin_lock(&cur->ub_lock);
-+		__uncharge_beancounter_locked(cur, resource, val);
-+		spin_unlock(&cur->ub_lock);
-+		cur = cur->parent;
-+	}
-+	local_irq_restore(flags);
-+}
-+
-+EXPORT_SYMBOL(uncharge_beancounter_notop);
-+
-+
-+/*
-+ * Rate limiting stuff.
-+ */
-+/*
-+ * Decide whether an event governed by @p may be reported now.
-+ *
-+ * Returns 1 if the event is allowed (and records it), 0 if it must be
-+ * suppressed.  While less than p->interval jiffies have passed since the
-+ * last allowed event, at most p->burst events are let through; once the
-+ * interval has elapsed the bucket is drained by one per elapsed interval.
-+ * All rate-info structures share one static spinlock.
-+ */
-+int ub_ratelimit(struct ub_rate_info *p)
-+{
-+	static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
-+	unsigned long now, elapsed;
-+	unsigned long flags;
-+	long new_bucket;
-+	int allowed = 1;
-+
-+	spin_lock_irqsave(&ratelimit_lock, flags);
-+	now = jiffies;
-+	elapsed = now - p->last;
-+
-+	if (elapsed >= p->interval) {
-+		new_bucket = p->bucket - (elapsed / (unsigned)p->interval);
-+		if (new_bucket < 0)
-+			new_bucket = 0;
-+		p->bucket = new_bucket + 1;
-+	} else if (p->bucket < p->burst) {
-+		p->bucket++;
-+	} else {
-+		/* burst exhausted: suppress, p->last is left untouched */
-+		allowed = 0;
-+		goto unlock;
-+	}
-+	p->last = now;
-+
-+unlock:
-+	spin_unlock_irqrestore(&ratelimit_lock, flags);
-+	return allowed;
-+}
-+EXPORT_SYMBOL(ub_ratelimit);
-+
-+
-+/*
-+ * Initialization
-+ *
-+ * struct user_beancounter contains
-+ * - limits and other configuration settings,
-+ * with a copy stored for accounting purposes,
-+ * - structural fields: lists, spinlocks and so on.
-+ *
-+ * Before these parts are initialized, the structure should be memset
-+ * to 0 or copied from a known clean structure. That takes care of a lot
-+ * of fields not initialized explicitly.
-+ */
-+
-+/*
-+ * Initialize the structural fields of @ub: magic value, reference count
-+ * (set to 1), spinlock and list heads.  Limits and counters are not touched.
-+ */
-+static void init_beancounter_struct(struct user_beancounter *ub)
-+{
-+	spin_lock_init(&ub->ub_lock);
-+	atomic_set(&ub->ub_refcount, 1);
-+	ub->ub_magic = UB_MAGIC;
-+
-+	INIT_LIST_HEAD(&ub->ub_other_sk_list);
-+	INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+	INIT_LIST_HEAD(&ub->ub_cclist);
-+#endif
-+}
-+
-+/*
-+ * Copy the current parameters of every resource (ub_parms) into the stored
-+ * copy (ub_store) of @ub.
-+ */
-+static void init_beancounter_store(struct user_beancounter *ub)
-+{
-+	int res = 0;
-+
-+	while (res < UB_RESOURCES) {
-+		memcpy(&ub->ub_store[res], &ub->ub_parms[res],
-+				sizeof(struct ubparm));
-+		res++;
-+	}
-+}
-+
-+/*
-+ * Give @ub effectively unlimited resources: every barrier and limit is set
-+ * to UB_MAXVALUE.  The rate limit for shortage messages is set to a burst
-+ * of 4 per 300 seconds.
-+ */
-+static void init_beancounter_nolimits(struct user_beancounter *ub)
-+{
-+	int res = 0;
-+
-+	while (res < UB_RESOURCES) {
-+		/* FIXME: whether this is right for physpages and guarantees? */
-+		ub->ub_parms[res].barrier = UB_MAXVALUE;
-+		ub->ub_parms[res].limit = UB_MAXVALUE;
-+		res++;
-+	}
-+
-+	/* FIXME: set unlimited rate? */
-+	ub->ub_limit_rl.interval = 300*HZ;
-+	ub->ub_limit_rl.burst = 4;
-+}
-+
-+/*
-+ * Fill @ub with the built-in default limits.
-+ *
-+ * @mp is compared against 192MB expressed in pages: above that the kernel
-+ * memory limit is 32MB, otherwise it is one sixth of @mp pages in bytes.
-+ * Every barrier is then set equal to the corresponding limit; resources not
-+ * assigned here keep whatever limit the caller initialized them with.
-+ */
-+static void init_beancounter_syslimits(struct user_beancounter *ub,
-+		unsigned long mp)
-+{
-+	extern int max_threads;
-+	int res;
-+
-+	if (mp > (192*1024*1024 >> PAGE_SHIFT))
-+		ub->ub_parms[UB_KMEMSIZE].limit = 32*1024*1024;
-+	else
-+		ub->ub_parms[UB_KMEMSIZE].limit = (mp << PAGE_SHIFT) / 6;
-+
-+	ub->ub_parms[UB_LOCKEDPAGES].limit = 8;
-+	ub->ub_parms[UB_PRIVVMPAGES].limit = UB_MAXVALUE;
-+	ub->ub_parms[UB_SHMPAGES].limit = 64;
-+	ub->ub_parms[UB_NUMPROC].limit = max_threads / 2;
-+
-+	/* TCP: 1024 sockets, 4k send and 6k receive buffer each */
-+	ub->ub_parms[UB_NUMTCPSOCK].limit = 1024;
-+	ub->ub_parms[UB_TCPSNDBUF].limit = 1024*4*1024;
-+	ub->ub_parms[UB_TCPRCVBUF].limit = 1024*6*1024;
-+
-+	/* other sockets: 256 of them, 4k dgram receive and 8k buffer each */
-+	ub->ub_parms[UB_NUMOTHERSOCK].limit = 256;
-+	ub->ub_parms[UB_DGRAMRCVBUF].limit = 256*4*1024;
-+	ub->ub_parms[UB_OTHERSOCKBUF].limit = 256*8*1024;
-+
-+	ub->ub_parms[UB_NUMFLOCK].limit = 1024;
-+	ub->ub_parms[UB_NUMPTY].limit = 16;
-+	ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
-+	ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
-+	ub->ub_parms[UB_NUMFILE].limit = 1024;
-+
-+	res = 0;
-+	while (res < UB_RESOURCES) {
-+		ub->ub_parms[res].barrier = ub->ub_parms[res].limit;
-+		res++;
-+	}
-+
-+	ub->ub_limit_rl.interval = 300*HZ;
-+	ub->ub_limit_rl.burst = 4;
-+}
-+
-+/*
-+ * Boot-time setup of ub0, the beancounter returned by get_ub0(): it is
-+ * zeroed, given uid 0 and unlimited resources, and attached to the current
-+ * task and to init_mm.
-+ */
-+void __init ub_init_ub0(void)
-+{
-+ struct user_beancounter *ub;
-+
-+ init_cache_counters();
-+ ub = get_ub0();
-+ /* zero first, as the init_beancounter_*() helpers expect */
-+ memset(ub, 0, sizeof(*ub));
-+ ub->ub_uid = 0;
-+ init_beancounter_nolimits(ub);
-+ init_beancounter_store(ub);
-+ init_beancounter_struct(ub);
-+
-+ /* make the current task execute and fork under ub0 */
-+ memset(&current->task_bc, 0, sizeof(struct task_beancounter));
-+ (void)set_exec_ub(get_ub0());
-+ current->task_bc.fork_sub = get_beancounter(get_ub0());
-+ init_mm.mm_ub = get_beancounter(ub);
-+}
-+
-+/*
-+ * Initialize ub_hash_lock and make ub0 the head of the hash chain selected
-+ * by its uid.
-+ */
-+void __init ub_hash_init(void)
-+{
-+	struct user_beancounter *root;
-+
-+	spin_lock_init(&ub_hash_lock);
-+
-+	/* insert ub0 into the hash */
-+	root = get_ub0();
-+	ub_hash[ub_hash_fun(root->ub_uid)].ubh_beans = get_ub0();
-+}
-+
-+/*
-+ * Create the slab cache for struct user_beancounter, prepare
-+ * default_beancounter (the template copied into newly created beancounters)
-+ * and initialize the beancounter hash.  Panics if the cache cannot be
-+ * created.
-+ *
-+ * @mempages is passed to init_beancounter_syslimits() unless
-+ * CONFIG_UBC_UNLIMITED is set.
-+ */
-+void __init ub_init_cache(unsigned long mempages)
-+{
-+ extern int skbc_cache_init(void);
-+ int res;
-+
-+ /* the skbc cache initialization is disabled, so res is always 0 here */
-+ res = 0; /* skbc_cache_init(); */
-+ ub_cachep = kmem_cache_create("user_beancounters",
-+ sizeof(struct user_beancounter),
-+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ if (res < 0 || ub_cachep == NULL)
-+ panic("Can't create ubc caches\n");
-+
-+ memset(&default_beancounter, 0, sizeof(default_beancounter));
-+#ifdef CONFIG_UBC_UNLIMITED
-+ init_beancounter_nolimits(&default_beancounter);
-+#else
-+ init_beancounter_syslimits(&default_beancounter, mempages);
-+#endif
-+ init_beancounter_store(&default_beancounter);
-+ init_beancounter_struct(&default_beancounter);
-+
-+ ub_hash_init();
-+}
-diff -upr linux-2.6.16.orig/kernel/ub/ub_dcache.c linux-2.6.16-026test009/kernel/ub/ub_dcache.c
---- linux-2.6.16.orig/kernel/ub/ub_dcache.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_dcache.c 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,325 @@
-+/*
-+ * kernel/ub/ub_dcache.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/dcache.h>
-+#include <linux/slab.h>
-+#include <linux/kmem_cache.h>
-+#include <linux/fs.h>
-+#include <linux/err.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_mem.h>
-+#include <ub/ub_dcache.h>
-+
-+/*
-+ * Locking
-+ * traverse dcache_lock d_lock
-+ * ub_dentry_charge + + +
-+ * ub_dentry_uncharge + - +
-+ * ub_dentry_charge_nofail + + -
-+ *
-+ * d_inuse is atomic so that we can increment the parent dentry's d_inuse in
-+ * ub_dentry_charge with only the dentry's own d_lock held.
-+ *
-+ * Race in uncharge vs charge_nofail is handled with dcache_lock.
-+ * Race in charge vs charge_nofail is inessential since they both inc d_inuse.
-+ * Race in uncharge vs charge is handled by altering d_inuse under d_lock.
-+ *
-+ * Race with d_move is handled this way:
-+ * - charge_nofail and uncharge are protected by dcache_lock;
-+ * - charge works only with dentry and dentry->d_parent->d_inuse, so
-+ * it's enough to lock only the dentry.
-+ */
-+
-+/*
-+ * Beancounting
-+ * UB argument must NOT be NULL
-+ */
-+
-+/*
-+ * Charge one dentry of @size bytes to @ub (this level only): the rounded
-+ * size goes to UB_KMEMSIZE and the raw size to UB_DCACHESIZE, both under a
-+ * single hold of ub_lock.  If the second charge fails the first is undone.
-+ *
-+ * Returns 0 on success, -ENOMEM if either charge is refused.
-+ */
-+static int do_charge_dcache(struct user_beancounter *ub, unsigned long size,
-+		enum severity sv)
-+{
-+	unsigned long flags;
-+	int err = -ENOMEM;
-+
-+	spin_lock_irqsave(&ub->ub_lock, flags);
-+	if (!__charge_beancounter_locked(ub, UB_KMEMSIZE,
-+				CHARGE_SIZE(size), sv)) {
-+		if (!__charge_beancounter_locked(ub, UB_DCACHESIZE, size, sv))
-+			err = 0;
-+		else
-+			__uncharge_beancounter_locked(ub, UB_KMEMSIZE,
-+					CHARGE_SIZE(size));
-+	}
-+	spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+	return err;
-+}
-+
-+/*
-+ * Undo do_charge_dcache() on @ub (this level only): uncharge the rounded
-+ * size from UB_KMEMSIZE and the raw size from UB_DCACHESIZE under ub_lock.
-+ */
-+static void do_uncharge_dcache(struct user_beancounter *ub,
-+		unsigned long size)
-+{
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&ub->ub_lock, irqflags);
-+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, CHARGE_SIZE(size));
-+	__uncharge_beancounter_locked(ub, UB_DCACHESIZE, size);
-+	spin_unlock_irqrestore(&ub->ub_lock, irqflags);
-+}
-+
-+/*
-+ * Charge a dentry of @size bytes to @ub and to all of its ancestors.
-+ *
-+ * Returns 0 on success.  If some level refuses, the levels charged so far
-+ * are uncharged and -ENOMEM is returned.
-+ */
-+static int charge_dcache(struct user_beancounter *ub, unsigned long size,
-+		enum severity sv)
-+{
-+	struct user_beancounter *cur, *undo;
-+
-+	for (cur = ub; cur != NULL; cur = cur->parent) {
-+		if (!do_charge_dcache(cur, size, sv))
-+			continue;
-+
-+		/* refused at "cur": roll back the levels below it */
-+		for (undo = ub; undo != cur; undo = undo->parent)
-+			do_uncharge_dcache(undo, size);
-+		return -ENOMEM;
-+	}
-+
-+	return 0;
-+}
-+
-+/* Uncharge a dentry of @size bytes from @ub and from all of its ancestors. */
-+void uncharge_dcache(struct user_beancounter *ub, unsigned long size)
-+{
-+	while (ub != NULL) {
-+		do_uncharge_dcache(ub, size);
-+		ub = ub->parent;
-+	}
-+}
-+
-+/*
-+ * Charge a dentry of @size bytes to @ub and its ancestors with UB_FORCE
-+ * severity.  The return value of charge_dcache() is ignored.
-+ */
-+static inline void charge_dcache_forced(struct user_beancounter *ub,
-+ unsigned long size)
-+{
-+ charge_dcache(ub, size, UB_FORCE);
-+}
-+
-+/*
-+ * Bind @d_bc to the current exec beancounter (taking a reference) and
-+ * force-charge the dentry size to it.  Nothing is charged when there is no
-+ * exec beancounter; d_ub is then NULL.
-+ */
-+static inline void d_forced_charge(struct dentry_beancounter *d_bc)
-+{
-+	d_bc->d_ub = get_beancounter(get_exec_ub());
-+	if (d_bc->d_ub != NULL)
-+		charge_dcache_forced(d_bc->d_ub, d_bc->d_ubsize);
-+}
-+
-+/*
-+ * Uncharge the dentry size from the beancounter recorded in @d_bc, drop the
-+ * reference and clear d_ub.  Does nothing if no beancounter is recorded.
-+ */
-+static inline void d_uncharge(struct dentry_beancounter *d_bc)
-+{
-+	if (d_bc->d_ub != NULL) {
-+		uncharge_dcache(d_bc->d_ub, d_bc->d_ubsize);
-+		put_beancounter(d_bc->d_ub);
-+		d_bc->d_ub = NULL;
-+	}
-+}
-+
-+/*
-+ * Alloc / free dentry_beancounter
-+ */
-+
-+/*
-+ * Placeholder: nothing is allocated here (ub_dentry_alloc() uses the
-+ * dentry_bc member of the dentry itself).  Always returns 0.
-+ */
-+static inline int d_alloc_beancounter(struct dentry *d)
-+{
-+ return 0;
-+}
-+
-+/* Placeholder counterpart of d_alloc_beancounter(); nothing to free. */
-+static inline void d_free_beancounter(struct dentry_beancounter *d_bc)
-+{
-+}
-+
-+/*
-+ * Size charged for @dentry: the object use of the inode cache plus that of
-+ * the dentry cache, plus the memory used by the name when it is stored
-+ * externally.
-+ */
-+static inline unsigned long d_charge_size(struct dentry *dentry)
-+{
-+	unsigned long size;
-+
-+	size = inode_cachep->objuse + dentry_cache->objuse;
-+
-+	/* dentry's d_name is already set to appropriate value (see d_alloc) */
-+	if (dname_external(dentry))
-+		size += kmem_obj_memusage((void *)dentry->d_name.name);
-+
-+	return size;
-+}
-+
-+/*
-+ * Mark a dentry as in use and charge it to the current exec beancounter
-+ * with UB_SOFT severity.
-+ *
-+ * d_lock of @dentry is held by the caller.
-+ *
-+ * Returns 0 on success (d_ub then holds the beancounter, or NULL if there
-+ * is no exec beancounter), or -ENOMEM if the charge is refused (d_ub is
-+ * then NULL and the reference has been dropped).
-+ */
-+static int d_inc_inuse(struct dentry *dentry)
-+{
-+	struct dentry_beancounter *d_bc = &dentry->dentry_bc;
-+	struct user_beancounter *ub;
-+
-+	/*
-+	 * Increment d_inuse of parent (unless the dentry is its own parent).
-+	 * It can't change since dentry->d_lock is held.
-+	 */
-+	if (dentry->d_parent != dentry && ub_dget_testone(dentry->d_parent))
-+		BUG();
-+
-+	ub = get_beancounter(get_exec_ub());
-+	if (ub == NULL || !charge_dcache(ub, d_bc->d_ubsize, UB_SOFT)) {
-+		d_bc->d_ub = ub;
-+		return 0;
-+	}
-+
-+	/* charge refused */
-+	put_beancounter(ub);
-+	d_bc->d_ub = NULL;
-+	return -ENOMEM;
-+}
-+
-+/*
-+ * Set up the beancounter part of a freshly allocated dentry and charge its
-+ * size to the current exec beancounter with UB_HARD severity.
-+ *
-+ * No locks are held.
-+ *
-+ * Returns 0 on success, a negative value from d_alloc_beancounter(), or
-+ * -ENOMEM if the charge is refused.  On -ENOMEM the reference taken here is
-+ * dropped and d_ub is reset to NULL, as d_inc_inuse() does on its error
-+ * path, so that a later d_uncharge() on this dentry cannot uncharge or put
-+ * the beancounter a second time.
-+ */
-+int ub_dentry_alloc(struct dentry *dentry)
-+{
-+	int err;
-+	struct dentry_beancounter *d_bc;
-+
-+	err = d_alloc_beancounter(dentry);
-+	if (err < 0)
-+		return err;
-+
-+	d_bc = &dentry->dentry_bc;
-+	d_bc->d_ub = get_beancounter(get_exec_ub());
-+	atomic_set(&d_bc->d_inuse, INUSE_INIT); /* see comment in ub_dcache.h */
-+	d_bc->d_ubsize = d_charge_size(dentry);
-+
-+	err = 0;
-+	if (d_bc->d_ub != NULL &&
-+	    charge_dcache(d_bc->d_ub, d_bc->d_ubsize, UB_HARD)) {
-+		put_beancounter(d_bc->d_ub);
-+		/* do not leave a pointer to a beancounter we no longer hold */
-+		d_bc->d_ub = NULL;
-+		d_free_beancounter(d_bc);
-+		err = -ENOMEM;
-+	}
-+
-+	return err;
-+}
-+
-+/*
-+ * Charge / uncharge functions.
-+ *
-+ * We take d_lock to protect dentry_bc from concurrent acces
-+ * when simultaneous __d_lookup and d_put happens on one dentry.
-+ */
-+
-+/*
-+ * Charge a dentry on lookup.
-+ *
-+ * dcache_lock is not held; d_lock and rcu_read_lock are held on entry.
-+ * Both d_lock and rcu_read_lock are dropped before returning.
-+ *
-+ * Returns 0 on success, or the error from d_inc_inuse() if the charge was
-+ * refused and the dentry could be invalidated.
-+ */
-+int ub_dentry_charge(struct dentry *dentry)
-+{
-+ int err;
-+
-+ err = 0;
-+ /* charge only when ub_dget_testone() reports a state change */
-+ if (ub_dget_testone(dentry))
-+ err = d_inc_inuse(dentry);
-+
-+ /*
-+ * d_lock and rcu_read_lock are dropped here
-+ * (see also __d_lookup)
-+ */
-+ spin_unlock(&dentry->d_lock);
-+ rcu_read_unlock();
-+
-+ if (!err)
-+ return 0;
-+
-+ /*
-+ * d_invalidate is required for real_lookup
-+ * since it tries to create a new dentry on
-+ * d_lookup failure.
-+ */
-+ if (!d_invalidate(dentry))
-+ return err;
-+
-+ /* invalidation didn't succeed, force the dentry to be charged */
-+ d_forced_charge(&dentry->dentry_bc);
-+ return 0;
-+}
-+
-+/*
-+ * Uncharge a dentry and, as long as the state keeps changing, its ancestors.
-+ *
-+ * dcache_lock is held by the caller.
-+ * No d_locks are held on entry; each dentry's d_lock is taken and dropped
-+ * in turn, walking from the dentry upwards.
-+ */
-+void ub_dentry_uncharge(struct dentry *dentry)
-+{
-+ struct dentry *parent;
-+
-+ /* go up until status is changed and root is not reached */
-+ while (1) {
-+ /*
-+ * We need d_lock here to handle
-+ * the race with ub_dentry_charge
-+ */
-+ spin_lock(&dentry->d_lock);
-+ if (!ub_dput_testzero(dentry)) {
-+ /* still in use: stop here, ancestors stay charged */
-+ spin_unlock(&dentry->d_lock);
-+ break;
-+ }
-+
-+ /* state transition 0 => -1 */
-+ d_uncharge(&dentry->dentry_bc);
-+ parent = dentry->d_parent;
-+ spin_unlock(&dentry->d_lock);
-+
-+ /*
-+ * dcache_lock is held (see comment in __dget_locked)
-+ * so we can safely move upwards.
-+ */
-+ /* a dentry that is its own parent ends the walk */
-+ if (dentry == parent)
-+ break;
-+ dentry = parent;
-+ }
-+}
-+
-+/*
-+ * Forced version of the charge, for dget in a clean cache where an error is
-+ * not an option.  Walks from @dentry upwards, force-charging each dentry
-+ * for which ub_dget_testone() reports a state change.
-+ *
-+ * dcache_lock is held by the caller.
-+ * No d_locks are held.
-+ */
-+void ub_dentry_charge_nofail(struct dentry *dentry)
-+{
-+	struct dentry *up;
-+
-+	/* go up until status is changed and root is not reached */
-+	while (ub_dget_testone(dentry)) {
-+		/*
-+		 * state transition -1 => 0
-+		 *
-+		 * Unlike ub_dentry_uncharge, the dentry is not locked
-+		 * before the atomic increment.  A race with
-+		 * ub_dentry_uncharge is excluded by dcache_lock, and a
-+		 * race with ub_dentry_charge is harmless since both
-+		 * sides only do an atomic increment.
-+		 */
-+		d_forced_charge(&dentry->dentry_bc);
-+
-+		/*
-+		 * dcache_lock is held (see comment in __dget_locked)
-+		 * so we can safely move upwards.
-+		 */
-+		up = dentry->d_parent;
-+		if (up == dentry)
-+			return;
-+		dentry = up;
-+	}
-+}
-diff -upr linux-2.6.16.orig/kernel/ub/ub_mem.c linux-2.6.16-026test009/kernel/ub/ub_mem.c
---- linux-2.6.16.orig/kernel/ub/ub_mem.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_mem.c 2006-04-19 15:02:11.000000000 +0400
-@@ -0,0 +1,384 @@
-+/*
-+ * kernel/ub/ub_mem.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/slab.h>
-+#include <linux/kmem_cache.h>
-+#include <linux/kmem_slab.h>
-+#include <linux/highmem.h>
-+#include <linux/vmalloc.h>
-+#include <linux/mm.h>
-+#include <linux/gfp.h>
-+#include <linux/swap.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h>
-+#include <linux/module.h>
-+#include <ub/beancounter.h>
-+#include <ub/ub_mem.h>
-+#include <ub/ub_hash.h>
-+
-+/*
-+ * Initialization
-+ */
-+
-+/*
-+ * Slab accounting
-+ */
-+
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+
-+#define CC_HASH_SIZE 1024
-+static struct ub_cache_counter *cc_hash[CC_HASH_SIZE];
-+spinlock_t cc_lock;
-+
-+/*
-+ * Remove and free cache counters from cc_hash.
-+ *
-+ * An entry is kept only if it matches neither @ub nor @cachep.  The callers
-+ * in this file pass NULL for one of the two, so the NULL argument acts as
-+ * "do not filter on this field" (assuming no entry has a NULL ub or cachep
-+ * - TODO confirm).
-+ *
-+ * Matching entries are unlinked under cc_lock and chained on a private list
-+ * through their "next" field; they are freed after the lock is released.
-+ */
-+static void __free_cache_counters(struct user_beancounter *ub,
-+ kmem_cache_t *cachep)
-+{
-+ struct ub_cache_counter *cc, **pprev, *del;
-+ int i;
-+ unsigned long flags;
-+
-+ del = NULL;
-+ spin_lock_irqsave(&cc_lock, flags);
-+ for (i = 0; i < CC_HASH_SIZE; i++) {
-+ /* pprev always points at the link that leads to cc */
-+ pprev = &cc_hash[i];
-+ cc = cc_hash[i];
-+ while (cc != NULL) {
-+ if (cc->ub != ub && cc->cachep != cachep) {
-+ /* not ours: keep it and advance */
-+ pprev = &cc->next;
-+ cc = cc->next;
-+ continue;
-+ }
-+
-+ /* unlink from the per-beancounter list and from the hash chain */
-+ list_del(&cc->ulist);
-+ *pprev = cc->next;
-+ /* push onto the to-be-freed list */
-+ cc->next = del;
-+ del = cc;
-+ cc = *pprev;
-+ }
-+ }
-+ spin_unlock_irqrestore(&cc_lock, flags);
-+
-+ while (del != NULL) {
-+ cc = del->next;
-+ kfree(del);
-+ del = cc;
-+ }
-+}
-+
-+/* Free the cache counters that belong to beancounter @ub. */
-+void ub_free_counters(struct user_beancounter *ub)
-+{
-+ __free_cache_counters(ub, NULL);
-+}
-+
-+/* Free the cache counters that refer to slab cache @cachep. */
-+void ub_kmemcache_free(kmem_cache_t *cachep)
-+{
-+ __free_cache_counters(NULL, cachep);
-+}
-+
-+/* Boot-time setup: initialize cc_lock and empty every cc_hash chain. */
-+void __init init_cache_counters(void)
-+{
-+	spin_lock_init(&cc_lock);
-+	memset(cc_hash, 0, sizeof(cc_hash));
-+}
-+
-+/*
-+ * Hash a (beancounter, slab cache) pointer pair into a cc_hash index.
-+ * Two shifted copies of each pointer value are XOR-ed together and the
-+ * result is masked with CC_HASH_SIZE - 1.
-+ */
-+#define cc_hash_fun(ub, cachep) ( \
-+ (((unsigned long)(ub) >> L1_CACHE_SHIFT) ^ \
-+ ((unsigned long)(ub) >> (BITS_PER_LONG / 2)) ^ \
-+ ((unsigned long)(cachep) >> L1_CACHE_SHIFT) ^ \
-+ ((unsigned long)(cachep) >> (BITS_PER_LONG / 2)) \
-+ ) & (CC_HASH_SIZE - 1))
-+
-+/*
-+ * Add @val to the counter kept for the pair (@ub, slab cache of @objp),
-+ * creating the counter on first use.
-+ *
-+ * @mask supplies the allocation flags for a new counter; __GFP_UBC is
-+ * stripped from it before the kmalloc().
-+ *
-+ * Returns 0 on success, -ENOMEM if a new counter could not be allocated.
-+ */
-+static int change_slab_charged(struct user_beancounter *ub, void *objp,
-+ unsigned long val, int mask)
-+{
-+ struct ub_cache_counter *cc, *new_cnt, **pprev;
-+ kmem_cache_t *cachep;
-+ unsigned long flags;
-+
-+ cachep = virt_to_cache(objp);
-+ new_cnt = NULL;
-+
-+again:
-+ spin_lock_irqsave(&cc_lock, flags);
-+ cc = cc_hash[cc_hash_fun(ub, cachep)];
-+ while (cc) {
-+ if (cc->ub == ub && cc->cachep == cachep)
-+ goto found;
-+ cc = cc->next;
-+ }
-+
-+ /* not found: insert the counter allocated on the previous pass, if any */
-+ if (new_cnt != NULL)
-+ goto insert;
-+
-+ /* allocate with cc_lock released, then look the pair up again */
-+ spin_unlock_irqrestore(&cc_lock, flags);
-+
-+ new_cnt = kmalloc(sizeof(*new_cnt), mask & ~__GFP_UBC);
-+ if (new_cnt == NULL)
-+ return -ENOMEM;
-+
-+ new_cnt->counter = 0;
-+ new_cnt->ub = ub;
-+ new_cnt->cachep = cachep;
-+ goto again;
-+
-+insert:
-+ pprev = &cc_hash[cc_hash_fun(ub, cachep)];
-+ new_cnt->next = *pprev;
-+ *pprev = new_cnt;
-+ list_add(&new_cnt->ulist, &ub->ub_cclist);
-+ cc = new_cnt;
-+ /* ownership passed to the hash; nothing to free below */
-+ new_cnt = NULL;
-+
-+found:
-+ cc->counter += val;
-+ spin_unlock_irqrestore(&cc_lock, flags);
-+ /* the counter appeared while we were allocating: drop our spare copy */
-+ if (new_cnt)
-+ kfree(new_cnt);
-+ return 0;
-+}
-+
-+/*
-+ * Count one more object of @objp's slab cache as charged to @ub.
-+ * Returns the result of change_slab_charged() (0 or -ENOMEM).
-+ */
-+static inline int inc_slab_charged(struct user_beancounter *ub,
-+ void *objp, int mask)
-+{
-+ return change_slab_charged(ub, objp, 1, mask);
-+}
-+
-+/*
-+ * Count one object less of @objp's slab cache as charged to @ub.
-+ *
-+ * The -1 is passed through an unsigned long parameter and added to the
-+ * counter, which presumably wraps to a decrement - confirm against the
-+ * type of ub_cache_counter.counter.  A failure (the counter had to be
-+ * allocated and could not be) is treated as a bug.
-+ */
-+static inline void dec_slab_charged(struct user_beancounter *ub, void *objp)
-+{
-+ if (change_slab_charged(ub, objp, -1, 0) < 0)
-+ BUG();
-+}
-+
-+#include <linux/vmalloc.h>
-+
-+/*
-+ * Add 2^@order pages to the pages_charged statistic of @ub for the CPU this
-+ * code runs on.  @pg is not used.  Always returns 0.
-+ */
-+static inline int inc_pages_charged(struct user_beancounter *ub,
-+		struct page *pg, int order)
-+{
-+	int this_cpu = get_cpu();
-+
-+	ub->ub_stat[this_cpu].pages_charged += (1 << order);
-+	put_cpu();
-+
-+	return 0;
-+}
-+
-+/*
-+ * Subtract 2^@order pages from the pages_charged statistic of @ub for the
-+ * CPU this code runs on.  @pg is not used.
-+ */
-+static inline void dec_pages_charged(struct user_beancounter *ub,
-+		struct page *pg, int order)
-+{
-+	int this_cpu = get_cpu();
-+
-+	ub->ub_stat[this_cpu].pages_charged -= (1 << order);
-+	put_cpu();
-+}
-+
-+/*
-+ * Add the pages of vmalloc area @vm to the vmalloc_charged statistic of the
-+ * current exec beancounter.  Nothing is done unless @flags contains
-+ * __GFP_UBC and an exec beancounter exists.
-+ */
-+void inc_vmalloc_charged(struct vm_struct *vm, int flags)
-+{
-+	struct user_beancounter *ub;
-+	int this_cpu;
-+
-+	if ((flags & __GFP_UBC) == 0)
-+		return;
-+
-+	ub = get_exec_ub();
-+	if (ub != NULL) {
-+		this_cpu = get_cpu();
-+		ub->ub_stat[this_cpu].vmalloc_charged += vm->nr_pages;
-+		put_cpu();
-+	}
-+}
-+
-+/*
-+ * Subtract the pages of vmalloc area @vm from the vmalloc_charged statistic
-+ * of the beancounter recorded on the area's first page.  Nothing is done if
-+ * that page has no beancounter.
-+ */
-+void dec_vmalloc_charged(struct vm_struct *vm)
-+{
-+	struct user_beancounter *ub = page_ub(vm->pages[0]);
-+	int this_cpu;
-+
-+	if (ub != NULL) {
-+		this_cpu = get_cpu();
-+		ub->ub_stat[this_cpu].vmalloc_charged -= vm->nr_pages;
-+		put_cpu();
-+	}
-+}
-+
-+#else
-+#define inc_slab_charged(ub, o, m) (0)
-+#define dec_slab_charged(ub, o) do { } while (0)
-+#define inc_pages_charged(ub, pg, o) (0)
-+#define dec_pages_charged(ub, pg, o) do { } while (0)
-+#endif
-+
-+/*
-+ * Return the address of the beancounter pointer kept for slab object @objp.
-+ * The object's index within its slab selects the entry in the array
-+ * returned by slab_ubcs().  The cache must have SLAB_UBC set (BUG
-+ * otherwise).
-+ */
-+static inline struct user_beancounter **slab_ub_ref(void *objp)
-+{
-+	kmem_cache_t *cache = virt_to_cache(objp);
-+	struct slab *slab;
-+	int idx;
-+
-+	BUG_ON(!(cache->flags & SLAB_UBC));
-+
-+	slab = virt_to_slab(objp);
-+	idx = (objp - slab->s_mem) / cache->buffer_size;
-+
-+	return &slab_ubcs(cache, slab)[idx];
-+}
-+
-+/*
-+ * Return the beancounter recorded for slab object @objp (may be NULL).
-+ * No reference is taken.
-+ */
-+struct user_beancounter *slab_ub(void *objp)
-+{
-+	return *slab_ub_ref(objp);
-+}
-+
-+EXPORT_SYMBOL(slab_ub);
-+
-+/*
-+ * Tell whether slab object @objp is subject to beancounter accounting.
-+ *
-+ * Returns 0 if its cache lacks SLAB_UBC, or if the cache is marked
-+ * SLAB_NO_CHARGE and @flags does not contain __GFP_UBC; 1 otherwise.
-+ */
-+static inline int should_charge(void *objp, int flags)
-+{
-+	kmem_cache_t *cache = virt_to_cache(objp);
-+
-+	if ((cache->flags & SLAB_UBC) == 0)
-+		return 0;
-+	if (flags & __GFP_UBC)
-+		return 1;
-+
-+	return (cache->flags & SLAB_NO_CHARGE) ? 0 : 1;
-+}
-+
-+/* with __GFP_UBC passed, only the cache's SLAB_UBC flag decides */
-+#define should_uncharge(objp) should_charge(objp, __GFP_UBC)
-+
-+/*
-+ * Charge slab object @objp to the current exec beancounter.
-+ *
-+ * The object's rounded memory usage is charged to UB_KMEMSIZE, with UB_SOFT
-+ * severity if @flags contains __GFP_SOFT_UBC and UB_HARD otherwise.  On
-+ * success the beancounter, with the reference taken here, is recorded in
-+ * the object's slot.
-+ *
-+ * Returns 0 on success or when no charging applies, -ENOMEM on failure.
-+ */
-+int ub_slab_charge(void *objp, int flags)
-+{
-+	struct user_beancounter *ub;
-+	unsigned int size;
-+
-+	if (!should_charge(objp, flags))
-+		return 0;
-+
-+	ub = get_beancounter(get_exec_ub());
-+	if (ub == NULL)
-+		return 0;
-+
-+	size = CHARGE_SIZE(kmem_obj_memusage(objp));
-+	if (!charge_beancounter(ub, UB_KMEMSIZE, size,
-+			(flags & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD))) {
-+		if (!(inc_slab_charged(ub, objp, flags) < 0)) {
-+			*slab_ub_ref(objp) = ub;
-+			return 0;
-+		}
-+		/* the debug counter could not be updated: undo the charge */
-+		uncharge_beancounter(ub, UB_KMEMSIZE, size);
-+	}
-+
-+	put_beancounter(ub);
-+	return -ENOMEM;
-+}
-+
-+/*
-+ * Undo ub_slab_charge() for slab object @objp: uncharge its rounded memory
-+ * usage from UB_KMEMSIZE, drop the beancounter reference and clear the
-+ * object's slot.  Does nothing for unaccounted caches or objects without a
-+ * recorded beancounter.
-+ */
-+void ub_slab_uncharge(void *objp)
-+{
-+	struct user_beancounter **slot;
-+	unsigned int size;
-+
-+	if (!should_uncharge(objp))
-+		return;
-+
-+	slot = slab_ub_ref(objp);
-+	if (*slot != NULL) {
-+		dec_slab_charged(*slot, objp);
-+		size = CHARGE_SIZE(kmem_obj_memusage(objp));
-+		uncharge_beancounter(*slot, UB_KMEMSIZE, size);
-+		put_beancounter(*slot);
-+		*slot = NULL;
-+	}
-+}
-+
-+/*
-+ * Pages accounting
-+ */
-+
-+/*
-+ * Charge an allocation of 2^@order pages starting at @page to the current
-+ * exec beancounter, if @mask contains __GFP_UBC.
-+ *
-+ * The charge goes to UB_KMEMSIZE with UB_SOFT severity when @mask contains
-+ * __GFP_SOFT_UBC and UB_HARD otherwise.  On success the beancounter (or
-+ * NULL when nothing was charged) is stored in page_ub(page), which must be
-+ * NULL on entry.
-+ *
-+ * Returns 0 on success, -ENOMEM if the charge is refused.
-+ */
-+inline int ub_page_charge(struct page *page, int order, int mask)
-+{
-+ struct user_beancounter *ub;
-+
-+ ub = NULL;
-+ if (!(mask & __GFP_UBC))
-+ goto out;
-+
-+ ub = get_beancounter(get_exec_ub());
-+ if (ub == NULL)
-+ goto out;
-+
-+ if (charge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order),
-+ (mask & __GFP_SOFT_UBC ? UB_SOFT : UB_HARD)))
-+ goto err;
-+ if (inc_pages_charged(ub, page, order) < 0) {
-+ uncharge_beancounter(ub, UB_KMEMSIZE, CHARGE_ORDER(order));
-+ goto err;
-+ }
-+out:
-+ /* reached both when charged (ub set) and when not accounted (ub NULL) */
-+ BUG_ON(page_ub(page) != NULL);
-+ page_ub(page) = ub;
-+ return 0;
-+
-+err:
-+ BUG_ON(page_ub(page) != NULL);
-+ put_beancounter(ub);
-+ return -ENOMEM;
-+}
-+
-+/*
-+ * Undo ub_page_charge() for 2^@order pages starting at @page: uncharge from
-+ * UB_KMEMSIZE, drop the beancounter reference and clear page_ub(page).
-+ * Does nothing if the page has no beancounter recorded.
-+ */
-+inline void ub_page_uncharge(struct page *page, int order)
-+{
-+	struct user_beancounter *owner = page_ub(page);
-+
-+	if (owner != NULL) {
-+		dec_pages_charged(owner, page, order);
-+		BUG_ON(owner->ub_magic != UB_MAGIC);
-+		uncharge_beancounter(owner, UB_KMEMSIZE, CHARGE_ORDER(order));
-+		put_beancounter(owner);
-+		page_ub(page) = NULL;
-+	}
-+}
-+
-+/*
-+ * Return the beancounter recorded on the page backing vmalloc address @obj,
-+ * or NULL if no page is mapped there.  No reference is taken.
-+ *
-+ * takes init_mm.page_table_lock
-+ * some outer lock to protect pages from vmalloced area must be held
-+ */
-+struct user_beancounter *vmalloc_ub(void *obj)
-+{
-+	struct page *backing = vmalloc_to_page(obj);
-+
-+	if (backing != NULL)
-+		return page_ub(backing);
-+
-+	return NULL;
-+}
-+
-+EXPORT_SYMBOL(vmalloc_ub);
-+
-+/*
-+ * Return the beancounter recorded for kernel object @obj: addresses within
-+ * [VMALLOC_START, VMALLOC_END) are looked up through vmalloc_ub(), all
-+ * others through slab_ub().
-+ */
-+struct user_beancounter *mem_ub(void *obj)
-+{
-+	unsigned long addr = (unsigned long)obj;
-+
-+	if (addr >= VMALLOC_START && addr < VMALLOC_END)
-+		return vmalloc_ub(obj);
-+
-+	return slab_ub(obj);
-+}
-+
-+EXPORT_SYMBOL(mem_ub);
-diff -upr linux-2.6.16.orig/kernel/ub/ub_misc.c linux-2.6.16-026test009/kernel/ub/ub_misc.c
---- linux-2.6.16.orig/kernel/ub/ub_misc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_misc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,244 @@
-+/*
-+ * kernel/ub/ub_misc.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/tty.h>
-+#include <linux/tty_driver.h>
-+#include <linux/signal.h>
-+#include <linux/slab.h>
-+#include <linux/fs.h>
-+#include <linux/sched.h>
-+#include <linux/kmem_cache.h>
-+#include <linux/module.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_mem.h>
-+
-+/*
-+ * Task stuff
-+ */
-+
-+/*
-+ * Initialize the fork-related part of @tsk's task beancounter from
-+ * @old_bc: take a reference on the fork_sub beancounter, hand the
-+ * task_freserv pointer over to the new task (clearing it in @old_bc), and
-+ * reset task_fnode and task_data.
-+ */
-+static void init_task_sub(struct task_struct *tsk,
-+		struct task_beancounter *old_bc)
-+{
-+	struct task_beancounter *bc = &tsk->task_bc;
-+
-+	bc->fork_sub = get_beancounter(old_bc->fork_sub);
-+	bc->task_fnode = NULL;
-+
-+	/* move the reserve: the new task takes it, the old one loses it */
-+	bc->task_freserv = old_bc->task_freserv;
-+	old_bc->task_freserv = NULL;
-+
-+	memset(&bc->task_data, 0, sizeof(bc->task_data));
-+}
-+
-+/*
-+ * Account a new task @task created by @parent.
-+ *
-+ * One unit of UB_NUMPROC is charged (UB_HARD) to the parent's fork_sub
-+ * beancounter, which then becomes both task_ub and exec_ub of the new task;
-+ * a separate reference is taken for each.
-+ *
-+ * Returns 0 on success, -ENOMEM if the charge is refused.
-+ */
-+int ub_task_charge(struct task_struct *parent, struct task_struct *task)
-+{
-+ struct task_beancounter *old_bc;
-+ struct task_beancounter *new_bc;
-+ struct user_beancounter *ub;
-+
-+ old_bc = &parent->task_bc;
-+#if 0
-+ if (old_bc->exec_ub == NULL) {
-+ /* FIXME: this won't work if task_bc is outside task_struct */
-+ init_task_sub(task, old_bc);
-+ return 0;
-+ }
-+#endif
-+ ub = old_bc->fork_sub;
-+
-+ if (charge_beancounter(ub, UB_NUMPROC, 1, UB_HARD) < 0)
-+ return -ENOMEM;
-+
-+ new_bc = &task->task_bc;
-+ new_bc->task_ub = get_beancounter(ub);
-+ new_bc->exec_ub = get_beancounter(ub);
-+ /* sets up fork_sub (with its own reference) and the remaining fields */
-+ init_task_sub(task, old_bc);
-+ return 0;
-+}
-+
-+/*
-+ * Undo ub_task_charge() for @task: uncharge one unit of UB_NUMPROC from
-+ * task_ub (if set), drop the exec_ub, task_ub and fork_sub references and
-+ * free the task_freserv reserve.  exec_ub is finally overwritten with a
-+ * poison value.
-+ */
-+void ub_task_uncharge(struct task_struct *task)
-+{
-+	struct task_beancounter *bc = &task->task_bc;
-+
-+	if (bc->task_ub != NULL)
-+		uncharge_beancounter(bc->task_ub, UB_NUMPROC, 1);
-+
-+	put_beancounter(bc->exec_ub);
-+	put_beancounter(bc->task_ub);
-+	put_beancounter(bc->fork_sub);
-+
-+	/* can't be freed elsewhere, failures possible in the middle of fork */
-+	if (bc->task_freserv != NULL)
-+		kfree(bc->task_freserv);
-+
-+	bc->exec_ub = (struct user_beancounter *)0xdeadbcbc;
-+}
-+
-+/*
-+ * Files and file locks.
-+ */
-+
-+/*
-+ * Charge one unit of UB_NUMFILE (UB_HARD) to the beancounter recorded for
-+ * the slab object @f.  Returns 0 if there is none, otherwise the result of
-+ * charge_beancounter().
-+ */
-+int ub_file_charge(struct file *f)
-+{
-+	struct user_beancounter *owner;
-+
-+	/* No need to get_beancounter here since it's already got in slab */
-+	owner = slab_ub(f);
-+	if (owner != NULL)
-+		return charge_beancounter(owner, UB_NUMFILE, 1, UB_HARD);
-+
-+	return 0;
-+}
-+
-+/*
-+ * Uncharge one unit of UB_NUMFILE from the beancounter recorded for the
-+ * slab object @f, if any.
-+ */
-+void ub_file_uncharge(struct file *f)
-+{
-+	struct user_beancounter *owner;
-+
-+	/* Ub will be put in slab */
-+	owner = slab_ub(f);
-+	if (owner != NULL)
-+		uncharge_beancounter(owner, UB_NUMFILE, 1);
-+}
-+
-+/*
-+ * Charge one unit of UB_NUMFLOCK to the beancounter recorded for the slab
-+ * object @fl, with UB_HARD severity if @hard is non-zero and UB_SOFT
-+ * otherwise.  On success fl_charged is set so that ub_flock_uncharge()
-+ * knows there is something to undo.
-+ *
-+ * Returns 0 if there is no beancounter, otherwise the result of
-+ * charge_beancounter().
-+ */
-+int ub_flock_charge(struct file_lock *fl, int hard)
-+{
-+	struct user_beancounter *owner;
-+	int err;
-+
-+	/* No need to get_beancounter here since it's already got in slab */
-+	owner = slab_ub(fl);
-+	if (owner == NULL)
-+		return 0;
-+
-+	err = charge_beancounter(owner, UB_NUMFLOCK, 1,
-+			hard ? UB_HARD : UB_SOFT);
-+	if (err == 0)
-+		fl->fl_charged = 1;
-+
-+	return err;
-+}
-+
-+/*
-+ * Return the UB_NUMFLOCK unit held for @fl, if any: nothing happens
-+ * unless slab_ub() yields a beancounter and fl->fl_charged is set.
-+ * fl_charged is cleared after the uncharge.
-+ */
-+void ub_flock_uncharge(struct file_lock *fl)
-+{
-+	/* Ub will be put in slab */
-+	struct user_beancounter *owner = slab_ub(fl);
-+
-+	if (owner != NULL && fl->fl_charged) {
-+		uncharge_beancounter(owner, UB_NUMFLOCK, 1);
-+		fl->fl_charged = 0;
-+	}
-+}
-+
-+/*
-+ * Signal handling
-+ */
-+
-+/*
-+ * Charge @size bytes of UB_KMEMSIZE and one UB_NUMSIGINFO unit to @ub,
-+ * both UB_HARD, under ub->ub_lock.  If the second charge is refused the
-+ * first one is rolled back.  Returns 0 on success, -ENOMEM otherwise.
-+ */
-+static int do_ub_siginfo_charge(struct user_beancounter *ub,
-+		unsigned long size)
-+{
-+	unsigned long irqflags;
-+	int rc = -ENOMEM;
-+
-+	spin_lock_irqsave(&ub->ub_lock, irqflags);
-+	if (!__charge_beancounter_locked(ub, UB_KMEMSIZE, size, UB_HARD)) {
-+		if (!__charge_beancounter_locked(ub, UB_NUMSIGINFO, 1, UB_HARD))
-+			rc = 0;
-+		else
-+			__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
-+	}
-+	spin_unlock_irqrestore(&ub->ub_lock, irqflags);
-+
-+	return rc;
-+}
-+
-+/*
-+ * Counterpart of do_ub_siginfo_charge(): give back @size bytes of
-+ * UB_KMEMSIZE and one UB_NUMSIGINFO unit to @ub under ub->ub_lock.
-+ */
-+static void do_ub_siginfo_uncharge(struct user_beancounter *ub,
-+		unsigned long size)
-+{
-+	unsigned long irqflags;
-+
-+	spin_lock_irqsave(&ub->ub_lock, irqflags);
-+	__uncharge_beancounter_locked(ub, UB_KMEMSIZE, size);
-+	__uncharge_beancounter_locked(ub, UB_NUMSIGINFO, 1);
-+	spin_unlock_irqrestore(&ub->ub_lock, irqflags);
-+}
-+
-+/*
-+ * Charge the sigqueue entry @sq to @ub and to every ancestor reachable
-+ * through ->parent.  If any level refuses, the levels already charged are
-+ * uncharged again and -ENOMEM is returned.  On success sq->sig_ub is set
-+ * to get_beancounter(ub) and 0 is returned.
-+ */
-+int ub_siginfo_charge(struct sigqueue *sq, struct user_beancounter *ub)
-+{
-+	struct user_beancounter *cur, *undo;
-+	unsigned long bytes;
-+
-+	bytes = CHARGE_SIZE(kmem_obj_memusage(sq));
-+
-+	cur = ub;
-+	while (cur != NULL) {
-+		if (do_ub_siginfo_charge(cur, bytes)) {
-+			/* roll back everything below the level that failed */
-+			for (undo = ub; undo != cur; undo = undo->parent)
-+				do_ub_siginfo_uncharge(undo, bytes);
-+			return -ENOMEM;
-+		}
-+		cur = cur->parent;
-+	}
-+
-+	sq->sig_ub = get_beancounter(ub);
-+	return 0;
-+}
-+EXPORT_SYMBOL(ub_siginfo_charge);
-+
-+/*
-+ * Undo ub_siginfo_charge(): detach sq->sig_ub, uncharge it and all of
-+ * its ancestors, then put the beancounter that was attached.
-+ */
-+void ub_siginfo_uncharge(struct sigqueue *sq)
-+{
-+	struct user_beancounter *owner, *cur;
-+	unsigned long bytes;
-+
-+	owner = sq->sig_ub;
-+	sq->sig_ub = NULL;
-+	bytes = CHARGE_SIZE(kmem_obj_memusage(sq));
-+
-+	cur = owner;
-+	while (cur != NULL) {
-+		do_ub_siginfo_uncharge(cur, bytes);
-+		cur = cur->parent;
-+	}
-+	put_beancounter(owner);
-+}
-+
-+/*
-+ * PTYs
-+ */
-+
-+/*
-+ * Charge one UB_NUMPTY unit (UB_HARD) for @tty.  Only done when slab_ub()
-+ * yields a beancounter, the driver subtype is PTY_TYPE_MASTER and the
-+ * TTY_CHARGED bit is not yet set; the bit is set after a successful
-+ * charge.  Returns 0 when nothing had to be done, otherwise the result of
-+ * charge_beancounter().
-+ */
-+int ub_pty_charge(struct tty_struct *tty)
-+{
-+	struct user_beancounter *owner = slab_ub(tty);
-+	int rc;
-+
-+	if (!owner)
-+		return 0;
-+	if (tty->driver->subtype != PTY_TYPE_MASTER)
-+		return 0;
-+	if (test_bit(TTY_CHARGED, &tty->flags))
-+		return 0;
-+
-+	rc = charge_beancounter(owner, UB_NUMPTY, 1, UB_HARD);
-+	if (rc == 0)
-+		set_bit(TTY_CHARGED, &tty->flags);
-+	return rc;
-+}
-+
-+/*
-+ * Return the UB_NUMPTY unit held for @tty.  Acts only when slab_ub()
-+ * yields a beancounter, the driver subtype is PTY_TYPE_MASTER and the
-+ * TTY_CHARGED bit is set; the bit is cleared after the uncharge.
-+ */
-+void ub_pty_uncharge(struct tty_struct *tty)
-+{
-+	struct user_beancounter *owner = slab_ub(tty);
-+
-+	if (!owner)
-+		return;
-+	if (tty->driver->subtype != PTY_TYPE_MASTER)
-+		return;
-+	if (!test_bit(TTY_CHARGED, &tty->flags))
-+		return;
-+
-+	uncharge_beancounter(owner, UB_NUMPTY, 1);
-+	clear_bit(TTY_CHARGED, &tty->flags);
-+}
-diff -upr linux-2.6.16.orig/kernel/ub/ub_net.c linux-2.6.16-026test009/kernel/ub/ub_net.c
---- linux-2.6.16.orig/kernel/ub/ub_net.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_net.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,1043 @@
-+/*
-+ * linux/kernel/ub/ub_net.c
-+ *
-+ * Copyright (C) 1998-2004 Andrey V. Savochkin <saw@saw.sw.com.sg>
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * TODO:
-+ * - sizeof(struct inode) charge
-+ * = tcp_mem_schedule() feedback based on ub limits
-+ * + measures so that one socket won't exhaust all send buffers,
-+ * see bug in bugzilla
-+ * = sk->socket check for NULL in snd_wakeups
-+ * (tcp_write_space checks for NULL itself)
-+ * + in tcp_close(), orphaned socket abortion should be based on ubc
-+ * resources (same in tcp_out_of_resources)
-+ * Beancounter should also have separate orphaned socket counter...
-+ * + for rcv, in-order segment should be accepted
-+ * if only barrier is exceeded
-+ * = tcp_rmem_schedule() feedback based on ub limits
-+ * - repair forward_alloc mechanism for receive buffers
-+ * Its idea is that some buffer space is pre-charged so that receive fast
-+ * path doesn't need to take spinlocks and do other heavy stuff
-+ * + tcp_prune_queue actions based on ub limits
-+ * + window adjustments depending on available buffers for receive
-+ * - window adjustments depending on available buffers for send
-+ * + race around usewreserv
-+ * + avoid allocating new page for each tiny-gram, see letter from ANK
-+ * + rename ub_sock_lock
-+ * + sk->sleep wait queue probably can be used for all wakeups, and
-+ * sk->ub_wait is unnecessary
-+ * + for UNIX sockets, the current algorithm will lead to
-+ * UB_UNIX_MINBUF-sized messages only for non-blocking case
-+ * - charge for af_packet sockets
-+ * + all datagram sockets should be charged to NUMUNIXSOCK
-+ * - we do not charge for skb copies and clones staying in device queues
-+ * + live-lock if number of sockets is big and buffer limits are small
-+ * [diff-ubc-dbllim3]
-+ * - check that multiple readers/writers on the same socket won't cause fatal
-+ * consequences
-+ * - check allocation/charge orders
-+ * + There is potential problem with callback_lock. In *snd_wakeup we take
-+ * beancounter first, in sock_def_error_report - callback_lock first.
-+ * then beancounter. This is not a problem if callback_lock taken
-+ * readonly, but anyway...
-+ * - SKB_CHARGE_SIZE doesn't include the space wasted by slab allocator
-+ * General kernel problems:
-+ * - in tcp_sendmsg(), if allocation fails, non-blocking sockets with ASYNC
-+ * notification won't get signals
-+ * - datagram_poll looks racy
-+ *
-+ */
-+
-+#include <linux/net.h>
-+#include <linux/slab.h>
-+#include <linux/kmem_cache.h>
-+#include <linux/gfp.h>
-+#include <linux/err.h>
-+#include <linux/socket.h>
-+#include <linux/module.h>
-+#include <linux/sched.h>
-+
-+#include <net/sock.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_net.h>
-+#include <ub/ub_debug.h>
-+
-+
-+/* Skb truesize definition. Bad place. Den */
-+
-+/*
-+ * Charge size of the linear part of @skb: skb_charge_size() applied to
-+ * the head buffer length (end - head) plus sizeof(struct skb_shared_info).
-+ */
-+static inline int skb_chargesize_head(struct sk_buff *skb)
-+{
-+	return skb_charge_size(sizeof(struct skb_shared_info) +
-+			(skb->end - skb->head));
-+}
-+
-+/*
-+ * Total charge size of @skb: its head (skb_chargesize_head()), PAGE_SIZE
-+ * for each of its nr_frags page fragments, plus, recursively, the full
-+ * charge size of every skb chained on its frag_list.
-+ */
-+int skb_charge_fullsize(struct sk_buff *skb)
-+{
-+	struct sk_buff *frag;
-+	int total;
-+
-+	total = skb_chargesize_head(skb) +
-+		PAGE_SIZE * skb_shinfo(skb)->nr_frags;
-+
-+	/* an empty frag_list simply skips the loop */
-+	frag = skb_shinfo(skb)->frag_list;
-+	while (frag != NULL) {
-+		total += skb_charge_fullsize(frag);
-+		frag = frag->next;
-+	}
-+	return total;
-+}
-+EXPORT_SYMBOL(skb_charge_fullsize);
-+
-+static int ub_sock_makewreserv_locked(struct sock *sk,
-+ int bufid, int sockid, unsigned long size);
-+
-+/*
-+ * Returns 1 when @count has reached a quarter of the UB_NUMTCPSOCK
-+ * barrier of the top-level beancounter of @sk, 0 otherwise (also 0 for
-+ * sockets without a beancounter).
-+ */
-+int __ub_too_many_orphans(struct sock *sk, int count)
-+{
-+	struct user_beancounter *top;
-+
-+	if (!sock_has_ubc(sk))
-+		return 0;
-+
-+	top = sock_bc(sk)->ub;
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	if (count >= (top->ub_parms[UB_NUMTCPSOCK].barrier >> 2))
-+		return 1;
-+	return 0;
-+}
-+
-+/*
-+ * Queueing
-+ */
-+
-+/*
-+ * Retry the pending send-buffer reservations of the sockets queued on
-+ * ub->ub_other_sk_list, head first, and stop at the first socket whose
-+ * reservation (skbc->ub_waitspc bytes of UB_OTHERSOCKBUF) still cannot be
-+ * made.  A socket whose reservation succeeds gets its sk_write_space()
-+ * callback invoked and is removed from the list.
-+ *
-+ * The function drops and re-takes ub->ub_lock with plain
-+ * spin_unlock()/spin_lock(), so it is entered with that lock held; the
-+ * callers in this file hold it via spin_lock_irqsave().
-+ */
-+static void ub_sock_snd_wakeup(struct user_beancounter *ub)
-+{
-+ struct list_head *p;
-+ struct sock_beancounter *skbc;
-+ struct sock *sk;
-+ struct user_beancounter *cub;
-+ unsigned long added;
-+
-+ while (!list_empty(&ub->ub_other_sk_list)) {
-+ p = ub->ub_other_sk_list.next;
-+ skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
-+ sk = skbc_sock(skbc);
-+ ub_debug(UBD_NET_SLEEP, "Found sock to wake up\n");
-+ /* unsigned arithmetic: added ends up as new minus old poll_reserv */
-+ added = -skbc->poll_reserv;
-+ if (ub_sock_makewreserv_locked(sk, UB_OTHERSOCKBUF,
-+ UB_NUMOTHERSOCK, skbc->ub_waitspc))
-+ break;
-+ added += skbc->poll_reserv;
-+
-+ /*
-+ * See comments in ub_tcp_snd_wakeup.
-+ * Locking note: both unix_write_space and
-+ * sock_def_write_space take callback_lock themselves.
-+ * We take it here just to be on the safe side and to
-+ * act the same way as ub_tcp_snd_wakeup does.
-+ *
-+ * NOTE(review): no callback_lock is actually taken in this
-+ * function; the last sentence above looks stale.
-+ */
-+ sk->sk_write_space(sk);
-+
-+ list_del_init(&skbc->ub_sock_list);
-+
-+ /*
-+ * The reservation above was accounted on ub only.  If the socket
-+ * belongs to a different beancounter, charge the added amount there
-+ * via charge_beancounter_notop(), with ub_lock dropped and a
-+ * reference held on that beancounter.
-+ */
-+ if (skbc->ub != ub && added) {
-+ cub = get_beancounter(skbc->ub);
-+ spin_unlock(&ub->ub_lock);
-+ charge_beancounter_notop(cub, UB_OTHERSOCKBUF, added);
-+ put_beancounter(cub);
-+ spin_lock(&ub->ub_lock);
-+ }
-+ }
-+}
-+
-+/*
-+ * TCP counterpart of ub_sock_snd_wakeup(): retry the pending UB_TCPSNDBUF
-+ * reservations of the sockets queued on ub->ub_tcp_sk_list, head first,
-+ * and stop at the first socket whose reservation still cannot be made.
-+ * Sockets with sk_socket == NULL are only removed from the list.
-+ *
-+ * The function drops and re-takes ub->ub_lock with plain
-+ * spin_unlock()/spin_lock(), so it is entered with that lock held; the
-+ * callers in this file hold it via spin_lock_irqsave().
-+ */
-+static void ub_tcp_snd_wakeup(struct user_beancounter *ub)
-+{
-+ struct list_head *p;
-+ struct sock *sk;
-+ struct sock_beancounter *skbc;
-+ struct socket *sock;
-+ struct user_beancounter *cub;
-+ unsigned long added;
-+
-+ while (!list_empty(&ub->ub_tcp_sk_list)) {
-+ p = ub->ub_tcp_sk_list.next;
-+ skbc = list_entry(p, struct sock_beancounter, ub_sock_list);
-+ sk = skbc_sock(skbc);
-+
-+ /* stays 0 on the "sk being destroyed" path, so nothing is charged */
-+ added = 0;
-+ sock = sk->sk_socket;
-+ if (sock == NULL)
-+ /* sk being destroyed */
-+ goto cont;
-+
-+ ub_debug(UBD_NET_SLEEP,
-+ "Checking queue, waiting %lu, reserv %lu\n",
-+ skbc->ub_waitspc, skbc->poll_reserv);
-+ /* unsigned arithmetic: added ends up as new minus old poll_reserv */
-+ added = -skbc->poll_reserv;
-+ if (ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF,
-+ UB_NUMTCPSOCK, skbc->ub_waitspc))
-+ break;
-+ added += skbc->poll_reserv;
-+
-+ /*
-+ * Send async notifications and wake up.
-+ * Locking note: we get callback_lock here because
-+ * tcp_write_space is over-optimistic about calling context
-+ * (socket lock is presumed). So we get the lock here although
-+ * it belongs to the callback.
-+ *
-+ * NOTE(review): no callback_lock is actually taken in this
-+ * function; the locking note above looks stale.
-+ */
-+ sk->sk_write_space(sk);
-+
-+cont:
-+ list_del_init(&skbc->ub_sock_list);
-+
-+ /*
-+ * The reservation above was accounted on ub only.  If the socket
-+ * belongs to a different beancounter, charge the added amount there
-+ * via charge_beancounter_notop(), with ub_lock dropped and a
-+ * reference held on that beancounter.
-+ */
-+ if (skbc->ub != ub && added) {
-+ cub = get_beancounter(skbc->ub);
-+ spin_unlock(&ub->ub_lock);
-+ charge_beancounter_notop(cub, UB_TCPSNDBUF, added);
-+ put_beancounter(cub);
-+ spin_lock(&ub->ub_lock);
-+ }
-+ }
-+}
-+
-+/*
-+ * Try once more to reserve @size bytes of resource @res for @sk and, if
-+ * that fails, queue the socket for a later wakeup.
-+ *
-+ * On success the current task is put back into TASK_RUNNING and the newly
-+ * added reserve is charged to the socket's own beancounter through
-+ * charge_beancounter_notop().  On failure @size is remembered in
-+ * skbc->ub_waitspc and, unless the socket is already on a list, it is
-+ * appended to ub_tcp_sk_list (UB_TCPSNDBUF) or ub_other_sk_list
-+ * (UB_OTHERSOCKBUF) of the top-level beancounter; any other @res is a
-+ * BUG().  Sockets without a beancounter are ignored.
-+ */
-+void ub_sock_snd_queue_add(struct sock *sk, int res, unsigned long size)
-+{
-+ unsigned long flags;
-+ struct sock_beancounter *skbc;
-+ struct user_beancounter *ub;
-+ unsigned long added_reserv;
-+
-+ if (!sock_has_ubc(sk))
-+ return;
-+
-+ skbc = sock_bc(sk);
-+ /* walk up to the top-level beancounter; its ub_lock guards the lists */
-+ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub_debug(UBD_NET_SLEEP, "attempt to charge for %lu\n", size);
-+ /* unsigned arithmetic: becomes new minus old poll_reserv below */
-+ added_reserv = -skbc->poll_reserv;
-+ if (!ub_sock_makewreserv_locked(sk, res, bid2sid(res), size)) {
-+ /*
-+ * It looks a bit hackish, but it is compatible with both
-+ * wait_for_xx_ubspace and poll.
-+ * This __set_current_state is equivalent to a wakeup event
-+ * right after spin_unlock_irqrestore.
-+ */
-+ __set_current_state(TASK_RUNNING);
-+ added_reserv += skbc->poll_reserv;
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+ if (added_reserv)
-+ charge_beancounter_notop(skbc->ub, res, added_reserv);
-+ return;
-+ }
-+
-+ ub_debug(UBD_NET_SLEEP, "Adding sk to queue\n");
-+ skbc->ub_waitspc = size;
-+ /* already queued: only the awaited size is updated */
-+ if (!list_empty(&skbc->ub_sock_list)) {
-+ ub_debug(UBD_NET_SOCKET,
-+ "re-adding socket to beancounter %p.\n", ub);
-+ goto out;
-+ }
-+
-+ switch (res) {
-+ case UB_TCPSNDBUF:
-+ list_add_tail(&skbc->ub_sock_list,
-+ &ub->ub_tcp_sk_list);
-+ break;
-+ case UB_OTHERSOCKBUF:
-+ list_add_tail(&skbc->ub_sock_list,
-+ &ub->ub_other_sk_list);
-+ break;
-+ default:
-+ BUG();
-+ }
-+out:
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+
-+/*
-+ * Helpers
-+ */
-+
-+/*
-+ * Record in the skb's beancounter data that @size units of @resource were
-+ * charged on behalf of @sk: stores the socket's beancounter, the size and
-+ * the resource id.  Does nothing for sockets without a beancounter; a
-+ * socket that claims to have one but has ub == NULL is a BUG().
-+ */
-+void ub_skb_set_charge(struct sk_buff *skb, struct sock *sk,
-+		unsigned long size, int resource)
-+{
-+	struct user_beancounter *owner;
-+
-+	if (!sock_has_ubc(sk))
-+		return;
-+
-+	owner = sock_bc(sk)->ub;
-+	if (owner == NULL)
-+		BUG();
-+
-+	skb_bc(skb)->ub = owner;
-+	skb_bc(skb)->charged = size;
-+	skb_bc(skb)->resource = resource;
-+
-+	/* Ugly. Ugly. Skb in sk writequeue can live without ref to sk */
-+	if (skb->sk == NULL)
-+		skb->sk = sk;
-+}
-+
-+/*
-+ * Reset the skb's beancounter data (ub, charged, resource) to the
-+ * "nothing charged" state.
-+ */
-+static inline void ub_skb_set_uncharge(struct sk_buff *skb)
-+{
-+	skb_bc(skb)->ub = NULL;
-+	skb_bc(skb)->charged = 0;
-+	skb_bc(skb)->resource = 0;
-+}
-+
-+/*
-+ * Give @size units of @resource back to @ub (if non-NULL) using the
-+ * _locked variant, and lower skbc->ub_wcharged by @size, clamping at 0,
-+ * when @skbc is non-NULL.
-+ */
-+static inline void __uncharge_sockbuf(struct sock_beancounter *skbc,
-+		struct user_beancounter *ub, int resource, unsigned long size)
-+{
-+	if (ub != NULL)
-+		__uncharge_beancounter_locked(ub, resource, size);
-+
-+	if (skbc == NULL)
-+		return;
-+
-+	skbc->ub_wcharged = (skbc->ub_wcharged > size) ?
-+			skbc->ub_wcharged - size : 0;
-+}
-+
-+/*
-+ * Recompute ub_rmem_thres of the top-level beancounter of @skub as
-+ * UB_TCPRCVBUF barrier / (UB_NUMTCPSOCK held + 1).  Nothing is done when
-+ * @skub or skub->ub is NULL.
-+ */
-+static void ub_update_rmem_thres(struct sock_beancounter *skub)
-+{
-+	struct user_beancounter *top;
-+
-+	if (!skub || !skub->ub)
-+		return;
-+
-+	top = skub->ub;
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	top->ub_rmem_thres = top->ub_parms[UB_TCPRCVBUF].barrier /
-+		(top->ub_parms[UB_NUMTCPSOCK].held + 1);
-+}
-+/*
-+ * Initialise the beancounter data of @skb by zeroing it.  @gfp_mask is
-+ * not used.  Always returns 0.
-+ */
-+inline int ub_skb_alloc_bc(struct sk_buff *skb, int gfp_mask)
-+{
-+	memset(skb_bc(skb), 0, sizeof(struct skb_beancounter));
-+
-+	return 0;
-+}
-+
-+/* Counterpart of ub_skb_alloc_bc(); there is nothing to release. */
-+inline void ub_skb_free_bc(struct sk_buff *skb)
-+{
-+}
-+
-+
-+/*
-+ * Charge socket number
-+ */
-+
-+/* Zero the sock_beancounter of @sk.  Always returns 0. */
-+static inline int sk_alloc_beancounter(struct sock *sk)
-+{
-+	memset(sock_bc(sk), 0, sizeof(struct sock_beancounter));
-+
-+	return 0;
-+}
-+
-+/* Counterpart of sk_alloc_beancounter(); there is nothing to release. */
-+static inline void sk_free_beancounter(struct sock *sk)
-+{
-+}
-+
-+/*
-+ * Charge one unit of the socket-count resource @res (UB_HARD) for @sk to
-+ * the beancounter returned by get_exec_ub().
-+ *
-+ * Returns 0 without touching @sk when there is no exec beancounter.
-+ * Otherwise the sock_beancounter is initialised and, on success, holds a
-+ * reference to the charged beancounter.  Returns -ENOMEM when the
-+ * beancounter setup or the charge fails.
-+ */
-+static int __sock_charge(struct sock *sk, int res)
-+{
-+	struct user_beancounter *exec_ub = get_exec_ub();
-+	struct sock_beancounter *skbc;
-+
-+	if (exec_ub == NULL)
-+		return 0;
-+	if (sk_alloc_beancounter(sk) < 0)
-+		return -ENOMEM;
-+
-+	skbc = sock_bc(sk);
-+	INIT_LIST_HEAD(&skbc->ub_sock_list);
-+
-+	if (charge_beancounter(exec_ub, res, 1, UB_HARD) < 0) {
-+		sk_free_beancounter(sk);
-+		return -ENOMEM;
-+	}
-+
-+	/* TCP listen sock or process keeps reference to UB */
-+	skbc->ub = get_beancounter(exec_ub);
-+	return 0;
-+}
-+
-+/*
-+ * Charge @sk as a TCP socket (UB_NUMTCPSOCK) and then refresh the receive
-+ * memory threshold via ub_update_rmem_thres(); the refresh is attempted
-+ * whatever the charge result was.  Returns the result of __sock_charge().
-+ */
-+int ub_tcp_sock_charge(struct sock *sk)
-+{
-+	const int rc = __sock_charge(sk, UB_NUMTCPSOCK);
-+
-+	ub_update_rmem_thres(sock_bc(sk));
-+	return rc;
-+}
-+
-+/* Charge @sk as a non-TCP socket (UB_NUMOTHERSOCK); see __sock_charge(). */
-+int ub_other_sock_charge(struct sock *sk)
-+{
-+	const int rc = __sock_charge(sk, UB_NUMOTHERSOCK);
-+
-+	return rc;
-+}
-+
-+EXPORT_SYMBOL(ub_other_sock_charge);
-+
-+/*
-+ * Charge @sk to the TCP or the "other" socket counter, as decided by
-+ * IS_TCP_SOCK(@family, @type).
-+ */
-+int ub_sock_charge(struct sock *sk, int family, int type)
-+{
-+	if (IS_TCP_SOCK(family, type))
-+		return ub_tcp_sock_charge(sk);
-+
-+	return ub_other_sock_charge(sk);
-+}
-+
-+/*
-+ * Uncharge socket number
-+ */
-+
-+/*
-+ * Release everything accounted for @sk itself: remove it from the
-+ * top-level beancounter's wait list if it is queued, give back its
-+ * poll_reserv to the send-buffer resource and one unit to the
-+ * socket-count resource (TCP or "other" flavour as decided by
-+ * IS_TCP_SOCK()), first on the top-level beancounter under its ub_lock and
-+ * then on the lower levels through uncharge_beancounter_notop(), and
-+ * finally put the socket's beancounter.  Sockets without a beancounter
-+ * are ignored.
-+ */
-+void ub_sock_uncharge(struct sock *sk)
-+{
-+ int is_tcp_sock;
-+ unsigned long flags;
-+ struct sock_beancounter *skbc;
-+ struct user_beancounter *ub;
-+ unsigned long reserv;
-+
-+ if (!sock_has_ubc(sk))
-+ return;
-+
-+ is_tcp_sock = IS_TCP_SOCK(sk->sk_family, sk->sk_type);
-+ skbc = sock_bc(sk);
-+ ub_debug(UBD_NET_SOCKET, "Calling ub_sock_uncharge on %p\n", sk);
-+
-+ /* walk up to the top-level beancounter */
-+ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
-+
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ if (!list_empty(&skbc->ub_sock_list)) {
-+ ub_debug(UBD_NET_SOCKET,
-+ "ub_sock_uncharge: removing from ub(%p) queue.\n",
-+ skbc);
-+ list_del_init(&skbc->ub_sock_list);
-+ }
-+
-+ reserv = skbc->poll_reserv;
-+ __uncharge_beancounter_locked(ub,
-+ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
-+ reserv);
-+ __uncharge_beancounter_locked(ub,
-+ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
-+
-+ /* The check sk->sk_family != PF_NETLINK is made as the skb is
-+ * queued to the kernel end of socket while changed to the user one.
-+ * Den */
-+ /*
-+ * ub_wcharged larger than the returned reserve on a non-netlink
-+ * socket is reported with a warning and the remainder is kept;
-+ * otherwise the counter is simply reset.
-+ */
-+ if (skbc->ub_wcharged > reserv &&
-+ sk->sk_family != PF_NETLINK) {
-+ skbc->ub_wcharged -= reserv;
-+ printk(KERN_WARNING
-+ "ub_sock_uncharge: wch=%lu for ub %p (%d).\n",
-+ skbc->ub_wcharged, skbc->ub, skbc->ub->ub_uid);
-+ } else
-+ skbc->ub_wcharged = 0;
-+ skbc->poll_reserv = 0;
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+ /* same amounts again for the levels below the top-level beancounter */
-+ uncharge_beancounter_notop(skbc->ub,
-+ (is_tcp_sock ? UB_TCPSNDBUF : UB_OTHERSOCKBUF),
-+ reserv);
-+ uncharge_beancounter_notop(skbc->ub,
-+ (is_tcp_sock ? UB_NUMTCPSOCK : UB_NUMOTHERSOCK), 1);
-+
-+ put_beancounter(skbc->ub);
-+ sk_free_beancounter(sk);
-+}
-+
-+/*
-+ * Send - receive buffers
-+ */
-+
-+/*
-+ * Special case for netlink_dump - (un)charges precalculated size.
-+ *
-+ * Charges skb_charge_fullsize(@skb) of UB_DGRAMRCVBUF (UB_HARD) to the
-+ * beancounter of @sk and, on success, records the charge in @skb.
-+ * Returns 0 for sockets without a beancounter, otherwise the result of
-+ * charge_beancounter().
-+ */
-+int ub_nlrcvbuf_charge(struct sk_buff *skb, struct sock *sk)
-+{
-+	unsigned long bytes;
-+	int rc;
-+
-+	if (!sock_has_ubc(sk))
-+		return 0;
-+
-+	bytes = skb_charge_fullsize(skb);
-+	rc = charge_beancounter(sock_bc(sk)->ub,
-+			UB_DGRAMRCVBUF, bytes, UB_HARD);
-+	if (rc >= 0)
-+		ub_skb_set_charge(skb, sk, bytes, UB_DGRAMRCVBUF);
-+	return rc;
-+}
-+
-+/*
-+ * Poll reserv accounting
-+ */
-+/*
-+ * Raise the write reserve (poll_reserv) of @sk to @size by charging the
-+ * difference to resource @bufid of the socket's top-level beancounter.
-+ * @sockid is the matching socket-count resource, whose limit enters the
-+ * per-socket fairness check.
-+ *
-+ * Returns 0 when the reserve is (now) at least @size, including the cases
-+ * of a socket without beancounter or an already sufficient reserve.
-+ * Returns -ENOMEM when the charge is refused; held and ub_wcharged are
-+ * then rolled back and failcnt of @bufid is incremented.
-+ *
-+ * The function takes no lock itself; the callers visible in this file
-+ * hold the top-level beancounter's ub_lock.
-+ */
-+static int ub_sock_makewreserv_locked(struct sock *sk,
-+ int bufid, int sockid, unsigned long size)
-+{
-+ unsigned long wcharge_added;
-+ struct sock_beancounter *skbc;
-+ struct user_beancounter *ub;
-+
-+ if (!sock_has_ubc(sk))
-+ goto out;
-+
-+ skbc = sock_bc(sk);
-+ if (skbc->poll_reserv >= size) /* no work to be done */
-+ goto out;
-+
-+ /* charge tentatively on the top-level beancounter; undone at unroll */
-+ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
-+ ub->ub_parms[bufid].held += size - skbc->poll_reserv;
-+
-+ wcharge_added = 0;
-+ /*
-+ * Logic:
-+ * 1) when used memory hits barrier, we set wmem_pressure;
-+ * wmem_pressure is reset under barrier/2;
-+ * between barrier/2 and barrier we limit per-socket buffer growth;
-+ * 2) each socket is guaranteed to get (limit-barrier)/maxsockets
-+ * calculated on the base of memory eaten after the barrier is hit
-+ */
-+ /* NOTE(review): skbc was already set above; this re-assignment is redundant */
-+ skbc = sock_bc(sk);
-+ if (!ub_hfbarrier_hit(ub, bufid)) {
-+ if (ub->ub_wmem_pressure)
-+ ub_debug(UBD_NET_SEND, "makewres: pressure -> 0 "
-+ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
-+ sk, size, skbc->poll_reserv,
-+ ub->ub_parms[bufid].held,
-+ skbc->ub_wcharged, sk->sk_sndbuf);
-+ ub->ub_wmem_pressure = 0;
-+ }
-+ if (ub_barrier_hit(ub, bufid)) {
-+ if (!ub->ub_wmem_pressure)
-+ ub_debug(UBD_NET_SEND, "makewres: pressure -> 1 "
-+ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
-+ sk, size, skbc->poll_reserv,
-+ ub->ub_parms[bufid].held,
-+ skbc->ub_wcharged, sk->sk_sndbuf);
-+ ub->ub_wmem_pressure = 1;
-+ wcharge_added = size - skbc->poll_reserv;
-+ skbc->ub_wcharged += wcharge_added;
-+ /* per-socket share check: wcharged * max sockets must fit in limit - barrier */
-+ if (skbc->ub_wcharged * ub->ub_parms[sockid].limit +
-+ ub->ub_parms[bufid].barrier >
-+ ub->ub_parms[bufid].limit)
-+ goto unroll;
-+ }
-+ if (ub->ub_parms[bufid].held > ub->ub_parms[bufid].limit)
-+ goto unroll;
-+
-+ ub_adjust_maxheld(ub, bufid);
-+ skbc->poll_reserv = size;
-+out:
-+ return 0;
-+
-+unroll:
-+ ub_debug(UBD_NET_SEND,
-+ "makewres: deny "
-+ "sk %p sz %lu pr %lu hd %lu wc %lu sb %d.\n",
-+ sk, size, skbc->poll_reserv, ub->ub_parms[bufid].held,
-+ skbc->ub_wcharged, sk->sk_sndbuf);
-+ /* undo the tentative accounting done above */
-+ skbc->ub_wcharged -= wcharge_added;
-+ ub->ub_parms[bufid].failcnt++;
-+ ub->ub_parms[bufid].held -= size - skbc->poll_reserv;
-+ return -ENOMEM;
-+}
-+
-+/*
-+ * Make sure @sk has a write reserve of at least @size for resource
-+ * @bufid.  The reserve is made on the top-level beancounter under its
-+ * ub_lock; whatever was added is then charged to the socket's own
-+ * beancounter through charge_beancounter_notop().  Returns 0 on success
-+ * (or when nothing had to be done) and the error from
-+ * ub_sock_makewreserv_locked() otherwise.
-+ */
-+int ub_sock_make_wreserv(struct sock *sk, int bufid, unsigned long size)
-+{
-+	struct sock_beancounter *skbc = sock_bc(sk);
-+	struct user_beancounter *top;
-+	unsigned long irqflags;
-+	unsigned long reserv_before, delta;
-+	int rc;
-+
-+	/*
-+	 * This function provides that there is sufficient reserve upon return
-+	 * only if sk has only one user. We can check poll_reserv without
-+	 * serialization and avoid locking if the reserve already exists.
-+	 */
-+	if (!sock_has_ubc(sk))
-+		return 0;
-+	if (skbc->poll_reserv >= size)
-+		return 0;
-+
-+	top = skbc->ub;
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, irqflags);
-+	reserv_before = skbc->poll_reserv;
-+	rc = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
-+	delta = skbc->poll_reserv - reserv_before;
-+	spin_unlock_irqrestore(&top->ub_lock, irqflags);
-+
-+	if (delta != 0)
-+		charge_beancounter_notop(skbc->ub, bufid, delta);
-+
-+	return rc;
-+}
-+
-+/*
-+ * Reserve @size of resource @bufid for @sk and consume it at once: on
-+ * success poll_reserv is raised to @size and then lowered by @size again.
-+ * The amount added to the reserve is charged to the socket's own
-+ * beancounter through charge_beancounter_notop().  Returns 0 on success
-+ * or for sockets without a beancounter, otherwise the error from
-+ * ub_sock_makewreserv_locked().
-+ */
-+int ub_sock_get_wreserv(struct sock *sk, int bufid, unsigned long size)
-+{
-+	struct sock_beancounter *skbc;
-+	struct user_beancounter *top;
-+	unsigned long irqflags;
-+	unsigned long reserv_before, delta;
-+	int rc;
-+
-+	if (!sock_has_ubc(sk))
-+		return 0;
-+
-+	skbc = sock_bc(sk);
-+	top = skbc->ub;
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, irqflags);
-+	reserv_before = skbc->poll_reserv;
-+	rc = ub_sock_makewreserv_locked(sk, bufid, bid2sid(bufid), size);
-+	delta = skbc->poll_reserv - reserv_before;
-+	if (rc == 0)
-+		skbc->poll_reserv -= size;
-+	spin_unlock_irqrestore(&top->ub_lock, irqflags);
-+
-+	if (delta != 0)
-+		charge_beancounter_notop(skbc->ub, bufid, delta);
-+
-+	return rc;
-+}
-+
-+/*
-+ * Return @size of previously taken write reserve of resource @bufid to
-+ * @sk, keeping at most @ressize in poll_reserv.  Anything above @ressize
-+ * is uncharged from the top-level beancounter (under its ub_lock) and
-+ * then from the lower levels, and ub_wcharged is lowered by the same
-+ * amount, clamping at 0.
-+ *
-+ * Afterwards the sockets waiting for this resource are given a chance to
-+ * make their reservation: the TCP wait list is processed for
-+ * UB_TCPSNDBUF and the "other" wait list for any other @bufid.  (The
-+ * original code always processed the TCP list, so returning
-+ * UB_OTHERSOCKBUF reserve never woke the sockets queued on
-+ * ub_other_sk_list.)
-+ */
-+void ub_sock_ret_wreserv(struct sock *sk, int bufid,
-+		unsigned long size, unsigned long ressize)
-+{
-+	struct sock_beancounter *skbc;
-+	struct user_beancounter *ub;
-+	unsigned long extra;
-+	unsigned long flags;
-+
-+	if (!sock_has_ubc(sk))
-+		return;
-+
-+	extra = 0;
-+	skbc = sock_bc(sk);
-+	/* walk up to the top-level beancounter */
-+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
-+	spin_lock_irqsave(&ub->ub_lock, flags);
-+	skbc->poll_reserv += size;
-+	if (skbc->poll_reserv > ressize) {
-+		extra = skbc->poll_reserv - ressize;
-+		__uncharge_beancounter_locked(ub, bufid, extra);
-+
-+		if (skbc->ub_wcharged > extra)
-+			skbc->ub_wcharged -= extra;
-+		else
-+			skbc->ub_wcharged = 0;
-+		skbc->poll_reserv = ressize;
-+	}
-+
-+	/* wake the waiters of the resource that was just returned */
-+	if (bufid == UB_TCPSNDBUF)
-+		ub_tcp_snd_wakeup(ub);
-+	else
-+		ub_sock_snd_wakeup(ub);
-+	spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+	if (extra)
-+		uncharge_beancounter_notop(skbc->ub, bufid, extra);
-+}
-+
-+/*
-+ * Sleep on sk->sk_sleep until a write reserve of @size bytes of
-+ * UB_OTHERSOCKBUF can be made for @sk.  The loop also ends when a signal
-+ * is pending, the socket has SEND_SHUTDOWN set, or sk_err is non-zero.
-+ * There is no explicit timeout check: @timeo is only passed to
-+ * schedule_timeout().  Returns the last value of @timeo, i.e. what the
-+ * last schedule_timeout() returned, or the argument unchanged if the task
-+ * never slept.
-+ */
-+long ub_sock_wait_for_space(struct sock *sk, long timeo, unsigned long size)
-+{
-+ DECLARE_WAITQUEUE(wait, current);
-+
-+ add_wait_queue(sk->sk_sleep, &wait);
-+ for (;;) {
-+ if (signal_pending(current))
-+ break;
-+ /* state is set before the condition is tested */
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ if (!ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size))
-+ break;
-+
-+ if (sk->sk_shutdown & SEND_SHUTDOWN)
-+ break;
-+ if (sk->sk_err)
-+ break;
-+ /* queue for wakeup; sets TASK_RUNNING itself if the reserve succeeds now */
-+ ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, size);
-+ timeo = schedule_timeout(timeo);
-+ }
-+ __set_current_state(TASK_RUNNING);
-+ remove_wait_queue(sk->sk_sleep, &wait);
-+ return timeo;
-+}
-+
-+/* ub_sock_make_wreserv() for the UB_OTHERSOCKBUF resource. */
-+int ub_sock_makewres_other(struct sock *sk, unsigned long size)
-+{
-+	const int rc = ub_sock_make_wreserv(sk, UB_OTHERSOCKBUF, size);
-+
-+	return rc;
-+}
-+
-+/* ub_sock_make_wreserv() for the UB_TCPSNDBUF resource. */
-+int ub_sock_makewres_tcp(struct sock *sk, unsigned long size)
-+{
-+	const int rc = ub_sock_make_wreserv(sk, UB_TCPSNDBUF, size);
-+
-+	return rc;
-+}
-+
-+/* ub_sock_get_wreserv() for the UB_OTHERSOCKBUF resource. */
-+int ub_sock_getwres_other(struct sock *sk, unsigned long size)
-+{
-+	const int rc = ub_sock_get_wreserv(sk, UB_OTHERSOCKBUF, size);
-+
-+	return rc;
-+}
-+
-+/* ub_sock_get_wreserv() for the UB_TCPSNDBUF resource. */
-+int ub_sock_getwres_tcp(struct sock *sk, unsigned long size)
-+{
-+	const int rc = ub_sock_get_wreserv(sk, UB_TCPSNDBUF, size);
-+
-+	return rc;
-+}
-+
-+/* ub_sock_ret_wreserv() for the UB_OTHERSOCKBUF resource. */
-+void ub_sock_retwres_other(struct sock *sk, unsigned long size,
-+		unsigned long ressize)
-+{
-+	ub_sock_ret_wreserv(sk, UB_OTHERSOCKBUF, size, ressize);
-+}
-+
-+/* ub_sock_ret_wreserv() for the UB_TCPSNDBUF resource. */
-+void ub_sock_retwres_tcp(struct sock *sk, unsigned long size,
-+		unsigned long ressize)
-+{
-+	ub_sock_ret_wreserv(sk, UB_TCPSNDBUF, size, ressize);
-+}
-+
-+/* ub_sock_snd_queue_add() for the UB_OTHERSOCKBUF resource. */
-+void ub_sock_sndqueueadd_other(struct sock *sk, unsigned long sz)
-+{
-+	ub_sock_snd_queue_add(sk, UB_OTHERSOCKBUF, sz);
-+}
-+
-+/* ub_sock_snd_queue_add() for the UB_TCPSNDBUF resource. */
-+void ub_sock_sndqueueadd_tcp(struct sock *sk, unsigned long sz)
-+{
-+	ub_sock_snd_queue_add(sk, UB_TCPSNDBUF, sz);
-+}
-+
-+/*
-+ * Remove @sk from the send wait list it may be queued on.  Sockets
-+ * without a beancounter are ignored.
-+ *
-+ * The wait lists (ub_tcp_sk_list / ub_other_sk_list) belong to the
-+ * top-level beancounter and are manipulated under its ub_lock by
-+ * ub_sock_snd_queue_add() and the *_snd_wakeup() functions, so the same
-+ * lock has to be taken here.  (The original code locked skbc->ub, which
-+ * is a different lock whenever the socket is charged to a
-+ * sub-beancounter.)
-+ */
-+void ub_sock_sndqueuedel(struct sock *sk)
-+{
-+	struct sock_beancounter *skbc;
-+	struct user_beancounter *ub;
-+	unsigned long flags;
-+
-+	if (!sock_has_ubc(sk))
-+		return;
-+	skbc = sock_bc(sk);
-+
-+	/* walk up to the top-level beancounter that owns the wait lists */
-+	for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
-+
-+	/* race with write_space callback of other socket */
-+	spin_lock_irqsave(&ub->ub_lock, flags);
-+	list_del_init(&skbc->ub_sock_list);
-+	spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/*
-+ * UB_DGRAMRCVBUF
-+ */
-+
-+/*
-+ * Charge skb_charge_fullsize(@skb) of UB_DGRAMRCVBUF (UB_HARD) to the
-+ * beancounter of @sk and record the charge in @skb.  Returns 0 on success
-+ * or for sockets without a beancounter, -ENOMEM when the charge is
-+ * refused.
-+ */
-+int ub_sockrcvbuf_charge(struct sock *sk, struct sk_buff *skb)
-+{
-+	unsigned long bytes;
-+
-+	if (!sock_has_ubc(sk))
-+		return 0;
-+
-+	bytes = skb_charge_fullsize(skb);
-+	if (charge_beancounter(sock_bc(sk)->ub, UB_DGRAMRCVBUF,
-+				bytes, UB_HARD) != 0)
-+		return -ENOMEM;
-+
-+	ub_skb_set_charge(skb, sk, bytes, UB_DGRAMRCVBUF);
-+	return 0;
-+}
-+
-+EXPORT_SYMBOL(ub_sockrcvbuf_charge);
-+
-+/*
-+ * Give back the UB_DGRAMRCVBUF amount recorded in @skb to the beancounter
-+ * recorded in @skb, then reset the skb's charge record.
-+ */
-+static void ub_sockrcvbuf_uncharge(struct sk_buff *skb)
-+{
-+	uncharge_beancounter(skb_bc(skb)->ub, UB_DGRAMRCVBUF,
-+			skb_bc(skb)->charged);
-+
-+	ub_skb_set_uncharge(skb);
-+}
-+
-+/*
-+ * UB_TCPRCVBUF
-+ */
-+/*
-+ * Charge skb_charge_fullsize(@skb) of UB_TCPRCVBUF for @sk.
-+ *
-+ * The amount is first added to the top-level beancounter under its
-+ * ub_lock.  While held stays within the barrier, or when @strict is
-+ * UB_FORCE, the charge is accepted.  Otherwise ub_rmem_pressure is set to
-+ * UB_RMEM_SHRINK and the charge is refused (-ENOMEM, held restored,
-+ * failcnt incremented) if @strict is UB_HARD, if held exceeds the limit,
-+ * or if the reserve rule described in the "excess" path applies.
-+ *
-+ * On acceptance the same amount is charged to the lower levels through
-+ * charge_beancounter_notop() and recorded in @skb.  Returns 0 on success
-+ * or for sockets without a beancounter.
-+ */
-+static int charge_tcprcvbuf(struct sock *sk, struct sk_buff *skb,
-+ enum severity strict)
-+{
-+ int retval;
-+ unsigned long flags;
-+ struct user_beancounter *ub;
-+ unsigned long chargesize;
-+
-+ if (!sock_has_ubc(sk))
-+ return 0;
-+
-+ /*
-+ * Memory pressure reactions:
-+ * 1) set UB_RMEM_KEEP (clearing UB_RMEM_EXPAND)
-+ * 2) set UB_RMEM_SHRINK and tcp_clamp_window()
-+ * tcp_collapse_queues() if rmem_alloc > rcvbuf
-+ * 3) drop OFO, tcp_purge_ofo()
-+ * 4) drop all.
-+ * Currently, we do #2 and #3 at once (which means that current
-+ * collapsing of OFO queue in tcp_collapse_queues() is a waste of time,
-+ * for example...)
-+ * On memory pressure we jump from #0 to #3, and when the pressure
-+ * subsides, to #1.
-+ */
-+ retval = 0;
-+ chargesize = skb_charge_fullsize(skb);
-+
-+ /* walk up to the top-level beancounter and charge it tentatively */
-+ for (ub = sock_bc(sk)->ub; ub->parent != NULL; ub = ub->parent);
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub->ub_parms[UB_TCPRCVBUF].held += chargesize;
-+ if (ub->ub_parms[UB_TCPRCVBUF].held >
-+ ub->ub_parms[UB_TCPRCVBUF].barrier &&
-+ strict != UB_FORCE)
-+ goto excess;
-+ ub_adjust_maxheld(ub, UB_TCPRCVBUF);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+ /* common exit; the excess path jumps back here after unlocking */
-+out:
-+ if (retval == 0) {
-+ charge_beancounter_notop(sock_bc(sk)->ub, UB_TCPRCVBUF,
-+ chargesize);
-+ ub_skb_set_charge(skb, sk, chargesize, UB_TCPRCVBUF);
-+ }
-+ return retval;
-+
-+ /* entered with ub_lock held and held already raised by chargesize */
-+excess:
-+ ub->ub_rmem_pressure = UB_RMEM_SHRINK;
-+ if (strict == UB_HARD)
-+ retval = -ENOMEM;
-+ if (ub->ub_parms[UB_TCPRCVBUF].held > ub->ub_parms[UB_TCPRCVBUF].limit)
-+ retval = -ENOMEM;
-+ /*
-+ * We try to leave numsock*maxadvmss as a reserve for sockets not
-+ * queueing any data yet (if the difference between the barrier and the
-+ * limit is enough for this reserve).
-+ */
-+ if (ub->ub_parms[UB_TCPRCVBUF].held +
-+ ub->ub_parms[UB_NUMTCPSOCK].limit * ub->ub_maxadvmss
-+ > ub->ub_parms[UB_TCPRCVBUF].limit &&
-+ atomic_read(&sk->sk_rmem_alloc))
-+ retval = -ENOMEM;
-+ /* refused: take the tentative charge back and count the failure */
-+ if (retval) {
-+ ub->ub_parms[UB_TCPRCVBUF].held -= chargesize;
-+ ub->ub_parms[UB_TCPRCVBUF].failcnt++;
-+ }
-+ ub_adjust_maxheld(ub, UB_TCPRCVBUF);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+ goto out;
-+}
-+
-+/* charge_tcprcvbuf() with UB_HARD severity. */
-+int ub_tcprcvbuf_charge(struct sock *sk, struct sk_buff *skb)
-+{
-+	const int rc = charge_tcprcvbuf(sk, skb, UB_HARD);
-+
-+	return rc;
-+}
-+
-+/* charge_tcprcvbuf() with UB_FORCE severity. */
-+int ub_tcprcvbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
-+{
-+	const int rc = charge_tcprcvbuf(sk, skb, UB_FORCE);
-+
-+	return rc;
-+}
-+EXPORT_SYMBOL(ub_tcprcvbuf_charge_forced);
-+
-+/*
-+ * Give back the UB_TCPRCVBUF amount recorded in @skb.
-+ *
-+ * The top-level beancounter is updated directly under its ub_lock.  An
-+ * attempt to uncharge more than is held is logged and the amount is
-+ * clamped to what is held.  ub_rmem_pressure is then relaxed according to
-+ * the new held value: UB_RMEM_EXPAND at or below three quarters of the
-+ * barrier, UB_RMEM_KEEP at or below the barrier, unchanged otherwise.
-+ * Finally the (possibly clamped) amount is uncharged from the lower
-+ * levels and the skb's charge record is reset.
-+ *
-+ * The unused local prev_pres of the original code has been dropped.
-+ */
-+static void ub_tcprcvbuf_uncharge(struct sk_buff *skb)
-+{
-+	unsigned long flags;
-+	unsigned long held, bar;
-+	struct user_beancounter *ub;
-+
-+	/* walk up to the top-level beancounter */
-+	for (ub = skb_bc(skb)->ub; ub->parent != NULL; ub = ub->parent);
-+	spin_lock_irqsave(&ub->ub_lock, flags);
-+	if (ub->ub_parms[UB_TCPRCVBUF].held < skb_bc(skb)->charged) {
-+		printk(KERN_ERR "Uncharging %d for tcprcvbuf of %p with %lu\n",
-+				skb_bc(skb)->charged,
-+				ub, ub->ub_parms[UB_TCPRCVBUF].held);
-+		/* ass-saving bung */
-+		skb_bc(skb)->charged = ub->ub_parms[UB_TCPRCVBUF].held;
-+	}
-+	ub->ub_parms[UB_TCPRCVBUF].held -= skb_bc(skb)->charged;
-+	held = ub->ub_parms[UB_TCPRCVBUF].held;
-+	bar = ub->ub_parms[UB_TCPRCVBUF].barrier;
-+	if (held <= bar - (bar >> 2))
-+		ub->ub_rmem_pressure = UB_RMEM_EXPAND;
-+	else if (held <= bar)
-+		ub->ub_rmem_pressure = UB_RMEM_KEEP;
-+	spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+	uncharge_beancounter_notop(skb_bc(skb)->ub, UB_TCPRCVBUF,
-+			skb_bc(skb)->charged);
-+	ub_skb_set_uncharge(skb);
-+}
-+
-+
-+/*
-+ * UB_OTHERSOCKBUF
-+ */
-+
-+/*
-+ * Give back the UB_OTHERSOCKBUF amount recorded in @skb: uncharge the
-+ * top-level beancounter under its ub_lock (also lowering ub_wcharged of
-+ * skb->sk when the skb still has a socket), retry the reservations of the
-+ * sockets waiting on ub_other_sk_list, then uncharge the lower levels and
-+ * reset the skb's charge record.
-+ */
-+static void ub_socksndbuf_uncharge(struct sk_buff *skb)
-+{
-+ unsigned long flags;
-+ struct user_beancounter *ub, *cub;
-+ struct sock_beancounter *sk_bc;
-+
-+ /* resource was set. no check for ub required */
-+ cub = skb_bc(skb)->ub;
-+ /* walk up to the top-level beancounter */
-+ for (ub = cub; ub->parent != NULL; ub = ub->parent);
-+ skb_bc(skb)->ub = NULL;
-+ if (skb->sk != NULL)
-+ sk_bc = sock_bc(skb->sk);
-+ else
-+ sk_bc = NULL;
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ __uncharge_sockbuf(sk_bc, ub, UB_OTHERSOCKBUF,
-+ skb_bc(skb)->charged);
-+ ub_sock_snd_wakeup(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+ /* cub was saved above because skb_bc(skb)->ub has been cleared */
-+ uncharge_beancounter_notop(cub, UB_OTHERSOCKBUF, skb_bc(skb)->charged);
-+ ub_skb_set_uncharge(skb);
-+}
-+
-+/*
-+ * Give back the UB_TCPSNDBUF amount recorded in @skb; a no-op when the skb
-+ * has no beancounter recorded.  The top-level beancounter is uncharged
-+ * under its ub_lock (also lowering ub_wcharged of skb->sk), the sockets
-+ * waiting on ub_tcp_sk_list get their reservations retried, then the lower
-+ * levels are uncharged and the skb's charge record is reset.
-+ */
-+static void ub_tcpsndbuf_uncharge(struct sk_buff *skb)
-+{
-+ unsigned long flags;
-+ struct user_beancounter *ub, *cub;
-+
-+ /* resource can be not set, called manually */
-+ cub = skb_bc(skb)->ub;
-+ if (cub == NULL)
-+ return;
-+ /* walk up to the top-level beancounter */
-+ for (ub = cub; ub->parent != NULL; ub = ub->parent);
-+ skb_bc(skb)->ub = NULL;
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ /*
-+ * NOTE(review): unlike ub_socksndbuf_uncharge(), skb->sk is not checked
-+ * for NULL before sock_bc(); presumably a charged TCP skb always has
-+ * skb->sk set (ub_skb_set_charge() fills it in) -- confirm.
-+ */
-+ __uncharge_sockbuf(sock_bc(skb->sk), ub, UB_TCPSNDBUF,
-+ skb_bc(skb)->charged);
-+ ub_tcp_snd_wakeup(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+ /* cub was saved above because skb_bc(skb)->ub has been cleared */
-+ uncharge_beancounter_notop(cub, UB_TCPSNDBUF, skb_bc(skb)->charged);
-+ ub_skb_set_uncharge(skb);
-+}
-+
-+/*
-+ * Undo the charge recorded in @skb by dispatching on the recorded
-+ * resource id.  Resource ids other than the four handled below are
-+ * ignored.
-+ */
-+void ub_skb_uncharge(struct sk_buff *skb)
-+{
-+	switch (skb_bc(skb)->resource) {
-+	case UB_TCPSNDBUF:
-+		ub_tcpsndbuf_uncharge(skb);
-+		break;
-+	case UB_TCPRCVBUF:
-+		ub_tcprcvbuf_uncharge(skb);
-+		break;
-+	case UB_DGRAMRCVBUF:
-+		ub_sockrcvbuf_uncharge(skb);
-+		break;
-+	case UB_OTHERSOCKBUF:
-+		ub_socksndbuf_uncharge(skb);
-+		break;
-+	default:
-+		/* any other value: nothing to do */
-+		break;
-+	}
-+}
-+
-+EXPORT_SYMBOL(ub_skb_uncharge); /* due to skb_orphan()/conntracks */
-+
-+/*
-+ * TCP send buffers accounting. Paged part
-+ */
-+/*
-+ * Charge one page of UB_TCPSNDBUF for @sk.
-+ *
-+ * First a full-page reserve is attempted; if it can be made it is consumed
-+ * immediately and PAGE_SIZE is charged to the lower levels.  Otherwise a
-+ * reserve of SOCK_MIN_UBCSPACE is attempted; if even that fails its error
-+ * is returned.  If it succeeds, the rest of the page is force-charged
-+ * (UB_FORCE) on the top-level beancounter, the reserve is consumed and the
-+ * amount added in this second step is charged to the lower levels.
-+ *
-+ * Returns 0 on success or for sockets without a beancounter.
-+ */
-+int ub_sock_tcp_chargepage(struct sock *sk)
-+{
-+ struct sock_beancounter *skbc;
-+ struct user_beancounter *ub;
-+ unsigned long added;
-+ unsigned long flags;
-+ int err;
-+
-+ if (!sock_has_ubc(sk))
-+ return 0;
-+
-+ skbc = sock_bc(sk);
-+
-+ /* walk up to the top-level beancounter */
-+ for (ub = skbc->ub; ub->parent != NULL; ub = ub->parent);
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ /* Try to charge full page */
-+ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
-+ PAGE_SIZE);
-+ if (err == 0) {
-+ /* consume the page from the reserve right away */
-+ skbc->poll_reserv -= PAGE_SIZE;
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, PAGE_SIZE);
-+ return 0;
-+ }
-+
-+ /* Try to charge page enough to satisfy sys_select. The possible
-+ overdraft for the rest of the page is generally better then
-+ requesting full page in tcp_poll. This should not happen
-+ frequently. Den */
-+ /* unsigned arithmetic: added becomes PAGE_SIZE minus the old poll_reserv */
-+ added = -skbc->poll_reserv;
-+ err = ub_sock_makewreserv_locked(sk, UB_TCPSNDBUF, UB_NUMTCPSOCK,
-+ SOCK_MIN_UBCSPACE);
-+ if (err < 0) {
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+ return err;
-+ }
-+ /* force-charge whatever the reserve does not cover of the page */
-+ __charge_beancounter_locked(ub, UB_TCPSNDBUF,
-+ PAGE_SIZE - skbc->poll_reserv,
-+ UB_FORCE);
-+ added += PAGE_SIZE;
-+ skbc->poll_reserv = 0;
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+ charge_beancounter_notop(skbc->ub, UB_TCPSNDBUF, added);
-+
-+ return 0;
-+
-+}
-+
-+/*
-+ * Account for a send page that has just been detached from @sk.  If the
-+ * write queue is empty, PAGE_SIZE of UB_TCPSNDBUF is uncharged at once;
-+ * otherwise the page's charge is moved onto the last skb of the write
-+ * queue by adding PAGE_SIZE to its recorded charge.  Sockets without a
-+ * beancounter are ignored.
-+ */
-+void ub_sock_tcp_detachpage(struct sock *sk)
-+{
-+	struct sk_buff *tail;
-+
-+	if (!sock_has_ubc(sk))
-+		return;
-+
-+	/* The page is just detached from socket. The last skb in queue
-+	   with paged part holds reference to it */
-+	tail = skb_peek_tail(&sk->sk_write_queue);
-+	if (tail != NULL) {
-+		/* Last skb is a good approximation for a last skb with
-+		   paged part */
-+		skb_bc(tail)->charged += PAGE_SIZE;
-+		return;
-+	}
-+
-+	/* If the queue is empty - all data is sent and page is about
-+	   to be freed */
-+	uncharge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, PAGE_SIZE);
-+}
-+
-+/*
-+ * Charge skb_charge_fullsize(@skb) of UB_TCPSNDBUF with severity @strict
-+ * to the beancounter of @sk.  On success the charge is recorded in @skb
-+ * and added to the socket's ub_wcharged.  Returns 0 for sockets without a
-+ * beancounter, otherwise the result of charge_beancounter().
-+ */
-+static int charge_tcpsndbuf(struct sock *sk, struct sk_buff *skb,
-+		enum severity strict)
-+{
-+	unsigned long bytes;
-+	int rc;
-+
-+	if (!sock_has_ubc(sk))
-+		return 0;
-+
-+	bytes = skb_charge_fullsize(skb);
-+	rc = charge_beancounter(sock_bc(sk)->ub, UB_TCPSNDBUF, bytes,
-+			strict);
-+	if (rc >= 0) {
-+		ub_skb_set_charge(skb, sk, bytes, UB_TCPSNDBUF);
-+		sock_bc(sk)->ub_wcharged += bytes;
-+	}
-+	return rc;
-+}
-+
-+/* charge_tcpsndbuf() with UB_HARD severity. */
-+int ub_tcpsndbuf_charge(struct sock *sk, struct sk_buff *skb)
-+{
-+	const int rc = charge_tcpsndbuf(sk, skb, UB_HARD);
-+
-+	return rc;
-+}
-+
-+/* charge_tcpsndbuf() with UB_FORCE severity. */
-+int ub_tcpsndbuf_charge_forced(struct sock *sk, struct sk_buff *skb)
-+{
-+	const int rc = charge_tcpsndbuf(sk, skb, UB_FORCE);
-+
-+	return rc;
-+}
-+EXPORT_SYMBOL(ub_tcpsndbuf_charge_forced);
-+
-+/*
-+ * Initialization stuff
-+ */
-+/* Init hook; nothing is set up here.  Always returns 0. */
-+int __init skbc_cache_init(void)
-+{
-+	return 0;
-+}
-diff -upr linux-2.6.16.orig/kernel/ub/ub_page_bc.c linux-2.6.16-026test009/kernel/ub/ub_page_bc.c
---- linux-2.6.16.orig/kernel/ub/ub_page_bc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_page_bc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,440 @@
-+/*
-+ * kernel/ub/ub_page_bc.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/spinlock.h>
-+#include <linux/slab.h>
-+#include <linux/mm.h>
-+#include <linux/gfp.h>
-+#include <linux/vmalloc.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_hash.h>
-+#include <ub/ub_vmpages.h>
-+#include <ub/ub_page.h>
-+
-+static kmem_cache_t *pb_cachep;
-+static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;
-+static struct page_beancounter **pb_hash_table;
-+static unsigned int pb_hash_mask;
-+
-+/*
-+ * Auxiliary stuff
-+ */
-+
-+/* Return the page_beancounter that follows @p on its page_list. */
-+static inline struct page_beancounter *next_page_pb(struct page_beancounter *p)
-+{
-+	struct list_head *next = p->page_list.next;
-+
-+	return list_entry(next, struct page_beancounter, page_list);
-+}
-+
-+/* Return the page_beancounter that precedes @p on its page_list. */
-+static inline struct page_beancounter *prev_page_pb(struct page_beancounter *p)
-+{
-+	struct list_head *prev = p->page_list.prev;
-+
-+	return list_entry(prev, struct page_beancounter, page_list);
-+}
-+
-+/*
-+ * Held pages manipulation
-+ */
-+static inline void set_held_pages(struct user_beancounter *bc)
-+{
-+ /* all three depend on ub_held_pages */
-+ /*
-+ * Physpages goes first: __ub_update_oomguarpages() derives its value
-+ * from the UB_PHYSPAGES held counter.
-+ */
-+ __ub_update_physpages(bc);
-+ __ub_update_oomguarpages(bc);
-+ __ub_update_privvm(bc);
-+}
-+
-+static inline void do_dec_held_pages(struct user_beancounter *ub, int value)
-+{
-+ unsigned long flags;
-+
-+ /* Subtract @value from this one beancounter under its own lock. */
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub->ub_held_pages -= value;
-+ /* Refresh the resource counters derived from ub_held_pages. */
-+ set_held_pages(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/* Subtract @value held pages from @ub and from every ancestor of @ub. */
-+static void dec_held_pages(struct user_beancounter *ub, int value)
-+{
-+	while (ub != NULL) {
-+		do_dec_held_pages(ub, value);
-+		ub = ub->parent;
-+	}
-+}
-+
-+static inline void do_inc_held_pages(struct user_beancounter *ub, int value)
-+{
-+ unsigned long flags;
-+
-+ /* Add @value to this one beancounter under its own lock. */
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub->ub_held_pages += value;
-+ /* Refresh the resource counters derived from ub_held_pages. */
-+ set_held_pages(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/* Add @value held pages to @ub and to every ancestor of @ub. */
-+static void inc_held_pages(struct user_beancounter *ub, int value)
-+{
-+	while (ub != NULL) {
-+		do_inc_held_pages(ub, value);
-+		ub = ub->parent;
-+	}
-+}
-+
-+/*
-+ * Alloc - free
-+ */
-+
-+/*
-+ * Allocate one page_beancounter from pb_cachep (GFP_KERNEL) and store it
-+ * in *pbc.  A new object has next_hash cleared and pb_magic set.
-+ *
-+ * Returns 0 on success and 1 when the allocation failed (*pbc is NULL).
-+ */
-+inline int pb_alloc(struct page_beancounter **pbc)
-+{
-+	struct page_beancounter *pb;
-+
-+	pb = kmem_cache_alloc(pb_cachep, GFP_KERNEL);
-+	*pbc = pb;
-+	if (pb == NULL)
-+		return 1;
-+
-+	pb->next_hash = NULL;
-+	pb->pb_magic = PB_MAGIC;
-+	return 0;
-+}
-+
-+/* Return *pb to pb_cachep and clear the pointer; a NULL *pb is ignored. */
-+inline void pb_free(struct page_beancounter **pb)
-+{
-+	if (*pb == NULL)
-+		return;
-+
-+	kmem_cache_free(pb_cachep, *pb);
-+	*pb = NULL;
-+}
-+
-+/*
-+ * Free every page_beancounter chained through next_hash from *p_pb and
-+ * reset *p_pb to NULL.  The PBC_COPY_SAME marker is left untouched.
-+ */
-+void pb_free_list(struct page_beancounter **p_pb)
-+{
-+	struct page_beancounter *cur, *next;
-+
-+	if (*p_pb == PBC_COPY_SAME)
-+		return;
-+
-+	for (cur = *p_pb; cur != NULL; cur = next) {
-+		/* fetch the successor before the element is released */
-+		next = cur->next_hash;
-+		pb_free(&cur);
-+	}
-+	*p_pb = NULL;
-+}
-+
-+/*
-+ * Push @num freshly allocated objects onto the front of the list:
-+ *
-+ * head -> <new objs> -> <old objs> -> ...
-+ *
-+ * Returns -1 as soon as an allocation fails (objects already pushed stay
-+ * on the list), otherwise the remaining count, which is 0 after @num
-+ * successful allocations.
-+ */
-+static int __alloc_list(struct page_beancounter **head, int num)
-+{
-+	struct page_beancounter *fresh;
-+
-+	for (; num > 0; num--) {
-+		if (pb_alloc(&fresh))
-+			return -1;
-+		fresh->next_hash = *head;
-+		*head = fresh;
-+	}
-+
-+	return num;
-+}
-+
-+/*
-+ * Ensure that the list contains at least num elements.
-+ * p_pb points to an initialized list, which may be empty.
-+ *
-+ * Missing elements are put in front of the existing ones:
-+ *
-+ * *p_pb(after) *p_pb (before)
-+ * \ \
-+ * <new objs> -...-> <old objs> -> ...
-+ *
-+ * Returns 0 on success.  On allocation failure the whole list, including
-+ * the elements that were there before the call, is freed and -ENOMEM is
-+ * returned.
-+ *
-+ * mm->page_table_lock should be held
-+ * NOTE(review): pb_alloc() allocates with GFP_KERNEL; confirm that the
-+ * locking note above still matches the callers.
-+ */
-+int pb_alloc_list(struct page_beancounter **p_pb, int num)
-+{
-+	struct page_beancounter *cur = *p_pb;
-+
-+	/* count off the elements that are already available */
-+	while (cur != NULL && num) {
-+		cur = cur->next_hash;
-+		num--;
-+	}
-+	if (!num)
-+		return 0;
-+
-+	if (__alloc_list(p_pb, num) >= 0)
-+		return 0;
-+
-+	pb_free_list(p_pb);
-+	return -ENOMEM;
-+}
-+
-+/*
-+ * Allocates a page_beancounter for each user_beancounter in the hash.
-+ *
-+ * The beancounters are counted under ub_hash_lock; the allocation itself
-+ * happens after the lock is dropped.  Returns 0 on success; on failure
-+ * the list at *pbs is freed and -ENOMEM is returned.
-+ */
-+int pb_alloc_all(struct page_beancounter **pbs)
-+{
-+	struct user_beancounter *ub;
-+	unsigned long flags;
-+	int i;
-+	int count = 0;
-+
-+	spin_lock_irqsave(&ub_hash_lock, flags);
-+	for_each_beancounter(i, ub)
-+		count++;
-+	spin_unlock_irqrestore(&ub_hash_lock, flags);
-+
-+	if (__alloc_list(pbs, count) != 0) {
-+		pb_free_list(pbs);
-+		return -ENOMEM;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Hash routines
-+ */
-+
-+/*
-+ * Hash a (beancounter, page) pair into an index of pb_hash_table: the
-+ * beancounter address shifted left and right by 16 bits is added to the
-+ * page frame number shifted right by 7 bits, then masked by pb_hash_mask.
-+ */
-+static inline int pb_hash(struct user_beancounter *ub, struct page *page)
-+{
-+	unsigned long key = (unsigned long)ub;
-+	unsigned long sum;
-+
-+	sum = (key << 16) + (key >> 16) + (page_to_pfn(page) >> 7);
-+	return sum & pb_hash_mask;
-+}
-+
-+/*
-+ * Bind @p to (@page, @ub) and push it onto the front of hash chain @hash.
-+ * A reference to @ub is taken on behalf of @p.
-+ *
-+ * pb_lock should be held
-+ */
-+static inline void insert_pb(struct page_beancounter *p, struct page *page,
-+ struct user_beancounter *ub, int hash)
-+{
-+ p->page = page;
-+ p->ub = get_beancounter(ub);
-+ p->next_hash = pb_hash_table[hash];
-+ pb_hash_table[hash] = p;
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+ /* debug statistics: one more page_beancounter for this CPU slot */
-+ ub->ub_stat[smp_processor_id()].pbcs++;
-+#endif
-+}
-+
-+/*
-+ * Heart
-+ */
-+
-+/*
-+ * Look up the entry for (@page, @bc) on hash chain @hash and bump its
-+ * reference count.
-+ *
-+ * Returns 0 when the entry was found, -1 when there is none.
-+ */
-+static int __pb_dup_ref(struct page *page, struct user_beancounter *bc,
-+		int hash)
-+{
-+	struct page_beancounter *p;
-+
-+	for (p = pb_hash_table[hash]; p != NULL; p = p->next_hash) {
-+		if (p->page == page && p->ub == bc) {
-+			PB_COUNT_INC(p->refcount);
-+			return 0;
-+		}
-+	}
-+	return -1;
-+}
-+
-+/*
-+ * Attach a new page_beancounter for (@page, @bc).
-+ *
-+ * The object is taken from the front of the preallocated list *ppb, which
-+ * is advanced to its next element; the list is dereferenced without a
-+ * NULL check, so it must not be empty.  Both callers in this file hold
-+ * pb_lock around the call.
-+ */
-+static void __pb_add_ref(struct page *page, struct user_beancounter *bc,
-+ struct page_beancounter **ppb, int hash)
-+{
-+ struct page_beancounter *head, *p;
-+ int shift;
-+
-+ /* pop the first preallocated object */
-+ p = *ppb;
-+ *ppb = p->next_hash;
-+
-+ insert_pb(p, page, bc, hash);
-+ head = page_pbc(page);
-+
-+ if (head != NULL) {
-+ /*
-+ * Move the first element to the end of the list.
-+ * List head (pb_head) is set to the next entry.
-+ * Note that this code works even if head is the only element
-+ * on the list (because it's cyclic).
-+ */
-+ BUG_ON(head->pb_magic != PB_MAGIC);
-+ page_pbc(page) = next_page_pb(head);
-+ PB_SHIFT_INC(head->refcount);
-+ shift = PB_SHIFT_GET(head->refcount);
-+ /*
-+ * Update user beancounter, the share of head has been changed.
-+ * Note that the shift counter is taken after increment.
-+ */
-+ dec_held_pages(head->ub, UB_PAGE_WEIGHT >> shift);
-+ /* add the new page beancounter to the end of the list */
-+ list_add_tail(&p->page_list, &page_pbc(page)->page_list);
-+ } else {
-+ /* first beancounter on this page: it gets the whole weight */
-+ page_pbc(page) = p;
-+ shift = 0;
-+ INIT_LIST_HEAD(&p->page_list);
-+ }
-+
-+ /* the new entry starts with the same shift as the old head and count 1 */
-+ p->refcount = PB_REFCOUNT_MAKE(shift, 1);
-+ /* update user beancounter for the new page beancounter */
-+ inc_held_pages(bc, UB_PAGE_WEIGHT >> shift);
-+}
-+
-+/*
-+ * Account one more reference to @page for the beancounter of @mm.
-+ *
-+ * Nothing is done when the mm has no beancounter or when the page is a
-+ * non-anonymous page of a shmem mapping.  If (page, beancounter) is
-+ * already hashed, its count is incremented; otherwise a new entry is
-+ * taken from the preallocated list *p_pb.
-+ */
-+void pb_add_ref(struct page *page, struct mm_struct *mm,
-+		struct page_beancounter **p_pb)
-+{
-+	struct user_beancounter *bc = mm->mm_ub;
-+	int hash;
-+
-+	if (bc == NULL ||
-+	    (!PageAnon(page) && is_shmem_mapping(page->mapping)))
-+		return;
-+
-+	hash = pb_hash(bc, page);
-+
-+	spin_lock(&pb_lock);
-+	if (__pb_dup_ref(page, bc, hash) != 0)
-+		__pb_add_ref(page, bc, p_pb, hash);
-+	spin_unlock(&pb_lock);
-+}
-+
-+/*
-+ * Duplicate the accounting of @page for the beancounter of @mm.
-+ *
-+ * Pages that carry no page_beancounter at all are skipped.  When *p_pb is
-+ * the PBC_COPY_SAME marker the existing (page, beancounter) entry is
-+ * expected to be present and only its count is bumped (a missing entry
-+ * triggers WARN_ON); otherwise a new entry is taken from the list *p_pb.
-+ */
-+void pb_dup_ref(struct page *page, struct mm_struct *mm,
-+ struct page_beancounter **p_pb)
-+{
-+ int hash;
-+ struct user_beancounter *bc;
-+
-+ bc = mm->mm_ub;
-+ if (bc == NULL)
-+ return;
-+
-+ /* same exclusion as in pb_add_ref() and pb_remove_ref() */
-+ if (!PageAnon(page) && is_shmem_mapping(page->mapping))
-+ return;
-+
-+ hash = pb_hash(bc, page);
-+
-+ spin_lock(&pb_lock);
-+ if (page_pbc(page) == NULL)
-+ /*
-+ * pages like ZERO_PAGE must not be accounted in pbc
-+ * so on fork we just skip them
-+ */
-+ goto out_unlock;
-+
-+ if (unlikely(*p_pb != PBC_COPY_SAME))
-+ __pb_add_ref(page, bc, p_pb, hash);
-+ else if (unlikely(__pb_dup_ref(page, bc, hash)))
-+ WARN_ON(1);
-+out_unlock:
-+ spin_unlock(&pb_lock);
-+}
-+
-+/*
-+ * Drop one reference that the beancounter of @mm holds on @page.
-+ *
-+ * Nothing is done when the mm has no beancounter, when the page is a
-+ * non-anonymous page of a shmem mapping, or when (page, beancounter) is
-+ * not hashed.  When the count of the entry reaches zero the entry is
-+ * unhashed, its share of held pages is returned and, if other
-+ * beancounters still reference the page, their shares are rebalanced.
-+ */
-+void pb_remove_ref(struct page *page, struct mm_struct *mm)
-+{
-+	int hash;
-+	struct user_beancounter *bc;
-+	struct page_beancounter *p, **q;
-+	int shift, shiftt;
-+
-+	bc = mm->mm_ub;
-+	if (bc == NULL)
-+		return;
-+
-+	if (!PageAnon(page) && is_shmem_mapping(page->mapping))
-+		return;
-+
-+	hash = pb_hash(bc, page);
-+
-+	spin_lock(&pb_lock);
-+	BUG_ON(page_pbc(page) != NULL && page_pbc(page)->pb_magic != PB_MAGIC);
-+	/* q trails p so that the entry can be unlinked from the hash chain */
-+	for (q = pb_hash_table + hash, p = *q;
-+			p != NULL && (p->page != page || p->ub != bc);
-+			q = &p->next_hash, p = *q);
-+	if (p == NULL)
-+		goto out_unlock;
-+
-+	PB_COUNT_DEC(p->refcount);
-+	if (PB_COUNT_GET(p->refcount))
-+		/*
-+		 * More references from the same user beancounter exist.
-+		 * Nothing needs to be done.
-+		 */
-+		goto out_unlock;
-+
-+	/* remove from the hash list */
-+	*q = p->next_hash;
-+
-+	shift = PB_SHIFT_GET(p->refcount);
-+
-+	dec_held_pages(p->ub, UB_PAGE_WEIGHT >> shift);
-+
-+	if (page_pbc(page) == p) {
-+		/* p was the only beancounter on the page */
-+		if (list_empty(&p->page_list))
-+			goto out_free;
-+		page_pbc(page) = next_page_pb(p);
-+	}
-+	list_del(&p->page_list);
-+	/*
-+	 * Update the statistics before the reference is dropped, as the
-+	 * out_free path does, so that p->ub is not touched after
-+	 * put_beancounter().
-+	 */
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+	p->ub->ub_stat[smp_processor_id()].pbcs--;
-+#endif
-+	put_beancounter(p->ub);
-+	pb_free(&p);
-+
-+	/* Now balance the list. Move the tail and adjust its shift counter. */
-+	p = prev_page_pb(page_pbc(page));
-+	shiftt = PB_SHIFT_GET(p->refcount);
-+	page_pbc(page) = p;
-+	PB_SHIFT_DEC(p->refcount);
-+
-+	inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
-+
-+	/*
-+	 * If the shift counter of the moved beancounter is different from the
-+	 * removed one's, repeat the procedure for one more tail beancounter
-+	 */
-+	if (shiftt > shift) {
-+		p = prev_page_pb(page_pbc(page));
-+		page_pbc(page) = p;
-+		PB_SHIFT_DEC(p->refcount);
-+		inc_held_pages(p->ub, UB_PAGE_WEIGHT >> shiftt);
-+	}
-+	spin_unlock(&pb_lock);
-+	return;
-+
-+out_free:
-+	page_pbc(page) = NULL;
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+	p->ub->ub_stat[smp_processor_id()].pbcs--;
-+#endif
-+	put_beancounter(p->ub);
-+	pb_free(&p);
-+out_unlock:
-+	spin_unlock(&pb_lock);
-+	return;
-+}
-+
-+/*
-+ * Return, with a reference taken, the beancounter of the page_beancounter
-+ * currently at the head of @page, or ERR_PTR(-EINVAL) when the page has
-+ * none.  The lookup is done under pb_lock.
-+ */
-+struct user_beancounter *pb_grab_page_ub(struct page *page)
-+{
-+	struct page_beancounter *head;
-+	struct user_beancounter *ub;
-+
-+	spin_lock(&pb_lock);
-+	head = page_pbc(page);
-+	if (head != NULL)
-+		ub = get_beancounter(head->ub);
-+	else
-+		ub = ERR_PTR(-EINVAL);
-+	spin_unlock(&pb_lock);
-+
-+	return ub;
-+}
-+
-+/*
-+ * Boot-time setup of page beancounters: create the slab cache and the
-+ * (page, beancounter) hash table.
-+ *
-+ * The table size is derived from num_physpages >> 2: pb_hash_mask is
-+ * grown as 1, 3, 7, ... until it covers that value, and the table gets
-+ * pb_hash_mask + 1 zeroed buckets.  Failure to allocate either the cache
-+ * (SLAB_PANIC) or the table is fatal.
-+ */
-+void __init ub_init_pbc(void)
-+{
-+	unsigned long hash_size;
-+
-+	pb_cachep = kmem_cache_create("page_beancounter",
-+			sizeof(struct page_beancounter), 0,
-+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
-+	hash_size = num_physpages >> 2;
-+	for (pb_hash_mask = 1;
-+			(hash_size & pb_hash_mask) != hash_size;
-+			pb_hash_mask = (pb_hash_mask << 1) + 1);
-+	hash_size = pb_hash_mask + 1;
-+	printk(KERN_INFO "Page beancounter hash is %lu entries.\n", hash_size);
-+	pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
-+	/* vmalloc() may fail; do not hand a NULL table to memset() */
-+	if (pb_hash_table == NULL)
-+		panic("Can't allocate page beancounter hash table!\n");
-+	memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
-+}
-diff -upr linux-2.6.16.orig/kernel/ub/ub_pages.c linux-2.6.16-026test009/kernel/ub/ub_pages.c
---- linux-2.6.16.orig/kernel/ub/ub_pages.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_pages.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,530 @@
-+/*
-+ * kernel/ub/ub_pages.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/mm.h>
-+#include <linux/highmem.h>
-+#include <linux/virtinfo.h>
-+#include <linux/module.h>
-+#include <linux/shmem_fs.h>
-+#include <linux/vmalloc.h>
-+
-+#include <asm/pgtable.h>
-+#include <asm/page.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_vmpages.h>
-+
-+/*
-+ * Print a rate-limited warning, followed by a stack dump, about the rss
-+ * counter of @vma.  @freed is added to get_vma_rss(vma) to form the
-+ * reported rss value.
-+ */
-+void warn_bad_rss(struct vm_area_struct *vma, unsigned long freed)
-+{
-+ /* rate limit parameters: .burst = 10, .interval = 40 * HZ */
-+ static struct ub_rate_info ri = {
-+ .burst = 10,
-+ .interval = 40 * HZ,
-+ };
-+ struct user_beancounter *ub;
-+ char ubuid[64] = "No UB";
-+ unsigned long vmrss;
-+
-+ if (!ub_ratelimit(&ri))
-+ return;
-+
-+ /* the default "No UB" text stays when the mm has no beancounter */
-+ ub = vma->vm_mm->mm_ub;
-+ if (ub)
-+ print_ub_uid(ub, ubuid, sizeof(ubuid));
-+
-+ vmrss = get_vma_rss(vma) + freed;
-+ printk(KERN_WARNING
-+ "%s vm_rss: process pid %d comm %.20s flags %lx\n"
-+ "vma %p/%p rss %lu/%lu freed %lu\n"
-+ "flags %lx, ub %s\n",
-+ vmrss > freed ? "Positive" : "Negative",
-+ current->pid, current->comm, current->flags,
-+ vma, vma->vm_mm, vmrss, vma_pages(vma), freed,
-+ vma->vm_flags, ubuid);
-+ dump_stack();
-+}
-+
-+/*
-+ * Count the ptes in [addr, end) below @pmd that are neither empty nor
-+ * non-present, adding one to *ret for each.  Returns the address reached,
-+ * i.e. @end.
-+ */
-+static inline unsigned long pages_in_pte_range(struct vm_area_struct *vma,
-+ pmd_t *pmd, unsigned long addr, unsigned long end,
-+ unsigned long *ret)
-+{
-+ pte_t *pte;
-+ spinlock_t *ptl;
-+
-+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-+ do {
-+ if (!pte_none(*pte) && pte_present(*pte))
-+ (*ret)++;
-+ } while (pte++, addr += PAGE_SIZE, (addr != end));
-+ /* pte points one past the last examined entry here */
-+ pte_unmap_unlock(pte - 1, ptl);
-+
-+ return addr;
-+}
-+
-+/*
-+ * Walk the pmd entries covering [addr, end) below @pud and accumulate the
-+ * present-pte count into *ret.  Returns the address reached, i.e. @end.
-+ */
-+static inline unsigned long pages_in_pmd_range(struct vm_area_struct *vma,
-+ pud_t *pud, unsigned long addr, unsigned long end,
-+ unsigned long *ret)
-+{
-+ pmd_t *pmd;
-+ unsigned long next;
-+
-+ pmd = pmd_offset(pud, addr);
-+ do {
-+ next = pmd_addr_end(addr, end);
-+ /* "continue" still runs the while() step, so the entry is skipped */
-+ if (pmd_none_or_clear_bad(pmd))
-+ continue;
-+ next = pages_in_pte_range(vma, pmd, addr, next, ret);
-+ } while (pmd++, addr = next, (addr != end));
-+
-+ return addr;
-+}
-+
-+/*
-+ * Walk the pud entries covering [addr, end) below @pgd and accumulate the
-+ * present-pte count into *ret.  Returns the address reached, i.e. @end.
-+ */
-+static inline unsigned long pages_in_pud_range(struct vm_area_struct *vma,
-+ pgd_t *pgd, unsigned long addr, unsigned long end,
-+ unsigned long *ret)
-+{
-+ pud_t *pud;
-+ unsigned long next;
-+
-+ pud = pud_offset(pgd, addr);
-+ do {
-+ next = pud_addr_end(addr, end);
-+ /* "continue" still runs the while() step, so the entry is skipped */
-+ if (pud_none_or_clear_bad(pud))
-+ continue;
-+ next = pages_in_pmd_range(vma, pud, addr, next, ret);
-+ } while (pud++, addr = next, (addr != end));
-+
-+ return addr;
-+}
-+
-+/*
-+ * Return the number of present ptes that the page tables of vma->vm_mm
-+ * map in [addr, end).  @addr must be below @end (BUG otherwise).
-+ */
-+unsigned long pages_in_vma_range(struct vm_area_struct *vma,
-+ unsigned long addr, unsigned long end)
-+{
-+ pgd_t *pgd;
-+ unsigned long next;
-+ unsigned long ret;
-+
-+ ret = 0;
-+ BUG_ON(addr >= end);
-+ pgd = pgd_offset(vma->vm_mm, addr);
-+ do {
-+ next = pgd_addr_end(addr, end);
-+ /* "continue" still runs the while() step, so the entry is skipped */
-+ if (pgd_none_or_clear_bad(pgd))
-+ continue;
-+ next = pages_in_pud_range(vma, pgd, addr, next, &ret);
-+ } while (pgd++, addr = next, (addr != end));
-+ return ret;
-+}
-+
-+/*
-+ * Recompute the UB_PHYSPAGES held value of @ub as tmpfs resident pages
-+ * plus ub_held_pages scaled down by UB_PAGE_WEIGHT_SHIFT, then let
-+ * ub_adjust_maxheld() process the resource.
-+ * Callers in this chunk invoke it with ub->ub_lock held.
-+ */
-+void fastcall __ub_update_physpages(struct user_beancounter *ub)
-+{
-+ ub->ub_parms[UB_PHYSPAGES].held = ub->ub_tmpfs_respages
-+ + (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT);
-+ ub_adjust_maxheld(ub, UB_PHYSPAGES);
-+}
-+
-+/*
-+ * Recompute the UB_OOMGUARPAGES held value of @ub as the current
-+ * UB_PHYSPAGES held value plus ub_swap_pages, then let
-+ * ub_adjust_maxheld() process the resource.  UB_PHYSPAGES must already
-+ * be up to date when this runs.
-+ */
-+void fastcall __ub_update_oomguarpages(struct user_beancounter *ub)
-+{
-+ ub->ub_parms[UB_OOMGUARPAGES].held =
-+ ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
-+ ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
-+}
-+
-+/*
-+ * Recompute the UB_PRIVVMPAGES held value of @ub as the sum of
-+ * ub_held_pages scaled down by UB_PAGE_WEIGHT_SHIFT, the unused private
-+ * pages and the UB_SHMPAGES held value, then let ub_adjust_maxheld()
-+ * process the resource.
-+ */
-+void fastcall __ub_update_privvm(struct user_beancounter *ub)
-+{
-+ ub->ub_parms[UB_PRIVVMPAGES].held =
-+ (ub->ub_held_pages >> UB_PAGE_WEIGHT_SHIFT)
-+ + ub->ub_unused_privvmpages
-+ + ub->ub_parms[UB_SHMPAGES].held;
-+ ub_adjust_maxheld(ub, UB_PRIVVMPAGES);
-+}
-+
-+/*
-+ * Charge @s pages of UB_PRIVVMPAGES to @ub with severity @strict and, on
-+ * success, count them as unused private pages.  The caller holds
-+ * ub->ub_lock.
-+ *
-+ * Returns 0 on success, -ENOMEM when the charge was refused.
-+ */
-+static inline int __charge_privvm_locked(struct user_beancounter *ub,
-+		unsigned long s, enum severity strict)
-+{
-+	int err;
-+
-+	err = __charge_beancounter_locked(ub, UB_PRIVVMPAGES, s, strict);
-+	if (err < 0)
-+		return -ENOMEM;
-+
-+	ub->ub_unused_privvmpages += s;
-+	return 0;
-+}
-+
-+/*
-+ * Subtract @size from the unused private pages of @ub and refresh
-+ * UB_PRIVVMPAGES.  A request larger than the current value is reported
-+ * through uncharge_warn() and clamped to that value.  The callers in this
-+ * file hold ub->ub_lock.
-+ */
-+static void __unused_privvm_dec_locked(struct user_beancounter *ub,
-+ long size)
-+{
-+ /* catch possible overflow */
-+ /* NOTE(review): @size is signed; confirm the type of ub_unused_privvmpages */
-+ if (ub->ub_unused_privvmpages < size) {
-+ uncharge_warn(ub, UB_UNUSEDPRIVVM,
-+ size, ub->ub_unused_privvmpages);
-+ size = ub->ub_unused_privvmpages;
-+ }
-+ ub->ub_unused_privvmpages -= size;
-+ __ub_update_privvm(ub);
-+}
-+
-+/*
-+ * Subtract @size unused private pages from the top-level beancounter of
-+ * @mm.  An mm without a beancounter is ignored.
-+ */
-+void __ub_unused_privvm_dec(struct mm_struct *mm, long size)
-+{
-+	struct user_beancounter *top = mm->mm_ub;
-+	unsigned long flags;
-+
-+	if (top == NULL)
-+		return;
-+
-+	/* climb to the root of the beancounter hierarchy */
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, flags);
-+	__unused_privvm_dec_locked(top, size);
-+	spin_unlock_irqrestore(&top->ub_lock, flags);
-+}
-+
-+/*
-+ * Subtract @count unused private pages for @mm, but only when @vma is a
-+ * private mapping according to VM_UB_PRIVATE().
-+ */
-+void ub_unused_privvm_sub(struct mm_struct *mm,
-+		struct vm_area_struct *vma, unsigned long count)
-+{
-+	if (!VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
-+		return;
-+
-+	__ub_unused_privvm_dec(mm, count);
-+}
-+
-+/*
-+ * Add @size unused private pages to the top-level beancounter of @mm.
-+ * Nothing is done when the mm has no beancounter or when @vma is not a
-+ * private mapping according to VM_UB_PRIVATE().
-+ */
-+void ub_unused_privvm_add(struct mm_struct *mm,
-+ struct vm_area_struct *vma, unsigned long size)
-+{
-+ unsigned long flags;
-+ struct user_beancounter *ub;
-+
-+ ub = mm->mm_ub;
-+ if (ub == NULL || !VM_UB_PRIVATE(vma->vm_flags, vma->vm_file))
-+ return;
-+
-+ /* climb to the root of the beancounter hierarchy */
-+ for (; ub->parent != NULL; ub = ub->parent);
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ /*
-+ * NOTE(review): unlike the decrement path, UB_PRIVVMPAGES is not
-+ * recomputed here - confirm this is intentional.
-+ */
-+ ub->ub_unused_privvmpages += size;
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/*
-+ * Decide how a change of the protection flags of @vma to @newflags
-+ * affects private page accounting, and charge if required.
-+ *
-+ * Returns
-+ *   PRIVVM_NO_CHARGE  - the mm has no beancounter, VM_WRITE does not
-+ *                       change, or the mapping would not be private even
-+ *                       with VM_WRITE set;
-+ *   PRIVVM_TO_SHARED  - VM_WRITE is being removed;
-+ *   PRIVVM_TO_PRIVATE - VM_WRITE is being added and @size was charged
-+ *                       (UB_SOFT) to the top-level beancounter;
-+ *   PRIVVM_ERROR      - that charge was refused.
-+ */
-+int ub_protected_charge(struct mm_struct *mm, unsigned long size,
-+		unsigned long newflags, struct vm_area_struct *vma)
-+{
-+	unsigned long oldflags, irqflags;
-+	struct user_beancounter *top;
-+	int err;
-+
-+	top = mm->mm_ub;
-+	if (top == NULL)
-+		return PRIVVM_NO_CHARGE;
-+
-+	oldflags = vma->vm_flags;
-+	if (((newflags ^ oldflags) & VM_WRITE) == 0)
-+		return PRIVVM_NO_CHARGE;
-+
-+	if (!VM_UB_PRIVATE(newflags | VM_WRITE, vma->vm_file))
-+		return PRIVVM_NO_CHARGE;
-+
-+	if (oldflags & VM_WRITE)
-+		return PRIVVM_TO_SHARED;
-+
-+	/* climb to the root of the beancounter hierarchy */
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, irqflags);
-+	err = __charge_privvm_locked(top, size, UB_SOFT);
-+	spin_unlock_irqrestore(&top->ub_lock, irqflags);
-+
-+	return err < 0 ? PRIVVM_ERROR : PRIVVM_TO_PRIVATE;
-+}
-+
-+/*
-+ * Charge a mapping of @size bytes to the beancounter of @mm.
-+ *
-+ * VM_LOCKED mappings are charged to UB_LOCKEDPAGES of the mm's
-+ * beancounter; private mappings (VM_UB_PRIVATE) are additionally charged
-+ * as private pages to the top-level beancounter.  @sv must be UB_SOFT or
-+ * UB_HARD.
-+ *
-+ * Returns 0 on success or when the mm has no beancounter, -EINVAL when
-+ * the page count exceeds UB_MAXVALUE, and -ENOMEM when a charge is
-+ * refused (a locked-pages charge already made is rolled back).
-+ */
-+int ub_memory_charge(struct mm_struct *mm, unsigned long size,
-+ unsigned vm_flags, struct file *vm_file, int sv)
-+{
-+ struct user_beancounter *ub, *ubl;
-+ unsigned long flags;
-+
-+ ub = mm->mm_ub;
-+ if (ub == NULL)
-+ return 0;
-+
-+ /* from here on @size is in pages */
-+ size >>= PAGE_SHIFT;
-+ if (size > UB_MAXVALUE)
-+ return -EINVAL;
-+
-+ BUG_ON(sv != UB_SOFT && sv != UB_HARD);
-+
-+ if (vm_flags & VM_LOCKED) {
-+ if (charge_beancounter(ub, UB_LOCKEDPAGES, size, sv))
-+ goto out_err;
-+ }
-+ if (VM_UB_PRIVATE(vm_flags, vm_file)) {
-+ /* private pages are accounted on the top-level beancounter */
-+ for (ubl = ub; ubl->parent != NULL; ubl = ubl->parent);
-+ spin_lock_irqsave(&ubl->ub_lock, flags);
-+ if (__charge_privvm_locked(ubl, size, sv))
-+ goto out_private;
-+ spin_unlock_irqrestore(&ubl->ub_lock, flags);
-+ }
-+ return 0;
-+
-+out_private:
-+ spin_unlock_irqrestore(&ubl->ub_lock, flags);
-+ /* undo the locked-pages charge made above */
-+ if (vm_flags & VM_LOCKED)
-+ uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
-+out_err:
-+ return -ENOMEM;
-+}
-+
-+/*
-+ * Undo the accounting of a mapping of @size bytes for @mm: uncharge
-+ * UB_LOCKEDPAGES for VM_LOCKED mappings and drop the unused private pages
-+ * of the top-level beancounter for private mappings.
-+ */
-+void ub_memory_uncharge(struct mm_struct *mm, unsigned long size,
-+		unsigned vm_flags, struct file *vm_file)
-+{
-+	struct user_beancounter *ub = mm->mm_ub;
-+	struct user_beancounter *top;
-+	unsigned long flags;
-+
-+	if (ub == NULL)
-+		return;
-+
-+	/* from here on @size is in pages */
-+	size >>= PAGE_SHIFT;
-+
-+	if (vm_flags & VM_LOCKED)
-+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size);
-+
-+	if (!VM_UB_PRIVATE(vm_flags, vm_file))
-+		return;
-+
-+	top = ub;
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, flags);
-+	__unused_privvm_dec_locked(top, size);
-+	spin_unlock_irqrestore(&top->ub_lock, flags);
-+}
-+
-+/*
-+ * Charge @size bytes, converted to pages, to UB_LOCKEDPAGES of the
-+ * beancounter of @mm with UB_HARD severity.  Returns 0 when the mm has no
-+ * beancounter, otherwise the result of charge_beancounter().
-+ */
-+int ub_locked_charge(struct mm_struct *mm, unsigned long size)
-+{
-+	struct user_beancounter *ub = mm->mm_ub;
-+	unsigned long pages = size >> PAGE_SHIFT;
-+
-+	if (ub == NULL)
-+		return 0;
-+
-+	return charge_beancounter(ub, UB_LOCKEDPAGES, pages, UB_HARD);
-+}
-+
-+/*
-+ * Uncharge @size bytes, converted to pages, from UB_LOCKEDPAGES of the
-+ * beancounter of @mm, if it has one.
-+ */
-+void ub_locked_uncharge(struct mm_struct *mm, unsigned long size)
-+{
-+	struct user_beancounter *ub = mm->mm_ub;
-+
-+	if (ub != NULL)
-+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
-+}
-+
-+/*
-+ * Charge @size bytes, converted to pages, to UB_LOCKEDPAGES of the
-+ * beancounter of the shmem inode @shi with UB_HARD severity.  Returns 0
-+ * when the inode has no beancounter, otherwise the result of
-+ * charge_beancounter().
-+ */
-+int ub_lockedshm_charge(struct shmem_inode_info *shi, unsigned long size)
-+{
-+	struct user_beancounter *ub = shi->shmi_ub;
-+	unsigned long pages = size >> PAGE_SHIFT;
-+
-+	if (ub == NULL)
-+		return 0;
-+
-+	return charge_beancounter(ub, UB_LOCKEDPAGES, pages, UB_HARD);
-+}
-+
-+/*
-+ * Uncharge @size bytes, converted to pages, from UB_LOCKEDPAGES of the
-+ * beancounter of the shmem inode @shi, if it has one.
-+ */
-+void ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)
-+{
-+	struct user_beancounter *ub = shi->shmi_ub;
-+
-+	if (ub != NULL)
-+		uncharge_beancounter(ub, UB_LOCKEDPAGES, size >> PAGE_SHIFT);
-+}
-+
-+
-+/*
-+ * Count one more tmpfs resident page on this single beancounter and
-+ * refresh UB_PHYSPAGES and UB_OOMGUARPAGES, all under ub->ub_lock.
-+ */
-+static inline void do_ub_tmpfs_respages_inc(struct user_beancounter *ub)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub->ub_tmpfs_respages++;
-+ /* physpages first: oomguarpages is derived from it */
-+ __ub_update_physpages(ub);
-+ __ub_update_oomguarpages(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/*
-+ * Count one more tmpfs resident page on the beancounter of @shi and on
-+ * each of its ancestors.
-+ */
-+void ub_tmpfs_respages_inc(struct shmem_inode_info *shi)
-+{
-+	struct user_beancounter *ub = shi->shmi_ub;
-+
-+	while (ub != NULL) {
-+		do_ub_tmpfs_respages_inc(ub);
-+		ub = ub->parent;
-+	}
-+}
-+
-+/*
-+ * Subtract @size tmpfs resident pages from this single beancounter and
-+ * refresh UB_PHYSPAGES and UB_OOMGUARPAGES, all under ub->ub_lock.
-+ * A request larger than the current value is reported through
-+ * uncharge_warn() and clamped to that value.
-+ */
-+static inline void do_ub_tmpfs_respages_sub(struct user_beancounter *ub,
-+ unsigned long size)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ /* catch possible overflow */
-+ if (ub->ub_tmpfs_respages < size) {
-+ uncharge_warn(ub, UB_TMPFSPAGES,
-+ size, ub->ub_tmpfs_respages);
-+ size = ub->ub_tmpfs_respages;
-+ }
-+ ub->ub_tmpfs_respages -= size;
-+ /* update the values that are of most interest */
-+ __ub_update_physpages(ub);
-+ __ub_update_oomguarpages(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/*
-+ * Subtract @size tmpfs resident pages from the beancounter of @shi and
-+ * from each of its ancestors.
-+ */
-+void ub_tmpfs_respages_sub(struct shmem_inode_info *shi,
-+		unsigned long size)
-+{
-+	struct user_beancounter *ub = shi->shmi_ub;
-+
-+	while (ub != NULL) {
-+		do_ub_tmpfs_respages_sub(ub, size);
-+		ub = ub->parent;
-+	}
-+}
-+
-+/*
-+ * Charge @size to UB_SHMPAGES of the top-level beancounter of @shi with
-+ * UB_HARD severity; on success UB_PRIVVMPAGES is refreshed as well.
-+ *
-+ * Returns 0 when the inode has no beancounter, otherwise the result of
-+ * __charge_beancounter_locked().
-+ */
-+int ub_shmpages_charge(struct shmem_inode_info *shi, unsigned long size)
-+{
-+	struct user_beancounter *top = shi->shmi_ub;
-+	unsigned long flags;
-+	int err;
-+
-+	if (top == NULL)
-+		return 0;
-+
-+	/* climb to the root of the beancounter hierarchy */
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, flags);
-+	err = __charge_beancounter_locked(top, UB_SHMPAGES, size, UB_HARD);
-+	if (err == 0)
-+		__ub_update_privvm(top);
-+	spin_unlock_irqrestore(&top->ub_lock, flags);
-+
-+	return err;
-+}
-+
-+/*
-+ * Uncharge @size from UB_SHMPAGES of the top-level beancounter of @shi
-+ * and refresh UB_PRIVVMPAGES.  An inode without a beancounter is ignored.
-+ */
-+void ub_shmpages_uncharge(struct shmem_inode_info *shi, unsigned long size)
-+{
-+	struct user_beancounter *top = shi->shmi_ub;
-+	unsigned long flags;
-+
-+	if (top == NULL)
-+		return;
-+
-+	/* climb to the root of the beancounter hierarchy */
-+	while (top->parent != NULL)
-+		top = top->parent;
-+
-+	spin_lock_irqsave(&top->ub_lock, flags);
-+	__uncharge_beancounter_locked(top, UB_SHMPAGES, size);
-+	__ub_update_privvm(top);
-+	spin_unlock_irqrestore(&top->ub_lock, flags);
-+}
-+
-+#ifdef CONFIG_USER_SWAP_ACCOUNTING
-+/*
-+ * Count one more swap page on this single beancounter and refresh
-+ * UB_OOMGUARPAGES, under ub->ub_lock.
-+ */
-+static inline void do_ub_swapentry_inc(struct user_beancounter *ub)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub->ub_swap_pages++;
-+ __ub_update_oomguarpages(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/*
-+ * Record @ub as the owner of swap entry @num of @si (taking a reference)
-+ * and count one more swap page on @ub and on each of its ancestors.
-+ */
-+void ub_swapentry_inc(struct swap_info_struct *si, pgoff_t num,
-+		struct user_beancounter *ub)
-+{
-+	struct user_beancounter *cur;
-+
-+	si->swap_ubs[num] = get_beancounter(ub);
-+
-+	cur = ub;
-+	while (cur != NULL) {
-+		do_ub_swapentry_inc(cur);
-+		cur = cur->parent;
-+	}
-+}
-+EXPORT_SYMBOL(ub_swapentry_inc);
-+
-+/*
-+ * Count one swap page less on this single beancounter and refresh
-+ * UB_OOMGUARPAGES, under ub->ub_lock.  If the counter is already not
-+ * positive it is left alone and uncharge_warn() is called instead.
-+ */
-+static inline void do_ub_swapentry_dec(struct user_beancounter *ub)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ if (ub->ub_swap_pages <= 0)
-+ uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
-+ else
-+ ub->ub_swap_pages--;
-+ __ub_update_oomguarpages(ub);
-+ spin_unlock_irqrestore(&ub->ub_lock, flags);
-+}
-+
-+/*
-+ * Clear the owner of swap entry @num of @si, count one swap page less on
-+ * the former owner and each of its ancestors, then drop the reference
-+ * that the entry held on the owner.
-+ */
-+void ub_swapentry_dec(struct swap_info_struct *si, pgoff_t num)
-+{
-+	struct user_beancounter *owner, *cur;
-+
-+	owner = si->swap_ubs[num];
-+	si->swap_ubs[num] = NULL;
-+
-+	cur = owner;
-+	while (cur != NULL) {
-+		do_ub_swapentry_dec(cur);
-+		cur = cur->parent;
-+	}
-+	put_beancounter(owner);
-+}
-+EXPORT_SYMBOL(ub_swapentry_dec);
-+
-+/*
-+ * Allocate and zero the per-entry owner table of @si, one beancounter
-+ * pointer for each of the @num swap entries.
-+ *
-+ * Returns 0 on success, -ENOMEM when the table cannot be allocated.
-+ */
-+int ub_swap_init(struct swap_info_struct *si, pgoff_t num)
-+{
-+	struct user_beancounter **table;
-+
-+	table = vmalloc(num * sizeof(struct user_beancounter *));
-+	if (table != NULL) {
-+		memset(table, 0, num * sizeof(struct user_beancounter *));
-+		si->swap_ubs = table;
-+		return 0;
-+	}
-+
-+	return -ENOMEM;
-+}
-+
-+/* Free the per-entry owner table of @si, if any, and clear the pointer. */
-+void ub_swap_fini(struct swap_info_struct *si)
-+{
-+	if (!si->swap_ubs)
-+		return;
-+
-+	vfree(si->swap_ubs);
-+	si->swap_ubs = NULL;
-+}
-+#endif
-+
-+/*
-+ * VIRTINFO_ENOUGHMEM notifier callback.
-+ *
-+ * Returns NOTIFY_OK when the UB_PRIVVMPAGES held value of the top-level
-+ * beancounter of the current task's mm does not exceed the
-+ * UB_VMGUARPAGES barrier.  In every other case - a different event, a
-+ * task without an mm, an mm without a beancounter, or usage above the
-+ * barrier - the previous verdict @old_ret is passed through unchanged.
-+ */
-+static int vmguar_enough_memory(struct vnotifier_block *self,
-+		unsigned long event, void *arg, int old_ret)
-+{
-+	struct mm_struct *mm;
-+	struct user_beancounter *ub;
-+
-+	if (event != VIRTINFO_ENOUGHMEM)
-+		return old_ret;
-+
-+	/*
-+	 * Kernel threads have no mm, and an mm may carry no beancounter
-+	 * (the rest of this file checks mm_ub against NULL), so do not
-+	 * dereference either blindly.
-+	 */
-+	mm = current->mm;
-+	if (mm == NULL || mm->mm_ub == NULL)
-+		return old_ret;
-+
-+	/* climb to the root of the beancounter hierarchy */
-+	for (ub = mm->mm_ub; ub->parent != NULL; ub = ub->parent);
-+	if (ub->ub_parms[UB_PRIVVMPAGES].held >
-+			ub->ub_parms[UB_VMGUARPAGES].barrier)
-+		return old_ret;
-+
-+	return NOTIFY_OK;
-+}
-+
-+/* Notifier block that routes virtinfo events to vmguar_enough_memory(). */
-+static struct vnotifier_block vmguar_notifier_block = {
-+ .notifier_call = vmguar_enough_memory
-+};
-+
-+/* Register the vmguar notifier on the VITYPE_GENERAL chain; returns 0. */
-+static int __init init_vmguar_notifier(void)
-+{
-+ virtinfo_notifier_register(VITYPE_GENERAL, &vmguar_notifier_block);
-+ return 0;
-+}
-+
-+/* Remove the vmguar notifier from the VITYPE_GENERAL chain. */
-+static void __exit fini_vmguar_notifier(void)
-+{
-+ virtinfo_notifier_unregister(VITYPE_GENERAL, &vmguar_notifier_block);
-+}
-+
-+module_init(init_vmguar_notifier);
-+module_exit(fini_vmguar_notifier);
-diff -upr linux-2.6.16.orig/kernel/ub/ub_proc.c linux-2.6.16-026test009/kernel/ub/ub_proc.c
---- linux-2.6.16.orig/kernel/ub/ub_proc.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_proc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,389 @@
-+/*
-+ * kernel/ub/ub_proc.c
-+ *
-+ * Copyright (C) 1998-2000 Andrey V. Savochkin <saw@saw.sw.com.sg>
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ * TODO:
-+ *
-+ * Changes:
-+ */
-+
-+#include <linux/errno.h>
-+#include <linux/sched.h>
-+#include <linux/kernel.h>
-+#include <linux/mm.h>
-+#include <linux/proc_fs.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_hash.h>
-+#include <ub/ub_debug.h>
-+#include <ub/ub_page.h>
-+
-+#include <asm/page.h>
-+#include <asm/uaccess.h>
-+
-+/*
-+ * we have 8 format strings depending on:
-+ * 1. BITS_PER_LONG
-+ * 2. CONFIG_UBC_KEEP_UNUSED
-+ * 3. resource number (see out_proc_beancounter)
-+ */
-+
-+/* Line prefix: beancounter id (plus refcount if kept) and resource name. */
-+#ifdef CONFIG_UBC_KEEP_UNUSED
-+#define REF_FORMAT "%5.5s %4i: %-12s "
-+#define UID_HEAD_STR "uid ref"
-+#else
-+#define REF_FORMAT "%10.10s: %-12s "
-+#define UID_HEAD_STR "uid"
-+#endif
-+/* Prefix used for the header line and for lines that print an empty id. */
-+#define REF2_FORMAT "%10s %-12s "
-+
-+/* Five value columns; their width depends on the size of long. */
-+#if BITS_PER_LONG == 32
-+#define RES_FORMAT "%10lu %10lu %10lu %10lu %10lu"
-+#define HEAD_FORMAT "%10s %10s %10s %10s %10s"
-+#define UB_PROC_LINE_TEXT (10+2+12+1+10+1+10+1+10+1+10+1+10)
-+#else
-+#define RES_FORMAT "%20lu %20lu %20lu %20lu %20lu"
-+#define HEAD_FORMAT "%20s %20s %20s %20s %20s"
-+#define UB_PROC_LINE_TEXT (10+2+12+1+20+1+20+1+20+1+20+1+20)
-+#endif
-+
-+/* Every output line has a fixed length: the text plus a newline. */
-+#define UB_PROC_LINE_LEN (UB_PROC_LINE_TEXT + 1)
-+
-+/*
-+ * Write the version line into @buf: the version text, padded with spaces
-+ * up to UB_PROC_LINE_TEXT characters, followed by a newline.
-+ */
-+static void out_proc_version(char *buf)
-+{
-+ int len;
-+
-+ len = sprintf(buf, "Version: 2.5");
-+ /* pad to the fixed line width */
-+ memset(buf + len, ' ', UB_PROC_LINE_TEXT - len);
-+ buf[UB_PROC_LINE_TEXT] = '\n';
-+}
-+
-+/*
-+ * Write the column header line into @buf and put a newline at offset
-+ * UB_PROC_LINE_TEXT.
-+ */
-+static void out_proc_head(char *buf)
-+{
-+ sprintf(buf, REF2_FORMAT HEAD_FORMAT,
-+ UID_HEAD_STR, "resource", "held", "maxheld",
-+ "barrier", "limit", "failcnt");
-+ buf[UB_PROC_LINE_TEXT] = '\n';
-+}
-+
-+/*
-+ * Write the line for resource @r of @ub into @buf and put a newline at
-+ * offset UB_PROC_LINE_TEXT.  Only the line of resource 0 carries the
-+ * beancounter id (and its refcount with CONFIG_UBC_KEEP_UNUSED); the
-+ * other lines print an empty id column.
-+ */
-+static void out_proc_beancounter(char *buf, struct user_beancounter *ub, int r)
-+{
-+ if (r == 0) {
-+ char tmpbuf[64];
-+ print_ub_uid(ub, tmpbuf, sizeof(tmpbuf));
-+ sprintf(buf, REF_FORMAT RES_FORMAT,
-+ tmpbuf,
-+#ifdef CONFIG_UBC_KEEP_UNUSED
-+ atomic_read(&ub->ub_refcount),
-+#endif
-+ ub_rnames[r], ub->ub_parms[r].held,
-+ ub->ub_parms[r].maxheld, ub->ub_parms[r].barrier,
-+ ub->ub_parms[r].limit, ub->ub_parms[r].failcnt);
-+ } else
-+ sprintf(buf, REF2_FORMAT RES_FORMAT,
-+ "", ub_rnames[r],
-+ ub->ub_parms[r].held, ub->ub_parms[r].maxheld,
-+ ub->ub_parms[r].barrier, ub->ub_parms[r].limit,
-+ ub->ub_parms[r].failcnt);
-+
-+ buf[UB_PROC_LINE_TEXT] = '\n';
-+}
-+
-+/*
-+ * Decide whether @ub may be shown to a reader running in @exec_ub.
-+ *
-+ * Returns 0 when the reader's top-level beancounter is neither ub0 nor
-+ * the top-level beancounter of @ub.  Otherwise top-level beancounters
-+ * are always shown (1), and the others only when @file has non-NULL
-+ * private_data.
-+ */
-+static int ub_accessible(struct user_beancounter *ub,
-+		struct user_beancounter *exec_ub,
-+		struct file *file)
-+{
-+	struct user_beancounter *exec_top = exec_ub;
-+	struct user_beancounter *ub_top = ub;
-+
-+	while (exec_top->parent != NULL)
-+		exec_top = exec_top->parent;
-+	while (ub_top->parent != NULL)
-+		ub_top = ub_top->parent;
-+
-+	if (exec_top != get_ub0() && ub_top != exec_top)
-+		return 0;
-+	if (ub->parent == NULL)
-+		return 1;
-+	return file->private_data != NULL;
-+}
-+
-+/*
-+ * read() handler of the beancounter proc files.
-+ *
-+ * The file is presented as a sequence of fixed-length lines
-+ * (UB_PROC_LINE_LEN bytes each): a version line, a header line, then
-+ * UB_RESOURCES lines for every beancounter accessible to the reader.
-+ * Each pass of the "again" loop regenerates, in a one-page buffer, the
-+ * lines around the current offset and copies the requested part to user
-+ * space.
-+ *
-+ * Returns the number of bytes copied, 0 for readers without
-+ * CAP_DAC_OVERRIDE or CAP_DAC_READ_SEARCH, -ENOBUFS when the buffer page
-+ * cannot be allocated, -EINVAL for a negative offset and -EFAULT when
-+ * copying to user space fails.
-+ */
-+static ssize_t ub_proc_read(struct file *file, char *usrbuf, size_t len,
-+ loff_t *poff)
-+{
-+ ssize_t retval;
-+ char *buf;
-+ unsigned long flags;
-+ int i, resource;
-+ struct ub_hash_slot *slot;
-+ struct user_beancounter *ub;
-+ struct user_beancounter *exec_ub = get_exec_ub();
-+ loff_t n, off;
-+ int rem, produced, job, tocopy;
-+ const int is_capable =
-+ (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH));
-+
-+ retval = -ENOBUFS;
-+ buf = (char *)__get_free_page(GFP_KERNEL);
-+ if (buf == NULL)
-+ goto out;
-+
-+ retval = 0;
-+ if (!is_capable)
-+ goto out_free;
-+
-+ off = *poff;
-+ if (off < 0) /* can't happen, just in case */
-+ goto inval;
-+
-+again:
-+ i = 0;
-+ slot = ub_hash;
-+ n = off; /* The amount of data to skip */
-+ produced = 0;
-+ /* the first two lines of the file are the version and the header */
-+ if (n < (UB_PROC_LINE_LEN * 2)) {
-+ if (n < UB_PROC_LINE_LEN) {
-+ out_proc_version(buf);
-+ produced += UB_PROC_LINE_LEN;
-+ n += UB_PROC_LINE_LEN;
-+ }
-+ out_proc_head(buf + produced);
-+ produced += UB_PROC_LINE_LEN;
-+ n += UB_PROC_LINE_LEN;
-+ }
-+ n -= (2 * UB_PROC_LINE_LEN);
-+ spin_lock_irqsave(&ub_hash_lock, flags);
-+ /* skip the accessible beancounters that lie entirely before the offset */
-+ while (1) {
-+ for (ub = slot->ubh_beans;
-+ ub != NULL && n >= (UB_RESOURCES * UB_PROC_LINE_LEN);
-+ ub = ub->ub_next)
-+ if (is_capable && ub_accessible(ub, exec_ub, file))
-+ n -= (UB_RESOURCES * UB_PROC_LINE_LEN);
-+ if (ub != NULL || ++i >= UB_HASH_SIZE)
-+ break;
-+ ++slot;
-+ }
-+ rem = n; /* the amount of the data in the buffer to skip */
-+ job = PAGE_SIZE - UB_PROC_LINE_LEN + 1; /* end of buffer data */
-+ if (len < job - rem)
-+ job = rem + len;
-+ /* produce whole lines until the buffer or the request is satisfied */
-+ while (ub != NULL && produced < job) {
-+ if (is_capable && ub_accessible(ub, exec_ub, file))
-+ for (resource = 0;
-+ produced < job && resource < UB_RESOURCES;
-+ resource++, produced += UB_PROC_LINE_LEN)
-+ {
-+ out_proc_beancounter(buf + produced,
-+ ub, resource);
-+ }
-+ if (produced >= job)
-+ break;
-+ /* Find the next beancounter to produce more data. */
-+ ub = ub->ub_next;
-+ while (ub == NULL && ++i < UB_HASH_SIZE) {
-+ ++slot;
-+ ub = slot->ubh_beans;
-+ }
-+ }
-+
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ ub_debug(UBD_ALLOC, KERN_DEBUG "UB_PROC: produced %d, job %d, rem %d\n",
-+ produced, job, rem);
-+
-+ /*
-+ * Temporary buffer `buf' contains `produced' bytes.
-+ * Extract no more than `len' bytes at offset `rem'.
-+ */
-+ if (produced <= rem)
-+ goto out_free;
-+ tocopy = produced - rem;
-+ if (len < tocopy)
-+ tocopy = len;
-+ if (!tocopy)
-+ goto out_free;
-+ if (copy_to_user(usrbuf, buf + rem, tocopy))
-+ goto fault;
-+ off += tocopy; /* can't overflow */
-+ *poff = off;
-+ len -= tocopy;
-+ retval += tocopy;
-+ if (!len)
-+ goto out_free;
-+ /* more was requested than one buffer holds: regenerate from the new offset */
-+ usrbuf += tocopy;
-+ goto again;
-+
-+fault:
-+ /* NOTE(review): bytes copied by earlier passes are not reported here */
-+ retval = -EFAULT;
-+out_free:
-+ free_page((unsigned long)buf);
-+out:
-+ return retval;
-+
-+inval:
-+ retval = -EINVAL;
-+ goto out_free;
-+}
-+
-+/*
-+ * open() handler: tag the file through private_data.  A file named
-+ * "user_beancounters" gets NULL, any other name gets a non-NULL marker;
-+ * ub_accessible() uses this to decide whether beancounters that have a
-+ * parent are listed.  Always returns 0.
-+ */
-+static int ub_proc_open(struct inode *inode, struct file *file)
-+{
-+	if (strcmp(file->f_dentry->d_name.name, "user_beancounters") == 0)
-+		file->private_data = NULL;
-+	else
-+		file->private_data = (void *)-1;
-+	return 0;
-+}
-+
-+/* File operations of the beancounter proc files: open and read only. */
-+static struct file_operations ub_file_operations = {
-+ .read = &ub_proc_read,
-+ .open = &ub_proc_open
-+};
-+
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+#include <linux/seq_file.h>
-+#include <linux/kmem_cache.h>
-+
-+/*
-+ * seq_file start: take ub_hash_lock (with interrupts disabled) and return
-+ * the beancounter at position *pos in hash traversal order, or NULL when
-+ * there are fewer beancounters.  The slot of the returned beancounter is
-+ * remembered in m->private.  The lock is still held on return, also in
-+ * the NULL case; ubd_stop() releases it.
-+ */
-+static void *ubd_start(struct seq_file *m, loff_t *pos)
-+{
-+ loff_t n = *pos;
-+ struct user_beancounter *ub;
-+ long slot;
-+
-+ spin_lock_irq(&ub_hash_lock);
-+ for (slot = 0; slot < UB_HASH_SIZE; slot++)
-+ for (ub = ub_hash[slot].ubh_beans; ub; ub = ub->ub_next) {
-+ if (n == 0) {
-+ m->private = (void *)slot;
-+ return (void *)ub;
-+ }
-+ n--;
-+ }
-+ return NULL;
-+}
-+
-+/*
-+ * seq_file next: advance *pos and return the beancounter that follows @p
-+ * in hash traversal order, or NULL at the end of the hash.  The hash slot
-+ * of the returned beancounter is stored in m->private.
-+ */
-+static void *ubd_next(struct seq_file *m, void *p, loff_t *pos)
-+{
-+	struct user_beancounter *ub;
-+	long slot;
-+
-+	slot = (long)m->private;
-+	ub = ((struct user_beancounter *)p)->ub_next;
-+	++*pos;
-+
-+	/* the current chain is exhausted: look into the following slots */
-+	while (ub == NULL) {
-+		slot++;
-+		if (slot == UB_HASH_SIZE)
-+			return NULL;
-+		ub = ub_hash[slot].ubh_beans;
-+	}
-+
-+	m->private = (void *)slot;
-+	return (void *)ub;
-+}
-+
-+/* seq_file stop: release ub_hash_lock taken by ubd_start(). */
-+static void ubd_stop(struct seq_file *m, void *p)
-+{
-+ spin_unlock_irq(&ub_hash_lock);
-+}
-+
-+#define PROC_LINE_FMT "\t%-17s\t%5lu\t%5lu\n"
-+
-+/*
-+ * seq_file show: print the debug statistics of beancounter @p - its id
-+ * and reference count, per-CPU counters summed over all NR_CPUS slots,
-+ * the unused-privvm, tmpfs and swap page counters, and one line per slab
-+ * cache counter on the beancounter's ub_cclist.  Always returns 0.
-+ */
-+static int ubd_show(struct seq_file *m, void *p)
-+{
-+ struct user_beancounter *ub;
-+ struct ub_cache_counter *cc;
-+ long pages, vmpages, pbc, swap, unmap;
-+ int i;
-+ char id[64];
-+
-+ ub = (struct user_beancounter *)p;
-+ print_ub_uid(ub, id, sizeof(id));
-+ seq_printf(m, "%s:%d\n", id, atomic_read(&ub->ub_refcount));
-+
-+ pages = vmpages = pbc = swap = unmap = 0;
-+ for (i = 0; i < NR_CPUS; i++) {
-+ pages += ub->ub_stat[i].pages_charged;
-+ vmpages += ub->ub_stat[i].vmalloc_charged;
-+ pbc += ub->ub_stat[i].pbcs;
-+ swap += ub->ub_stat[i].swapin;
-+ unmap += ub->ub_stat[i].unmap;
-+ }
-+ /* negative sums are reported as zero */
-+ if (pages < 0)
-+ pages = 0;
-+ if (vmpages < 0)
-+ vmpages = 0;
-+ /* each line: name, counter value, size of one unit */
-+ seq_printf(m, PROC_LINE_FMT, "pages", pages, PAGE_SIZE);
-+ seq_printf(m, PROC_LINE_FMT, "vmalloced", vmpages, PAGE_SIZE);
-+
-+ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_UNUSEDPRIVVM],
-+ ub->ub_unused_privvmpages, PAGE_SIZE);
-+ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_TMPFSPAGES],
-+ ub->ub_tmpfs_respages, PAGE_SIZE);
-+ seq_printf(m, PROC_LINE_FMT, ub_rnames[UB_SWAPPAGES],
-+ ub->ub_swap_pages, PAGE_SIZE);
-+ seq_printf(m, PROC_LINE_FMT, "pbcs", pbc,
-+ (unsigned long)sizeof(struct page_beancounter));
-+
-+ seq_printf(m, PROC_LINE_FMT, "swapin", swap, 0UL);
-+ seq_printf(m, PROC_LINE_FMT, "unmap", unmap, 0UL);
-+ /* interrupts are disabled by locking ub_hash_lock */
-+ spin_lock(&cc_lock);
-+ list_for_each_entry (cc, &ub->ub_cclist, ulist) {
-+ kmem_cache_t *cachep;
-+
-+ cachep = cc->cachep;
-+ seq_printf(m, PROC_LINE_FMT,
-+ cachep->name,
-+ cc->counter,
-+ (unsigned long)cachep->objuse);
-+ }
-+ spin_unlock(&cc_lock);
-+ return 0;
-+}
-+
-+/* seq_file iterator walking every beancounter in ub_hash. */
-+static struct seq_operations kmemdebug_op = {
-+ .start = ubd_start,
-+ .next = ubd_next,
-+ .stop = ubd_stop,
-+ .show = ubd_show,
-+};
-+
-+/* open() handler: attach the kmemdebug_op iterator to the file. */
-+static int kmem_debug_open(struct inode *inode, struct file *file)
-+{
-+ return seq_open(file, &kmemdebug_op);
-+}
-+
-+/*
-+ * File operations of the debug proc entry: everything except open is
-+ * the generic seq_file implementation.
-+ */
-+static struct file_operations kmem_debug_ops = {
-+ .open = kmem_debug_open,
-+ .read = seq_read,
-+ .llseek = seq_lseek,
-+ .release = seq_release,
-+};
-+#endif
-+
-+/*
-+ * Create the beancounter proc entries at boot:
-+ *   user_beancounters, user_beancounters_sub (both served by
-+ *   ub_file_operations) and, with CONFIG_UBC_DEBUG_KMEM,
-+ *   user_beancounters_debug (served by kmem_debug_ops).
-+ * All entries are world-readable (S_IRUGO).  Failure to create any of
-+ * them panics, with a message naming the entry that failed.
-+ */
-+void __init ub_init_proc(void)
-+{
-+	struct proc_dir_entry *entry;
-+
-+	entry = create_proc_entry("user_beancounters", S_IRUGO, NULL);
-+	if (entry == NULL)
-+		panic("Can't create /proc/user_beancounters entry!\n");
-+	entry->proc_fops = &ub_file_operations;
-+
-+	entry = create_proc_entry("user_beancounters_sub", S_IRUGO, NULL);
-+	if (entry == NULL)
-+		/* message used to name a non-existent "user_beancounters2" */
-+		panic("Can't create /proc/user_beancounters_sub entry!\n");
-+	entry->proc_fops = &ub_file_operations;
-+
-+#ifdef CONFIG_UBC_DEBUG_KMEM
-+	entry = create_proc_entry("user_beancounters_debug", S_IRUGO, NULL);
-+	if (entry == NULL)
-+		panic("Can't create /proc/user_beancounters_debug entry!\n");
-+	entry->proc_fops = &kmem_debug_ops;
-+#endif
-+}
-diff -upr linux-2.6.16.orig/kernel/ub/ub_stat.c linux-2.6.16-026test009/kernel/ub/ub_stat.c
---- linux-2.6.16.orig/kernel/ub/ub_stat.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_stat.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,465 @@
-+/*
-+ * kernel/ub/ub_stat.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/timer.h>
-+#include <linux/sched.h>
-+#include <linux/init.h>
-+#include <linux/jiffies.h>
-+#include <linux/list.h>
-+#include <linux/errno.h>
-+#include <linux/suspend.h>
-+
-+#include <asm/uaccess.h>
-+#include <asm/param.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_hash.h>
-+#include <ub/ub_stat.h>
-+
-+static spinlock_t ubs_notify_lock = SPIN_LOCK_UNLOCKED;
-+static LIST_HEAD(ubs_notify_list);
-+static long ubs_min_interval;
-+static ubstattime_t ubs_start_time, ubs_end_time;
-+static struct timer_list ubs_timer;
-+
-+/*
-+ * Copy the uids of all beancounters that have no parent to the user
-+ * buffer 'buf' (at most 'size' bytes), as an array of longs.
-+ *
-+ * The hash is walked one page worth of ids at a time: ids are gathered
-+ * into a kernel page under ub_hash_lock, the lock is dropped, and the
-+ * page is copied out.  A reference is kept on the last visited
-+ * beancounter across the unlocked copy so that the walk can resume from
-+ * its ub_next.
-+ *
-+ * Returns the number of bytes copied, -ENOMEM if the bounce page cannot
-+ * be allocated, or -EFAULT if the copy to user space fails.
-+ */
-+static int ubstat_get_list(void *buf, long size)
-+{
-+ int retval;
-+ unsigned long flags;
-+ int slotnr;
-+ struct ub_hash_slot *slot;
-+ struct user_beancounter *ub, *last_ub;
-+ long *page, *ptr, *end;
-+ int len;
-+
-+ page = (long *)__get_free_page(GFP_KERNEL);
-+ if (page == NULL)
-+ return -ENOMEM;
-+
-+ retval = 0;
-+ slotnr = 0;
-+ slot = ub_hash;
-+ last_ub = NULL;
-+ while (1) {
-+ /* start filling the bounce page from its beginning */
-+ ptr = page;
-+ end = page + PAGE_SIZE / sizeof(*ptr);
-+
-+ spin_lock_irqsave(&ub_hash_lock, flags);
-+ /* resume after the entry the previous page stopped at */
-+ if (last_ub == NULL)
-+ ub = slot->ubh_beans;
-+ else
-+ ub = last_ub->ub_next;
-+ while (1) {
-+ for (; ub != NULL; ub = ub->ub_next) {
-+ /* only beancounters without a parent are listed */
-+ if (ub->parent != NULL)
-+ continue;
-+ *ptr++ = ub->ub_uid;
-+ /* page full: leave with ub still on the entry just stored */
-+ if (ptr == end)
-+ break;
-+ }
-+ if (ptr == end)
-+ break;
-+ ++slot;
-+ if (++slotnr >= UB_HASH_SIZE)
-+ break;
-+ ub = slot->ubh_beans;
-+ }
-+ /* nothing gathered in this pass: the walk is finished */
-+ if (ptr == page)
-+ goto out_unlock;
-+ /* pin the stop position before the lock is dropped */
-+ if (ub != NULL)
-+ get_beancounter(ub);
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+
-+ if (last_ub != NULL)
-+ put_beancounter(last_ub);
-+ last_ub = ub; /* last visited beancounter in the slot */
-+
-+ len = min_t(long, (ptr - page) * sizeof(*ptr), size);
-+ if (copy_to_user(buf, page, len)) {
-+ retval = -EFAULT;
-+ break;
-+ }
-+ retval += len;
-+ /* a short page means the table or the user buffer is exhausted */
-+ if (len < PAGE_SIZE)
-+ break;
-+ buf += len;
-+ size -= len;
-+ }
-+out:
-+ if (last_ub != NULL)
-+ put_beancounter(last_ub);
-+ free_page((unsigned long)page);
-+ return retval;
-+
-+out_unlock:
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+ goto out;
-+}
-+
-+/*
-+ * Copy the current statistics interval (start, end and current time) to
-+ * the user buffer.  The three values are sampled under ubs_notify_lock.
-+ * At most 'size' bytes are copied; returns the number of bytes copied or
-+ * -EFAULT.
-+ */
-+static int ubstat_gettime(void *buf, long size)
-+{
-+	ubgettime_t snapshot;
-+	int len;
-+
-+	spin_lock(&ubs_notify_lock);
-+	snapshot.start_time = ubs_start_time;
-+	snapshot.end_time = ubs_end_time;
-+	snapshot.cur_time = ubs_start_time + (jiffies - ubs_start_time * HZ) / HZ;
-+	spin_unlock(&ubs_notify_lock);
-+
-+	len = min_t(long, sizeof(snapshot), size);
-+	if (copy_to_user(buf, &snapshot, len))
-+		return -EFAULT;
-+	return len;
-+}
-+
-+/*
-+ * Fill kbuf with the interval times followed by the stored maxheld and
-+ * failcnt of resource 'res'.  Returns the size of the record written.
-+ * The caller visible in this file (ubstat_get_stat) range-checks 'res'
-+ * and holds ubs_notify_lock.
-+ */
-+static int ubstat_do_read_one(struct user_beancounter *ub, int res, void *kbuf)
-+{
-+	struct {
-+		ubstattime_t start_time;
-+		ubstattime_t end_time;
-+		ubstatparm_t param[1];
-+	} *out = kbuf;
-+
-+	out->start_time = ubs_start_time;
-+	out->end_time = ubs_end_time;
-+	out->param[0].maxheld = ub->ub_store[res].maxheld;
-+	out->param[0].failcnt = ub->ub_store[res].failcnt;
-+
-+	return sizeof(*out);
-+}
-+
-+/*
-+ * Fill kbuf with the interval times followed by the stored maxheld and
-+ * failcnt of as many resources (in index order) as fit into 'size'
-+ * bytes.  Returns the number of bytes accounted as written.
-+ */
-+static int ubstat_do_read_all(struct user_beancounter *ub, void *kbuf, int size)
-+{
-+	struct {
-+		ubstattime_t start_time;
-+		ubstattime_t end_time;
-+		ubstatparm_t param[UB_RESOURCES];
-+	} *out = kbuf;
-+	int wrote;
-+	int resource;
-+
-+	out->start_time = ubs_start_time;
-+	out->end_time = ubs_end_time;
-+	wrote = sizeof(out->start_time) + sizeof(out->end_time);
-+
-+	resource = 0;
-+	while (resource < UB_RESOURCES &&
-+	       !(size < wrote + sizeof(out->param[resource]))) {
-+		out->param[resource].maxheld = ub->ub_store[resource].maxheld;
-+		out->param[resource].failcnt = ub->ub_store[resource].failcnt;
-+		wrote += sizeof(out->param[resource]);
-+		resource++;
-+	}
-+
-+	return wrote;
-+}
-+
-+/*
-+ * Fill kbuf with the interval times followed by a full copy of the
-+ * stored parameters of as many resources (in index order) as fit into
-+ * 'size' bytes.  Returns the number of bytes accounted as written.
-+ */
-+static int ubstat_do_read_full(struct user_beancounter *ub, void *kbuf,
-+ int size)
-+{
-+ int wrote;
-+ struct {
-+ ubstattime_t start_time;
-+ ubstattime_t end_time;
-+ ubstatparmf_t param[UB_RESOURCES];
-+ } *data;
-+ int resource;
-+
-+ data = kbuf;
-+ data->start_time = ubs_start_time;
-+ data->end_time = ubs_end_time;
-+ wrote = sizeof(data->start_time) + sizeof(data->end_time);
-+
-+ for (resource = 0; resource < UB_RESOURCES; resource++) {
-+ /* stop as soon as the next record would not fit */
-+ if (size < wrote + sizeof(data->param[resource]))
-+ break;
-+ /* The beginning of ubstatparmf_t matches struct ubparm. */
-+ memcpy(&data->param[resource], &ub->ub_store[resource],
-+ sizeof(ub->ub_store[resource]));
-+ /* the trailing fields are explicitly cleared */
-+ data->param[resource].__unused1 = 0;
-+ data->param[resource].__unused2 = 0;
-+ wrote += sizeof(data->param[resource]);
-+ }
-+ return wrote;
-+}
-+
-+/*
-+ * Serve a UBSTAT_READ_ONE / UBSTAT_READ_ALL / UBSTAT_READ_FULL request
-+ * for beancounter 'ub'.
-+ *
-+ * The record is built in a temporary kernel page under ubs_notify_lock
-+ * and then copied to 'buf', truncated to 'size' bytes.  Returns the
-+ * number of bytes copied, -ENOMEM, -EINVAL (unknown command or resource
-+ * index out of range) or -EFAULT.
-+ *
-+ * NOTE(review): the page comes from __get_free_page() and is not
-+ * cleared here; only the bytes the helpers store are defined.  Confirm
-+ * the record layouts contain no padding that would be copied out.
-+ */
-+static int ubstat_get_stat(struct user_beancounter *ub, long cmd,
-+ void *buf, long size)
-+{
-+ void *kbuf;
-+ int retval;
-+
-+ kbuf = (void *)__get_free_page(GFP_KERNEL);
-+ if (kbuf == NULL)
-+ return -ENOMEM;
-+
-+ spin_lock(&ubs_notify_lock);
-+ switch (UBSTAT_CMD(cmd)) {
-+ case UBSTAT_READ_ONE:
-+ retval = -EINVAL;
-+ /* the resource index is encoded in the command word */
-+ if (UBSTAT_PARMID(cmd) >= UB_RESOURCES)
-+ break;
-+ retval = ubstat_do_read_one(ub,
-+ UBSTAT_PARMID(cmd), kbuf);
-+ break;
-+ case UBSTAT_READ_ALL:
-+ retval = ubstat_do_read_all(ub, kbuf, PAGE_SIZE);
-+ break;
-+ case UBSTAT_READ_FULL:
-+ retval = ubstat_do_read_full(ub, kbuf, PAGE_SIZE);
-+ break;
-+ default:
-+ retval = -EINVAL;
-+ }
-+ spin_unlock(&ubs_notify_lock);
-+
-+ /* copy out only on success, and never more than the caller asked for */
-+ if (retval > 0) {
-+ retval = min_t(long, retval, size);
-+ if (copy_to_user(buf, kbuf, retval))
-+ retval = -EFAULT;
-+ }
-+
-+ free_page((unsigned long)kbuf);
-+ return retval;
-+}
-+
-+/*
-+ * Process the notification request that accompanies a ubstat call.
-+ *
-+ * req->maxinterval (seconds, clamped to TIME_MAX_SEC) may shorten the
-+ * current statistics interval; a value below 1 is rejected with -EINVAL.
-+ * req->signum, when non-zero, (re)registers the calling task to receive
-+ * that signal at the end of the interval; zero removes an existing
-+ * registration of the calling task.
-+ *
-+ * Returns 0 on success, -ENOMEM or -EINVAL.
-+ */
-+static int ubstat_handle_notifrq(ubnotifrq_t *req)
-+{
-+	int retval;
-+	struct ub_stat_notify *new_notify;
-+	struct list_head *entry;
-+	struct task_struct *tsk_to_free;
-+
-+	/* size of the structure, not of the pointer to it */
-+	new_notify = kmalloc(sizeof(*new_notify), GFP_KERNEL);
-+	if (new_notify == NULL)
-+		return -ENOMEM;
-+
-+	tsk_to_free = NULL;
-+	INIT_LIST_HEAD(&new_notify->list);
-+
-+	spin_lock(&ubs_notify_lock);
-+	/* reuse the caller's existing registration, if there is one */
-+	list_for_each(entry, &ubs_notify_list) {
-+		struct ub_stat_notify *notify;
-+
-+		notify = list_entry(entry, struct ub_stat_notify, list);
-+		if (notify->task == current) {
-+			kfree(new_notify);
-+			new_notify = notify;
-+			break;
-+		}
-+	}
-+
-+	retval = -EINVAL;
-+	if (req->maxinterval < 1) {
-+		/*
-+		 * entry == list head means no registration was found, so
-+		 * new_notify is still the fresh allocation and must be
-+		 * released; a found registration stays on the list.
-+		 */
-+		if (entry == &ubs_notify_list)
-+			kfree(new_notify);
-+		goto out_unlock;
-+	}
-+	if (req->maxinterval > TIME_MAX_SEC)
-+		req->maxinterval = TIME_MAX_SEC;
-+	if (req->maxinterval < ubs_min_interval) {
-+		unsigned long dif;
-+
-+		ubs_min_interval = req->maxinterval;
-+		/* seconds left until the timer fires, rounded up */
-+		dif = (ubs_timer.expires - jiffies + HZ - 1) / HZ;
-+		if (dif > req->maxinterval)
-+			mod_timer(&ubs_timer,
-+				  ubs_timer.expires -
-+				  (dif - req->maxinterval) * HZ);
-+	}
-+
-+	/* an existing registration is unlinked; its task ref is dropped later */
-+	if (entry != &ubs_notify_list) {
-+		list_del(&new_notify->list);
-+		tsk_to_free = new_notify->task;
-+	}
-+	if (req->signum) {
-+		new_notify->task = current;
-+		get_task_struct(new_notify->task);
-+		new_notify->signum = req->signum;
-+		list_add(&new_notify->list, &ubs_notify_list);
-+	} else
-+		kfree(new_notify);
-+	retval = 0;
-+out_unlock:
-+	spin_unlock(&ubs_notify_lock);
-+	/* drop the old task reference outside the spinlock */
-+	if (tsk_to_free != NULL)
-+		put_task_struct(tsk_to_free);
-+	return retval;
-+}
-+
-+/*
-+ * former sys_ubstat
-+ */
-+/*
-+ * Dispatcher for the ubstat interface.
-+ *
-+ *  func - UBSTAT_UBPARMNUM, UBSTAT_UBLIST, UBSTAT_GETTIME, or a read
-+ *         command handled by ubstat_get_stat()
-+ *  arg1 - uid of the beancounter to read (read commands only)
-+ *  arg2 - user pointer to an ubnotifrq_t notification request
-+ *  buf, size - user buffer receiving the result
-+ *
-+ * UBSTAT_UBPARMNUM and UBSTAT_UBLIST are answered before the capability
-+ * check; everything else needs CAP_DAC_OVERRIDE or CAP_DAC_READ_SEARCH.
-+ * Returns a non-negative result or a negative errno.
-+ */
-+long do_ubstat(int func, unsigned long arg1, unsigned long arg2, void *buf,
-+ long size)
-+{
-+ int retval;
-+ struct user_beancounter *ub;
-+
-+ if (func == UBSTAT_UBPARMNUM)
-+ return UB_RESOURCES;
-+ if (func == UBSTAT_UBLIST)
-+ return ubstat_get_list(buf, size);
-+ if (!(capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)))
-+ return -EPERM;
-+
-+ if (func == UBSTAT_GETTIME) {
-+ retval = ubstat_gettime(buf, size);
-+ goto notify;
-+ }
-+
-+ /* use the caller's own beancounter when its uid matches arg1 */
-+ ub = get_exec_ub();
-+ if (ub != NULL && ub->ub_uid == arg1)
-+ get_beancounter(ub);
-+ else /* FIXME must be if (ve_is_super) */
-+ ub = get_beancounter_byuid(arg1, 0);
-+
-+ if (ub == NULL)
-+ return -ESRCH;
-+
-+ retval = ubstat_get_stat(ub, func, buf, size);
-+ put_beancounter(ub);
-+notify:
-+ /* Handle request for notification */
-+ if (retval >= 0) {
-+ ubnotifrq_t notifrq;
-+ int err;
-+
-+ /*
-+ * The request is read from arg2 unconditionally; a failure
-+ * here replaces the otherwise successful result.
-+ */
-+ err = -EFAULT;
-+ if (!copy_from_user(&notifrq, (void *)arg2, sizeof(notifrq)))
-+ err = ubstat_handle_notifrq(&notifrq);
-+ if (err)
-+ retval = err;
-+ }
-+
-+ return retval;
-+}
-+
-+/*
-+ * Snapshot every resource of one beancounter into ub_store and restart
-+ * the min/max tracking from the currently held value.
-+ */
-+static void ubstat_save_onestat(struct user_beancounter *ub)
-+{
-+	int res;
-+
-+	/* called with local irq disabled */
-+	spin_lock(&ub->ub_lock);
-+	for (res = 0; res < UB_RESOURCES; res++) {
-+		memcpy(&ub->ub_store[res], &ub->ub_parms[res],
-+		       sizeof(struct ubparm));
-+		ub->ub_parms[res].maxheld = ub->ub_parms[res].held;
-+		ub->ub_parms[res].minheld = ub->ub_parms[res].maxheld;
-+	}
-+	spin_unlock(&ub->ub_lock);
-+}
-+
-+/*
-+ * Snapshot the statistics of every beancounter.  ub_hash_lock is held
-+ * with interrupts disabled for the whole walk.
-+ */
-+static void ubstat_save_statistics(void)
-+{
-+ unsigned long flags;
-+ int i;
-+ struct user_beancounter *ub;
-+
-+ spin_lock_irqsave(&ub_hash_lock, flags);
-+ for_each_beancounter(i, ub)
-+ ubstat_save_onestat(ub);
-+ spin_unlock_irqrestore(&ub_hash_lock, flags);
-+}
-+
-+/*
-+ * Timer callback: __data carries the task_struct pointer stored in
-+ * ubs_timer.data; wake that task up.
-+ */
-+static void ubstatd_timeout(unsigned long __data)
-+{
-+	wake_up_process((struct task_struct *)__data);
-+}
-+
-+/*
-+ * Deliver notify->signum to notify->task unless the task has already
-+ * lost its sighand; this guards against a race with release_task.
-+ * Must be called with tasklist_lock held.
-+ */
-+static void task_send_sig(struct ub_stat_notify *notify)
-+{
-+	if (unlikely(notify->task->sighand == NULL))
-+		return;
-+	send_sig(notify->signum, notify->task, 1);
-+}
-+
-+/*
-+ * End-of-interval processing: advance the interval bounds, re-arm the
-+ * timer, snapshot all beancounter statistics, signal every registered
-+ * task and drop the registrations.
-+ */
-+static inline void do_notifies(void)
-+{
-+ LIST_HEAD(notif_free_list);
-+ struct ub_stat_notify *notify;
-+ struct ub_stat_notify *tmp;
-+
-+ spin_lock(&ubs_notify_lock);
-+ ubs_start_time = ubs_end_time;
-+ /*
-+ * the expression below relies on time being unsigned long and
-+ * arithmetic promotion rules
-+ */
-+ ubs_end_time += (ubs_timer.expires - ubs_start_time * HZ) / HZ;
-+ mod_timer(&ubs_timer, ubs_timer.expires + ubs_min_interval * HZ);
-+ /* the next interval starts out with the longest allowed period */
-+ ubs_min_interval = TIME_MAX_SEC;
-+ /* save statistics accumulated for the interval */
-+ ubstat_save_statistics();
-+ /* send signals */
-+ read_lock(&tasklist_lock);
-+ while (!list_empty(&ubs_notify_list)) {
-+ notify = list_entry(ubs_notify_list.next,
-+ struct ub_stat_notify, list);
-+ task_send_sig(notify);
-+ /* registrations are one-shot: move to a private list for freeing */
-+ list_del(&notify->list);
-+ list_add(&notify->list, &notif_free_list);
-+ }
-+ read_unlock(&tasklist_lock);
-+ spin_unlock(&ubs_notify_lock);
-+
-+ /* task references and memory are released outside the locks */
-+ list_for_each_entry_safe(notify, tmp, &notif_free_list, list) {
-+ put_task_struct(notify->task);
-+ kfree(notify);
-+ }
-+}
-+
-+/*
-+ * Kernel thread
-+ */
-+/*
-+ * Body of the "ubstatd" kernel thread.  It arms ubs_timer so that the
-+ * timer wakes this thread, then loops forever: sleep until the timer's
-+ * expiry time has been reached, then run do_notifies().
-+ */
-+static int ubstatd(void *unused)
-+{
-+ /* daemonize call will take care of signals */
-+ daemonize("ubstatd");
-+
-+ ubs_timer.data = (unsigned long)current;
-+ ubs_timer.function = ubstatd_timeout;
-+ add_timer(&ubs_timer);
-+
-+ while (1) {
-+ /* mark ourselves sleeping before testing the condition */
-+ set_task_state(current, TASK_INTERRUPTIBLE);
-+ if (time_after(ubs_timer.expires, jiffies)) {
-+ schedule();
-+ try_to_freeze();
-+ continue;
-+ }
-+
-+ __set_task_state(current, TASK_RUNNING);
-+ do_notifies();
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Initialise the interval state and timer, then start the ubstatd
-+ * kernel thread.  Always returns 0.
-+ *
-+ * NOTE(review): the return value of kernel_thread() is not checked, so
-+ * a failure to start the thread goes unnoticed.
-+ */
-+static int __init ubstatd_init(void)
-+{
-+ init_timer(&ubs_timer);
-+ ubs_timer.expires = TIME_MAX_JIF;
-+ ubs_min_interval = TIME_MAX_SEC;
-+ ubs_start_time = ubs_end_time = 0;
-+
-+ kernel_thread(ubstatd, NULL, 0);
-+ return 0;
-+}
-+
-+module_init(ubstatd_init);
-diff -upr linux-2.6.16.orig/kernel/ub/ub_sys.c linux-2.6.16-026test009/kernel/ub/ub_sys.c
---- linux-2.6.16.orig/kernel/ub/ub_sys.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ub/ub_sys.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,154 @@
-+/*
-+ * kernel/ub/ub_sys.c
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/config.h>
-+#include <asm/uaccess.h>
-+
-+#include <ub/beancounter.h>
-+
-+#ifndef CONFIG_USER_RESOURCE
-+/* Stub used when CONFIG_USER_RESOURCE is not set. */
-+asmlinkage long sys_getluid(void)
-+{
-+ return -ENOSYS;
-+}
-+
-+/* Stub used when CONFIG_USER_RESOURCE is not set. */
-+asmlinkage long sys_setluid(uid_t uid)
-+{
-+ return -ENOSYS;
-+}
-+
-+/* Stub used when CONFIG_USER_RESOURCE is not set. */
-+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
-+ unsigned long *limits)
-+{
-+ return -ENOSYS;
-+}
-+
-+/* Stub used when CONFIG_USER_RESOURCE is not set. */
-+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
-+ void *buf, long size)
-+{
-+ return -ENOSYS;
-+}
-+#else /* CONFIG_USER_RESOURCE */
-+
-+/*
-+ * The (rather boring) getluid syscall
-+ */
-+/*
-+ * Return the uid of the beancounter the caller currently executes
-+ * under, or -EINVAL when there is none.
-+ */
-+asmlinkage long sys_getluid(void)
-+{
-+	struct user_beancounter *exec_ub = get_exec_ub();
-+
-+	if (exec_ub != NULL)
-+		return exec_ub->ub_uid;
-+	return -EINVAL;
-+}
-+
-+/*
-+ * The setluid syscall
-+ */
-+/*
-+ * Switch the calling task to the beancounter identified by 'uid',
-+ * creating it if necessary.  Both exec_ub and fork_sub of the task are
-+ * replaced; the references to the previous ones are dropped.
-+ *
-+ * Returns 0, -EINVAL for uid == (uid_t)-1, -EPERM without CAP_SETUID,
-+ * or -ENOBUFS when the beancounter cannot be obtained.
-+ */
-+asmlinkage long sys_setluid(uid_t uid)
-+{
-+	struct task_beancounter *task_bc;
-+	struct user_beancounter *ub;
-+
-+	/* You may not disown a setluid */
-+	if (uid == (uid_t)-1)
-+		return -EINVAL;
-+
-+	/* You may only set an ub as root */
-+	if (!capable(CAP_SETUID))
-+		return -EPERM;
-+
-+	/* Ok - set up a beancounter entry for this user */
-+	ub = get_beancounter_byuid(uid, 1);
-+	if (ub == NULL)
-+		return -ENOBUFS;
-+
-+	ub_debug(UBD_ALLOC | UBD_LIMIT, "setluid, bean %p (count %d) "
-+			"for %.20s pid %d\n",
-+			ub, atomic_read(&ub->ub_refcount),
-+			current->comm, current->pid);
-+
-+	/* install bc */
-+	task_bc = &current->task_bc;
-+	put_beancounter(task_bc->exec_ub);
-+	task_bc->exec_ub = ub;
-+	put_beancounter(task_bc->fork_sub);
-+	task_bc->fork_sub = get_beancounter(ub);
-+	return 0;
-+}
-+
-+/*
-+ * The setublimit syscall
-+ */
-+/*
-+ * Set the barrier (limits[0]) and limit (limits[1]) of one resource of
-+ * the beancounter identified by 'uid'.
-+ *
-+ * Requires CAP_SYS_RESOURCE and a caller in the super VE (-EPERM
-+ * otherwise).  Returns -EINVAL for an out-of-range resource or a value
-+ * above UB_MAXVALUE, -EFAULT if 'limits' cannot be read, -ENOENT if no
-+ * such beancounter exists, and 0 on success.
-+ */
-+asmlinkage long sys_setublimit(uid_t uid, unsigned long resource,
-+		unsigned long *limits)
-+{
-+	unsigned long new_limits[2];
-+	struct user_beancounter *ub;
-+	unsigned long flags;
-+
-+	if (!capable(CAP_SYS_RESOURCE) || !ve_is_super(get_exec_env()))
-+		return -EPERM;
-+
-+	if (resource >= UB_RESOURCES)
-+		return -EINVAL;
-+
-+	if (copy_from_user(&new_limits, limits, sizeof(new_limits)))
-+		return -EFAULT;
-+
-+	if (new_limits[0] > UB_MAXVALUE || new_limits[1] > UB_MAXVALUE)
-+		return -EINVAL;
-+
-+	ub = get_beancounter_byuid(uid, 0);
-+	if (ub == NULL) {
-+		ub_debug(UBD_LIMIT, "No login bc for uid %d\n", uid);
-+		return -ENOENT;
-+	}
-+
-+	/* both values change together under the beancounter lock */
-+	spin_lock_irqsave(&ub->ub_lock, flags);
-+	ub->ub_parms[resource].barrier = new_limits[0];
-+	ub->ub_parms[resource].limit = new_limits[1];
-+	spin_unlock_irqrestore(&ub->ub_lock, flags);
-+
-+	put_beancounter(ub);
-+	return 0;
-+}
-+
-+extern long do_ubstat(int func, unsigned long arg1, unsigned long arg2,
-+ void *buf, long size);
-+/*
-+ * The ubstat syscall: only callers in the super VE are allowed through
-+ * (-EPERM otherwise); the work is done by do_ubstat().
-+ */
-+asmlinkage long sys_ubstat(int func, unsigned long arg1, unsigned long arg2,
-+ void *buf, long size)
-+{
-+ if (!ve_is_super(get_exec_env()))
-+ return -EPERM;
-+
-+ return do_ubstat(func, arg1, arg2, buf, size);
-+}
-+#endif
-diff -upr linux-2.6.16.orig/kernel/user.c linux-2.6.16-026test009/kernel/user.c
---- linux-2.6.16.orig/kernel/user.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/user.c 2006-04-19 15:02:12.000000000 +0400
-@@ -14,6 +14,7 @@
- #include <linux/bitops.h>
- #include <linux/key.h>
- #include <linux/interrupt.h>
-+#include <linux/module.h>
-
- /*
- * UID task count cache, to get fast user lookup in "alloc_uid"
-@@ -24,7 +25,20 @@
- #define UIDHASH_SZ (1 << UIDHASH_BITS)
- #define UIDHASH_MASK (UIDHASH_SZ - 1)
- #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
--#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
-+#define __uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
-+
-+#ifdef CONFIG_VE
-+#define UIDHASH_MASK_VE (UIDHASH_SZ_VE - 1)
-+#define __uidhashfn_ve(uid) (((uid >> UIDHASH_BITS_VE) ^ uid) & \
-+ UIDHASH_MASK_VE)
-+#define __uidhashentry_ve(uid, envid) ((envid)->uidhash_table + \
-+ __uidhashfn_ve(uid))
-+#define uidhashentry_ve(uid) (ve_is_super(get_exec_env()) ? \
-+ __uidhashentry(uid) : \
-+ __uidhashentry_ve(uid, get_exec_env()))
-+#else
-+#define uidhashentry_ve(uid) __uidhashentry(uid)
-+#endif
-
- static kmem_cache_t *uid_cachep;
- static struct list_head uidhash_table[UIDHASH_SZ];
-@@ -96,7 +110,7 @@ struct user_struct *find_user(uid_t uid)
- unsigned long flags;
-
- spin_lock_irqsave(&uidhash_lock, flags);
-- ret = uid_hash_find(uid, uidhashentry(uid));
-+ ret = uid_hash_find(uid, uidhashentry_ve(uid));
- spin_unlock_irqrestore(&uidhash_lock, flags);
- return ret;
- }
-@@ -115,10 +129,11 @@ void free_uid(struct user_struct *up)
- }
- local_irq_restore(flags);
- }
-+EXPORT_SYMBOL_GPL(free_uid);
-
- struct user_struct * alloc_uid(uid_t uid)
- {
-- struct list_head *hashent = uidhashentry(uid);
-+ struct list_head *hashent = uidhashentry_ve(uid);
- struct user_struct *up;
-
- spin_lock_irq(&uidhash_lock);
-@@ -168,6 +183,7 @@ struct user_struct * alloc_uid(uid_t uid
- }
- return up;
- }
-+EXPORT_SYMBOL_GPL(alloc_uid);
-
- void switch_uid(struct user_struct *new_user)
- {
-@@ -186,21 +202,21 @@ void switch_uid(struct user_struct *new_
- free_uid(old_user);
- suid_keys(current);
- }
--
-+EXPORT_SYMBOL_GPL(switch_uid);
-
- static int __init uid_cache_init(void)
- {
- int n;
-
- uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
-- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_UBC, NULL, NULL);
-
- for(n = 0; n < UIDHASH_SZ; ++n)
- INIT_LIST_HEAD(uidhash_table + n);
-
- /* Insert the root user immediately (init already runs as root) */
- spin_lock_irq(&uidhash_lock);
-- uid_hash_insert(&root_user, uidhashentry(0));
-+ uid_hash_insert(&root_user, __uidhashentry(0));
- spin_unlock_irq(&uidhash_lock);
-
- return 0;
-diff -upr linux-2.6.16.orig/kernel/ve.c linux-2.6.16-026test009/kernel/ve.c
---- linux-2.6.16.orig/kernel/ve.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/ve.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,188 @@
-+/*
-+ * linux/kernel/ve.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+/*
-+ * 've.c' helper file performing VE sub-system initialization
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/delay.h>
-+#include <linux/capability.h>
-+#include <linux/ve.h>
-+#include <linux/smp_lock.h>
-+#include <linux/init.h>
-+
-+#include <linux/errno.h>
-+#include <linux/unistd.h>
-+#include <linux/slab.h>
-+#include <linux/sys.h>
-+#include <linux/kdev_t.h>
-+#include <linux/termios.h>
-+#include <linux/tty_driver.h>
-+#include <linux/netdevice.h>
-+#include <linux/utsname.h>
-+#include <linux/proc_fs.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/module.h>
-+#include <linux/rcupdate.h>
-+#include <linux/ve_proto.h>
-+#include <linux/ve_owner.h>
-+#include <linux/devpts_fs.h>
-+
-+#include <linux/nfcalls.h>
-+
-+unsigned long vz_rstamp = 0x37e0f59d;
-+
-+#ifdef CONFIG_MODULES
-+struct module no_module = { .state = MODULE_STATE_GOING };
-+EXPORT_SYMBOL(no_module);
-+#endif
-+
-+#ifdef CONFIG_VE
-+
-+DCL_VE_OWNER(SKB, struct sk_buff, owner_env)
-+DCL_VE_OWNER(SK, struct sock, sk_owner_env)
-+DCL_VE_OWNER(TW, struct tcp_tw_bucket, tw_owner_env)
-+DCL_VE_OWNER(FILP, struct file, owner_env)
-+DCL_VE_OWNER(FSTYPE, struct file_system_type, owner_env)
-+
-+#if defined(CONFIG_VE_IPTABLES)
-+INIT_KSYM_MODULE(x_tables);
-+INIT_KSYM_MODULE(xt_tcpudp);
-+INIT_KSYM_MODULE(ip_tables);
-+INIT_KSYM_MODULE(iptable_filter);
-+INIT_KSYM_MODULE(iptable_mangle);
-+INIT_KSYM_MODULE(xt_limit);
-+INIT_KSYM_MODULE(ipt_multiport);
-+INIT_KSYM_MODULE(ipt_tos);
-+INIT_KSYM_MODULE(ipt_TOS);
-+INIT_KSYM_MODULE(ipt_REJECT);
-+INIT_KSYM_MODULE(ipt_TCPMSS);
-+INIT_KSYM_MODULE(xt_tcpmss);
-+INIT_KSYM_MODULE(ipt_ttl);
-+INIT_KSYM_MODULE(ipt_LOG);
-+INIT_KSYM_MODULE(xt_length);
-+INIT_KSYM_MODULE(ip_conntrack);
-+INIT_KSYM_MODULE(ip_conntrack_ftp);
-+INIT_KSYM_MODULE(ip_conntrack_irc);
-+INIT_KSYM_MODULE(xt_conntrack);
-+INIT_KSYM_MODULE(xt_state);
-+INIT_KSYM_MODULE(xt_helper);
-+INIT_KSYM_MODULE(ip_nat);
-+INIT_KSYM_MODULE(iptable_nat);
-+INIT_KSYM_MODULE(ip_nat_ftp);
-+INIT_KSYM_MODULE(ip_nat_irc);
-+INIT_KSYM_MODULE(ipt_REDIRECT);
-+
-+INIT_KSYM_CALL(int, init_netfilter, (void));
-+INIT_KSYM_CALL(int, init_xtables, (void));
-+INIT_KSYM_CALL(int, init_xt_tcpudp, (void));
-+INIT_KSYM_CALL(int, init_iptables, (void));
-+INIT_KSYM_CALL(int, init_iptable_filter, (void));
-+INIT_KSYM_CALL(int, init_iptable_mangle, (void));
-+INIT_KSYM_CALL(int, init_xt_limit, (void));
-+INIT_KSYM_CALL(int, init_iptable_multiport, (void));
-+INIT_KSYM_CALL(int, init_iptable_tos, (void));
-+INIT_KSYM_CALL(int, init_iptable_TOS, (void));
-+INIT_KSYM_CALL(int, init_iptable_REJECT, (void));
-+INIT_KSYM_CALL(int, init_iptable_TCPMSS, (void));
-+INIT_KSYM_CALL(int, init_xt_tcpmss, (void));
-+INIT_KSYM_CALL(int, init_iptable_ttl, (void));
-+INIT_KSYM_CALL(int, init_iptable_LOG, (void));
-+INIT_KSYM_CALL(int, init_xt_length, (void));
-+INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
-+INIT_KSYM_CALL(int, init_iptable_ftp, (void));
-+INIT_KSYM_CALL(int, init_iptable_irc, (void));
-+INIT_KSYM_CALL(int, init_xt_conntrack_match, (void));
-+INIT_KSYM_CALL(int, init_xt_state, (void));
-+INIT_KSYM_CALL(int, init_xt_helper, (void));
-+INIT_KSYM_CALL(int, ip_nat_init, (void));
-+INIT_KSYM_CALL(int, init_iptable_nat, (void));
-+INIT_KSYM_CALL(int, init_iptable_nat_ftp, (void));
-+INIT_KSYM_CALL(int, init_iptable_nat_irc, (void));
-+INIT_KSYM_CALL(int, init_iptable_REDIRECT, (void));
-+INIT_KSYM_CALL(void, fini_iptable_nat_irc, (void));
-+INIT_KSYM_CALL(void, fini_iptable_nat_ftp, (void));
-+INIT_KSYM_CALL(void, fini_iptable_nat, (void));
-+INIT_KSYM_CALL(void, ip_nat_cleanup, (void));
-+INIT_KSYM_CALL(void, fini_xt_helper, (void));
-+INIT_KSYM_CALL(void, fini_xt_state, (void));
-+INIT_KSYM_CALL(void, fini_xt_conntrack_match, (void));
-+INIT_KSYM_CALL(void, fini_iptable_irc, (void));
-+INIT_KSYM_CALL(void, fini_iptable_ftp, (void));
-+INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
-+INIT_KSYM_CALL(void, fini_xt_length, (void));
-+INIT_KSYM_CALL(void, fini_iptable_LOG, (void));
-+INIT_KSYM_CALL(void, fini_iptable_ttl, (void));
-+INIT_KSYM_CALL(void, fini_xt_tcpmss, (void));
-+INIT_KSYM_CALL(void, fini_iptable_TCPMSS, (void));
-+INIT_KSYM_CALL(void, fini_iptable_REJECT, (void));
-+INIT_KSYM_CALL(void, fini_iptable_TOS, (void));
-+INIT_KSYM_CALL(void, fini_iptable_tos, (void));
-+INIT_KSYM_CALL(void, fini_iptable_multiport, (void));
-+INIT_KSYM_CALL(void, fini_xt_limit, (void));
-+INIT_KSYM_CALL(void, fini_iptable_filter, (void));
-+INIT_KSYM_CALL(void, fini_iptable_mangle, (void));
-+INIT_KSYM_CALL(void, fini_iptables, (void));
-+INIT_KSYM_CALL(void, fini_xt_tcpudp, (void));
-+INIT_KSYM_CALL(void, fini_xtables, (void));
-+INIT_KSYM_CALL(void, fini_netfilter, (void));
-+INIT_KSYM_CALL(void, fini_iptable_REDIRECT, (void));
-+
-+INIT_KSYM_CALL(void, ipt_flush_table, (struct xt_table *table));
-+#endif
-+
-+#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
-+INIT_KSYM_MODULE(vzmon);
-+INIT_KSYM_CALL(int, real_get_device_perms_ve,
-+ (int dev_type, dev_t dev, int access_mode));
-+INIT_KSYM_CALL(void, real_do_env_cleanup, (struct ve_struct *env));
-+INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
-+INIT_KSYM_CALL(void, real_update_load_avg_ve, (void));
-+
-+/*
-+ * Forward to real_get_device_perms_ve via the vzmon KSYMSAFECALL hook.
-+ * NOTE(review): the result when the vzmon module is absent depends on
-+ * KSYMSAFECALL, which is defined elsewhere -- confirm.
-+ */
-+int get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
-+{
-+ return KSYMSAFECALL(int, vzmon, real_get_device_perms_ve,
-+ (dev_type, dev, access_mode));
-+}
-+EXPORT_SYMBOL(get_device_perms_ve);
-+
-+/* Forward to real_do_env_cleanup via the vzmon KSYMSAFECALL_VOID hook. */
-+void do_env_cleanup(struct ve_struct *env)
-+{
-+ KSYMSAFECALL_VOID(vzmon, real_do_env_cleanup, (env));
-+}
-+
-+/* Forward to real_do_env_free via the vzmon KSYMSAFECALL_VOID hook. */
-+void do_env_free(struct ve_struct *env)
-+{
-+ KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
-+}
-+EXPORT_SYMBOL(do_env_free);
-+
-+/* Forward to real_update_load_avg_ve via the vzmon KSYMSAFECALL_VOID hook. */
-+void do_update_load_avg_ve(void)
-+{
-+ KSYMSAFECALL_VOID(vzmon, real_update_load_avg_ve, ());
-+}
-+#endif
-+
-+/*
-+ * Statically initialised ve_struct, exported as ve0.  It points at the
-+ * system-wide utsname and, when configured, at the global network
-+ * device list tail and devpts configuration.
-+ */
-+struct ve_struct ve0 = {
-+ .utsname = &system_utsname,
-+ .vetask_lh = LIST_HEAD_INIT(ve0.vetask_lh),
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ ._net_dev_tail = &ve0._net_dev_base,
-+ .ifindex = -1,
-+#endif
-+#ifdef CONFIG_UNIX98_PTYS
-+ .devpts_config = &devpts_config,
-+#endif
-+};
-+
-+EXPORT_SYMBOL(ve0);
-+
-+#endif /* CONFIG_VE */
-diff -upr linux-2.6.16.orig/kernel/vecalls.c linux-2.6.16-026test009/kernel/vecalls.c
---- linux-2.6.16.orig/kernel/vecalls.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/vecalls.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,3275 @@
-+/*
-+ * linux/kernel/vecalls.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ */
-+
-+/*
-+ * 'vecalls.c' is the file with basic VE support. It provides basic
-+ * primitives along with the initialization script
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/delay.h>
-+#include <linux/capability.h>
-+#include <linux/ve.h>
-+#include <linux/smp_lock.h>
-+#include <linux/init.h>
-+#include <linux/list.h>
-+#include <linux/ve_owner.h>
-+#include <linux/errno.h>
-+#include <linux/unistd.h>
-+#include <linux/slab.h>
-+#include <linux/vmalloc.h>
-+#include <linux/sys.h>
-+#include <linux/fs.h>
-+#include <linux/namespace.h>
-+#include <linux/termios.h>
-+#include <linux/tty_driver.h>
-+#include <linux/netdevice.h>
-+#include <linux/wait.h>
-+#include <linux/inetdevice.h>
-+#include <linux/utsname.h>
-+#include <linux/sysctl.h>
-+#include <linux/proc_fs.h>
-+#include <linux/seq_file.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/module.h>
-+#include <linux/suspend.h>
-+#include <linux/rcupdate.h>
-+#include <linux/in.h>
-+#include <linux/major.h>
-+#include <linux/kdev_t.h>
-+#include <linux/idr.h>
-+#include <linux/inetdevice.h>
-+#include <net/pkt_sched.h>
-+#include <linux/divert.h>
-+#include <ub/beancounter.h>
-+
-+#include <net/route.h>
-+#include <net/ip_fib.h>
-+
-+#include <linux/ve_proto.h>
-+#include <linux/venet.h>
-+#include <linux/vzctl.h>
-+#include <linux/vzcalluser.h>
-+#ifdef CONFIG_FAIRSCHED
-+#include <linux/fairsched.h>
-+#endif
-+
-+#include <linux/nfcalls.h>
-+
-+struct ve_struct *ve_list_head = NULL;
-+int nr_ve = 1; /* One VE always exists. Compatibility with vestat */
-+rwlock_t ve_list_guard = RW_LOCK_UNLOCKED;
-+static rwlock_t devperms_hash_guard = RW_LOCK_UNLOCKED;
-+
-+extern int glob_virt_pids;
-+
-+static int do_env_enter(struct ve_struct *ve, unsigned int flags);
-+static void do_clean_devperms(envid_t veid);
-+static int alloc_ve_tty_drivers(struct ve_struct* ve);
-+static void free_ve_tty_drivers(struct ve_struct* ve);
-+static int register_ve_tty_drivers(struct ve_struct* ve);
-+static void unregister_ve_tty_drivers(struct ve_struct* ve);
-+static int init_ve_tty_drivers(struct ve_struct *);
-+static void fini_ve_tty_drivers(struct ve_struct *);
-+static void clear_termios(struct tty_driver* driver );
-+static void ve_mapped_devs_cleanup(struct ve_struct *ve);
-+
-+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf);
-+
-+static void vecalls_exit(void);
-+
-+/*
-+ * Walk the ve_list_head chain and return the VE whose veid matches, or
-+ * NULL if there is none.  No locking is done here; the callers visible
-+ * in this file hold ve_list_guard for reading.
-+ */
-+struct ve_struct *__find_ve_by_id(envid_t veid)
-+{
-+	struct ve_struct *ve = ve_list_head;
-+
-+	while (ve != NULL) {
-+		if (ve->veid == veid)
-+			break;
-+		ve = ve->next;
-+	}
-+	return ve;
-+}
-+
-+/*
-+ * Look a VE up by id under ve_list_guard and take a reference on it
-+ * with get_ve().  Returns the VE, or NULL if no VE has that id.
-+ * NOTE(review): get_ve() is also called with the NULL lookup result --
-+ * presumably it tolerates NULL; confirm.
-+ */
-+struct ve_struct *get_ve_by_id(envid_t veid)
-+{
-+ struct ve_struct *ve;
-+ read_lock(&ve_list_guard);
-+ ve = __find_ve_by_id(veid);
-+ get_ve(ve);
-+ read_unlock(&ve_list_guard);
-+ return ve;
-+}
-+
-+/*
-+ * real_put_ve() MUST be used instead of put_ve() inside vecalls.
-+ */
-+void real_do_env_free(struct ve_struct *ve);
-+/*
-+ * Drop one reference on ve (NULL is ignored).  When the counter reaches
-+ * zero the VE is freed with real_do_env_free(); it is a BUG if the VE
-+ * still has a positive pcounter or is still marked running then.
-+ */
-+static inline void real_put_ve(struct ve_struct *ve)
-+{
-+ if (ve && atomic_dec_and_test(&ve->counter)) {
-+ if (atomic_read(&ve->pcounter) > 0)
-+ BUG();
-+ if (ve->is_running)
-+ BUG();
-+ real_do_env_free(ve);
-+ }
-+}
-+
-+extern struct file_system_type devpts_fs_type;
-+extern struct file_system_type sysfs_fs_type;
-+extern struct file_system_type tmpfs_fs_type;
-+extern struct file_system_type proc_fs_type;
-+
-+extern spinlock_t task_capability_lock;
-+extern void ve_ipc_free(struct ve_struct * ve);
-+extern void ip_fragment_cleanup(struct ve_struct *ve);
-+
-+/*
-+ * Collect the CPU statistics of VE 'veid' and copy them to the user
-+ * buffer 'buf'.
-+ *
-+ * A caller outside the super VE may only query its own VE (-EPERM
-+ * otherwise); veid 0 and unknown ids yield -ESRCH.  Also returns
-+ * -ENOMEM or -EFAULT; 0 on success.
-+ */
-+static int ve_get_cpu_stat(envid_t veid, struct vz_cpu_stat *buf)
-+{
-+ struct ve_struct *ve;
-+ struct vz_cpu_stat *vstat;
-+ int retval;
-+ int i, cpu;
-+ unsigned long tmp;
-+
-+ if (!ve_is_super(get_exec_env()) && (veid != get_exec_env()->veid))
-+ return -EPERM;
-+ if (veid == 0)
-+ return -ESRCH;
-+
-+ /* build the result in a zeroed kernel buffer, copy it out at the end */
-+ vstat = kmalloc(sizeof(*vstat), GFP_KERNEL);
-+ if (!vstat)
-+ return -ENOMEM;
-+ memset(vstat, 0, sizeof(*vstat));
-+
-+ retval = -ESRCH;
-+ read_lock(&ve_list_guard);
-+ ve = __find_ve_by_id(veid);
-+ if (ve == NULL)
-+ goto out_unlock;
-+ /* accumulate the per-cpu counters */
-+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
-+ struct ve_cpu_stats *st;
-+
-+ st = VE_CPU_STATS(ve, cpu);
-+ vstat->user_jif += st->user;
-+ vstat->nice_jif += st->nice;
-+ vstat->system_jif += st->system;
-+ vstat->idle_clk += ve_sched_get_idle_time(ve, cpu);
-+ }
-+ vstat->uptime_clk = get_cycles() - ve->start_cycles;
-+ vstat->uptime_jif = jiffies - ve->start_jiffies;
-+ /* split the three load averages into integer and fractional parts */
-+ for (i = 0; i < 3; i++) {
-+ tmp = ve->avenrun[i] + (FIXED_1/200);
-+ vstat->avenrun[i].val_int = LOAD_INT(tmp);
-+ vstat->avenrun[i].val_frac = LOAD_FRAC(tmp);
-+ }
-+ read_unlock(&ve_list_guard);
-+
-+ retval = 0;
-+ if (copy_to_user(buf, vstat, sizeof(*vstat)))
-+ retval = -EFAULT;
-+out_free:
-+ kfree(vstat);
-+ return retval;
-+
-+out_unlock:
-+ read_unlock(&ve_list_guard);
-+ goto out_free;
-+}
-+
-+/**********************************************************************
-+ * Devices permissions routines,
-+ * character and block devices separately
-+ **********************************************************************/
-+
-+/* Rules applied in the following order:
-+ MAJOR!=0, MINOR!=0
-+ MAJOR!=0, MINOR==0
-+ MAJOR==0, MINOR==0
-+*/
-+/* One device-permission rule; rules are chained into devperms_hash. */
-+struct devperms_struct
-+{
-+ dev_t dev; /* device id */
-+ unsigned char mask; /* granted bits; tables in this file use S_IROTH, S_IWOTH, S_IXGRP */
-+ unsigned type; /* S_IFCHR or S_IFBLK; lookups may OR in VE_USE_MINOR/VE_USE_MAJOR */
-+ envid_t veid; /* id of the VE this rule applies to */
-+
-+ /* hash chain linkage, maintained by hash_devperms()/unhash_devperms() */
-+ struct devperms_struct *devhash_next;
-+ struct devperms_struct **devhash_pprev;
-+};
-+
-+/* Catch-all rules for veid 0; init_devperms_hash() puts both in the hash. */
-+static struct devperms_struct original_perms[] = {
-+	{
-+		.dev		= MKDEV(0,0),
-+		.mask		= S_IROTH | S_IWOTH,
-+		.type		= S_IFCHR,
-+		.veid		= 0,
-+		.devhash_next	= NULL,
-+		.devhash_pprev	= NULL,
-+	},
-+	{
-+		.dev		= MKDEV(0,0),
-+		.mask		= S_IXGRP | S_IROTH | S_IWOTH,
-+		.type		= S_IFBLK,
-+		.veid		= 0,
-+		.devhash_next	= NULL,
-+		.devhash_pprev	= NULL,
-+	},
-+};
-+
-+/* Default rules matched on the major number only (all pty majors). */
-+static struct devperms_struct default_major_perms[] = {
-+	{ .dev = MKDEV(UNIX98_PTY_MASTER_MAJOR, 0),
-+	  .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	{ .dev = MKDEV(UNIX98_PTY_SLAVE_MAJOR, 0),
-+	  .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	{ .dev = MKDEV(PTY_MASTER_MAJOR, 0),
-+	  .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	{ .dev = MKDEV(PTY_SLAVE_MAJOR, 0),
-+	  .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+};
-+/* Default rules matched on the exact major:minor pair. */
-+static struct devperms_struct default_minor_perms[] = {
-+	/* null */
-+	{ .dev = MKDEV(MEM_MAJOR, 3), .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	/* zero */
-+	{ .dev = MKDEV(MEM_MAJOR, 5), .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	/* full */
-+	{ .dev = MKDEV(MEM_MAJOR, 7), .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	/* tty */
-+	{ .dev = MKDEV(TTYAUX_MAJOR, 0), .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	/* ptmx */
-+	{ .dev = MKDEV(TTYAUX_MAJOR, 2), .mask = S_IROTH | S_IWOTH, .type = S_IFCHR },
-+	/* random */
-+	{ .dev = MKDEV(MEM_MAJOR, 8), .mask = S_IROTH, .type = S_IFCHR },
-+	/* urandom */
-+	{ .dev = MKDEV(MEM_MAJOR, 9), .mask = S_IROTH, .type = S_IFCHR },
-+};
-+
-+/* Fallback rule with an empty mask. */
-+static struct devperms_struct default_deny_perms = {
-+	.dev	= MKDEV(0, 0),
-+	.mask	= 0,
-+	.type	= S_IFCHR,
-+};
-+
-+/*
-+ * Pick the built-in rule for @dev: an exact major:minor entry first, then a
-+ * major-only entry, otherwise the deny rule. Never returns NULL.
-+ */
-+static inline struct devperms_struct *find_default_devperms(int type,
-+		dev_t dev)
-+{
-+	struct devperms_struct *rule;
-+	int left;
-+
-+	/* XXX all defaults perms are S_IFCHR */
-+	if (type != S_IFCHR)
-+		return &default_deny_perms;
-+
-+	rule = default_minor_perms;
-+	left = sizeof(default_minor_perms)/sizeof(struct devperms_struct);
-+	for (; left > 0; left--, rule++) {
-+		if (MAJOR(dev) != MAJOR(rule->dev))
-+			continue;
-+		if (MINOR(dev) == MINOR(rule->dev))
-+			return rule;
-+	}
-+
-+	rule = default_major_perms;
-+	left = sizeof(default_major_perms)/sizeof(struct devperms_struct);
-+	for (; left > 0; left--, rule++) {
-+		if (MAJOR(dev) == MAJOR(rule->dev))
-+			return rule;
-+	}
-+
-+	return &default_deny_perms;
-+}
-+
-+#define DEVPERMS_HASH_SZ 512
-+struct devperms_struct *devperms_hash[DEVPERMS_HASH_SZ];
-+
-+/*
-+ * Bucket index for a (VE id, device) pair. DEVPERMS_HASH_SZ is a power of
-+ * two, so masking with (size - 1) keeps the result inside the table.
-+ * The arguments and the whole expansion are parenthesized so the macro is
-+ * safe inside larger expressions and with non-trivial arguments.
-+ */
-+#define devperms_hashfn(id,dev) \
-+	((((id) << 5) ^ ((id) >> 5) ^ (MAJOR(dev)) ^ MINOR(dev)) & \
-+	 (DEVPERMS_HASH_SZ - 1))
-+
-+/* Insert @p at the head of the bucket selected by its veid and dev. */
-+static inline void hash_devperms(struct devperms_struct *p)
-+{
-+	struct devperms_struct **bucket, *old_head;
-+
-+	bucket = &devperms_hash[devperms_hashfn(p->veid,p->dev)];
-+	old_head = *bucket;
-+
-+	p->devhash_next = old_head;
-+	if (old_head != NULL)
-+		old_head->devhash_pprev = &p->devhash_next;
-+	*bucket = p;
-+	p->devhash_pprev = bucket;
-+}
-+
-+/* Unlink @p from its hash chain; @p itself is not freed. */
-+static inline void unhash_devperms(struct devperms_struct *p)
-+{
-+	struct devperms_struct *succ = p->devhash_next;
-+
-+	if (succ != NULL)
-+		succ->devhash_pprev = p->devhash_pprev;
-+	*p->devhash_pprev = succ;
-+}
-+
-+/* Clear the hash table and seed it with the two original_perms rules. */
-+static int __init init_devperms_hash(void)
-+{
-+	write_lock_irq(&devperms_hash_guard);
-+	memset(devperms_hash, 0, sizeof(devperms_hash));
-+	hash_devperms(&original_perms[0]);
-+	hash_devperms(&original_perms[1]);
-+	write_unlock_irq(&devperms_hash_guard);
-+
-+	return 0;
-+}
-+
-+/* Counterpart of init_devperms_hash(); intentionally does nothing. */
-+static inline void fini_devperms_hash(void)
-+{
-+}
-+
-+/*
-+ * Look up the rule matching exactly (veid, type, major, minor).
-+ * Returns NULL when the bucket holds no such rule. The function takes no
-+ * lock itself; the callers in this file hold devperms_hash_guard.
-+ */
-+static inline struct devperms_struct *find_devperms(envid_t veid,
-+		int type,
-+		dev_t dev)
-+{
-+	struct devperms_struct *p;
-+
-+	for (p = devperms_hash[devperms_hashfn(veid,dev)]; p != NULL;
-+			p = p->devhash_next) {
-+		if (p->type != type)
-+			continue;
-+		if (MAJOR(dev) != MAJOR(p->dev))
-+			continue;
-+		if (MINOR(dev) != MINOR(p->dev))
-+			continue;
-+		if (p->veid == veid)
-+			return p;
-+	}
-+	return NULL;
-+}
-+
-+
-+/* Unhash and free every rule that belongs to VE @veid. */
-+static void do_clean_devperms(envid_t veid)
-+{
-+	struct devperms_struct *cur, *next;
-+	int bucket;
-+
-+	write_lock_irq(&devperms_hash_guard);
-+	for (bucket = 0; bucket < DEVPERMS_HASH_SZ; bucket++) {
-+		cur = devperms_hash[bucket];
-+		while (cur != NULL) {
-+			/* remember the successor before cur may be freed */
-+			next = cur->devhash_next;
-+			if (cur->veid == veid) {
-+				unhash_devperms(cur);
-+				kfree(cur);
-+			}
-+			cur = next;
-+		}
-+	}
-+	write_unlock_irq(&devperms_hash_guard);
-+}
-+
-+/*
-+ * Check whether the current VE may access device @dev of type @dev_type.
-+ *
-+ * Mode is a mask of
-+ *	FMODE_READ	for read access (configurable by S_IROTH)
-+ *	FMODE_WRITE	for write access (configurable by S_IWOTH)
-+ *	FMODE_QUOTACTL	for quotactl access (configurable by S_IXGRP)
-+ *
-+ * Rules are tried from most to least specific: exact major:minor, major
-+ * only, the per-type catch-all, and finally the built-in defaults.
-+ *
-+ * Returns 0 if every requested bit is granted, -EACCES otherwise, and
-+ * -ENODEV if no rule at all was found.
-+ */
-+int real_get_device_perms_ve(int dev_type, dev_t dev, int access_mode)
-+{
-+	struct devperms_struct *perms;
-+	envid_t veid;
-+	int granted, found;
-+
-+	veid = get_exec_env()->veid;
-+
-+	read_lock(&devperms_hash_guard);
-+
-+	perms = find_devperms(veid, dev_type|VE_USE_MINOR, dev);
-+	if (!perms)
-+		perms = find_devperms(veid, dev_type|VE_USE_MAJOR,
-+				MKDEV(MAJOR(dev),0));
-+	if (!perms)
-+		perms = find_devperms(veid, dev_type, MKDEV(0,0));
-+	if (!perms)
-+		perms = find_default_devperms(dev_type, dev);
-+
-+	/*
-+	 * Hashed rules can be freed by do_clean_devperms() as soon as the
-+	 * lock is dropped, so take a copy of the mask while it is held.
-+	 */
-+	found = (perms != NULL);
-+	granted = found ? perms->mask : 0;
-+
-+	read_unlock(&devperms_hash_guard);
-+
-+	if (!found)
-+		return -ENODEV;
-+
-+	/*
-+	 * Translate the three mode bits into S_I* bits: bit 0 -> 004,
-+	 * bit 1 -> 002, bit 2 -> 010. The table has eight entries, so the
-+	 * index is limited to the low three bits to stay inside it.
-+	 */
-+	access_mode = "\000\004\002\006\010\014\012\016"[access_mode & 7];
-+	return ((granted & access_mode) == access_mode) ? 0 : -EACCES;
-+}
-+EXPORT_SYMBOL(real_get_device_perms_ve);
-+
-+/*
-+ * Set the permission mask of the (veid, type, dev) rule, creating the rule
-+ * if it does not exist yet.
-+ *
-+ * @type selects how much of @dev is significant: with no VE_USE_* bit the
-+ * device number is ignored, with VE_USE_MAJOR only the major is kept.
-+ * Only the S_IALLUGO bits of @mask are stored.
-+ *
-+ * Returns 0 on success or -ENOMEM if a new rule cannot be allocated.
-+ */
-+int do_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
-+{
-+	struct devperms_struct *perms, *perms_new;
-+
-+	/*
-+	 * Normalize the device number before any lookup so that searching
-+	 * and inserting use the same key (and therefore the same bucket).
-+	 */
-+	switch (type & VE_USE_MASK) {
-+	case 0:
-+		dev = 0;
-+		break;
-+	case VE_USE_MAJOR:
-+		dev = MKDEV(MAJOR(dev),0);
-+		break;
-+	}
-+
-+	write_lock_irq(&devperms_hash_guard);
-+	perms = find_devperms(veid, type, dev);
-+	if (!perms) {
-+		/* GFP_KERNEL may sleep: allocate with the lock dropped */
-+		write_unlock_irq(&devperms_hash_guard);
-+
-+		perms_new = kmalloc(sizeof(struct devperms_struct), GFP_KERNEL);
-+		if (!perms_new)
-+			return -ENOMEM;
-+
-+		write_lock_irq(&devperms_hash_guard);
-+		/* somebody may have added the rule while we were unlocked */
-+		perms = find_devperms(veid, type, dev);
-+		if (perms) {
-+			/* lost the race: keep the hashed rule as it is linked */
-+			kfree(perms_new);
-+		} else {
-+			perms_new->veid = veid;
-+			perms_new->dev = dev;
-+			perms_new->type = type;
-+			hash_devperms(perms_new);
-+			perms = perms_new;
-+		}
-+	}
-+	perms->mask = mask & S_IALLUGO;
-+	write_unlock_irq(&devperms_hash_guard);
-+	return 0;
-+}
-+EXPORT_SYMBOL(do_setdevperms);
-+
-+/*
-+ * Permission-checked front end to do_setdevperms().
-+ *
-+ * Returns -EPERM without CAP_SETVEID or for veid 0, -ESRCH if the VE does
-+ * not exist or is not running, otherwise the result of do_setdevperms().
-+ */
-+int real_setdevperms(envid_t veid, unsigned type, dev_t dev, unsigned mask)
-+{
-+	struct ve_struct *env;
-+	int ret;
-+
-+	if (!capable(CAP_SETVEID) || veid == 0)
-+		return -EPERM;
-+
-+	env = get_ve_by_id(veid);
-+	if (env == NULL)
-+		return -ESRCH;
-+
-+	down_read(&env->op_sem);
-+	if (env->is_running)
-+		ret = do_setdevperms(veid, type, dev, mask);
-+	else
-+		ret = -ESRCH;
-+	up_read(&env->op_sem);
-+
-+	real_put_ve(env);
-+	return ret;
-+}
-+
-+/* Recompute the three load averages of every VE on the VE list. */
-+void real_update_load_avg_ve(void)
-+{
-+	struct ve_struct *env;
-+	unsigned long active;
-+
-+	read_lock(&ve_list_guard);
-+	env = ve_list_head;
-+	while (env != NULL) {
-+		/* running plus uninterruptible tasks, in fixed point */
-+		active = nr_running_ve(env) + nr_uninterruptible_ve(env);
-+		active *= FIXED_1;
-+
-+		CALC_LOAD(env->avenrun[0], EXP_1, active);
-+		CALC_LOAD(env->avenrun[1], EXP_5, active);
-+		CALC_LOAD(env->avenrun[2], EXP_15, active);
-+
-+		env = env->next;
-+	}
-+	read_unlock(&ve_list_guard);
-+}
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * FS-related helpers to VE start/stop
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+/*
-+ * DEVPTS needs virtualization: each environment should see its own list of
-+ * pseudo-terminals.
-+ * To implement it we need to have separate devpts superblocks for each
-+ * VE, and each VE should mount its own one.
-+ * Thus, separate vfsmount structures are required.
-+ * To minimize intrusion into vfsmount lookup code, separate file_system_type
-+ * structures are created.
-+ *
-+ * In addition to this, a patch for the character device itself is required,
-+ * as the file system itself is used only for MINOR/MAJOR lookup.
-+ */
-+/*
-+ * Create a VE-private copy of @template, register it and mount it once
-+ * with kern_mount(). On success the copy is returned through @p_fs_type
-+ * and the mount through @p_mnt. On failure the copy is freed and the
-+ * error of the failing step is returned.
-+ */
-+static int register_ve_fs_type(struct ve_struct *ve,
-+ struct file_system_type *template,
-+ struct file_system_type **p_fs_type, struct vfsmount **p_mnt)
-+{
-+ struct vfsmount *mnt;
-+ struct file_system_type *local_fs_type;
-+ int ret;
-+
-+ VZTRACE("register_ve_fs_type(\"%s\")\n", template->name);
-+
-+ /*
-+ * NOTE(review): sizeof(void *) extra bytes are allocated but the
-+ * memset below does not clear them; presumably they are written by
-+ * SET_VE_OWNER_FSTYPE() - confirm.
-+ */
-+ local_fs_type = kmalloc(sizeof(*local_fs_type) + sizeof(void *),
-+ GFP_KERNEL);
-+ if (local_fs_type == NULL)
-+ return -ENOMEM;
-+
-+ memset(local_fs_type, 0, sizeof(*local_fs_type));
-+ local_fs_type->name = template->name;
-+ local_fs_type->fs_flags = template->fs_flags;
-+ local_fs_type->get_sb = template->get_sb;
-+ local_fs_type->kill_sb = template->kill_sb;
-+ local_fs_type->owner = template->owner;
-+ /*
-+ * 1. we do not have refcounter on fstype
-+ * 2. fstype holds reference to ve using get_ve()/put_ve().
-+ * so we free fstype when freeing ve and we are sure it's ok to free it
-+ */
-+ SET_VE_OWNER_FSTYPE(local_fs_type, ve);
-+ get_filesystem(local_fs_type); /* get_ve() inside */
-+
-+ ret = register_filesystem(local_fs_type); /* does not get */
-+ if (ret)
-+ goto reg_err;
-+
-+ mnt = kern_mount(local_fs_type);
-+ if (IS_ERR(mnt))
-+ goto mnt_err;
-+
-+ /* Usage counters after successful execution of kern_mount:
-+ * local_fs_type - +1 (get_fs_type,get_sb_single,put_filesystem)
-+ * mnt - +1 == 1 (alloc_vfsmnt)
-+ */
-+
-+ *p_fs_type = local_fs_type;
-+ *p_mnt = mnt;
-+ return 0;
-+
-+ /* unwind in reverse order of the steps above */
-+mnt_err:
-+ ret = PTR_ERR(mnt);
-+ unregister_filesystem(local_fs_type); /* does not put */
-+
-+reg_err:
-+ put_filesystem(local_fs_type);
-+ kfree(local_fs_type);
-+ printk(KERN_DEBUG
-+ "register_ve_fs_type(\"%s\") err=%d\n", template->name, ret);
-+ return ret;
-+}
-+
-+/*
-+ * Unmount every mount in the current namespace whose superblock belongs to
-+ * @local_fs_type.
-+ */
-+static void umount_ve_fs_type(struct file_system_type *local_fs_type)
-+{
-+ struct vfsmount *mnt;
-+ struct list_head *p, *q;
-+ LIST_HEAD(kill);
-+ LIST_HEAD(umount_list);
-+
-+ down_write(&namespace_sem);
-+ spin_lock(&vfsmount_lock);
-+ /* pass 1: move all matching mounts from the namespace list to 'kill' */
-+ list_for_each_safe(p, q, &current->namespace->list) {
-+ mnt = list_entry(p, struct vfsmount, mnt_list);
-+ if (mnt->mnt_sb->s_type != local_fs_type)
-+ continue;
-+ list_del(p);
-+ list_add(p, &kill);
-+ }
-+
-+ /*
-+ * pass 2: detach each collected tree.
-+ * NOTE(review): the loop terminates only if umount_tree() takes mnt
-+ * off the 'kill' list - confirm against umount_tree().
-+ */
-+ while (!list_empty(&kill)) {
-+ mnt = list_entry(kill.next, struct vfsmount, mnt_list);
-+ umount_tree(mnt, 1, &umount_list);
-+ }
-+ spin_unlock(&vfsmount_lock);
-+ up_write(&namespace_sem);
-+ /* release_mounts() is called only after both locks are dropped */
-+ release_mounts(&umount_list);
-+}
-+
-+/*
-+ * Undo register_ve_fs_type(): unregister the type, unmount all of its
-+ * mounts and drop the internal mount and the filesystem reference.
-+ * Passing both arguments as NULL is a no-op; exactly one NULL is a BUG().
-+ * The structure itself is not freed here.
-+ */
-+static void unregister_ve_fs_type(struct file_system_type *local_fs_type,
-+		struct vfsmount *local_fs_mount)
-+{
-+	if (local_fs_mount == NULL || local_fs_type == NULL) {
-+		/* the pair must be set or cleared together */
-+		if ((local_fs_mount == NULL) != (local_fs_type == NULL))
-+			BUG();
-+		return;
-+	}
-+
-+	VZTRACE("unregister_ve_fs_type(\"%s\")\n", local_fs_type->name);
-+
-+	unregister_filesystem(local_fs_type);
-+	umount_ve_fs_type(local_fs_type);
-+	/* alias to mntput, drop our ref */
-+	kern_umount(local_fs_mount);
-+	put_filesystem(local_fs_type);
-+}
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * sysctl-related helpers to VE start/stop
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+#ifdef CONFIG_SYSCTL
-+/*
-+ * Template of the per-VE "kernel" sysctl directory.
-+ * Layout: [0] is the directory, [1] terminates the top level, [2..] are
-+ * the directory's children (entry [0] points at them via .child).
-+ * register_ve_sysctltables() fills in the .data pointers of the children
-+ * by position, so the order of the entries below must not change.
-+ */
-+static ctl_table ve_sysctl_tables[] = {
-+ /* kernel */
-+ {
-+ .ctl_name = CTL_KERN,
-+ .procname = "kernel",
-+ .mode = 0555,
-+ .child = &ve_sysctl_tables[2],
-+ },
-+ { .ctl_name = 0 },
-+ /* kernel/[vars] */
-+ {
-+ .ctl_name = KERN_NODENAME,
-+ .procname = "hostname",
-+ .maxlen = 64,
-+ .mode = 0644,
-+ .proc_handler = &proc_doutsstring,
-+ .strategy = &sysctl_string,
-+ },
-+ {
-+ .ctl_name = KERN_DOMAINNAME,
-+ .procname = "domainname",
-+ .maxlen = 64,
-+ .mode = 0644,
-+ .proc_handler = &proc_doutsstring,
-+ .strategy = &sysctl_string,
-+ },
-+ {
-+ .ctl_name = KERN_SHMMAX,
-+ .procname = "shmmax",
-+ .maxlen = sizeof(size_t),
-+ .mode = 0644,
-+ .proc_handler = &proc_doulongvec_minmax,
-+ },
-+ {
-+ .ctl_name = KERN_SHMALL,
-+ .procname = "shmall",
-+ .maxlen = sizeof(size_t),
-+ .mode = 0644,
-+ .proc_handler = &proc_doulongvec_minmax,
-+ },
-+ {
-+ .ctl_name = KERN_SHMMNI,
-+ .procname = "shmmni",
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ {
-+ .ctl_name = KERN_MSGMAX,
-+ .procname = "msgmax",
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ {
-+ .ctl_name = KERN_MSGMNI,
-+ .procname = "msgmni",
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ {
-+ .ctl_name = KERN_MSGMNB,
-+ .procname = "msgmnb",
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec,
-+ },
-+ {
-+ .ctl_name = KERN_SEM,
-+ .procname = "sem",
-+ .maxlen = 4 * sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ { .ctl_name = 0, }
-+};
-+
-+/*
-+ * Clone ve_sysctl_tables for @ve, point each variable at the VE's own
-+ * storage and register the clone. Returns 0 or -ENOMEM.
-+ */
-+static int register_ve_sysctltables(struct ve_struct *ve)
-+{
-+	struct ctl_table_header *hdr;
-+	ctl_table *clone, *vars;
-+
-+	VZTRACE("register_ve_sysctltables\n");
-+
-+	clone = clone_sysctl_template(ve_sysctl_tables,
-+			sizeof(ve_sysctl_tables) / sizeof(ctl_table));
-+	if (clone == NULL)
-+		return -ENOMEM;
-+
-+	/* positions follow the order of entries in ve_sysctl_tables */
-+	vars = clone->child;
-+	vars[0].data = &ve->utsname->nodename;
-+	vars[1].data = &ve->utsname->domainname;
-+	vars[2].data = &ve->_shm_ctlmax;
-+	vars[3].data = &ve->_shm_ctlall;
-+	vars[4].data = &ve->_shm_ctlmni;
-+	vars[5].data = &ve->_msg_ctlmax;
-+	vars[6].data = &ve->_msg_ctlmni;
-+	vars[7].data = &ve->_msg_ctlmnb;
-+	vars[8].data = &ve->_sem_ctls[0];
-+
-+	/* insert at head to override kern entries */
-+	hdr = register_sysctl_table(clone, 1);
-+	if (hdr == NULL) {
-+		free_sysctl_clone(clone);
-+		return -ENOMEM;
-+	}
-+
-+	ve->kern_header = hdr;
-+	ve->kern_table = clone;
-+	return 0;
-+}
-+
-+/* Unregister the table registered by register_ve_sysctltables(). */
-+static inline void unregister_ve_sysctltables(struct ve_struct *ve)
-+{
-+ unregister_sysctl_table(ve->kern_header);
-+}
-+
-+/* Free the table clone created by register_ve_sysctltables(). */
-+static inline void free_ve_sysctltables(struct ve_struct *ve)
-+{
-+ free_sysctl_clone(ve->kern_table);
-+}
-+#endif
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * VE start: subsystems
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#include <net/ip.h>
-+#include <net/tcp.h>
-+#include <net/udp.h>
-+#include <net/icmp.h>
-+
-+extern struct new_utsname virt_utsname;
-+
-+/*
-+ * Allocate ve->utsname as a copy of system_utsname with the release
-+ * string taken from virt_utsname. Returns 0 or -ENOMEM.
-+ */
-+static int init_ve_utsname(struct ve_struct *ve)
-+{
-+	struct new_utsname *uts;
-+
-+	uts = kmalloc(sizeof(*ve->utsname), GFP_KERNEL);
-+	ve->utsname = uts;
-+	if (uts == NULL)
-+		return -ENOMEM;
-+
-+	/* protect the source */
-+	down_read(&uts_sem);
-+	memcpy(uts, &system_utsname, sizeof(*ve->utsname));
-+	memcpy(uts->release, virt_utsname.release,
-+			sizeof(virt_utsname.release));
-+	up_read(&uts_sem);
-+
-+	return 0;
-+}
-+
-+/* Free ve->utsname and clear the pointer. */
-+static void free_ve_utsname(struct ve_struct *ve)
-+{
-+ kfree(ve->utsname);
-+ ve->utsname = NULL;
-+}
-+
-+/*
-+ * Allocate (@fini == 0) or free (@fini != 0) the per-CPU statistics
-+ * blocks of @ve. Both modes share one label ladder: a failed allocation
-+ * jumps to the label that frees everything allocated before it, and the
-+ * fini mode enters at the top of the ladder to free all ten blocks.
-+ *
-+ * Returns 0 only when all allocations succeeded. Every path through the
-+ * ladder - including the fini mode - returns -ENOMEM.
-+ */
-+static int init_fini_ve_mibs(struct ve_struct *ve, int fini)
-+{
-+ if (fini)
-+ goto fini;
-+ if (!(ve->_net_statistics[0] = alloc_percpu(struct linux_mib)))
-+ goto out1;
-+ if (!(ve->_net_statistics[1] = alloc_percpu(struct linux_mib)))
-+ goto out2;
-+ if (!(ve->_ip_statistics[0] = alloc_percpu(struct ipstats_mib)))
-+ goto out3;
-+ if (!(ve->_ip_statistics[1] = alloc_percpu(struct ipstats_mib)))
-+ goto out4;
-+ if (!(ve->_icmp_statistics[0] = alloc_percpu(struct icmp_mib)))
-+ goto out5;
-+ if (!(ve->_icmp_statistics[1] = alloc_percpu(struct icmp_mib)))
-+ goto out6;
-+ if (!(ve->_tcp_statistics[0] = alloc_percpu(struct tcp_mib)))
-+ goto out7;
-+ if (!(ve->_tcp_statistics[1] = alloc_percpu(struct tcp_mib)))
-+ goto out8;
-+ if (!(ve->_udp_statistics[0] = alloc_percpu(struct udp_mib)))
-+ goto out9;
-+ if (!(ve->_udp_statistics[1] = alloc_percpu(struct udp_mib)))
-+ goto out10;
-+ return 0;
-+ /* frees run in reverse allocation order; pointers are not reset */
-+fini:
-+ free_percpu(ve->_udp_statistics[1]);
-+out10:
-+ free_percpu(ve->_udp_statistics[0]);
-+out9:
-+ free_percpu(ve->_tcp_statistics[1]);
-+out8:
-+ free_percpu(ve->_tcp_statistics[0]);
-+out7:
-+ free_percpu(ve->_icmp_statistics[1]);
-+out6:
-+ free_percpu(ve->_icmp_statistics[0]);
-+out5:
-+ free_percpu(ve->_ip_statistics[1]);
-+out4:
-+ free_percpu(ve->_ip_statistics[0]);
-+out3:
-+ free_percpu(ve->_net_statistics[1]);
-+out2:
-+ free_percpu(ve->_net_statistics[0]);
-+out1:
-+ return -ENOMEM;
-+}
-+
-+/* Allocate all statistics blocks of @ve; returns 0 or -ENOMEM. */
-+static inline int init_ve_mibs(struct ve_struct *ve)
-+{
-+ return init_fini_ve_mibs(ve, 0);
-+}
-+
-+/* Free all statistics blocks of @ve. */
-+static inline void fini_ve_mibs(struct ve_struct *ve)
-+{
-+ /* the fini mode always returns -ENOMEM, so the result is discarded */
-+ (void)init_fini_ve_mibs(ve, 1);
-+}
-+
-+extern struct net_device templ_loopback_dev;
-+/*
-+ * alloc_netdev() setup callback: overwrite @dev with the loopback
-+ * template while keeping the 'padded' value of the new device.
-+ */
-+static void veloop_setup(struct net_device *dev)
-+{
-+	int saved_padded = dev->padded;
-+
-+	memcpy(dev, &templ_loopback_dev, sizeof(struct net_device));
-+	dev->padded = saved_padded;
-+}
-+
-+/*
-+ * Initialize the network device lists of the current VE and register its
-+ * private loopback device.
-+ *
-+ * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
-+ * error from register_netdev(). On failure ve->_loopback_dev is left NULL
-+ * so that no pointer to the freed device survives.
-+ */
-+static int init_ve_netdev(void)
-+{
-+	struct ve_struct *ve;
-+	struct net_device_stats *stats;
-+	int err;
-+
-+	ve = get_exec_env();
-+	INIT_HLIST_HEAD(&ve->_net_dev_head);
-+	ve->_net_dev_base = NULL;
-+	ve->_net_dev_tail = &ve->_net_dev_base;
-+
-+	ve->_loopback_dev = alloc_netdev(0, templ_loopback_dev.name,
-+			veloop_setup);
-+	if (ve->_loopback_dev == NULL)
-+		return -ENOMEM;
-+	if (loopback_dev.get_stats != NULL) {
-+		/* statistics are optional: carry on without them on failure */
-+		stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
-+		if (stats != NULL) {
-+			memset(stats, 0, sizeof(struct net_device_stats));
-+			ve->_loopback_dev->priv = stats;
-+			ve->_loopback_dev->get_stats = loopback_dev.get_stats;
-+			ve->_loopback_dev->destructor = loopback_dev.destructor;
-+		}
-+	}
-+	err = register_netdev(ve->_loopback_dev);
-+	if (err) {
-+		/* kfree(NULL) is a no-op, so no guard is needed */
-+		kfree(ve->_loopback_dev->priv);
-+		free_netdev(ve->_loopback_dev);
-+		ve->_loopback_dev = NULL;
-+	}
-+	return err;
-+}
-+
-+/*
-+ * Unregister and free every network device reachable from dev_base, the
-+ * loopback device last.
-+ * NOTE(review): dev_base is presumably resolved per VE here - confirm.
-+ */
-+static void fini_ve_netdev(void)
-+{
-+ struct ve_struct *ve;
-+ struct net_device *dev;
-+
-+ ve = get_exec_env();
-+ while (1) {
-+ /* rtnl is retaken per device and released before free_netdev() */
-+ rtnl_lock();
-+ /*
-+ * loopback is special, it can be referenced in fib's,
-+ * so it must be freed the last. Doing so is
-+ * sufficient to guarantee absence of such references.
-+ */
-+ if (dev_base == ve->_loopback_dev)
-+ dev = dev_base->next;
-+ else
-+ dev = dev_base;
-+ if (dev == NULL)
-+ break;
-+ unregister_netdevice(dev);
-+ rtnl_unlock();
-+ free_netdev(dev);
-+ }
-+ /* the loop is left with rtnl still held; use it for the loopback */
-+ unregister_netdevice(ve->_loopback_dev);
-+ rtnl_unlock();
-+ free_netdev(ve->_loopback_dev);
-+ ve->_loopback_dev = NULL;
-+}
-+#else
-+#define init_ve_mibs(ve) (0)
-+#define fini_ve_mibs(ve) do { } while (0)
-+#define init_ve_netdev() (0)
-+#define fini_ve_netdev() do { } while (0)
-+#endif
-+
-+/*
-+ * Allocate the root proc_dir_entry of @ve. The name "/proc" is stored in
-+ * the same allocation, directly behind the entry. Returns 0 or -ENOMEM.
-+ */
-+static int prepare_proc_root(struct ve_struct *ve)
-+{
-+	struct proc_dir_entry *root;
-+	char *name;
-+
-+	root = kmalloc(sizeof(struct proc_dir_entry) + 6, GFP_KERNEL);
-+	if (root == NULL)
-+		return -ENOMEM;
-+
-+	memset(root, 0, sizeof(struct proc_dir_entry));
-+
-+	/* six bytes: the five characters plus the terminating NUL */
-+	name = (char *)(root + 1);
-+	memcpy(name, "/proc", 6);
-+
-+	root->name = name;
-+	root->namelen = 5;
-+	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
-+	root->nlink = 2;
-+	atomic_set(&root->count, 1);
-+
-+	ve->proc_root = root;
-+	return 0;
-+}
-+
-+#ifdef CONFIG_PROC_FS
-+/*
-+ * Build the proc tree of @ve: allocate the root entry, register and mount
-+ * a private proc filesystem type, then create the "vz" directory (and
-+ * "net" with CONFIG_VE_IPTABLES). Returns 0 or a negative errno.
-+ */
-+static int init_ve_proc(struct ve_struct *ve)
-+{
-+ int err;
-+ struct proc_dir_entry *de;
-+
-+ err = prepare_proc_root(ve);
-+ if (err)
-+ goto out_root;
-+
-+ err = register_ve_fs_type(ve, &proc_fs_type,
-+ &ve->proc_fstype, &ve->proc_mnt);
-+ if (err)
-+ goto out_reg;
-+
-+ /* create necessary /proc subdirs in VE local proc tree */
-+ err = -ENOMEM;
-+ de = create_proc_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
-+ if (!de)
-+ goto out_vz;
-+
-+#ifdef CONFIG_VE_IPTABLES
-+ proc_net = proc_mkdir("net", NULL);
-+ if (!proc_net)
-+ goto out_net;
-+#endif
-+
-+ return 0;
-+
-+#ifdef CONFIG_VE_IPTABLES
-+out_net:
-+ remove_proc_entry("vz", NULL);
-+#endif
-+out_vz:
-+ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
-+ ve->proc_mnt = NULL;
-+out_reg:
-+ /* proc_fstype and proc_root are freed in real_put_ve -> free_ve_proc */
-+ ;
-+out_root:
-+ return err;
-+}
-+
-+/*
-+ * Remove the entries created by init_ve_proc() and unregister the proc
-+ * filesystem type. proc_fstype and proc_root are released by
-+ * free_ve_proc().
-+ */
-+static void fini_ve_proc(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_VE_IPTABLES
-+ remove_proc_entry("net", NULL);
-+ proc_net = NULL;
-+#endif
-+ remove_proc_entry("vz", NULL);
-+ unregister_ve_fs_type(ve->proc_fstype, ve->proc_mnt);
-+ ve->proc_mnt = NULL;
-+}
-+
-+/*
-+ * Free the proc root entry and the proc filesystem type of @ve, warning
-+ * first if the root still has a subdirectory chain attached.
-+ */
-+static void free_ve_proc(struct ve_struct *ve)
-+{
-+	struct proc_dir_entry *pde;
-+
-+	/* proc filesystem frees proc_dir_entries on remove_proc_entry() only,
-+	   so we check that everything was removed and not lost */
-+	if (ve->proc_root && ve->proc_root->subdir) {
-+		printk(KERN_WARNING "VPS: %d: proc entry /proc", ve->veid);
-+		for (pde = ve->proc_root->subdir; pde != NULL;
-+				pde = pde->subdir)
-+			printk("/%s", pde->name);
-+		printk(" is not removed!\n");
-+	}
-+
-+	kfree(ve->proc_root);
-+	ve->proc_root = NULL;
-+
-+	kfree(ve->proc_fstype);
-+	ve->proc_fstype = NULL;
-+}
-+#else
-+#define init_ve_proc(ve) (0)
-+#define fini_ve_proc(ve) do { } while (0)
-+#define free_ve_proc(ve) do { } while (0)
-+#endif
-+
-+#ifdef CONFIG_SYSCTL
-+/*
-+ * Set up the sysctl state of @ve: the "sys" proc directory (with
-+ * CONFIG_PROC_FS), the cloned kernel table and the devinet tables.
-+ * Returns 0 or a negative errno; on failure everything set up so far is
-+ * undone.
-+ */
-+static int init_ve_sysctl(struct ve_struct *ve)
-+{
-+	int err;
-+
-+#ifdef CONFIG_PROC_FS
-+	err = -ENOMEM;
-+	/* NULL parent, as in the other proc_mkdir() calls of this file */
-+	ve->proc_sys_root = proc_mkdir("sys", NULL);
-+	if (ve->proc_sys_root == NULL)
-+		goto out_proc;
-+#endif
-+	INIT_LIST_HEAD(&ve->sysctl_lh);
-+	err = register_ve_sysctltables(ve);
-+	if (err)
-+		goto out_reg;
-+
-+	err = devinet_sysctl_init(ve);
-+	if (err)
-+		goto out_dev;
-+
-+	return 0;
-+
-+out_dev:
-+	unregister_ve_sysctltables(ve);
-+	free_ve_sysctltables(ve);
-+out_reg:
-+#ifdef CONFIG_PROC_FS
-+	remove_proc_entry("sys", NULL);
-+out_proc:
-+#endif
-+	return err;
-+}
-+
-+/*
-+ * Unregister what init_ve_sysctl() registered, in reverse order. The
-+ * memory is released separately by free_ve_sysctl().
-+ */
-+static void fini_ve_sysctl(struct ve_struct *ve)
-+{
-+ devinet_sysctl_fini(ve);
-+ unregister_ve_sysctltables(ve);
-+ remove_proc_entry("sys", NULL);
-+}
-+
-+/* Free the sysctl tables of @ve (devinet tables and the kernel clone). */
-+static void free_ve_sysctl(struct ve_struct *ve)
-+{
-+ devinet_sysctl_free(ve);
-+ free_ve_sysctltables(ve);
-+}
-+#else
-+#define init_ve_sysctl(ve) (0)
-+#define fini_ve_sysctl(ve) do { } while (0)
-+#define free_ve_sysctl(ve) do { } while (0)
-+#endif
-+
-+#ifdef CONFIG_UNIX98_PTYS
-+#include <linux/devpts_fs.h>
-+
-+/*
-+ * Allocate the devpts configuration of @ve (mode 0600) and register a
-+ * private devpts filesystem type. Returns 0 or a negative errno; on
-+ * failure the configuration is freed again.
-+ */
-+static int init_ve_devpts(struct ve_struct *ve)
-+{
-+	int err;
-+
-+	ve->devpts_config = kmalloc(sizeof(struct devpts_config), GFP_KERNEL);
-+	if (ve->devpts_config == NULL)
-+		return -ENOMEM;
-+
-+	memset(ve->devpts_config, 0, sizeof(struct devpts_config));
-+	ve->devpts_config->mode = 0600;
-+
-+	err = register_ve_fs_type(ve, &devpts_fs_type,
-+			&ve->devpts_fstype, &ve->devpts_mnt);
-+	if (err != 0) {
-+		kfree(ve->devpts_config);
-+		ve->devpts_config = NULL;
-+	}
-+	return err;
-+}
-+
-+/* Unregister the devpts filesystem type of @ve and free its configuration. */
-+static void fini_ve_devpts(struct ve_struct *ve)
-+{
-+ unregister_ve_fs_type(ve->devpts_fstype, ve->devpts_mnt);
-+ /* devpts_fstype is freed in real_put_ve -> free_ve_filesystems */
-+ ve->devpts_mnt = NULL;
-+ kfree(ve->devpts_config);
-+ ve->devpts_config = NULL;
-+}
-+#else
-+#define init_ve_devpts(ve) (0)
-+#define fini_ve_devpts(ve) do { } while (0)
-+#endif
-+
-+/* Register a private tmpfs filesystem type and mount for @ve. */
-+static int init_ve_shmem(struct ve_struct *ve)
-+{
-+ return register_ve_fs_type(ve,
-+ &tmpfs_fs_type,
-+ &ve->shmem_fstype,
-+ &ve->shmem_mnt);
-+}
-+
-+/* Unregister the tmpfs filesystem type of @ve and forget its mount. */
-+static void fini_ve_shmem(struct ve_struct *ve)
-+{
-+ unregister_ve_fs_type(ve->shmem_fstype, ve->shmem_mnt);
-+ /* shmem_fstype is freed in real_put_ve -> free_ve_filesystems */
-+ ve->shmem_mnt = NULL;
-+}
-+
-+/* Allocate an empty SYSFS_ROOT dirent for @ve. Returns 0 or -ENOMEM. */
-+static inline int init_ve_sysfs_root(struct ve_struct *ve)
-+{
-+	struct sysfs_dirent *sd;
-+
-+	sd = kmalloc(sizeof(struct sysfs_dirent), GFP_KERNEL);
-+	if (sd == NULL)
-+		return -ENOMEM;
-+
-+	memset(sd, 0, sizeof(struct sysfs_dirent));
-+	sd->s_type = SYSFS_ROOT;
-+	INIT_LIST_HEAD(&sd->s_sibling);
-+	INIT_LIST_HEAD(&sd->s_children);
-+
-+	ve->sysfs_root = sd;
-+	return 0;
-+}
-+
-+/*
-+ * Set up the sysfs-related state of @ve:
-+ *  - with CONFIG_SYSFS and VE_FEATURE_SYSFS, a private sysfs root and a
-+ *    private sysfs filesystem type and mount;
-+ *  - private copies of the class_obj and class subsystems;
-+ *  - a private copy of the net class.
-+ *
-+ * Returns 0 or a negative errno. On failure everything set up so far is
-+ * undone; sysfs_fstype itself is freed later by free_ve_filesystems().
-+ */
-+static int init_ve_sysfs(struct ve_struct *ve)
-+{
-+	struct subsystem *subsys;
-+	struct class *nc;
-+	int err;
-+	extern struct subsystem class_obj_subsys;
-+	extern struct subsystem class_subsys;
-+	extern struct class net_class;
-+
-+#ifdef CONFIG_SYSFS
-+	err = 0;
-+	if (ve->features & VE_FEATURE_SYSFS) {
-+		err = init_ve_sysfs_root(ve);
-+		if (err != 0)
-+			goto out;
-+		err = register_ve_fs_type(ve,
-+				&sysfs_fs_type,
-+				&ve->sysfs_fstype,
-+				&ve->sysfs_mnt);
-+	}
-+	if (err != 0)
-+		goto out_fs_type;
-+#endif
-+	err = -ENOMEM;
-+	subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
-+	if (subsys == NULL)
-+		goto out_class_obj;
-+	/* ick, this is ugly, the things we go through to keep from showing up
-+	 * in sysfs... */
-+	memset(subsys, 0, sizeof(*subsys));
-+	memcpy(&subsys->kset.kobj.name, &class_obj_subsys.kset.kobj.name,
-+			sizeof(subsys->kset.kobj.name));
-+	subsys->kset.ktype = class_obj_subsys.kset.ktype;
-+	subsys->kset.uevent_ops = class_obj_subsys.kset.uevent_ops;
-+	/* this one is only initialized, not registered */
-+	subsystem_init(subsys);
-+	if (!subsys->kset.subsys)
-+		subsys->kset.subsys = subsys;
-+	ve->class_obj_subsys = subsys;
-+
-+	err = -ENOMEM;
-+	subsys = kmalloc(sizeof(*subsys), GFP_KERNEL);
-+	if (subsys == NULL)
-+		goto out_class_subsys;
-+	/* ick, this is ugly, the things we go through to keep from showing up
-+	 * in sysfs... */
-+	memset(subsys, 0, sizeof(*subsys));
-+	memcpy(&subsys->kset.kobj.name, &class_subsys.kset.kobj.name,
-+			sizeof(subsys->kset.kobj.name));
-+	subsys->kset.ktype = class_subsys.kset.ktype;
-+	subsys->kset.uevent_ops = class_subsys.kset.uevent_ops;
-+	ve->class_subsys = subsys;
-+	err = subsystem_register(subsys);
-+	if (err != 0)
-+		goto out_register;
-+
-+	err = -ENOMEM;
-+	nc = kmalloc(sizeof(*nc), GFP_KERNEL);
-+	if (nc == NULL)
-+		goto out_nc;
-+	memset(nc, 0, sizeof(*nc));
-+	nc->name = net_class.name;
-+	nc->release = net_class.release;
-+	nc->uevent = net_class.uevent;
-+	err = class_register(nc);
-+	if (err != 0)
-+		goto out_class_register;
-+	ve->net_class = nc;
-+
-+	return err;
-+
-+out_class_register:
-+	kfree(nc);
-+out_nc:
-+	subsystem_unregister(subsys);
-+out_register:
-+	kfree(ve->class_subsys);
-+out_class_subsys:
-+	kfree(ve->class_obj_subsys);
-+out_class_obj:
-+#ifdef CONFIG_SYSFS
-+	unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
-+	/* the mount is gone now; do not leave a stale pointer behind */
-+	ve->sysfs_mnt = NULL;
-+	/* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
-+out_fs_type:
-+	kfree(ve->sysfs_root);
-+	ve->sysfs_root = NULL;
-+#endif
-+	ve->class_subsys = NULL;
-+	ve->class_obj_subsys = NULL;
-+out:
-+	return err;
-+}
-+
-+/*
-+ * Tear down what init_ve_sysfs() set up: unregister and free the net
-+ * class and the subsystem copies, then (with CONFIG_SYSFS) unregister the
-+ * sysfs filesystem type and free the sysfs root.
-+ */
-+static void fini_ve_sysfs(struct ve_struct *ve)
-+{
-+ class_unregister(ve->net_class);
-+ subsystem_unregister(ve->class_subsys);
-+
-+ kfree(ve->net_class);
-+ kfree(ve->class_subsys);
-+ kfree(ve->class_obj_subsys);
-+
-+ ve->net_class = NULL;
-+ ve->class_subsys = NULL;
-+ ve->class_obj_subsys = NULL;
-+#ifdef CONFIG_SYSFS
-+ unregister_ve_fs_type(ve->sysfs_fstype, ve->sysfs_mnt);
-+ ve->sysfs_mnt = NULL;
-+ kfree(ve->sysfs_root);
-+ ve->sysfs_root = NULL;
-+ /* sysfs_fstype is freed in real_put_ve -> free_ve_filesystems */
-+#endif
-+}
-+
-+/*
-+ * Free the file_system_type copies made by register_ve_fs_type() for
-+ * sysfs, tmpfs and devpts, then the proc state via free_ve_proc().
-+ */
-+static void free_ve_filesystems(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_SYSFS
-+ kfree(ve->sysfs_fstype);
-+ ve->sysfs_fstype = NULL;
-+#endif
-+ kfree(ve->shmem_fstype);
-+ ve->shmem_fstype = NULL;
-+
-+ kfree(ve->devpts_fstype);
-+ ve->devpts_fstype = NULL;
-+
-+ free_ve_proc(ve);
-+}
-+
-+/*
-+ * Allocate the printk bookkeeping of @ve as one block and point the VE's
-+ * log pointers at its members. Returns 0 or -ENOMEM.
-+ */
-+static int init_printk(struct ve_struct *ve)
-+{
-+	/* log_wait comes first: fini_printk() frees the block through it */
-+	struct ve_prep_printk {
-+		wait_queue_head_t log_wait;
-+		unsigned long log_start;
-+		unsigned long log_end;
-+		unsigned long logged_chars;
-+	} *prep;
-+
-+	prep = kmalloc(sizeof(struct ve_prep_printk), GFP_KERNEL);
-+	if (prep == NULL)
-+		return -ENOMEM;
-+
-+	memset(prep, 0, sizeof(struct ve_prep_printk));
-+	init_waitqueue_head(&prep->log_wait);
-+
-+	ve->_log_wait = &prep->log_wait;
-+	ve->_log_start = &prep->log_start;
-+	ve->_log_end = &prep->log_end;
-+	ve->_logged_chars = &prep->logged_chars;
-+	/* ve->log_buf will be initialized later by ve_log_init() */
-+	return 0;
-+}
-+
-+/* Free the log buffer and the bookkeeping block set up by init_printk(). */
-+static void fini_printk(struct ve_struct *ve)
-+{
-+ /*
-+ * there is no spinlock protection here because nobody can use
-+ * log_buf at the moments when this code is called.
-+ */
-+ kfree(ve->log_buf);
-+ /*
-+ * _log_wait points at the first member of the block allocated in
-+ * init_printk(), so this frees the whole block.
-+ */
-+ kfree(ve->_log_wait);
-+}
-+
-+/* Kill the TCP sockets of @ve, then clean up its mapped devices. */
-+static void fini_venet(struct ve_struct *ve)
-+{
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+	tcp_v4_kill_ve_sockets(ve);
-+	ve_mapped_devs_cleanup(ve);
-+#endif
-+}
-+
-+/*
-+ * Attach @ve to the scheduler. With CONFIG_FAIRSCHED a fairsched node
-+ * named after the VE id is created first and the current task is moved
-+ * into it. Returns 0 or the error of the failing fairsched call.
-+ */
-+static int init_ve_sched(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_FAIRSCHED
-+ int err;
-+
-+ /*
-+ * We refuse to switch to an already existing node since nodes
-+ * keep a pointer to their ve_struct...
-+ */
-+ err = sys_fairsched_mknod(0, 1, ve->veid);
-+ if (err < 0) {
-+ printk(KERN_WARNING "Can't create fairsched node %d\n",
-+ ve->veid);
-+ return err;
-+ }
-+ err = sys_fairsched_mvpr(current->pid, ve->veid);
-+ if (err) {
-+ printk(KERN_WARNING "Can't switch to fairsched node %d\n",
-+ ve->veid);
-+ /* remove the node created above; a failure is only logged */
-+ if (sys_fairsched_rmnod(ve->veid))
-+ printk(KERN_ERR "Can't clean fairsched node %d\n",
-+ ve->veid);
-+ return err;
-+ }
-+#endif
-+ ve_sched_attach(ve);
-+ return 0;
-+}
-+
-+/*
-+ * With CONFIG_FAIRSCHED: move the current task back to fairsched_init_node
-+ * if it still runs in the VE's node, then remove that node. Failures are
-+ * only logged.
-+ */
-+static void fini_ve_sched(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_FAIRSCHED
-+ if (task_vsched_id(current) == ve->veid)
-+ if (sys_fairsched_mvpr(current->pid, fairsched_init_node.id))
-+ printk(KERN_WARNING "Can't leave fairsched node %d\n",
-+ ve->veid);
-+ /* the node is removed whether or not the task was moved */
-+ if (sys_fairsched_rmnod(ve->veid))
-+ printk(KERN_ERR "Can't remove fairsched node %d\n",
-+ ve->veid);
-+#endif
-+}
-+
-+/*
-+ * Zero @ve and fill in its basic fields: identity, init task, feature
-+ * mask, task list, operation semaphore, uid hash and start timestamps.
-+ * Takes one reference on the VE with get_ve(). Always returns 0.
-+ */
-+static int init_ve_struct(struct ve_struct *ve, envid_t veid,
-+		u32 class_id, env_create_param_t *data,
-+		struct task_struct *init_tsk)
-+{
-+	int bucket;
-+
-+	memset(ve, 0, sizeof(*ve));
-+	(void)get_ve(ve);
-+
-+	ve->veid = veid;
-+	ve->class_id = class_id;
-+	ve->init_entry = init_tsk;
-+	ve->features = data->feature_mask;
-+	INIT_LIST_HEAD(&ve->vetask_lh);
-+	init_rwsem(&ve->op_sem);
-+	ve->ifindex = -1;
-+
-+	bucket = 0;
-+	while (bucket < UIDHASH_SZ_VE) {
-+		INIT_LIST_HEAD(&ve->uidhash_table[bucket]);
-+		++bucket;
-+	}
-+
-+	/* remember when the VE was started, in three time bases */
-+	do_posix_clock_monotonic_gettime(&ve->start_timespec);
-+	ve->start_jiffies = jiffies;
-+	ve->start_cycles = get_cycles();
-+	ve->virt_pids = glob_virt_pids;
-+
-+	return 0;
-+}
-+
-+/*
-+ * Record the filesystem root of @tsk as the root of @ve and mark that
-+ * tree virtual. No extra references on rootmnt/root are taken here.
-+ */
-+static void set_ve_root(struct ve_struct *ve, struct task_struct *tsk)
-+{
-+ read_lock(&tsk->fs->lock);
-+ ve->fs_rootmnt = tsk->fs->rootmnt;
-+ ve->fs_root = tsk->fs->root;
-+ read_unlock(&tsk->fs->lock);
-+ mark_tree_virtual(ve->fs_rootmnt, ve->fs_root);
-+}
-+
-+/*
-+ * Derive the VE's default capability set from the effective set of @tsk,
-+ * with CAP_SETVEID removed.
-+ */
-+static void set_ve_caps(struct ve_struct *ve, struct task_struct *tsk)
-+{
-+ /* required for real_setdevperms from register_ve_<fs> above */
-+ memcpy(&ve->cap_default, &tsk->cap_effective, sizeof(kernel_cap_t));
-+ cap_lower(ve->cap_default, CAP_SETVEID);
-+}
-+
-+/*
-+ * Link @ve at the head of the global VE list.
-+ * Returns 0, or -EEXIST if a VE with the same id is already listed.
-+ */
-+static int ve_list_add(struct ve_struct *ve)
-+{
-+	int err = -EEXIST;
-+
-+	write_lock_irq(&ve_list_guard);
-+	if (__find_ve_by_id(ve->veid) == NULL) {
-+		ve->prev = NULL;
-+		ve->next = ve_list_head;
-+		if (ve_list_head)
-+			ve_list_head->prev = ve;
-+		ve_list_head = ve;
-+		nr_ve++;
-+		err = 0;
-+	}
-+	write_unlock_irq(&ve_list_guard);
-+
-+	return err;
-+}
-+
-+/* Unlink @ve from the global VE list and decrement the VE count. */
-+static void ve_list_del(struct ve_struct *ve)
-+{
-+	write_lock_irq(&ve_list_guard);
-+
-+	/* fix the forward link: either the list head or the predecessor */
-+	if (ve->prev == NULL)
-+		ve_list_head = ve->next;
-+	else
-+		ve->prev->next = ve->next;
-+
-+	/* fix the backward link of the successor, if there is one */
-+	if (ve->next != NULL)
-+		ve->next->prev = ve->prev;
-+
-+	nr_ve--;
-+	write_unlock_irq(&ve_list_guard);
-+}
-+
-+/*
-+ * Apply cap_mask() with the VE's default capability set to all three
-+ * capability sets of @tsk, under task_capability_lock.
-+ */
-+static void set_task_ve_caps(struct task_struct *tsk, struct ve_struct *ve)
-+{
-+ spin_lock(&task_capability_lock);
-+ cap_mask(tsk->cap_effective, ve->cap_default);
-+ cap_mask(tsk->cap_inheritable, ve->cap_default);
-+ cap_mask(tsk->cap_permitted, ve->cap_default);
-+ spin_unlock(&task_capability_lock);
-+}
-+
-+/*
-+ * Move @tsk from VE @old to VE @new: restrict its capabilities, switch
-+ * its owner and exec environments, relink it and transfer both the
-+ * process count and the VE reference from @old to @new.
-+ */
-+static void move_task(struct task_struct *tsk, struct ve_struct *new,
-+ struct ve_struct *old)
-+{
-+ /* this prohibits ptracing of a task that entered the VPS from the host system */
-+ /* NOTE(review): tsk->mm is dereferenced without a NULL check - confirm callers only pass tasks with an mm */
-+ tsk->mm->vps_dumpable = 0;
-+ /* setup capabilities before enter */
-+ set_task_ve_caps(tsk, new);
-+
-+ write_lock_irq(&tasklist_lock);
-+ VE_TASK_INFO(tsk)->owner_env = new;
-+ VE_TASK_INFO(tsk)->exec_env = new;
-+ REMOVE_VE_LINKS(tsk);
-+ SET_VE_LINKS(tsk);
-+
-+ atomic_dec(&old->pcounter);
-+ atomic_inc(&new->pcounter);
-+ /* NOTE(review): real_put_ve() runs under tasklist_lock here; presumably it never drops the last reference - confirm */
-+ real_put_ve(old);
-+ get_ve(new);
-+ write_unlock_irq(&tasklist_lock);
-+}
-+
-+/*
-+ * init_ve_netfilter()/fini_ve_netfilter() forward to init_netfilter() and
-+ * fini_netfilter() only when VE netdev support, CONFIG_NETFILTER and
-+ * CONFIG_VE_IPTABLES are all configured.  Otherwise they expand to a
-+ * constant 0 and to an empty statement respectively.
-+ */
-+#if (defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)) && \
-+ defined(CONFIG_NETFILTER) && defined(CONFIG_VE_IPTABLES)
-+extern int init_netfilter(void);
-+extern void fini_netfilter(void);
-+#define init_ve_netfilter() init_netfilter()
-+#define fini_ve_netfilter() fini_netfilter()
-+#else
-+#define init_ve_netfilter() (0)
-+#define fini_ve_netfilter() do { } while (0)
-+#endif
-+
-+/*
-+ * KSYMIPTINIT - conditionally run one per-VE netfilter module initializer.
-+ *
-+ * KSYMERRCALL(1, mod, name, args) is invoked only when both VE_IPT_CMP()
-+ * checks pass: @mask against @full_mask, and (ve)->_iptables_modules
-+ * against @full_mask with its own <full_mask>_MOD bit removed.
-+ * A result of 0 records <full_mask>_MOD in (ve)->_iptables_modules; a
-+ * result of 1 is reported as 0 without recording the bit.  The macro
-+ * evaluates to the resulting code (0 when the call was skipped).
-+ * NOTE(review): VE_IPT_CMP and KSYMERRCALL are defined elsewhere; their
-+ * exact semantics should be confirmed there.
-+ */
-+#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args) \
-+({ \
-+ int ret = 0; \
-+ if (VE_IPT_CMP(mask, full_mask) && \
-+ VE_IPT_CMP((ve)->_iptables_modules, \
-+ full_mask & ~(full_mask##_MOD))) { \
-+ ret = KSYMERRCALL(1, mod, name, args); \
-+ if (ret == 0) \
-+ (ve)->_iptables_modules |= \
-+ full_mask##_MOD; \
-+ if (ret == 1) \
-+ ret = 0; \
-+ } \
-+ ret; \
-+})
-+
-+/*
-+ * KSYMIPTFINI - run a module finalizer via KSYMSAFECALL_VOID(mod, name,
-+ * args) only when VE_IPT_CMP(@mask, <full_mask>_MOD) holds.  Callers in
-+ * this file pass the VE's _iptables_modules as @mask.
-+ */
-+#define KSYMIPTFINI(mask, full_mask, mod, name, args) \
-+({ \
-+ if (VE_IPT_CMP(mask, full_mask##_MOD)) \
-+ KSYMSAFECALL_VOID(mod, name, args); \
-+})
-+
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+/*
-+ * Initialize or tear down the per-VE iptables/netfilter modules of @ve.
-+ *
-+ * @init_mask is handed to KSYMIPTINIT for every initializer.
-+ * @init_or_cleanup: non-zero runs the init sequence, zero jumps straight
-+ * to the cleanup sequence.
-+ *
-+ * The init part calls KSYMIPTINIT for each module that is configured in,
-+ * in a fixed order; a negative result jumps to the matching err_* label.
-+ * The cleanup part is the same list in reverse order using KSYMIPTFINI,
-+ * with the err_* labels placed so that a failed init only unwinds the
-+ * steps that ran before it.
-+ *
-+ * Returns 0 after a complete init.  The cleanup and error paths reset
-+ * ve->_iptables_modules to 0 and return err (0 for a plain cleanup, the
-+ * negative init result on failure).
-+ */
-+static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
-+ int init_or_cleanup)
-+{
-+ int err;
-+
-+ err = 0;
-+ if (!init_or_cleanup)
-+ goto cleanup;
-+
-+ /* init part */
-+#if defined(CONFIG_NETFILTER_XTABLES) || \
-+ defined(CONFIG_NETFILTER_XTABLES_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
-+ x_tables, init_xtables, ());
-+ if (err < 0)
-+ goto err_xtables;
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
-+ xt_tcpudp, init_xt_tcpudp, ());
-+ if (err < 0)
-+ goto err_xt_tcpudp;
-+#endif
-+#if defined(CONFIG_IP_NF_IPTABLES) || \
-+ defined(CONFIG_IP_NF_IPTABLES_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLES,
-+ ip_tables, init_iptables, ());
-+ if (err < 0)
-+ goto err_iptables;
-+#endif
-+#if defined(CONFIG_IP_NF_CONNTRACK) || \
-+ defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
-+ ip_conntrack, init_iptable_conntrack, ());
-+ if (err < 0)
-+ goto err_iptable_conntrack;
-+#endif
-+#if defined(CONFIG_IP_NF_FTP) || \
-+ defined(CONFIG_IP_NF_FTP_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_FTP,
-+ ip_conntrack_ftp, init_iptable_ftp, ());
-+ if (err < 0)
-+ goto err_iptable_ftp;
-+#endif
-+#if defined(CONFIG_IP_NF_IRC) || \
-+ defined(CONFIG_IP_NF_IRC_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK_IRC,
-+ ip_conntrack_irc, init_iptable_irc, ());
-+ if (err < 0)
-+ goto err_iptable_irc;
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_CONNTRACK,
-+ xt_conntrack, init_xt_conntrack_match, ());
-+ if (err < 0)
-+ goto err_xt_conntrack_match;
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_STATE) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_STATE_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_STATE,
-+ xt_state, init_xt_state, ());
-+ if (err < 0)
-+ goto err_xt_state;
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_HELPER) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_HELPER_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_HELPER,
-+ xt_helper, init_xt_helper, ());
-+ if (err < 0)
-+ goto err_xt_helper;
-+#endif
-+#if defined(CONFIG_IP_NF_NAT) || \
-+ defined(CONFIG_IP_NF_NAT_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
-+ ip_nat, ip_nat_init, ());
-+ if (err < 0)
-+ goto err_iptable_nat;
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
-+ iptable_nat, init_iptable_nat, ());
-+ if (err < 0)
-+ goto err_iptable_nat2;
-+#endif
-+#if defined(CONFIG_IP_NF_NAT_FTP) || \
-+ defined(CONFIG_IP_NF_NAT_FTP_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_FTP,
-+ ip_nat_ftp, init_iptable_nat_ftp, ());
-+ if (err < 0)
-+ goto err_iptable_nat_ftp;
-+#endif
-+#if defined(CONFIG_IP_NF_NAT_IRC) || \
-+ defined(CONFIG_IP_NF_NAT_IRC_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT_IRC,
-+ ip_nat_irc, init_iptable_nat_irc, ());
-+ if (err < 0)
-+ goto err_iptable_nat_irc;
-+#endif
-+#if defined(CONFIG_IP_NF_FILTER) || \
-+ defined(CONFIG_IP_NF_FILTER_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_FILTER,
-+ iptable_filter, init_iptable_filter, ());
-+ if (err < 0)
-+ goto err_iptable_filter;
-+#endif
-+#if defined(CONFIG_IP_NF_MANGLE) || \
-+ defined(CONFIG_IP_NF_MANGLE_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MANGLE,
-+ iptable_mangle, init_iptable_mangle, ());
-+ if (err < 0)
-+ goto err_iptable_mangle;
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_LIMIT) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_LIMIT_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LIMIT,
-+ xt_limit, init_xt_limit, ());
-+ if (err < 0)
-+ goto err_xt_limit;
-+#endif
-+#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
-+ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_MULTIPORT,
-+ ipt_multiport, init_iptable_multiport, ());
-+ if (err < 0)
-+ goto err_iptable_multiport;
-+#endif
-+#if defined(CONFIG_IP_NF_MATCH_TOS) || \
-+ defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TOS,
-+ ipt_tos, init_iptable_tos, ());
-+ if (err < 0)
-+ goto err_iptable_tos;
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_TOS) || \
-+ defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TOS,
-+ ipt_TOS, init_iptable_TOS, ());
-+ if (err < 0)
-+ goto err_iptable_TOS;
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
-+ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REJECT,
-+ ipt_REJECT, init_iptable_REJECT, ());
-+ if (err < 0)
-+ goto err_iptable_REJECT;
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
-+ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_TCPMSS,
-+ ipt_TCPMSS, init_iptable_TCPMSS, ());
-+ if (err < 0)
-+ goto err_iptable_TCPMSS;
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TCPMSS,
-+ xt_tcpmss, init_xt_tcpmss, ());
-+ if (err < 0)
-+ goto err_xt_tcpmss;
-+#endif
-+#if defined(CONFIG_IP_NF_MATCH_TTL) || \
-+ defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_TTL,
-+ ipt_ttl, init_iptable_ttl, ());
-+ if (err < 0)
-+ goto err_iptable_ttl;
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_LOG) || \
-+ defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_LOG,
-+ ipt_LOG, init_iptable_LOG, ());
-+ if (err < 0)
-+ goto err_iptable_LOG;
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_LENGTH) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_LENGTH_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_MATCH_LENGTH,
-+ xt_length, init_xt_length, ());
-+ if (err < 0)
-+ goto err_xt_length;
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
-+ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_TARGET_REDIRECT,
-+ ipt_REDIRECT, init_iptable_REDIRECT, ());
-+ if (err < 0)
-+ goto err_iptable_REDIRECT;
-+#endif
-+ return 0;
-+
-+/* ------------------------------------------------------------------------- */
-+
-+ /* teardown: mirror image of the init part, newest module first */
-+cleanup:
-+#if defined(CONFIG_IP_NF_TARGET_REDIRECT) || \
-+ defined(CONFIG_IP_NF_TARGET_REDIRECT_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REDIRECT,
-+ ipt_REDIRECT, fini_iptable_REDIRECT, ());
-+err_iptable_REDIRECT:
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_LENGTH) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_LENGTH_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LENGTH,
-+ xt_length, fini_xt_length, ());
-+err_xt_length:
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_LOG) || \
-+ defined(CONFIG_IP_NF_TARGET_LOG_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_LOG,
-+ ipt_LOG, fini_iptable_LOG, ());
-+err_iptable_LOG:
-+#endif
-+#if defined(CONFIG_IP_NF_MATCH_TTL) || \
-+ defined(CONFIG_IP_NF_MATCH_TTL_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TTL,
-+ ipt_ttl, fini_iptable_ttl, ());
-+err_iptable_ttl:
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_TCPMSS_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TCPMSS,
-+ xt_tcpmss, fini_xt_tcpmss, ());
-+err_xt_tcpmss:
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_TCPMSS) || \
-+ defined(CONFIG_IP_NF_TARGET_TCPMSS_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TCPMSS,
-+ ipt_TCPMSS, fini_iptable_TCPMSS, ());
-+err_iptable_TCPMSS:
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_REJECT) || \
-+ defined(CONFIG_IP_NF_TARGET_REJECT_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_REJECT,
-+ ipt_REJECT, fini_iptable_REJECT, ());
-+err_iptable_REJECT:
-+#endif
-+#if defined(CONFIG_IP_NF_TARGET_TOS) || \
-+ defined(CONFIG_IP_NF_TARGET_TOS_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_TARGET_TOS,
-+ ipt_TOS, fini_iptable_TOS, ());
-+err_iptable_TOS:
-+#endif
-+#if defined(CONFIG_IP_NF_MATCH_TOS) || \
-+ defined(CONFIG_IP_NF_MATCH_TOS_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_TOS,
-+ ipt_tos, fini_iptable_tos, ());
-+err_iptable_tos:
-+#endif
-+#if defined(CONFIG_IP_NF_MATCH_MULTIPORT) || \
-+ defined(CONFIG_IP_NF_MATCH_MULTIPORT_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_MULTIPORT,
-+ ipt_multiport, fini_iptable_multiport, ());
-+err_iptable_multiport:
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_LIMIT) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_LIMIT_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_LIMIT,
-+ xt_limit, fini_xt_limit, ());
-+err_xt_limit:
-+#endif
-+#if defined(CONFIG_IP_NF_MANGLE) || \
-+ defined(CONFIG_IP_NF_MANGLE_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE,
-+ iptable_mangle, fini_iptable_mangle, ());
-+err_iptable_mangle:
-+#endif
-+#if defined(CONFIG_IP_NF_FILTER) || \
-+ defined(CONFIG_IP_NF_FILTER_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER,
-+ iptable_filter, fini_iptable_filter, ());
-+err_iptable_filter:
-+#endif
-+#if defined(CONFIG_IP_NF_NAT_IRC) || \
-+ defined(CONFIG_IP_NF_NAT_IRC_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_IRC,
-+ ip_nat_irc, fini_iptable_nat_irc, ());
-+err_iptable_nat_irc:
-+#endif
-+#if defined(CONFIG_IP_NF_NAT_FTP) || \
-+ defined(CONFIG_IP_NF_NAT_FTP_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT_FTP,
-+ ip_nat_ftp, fini_iptable_nat_ftp, ());
-+err_iptable_nat_ftp:
-+#endif
-+#if defined(CONFIG_IP_NF_NAT) || \
-+ defined(CONFIG_IP_NF_NAT_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
-+ iptable_nat, fini_iptable_nat, ());
-+err_iptable_nat2:
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
-+ ip_nat, ip_nat_cleanup, ());
-+err_iptable_nat:
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_HELPER) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_HELPER_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_HELPER,
-+ xt_helper, fini_xt_helper, ());
-+err_xt_helper:
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_STATE) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_STATE_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_STATE,
-+ xt_state, fini_xt_state, ());
-+err_xt_state:
-+#endif
-+#if defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) || \
-+ defined(CONFIG_NETFILTER_XT_MATCH_CONNTRACK_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MATCH_CONNTRACK,
-+ xt_conntrack, fini_xt_conntrack_match, ());
-+err_xt_conntrack_match:
-+#endif
-+#if defined(CONFIG_IP_NF_IRC) || \
-+ defined(CONFIG_IP_NF_IRC_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_IRC,
-+ ip_conntrack_irc, fini_iptable_irc, ());
-+err_iptable_irc:
-+#endif
-+#if defined(CONFIG_IP_NF_FTP) || \
-+ defined(CONFIG_IP_NF_FTP_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK_FTP,
-+ ip_conntrack_ftp, fini_iptable_ftp, ());
-+err_iptable_ftp:
-+#endif
-+#if defined(CONFIG_IP_NF_CONNTRACK) || \
-+ defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
-+ ip_conntrack, fini_iptable_conntrack, ());
-+err_iptable_conntrack:
-+#endif
-+#if defined(CONFIG_IP_NF_IPTABLES) || \
-+ defined(CONFIG_IP_NF_IPTABLES_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
-+ ip_tables, fini_iptables, ());
-+err_iptables:
-+#endif
-+#if defined(CONFIG_NETFILTER_XTABLES) || \
-+ defined(CONFIG_NETFILTER_XTABLES_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
-+ xt_tcpudp, fini_xt_tcpudp, ());
-+err_xt_tcpudp:
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLES,
-+ x_tables, fini_xtables, ());
-+err_xtables:
-+#endif
-+ /* nothing is left initialized on either the cleanup or the error path */
-+ ve->_iptables_modules = 0;
-+
-+ return err;
-+}
-+#else
-+/* without VE netdev support there is nothing to set up; always report 0 */
-+#define do_ve_iptables(ve, initmask, init) (0)
-+#endif
-+
-+/*
-+ * Run the init half of do_ve_iptables() for @ve with @init_mask and
-+ * return its result.
-+ */
-+static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
-+{
-+	int err;
-+
-+	err = do_ve_iptables(ve, init_mask, 1);
-+	return err;
-+}
-+
-+/*
-+ * Run the cleanup half of do_ve_iptables() for @ve; the result is
-+ * deliberately discarded.
-+ */
-+static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
-+{
-+ (void)do_ve_iptables(ve, init_mask, 0);
-+}
-+
-+/*
-+ * Flush the mangle, filter and NAT rule tables of @ve through
-+ * ipt_flush_table().  Each call goes through KSYMIPTFINI, which tests the
-+ * corresponding bit of ve->_iptables_modules before making it.
-+ */
-+static void flush_ve_iptables(struct ve_struct *ve)
-+{
-+ /*
-+ * flush all rule tables first,
-+ * this helps us to avoid refs to freed objs
-+ */
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_MANGLE, ip_tables,
-+ ipt_flush_table, (ve->_ipt_mangle_table));
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_FILTER, ip_tables,
-+ ipt_flush_table, (ve->_ve_ipt_filter_pf));
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT, ip_tables,
-+ ipt_flush_table, (ve->_ip_conntrack->_ip_nat_table));
-+}
-+
-+/*
-+ * One list of registered struct ve_hook entries per hook number.
-+ * ve_hook_sem is taken for writing by register/unregister and for reading
-+ * while hooks are being run.
-+ * NOTE(review): the list heads are not initialized at this point --
-+ * presumably done during module init; confirm.
-+ */
-+static struct list_head ve_hooks[VE_MAX_HOOKS];
-+static DECLARE_RWSEM(ve_hook_sem);
-+
-+/*
-+ * Register @vh on the list selected by vh->hooknum.
-+ *
-+ * The hook is inserted in front of the first entry whose priority is
-+ * greater than vh->priority, or at the tail when there is none, so the
-+ * list stays ordered by ascending priority and entries of equal priority
-+ * keep their registration order.  Always returns 0.
-+ */
-+int ve_hook_register(struct ve_hook *vh)
-+{
-+	struct list_head *chain = &ve_hooks[vh->hooknum];
-+	struct list_head *pos;
-+
-+	down_write(&ve_hook_sem);
-+	list_for_each(pos, chain) {
-+		struct ve_hook *cur = list_entry(pos, struct ve_hook, list);
-+
-+		if (cur->priority > vh->priority)
-+			break;
-+	}
-+	/* pos is either the first higher-priority entry or the list head */
-+	list_add_tail(&vh->list, pos);
-+	up_write(&ve_hook_sem);
-+
-+	return 0;
-+}
-+EXPORT_SYMBOL(ve_hook_register);
-+
-+/*
-+ * Remove @vh from the hook list it is linked on, holding ve_hook_sem for
-+ * writing.
-+ */
-+void ve_hook_unregister(struct ve_hook *vh)
-+{
-+ down_write(&ve_hook_sem);
-+ list_del(&vh->list);
-+ up_write(&ve_hook_sem);
-+}
-+EXPORT_SYMBOL(ve_hook_unregister);
-+
-+/*
-+ * Run every hook registered for @hooknum in list order, passing @data.
-+ *
-+ * A hook whose owner module cannot be pinned with try_module_get() is
-+ * skipped.  The walk stops at the first hook returning non-zero; the
-+ * hooks that precede it on the list are then visited in reverse order and
-+ * their ->undo callback (when set) is invoked.
-+ *
-+ * Returns 0 when all hooks succeeded, otherwise the first non-zero hook
-+ * result.
-+ * NOTE(review): the rollback triggers on any non-zero result, while the
-+ * caller in do_env_create() only treats negative values as failure --
-+ * confirm hooks never return positive values.
-+ */
-+static int ve_hook_iterate(unsigned int hooknum, void *data)
-+{
-+ struct ve_hook *vh;
-+ int err;
-+
-+ err = 0;
-+ down_read(&ve_hook_sem);
-+ list_for_each_entry(vh, &ve_hooks[hooknum], list) {
-+ if (!try_module_get(vh->owner))
-+ continue;
-+ err = vh->hook(hooknum, data);
-+ module_put(vh->owner);
-+ if (err)
-+ break;
-+ }
-+
-+ if (err) {
-+ /* vh still points at the failed hook; walk back from the one before it */
-+ list_for_each_entry_continue_reverse(vh,
-+ &ve_hooks[hooknum], list) {
-+ if (!try_module_get(vh->owner))
-+ continue;
-+ if (vh->undo)
-+ vh->undo(hooknum, data);
-+ module_put(vh->owner);
-+ }
-+ }
-+ up_read(&ve_hook_sem);
-+ return err;
-+}
-+
-+/*
-+ * Run every hook registered for @hooknum in reverse list order, passing
-+ * @data and ignoring the hook results.  Hooks whose owner module cannot
-+ * be pinned are skipped.
-+ */
-+static void ve_hook_iterate_cleanup(unsigned int hooknum, void *data)
-+{
-+	struct ve_hook *entry;
-+
-+	down_read(&ve_hook_sem);
-+	list_for_each_entry_reverse(entry, &ve_hooks[hooknum], list) {
-+		if (try_module_get(entry->owner)) {
-+			(void)entry->hook(hooknum, data);
-+			module_put(entry->owner);
-+		}
-+	}
-+	up_read(&ve_hook_sem);
-+}
-+
-+/*
-+ * Create VE @veid and move the calling task into it as its init process.
-+ *
-+ * @veid:     id of the environment to create
-+ * @flags:    only VE_LOCK is examined here; it marks the new VE as locked
-+ * @class_id: passed through to init_ve_struct()
-+ * @data:     optional creation parameters; when NULL, VE_IP_DEFAULT is
-+ *            used as the iptables mask
-+ * @datalen:  not used by this function
-+ *
-+ * The caller must be a thread group leader without a controlling tty, and
-+ * either already lead its own session and process group or have a pid
-+ * that is not in use as a process group id.
-+ *
-+ * Returns @veid on success or a negative errno.  On failure every
-+ * subsystem initialized so far is torn down in reverse order through the
-+ * err_* label ladder at the end of the function.
-+ */
-+static int do_env_create(envid_t veid, unsigned int flags, u32 class_id,
-+		env_create_param_t *data, int datalen)
-+{
-+	struct task_struct *tsk;
-+	struct ve_struct *old;
-+	struct ve_struct *old_exec;
-+	struct ve_struct *ve;
-+	__u64 init_mask;
-+	int err;
-+
-+	tsk = current;
-+	old = VE_TASK_INFO(tsk)->owner_env;
-+
-+	if (!thread_group_leader(tsk))
-+		return -EINVAL;
-+
-+	if (tsk->signal->tty) {
-+		printk("ERR: VE init has controlling terminal\n");
-+		return -EINVAL;
-+	}
-+	if (tsk->signal->pgrp != tsk->pid || tsk->signal->session != tsk->pid) {
-+		int may_setsid;
-+		read_lock(&tasklist_lock);
-+		may_setsid = (find_pid(PIDTYPE_PGID, tsk->pid) == NULL);
-+		read_unlock(&tasklist_lock);
-+		if (!may_setsid) {
-+			printk("ERR: VE init is process group leader\n");
-+			return -EINVAL;
-+		}
-+	}
-+
-+
-+	VZTRACE("%s: veid=%d classid=%d pid=%d\n",
-+		__FUNCTION__, veid, class_id, current->pid);
-+
-+	err = -ENOMEM;
-+	ve = kmalloc(sizeof(struct ve_struct), GFP_KERNEL);
-+	if (ve == NULL)
-+		goto err_struct;
-+
-+	init_ve_struct(ve, veid, class_id, data, tsk);
-+	/* paired with module_put() in real_do_env_free() or at err_exist */
-+	__module_get(THIS_MODULE);
-+	down_write(&ve->op_sem);
-+	if (flags & VE_LOCK)
-+		ve->is_locked = 1;
-+	if ((err = ve_list_add(ve)) < 0)
-+		goto err_exist;
-+
-+	/* this should be done before context switching */
-+	if ((err = init_printk(ve)) < 0)
-+		goto err_log_wait;
-+
-+	old_exec = set_exec_env(ve);
-+
-+	if ((err = init_ve_sched(ve)) < 0)
-+		goto err_sched;
-+
-+	/* move user to VE */
-+	if ((err = set_user(0, 0)) < 0)
-+		goto err_set_user;
-+
-+	set_ve_root(ve, tsk);
-+
-+	if ((err = init_ve_utsname(ve)))
-+		goto err_utsname;
-+
-+	if ((err = init_ve_mibs(ve)))
-+		goto err_mibs;
-+
-+	if ((err = init_ve_proc(ve)))
-+		goto err_proc;
-+
-+	if ((err = init_ve_sysctl(ve)))
-+		goto err_sysctl;
-+
-+	if ((err = init_ve_sysfs(ve)))
-+		goto err_sysfs;
-+
-+	if ((err = init_ve_route(ve)) < 0)
-+		goto err_route;
-+
-+	if ((err = init_ve_netdev()))
-+		goto err_dev;
-+
-+	if ((err = init_ve_tty_drivers(ve)) < 0)
-+		goto err_tty;
-+
-+	if ((err = init_ve_shmem(ve)))
-+		goto err_shmem;
-+
-+	if ((err = init_ve_devpts(ve)))
-+		goto err_devpts;
-+
-+	/* init SYSV IPC variables */
-+	if ((err = init_ve_ipc(ve)) < 0)
-+		goto err_ipc;
-+
-+	set_ve_caps(ve, tsk);
-+
-+	/* It is safe to initialize netfilter here as routing initialization and
-+	   interface setup will be done below. This means that NO skb can be
-+	   passed inside. Den */
-+	/* iptables ve initialization for non ve0;
-+	   ve0 init is in module_init */
-+	if ((err = init_ve_netfilter()) < 0)
-+		goto err_netfilter;
-+
-+	init_mask = data ? data->iptables_mask : VE_IP_DEFAULT;
-+	if ((err = init_ve_iptables(ve, init_mask)) < 0)
-+		goto err_iptables;
-+
-+	if ((err = alloc_vpid(tsk->pid, 1)) < 0)
-+		goto err_vpid;
-+
-+	if ((err = ve_hook_iterate(VE_HOOK_INIT, (void *)ve)) < 0)
-+		goto err_ve_hook;
-+
-+	/* finally: set vpids and move inside */
-+	move_task(tsk, ve, old);
-+
-+	set_virt_pid(tsk, 1);
-+	set_virt_tgid(tsk, 1);
-+
-+	set_special_pids(tsk->pid, tsk->pid);
-+	current->signal->tty_old_pgrp = 0;
-+	set_virt_pgid(tsk, 1);
-+	set_virt_sid(tsk, 1);
-+
-+	ve->is_running = 1;
-+	up_write(&ve->op_sem);
-+
-+	printk(KERN_INFO "VPS: %d: started\n", veid);
-+	return veid;
-+
-+err_ve_hook:
-+	free_vpid(1, ve);
-+err_vpid:
-+	fini_venet(ve);
-+	fini_ve_iptables(ve, init_mask);
-+err_iptables:
-+	fini_ve_netfilter();
-+err_netfilter:
-+	fini_ve_ipc(ve);
-+err_ipc:
-+	fini_ve_devpts(ve);
-+err_devpts:
-+	fini_ve_shmem(ve);
-+err_shmem:
-+	fini_ve_tty_drivers(ve);
-+err_tty:
-+	fini_ve_netdev();
-+err_dev:
-+	fini_ve_route(ve);
-+err_route:
-+	fini_ve_sysfs(ve);
-+err_sysfs:
-+	fini_ve_sysctl(ve);
-+err_sysctl:
-+	fini_ve_proc(ve);
-+err_proc:
-+	do_clean_devperms(ve->veid); /* register procfs adds devperms */
-+	fini_ve_mibs(ve);
-+err_mibs:
-+	/* free_ve_utsname() is called inside real_put_ve() */ ;
-+err_utsname:
-+	/* It is safe to restore current->envid here because
-+	 * ve_fairsched_detach does not use current->envid. */
-+	/* Really fairsched code uses current->envid in sys_fairsched_mknod
-+	 * only.  It is correct if sys_fairsched_mknod is called from
-+	 * userspace.  If sys_fairsched_mknod is called from
-+	 * ve_fairsched_attach, then node->envid and node->parent_node->envid
-+	 * are explicitly set to valid value after the call. */
-+	/* FIXME */
-+	VE_TASK_INFO(tsk)->owner_env = old;
-+	VE_TASK_INFO(tsk)->exec_env = old_exec;
-+	/* move user back */
-+	if (set_user(0, 0) < 0)
-+		printk(KERN_WARNING"Can't restore UID\n");
-+
-+err_set_user:
-+	fini_ve_sched(ve);
-+err_sched:
-+	(void)set_exec_env(old_exec);
-+
-+	/* we can jump here having incorrect envid */
-+	VE_TASK_INFO(tsk)->owner_env = old;
-+	fini_printk(ve);
-+err_log_wait:
-+	ve_list_del(ve);
-+	up_write(&ve->op_sem);
-+
-+	real_put_ve(ve);
-+err_struct:
-+	printk(KERN_INFO "VPS: %d: failed to start with err=%d\n", veid, err);
-+	return err;
-+
-+err_exist:
-+	/*
-+	 * The VE never made it onto the list, so it is freed directly
-+	 * instead of going through real_put_ve().  The module reference
-+	 * taken above must therefore be dropped here, otherwise every
-+	 * duplicate-veid race leaks one reference on this module.
-+	 */
-+	kfree(ve);
-+	module_put(THIS_MODULE);
-+	goto err_struct;
-+}
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * VE start/stop callbacks
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+/*
-+ * Entry point for querying, testing, creating or entering a VE.
-+ *
-+ * With @flags == 0 the veid of the caller's execution environment is
-+ * returned.  Any other request needs CAP_SETVEID (-EPERM otherwise).
-+ * VE_TEST may not be combined with VE_ENTER or VE_CREATE (-EINVAL).
-+ *
-+ * When the VE exists: VE_TEST yields 0, VE_EXCLUSIVE yields -EACCES, and
-+ * VE_CREATE is downgraded to VE_ENTER before do_env_enter() is called.
-+ * When it does not exist: VE_TEST/VE_ENTER yield -ESRCH and VE_CREATE is
-+ * handed to do_env_create().  Any other combination returns -EINVAL.
-+ */
-+int real_env_create(envid_t veid, unsigned flags, u32 class_id,
-+		env_create_param_t *data, int datalen)
-+{
-+	struct ve_struct *ve;
-+	int status;
-+
-+	if (flags == 0)
-+		return get_exec_env()->veid;
-+
-+	if (!capable(CAP_SETVEID))
-+		return -EPERM;
-+
-+	if ((flags & VE_TEST) && (flags & (VE_ENTER|VE_CREATE)))
-+		return -EINVAL;
-+
-+	status = -EINVAL;
-+	ve = get_ve_by_id(veid);
-+	if (ve == NULL) {
-+		if (flags & (VE_TEST|VE_ENTER))
-+			return -ESRCH;
-+		if (flags & VE_CREATE)
-+			return do_env_create(veid, flags, class_id,
-+					data, datalen);
-+		/* neither create nor enter requested: -EINVAL below */
-+	} else if (flags & VE_TEST) {
-+		status = 0;
-+	} else if (flags & VE_EXCLUSIVE) {
-+		status = -EACCES;
-+	} else {
-+		/* creating an already existing VE means entering it */
-+		if (flags & VE_CREATE)
-+			flags = (flags & ~VE_CREATE) | VE_ENTER;
-+		if (flags & VE_ENTER)
-+			status = do_env_enter(ve, flags);
-+	}
-+
-+	real_put_ve(ve);
-+	return status;
-+}
-+
-+/*
-+ * Move the calling task into the already running VE @ve.
-+ *
-+ * Returns -EBUSY when the VE is not running, or when it is locked and
-+ * VE_SKIPLOCK is not set in @flags.  With CONFIG_FAIRSCHED a non-zero
-+ * result of sys_fairsched_mvpr() is returned as is.  On success the veid
-+ * of the task's new owner environment is returned.
-+ */
-+static int do_env_enter(struct ve_struct *ve, unsigned int flags)
-+{
-+	struct task_struct *tsk = current;
-+	int err = -EBUSY;
-+
-+	VZTRACE("%s: veid=%d\n", __FUNCTION__, ve->veid);
-+
-+	down_read(&ve->op_sem);
-+	if (!ve->is_running ||
-+	    (ve->is_locked && !(flags & VE_SKIPLOCK)))
-+		goto out_up;
-+
-+#ifdef CONFIG_FAIRSCHED
-+	err = sys_fairsched_mvpr(tsk->pid, ve->veid);
-+	if (err)
-+		goto out_up;
-+#endif
-+
-+	ve_sched_attach(ve);
-+	move_task(tsk, ve, VE_TASK_INFO(tsk)->owner_env);
-+	err = VE_TASK_INFO(tsk)->owner_env->veid;
-+
-+out_up:
-+	up_read(&ve->op_sem);
-+	return err;
-+}
-+
-+/*
-+ * Tear down all per-VE subsystems of @ve and drop a reference on it.
-+ *
-+ * Runs with ve->op_sem held for reading and with the execution
-+ * environment temporarily switched to @ve.  FINI hooks run first, then
-+ * networking, iptables, IPC, scheduler, device permissions and the
-+ * remaining per-VE filesystems and tables.  Finally the VE is removed from
-+ * the global list and real_put_ve() is called.
-+ */
-+static void env_cleanup(struct ve_struct *ve)
-+{
-+ struct ve_struct *old_ve;
-+
-+ VZTRACE("real_do_env_cleanup\n");
-+
-+ down_read(&ve->op_sem);
-+ old_ve = set_exec_env(ve);
-+
-+ ve_hook_iterate_cleanup(VE_HOOK_FINI, (void *)ve);
-+
-+ fini_venet(ve);
-+
-+ /* no new packets in flight beyond this point */
-+ synchronize_net();
-+ /* skb hold dst_entry, and in turn lies in the ip fragment queue */
-+ ip_fragment_cleanup(ve);
-+
-+ fini_ve_netdev();
-+ fini_ve_route(ve);
-+
-+ /* kill iptables */
-+ /* No skb belonging to VE can exist at this point as unregister_netdev
-+ is an operation awaiting until ALL skb's gone */
-+ flush_ve_iptables(ve);
-+ fini_ve_iptables(ve, ve->_iptables_modules);
-+ fini_ve_netfilter();
-+
-+ ve_ipc_cleanup();
-+
-+ fini_ve_sched(ve);
-+ do_clean_devperms(ve->veid);
-+
-+ fini_ve_devpts(ve);
-+ fini_ve_shmem(ve);
-+ fini_ve_sysfs(ve);
-+ /* only unlinks the tty drivers here; real_do_env_free() calls free_ve_tty_drivers() */
-+ unregister_ve_tty_drivers(ve);
-+ fini_ve_sysctl(ve);
-+ fini_ve_proc(ve);
-+
-+ fini_ve_mibs(ve);
-+
-+ (void)set_exec_env(old_ve);
-+ fini_printk(ve); /* no printk can happen in ve context anymore */
-+
-+ ve_list_del(ve);
-+ up_read(&ve->op_sem);
-+
-+ real_put_ve(ve);
-+}
-+
-+/* VEs queued for teardown by vzmond; protected by ve_cleanup_lock */
-+static struct list_head ve_cleanup_list;
-+static spinlock_t ve_cleanup_lock;
-+
-+/* completed by the vzmond thread when it exits */
-+static DECLARE_COMPLETION(vzmond_complete);
-+/* task pointer of the vzmond thread, set by the thread itself on startup */
-+static struct task_struct *vzmond_thread;
-+/* set to 1 by fini_vzmond() to make the vzmond loop terminate */
-+static volatile int stop_vzmond;
-+
-+/*
-+ * Queue @ve on ve_cleanup_list and wake the vzmond thread, which performs
-+ * the actual teardown via env_cleanup().
-+ */
-+void real_do_env_cleanup(struct ve_struct *ve)
-+{
-+ spin_lock(&ve_cleanup_lock);
-+ list_add_tail(&ve->cleanup_list, &ve_cleanup_list);
-+ spin_unlock(&ve_cleanup_lock);
-+ wake_up_process(vzmond_thread);
-+}
-+
-+/*
-+ * Drain ve_cleanup_list, running env_cleanup() on each queued VE.
-+ *
-+ * The lock is dropped around every env_cleanup() call.  The loop stops
-+ * early when need_resched() reports that a reschedule is due; remaining
-+ * entries stay queued.
-+ */
-+static void do_pending_env_cleanups(void)
-+{
-+	struct ve_struct *victim;
-+
-+	spin_lock(&ve_cleanup_lock);
-+	while (!list_empty(&ve_cleanup_list) && !need_resched()) {
-+		victim = list_entry(ve_cleanup_list.next, struct ve_struct,
-+				cleanup_list);
-+		list_del(&victim->cleanup_list);
-+
-+		/* env_cleanup() must not be called with the spinlock held */
-+		spin_unlock(&ve_cleanup_lock);
-+		env_cleanup(victim);
-+		spin_lock(&ve_cleanup_lock);
-+	}
-+	spin_unlock(&ve_cleanup_lock);
-+}
-+
-+/* Non-zero when ve_cleanup_list is not empty; checked without taking ve_cleanup_lock. */
-+static int have_pending_cleanups(void)
-+{
-+ return !list_empty(&ve_cleanup_list);
-+}
-+
-+/*
-+ * Body of the "vzmond" kernel thread.
-+ *
-+ * Publishes itself in vzmond_thread, then loops until stop_vzmond is set:
-+ * sleep in schedule(), handle freezing, discard pending signals and process
-+ * queued VE cleanups.  On exit it signals vzmond_complete.
-+ */
-+static int vzmond(void *arg)
-+{
-+ daemonize("vzmond");
-+ vzmond_thread = current;
-+ set_current_state(TASK_INTERRUPTIBLE);
-+
-+ while (!stop_vzmond) {
-+ schedule();
-+ try_to_freeze();
-+ if (signal_pending(current))
-+ flush_signals(current);
-+
-+ do_pending_env_cleanups();
-+ /* go to sleep state first, then re-check the queue */
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ /* work left over (e.g. after a need_resched() break): stay runnable */
-+ if (have_pending_cleanups())
-+ __set_current_state(TASK_RUNNING);
-+ }
-+
-+ __set_task_state(current, TASK_RUNNING);
-+ complete_and_exit(&vzmond_complete, 0);
-+}
-+
-+/*
-+ * Initialize the cleanup queue and its lock, clear stop_vzmond and start
-+ * the vzmond thread.  Returns the result of kernel_thread().
-+ */
-+static int __init init_vzmond(void)
-+{
-+ INIT_LIST_HEAD(&ve_cleanup_list);
-+ spin_lock_init(&ve_cleanup_lock);
-+ stop_vzmond = 0;
-+ return kernel_thread(vzmond, NULL, 0);
-+}
-+
-+/*
-+ * Ask the vzmond thread to stop, wake it and wait until it has exited.
-+ * Warns if VEs are still queued for cleanup afterwards.
-+ */
-+static void fini_vzmond(void)
-+{
-+ stop_vzmond = 1;
-+ wake_up_process(vzmond_thread);
-+ wait_for_completion(&vzmond_complete);
-+ WARN_ON(!list_empty(&ve_cleanup_list));
-+}
-+
-+/*
-+ * Release the memory still owned by @ve (IPC, tty drivers, utsname,
-+ * sysctl data, filesystems), free the structure itself and drop the
-+ * module reference taken in do_env_create().
-+ */
-+void real_do_env_free(struct ve_struct *ve)
-+{
-+ VZTRACE("real_do_env_free\n");
-+
-+ ve_ipc_free(ve); /* free SYSV IPC resources */
-+ free_ve_tty_drivers(ve);
-+ free_ve_utsname(ve);
-+ free_ve_sysctl(ve); /* free per ve sysctl data */
-+ free_ve_filesystems(ve);
-+ /* log before kfree(): VEID() still reads from ve */
-+ printk(KERN_INFO "VPS: %d: stopped\n", VEID(ve));
-+ kfree(ve);
-+
-+ module_put(THIS_MODULE);
-+}
-+EXPORT_SYMBOL(real_do_env_free);
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * VE TTY handling
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+DCL_VE_OWNER(TTYDRV, struct tty_driver, owner_env)
-+
-+/*
-+ * Allocate a per-VE copy of the tty driver @base, owned by @ve.
-+ *
-+ * The copy starts as a byte-wise duplicate of @base with driver_state
-+ * cleared.  Unless the driver has TTY_DRIVER_DEVPTS_MEM set, one zeroed
-+ * array of 3 * num pointers is allocated and split into the ttys,
-+ * termios and termios_locked tables; otherwise all three are NULL.
-+ *
-+ * Returns the new driver, or NULL when an allocation fails.
-+ */
-+static struct tty_driver *alloc_ve_tty_driver(struct tty_driver *base,
-+		struct ve_struct *ve)
-+{
-+	struct tty_driver *drv;
-+
-+	drv = kmalloc(sizeof(*drv), GFP_KERNEL);
-+	if (drv == NULL)
-+		return NULL;
-+
-+	memcpy(drv, base, sizeof(*drv));
-+	drv->driver_state = NULL;
-+
-+	if (drv->flags & TTY_DRIVER_DEVPTS_MEM) {
-+		drv->ttys = NULL;
-+		drv->termios = NULL;
-+		drv->termios_locked = NULL;
-+	} else {
-+		size_t bytes = base->num * 3 * sizeof(void *);
-+		void **slots = kmalloc(bytes, GFP_KERNEL);
-+
-+		if (slots == NULL) {
-+			kfree(drv);
-+			return NULL;
-+		}
-+		memset(slots, 0, bytes);
-+		/* three consecutive tables of drv->num pointers each */
-+		drv->ttys = (struct tty_struct **)slots;
-+		drv->termios = (struct termios **)(slots + drv->num);
-+		drv->termios_locked = (struct termios **)(slots + drv->num * 2);
-+	}
-+
-+	SET_VE_OWNER_TTYDRV(drv, ve);
-+	drv->flags |= TTY_DRIVER_INSTALLED;
-+
-+	return drv;
-+}
-+
-+/*
-+ * Free a driver obtained from alloc_ve_tty_driver(), including any
-+ * termios structures hanging off it.  A NULL @driver is ignored.
-+ */
-+static void free_ve_tty_driver(struct tty_driver *driver)
-+{
-+	if (driver != NULL) {
-+		clear_termios(driver);
-+		/* ttys is the base of the combined pointer-table allocation */
-+		kfree(driver->ttys);
-+		kfree(driver);
-+	}
-+}
-+
-+/*
-+ * Allocate the per-VE pty drivers of @ve: the legacy BSD master/slave
-+ * pair (CONFIG_LEGACY_PTYS) and the Unix98 ptm/pts pair together with the
-+ * allocated_ptys idr (CONFIG_UNIX98_PTYS).  Each pair is cross-linked via
-+ * ->other.
-+ *
-+ * Returns 0 on success.  On any allocation failure everything is released
-+ * through free_ve_tty_drivers() and -ENOMEM is returned.
-+ */
-+static int alloc_ve_tty_drivers(struct ve_struct* ve)
-+{
-+#ifdef CONFIG_LEGACY_PTYS
-+ /* Traditional BSD devices */
-+ ve->pty_driver = alloc_ve_tty_driver(pty_driver, ve);
-+ if (!ve->pty_driver)
-+ goto out_mem;
-+
-+ ve->pty_slave_driver = alloc_ve_tty_driver(pty_slave_driver, ve);
-+ if (!ve->pty_slave_driver)
-+ goto out_mem;
-+
-+ ve->pty_driver->other = ve->pty_slave_driver;
-+ ve->pty_slave_driver->other = ve->pty_driver;
-+#endif
-+
-+#ifdef CONFIG_UNIX98_PTYS
-+ ve->ptm_driver = alloc_ve_tty_driver(ptm_driver, ve);
-+ if (!ve->ptm_driver)
-+ goto out_mem;
-+
-+ ve->pts_driver = alloc_ve_tty_driver(pts_driver, ve);
-+ if (!ve->pts_driver)
-+ goto out_mem;
-+
-+ ve->ptm_driver->other = ve->pts_driver;
-+ ve->pts_driver->other = ve->ptm_driver;
-+
-+ ve->allocated_ptys = kmalloc(sizeof(*ve->allocated_ptys), GFP_KERNEL);
-+ if (!ve->allocated_ptys)
-+ goto out_mem;
-+ idr_init(ve->allocated_ptys);
-+#endif
-+ return 0;
-+
-+out_mem:
-+ /* NOTE(review): relies on not-yet-allocated driver pointers in ve being NULL -- confirm init_ve_struct() guarantees that */
-+ free_ve_tty_drivers(ve);
-+ return -ENOMEM;
-+}
-+
-+/*
-+ * Free all per-VE pty drivers of @ve and the allocated_ptys idr
-+ * structure, and reset the corresponding pointers to NULL so that a
-+ * repeated call is harmless.
-+ */
-+static void free_ve_tty_drivers(struct ve_struct* ve)
-+{
-+#ifdef CONFIG_LEGACY_PTYS
-+ free_ve_tty_driver(ve->pty_driver);
-+ free_ve_tty_driver(ve->pty_slave_driver);
-+ ve->pty_driver = ve->pty_slave_driver = NULL;
-+#endif
-+#ifdef CONFIG_UNIX98_PTYS
-+ free_ve_tty_driver(ve->ptm_driver);
-+ free_ve_tty_driver(ve->pts_driver);
-+ kfree(ve->allocated_ptys);
-+ ve->ptm_driver = ve->pts_driver = NULL;
-+ ve->allocated_ptys = NULL;
-+#endif
-+}
-+
-+/* Link @driver onto the global tty_drivers list; the callers in this file hold tty_driver_guard. */
-+static inline void __register_tty_driver(struct tty_driver *driver)
-+{
-+ list_add(&driver->tty_drivers, &tty_drivers);
-+}
-+
-+/* Unlink @driver from the global tty_drivers list; NULL is ignored. */
-+static inline void __unregister_tty_driver(struct tty_driver *driver)
-+{
-+	if (driver != NULL)
-+		list_del(&driver->tty_drivers);
-+}
-+
-+/*
-+ * Link the per-VE pty drivers of @ve onto the global tty_drivers list
-+ * under tty_driver_guard.  Always returns 0.
-+ */
-+static int register_ve_tty_drivers(struct ve_struct* ve)
-+{
-+ write_lock_irq(&tty_driver_guard);
-+#ifdef CONFIG_UNIX98_PTYS
-+ __register_tty_driver(ve->ptm_driver);
-+ __register_tty_driver(ve->pts_driver);
-+#endif
-+#ifdef CONFIG_LEGACY_PTYS
-+ __register_tty_driver(ve->pty_driver);
-+ __register_tty_driver(ve->pty_slave_driver);
-+#endif
-+ write_unlock_irq(&tty_driver_guard);
-+
-+ return 0;
-+}
-+
-+/*
-+ * Unlink the per-VE pty drivers of @ve from the global tty_drivers list
-+ * under tty_driver_guard.
-+ *
-+ * The legacy BSD pair is only allocated and registered when
-+ * CONFIG_LEGACY_PTYS is set, so it is only unregistered under the same
-+ * condition; this keeps the function consistent with
-+ * alloc_ve_tty_drivers(), register_ve_tty_drivers() and
-+ * free_ve_tty_drivers().
-+ */
-+static void unregister_ve_tty_drivers(struct ve_struct* ve)
-+{
-+	VZTRACE("unregister_ve_tty_drivers\n");
-+
-+	write_lock_irq(&tty_driver_guard);
-+#ifdef CONFIG_LEGACY_PTYS
-+	__unregister_tty_driver(ve->pty_driver);
-+	__unregister_tty_driver(ve->pty_slave_driver);
-+#endif
-+#ifdef CONFIG_UNIX98_PTYS
-+	__unregister_tty_driver(ve->ptm_driver);
-+	__unregister_tty_driver(ve->pts_driver);
-+#endif
-+	write_unlock_irq(&tty_driver_guard);
-+}
-+
-+/*
-+ * Allocate and register the per-VE tty drivers of @ve.
-+ *
-+ * Returns 0 on success or the error of the failing step; when
-+ * registration fails the freshly allocated drivers are freed again.
-+ */
-+static int init_ve_tty_drivers(struct ve_struct *ve)
-+{
-+	int err;
-+
-+	err = alloc_ve_tty_drivers(ve);
-+	if (err)
-+		return err;
-+
-+	err = register_ve_tty_drivers(ve);
-+	if (err)
-+		free_ve_tty_drivers(ve);
-+
-+	return err;
-+}
-+
-+/* Undo init_ve_tty_drivers(): unregister the per-VE tty drivers, then free them. */
-+static void fini_ve_tty_drivers(struct ve_struct *ve)
-+{
-+ unregister_ve_tty_drivers(ve);
-+ free_ve_tty_drivers(ve);
-+}
-+
-+/*
-+ * Release every termios and termios_locked structure attached to
-+ * @driver and reset the table slots to NULL, so that nothing is leaked
-+ * when the driver itself is freed.  Drivers without a termios table are
-+ * left alone.
-+ */
-+static void clear_termios(struct tty_driver *driver)
-+{
-+	struct termios *victim;
-+	int idx;
-+
-+	if (!driver->termios)
-+		return;
-+
-+	for (idx = 0; idx < driver->num; idx++) {
-+		victim = driver->termios[idx];
-+		if (victim != NULL) {
-+			driver->termios[idx] = NULL;
-+			kfree(victim);
-+		}
-+
-+		victim = driver->termios_locked[idx];
-+		if (victim != NULL) {
-+			driver->termios_locked[idx] = NULL;
-+			kfree(victim);
-+		}
-+	}
-+}
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * Pieces of VE network
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#include <asm/uaccess.h>
-+#include <net/sock.h>
-+#include <linux/netlink.h>
-+#include <linux/rtnetlink.h>
-+#include <net/route.h>
-+#include <net/ip_fib.h>
-+#endif
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+/*
-+ * Delete every IPv4 address configured on @dev.  Does nothing when the
-+ * device has no in_device attached.
-+ */
-+static void ve_del_ip_addrs(struct net_device *dev)
-+{
-+	struct in_device *idev = in_dev_get(dev);
-+
-+	if (!idev)
-+		return;
-+
-+	/* each call removes the current head of the address list */
-+	while (idev->ifa_list)
-+		inet_del_ifa(idev, &idev->ifa_list, 1);
-+
-+	in_dev_put(idev);
-+}
-+
-+/*
-+ * Quiesce @dev before it changes hands between environments: drop its IP
-+ * addresses, close it if it is up, shut down its queueing, discard
-+ * multicast state and free its divert block.
-+ *
-+ * @to_ve: non-zero remembers the current MTU in dev->orig_mtu; zero
-+ * restores the MTU from dev->orig_mtu.
-+ *
-+ * Returns the dev_close() error if there was one, otherwise the result of
-+ * dev_set_mtu() on the restore path, otherwise 0.
-+ */
-+static int ve_netdev_cleanup(struct net_device *dev, int to_ve)
-+{
-+	int err = 0;
-+	int mtu_rc;
-+
-+	ve_del_ip_addrs(dev);
-+	if (dev->flags & IFF_UP)
-+		err = dev_close(dev);
-+	synchronize_net();
-+	dev_shutdown(dev);
-+	dev_mc_discard(dev);
-+	free_divert_blk(dev);
-+	synchronize_net();
-+
-+	if (to_ve) {
-+		dev->orig_mtu = dev->mtu;
-+		return err;
-+	}
-+
-+	mtu_rc = dev_set_mtu(dev, dev->orig_mtu);
-+	return err ? err : mtu_rc;
-+}
-+
-+/*
-+ * Unlink @dev from the device list and name/index hashes of @ve_src and
-+ * append it to those of @ve_dst, then switch the device's exec
-+ * beancounter to @exec_ub.
-+ *
-+ * Every caller in this file holds dev_base_lock for writing around the
-+ * call.
-+ *
-+ * The list walk keeps @dp pointing at the link that references the
-+ * current entry, starting with the list head itself.  That way a device
-+ * sitting first in the source list is unlinked correctly (the head is
-+ * updated) and, if the list becomes empty, the tail pointer is reset to
-+ * the head link instead of NULL.
-+ *
-+ * NOTE(review): if @dev is not on @ve_src's list it is still appended
-+ * to @ve_dst, as in the original code; callers must pass the real owner.
-+ */
-+static void __ve_dev_move(struct net_device *dev, struct ve_struct *ve_src,
-+	struct ve_struct *ve_dst, struct user_beancounter *exec_ub)
-+{
-+	struct net_device **dp, *d;
-+	struct user_beancounter *ub;
-+
-+	for (d = ve_src->_net_dev_base, dp = &ve_src->_net_dev_base; d != NULL;
-+			dp = &d->next, d = d->next) {
-+		if (d == dev) {
-+			hlist_del(&dev->name_hlist);
-+			hlist_del(&dev->index_hlist);
-+			if (ve_src->_net_dev_tail == &dev->next)
-+				ve_src->_net_dev_tail = dp;
-+			*dp = dev->next;
-+			dev->next = NULL;
-+			break;
-+		}
-+	}
-+	*ve_dst->_net_dev_tail = dev;
-+	ve_dst->_net_dev_tail = &dev->next;
-+	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name, ve_dst));
-+	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex, ve_dst));
-+	dev->owner_env = ve_dst;
-+
-+	/* take the new beancounter reference before dropping the old one */
-+	ub = netdev_bc(dev)->exec_ub;
-+	netdev_bc(dev)->exec_ub = get_beancounter(exec_ub);
-+	put_beancounter(ub);
-+}
-+
-+/*
-+ * Move the network device named @dev_name from VE0 into the VE @veid.
-+ *
-+ * Returns 0 on success, -ESRCH if the VE or the device cannot be found,
-+ * -EPERM if ve_is_dev_movable() rejects the device and -EINVAL if the
-+ * device has IFF_SLAVE or IFF_MASTER set.
-+ */
-+static int ve_dev_add(envid_t veid, char *dev_name)
-+{
-+ int err;
-+ struct net_device *dev;
-+ struct ve_struct *ve;
-+ struct hlist_node *p;
-+
-+ dev = NULL;
-+ err = -ESRCH;
-+
-+ ve = get_ve_by_id(veid);
-+ if (ve == NULL)
-+ goto out;
-+
-+ rtnl_lock();
-+
-+ /* look the device up by name in VE0's name hash */
-+ read_lock(&dev_base_lock);
-+ hlist_for_each(p, dev_name_hash(dev_name, get_ve0())) {
-+ struct net_device *d = hlist_entry(p, struct net_device,
-+ name_hlist);
-+ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
-+ dev = d;
-+ break;
-+ }
-+ }
-+ read_unlock(&dev_base_lock);
-+ if (dev == NULL)
-+ goto out_unlock;
-+
-+ err = -EPERM;
-+ if (!ve_is_dev_movable(dev))
-+ goto out_unlock;
-+
-+ err = -EINVAL;
-+ if (dev->flags & (IFF_SLAVE|IFF_MASTER))
-+ goto out_unlock;
-+
-+ /* the cleanup result is ignored; the move is done regardless */
-+ ve_netdev_cleanup(dev, 1);
-+
-+ write_lock_bh(&dev_base_lock);
-+ __ve_dev_move(dev, get_ve0(), ve, get_exec_ub());
-+ write_unlock_bh(&dev_base_lock);
-+
-+ err = 0;
-+
-+out_unlock:
-+ rtnl_unlock();
-+ real_put_ve(ve);
-+
-+ if (dev == NULL)
-+ printk(KERN_WARNING "Device %s not found\n", dev_name);
-+
-+out:
-+ return err;
-+}
-+
-+/*
-+ * Move the network device named @dev_name from the VE @veid back to VE0.
-+ *
-+ * Returns 0 on success, -ESRCH if the VE or the device cannot be found
-+ * and -EPERM if ve_is_dev_movable() rejects the device.
-+ */
-+static int ve_dev_del(envid_t veid, char *dev_name)
-+{
-+ int err;
-+ struct net_device *dev;
-+ struct ve_struct *ve, *old_exec;
-+ struct hlist_node *p;
-+
-+ dev = NULL;
-+ err = -ESRCH;
-+
-+ ve = get_ve_by_id(veid);
-+ if (ve == NULL)
-+ goto out;
-+
-+ rtnl_lock();
-+
-+ /* look the device up by name in the VE's own name hash */
-+ read_lock(&dev_base_lock);
-+ hlist_for_each(p, dev_name_hash(dev_name, ve)) {
-+ struct net_device *d = hlist_entry(p, struct net_device,
-+ name_hlist);
-+ if (strncmp(d->name, dev_name, IFNAMSIZ) == 0) {
-+ dev = d;
-+ break;
-+ }
-+ }
-+ read_unlock(&dev_base_lock);
-+ if (dev == NULL)
-+ goto out_unlock;
-+
-+ err = -EPERM;
-+ if (!ve_is_dev_movable(dev))
-+ goto out_unlock;
-+
-+ /* run the cleanup with the VE as execution environment */
-+ old_exec = set_exec_env(ve);
-+ ve_netdev_cleanup(dev, 0);
-+ (void)set_exec_env(old_exec);
-+
-+ write_lock_bh(&dev_base_lock);
-+ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
-+ write_unlock_bh(&dev_base_lock);
-+
-+ err = 0;
-+
-+out_unlock:
-+ rtnl_unlock();
-+ real_put_ve(ve);
-+
-+ if (dev == NULL)
-+ printk(KERN_WARNING "Device %s not found\n", dev_name);
-+
-+out:
-+ return err;
-+}
-+
-+/*
-+ * Entry point for mapping a network device into or out of a VE.
-+ *
-+ * @op selects the direction: VE_NETDEV_ADD hands @dev_name to VE @veid,
-+ * VE_NETDEV_DEL takes it back.  The caller needs CAP_SETVEID.
-+ *
-+ * Returns -EPERM without the capability, -EINVAL for an unknown @op,
-+ * otherwise the result of ve_dev_add() / ve_dev_del().
-+ */
-+int real_ve_dev_map(envid_t veid, int op, char *dev_name)
-+{
-+	if (!capable(CAP_SETVEID))
-+		return -EPERM;
-+
-+	if (op == VE_NETDEV_ADD)
-+		return ve_dev_add(veid, dev_name);
-+	if (op == VE_NETDEV_DEL)
-+		return ve_dev_del(veid, dev_name);
-+
-+	return -EINVAL;
-+}
-+
-+/*
-+ * Return every device of @ve to VE0, except the VE's loopback device and
-+ * devices flagged NETIF_F_VENET.
-+ *
-+ * dev_base_lock is dropped around ve_netdev_cleanup(), so after each
-+ * moved device the scan restarts from the head of the VE's list.
-+ */
-+static void ve_mapped_devs_cleanup(struct ve_struct *ve)
-+{
-+ struct net_device *dev;
-+
-+ rtnl_lock();
-+ write_lock_bh(&dev_base_lock);
-+restart:
-+ for (dev = ve->_net_dev_base; dev != NULL; dev = dev->next)
-+ {
-+ if ((dev->features & NETIF_F_VENET) ||
-+ (dev == ve->_loopback_dev)) /* Skip loopback dev */
-+ continue;
-+ write_unlock_bh(&dev_base_lock);
-+ ve_netdev_cleanup(dev, 0);
-+ write_lock_bh(&dev_base_lock);
-+ __ve_dev_move(dev, ve, get_ve0(), netdev_bc(dev)->owner_ub);
-+ /* the list has changed; rescan from the beginning */
-+ goto restart;
-+ }
-+ write_unlock_bh(&dev_base_lock);
-+ rtnl_unlock();
-+}
-+#endif
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * VE information via /proc
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+#ifdef CONFIG_PROC_FS
-+/*
-+ * seq_file ->show for the devperms file.
-+ *
-+ * The token (void *)1L stands for the header line and prints the format
-+ * version.  Any other @v is a devperms_struct, printed as VE id, device
-+ * type letter ('c', 'b' or '?'), octal permission mask and the device
-+ * number, with '*' for a part not selected by the VE_USE_* bits.
-+ */
-+static int devperms_seq_show(struct seq_file *m, void *v)
-+{
-+	struct devperms_struct *perm = (struct devperms_struct *)v;
-+	unsigned use_bits, file_type;
-+	char devname[32];
-+	char kind;
-+	dev_t dev;
-+
-+	if (perm == (struct devperms_struct *)1L) {
-+		seq_printf(m, "Version: 2.7\n");
-+		return 0;
-+	}
-+
-+	use_bits = perm->type & VE_USE_MASK;
-+	file_type = perm->type & S_IFMT;
-+	dev = perm->dev;
-+
-+	/* exact device, whole major, or everything */
-+	if ((use_bits & VE_USE_MINOR) == VE_USE_MINOR)
-+		snprintf(devname, sizeof(devname), "%d:%d",
-+				MAJOR(dev), MINOR(dev));
-+	else if ((use_bits & VE_USE_MAJOR) == VE_USE_MAJOR)
-+		snprintf(devname, sizeof(devname), "%d:*", MAJOR(dev));
-+	else
-+		snprintf(devname, sizeof(devname), "*:*");
-+
-+	switch (file_type) {
-+	case S_IFCHR:
-+		kind = 'c';
-+		break;
-+	case S_IFBLK:
-+		kind = 'b';
-+		break;
-+	default:
-+		kind = '?';
-+		break;
-+	}
-+
-+	seq_printf(m, "%10u %c %03o %s\n", perm->veid, kind, perm->mask,
-+			devname);
-+	return 0;
-+}
-+
-+/*
-+ * seq_file ->start for the devperms file.
-+ *
-+ * Takes devperms_hash_guard for reading on every path; it is released in
-+ * devperms_seq_stop().  Position 0 yields the token (void *)1L, which
-+ * devperms_seq_show() prints as the header.  Position N > 0 yields the
-+ * N-th entry counted across all hash slots, and the slot it was found in
-+ * is remembered in m->private for devperms_seq_next().
-+ */
-+static void *devperms_seq_start(struct seq_file *m, loff_t *pos)
-+{
-+ loff_t cpos;
-+ long slot;
-+ struct devperms_struct *dp;
-+
-+ cpos = *pos;
-+ read_lock(&devperms_hash_guard);
-+ if (cpos-- == 0)
-+ return (void *)1L;
-+
-+ for (slot = 0; slot < DEVPERMS_HASH_SZ; slot++)
-+ for (dp = devperms_hash[slot]; dp; dp = dp->devhash_next)
-+ if (cpos-- == 0) {
-+ m->private = (void *)slot;
-+ return dp;
-+ }
-+ return NULL;
-+}
-+
-+/*
-+ * seq_file ->next for the devperms file.
-+ *
-+ * Follows the current hash chain while it lasts.  At the end of a chain
-+ * (or after the header token) the search continues with the next
-+ * non-empty slot; the slot in use is kept in m->private.  *pos is
-+ * advanced only when another entry is returned.
-+ */
-+static void *devperms_seq_next(struct seq_file *m, void *v, loff_t *pos)
-+{
-+	struct devperms_struct *cur = (struct devperms_struct *)v;
-+	int is_header = (cur == (struct devperms_struct *)1L);
-+	long bucket;
-+
-+	if (!is_header && cur->devhash_next != NULL) {
-+		(*pos)++;
-+		return cur->devhash_next;
-+	}
-+
-+	/* header: start from slot 0; end of chain: start after this slot */
-+	bucket = is_header ? 0 : (long)m->private + 1;
-+	while (bucket < DEVPERMS_HASH_SZ) {
-+		if (devperms_hash[bucket]) {
-+			(*pos)++;
-+			m->private = (void *)bucket;
-+			return devperms_hash[bucket];
-+		}
-+		bucket++;
-+	}
-+	return NULL;
-+}
-+
-+/* seq_file ->stop: drop the read lock taken in devperms_seq_start(). */
-+static void devperms_seq_stop(struct seq_file *m, void *v)
-+{
-+ read_unlock(&devperms_hash_guard);
-+}
-+
-+/* Iterator callbacks used by devperms_open() for the devperms file. */
-+static struct seq_operations devperms_seq_op = {
-+ .start = devperms_seq_start,
-+ .next = devperms_seq_next,
-+ .stop = devperms_seq_stop,
-+ .show = devperms_seq_show,
-+};
-+
-+/* ->open for the devperms file: attach the seq_file iterator to @file. */
-+static int devperms_open(struct inode *inode, struct file *file)
-+{
-+ return seq_open(file, &devperms_seq_op);
-+}
-+
-+/* File operations installed on the "vz/devperms" proc entry. */
-+static struct file_operations proc_devperms_ops = {
-+ .open = devperms_open,
-+ .read = seq_read,
-+ .llseek = seq_lseek,
-+ .release = seq_release,
-+};
-+
-+/*
-+ * Layout of one vestat line.  The column widths depend on the size of
-+ * long; VESTAT_LINE_WIDTH is the resulting line length including the
-+ * newline (each field plus its separator) and is used to pad the
-+ * version line to the same width.  LINE_FMT and HEAD_FMT must stay in
-+ * step with each other and with LINE_WIDTH.
-+ */
-+#if BITS_PER_LONG == 32
-+#define VESTAT_LINE_WIDTH (6 * 11 + 6 * 21)
-+#define VESTAT_LINE_FMT "%10u %10lu %10lu %10lu %10lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %10lu\n"
-+#define VESTAT_HEAD_FMT "%10s %10s %10s %10s %10s %20s %20s %20s %20s %20s %20s %10s\n"
-+#else
-+#define VESTAT_LINE_WIDTH (12 * 21)
-+#define VESTAT_LINE_FMT "%20u %20lu %20lu %20lu %20lu %20Lu %20Lu %20Lu %20Lu %20Lu %20Lu %20lu\n"
-+#define VESTAT_HEAD_FMT "%20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s %20s\n"
-+#endif
-+
-+/*
-+ * seq_file ->show for the vestat file.
-+ *
-+ * Prints the version and column header in front of the first record
-+ * (the list head for VE0 readers, the reader's own VE otherwise), then
-+ * one line of accumulated statistics for @v.  VE0 itself gets no
-+ * statistics line.
-+ */
-+static int vestat_seq_show(struct seq_file *m, void *v)
-+{
-+ struct ve_struct *ve = (struct ve_struct *)v;
-+ struct ve_struct *curve;
-+ int cpu;
-+ unsigned long user_ve, nice_ve, system_ve, uptime;
-+ cycles_t uptime_cycles, idle_time, strv_time, used;
-+
-+ curve = get_exec_env();
-+ if (ve == ve_list_head ||
-+ (!ve_is_super(curve) && ve == curve)) {
-+ /* print header */
-+ seq_printf(m, "%-*s\n",
-+ VESTAT_LINE_WIDTH - 1,
-+ "Version: 2.2");
-+ seq_printf(m, VESTAT_HEAD_FMT, "VEID",
-+ "user", "nice", "system",
-+ "uptime", "idle",
-+ "strv", "uptime", "used",
-+ "maxlat", "totlat", "numsched");
-+ }
-+
-+ if (ve == get_ve0())
-+ return 0;
-+
-+ user_ve = nice_ve = system_ve = 0;
-+ /* strv_time is never updated below, so the "strv" column is always 0 */
-+ idle_time = strv_time = used = 0;
-+
-+ /*
-+ * NOTE(review): this walks all NR_CPUS slots rather than only the
-+ * possible/online CPUs; confirm VE_CPU_STATS() is valid for every index.
-+ */
-+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
-+ struct ve_cpu_stats *st;
-+
-+ st = VE_CPU_STATS(ve, cpu);
-+ user_ve += st->user;
-+ nice_ve += st->nice;
-+ system_ve += st->system;
-+ used += VE_CPU_STATS(ve, cpu)->used_time;
-+ idle_time += ve_sched_get_idle_time(ve, cpu);
-+ }
-+ uptime_cycles = get_cycles() - ve->start_cycles;
-+ uptime = jiffies - ve->start_jiffies;
-+
-+ seq_printf(m, VESTAT_LINE_FMT, ve->veid,
-+ user_ve, nice_ve, system_ve,
-+ uptime, idle_time,
-+ strv_time, uptime_cycles, used,
-+ ve->sched_lat_ve.last.maxlat,
-+ ve->sched_lat_ve.last.totlat,
-+ ve->sched_lat_ve.last.count);
-+ return 0;
-+}
-+
-+/*
-+ * seq_file ->start for VE list walks.
-+ *
-+ * Takes ve_list_guard for reading on every path (released in
-+ * ve_seq_stop()).  A reader that is not in the super VE sees exactly one
-+ * record, its own VE, at position 0.  A super-VE reader gets the
-+ * *pos-th element of the list starting at ve_list_head, or NULL past
-+ * the end.
-+ */
-+static void *ve_seq_start(struct seq_file *m, loff_t *pos)
-+{
-+	struct ve_struct *cur_env = get_exec_env();
-+	struct ve_struct *ve;
-+	loff_t skip;
-+
-+	read_lock(&ve_list_guard);
-+
-+	if (!ve_is_super(cur_env)) {
-+		if (*pos == 0)
-+			return cur_env;
-+		return NULL;
-+	}
-+
-+	ve = ve_list_head;
-+	skip = *pos;
-+	while (ve != NULL && skip > 0) {
-+		ve = ve->next;
-+		skip--;
-+	}
-+	return ve;
-+}
-+
-+/*
-+ * seq_file ->next for VE list walks.  Readers outside the super VE have
-+ * no further records; super-VE readers advance to the next list entry.
-+ */
-+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos)
-+{
-+ struct ve_struct *ve = (struct ve_struct *)v;
-+
-+ if (!ve_is_super(get_exec_env()))
-+ return NULL;
-+ (*pos)++;
-+ return ve->next;
-+}
-+
-+/* seq_file ->stop: drop the read lock taken in ve_seq_start(). */
-+static void ve_seq_stop(struct seq_file *m, void *v)
-+{
-+ read_unlock(&ve_list_guard);
-+}
-+
-+/*
-+ * Iterator callbacks used by vestat_open() for the vestat file.
-+ * Written with C99 designated initializers, like devperms_seq_op,
-+ * instead of the obsolete GCC "field:" form.
-+ */
-+static struct seq_operations vestat_seq_op = {
-+	.start	= ve_seq_start,
-+	.next	= ve_seq_next,
-+	.stop	= ve_seq_stop,
-+	.show	= vestat_seq_show,
-+};
-+
-+/* ->open for the vestat file: attach the seq_file iterator to @file. */
-+static int vestat_open(struct inode *inode, struct file *file)
-+{
-+ return seq_open(file, &vestat_seq_op);
-+}
-+
-+/*
-+ * File operations installed on the "vz/vestat" proc entry.
-+ * Written with C99 designated initializers, like proc_devperms_ops,
-+ * instead of the obsolete GCC "field:" form.
-+ */
-+static struct file_operations proc_vestat_operations = {
-+	.open		= vestat_open,
-+	.read		= seq_read,
-+	.llseek		= seq_lseek,
-+	.release	= seq_release,
-+};
-+
-+/*
-+ * Create the "vz/vestat" and "vz/devperms" proc entries and attach
-+ * their file operations.
-+ *
-+ * If the first attempt to create "vz/vestat" fails, the "vz" directory
-+ * is created and the attempt is repeated.  A failure to create either
-+ * entry is only logged; the function always returns 0.
-+ */
-+static int __init init_vecalls_proc(void)
-+{
-+ struct proc_dir_entry *de;
-+
-+ de = create_proc_glob_entry("vz/vestat",
-+ S_IFREG|S_IRUSR, NULL);
-+ if (de == NULL) {
-+ /* create "vz" subdirectory, if not exist */
-+ (void) create_proc_glob_entry("vz",
-+ S_IFDIR|S_IRUGO|S_IXUGO, NULL);
-+ de = create_proc_glob_entry("vz/vestat",
-+ S_IFREG|S_IRUSR, NULL);
-+ }
-+ if (de)
-+ de->proc_fops = &proc_vestat_operations;
-+ else
-+ printk(KERN_WARNING
-+ "VZMON: can't make vestat proc entry\n");
-+
-+ /* NOTE(review): created with create_proc_entry(), unlike vestat above */
-+ de = create_proc_entry("vz/devperms", S_IFREG | S_IRUSR, NULL);
-+ if (de)
-+ de->proc_fops = &proc_devperms_ops;
-+ else
-+ printk(KERN_WARNING
-+ "VZMON: can't make devperms proc entry\n");
-+ return 0;
-+}
-+
-+/*
-+ * Remove the proc entries made by init_vecalls_proc().
-+ *
-+ * NOTE(review): "vz/vestat" was created with create_proc_glob_entry()
-+ * but is removed with remove_proc_entry(); confirm this is the matching
-+ * removal call.
-+ */
-+static void fini_vecalls_proc(void)
-+{
-+ remove_proc_entry("vz/devperms", NULL);
-+ remove_proc_entry("vz/vestat", NULL);
-+}
-+#else
-+#define init_vecalls_proc() (0)
-+#define fini_vecalls_proc() do { } while (0)
-+#endif /* CONFIG_PROC_FS */
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * User ctl
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+int vzcalls_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
-+
-+/*
-+ * Descriptor handed to vzioctl_register(): commands whose ioctl type is
-+ * VZCTLTYPE are dispatched to vzcalls_ioctl().  Written with C99
-+ * designated initializers instead of the obsolete GCC "field:" form.
-+ */
-+static struct vzioctlinfo vzcalls = {
-+	.type	= VZCTLTYPE,
-+	.func	= vzcalls_ioctl,
-+	.owner	= THIS_MODULE,
-+};
-+
-+/*
-+ * ioctl handler for the VZCTLTYPE command family.
-+ *
-+ * @arg is a user-space pointer to the command-specific structure, which
-+ * is copied in before the request is passed to the matching real_*()
-+ * helper.
-+ *
-+ * Returns the helper's result, -EFAULT if user memory cannot be read,
-+ * -ENOMEM on allocation failure, -EINVAL for a bad data length and
-+ * -ENOTTY for an unknown command.
-+ */
-+int vzcalls_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
-+		unsigned long arg)
-+{
-+	int err;
-+
-+	err = -ENOTTY;
-+	switch(cmd) {
-+	    case VZCTL_MARK_ENV_TO_DOWN: {
-+		/* Compatibility issue */
-+		err = 0;
-+	    }
-+	    break;
-+	    case VZCTL_SETDEVPERMS: {
-+		/* Device type was mistakenly declared as dev_t
-+		 * in the old user-kernel interface.
-+		 * That's wrong, dev_t is a kernel internal type.
-+		 * I use `unsigned' not having anything better in mind.
-+		 * 2001/08/11  SAW  */
-+		struct vzctl_setdevperms s;
-+		err = -EFAULT;
-+		if (copy_from_user(&s, (void *)arg, sizeof(s)))
-+			break;
-+		err = real_setdevperms(s.veid, s.type,
-+				new_decode_dev(s.dev), s.mask);
-+	    }
-+	    break;
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+	    case VZCTL_VE_NETDEV: {
-+		struct vzctl_ve_netdev d;
-+		char *s;
-+		err = -EFAULT;
-+		if (copy_from_user(&d, (void *)arg, sizeof(d)))
-+			break;
-+		err = -ENOMEM;
-+		s = kmalloc(IFNAMSIZ+1, GFP_KERNEL);
-+		if (s == NULL)
-+			break;
-+		err = -EFAULT;
-+		if (strncpy_from_user(s, d.dev_name, IFNAMSIZ) > 0) {
-+			/* terminate even if the full IFNAMSIZ bytes were used */
-+			s[IFNAMSIZ] = 0;
-+			err = real_ve_dev_map(d.veid, d.op, s);
-+		}
-+		kfree(s);
-+	    }
-+	    break;
-+#endif
-+	    case VZCTL_ENV_CREATE: {
-+		struct vzctl_env_create s;
-+		err = -EFAULT;
-+		if (copy_from_user(&s, (void *)arg, sizeof(s)))
-+			break;
-+		err = real_env_create(s.veid, s.flags, s.class_id,
-+				NULL, 0);
-+	    }
-+	    break;
-+	    case VZCTL_ENV_CREATE_DATA: {
-+		struct vzctl_env_create_data s;
-+		env_create_param_t *data;
-+		err = -EFAULT;
-+		if (copy_from_user(&s, (void *)arg, sizeof(s)))
-+			break;
-+		err=-EINVAL;
-+		/*
-+		 * The user data is copied into a buffer of sizeof(*data)
-+		 * bytes, so bound the length by that size explicitly and
-+		 * do not rely on the MAXLEN macro alone to prevent a heap
-+		 * overflow.
-+		 */
-+		if (s.datalen < VZCTL_ENV_CREATE_DATA_MINLEN ||
-+		    s.datalen > VZCTL_ENV_CREATE_DATA_MAXLEN ||
-+		    s.datalen > sizeof(*data) ||
-+		    s.data == 0)
-+			break;
-+		err = -ENOMEM;
-+		data = kmalloc(sizeof(*data), GFP_KERNEL);
-+		if (!data)
-+			break;
-+		/* the part not supplied by the caller stays zeroed */
-+		memset(data, 0, sizeof(*data));
-+		err = -EFAULT;
-+		if (copy_from_user(data, (void *)s.data, s.datalen))
-+			goto free_data;
-+		err = real_env_create(s.veid, s.flags, s.class_id,
-+				data, s.datalen);
-+free_data:
-+		kfree(data);
-+	    }
-+	    break;
-+	    case VZCTL_GET_CPU_STAT: {
-+		struct vzctl_cpustatctl s;
-+		err = -EFAULT;
-+		if (copy_from_user(&s, (void *)arg, sizeof(s)))
-+			break;
-+		err = ve_get_cpu_stat(s.veid, s.cpustat);
-+	    }
-+	    break;
-+	}
-+	return err;
-+}
-+EXPORT_SYMBOL(real_env_create);
-+
-+
-+/**********************************************************************
-+ **********************************************************************
-+ *
-+ * Init/exit stuff
-+ *
-+ **********************************************************************
-+ **********************************************************************/
-+
-+#ifdef CONFIG_VE_CALLS_MODULE
-+/*
-+ * Publish the real_*() entry points and the vzmon module through the
-+ * KSYMRESOLVE/KSYMMODRESOLVE macros (defined elsewhere).  Always
-+ * returns 0.
-+ */
-+static int __init init_vecalls_symbols(void)
-+{
-+ KSYMRESOLVE(real_get_device_perms_ve);
-+ KSYMRESOLVE(real_do_env_cleanup);
-+ KSYMRESOLVE(real_do_env_free);
-+ KSYMRESOLVE(real_update_load_avg_ve);
-+ KSYMMODRESOLVE(vzmon);
-+ return 0;
-+}
-+
-+/*
-+ * Undo init_vecalls_symbols(): the module is unresolved first, then
-+ * each individual symbol.
-+ */
-+static void fini_vecalls_symbols(void)
-+{
-+ KSYMMODUNRESOLVE(vzmon);
-+ KSYMUNRESOLVE(real_get_device_perms_ve);
-+ KSYMUNRESOLVE(real_do_env_cleanup);
-+ KSYMUNRESOLVE(real_do_env_free);
-+ KSYMUNRESOLVE(real_update_load_avg_ve);
-+}
-+#else
-+#define init_vecalls_symbols() (0)
-+#define fini_vecalls_symbols() do { } while (0)
-+#endif
-+
-+/* Register the vzcalls ioctl descriptor.  Always returns 0. */
-+static inline __init int init_vecalls_ioctls(void)
-+{
-+ vzioctl_register(&vzcalls);
-+ return 0;
-+}
-+
-+/* Unregister the vzcalls ioctl descriptor. */
-+static inline void fini_vecalls_ioctls(void)
-+{
-+ vzioctl_unregister(&vzcalls);
-+}
-+
-+/*
-+ * Module initialisation.
-+ *
-+ * The subsystems are brought up in order and torn down in reverse on
-+ * failure.  The ve_hooks list heads are initialised before anything is
-+ * published: once init_vecalls_ioctls() has registered the ioctl
-+ * handler, user space can reach this module, so no state it may touch
-+ * should be left uninitialised at that point.
-+ *
-+ * Returns 0 on success or the negative error of the failing step.
-+ */
-+static int __init vecalls_init(void)
-+{
-+	int err;
-+	int i;
-+
-+	ve_list_head = get_ve0();
-+
-+	for (i = 0; i < VE_MAX_HOOKS; i++)
-+		INIT_LIST_HEAD(&ve_hooks[i]);
-+
-+	err = init_vzmond();
-+	if (err < 0)
-+		goto out_vzmond;
-+
-+	err = init_devperms_hash();
-+	if (err < 0)
-+		goto out_perms;
-+
-+	err = init_vecalls_symbols();
-+	if (err < 0)
-+		goto out_sym;
-+
-+	err = init_vecalls_proc();
-+	if (err < 0)
-+		goto out_proc;
-+
-+	/* last step: from here on the ioctl interface is live */
-+	err = init_vecalls_ioctls();
-+	if (err < 0)
-+		goto out_ioctls;
-+
-+	return 0;
-+
-+out_ioctls:
-+	fini_vecalls_proc();
-+out_proc:
-+	fini_vecalls_symbols();
-+out_sym:
-+	fini_devperms_hash();
-+out_perms:
-+	fini_vzmond();
-+out_vzmond:
-+	return err;
-+}
-+
-+/* Module exit: undo vecalls_init() in reverse order of initialisation. */
-+static void vecalls_exit(void)
-+{
-+ fini_vecalls_ioctls();
-+ fini_vecalls_proc();
-+ fini_vecalls_symbols();
-+ fini_devperms_hash();
-+ fini_vzmond();
-+}
-+
-+EXPORT_SYMBOL(get_ve_by_id);
-+EXPORT_SYMBOL(__find_ve_by_id);
-+EXPORT_SYMBOL(ve_list_guard);
-+EXPORT_SYMBOL(ve_list_head);
-+EXPORT_SYMBOL(nr_ve);
-+
-+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
-+MODULE_DESCRIPTION("Virtuozzo Control");
-+MODULE_LICENSE("GPL v2");
-+
-+module_init(vecalls_init)
-+module_exit(vecalls_exit)
-diff -upr linux-2.6.16.orig/kernel/veowner.c linux-2.6.16-026test009/kernel/veowner.c
---- linux-2.6.16.orig/kernel/veowner.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/veowner.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,304 @@
-+/*
-+ * kernel/veowner.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/ve.h>
-+#include <linux/ve_owner.h>
-+#include <linux/ve_proto.h>
-+#include <linux/ipc.h>
-+#include <linux/fs.h>
-+#include <linux/proc_fs.h>
-+#include <linux/file.h>
-+#include <linux/mm.h>
-+#include <linux/delay.h>
-+#include <linux/vmalloc.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/list.h>
-+#include <linux/inetdevice.h>
-+#include <asm/system.h>
-+#include <asm/io.h>
-+
-+#include <net/tcp.h>
-+
-+/*
-+ * Attach @tsk to VE0.
-+ *
-+ * The task's virtual pid/tgid (and pgid/sid when it has a signal
-+ * struct) are set to its real ones, both its exec and owner
-+ * environments become VE0 and its per-task VE accounting fields are
-+ * reset.  Tasks with a non-zero pid are also linked into the VE and
-+ * counted in VE0's pcounter.
-+ */
-+void prepare_ve0_process(struct task_struct *tsk)
-+{
-+ set_virt_pid(tsk, tsk->pid);
-+ set_virt_tgid(tsk, tsk->tgid);
-+ if (tsk->signal) {
-+ set_virt_pgid(tsk, tsk->signal->pgrp);
-+ set_virt_sid(tsk, tsk->signal->session);
-+ }
-+ VE_TASK_INFO(tsk)->exec_env = get_ve0();
-+ VE_TASK_INFO(tsk)->owner_env = get_ve0();
-+ VE_TASK_INFO(tsk)->sleep_time = 0;
-+ VE_TASK_INFO(tsk)->wakeup_stamp = 0;
-+ VE_TASK_INFO(tsk)->sched_time = 0;
-+ seqcount_init(&VE_TASK_INFO(tsk)->wakeup_lock);
-+
-+ /* pid 0 tasks are neither linked nor counted */
-+ if (tsk->pid) {
-+ SET_VE_LINKS(tsk);
-+ atomic_inc(&get_ve0()->pcounter);
-+ }
-+}
-+
-+/* Make the global loopback_dev the loopback device of VE0. */
-+void prepare_ve0_loopback(void)
-+{
-+ get_ve0()->_loopback_dev = &loopback_dev;
-+}
-+
-+/*
-+ * ------------------------------------------------------------------------
-+ * proc entries
-+ * ------------------------------------------------------------------------
-+ */
-+
-+#ifdef CONFIG_PROC_FS
-+/*
-+ * Move the entry called @name from directory @sdir to the front of
-+ * directory @ddir.  Nothing happens if @sdir has no such entry.
-+ */
-+static void proc_move(struct proc_dir_entry *ddir,
-+		struct proc_dir_entry *sdir,
-+		const char *name)
-+{
-+	struct proc_dir_entry **link = &sdir->subdir;
-+	struct proc_dir_entry *ent;
-+	int namelen = strlen(name);
-+
-+	/* find the link that points at the matching entry */
-+	while ((ent = *link) != NULL && !proc_match(namelen, name, ent))
-+		link = &ent->next;
-+	if (ent == NULL)
-+		return;
-+
-+	/* unlink from the source, push onto the destination */
-+	*link = ent->next;
-+	ent->parent = ddir;
-+	ent->next = ddir->subdir;
-+	ddir->subdir = ent;
-+}
-+/*
-+ * Move a fixed set of entries from VE0's proc root back into the
-+ * global proc_root.  Names that are not present are skipped by
-+ * proc_move().
-+ */
-+static void prepare_proc_misc(void)
-+{
-+	static char *global_names[] = {
-+		"loadavg",
-+		"uptime",
-+		"meminfo",
-+		"version",
-+		"stat",
-+		"filesystems",
-+		"locks",
-+		"swaps",
-+		"mounts",
-+		"net",
-+		"cpuinfo",
-+		"sysvipc",
-+		"sys",
-+		"fs",
-+		"vz",
-+		"user_beancounters",
-+		"cmdline",
-+		"vmstat",
-+		"modules",
-+		"kmsg",
-+		NULL,
-+	};
-+	int i;
-+
-+	for (i = 0; global_names[i] != NULL; i++)
-+		proc_move(&proc_root, ve0.proc_root, global_names[i]);
-+}
-+/*
-+ * Split the proc tree between the global root and VE0's private root.
-+ *
-+ * Everything currently under proc_root is re-parented to ve0.proc_root
-+ * (in front of whatever VE0's root already held), the entries listed in
-+ * prepare_proc_misc() are then moved back to the global root, and a few
-+ * directories are created while VE0 is the execution environment.
-+ * Always returns 0.
-+ *
-+ * NOTE(review): the re-parenting loop dereferences the first entry
-+ * unconditionally, so it assumes proc_root.subdir is not empty here.
-+ */
-+int prepare_proc(void)
-+{
-+ struct ve_struct *envid;
-+ struct proc_dir_entry *de;
-+ struct proc_dir_entry *ve_root;
-+
-+ envid = set_exec_env(&ve0);
-+ ve_root = ve0.proc_root->subdir;
-+ /* move the whole tree to be visible in VE0 only */
-+ ve0.proc_root->subdir = proc_root.subdir;
-+ for (de = ve0.proc_root->subdir; de->next != NULL; de = de->next)
-+ de->parent = ve0.proc_root;
-+ /* de is now the last moved entry: fix it up and chain the old list */
-+ de->parent = ve0.proc_root;
-+ de->next = ve_root;
-+
-+ /* move back into the global scope some specific entries */
-+ proc_root.subdir = NULL;
-+ prepare_proc_misc();
-+ proc_net = proc_mkdir("net", ve0.proc_root);
-+ proc_net_stat = proc_mkdir("stat", proc_net);
-+ proc_mkdir("vz", 0);
-+#ifdef CONFIG_SYSVIPC
-+ proc_mkdir("sysvipc", 0);
-+#endif
-+ proc_root_fs = proc_mkdir("fs", 0);
-+ /* XXX proc_tty_init(); */
-+
-+ /* XXX process inodes */
-+
-+ (void)set_exec_env(envid);
-+
-+ (void)create_proc_glob_entry("vz", S_IFDIR|S_IRUGO|S_IXUGO, NULL);
-+ return 0;
-+}
-+
-+/* Statically allocated root directory entry of VE0's private proc tree. */
-+static struct proc_dir_entry ve0_proc_root = {
-+ .name = "/proc",
-+ .namelen = 5,
-+ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
-+ .nlink = 2
-+};
-+
-+/* Point VE0 at its statically allocated proc root. */
-+void prepare_ve0_proc_root(void)
-+{
-+ ve0.proc_root = &ve0_proc_root;
-+}
-+#endif
-+
-+/*
-+ * ------------------------------------------------------------------------
-+ * Virtualized sysctl
-+ * ------------------------------------------------------------------------
-+ */
-+
-+/*
-+ * Bounds referenced by the minmax entries below: &semmin[0] (1) as the
-+ * lower and &semmax[3] (IPCMNI) as the upper limit.  They are passed in
-+ * the trailing positional slots of the initializer (presumably
-+ * extra1/extra2 -- confirm against the ctl_table definition).
-+ */
-+static int semmin[4] = { 1, 1, 1, 1 };
-+static int semmax[4] = { 8000, INT_MAX, 1000, IPCMNI };
-+/*
-+ * "kernel" sysctl entries registered for VE0 by prepare_sysctl(): host
-+ * and domain name, plus the SysV IPC limits stored in ve0 when
-+ * CONFIG_SYSVIPC is enabled.
-+ */
-+static ctl_table kern_table[] = {
-+ {KERN_NODENAME, "hostname", system_utsname.nodename, 64,
-+ 0644, NULL, &proc_doutsstring, &sysctl_string},
-+ {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64,
-+ 0644, NULL, &proc_doutsstring, &sysctl_string},
-+#ifdef CONFIG_SYSVIPC
-+#define get_ve0_field(fname) &ve0._##fname
-+ {KERN_SHMMAX, "shmmax", get_ve0_field(shm_ctlmax), sizeof (size_t),
-+ 0644, NULL, &proc_doulongvec_minmax },
-+ {KERN_SHMALL, "shmall", get_ve0_field(shm_ctlall), sizeof (size_t),
-+ 0644, NULL, &proc_doulongvec_minmax },
-+ {KERN_SHMMNI, "shmmni", get_ve0_field(shm_ctlmni), sizeof (int),
-+ 0644, NULL, &proc_dointvec_minmax, NULL,
-+ NULL, &semmin[0], &semmax[3] },
-+ {KERN_MSGMAX, "msgmax", get_ve0_field(msg_ctlmax), sizeof (int),
-+ 0644, NULL, &proc_dointvec },
-+ {KERN_MSGMNI, "msgmni", get_ve0_field(msg_ctlmni), sizeof (int),
-+ 0644, NULL, &proc_dointvec_minmax, NULL,
-+ NULL, &semmin[0], &semmax[3] },
-+ {KERN_MSGMNB, "msgmnb", get_ve0_field(msg_ctlmnb), sizeof (int),
-+ 0644, NULL, &proc_dointvec },
-+ {KERN_SEM, "sem", get_ve0_field(sem_ctls), 4*sizeof (int),
-+ 0644, NULL, &proc_dointvec },
-+#endif
-+ {0}
-+};
-+/* Root of the first sysctl tree: "kernel" -> kern_table. */
-+static ctl_table root_table[] = {
-+ {CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
-+ {0}
-+};
-+extern int ip_rt_src_check;
-+extern int ve_area_access_check;
-+/*
-+ * net.ipv4.route entries: "src_check" exposes the ip_rt_src_check
-+ * integer.  Written with C99 designated initializers instead of the
-+ * obsolete GCC "field:" form.
-+ */
-+static ctl_table vz_ipv4_route_table[] = {
-+	{
-+		.ctl_name	= NET_IPV4_ROUTE_SRC_CHECK,
-+		.procname	= "src_check",
-+		.data		= &ip_rt_src_check,
-+		.maxlen		= sizeof(int),
-+		.mode		= 0644,
-+		.proc_handler	= &proc_dointvec,
-+	},
-+	{ 0 }
-+};
-+/* net.ipv4 directory: "route" -> vz_ipv4_route_table. */
-+static ctl_table vz_ipv4_table[] = {
-+ {NET_IPV4_ROUTE, "route", NULL, 0, 0555, vz_ipv4_route_table},
-+ { 0 }
-+};
-+/* net directory: "ipv4" -> vz_ipv4_table. */
-+static ctl_table vz_net_table[] = {
-+ {NET_IPV4, "ipv4", NULL, 0, 0555, vz_ipv4_table},
-+ { 0 }
-+};
-+/*
-+ * fs entries: "ve-area-access-check" exposes the ve_area_access_check
-+ * integer.  Written with C99 designated initializers instead of the
-+ * obsolete GCC "field:" form.
-+ */
-+static ctl_table vz_fs_table[] = {
-+	{
-+		.ctl_name	= 226,
-+		.procname	= "ve-area-access-check",
-+		.data		= &ve_area_access_check,
-+		.maxlen		= sizeof(int),
-+		.mode		= 0644,
-+		.proc_handler	= &proc_dointvec,
-+	},
-+	{ 0 }
-+};
-+/* Root of the second sysctl tree: "net" and "fs" directories. */
-+static ctl_table root_table2[] = {
-+ {CTL_NET, "net", NULL, 0, 0555, vz_net_table},
-+ {CTL_FS, "fs", NULL, 0, 0555, vz_fs_table},
-+ { 0 }
-+};
-+/*
-+ * Register both sysctl trees with VE0 as the execution environment.
-+ * The header of the "kernel" tree is kept in ve0.kern_header; the
-+ * header returned for the second tree is not stored.  Neither result is
-+ * checked and the function always returns 0.
-+ */
-+int prepare_sysctl(void)
-+{
-+ struct ve_struct *envid;
-+
-+ envid = set_exec_env(&ve0);
-+ ve0.kern_header = register_sysctl_table(root_table, 1);
-+ register_sysctl_table(root_table2, 0);
-+ (void)set_exec_env(envid);
-+ return 0;
-+}
-+
-+/*
-+ * Initialise VE0's sysctl list head and, with CONFIG_SYSCTL, create its
-+ * "sys" proc directory.
-+ */
-+void prepare_ve0_sysctl(void)
-+{
-+ INIT_LIST_HEAD(&ve0.sysctl_lh);
-+#ifdef CONFIG_SYSCTL
-+ ve0.proc_sys_root = proc_mkdir("sys", 0);
-+#endif
-+}
-+
-+/*
-+ * ------------------------------------------------------------------------
-+ * XXX init_ve_system
-+ * ------------------------------------------------------------------------
-+ */
-+
-+/*
-+ * Set up VE0.
-+ *
-+ * Takes a reference on VE0, attaches the idle tasks and every existing
-+ * thread to it, records child_reaper as its init task, grants it the
-+ * full default capability set, points it at the global IPv4 device
-+ * configuration and at init's filesystem root, and finally runs the
-+ * proc, sysctl and IPC preparation steps.
-+ */
-+void init_ve_system(void)
-+{
-+ struct task_struct *init_entry, *p, *tsk;
-+ struct ve_struct *ptr;
-+ unsigned long flags;
-+ int i;
-+
-+ ptr = get_ve0();
-+ (void)get_ve(ptr);
-+ /* start at 1; prepare_ve0_process() adds one per task with a pid */
-+ atomic_set(&ptr->pcounter, 1);
-+
-+ /* Don't forget about idle tasks */
-+ write_lock_irqsave(&tasklist_lock, flags);
-+ for (i = 0; i < NR_CPUS; i++) {
-+ tsk = idle_task(i);
-+ if (tsk == NULL)
-+ continue;
-+
-+ prepare_ve0_process(tsk);
-+ }
-+ do_each_thread_all(p, tsk) {
-+ prepare_ve0_process(tsk);
-+ } while_each_thread_all(p, tsk);
-+ write_unlock_irqrestore(&tasklist_lock, flags);
-+
-+ init_entry = child_reaper;
-+ ptr->init_entry = init_entry;
-+ /* XXX: why? */
-+ cap_set_full(ptr->cap_default);
-+
-+ ptr->_ipv4_devconf = &ipv4_devconf;
-+ ptr->_ipv4_devconf_dflt = &ipv4_devconf_dflt;
-+
-+ /* VE0's root is whatever init's root is at this point */
-+ read_lock(&init_entry->fs->lock);
-+ ptr->fs_rootmnt = init_entry->fs->rootmnt;
-+ ptr->fs_root = init_entry->fs->root;
-+ read_unlock(&init_entry->fs->lock);
-+
-+ /* common prepares */
-+#ifdef CONFIG_PROC_FS
-+ prepare_proc();
-+#endif
-+ prepare_sysctl();
-+ prepare_ipc();
-+}
-diff -upr linux-2.6.16.orig/kernel/vzdev.c linux-2.6.16-026test009/kernel/vzdev.c
---- linux-2.6.16.orig/kernel/vzdev.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/vzdev.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,126 @@
-+/*
-+ * kernel/vzdev.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/list.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/vzctl.h>
-+#include <linux/slab.h>
-+#include <linux/vmalloc.h>
-+#include <linux/vzcalluser.h>
-+#include <asm/uaccess.h>
-+#include <asm/pgalloc.h>
-+#include <linux/device.h>
-+
-+#define VZCTL_MAJOR 126
-+#define VZCTL_NAME "vzctl"
-+
-+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
-+MODULE_DESCRIPTION("Virtuozzo Interface");
-+MODULE_LICENSE("GPL v2");
-+
-+/* Registered vzioctlinfo descriptors; the list is guarded by ioctl_lock. */
-+static LIST_HEAD(ioctls);
-+static spinlock_t ioctl_lock = SPIN_LOCK_UNLOCKED;
-+
-+/*
-+ * ioctl entry point of the vzctl character device.
-+ *
-+ * The command is dispatched to the first registered descriptor whose
-+ * type equals _IOC_TYPE(cmd).  A reference on the owning module is held
-+ * for the duration of the call; the list lock is dropped before the
-+ * handler runs.
-+ *
-+ * Returns the handler's result, -EBUSY if the module reference cannot
-+ * be taken, or -ENOTTY if no descriptor matches.
-+ */
-+int vzctl_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ int err;
-+ struct list_head *p;
-+ struct vzioctlinfo *inf;
-+
-+ err = -ENOTTY;
-+ spin_lock(&ioctl_lock);
-+ list_for_each(p, &ioctls) {
-+ inf = list_entry(p, struct vzioctlinfo, list);
-+ if (inf->type != _IOC_TYPE(cmd))
-+ continue;
-+
-+ /* pin the handler's module before the lock is released */
-+ err = try_module_get(inf->owner) ? 0 : -EBUSY;
-+ spin_unlock(&ioctl_lock);
-+ if (!err) {
-+ err = (*inf->func)(ino, file, cmd, arg);
-+ module_put(inf->owner);
-+ }
-+ return err;
-+ }
-+ spin_unlock(&ioctl_lock);
-+ return err;
-+}
-+
-+/* Add @inf to the front of the list of ioctl descriptors. */
-+void vzioctl_register(struct vzioctlinfo *inf)
-+{
-+ spin_lock(&ioctl_lock);
-+ list_add(&inf->list, &ioctls);
-+ spin_unlock(&ioctl_lock);
-+}
-+
-+/* Remove @inf from the list of ioctl descriptors. */
-+void vzioctl_unregister(struct vzioctlinfo *inf)
-+{
-+ spin_lock(&ioctl_lock);
-+ list_del_init(&inf->list);
-+ spin_unlock(&ioctl_lock);
-+}
-+
-+EXPORT_SYMBOL(vzioctl_register);
-+EXPORT_SYMBOL(vzioctl_unregister);
-+
-+/*
-+ * Init/exit stuff.
-+ */
-+/* File operations of the vzctl character device: ioctl only. */
-+static struct file_operations vzctl_fops = {
-+ .owner = THIS_MODULE,
-+ .ioctl = vzctl_ioctl,
-+};
-+
-+/* Device class created in vzctl_init() and destroyed in vzctl_exit(). */
-+static struct class *vzctl_class;
-+
-+/* Module exit: undo vzctl_init() in reverse order. */
-+static void __exit vzctl_exit(void)
-+{
-+ class_device_destroy(vzctl_class, MKDEV(VZCTL_MAJOR, 0));
-+ class_destroy(vzctl_class);
-+ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
-+}
-+
-+/*
-+ * Module init: register the character device on VZCTL_MAJOR, create the
-+ * "vzctl" class and a class device for minor 0.  Earlier steps are
-+ * undone if a later one fails.
-+ *
-+ * Returns the register_chrdev() result on success, otherwise the
-+ * negative error of the failing step.
-+ */
-+static int __init vzctl_init(void)
-+{
-+ int ret;
-+ struct class_device *class_err;
-+
-+ ret = register_chrdev(VZCTL_MAJOR, VZCTL_NAME, &vzctl_fops);
-+ if (ret < 0)
-+ goto out;
-+
-+ vzctl_class = class_create(THIS_MODULE, "vzctl");
-+ if (IS_ERR(vzctl_class)) {
-+ ret = PTR_ERR(vzctl_class);
-+ goto out_cleandev;
-+ }
-+
-+ class_err = class_device_create(vzctl_class, NULL, MKDEV(VZCTL_MAJOR, 0),
-+ NULL, VZCTL_NAME);
-+ if (IS_ERR(class_err)) {
-+ ret = PTR_ERR(class_err);
-+ goto out_rmclass;
-+ }
-+
-+ /* success: skip the unwind labels */
-+ goto out;
-+
-+out_rmclass:
-+ class_destroy(vzctl_class);
-+out_cleandev:
-+ unregister_chrdev(VZCTL_MAJOR, VZCTL_NAME);
-+out:
-+ return ret;
-+}
-+
-+module_init(vzctl_init)
-+module_exit(vzctl_exit);
-diff -upr linux-2.6.16.orig/kernel/vzwdog.c linux-2.6.16-026test009/kernel/vzwdog.c
---- linux-2.6.16.orig/kernel/vzwdog.c 2006-04-19 15:02:49.000000000 +0400
-+++ linux-2.6.16-026test009/kernel/vzwdog.c 2006-04-19 15:02:12.000000000 +0400
-@@ -0,0 +1,278 @@
-+/*
-+ * kernel/vzwdog.c
-+ *
-+ * Copyright (C) 2000-2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/list.h>
-+#include <linux/ctype.h>
-+#include <linux/kobject.h>
-+#include <linux/genhd.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/smp_lock.h>
-+#include <linux/errno.h>
-+#include <linux/suspend.h>
-+#include <linux/ve.h>
-+#include <linux/vzstat.h>
-+
-+/* Stuff regarding the kernel thread polling VE validity */
-+/* seconds between two reports; settable as a module parameter */
-+static int sleep_timeout = 60;
-+/* pid returned by kernel_thread() in wdog_init() */
-+static pid_t wdog_thread_pid;
-+/* cleared by wdog_exit() to make wdog_loop() leave its loop */
-+static int wdog_thread_continue = 1;
-+/* completed by wdog_loop() when the thread exits */
-+static DECLARE_COMPLETION(license_thread_exited);
-+
-+extern void show_mem(void);
-+extern struct ve_struct *ve_list_head;
-+
-+#if 0
-+/* Disabled code: buffer and parser for the textual IRQ list. */
-+static char page[PAGE_SIZE];
-+
-+/*
-+ * Print the lines of page[0..len) one by one.  A line is skipped when
-+ * the text after its first ':' (ignoring blanks and '0' characters)
-+ * does not start with a digit.
-+ */
-+static void parse_irq_list(int len)
-+{
-+ int i, k, skip;
-+ for (i = 0; i < len; ) {
-+ k = i;
-+ while (i < len && page[i] != '\n' && page[i] != ':')
-+ i++;
-+ skip = 0;
-+ if (i < len && page[i] != '\n') {
-+ i++; /* skip ':' */
-+ while (i < len && (page[i] == ' ' || page[i] == '0'))
-+ i++;
-+ skip = (i < len && (page[i] < '0' || page[i] > '9'));
-+ while (i < len && page[i] != '\n')
-+ i++;
-+ }
-+ if (!skip)
-+ printk("\n%.*s", i - k, page + k);
-+ if (i < len)
-+ i++; /* skip '\n' */
-+ }
-+}
-+#endif
-+
-+/* Currently a no-op: the IRQ list dump is compiled out with #if 0. */
-+static void show_irq_list(void)
-+{
-+#if 0
-+ i = KSYMSAFECALL(int, get_irq_list, (page));
-+ parse_irq_list(i); /* Safe, zero was returned if unassigned */
-+#endif
-+}
-+
-+/*
-+ * Print one "lat:" line with, for each allocation class, its label, the
-+ * last maximum latency and the three running averages.  The values are
-+ * sampled under kstat_glb_lock and printed after the lock is dropped.
-+ *
-+ * NOTE(review): the per-class format has no trailing separator, so
-+ * consecutive entries are printed back to back.
-+ */
-+static void show_alloc_latency(void)
-+{
-+ static const char *alloc_descr[KSTAT_ALLOCSTAT_NR] = {
-+ "A0",
-+ "L0",
-+ "H0",
-+ "L1",
-+ "H1"
-+ };
-+ int i;
-+
-+ printk("lat: ");
-+ for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
-+ struct kstat_lat_struct *p;
-+ cycles_t maxlat, avg0, avg1, avg2;
-+
-+ p = &kstat_glob.alloc_lat[i];
-+ spin_lock_irq(&kstat_glb_lock);
-+ maxlat = p->last.maxlat;
-+ avg0 = p->avg[0];
-+ avg1 = p->avg[1];
-+ avg2 = p->avg[2];
-+ spin_unlock_irq(&kstat_glb_lock);
-+
-+ printk("%s %Lu (%Lu %Lu %Lu)",
-+ alloc_descr[i],
-+ maxlat,
-+ avg0,
-+ avg1,
-+ avg2);
-+ }
-+ printk("\n");
-+}
-+
-+/*
-+ * Print the global scheduling latency: last max/total/count and the
-+ * three running averages.  The values are sampled under kstat_glb_lock
-+ * and printed after the lock is dropped.
-+ */
-+static void show_schedule_latency(void)
-+{
-+ struct kstat_lat_pcpu_struct *p;
-+ cycles_t maxlat, totlat, avg0, avg1, avg2;
-+ unsigned long count;
-+
-+ p = &kstat_glob.sched_lat;
-+ spin_lock_irq(&kstat_glb_lock);
-+ maxlat = p->last.maxlat;
-+ totlat = p->last.totlat;
-+ count = p->last.count;
-+ avg0 = p->avg[0];
-+ avg1 = p->avg[1];
-+ avg2 = p->avg[2];
-+ spin_unlock_irq(&kstat_glb_lock);
-+
-+ printk("sched lat: %Lu/%Lu/%lu (%Lu %Lu %Lu)\n",
-+ maxlat,
-+ totlat,
-+ count,
-+ avg0,
-+ avg1,
-+ avg2);
-+}
-+
-+/*
-+ * Print the report banner: wall-clock time, uptime in jiffies, the CPU
-+ * the watchdog is running on, and the timer constants (with
-+ * cycles_per_jiffy only when CONFIG_FAIRSCHED is set).
-+ */
-+static void show_header(void)
-+{
-+ struct timeval tv;
-+
-+ do_gettimeofday(&tv);
-+ printk("*** VZWDOG 1.14: time %lu.%06lu uptime %Lu CPU %d ***\n",
-+ tv.tv_sec, tv.tv_usec,
-+ get_jiffies_64(), smp_processor_id());
-+#ifdef CONFIG_FAIRSCHED
-+ printk("*** cycles_per_jiffy %lu jiffies_per_second %u ***\n",
-+ cycles_per_jiffy, HZ);
-+#else
-+ printk("*** jiffies_per_second %u ***\n", HZ);
-+#endif
-+}
-+
-+/*
-+ * Print one "pgdat:" line listing, for every memory node, its id, start
-+ * pfn, present and spanned page counts and the mem_map address.
-+ */
-+static void show_pgdatinfo(void)
-+{
-+ pg_data_t *pgdat;
-+
-+ printk("pgdat:");
-+ for_each_pgdat(pgdat) {
-+ printk(" %d: %lu,%lu,%lu,%p",
-+ pgdat->node_id,
-+ pgdat->node_start_pfn,
-+ pgdat->node_present_pages,
-+ pgdat->node_spanned_pages,
-+ pgdat->node_mem_map);
-+ }
-+ printk("\n");
-+}
-+
-+/*
-+ * Print read/write I/O counters (ios, sectors, merges) for every disk
-+ * registered in block_subsys.  Disks named "loop<digit>..." or
-+ * "ram<digit>..." are skipped.  The list is walked under the subsystem
-+ * rwsem held for reading.
-+ */
-+static void show_diskio(void)
-+{
-+ struct gendisk *gd;
-+ char buf[BDEVNAME_SIZE];
-+
-+ printk("disk_io: ");
-+
-+ down_read(&block_subsys.rwsem);
-+ list_for_each_entry(gd, &block_subsys.kset.list, kobj.entry) {
-+ char *name;
-+ name = disk_name(gd, 0, buf);
-+ if ((strlen(name) > 4) && (strncmp(name, "loop", 4) == 0) &&
-+ isdigit(name[4]))
-+ continue;
-+ if ((strlen(name) > 3) && (strncmp(name, "ram", 3) == 0) &&
-+ isdigit(name[3]))
-+ continue;
-+ printk("(%u,%u) %s r(%u %u %u) w(%u %u %u)\n",
-+ gd->major, gd->first_minor,
-+ name,
-+ disk_stat_read(gd, ios[READ]),
-+ disk_stat_read(gd, sectors[READ]),
-+ disk_stat_read(gd, merges[READ]),
-+ disk_stat_read(gd, ios[WRITE]),
-+ disk_stat_read(gd, sectors[WRITE]),
-+ disk_stat_read(gd, merges[WRITE]));
-+ }
-+ up_read(&block_subsys.rwsem);
-+
-+ printk("\n");
-+}
-+
-+/*
-+ * Print the number of VEs and the process counts per state (running,
-+ * sleeping, uninterruptible, zombie, dead, stopped) followed by the
-+ * total thread count.
-+ */
-+static void show_nrprocs(void)
-+{
-+ unsigned long _nr_running, _nr_sleeping,
-+ _nr_unint, _nr_zombie, _nr_dead, _nr_stopped;
-+
-+ _nr_running = nr_running();
-+ _nr_unint = nr_uninterruptible();
-+ _nr_sleeping = nr_sleeping();
-+ _nr_zombie = nr_zombie;
-+ _nr_dead = atomic_read(&nr_dead);
-+ _nr_stopped = nr_stopped();
-+
-+ printk("VEnum: %d, proc R %lu, S %lu, D %lu, "
-+ "Z %lu, X %lu, T %lu (tot %d)\n",
-+ nr_ve, _nr_running, _nr_sleeping, _nr_unint,
-+ _nr_zombie, _nr_dead, _nr_stopped, nr_threads);
-+}
-+
-+static void wdog_print(void)
-+{
-+ show_header();
-+ show_irq_list();
-+ show_pgdatinfo();
-+ show_mem();
-+ show_diskio();
-+ show_schedule_latency();
-+ show_alloc_latency();
-+ show_nrprocs();
-+}
-+
-+static int wdog_loop(void* data)
-+{
-+ struct task_struct *tsk = current;
-+ DECLARE_WAIT_QUEUE_HEAD(thread_wait_queue);
-+
-+ /*
-+ * This thread doesn't need any user-level access,
-+ * so get rid of all our resources
-+ */
-+ daemonize("wdogd");
-+
-+ spin_lock_irq(&tsk->sighand->siglock);
-+ sigfillset(&tsk->blocked);
-+ sigdelset(&tsk->blocked, SIGHUP);
-+ recalc_sigpending();
-+ spin_unlock_irq(&tsk->sighand->siglock);
-+
-+ while (wdog_thread_continue) {
-+ wdog_print();
-+ interruptible_sleep_on_timeout(&thread_wait_queue,
-+ sleep_timeout*HZ);
-+ try_to_freeze();
-+ /* clear all signals */
-+ if (signal_pending(tsk))
-+ flush_signals(tsk);
-+ }
-+
-+ complete_and_exit(&license_thread_exited, 0);
-+}
-+
-+static int __init wdog_init(void)
-+{
-+ wdog_thread_pid = kernel_thread(wdog_loop, NULL, 0);
-+ if (wdog_thread_pid < 0)
-+ return wdog_thread_pid;
-+
-+ return 0;
-+}
-+
-+static void __exit wdog_exit(void)
-+{
-+ wdog_thread_continue = 0;
-+ if (wdog_thread_pid > 0) {
-+ kill_proc(wdog_thread_pid, SIGHUP, 1);
-+ wait_for_completion(&license_thread_exited);
-+ }
-+}
-+
-+module_param(sleep_timeout, int, 0);
-+MODULE_AUTHOR("SWsoft <info@sw-soft.com>");
-+MODULE_DESCRIPTION("Virtuozzo WDOG");
-+MODULE_LICENSE("GPL v2");
-+
-+module_init(wdog_init)
-+module_exit(wdog_exit)
-diff -upr linux-2.6.16.orig/lib/bust_spinlocks.c linux-2.6.16-026test009/lib/bust_spinlocks.c
---- linux-2.6.16.orig/lib/bust_spinlocks.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/lib/bust_spinlocks.c 2006-04-19 15:02:11.000000000 +0400
-@@ -20,19 +20,11 @@ void bust_spinlocks(int yes)
- if (yes) {
- oops_in_progress = 1;
- } else {
-- int loglevel_save = console_loglevel;
- #ifdef CONFIG_VT
- unblank_screen();
- #endif
- oops_in_progress = 0;
-- /*
-- * OK, the message is on the console. Now we call printk()
-- * without oops_in_progress set so that printk() will give klogd
-- * and the blanked console a poke. Hold onto your hats...
-- */
-- console_loglevel = 15; /* NMI oopser may have shut the console up */
-- printk(" ");
-- console_loglevel = loglevel_save;
-+ wake_up_klogd();
- }
- }
-
-diff -upr linux-2.6.16.orig/mm/filemap_xip.c linux-2.6.16-026test009/mm/filemap_xip.c
---- linux-2.6.16.orig/mm/filemap_xip.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/filemap_xip.c 2006-04-19 15:02:12.000000000 +0400
-@@ -190,7 +190,10 @@ __xip_unmap (struct address_space * mapp
- flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
- page_remove_rmap(page);
-+ pb_remove_ref(page, mm);
-+ ub_unused_privvm_inc(mm, vma);
- dec_mm_counter(mm, file_rss);
-+ dec_vma_rss(vma);
- BUG_ON(pte_dirty(pteval));
- pte_unmap_unlock(pte, ptl);
- page_cache_release(page);
-diff -upr linux-2.6.16.orig/mm/fremap.c linux-2.6.16-026test009/mm/fremap.c
---- linux-2.6.16.orig/mm/fremap.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/fremap.c 2006-04-19 15:02:12.000000000 +0400
-@@ -20,6 +20,8 @@
- #include <asm/cacheflush.h>
- #include <asm/tlbflush.h>
-
-+#include <ub/ub_vmpages.h>
-+
- static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
- {
-@@ -34,6 +36,7 @@ static int zap_pte(struct mm_struct *mm,
- if (pte_dirty(pte))
- set_page_dirty(page);
- page_remove_rmap(page);
-+ pb_remove_ref(page, mm);
- page_cache_release(page);
- }
- } else {
-@@ -57,6 +60,10 @@ int install_page(struct mm_struct *mm, s
- pte_t *pte;
- pte_t pte_val;
- spinlock_t *ptl;
-+ struct page_beancounter *pbc;
-+
-+ if (unlikely(pb_alloc(&pbc)))
-+ goto out_nopb;
-
- pte = get_locked_pte(mm, addr, &ptl);
- if (!pte)
-@@ -75,11 +82,15 @@ int install_page(struct mm_struct *mm, s
- if (page_mapcount(page) > INT_MAX/2)
- goto unlock;
-
-- if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
-+ if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte)) {
-+ ub_unused_privvm_dec(mm, vma);
- inc_mm_counter(mm, file_rss);
-+ inc_vma_rss(vma);
-+ }
-
- flush_icache_page(vma, page);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
-+ pb_add_ref(page, mm, &pbc);
- page_add_file_rmap(page);
- pte_val = *pte;
- update_mmu_cache(vma, addr, pte_val);
-@@ -87,6 +98,8 @@ int install_page(struct mm_struct *mm, s
- unlock:
- pte_unmap_unlock(pte, ptl);
- out:
-+ pb_free(&pbc);
-+out_nopb:
- return err;
- }
- EXPORT_SYMBOL(install_page);
-@@ -109,7 +122,9 @@ int install_file_pte(struct mm_struct *m
-
- if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
- update_hiwater_rss(mm);
-+ ub_unused_privvm_inc(mm, vma);
- dec_mm_counter(mm, file_rss);
-+ dec_vma_rss(vma);
- }
-
- set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
-@@ -220,4 +235,5 @@ asmlinkage long sys_remap_file_pages(uns
-
- return err;
- }
-+EXPORT_SYMBOL_GPL(sys_remap_file_pages);
-
-diff -upr linux-2.6.16.orig/mm/madvise.c linux-2.6.16-026test009/mm/madvise.c
---- linux-2.6.16.orig/mm/madvise.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/madvise.c 2006-04-19 15:02:11.000000000 +0400
-@@ -168,6 +168,9 @@ static long madvise_remove(struct vm_are
- return -EINVAL;
- }
-
-+ if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
-+ return -EACCES;
-+
- mapping = vma->vm_file->f_mapping;
-
- offset = (loff_t)(start - vma->vm_start)
-diff -upr linux-2.6.16.orig/mm/memory.c linux-2.6.16-026test009/mm/memory.c
---- linux-2.6.16.orig/mm/memory.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/memory.c 2006-04-19 15:02:12.000000000 +0400
-@@ -58,6 +58,8 @@
- #include <linux/swapops.h>
- #include <linux/elf.h>
-
-+#include <ub/ub_vmpages.h>
-+
- #ifndef CONFIG_NEED_MULTIPLE_NODES
- /* use the per-pgdat data instead for discontigmem - mbligh */
- unsigned long max_mapnr;
-@@ -81,6 +83,7 @@ unsigned long vmalloc_earlyreserve;
- EXPORT_SYMBOL(num_physpages);
- EXPORT_SYMBOL(high_memory);
- EXPORT_SYMBOL(vmalloc_earlyreserve);
-+EXPORT_SYMBOL_GPL(empty_zero_page);
-
- int randomize_va_space __read_mostly = 1;
-
-@@ -103,18 +106,21 @@ void pgd_clear_bad(pgd_t *pgd)
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
- }
-+EXPORT_SYMBOL_GPL(pgd_clear_bad);
-
- void pud_clear_bad(pud_t *pud)
- {
- pud_ERROR(*pud);
- pud_clear(pud);
- }
-+EXPORT_SYMBOL_GPL(pud_clear_bad);
-
- void pmd_clear_bad(pmd_t *pmd)
- {
- pmd_ERROR(*pmd);
- pmd_clear(pmd);
- }
-+EXPORT_SYMBOL_GPL(pmd_clear_bad);
-
- /*
- * Note: this doesn't free the actual pages themselves. That
-@@ -318,6 +324,7 @@ int __pte_alloc(struct mm_struct *mm, pm
- spin_unlock(&mm->page_table_lock);
- return 0;
- }
-+EXPORT_SYMBOL_GPL(__pte_alloc);
-
- int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
- {
-@@ -418,6 +425,7 @@ struct page *vm_normal_page(struct vm_ar
- */
- return pfn_to_page(pfn);
- }
-+EXPORT_SYMBOL_GPL(vm_normal_page);
-
- /*
- * copy one vm_area from one task to the other. Assumes the page tables
-@@ -428,7 +436,7 @@ struct page *vm_normal_page(struct vm_ar
- static inline void
- copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-- unsigned long addr, int *rss)
-+ unsigned long addr, int *rss, struct page_beancounter **pbc)
- {
- unsigned long vm_flags = vma->vm_flags;
- pte_t pte = *src_pte;
-@@ -471,6 +479,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
- if (page) {
- get_page(page);
- page_dup_rmap(page);
-+ pb_dup_ref(page, dst_mm, pbc);
- rss[!!PageAnon(page)]++;
- }
-
-@@ -478,20 +487,36 @@ out_set_pte:
- set_pte_at(dst_mm, addr, dst_pte, pte);
- }
-
-+#define pte_ptrs(a) (PTRS_PER_PTE - ((a >> PAGE_SHIFT)&(PTRS_PER_PTE - 1)))
-+#ifdef CONFIG_USER_RESOURCE
-+#define same_ub(mm1, mm2) ((mm1)->mm_ub == (mm2)->mm_ub)
-+#else
-+#define same_ub(mm1, mm2) (1)
-+#endif
-+
- static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-- pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
-+ pmd_t *dst_pmd, pmd_t *src_pmd,
-+ struct vm_area_struct *dst_vma,
-+ struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
- {
- pte_t *src_pte, *dst_pte;
- spinlock_t *src_ptl, *dst_ptl;
- int progress = 0;
-- int rss[2];
-+ int rss[2], rss_tot;
-+ struct page_beancounter *pbc;
-+ int err;
-
-+ err = -ENOMEM;
-+ pbc = same_ub(src_mm, dst_mm) ? PBC_COPY_SAME : NULL;
- again:
-+ if (pbc != PBC_COPY_SAME && pb_alloc_list(&pbc, pte_ptrs(addr)))
-+ goto out;
- rss[1] = rss[0] = 0;
- dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
- if (!dst_pte)
-- return -ENOMEM;
-+ goto out;
-+
- src_pte = pte_offset_map_nested(src_pmd, addr);
- src_ptl = pte_lockptr(src_mm, src_pmd);
- spin_lock(src_ptl);
-@@ -512,22 +537,32 @@ again:
- progress++;
- continue;
- }
-- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
-+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-+ vma, addr, rss, &pbc);
- progress += 8;
- } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
-
- spin_unlock(src_ptl);
- pte_unmap_nested(src_pte - 1);
-+ rss_tot = rss[0] + rss[1];
-+ add_vma_rss(dst_vma, rss_tot);
-+ ub_unused_privvm_sub(dst_mm, dst_vma, rss_tot);
- add_mm_rss(dst_mm, rss[0], rss[1]);
- pte_unmap_unlock(dst_pte - 1, dst_ptl);
- cond_resched();
- if (addr != end)
- goto again;
-- return 0;
-+
-+ err = 0;
-+out:
-+ pb_free_list(&pbc);
-+ return err;
- }
-
- static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-- pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
-+ pud_t *dst_pud, pud_t *src_pud,
-+ struct vm_area_struct *dst_vma,
-+ struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
- {
- pmd_t *src_pmd, *dst_pmd;
-@@ -542,14 +577,16 @@ static inline int copy_pmd_range(struct
- if (pmd_none_or_clear_bad(src_pmd))
- continue;
- if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-- vma, addr, next))
-+ dst_vma, vma, addr, next))
- return -ENOMEM;
- } while (dst_pmd++, src_pmd++, addr = next, addr != end);
- return 0;
- }
-
- static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-- pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
-+ pgd_t *dst_pgd, pgd_t *src_pgd,
-+ struct vm_area_struct *dst_vma,
-+ struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
- {
- pud_t *src_pud, *dst_pud;
-@@ -564,19 +601,20 @@ static inline int copy_pud_range(struct
- if (pud_none_or_clear_bad(src_pud))
- continue;
- if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-- vma, addr, next))
-+ dst_vma, vma, addr, next))
- return -ENOMEM;
- } while (dst_pud++, src_pud++, addr = next, addr != end);
- return 0;
- }
-
--int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-- struct vm_area_struct *vma)
-+int __copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *vma,
-+ unsigned long addr, size_t size)
- {
-+ struct mm_struct *dst_mm = dst_vma->vm_mm;
-+ struct mm_struct *src_mm = vma->vm_mm;
- pgd_t *src_pgd, *dst_pgd;
- unsigned long next;
-- unsigned long addr = vma->vm_start;
-- unsigned long end = vma->vm_end;
-+ unsigned long end = addr + size;
-
- /*
- * Don't copy ptes where a page fault will fill them correctly.
-@@ -599,11 +637,22 @@ int copy_page_range(struct mm_struct *ds
- if (pgd_none_or_clear_bad(src_pgd))
- continue;
- if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-- vma, addr, next))
-+ dst_vma, vma, addr, next))
- return -ENOMEM;
- } while (dst_pgd++, src_pgd++, addr = next, addr != end);
- return 0;
- }
-+EXPORT_SYMBOL_GPL(__copy_page_range);
-+
-+int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-+ struct vm_area_struct *dst_vma, struct vm_area_struct *vma)
-+{
-+ if (dst_vma->vm_mm != dst)
-+ BUG();
-+ if (vma->vm_mm != src)
-+ BUG();
-+ return __copy_page_range(dst_vma, vma, vma->vm_start, vma->vm_end-vma->vm_start);
-+}
-
- static unsigned long zap_pte_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma, pmd_t *pmd,
-@@ -615,6 +664,7 @@ static unsigned long zap_pte_range(struc
- spinlock_t *ptl;
- int file_rss = 0;
- int anon_rss = 0;
-+ int rss;
-
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
- do {
-@@ -668,6 +718,7 @@ static unsigned long zap_pte_range(struc
- file_rss--;
- }
- page_remove_rmap(page);
-+ pb_remove_ref(page, mm);
- tlb_remove_page(tlb, page);
- continue;
- }
-@@ -682,6 +733,9 @@ static unsigned long zap_pte_range(struc
- pte_clear_full(mm, addr, pte, tlb->fullmm);
- } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
-
-+ rss = -(file_rss + anon_rss);
-+ ub_unused_privvm_add(mm, vma, rss);
-+ sub_vma_rss(vma, rss);
- add_mm_rss(mm, file_rss, anon_rss);
- pte_unmap_unlock(pte - 1, ptl);
-
-@@ -1087,12 +1141,14 @@ int get_user_pages(struct task_struct *t
- }
- EXPORT_SYMBOL(get_user_pages);
-
--static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
-+static int zeromap_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t prot)
- {
- pte_t *pte;
- spinlock_t *ptl;
-+ struct mm_struct *mm;
-
-+ mm = vma->vm_mm;
- pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (!pte)
- return -ENOMEM;
-@@ -1102,6 +1158,7 @@ static int zeromap_pte_range(struct mm_s
- page_cache_get(page);
- page_add_file_rmap(page);
- inc_mm_counter(mm, file_rss);
-+ inc_vma_rss(vma);
- BUG_ON(!pte_none(*pte));
- set_pte_at(mm, addr, pte, zero_pte);
- } while (pte++, addr += PAGE_SIZE, addr != end);
-@@ -1109,35 +1166,35 @@ static int zeromap_pte_range(struct mm_s
- return 0;
- }
-
--static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
-+static inline int zeromap_pmd_range(struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end, pgprot_t prot)
- {
- pmd_t *pmd;
- unsigned long next;
-
-- pmd = pmd_alloc(mm, pud, addr);
-+ pmd = pmd_alloc(vma->vm_mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
-- if (zeromap_pte_range(mm, pmd, addr, next, prot))
-+ if (zeromap_pte_range(vma, pmd, addr, next, prot))
- return -ENOMEM;
- } while (pmd++, addr = next, addr != end);
- return 0;
- }
-
--static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
-+static inline int zeromap_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end, pgprot_t prot)
- {
- pud_t *pud;
- unsigned long next;
-
-- pud = pud_alloc(mm, pgd, addr);
-+ pud = pud_alloc(vma->vm_mm, pgd, addr);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
-- if (zeromap_pmd_range(mm, pud, addr, next, prot))
-+ if (zeromap_pmd_range(vma, pud, addr, next, prot))
- return -ENOMEM;
- } while (pud++, addr = next, addr != end);
- return 0;
-@@ -1149,15 +1206,14 @@ int zeromap_page_range(struct vm_area_st
- pgd_t *pgd;
- unsigned long next;
- unsigned long end = addr + size;
-- struct mm_struct *mm = vma->vm_mm;
- int err;
-
- BUG_ON(addr >= end);
-- pgd = pgd_offset(mm, addr);
-+ pgd = pgd_offset(vma->vm_mm, addr);
- flush_cache_range(vma, addr, end);
- do {
- next = pgd_addr_end(addr, end);
-- err = zeromap_pud_range(mm, pgd, addr, next, prot);
-+ err = zeromap_pud_range(vma, pgd, addr, next, prot);
- if (err)
- break;
- } while (pgd++, addr = next, addr != end);
-@@ -1183,11 +1239,14 @@ pte_t * fastcall get_locked_pte(struct m
- * old drivers should use this, and they needed to mark their
- * pages reserved for the old functions anyway.
- */
--static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
-+static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot)
- {
- int retval;
- pte_t *pte;
-- spinlock_t *ptl;
-+ spinlock_t *ptl;
-+ struct mm_struct *mm;
-+
-+ mm = vma->vm_mm;
-
- retval = -EINVAL;
- if (PageAnon(page))
-@@ -1204,6 +1263,7 @@ static int insert_page(struct mm_struct
- /* Ok, finally just insert the thing.. */
- get_page(page);
- inc_mm_counter(mm, file_rss);
-+ inc_vma_rss(vma);
- page_add_file_rmap(page);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
-
-@@ -1240,7 +1300,7 @@ int vm_insert_page(struct vm_area_struct
- if (!page_count(page))
- return -EINVAL;
- vma->vm_flags |= VM_INSERTPAGE;
-- return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
-+ return insert_page(vma, addr, page, vma->vm_page_prot);
- }
- EXPORT_SYMBOL(vm_insert_page);
-
-@@ -1449,6 +1509,7 @@ static int do_wp_page(struct mm_struct *
- struct page *old_page, *new_page;
- pte_t entry;
- int ret = VM_FAULT_MINOR;
-+ struct page_beancounter *pbc;
-
- old_page = vm_normal_page(vma, address, orig_pte);
- if (!old_page)
-@@ -1476,6 +1537,9 @@ static int do_wp_page(struct mm_struct *
- gotten:
- pte_unmap_unlock(page_table, ptl);
-
-+ if (unlikely(pb_alloc(&pbc)))
-+ goto oom_nopb;
-+
- if (unlikely(anon_vma_prepare(vma)))
- goto oom;
- if (old_page == ZERO_PAGE(address)) {
-@@ -1496,12 +1560,16 @@ gotten:
- if (likely(pte_same(*page_table, orig_pte))) {
- if (old_page) {
- page_remove_rmap(old_page);
-+ pb_remove_ref(old_page, mm);
- if (!PageAnon(old_page)) {
- dec_mm_counter(mm, file_rss);
- inc_mm_counter(mm, anon_rss);
- }
-- } else
-+ } else {
-+ ub_unused_privvm_dec(mm, vma);
- inc_mm_counter(mm, anon_rss);
-+ inc_vma_rss(vma);
-+ }
- flush_cache_page(vma, address, pte_pfn(orig_pte));
- entry = mk_pte(new_page, vma->vm_page_prot);
- entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-@@ -1510,6 +1578,7 @@ gotten:
- lazy_mmu_prot_update(entry);
- lru_cache_add_active(new_page);
- page_add_new_anon_rmap(new_page, vma, address);
-+ pb_add_ref(new_page, mm, &pbc);
-
- /* Free the old page.. */
- new_page = old_page;
-@@ -1519,10 +1588,13 @@ gotten:
- page_cache_release(new_page);
- if (old_page)
- page_cache_release(old_page);
-+ pb_free(&pbc);
- unlock:
- pte_unmap_unlock(page_table, ptl);
- return ret;
- oom:
-+ pb_free(&pbc);
-+oom_nopb:
- if (old_page)
- page_cache_release(old_page);
- return VM_FAULT_OOM;
-@@ -1877,10 +1949,16 @@ static int do_swap_page(struct mm_struct
- swp_entry_t entry;
- pte_t pte;
- int ret = VM_FAULT_MINOR;
-+ struct page_beancounter *pbc;
-+ cycles_t start;
-
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
-- goto out;
-+ goto out_nostat;
-+
-+ if (unlikely(pb_alloc(&pbc)))
-+ return VM_FAULT_OOM;
-
-+ start = get_cycles();
- entry = pte_to_swp_entry(orig_pte);
- again:
- page = lookup_swap_cache(entry);
-@@ -1928,6 +2006,8 @@ again:
- /* The page isn't present yet, go ahead with the fault. */
-
- inc_mm_counter(mm, anon_rss);
-+ inc_vma_rss(vma);
-+ ub_swapin_inc(mm);
- pte = mk_pte(page, vma->vm_page_prot);
- if (write_access && can_share_swap_page(page)) {
- pte = maybe_mkwrite(pte_mkdirty(pte), vma);
-@@ -1937,6 +2017,8 @@ again:
- flush_icache_page(vma, page);
- set_pte_at(mm, address, page_table, pte);
- page_add_anon_rmap(page, vma, address);
-+ pb_add_ref(page, mm, &pbc);
-+ ub_unused_privvm_dec(mm, vma);
-
- swap_free(entry);
- if (vm_swap_full())
-@@ -1947,7 +2029,7 @@ again:
- if (do_wp_page(mm, vma, address,
- page_table, pmd, ptl, pte) == VM_FAULT_OOM)
- ret = VM_FAULT_OOM;
-- goto out;
-+ goto out_wp;
- }
-
- /* No need to invalidate - it was non-present before */
-@@ -1955,10 +2037,16 @@ again:
- lazy_mmu_prot_update(pte);
- unlock:
- pte_unmap_unlock(page_table, ptl);
--out:
-+out_wp:
-+ pb_free(&pbc);
-+ spin_lock_irq(&kstat_glb_lock);
-+ KSTAT_LAT_ADD(&kstat_glob.swap_in, get_cycles() - start);
-+ spin_unlock_irq(&kstat_glb_lock);
-+out_nostat:
- return ret;
- out_nomap:
- pte_unmap_unlock(page_table, ptl);
-+ pb_free(&pbc);
- unlock_page(page);
- page_cache_release(page);
- return ret;
-@@ -1976,11 +2064,15 @@ static int do_anonymous_page(struct mm_s
- struct page *page;
- spinlock_t *ptl;
- pte_t entry;
-+ struct page_beancounter *pbc;
-
- if (write_access) {
- /* Allocate our own private page. */
- pte_unmap(page_table);
-
-+ if (unlikely(pb_alloc(&pbc)))
-+ goto oom_nopb;
-+
- if (unlikely(anon_vma_prepare(vma)))
- goto oom;
- page = alloc_zeroed_user_highpage(vma, address);
-@@ -1996,7 +2088,10 @@ static int do_anonymous_page(struct mm_s
- inc_mm_counter(mm, anon_rss);
- lru_cache_add_active(page);
- page_add_new_anon_rmap(page, vma, address);
-+ pb_add_ref(page, mm, &pbc);
- } else {
-+ pbc = NULL;
-+
- /* Map the ZERO_PAGE - vm_page_prot is readonly */
- page = ZERO_PAGE(address);
- page_cache_get(page);
-@@ -2010,18 +2105,23 @@ static int do_anonymous_page(struct mm_s
- page_add_file_rmap(page);
- }
-
-+ inc_vma_rss(vma);
-+ ub_unused_privvm_dec(mm, vma);
- set_pte_at(mm, address, page_table, entry);
-
- /* No need to invalidate - it was non-present before */
- update_mmu_cache(vma, address, entry);
- lazy_mmu_prot_update(entry);
- unlock:
-+ pb_free(&pbc);
- pte_unmap_unlock(page_table, ptl);
- return VM_FAULT_MINOR;
- release:
- page_cache_release(page);
- goto unlock;
- oom:
-+ pb_free(&pbc);
-+oom_nopb:
- return VM_FAULT_OOM;
- }
-
-@@ -2049,6 +2149,7 @@ static int do_no_page(struct mm_struct *
- unsigned int sequence = 0;
- int ret = VM_FAULT_MINOR;
- int anon = 0;
-+ struct page_beancounter *pbc;
-
- pte_unmap(page_table);
- BUG_ON(vma->vm_flags & VM_PFNMAP);
-@@ -2058,6 +2159,9 @@ static int do_no_page(struct mm_struct *
- sequence = mapping->truncate_count;
- smp_rmb(); /* serializes i_size against truncate_count */
- }
-+
-+ if (unlikely(pb_alloc(&pbc)))
-+ goto oom_nopb;
- retry:
- new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
- /*
-@@ -2070,9 +2174,9 @@ retry:
-
- /* no page was available -- either SIGBUS or OOM */
- if (new_page == NOPAGE_SIGBUS)
-- return VM_FAULT_SIGBUS;
-+ goto bus_nopg;
- if (new_page == NOPAGE_OOM)
-- return VM_FAULT_OOM;
-+ goto oom_nopg;
-
- /*
- * Should we do an early C-O-W break?
-@@ -2131,6 +2235,9 @@ retry:
- inc_mm_counter(mm, file_rss);
- page_add_file_rmap(new_page);
- }
-+ inc_vma_rss(vma);
-+ pb_add_ref(new_page, mm, &pbc);
-+ ub_unused_privvm_dec(mm, vma);
- } else {
- /* One of our sibling threads was faster, back out. */
- page_cache_release(new_page);
-@@ -2142,10 +2249,18 @@ retry:
- lazy_mmu_prot_update(entry);
- unlock:
- pte_unmap_unlock(page_table, ptl);
-+ pb_free(&pbc);
- return ret;
- oom:
- page_cache_release(new_page);
-+oom_nopg:
-+ pb_free(&pbc);
-+oom_nopb:
- return VM_FAULT_OOM;
-+
-+bus_nopg:
-+ pb_free(&pbc);
-+ return VM_FAULT_SIGBUS;
- }
-
- /*
-@@ -2314,6 +2429,8 @@ int __pud_alloc(struct mm_struct *mm, pg
- }
- #endif /* __PAGETABLE_PUD_FOLDED */
-
-+EXPORT_SYMBOL_GPL(__pud_alloc);
-+
- #ifndef __PAGETABLE_PMD_FOLDED
- /*
- * Allocate page middle directory.
-@@ -2348,6 +2465,8 @@ int __pmd_alloc(struct mm_struct *mm, pu
- }
- #endif /* __PAGETABLE_PMD_FOLDED */
-
-+EXPORT_SYMBOL_GPL(__pmd_alloc);
-+
- int make_pages_present(unsigned long addr, unsigned long end)
- {
- int ret, len, write;
-diff -upr linux-2.6.16.orig/mm/mempolicy.c linux-2.6.16-026test009/mm/mempolicy.c
---- linux-2.6.16.orig/mm/mempolicy.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/mempolicy.c 2006-04-19 15:02:12.000000000 +0400
-@@ -933,7 +933,7 @@ asmlinkage long sys_migrate_pages(pid_t
-
- /* Find the mm_struct */
- read_lock(&tasklist_lock);
-- task = pid ? find_task_by_pid(pid) : current;
-+ task = pid ? find_task_by_pid_ve(pid) : current;
- if (!task) {
- read_unlock(&tasklist_lock);
- return -ESRCH;
-diff -upr linux-2.6.16.orig/mm/mempool.c linux-2.6.16-026test009/mm/mempool.c
---- linux-2.6.16.orig/mm/mempool.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/mempool.c 2006-04-19 15:02:11.000000000 +0400
-@@ -14,6 +14,7 @@
- #include <linux/mempool.h>
- #include <linux/blkdev.h>
- #include <linux/writeback.h>
-+#include <linux/kmem_cache.h>
-
- static void add_element(mempool_t *pool, void *element)
- {
-@@ -78,6 +79,8 @@ mempool_t *mempool_create_node(int min_n
- init_waitqueue_head(&pool->wait);
- pool->alloc = alloc_fn;
- pool->free = free_fn;
-+ if (alloc_fn == mempool_alloc_slab)
-+ kmem_mark_nocharge((kmem_cache_t *)pool_data);
-
- /*
- * First pre-allocate the guaranteed number of buffers.
-@@ -119,6 +122,7 @@ int mempool_resize(mempool_t *pool, int
- unsigned long flags;
-
- BUG_ON(new_min_nr <= 0);
-+ gfp_mask &= ~__GFP_UBC;
-
- spin_lock_irqsave(&pool->lock, flags);
- if (new_min_nr <= pool->min_nr) {
-@@ -212,6 +216,7 @@ void * mempool_alloc(mempool_t *pool, gf
- gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
- gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
- gfp_mask |= __GFP_NOWARN; /* failures are OK */
-+ gfp_mask &= ~__GFP_UBC;
-
- gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO);
-
-diff -upr linux-2.6.16.orig/mm/mlock.c linux-2.6.16-026test009/mm/mlock.c
---- linux-2.6.16.orig/mm/mlock.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/mlock.c 2006-04-19 15:02:12.000000000 +0400
-@@ -8,9 +8,11 @@
- #include <linux/capability.h>
- #include <linux/mman.h>
- #include <linux/mm.h>
-+#include <linux/module.h>
- #include <linux/mempolicy.h>
- #include <linux/syscalls.h>
-
-+#include <ub/ub_vmpages.h>
-
- static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
- unsigned long start, unsigned long end, unsigned int newflags)
-@@ -25,6 +27,14 @@ static int mlock_fixup(struct vm_area_st
- goto out;
- }
-
-+ if (newflags & VM_LOCKED) {
-+ ret = ub_locked_charge(mm, end - start);
-+ if (ret < 0) {
-+ *prev = vma;
-+ goto out;
-+ }
-+ }
-+
- pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
- vma->vm_file, pgoff, vma_policy(vma));
-@@ -38,13 +48,13 @@ static int mlock_fixup(struct vm_area_st
- if (start != vma->vm_start) {
- ret = split_vma(mm, vma, start, 1);
- if (ret)
-- goto out;
-+ goto out_uncharge;
- }
-
- if (end != vma->vm_end) {
- ret = split_vma(mm, vma, end, 0);
- if (ret)
-- goto out;
-+ goto out_uncharge;
- }
-
- success:
-@@ -63,13 +73,19 @@ success:
- pages = -pages;
- if (!(newflags & VM_IO))
- ret = make_pages_present(start, end);
-- }
-+ } else
-+ ub_locked_uncharge(mm, end - start);
-
- vma->vm_mm->locked_vm -= pages;
- out:
- if (ret == -ENOMEM)
- ret = -EAGAIN;
- return ret;
-+
-+out_uncharge:
-+ if (newflags & VM_LOCKED)
-+ ub_locked_uncharge(mm, end - start);
-+ goto out;
- }
-
- static int do_mlock(unsigned long start, size_t len, int on)
-@@ -146,6 +162,7 @@ asmlinkage long sys_mlock(unsigned long
- up_write(&current->mm->mmap_sem);
- return error;
- }
-+EXPORT_SYMBOL_GPL(sys_mlock);
-
- asmlinkage long sys_munlock(unsigned long start, size_t len)
- {
-@@ -158,6 +175,7 @@ asmlinkage long sys_munlock(unsigned lon
- up_write(&current->mm->mmap_sem);
- return ret;
- }
-+EXPORT_SYMBOL_GPL(sys_munlock);
-
- static int do_mlockall(int flags)
- {
-diff -upr linux-2.6.16.orig/mm/mmap.c linux-2.6.16-026test009/mm/mmap.c
---- linux-2.6.16.orig/mm/mmap.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/mmap.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,14 +25,18 @@
- #include <linux/mount.h>
- #include <linux/mempolicy.h>
- #include <linux/rmap.h>
-+#include <linux/virtinfo.h>
-
- #include <asm/uaccess.h>
- #include <asm/cacheflush.h>
- #include <asm/tlb.h>
-
-+#include <ub/ub_vmpages.h>
-+
- static void unmap_region(struct mm_struct *mm,
- struct vm_area_struct *vma, struct vm_area_struct *prev,
- unsigned long start, unsigned long end);
-+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft);
-
- /*
- * WARNING: the debugging will use recursive algorithms so never enable this
-@@ -87,6 +91,16 @@ int __vm_enough_memory(long pages, int c
-
- vm_acct_memory(pages);
-
-+ switch (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_ENOUGHMEM,
-+ (void *)pages)
-+ & (NOTIFY_OK | NOTIFY_FAIL)) {
-+ case NOTIFY_OK:
-+ return 0;
-+ case NOTIFY_FAIL:
-+ vm_unacct_memory(pages);
-+ return -ENOMEM;
-+ }
-+
- /*
- * Sometimes we want to use more memory than we have
- */
-@@ -201,11 +215,16 @@ static struct vm_area_struct *remove_vma
- struct vm_area_struct *next = vma->vm_next;
-
- might_sleep();
-+
-+ ub_memory_uncharge(vma->vm_mm, vma->vm_end - vma->vm_start,
-+ vma->vm_flags, vma->vm_file);
- if (vma->vm_ops && vma->vm_ops->close)
- vma->vm_ops->close(vma);
- if (vma->vm_file)
- fput(vma->vm_file);
- mpol_free(vma_policy(vma));
-+ if (get_vma_rss(vma))
-+ warn_bad_rss(vma, 0);
- kmem_cache_free(vm_area_cachep, vma);
- return next;
- }
-@@ -242,7 +261,7 @@ asmlinkage unsigned long sys_brk(unsigne
- goto out;
-
- /* Ok, looks good - let it rip. */
-- if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
-+ if (__do_brk(oldbrk, newbrk-oldbrk, UB_HARD) != oldbrk)
- goto out;
- set_brk:
- mm->brk = brk;
-@@ -726,7 +745,7 @@ struct vm_area_struct *vma_merge(struct
- else
- next = mm->mmap;
- area = next;
-- if (next && next->vm_end == end) /* cases 6, 7, 8 */
-+ if (next && next->vm_end == end) /* cases 6, 7, 8 */
- next = next->vm_next;
-
- /*
-@@ -746,11 +765,22 @@ struct vm_area_struct *vma_merge(struct
- is_mergeable_anon_vma(prev->anon_vma,
- next->anon_vma)) {
- /* cases 1, 6 */
-+ add_vma_rss(prev, get_vma_rss(next));
-+ if (area != next) /* case 6 */
-+ add_vma_rss(prev, get_vma_rss(area));
- vma_adjust(prev, prev->vm_start,
- next->vm_end, prev->vm_pgoff, NULL);
-- } else /* cases 2, 5, 7 */
-+ } else { /* cases 2, 5, 7 */
-+ if (next && addr == next->vm_start) { /* case 5 */
-+ unsigned long rss;
-+ rss = pages_in_vma_range(next, addr, end);
-+ sub_vma_rss(next, rss);
-+ add_vma_rss(prev, rss);
-+ } else if (area != next) /* case 7 */
-+ add_vma_rss(prev, get_vma_rss(area));
- vma_adjust(prev, prev->vm_start,
- end, prev->vm_pgoff, NULL);
-+ }
- return prev;
- }
-
-@@ -761,12 +791,19 @@ struct vm_area_struct *vma_merge(struct
- mpol_equal(policy, vma_policy(next)) &&
- can_vma_merge_before(next, vm_flags,
- anon_vma, file, pgoff+pglen)) {
-- if (prev && addr < prev->vm_end) /* case 4 */
-+ if (prev && addr < prev->vm_end) { /* case 4 */
-+ unsigned long rss;
-+ rss = pages_in_vma_range(prev, addr, end);
-+ sub_vma_rss(prev, rss);
-+ add_vma_rss(next, rss);
- vma_adjust(prev, prev->vm_start,
- addr, prev->vm_pgoff, NULL);
-- else /* cases 3, 8 */
-+ } else { /* cases 3, 8 */
-+ if (area != next) /* case 8 */
-+ add_vma_rss(area, get_vma_rss(next));
- vma_adjust(area, addr, next->vm_end,
- next->vm_pgoff - pglen, NULL);
-+ }
- return area;
- }
-
-@@ -1033,6 +1070,10 @@ munmap_back:
- }
- }
-
-+ if (ub_memory_charge(mm, len, vm_flags, file,
-+ (flags & MAP_EXECPRIO ? UB_SOFT : UB_HARD)))
-+ goto charge_error;
-+
- /*
- * Can we just expand an old private anonymous mapping?
- * The VM_SHARED test is necessary because shmem_zero_setup
-@@ -1048,7 +1089,8 @@ munmap_back:
- * specific mapper. the address has already been validated, but
- * not unmapped, but the maps are removed from the list.
- */
-- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
-+ (flags & MAP_EXECPRIO ? __GFP_SOFT_UBC : 0));
- if (!vma) {
- error = -ENOMEM;
- goto unacct_error;
-@@ -1142,6 +1184,8 @@ unmap_and_free_vma:
- free_vma:
- kmem_cache_free(vm_area_cachep, vma);
- unacct_error:
-+ ub_memory_uncharge(mm, len, vm_flags, file);
-+charge_error:
- if (charged)
- vm_unacct_memory(charged);
- return error;
-@@ -1471,12 +1515,16 @@ static int acct_stack_growth(struct vm_a
- return -ENOMEM;
- }
-
-+ if (ub_memory_charge(mm, grow << PAGE_SHIFT, vma->vm_flags,
-+ vma->vm_file, UB_SOFT))
-+ goto fail_charge;
-+
- /*
- * Overcommit.. This must be the final test, as it will
- * update security statistics.
- */
- if (security_vm_enough_memory(grow))
-- return -ENOMEM;
-+ goto fail_sec;
-
- /* Ok, everything looks good - let it rip */
- mm->total_vm += grow;
-@@ -1484,6 +1532,11 @@ static int acct_stack_growth(struct vm_a
- mm->locked_vm += grow;
- vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
- return 0;
-+
-+fail_sec:
-+ ub_memory_uncharge(mm, grow << PAGE_SHIFT, vma->vm_flags, vma->vm_file);
-+fail_charge:
-+ return -ENOMEM;
- }
-
- #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
-@@ -1744,8 +1797,13 @@ int split_vma(struct mm_struct * mm, str
- else
- vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
-
-+ /* protected with mmap sem */
-+ set_vma_rss(vma, pages_in_vma(vma));
-+ set_vma_rss(new, pages_in_vma(new));
-+
- return 0;
- }
-+EXPORT_SYMBOL_GPL(split_vma);
-
- /* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
-@@ -1839,7 +1897,7 @@ static inline void verify_mm_writelocked
- * anonymous maps. eventually we may be able to do some
- * brk-specific accounting here.
- */
--unsigned long do_brk(unsigned long addr, unsigned long len)
-+static unsigned long __do_brk(unsigned long addr, unsigned long len, int soft)
- {
- struct mm_struct * mm = current->mm;
- struct vm_area_struct * vma, * prev;
-@@ -1891,11 +1949,14 @@ unsigned long do_brk(unsigned long addr,
- if (mm->map_count > sysctl_max_map_count)
- return -ENOMEM;
-
-- if (security_vm_enough_memory(len >> PAGE_SHIFT))
-- return -ENOMEM;
--
- flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
-+ if (ub_memory_charge(mm, len, flags, NULL, soft))
-+ goto fail_charge;
-+
-+ if (security_vm_enough_memory(len >> PAGE_SHIFT))
-+ goto fail_sec;
-+
- /* Can we just expand an old private anonymous mapping? */
- if (vma_merge(mm, prev, addr, addr + len, flags,
- NULL, NULL, pgoff, NULL))
-@@ -1904,11 +1965,11 @@ unsigned long do_brk(unsigned long addr,
- /*
- * create a vma struct for an anonymous mapping
- */
-- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-- if (!vma) {
-- vm_unacct_memory(len >> PAGE_SHIFT);
-- return -ENOMEM;
-- }
-+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL |
-+ (soft == UB_SOFT ? __GFP_SOFT_UBC : 0));
-+ if (!vma)
-+ goto fail_alloc;
-+
- memset(vma, 0, sizeof(*vma));
-
- vma->vm_mm = mm;
-@@ -1925,8 +1986,19 @@ out:
- make_pages_present(addr, addr + len);
- }
- return addr;
-+
-+fail_alloc:
-+ vm_unacct_memory(len >> PAGE_SHIFT);
-+fail_sec:
-+ ub_memory_uncharge(mm, len, flags, NULL);
-+fail_charge:
-+ return -ENOMEM;
- }
-
-+unsigned long do_brk(unsigned long addr, unsigned long len)
-+{
-+ return __do_brk(addr, len, UB_SOFT);
-+}
- EXPORT_SYMBOL(do_brk);
-
- /* Release all mmaps. */
-@@ -2036,6 +2108,7 @@ struct vm_area_struct *copy_vma(struct v
- new_vma->vm_start = addr;
- new_vma->vm_end = addr + len;
- new_vma->vm_pgoff = pgoff;
-+ set_vma_rss(new_vma, 0);
- if (new_vma->vm_file)
- get_file(new_vma->vm_file);
- if (new_vma->vm_ops && new_vma->vm_ops->open)
-diff -upr linux-2.6.16.orig/mm/mprotect.c linux-2.6.16-026test009/mm/mprotect.c
---- linux-2.6.16.orig/mm/mprotect.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/mprotect.c 2006-04-19 15:02:12.000000000 +0400
-@@ -9,6 +9,7 @@
- */
-
- #include <linux/mm.h>
-+#include <linux/module.h>
- #include <linux/hugetlb.h>
- #include <linux/slab.h>
- #include <linux/shm.h>
-@@ -25,6 +26,8 @@
- #include <asm/cacheflush.h>
- #include <asm/tlbflush.h>
-
-+#include <ub/ub_vmpages.h>
-+
- static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t newprot)
- {
-@@ -109,12 +112,20 @@ mprotect_fixup(struct vm_area_struct *vm
- pgprot_t newprot;
- pgoff_t pgoff;
- int error;
-+ unsigned long ch_size;
-+ int ch_dir;
-
- if (newflags == oldflags) {
- *pprev = vma;
- return 0;
- }
-
-+ error = -ENOMEM;
-+ ch_size = nrpages - pages_in_vma_range(vma, start, end);
-+ ch_dir = ub_protected_charge(mm, ch_size, newflags, vma);
-+ if (ch_dir == PRIVVM_ERROR)
-+ goto fail_ch;
-+
- /*
- * If we make a private mapping writable we increase our commit;
- * but (without finer accounting) cannot reduce our commit if we
-@@ -127,7 +138,7 @@ mprotect_fixup(struct vm_area_struct *vm
- if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
- charged = nrpages;
- if (security_vm_enough_memory(charged))
-- return -ENOMEM;
-+ goto fail_sec;
- newflags |= VM_ACCOUNT;
- }
- }
-@@ -169,10 +180,16 @@ success:
- change_protection(vma, start, end, newprot);
- vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
- vm_stat_account(mm, newflags, vma->vm_file, nrpages);
-+ if (ch_dir == PRIVVM_TO_SHARED)
-+ __ub_unused_privvm_dec(mm, ch_size);
- return 0;
-
- fail:
- vm_unacct_memory(charged);
-+fail_sec:
-+ if (ch_dir == PRIVVM_TO_PRIVATE)
-+ __ub_unused_privvm_dec(mm, ch_size);
-+fail_ch:
- return error;
- }
-
-@@ -280,3 +297,4 @@ out:
- up_write(&current->mm->mmap_sem);
- return error;
- }
-+EXPORT_SYMBOL_GPL(sys_mprotect);
-diff -upr linux-2.6.16.orig/mm/mremap.c linux-2.6.16-026test009/mm/mremap.c
---- linux-2.6.16.orig/mm/mremap.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/mremap.c 2006-04-19 15:02:12.000000000 +0400
-@@ -23,6 +23,8 @@
- #include <asm/cacheflush.h>
- #include <asm/tlbflush.h>
-
-+#include <ub/ub_vmpages.h>
-+
- static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
- {
- pgd_t *pgd;
-@@ -106,6 +108,8 @@ static void move_ptes(struct vm_area_str
- pte = ptep_clear_flush(vma, old_addr, old_pte);
- /* ZERO_PAGE can be dependant on virtual addr */
- pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
-+ dec_vma_rss(vma);
-+ inc_vma_rss(new_vma);
- set_pte_at(mm, new_addr, new_pte, pte);
- }
-
-@@ -166,17 +170,21 @@ static unsigned long move_vma(struct vm_
- unsigned long hiwater_vm;
- int split = 0;
-
-+ if (ub_memory_charge(mm, new_len, vm_flags,
-+ vma->vm_file, UB_HARD))
-+ goto err;
-+
- /*
- * We'd prefer to avoid failure later on in do_munmap:
- * which may split one vma into three before unmapping.
- */
- if (mm->map_count >= sysctl_max_map_count - 3)
-- return -ENOMEM;
-+ goto err_nomem;
-
- new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
- new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
- if (!new_vma)
-- return -ENOMEM;
-+ goto err_nomem;
-
- moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
- if (moved_len < old_len) {
-@@ -235,7 +243,13 @@ static unsigned long move_vma(struct vm_
- new_addr + new_len);
- }
-
-- return new_addr;
-+ if (new_addr != -ENOMEM)
-+ return new_addr;
-+
-+err_nomem:
-+ ub_memory_uncharge(mm, new_len, vm_flags, vma->vm_file);
-+err:
-+ return -ENOMEM;
- }
-
- /*
-@@ -361,6 +375,11 @@ unsigned long do_mremap(unsigned long ad
- if (max_addr - addr >= new_len) {
- int pages = (new_len - old_len) >> PAGE_SHIFT;
-
-+ ret = -ENOMEM;
-+ if (ub_memory_charge(mm, new_len, vma->vm_flags,
-+ vma->vm_file, UB_HARD))
-+ goto out;
-+
- vma_adjust(vma, vma->vm_start,
- addr + new_len, vma->vm_pgoff, NULL);
-
-diff -upr linux-2.6.16.orig/mm/oom_kill.c linux-2.6.16-026test009/mm/oom_kill.c
---- linux-2.6.16.orig/mm/oom_kill.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/oom_kill.c 2006-04-19 15:02:12.000000000 +0400
-@@ -176,7 +176,7 @@ static struct task_struct *select_bad_pr
- *ppoints = 0;
-
- do_posix_clock_monotonic_gettime(&uptime);
-- do_each_thread(g, p) {
-+ do_each_thread_all(g, p) {
- unsigned long points;
- int releasing;
-
-@@ -205,7 +205,7 @@ static struct task_struct *select_bad_pr
- chosen = p;
- *ppoints = points;
- }
-- } while_each_thread(g, p);
-+ } while_each_thread_all(g, p);
- return chosen;
- }
-
-@@ -261,10 +261,10 @@ static struct mm_struct *oom_kill_task(t
- * kill all processes that share the ->mm (i.e. all threads),
- * but are in a different thread group
- */
-- do_each_thread(g, q)
-+ do_each_thread_all(g, q) {
- if (q->mm == mm && q->tgid != p->tgid)
- __oom_kill_task(q, message);
-- while_each_thread(g, q);
-+ } while_each_thread_all(g, q);
-
- return mm;
- }
-diff -upr linux-2.6.16.orig/mm/page_alloc.c linux-2.6.16-026test009/mm/page_alloc.c
---- linux-2.6.16.orig/mm/page_alloc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/page_alloc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -41,6 +41,8 @@
- #include <asm/tlbflush.h>
- #include "internal.h"
-
-+#include <ub/ub_mem.h>
-+
- /*
- * MCD - HACK: Find somewhere to initialize this EARLY, or make this
- * initializer cleaner
-@@ -50,6 +52,7 @@ EXPORT_SYMBOL(node_online_map);
- nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
- EXPORT_SYMBOL(node_possible_map);
- struct pglist_data *pgdat_list __read_mostly;
-+EXPORT_SYMBOL(pgdat_list);
- unsigned long totalram_pages __read_mostly;
- unsigned long totalhigh_pages __read_mostly;
- long nr_swap_pages;
-@@ -153,7 +156,8 @@ static void bad_page(struct page *page)
- 1 << PG_reclaim |
- 1 << PG_slab |
- 1 << PG_swapcache |
-- 1 << PG_writeback );
-+ 1 << PG_writeback |
-+ 1 << PG_buddy );
- set_page_count(page, 0);
- reset_page_mapcount(page);
- page->mapping = NULL;
-@@ -224,12 +228,12 @@ static inline unsigned long page_order(s
-
- static inline void set_page_order(struct page *page, int order) {
- set_page_private(page, order);
-- __SetPagePrivate(page);
-+ __SetPageBuddy(page);
- }
-
- static inline void rmv_page_order(struct page *page)
- {
-- __ClearPagePrivate(page);
-+ __ClearPageBuddy(page);
- set_page_private(page, 0);
- }
-
-@@ -268,11 +272,13 @@ __find_combined_index(unsigned long page
- * This function checks whether a page is free && is the buddy
- * we can do coalesce a page and its buddy if
- * (a) the buddy is not in a hole &&
-- * (b) the buddy is free &&
-- * (c) the buddy is on the buddy system &&
-- * (d) a page and its buddy have the same order.
-- * for recording page's order, we use page_private(page) and PG_private.
-+ * (b) the buddy is in the buddy system &&
-+ * (c) a page and its buddy have the same order.
- *
-+ * For recording whether a page is in the buddy system, we use PG_buddy.
-+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
-+ *
-+ * For recording page's order, we use page_private(page).
- */
- static inline int page_is_buddy(struct page *page, int order)
- {
-@@ -281,10 +287,10 @@ static inline int page_is_buddy(struct p
- return 0;
- #endif
-
-- if (PagePrivate(page) &&
-- (page_order(page) == order) &&
-- page_count(page) == 0)
-+ if (PageBuddy(page) && page_order(page) == order) {
-+ BUG_ON(page_count(page) != 0);
- return 1;
-+ }
- return 0;
- }
-
-@@ -301,7 +307,7 @@ static inline int page_is_buddy(struct p
- * as necessary, plus some accounting needed to play nicely with other
- * parts of the VM system.
- * At each level, we keep a list of pages, which are heads of continuous
-- * free pages of length of (1 << order) and marked with PG_Private.Page's
-+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
- * order is recorded in page_private(page) field.
- * So when we are allocating or freeing one, we can derive the state of the
- * other. That is, if we allocate a small block, and both were
-@@ -364,7 +370,8 @@ static inline int free_pages_check(struc
- 1 << PG_slab |
- 1 << PG_swapcache |
- 1 << PG_writeback |
-- 1 << PG_reserved ))))
-+ 1 << PG_reserved |
-+ 1 << PG_buddy ))))
- bad_page(page);
- if (PageDirty(page))
- __ClearPageDirty(page);
-@@ -434,6 +441,7 @@ static void __free_pages_ok(struct page
- return;
-
- kernel_map_pages(page, 1 << order, 0);
-+ ub_page_uncharge(page, order);
- local_irq_save(flags);
- __mod_page_state(pgfree, 1 << order);
- free_one_page(page_zone(page), page, order);
-@@ -522,7 +530,8 @@ static int prep_new_page(struct page *pa
- 1 << PG_slab |
- 1 << PG_swapcache |
- 1 << PG_writeback |
-- 1 << PG_reserved ))))
-+ 1 << PG_reserved |
-+ 1 << PG_buddy ))))
- bad_page(page);
-
- /*
-@@ -721,6 +730,7 @@ static void fastcall free_hot_cold_page(
- kernel_map_pages(page, 1, 0);
-
- pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
-+ ub_page_uncharge(page, 0);
- local_irq_save(flags);
- __inc_page_state(pgfree);
- list_add(&page->lru, &pcp->list);
-@@ -894,6 +904,26 @@ get_page_from_freelist(gfp_t gfp_mask, u
- return page;
- }
-
-+static void __alloc_collect_stats(unsigned int gfp_mask,
-+ unsigned int order, struct page *page, cycles_t time)
-+{
-+ int ind;
-+ unsigned long flags;
-+
-+ time = get_cycles() - time;
-+ if (!(gfp_mask & __GFP_WAIT))
-+ ind = 0;
-+ else if (!(gfp_mask & __GFP_HIGHMEM))
-+ ind = (order > 0 ? 2 : 1);
-+ else
-+ ind = (order > 0 ? 4 : 3);
-+ spin_lock_irqsave(&kstat_glb_lock, flags);
-+ KSTAT_LAT_ADD(&kstat_glob.alloc_lat[ind], time);
-+ if (!page)
-+ kstat_glob.alloc_fails[ind]++;
-+ spin_unlock_irqrestore(&kstat_glb_lock, flags);
-+}
-+
- /*
- * This is the 'heart' of the zoned buddy allocator.
- */
-@@ -909,6 +939,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned i
- int do_retry;
- int alloc_flags;
- int did_some_progress;
-+ cycles_t start;
-
- might_sleep_if(wait);
-
-@@ -920,6 +951,7 @@ restart:
- return NULL;
- }
-
-+ start = get_cycles();
- page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
- zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
- if (page)
-@@ -1038,6 +1070,7 @@ rebalance:
- }
-
- nopage:
-+ __alloc_collect_stats(gfp_mask, order, page, start);
- if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
- printk(KERN_WARNING "%s: page allocation failure."
- " order:%d, mode:0x%x\n",
-@@ -1045,7 +1078,13 @@ nopage:
- dump_stack();
- show_mem();
- }
-+ return NULL;
-+
- got_pg:
-+ if (ub_page_charge(page, order, gfp_mask)) {
-+ __free_pages(page, order);
-+ page = NULL;
-+ }
- return page;
- }
-
-@@ -2378,7 +2417,10 @@ static void *vmstat_start(struct seq_fil
- m->private = ps;
- if (!ps)
- return ERR_PTR(-ENOMEM);
-- get_full_page_state(ps);
-+ if (ve_is_super(get_exec_env()))
-+ get_full_page_state(ps);
-+ else
-+ memset(ps, 0, sizeof(*ps));
- ps->pgpgin /= 2; /* sectors -> kbytes */
- ps->pgpgout /= 2;
- return (unsigned long *)ps + *pos;
-diff -upr linux-2.6.16.orig/mm/rmap.c linux-2.6.16-026test009/mm/rmap.c
---- linux-2.6.16.orig/mm/rmap.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/rmap.c 2006-04-19 15:02:12.000000000 +0400
-@@ -56,6 +56,8 @@
-
- #include <asm/tlbflush.h>
-
-+#include <ub/ub_vmpages.h>
-+
- //#define RMAP_DEBUG /* can be enabled only for debugging */
-
- kmem_cache_t *anon_vma_cachep;
-@@ -117,6 +119,7 @@ int anon_vma_prepare(struct vm_area_stru
- }
- return 0;
- }
-+EXPORT_SYMBOL_GPL(anon_vma_prepare);
-
- void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
- {
-@@ -145,6 +148,7 @@ void anon_vma_link(struct vm_area_struct
- spin_unlock(&anon_vma->lock);
- }
- }
-+EXPORT_SYMBOL_GPL(anon_vma_link);
-
- void anon_vma_unlink(struct vm_area_struct *vma)
- {
-@@ -180,14 +184,15 @@ static void anon_vma_ctor(void *data, km
- void __init anon_vma_init(void)
- {
- anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
-- 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor, NULL);
-+ 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_UBC,
-+ anon_vma_ctor, NULL);
- }
-
- /*
- * Getting a lock on a stable anon_vma from a page off the LRU is
- * tricky: page_lock_anon_vma rely on RCU to guard against the races.
- */
--static struct anon_vma *page_lock_anon_vma(struct page *page)
-+struct anon_vma *page_lock_anon_vma(struct page *page)
- {
- struct anon_vma *anon_vma = NULL;
- unsigned long anon_mapping;
-@@ -205,6 +210,7 @@ out:
- rcu_read_unlock();
- return anon_vma;
- }
-+EXPORT_SYMBOL_GPL(page_lock_anon_vma);
-
- #ifdef CONFIG_MIGRATION
- /*
-@@ -220,6 +226,7 @@ void remove_from_swap(struct page *page)
- struct anon_vma *anon_vma;
- struct vm_area_struct *vma;
- unsigned long mapping;
-+ struct page_beancounter *pb;
-
- if (!PageSwapCache(page))
- return;
-@@ -229,6 +236,10 @@ void remove_from_swap(struct page *page)
- if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
- return;
-
-+ pb = NULL;
-+ if (pb_alloc_all(&pb))
-+ return;
-+
- /*
- * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
- */
-@@ -236,10 +247,12 @@ void remove_from_swap(struct page *page)
- spin_lock(&anon_vma->lock);
-
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-- remove_vma_swap(vma, page);
-+ remove_vma_swap(vma, page, &pb);
-
- spin_unlock(&anon_vma->lock);
- delete_from_swap_cache(page);
-+
-+ pb_free_list(&pb);
- }
- EXPORT_SYMBOL(remove_from_swap);
- #endif
-@@ -638,7 +651,11 @@ static int try_to_unmap_one(struct page
- } else
- dec_mm_counter(mm, file_rss);
-
-+ dec_vma_rss(vma);
- page_remove_rmap(page);
-+ ub_unused_privvm_inc(mm, vma);
-+ ub_unmap_inc(mm);
-+ pb_remove_ref(page, mm);
- page_cache_release(page);
-
- out_unmap:
-@@ -729,8 +746,12 @@ static void try_to_unmap_cluster(unsigne
- set_page_dirty(page);
-
- page_remove_rmap(page);
-+ ub_unmap_inc(mm);
-+ pb_remove_ref(page, mm);
-+ ub_unused_privvm_inc(mm, vma);
- page_cache_release(page);
- dec_mm_counter(mm, file_rss);
-+ dec_vma_rss(vma);
- (*mapcount)--;
- }
- pte_unmap_unlock(pte - 1, ptl);
-diff -upr linux-2.6.16.orig/mm/shmem.c linux-2.6.16-026test009/mm/shmem.c
---- linux-2.6.16.orig/mm/shmem.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/shmem.c 2006-04-19 15:02:12.000000000 +0400
-@@ -50,6 +50,8 @@
- #include <asm/div64.h>
- #include <asm/pgtable.h>
-
-+#include <ub/ub_vmpages.h>
-+
- /* This magic number is used in glibc for posix shared memory */
- #define TMPFS_MAGIC 0x01021994
-
-@@ -211,7 +213,7 @@ static void shmem_free_blocks(struct ino
- *
- * It has to be called with the spinlock held.
- */
--static void shmem_recalc_inode(struct inode *inode)
-+static void shmem_recalc_inode(struct inode *inode, long swp_freed)
- {
- struct shmem_inode_info *info = SHMEM_I(inode);
- long freed;
-@@ -221,6 +223,8 @@ static void shmem_recalc_inode(struct in
- info->alloced -= freed;
- shmem_unacct_blocks(info->flags, freed);
- shmem_free_blocks(inode, freed);
-+ if (freed > swp_freed)
-+ ub_tmpfs_respages_sub(info, freed - swp_freed);
- }
- }
-
-@@ -326,6 +330,11 @@ static void shmem_swp_set(struct shmem_i
- struct page *page = kmap_atomic_to_page(entry);
- set_page_private(page, page_private(page) + incdec);
- }
-+
-+ if (incdec == 1)
-+ ub_tmpfs_respages_dec(info);
-+ else
-+ ub_tmpfs_respages_inc(info);
- }
-
- /*
-@@ -342,14 +351,24 @@ static swp_entry_t *shmem_swp_alloc(stru
- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
- struct page *page = NULL;
- swp_entry_t *entry;
-+ unsigned long ub_val;
-
- if (sgp != SGP_WRITE &&
- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
- return ERR_PTR(-EINVAL);
-
-+ ub_val = 0;
-+ if (info->next_index <= index) {
-+ ub_val = index + 1 - info->next_index;
-+ if (ub_shmpages_charge(info, ub_val))
-+ return ERR_PTR(-ENOSPC);
-+ }
-+
- while (!(entry = shmem_swp_entry(info, index, &page))) {
-- if (sgp == SGP_READ)
-- return shmem_swp_map(ZERO_PAGE(0));
-+ if (sgp == SGP_READ) {
-+ entry = shmem_swp_map(ZERO_PAGE(0));
-+ goto out;
-+ }
- /*
- * Test free_blocks against 1 not 0, since we have 1 data
- * page (and perhaps indirect index pages) yet to allocate:
-@@ -359,7 +378,8 @@ static swp_entry_t *shmem_swp_alloc(stru
- spin_lock(&sbinfo->stat_lock);
- if (sbinfo->free_blocks <= 1) {
- spin_unlock(&sbinfo->stat_lock);
-- return ERR_PTR(-ENOSPC);
-+ entry = ERR_PTR(-ENOSPC);
-+ goto out;
- }
- sbinfo->free_blocks--;
- inode->i_blocks += BLOCKS_PER_PAGE;
-@@ -367,31 +387,43 @@ static swp_entry_t *shmem_swp_alloc(stru
- }
-
- spin_unlock(&info->lock);
-- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
-+ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) |
-+ __GFP_ZERO | __GFP_UBC);
- if (page)
- set_page_private(page, 0);
- spin_lock(&info->lock);
-
- if (!page) {
-- shmem_free_blocks(inode, 1);
-- return ERR_PTR(-ENOMEM);
-+ entry = ERR_PTR(-ENOMEM);
-+ goto out_block;
- }
- if (sgp != SGP_WRITE &&
- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
- entry = ERR_PTR(-EINVAL);
-- break;
-+ goto out_dir;
- }
-- if (info->next_index <= index)
-+ if (info->next_index <= index) {
-+ ub_val = 0;
- info->next_index = index + 1;
-+ }
- }
- if (page) {
- /* another task gave its page, or truncated the file */
- shmem_free_blocks(inode, 1);
- shmem_dir_free(page);
- }
-- if (info->next_index <= index && !IS_ERR(entry))
-+ if (info->next_index <= index)
- info->next_index = index + 1;
- return entry;
-+
-+out_dir:
-+ shmem_dir_free(page);
-+out_block:
-+ shmem_free_blocks(inode, 1);
-+out:
-+ if (ub_val)
-+ ub_shmpages_uncharge(info, ub_val);
-+ return entry;
- }
-
- /*
-@@ -484,6 +516,7 @@ static void shmem_truncate_range(struct
- return;
-
- spin_lock(&info->lock);
-+ ub_shmpages_uncharge(info, info->next_index - idx);
- info->flags |= SHMEM_TRUNCATE;
- if (likely(end == (loff_t) -1)) {
- limit = info->next_index;
-@@ -613,7 +646,7 @@ done2:
- info->swapped -= nr_swaps_freed;
- if (nr_pages_to_free)
- shmem_free_blocks(inode, nr_pages_to_free);
-- shmem_recalc_inode(inode);
-+ shmem_recalc_inode(inode, nr_swaps_freed);
- spin_unlock(&info->lock);
-
- /*
-@@ -696,6 +729,7 @@ static void shmem_delete_inode(struct in
- sbinfo->free_inodes++;
- spin_unlock(&sbinfo->stat_lock);
- }
-+ shmi_ub_put(info);
- clear_inode(inode);
- }
-
-@@ -817,6 +851,12 @@ int shmem_unuse(swp_entry_t entry, struc
- return found;
- }
-
-+#ifdef CONFIG_USER_RESOURCE
-+#define shm_get_swap_page(info) (get_swap_page((info)->shmi_ub))
-+#else
-+#define shm_get_swap_page(info) (get_swap_page(NULL))
-+#endif
-+
- /*
- * Move the page from the page cache to the swap cache.
- */
-@@ -837,12 +877,12 @@ static int shmem_writepage(struct page *
- info = SHMEM_I(inode);
- if (info->flags & VM_LOCKED)
- goto redirty;
-- swap = get_swap_page();
-+ swap = shm_get_swap_page(info);
- if (!swap.val)
- goto redirty;
-
- spin_lock(&info->lock);
-- shmem_recalc_inode(inode);
-+ shmem_recalc_inode(inode, 0);
- if (index >= info->next_index) {
- BUG_ON(!(info->flags & SHMEM_TRUNCATE));
- goto unlock;
-@@ -1030,7 +1070,7 @@ repeat:
- goto failed;
-
- spin_lock(&info->lock);
-- shmem_recalc_inode(inode);
-+ shmem_recalc_inode(inode, 0);
- entry = shmem_swp_alloc(info, idx, sgp);
- if (IS_ERR(entry)) {
- spin_unlock(&info->lock);
-@@ -1206,6 +1246,7 @@ repeat:
- spin_unlock(&info->lock);
- flush_dcache_page(filepage);
- SetPageUptodate(filepage);
-+ ub_tmpfs_respages_inc(info);
- }
- done:
- if (*pagep != filepage) {
-@@ -1307,28 +1348,6 @@ shmem_get_policy(struct vm_area_struct *
- }
- #endif
-
--int shmem_lock(struct file *file, int lock, struct user_struct *user)
--{
-- struct inode *inode = file->f_dentry->d_inode;
-- struct shmem_inode_info *info = SHMEM_I(inode);
-- int retval = -ENOMEM;
--
-- spin_lock(&info->lock);
-- if (lock && !(info->flags & VM_LOCKED)) {
-- if (!user_shm_lock(inode->i_size, user))
-- goto out_nomem;
-- info->flags |= VM_LOCKED;
-- }
-- if (!lock && (info->flags & VM_LOCKED) && user) {
-- user_shm_unlock(inode->i_size, user);
-- info->flags &= ~VM_LOCKED;
-- }
-- retval = 0;
--out_nomem:
-- spin_unlock(&info->lock);
-- return retval;
--}
--
- int shmem_mmap(struct file *file, struct vm_area_struct *vma)
- {
- file_accessed(file);
-@@ -1365,6 +1384,7 @@ shmem_get_inode(struct super_block *sb,
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- info = SHMEM_I(inode);
- memset(info, 0, (char *)inode - (char *)info);
-+ shmi_ub_set(info, get_exec_ub());
- spin_lock_init(&info->lock);
- INIT_LIST_HEAD(&info->swaplist);
-
-@@ -2226,6 +2246,10 @@ static struct vm_operations_struct shmem
- #endif
- };
-
-+int is_shmem_mapping(struct address_space *map)
-+{
-+ return (map != NULL && map->a_ops == &shmem_aops);
-+}
-
- static struct super_block *shmem_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-@@ -2233,13 +2257,19 @@ static struct super_block *shmem_get_sb(
- return get_sb_nodev(fs_type, flags, data, shmem_fill_super);
- }
-
--static struct file_system_type tmpfs_fs_type = {
-+struct file_system_type tmpfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "tmpfs",
- .get_sb = shmem_get_sb,
- .kill_sb = kill_litter_super,
- };
-+EXPORT_SYMBOL(tmpfs_fs_type);
-+
-+#ifdef CONFIG_VE
-+#define shm_mnt (get_exec_env()->shmem_mnt)
-+#else
- static struct vfsmount *shm_mnt;
-+#endif
-
- static int __init init_tmpfs(void)
- {
-@@ -2276,6 +2306,36 @@ out3:
- }
- module_init(init_tmpfs)
-
-+static inline int shm_charge_ahead(struct inode *inode)
-+{
-+#ifdef CONFIG_USER_RESOURCE
-+ struct shmem_inode_info *info = SHMEM_I(inode);
-+ unsigned long idx;
-+ swp_entry_t *entry;
-+
-+ if (!inode->i_size)
-+ return 0;
-+ idx = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
-+ /*
-+ * Just touch info to allocate space for entry and
-+ * make all UBC checks
-+ */
-+ spin_lock(&info->lock);
-+ entry = shmem_swp_alloc(info, idx, SGP_CACHE);
-+ if (IS_ERR(entry))
-+ goto err;
-+ shmem_swp_unmap(entry);
-+ spin_unlock(&info->lock);
-+ return 0;
-+
-+err:
-+ spin_unlock(&info->lock);
-+ return PTR_ERR(entry);
-+#else
-+ return 0;
-+#endif
-+}
-+
- /*
- * shmem_file_setup - get an unlinked file living in tmpfs
- *
-@@ -2323,6 +2383,10 @@ struct file *shmem_file_setup(char *name
- d_instantiate(dentry, inode);
- inode->i_size = size;
- inode->i_nlink = 0; /* It is unlinked */
-+ error = shm_charge_ahead(inode);
-+ if (error)
-+ goto close_file;
-+
- file->f_vfsmnt = mntget(shm_mnt);
- file->f_dentry = dentry;
- file->f_mapping = inode->i_mapping;
-@@ -2338,6 +2402,7 @@ put_memory:
- shmem_unacct_size(flags, size);
- return ERR_PTR(error);
- }
-+EXPORT_SYMBOL_GPL(shmem_file_setup);
-
- /*
- * shmem_zero_setup - setup a shared anonymous mapping
-@@ -2355,6 +2420,8 @@ int shmem_zero_setup(struct vm_area_stru
-
- if (vma->vm_file)
- fput(vma->vm_file);
-+ else if (vma->vm_flags & VM_WRITE)
-+ __ub_unused_privvm_dec(vma->vm_mm, size >> PAGE_SHIFT);
- vma->vm_file = file;
- vma->vm_ops = &shmem_vm_ops;
- return 0;
-diff -upr linux-2.6.16.orig/mm/slab.c linux-2.6.16-026test009/mm/slab.c
---- linux-2.6.16.orig/mm/slab.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/slab.c 2006-04-19 15:02:12.000000000 +0400
-@@ -105,32 +105,19 @@
- #include <linux/nodemask.h>
- #include <linux/mempolicy.h>
- #include <linux/mutex.h>
-+#include <linux/kmem_slab.h>
-+#include <linux/kmem_cache.h>
-
- #include <asm/uaccess.h>
- #include <asm/cacheflush.h>
- #include <asm/tlbflush.h>
- #include <asm/page.h>
-
--/*
-- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
-- * SLAB_RED_ZONE & SLAB_POISON.
-- * 0 for faster, smaller code (especially in the critical paths).
-- *
-- * STATS - 1 to collect stats for /proc/slabinfo.
-- * 0 for faster, smaller code (especially in the critical paths).
-- *
-- * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
-- */
-+#include <ub/ub_mem.h>
-
--#ifdef CONFIG_DEBUG_SLAB
--#define DEBUG 1
--#define STATS 1
--#define FORCED_DEBUG 1
--#else
--#define DEBUG 0
--#define STATS 0
--#define FORCED_DEBUG 0
--#endif
-+#define DEBUG SLAB_DEBUG
-+#define STATS SLAB_STATS
-+#define FORCED_DEBUG SLAB_FORCED_DEBUG
-
- /* Shouldn't this be in a header file somewhere? */
- #define BYTES_PER_WORD sizeof(void *)
-@@ -173,134 +160,20 @@
- SLAB_NO_REAP | SLAB_CACHE_DMA | \
- SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-- SLAB_DESTROY_BY_RCU)
-+ SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE)
- #else
- # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
- SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-- SLAB_DESTROY_BY_RCU)
-+ SLAB_DESTROY_BY_RCU | SLAB_UBC | SLAB_NO_CHARGE)
- #endif
-
--/*
-- * kmem_bufctl_t:
-- *
-- * Bufctl's are used for linking objs within a slab
-- * linked offsets.
-- *
-- * This implementation relies on "struct page" for locating the cache &
-- * slab an object belongs to.
-- * This allows the bufctl structure to be small (one int), but limits
-- * the number of objects a slab (not a cache) can contain when off-slab
-- * bufctls are used. The limit is the size of the largest general cache
-- * that does not use off-slab slabs.
-- * For 32bit archs with 4 kB pages, is this 56.
-- * This is not serious, as it is only for large objects, when it is unwise
-- * to have too many per slab.
-- * Note: This limit can be raised by introducing a general cache whose size
-- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
-- */
--
--typedef unsigned int kmem_bufctl_t;
--#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
--#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
--#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
--
- /* Max number of objs-per-slab for caches which use off-slab slabs.
- * Needed to avoid a possible looping condition in cache_grow().
- */
- static unsigned long offslab_limit;
-
- /*
-- * struct slab
-- *
-- * Manages the objs in a slab. Placed either at the beginning of mem allocated
-- * for a slab, or allocated from an general cache.
-- * Slabs are chained into three list: fully used, partial, fully free slabs.
-- */
--struct slab {
-- struct list_head list;
-- unsigned long colouroff;
-- void *s_mem; /* including colour offset */
-- unsigned int inuse; /* num of objs active in slab */
-- kmem_bufctl_t free;
-- unsigned short nodeid;
--};
--
--/*
-- * struct slab_rcu
-- *
-- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
-- * arrange for kmem_freepages to be called via RCU. This is useful if
-- * we need to approach a kernel structure obliquely, from its address
-- * obtained without the usual locking. We can lock the structure to
-- * stabilize it and check it's still at the given address, only if we
-- * can be sure that the memory has not been meanwhile reused for some
-- * other kind of object (which our subsystem's lock might corrupt).
-- *
-- * rcu_read_lock before reading the address, then rcu_read_unlock after
-- * taking the spinlock within the structure expected at that address.
-- *
-- * We assume struct slab_rcu can overlay struct slab when destroying.
-- */
--struct slab_rcu {
-- struct rcu_head head;
-- struct kmem_cache *cachep;
-- void *addr;
--};
--
--/*
-- * struct array_cache
-- *
-- * Purpose:
-- * - LIFO ordering, to hand out cache-warm objects from _alloc
-- * - reduce the number of linked list operations
-- * - reduce spinlock operations
-- *
-- * The limit is stored in the per-cpu structure to reduce the data cache
-- * footprint.
-- *
-- */
--struct array_cache {
-- unsigned int avail;
-- unsigned int limit;
-- unsigned int batchcount;
-- unsigned int touched;
-- spinlock_t lock;
-- void *entry[0]; /*
-- * Must have this definition in here for the proper
-- * alignment of array_cache. Also simplifies accessing
-- * the entries.
-- * [0] is for gcc 2.95. It should really be [].
-- */
--};
--
--/* bootstrap: The caches do not work without cpuarrays anymore,
-- * but the cpuarrays are allocated from the generic caches...
-- */
--#define BOOT_CPUCACHE_ENTRIES 1
--struct arraycache_init {
-- struct array_cache cache;
-- void *entries[BOOT_CPUCACHE_ENTRIES];
--};
--
--/*
-- * The slab lists for all objects.
-- */
--struct kmem_list3 {
-- struct list_head slabs_partial; /* partial list first, better asm code */
-- struct list_head slabs_full;
-- struct list_head slabs_free;
-- unsigned long free_objects;
-- unsigned long next_reap;
-- int free_touched;
-- unsigned int free_limit;
-- unsigned int colour_next; /* Per-node cache coloring */
-- spinlock_t list_lock;
-- struct array_cache *shared; /* shared per node */
-- struct array_cache **alien; /* on other nodes */
--};
--
--/*
- * Need this for bootstrapping a per node allocator.
- */
- #define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
-@@ -364,79 +237,6 @@ static void kmem_list3_init(struct kmem_
- MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
- } while (0)
-
--/*
-- * struct kmem_cache
-- *
-- * manages a cache.
-- */
--
--struct kmem_cache {
--/* 1) per-cpu data, touched during every alloc/free */
-- struct array_cache *array[NR_CPUS];
-- unsigned int batchcount;
-- unsigned int limit;
-- unsigned int shared;
-- unsigned int buffer_size;
--/* 2) touched by every alloc & free from the backend */
-- struct kmem_list3 *nodelists[MAX_NUMNODES];
-- unsigned int flags; /* constant flags */
-- unsigned int num; /* # of objs per slab */
-- spinlock_t spinlock;
--
--/* 3) cache_grow/shrink */
-- /* order of pgs per slab (2^n) */
-- unsigned int gfporder;
--
-- /* force GFP flags, e.g. GFP_DMA */
-- gfp_t gfpflags;
--
-- size_t colour; /* cache colouring range */
-- unsigned int colour_off; /* colour offset */
-- struct kmem_cache *slabp_cache;
-- unsigned int slab_size;
-- unsigned int dflags; /* dynamic flags */
--
-- /* constructor func */
-- void (*ctor) (void *, struct kmem_cache *, unsigned long);
--
-- /* de-constructor func */
-- void (*dtor) (void *, struct kmem_cache *, unsigned long);
--
--/* 4) cache creation/removal */
-- const char *name;
-- struct list_head next;
--
--/* 5) statistics */
--#if STATS
-- unsigned long num_active;
-- unsigned long num_allocations;
-- unsigned long high_mark;
-- unsigned long grown;
-- unsigned long reaped;
-- unsigned long errors;
-- unsigned long max_freeable;
-- unsigned long node_allocs;
-- unsigned long node_frees;
-- atomic_t allochit;
-- atomic_t allocmiss;
-- atomic_t freehit;
-- atomic_t freemiss;
--#endif
--#if DEBUG
-- /*
-- * If debugging is enabled, then the allocator can add additional
-- * fields and/or padding to every object. buffer_size contains the total
-- * object size including these internal fields, the following two
-- * variables contain the offset to the user object and its size.
-- */
-- int obj_offset;
-- int obj_size;
--#endif
--};
--
--#define CFLGS_OFF_SLAB (0x80000000UL)
--#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
--
- #define BATCHREFILL_LIMIT 16
- /* Optimization question: fewer reaps means less
- * probability for unnessary cpucache drain/refill cycles.
-@@ -573,42 +373,6 @@ static void **dbg_userword(struct kmem_c
- #define BREAK_GFP_ORDER_LO 0
- static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
-
--/* Functions for storing/retrieving the cachep and or slab from the
-- * global 'mem_map'. These are used to find the slab an obj belongs to.
-- * With kfree(), these are used to find the cache which an obj belongs to.
-- */
--static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
--{
-- page->lru.next = (struct list_head *)cache;
--}
--
--static inline struct kmem_cache *page_get_cache(struct page *page)
--{
-- return (struct kmem_cache *)page->lru.next;
--}
--
--static inline void page_set_slab(struct page *page, struct slab *slab)
--{
-- page->lru.prev = (struct list_head *)slab;
--}
--
--static inline struct slab *page_get_slab(struct page *page)
--{
-- return (struct slab *)page->lru.prev;
--}
--
--static inline struct kmem_cache *virt_to_cache(const void *obj)
--{
-- struct page *page = virt_to_page(obj);
-- return page_get_cache(page);
--}
--
--static inline struct slab *virt_to_slab(const void *obj)
--{
-- struct page *page = virt_to_page(obj);
-- return page_get_slab(page);
--}
--
- /* These are the default caches for kmalloc. Custom caches can have other sizes. */
- struct cache_sizes malloc_sizes[] = {
- #define CACHE(x) { .cs_size = (x) },
-@@ -715,9 +479,17 @@ struct kmem_cache *kmem_find_general_cac
- }
- EXPORT_SYMBOL(kmem_find_general_cachep);
-
--static size_t slab_mgmt_size(size_t nr_objs, size_t align)
-+static size_t slab_mgmt_size_noalign(size_t nr_objs, int flags)
- {
-- return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
-+ size_t size_noub;
-+
-+ size_noub = sizeof(struct slab) + nr_objs * sizeof(kmem_bufctl_t);
-+ return ALIGN(size_noub, UB_ALIGN(flags)) + nr_objs * UB_EXTRA(flags);
-+}
-+
-+static size_t slab_mgmt_size(size_t nr_objs, size_t align, int flags)
-+{
-+ return ALIGN(slab_mgmt_size_noalign(nr_objs, flags), align);
- }
-
- /* Calculate the number of objects and left-over bytes for a given
-@@ -761,20 +533,23 @@ static void cache_estimate(unsigned long
- * into account.
- */
- nr_objs = (slab_size - sizeof(struct slab)) /
-- (buffer_size + sizeof(kmem_bufctl_t));
-+ (buffer_size + sizeof(kmem_bufctl_t) +
-+ UB_EXTRA(flags));
-
- /*
- * This calculated number will be either the right
- * amount, or one greater than what we want.
- */
-- if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
-- > slab_size)
-+ if (slab_mgmt_size(nr_objs, align, flags) +
-+ nr_objs * buffer_size > slab_size)
- nr_objs--;
-+ BUG_ON(slab_mgmt_size(nr_objs, align, flags) +
-+ nr_objs * buffer_size > slab_size);
-
- if (nr_objs > SLAB_LIMIT)
- nr_objs = SLAB_LIMIT;
-
-- mgmt_size = slab_mgmt_size(nr_objs, align);
-+ mgmt_size = slab_mgmt_size(nr_objs, align, flags);
- }
- *num = nr_objs;
- *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
-@@ -1254,6 +1029,7 @@ void __init kmem_cache_init(void)
- sizes[INDEX_AC].cs_size,
- ARCH_KMALLOC_MINALIGN,
- (ARCH_KMALLOC_FLAGS |
-+ SLAB_UBC|SLAB_NO_CHARGE |
- SLAB_PANIC), NULL, NULL);
-
- if (INDEX_AC != INDEX_L3)
-@@ -1261,8 +1037,9 @@ void __init kmem_cache_init(void)
- kmem_cache_create(names[INDEX_L3].name,
- sizes[INDEX_L3].cs_size,
- ARCH_KMALLOC_MINALIGN,
-- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
-- NULL);
-+ (ARCH_KMALLOC_FLAGS |
-+ SLAB_UBC | SLAB_NO_CHARGE |
-+ SLAB_PANIC), NULL, NULL);
-
- while (sizes->cs_size != ULONG_MAX) {
- /*
-@@ -1277,14 +1054,14 @@ void __init kmem_cache_init(void)
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- (ARCH_KMALLOC_FLAGS
-+ | SLAB_UBC
-+ | SLAB_NO_CHARGE
- | SLAB_PANIC),
- NULL, NULL);
-
- /* Inc off-slab bufctl limit until the ceiling is hit. */
-- if (!(OFF_SLAB(sizes->cs_cachep))) {
-- offslab_limit = sizes->cs_size - sizeof(struct slab);
-- offslab_limit /= sizeof(kmem_bufctl_t);
-- }
-+ if (!(OFF_SLAB(sizes->cs_cachep)))
-+ offslab_limit = sizes->cs_size;
-
- sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
- sizes->cs_size,
-@@ -1704,8 +1481,13 @@ static inline size_t calculate_slab_orde
- continue;
-
- /* More than offslab_limit objects will cause problems */
-- if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
-- break;
-+ if (flags & CFLGS_OFF_SLAB) {
-+ unsigned long slab_size;
-+
-+ slab_size = slab_mgmt_size_noalign(num, flags);
-+ if (slab_size > offslab_limit)
-+ break;
-+ }
-
- /* Found something acceptable - save it away */
- cachep->num = num;
-@@ -1950,8 +1732,7 @@ kmem_cache_create (const char *name, siz
- cachep = NULL;
- goto oops;
- }
-- slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
-- + sizeof(struct slab), align);
-+ slab_size = slab_mgmt_size(cachep->num, align, flags);
-
- /*
- * If the slab has been placed off-slab, and we have enough space then
-@@ -1964,8 +1745,7 @@ kmem_cache_create (const char *name, siz
-
- if (flags & CFLGS_OFF_SLAB) {
- /* really off slab. No need for manual alignment */
-- slab_size =
-- cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
-+ slab_size = slab_mgmt_size_noalign(cachep->num, flags);
- }
-
- cachep->colour_off = cache_line_size();
-@@ -2045,6 +1825,7 @@ kmem_cache_create (const char *name, siz
-
- /* cache setup completed, link it into the list */
- list_add(&cachep->next, &cache_chain);
-+ set_cache_objuse(cachep);
- oops:
- if (!cachep && (flags & SLAB_PANIC))
- panic("kmem_cache_create(): failed to create slab `%s'\n",
-@@ -2282,7 +2063,8 @@ static struct slab *alloc_slabmgmt(struc
-
- if (OFF_SLAB(cachep)) {
- /* Slab management obj is off-slab. */
-- slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
-+ slabp = kmem_cache_alloc(cachep->slabp_cache,
-+ local_flags & (~__GFP_UBC));
- if (!slabp)
- return NULL;
- } else {
-@@ -2292,15 +2074,11 @@ static struct slab *alloc_slabmgmt(struc
- slabp->inuse = 0;
- slabp->colouroff = colour_off;
- slabp->s_mem = objp + colour_off;
-+ init_slab_ubps(cachep, slabp);
-
- return slabp;
- }
-
--static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
--{
-- return (kmem_bufctl_t *) (slabp + 1);
--}
--
- static void cache_init_objs(struct kmem_cache *cachep,
- struct slab *slabp, unsigned long ctor_flags)
- {
-@@ -2470,7 +2248,7 @@ static int cache_grow(struct kmem_cache
- /* Get mem for the objs.
- * Attempt to allocate a physical page from 'nodeid',
- */
-- if (!(objp = kmem_getpages(cachep, flags, nodeid)))
-+ if (!(objp = kmem_getpages(cachep, flags & (~__GFP_UBC), nodeid)))
- goto failed;
-
- /* Get slab management. */
-@@ -2823,6 +2601,11 @@ __cache_alloc(struct kmem_cache *cachep,
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
-+
-+ if (objp && ub_slab_charge(objp, flags)) {
-+ kmem_cache_free(cachep, objp);
-+ objp = NULL;
-+ }
- return objp;
- }
-
-@@ -2997,6 +2780,8 @@ static inline void __cache_free(struct k
- check_irq_off();
- objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
-
-+ ub_slab_uncharge(objp);
-+
- /* Make sure we are not freeing a object from another
- * node to the array cache on this cpu.
- */
-@@ -3128,6 +2913,10 @@ void *kmem_cache_alloc_node(struct kmem_
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
- __builtin_return_address(0));
-
-+ if (ptr && ub_slab_charge(ptr, flags)) {
-+ kmem_cache_free(cachep, ptr);
-+ ptr = NULL;
-+ }
- return ptr;
- }
- EXPORT_SYMBOL(kmem_cache_alloc_node);
-@@ -3543,6 +3332,7 @@ static void cache_reap(void *unused)
- return;
- }
-
-+ {KSTAT_PERF_ENTER(cache_reap)
- list_for_each(walk, &cache_chain) {
- struct kmem_cache *searchp;
- struct list_head *p;
-@@ -3608,6 +3398,7 @@ static void cache_reap(void *unused)
- check_irq_on();
- mutex_unlock(&cache_chain_mutex);
- next_reap_node();
-+ KSTAT_PERF_LEAVE(cache_reap)}
- /* Setup the next iteration */
- schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
- }
-diff -upr linux-2.6.16.orig/mm/swap_state.c linux-2.6.16-026test009/mm/swap_state.c
---- linux-2.6.16.orig/mm/swap_state.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/swap_state.c 2006-04-19 15:02:12.000000000 +0400
-@@ -18,6 +18,8 @@
-
- #include <asm/pgtable.h>
-
-+#include <ub/ub_vmpages.h>
-+
- /*
- * swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_list, to make sync_page look nicer, and to allow
-@@ -53,6 +55,7 @@ static struct {
- unsigned long noent_race;
- unsigned long exist_race;
- } swap_cache_info;
-+EXPORT_SYMBOL(swap_cache_info);
-
- void show_swap_cache_info(void)
- {
-@@ -151,7 +154,14 @@ int add_to_swap(struct page * page, gfp_
- BUG();
-
- for (;;) {
-- entry = get_swap_page();
-+ struct user_beancounter *ub;
-+
-+ ub = pb_grab_page_ub(page);
-+ if (IS_ERR(ub))
-+ return 0;
-+
-+ entry = get_swap_page(ub);
-+ put_beancounter(ub);
- if (!entry.val)
- return 0;
-
-diff -upr linux-2.6.16.orig/mm/swapfile.c linux-2.6.16-026test009/mm/swapfile.c
---- linux-2.6.16.orig/mm/swapfile.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/swapfile.c 2006-04-19 15:02:12.000000000 +0400
-@@ -33,6 +33,8 @@
- #include <asm/tlbflush.h>
- #include <linux/swapops.h>
-
-+#include <ub/ub_vmpages.h>
-+
- DEFINE_SPINLOCK(swap_lock);
- unsigned int nr_swapfiles;
- long total_swap_pages;
-@@ -172,7 +174,7 @@ no_page:
- return 0;
- }
-
--swp_entry_t get_swap_page(void)
-+swp_entry_t get_swap_page(struct user_beancounter *ub)
- {
- struct swap_info_struct *si;
- pgoff_t offset;
-@@ -202,6 +204,7 @@ swp_entry_t get_swap_page(void)
- offset = scan_swap_map(si);
- if (offset) {
- spin_unlock(&swap_lock);
-+ ub_swapentry_inc(si, offset, ub);
- return swp_entry(type, offset);
- }
- next = swap_list.next;
-@@ -277,6 +280,7 @@ static int swap_entry_free(struct swap_i
- count--;
- p->swap_map[offset] = count;
- if (!count) {
-+ ub_swapentry_dec(p, offset);
- if (offset < p->lowest_bit)
- p->lowest_bit = offset;
- if (offset > p->highest_bit)
-@@ -423,11 +427,18 @@ void free_swap_and_cache(swp_entry_t ent
- * force COW, vm_page_prot omits write permission from any private vma.
- */
- static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
-- unsigned long addr, swp_entry_t entry, struct page *page)
-+ unsigned long addr, swp_entry_t entry, struct page *page,
-+ struct page_beancounter **pb)
- {
-- inc_mm_counter(vma->vm_mm, anon_rss);
-+ struct mm_struct *mm;
-+
-+ mm = vma->vm_mm;
-+ inc_mm_counter(mm, anon_rss);
-+ inc_vma_rss(vma);
-+ ub_unused_privvm_dec(mm, vma);
-+ pb_add_ref(page, mm, pb);
- get_page(page);
-- set_pte_at(vma->vm_mm, addr, pte,
-+ set_pte_at(mm, addr, pte,
- pte_mkold(mk_pte(page, vma->vm_page_prot)));
- page_add_anon_rmap(page, vma, addr);
- swap_free(entry);
-@@ -440,7 +451,8 @@ static void unuse_pte(struct vm_area_str
-
- static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, unsigned long end,
-- swp_entry_t entry, struct page *page)
-+ swp_entry_t entry, struct page *page,
-+ struct page_beancounter **pb)
- {
- pte_t swp_pte = swp_entry_to_pte(entry);
- pte_t *pte;
-@@ -454,7 +466,7 @@ static int unuse_pte_range(struct vm_are
- * Test inline before going to call unuse_pte.
- */
- if (unlikely(pte_same(*pte, swp_pte))) {
-- unuse_pte(vma, pte++, addr, entry, page);
-+ unuse_pte(vma, pte++, addr, entry, page, pb);
- found = 1;
- break;
- }
-@@ -465,7 +477,8 @@ static int unuse_pte_range(struct vm_are
-
- static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
- unsigned long addr, unsigned long end,
-- swp_entry_t entry, struct page *page)
-+ swp_entry_t entry, struct page *page,
-+ struct page_beancounter **pb)
- {
- pmd_t *pmd;
- unsigned long next;
-@@ -475,7 +488,7 @@ static inline int unuse_pmd_range(struct
- next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
- continue;
-- if (unuse_pte_range(vma, pmd, addr, next, entry, page))
-+ if (unuse_pte_range(vma, pmd, addr, next, entry, page, pb))
- return 1;
- } while (pmd++, addr = next, addr != end);
- return 0;
-@@ -483,7 +496,8 @@ static inline int unuse_pmd_range(struct
-
- static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
- unsigned long addr, unsigned long end,
-- swp_entry_t entry, struct page *page)
-+ swp_entry_t entry, struct page *page,
-+ struct page_beancounter **pb)
- {
- pud_t *pud;
- unsigned long next;
-@@ -493,14 +507,15 @@ static inline int unuse_pud_range(struct
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
-- if (unuse_pmd_range(vma, pud, addr, next, entry, page))
-+ if (unuse_pmd_range(vma, pud, addr, next, entry, page, pb))
- return 1;
- } while (pud++, addr = next, addr != end);
- return 0;
- }
-
- static int unuse_vma(struct vm_area_struct *vma,
-- swp_entry_t entry, struct page *page)
-+ swp_entry_t entry, struct page *page,
-+ struct page_beancounter **pb)
- {
- pgd_t *pgd;
- unsigned long addr, end, next;
-@@ -521,14 +536,15 @@ static int unuse_vma(struct vm_area_stru
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
-- if (unuse_pud_range(vma, pgd, addr, next, entry, page))
-+ if (unuse_pud_range(vma, pgd, addr, next, entry, page, pb))
- return 1;
- } while (pgd++, addr = next, addr != end);
- return 0;
- }
-
- static int unuse_mm(struct mm_struct *mm,
-- swp_entry_t entry, struct page *page)
-+ swp_entry_t entry, struct page *page,
-+ struct page_beancounter **pb)
- {
- struct vm_area_struct *vma;
-
-@@ -543,7 +559,7 @@ static int unuse_mm(struct mm_struct *mm
- lock_page(page);
- }
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
-- if (vma->anon_vma && unuse_vma(vma, entry, page))
-+ if (vma->anon_vma && unuse_vma(vma, entry, page, pb))
- break;
- }
- up_read(&mm->mmap_sem);
-@@ -555,11 +571,12 @@ static int unuse_mm(struct mm_struct *mm
- }
-
- #ifdef CONFIG_MIGRATION
--int remove_vma_swap(struct vm_area_struct *vma, struct page *page)
-+int remove_vma_swap(struct vm_area_struct *vma, struct page *page,
-+ struct page_beancounter **pb)
- {
- swp_entry_t entry = { .val = page_private(page) };
-
-- return unuse_vma(vma, entry, page);
-+ return unuse_vma(vma, entry, page, pb);
- }
- #endif
-
-@@ -618,6 +635,7 @@ static int try_to_unuse(unsigned int typ
- int retval = 0;
- int reset_overflow = 0;
- int shmem;
-+ struct page_beancounter *pb;
-
- /*
- * When searching mms for an entry, a good strategy is to
-@@ -670,6 +688,13 @@ again:
- break;
- }
-
-+ pb = NULL;
-+ if (pb_alloc_all(&pb)) {
-+ page_cache_release(page);
-+ retval = -ENOMEM;
-+ break;
-+ }
-+
- /*
- * Don't hold on to start_mm if it looks like exiting.
- */
-@@ -698,6 +723,20 @@ again:
- }
- wait_on_page_writeback(page);
-
-+ /* If read failed we cannot map not-uptodate page to
-+ * user space. Actually, we are in serious troubles,
-+ * we do not even know what process to kill. So, the only
-+ * variant remains: to stop swapoff() and allow someone
-+ * to kill processes to zap invalid pages.
-+ */
-+ if (unlikely(!PageUptodate(page))) {
-+ pb_free_list(&pb);
-+ unlock_page(page);
-+ page_cache_release(page);
-+ retval = -EIO;
-+ break;
-+ }
-+
- /*
- * Remove all references to entry.
- * Whenever we reach init_mm, there's no address space
-@@ -709,7 +748,7 @@ again:
- if (start_mm == &init_mm)
- shmem = shmem_unuse(entry, page);
- else
-- retval = unuse_mm(start_mm, entry, page);
-+ retval = unuse_mm(start_mm, entry, page, &pb);
- }
- if (*swap_map > 1) {
- int set_start_mm = (*swap_map >= swcount);
-@@ -741,7 +780,7 @@ again:
- set_start_mm = 1;
- shmem = shmem_unuse(entry, page);
- } else
-- retval = unuse_mm(mm, entry, page);
-+ retval = unuse_mm(mm, entry, page, &pb);
- if (set_start_mm && *swap_map < swcount) {
- mmput(new_start_mm);
- atomic_inc(&mm->mm_users);
-@@ -755,6 +794,8 @@ again:
- mmput(start_mm);
- start_mm = new_start_mm;
- }
-+
-+ pb_free_list(&pb);
- if (retval) {
- unlock_page(page);
- page_cache_release(page);
-@@ -1100,6 +1141,10 @@ asmlinkage long sys_swapoff(const char _
- int i, type, prev;
- int err;
-
-+ /* VE admin check is just to be on the safe side, the admin may affect
-+ * swaps only if he has access to special, i.e. if he has been granted
-+ * access to the block device or if the swap file is in the area
-+ * visible to him. */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
-@@ -1199,6 +1244,7 @@ asmlinkage long sys_swapoff(const char _
- spin_unlock(&swap_lock);
- mutex_unlock(&swapon_mutex);
- vfree(swap_map);
-+ ub_swap_fini(p);
- inode = mapping->host;
- if (S_ISBLK(inode->i_mode)) {
- struct block_device *bdev = I_BDEV(inode);
-@@ -1557,6 +1603,11 @@ asmlinkage long sys_swapon(const char __
- goto bad_swap;
- }
-
-+ if (ub_swap_init(p, maxpages)) {
-+ error = -ENOMEM;
-+ goto bad_swap;
-+ }
-+
- mutex_lock(&swapon_mutex);
- spin_lock(&swap_lock);
- p->flags = SWP_ACTIVE;
-diff -upr linux-2.6.16.orig/mm/vmalloc.c linux-2.6.16-026test009/mm/vmalloc.c
---- linux-2.6.16.orig/mm/vmalloc.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/vmalloc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -20,6 +20,8 @@
- #include <asm/uaccess.h>
- #include <asm/tlbflush.h>
-
-+#include <ub/ub_debug.h>
-+
-
- DEFINE_RWLOCK(vmlist_lock);
- struct vm_struct *vmlist;
-@@ -256,6 +258,68 @@ struct vm_struct *get_vm_area_node(unsig
- return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
- }
-
-+struct vm_struct * get_vm_area_best(unsigned long size, unsigned long flags)
-+{
-+ unsigned long addr, best_addr, delta, best_delta;
-+ struct vm_struct **p, **best_p, *tmp, *area;
-+
-+ area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
-+ if (!area)
-+ return NULL;
-+
-+ size += PAGE_SIZE; /* one-page gap at the end */
-+ addr = VMALLOC_START;
-+ best_addr = 0UL;
-+ best_p = NULL;
-+ best_delta = PAGE_ALIGN(VMALLOC_END) - VMALLOC_START;
-+
-+ write_lock(&vmlist_lock);
-+ for (p = &vmlist; (tmp = *p) &&
-+ (tmp->addr <= (void *)PAGE_ALIGN(VMALLOC_END));
-+ p = &tmp->next) {
-+ if ((size + addr) < addr)
-+ break;
-+ delta = (unsigned long) tmp->addr - (size + addr);
-+ if (delta < best_delta) {
-+ best_delta = delta;
-+ best_addr = addr;
-+ best_p = p;
-+ }
-+ addr = tmp->size + (unsigned long) tmp->addr;
-+ if (addr > VMALLOC_END-size)
-+ break;
-+ }
-+
-+ if (!tmp || (tmp->addr > (void *)PAGE_ALIGN(VMALLOC_END))) {
-+ /* check free area after list end */
-+ delta = (unsigned long) PAGE_ALIGN(VMALLOC_END) - (size + addr);
-+ if (delta < best_delta) {
-+ best_delta = delta;
-+ best_addr = addr;
-+ best_p = p;
-+ }
-+ }
-+ if (best_addr) {
-+ area->flags = flags;
-+ /* allocate at the end of this area */
-+ area->addr = (void *)(best_addr + best_delta);
-+ area->size = size;
-+ area->next = *best_p;
-+ area->pages = NULL;
-+ area->nr_pages = 0;
-+ area->phys_addr = 0;
-+ *best_p = area;
-+ /* check like in __vunmap */
-+ WARN_ON((PAGE_SIZE - 1) & (unsigned long)area->addr);
-+ } else {
-+ kfree(area);
-+ area = NULL;
-+ }
-+ write_unlock(&vmlist_lock);
-+
-+ return area;
-+}
-+
- /* Caller must hold vmlist_lock */
- struct vm_struct *__remove_vm_area(void *addr)
- {
-@@ -296,7 +360,7 @@ struct vm_struct *remove_vm_area(void *a
- return v;
- }
-
--void __vunmap(void *addr, int deallocate_pages)
-+void __vunmap(void *addr, int deallocate_pages, int uncharge)
- {
- struct vm_struct *area;
-
-@@ -320,6 +384,8 @@ void __vunmap(void *addr, int deallocate
- if (deallocate_pages) {
- int i;
-
-+ if (uncharge)
-+ dec_vmalloc_charged(area);
- for (i = 0; i < area->nr_pages; i++) {
- if (unlikely(!area->pages[i]))
- BUG();
-@@ -350,7 +416,7 @@ void __vunmap(void *addr, int deallocate
- void vfree(void *addr)
- {
- BUG_ON(in_interrupt());
-- __vunmap(addr, 1);
-+ __vunmap(addr, 1, 1);
- }
- EXPORT_SYMBOL(vfree);
-
-@@ -367,7 +433,7 @@ EXPORT_SYMBOL(vfree);
- void vunmap(void *addr)
- {
- BUG_ON(in_interrupt());
-- __vunmap(addr, 0);
-+ __vunmap(addr, 0, 0);
- }
- EXPORT_SYMBOL(vunmap);
-
-@@ -439,10 +505,12 @@ void *__vmalloc_area_node(struct vm_stru
-
- if (map_vm_area(area, prot, &pages))
- goto fail;
-+
-+ inc_vmalloc_charged(area, gfp_mask);
- return area->addr;
-
- fail:
-- vfree(area->addr);
-+ __vunmap(area->addr, 1, 0);
- return NULL;
- }
-
-@@ -486,6 +554,21 @@ void *__vmalloc(unsigned long size, gfp_
- }
- EXPORT_SYMBOL(__vmalloc);
-
-+static void *____vmalloc(unsigned long size, gfp_t mask, pgprot_t prot)
-+{
-+ struct vm_struct *area;
-+
-+ size = PAGE_ALIGN(size);
-+ if (!size || (size >> PAGE_SHIFT) > num_physpages)
-+ return NULL;
-+
-+ area = get_vm_area_best(size, VM_ALLOC);
-+ if (!area)
-+ return NULL;
-+
-+ return __vmalloc_area_node(area, mask, prot, -1);
-+}
-+
- /**
- * vmalloc - allocate virtually contiguous memory
- *
-@@ -503,6 +586,20 @@ void *vmalloc(unsigned long size)
- }
- EXPORT_SYMBOL(vmalloc);
-
-+void *vmalloc_best(unsigned long size)
-+{
-+ return ____vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
-+}
-+
-+EXPORT_SYMBOL(vmalloc_best);
-+
-+void *ub_vmalloc_best(unsigned long size)
-+{
-+ return ____vmalloc(size, GFP_KERNEL_UBC | __GFP_HIGHMEM, PAGE_KERNEL);
-+}
-+
-+EXPORT_SYMBOL(ub_vmalloc_best);
-+
- /**
- * vmalloc_node - allocate memory on a specific node
- *
-@@ -631,3 +728,37 @@ finished:
- read_unlock(&vmlist_lock);
- return buf - buf_start;
- }
-+
-+void vprintstat(void)
-+{
-+ struct vm_struct *p, *last_p = NULL;
-+ unsigned long addr, size, free_size, max_free_size;
-+ int num;
-+
-+ addr = VMALLOC_START;
-+ size = max_free_size = 0;
-+ num = 0;
-+
-+ read_lock(&vmlist_lock);
-+ for (p = vmlist; p; p = p->next) {
-+ free_size = (unsigned long)p->addr - addr;
-+ if (free_size > max_free_size)
-+ max_free_size = free_size;
-+ addr = (unsigned long)p->addr + p->size;
-+ size += p->size;
-+ ++num;
-+ last_p = p;
-+ }
-+ if (last_p) {
-+ free_size = VMALLOC_END -
-+ ((unsigned long)last_p->addr + last_p->size);
-+ if (free_size > max_free_size)
-+ max_free_size = free_size;
-+ }
-+ read_unlock(&vmlist_lock);
-+
-+ printk("VMALLOC Used: %luKB Total: %luKB Entries: %d\n"
-+ " Max_Free: %luKB Start: %lx End: %lx\n",
-+ size/1024, (VMALLOC_END - VMALLOC_START)/1024, num,
-+ max_free_size/1024, VMALLOC_START, VMALLOC_END);
-+}
-diff -upr linux-2.6.16.orig/mm/vmscan.c linux-2.6.16-026test009/mm/vmscan.c
---- linux-2.6.16.orig/mm/vmscan.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/mm/vmscan.c 2006-04-19 15:02:12.000000000 +0400
-@@ -1243,6 +1243,7 @@ refill_inactive_zone(struct zone *zone,
- reclaim_mapped = 1;
- }
-
-+ KSTAT_PERF_ENTER(refill_inact)
- lru_add_drain();
- spin_lock_irq(&zone->lru_lock);
- pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-@@ -1322,6 +1323,7 @@ refill_inactive_zone(struct zone *zone,
- local_irq_enable();
-
- pagevec_release(&pvec);
-+ KSTAT_PERF_LEAVE(refill_inact);
- }
-
- /*
-@@ -1438,6 +1440,7 @@ int try_to_free_pages(struct zone **zone
- unsigned long lru_pages = 0;
- int i;
-
-+ KSTAT_PERF_ENTER(ttfp);
- sc.gfp_mask = gfp_mask;
- sc.may_writepage = !laptop_mode;
- sc.may_swap = 1;
-@@ -1500,6 +1503,7 @@ out:
-
- zone->prev_priority = zone->temp_priority;
- }
-+ KSTAT_PERF_LEAVE(ttfp);
- return ret;
- }
-
-@@ -1832,7 +1836,8 @@ static int __init kswapd_init(void)
- swap_setup();
- for_each_pgdat(pgdat)
- pgdat->kswapd
-- = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
-+ = find_task_by_pid_all(kernel_thread(kswapd,
-+ pgdat, CLONE_KERNEL));
- total_memory = nr_free_pagecache_pages();
- hotcpu_notifier(cpu_callback, 0);
- return 0;
-diff -upr linux-2.6.16.orig/net/atm/clip.c linux-2.6.16-026test009/net/atm/clip.c
---- linux-2.6.16.orig/net/atm/clip.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/atm/clip.c 2006-04-19 15:02:11.000000000 +0400
-@@ -613,12 +613,19 @@ static int clip_create(int number)
-
-
- static int clip_device_event(struct notifier_block *this,unsigned long event,
-- void *dev)
-+ void *arg)
- {
-+ struct net_device *dev = arg;
-+
-+ if (event == NETDEV_UNREGISTER) {
-+ neigh_ifdown(&clip_tbl, dev);
-+ return NOTIFY_DONE;
-+ }
-+
- /* ignore non-CLIP devices */
-- if (((struct net_device *) dev)->type != ARPHRD_ATM ||
-- ((struct net_device *) dev)->hard_start_xmit != clip_start_xmit)
-+ if (dev->type != ARPHRD_ATM || dev->hard_start_xmit != clip_start_xmit)
- return NOTIFY_DONE;
-+
- switch (event) {
- case NETDEV_UP:
- DPRINTK("clip_device_event NETDEV_UP\n");
-@@ -686,14 +693,12 @@ static struct notifier_block clip_inet_n
- static void atmarpd_close(struct atm_vcc *vcc)
- {
- DPRINTK("atmarpd_close\n");
-- atmarpd = NULL; /* assumed to be atomic */
-- barrier();
-- unregister_inetaddr_notifier(&clip_inet_notifier);
-- unregister_netdevice_notifier(&clip_dev_notifier);
-- if (skb_peek(&sk_atm(vcc)->sk_receive_queue))
-- printk(KERN_ERR "atmarpd_close: closing with requests "
-- "pending\n");
-+
-+ rtnl_lock();
-+ atmarpd = NULL;
- skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
-+ rtnl_unlock();
-+
- DPRINTK("(done)\n");
- module_put(THIS_MODULE);
- }
-@@ -714,7 +719,12 @@ static struct atm_dev atmarpd_dev = {
-
- static int atm_init_atmarp(struct atm_vcc *vcc)
- {
-- if (atmarpd) return -EADDRINUSE;
-+ rtnl_lock();
-+ if (atmarpd) {
-+ rtnl_unlock();
-+ return -EADDRINUSE;
-+ }
-+
- if (start_timer) {
- start_timer = 0;
- init_timer(&idle_timer);
-@@ -731,10 +741,7 @@ static int atm_init_atmarp(struct atm_vc
- vcc->push = NULL;
- vcc->pop = NULL; /* crash */
- vcc->push_oam = NULL; /* crash */
-- if (register_netdevice_notifier(&clip_dev_notifier))
-- printk(KERN_ERR "register_netdevice_notifier failed\n");
-- if (register_inetaddr_notifier(&clip_inet_notifier))
-- printk(KERN_ERR "register_inetaddr_notifier failed\n");
-+ rtnl_unlock();
- return 0;
- }
-
-@@ -992,6 +999,8 @@ static int __init atm_clip_init(void)
-
- clip_tbl_hook = &clip_tbl;
- register_atm_ioctl(&clip_ioctl_ops);
-+ register_netdevice_notifier(&clip_dev_notifier);
-+ register_inetaddr_notifier(&clip_inet_notifier);
-
- #ifdef CONFIG_PROC_FS
- {
-@@ -1012,6 +1021,9 @@ static void __exit atm_clip_exit(void)
-
- remove_proc_entry("arp", atm_proc_root);
-
-+ unregister_inetaddr_notifier(&clip_inet_notifier);
-+ unregister_netdevice_notifier(&clip_dev_notifier);
-+
- deregister_atm_ioctl(&clip_ioctl_ops);
-
- /* First, stop the idle timer, so it stops banging
-diff -upr linux-2.6.16.orig/net/bridge/br_netfilter.c linux-2.6.16-026test009/net/bridge/br_netfilter.c
---- linux-2.6.16.orig/net/bridge/br_netfilter.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/bridge/br_netfilter.c 2006-04-19 15:02:11.000000000 +0400
-@@ -739,6 +739,15 @@ out:
- return NF_STOLEN;
- }
-
-+static int br_nf_dev_queue_xmit(struct sk_buff *skb)
-+{
-+ if (skb->protocol == htons(ETH_P_IP) &&
-+ skb->len > skb->dev->mtu &&
-+ !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-+ return ip_fragment(skb, br_dev_queue_push_xmit);
-+ else
-+ return br_dev_queue_push_xmit(skb);
-+}
-
- /* PF_BRIDGE/POST_ROUTING ********************************************/
- static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
-@@ -798,7 +807,7 @@ static unsigned int br_nf_post_routing(u
- realoutdev = nf_bridge->netoutdev;
- #endif
- NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
-- br_dev_queue_push_xmit);
-+ br_nf_dev_queue_xmit);
-
- return NF_STOLEN;
-
-@@ -843,7 +852,7 @@ static unsigned int ip_sabotage_out(unsi
- if ((out->hard_start_xmit == br_dev_xmit &&
- okfn != br_nf_forward_finish &&
- okfn != br_nf_local_out_finish &&
-- okfn != br_dev_queue_push_xmit)
-+ okfn != br_nf_dev_queue_xmit)
- #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
- || ((out->priv_flags & IFF_802_1Q_VLAN) &&
- VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
-diff -upr linux-2.6.16.orig/net/compat.c linux-2.6.16-026test009/net/compat.c
---- linux-2.6.16.orig/net/compat.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/compat.c 2006-04-19 15:02:11.000000000 +0400
-@@ -308,107 +308,6 @@ void scm_detach_fds_compat(struct msghdr
- }
-
- /*
-- * For now, we assume that the compatibility and native version
-- * of struct ipt_entry are the same - sfr. FIXME
-- */
--struct compat_ipt_replace {
-- char name[IPT_TABLE_MAXNAMELEN];
-- u32 valid_hooks;
-- u32 num_entries;
-- u32 size;
-- u32 hook_entry[NF_IP_NUMHOOKS];
-- u32 underflow[NF_IP_NUMHOOKS];
-- u32 num_counters;
-- compat_uptr_t counters; /* struct ipt_counters * */
-- struct ipt_entry entries[0];
--};
--
--static int do_netfilter_replace(int fd, int level, int optname,
-- char __user *optval, int optlen)
--{
-- struct compat_ipt_replace __user *urepl;
-- struct ipt_replace __user *repl_nat;
-- char name[IPT_TABLE_MAXNAMELEN];
-- u32 origsize, tmp32, num_counters;
-- unsigned int repl_nat_size;
-- int ret;
-- int i;
-- compat_uptr_t ucntrs;
--
-- urepl = (struct compat_ipt_replace __user *)optval;
-- if (get_user(origsize, &urepl->size))
-- return -EFAULT;
--
-- /* Hack: Causes ipchains to give correct error msg --RR */
-- if (optlen != sizeof(*urepl) + origsize)
-- return -ENOPROTOOPT;
--
-- /* XXX Assumes that size of ipt_entry is the same both in
-- * native and compat environments.
-- */
-- repl_nat_size = sizeof(*repl_nat) + origsize;
-- repl_nat = compat_alloc_user_space(repl_nat_size);
--
-- ret = -EFAULT;
-- if (put_user(origsize, &repl_nat->size))
-- goto out;
--
-- if (!access_ok(VERIFY_READ, urepl, optlen) ||
-- !access_ok(VERIFY_WRITE, repl_nat, optlen))
-- goto out;
--
-- if (__copy_from_user(name, urepl->name, sizeof(urepl->name)) ||
-- __copy_to_user(repl_nat->name, name, sizeof(repl_nat->name)))
-- goto out;
--
-- if (__get_user(tmp32, &urepl->valid_hooks) ||
-- __put_user(tmp32, &repl_nat->valid_hooks))
-- goto out;
--
-- if (__get_user(tmp32, &urepl->num_entries) ||
-- __put_user(tmp32, &repl_nat->num_entries))
-- goto out;
--
-- if (__get_user(num_counters, &urepl->num_counters) ||
-- __put_user(num_counters, &repl_nat->num_counters))
-- goto out;
--
-- if (__get_user(ucntrs, &urepl->counters) ||
-- __put_user(compat_ptr(ucntrs), &repl_nat->counters))
-- goto out;
--
-- if (__copy_in_user(&repl_nat->entries[0],
-- &urepl->entries[0],
-- origsize))
-- goto out;
--
-- for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-- if (__get_user(tmp32, &urepl->hook_entry[i]) ||
-- __put_user(tmp32, &repl_nat->hook_entry[i]) ||
-- __get_user(tmp32, &urepl->underflow[i]) ||
-- __put_user(tmp32, &repl_nat->underflow[i]))
-- goto out;
-- }
--
-- /*
-- * Since struct ipt_counters just contains two u_int64_t members
-- * we can just do the access_ok check here and pass the (converted)
-- * pointer into the standard syscall. We hope that the pointer is
-- * not misaligned ...
-- */
-- if (!access_ok(VERIFY_WRITE, compat_ptr(ucntrs),
-- num_counters * sizeof(struct ipt_counters)))
-- goto out;
--
--
-- ret = sys_setsockopt(fd, level, optname,
-- (char __user *)repl_nat, repl_nat_size);
--
--out:
-- return ret;
--}
--
--/*
- * A struct sock_filter is architecture independent.
- */
- struct compat_sock_fprog {
-@@ -460,10 +359,6 @@ static int do_set_sock_timeout(int fd, i
- asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
- char __user *optval, int optlen)
- {
-- /* SO_SET_REPLACE seems to be the same in all levels */
-- if (optname == IPT_SO_SET_REPLACE)
-- return do_netfilter_replace(fd, level, optname,
-- optval, optlen);
- if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER)
- return do_set_attach_filter(fd, level, optname,
- optval, optlen);
-diff -upr linux-2.6.16.orig/net/core/datagram.c linux-2.6.16-026test009/net/core/datagram.c
---- linux-2.6.16.orig/net/core/datagram.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/datagram.c 2006-04-19 15:02:12.000000000 +0400
-@@ -56,6 +56,8 @@
- #include <net/sock.h>
- #include <net/tcp_states.h>
-
-+#include <ub/ub_net.h>
-+
- /*
- * Is a socket 'connection oriented' ?
- */
-@@ -493,6 +495,7 @@ unsigned int datagram_poll(struct file *
- {
- struct sock *sk = sock->sk;
- unsigned int mask;
-+ int no_ubc_space;
-
- poll_wait(file, sk->sk_sleep, wait);
- mask = 0;
-@@ -500,8 +503,14 @@ unsigned int datagram_poll(struct file *
- /* exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
-- if (sk->sk_shutdown == SHUTDOWN_MASK)
-+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
-+ no_ubc_space = 0;
- mask |= POLLHUP;
-+ } else {
-+ no_ubc_space = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
-+ if (no_ubc_space)
-+ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
-+ }
-
- /* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
-@@ -518,7 +527,7 @@ unsigned int datagram_poll(struct file *
- }
-
- /* writable? */
-- if (sock_writeable(sk))
-+ if (!no_ubc_space && sock_writeable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
- else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-diff -upr linux-2.6.16.orig/net/core/dev.c linux-2.6.16-026test009/net/core/dev.c
---- linux-2.6.16.orig/net/core/dev.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/dev.c 2006-04-19 15:02:12.000000000 +0400
-@@ -115,6 +115,10 @@
- #include <net/iw_handler.h>
- #endif /* CONFIG_NET_RADIO */
- #include <asm/current.h>
-+#include <ub/beancounter.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_mem.h>
-
- /*
- * The list of packet types we will receive (as opposed to discard)
-@@ -167,25 +171,40 @@ static struct list_head ptype_all; /* T
- * unregister_netdevice(), which must be called with the rtnl
- * semaphore held.
- */
-+#ifdef CONFIG_VE
-+#define dev_tail (get_exec_env()->_net_dev_tail)
-+#else
- struct net_device *dev_base;
- static struct net_device **dev_tail = &dev_base;
-+EXPORT_SYMBOL(dev_base);
-+#endif
- DEFINE_RWLOCK(dev_base_lock);
-
--EXPORT_SYMBOL(dev_base);
- EXPORT_SYMBOL(dev_base_lock);
-
-+#ifdef CONFIG_VE
-+#define MAX_UNMOVABLE_NETDEVICES (8*4096)
-+static uint8_t unmovable_ifindex_list[MAX_UNMOVABLE_NETDEVICES/8];
-+static LIST_HEAD(dev_global_list);
-+#endif
-+
- #define NETDEV_HASHBITS 8
- static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
- static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
-
--static inline struct hlist_head *dev_name_hash(const char *name)
-+struct hlist_head *dev_name_hash(const char *name, struct ve_struct *env)
- {
-- unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
-+ unsigned hash;
-+ if (!ve_is_super(env))
-+ return visible_dev_head(env);
-+ hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
- return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
- }
-
--static inline struct hlist_head *dev_index_hash(int ifindex)
-+struct hlist_head *dev_index_hash(int ifindex, struct ve_struct *env)
- {
-+ if (!ve_is_super(env))
-+ return visible_dev_index_head(env);
- return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
- }
-
-@@ -469,7 +488,7 @@ struct net_device *__dev_get_by_name(con
- {
- struct hlist_node *p;
-
-- hlist_for_each(p, dev_name_hash(name)) {
-+ hlist_for_each(p, dev_name_hash(name, get_exec_env())) {
- struct net_device *dev
- = hlist_entry(p, struct net_device, name_hlist);
- if (!strncmp(dev->name, name, IFNAMSIZ))
-@@ -502,6 +521,32 @@ struct net_device *dev_get_by_name(const
- }
-
- /**
-+ * __dev_global_get_by_name - find a device by its name in dev_global_list
-+ * @name: name to find
-+ *
-+ * Find an interface by name. Must be called under the RTNL semaphore.
-+ * If the name is found, a pointer to the device
-+ * is returned. If the name is not found then %NULL is returned. The
-+ * reference counters are not incremented so the caller must be
-+ * careful with locks.
-+ */
-+
-+#ifdef CONFIG_VE
-+struct net_device *__dev_global_get_by_name(const char *name)
-+{
-+ struct net_device *dev;
-+ /* It's called relatively rarely */
-+ list_for_each_entry(dev, &dev_global_list, dev_global_list_entry) {
-+ if (strncmp(dev->name, name, IFNAMSIZ) == 0)
-+ return dev;
-+ }
-+ return NULL;
-+}
-+#else /* CONFIG_VE */
-+#define __dev_global_get_by_name(name) __dev_get_by_name(name)
-+#endif /* CONFIG_VE */
-+
-+/**
- * __dev_get_by_index - find a device by its ifindex
- * @ifindex: index of device
- *
-@@ -516,7 +561,7 @@ struct net_device *__dev_get_by_index(in
- {
- struct hlist_node *p;
-
-- hlist_for_each(p, dev_index_hash(ifindex)) {
-+ hlist_for_each(p, dev_index_hash(ifindex, get_exec_env())) {
- struct net_device *dev
- = hlist_entry(p, struct net_device, index_hlist);
- if (dev->ifindex == ifindex)
-@@ -635,6 +680,23 @@ int dev_valid_name(const char *name)
- || strchr(name, '/'));
- }
-
-+static inline void __dev_check_name(const char *dev_name, const char *name,
-+ long *inuse, const int max_netdevices)
-+{
-+ int i = 0;
-+ char buf[IFNAMSIZ];
-+
-+ if (!sscanf(dev_name, name, &i))
-+ return;
-+ if (i < 0 || i >= max_netdevices)
-+ return;
-+
-+ /* avoid cases where sscanf is not exact inverse of printf */
-+ snprintf(buf, sizeof(buf), name, i);
-+ if (!strncmp(buf, dev_name, IFNAMSIZ))
-+ set_bit(i, inuse);
-+}
-+
- /**
- * dev_alloc_name - allocate a name for a device
- * @dev: device
-@@ -671,16 +733,20 @@ int dev_alloc_name(struct net_device *de
- if (!inuse)
- return -ENOMEM;
-
-- for (d = dev_base; d; d = d->next) {
-- if (!sscanf(d->name, name, &i))
-- continue;
-- if (i < 0 || i >= max_netdevices)
-- continue;
--
-- /* avoid cases where sscanf is not exact inverse of printf */
-- snprintf(buf, sizeof(buf), name, i);
-- if (!strncmp(buf, d->name, IFNAMSIZ))
-- set_bit(i, inuse);
-+#ifdef CONFIG_VE
-+ if (ve_is_super(get_exec_env())) {
-+ list_for_each_entry(d, &dev_global_list,
-+ dev_global_list_entry) {
-+ __dev_check_name(d->name, name, inuse,
-+ max_netdevices);
-+ }
-+ } else
-+#endif
-+ {
-+ for (d = dev_base; d; d = d->next) {
-+ __dev_check_name(d->name, name, inuse,
-+ max_netdevices);
-+ }
- }
-
- i = find_first_zero_bit(inuse, max_netdevices);
-@@ -688,7 +754,11 @@ int dev_alloc_name(struct net_device *de
- }
-
- snprintf(buf, sizeof(buf), name, i);
-- if (!__dev_get_by_name(buf)) {
-+ if (ve_is_super(get_exec_env()))
-+ d = __dev_global_get_by_name(buf);
-+ else
-+ d = __dev_get_by_name(buf);
-+ if (d == NULL) {
- strlcpy(dev->name, buf, IFNAMSIZ);
- return i;
- }
-@@ -721,13 +791,14 @@ int dev_change_name(struct net_device *d
- if (!dev_valid_name(newname))
- return -EINVAL;
-
-+ /* Rename of devices in VE is prohibited by CAP_NET_ADMIN */
- if (strchr(newname, '%')) {
- err = dev_alloc_name(dev, newname);
- if (err < 0)
- return err;
- strcpy(newname, dev->name);
- }
-- else if (__dev_get_by_name(newname))
-+ else if (__dev_global_get_by_name(newname))
- return -EEXIST;
- else
- strlcpy(dev->name, newname, IFNAMSIZ);
-@@ -735,7 +806,8 @@ int dev_change_name(struct net_device *d
- err = class_device_rename(&dev->class_dev, dev->name);
- if (!err) {
- hlist_del(&dev->name_hlist);
-- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
-+ hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name,
-+ get_exec_env()));
- notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
- }
-
-@@ -1294,6 +1366,25 @@ int dev_queue_xmit(struct sk_buff *skb)
- skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
- #endif
- if (q->enqueue) {
-+ struct user_beancounter *ub;
-+
-+ ub = netdev_bc(dev)->exec_ub;
-+ /* the skb may already be charged if it was transmitted
-+ * via something like a bonding device */
-+ if (ub && (skb_bc(skb)->resource == 0)) {
-+ unsigned long chargesize;
-+ chargesize = skb_charge_fullsize(skb);
-+ if (charge_beancounter(ub, UB_OTHERSOCKBUF,
-+ chargesize, UB_SOFT)) {
-+ rcu_read_unlock();
-+ rc = -ENOMEM;
-+ goto out_kfree_skb;
-+ }
-+ skb_bc(skb)->ub = ub;
-+ skb_bc(skb)->charged = chargesize;
-+ skb_bc(skb)->resource = UB_OTHERSOCKBUF;
-+ }
-+
- /* Grab device queue */
- spin_lock(&dev->queue_lock);
-
-@@ -1580,6 +1671,7 @@ int netif_receive_skb(struct sk_buff *sk
- struct net_device *orig_dev;
- int ret = NET_RX_DROP;
- unsigned short type;
-+ struct ve_struct *old_env;
-
- /* if we've gotten here through NAPI, check netpoll */
- if (skb->dev->poll && netpoll_rx(skb))
-@@ -1598,6 +1690,17 @@ int netif_receive_skb(struct sk_buff *sk
- skb->h.raw = skb->nh.raw = skb->data;
- skb->mac_len = skb->nh.raw - skb->mac.raw;
-
-+#ifdef CONFIG_VE
-+ /*
-+ * The skb may have been allocated in a VE context other than the one
-+ * its device works in, so set the correct owner_env.
-+ */
-+ skb->owner_env = skb->dev->owner_env;
-+ BUG_ON(skb->owner_env == NULL);
-+#endif
-+
-+ old_env = set_exec_env(VE_OWNER_SKB(skb));
-+
- pt_prev = NULL;
-
- rcu_read_lock();
-@@ -1663,6 +1766,7 @@ ncls:
-
- out:
- rcu_read_unlock();
-+ (void)set_exec_env(old_env);
- return ret;
- }
-
-@@ -2038,7 +2142,7 @@ static int __init dev_proc_init(void)
- {
- int rc = -ENOMEM;
-
-- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
-+ if (!proc_glob_fops_create("net/dev", S_IRUGO, &dev_seq_fops))
- goto out;
- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
- goto out_dev;
-@@ -2050,7 +2154,7 @@ out:
- out_softnet:
- proc_net_remove("softnet_stat");
- out_dev:
-- proc_net_remove("dev");
-+ remove_proc_glob_entry("net/dev", NULL);
- goto out;
- }
- #else
-@@ -2115,6 +2219,9 @@ void dev_set_promiscuity(struct net_devi
- dev->flags &= ~IFF_PROMISC;
- else
- dev->flags |= IFF_PROMISC;
-+ /* Promiscuous mode on these devices does not mean anything */
-+ if (dev->flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
-+ return;
- if (dev->flags != old_flags) {
- dev_mc_upload(dev);
- printk(KERN_INFO "device %s %s promiscuous mode\n",
-@@ -2529,9 +2636,28 @@ int dev_ioctl(unsigned int cmd, void __u
- * - require strict serialization.
- * - do not return a value
- */
-+ case SIOCSIFMTU:
-+ if (!capable(CAP_NET_ADMIN) &&
-+ !capable(CAP_VE_NET_ADMIN))
-+ return -EPERM;
-+ dev_load(ifr.ifr_name);
-+ rtnl_lock();
-+ if (!ve_is_super(get_exec_env())) {
-+ struct net_device *dev;
-+ ret = -ENODEV;
-+ if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
-+ goto out_set_mtu_unlock;
-+ ret = -EPERM;
-+ if (ifr.ifr_mtu > dev->orig_mtu)
-+ goto out_set_mtu_unlock;
-+ }
-+ ret = dev_ifsioc(&ifr, cmd);
-+out_set_mtu_unlock:
-+ rtnl_unlock();
-+ return ret;
-+
- case SIOCSIFFLAGS:
- case SIOCSIFMETRIC:
-- case SIOCSIFMTU:
- case SIOCSIFMAP:
- case SIOCSIFHWADDR:
- case SIOCSIFSLAVE:
-@@ -2613,20 +2739,73 @@ int dev_ioctl(unsigned int cmd, void __u
- * dev_new_index - allocate an ifindex
- *
- * Returns a suitable unique value for a new device interface
-- * number. The caller must hold the rtnl semaphore or the
-+ * number. The caller must hold the rtnl semaphore or the
- * dev_base_lock to be sure it remains unique.
-+ *
-+ * Note: dev->name must be valid on entrance
- */
--static int dev_new_index(void)
-+static int dev_ve_new_index(void)
- {
-- static int ifindex;
-+#ifdef CONFIG_VE
-+ int *ifindex = &get_exec_env()->ifindex;
-+ int delta = 2;
-+#else
-+ static int s_ifindex;
-+ int *ifindex = &s_ifindex;
-+ int delta = 1;
-+#endif
- for (;;) {
-- if (++ifindex <= 0)
-- ifindex = 1;
-- if (!__dev_get_by_index(ifindex))
-- return ifindex;
-+ *ifindex += delta;
-+ if (*ifindex <= 0)
-+ *ifindex = 1;
-+ if (!__dev_get_by_index(*ifindex))
-+ return *ifindex;
- }
- }
-
-+#ifdef CONFIG_VE
-+static int dev_glb_new_index(void)
-+{
-+ int i;
-+
-+ i = find_first_zero_bit((long*)unmovable_ifindex_list,
-+ MAX_UNMOVABLE_NETDEVICES);
-+
-+ if (i == MAX_UNMOVABLE_NETDEVICES)
-+ return -EMFILE;
-+
-+ __set_bit(i, (long*)unmovable_ifindex_list);
-+ return (i + 1) * 2;
-+}
-+#endif
-+
-+static void dev_glb_free_index(struct net_device *dev)
-+{
-+#ifdef CONFIG_VE
-+ int bit;
-+
-+ bit = dev->ifindex / 2 - 1;
-+ BUG_ON(bit >= MAX_UNMOVABLE_NETDEVICES);
-+ __clear_bit(bit, (long*)unmovable_ifindex_list);
-+#endif
-+}
-+
-+static int dev_new_index(struct net_device *dev)
-+{
-+#ifdef CONFIG_VE
-+ if (ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
-+ return dev_glb_new_index();
-+#endif
-+
-+ return dev_ve_new_index();
-+}
-+
-+static void dev_free_index(struct net_device *dev)
-+{
-+ if ((dev->ifindex % 2) == 0)
-+ dev_glb_free_index(dev);
-+}
-+
- static int dev_boot_phase = 1;
-
- /* Delayed registration/unregisteration */
-@@ -2669,6 +2848,10 @@ int register_netdevice(struct net_device
- /* When net_device's are persistent, this will be fatal. */
- BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
-
-+ ret = -EPERM;
-+ if (!ve_is_super(get_exec_env()) && ve_is_dev_movable(dev))
-+ goto out;
-+
- spin_lock_init(&dev->queue_lock);
- spin_lock_init(&dev->xmit_lock);
- dev->xmit_lock_owner = -1;
-@@ -2688,27 +2871,32 @@ int register_netdevice(struct net_device
- if (ret) {
- if (ret > 0)
- ret = -EIO;
-- goto out_err;
-+ goto out_free_div;
- }
- }
-
- if (!dev_valid_name(dev->name)) {
- ret = -EINVAL;
-- goto out_err;
-+ goto out_free_div;
-+ }
-+
-+ dev->ifindex = dev_new_index(dev);
-+ if (dev->ifindex < 0) {
-+ ret = dev->ifindex;
-+ goto out_free_div;
- }
-
-- dev->ifindex = dev_new_index();
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
-
- /* Check for existence of name */
-- head = dev_name_hash(dev->name);
-+ head = dev_name_hash(dev->name, get_exec_env());
- hlist_for_each(p, head) {
- struct net_device *d
- = hlist_entry(p, struct net_device, name_hlist);
- if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
- ret = -EEXIST;
-- goto out_err;
-+ goto out_free_ind;
- }
- }
-
-@@ -2760,12 +2948,21 @@ int register_netdevice(struct net_device
- set_bit(__LINK_STATE_PRESENT, &dev->state);
-
- dev->next = NULL;
-+ dev->owner_env = get_exec_env();
-+ dev->orig_mtu = dev->mtu;
-+ netdev_bc(dev)->owner_ub = get_beancounter(get_exec_ub());
-+ netdev_bc(dev)->exec_ub = get_beancounter(get_exec_ub());
- dev_init_scheduler(dev);
-+#ifdef CONFIG_VE
-+ if (ve_is_super(get_exec_env()))
-+ list_add_tail(&dev->dev_global_list_entry, &dev_global_list);
-+#endif
- write_lock_bh(&dev_base_lock);
- *dev_tail = dev;
- dev_tail = &dev->next;
- hlist_add_head(&dev->name_hlist, head);
-- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
-+ hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex,
-+ get_exec_env()));
- dev_hold(dev);
- dev->reg_state = NETREG_REGISTERING;
- write_unlock_bh(&dev_base_lock);
-@@ -2779,7 +2976,9 @@ int register_netdevice(struct net_device
-
- out:
- return ret;
--out_err:
-+out_free_ind:
-+ dev_free_index(dev);
-+out_free_div:
- free_divert_blk(dev);
- goto out;
- }
-@@ -2825,6 +3024,10 @@ int register_netdev(struct net_device *d
- err = register_netdevice(dev);
- out:
- rtnl_unlock();
-+ if (err == 0 && dev->reg_state != NETREG_REGISTERED) {
-+ unregister_netdev(dev);
-+ err = -ENOMEM;
-+ }
- return err;
- }
- EXPORT_SYMBOL(register_netdev);
-@@ -2907,6 +3110,7 @@ void netdev_run_todo(void)
- {
- struct list_head list = LIST_HEAD_INIT(list);
- int err;
-+ struct ve_struct *current_env;
-
-
- /* Need to guard against multiple cpu's getting out of order. */
-@@ -2925,22 +3129,30 @@ void netdev_run_todo(void)
- list_splice_init(&net_todo_list, &list);
- spin_unlock(&net_todo_list_lock);
-
-+ current_env = get_exec_env();
- while (!list_empty(&list)) {
- struct net_device *dev
- = list_entry(list.next, struct net_device, todo_list);
- list_del(&dev->todo_list);
-
-+ (void)set_exec_env(dev->owner_env);
- switch(dev->reg_state) {
- case NETREG_REGISTERING:
- err = netdev_register_sysfs(dev);
-- if (err)
-+ if (err) {
- printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
- dev->name, err);
-+ dev->reg_state = NETREG_REGISTER_ERR;
-+ break;
-+ }
- dev->reg_state = NETREG_REGISTERED;
- break;
-
- case NETREG_UNREGISTERING:
- netdev_unregister_sysfs(dev);
-+ /* fall through */
-+
-+ case NETREG_REGISTER_ERR:
- dev->reg_state = NETREG_UNREGISTERED;
-
- netdev_wait_allrefs(dev);
-@@ -2951,6 +3163,10 @@ void netdev_run_todo(void)
- BUG_TRAP(!dev->ip6_ptr);
- BUG_TRAP(!dev->dn_ptr);
-
-+ put_beancounter(netdev_bc(dev)->exec_ub);
-+ put_beancounter(netdev_bc(dev)->owner_ub);
-+ netdev_bc(dev)->exec_ub = NULL;
-+ netdev_bc(dev)->owner_ub = NULL;
-
- /* It must be the very last action,
- * after this 'dev' may point to freed up memory.
-@@ -2965,6 +3181,7 @@ void netdev_run_todo(void)
- break;
- }
- }
-+ (void)set_exec_env(current_env);
-
- out:
- up(&net_todo_run_mutex);
-@@ -2990,7 +3207,7 @@ struct net_device *alloc_netdev(int size
- alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
- alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
-
-- p = kmalloc(alloc_size, GFP_KERNEL);
-+ p = ub_kmalloc(alloc_size, GFP_KERNEL);
- if (!p) {
- printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
- return NULL;
-@@ -3070,7 +3287,8 @@ int unregister_netdevice(struct net_devi
- return -ENODEV;
- }
-
-- BUG_ON(dev->reg_state != NETREG_REGISTERED);
-+ BUG_ON(dev->reg_state != NETREG_REGISTERED &&
-+ dev->reg_state != NETREG_REGISTER_ERR);
-
- /* If device is running, close it first. */
- if (dev->flags & IFF_UP)
-@@ -3086,6 +3304,10 @@ int unregister_netdevice(struct net_devi
- dev_tail = dp;
- *dp = d->next;
- write_unlock_bh(&dev_base_lock);
-+#ifdef CONFIG_VE
-+ if (ve_is_super(get_exec_env()))
-+ list_del(&dev->dev_global_list_entry);
-+#endif
- break;
- }
- }
-@@ -3095,7 +3317,8 @@ int unregister_netdevice(struct net_devi
- return -ENODEV;
- }
-
-- dev->reg_state = NETREG_UNREGISTERING;
-+ if (dev->reg_state != NETREG_REGISTER_ERR)
-+ dev->reg_state = NETREG_UNREGISTERING;
-
- synchronize_net();
-
-@@ -3119,6 +3342,8 @@ int unregister_netdevice(struct net_devi
- /* Notifier chain MUST detach us from master device. */
- BUG_TRAP(!dev->master);
-
-+ dev_free_index(dev);
-+
- free_divert_blk(dev);
-
- /* Finish processing unregister after unlock */
-@@ -3276,6 +3501,8 @@ EXPORT_SYMBOL(dev_close);
- EXPORT_SYMBOL(dev_get_by_flags);
- EXPORT_SYMBOL(dev_get_by_index);
- EXPORT_SYMBOL(dev_get_by_name);
-+EXPORT_SYMBOL(dev_name_hash);
-+EXPORT_SYMBOL(dev_index_hash);
- EXPORT_SYMBOL(dev_open);
- EXPORT_SYMBOL(dev_queue_xmit);
- EXPORT_SYMBOL(dev_remove_pack);
-diff -upr linux-2.6.16.orig/net/core/dev_mcast.c linux-2.6.16-026test009/net/core/dev_mcast.c
---- linux-2.6.16.orig/net/core/dev_mcast.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/dev_mcast.c 2006-04-19 15:02:12.000000000 +0400
-@@ -290,9 +290,10 @@ static struct file_operations dev_mc_seq
-
- void __init dev_mcast_init(void)
- {
-- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
-+ proc_glob_fops_create("net/dev_mcast", 0, &dev_mc_seq_fops);
- }
-
- EXPORT_SYMBOL(dev_mc_add);
- EXPORT_SYMBOL(dev_mc_delete);
- EXPORT_SYMBOL(dev_mc_upload);
-+EXPORT_SYMBOL(dev_mc_discard);
-diff -upr linux-2.6.16.orig/net/core/dst.c linux-2.6.16-026test009/net/core/dst.c
---- linux-2.6.16.orig/net/core/dst.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/dst.c 2006-04-19 15:02:12.000000000 +0400
-@@ -260,11 +260,14 @@ static int dst_dev_event(struct notifier
- switch (event) {
- case NETDEV_UNREGISTER:
- case NETDEV_DOWN:
-- spin_lock_bh(&dst_lock);
-+ local_bh_disable();
-+ dst_run_gc(0);
-+ spin_lock(&dst_lock);
- for (dst = dst_garbage_list; dst; dst = dst->next) {
- dst_ifdown(dst, dev, event != NETDEV_DOWN);
- }
-- spin_unlock_bh(&dst_lock);
-+ spin_unlock(&dst_lock);
-+ local_bh_enable();
- break;
- }
- return NOTIFY_DONE;
-diff -upr linux-2.6.16.orig/net/core/filter.c linux-2.6.16-026test009/net/core/filter.c
---- linux-2.6.16.orig/net/core/filter.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/filter.c 2006-04-19 15:02:11.000000000 +0400
-@@ -34,6 +34,7 @@
- #include <linux/timer.h>
- #include <asm/system.h>
- #include <asm/uaccess.h>
-+#include <asm/unaligned.h>
- #include <linux/filter.h>
-
- /* No hurry in this branch */
-@@ -177,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buf
- load_w:
- ptr = load_pointer(skb, k, 4, &tmp);
- if (ptr != NULL) {
-- A = ntohl(*(u32 *)ptr);
-+ A = ntohl(get_unaligned((u32 *)ptr));
- continue;
- }
- break;
-@@ -186,7 +187,7 @@ load_w:
- load_h:
- ptr = load_pointer(skb, k, 2, &tmp);
- if (ptr != NULL) {
-- A = ntohs(*(u16 *)ptr);
-+ A = ntohs(get_unaligned((u16 *)ptr));
- continue;
- }
- break;
-@@ -406,7 +407,7 @@ int sk_attach_filter(struct sock_fprog *
- if (fprog->filter == NULL)
- return -EINVAL;
-
-- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
-+ fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL_UBC);
- if (!fp)
- return -ENOMEM;
- if (copy_from_user(fp->insns, fprog->filter, fsize)) {
-diff -upr linux-2.6.16.orig/net/core/neighbour.c linux-2.6.16-026test009/net/core/neighbour.c
---- linux-2.6.16.orig/net/core/neighbour.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/neighbour.c 2006-04-19 15:02:12.000000000 +0400
-@@ -727,6 +727,11 @@ static void neigh_timer_handler(unsigned
- struct neighbour *neigh = (struct neighbour *)arg;
- unsigned state;
- int notify = 0;
-+ struct ve_struct *env;
-+ struct user_beancounter *ub;
-+
-+ env = set_exec_env(neigh->dev->owner_env);
-+ ub = set_exec_ub(netdev_bc(neigh->dev)->exec_ub);
-
- write_lock(&neigh->lock);
-
-@@ -824,6 +829,8 @@ out:
- neigh_app_notify(neigh);
- #endif
- neigh_release(neigh);
-+ (void)set_exec_ub(ub);
-+ (void)set_exec_env(env);
- }
-
- int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
-@@ -1213,6 +1220,12 @@ static void neigh_proxy_process(unsigned
- skb = skb->next;
- if (tdif <= 0) {
- struct net_device *dev = back->dev;
-+ struct ve_struct *env;
-+ struct user_beancounter *ub;
-+
-+ env = set_exec_env(dev->owner_env);
-+ ub = set_exec_ub(netdev_bc(dev)->exec_ub);
-+
- __skb_unlink(back, &tbl->proxy_queue);
- if (tbl->proxy_redo && netif_running(dev))
- tbl->proxy_redo(back);
-@@ -1220,6 +1233,9 @@ static void neigh_proxy_process(unsigned
- kfree_skb(back);
-
- dev_put(dev);
-+
-+ (void)set_exec_ub(ub);
-+ (void)set_exec_env(env);
- } else if (!sched_next || tdif < sched_next)
- sched_next = tdif;
- }
-@@ -1424,6 +1440,9 @@ int neigh_delete(struct sk_buff *skb, st
- struct net_device *dev = NULL;
- int err = -ENODEV;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return -EACCES;
-+
- if (ndm->ndm_ifindex &&
- (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
- goto out;
-@@ -1475,6 +1494,9 @@ int neigh_add(struct sk_buff *skb, struc
- struct net_device *dev = NULL;
- int err = -ENODEV;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return -EACCES;
-+
- if (ndm->ndm_ifindex &&
- (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
- goto out;
-@@ -1936,6 +1958,9 @@ int neigh_dump_info(struct sk_buff *skb,
- struct neigh_table *tbl;
- int t, family, s_t;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return -EACCES;
-+
- read_lock(&neigh_tbl_lock);
- family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
- s_t = cb->args[0];
-@@ -2530,11 +2555,17 @@ int neigh_sysctl_register(struct net_dev
- int p_id, int pdev_id, char *p_name,
- proc_handler *handler, ctl_handler *strategy)
- {
-- struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
-+ struct neigh_sysctl_table *t;
- const char *dev_name_source = NULL;
- char *dev_name = NULL;
- int err = 0;
-
-+ /* This function is called from VExx only from devinet_init,
-+ and it does not matter what is returned */
-+ if (!ve_is_super(get_exec_env()))
-+ return 0;
-+
-+ t = kmalloc(sizeof(*t), GFP_KERNEL);
- if (!t)
- return -ENOBUFS;
- memcpy(t, &neigh_sysctl_template, sizeof(*t));
-@@ -2625,6 +2656,8 @@ int neigh_sysctl_register(struct net_dev
-
- void neigh_sysctl_unregister(struct neigh_parms *p)
- {
-+ if (!ve_is_super(get_exec_env()))
-+ return;
- if (p->sysctl_table) {
- struct neigh_sysctl_table *t = p->sysctl_table;
- p->sysctl_table = NULL;
-diff -upr linux-2.6.16.orig/net/core/net-sysfs.c linux-2.6.16-026test009/net/core/net-sysfs.c
---- linux-2.6.16.orig/net/core/net-sysfs.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/net-sysfs.c 2006-04-19 15:02:12.000000000 +0400
-@@ -388,12 +388,13 @@ static void netdev_release(struct class_
- struct net_device *dev
- = container_of(cd, struct net_device, class_dev);
-
-- BUG_ON(dev->reg_state != NETREG_RELEASED);
-+ BUG_ON(dev->reg_state != NETREG_RELEASED &&
-+ dev->reg_state != NETREG_REGISTERING);
-
- kfree((char *)dev - dev->padded);
- }
-
--static struct class net_class = {
-+struct class net_class = {
- .name = "net",
- .release = netdev_release,
- .class_dev_attrs = net_class_attributes,
-@@ -401,6 +402,13 @@ static struct class net_class = {
- .uevent = netdev_uevent,
- #endif
- };
-+EXPORT_SYMBOL(net_class);
-+
-+#ifndef CONFIG_VE
-+#define visible_net_class net_class
-+#else
-+#define visible_net_class (*get_exec_env()->net_class)
-+#endif
-
- void netdev_unregister_sysfs(struct net_device * net)
- {
-@@ -424,7 +432,7 @@ int netdev_register_sysfs(struct net_dev
- struct class_device *class_dev = &(net->class_dev);
- int ret;
-
-- class_dev->class = &net_class;
-+ class_dev->class = &visible_net_class;
- class_dev->class_data = net;
-
- strlcpy(class_dev->class_id, net->name, BUS_ID_SIZE);
-@@ -453,12 +461,21 @@ out_cleanup:
- out_unreg:
- printk(KERN_WARNING "%s: sysfs attribute registration failed %d\n",
- net->name, ret);
-- class_device_unregister(class_dev);
-+ /* put is called in free_netdev() */
-+ class_device_del(class_dev);
- out:
- return ret;
- }
-
-+void prepare_sysfs_netdev(void)
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->net_class = &net_class;
-+#endif
-+}
-+
- int netdev_sysfs_init(void)
- {
-+ prepare_sysfs_netdev();
- return class_register(&net_class);
- }
-diff -upr linux-2.6.16.orig/net/core/rtnetlink.c linux-2.6.16-026test009/net/core/rtnetlink.c
---- linux-2.6.16.orig/net/core/rtnetlink.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/rtnetlink.c 2006-04-19 15:02:12.000000000 +0400
-@@ -434,6 +434,8 @@ static int rtnetlink_dump_all(struct sk_
- if (rtnetlink_links[idx] == NULL ||
- rtnetlink_links[idx][type].dumpit == NULL)
- continue;
-+ if (vz_security_proto_check(idx, 0, 0))
-+ continue;
- if (idx > s_idx)
- memset(&cb->args[0], 0, sizeof(cb->args));
- if (rtnetlink_links[idx][type].dumpit(skb, cb))
-@@ -501,7 +503,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, s
- return 0;
-
- family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-- if (family >= NPROTO) {
-+ if (family >= NPROTO || vz_security_proto_check(family, 0, 0)) {
- *errp = -EAFNOSUPPORT;
- return -1;
- }
-diff -upr linux-2.6.16.orig/net/core/scm.c linux-2.6.16-026test009/net/core/scm.c
---- linux-2.6.16.orig/net/core/scm.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/scm.c 2006-04-19 15:02:12.000000000 +0400
-@@ -34,6 +34,7 @@
- #include <net/compat.h>
- #include <net/scm.h>
-
-+#include <ub/ub_mem.h>
-
- /*
- * Only allow a user to send credentials, that they could set with
-@@ -42,7 +43,9 @@
-
- static __inline__ int scm_check_creds(struct ucred *creds)
- {
-- if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
-+ if ((creds->pid == virt_tgid(current) ||
-+ creds->pid == current->tgid ||
-+ capable(CAP_VE_SYS_ADMIN)) &&
- ((creds->uid == current->uid || creds->uid == current->euid ||
- creds->uid == current->suid) || capable(CAP_SETUID)) &&
- ((creds->gid == current->gid || creds->gid == current->egid ||
-@@ -69,7 +72,7 @@ static int scm_fp_copy(struct cmsghdr *c
-
- if (!fpl)
- {
-- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
-+ fpl = ub_kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
- if (!fpl)
- return -ENOMEM;
- *fplp = fpl;
-@@ -275,7 +278,7 @@ struct scm_fp_list *scm_fp_dup(struct sc
- if (!fpl)
- return NULL;
-
-- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
-+ new_fpl = ub_kmalloc(sizeof(*fpl), GFP_KERNEL);
- if (new_fpl) {
- for (i=fpl->count-1; i>=0; i--)
- get_file(fpl->fp[i]);
-diff -upr linux-2.6.16.orig/net/core/skbuff.c linux-2.6.16-026test009/net/core/skbuff.c
---- linux-2.6.16.orig/net/core/skbuff.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/skbuff.c 2006-04-19 15:02:12.000000000 +0400
-@@ -48,6 +48,7 @@
- #include <linux/in.h>
- #include <linux/inet.h>
- #include <linux/slab.h>
-+#include <linux/kmem_cache.h>
- #include <linux/netdevice.h>
- #ifdef CONFIG_NET_CLS_ACT
- #include <net/pkt_sched.h>
-@@ -68,6 +69,8 @@
- #include <asm/uaccess.h>
- #include <asm/system.h>
-
-+#include <ub/ub_net.h>
-+
- static kmem_cache_t *skbuff_head_cache __read_mostly;
- static kmem_cache_t *skbuff_fclone_cache __read_mostly;
-
-@@ -147,6 +150,9 @@ struct sk_buff *__alloc_skb(unsigned int
- if (!skb)
- goto out;
-
-+ if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
-+ goto nobc;
-+
- /* Get the DATA. Size must match skb_add_mtu(). */
- size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-@@ -160,6 +166,7 @@ struct sk_buff *__alloc_skb(unsigned int
- skb->data = data;
- skb->tail = data;
- skb->end = data + size;
-+ SET_VE_OWNER_SKB(skb, get_exec_env());
- /* make sure we initialize shinfo sequentially */
- shinfo = skb_shinfo(skb);
- atomic_set(&shinfo->dataref, 1);
-@@ -182,6 +189,8 @@ struct sk_buff *__alloc_skb(unsigned int
- out:
- return skb;
- nodata:
-+ ub_skb_free_bc(skb);
-+nobc:
- kmem_cache_free(cache, skb);
- skb = NULL;
- goto out;
-@@ -214,6 +223,9 @@ struct sk_buff *alloc_skb_from_cache(kme
- if (!skb)
- goto out;
-
-+ if (ub_skb_alloc_bc(skb, gfp_mask & ~__GFP_DMA))
-+ goto nobc;
-+
- /* Get the DATA. */
- size = SKB_DATA_ALIGN(size);
- data = kmem_cache_alloc(cp, gfp_mask);
-@@ -227,6 +239,7 @@ struct sk_buff *alloc_skb_from_cache(kme
- skb->data = data;
- skb->tail = data;
- skb->end = data + size;
-+ SET_VE_OWNER_SKB(skb, get_exec_env());
-
- atomic_set(&(skb_shinfo(skb)->dataref), 1);
- skb_shinfo(skb)->nr_frags = 0;
-@@ -236,6 +249,8 @@ struct sk_buff *alloc_skb_from_cache(kme
- out:
- return skb;
- nodata:
-+ ub_skb_free_bc(skb);
-+nobc:
- kmem_cache_free(skbuff_head_cache, skb);
- skb = NULL;
- goto out;
-@@ -290,6 +305,7 @@ void kfree_skbmem(struct sk_buff *skb)
- atomic_t *fclone_ref;
-
- skb_release_data(skb);
-+ ub_skb_free_bc(skb);
- switch (skb->fclone) {
- case SKB_FCLONE_UNAVAILABLE:
- kmem_cache_free(skbuff_head_cache, skb);
-@@ -331,6 +347,7 @@ void __kfree_skb(struct sk_buff *skb)
- #ifdef CONFIG_XFRM
- secpath_put(skb->sp);
- #endif
-+ ub_skb_uncharge(skb);
- if (skb->destructor) {
- WARN_ON(in_irq());
- skb->destructor(skb);
-@@ -386,6 +403,11 @@ struct sk_buff *skb_clone(struct sk_buff
- n->fclone = SKB_FCLONE_UNAVAILABLE;
- }
-
-+ if (ub_skb_alloc_bc(n, gfp_mask)) {
-+ kmem_cache_free(skbuff_head_cache, n);
-+ return NULL;
-+ }
-+
- #define C(x) n->x = skb->x
-
- n->next = n->prev = NULL;
-@@ -415,6 +437,7 @@ struct sk_buff *skb_clone(struct sk_buff
- C(ipvs_property);
- #endif
- C(protocol);
-+ SET_VE_OWNER_SKB(n, VE_OWNER_SKB(skb));
- n->destructor = NULL;
- #ifdef CONFIG_NETFILTER
- C(nfmark);
-diff -upr linux-2.6.16.orig/net/core/sock.c linux-2.6.16-026test009/net/core/sock.c
---- linux-2.6.16.orig/net/core/sock.c 2006-04-19 15:02:01.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/sock.c 2006-04-19 15:02:12.000000000 +0400
-@@ -108,6 +108,7 @@
- #include <linux/net.h>
- #include <linux/mm.h>
- #include <linux/slab.h>
-+#include <linux/kmem_cache.h>
- #include <linux/interrupt.h>
- #include <linux/poll.h>
- #include <linux/tcp.h>
-@@ -124,6 +125,9 @@
- #include <net/xfrm.h>
- #include <linux/ipsec.h>
-
-+#include <ub/ub_net.h>
-+#include <ub/beancounter.h>
-+
- #include <linux/filter.h>
-
- #ifdef CONFIG_INET
-@@ -172,7 +176,7 @@ static void sock_warn_obsolete_bsdism(co
- static char warncomm[TASK_COMM_LEN];
- if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
-- printk(KERN_WARNING "process `%s' is using obsolete "
-+ ve_printk(VE_LOG, KERN_WARNING "process `%s' is using obsolete "
- "%s SO_BSDCOMPAT\n", warncomm, name);
- warned++;
- }
-@@ -404,8 +408,9 @@ set_rcvbuf:
- if (!valbool) {
- sk->sk_bound_dev_if = 0;
- } else {
-- if (optlen > IFNAMSIZ)
-- optlen = IFNAMSIZ;
-+ if (optlen > IFNAMSIZ - 1)
-+ optlen = IFNAMSIZ - 1;
-+ memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
- ret = -EFAULT;
- break;
-@@ -659,6 +664,7 @@ struct sock *sk_alloc(int family, gfp_t
- */
- sk->sk_prot = sk->sk_prot_creator = prot;
- sock_lock_init(sk);
-+ SET_VE_OWNER_SK(sk, get_exec_env());
- }
-
- if (security_sk_alloc(sk, family, priority))
-@@ -698,6 +704,7 @@ void sk_free(struct sock *sk)
- __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
-
- security_sk_free(sk);
-+ ub_sock_uncharge(sk);
- if (sk->sk_prot_creator->slab != NULL)
- kmem_cache_free(sk->sk_prot_creator->slab, sk);
- else
-@@ -714,6 +721,11 @@ struct sock *sk_clone(const struct sock
-
- memcpy(newsk, sk, sk->sk_prot->obj_size);
-
-+ if (ub_sock_charge(newsk, sk->sk_family, sk->sk_type) < 0) {
-+ sk_free(newsk);
-+ return NULL;
-+ }
-+
- /* SANITY */
- sk_node_init(&newsk->sk_node);
- sock_lock_init(newsk);
-@@ -934,14 +946,12 @@ static long sock_wait_for_wmem(struct so
- /*
- * Generic send/receive buffer handlers
- */
--
--static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
-- unsigned long header_len,
-- unsigned long data_len,
-- int noblock, int *errcode)
-+struct sk_buff *sock_alloc_send_skb2(struct sock *sk, unsigned long size,
-+ unsigned long size2, int noblock,
-+ int *errcode)
- {
- struct sk_buff *skb;
-- gfp_t gfp_mask;
-+ unsigned int gfp_mask;
- long timeo;
- int err;
-
-@@ -959,46 +969,35 @@ static struct sk_buff *sock_alloc_send_p
- if (sk->sk_shutdown & SEND_SHUTDOWN)
- goto failure;
-
-- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-- skb = alloc_skb(header_len, sk->sk_allocation);
-- if (skb) {
-- int npages;
-- int i;
--
-- /* No pages, we're done... */
-- if (!data_len)
-- break;
--
-- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-- skb->truesize += data_len;
-- skb_shinfo(skb)->nr_frags = npages;
-- for (i = 0; i < npages; i++) {
-- struct page *page;
-- skb_frag_t *frag;
--
-- page = alloc_pages(sk->sk_allocation, 0);
-- if (!page) {
-- err = -ENOBUFS;
-- skb_shinfo(skb)->nr_frags = i;
-- kfree_skb(skb);
-- goto failure;
-- }
--
-- frag = &skb_shinfo(skb)->frags[i];
-- frag->page = page;
-- frag->page_offset = 0;
-- frag->size = (data_len >= PAGE_SIZE ?
-- PAGE_SIZE :
-- data_len);
-- data_len -= PAGE_SIZE;
-- }
-+ if (ub_sock_getwres_other(sk, skb_charge_size(size))) {
-+ if (size2 < size) {
-+ size = size2;
-+ continue;
-+ }
-+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-+ err = -EAGAIN;
-+ if (!timeo)
-+ goto failure;
-+ if (signal_pending(current))
-+ goto interrupted;
-+ timeo = ub_sock_wait_for_space(sk, timeo,
-+ skb_charge_size(size));
-+ continue;
-+ }
-
-+ if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-+ skb = alloc_skb(size, sk->sk_allocation);
-+ if (skb)
- /* Full success... */
- break;
-- }
-+ ub_sock_retwres_other(sk, skb_charge_size(size),
-+ SOCK_MIN_UBCSPACE_CH);
- err = -ENOBUFS;
- goto failure;
- }
-+ ub_sock_retwres_other(sk,
-+ skb_charge_size(size),
-+ SOCK_MIN_UBCSPACE_CH);
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
-@@ -1009,6 +1008,7 @@ static struct sk_buff *sock_alloc_send_p
- timeo = sock_wait_for_wmem(sk, timeo);
- }
-
-+ ub_skb_set_charge(skb, sk, skb_charge_size(size), UB_OTHERSOCKBUF);
- skb_set_owner_w(skb, sk);
- return skb;
-
-@@ -1022,7 +1022,7 @@ failure:
- struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
- int noblock, int *errcode)
- {
-- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
-+ return sock_alloc_send_skb2(sk, size, size, noblock, errcode);
- }
-
- static void __lock_sock(struct sock *sk)
-@@ -1462,7 +1462,8 @@ int proto_register(struct proto *prot, i
-
- if (alloc_slab) {
- prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
-- SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN | SLAB_UBC,
-+ NULL, NULL);
-
- if (prot->slab == NULL) {
- printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
-@@ -1478,9 +1479,11 @@ int proto_register(struct proto *prot, i
- goto out_free_sock_slab;
-
- sprintf(request_sock_slab_name, mask, prot->name);
-- prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
-- prot->rsk_prot->obj_size, 0,
-- SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ prot->rsk_prot->slab =
-+ kmem_cache_create(request_sock_slab_name,
-+ prot->rsk_prot->obj_size, 0,
-+ SLAB_HWCACHE_ALIGN | SLAB_UBC,
-+ NULL, NULL);
-
- if (prot->rsk_prot->slab == NULL) {
- printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
-@@ -1501,7 +1504,7 @@ int proto_register(struct proto *prot, i
- prot->twsk_prot->twsk_slab =
- kmem_cache_create(timewait_sock_slab_name,
- prot->twsk_prot->twsk_obj_size,
-- 0, SLAB_HWCACHE_ALIGN,
-+ 0, SLAB_HWCACHE_ALIGN | SLAB_UBC,
- NULL, NULL);
- if (prot->twsk_prot->twsk_slab == NULL)
- goto out_free_timewait_sock_slab_name;
-diff -upr linux-2.6.16.orig/net/core/stream.c linux-2.6.16-026test009/net/core/stream.c
---- linux-2.6.16.orig/net/core/stream.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/core/stream.c 2006-04-19 15:02:12.000000000 +0400
-@@ -111,8 +111,9 @@ EXPORT_SYMBOL(sk_stream_wait_close);
- * sk_stream_wait_memory - Wait for more memory for a socket
- * @sk: socket to wait for memory
- * @timeo_p: for how long
-+ * @amount: amount of memory to wait for (in UB space!)
- */
--int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
-+int sk_stream_wait_memory(struct sock *sk, long *timeo_p, unsigned long amount)
- {
- int err = 0;
- long vm_wait = 0;
-@@ -134,8 +135,11 @@ int sk_stream_wait_memory(struct sock *s
- if (signal_pending(current))
- goto do_interrupted;
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-- if (sk_stream_memory_free(sk) && !vm_wait)
-- break;
-+ if (amount == 0) {
-+ if (sk_stream_memory_free(sk) && !vm_wait)
-+ break;
-+ } else
-+ ub_sock_sndqueueadd_tcp(sk, amount);
-
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- sk->sk_write_pending++;
-@@ -144,6 +148,8 @@ int sk_stream_wait_memory(struct sock *s
- sk_stream_memory_free(sk) &&
- vm_wait);
- sk->sk_write_pending--;
-+ if (amount > 0)
-+ ub_sock_sndqueuedel(sk);
-
- if (vm_wait) {
- vm_wait -= current_timeo;
-diff -upr linux-2.6.16.orig/net/ipv4/af_inet.c linux-2.6.16-026test009/net/ipv4/af_inet.c
---- linux-2.6.16.orig/net/ipv4/af_inet.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/af_inet.c 2006-04-19 15:02:12.000000000 +0400
-@@ -114,6 +114,7 @@
- #ifdef CONFIG_IP_MROUTE
- #include <linux/mroute.h>
- #endif
-+#include <ub/ub_net.h>
-
- DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
-
-@@ -298,6 +299,13 @@ lookup_protocol:
- if (sk == NULL)
- goto out;
-
-+ err = -ENOBUFS;
-+ if (ub_sock_charge(sk, PF_INET, sock->type))
-+ goto out_sk_free;
-+ /* If the charge was successful, sock_init_data() MUST be called to
-+ * set sk->sk_type. Otherwise sk will be uncharged to the wrong resource.
-+ */
-+
- err = 0;
- sk->sk_no_check = answer_no_check;
- if (INET_PROTOSW_REUSE & answer_flags)
-@@ -355,6 +363,9 @@ out:
- out_rcu_unlock:
- rcu_read_unlock();
- goto out;
-+out_sk_free:
-+ sk_free(sk);
-+ return err;
- }
-
-
-@@ -369,6 +380,9 @@ int inet_release(struct socket *sock)
-
- if (sk) {
- long timeout;
-+ struct ve_struct *saved_env;
-+
-+ saved_env = set_exec_env(VE_OWNER_SK(sk));
-
- /* Applications forget to leave groups before exiting */
- ip_mc_drop_socket(sk);
-@@ -386,6 +400,8 @@ int inet_release(struct socket *sock)
- timeout = sk->sk_lingertime;
- sock->sk = NULL;
- sk->sk_prot->close(sk, timeout);
-+
-+ (void)set_exec_env(saved_env);
- }
- return 0;
- }
-@@ -1108,20 +1124,20 @@ static struct net_protocol icmp_protocol
-
- static int __init init_ipv4_mibs(void)
- {
-- net_statistics[0] = alloc_percpu(struct linux_mib);
-- net_statistics[1] = alloc_percpu(struct linux_mib);
-- ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-- ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-- icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-- icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-- tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-- tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-- udp_statistics[0] = alloc_percpu(struct udp_mib);
-- udp_statistics[1] = alloc_percpu(struct udp_mib);
-+ ve_net_statistics[0] = alloc_percpu(struct linux_mib);
-+ ve_net_statistics[1] = alloc_percpu(struct linux_mib);
-+ ve_ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-+ ve_ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-+ ve_icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-+ ve_icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-+ ve_tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-+ ve_tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-+ ve_udp_statistics[0] = alloc_percpu(struct udp_mib);
-+ ve_udp_statistics[1] = alloc_percpu(struct udp_mib);
- if (!
-- (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-- && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-- && udp_statistics[0] && udp_statistics[1]))
-+ (ve_net_statistics[0] && ve_net_statistics[1] && ve_ip_statistics[0]
-+ && ve_ip_statistics[1] && ve_tcp_statistics[0] && ve_tcp_statistics[1]
-+ && ve_udp_statistics[0] && ve_udp_statistics[1]))
- return -ENOMEM;
-
- (void) tcp_mib_init();
-diff -upr linux-2.6.16.orig/net/ipv4/arp.c linux-2.6.16-026test009/net/ipv4/arp.c
---- linux-2.6.16.orig/net/ipv4/arp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/arp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -988,7 +988,7 @@ static int arp_req_set(struct arpreq *r,
- return 0;
- }
- if (dev == NULL) {
-- ipv4_devconf.proxy_arp = 1;
-+ ve_ipv4_devconf.proxy_arp = 1;
- return 0;
- }
- if (__in_dev_get_rtnl(dev)) {
-@@ -1094,7 +1094,7 @@ static int arp_req_delete(struct arpreq
- return pneigh_delete(&arp_tbl, &ip, dev);
- if (mask == 0) {
- if (dev == NULL) {
-- ipv4_devconf.proxy_arp = 0;
-+ ve_ipv4_devconf.proxy_arp = 0;
- return 0;
- }
- if (__in_dev_get_rtnl(dev)) {
-@@ -1145,6 +1145,8 @@ int arp_ioctl(unsigned int cmd, void __u
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- case SIOCGARP:
-+ if (!ve_is_super(get_exec_env()))
-+ return -EACCES;
- err = copy_from_user(&r, arg, sizeof(struct arpreq));
- if (err)
- return -EFAULT;
-@@ -1372,8 +1374,12 @@ static int arp_seq_open(struct inode *in
- {
- struct seq_file *seq;
- int rc = -ENOMEM;
-- struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
--
-+ struct neigh_seq_state *s;
-+
-+ if (!ve_is_super(get_exec_env()))
-+ return -EPERM;
-+
-+ s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- goto out;
-
-@@ -1401,7 +1407,7 @@ static struct file_operations arp_seq_fo
-
- static int __init arp_proc_init(void)
- {
-- if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
-+ if (!proc_glob_fops_create("net/arp", S_IRUGO, &arp_seq_fops))
- return -ENOMEM;
- return 0;
- }
-diff -upr linux-2.6.16.orig/net/ipv4/devinet.c linux-2.6.16-026test009/net/ipv4/devinet.c
---- linux-2.6.16.orig/net/ipv4/devinet.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/devinet.c 2006-04-19 15:02:13.000000000 +0400
-@@ -71,7 +71,7 @@ struct ipv4_devconf ipv4_devconf = {
- .shared_media = 1,
- };
-
--static struct ipv4_devconf ipv4_devconf_dflt = {
-+struct ipv4_devconf ipv4_devconf_dflt = {
- .accept_redirects = 1,
- .send_redirects = 1,
- .secure_redirects = 1,
-@@ -79,10 +79,16 @@ static struct ipv4_devconf ipv4_devconf_
- .accept_source_route = 1,
- };
-
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ve_ipv4_devconf_dflt (*(get_exec_env()->_ipv4_devconf_dflt))
-+#else
-+#define ve_ipv4_devconf_dflt ipv4_devconf_dflt
-+#endif
-+
- static void rtmsg_ifa(int event, struct in_ifaddr *);
-
- static struct notifier_block *inetaddr_chain;
--static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
- int destroy);
- #ifdef CONFIG_SYSCTL
- static void devinet_sysctl_register(struct in_device *in_dev,
-@@ -92,7 +98,7 @@ static void devinet_sysctl_unregister(st
-
- /* Locks all the inet devices. */
-
--static struct in_ifaddr *inet_alloc_ifa(void)
-+struct in_ifaddr *inet_alloc_ifa(void)
- {
- struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
-
-@@ -103,6 +109,7 @@ static struct in_ifaddr *inet_alloc_ifa(
-
- return ifa;
- }
-+EXPORT_SYMBOL_GPL(inet_alloc_ifa);
-
- static void inet_rcu_free_ifa(struct rcu_head *head)
- {
-@@ -175,6 +182,7 @@ out_kfree:
- in_dev = NULL;
- goto out;
- }
-+EXPORT_SYMBOL_GPL(inetdev_init);
-
- static void in_dev_rcu_put(struct rcu_head *head)
- {
-@@ -190,7 +198,7 @@ static void inetdev_destroy(struct in_de
- ASSERT_RTNL();
-
- dev = in_dev->dev;
-- if (dev == &loopback_dev)
-+ if (dev == &ve0_loopback)
- return;
-
- in_dev->dead = 1;
-@@ -232,7 +240,7 @@ int inet_addr_onlink(struct in_device *i
- return 0;
- }
-
--static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-+void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
- int destroy)
- {
- struct in_ifaddr *promote = NULL;
-@@ -320,7 +328,7 @@ static void inet_del_ifa(struct in_devic
- }
- }
-
--static int inet_insert_ifa(struct in_ifaddr *ifa)
-+int inet_insert_ifa(struct in_ifaddr *ifa)
- {
- struct in_device *in_dev = ifa->ifa_dev;
- struct in_ifaddr *ifa1, **ifap, **last_primary;
-@@ -370,6 +378,7 @@ static int inet_insert_ifa(struct in_ifa
-
- return 0;
- }
-+EXPORT_SYMBOL_GPL(inet_insert_ifa);
-
- static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
- {
-@@ -578,7 +587,7 @@ int devinet_ioctl(unsigned int cmd, void
-
- case SIOCSIFFLAGS:
- ret = -EACCES;
-- if (!capable(CAP_NET_ADMIN))
-+ if (!capable(CAP_VE_NET_ADMIN))
- goto out;
- break;
- case SIOCSIFADDR: /* Set interface address (and family) */
-@@ -586,7 +595,7 @@ int devinet_ioctl(unsigned int cmd, void
- case SIOCSIFDSTADDR: /* Set the destination address */
- case SIOCSIFNETMASK: /* Set the netmask for the interface */
- ret = -EACCES;
-- if (!capable(CAP_NET_ADMIN))
-+ if (!capable(CAP_VE_NET_ADMIN))
- goto out;
- ret = -EINVAL;
- if (sin->sin_family != AF_INET)
-@@ -1163,10 +1172,10 @@ static struct rtnetlink_link inet_rtnetl
- void inet_forward_change(void)
- {
- struct net_device *dev;
-- int on = ipv4_devconf.forwarding;
-+ int on = ve_ipv4_devconf.forwarding;
-
-- ipv4_devconf.accept_redirects = !on;
-- ipv4_devconf_dflt.forwarding = on;
-+ ve_ipv4_devconf.accept_redirects = !on;
-+ ve_ipv4_devconf_dflt.forwarding = on;
-
- read_lock(&dev_base_lock);
- for (dev = dev_base; dev; dev = dev->next) {
-@@ -1191,9 +1200,9 @@ static int devinet_sysctl_forward(ctl_ta
- int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-
- if (write && *valp != val) {
-- if (valp == &ipv4_devconf.forwarding)
-+ if (valp == &ve_ipv4_devconf.forwarding)
- inet_forward_change();
-- else if (valp != &ipv4_devconf_dflt.forwarding)
-+ else if (valp != &ve_ipv4_devconf_dflt.forwarding)
- rt_cache_flush(0);
- }
-
-@@ -1464,30 +1473,22 @@ static struct devinet_sysctl_table {
- },
- };
-
--static void devinet_sysctl_register(struct in_device *in_dev,
-- struct ipv4_devconf *p)
-+static struct devinet_sysctl_table *__devinet_sysctl_register(char *dev_name,
-+ int ifindex, struct ipv4_devconf *p)
- {
- int i;
-- struct net_device *dev = in_dev ? in_dev->dev : NULL;
-- struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
-- char *dev_name = NULL;
-+ struct devinet_sysctl_table *t;
-
-+ t = kmalloc(sizeof(*t), GFP_KERNEL);
- if (!t)
-- return;
-+ goto out;
-+
- memcpy(t, &devinet_sysctl, sizeof(*t));
- for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
- t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
- t->devinet_vars[i].de = NULL;
- }
-
-- if (dev) {
-- dev_name = dev->name;
-- t->devinet_dev[0].ctl_name = dev->ifindex;
-- } else {
-- dev_name = "default";
-- t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-- }
--
- /*
- * Make a copy of dev_name, because '.procname' is regarded as const
- * by sysctl and we wouldn't want anyone to change it under our feet
-@@ -1495,8 +1496,9 @@ static void devinet_sysctl_register(stru
- */
- dev_name = kstrdup(dev_name, GFP_KERNEL);
- if (!dev_name)
-- goto free;
-+ goto out_free_table;
-
-+ t->devinet_dev[0].ctl_name = ifindex;
- t->devinet_dev[0].procname = dev_name;
- t->devinet_dev[0].child = t->devinet_vars;
- t->devinet_dev[0].de = NULL;
-@@ -1509,17 +1511,38 @@ static void devinet_sysctl_register(stru
-
- t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
- if (!t->sysctl_header)
-- goto free_procname;
-+ goto out_free_procname;
-
-- p->sysctl = t;
-- return;
-+ return t;
-
- /* error path */
-- free_procname:
-+out_free_procname:
- kfree(dev_name);
-- free:
-+out_free_table:
- kfree(t);
-- return;
-+out:
-+ printk(KERN_DEBUG "Can't register net/ipv4/conf sysctls.\n");
-+ return NULL;
-+}
-+
-+static void devinet_sysctl_register(struct in_device *in_dev,
-+ struct ipv4_devconf *p)
-+{
-+ struct net_device *dev;
-+ char *dev_name;
-+ int ifindex;
-+
-+ dev = in_dev ? in_dev->dev : NULL;
-+
-+ if (dev) {
-+ dev_name = dev->name;
-+ ifindex = dev->ifindex;
-+ } else {
-+ dev_name = "default";
-+ ifindex = NET_PROTO_CONF_DEFAULT;
-+ }
-+
-+ p->sysctl = __devinet_sysctl_register(dev_name, ifindex, p);
- }
-
- static void devinet_sysctl_unregister(struct ipv4_devconf *p)
-@@ -1532,7 +1555,170 @@ static void devinet_sysctl_unregister(st
- kfree(t);
- }
- }
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+static ctl_table net_sysctl_tables[] = {
-+ /* 0: net */
-+ {
-+ .ctl_name = CTL_NET,
-+ .procname = "net",
-+ .mode = 0555,
-+ .child = &net_sysctl_tables[2],
-+ },
-+ { .ctl_name = 0, },
-+ /* 2: net/ipv4 */
-+ {
-+ .ctl_name = NET_IPV4,
-+ .procname = "ipv4",
-+ .mode = 0555,
-+ .child = &net_sysctl_tables[4],
-+ },
-+ { .ctl_name = 0, },
-+ /* 4, 5: net/ipv4/[vars] */
-+ {
-+ .ctl_name = NET_IPV4_FORWARD,
-+ .procname = "ip_forward",
-+ .data = &ipv4_devconf.forwarding,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &ipv4_sysctl_forward,
-+ .strategy = &ipv4_sysctl_forward_strategy,
-+ },
-+ {
-+ .ctl_name = NET_IPV4_ROUTE,
-+ .procname = "route",
-+ .maxlen = 0,
-+ .mode = 0555,
-+ .child = &net_sysctl_tables[7],
-+ },
-+ { .ctl_name = 0 },
-+ /* 7: net/ipv4/route/flush */
-+ {
-+ .ctl_name = NET_IPV4_ROUTE_FLUSH,
-+ .procname = "flush",
-+ .data = NULL, /* set up in the per-VE clone by ip_forward_sysctl_register() */
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &ipv4_sysctl_rtcache_flush,
-+ .strategy = &ipv4_sysctl_rtcache_flush_strategy,
-+ },
-+ { .ctl_name = 0 },
-+};
-+
-+static int ip_forward_sysctl_register(struct ve_struct *ve,
-+ struct ipv4_devconf *p)
-+{
-+ struct ctl_table_header *hdr;
-+ ctl_table *root;
-+
-+ root = clone_sysctl_template(net_sysctl_tables,
-+ sizeof(net_sysctl_tables) / sizeof(ctl_table));
-+ if (root == NULL)
-+ goto out;
-+
-+ root[4].data = &p->forwarding;
-+ root[7].data = &ipv4_flush_delay;
-+
-+ hdr = register_sysctl_table(root, 1);
-+ if (hdr == NULL)
-+ goto out_free;
-+
-+ ve->forward_header = hdr;
-+ ve->forward_table = root;
-+ return 0;
-+
-+out_free:
-+ free_sysctl_clone(root);
-+out:
-+ return -ENOMEM;
-+}
-+
-+static inline void ip_forward_sysctl_unregister(struct ve_struct *ve)
-+{
-+ unregister_sysctl_table(ve->forward_header);
-+ ve->forward_header = NULL;
-+}
-+
-+static inline void ip_forward_sysctl_free(struct ve_struct *ve)
-+{
-+ free_sysctl_clone(ve->forward_table);
-+ ve->forward_table = NULL;
-+}
-+#endif
-+#endif
-+
-+int devinet_sysctl_init(struct ve_struct *ve)
-+{
-+ int err = 0;
-+#ifdef CONFIG_SYSCTL
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ struct ipv4_devconf *conf, *conf_def;
-+
-+ err = -ENOMEM;
-+
-+ conf = kmalloc(sizeof(*conf), GFP_KERNEL);
-+ if (!conf)
-+ goto err1;
-+
-+ memcpy(conf, &ipv4_devconf, sizeof(*conf));
-+ conf->sysctl = __devinet_sysctl_register("all",
-+ NET_PROTO_CONF_ALL, conf);
-+ if (!conf->sysctl)
-+ goto err2;
-+
-+ conf_def = kmalloc(sizeof(*conf_def), GFP_KERNEL);
-+ if (!conf_def)
-+ goto err3;
-+
-+ memcpy(conf_def, &ipv4_devconf_dflt, sizeof(*conf_def));
-+ conf_def->sysctl = __devinet_sysctl_register("default",
-+ NET_PROTO_CONF_DEFAULT, conf_def);
-+ if (!conf_def->sysctl)
-+ goto err4;
-+
-+ err = ip_forward_sysctl_register(ve, conf);
-+ if (err)
-+ goto err5;
-+
-+ ve->_ipv4_devconf = conf;
-+ ve->_ipv4_devconf_dflt = conf_def;
-+ return 0;
-+
-+err5:
-+ devinet_sysctl_unregister(conf_def);
-+err4:
-+ kfree(conf_def);
-+err3:
-+ devinet_sysctl_unregister(conf);
-+err2:
-+ kfree(conf);
-+err1:
- #endif
-+#endif
-+ return err;
-+}
-+
-+void devinet_sysctl_fini(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_SYSCTL
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ ip_forward_sysctl_unregister(ve);
-+ devinet_sysctl_unregister(ve->_ipv4_devconf);
-+ devinet_sysctl_unregister(ve->_ipv4_devconf_dflt);
-+#endif
-+#endif
-+}
-+
-+void devinet_sysctl_free(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_SYSCTL
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+ ip_forward_sysctl_free(ve);
-+ kfree(ve->_ipv4_devconf);
-+ kfree(ve->_ipv4_devconf_dflt);
-+#endif
-+#endif
-+}
-
- void __init devinet_init(void)
- {
-@@ -1542,13 +1728,18 @@ void __init devinet_init(void)
- #ifdef CONFIG_SYSCTL
- devinet_sysctl.sysctl_header =
- register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
-- devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
-+ __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
-+ &ipv4_devconf_dflt);
- #endif
- }
-
- EXPORT_SYMBOL(devinet_ioctl);
- EXPORT_SYMBOL(in_dev_finish_destroy);
- EXPORT_SYMBOL(inet_select_addr);
-+EXPORT_SYMBOL(inet_del_ifa);
- EXPORT_SYMBOL(inetdev_by_index);
-+EXPORT_SYMBOL(devinet_sysctl_init);
-+EXPORT_SYMBOL(devinet_sysctl_fini);
-+EXPORT_SYMBOL(devinet_sysctl_free);
- EXPORT_SYMBOL(register_inetaddr_notifier);
- EXPORT_SYMBOL(unregister_inetaddr_notifier);
-diff -upr linux-2.6.16.orig/net/ipv4/fib_frontend.c linux-2.6.16-026test009/net/ipv4/fib_frontend.c
---- linux-2.6.16.orig/net/ipv4/fib_frontend.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/fib_frontend.c 2006-04-19 15:02:12.000000000 +0400
-@@ -53,14 +53,46 @@
-
- #define RT_TABLE_MIN RT_TABLE_MAIN
-
-+#undef ip_fib_local_table
-+#undef ip_fib_main_table
- struct fib_table *ip_fib_local_table;
- struct fib_table *ip_fib_main_table;
-+void prepare_fib_tables(void)
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->_local_table = ip_fib_local_table;
-+ ip_fib_local_table = (struct fib_table *)0x12345678;
-+ get_ve0()->_main_table = ip_fib_main_table;
-+ ip_fib_main_table = (struct fib_table *)0x12345678;
-+#endif
-+}
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define ip_fib_local_table get_exec_env()->_local_table
-+#define ip_fib_main_table get_exec_env()->_main_table
-+#endif
-
- #else
-
- #define RT_TABLE_MIN 1
-
-+#undef fib_tables
- struct fib_table *fib_tables[RT_TABLE_MAX+1];
-+void prepare_fib_tables(void)
-+{
-+#ifdef CONFIG_VE
-+ int i;
-+
-+ BUG_ON(sizeof(fib_tables) !=
-+ sizeof(((struct ve_struct *)0)->_fib_tables));
-+ memcpy(get_ve0()->_fib_tables, fib_tables, sizeof(fib_tables));
-+ for (i = 0; i <= RT_TABLE_MAX; i++)
-+ fib_tables[i] = (void *)0x12366678;
-+#endif
-+}
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define fib_tables get_exec_env()->_fib_tables
-+#endif
-
- struct fib_table *__fib_new_table(int id)
- {
-@@ -250,7 +282,7 @@ int ip_rt_ioctl(unsigned int cmd, void _
- switch (cmd) {
- case SIOCADDRT: /* Add a route */
- case SIOCDELRT: /* Delete a route */
-- if (!capable(CAP_NET_ADMIN))
-+ if (!capable(CAP_VE_NET_ADMIN))
- return -EPERM;
- if (copy_from_user(&r, arg, sizeof(struct rtentry)))
- return -EFAULT;
-@@ -653,6 +685,7 @@ static struct notifier_block fib_netdev_
-
- void __init ip_fib_init(void)
- {
-+ prepare_fib_tables();
- #ifndef CONFIG_IP_MULTIPLE_TABLES
- ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
- ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
-diff -upr linux-2.6.16.orig/net/ipv4/fib_hash.c linux-2.6.16-026test009/net/ipv4/fib_hash.c
---- linux-2.6.16.orig/net/ipv4/fib_hash.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/fib_hash.c 2006-04-19 15:02:12.000000000 +0400
-@@ -36,6 +36,7 @@
- #include <linux/skbuff.h>
- #include <linux/netlink.h>
- #include <linux/init.h>
-+#include <linux/ve.h>
-
- #include <net/ip.h>
- #include <net/protocol.h>
-@@ -73,11 +74,6 @@ struct fn_zone {
- * can be cheaper than memory lookup, so that FZ_* macros are used.
- */
-
--struct fn_hash {
-- struct fn_zone *fn_zones[33];
-- struct fn_zone *fn_zone_list;
--};
--
- static inline u32 fn_hash(u32 key, struct fn_zone *fz)
- {
- u32 h = ntohl(key)>>(32 - fz->fz_order);
-@@ -623,7 +619,7 @@ fn_hash_delete(struct fib_table *tb, str
- return -ESRCH;
- }
-
--static int fn_flush_list(struct fn_zone *fz, int idx)
-+static int fn_flush_list(struct fn_zone *fz, int idx, int destroy)
- {
- struct hlist_head *head = &fz->fz_hash[idx];
- struct hlist_node *node, *n;
-@@ -638,7 +634,9 @@ static int fn_flush_list(struct fn_zone
- list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
- struct fib_info *fi = fa->fa_info;
-
-- if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
-+ if (fi == NULL)
-+ continue;
-+ if (destroy || (fi->fib_flags&RTNH_F_DEAD)) {
- write_lock_bh(&fib_hash_lock);
- list_del(&fa->fa_list);
- if (list_empty(&f->fn_alias)) {
-@@ -660,7 +658,7 @@ static int fn_flush_list(struct fn_zone
- return found;
- }
-
--static int fn_hash_flush(struct fib_table *tb)
-+static int __fn_hash_flush(struct fib_table *tb, int destroy)
- {
- struct fn_hash *table = (struct fn_hash *) tb->tb_data;
- struct fn_zone *fz;
-@@ -670,11 +668,84 @@ static int fn_hash_flush(struct fib_tabl
- int i;
-
- for (i = fz->fz_divisor - 1; i >= 0; i--)
-- found += fn_flush_list(fz, i);
-+ found += fn_flush_list(fz, i, destroy);
- }
- return found;
- }
-
-+static int fn_hash_flush(struct fib_table *tb)
-+{
-+ return __fn_hash_flush(tb, 0);
-+}
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+void fib_hash_destroy(struct fib_table *tb)
-+{
-+ __fn_hash_flush(tb, 1);
-+ kfree(tb);
-+}
-+
-+/*
-+ * Initialization of the virtualized networking subsystem.
-+ */
-+int init_ve_route(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_IP_MULTIPLE_TABLES
-+ if (fib_rules_create())
-+ return -ENOMEM;
-+ ve->_fib_tables[RT_TABLE_LOCAL] = fib_hash_init(RT_TABLE_LOCAL);
-+ if (!ve->_fib_tables[RT_TABLE_LOCAL])
-+ goto out_destroy;
-+ ve->_fib_tables[RT_TABLE_MAIN] = fib_hash_init(RT_TABLE_MAIN);
-+ if (!ve->_fib_tables[RT_TABLE_MAIN])
-+ goto out_destroy_local;
-+
-+ return 0;
-+
-+out_destroy_local:
-+ fib_hash_destroy(ve->_fib_tables[RT_TABLE_LOCAL]);
-+out_destroy:
-+ fib_rules_destroy();
-+ ve->_local_rule = NULL;
-+ return -ENOMEM;
-+#else
-+ ve->_local_table = fib_hash_init(RT_TABLE_LOCAL);
-+ if (!ve->_local_table)
-+ return -ENOMEM;
-+ ve->_main_table = fib_hash_init(RT_TABLE_MAIN);
-+ if (!ve->_main_table) {
-+ fib_hash_destroy(ve->_local_table);
-+ return -ENOMEM;
-+ }
-+ return 0;
-+#endif
-+}
-+
-+void fini_ve_route(struct ve_struct *ve)
-+{
-+#ifdef CONFIG_IP_MULTIPLE_TABLES
-+ int i;
-+ for (i=0; i<RT_TABLE_MAX+1; i++)
-+ {
-+ if (!ve->_fib_tables[i])
-+ continue;
-+ fib_hash_destroy(ve->_fib_tables[i]);
-+ }
-+ fib_rules_destroy();
-+ ve->_local_rule = NULL;
-+#else
-+ fib_hash_destroy(ve->_local_table);
-+ fib_hash_destroy(ve->_main_table);
-+#endif
-+ fib_hash_free(ve->_fib_info_hash, ve->_fib_hash_size);
-+ fib_hash_free(ve->_fib_info_laddrhash, ve->_fib_hash_size);
-+ ve->_fib_info_hash = ve->_fib_info_laddrhash = NULL;
-+}
-+
-+EXPORT_SYMBOL(init_ve_route);
-+EXPORT_SYMBOL(fini_ve_route);
-+#endif
-+
-
- static inline int
- fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
-@@ -766,7 +837,7 @@ static int fn_hash_dump(struct fib_table
- return skb->len;
- }
-
--#ifdef CONFIG_IP_MULTIPLE_TABLES
-+#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
- struct fib_table * fib_hash_init(int id)
- #else
- struct fib_table * __init fib_hash_init(int id)
-@@ -1076,13 +1147,13 @@ static struct file_operations fib_seq_fo
-
- int __init fib_proc_init(void)
- {
-- if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
-+ if (!proc_glob_fops_create("net/route", S_IRUGO, &fib_seq_fops))
- return -ENOMEM;
- return 0;
- }
-
- void __init fib_proc_exit(void)
- {
-- proc_net_remove("route");
-+ remove_proc_glob_entry("net/route", NULL);
- }
- #endif /* CONFIG_PROC_FS */
-diff -upr linux-2.6.16.orig/net/ipv4/fib_lookup.h linux-2.6.16-026test009/net/ipv4/fib_lookup.h
---- linux-2.6.16.orig/net/ipv4/fib_lookup.h 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/fib_lookup.h 2006-04-19 15:02:12.000000000 +0400
-@@ -41,5 +41,6 @@ extern struct fib_alias *fib_find_alias(
- extern int fib_detect_death(struct fib_info *fi, int order,
- struct fib_info **last_resort,
- int *last_idx, int *dflt);
-+void fib_hash_free(struct hlist_head *hash, int bytes);
-
- #endif /* _FIB_LOOKUP_H */
-diff -upr linux-2.6.16.orig/net/ipv4/fib_rules.c linux-2.6.16-026test009/net/ipv4/fib_rules.c
---- linux-2.6.16.orig/net/ipv4/fib_rules.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/fib_rules.c 2006-04-19 15:02:12.000000000 +0400
-@@ -39,6 +39,7 @@
- #include <linux/proc_fs.h>
- #include <linux/skbuff.h>
- #include <linux/netlink.h>
-+#include <linux/rtnetlink.h>
- #include <linux/init.h>
-
- #include <net/ip.h>
-@@ -99,9 +100,87 @@ static struct fib_rule local_rule = {
- .r_action = RTN_UNICAST,
- };
-
--static struct fib_rule *fib_rules = &local_rule;
- static DEFINE_RWLOCK(fib_rules_lock);
-
-+void __init prepare_fib_rules(void)
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->_local_rule = &local_rule;
-+ get_ve0()->_fib_rules = &local_rule;
-+#endif
-+}
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define local_rule (*(get_exec_env()->_local_rule))
-+#define fib_rules (get_exec_env()->_fib_rules)
-+#else
-+static struct fib_rule *fib_rules = &local_rule;
-+#endif
-+
-+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
-+int fib_rules_create()
-+{
-+ struct fib_rule *default_rule, *main_rule, *loc_rule;
-+
-+ default_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
-+ if (default_rule == NULL)
-+ goto out_def;
-+ memset(default_rule, 0, sizeof(struct fib_rule));
-+ atomic_set(&default_rule->r_clntref, 1);
-+ default_rule->r_preference = 0x7FFF;
-+ default_rule->r_table = RT_TABLE_DEFAULT;
-+ default_rule->r_action = RTN_UNICAST;
-+
-+ main_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
-+ if (main_rule == NULL)
-+ goto out_main;
-+ memset(main_rule, 0, sizeof(struct fib_rule));
-+ atomic_set(&main_rule->r_clntref, 1);
-+ main_rule->r_preference = 0x7FFE;
-+ main_rule->r_table = RT_TABLE_MAIN;
-+ main_rule->r_action = RTN_UNICAST;
-+ main_rule->r_next = default_rule;
-+
-+ loc_rule = kmalloc(sizeof(struct fib_rule), GFP_KERNEL);
-+ if (loc_rule == NULL)
-+ goto out_loc;
-+ memset(loc_rule, 0, sizeof(struct fib_rule));
-+ atomic_set(&loc_rule->r_clntref, 1);
-+ loc_rule->r_preference = 0;
-+ loc_rule->r_table = RT_TABLE_LOCAL;
-+ loc_rule->r_action = RTN_UNICAST;
-+ loc_rule->r_next = main_rule;
-+
-+ get_exec_env()->_local_rule = loc_rule;
-+ get_exec_env()->_fib_rules = loc_rule;
-+
-+ return 0;
-+
-+out_loc:
-+ kfree(main_rule);
-+out_main:
-+ kfree(default_rule);
-+out_def:
-+ return -1;
-+}
-+
-+void fib_rules_destroy()
-+{
-+ struct fib_rule *r;
-+
-+ rtnl_lock();
-+ write_lock_bh(&fib_rules_lock);
-+ while(fib_rules != NULL) {
-+ r = fib_rules;
-+ fib_rules = fib_rules->r_next;
-+ r->r_dead = 1;
-+ fib_rule_put(r);
-+ }
-+ write_unlock_bh(&fib_rules_lock);
-+ rtnl_unlock();
-+}
-+#endif
-+
- int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
- {
- struct rtattr **rta = arg;
-@@ -435,5 +514,6 @@ int inet_dump_rules(struct sk_buff *skb,
-
- void __init fib_rules_init(void)
- {
-+ prepare_fib_rules();
- register_netdevice_notifier(&fib_rules_notifier);
- }
-diff -upr linux-2.6.16.orig/net/ipv4/fib_semantics.c linux-2.6.16-026test009/net/ipv4/fib_semantics.c
---- linux-2.6.16.orig/net/ipv4/fib_semantics.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/fib_semantics.c 2006-04-19 15:02:12.000000000 +0400
-@@ -33,6 +33,7 @@
- #include <linux/netdevice.h>
- #include <linux/if_arp.h>
- #include <linux/proc_fs.h>
-+#include <linux/ve.h>
- #include <linux/skbuff.h>
- #include <linux/netlink.h>
- #include <linux/init.h>
-@@ -56,6 +57,24 @@ static struct hlist_head *fib_info_laddr
- static unsigned int fib_hash_size;
- static unsigned int fib_info_cnt;
-
-+void prepare_fib_info(void)
-+{
-+#ifdef CONFIG_VE
-+ get_ve0()->_fib_info_hash = fib_info_hash;
-+ get_ve0()->_fib_info_laddrhash = fib_info_laddrhash;
-+ get_ve0()->_fib_hash_size = fib_hash_size;
-+ get_ve0()->_fib_info_cnt = fib_info_cnt;
-+#endif
-+}
-+
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+#define fib_info_hash (get_exec_env()->_fib_info_hash)
-+#define fib_info_laddrhash (get_exec_env()->_fib_info_laddrhash)
-+#define fib_hash_size (get_exec_env()->_fib_hash_size)
-+#define fib_info_cnt (get_exec_env()->_fib_info_cnt)
-+#endif
-+
-+
- #define DEVINDEX_HASHBITS 8
- #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
- static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
-@@ -235,13 +254,15 @@ static struct fib_info *fib_find_info(co
- return NULL;
- }
-
--static inline unsigned int fib_devindex_hashfn(unsigned int val)
-+static inline unsigned int fib_devindex_hashfn(unsigned int val,
-+ envid_t veid)
- {
- unsigned int mask = DEVINDEX_HASHSIZE - 1;
-
- return (val ^
- (val >> DEVINDEX_HASHBITS) ^
-- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
-+ (val >> (DEVINDEX_HASHBITS * 2)) ^
-+ (veid ^ (veid >> 16))) & mask;
- }
-
- /* Check, that the gateway is already configured.
-@@ -257,7 +278,7 @@ int ip_fib_check_default(u32 gw, struct
-
- read_lock(&fib_info_lock);
-
-- hash = fib_devindex_hashfn(dev->ifindex);
-+ hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
- head = &fib_info_devhash[hash];
- hlist_for_each_entry(nh, node, head, nh_hash) {
- if (nh->nh_dev == dev &&
-@@ -580,7 +601,7 @@ static struct hlist_head *fib_hash_alloc
- __get_free_pages(GFP_KERNEL, get_order(bytes));
- }
-
--static void fib_hash_free(struct hlist_head *hash, int bytes)
-+void fib_hash_free(struct hlist_head *hash, int bytes)
- {
- if (!hash)
- return;
-@@ -837,7 +858,8 @@ link_it:
-
- if (!nh->nh_dev)
- continue;
-- hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
-+ hash = fib_devindex_hashfn(nh->nh_dev->ifindex,
-+ VEID(nh->nh_dev->owner_env));
- head = &fib_info_devhash[hash];
- hlist_add_head(&nh->nh_hash, head);
- } endfor_nexthops(fi)
-@@ -1184,7 +1206,8 @@ int fib_sync_down(u32 local, struct net_
-
- if (dev) {
- struct fib_info *prev_fi = NULL;
-- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-+ unsigned int hash = fib_devindex_hashfn(dev->ifindex,
-+ VEID(dev->owner_env));
- struct hlist_head *head = &fib_info_devhash[hash];
- struct hlist_node *node;
- struct fib_nh *nh;
-@@ -1249,7 +1272,7 @@ int fib_sync_up(struct net_device *dev)
- return 0;
-
- prev_fi = NULL;
-- hash = fib_devindex_hashfn(dev->ifindex);
-+ hash = fib_devindex_hashfn(dev->ifindex, VEID(dev->owner_env));
- head = &fib_info_devhash[hash];
- ret = 0;
-
-diff -upr linux-2.6.16.orig/net/ipv4/fib_trie.c linux-2.6.16-026test009/net/ipv4/fib_trie.c
---- linux-2.6.16.orig/net/ipv4/fib_trie.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/fib_trie.c 2006-04-19 15:02:11.000000000 +0400
-@@ -314,11 +314,6 @@ static void __leaf_free_rcu(struct rcu_h
- kfree(container_of(head, struct leaf, rcu));
- }
-
--static inline void free_leaf(struct leaf *leaf)
--{
-- call_rcu(&leaf->rcu, __leaf_free_rcu);
--}
--
- static void __leaf_info_free_rcu(struct rcu_head *head)
- {
- kfree(container_of(head, struct leaf_info, rcu));
-@@ -357,7 +352,12 @@ static void __tnode_free_rcu(struct rcu_
-
- static inline void tnode_free(struct tnode *tn)
- {
-- call_rcu(&tn->rcu, __tnode_free_rcu);
-+ if(IS_LEAF(tn)) {
-+ struct leaf *l = (struct leaf *) tn;
-+ call_rcu_bh(&l->rcu, __leaf_free_rcu);
-+ }
-+ else
-+ call_rcu(&tn->rcu, __tnode_free_rcu);
- }
-
- static struct leaf *leaf_new(void)
-diff -upr linux-2.6.16.orig/net/ipv4/inet_connection_sock.c linux-2.6.16-026test009/net/ipv4/inet_connection_sock.c
---- linux-2.6.16.orig/net/ipv4/inet_connection_sock.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/inet_connection_sock.c 2006-04-19 15:02:12.000000000 +0400
-@@ -25,6 +25,9 @@
- #include <net/tcp_states.h>
- #include <net/xfrm.h>
-
-+#include <ub/ub_net.h>
-+#include <ub/ub_orphan.h>
-+
- #ifdef INET_CSK_DEBUG
- const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
- EXPORT_SYMBOL(inet_csk_timer_bug_msg);
-@@ -48,6 +51,7 @@ int inet_csk_bind_conflict(const struct
- sk_for_each_bound(sk2, node, &tb->owners) {
- if (sk != sk2 &&
- !inet_v6_ipv6only(sk2) &&
-+ !ve_accessible_strict(VE_OWNER_SK(sk), VE_OWNER_SK(sk2)) &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-@@ -77,7 +81,9 @@ int inet_csk_get_port(struct inet_hashin
- struct hlist_node *node;
- struct inet_bind_bucket *tb;
- int ret;
-+ struct ve_struct *env;
-
-+ env = VE_OWNER_SK(sk);
- local_bh_disable();
- if (!snum) {
- int low = sysctl_local_port_range[0];
-@@ -86,11 +92,15 @@ int inet_csk_get_port(struct inet_hashin
- int rover = net_random() % (high - low) + low;
-
- do {
-- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
-+ head = &hashinfo->bhash[inet_bhashfn(rover,
-+ hashinfo->bhash_size, VEID(env))];
- spin_lock(&head->lock);
-- inet_bind_bucket_for_each(tb, node, &head->chain)
-+ inet_bind_bucket_for_each(tb, node, &head->chain) {
-+ if (!ve_accessible_strict(VE_OWNER_TB(tb),env))
-+ continue;
- if (tb->port == rover)
- goto next;
-+ }
- break;
- next:
- spin_unlock(&head->lock);
-@@ -113,11 +123,15 @@ int inet_csk_get_port(struct inet_hashin
- */
- snum = rover;
- } else {
-- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
-+ head = &hashinfo->bhash[inet_bhashfn(snum,
-+ hashinfo->bhash_size, VEID(env))];
- spin_lock(&head->lock);
-- inet_bind_bucket_for_each(tb, node, &head->chain)
-+ inet_bind_bucket_for_each(tb, node, &head->chain) {
-+ if (!ve_accessible_strict(VE_OWNER_TB(tb), env))
-+ continue;
- if (tb->port == snum)
- goto tb_found;
-+ }
- }
- tb = NULL;
- goto tb_not_found;
-@@ -136,7 +150,7 @@ tb_found:
- }
- tb_not_found:
- ret = 1;
-- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
-+ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum, env)) == NULL)
- goto fail_unlock;
- if (hlist_empty(&tb->owners)) {
- if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
-@@ -541,7 +555,7 @@ void inet_csk_destroy_sock(struct sock *
-
- sk_refcnt_debug_release(sk);
-
-- atomic_dec(sk->sk_prot->orphan_count);
-+ ub_dec_orphan_count(sk);
- sock_put(sk);
- }
-
-@@ -621,7 +635,7 @@ void inet_csk_listen_stop(struct sock *s
-
- sock_orphan(child);
-
-- atomic_inc(sk->sk_prot->orphan_count);
-+ ub_inc_orphan_count(sk);
-
- inet_csk_destroy_sock(child);
-
-diff -upr linux-2.6.16.orig/net/ipv4/inet_diag.c linux-2.6.16-026test009/net/ipv4/inet_diag.c
---- linux-2.6.16.orig/net/ipv4/inet_diag.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/inet_diag.c 2006-04-19 15:02:12.000000000 +0400
-@@ -673,7 +673,9 @@ static int inet_diag_dump(struct sk_buff
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- const struct inet_diag_handler *handler;
- struct inet_hashinfo *hashinfo;
-+ struct ve_struct *ve;
-
-+ ve = get_exec_env();
- handler = inet_diag_table[cb->nlh->nlmsg_type];
- BUG_ON(handler == NULL);
- hashinfo = handler->idiag_hashinfo;
-@@ -694,6 +696,8 @@ static int inet_diag_dump(struct sk_buff
- sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
- struct inet_sock *inet = inet_sk(sk);
-
-+ if (!ve_accessible(VE_OWNER_SK(sk), ve))
-+ continue;
- if (num < s_num) {
- num++;
- continue;
-@@ -754,6 +758,8 @@ skip_listen_ht:
- sk_for_each(sk, node, &head->chain) {
- struct inet_sock *inet = inet_sk(sk);
-
-+ if (!ve_accessible(VE_OWNER_SK(sk), ve))
-+ continue;
- if (num < s_num)
- goto next_normal;
- if (!(r->idiag_states & (1 << sk->sk_state)))
-@@ -778,6 +784,8 @@ next_normal:
- inet_twsk_for_each(tw, node,
- &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
-
-+ if (!ve_accessible_veid(inet_twsk(sk)->tw_owner_env, VEID(ve)))
-+ continue;
- if (num < s_num)
- goto next_dying;
- if (r->id.idiag_sport != tw->tw_sport &&
-diff -upr linux-2.6.16.orig/net/ipv4/inet_hashtables.c linux-2.6.16-026test009/net/ipv4/inet_hashtables.c
---- linux-2.6.16.orig/net/ipv4/inet_hashtables.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/inet_hashtables.c 2006-04-19 15:02:12.000000000 +0400
-@@ -30,7 +30,8 @@
- */
- struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep,
- struct inet_bind_hashbucket *head,
-- const unsigned short snum)
-+ const unsigned short snum,
-+ struct ve_struct *ve)
- {
- struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC);
-
-@@ -38,6 +39,7 @@ struct inet_bind_bucket *inet_bind_bucke
- tb->port = snum;
- tb->fastreuse = 0;
- INIT_HLIST_HEAD(&tb->owners);
-+ SET_VE_OWNER_TB(tb, ve);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-@@ -71,10 +73,13 @@ EXPORT_SYMBOL(inet_bind_hash);
- */
- static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
- {
-- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
-- struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
-+ int bhash;
-+ struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
-
-+ bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size,
-+ VEID(VE_OWNER_SK(sk)));
-+ head = &hashinfo->bhash[bhash];
- spin_lock(&head->lock);
- tb = inet_csk(sk)->icsk_bind_hash;
- __sk_del_bind_node(sk);
-@@ -130,7 +135,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
- * wildcarded during the search since they can never be otherwise.
- */
- struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr,
-- const unsigned short hnum, const int dif)
-+ const unsigned short hnum, const int dif,
-+ struct ve_struct *env)
- {
- struct sock *result = NULL, *sk;
- const struct hlist_node *node;
-@@ -139,6 +145,8 @@ struct sock *__inet_lookup_listener(cons
- sk_for_each(sk, node, head) {
- const struct inet_sock *inet = inet_sk(sk);
-
-+ if (!ve_accessible_strict(VE_OWNER_SK(sk), env))
-+ continue;
- if (inet->num == hnum && !ipv6_only_sock(sk)) {
- const __u32 rcv_saddr = inet->rcv_saddr;
- int score = sk->sk_family == PF_INET ? 1 : 0;
-@@ -169,7 +177,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener
- /* called with local bh disabled */
- static int __inet_check_established(struct inet_timewait_death_row *death_row,
- struct sock *sk, __u16 lport,
-- struct inet_timewait_sock **twp)
-+ struct inet_timewait_sock **twp,
-+ struct ve_struct *ve)
- {
- struct inet_hashinfo *hinfo = death_row->hashinfo;
- struct inet_sock *inet = inet_sk(sk);
-@@ -178,12 +187,15 @@ static int __inet_check_established(stru
- int dif = sk->sk_bound_dev_if;
- INET_ADDR_COOKIE(acookie, saddr, daddr)
- const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-- struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-+ unsigned int hash;
-+ struct inet_ehash_bucket *head;
- struct sock *sk2;
- const struct hlist_node *node;
- struct inet_timewait_sock *tw;
-
-+ hash = inet_ehashfn(daddr, lport, saddr, inet->dport, VEID(ve));
-+ head = inet_ehash_bucket(hinfo, hash);
-+
- prefetch(head->chain.first);
- write_lock(&head->lock);
-
-@@ -191,7 +203,8 @@ static int __inet_check_established(stru
- sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
- tw = inet_twsk(sk2);
-
-- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-+ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr,
-+ ports, dif, ve)) {
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
-@@ -202,7 +215,8 @@ static int __inet_check_established(stru
-
- /* And established part... */
- sk_for_each(sk2, node, &head->chain) {
-- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-+ if (INET_MATCH(sk2, hash, acookie, saddr, daddr,
-+ ports, dif, ve))
- goto not_unique;
- }
-
-@@ -253,7 +267,9 @@ int inet_hash_connect(struct inet_timewa
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
-+ struct ve_struct *ve;
-
-+ ve = VE_OWNER_SK(sk);
- if (!snum) {
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
-@@ -268,7 +284,8 @@ int inet_hash_connect(struct inet_timewa
- local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
-- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(port,
-+ hinfo->bhash_size, VEID(ve))];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
-@@ -282,13 +299,14 @@ int inet_hash_connect(struct inet_timewa
- goto next_port;
- if (!__inet_check_established(death_row,
- sk, port,
-- &tw))
-+ &tw, ve))
- goto ok;
- goto next_port;
- }
- }
-
-- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
-+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-+ head, port, ve);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
-@@ -323,7 +341,7 @@ ok:
- goto out;
- }
-
-- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-@@ -333,7 +351,7 @@ ok:
- } else {
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
-- ret = __inet_check_established(death_row, sk, snum, NULL);
-+ ret = __inet_check_established(death_row, sk, snum, NULL, ve);
- out:
- local_bh_enable();
- return ret;
-diff -upr linux-2.6.16.orig/net/ipv4/inet_timewait_sock.c linux-2.6.16-026test009/net/ipv4/inet_timewait_sock.c
---- linux-2.6.16.orig/net/ipv4/inet_timewait_sock.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/inet_timewait_sock.c 2006-04-19 15:02:12.000000000 +0400
-@@ -32,7 +32,8 @@ void __inet_twsk_kill(struct inet_timewa
- write_unlock(&ehead->lock);
-
- /* Disassociate with bind bucket. */
-- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
-+ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num,
-+ hashinfo->bhash_size, tw->tw_owner_env)];
- spin_lock(&bhead->lock);
- tb = tw->tw_tb;
- __hlist_del(&tw->tw_bind_node);
-@@ -66,7 +67,8 @@ void __inet_twsk_hashdance(struct inet_t
- Note, that any socket with inet->num != 0 MUST be bound in
- binding cache, even if it is closed.
- */
-- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
-+ bhead = &hashinfo->bhash[inet_bhashfn(inet->num,
-+ hashinfo->bhash_size, tw->tw_owner_env)];
- spin_lock(&bhead->lock);
- tw->tw_tb = icsk->icsk_bind_hash;
- BUG_TRAP(icsk->icsk_bind_hash);
-@@ -90,9 +92,14 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance)
-
- struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
- {
-- struct inet_timewait_sock *tw =
-- kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
-- SLAB_ATOMIC);
-+ struct user_beancounter *ub;
-+ struct inet_timewait_sock *tw;
-+
-+ ub = set_exec_ub(sock_bc(sk)->ub);
-+ tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
-+ SLAB_ATOMIC);
-+ (void)set_exec_ub(ub);
-+
- if (tw != NULL) {
- const struct inet_sock *inet = inet_sk(sk);
-
-diff -upr linux-2.6.16.orig/net/ipv4/ip_forward.c linux-2.6.16-026test009/net/ipv4/ip_forward.c
---- linux-2.6.16.orig/net/ipv4/ip_forward.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/ip_forward.c 2006-04-19 15:02:12.000000000 +0400
-@@ -87,6 +87,24 @@ int ip_forward(struct sk_buff *skb)
- if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
- goto sr_failed;
-
-+ /*
-+ * We try to optimize forwarding of VE packets:
-+ * do not decrement TTL (and so save skb_cow)
-+ * during forwarding of outgoing pkts from VE.
-+ * For incoming pkts we still do ttl decr,
-+ * since such skb is not cloned and does not require
-+ * actual cow. So, there is at least one place
-+ * in pkts path with mandatory ttl decr, that is
-+ * sufficient to prevent routing loops.
-+ */
-+ iph = skb->nh.iph;
-+ if (
-+#ifdef CONFIG_IP_ROUTE_NAT
-+ (rt->rt_flags & RTCF_NAT) == 0 && /* no NAT mangling expected */
-+#endif /* and */
-+ (skb->dev->features & NETIF_F_VENET)) /* src is VENET device */
-+ goto no_ttl_decr;
-+
- /* We are about to mangle packet. Copy it! */
- if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
- goto drop;
-@@ -95,6 +113,8 @@ int ip_forward(struct sk_buff *skb)
- /* Decrease ttl after skb cow done */
- ip_decrease_ttl(iph);
-
-+no_ttl_decr:
-+
- /*
- * We now generate an ICMP HOST REDIRECT giving the route
- * we calculated.
-diff -upr linux-2.6.16.orig/net/ipv4/ip_fragment.c linux-2.6.16-026test009/net/ipv4/ip_fragment.c
---- linux-2.6.16.orig/net/ipv4/ip_fragment.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/ip_fragment.c 2006-04-19 15:02:12.000000000 +0400
-@@ -44,6 +44,7 @@
- #include <linux/udp.h>
- #include <linux/inet.h>
- #include <linux/netfilter_ipv4.h>
-+#include <linux/ve_owner.h>
-
- /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
- * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
-@@ -97,8 +98,12 @@ struct ipq {
- int iif;
- unsigned int rid;
- struct inet_peer *peer;
-+ struct ve_struct *owner_env;
- };
-
-+DCL_VE_OWNER_PROTO(IPQ, struct ipq, owner_env)
-+DCL_VE_OWNER(IPQ, struct ipq, owner_env)
-+
- /* Hash table. */
-
- #define IPQ_HASHSZ 64
-@@ -182,7 +187,8 @@ static __inline__ void frag_free_queue(s
-
- static __inline__ struct ipq *frag_alloc_queue(void)
- {
-- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
-+ struct ipq *qp = kmalloc(sizeof(struct ipq) + sizeof(void *),
-+ GFP_ATOMIC);
-
- if(!qp)
- return NULL;
-@@ -278,6 +284,9 @@ static void ip_evictor(void)
- static void ip_expire(unsigned long arg)
- {
- struct ipq *qp = (struct ipq *) arg;
-+ struct ve_struct *envid;
-+
-+ envid = set_exec_env(VE_OWNER_IPQ(qp));
-
- spin_lock(&qp->lock);
-
-@@ -300,6 +309,8 @@ static void ip_expire(unsigned long arg)
- out:
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
-+
-+ (void)set_exec_env(envid);
- }
-
- /* Creation primitives. */
-@@ -321,7 +332,8 @@ static struct ipq *ip_frag_intern(unsign
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
- qp->protocol == qp_in->protocol &&
-- qp->user == qp_in->user) {
-+ qp->user == qp_in->user &&
-+ qp->owner_env == get_exec_env()) {
- atomic_inc(&qp->refcnt);
- write_unlock(&ipfrag_lock);
- qp_in->last_in |= COMPLETE;
-@@ -371,6 +383,8 @@ static struct ipq *ip_frag_create(unsign
- spin_lock_init(&qp->lock);
- atomic_set(&qp->refcnt, 1);
-
-+ SET_VE_OWNER_IPQ(qp, get_exec_env());
-+
- return ip_frag_intern(hash, qp);
-
- out_nomem:
-@@ -397,7 +411,8 @@ static inline struct ipq *ip_find(struct
- qp->saddr == saddr &&
- qp->daddr == daddr &&
- qp->protocol == protocol &&
-- qp->user == user) {
-+ qp->user == user &&
-+ qp->owner_env == get_exec_env()) {
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
- return qp;
-@@ -719,6 +734,9 @@ struct sk_buff *ip_defrag(struct sk_buff
- qp->meat == qp->len)
- ret = ip_frag_reasm(qp, dev);
-
-+ if (ret)
-+ SET_VE_OWNER_SKB(ret, VE_OWNER_SKB(skb));
-+
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
- return ret;
-@@ -729,6 +747,51 @@ struct sk_buff *ip_defrag(struct sk_buff
- return NULL;
- }
-
-+#ifdef CONFIG_VE
-+/* XXX */
-+void ip_fragment_cleanup(struct ve_struct *envid)
-+{
-+ int i, progress;
-+
-+ /* All operations with fragment queues are performed from NET_RX/TX
-+ * soft interrupts or from timer context. --Den */
-+ local_bh_disable();
-+ do {
-+ progress = 0;
-+ for (i = 0; i < IPQ_HASHSZ; i++) {
-+ struct ipq *qp;
-+ struct hlist_node *p, *n;
-+
-+ if (hlist_empty(&ipq_hash[i]))
-+ continue;
-+inner_restart:
-+ read_lock(&ipfrag_lock);
-+ hlist_for_each_entry_safe(qp, p, n,
-+ &ipq_hash[i], list) {
-+ if (!ve_accessible_strict(
-+ VE_OWNER_IPQ(qp),
-+ envid))
-+ continue;
-+ atomic_inc(&qp->refcnt);
-+ read_unlock(&ipfrag_lock);
-+
-+ spin_lock(&qp->lock);
-+ if (!(qp->last_in&COMPLETE))
-+ ipq_kill(qp);
-+ spin_unlock(&qp->lock);
-+
-+ ipq_put(qp, NULL);
-+ progress = 1;
-+ goto inner_restart;
-+ }
-+ read_unlock(&ipfrag_lock);
-+ }
-+ } while(progress);
-+ local_bh_enable();
-+}
-+EXPORT_SYMBOL(ip_fragment_cleanup);
-+#endif
-+
- void ipfrag_init(void)
- {
- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-diff -upr linux-2.6.16.orig/net/ipv4/ip_output.c linux-2.6.16-026test009/net/ipv4/ip_output.c
---- linux-2.6.16.orig/net/ipv4/ip_output.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/ip_output.c 2006-04-19 15:02:11.000000000 +0400
-@@ -86,8 +86,6 @@
-
- int sysctl_ip_default_ttl = IPDEFTTL;
-
--static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
--
- /* Generate a checksum for an outgoing IP datagram. */
- __inline__ void ip_send_check(struct iphdr *iph)
- {
-@@ -421,7 +419,7 @@ static void ip_copy_metadata(struct sk_b
- * single device frame, and queue such a frame for sending.
- */
-
--static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
-+int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
- {
- struct iphdr *iph;
- int raw = 0;
-@@ -673,6 +671,8 @@ fail:
- return err;
- }
-
-+EXPORT_SYMBOL(ip_fragment);
-+
- int
- ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
- {
-@@ -1249,11 +1249,7 @@ int ip_push_pending_frames(struct sock *
- iph->tos = inet->tos;
- iph->tot_len = htons(skb->len);
- iph->frag_off = df;
-- if (!df) {
-- __ip_select_ident(iph, &rt->u.dst, 0);
-- } else {
-- iph->id = htons(inet->id++);
-- }
-+ ip_select_ident(iph, &rt->u.dst, sk);
- iph->ttl = ttl;
- iph->protocol = sk->sk_protocol;
- iph->saddr = rt->rt_src;
-diff -upr linux-2.6.16.orig/net/ipv4/ipmr.c linux-2.6.16-026test009/net/ipv4/ipmr.c
---- linux-2.6.16.orig/net/ipv4/ipmr.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/ipmr.c 2006-04-19 15:02:12.000000000 +0400
-@@ -837,7 +837,7 @@ static void mrtsock_destruct(struct sock
- {
- rtnl_lock();
- if (sk == mroute_socket) {
-- ipv4_devconf.mc_forwarding--;
-+ ve_ipv4_devconf.mc_forwarding--;
-
- write_lock_bh(&mrt_lock);
- mroute_socket=NULL;
-@@ -888,7 +888,7 @@ int ip_mroute_setsockopt(struct sock *sk
- mroute_socket=sk;
- write_unlock_bh(&mrt_lock);
-
-- ipv4_devconf.mc_forwarding++;
-+ ve_ipv4_devconf.mc_forwarding++;
- }
- rtnl_unlock();
- return ret;
-diff -upr linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.16-026test009/net/ipv4/ipvs/ip_vs_conn.c
---- linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_conn.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/ipvs/ip_vs_conn.c 2006-04-19 15:02:11.000000000 +0400
-@@ -902,7 +902,8 @@ int ip_vs_conn_init(void)
- /* Allocate ip_vs_conn slab cache */
- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
- sizeof(struct ip_vs_conn), 0,
-- SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN | SLAB_UBC,
-+ NULL, NULL);
- if (!ip_vs_conn_cachep) {
- vfree(ip_vs_conn_tab);
- return -ENOMEM;
-diff -upr linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_core.c linux-2.6.16-026test009/net/ipv4/ipvs/ip_vs_core.c
---- linux-2.6.16.orig/net/ipv4/ipvs/ip_vs_core.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/ipvs/ip_vs_core.c 2006-04-19 15:02:12.000000000 +0400
-@@ -952,6 +952,10 @@ ip_vs_in(unsigned int hooknum, struct sk
- * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
- * ... don't know why 1st test DOES NOT include 2nd (?)
- */
-+ /*
-+ * VZ: the question above is right.
-+ * The second test is superfluous.
-+ */
- if (unlikely(skb->pkt_type != PACKET_HOST
- || skb->dev == &loopback_dev || skb->sk)) {
- IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_core.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_core.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_core.c 2006-04-19 15:02:12.000000000 +0400
-@@ -49,6 +49,7 @@
- #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
- #include <linux/netfilter_ipv4/ip_conntrack_core.h>
- #include <linux/netfilter_ipv4/listhelp.h>
-+#include <ub/ub_mem.h>
-
- #define IP_CONNTRACK_VERSION "2.4"
-
-@@ -60,22 +61,41 @@
-
- DEFINE_RWLOCK(ip_conntrack_lock);
-
--/* ip_conntrack_standalone needs this */
--atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-+#ifdef CONFIG_VE_IPTABLES
-+#define ve_ip_conntrack_helpers \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_helpers)
-+#define ve_ip_conntrack_max \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_max)
-+#define ve_ip_conntrack_count \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_count)
-+#define ve_ip_conntrack_unconfirmed \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_unconfirmed)
-+#else
-
- void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
- LIST_HEAD(ip_conntrack_expect_list);
- struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
- static LIST_HEAD(helpers);
-+struct list_head *ip_conntrack_hash;
-+static LIST_HEAD(unconfirmed);
-+#define ve_ip_conntrack_count ip_conntrack_count
-+#define ve_ip_conntrack_helpers helpers
-+#define ve_ip_conntrack_max ip_conntrack_max
-+#define ve_ip_conntrack_unconfirmed unconfirmed
-+#endif
-+
-+/* ip_conntrack_standalone needs this */
-+atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-+
- unsigned int ip_conntrack_htable_size = 0;
- int ip_conntrack_max;
--struct list_head *ip_conntrack_hash;
- static kmem_cache_t *ip_conntrack_cachep __read_mostly;
- static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
- struct ip_conntrack ip_conntrack_untracked;
- unsigned int ip_ct_log_invalid;
--static LIST_HEAD(unconfirmed);
-+#ifndef CONFIG_VE
- static int ip_conntrack_vmalloc;
-+#endif
-
- static unsigned int ip_conntrack_next_id = 1;
- static unsigned int ip_conntrack_expect_next_id = 1;
-@@ -105,6 +125,9 @@ void ip_ct_deliver_cached_events(const s
- {
- struct ip_conntrack_ecache *ecache;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return;
-+
- local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- if (ecache->ct == ct)
-@@ -133,6 +156,9 @@ static void ip_ct_event_cache_flush(void
- struct ip_conntrack_ecache *ecache;
- int cpu;
-
-+ if (!ve_is_super(get_exec_env()))
-+ return;
-+
- for_each_cpu(cpu) {
- ecache = &per_cpu(ip_conntrack_ecache, cpu);
- if (ecache->ct)
-@@ -226,7 +252,7 @@ __ip_conntrack_expect_find(const struct
- {
- struct ip_conntrack_expect *i;
-
-- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
- atomic_inc(&i->use);
- return i;
-@@ -255,7 +281,7 @@ find_expectation(const struct ip_conntra
- {
- struct ip_conntrack_expect *i;
-
-- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
-@@ -284,7 +310,7 @@ void ip_ct_remove_expectations(struct ip
- if (ct->expecting == 0)
- return;
-
-- list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry_safe(i, tmp, &ve_ip_conntrack_expect_list, list) {
- if (i->master == ct && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
-@@ -302,8 +328,10 @@ clean_from_lists(struct ip_conntrack *ct
-
- ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-- LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-- LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
-+ LIST_DELETE(&ve_ip_conntrack_hash[ho],
-+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-+ LIST_DELETE(&ve_ip_conntrack_hash[hr],
-+ &ct->tuplehash[IP_CT_DIR_REPLY]);
-
- /* Destroy all pending expectations */
- ip_ct_remove_expectations(ct);
-@@ -329,8 +357,8 @@ destroy_conntrack(struct nf_conntrack *n
- if (proto && proto->destroy)
- proto->destroy(ct);
-
-- if (ip_conntrack_destroyed)
-- ip_conntrack_destroyed(ct);
-+ if (ve_ip_conntrack_destroyed)
-+ ve_ip_conntrack_destroyed(ct);
-
- write_lock_bh(&ip_conntrack_lock);
- /* Expectations will have been removed in clean_from_lists,
-@@ -358,7 +386,11 @@ destroy_conntrack(struct nf_conntrack *n
- static void death_by_timeout(unsigned long ul_conntrack)
- {
- struct ip_conntrack *ct = (void *)ul_conntrack;
-+#ifdef CONFIG_VE_IPTABLES
-+ struct ve_struct *old;
-
-+ old = set_exec_env(VE_OWNER_CT(ct));
-+#endif
- write_lock_bh(&ip_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
-@@ -366,6 +398,9 @@ static void death_by_timeout(unsigned lo
- clean_from_lists(ct);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_put(ct);
-+#ifdef CONFIG_VE_IPTABLES
-+ (void)set_exec_env(old);
-+#endif
- }
-
- static inline int
-@@ -386,7 +421,7 @@ __ip_conntrack_find(const struct ip_conn
- unsigned int hash = hash_conntrack(tuple);
-
- ASSERT_READ_LOCK(&ip_conntrack_lock);
-- list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-+ list_for_each_entry(h, &ve_ip_conntrack_hash[hash], list) {
- if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
- CONNTRACK_STAT_INC(found);
- return h;
-@@ -418,9 +453,9 @@ static void __ip_conntrack_hash_insert(s
- unsigned int repl_hash)
- {
- ct->id = ++ip_conntrack_next_id;
-- list_prepend(&ip_conntrack_hash[hash],
-+ list_prepend(&ve_ip_conntrack_hash[hash],
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-- list_prepend(&ip_conntrack_hash[repl_hash],
-+ list_prepend(&ve_ip_conntrack_hash[repl_hash],
- &ct->tuplehash[IP_CT_DIR_REPLY].list);
- }
-
-@@ -471,11 +506,11 @@ __ip_conntrack_confirm(struct sk_buff **
- /* See if there's one in the list already, including reverse:
- NAT could have grabbed it without realizing, since we're
- not in the hash. If there is, we lost race. */
-- if (!LIST_FIND(&ip_conntrack_hash[hash],
-+ if (!LIST_FIND(&ve_ip_conntrack_hash[hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
-- && !LIST_FIND(&ip_conntrack_hash[repl_hash],
-+ && !LIST_FIND(&ve_ip_conntrack_hash[repl_hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
-@@ -569,7 +604,7 @@ static inline int helper_cmp(const struc
- static struct ip_conntrack_helper *
- __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
- {
-- return LIST_FIND(&helpers, helper_cmp,
-+ return LIST_FIND(&ve_ip_conntrack_helpers, helper_cmp, /* searched list is now ve_ip_conntrack_helpers, not `helpers` */
- struct ip_conntrack_helper *,
- tuple);
- }
-@@ -605,7 +640,7 @@ void ip_conntrack_helper_put(struct ip_c
- struct ip_conntrack_protocol *
- __ip_conntrack_proto_find(u_int8_t protocol)
- {
-- return ip_ct_protos[protocol];
-+ return ve_ip_ct_protos[protocol]; /* table is filled in by ip_conntrack_init() */
- }
-
- /* this is guaranteed to always return a valid protocol helper, since
-@@ -632,29 +667,32 @@ void ip_conntrack_proto_put(struct ip_co
- }
-
- struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
-- struct ip_conntrack_tuple *repl)
-+ struct ip_conntrack_tuple *repl, struct user_beancounter *ub)
- {
- struct ip_conntrack *conntrack;
-+ struct user_beancounter *old_ub;
-
- if (!ip_conntrack_hash_rnd_initted) {
- get_random_bytes(&ip_conntrack_hash_rnd, 4);
- ip_conntrack_hash_rnd_initted = 1;
- }
-
-- if (ip_conntrack_max
-- && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
-+ if (ve_ip_conntrack_max
-+ && atomic_read(&ve_ip_conntrack_count) >= ve_ip_conntrack_max) {
- unsigned int hash = hash_conntrack(orig);
- /* Try dropping from this hash chain. */
-- if (!early_drop(&ip_conntrack_hash[hash])) {
-+ if (!early_drop(&ve_ip_conntrack_hash[hash])) {
- if (net_ratelimit())
-- printk(KERN_WARNING
-- "ip_conntrack: table full, dropping"
-- " packet.\n");
-+ ve_printk(VE_LOG_BOTH, KERN_WARNING
-+ "ip_conntrack: VPS %d: table full, dropping"
-+ " packet.\n", VEID(get_exec_env()));
- return ERR_PTR(-ENOMEM);
- }
- }
-
-+ old_ub = set_exec_ub(ub);
- conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
-+ (void)set_exec_ub(old_ub);
- if (!conntrack) {
- DEBUGP("Can't allocate conntrack.\n");
- return ERR_PTR(-ENOMEM);
-@@ -669,8 +707,11 @@ struct ip_conntrack *ip_conntrack_alloc(
- init_timer(&conntrack->timeout);
- conntrack->timeout.data = (unsigned long)conntrack;
- conntrack->timeout.function = death_by_timeout;
-+#ifdef CONFIG_VE_IPTABLES
-+ SET_VE_OWNER_CT(conntrack, get_exec_env());
-+#endif
-
-- atomic_inc(&ip_conntrack_count);
-+ atomic_inc(&ve_ip_conntrack_count);
-
- return conntrack;
- }
-@@ -678,7 +719,7 @@ struct ip_conntrack *ip_conntrack_alloc(
- void
- ip_conntrack_free(struct ip_conntrack *conntrack)
- {
-- atomic_dec(&ip_conntrack_count);
-+ atomic_dec(&ve_ip_conntrack_count); /* pairs with the atomic_inc() in ip_conntrack_alloc() */
- kmem_cache_free(ip_conntrack_cachep, conntrack);
- }
-
-@@ -692,13 +733,22 @@ init_conntrack(struct ip_conntrack_tuple
- struct ip_conntrack *conntrack;
- struct ip_conntrack_tuple repl_tuple;
- struct ip_conntrack_expect *exp;
-+ struct user_beancounter *ub;
-
- if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
- DEBUGP("Can't invert tuple.\n");
- return NULL;
- }
-
-- conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
-+#ifdef CONFIG_USER_RESOURCE
-+ if (skb->dev != NULL) /* received skb */
-+ ub = netdev_bc(skb->dev)->exec_ub;
-+ else if (skb->sk != NULL) /* sent skb */
-+ ub = sock_bc(skb->sk)->ub;
-+ else
-+#endif
-+ ub = NULL;
-+ conntrack = ip_conntrack_alloc(tuple, &repl_tuple, ub);
- if (conntrack == NULL || IS_ERR(conntrack))
- return (struct ip_conntrack_tuple_hash *)conntrack;
-
-@@ -733,7 +783,8 @@ init_conntrack(struct ip_conntrack_tuple
- }
-
- /* Overload tuple linked list to put us in unconfirmed list. */
-- list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
-+ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
-+ &ve_ip_conntrack_unconfirmed);
-
- write_unlock_bh(&ip_conntrack_lock);
-
-@@ -925,7 +976,7 @@ void ip_conntrack_unexpect_related(struc
-
- write_lock_bh(&ip_conntrack_lock);
- /* choose the the oldest expectation to evict */
-- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
- if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- write_unlock_bh(&ip_conntrack_lock);
-@@ -959,11 +1010,11 @@ void ip_conntrack_expect_put(struct ip_c
- kmem_cache_free(ip_conntrack_expect_cachep, exp);
- }
-
--static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
-+void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
- {
- atomic_inc(&exp->use);
- exp->master->expecting++;
-- list_add(&exp->list, &ip_conntrack_expect_list);
-+ list_add(&exp->list, &ve_ip_conntrack_expect_list);
-
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
-@@ -975,13 +1026,14 @@ static void ip_conntrack_expect_insert(s
- atomic_inc(&exp->use);
- CONNTRACK_STAT_INC(expect_create);
- }
-+EXPORT_SYMBOL_GPL(ip_conntrack_expect_insert);
-
- /* Race with expectations being used means we could have none to find; OK. */
- static void evict_oldest_expect(struct ip_conntrack *master)
- {
- struct ip_conntrack_expect *i;
-
-- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry_reverse(i, &ve_ip_conntrack_expect_list, list) {
- if (i->master == master) {
- if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
-@@ -1012,7 +1064,7 @@ int ip_conntrack_expect_related(struct i
- DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
-
- write_lock_bh(&ip_conntrack_lock);
-- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry(i, &ve_ip_conntrack_expect_list, list) {
- if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
-@@ -1060,18 +1112,48 @@ int ip_conntrack_helper_register(struct
- {
- BUG_ON(me->timeout == 0);
- write_lock_bh(&ip_conntrack_lock);
-- list_prepend(&helpers, me);
-+ list_prepend(&ve_ip_conntrack_helpers, me);
- write_unlock_bh(&ip_conntrack_lock);
-
- return 0;
- }
-
-+int virt_ip_conntrack_helper_register(struct ip_conntrack_helper *me)
-+{ /* Registers `me` for the current exec env: 0 on success, -ENOMEM if the copy cannot be allocated, else ip_conntrack_helper_register()'s error. */
-+ int ret;
-+ struct module *mod = me->me; /* saved up front: `me` is repointed at the copy below */
-+
-+ if (!ve_is_super(get_exec_env())) { /* non-super VE: register a kmalloc'ed copy, not the caller's struct */
-+ struct ip_conntrack_helper *tmp;
-+ __module_get(mod); /* dropped again at `nomem` on every failure path */
-+ ret = -ENOMEM;
-+ tmp = kmalloc(sizeof(struct ip_conntrack_helper), GFP_KERNEL);
-+ if (!tmp)
-+ goto nomem;
-+ memcpy(tmp, me, sizeof(struct ip_conntrack_helper));
-+ me = tmp;
-+ }
-+
-+ ret = ip_conntrack_helper_register(me);
-+ if (ret)
-+ goto out;
-+
-+ return 0;
-+out:
-+ if (!ve_is_super(get_exec_env())){ /* `me` is the private copy here, so it is ours to free */
-+ kfree(me);
-+nomem: /* entered by goto from the first if-block when kmalloc() fails: nothing to free, only the module ref to drop */
-+ module_put(mod);
-+ }
-+ return ret;
-+}
-+
- struct ip_conntrack_helper *
- __ip_conntrack_helper_find_byname(const char *name)
- {
- struct ip_conntrack_helper *h;
-
-- list_for_each_entry(h, &helpers, list) {
-+ list_for_each_entry(h, &ve_ip_conntrack_helpers, list) {
- if (!strcmp(h->name, name))
- return h;
- }
-@@ -1096,19 +1178,20 @@ void ip_conntrack_helper_unregister(stru
-
- /* Need write lock here, to delete helper. */
- write_lock_bh(&ip_conntrack_lock);
-- LIST_DELETE(&helpers, me);
-+ LIST_DELETE(&ve_ip_conntrack_helpers, me);
-
- /* Get rid of expectations */
-- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
-+ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list, list) {
- if (exp->master->helper == me && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- /* Get rid of expecteds, set helpers to NULL. */
-- LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
-+ LIST_FIND_W(&ve_ip_conntrack_unconfirmed, unhelp,
-+ struct ip_conntrack_tuple_hash*, me);
- for (i = 0; i < ip_conntrack_htable_size; i++)
-- LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
-+ LIST_FIND_W(&ve_ip_conntrack_hash[i], unhelp,
- struct ip_conntrack_tuple_hash *, me);
- write_unlock_bh(&ip_conntrack_lock);
-
-@@ -1116,6 +1199,25 @@ void ip_conntrack_helper_unregister(stru
- synchronize_net();
- }
-
-+void virt_ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
-+{ /* Unregisters the helper for the current exec env; in a non-super VE the entry is looked up by name and freed afterwards. */
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ read_lock_bh(&ip_conntrack_lock);
-+ me = list_named_find(&ve_ip_conntrack_helpers, me->name); /* replace `me` with the entry of the same name on this VE's list */
-+ read_unlock_bh(&ip_conntrack_lock);
-+ if (!me)
-+ return; /* no helper of that name on this VE's list: nothing to undo */
-+ }
-+
-+ ip_conntrack_helper_unregister(me);
-+
-+ if (!ve_is_super(get_exec_env())) { /* presumably the copy and module ref taken by virt_ip_conntrack_helper_register() - TODO confirm */
-+ module_put(me->me); /* me->me must be read before kfree(me) */
-+ kfree(me);
-+ }
-+}
-+
- /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
- void __ip_ct_refresh_acct(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
-@@ -1246,13 +1348,13 @@ get_next_corpse(int (*iter)(struct ip_co
-
- write_lock_bh(&ip_conntrack_lock);
- for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-- h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
-+ h = LIST_FIND_W(&ve_ip_conntrack_hash[*bucket], do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- break;
- }
- if (!h)
-- h = LIST_FIND_W(&unconfirmed, do_iter,
-+ h = LIST_FIND_W(&ve_ip_conntrack_unconfirmed, do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
-@@ -1289,6 +1391,9 @@ getorigdst(struct sock *sk, int optval,
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
-
-+ if (!get_exec_env()->_ip_conntrack)
-+ return -ENOPROTOOPT;
-+
- IP_CT_TUPLE_U_BLANK(&tuple);
- tuple.src.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
-@@ -1359,12 +1464,17 @@ static void free_conntrack_hash(struct l
- get_order(sizeof(struct list_head) * size));
- }
-
-+static void ip_conntrack_cache_free(void)
-+{ /* Destroys both conntrack slab caches and unregisters the so_getorigdst sockopt. */
-+ kmem_cache_destroy(ip_conntrack_expect_cachep);
-+ kmem_cache_destroy(ip_conntrack_cachep);
-+ nf_unregister_sockopt(&so_getorigdst);
-+}
-+
- /* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
- void ip_conntrack_cleanup(void)
- {
-- ip_ct_attach = NULL;
--
- /* This makes sure all current packets have passed through
- netfilter framework. Roll on, two-stage module
- delete... */
-@@ -1373,19 +1483,32 @@ void ip_conntrack_cleanup(void)
- ip_ct_event_cache_flush();
- i_see_dead_people:
- ip_conntrack_flush();
-- if (atomic_read(&ip_conntrack_count) != 0) {
-+ if (atomic_read(&ve_ip_conntrack_count) != 0) {
- schedule();
- goto i_see_dead_people;
- }
-- /* wait until all references to ip_conntrack_untracked are dropped */
-- while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-- schedule();
--
-- kmem_cache_destroy(ip_conntrack_cachep);
-- kmem_cache_destroy(ip_conntrack_expect_cachep);
-- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-+ if (ve_is_super(get_exec_env())) {
-+ /* wait until all references to ip_conntrack_untracked are
-+ * dropped */
-+ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-+ schedule();
-+ ip_ct_attach = NULL;
-+ ip_conntrack_cache_free();
-+ }
-+ free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
-- nf_unregister_sockopt(&so_getorigdst);
-+ ve_ip_conntrack_hash = NULL;
-+ INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
-+ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
-+ INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
-+ atomic_set(&ve_ip_conntrack_count, 0);
-+ ve_ip_conntrack_max = 0;
-+#ifdef CONFIG_VE_IPTABLES
-+ kfree(ve_ip_ct_protos);
-+ ve_ip_ct_protos = NULL;
-+ kfree(get_exec_env()->_ip_conntrack);
-+ get_exec_env()->_ip_conntrack = NULL;
-+#endif
- }
-
- static struct list_head *alloc_hashtable(int size, int *vmalloced)
-@@ -1394,13 +1517,13 @@ static struct list_head *alloc_hashtable
- unsigned int i;
-
- *vmalloced = 0;
-- hash = (void*)__get_free_pages(GFP_KERNEL,
-+ hash = (void*)__get_free_pages(GFP_KERNEL_UBC,
- get_order(sizeof(struct list_head)
- * size));
- if (!hash) {
- *vmalloced = 1;
- printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
-- hash = vmalloc(sizeof(struct list_head) * size);
-+ hash = ub_vmalloc(sizeof(struct list_head) * size);
- }
-
- if (hash)
-@@ -1436,8 +1559,8 @@ static int set_hashsize(const char *val,
-
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++) {
-- while (!list_empty(&ip_conntrack_hash[i])) {
-- h = list_entry(ip_conntrack_hash[i].next,
-+ while (!list_empty(&ve_ip_conntrack_hash[i])) {
-+ h = list_entry(ve_ip_conntrack_hash[i].next,
- struct ip_conntrack_tuple_hash, list);
- list_del(&h->list);
- bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
-@@ -1445,12 +1568,12 @@ static int set_hashsize(const char *val,
- }
- }
- old_size = ip_conntrack_htable_size;
-- old_vmalloced = ip_conntrack_vmalloc;
-- old_hash = ip_conntrack_hash;
-+ old_vmalloced = ve_ip_conntrack_vmalloc;
-+ old_hash = ve_ip_conntrack_hash;
-
- ip_conntrack_htable_size = hashsize;
-- ip_conntrack_vmalloc = vmalloced;
-- ip_conntrack_hash = hash;
-+ ve_ip_conntrack_vmalloc = vmalloced;
-+ ve_ip_conntrack_hash = hash;
- ip_conntrack_hash_rnd = rnd;
- write_unlock_bh(&ip_conntrack_lock);
-
-@@ -1461,9 +1584,8 @@ static int set_hashsize(const char *val,
- module_param_call(hashsize, set_hashsize, param_get_uint,
- &ip_conntrack_htable_size, 0600);
-
--int __init ip_conntrack_init(void)
-+static int ip_conntrack_cache_create(void)
- {
-- unsigned int i;
- int ret;
-
- /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
-@@ -1477,70 +1599,127 @@ int __init ip_conntrack_init(void)
- if (ip_conntrack_htable_size < 16)
- ip_conntrack_htable_size = 16;
- }
-- ip_conntrack_max = 8 * ip_conntrack_htable_size;
-+ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size;
-
- printk("ip_conntrack version %s (%u buckets, %d max)"
- " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-- ip_conntrack_htable_size, ip_conntrack_max,
-+ ip_conntrack_htable_size, ve_ip_conntrack_max,
- sizeof(struct ip_conntrack));
-
- ret = nf_register_sockopt(&so_getorigdst);
- if (ret != 0) {
- printk(KERN_ERR "Unable to register netfilter socket option\n");
-- return ret;
-- }
--
-- ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-- &ip_conntrack_vmalloc);
-- if (!ip_conntrack_hash) {
-- printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-- goto err_unreg_sockopt;
-+ goto out_sockopt;
- }
-
-+ ret = -ENOMEM;
- ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
- sizeof(struct ip_conntrack), 0,
-- 0, NULL, NULL);
-+ SLAB_UBC, NULL, NULL);
- if (!ip_conntrack_cachep) {
- printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-- goto err_free_hash;
-+ goto err_unreg_sockopt;
- }
-
- ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
- sizeof(struct ip_conntrack_expect),
-- 0, 0, NULL, NULL);
-+ 0, SLAB_UBC, NULL, NULL);
- if (!ip_conntrack_expect_cachep) {
- printk(KERN_ERR "Unable to create ip_expect slab cache\n");
- goto err_free_conntrack_slab;
- }
-
-+ return 0;
-+
-+err_free_conntrack_slab:
-+ kmem_cache_destroy(ip_conntrack_cachep);
-+err_unreg_sockopt:
-+ nf_unregister_sockopt(&so_getorigdst);
-+out_sockopt:
-+ return ret;
-+}
-+
-+int ip_conntrack_init(void)
-+{ /* Sets up conntrack state for the current exec env; returns 0 on success or a negative errno after unwinding. */
-+ struct ve_struct *env;
-+ unsigned int i;
-+ int ret;
-+
-+ env = get_exec_env();
-+#ifdef CONFIG_VE_IPTABLES
-+ ret = -ENOMEM;
-+ env->_ip_conntrack =
-+ kmalloc(sizeof(struct ve_ip_conntrack), GFP_KERNEL);
-+ if (!env->_ip_conntrack)
-+ goto out;
-+ memset(env->_ip_conntrack, 0, sizeof(struct ve_ip_conntrack)); /* start from all-zero per-VE state */
-+ if (ve_is_super(env)) { /* only the super VE creates the slab caches and sockopt */
-+ ret = ip_conntrack_cache_create();
-+ if (ret)
-+ goto cache_fail;
-+ } else
-+ ve_ip_conntrack_max = 8 * ip_conntrack_htable_size; /* same formula ip_conntrack_cache_create() uses */
-+#else /* CONFIG_VE_IPTABLES */
-+ ret = ip_conntrack_cache_create();
-+ if (ret)
-+ goto out;
-+#endif
-+
-+ ret = -ENOMEM; /* error code for the allocation failures below */
-+ ve_ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-+ &ve_ip_conntrack_vmalloc);
-+ if (!ve_ip_conntrack_hash) {
-+ printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-+ goto err_free_cache;
-+ }
-+
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_ct_protos = (struct ip_conntrack_protocol **)
-+ ub_kmalloc(sizeof(void *)*MAX_IP_CT_PROTO, GFP_KERNEL);
-+ if (!ve_ip_ct_protos)
-+ goto err_free_hash; /* ret is still -ENOMEM */
-+#endif
- /* Don't NEED lock here, but good form anyway. */
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < MAX_IP_CT_PROTO; i++)
-- ip_ct_protos[i] = &ip_conntrack_generic_protocol;
-+ ve_ip_ct_protos[i] = &ip_conntrack_generic_protocol; /* default for every protocol number */
- /* Sew in builtin protocols. */
-- ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-- ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-- ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
-+ ve_ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
-+ ve_ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
-+ ve_ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
- write_unlock_bh(&ip_conntrack_lock);
-
-- /* For use by ipt_REJECT */
-- ip_ct_attach = ip_conntrack_attach;
--
-- /* Set up fake conntrack:
-- - to never be deleted, not in any hashes */
-- atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-- /* - and look it like as a confirmed connection */
-- set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-+ INIT_LIST_HEAD(&ve_ip_conntrack_unconfirmed);
-+ INIT_LIST_HEAD(&ve_ip_conntrack_expect_list);
-+ INIT_LIST_HEAD(&ve_ip_conntrack_helpers);
-+
-+ if (ve_is_super(env)) { /* ip_ct_attach and ip_conntrack_untracked are set up by the super VE only */
-+ /* For use by ipt_REJECT */
-+ ip_ct_attach = ip_conntrack_attach;
-+
-+ /* Set up fake conntrack:
-+ - to never be deleted, not in any hashes */
-+ atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-+ /* - and make it look like a confirmed connection */
-+ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-+ }
-
-- return ret;
-+ return 0;
-
--err_free_conntrack_slab:
-- kmem_cache_destroy(ip_conntrack_cachep);
-+#ifdef CONFIG_VE_IPTABLES
- err_free_hash:
-- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-+#endif
-+ free_conntrack_hash(ve_ip_conntrack_hash, ve_ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
-- err_unreg_sockopt:
-- nf_unregister_sockopt(&so_getorigdst);
--
-- return -ENOMEM;
-+ ve_ip_conntrack_hash = NULL;
-+err_free_cache:
-+ if (ve_is_super(env)) /* assumes ve_is_super() is always true without CONFIG_VE_IPTABLES - TODO confirm */
-+ ip_conntrack_cache_free();
-+#ifdef CONFIG_VE_IPTABLES
-+cache_fail: /* only the per-VE state block allocated at the top remains to free */
-+ kfree(env->_ip_conntrack);
-+ env->_ip_conntrack = NULL;
-+#endif
-+out:
-+ return ret;
- }
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_ftp.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_ftp.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_ftp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -15,6 +15,7 @@
- #include <linux/ctype.h>
- #include <net/checksum.h>
- #include <net/tcp.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
- #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-@@ -425,8 +426,8 @@ static int help(struct sk_buff **pskb,
-
- /* Now, NAT might want to mangle the packet, and register the
- * (possibly changed) expectation itself. */
-- if (ip_nat_ftp_hook)
-- ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
-+ if (ve_ip_nat_ftp_hook)
-+ ret = ve_ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
- matchoff, matchlen, exp, &seq);
- else {
- /* Can't expect this? Best to drop packet now. */
-@@ -452,16 +453,39 @@ out_update_nl:
- static struct ip_conntrack_helper ftp[MAX_PORTS];
- static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
-
--/* Not __exit: called from init() */
--static void fini(void)
-+void fini_iptable_ftp(void) /* Unregisters ftp[0..ports_c-1] through the virt_ wrapper. */
- {
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
- ports[i]);
-- ip_conntrack_helper_unregister(&ftp[i]);
-+ virt_ip_conntrack_helper_unregister(&ftp[i]); /* counterpart of the virt_ register call in init_iptable_ftp() */
- }
-+}
-+
-+int init_iptable_ftp(void)
-+{ /* Registers ftp[0..ports_c-1]; returns 0, or the first registration error after rolling back. */
-+ int i, ret;
-
-+ for (i = 0; i < ports_c; i++) {
-+ DEBUGP("ip_ct_ftp: registering helper for port %d\n",
-+ ports[i]);
-+ ret = virt_ip_conntrack_helper_register(&ftp[i]);
-+ if (ret) {
-+ fini_iptable_ftp(); /* rollback walks all ports_c entries, not only the i already registered */
-+ return ret;
-+ }
-+ }
-+ return 0;
-+}
-+
-+/* Not __exit: called from init() on its registration-failure path */
-+static void fini(void)
-+{
-+ KSYMMODUNRESOLVE(ip_conntrack_ftp); /* counterpart of the KSYMMODRESOLVE() in init() */
-+ KSYMUNRESOLVE(init_iptable_ftp); /* counterpart of the KSYMRESOLVE() calls in init() */
-+ KSYMUNRESOLVE(fini_iptable_ftp);
-+ fini_iptable_ftp(); /* unregister the helpers before ftp_buffer is freed */
- kfree(ftp_buffer);
- }
-
-@@ -496,13 +520,17 @@ static int __init init(void)
-
- DEBUGP("ip_ct_ftp: registering helper for port %d\n",
- ports[i]);
-- ret = ip_conntrack_helper_register(&ftp[i]);
-+ ret = virt_ip_conntrack_helper_register(&ftp[i]);
-
- if (ret) {
- fini();
- return ret;
- }
- }
-+
-+ KSYMRESOLVE(init_iptable_ftp);
-+ KSYMRESOLVE(fini_iptable_ftp);
-+ KSYMMODRESOLVE(ip_conntrack_ftp);
- return 0;
- }
-
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_irc.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_irc.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_irc.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_irc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -28,6 +28,7 @@
- #include <linux/ip.h>
- #include <net/checksum.h>
- #include <net/tcp.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
- #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-@@ -244,6 +245,33 @@ static char irc_names[MAX_PORTS][sizeof(
-
- static void fini(void);
-
-+void fini_iptable_irc(void)
-+{ /* Unregisters irc_helpers[0..ports_c-1] through the virt_ wrapper. */
-+ int i;
-+ for (i = 0; i < ports_c; i++) {
-+ DEBUGP("unregistering port %d\n",
-+ ports[i]);
-+ virt_ip_conntrack_helper_unregister(&irc_helpers[i]);
-+ }
-+}
-+
-+int init_iptable_irc(void)
-+{ /* Registers irc_helpers[0..ports_c-1]; returns 0, or -EBUSY after rolling back on the first failure. */
-+ int i, ret;
-+
-+ for (i = 0; i < ports_c; i++) {
-+ DEBUGP("port #%d: %d\n", i, ports[i]);
-+ ret = virt_ip_conntrack_helper_register(&irc_helpers[i]);
-+ if (ret) {
-+ printk("ip_conntrack_irc: ERROR registering port %d\n",
-+ ports[i]);
-+ fini_iptable_irc(); /* rollback walks all ports_c entries, not only the i already registered */
-+ return -EBUSY; /* the error code held in ret is not propagated */
-+ }
-+ }
-+ return 0;
-+}
-+
- static int __init init(void)
- {
- int i, ret;
-@@ -283,7 +311,7 @@ static int __init init(void)
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
-- ret = ip_conntrack_helper_register(hlpr);
-+ ret = virt_ip_conntrack_helper_register(hlpr);
-
- if (ret) {
- printk("ip_conntrack_irc: ERROR registering port %d\n",
-@@ -292,6 +320,10 @@ static int __init init(void)
- return -EBUSY;
- }
- }
-+
-+ KSYMRESOLVE(init_iptable_irc);
-+ KSYMRESOLVE(fini_iptable_irc);
-+ KSYMMODRESOLVE(ip_conntrack_irc);
- return 0;
- }
-
-@@ -299,12 +331,10 @@ static int __init init(void)
- * it is needed by the init function */
- static void fini(void)
- {
-- int i;
-- for (i = 0; i < ports_c; i++) {
-- DEBUGP("unregistering port %d\n",
-- ports[i]);
-- ip_conntrack_helper_unregister(&irc_helpers[i]);
-- }
-+ KSYMMODUNRESOLVE(ip_conntrack_irc); /* counterpart of the KSYMMODRESOLVE() in init() */
-+ KSYMUNRESOLVE(init_iptable_irc); /* counterpart of the KSYMRESOLVE() calls in init() */
-+ KSYMUNRESOLVE(fini_iptable_irc);
-+ fini_iptable_irc(); /* the per-port unregister loop now lives in fini_iptable_irc() */
- kfree(irc_buffer);
- }
-
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_netlink.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_netlink.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_netlink.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_netlink.c 2006-04-19 15:02:12.000000000 +0400
-@@ -29,6 +29,7 @@
- #include <linux/spinlock.h>
- #include <linux/interrupt.h>
- #include <linux/notifier.h>
-+#include <net/sock.h>
-
- #include <linux/netfilter.h>
- #include <linux/netfilter_ipv4/ip_conntrack.h>
-@@ -39,6 +40,8 @@
-
- #include <linux/netfilter/nfnetlink.h>
- #include <linux/netfilter/nfnetlink_conntrack.h>
-+#include <ub/beancounter.h>
-+#include <ub/ub_sk.h>
-
- MODULE_LICENSE("GPL");
-
-@@ -403,7 +406,7 @@ ctnetlink_dump_table(struct sk_buff *skb
-
- read_lock_bh(&ip_conntrack_lock);
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
-+ list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
-@@ -440,7 +443,7 @@ ctnetlink_dump_table_w(struct sk_buff *s
-
- write_lock_bh(&ip_conntrack_lock);
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
-- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
-+ list_for_each_prev(i, &ve_ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
-@@ -1003,14 +1006,15 @@ ctnetlink_change_conntrack(struct ip_con
- static int
- ctnetlink_create_conntrack(struct nfattr *cda[],
- struct ip_conntrack_tuple *otuple,
-- struct ip_conntrack_tuple *rtuple)
-+ struct ip_conntrack_tuple *rtuple,
-+ struct user_beancounter *ub)
- {
- struct ip_conntrack *ct;
- int err = -EINVAL;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
-- ct = ip_conntrack_alloc(otuple, rtuple);
-+ ct = ip_conntrack_alloc(otuple, rtuple, ub);
- if (ct == NULL || IS_ERR(ct))
- return -ENOMEM;
-
-@@ -1087,8 +1091,16 @@ ctnetlink_new_conntrack(struct sock *ctn
- write_unlock_bh(&ip_conntrack_lock);
- DEBUGP("no such conntrack, create new\n");
- err = -ENOENT;
-- if (nlh->nlmsg_flags & NLM_F_CREATE)
-- err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
-+ if (nlh->nlmsg_flags & NLM_F_CREATE) {
-+#ifdef CONFIG_USER_RESOURCE
-+ if (skb->sk)
-+ err = ctnetlink_create_conntrack(cda, &otuple,
-+ &rtuple, sock_bc(skb->sk)->ub);
-+ else
-+#endif
-+ err = ctnetlink_create_conntrack(cda,
-+ &otuple, &rtuple, NULL);
-+ }
- return err;
- }
- /* implicit 'else' */
-@@ -1249,7 +1261,7 @@ ctnetlink_exp_dump_table(struct sk_buff
- DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
-
- read_lock_bh(&ip_conntrack_lock);
-- list_for_each_prev(i, &ip_conntrack_expect_list) {
-+ list_for_each_prev(i, &ve_ip_conntrack_expect_list) {
- exp = (struct ip_conntrack_expect *) i;
- if (exp->id <= *id)
- continue;
-@@ -1395,7 +1407,7 @@ ctnetlink_del_expect(struct sock *ctnl,
- write_unlock_bh(&ip_conntrack_lock);
- return -EINVAL;
- }
-- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
-+ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
- list) {
- if (exp->master->helper == h
- && del_timer(&exp->timeout)) {
-@@ -1407,7 +1419,7 @@ ctnetlink_del_expect(struct sock *ctnl,
- } else {
- /* This basically means we have to flush everything*/
- write_lock_bh(&ip_conntrack_lock);
-- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
-+ list_for_each_entry_safe(exp, tmp, &ve_ip_conntrack_expect_list,
- list) {
- if (del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
-@@ -1619,7 +1631,7 @@ static void __exit ctnetlink_exit(void)
- printk("ctnetlink: unregistering from nfnetlink.\n");
-
- #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-- ip_conntrack_unregister_notifier(&ctnl_notifier_exp);
-+ ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
- ip_conntrack_unregister_notifier(&ctnl_notifier);
- #endif
-
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_generic.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_generic.c 2006-04-19 15:02:12.000000000 +0400
-@@ -52,7 +52,7 @@ static int packet(struct ip_conntrack *c
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
- {
-- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
-+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_generic_timeout);
- return NF_ACCEPT;
- }
-
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_icmp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -104,7 +104,7 @@ static int icmp_packet(struct ip_conntra
- } else {
- atomic_inc(&ct->proto.icmp.count);
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
-+ ip_ct_refresh_acct(ct, ctinfo, skb, ve_ip_ct_icmp_timeout);
- }
-
- return NF_ACCEPT;
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -98,7 +98,7 @@ unsigned int ip_ct_tcp_timeout_close =
- to ~13-30min depending on RTO. */
- unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
-
--static const unsigned int * tcp_timeouts[]
-+const unsigned int * tcp_timeouts[]
- = { NULL, /* TCP_CONNTRACK_NONE */
- &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
- &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
-@@ -762,7 +762,7 @@ static int tcp_in_window(struct ip_ct_tc
- : "SEQ is under the lower bound (already ACKed data retransmitted)"
- : "SEQ is over the upper bound (over the window of the receiver)");
-
-- res = ip_ct_tcp_be_liberal;
-+ res = ve_ip_ct_tcp_be_liberal;
- }
-
- DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
-@@ -1033,9 +1033,11 @@ static int tcp_packet(struct ip_conntrac
- && (new_state == TCP_CONNTRACK_FIN_WAIT
- || new_state == TCP_CONNTRACK_CLOSE))
- conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-- timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-- && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-- ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
-+ timeout = conntrack->proto.tcp.retrans >= ve_ip_ct_tcp_max_retrans &&
-+ ve_ip_ct_tcp_timeouts[new_state] >
-+ ve_ip_ct_tcp_timeout_max_retrans
-+ ? ve_ip_ct_tcp_timeout_max_retrans :
-+ ve_ip_ct_tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
-
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-@@ -1110,7 +1112,7 @@ static int tcp_new(struct ip_conntrack *
- conntrack->proto.tcp.seen[1].flags = 0;
- conntrack->proto.tcp.seen[0].loose =
- conntrack->proto.tcp.seen[1].loose = 0;
-- } else if (ip_ct_tcp_loose == 0) {
-+ } else if (ve_ip_ct_tcp_loose == 0) {
- /* Don't try to pick up connections. */
- return 0;
- } else {
-@@ -1134,7 +1136,7 @@ static int tcp_new(struct ip_conntrack *
- conntrack->proto.tcp.seen[0].flags =
- conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
- conntrack->proto.tcp.seen[0].loose =
-- conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
-+ conntrack->proto.tcp.seen[1].loose = ve_ip_ct_tcp_loose;
- }
-
- conntrack->proto.tcp.seen[1].td_end = 0;
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_udp.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_proto_udp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -71,12 +71,12 @@ static int udp_packet(struct ip_conntrac
- stream. Extend timeout. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- ip_ct_refresh_acct(conntrack, ctinfo, skb,
-- ip_ct_udp_timeout_stream);
-+ ve_ip_ct_udp_timeout_stream);
- /* Also, more likely to be important, and not a probe */
- if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
-- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
-+ ip_ct_refresh_acct(conntrack, ctinfo, skb, ve_ip_ct_udp_timeout);
-
- return NF_ACCEPT;
- }
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_standalone.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_conntrack_standalone.c 2006-04-19 15:02:12.000000000 +0400
-@@ -28,6 +28,7 @@
- #include <net/checksum.h>
- #include <net/ip.h>
- #include <net/route.h>
-+#include <linux/nfcalls.h>
-
- #define ASSERT_READ_LOCK(x)
- #define ASSERT_WRITE_LOCK(x)
-@@ -46,9 +47,31 @@
-
- MODULE_LICENSE("GPL");
-
-+int ip_conntrack_disable_ve0 = 0;
-+module_param(ip_conntrack_disable_ve0, int, 0440);
-+
- extern atomic_t ip_conntrack_count;
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_ip_conntrack_count \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_count)
-+#else
-+#define ve_ip_conntrack_count ip_conntrack_count
-+#endif
- DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-+/* Prior to 2.6.15, we had an ip_conntrack_enable_ve0 param. It is still accepted so old module options keep loading, but warn_set() only logs that it is obsolete. */
-+static int warn_set(const char *val, struct kernel_param *kp)
-+{
-+ printk(KERN_INFO KBUILD_MODNAME
-+ ": parameter ip_conntrack_enable_ve0 is obsoleted. In ovzkernel"
-+ " >= 2.6.15 connection tracking on hardware node is enabled by "
-+ "default, use ip_conntrack_disable_ve0=1 parameter to "
-+ "disable.\n");
-+ return 0; /* val and kp are not looked at; setting the parameter never fails */
-+}
-+module_param_call(ip_conntrack_enable_ve0, warn_set, NULL, NULL, 0);
-+
- static int kill_proto(struct ip_conntrack *i, void *data)
- {
- return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
-@@ -89,8 +112,8 @@ static struct list_head *ct_get_first(st
- for (st->bucket = 0;
- st->bucket < ip_conntrack_htable_size;
- st->bucket++) {
-- if (!list_empty(&ip_conntrack_hash[st->bucket]))
-- return ip_conntrack_hash[st->bucket].next;
-+ if (!list_empty(&ve_ip_conntrack_hash[st->bucket]))
-+ return ve_ip_conntrack_hash[st->bucket].next;
- }
- return NULL;
- }
-@@ -100,10 +123,10 @@ static struct list_head *ct_get_next(str
- struct ct_iter_state *st = seq->private;
-
- head = head->next;
-- while (head == &ip_conntrack_hash[st->bucket]) {
-+ while (head == &ve_ip_conntrack_hash[st->bucket]) {
- if (++st->bucket >= ip_conntrack_htable_size)
- return NULL;
-- head = ip_conntrack_hash[st->bucket].next;
-+ head = ve_ip_conntrack_hash[st->bucket].next;
- }
- return head;
- }
-@@ -234,7 +257,7 @@ static struct file_operations ct_file_op
- /* expects */
- static void *exp_seq_start(struct seq_file *s, loff_t *pos)
- {
-- struct list_head *e = &ip_conntrack_expect_list;
-+ struct list_head *e = &ve_ip_conntrack_expect_list;
- loff_t i;
-
- /* strange seq_file api calls stop even if we fail,
-@@ -246,7 +269,7 @@ static void *exp_seq_start(struct seq_fi
-
- for (i = 0; i <= *pos; i++) {
- e = e->next;
-- if (e == &ip_conntrack_expect_list)
-+ if (e == &ve_ip_conntrack_expect_list)
- return NULL;
- }
- return e;
-@@ -259,7 +282,7 @@ static void *exp_seq_next(struct seq_fil
- ++*pos;
- e = e->next;
-
-- if (e == &ip_conntrack_expect_list)
-+ if (e == &ve_ip_conntrack_expect_list)
- return NULL;
-
- return e;
-@@ -344,7 +367,7 @@ static void ct_cpu_seq_stop(struct seq_f
-
- static int ct_cpu_seq_show(struct seq_file *seq, void *v)
- {
-- unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
-+ unsigned int nr_conntracks = atomic_read(&ve_ip_conntrack_count);
- struct ip_conntrack_stat *st = v;
-
- if (v == SEQ_START_TOKEN) {
-@@ -541,6 +564,28 @@ static struct nf_hook_ops ip_conntrack_l
-
- /* From ip_conntrack_core.c */
- extern int ip_conntrack_max;
-+#ifdef CONFIG_VE_IPTABLES
-+#define ve_ip_conntrack_max \
-+ (get_exec_env()->_ip_conntrack->_ip_conntrack_max)
-+#define ve_ip_ct_sysctl_header \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_header)
-+#define ve_ip_ct_net_table \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_net_table)
-+#define ve_ip_ct_ipv4_table \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_ipv4_table)
-+#define ve_ip_ct_netfilter_table \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_netfilter_table)
-+#define ve_ip_ct_sysctl_table \
-+ (get_exec_env()->_ip_conntrack->_ip_ct_sysctl_table)
-+#else
-+#define ve_ip_conntrack_max ip_conntrack_max
-+static struct ctl_table_header *ip_ct_sysctl_header;
-+#define ve_ip_ct_sysctl_header ip_ct_sysctl_header
-+#define ve_ip_ct_net_table ip_ct_net_table
-+#define ve_ip_ct_ipv4_table ip_ct_ipv4_table
-+#define ve_ip_ct_netfilter_table ip_ct_netfilter_table
-+#define ve_ip_ct_sysctl_table ip_ct_sysctl_table
-+#endif
- extern unsigned int ip_conntrack_htable_size;
-
- /* From ip_conntrack_proto_tcp.c */
-@@ -571,8 +616,6 @@ extern unsigned int ip_ct_generic_timeou
- static int log_invalid_proto_min = 0;
- static int log_invalid_proto_max = 255;
-
--static struct ctl_table_header *ip_ct_sysctl_header;
--
- static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
-@@ -781,6 +824,140 @@ static ctl_table ip_ct_net_table[] = {
- };
-
- EXPORT_SYMBOL(ip_ct_log_invalid);
-+
-+#ifdef CONFIG_VE
-+/*
-+ * Release the conntrack sysctl tables of the current execution environment.
-+ * A container (non-super VE) owns kmalloc'ed copies, which are freed; the
-+ * host VE points at the static tables, so only the pointers are cleared.
-+ */
-+static void ip_conntrack_sysctl_cleanup(void)
-+{
-+	if (!ve_is_super(get_exec_env())) {
-+		kfree(ve_ip_ct_net_table);
-+		kfree(ve_ip_ct_ipv4_table);
-+		kfree(ve_ip_ct_netfilter_table);
-+		kfree(ve_ip_ct_sysctl_table);
-+	}
-+	ve_ip_ct_net_table = NULL;
-+	ve_ip_ct_ipv4_table = NULL;
-+	ve_ip_ct_netfilter_table = NULL;
-+	ve_ip_ct_sysctl_table = NULL;
-+}
-+
-+/* kmalloc() room for k ctl_table entries into field, or jump to label. */
-+#define ALLOC_ENVCTL(field,k,label) \
-+	if ( !(field = kmalloc((k)*sizeof(ctl_table), GFP_KERNEL)) ) \
-+		goto label;
-+/* Number of entries, terminator included, in a static ctl_table array. */
-+#define ENVCTL_ENTRIES(table) (sizeof(table)/sizeof(ctl_table))
-+
-+/*
-+ * Set up the conntrack sysctl tables of the current execution environment
-+ * and point each entry's ->data at that environment's own variable, seeded
-+ * from the global default.  The host VE uses the static tables directly; a
-+ * container gets private copies chained net -> ipv4 -> netfilter -> sysctl
-+ * through ->child.
-+ *
-+ * Returns 0 on success, or -ENOMEM when a copy cannot be allocated; the
-+ * copies allocated before the failure are freed again.
-+ */
-+static int ip_conntrack_sysctl_init(void)
-+{
-+	int i, ret = 0;
-+
-+	ret = -ENOMEM;
-+	if (ve_is_super(get_exec_env())) {
-+		ve_ip_ct_net_table = ip_ct_net_table;
-+		ve_ip_ct_ipv4_table = ip_ct_ipv4_table;
-+		ve_ip_ct_netfilter_table = ip_ct_netfilter_table;
-+		ve_ip_ct_sysctl_table = ip_ct_sysctl_table;
-+	} else {
-+		/*
-+		 * Allocate structures in ve_struct.  Every buffer is sized
-+		 * from the static table it is copied from, so the memcpy()
-+		 * calls below cannot write past the end of a copy.
-+		 */
-+		ALLOC_ENVCTL(ve_ip_ct_net_table,
-+			     ENVCTL_ENTRIES(ip_ct_net_table), out);
-+		ALLOC_ENVCTL(ve_ip_ct_ipv4_table,
-+			     ENVCTL_ENTRIES(ip_ct_ipv4_table), nomem_1);
-+		ALLOC_ENVCTL(ve_ip_ct_netfilter_table,
-+			     ENVCTL_ENTRIES(ip_ct_netfilter_table), nomem_2);
-+		ALLOC_ENVCTL(ve_ip_ct_sysctl_table,
-+			     ENVCTL_ENTRIES(ip_ct_sysctl_table), nomem_3);
-+
-+		memcpy(ve_ip_ct_net_table, ip_ct_net_table,
-+		       sizeof(ip_ct_net_table));
-+		memcpy(ve_ip_ct_ipv4_table, ip_ct_ipv4_table,
-+		       sizeof(ip_ct_ipv4_table));
-+		memcpy(ve_ip_ct_netfilter_table, ip_ct_netfilter_table,
-+		       sizeof(ip_ct_netfilter_table));
-+		memcpy(ve_ip_ct_sysctl_table, ip_ct_sysctl_table,
-+		       sizeof(ip_ct_sysctl_table));
-+
-+		ve_ip_ct_net_table[0].child = ve_ip_ct_ipv4_table;
-+		ve_ip_ct_ipv4_table[0].child = ve_ip_ct_netfilter_table;
-+		ve_ip_ct_netfilter_table[0].child = ve_ip_ct_sysctl_table;
-+	}
-+	/* The sysctl entries are addressed by their position in the table. */
-+	ve_ip_ct_sysctl_table[0].data = &ve_ip_conntrack_max;
-+	ve_ip_ct_netfilter_table[1].data = &ve_ip_conntrack_max;
-+	ve_ip_ct_sysctl_table[1].data = &ve_ip_conntrack_count;
-+	/* skip ve_ip_ct_sysctl_table[2].data as it is read-only and common
-+	 * for all environments */
-+	ve_ip_ct_tcp_timeouts[1] = ip_ct_tcp_timeout_syn_sent;
-+	ve_ip_ct_sysctl_table[3].data = &ve_ip_ct_tcp_timeouts[1];
-+	ve_ip_ct_tcp_timeouts[2] = ip_ct_tcp_timeout_syn_recv;
-+	ve_ip_ct_sysctl_table[4].data = &ve_ip_ct_tcp_timeouts[2];
-+	ve_ip_ct_tcp_timeouts[3] = ip_ct_tcp_timeout_established;
-+	ve_ip_ct_sysctl_table[5].data = &ve_ip_ct_tcp_timeouts[3];
-+	ve_ip_ct_tcp_timeouts[4] = ip_ct_tcp_timeout_fin_wait;
-+	ve_ip_ct_sysctl_table[6].data = &ve_ip_ct_tcp_timeouts[4];
-+	ve_ip_ct_tcp_timeouts[5] = ip_ct_tcp_timeout_close_wait;
-+	ve_ip_ct_sysctl_table[7].data = &ve_ip_ct_tcp_timeouts[5];
-+	ve_ip_ct_tcp_timeouts[6] = ip_ct_tcp_timeout_last_ack;
-+	ve_ip_ct_sysctl_table[8].data = &ve_ip_ct_tcp_timeouts[6];
-+	ve_ip_ct_tcp_timeouts[7] = ip_ct_tcp_timeout_time_wait;
-+	ve_ip_ct_sysctl_table[9].data = &ve_ip_ct_tcp_timeouts[7];
-+	ve_ip_ct_tcp_timeouts[8] = ip_ct_tcp_timeout_close;
-+	ve_ip_ct_sysctl_table[10].data = &ve_ip_ct_tcp_timeouts[8];
-+	ve_ip_ct_udp_timeout = ip_ct_udp_timeout;
-+	ve_ip_ct_sysctl_table[11].data = &ve_ip_ct_udp_timeout;
-+	ve_ip_ct_udp_timeout_stream = ip_ct_udp_timeout_stream;
-+	ve_ip_ct_sysctl_table[12].data = &ve_ip_ct_udp_timeout_stream;
-+	ve_ip_ct_icmp_timeout = ip_ct_icmp_timeout;
-+	ve_ip_ct_sysctl_table[13].data = &ve_ip_ct_icmp_timeout;
-+	ve_ip_ct_generic_timeout = ip_ct_generic_timeout;
-+	ve_ip_ct_sysctl_table[14].data = &ve_ip_ct_generic_timeout;
-+	ve_ip_ct_log_invalid = ip_ct_log_invalid;
-+	ve_ip_ct_sysctl_table[15].data = &ve_ip_ct_log_invalid;
-+	ve_ip_ct_tcp_timeout_max_retrans = ip_ct_tcp_timeout_max_retrans;
-+	ve_ip_ct_sysctl_table[16].data = &ve_ip_ct_tcp_timeout_max_retrans;
-+	ve_ip_ct_tcp_loose = ip_ct_tcp_loose;
-+	ve_ip_ct_sysctl_table[17].data = &ve_ip_ct_tcp_loose;
-+	ve_ip_ct_tcp_be_liberal = ip_ct_tcp_be_liberal;
-+	ve_ip_ct_sysctl_table[18].data = &ve_ip_ct_tcp_be_liberal;
-+	ve_ip_ct_tcp_max_retrans = ip_ct_tcp_max_retrans;
-+	ve_ip_ct_sysctl_table[19].data = &ve_ip_ct_tcp_max_retrans;
-+	for (i = 0; i < 20; i++)
-+		ve_ip_ct_sysctl_table[i].owner_env = get_exec_env();
-+	ve_ip_ct_netfilter_table[1].owner_env = get_exec_env();
-+	return 0;
-+
-+nomem_3:
-+	kfree(ve_ip_ct_netfilter_table);
-+	ve_ip_ct_netfilter_table = NULL;
-+nomem_2:
-+	kfree(ve_ip_ct_ipv4_table);
-+	ve_ip_ct_ipv4_table = NULL;
-+nomem_1:
-+	kfree(ve_ip_ct_net_table);
-+	ve_ip_ct_net_table = NULL;
-+out:
-+	return ret;
-+}
-+#endif /*CONFIG_VE*/
- #endif /* CONFIG_SYSCTL */
-
- static int init_or_cleanup(int init)
-@@ -792,9 +941,16 @@ static int init_or_cleanup(int init)
-
- if (!init) goto cleanup;
-
-+ ret = -ENOENT;
-+ if (!ve_is_super(get_exec_env()))
-+ __module_get(THIS_MODULE);
-+
- ret = ip_conntrack_init();
- if (ret < 0)
-- goto cleanup_nothing;
-+ goto cleanup_unget;
-+
-+ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
-+ return 0;
-
- #ifdef CONFIG_PROC_FS
- ret = -ENOMEM;
-@@ -804,98 +960,115 @@ static int init_or_cleanup(int init)
- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
- &exp_file_ops);
- if (!proc_exp) goto cleanup_proc;
-+ proc_exp->proc_fops = &exp_file_ops;
-
-- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-- if (!proc_stat)
-- goto cleanup_proc_exp;
-+ if (ve_is_super(get_exec_env())) {
-+ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-+ if (!proc_stat)
-+ goto cleanup_proc_exp;
-
-- proc_stat->proc_fops = &ct_cpu_seq_fops;
-- proc_stat->owner = THIS_MODULE;
-+ proc_stat->proc_fops = &ct_cpu_seq_fops;
-+ proc_stat->owner = THIS_MODULE;
-+ }
- #endif
-
-- ret = nf_register_hook(&ip_conntrack_defrag_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_defrag_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register pre-routing defrag hook.\n");
- goto cleanup_proc_stat;
- }
-- ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_defrag_local_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local_out defrag hook.\n");
- goto cleanup_defragops;
- }
-- ret = nf_register_hook(&ip_conntrack_in_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_in_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register pre-routing hook.\n");
- goto cleanup_defraglocalops;
- }
-- ret = nf_register_hook(&ip_conntrack_local_out_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_local_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local out hook.\n");
- goto cleanup_inops;
- }
-- ret = nf_register_hook(&ip_conntrack_helper_in_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_helper_in_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local in helper hook.\n");
- goto cleanup_inandlocalops;
- }
-- ret = nf_register_hook(&ip_conntrack_helper_out_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_helper_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register postrouting helper hook.\n");
- goto cleanup_helperinops;
- }
-- ret = nf_register_hook(&ip_conntrack_out_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register post-routing hook.\n");
- goto cleanup_helperoutops;
- }
-- ret = nf_register_hook(&ip_conntrack_local_in_ops);
-+ ret = virt_nf_register_hook(&ip_conntrack_local_in_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local in hook.\n");
- goto cleanup_inoutandlocalops;
- }
- #ifdef CONFIG_SYSCTL
-- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
-- if (ip_ct_sysctl_header == NULL) {
-+#ifdef CONFIG_VE
-+ ret = ip_conntrack_sysctl_init();
-+ if (ret < 0)
-+ goto cleanup_sysctl;
-+#endif
-+ ret = -ENOMEM;
-+ ve_ip_ct_sysctl_header = register_sysctl_table(ve_ip_ct_net_table, 0);
-+ if (ve_ip_ct_sysctl_header == NULL) {
- printk("ip_conntrack: can't register to sysctl.\n");
-- ret = -ENOMEM;
-- goto cleanup_localinops;
-+ goto cleanup_sysctl2;
- }
- #endif
-
-- return ret;
-+ return 0;
-
- cleanup:
-+ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
-+ goto cleanup_init;
- synchronize_net();
- #ifdef CONFIG_SYSCTL
-- unregister_sysctl_table(ip_ct_sysctl_header);
-- cleanup_localinops:
-+ unregister_sysctl_table(ve_ip_ct_sysctl_header);
-+ cleanup_sysctl2:
-+#ifdef CONFIG_VE
-+ ip_conntrack_sysctl_cleanup();
-+ cleanup_sysctl:
-+#endif
- #endif
-- nf_unregister_hook(&ip_conntrack_local_in_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_local_in_ops);
- cleanup_inoutandlocalops:
-- nf_unregister_hook(&ip_conntrack_out_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_out_ops);
- cleanup_helperoutops:
-- nf_unregister_hook(&ip_conntrack_helper_out_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_helper_out_ops);
- cleanup_helperinops:
-- nf_unregister_hook(&ip_conntrack_helper_in_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_helper_in_ops);
- cleanup_inandlocalops:
-- nf_unregister_hook(&ip_conntrack_local_out_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_local_out_ops);
- cleanup_inops:
-- nf_unregister_hook(&ip_conntrack_in_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_in_ops);
- cleanup_defraglocalops:
-- nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
- cleanup_defragops:
-- nf_unregister_hook(&ip_conntrack_defrag_ops);
-+ virt_nf_unregister_hook(&ip_conntrack_defrag_ops);
- cleanup_proc_stat:
- #ifdef CONFIG_PROC_FS
-- remove_proc_entry("ip_conntrack", proc_net_stat);
-+ if (ve_is_super(get_exec_env()))
-+ remove_proc_entry("ip_conntrack", proc_net_stat);
- cleanup_proc_exp:
- proc_net_remove("ip_conntrack_expect");
- cleanup_proc:
- proc_net_remove("ip_conntrack");
-- cleanup_init:
- #endif /* CONFIG_PROC_FS */
-+ cleanup_init:
- ip_conntrack_cleanup();
-- cleanup_nothing:
-+ cleanup_unget:
-+ if (!ve_is_super(get_exec_env()))
-+ module_put(THIS_MODULE);
- return ret;
- }
-
-@@ -906,11 +1079,11 @@ int ip_conntrack_protocol_register(struc
- int ret = 0;
-
- write_lock_bh(&ip_conntrack_lock);
-- if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
-+ if (ve_ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
- ret = -EBUSY;
- goto out;
- }
-- ip_ct_protos[proto->proto] = proto;
-+ ve_ip_ct_protos[proto->proto] = proto;
- out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-@@ -919,7 +1092,7 @@ int ip_conntrack_protocol_register(struc
- void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
- {
- write_lock_bh(&ip_conntrack_lock);
-- ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
-+ ve_ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
- write_unlock_bh(&ip_conntrack_lock);
-
- /* Somebody could be still looking at the proto in bh. */
-@@ -929,17 +1102,39 @@ void ip_conntrack_protocol_unregister(st
- ip_ct_iterate_cleanup(kill_proto, &proto->proto);
- }
-
--static int __init init(void)
-+int init_iptable_conntrack(void)
- {
- return init_or_cleanup(1);
- }
-
--static void __exit fini(void)
-+void fini_iptable_conntrack(void)
- {
- init_or_cleanup(0);
- }
-
--module_init(init);
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_iptable_conntrack();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_iptable_conntrack);
-+ KSYMRESOLVE(fini_iptable_conntrack);
-+ KSYMMODRESOLVE(ip_conntrack);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ip_conntrack);
-+ KSYMUNRESOLVE(init_iptable_conntrack);
-+ KSYMUNRESOLVE(fini_iptable_conntrack);
-+ fini_iptable_conntrack();
-+}
-+
-+subsys_initcall(init);
- module_exit(fini);
-
- /* Some modules need us, but don't depend directly on any symbol.
-@@ -956,15 +1151,20 @@ EXPORT_SYMBOL_GPL(ip_conntrack_unregiste
- EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
- EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
- #endif
-+EXPORT_SYMBOL(ip_conntrack_disable_ve0);
- EXPORT_SYMBOL(ip_conntrack_protocol_register);
- EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
- EXPORT_SYMBOL(ip_ct_get_tuple);
- EXPORT_SYMBOL(invert_tuplepr);
- EXPORT_SYMBOL(ip_conntrack_alter_reply);
-+#ifndef CONFIG_VE_IPTABLES
- EXPORT_SYMBOL(ip_conntrack_destroyed);
-+#endif
- EXPORT_SYMBOL(need_conntrack);
- EXPORT_SYMBOL(ip_conntrack_helper_register);
- EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-+EXPORT_SYMBOL(virt_ip_conntrack_helper_register);
-+EXPORT_SYMBOL(virt_ip_conntrack_helper_unregister);
- EXPORT_SYMBOL(ip_ct_iterate_cleanup);
- EXPORT_SYMBOL(__ip_ct_refresh_acct);
-
-@@ -974,14 +1174,18 @@ EXPORT_SYMBOL_GPL(__ip_conntrack_expect_
- EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
- EXPORT_SYMBOL(ip_conntrack_expect_related);
- EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-+#ifndef CONFIG_VE_IPTABLES
- EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-+#endif
- EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
-
- EXPORT_SYMBOL(ip_conntrack_tuple_taken);
- EXPORT_SYMBOL(ip_ct_gather_frags);
- EXPORT_SYMBOL(ip_conntrack_htable_size);
- EXPORT_SYMBOL(ip_conntrack_lock);
-+#ifndef CONFIG_VE_IPTABLES
- EXPORT_SYMBOL(ip_conntrack_hash);
-+#endif
- EXPORT_SYMBOL(ip_conntrack_untracked);
- EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
- #ifdef CONFIG_IP_NF_NAT_NEEDED
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_core.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_core.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_core.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_core.c 2006-04-19 15:02:12.000000000 +0400
-@@ -21,6 +21,8 @@
- #include <linux/icmp.h>
- #include <linux/udp.h>
- #include <linux/jhash.h>
-+#include <linux/nfcalls.h>
-+#include <ub/ub_mem.h>
-
- #define ASSERT_READ_LOCK(x)
- #define ASSERT_WRITE_LOCK(x)
-@@ -46,15 +48,24 @@ DEFINE_RWLOCK(ip_nat_lock);
- /* Calculated at init based on memory size */
- static unsigned int ip_nat_htable_size;
-
--static struct list_head *bysource;
--
- #define MAX_IP_NAT_PROTO 256
-+
-+#ifdef CONFIG_VE_IPTABLES
-+#define ve_ip_nat_bysource \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_bysource)
-+#define ve_ip_nat_protos \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_protos)
-+#else
-+static struct list_head *bysource;
-+#define ve_ip_nat_bysource bysource
- static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-+#define ve_ip_nat_protos ip_nat_protos
-+#endif
-
- static inline struct ip_nat_protocol *
- __ip_nat_proto_find(u_int8_t protonum)
- {
-- return ip_nat_protos[protonum];
-+ return ve_ip_nat_protos[protonum];
- }
-
- struct ip_nat_protocol *
-@@ -177,7 +188,7 @@ find_appropriate_src(const struct ip_con
- struct ip_conntrack *ct;
-
- read_lock_bh(&ip_nat_lock);
-- list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
-+ list_for_each_entry(ct, &ve_ip_nat_bysource[h], nat.info.bysource) {
- if (same_src(ct, tuple)) {
- /* Copy source part from reply tuple. */
- invert_tuplepr(result,
-@@ -291,13 +302,22 @@ get_unique_tuple(struct ip_conntrack_tup
- ip_nat_proto_put(proto);
- }
-
-+void ip_nat_hash_conntrack(struct ip_conntrack *conntrack)
-+{ /* Link conntrack into the ve_ip_nat_bysource hash (per-VE when CONFIG_VE_IPTABLES is set). */
-+ unsigned int srchash
-+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple); /* bucket is chosen from the original-direction tuple */
-+ write_lock_bh(&ip_nat_lock); /* the list insertion is done under ip_nat_lock, taken for writing */
-+ list_add(&conntrack->nat.info.bysource, &ve_ip_nat_bysource[srchash]);
-+ write_unlock_bh(&ip_nat_lock);
-+}
-+EXPORT_SYMBOL_GPL(ip_nat_hash_conntrack);
-+
- unsigned int
- ip_nat_setup_info(struct ip_conntrack *conntrack,
- const struct ip_nat_range *range,
- unsigned int hooknum)
- {
- struct ip_conntrack_tuple curr_tuple, new_tuple;
-- struct ip_nat_info *info = &conntrack->nat.info;
- int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-@@ -332,14 +352,8 @@ ip_nat_setup_info(struct ip_conntrack *c
- }
-
- /* Place in source hash if this is the first time. */
-- if (have_to_hash) {
-- unsigned int srchash
-- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-- .tuple);
-- write_lock_bh(&ip_nat_lock);
-- list_add(&info->bysource, &bysource[srchash]);
-- write_unlock_bh(&ip_nat_lock);
-- }
-+ if (have_to_hash)
-+ ip_nat_hash_conntrack(conntrack);
-
- /* It's done. */
- if (maniptype == IP_NAT_MANIP_DST)
-@@ -521,11 +535,11 @@ int ip_nat_protocol_register(struct ip_n
- int ret = 0;
-
- write_lock_bh(&ip_nat_lock);
-- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
-+ if (ve_ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
- ret = -EBUSY;
- goto out;
- }
-- ip_nat_protos[proto->protonum] = proto;
-+ ve_ip_nat_protos[proto->protonum] = proto;
- out:
- write_unlock_bh(&ip_nat_lock);
- return ret;
-@@ -536,7 +550,7 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
- void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
- {
- write_lock_bh(&ip_nat_lock);
-- ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
-+ ve_ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
- write_unlock_bh(&ip_nat_lock);
-
- /* Someone could be still looking at the proto in a bh. */
-@@ -589,38 +603,55 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_
- EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
- #endif
-
--static int __init ip_nat_init(void)
-+static int ip_nat_init(void)
- {
- size_t i;
-+ int ret;
-
-- /* Leave them the same for the moment. */
-- ip_nat_htable_size = ip_conntrack_htable_size;
-+ if (ve_is_super(get_exec_env()))
-+ ip_nat_htable_size = ip_conntrack_htable_size;
-
- /* One vmalloc for both hash tables */
-- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
-- if (!bysource)
-- return -ENOMEM;
-+ ret = -ENOMEM;
-+ ve_ip_nat_bysource =
-+ ub_vmalloc(sizeof(struct list_head)*ip_nat_htable_size*2);
-+ if (!ve_ip_nat_bysource)
-+ goto nomem;
-+
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_nat_protos =
-+ ub_kmalloc(sizeof(void *)*MAX_IP_NAT_PROTO, GFP_KERNEL);
-+ if (!ve_ip_nat_protos)
-+ goto nomem2;
-+#endif
-
- /* Sew in builtin protocols. */
- write_lock_bh(&ip_nat_lock);
- for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-- ip_nat_protos[i] = &ip_nat_unknown_protocol;
-- ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
-- ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
-- ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
-+ ve_ip_nat_protos[i] = &ip_nat_unknown_protocol;
-+ ve_ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
-+ ve_ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
-+ ve_ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
- write_unlock_bh(&ip_nat_lock);
-
- for (i = 0; i < ip_nat_htable_size; i++) {
-- INIT_LIST_HEAD(&bysource[i]);
-+ INIT_LIST_HEAD(&ve_ip_nat_bysource[i]);
- }
-
- /* FIXME: Man, this is a hack. <SIGH> */
- IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
-- ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
-+ ve_ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
-
-- /* Initialize fake conntrack so that NAT will skip it */
-- ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
-+ if (ve_is_super(get_exec_env()))
-+ /* Initialize fake conntrack so that NAT will skip it */
-+ ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
- return 0;
-+#ifdef CONFIG_VE_IPTABLES
-+nomem2:
-+#endif
-+ vfree(ve_ip_nat_bysource);
-+nomem:
-+ return ret;
- }
-
- /* Clear NAT section of all conntracks, in case we're loaded again. */
-@@ -631,14 +662,41 @@ static int clean_nat(struct ip_conntrack
- return 0;
- }
-
--static void __exit ip_nat_cleanup(void)
-+static void ip_nat_cleanup(void)
- {
- ip_ct_iterate_cleanup(&clean_nat, NULL);
-- ip_conntrack_destroyed = NULL;
-- vfree(bysource);
-+ ve_ip_conntrack_destroyed = NULL;
-+ vfree(ve_ip_nat_bysource);
-+ ve_ip_nat_bysource = NULL;
-+#ifdef CONFIG_VE_IPTABLES
-+ kfree(ve_ip_nat_protos);
-+ ve_ip_nat_protos = NULL;
-+#endif
-+}
-+
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = ip_nat_init();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(ip_nat_init);
-+ KSYMRESOLVE(ip_nat_cleanup);
-+ KSYMMODRESOLVE(ip_nat);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ip_nat);
-+ KSYMUNRESOLVE(ip_nat_cleanup);
-+ KSYMUNRESOLVE(ip_nat_init);
-+ ip_nat_cleanup();
- }
-
- MODULE_LICENSE("GPL");
-
--module_init(ip_nat_init);
--module_exit(ip_nat_cleanup);
-+fs_initcall(init);
-+module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_ftp.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_ftp.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_ftp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_ftp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -19,6 +19,7 @@
- #include <linux/netfilter_ipv4/ip_nat_rule.h>
- #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
- #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-+#include <linux/nfcalls.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-@@ -154,18 +155,43 @@ static unsigned int ip_nat_ftp(struct sk
- return NF_ACCEPT;
- }
-
--static void __exit fini(void)
-+#ifdef CONFIG_VE_IPTABLES
-+#undef ve_ip_nat_ftp_hook
-+#define ve_ip_nat_ftp_hook \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_ftp_hook)
-+#endif
-+int init_iptable_nat_ftp(void)
- {
-- ip_nat_ftp_hook = NULL;
-+ BUG_ON(ve_ip_nat_ftp_hook);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_nat_ftp_hook = (ip_nat_helper_func)ip_nat_ftp;
-+#else
-+ ve_ip_nat_ftp_hook = ip_nat_ftp;
-+#endif
-+ return 0;
-+}
-+
-+void fini_iptable_nat_ftp(void)
-+{
-+ ve_ip_nat_ftp_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
- }
-
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ip_nat_ftp);
-+ KSYMUNRESOLVE(init_iptable_nat_ftp);
-+ KSYMUNRESOLVE(fini_iptable_nat_ftp);
-+ fini_iptable_nat_ftp();
-+}
-+
- static int __init init(void)
- {
-- BUG_ON(ip_nat_ftp_hook);
-- ip_nat_ftp_hook = ip_nat_ftp;
-- return 0;
-+ KSYMRESOLVE(init_iptable_nat_ftp);
-+ KSYMRESOLVE(fini_iptable_nat_ftp);
-+ KSYMMODRESOLVE(ip_nat_ftp);
-+ return init_iptable_nat_ftp();
- }
-
- /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_irc.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_irc.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_irc.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_irc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -23,6 +23,7 @@
- #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
- #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
- #include <linux/moduleparam.h>
-+#include <linux/nfcalls.h>
-
- #if 0
- #define DEBUGP printk
-@@ -96,18 +97,44 @@ static unsigned int help(struct sk_buff
- return ret;
- }
-
--static void __exit fini(void)
-+#ifdef CONFIG_VE_IPTABLES
-+#undef ve_ip_nat_irc_hook
-+#define ve_ip_nat_irc_hook \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_irc_hook)
-+#endif
-+
-+int init_iptable_nat_irc(void)
-+{
-+ BUG_ON(ve_ip_nat_irc_hook);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_nat_irc_hook = (ip_nat_helper_func)help;
-+#else
-+ ve_ip_nat_irc_hook = help;
-+#endif
-+ return 0;
-+}
-+
-+void fini_iptable_nat_irc(void)
- {
-- ip_nat_irc_hook = NULL;
-+ ve_ip_nat_irc_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
- }
-
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ip_nat_irc);
-+ KSYMUNRESOLVE(init_iptable_nat_irc);
-+ KSYMUNRESOLVE(fini_iptable_nat_irc);
-+ fini_iptable_nat_irc();
-+}
-+
- static int __init init(void)
- {
-- BUG_ON(ip_nat_irc_hook);
-- ip_nat_irc_hook = help;
-- return 0;
-+ KSYMRESOLVE(init_iptable_nat_irc);
-+ KSYMRESOLVE(fini_iptable_nat_irc);
-+ KSYMMODRESOLVE(ip_nat_irc);
-+ return init_iptable_nat_irc();
- }
-
- /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_rule.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_rule.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_rule.c 2006-04-19 15:02:12.000000000 +0400
-@@ -34,6 +34,13 @@
- #define DEBUGP(format, args...)
- #endif
-
-+#ifdef CONFIG_VE_IPTABLES
-+#define ve_ip_nat_table \
-+ (get_exec_env()->_ip_conntrack->_ip_nat_table)
-+#else
-+#define ve_ip_nat_table &nat_table
-+#endif
-+
- #define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
-
- static struct
-@@ -41,7 +48,7 @@ static struct
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
--} nat_initial_table __initdata
-+} nat_initial_table
- = { { "nat", NAT_VALID_HOOKS, 4,
- sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- { [NF_IP_PRE_ROUTING] = 0,
-@@ -235,6 +242,93 @@ static int ipt_dnat_checkentry(const cha
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat_to_user(void *target, void **dstptr,
-+ int *size, int off)
-+{ /* Write target to user space at *dstptr in the compat layout; returns 0, or -EFAULT if a user copy fails. */
-+ struct ipt_entry_target *pt;
-+ struct ip_nat_multi_range_compat *pinfo;
-+ struct compat_ip_nat_multi_range info;
-+ u_int16_t tsize;
-+
-+ pt = (struct ipt_entry_target *)target;
-+ tsize = pt->u.user.target_size;
-+ if (__copy_to_user(*dstptr, pt, sizeof(struct ipt_entry_target))) /* target header goes out first */
-+ return -EFAULT;
-+ pinfo = (struct ip_nat_multi_range_compat *)pt->data;
-+ memset(&info, 0, sizeof(struct compat_ip_nat_multi_range));
-+ info.rangesize = pinfo->rangesize;
-+ info.range[0].flags = pinfo->range[0].flags; /* only range[0] is converted */
-+ info.range[0].min_ip = pinfo->range[0].min_ip;
-+ info.range[0].max_ip = pinfo->range[0].max_ip;
-+ info.range[0].min = pinfo->range[0].min;
-+ info.range[0].max = pinfo->range[0].max;
-+ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_target),
-+ &info, sizeof(struct compat_ip_nat_multi_range)))
-+ return -EFAULT;
-+ tsize -= off; /* the compat target is off bytes shorter than the native one */
-+ if (put_user(tsize, (u_int16_t *)*dstptr)) /* store the reduced size at the start of the user copy */
-+ return -EFAULT;
-+ *size -= off;
-+ *dstptr += tsize; /* advance the caller's cursor past the target just written */
-+ return 0;
-+}
-+
-+static int compat_from_user(void *target, void **dstptr,
-+ int *size, int off)
-+{ /* Expand a compat-layout target into the native layout at *dstptr; always returns 0. */
-+ struct compat_ipt_entry_target *pt;
-+ struct ipt_entry_target *dstpt;
-+ struct compat_ip_nat_multi_range *pinfo;
-+ struct ip_nat_multi_range_compat info;
-+ u_int16_t tsize;
-+
-+ pt = (struct compat_ipt_entry_target *)target;
-+ dstpt = (struct ipt_entry_target *)*dstptr;
-+ tsize = pt->u.user.target_size;
-+ memcpy(*dstptr, pt, sizeof(struct compat_ipt_entry_target)); /* target header is copied as-is */
-+ pinfo = (struct compat_ip_nat_multi_range *)pt->data;
-+ memset(&info, 0, sizeof(struct ip_nat_multi_range_compat));
-+ info.rangesize = pinfo->rangesize;
-+ info.range[0].flags = pinfo->range[0].flags; /* only range[0] is converted */
-+ info.range[0].min_ip = pinfo->range[0].min_ip;
-+ info.range[0].max_ip = pinfo->range[0].max_ip;
-+ info.range[0].min = pinfo->range[0].min;
-+ info.range[0].max = pinfo->range[0].max;
-+ memcpy(*dstptr + sizeof(struct compat_ipt_entry_target),
-+ &info, sizeof(struct ip_nat_multi_range_compat));
-+ tsize += off; /* the native target is off bytes longer than the compat one */
-+ dstpt->u.user.target_size = tsize;
-+ *size += off;
-+ *dstptr += tsize; /* advance the caller's cursor past the target just written */
-+ return 0;
-+}
-+
-+static int compat(void *target, void **dstptr, int *size, int convert)
-+{ /* .compat handler of the SNAT and DNAT targets: dispatches on convert; unknown values give -ENOPROTOOPT. */
-+ int ret, off;
-+
-+ off = IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat)) -
-+ COMPAT_IPT_ALIGN(sizeof(struct compat_ip_nat_multi_range)); /* aligned size difference, native minus compat */
-+ switch (convert) {
-+ case COMPAT_TO_USER:
-+ ret = compat_to_user(target, dstptr, size, off);
-+ break;
-+ case COMPAT_FROM_USER:
-+ ret = compat_from_user(target, dstptr, size, off);
-+ break;
-+ case COMPAT_CALC_SIZE:
-+ *size += off; /* size accounting only, nothing is copied */
-+ ret = 0;
-+ break;
-+ default:
-+ ret = -ENOPROTOOPT;
-+ break;
-+ }
-+ return ret;
-+}
-+#endif
-+
- inline unsigned int
- alloc_null_binding(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
-@@ -286,7 +380,7 @@ int ip_nat_rule_find(struct sk_buff **ps
- {
- int ret;
-
-- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
-+ ret = ipt_do_table(pskb, hooknum, in, out, ve_ip_nat_table, NULL);
-
- if (ret == NF_ACCEPT) {
- if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
-@@ -300,21 +394,33 @@ static struct ipt_target ipt_snat_reg =
- .name = "SNAT",
- .target = ipt_snat_target,
- .checkentry = ipt_snat_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- };
-
- static struct ipt_target ipt_dnat_reg = {
- .name = "DNAT",
- .target = ipt_dnat_target,
- .checkentry = ipt_dnat_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- };
-
--int __init ip_nat_rule_init(void)
-+int ip_nat_rule_init(void)
- {
- int ret;
-+ struct ipt_table *tmp_table;
-+
-+ tmp_table = ipt_register_table(&nat_table,
-+ &nat_initial_table.repl);
-+ if (IS_ERR(tmp_table))
-+ return PTR_ERR(tmp_table);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_nat_table = tmp_table;
-+#endif
-
-- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-- if (ret != 0)
-- return ret;
- ret = ipt_register_target(&ipt_snat_reg);
- if (ret != 0)
- goto unregister_table;
-@@ -328,7 +434,10 @@ int __init ip_nat_rule_init(void)
- unregister_snat:
- ipt_unregister_target(&ipt_snat_reg);
- unregister_table:
-- ipt_unregister_table(&nat_table);
-+ ipt_unregister_table(ve_ip_nat_table);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_nat_table = NULL;
-+#endif
-
- return ret;
- }
-@@ -337,5 +446,8 @@ void ip_nat_rule_cleanup(void)
- {
- ipt_unregister_target(&ipt_dnat_reg);
- ipt_unregister_target(&ipt_snat_reg);
-- ipt_unregister_table(&nat_table);
-+ ipt_unregister_table(ve_ip_nat_table);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_ip_nat_table = NULL;
-+#endif
- }
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_standalone.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_standalone.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_nat_standalone.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_nat_standalone.c 2006-04-19 15:02:12.000000000 +0400
-@@ -30,6 +30,7 @@
- #include <net/ip.h>
- #include <net/checksum.h>
- #include <linux/spinlock.h>
-+#include <linux/nfcalls.h>
-
- #define ASSERT_READ_LOCK(x)
- #define ASSERT_WRITE_LOCK(x)
-@@ -358,45 +359,45 @@ static int init_or_cleanup(int init)
- {
- int ret = 0;
-
-- need_conntrack();
--
- if (!init) goto cleanup;
-
--#ifdef CONFIG_XFRM
-- BUG_ON(ip_nat_decode_session != NULL);
-- ip_nat_decode_session = nat_decode_session;
--#endif
-+ if (!ve_is_super(get_exec_env()))
-+ __module_get(THIS_MODULE);
-+
- ret = ip_nat_rule_init();
- if (ret < 0) {
- printk("ip_nat_init: can't setup rules.\n");
-- goto cleanup_decode_session;
-+ goto cleanup_modput;
- }
-- ret = nf_register_hook(&ip_nat_in_ops);
-+ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
-+ return 0;
-+
-+ ret = virt_nf_register_hook(&ip_nat_in_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register in hook.\n");
- goto cleanup_rule_init;
- }
-- ret = nf_register_hook(&ip_nat_out_ops);
-+ ret = virt_nf_register_hook(&ip_nat_out_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register out hook.\n");
- goto cleanup_inops;
- }
-- ret = nf_register_hook(&ip_nat_adjust_in_ops);
-+ ret = virt_nf_register_hook(&ip_nat_adjust_in_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register adjust in hook.\n");
- goto cleanup_outops;
- }
-- ret = nf_register_hook(&ip_nat_adjust_out_ops);
-+ ret = virt_nf_register_hook(&ip_nat_adjust_out_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register adjust out hook.\n");
- goto cleanup_adjustin_ops;
- }
-- ret = nf_register_hook(&ip_nat_local_out_ops);
-+ ret = virt_nf_register_hook(&ip_nat_local_out_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register local out hook.\n");
- goto cleanup_adjustout_ops;;
- }
-- ret = nf_register_hook(&ip_nat_local_in_ops);
-+ ret = virt_nf_register_hook(&ip_nat_local_in_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register local in hook.\n");
- goto cleanup_localoutops;
-@@ -404,38 +405,76 @@ static int init_or_cleanup(int init)
- return ret;
-
- cleanup:
-- nf_unregister_hook(&ip_nat_local_in_ops);
-+ if (ve_is_super(get_exec_env()) && ip_conntrack_disable_ve0)
-+ goto cleanup_rule_init;
-+ virt_nf_unregister_hook(&ip_nat_local_in_ops);
- cleanup_localoutops:
-- nf_unregister_hook(&ip_nat_local_out_ops);
-+ virt_nf_unregister_hook(&ip_nat_local_out_ops);
- cleanup_adjustout_ops:
-- nf_unregister_hook(&ip_nat_adjust_out_ops);
-+ virt_nf_unregister_hook(&ip_nat_adjust_out_ops);
- cleanup_adjustin_ops:
-- nf_unregister_hook(&ip_nat_adjust_in_ops);
-+ virt_nf_unregister_hook(&ip_nat_adjust_in_ops);
- cleanup_outops:
-- nf_unregister_hook(&ip_nat_out_ops);
-+ virt_nf_unregister_hook(&ip_nat_out_ops);
- cleanup_inops:
-- nf_unregister_hook(&ip_nat_in_ops);
-+ virt_nf_unregister_hook(&ip_nat_in_ops);
- cleanup_rule_init:
- ip_nat_rule_cleanup();
-- cleanup_decode_session:
--#ifdef CONFIG_XFRM
-- ip_nat_decode_session = NULL;
-- synchronize_net();
--#endif
-+ cleanup_modput:
-+ if (!ve_is_super(get_exec_env()))
-+ module_put(THIS_MODULE);
- return ret;
- }
-
--static int __init init(void)
-+int init_iptable_nat(void)
- {
- return init_or_cleanup(1);
- }
-
--static void __exit fini(void)
-+void fini_iptable_nat(void)
- {
- init_or_cleanup(0);
- }
-
--module_init(init);
-+/*
-+ * Module entry point: installs the XFRM session decoder (when XFRM is
-+ * configured), brings NAT up through init_iptable_nat() and, on success,
-+ * publishes the init/fini entry points through the KSYM* macros.
-+ * On failure the XFRM decoder is withdrawn again and the error returned.
-+ */
-+static int __init init(void)
-+{
-+	int err;
-+
-+	need_conntrack();
-+
-+#ifdef CONFIG_XFRM
-+	BUG_ON(ip_nat_decode_session != NULL);
-+	ip_nat_decode_session = nat_decode_session;
-+#endif
-+
-+	err = init_iptable_nat();
-+	if (err >= 0) {
-+		KSYMRESOLVE(init_iptable_nat);
-+		KSYMRESOLVE(fini_iptable_nat);
-+		KSYMMODRESOLVE(iptable_nat);
-+		return 0;
-+	}
-+
-+	/* Setup failed: undo the decoder hook before reporting the error. */
-+#ifdef CONFIG_XFRM
-+	ip_nat_decode_session = NULL;
-+	synchronize_net();
-+#endif
-+	return err;
-+}
-+
-+static void __exit fini(void) /* module exit: reverses init() step by step */
-+{
-+ KSYMMODUNRESOLVE(iptable_nat); /* withdraw what init() published via KSYMMODRESOLVE */
-+ KSYMUNRESOLVE(init_iptable_nat);
-+ KSYMUNRESOLVE(fini_iptable_nat);
-+ fini_iptable_nat(); /* runs init_or_cleanup(0) */
-+#ifdef CONFIG_XFRM
-+ ip_nat_decode_session = NULL; /* drop the decoder installed by init() */
-+ synchronize_net();
-+#endif
-+}
-+
-+fs_initcall(init);
- module_exit(fini);
-
- MODULE_LICENSE("GPL");
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_queue.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_queue.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_queue.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_queue.c 2006-04-19 15:02:12.000000000 +0400
-@@ -542,8 +542,17 @@ ipq_rcv_sk(struct sock *sk, int len)
- down(&ipqnl_sem);
-
- for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
-+#ifdef CONFIG_VE
-+ struct ve_struct *env;
-+#endif
- skb = skb_dequeue(&sk->sk_receive_queue);
-+#ifdef CONFIG_VE
-+ env = set_exec_env(VE_OWNER_SKB(skb));
- ipq_rcv_skb(skb);
-+ (void)set_exec_env(env);
-+#else
-+ ipq_rcv_skb(skb);
-+#endif
- kfree_skb(skb);
- }
-
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ip_tables.c linux-2.6.16-026test009/net/ipv4/netfilter/ip_tables.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ip_tables.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ip_tables.c 2006-04-19 15:02:12.000000000 +0400
-@@ -24,14 +24,17 @@
- #include <linux/module.h>
- #include <linux/icmp.h>
- #include <net/ip.h>
-+#include <net/compat.h>
- #include <asm/uaccess.h>
- #include <asm/semaphore.h>
- #include <linux/proc_fs.h>
- #include <linux/err.h>
- #include <linux/cpumask.h>
-+#include <ub/ub_mem.h>
-
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-+#include <linux/nfcalls.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-@@ -70,6 +73,14 @@ do { \
- #define inline
- #endif
-
-+#ifdef CONFIG_VE_IPTABLES
-+/* include ve.h and define get_exec_env */
-+#include <linux/sched.h>
-+#define ve_ipt_standard_target (get_exec_env()->_ipt_standard_target)
-+#else
-+#define ve_ipt_standard_target &ipt_standard_target
-+#endif
-+
- /*
- We keep a set of rules for each CPU, so we can avoid write-locking
- them in the softirq when updating the counters and therefore
-@@ -480,7 +491,7 @@ standard_check(const struct ipt_entry_ta
- if (t->u.target_size
- != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
- duprintf("standard_check: target size %u != %u\n",
-- t->u.target_size,
-+ t->u.target_size, (unsigned int)
- IPT_ALIGN(sizeof(struct ipt_standard_target)));
- return 0;
- }
-@@ -565,7 +576,7 @@ check_entry(struct ipt_entry *e, const c
- }
- t->u.kernel.target = target;
-
-- if (t->u.kernel.target == &ipt_standard_target) {
-+ if (t->u.kernel.target == ve_ipt_standard_target) {
- if (!standard_check(t, size)) {
- ret = -EINVAL;
- goto cleanup_matches;
-@@ -790,32 +801,45 @@ get_counters(const struct xt_table_info
- }
- }
-
--static int
--copy_entries_to_user(unsigned int total_size,
-- struct ipt_table *table,
-- void __user *userptr)
-+static inline struct xt_counters * alloc_counters(struct ipt_table *table)
- {
-- unsigned int off, num, countersize;
-- struct ipt_entry *e;
-+ unsigned int countersize;
- struct xt_counters *counters;
- struct xt_table_info *private = table->private;
-- int ret = 0;
-- void *loc_cpu_entry;
-
- /* We need atomic snapshot of counters: rest doesn't change
- (other than comefrom, which userspace doesn't care
- about). */
- countersize = sizeof(struct xt_counters) * private->number;
-- counters = vmalloc_node(countersize, numa_node_id());
-+ counters = ub_vmalloc_node(countersize, numa_node_id());
-
- if (counters == NULL)
-- return -ENOMEM;
-+ return ERR_PTR(-ENOMEM);
-
- /* First, sum counters... */
- write_lock_bh(&table->lock);
- get_counters(private, counters);
- write_unlock_bh(&table->lock);
-
-+ return counters;
-+}
-+
-+static int
-+copy_entries_to_user(unsigned int total_size,
-+ struct ipt_table *table,
-+ void __user *userptr)
-+{
-+ unsigned int off, num;
-+ struct ipt_entry *e;
-+ struct xt_counters *counters;
-+ struct xt_table_info *private = table->private;
-+ int ret = 0;
-+ void *loc_cpu_entry;
-+
-+ counters = alloc_counters(table);
-+ if (IS_ERR(counters))
-+ return PTR_ERR(counters);
-+
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
-@@ -875,25 +899,391 @@ copy_entries_to_user(unsigned int total_
- return ret;
- }
-
-+#ifdef CONFIG_COMPAT
-+static DECLARE_MUTEX(compat_ipt_mutex); /* taken by get_info() around its table lookup and compat offset work */
-+
-+struct compat_delta { /* one node per entry recorded by compat_add_offset() */
-+ struct compat_delta *next; /* singly linked, NULL terminated */
-+ u_int16_t offset; /* offset value passed to compat_add_offset() */
-+ short delta; /* size difference recorded for that offset */
-+};
-+
-+static struct compat_delta *compat_offsets = NULL; /* list head; emptied by compat_flush_offsets() */
-+
-+/*
-+ * Record that the entry at @offset changes size by @delta.
-+ *
-+ * The first node becomes the list head; every later node is linked in
-+ * directly behind the head.  Returns 0, or -ENOMEM when the node cannot
-+ * be allocated.
-+ */
-+static int compat_add_offset(u_int16_t offset, short delta)
-+{
-+	struct compat_delta *node;
-+
-+	node = kmalloc(sizeof(*node), GFP_KERNEL);
-+	if (node == NULL)
-+		return -ENOMEM;
-+
-+	node->offset = offset;
-+	node->delta = delta;
-+
-+	if (compat_offsets == NULL) {
-+		node->next = NULL;
-+		compat_offsets = node;
-+		return 0;
-+	}
-+
-+	node->next = compat_offsets->next;
-+	compat_offsets->next = node;
-+	return 0;
-+}
-+
-+/* Free every recorded offset node and leave the list empty. */
-+static void compat_flush_offsets(void)
-+{
-+	struct compat_delta *cur = compat_offsets;
-+
-+	while (cur != NULL) {
-+		struct compat_delta *following = cur->next;
-+
-+		kfree(cur);
-+		cur = following;
-+	}
-+	compat_offsets = NULL;
-+}
-+
-+/*
-+ * Sum the deltas of all recorded entries whose offset lies strictly
-+ * below @offset.
-+ */
-+static short compat_calc_jump(u_int16_t offset)
-+{
-+	struct compat_delta *node = compat_offsets;
-+	short total = 0;
-+
-+	while (node != NULL) {
-+		if (offset > node->offset)
-+			total += node->delta;
-+		node = node->next;
-+	}
-+	return total;
-+}
-+
-+struct compat_ipt_standard_target /* compat layout converted by compat_ipt_standard_fn() */
-+{
-+ struct compat_ipt_entry_target target;
-+ compat_int_t verdict; /* positive values are adjusted with compat_calc_jump() on conversion */
-+};
-+
-+#define IPT_ST_OFFSET (sizeof(struct ipt_standard_target) - \
-+ sizeof(struct compat_ipt_standard_target)) /* native minus compat size of a standard target */
-+
-+struct compat_ipt_standard /* compat entry header followed by a compat standard target */
-+{
-+ struct compat_ipt_entry entry;
-+ struct compat_ipt_standard_target target;
-+};
-+
-+/*
-+ * Compat hook for the standard target.
-+ *
-+ * COMPAT_CALC_SIZE: add IPT_ST_OFFSET to *size.
-+ * COMPAT_TO_USER:   build the compat form of @target, pull positive
-+ *                   verdicts back by compat_calc_jump(), copy it to
-+ *                   *dstptr (userspace), shrink *size and advance *dstptr.
-+ * COMPAT_FROM_USER: build the native form of the compat @target, push
-+ *                   positive verdicts forward by compat_calc_jump(),
-+ *                   store it at *dstptr, grow *size and advance *dstptr.
-+ *
-+ * Returns 0, -EFAULT when the copy to userspace fails, or -ENOPROTOOPT
-+ * for an unknown @convert.
-+ */
-+static int compat_ipt_standard_fn(void *target,
-+		void **dstptr, int *size, int convert)
-+{
-+	if (convert == COMPAT_CALC_SIZE) {
-+		*size += IPT_ST_OFFSET;
-+		return 0;
-+	}
-+
-+	if (convert == COMPAT_TO_USER) {
-+		struct ipt_standard_target *native = target;
-+		struct compat_ipt_standard_target out;
-+		int err = 0;
-+
-+		memcpy(&out.target, &native->target,
-+			sizeof(struct ipt_entry_target));
-+		out.verdict = native->verdict;
-+		if (out.verdict > 0)
-+			out.verdict -= compat_calc_jump(out.verdict);
-+		out.target.u.user.target_size =
-+			sizeof(struct compat_ipt_standard_target);
-+		if (__copy_to_user(*dstptr, &out,
-+				sizeof(struct compat_ipt_standard_target)))
-+			err = -EFAULT;
-+		/* Bookkeeping is updated even when the copy faulted. */
-+		*size -= IPT_ST_OFFSET;
-+		*dstptr += sizeof(struct compat_ipt_standard_target);
-+		return err;
-+	}
-+
-+	if (convert == COMPAT_FROM_USER) {
-+		struct compat_ipt_standard_target *narrow = target;
-+		struct ipt_standard_target wide;
-+
-+		memcpy(&wide.target, &narrow->target,
-+			sizeof(struct ipt_entry_target));
-+		wide.verdict = narrow->verdict;
-+		if (wide.verdict > 0)
-+			wide.verdict += compat_calc_jump(wide.verdict);
-+		wide.target.u.user.target_size =
-+			sizeof(struct ipt_standard_target);
-+		memcpy(*dstptr, &wide, sizeof(struct ipt_standard_target));
-+		*size += IPT_ST_OFFSET;
-+		*dstptr += sizeof(struct ipt_standard_target);
-+		return 0;
-+	}
-+
-+	return -ENOPROTOOPT;
-+}
-+
-+/*
-+ * Generic compat helper for targets whose payload is identical in both
-+ * layouts and differs only by @off bytes of size.
-+ *
-+ * COMPAT_CALC_SIZE: add @off to *size.
-+ * COMPAT_TO_USER:   copy the target to *dstptr (userspace), rewrite its
-+ *                   leading 16-bit size field reduced by @off, shrink
-+ *                   *size and advance *dstptr by the reduced size.
-+ * COMPAT_FROM_USER: copy the compat target to *dstptr, store its size
-+ *                   grown by @off, grow *size and advance *dstptr by the
-+ *                   grown size.
-+ *
-+ * Returns 0, -EFAULT on a failed user access, or -ENOPROTOOPT for an
-+ * unknown @convert.
-+ */
-+int ipt_target_align_compat(void *target, void **dstptr,
-+		int *size, int off, int convert)
-+{
-+	u_int16_t tsize;
-+
-+	if (convert == COMPAT_CALC_SIZE) {
-+		*size += off;
-+		return 0;
-+	}
-+
-+	if (convert == COMPAT_TO_USER) {
-+		struct ipt_entry_target *native = target;
-+		int err = 0;
-+
-+		tsize = native->u.user.target_size;
-+		if (__copy_to_user(*dstptr, native, tsize))
-+			return -EFAULT;
-+		tsize -= off;
-+		if (put_user(tsize, (u_int16_t *)*dstptr))
-+			err = -EFAULT;
-+		*size -= off;
-+		*dstptr += tsize;
-+		return err;
-+	}
-+
-+	if (convert == COMPAT_FROM_USER) {
-+		struct compat_ipt_entry_target *narrow = target;
-+		struct ipt_entry_target *wide = *dstptr;
-+
-+		tsize = narrow->u.user.target_size;
-+		memcpy(wide, narrow, tsize);
-+		tsize += off;
-+		wide->u.user.target_size = tsize;
-+		*size += off;
-+		*dstptr += tsize;
-+		return 0;
-+	}
-+
-+	return -ENOPROTOOPT;
-+}
-+
-+/*
-+ * Generic compat helper for matches whose payload is identical in both
-+ * layouts and differs only by @off bytes of size.
-+ *
-+ * COMPAT_CALC_SIZE: add @off to *size.
-+ * COMPAT_TO_USER:   copy the match to *dstptr (userspace), rewrite its
-+ *                   leading 16-bit size field reduced by @off, shrink
-+ *                   *size and advance *dstptr by the reduced size.
-+ * COMPAT_FROM_USER: copy the compat match to *dstptr, store its size
-+ *                   grown by @off, grow *size and advance *dstptr by the
-+ *                   grown size.
-+ *
-+ * Returns 0, -EFAULT on a failed user access, or -ENOPROTOOPT for an
-+ * unknown @convert.
-+ */
-+int ipt_match_align_compat(void *match, void **dstptr,
-+		int *size, int off, int convert)
-+{
-+	u_int16_t msize;
-+
-+	if (convert == COMPAT_CALC_SIZE) {
-+		*size += off;
-+		return 0;
-+	}
-+
-+	if (convert == COMPAT_TO_USER) {
-+		struct ipt_entry_match *native = match;
-+		int err = 0;
-+
-+		msize = native->u.user.match_size;
-+		if (__copy_to_user(*dstptr, native, msize))
-+			return -EFAULT;
-+		msize -= off;
-+		if (put_user(msize, (u_int16_t *)*dstptr))
-+			err = -EFAULT;
-+		*size -= off;
-+		*dstptr += msize;
-+		return err;
-+	}
-+
-+	if (convert == COMPAT_FROM_USER) {
-+		struct compat_ipt_entry_match *narrow = match;
-+		struct ipt_entry_match *wide = *dstptr;
-+
-+		msize = narrow->u.user.match_size;
-+		memcpy(wide, narrow, msize);
-+		msize += off;
-+		wide->u.user.match_size = msize;
-+		*size += off;
-+		*dstptr += msize;
-+		return 0;
-+	}
-+
-+	return -ENOPROTOOPT;
-+}
-+
-+/*
-+ * Compat hook for the icmp match: struct ipt_icmp is the same in both
-+ * layouts, so only the alignment padding difference is handed on to
-+ * ipt_match_align_compat().
-+ */
-+static int icmp_compat(void *match,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int pad_diff = IPT_ALIGN(sizeof(struct ipt_icmp)) -
-+		COMPAT_IPT_ALIGN(sizeof(struct ipt_icmp));
-+
-+	return ipt_match_align_compat(match, dstptr, size, pad_diff, convert);
-+}
-+
-+/*
-+ * Match iterator callback: let the match's compat hook (if it has one)
-+ * add its size difference to *size.  The hook's result is not examined;
-+ * always returns 0 so iteration continues.
-+ */
-+static inline int
-+compat_calc_match(struct ipt_entry_match *m, int * size)
-+{
-+	if (!m->u.kernel.match->compat)
-+		return 0;
-+
-+	m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE);
-+	return 0;
-+}
-+
-+/*
-+ * Entry iterator callback used by compat_table_info().
-+ *
-+ * Adds up the size difference reported by the compat hooks of all matches
-+ * and of the target of @e, subtracts it from newinfo->size, records it via
-+ * compat_add_offset(), and pulls back every hook entry / underflow offset
-+ * in @newinfo whose position in @info lies behind @e.
-+ *
-+ * Returns 0, or the error from compat_add_offset().
-+ */
-+static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
-+		void *base, struct xt_table_info *newinfo)
-+{
-+	u_int16_t entry_offset = (void *)e - base;
-+	struct ipt_entry_target *t;
-+	int shrink = 0;
-+	int hook, err;
-+
-+	IPT_MATCH_ITERATE(e, compat_calc_match, &shrink);
-+
-+	t = ipt_get_target(e);
-+	if (t->u.kernel.target->compat)
-+		t->u.kernel.target->compat(t, NULL, &shrink, COMPAT_CALC_SIZE);
-+
-+	newinfo->size -= shrink;
-+	err = compat_add_offset(entry_offset, shrink);
-+	if (err)
-+		return err;
-+
-+	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
-+		if (info->hook_entry[hook] && (e < (struct ipt_entry *)
-+				(base + info->hook_entry[hook])))
-+			newinfo->hook_entry[hook] -= shrink;
-+		if (info->underflow[hook] && (e < (struct ipt_entry *)
-+				(base + info->underflow[hook])))
-+			newinfo->underflow[hook] -= shrink;
-+	}
-+	return 0;
-+}
-+
-+/*
-+ * Fill @newinfo with the size and hook offsets @info would have in the
-+ * compat layout.
-+ *
-+ * @newinfo is zeroed, seeded with the native size and offsets, and then
-+ * corrected entry by entry through compat_calc_entry() over this CPU's
-+ * copy of the rules.  Returns -EINVAL if either argument is NULL,
-+ * otherwise the result of the iteration.
-+ */
-+static int compat_table_info(struct xt_table_info *info,
-+		struct xt_table_info *newinfo)
-+{
-+	void *entries;
-+	int hook;
-+
-+	if (info == NULL || newinfo == NULL)
-+		return -EINVAL;
-+
-+	memset(newinfo, 0, sizeof(*newinfo));
-+	newinfo->size = info->size;
-+	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
-+		newinfo->hook_entry[hook] = info->hook_entry[hook];
-+		newinfo->underflow[hook] = info->underflow[hook];
-+	}
-+
-+	entries = info->entries[raw_smp_processor_id()];
-+	return IPT_ENTRY_ITERATE(entries, info->size,
-+			compat_calc_entry, info, entries, newinfo);
-+}
-+#endif
-+
-+/*
-+ * Copy a table's summary (valid hooks, hook entry and underflow offsets,
-+ * number of entries, size and name) to userspace.
-+ *
-+ * @user: user buffer; on entry it starts with the table name, on success
-+ *        it receives a struct ipt_getinfo (or struct compat_ipt_getinfo
-+ *        for a 32-bit caller on a CONFIG_COMPAT kernel).
-+ * @len:  length supplied by the caller; must equal the size of the
-+ *        structure that will be written.
-+ *
-+ * Returns 0 on success, -EINVAL on a length mismatch, -EFAULT on a failed
-+ * user copy, -ENOENT or the lookup error when the table is not found, or
-+ * the error from compat_table_info().
-+ */
-+static int get_info(void __user *user, int *len)
-+{
-+	char name[IPT_TABLE_MAXNAMELEN];
-+	struct ipt_table *t;
-+	int ret, size;
-+
-+#ifdef CONFIG_COMPAT
-+	if (is_current_32bits())
-+		size = sizeof(struct compat_ipt_getinfo);
-+	else
-+#endif
-+		size = sizeof(struct ipt_getinfo);
-+
-+	if (*len != size) {
-+		/* Report the size that was actually compared against. */
-+		duprintf("length %u != %u\n", *len,
-+			(unsigned int)size);
-+		return -EINVAL;
-+	}
-+
-+	if (copy_from_user(name, user, sizeof(name)) != 0)
-+		return -EFAULT;
-+
-+	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
-+#ifdef CONFIG_COMPAT
-+	down(&compat_ipt_mutex);
-+#endif
-+	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-+			"iptable_%s", name);
-+	if (t && !IS_ERR(t)) {
-+		struct ipt_getinfo info;
-+		struct xt_table_info *private = t->private;
-+#ifdef CONFIG_COMPAT
-+		struct compat_ipt_getinfo compat_info;
-+#endif
-+		void *pinfo;
-+
-+		ret = 0;
-+#ifdef CONFIG_COMPAT
-+		if (is_current_32bits()) {
-+			struct xt_table_info tmp;
-+
-+			ret = compat_table_info(private, &tmp);
-+			compat_flush_offsets();
-+			/*
-+			 * Zero first: strcpy() stops at the NUL, and the
-+			 * whole structure is copied to userspace below.
-+			 */
-+			memset(&compat_info, 0, sizeof(compat_info));
-+			if (ret == 0) {
-+				memcpy(compat_info.hook_entry, tmp.hook_entry,
-+					sizeof(compat_info.hook_entry));
-+				memcpy(compat_info.underflow, tmp.underflow,
-+					sizeof(compat_info.underflow));
-+				compat_info.valid_hooks = t->valid_hooks;
-+				compat_info.num_entries = private->number;
-+				compat_info.size = tmp.size;
-+				strcpy(compat_info.name, name);
-+			}
-+			pinfo = (void *)&compat_info;
-+		} else
-+#endif
-+		{
-+			/* Same reason as above: no stack bytes may leak. */
-+			memset(&info, 0, sizeof(info));
-+			info.valid_hooks = t->valid_hooks;
-+			memcpy(info.hook_entry, private->hook_entry,
-+				sizeof(info.hook_entry));
-+			memcpy(info.underflow, private->underflow,
-+				sizeof(info.underflow));
-+			info.num_entries = private->number;
-+			info.size = private->size;
-+			strcpy(info.name, name);
-+			pinfo = (void *)&info;
-+		}
-+
-+		/* A failed compat calculation must not be reported as success. */
-+		if (ret == 0 && copy_to_user(user, pinfo, *len) != 0)
-+			ret = -EFAULT;
-+
-+		xt_table_unlock(t);
-+		module_put(t->me);
-+	} else
-+		ret = t ? PTR_ERR(t) : -ENOENT;
-+#ifdef CONFIG_COMPAT
-+	up(&compat_ipt_mutex);
-+#endif
-+	return ret;
-+}
-+
- static int
--get_entries(const struct ipt_get_entries *entries,
-- struct ipt_get_entries __user *uptr)
-+get_entries(struct ipt_get_entries __user *uptr, int *len)
- {
- int ret;
-+ struct ipt_get_entries get;
- struct ipt_table *t;
-
-- t = xt_find_table_lock(AF_INET, entries->name);
-+ if (*len < sizeof(get)) {
-+ duprintf("get_entries: %u < %d\n", *len,
-+ (unsigned int)sizeof(get));
-+ return -EINVAL;
-+ }
-+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
-+ return -EFAULT;
-+ if (*len != sizeof(struct ipt_get_entries) + get.size) {
-+ duprintf("get_entries: %u != %u\n", *len,
-+ (unsigned int)(sizeof(struct ipt_get_entries) +
-+ get.size));
-+ return -EINVAL;
-+ }
-+
-+ t = xt_find_table_lock(AF_INET, get.name);
- if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n",
- private->number);
-- if (entries->size == private->size)
-+ if (get.size == private->size)
- ret = copy_entries_to_user(private->size,
- t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size,
-- entries->size);
-+ get.size);
- ret = -EINVAL;
- }
- module_put(t->me);
-@@ -905,71 +1295,39 @@ get_entries(const struct ipt_get_entries
- }
-
- static int
--do_replace(void __user *user, unsigned int len)
-+__do_replace(const char *name, unsigned int valid_hooks,
-+ struct xt_table_info *newinfo, unsigned int num_counters,
-+ void __user *counters_ptr)
- {
- int ret;
-- struct ipt_replace tmp;
- struct ipt_table *t;
-- struct xt_table_info *newinfo, *oldinfo;
-+ struct xt_table_info *oldinfo;
- struct xt_counters *counters;
-- void *loc_cpu_entry, *loc_cpu_old_entry;
--
-- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-- return -EFAULT;
--
-- /* Hack: Causes ipchains to give correct error msg --RR */
-- if (len != sizeof(tmp) + tmp.size)
-- return -ENOPROTOOPT;
--
-- /* overflow check */
-- if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-- SMP_CACHE_BYTES)
-- return -ENOMEM;
-- if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-- return -ENOMEM;
--
-- newinfo = xt_alloc_table_info(tmp.size);
-- if (!newinfo)
-- return -ENOMEM;
--
-- /* choose the copy that is our node/cpu */
-- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-- tmp.size) != 0) {
-- ret = -EFAULT;
-- goto free_newinfo;
-- }
-+ void *loc_cpu_old_entry;
-
-- counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
-+ ret = 0;
-+ counters = ub_vmalloc_best(num_counters * sizeof(struct xt_counters));
- if (!counters) {
- ret = -ENOMEM;
-- goto free_newinfo;
-+ goto out;
- }
-
-- ret = translate_table(tmp.name, tmp.valid_hooks,
-- newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-- tmp.hook_entry, tmp.underflow);
-- if (ret != 0)
-- goto free_newinfo_counters;
--
-- duprintf("ip_tables: Translated table\n");
--
-- t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
-- "iptable_%s", tmp.name);
-+ t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-+ "iptable_%s", name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
- goto free_newinfo_counters_untrans;
- }
-
- /* You lied! */
-- if (tmp.valid_hooks != t->valid_hooks) {
-+ if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
-- tmp.valid_hooks, t->valid_hooks);
-+ valid_hooks, t->valid_hooks);
- ret = -EINVAL;
- goto put_module;
- }
-
-- oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
-+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
- if (!oldinfo)
- goto put_module;
-
-@@ -989,8 +1347,8 @@ do_replace(void __user *user, unsigned i
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
- xt_free_table_info(oldinfo);
-- if (copy_to_user(tmp.counters, counters,
-- sizeof(struct xt_counters) * tmp.num_counters) != 0)
-+ if (copy_to_user(counters_ptr, counters,
-+ sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
- vfree(counters);
- xt_table_unlock(t);
-@@ -1000,9 +1358,62 @@ do_replace(void __user *user, unsigned i
- module_put(t->me);
- xt_table_unlock(t);
- free_newinfo_counters_untrans:
-- IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
-- free_newinfo_counters:
- vfree(counters);
-+ out:
-+ return ret;
-+}
-+
-+static int
-+do_replace(void __user *user, unsigned int len)
-+{
-+ int ret;
-+ struct ipt_replace tmp;
-+ struct xt_table_info *newinfo;
-+ void *loc_cpu_entry;
-+
-+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-+ return -EFAULT;
-+
-+ /* Hack: Causes ipchains to give correct error msg --RR */
-+ if (len != sizeof(tmp) + tmp.size)
-+ return -ENOPROTOOPT;
-+
-+ /* overflow check */
-+ if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-+ SMP_CACHE_BYTES)
-+ return -ENOMEM;
-+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-+ return -ENOMEM;
-+
-+ newinfo = xt_alloc_table_info(tmp.size);
-+ if (!newinfo)
-+ return -ENOMEM;
-+
-+ /* choose the copy that is our node/cpu */
-+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-+ tmp.size) != 0) {
-+ ret = -EFAULT;
-+ goto free_newinfo;
-+ }
-+
-+ ret = translate_table(tmp.name, tmp.valid_hooks,
-+ newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
-+ tmp.hook_entry, tmp.underflow);
-+ if (ret != 0)
-+ goto free_newinfo;
-+
-+ duprintf("ip_tables: Translated table\n");
-+
-+ ret = __do_replace(tmp.name, tmp.valid_hooks,
-+ newinfo, tmp.num_counters,
-+ tmp.counters);
-+ if (ret)
-+ goto free_newinfo_untrans;
-+ return 0;
-+
-+ free_newinfo_untrans:
-+ IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo:
- xt_free_table_info(newinfo);
- return ret;
-@@ -1034,28 +1445,56 @@ static int
- do_add_counters(void __user *user, unsigned int len)
- {
- unsigned int i;
-- struct xt_counters_info tmp, *paddc;
-+ struct xt_counters_info tmp;
-+ struct xt_counters *paddc;
-+ unsigned int num_counters;
-+ char *name;
-+ int size;
-+ void *ptmp;
- struct ipt_table *t;
- struct xt_table_info *private;
- int ret = 0;
- void *loc_cpu_entry;
-+#ifdef CONFIG_COMPAT
-+ struct compat_xt_counters_info compat_tmp;
-
-- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-+ if (is_current_32bits()) {
-+ ptmp = &compat_tmp;
-+ size = sizeof(struct compat_xt_counters_info);
-+ } else
-+#endif
-+ {
-+ ptmp = &tmp;
-+ size = sizeof(struct xt_counters_info);
-+ }
-+
-+ if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
-+#ifdef CONFIG_COMPAT
-+ if (is_current_32bits()) {
-+ num_counters = compat_tmp.num_counters;
-+ name = compat_tmp.name;
-+ } else
-+#endif
-+ {
-+ num_counters = tmp.num_counters;
-+ name = tmp.name;
-+ }
-+
-+ if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
-- paddc = vmalloc_node(len, numa_node_id());
-+ paddc = ub_vmalloc_node(len - size, numa_node_id());
- if (!paddc)
- return -ENOMEM;
-
-- if (copy_from_user(paddc, user, len) != 0) {
-+ if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
-- t = xt_find_table_lock(AF_INET, tmp.name);
-+ t = xt_find_table_lock(AF_INET, name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
- goto free;
-@@ -1063,7 +1502,7 @@ do_add_counters(void __user *user, unsig
-
- write_lock_bh(&t->lock);
- private = t->private;
-- if (private->number != paddc->num_counters) {
-+ if (private->number != num_counters) {
- ret = -EINVAL;
- goto unlock_up_free;
- }
-@@ -1074,7 +1513,7 @@ do_add_counters(void __user *user, unsig
- IPT_ENTRY_ITERATE(loc_cpu_entry,
- private->size,
- add_counter_to_entry,
-- paddc->counters,
-+ paddc,
- &i);
- unlock_up_free:
- write_unlock_bh(&t->lock);
-@@ -1086,14 +1525,590 @@ do_add_counters(void __user *user, unsig
- return ret;
- }
-
-+#ifdef CONFIG_COMPAT
-+struct compat_ipt_replace { /* field names mirror those do_replace() reads from struct ipt_replace */
-+ char name[IPT_TABLE_MAXNAMELEN];
-+ u32 valid_hooks;
-+ u32 num_entries;
-+ u32 size;
-+ u32 hook_entry[NF_IP_NUMHOOKS];
-+ u32 underflow[NF_IP_NUMHOOKS];
-+ u32 num_counters;
-+ compat_uptr_t counters; /* struct ipt_counters * */
-+ struct compat_ipt_entry entries[0]; /* zero-length trailing array */
-+};
-+
-+/*
-+ * Match iterator callback: emit one match to userspace in compat layout.
-+ *
-+ * A match with a compat hook converts itself; its result is returned so
-+ * that a failed user copy inside the hook stops the iteration instead of
-+ * being silently dropped.  A match without a hook is copied unchanged and
-+ * *dstptr is advanced by its size.
-+ *
-+ * Returns 0 on success, -EFAULT on a failed copy, or the hook's error.
-+ */
-+static inline int compat_copy_match_to_user(struct ipt_entry_match *m,
-+		void __user **dstptr, compat_uint_t *size)
-+{
-+	if (m->u.kernel.match->compat)
-+		return m->u.kernel.match->compat(m, dstptr, size,
-+				COMPAT_TO_USER);
-+
-+	if (__copy_to_user(*dstptr, m, m->u.match_size))
-+		return -EFAULT;
-+	*dstptr += m->u.match_size;
-+	return 0;
-+}
-+
-+/*
-+ * Emit one rule to userspace in compat layout.
-+ *
-+ * The entry header is copied first, then every match and the target are
-+ * written behind it (through their compat hooks where present).  Finally
-+ * target_offset and next_offset in the user copy are rewritten, reduced
-+ * by however much *size shrank while the matches / target were emitted.
-+ *
-+ * Returns 0, -EFAULT on a failed user access, or the error of a compat
-+ * hook.
-+ */
-+static int compat_copy_entry_to_user(struct ipt_entry *e,
-+		void __user **dstptr, compat_uint_t *size)
-+{
-+	struct compat_ipt_entry __user *ce =
-+		(struct compat_ipt_entry __user *)*dstptr;
-+	struct ipt_entry_target __user *t;
-+	const compat_uint_t origsize = *size;
-+	u_int16_t target_offset, next_offset;
-+	int ret;
-+
-+	if (__copy_to_user(ce, e, sizeof(struct ipt_entry)))
-+		return -EFAULT;
-+	*dstptr += sizeof(struct compat_ipt_entry);
-+
-+	ret = IPT_MATCH_ITERATE(e, compat_copy_match_to_user, dstptr, size);
-+	if (ret)
-+		return ret;
-+	/* Only the matches have been accounted for at this point. */
-+	target_offset = e->target_offset - (origsize - *size);
-+
-+	t = ipt_get_target(e);
-+	if (t->u.kernel.target->compat) {
-+		ret = t->u.kernel.target->compat(t,
-+				dstptr, size, COMPAT_TO_USER);
-+		if (ret)
-+			return ret;
-+	} else {
-+		if (__copy_to_user(*dstptr, t, t->u.target_size))
-+			return -EFAULT;
-+		*dstptr += t->u.target_size;
-+	}
-+
-+	next_offset = e->next_offset - (origsize - *size);
-+	if (__put_user(target_offset, &ce->target_offset) ||
-+	    __put_user(next_offset, &ce->next_offset))
-+		return -EFAULT;
-+	return 0;
-+}
-+
-+/*
-+ * Match iterator callback: resolve the match named in @m (requesting the
-+ * "ipt_<name>" module if needed), store the resolved match in @m, let its
-+ * compat hook add the size difference to *size, and count it in *i.
-+ *
-+ * @name, @ip and @hookmask are not used here.
-+ * Returns 0, -ENOENT when no such match exists, or the lookup error.
-+ */
-+static inline int
-+compat_check_calc_match(struct ipt_entry_match *m,
-+	    const char *name,
-+	    const struct ipt_ip *ip,
-+	    unsigned int hookmask,
-+	    int *size, int *i)
-+{
-+	struct ipt_match *found;
-+
-+	found = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-+						      m->u.user.revision),
-+					"ipt_%s", m->u.user.name);
-+	if (!found || IS_ERR(found)) {
-+		duprintf("compat_check_calc_match: `%s' not found\n",
-+				m->u.user.name);
-+		return found ? PTR_ERR(found) : -ENOENT;
-+	}
-+
-+	m->u.kernel.match = found;
-+	if (found->compat)
-+		found->compat(m, NULL, size, COMPAT_CALC_SIZE);
-+
-+	*i += 1;
-+	return 0;
-+}
-+
-+/*
-+ * Validate one compat-layout rule and work out how much it grows when
-+ * converted to the native layout.
-+ *
-+ * Checks alignment, that the entry header lies below @limit, that
-+ * next_offset covers at least a header plus a target, and the IP part.
-+ * Resolves every match and the target, adds their size differences to
-+ * *size and records the difference with compat_add_offset().  Hook entry
-+ * and underflow offsets that coincide with this entry are stored in
-+ * @newinfo, counters and comefrom are cleared, and *i is incremented.
-+ *
-+ * Returns 0 on success or a negative errno.  On failure every match
-+ * counted so far is handed to cleanup_match() and, if the target had
-+ * already been resolved, its module reference is dropped as well.
-+ *
-+ * NOTE(review): cleanup_match() is defined outside this view; the matches
-+ * cleaned up here have not been through checkentry yet - confirm that
-+ * cleanup_match() is safe for them.
-+ */
-+static inline int
-+check_compat_entry_size_and_hooks(struct ipt_entry *e,
-+			   struct xt_table_info *newinfo,
-+			   unsigned int *size,
-+			   unsigned char *base,
-+			   unsigned char *limit,
-+			   unsigned int *hook_entries,
-+			   unsigned int *underflows,
-+			   unsigned int *i,
-+			   const char *name)
-+{
-+	struct ipt_entry_target *t;
-+	struct ipt_target *target;
-+	u_int16_t entry_offset;
-+	int ret, off, h, j;
-+
-+	duprintf("check_compat_entry_size_and_hooks %p\n", e);
-+	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
-+	    || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
-+		duprintf("Bad offset %p, limit = %p\n", e, limit);
-+		return -EINVAL;
-+	}
-+
-+	if (e->next_offset < sizeof(struct compat_ipt_entry) +
-+			sizeof(struct compat_ipt_entry_target)) {
-+		duprintf("checking: element %p size %u\n",
-+			 e, e->next_offset);
-+		return -EINVAL;
-+	}
-+
-+	if (!ip_checkentry(&e->ip)) {
-+		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
-+		return -EINVAL;
-+	}
-+
-+	off = 0;
-+	entry_offset = (void *)e - (void *)base;
-+	j = 0;
-+	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
-+			e->comefrom, &off, &j);
-+	if (ret != 0)
-+		goto out;
-+
-+	t = ipt_get_target(e);
-+	target = try_then_request_module(xt_find_target(AF_INET,
-+						     t->u.user.name,
-+						     t->u.user.revision),
-+					 "ipt_%s", t->u.user.name);
-+	if (IS_ERR(target) || !target) {
-+		duprintf("check_entry: `%s' not found\n", t->u.user.name);
-+		ret = target ? PTR_ERR(target) : -ENOENT;
-+		goto out;
-+	}
-+	t->u.kernel.target = target;
-+
-+	if (t->u.kernel.target->compat)
-+		t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE);
-+	*size += off;
-+	ret = compat_add_offset(entry_offset, off);
-+	if (ret)
-+		goto put_target;	/* target is resolved: release it too */
-+
-+	/* Check hooks & underflows */
-+	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
-+		if ((unsigned char *)e - base == hook_entries[h])
-+			newinfo->hook_entry[h] = hook_entries[h];
-+		if ((unsigned char *)e - base == underflows[h])
-+			newinfo->underflow[h] = underflows[h];
-+	}
-+
-+	/* Clear counters and comefrom */
-+	e->counters = ((struct ipt_counters) { 0, 0 });
-+	e->comefrom = 0;
-+
-+	(*i)++;
-+	return 0;
-+
-+put_target:
-+	/* The target lookup took a module reference; do not leak it. */
-+	module_put(t->u.kernel.target->me);
-+out:
-+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
-+	return ret;
-+}
-+
-+/*
-+ * Copy one match of a user-supplied rule into the translated table and
-+ * validate the copy.
-+ *
-+ * @m:       source match; m->u.kernel.match is dereferenced, so it must
-+ *           already point at the kernel match descriptor.
-+ * @dstptr:  in/out cursor into the destination buffer.
-+ * @size:    running size value handed through to the ->compat hook.
-+ * @name, @ip, @hookmask: forwarded unchanged to the match's checkentry.
-+ *
-+ * Returns 0 on success, or -EINVAL when checkentry rejects the match (the
-+ * match's module reference is dropped in that case).
-+ */
-+static inline int compat_copy_match_from_user(struct ipt_entry_match *m,
-+ void **dstptr, compat_uint_t *size, const char *name,
-+ const struct ipt_ip *ip, unsigned int hookmask)
-+{
-+ struct ipt_entry_match *dm;
-+
-+ /* Remember where the copy will start; *dstptr moves on below. */
-+ dm = (struct ipt_entry_match *)*dstptr;
-+ if (m->u.kernel.match->compat)
-+ /* Presumably the hook copies the match and advances *dstptr and
-+ * *size itself -- hooks are defined outside this function. */
-+ m->u.kernel.match->compat(m, dstptr, size, COMPAT_FROM_USER);
-+ else {
-+ /* No hook: copy the match verbatim and advance the cursor. */
-+ memcpy(*dstptr, m, m->u.match_size);
-+ *dstptr += m->u.match_size;
-+ }
-+
-+ /* Run the match's own validation on the copied data, if it has one. */
-+ if (dm->u.kernel.match->checkentry
-+ && !dm->u.kernel.match->checkentry(name, ip, dm->data,
-+ dm->u.match_size - sizeof(*dm),
-+ hookmask)) {
-+ module_put(dm->u.kernel.match->me);
-+ duprintf("ip_tables: check failed for `%s'.\n",
-+ dm->u.kernel.match->name);
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+}
-+
-+/*
-+ * Second-pass callback of translate_compat_table(): copy rule @e into the
-+ * new table at *dstptr, fix up its offsets and run the target's check.
-+ *
-+ * @e:        source rule in the user-supplied blob.
-+ * @dstptr:   in/out cursor into the destination entries.
-+ * @size:     running size; its change relative to its value on entry is
-+ *            used as the amount by which offsets must be adjusted.
-+ * @name:     table name, forwarded to the match/target checks.
-+ * @newinfo:  destination table info; hook_entry[]/underflow[] are adjusted.
-+ * @base:     start of the destination entries.
-+ *
-+ * Returns 0 on success, -EINVAL when the target check fails, or the error
-+ * returned while copying the matches.
-+ */
-+static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
-+ unsigned int *size, const char *name,
-+ struct xt_table_info *newinfo, unsigned char *base)
-+{
-+ struct ipt_entry_target *t;
-+ struct ipt_entry *de;
-+ unsigned int origsize;
-+ int ret, h;
-+
-+ ret = 0;
-+ origsize = *size;
-+ de = (struct ipt_entry *)*dstptr;
-+ memcpy(de, e, sizeof(struct ipt_entry));
-+
-+ /* NOTE(review): the header is copied with sizeof(struct ipt_entry) but
-+ * the cursor advances by sizeof(struct compat_ipt_entry) -- confirm the
-+ * two sizes are meant to be interchangeable here. */
-+ *dstptr += sizeof(struct compat_ipt_entry);
-+ ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size,
-+ name, &de->ip, de->comefrom);
-+ if (ret)
-+ goto out;
-+ /* (origsize - *size) is how much *size has changed so far. */
-+ de->target_offset = e->target_offset - (origsize - *size);
-+ t = ipt_get_target(e);
-+ if (t->u.kernel.target->compat)
-+ t->u.kernel.target->compat(t,
-+ dstptr, size, COMPAT_FROM_USER);
-+ else {
-+ /* No hook: copy the target verbatim and advance the cursor. */
-+ memcpy(*dstptr, t, t->u.target_size);
-+ *dstptr += t->u.target_size;
-+ }
-+
-+ de->next_offset = e->next_offset - (origsize - *size);
-+ /* Shift every hook entry / underflow that lies beyond this rule by the
-+ * same amount. */
-+ for (h = 0; h < NF_IP_NUMHOOKS; h++) {
-+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
-+ newinfo->hook_entry[h] -= origsize - *size;
-+ if ((unsigned char *)de - base < newinfo->underflow[h])
-+ newinfo->underflow[h] -= origsize - *size;
-+ }
-+
-+ /* Validate the copied target: the built-in standard target has its
-+ * own check, anything else goes through its checkentry (if any). */
-+ ret = -EINVAL;
-+ t = ipt_get_target(de);
-+ if (t->u.kernel.target == &ipt_standard_target) {
-+ if (!standard_check(t, *size))
-+ goto out;
-+ } else if (t->u.kernel.target->checkentry
-+ && !t->u.kernel.target->checkentry(name, de, t->data,
-+ t->u.target_size
-+ - sizeof(*t),
-+ de->comefrom)) {
-+ module_put(t->u.kernel.target->me);
-+ duprintf("ip_tables: compat: check failed for `%s'.\n",
-+ t->u.kernel.target->name);
-+ goto out;
-+ }
-+ ret = 0;
-+out:
-+ return ret;
-+}
-+
-+/*
-+ * Translate a table supplied in 32-bit layout into a newly allocated table.
-+ *
-+ * @name:          table name, forwarded to the per-rule checks.
-+ * @valid_hooks:   bitmask of hooks that must have an entry and an underflow.
-+ * @pinfo:         in: info holding the user blob; out: the new table info.
-+ * @pentry0:       in: the user blob; out: the new entries for this CPU.
-+ * @total_size:    size in bytes of the user blob.
-+ * @number:        number of rules user space announced.
-+ * @hook_entries, @underflows: offsets announced by user space.
-+ *
-+ * The work is done in two passes under compat_ipt_mutex: the first pass
-+ * validates every rule and computes the size needed, the second pass
-+ * copies the rules into the new table.  On success the old info is freed
-+ * and 0 is returned.  On failure a negative errno is returned and *pinfo
-+ * is left untouched for the caller to free.
-+ *
-+ * NOTE(review): error paths taken after the first pass has succeeded do
-+ * not undo the per-rule match/target lookups made by that pass -- looks
-+ * like module references can be left behind; confirm.
-+ */
-+static int
-+translate_compat_table(const char *name,
-+		unsigned int valid_hooks,
-+		struct xt_table_info **pinfo,
-+		void **pentry0,
-+		unsigned int total_size,
-+		unsigned int number,
-+		unsigned int *hook_entries,
-+		unsigned int *underflows)
-+{
-+	unsigned int i;
-+	struct xt_table_info *newinfo, *info;
-+	void *pos, *entry0, *entry1;
-+	unsigned int size;
-+	int ret;
-+
-+	info = *pinfo;
-+	entry0 = *pentry0;
-+	size = total_size;
-+	info->number = number;
-+
-+	/* Init all hooks to impossible value. */
-+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-+		info->hook_entry[i] = 0xFFFFFFFF;
-+		info->underflow[i] = 0xFFFFFFFF;
-+	}
-+
-+	duprintf("translate_compat_table: size %u\n", info->size);
-+	i = 0;
-+	down(&compat_ipt_mutex);
-+	/* Walk through entries, checking offsets. */
-+	ret = IPT_ENTRY_ITERATE(entry0, total_size,
-+				check_compat_entry_size_and_hooks,
-+				info, &size, entry0,
-+				entry0 + total_size,
-+				hook_entries, underflows, &i, name);
-+	if (ret != 0)
-+		goto out_unlock;
-+
-+	ret = -EINVAL;
-+	if (i != number) {
-+		duprintf("translate_compat_table: %u not %u entries\n",
-+			 i, number);
-+		goto out_unlock;
-+	}
-+
-+	/* Check hooks all assigned */
-+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-+		/* Only hooks which are valid */
-+		if (!(valid_hooks & (1 << i)))
-+			continue;
-+		if (info->hook_entry[i] == 0xFFFFFFFF) {
-+			duprintf("Invalid hook entry %u %u\n",
-+				 i, hook_entries[i]);
-+			goto out_unlock;
-+		}
-+		if (info->underflow[i] == 0xFFFFFFFF) {
-+			duprintf("Invalid underflow %u %u\n",
-+				 i, underflows[i]);
-+			goto out_unlock;
-+		}
-+	}
-+
-+	ret = -ENOMEM;
-+	newinfo = xt_alloc_table_info(size);
-+	if (!newinfo)
-+		goto out_unlock;
-+
-+	newinfo->number = number;
-+	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
-+		newinfo->hook_entry[i] = info->hook_entry[i];
-+		newinfo->underflow[i] = info->underflow[i];
-+	}
-+	entry1 = newinfo->entries[raw_smp_processor_id()];
-+	pos = entry1;
-+	size = total_size;
-+	ret = IPT_ENTRY_ITERATE(entry0, total_size,
-+				compat_copy_entry_from_user, &pos, &size,
-+				name, newinfo, entry1);
-+	compat_flush_offsets();
-+	up(&compat_ipt_mutex);
-+	if (ret)
-+		goto free_newinfo;
-+
-+	ret = -ELOOP;
-+	if (!mark_source_chains(newinfo, valid_hooks, entry1))
-+		goto free_newinfo;
-+
-+	/* And one copy for every other CPU */
-+	for_each_cpu(i)
-+		if (newinfo->entries[i] && newinfo->entries[i] != entry1)
-+			memcpy(newinfo->entries[i], entry1, newinfo->size);
-+
-+	*pinfo = newinfo;
-+	*pentry0 = entry1;
-+	xt_free_table_info(info);
-+	return 0;
-+
-+free_newinfo:
-+	xt_free_table_info(newinfo);
-+out:
-+	return ret;
-+out_unlock:
-+	/*
-+	 * The first pass may already have recorded offsets through
-+	 * compat_add_offset(); discard them before releasing the mutex so
-+	 * they cannot leak into the next translation.
-+	 */
-+	compat_flush_offsets();
-+	up(&compat_ipt_mutex);
-+	goto out;
-+}
-+
-+/*
-+ * IPT_SO_SET_REPLACE handler for 32-bit callers.
-+ *
-+ * @user: user buffer holding a struct compat_ipt_replace followed by
-+ *        tmp.size bytes of rules.
-+ * @len:  total length of that buffer.
-+ *
-+ * Copies the request in, translates the rules with
-+ * translate_compat_table() and installs them through __do_replace().
-+ *
-+ * Returns 0 on success; -EFAULT when a user copy fails; -ENOPROTOOPT when
-+ * @len does not match the announced size; -ENOMEM on a size overflow or
-+ * allocation failure; otherwise the error of the translation or
-+ * replacement step.
-+ */
-+static int
-+compat_do_replace(void __user *user, unsigned int len)
-+{
-+	int ret;
-+	struct compat_ipt_replace tmp;
-+	struct xt_table_info *newinfo;
-+	void *loc_cpu_entry;
-+
-+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
-+		return -EFAULT;
-+
-+	/* Hack: Causes ipchains to give correct error msg --RR */
-+	if (len != sizeof(tmp) + tmp.size)
-+		return -ENOPROTOOPT;
-+
-+	/* overflow check */
-+	if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
-+			SMP_CACHE_BYTES)
-+		return -ENOMEM;
-+	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
-+		return -ENOMEM;
-+
-+	/* The name comes straight from user space and is used as a C string
-+	 * below, so make sure it is terminated. */
-+	tmp.name[sizeof(tmp.name) - 1] = '\0';
-+
-+	newinfo = xt_alloc_table_info(tmp.size);
-+	if (!newinfo)
-+		return -ENOMEM;
-+
-+	/* choose the copy that is our node/cpu */
-+	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
-+	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
-+			   tmp.size) != 0) {
-+		ret = -EFAULT;
-+		goto free_newinfo;
-+	}
-+
-+	/* On success newinfo and loc_cpu_entry refer to the translated
-+	 * table; on failure they still refer to the allocation above. */
-+	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
-+				     &newinfo, &loc_cpu_entry, tmp.size,
-+				     tmp.num_entries, tmp.hook_entry,
-+				     tmp.underflow);
-+	if (ret != 0)
-+		goto free_newinfo;
-+
-+	duprintf("compat_do_replace: Translated table\n");
-+
-+	ret = __do_replace(tmp.name, tmp.valid_hooks,
-+			   newinfo, tmp.num_counters,
-+			   compat_ptr(tmp.counters));
-+	if (ret)
-+		goto free_newinfo_untrans;
-+	return 0;
-+
-+ free_newinfo_untrans:
-+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
-+ free_newinfo:
-+	xt_free_table_info(newinfo);
-+	return ret;
-+}
-+
-+/*
-+ * Request header used by compat_get_entries() for IPT_SO_GET_ENTRIES from
-+ * 32-bit callers.
-+ */
-+struct compat_ipt_get_entries
-+{
-+ /* Table name; handed to xt_find_table_lock() by compat_get_entries(). */
-+ char name[IPT_TABLE_MAXNAMELEN];
-+ /* Size in bytes the caller expects for entrytable; must match both the
-+ * remaining buffer length and the table's compat size. */
-+ compat_uint_t size;
-+ /* Destination of the rules copied out to user space. */
-+ struct compat_ipt_entry entrytable[0];
-+};
-+
-+/*
-+ * Copy a table's rules to user space for a 32-bit caller, then patch the
-+ * counters and the match/target names into the user copy.
-+ *
-+ * @total_size: size in bytes of the kernel copy of the rules.
-+ * @table:      table whose private info is copied.
-+ * @userptr:    destination buffer in user space.
-+ *
-+ * Returns 0 on success, the error from alloc_counters(), the error from
-+ * the per-rule copy, or -EFAULT when a user access fails.
-+ *
-+ * NOTE(review): in the fix-up loop the match/target pointers are read back
-+ * from user memory (copy_from_user into m / t) and then dereferenced for
-+ * ->name; a concurrent writer in user space could change them between the
-+ * copy-out and the read-back -- review.
-+ */
-+static int compat_copy_entries_to_user(unsigned int total_size,
-+ struct ipt_table *table, void __user *userptr)
-+{
-+ unsigned int off, num;
-+ struct compat_ipt_entry e;
-+ struct xt_counters *counters;
-+ struct xt_table_info *private = table->private;
-+ void __user *pos;
-+ unsigned int size;
-+ int ret = 0;
-+ void *loc_cpu_entry;
-+
-+ counters = alloc_counters(table);
-+ if (IS_ERR(counters))
-+ return PTR_ERR(counters);
-+
-+ /* choose the copy that is on our node/cpu, ...
-+ * This choice is lazy (because current thread is
-+ * allowed to migrate to another cpu)
-+ */
-+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
-+ pos = userptr;
-+ size = total_size;
-+ /* compat_copy_entry_to_user() receives the user cursor and the running
-+ * size; the loop below walks the user copy up to the resulting size. */
-+ ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
-+ compat_copy_entry_to_user, &pos, &size);
-+ if (ret)
-+ goto free_counters;
-+
-+ /* ... then go back and fix counters and names */
-+ for (off = 0, num = 0; off < size; off += e.next_offset, num++) {
-+ unsigned int i;
-+ struct ipt_entry_match m;
-+ struct ipt_entry_target t;
-+
-+ /* Any failed user access below ends the walk with -EFAULT. */
-+ ret = -EFAULT;
-+ if (copy_from_user(&e, userptr + off,
-+ sizeof(struct compat_ipt_entry)))
-+ goto free_counters;
-+ if (copy_to_user(userptr + off +
-+ offsetof(struct compat_ipt_entry, counters),
-+ &counters[num], sizeof(counters[num])))
-+ goto free_counters;
-+
-+ /* Replace each match's kernel-side union contents with its name. */
-+ for (i = sizeof(struct compat_ipt_entry);
-+ i < e.target_offset; i += m.u.match_size) {
-+ if (copy_from_user(&m, userptr + off + i,
-+ sizeof(struct ipt_entry_match)))
-+ goto free_counters;
-+ if (copy_to_user(userptr + off + i +
-+ offsetof(struct ipt_entry_match, u.user.name),
-+ m.u.kernel.match->name,
-+ strlen(m.u.kernel.match->name) + 1))
-+ goto free_counters;
-+ }
-+
-+ /* Same for the target. */
-+ if (copy_from_user(&t, userptr + off + e.target_offset,
-+ sizeof(struct ipt_entry_target)))
-+ goto free_counters;
-+ if (copy_to_user(userptr + off + e.target_offset +
-+ offsetof(struct ipt_entry_target, u.user.name),
-+ t.u.kernel.target->name,
-+ strlen(t.u.kernel.target->name) + 1))
-+ goto free_counters;
-+ }
-+ ret = 0;
-+free_counters:
-+ vfree(counters);
-+ return ret;
-+}
-+
-+/*
-+ * IPT_SO_GET_ENTRIES handler for 32-bit callers.
-+ *
-+ * @uptr: user buffer starting with a struct compat_ipt_get_entries.
-+ * @len:  length of that buffer; must equal the header plus get.size.
-+ *
-+ * Returns 0 on success; -EINVAL when the lengths or the expected table
-+ * size do not match; -EFAULT when the header cannot be read; -ENOENT or
-+ * the lookup error when the table is not found; otherwise the error of
-+ * compat_table_info() or compat_copy_entries_to_user().
-+ */
-+static int
-+compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
-+{
-+	int ret;
-+	struct compat_ipt_get_entries get;
-+	struct ipt_table *t;
-+
-+	if (*len < sizeof(get)) {
-+		duprintf("compat_get_entries: %u < %u\n",
-+			 *len, (unsigned int)sizeof(get));
-+		return -EINVAL;
-+	}
-+
-+	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
-+		return -EFAULT;
-+
-+	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
-+		duprintf("compat_get_entries: %u != %u\n", *len,
-+			 (unsigned int)(sizeof(struct compat_ipt_get_entries) +
-+					get.size));
-+		return -EINVAL;
-+	}
-+
-+	/* The name comes straight from user space and is used as a C string
-+	 * by the table lookup, so make sure it is terminated. */
-+	get.name[sizeof(get.name) - 1] = '\0';
-+
-+	down(&compat_ipt_mutex);
-+	t = xt_find_table_lock(AF_INET, get.name);
-+	if (t && !IS_ERR(t)) {
-+		struct xt_table_info *private = t->private;
-+		struct xt_table_info info;
-+		duprintf("t->private->number = %u\n",
-+			 private->number);
-+		/* info.size is the size of the table in compat layout. */
-+		ret = compat_table_info(private, &info);
-+		if (!ret && get.size == info.size) {
-+			ret = compat_copy_entries_to_user(private->size,
-+							  t, uptr->entrytable);
-+		} else if (!ret) {
-+			duprintf("compat_get_entries: I've got %u not %u!\n",
-+				 private->size,
-+				 get.size);
-+			ret = -EINVAL;
-+		}
-+		compat_flush_offsets();
-+		module_put(t->me);
-+		xt_table_unlock(t);
-+	} else
-+		ret = t ? PTR_ERR(t) : -ENOENT;
-+
-+	up(&compat_ipt_mutex);
-+	return ret;
-+}
-+
-+/*
-+ * getsockopt dispatcher for 32-bit callers: IPT_SO_GET_INFO goes to
-+ * get_info(), IPT_SO_GET_ENTRIES to compat_get_entries(); any other
-+ * command yields -EINVAL.
-+ */
-+static int
-+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
-+{
-+	if (cmd == IPT_SO_GET_INFO)
-+		return get_info(user, len);
-+
-+	if (cmd == IPT_SO_GET_ENTRIES)
-+		return compat_get_entries(user, len);
-+
-+	duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd);
-+	return -EINVAL;
-+}
-+#endif
-+
- static int
- do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
- {
- int ret;
-
-- if (!capable(CAP_NET_ADMIN))
-+ if (!capable(CAP_VE_NET_ADMIN))
- return -EPERM;
-
-+#ifdef CONFIG_COMPAT
-+ if (is_current_32bits() && (cmd == IPT_SO_SET_REPLACE))
-+ return compat_do_replace(user, len);
-+#endif
-+
- switch (cmd) {
- case IPT_SO_SET_REPLACE:
- ret = do_replace(user, len);
-@@ -1116,69 +2131,22 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
- {
- int ret;
-
-- if (!capable(CAP_NET_ADMIN))
-+ if (!capable(CAP_VE_NET_ADMIN))
- return -EPERM;
-
-- switch (cmd) {
-- case IPT_SO_GET_INFO: {
-- char name[IPT_TABLE_MAXNAMELEN];
-- struct ipt_table *t;
--
-- if (*len != sizeof(struct ipt_getinfo)) {
-- duprintf("length %u != %u\n", *len,
-- sizeof(struct ipt_getinfo));
-- ret = -EINVAL;
-- break;
-- }
--
-- if (copy_from_user(name, user, sizeof(name)) != 0) {
-- ret = -EFAULT;
-- break;
-- }
-- name[IPT_TABLE_MAXNAMELEN-1] = '\0';
--
-- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-- "iptable_%s", name);
-- if (t && !IS_ERR(t)) {
-- struct ipt_getinfo info;
-- struct xt_table_info *private = t->private;
--
-- info.valid_hooks = t->valid_hooks;
-- memcpy(info.hook_entry, private->hook_entry,
-- sizeof(info.hook_entry));
-- memcpy(info.underflow, private->underflow,
-- sizeof(info.underflow));
-- info.num_entries = private->number;
-- info.size = private->size;
-- memcpy(info.name, name, sizeof(info.name));
--
-- if (copy_to_user(user, &info, *len) != 0)
-- ret = -EFAULT;
-- else
-- ret = 0;
-- xt_table_unlock(t);
-- module_put(t->me);
-- } else
-- ret = t ? PTR_ERR(t) : -ENOENT;
-- }
-- break;
-+#ifdef CONFIG_COMPAT
-+ if (is_current_32bits())
-+ return compat_do_ipt_get_ctl(sk, cmd, user, len);
-+#endif
-
-- case IPT_SO_GET_ENTRIES: {
-- struct ipt_get_entries get;
-+ switch (cmd) {
-+ case IPT_SO_GET_INFO:
-+ ret = get_info(user, len);
-+ break;
-
-- if (*len < sizeof(get)) {
-- duprintf("get_entries: %u < %u\n", *len, sizeof(get));
-- ret = -EINVAL;
-- } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
-- ret = -EFAULT;
-- } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
-- duprintf("get_entries: %u != %u\n", *len,
-- sizeof(struct ipt_get_entries) + get.size);
-- ret = -EINVAL;
-- } else
-- ret = get_entries(&get, user);
-+ case IPT_SO_GET_ENTRIES:
-+ ret = get_entries(user, len);
- break;
-- }
-
- case IPT_SO_GET_REVISION_MATCH:
- case IPT_SO_GET_REVISION_TARGET: {
-@@ -1214,7 +2182,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd,
- return ret;
- }
-
--int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
-+struct xt_table *ipt_register_table(struct xt_table *table,
-+ const struct ipt_replace *repl)
- {
- int ret;
- struct xt_table_info *newinfo;
-@@ -1224,7 +2193,7 @@ int ipt_register_table(struct xt_table *
-
- newinfo = xt_alloc_table_info(repl->size);
- if (!newinfo)
-- return -ENOMEM;
-+ return ERR_PTR(-ENOMEM);
-
- /* choose the copy on our node/cpu
- * but dont care of preemption
-@@ -1239,15 +2208,14 @@ int ipt_register_table(struct xt_table *
- repl->underflow);
- if (ret != 0) {
- xt_free_table_info(newinfo);
-- return ret;
-+ return ERR_PTR(ret);
- }
-
-- if (xt_register_table(table, &bootstrap, newinfo) != 0) {
-+ table = virt_xt_register_table(table, &bootstrap, newinfo);
-+ if (IS_ERR(table))
- xt_free_table_info(newinfo);
-- return ret;
-- }
-
-- return 0;
-+ return table;
- }
-
- void ipt_unregister_table(struct ipt_table *table)
-@@ -1255,7 +2223,7 @@ void ipt_unregister_table(struct ipt_tab
- struct xt_table_info *private;
- void *loc_cpu_entry;
-
-- private = xt_unregister_table(table);
-+ private = virt_xt_unregister_table(table);
-
- /* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
-@@ -1263,6 +2231,29 @@ void ipt_unregister_table(struct ipt_tab
- xt_free_table_info(private);
- }
-
-+/*
-+ * Run cleanup_entry() over every rule of @table and mark the table empty.
-+ *
-+ * The table is looked up and locked by name in AF_INET; nothing happens
-+ * when @table is NULL or the lookup fails.  One extra module reference is
-+ * dropped when the table holds more rules than its initial set, and a
-+ * further one after unlocking (presumably the one taken by the lookup).
-+ */
-+void ipt_flush_table(struct xt_table *table)
-+{
-+	struct xt_table_info *info;
-+	struct xt_table *locked;
-+	void *entries;
-+
-+	if (!table)
-+		return;
-+
-+	locked = xt_find_table_lock(AF_INET, table->name);
-+	if (!locked || IS_ERR(locked))
-+		return;
-+
-+	info = locked->private;
-+	entries = info->entries[raw_smp_processor_id()];
-+	IPT_ENTRY_ITERATE(entries, info->size, cleanup_entry, NULL);
-+	if (info->number > info->initial_entries)
-+		module_put(locked->me);
-+	info->size = 0;
-+	xt_table_unlock(locked);
-+	module_put(locked->me);
-+}
-+
- /* Returns 1 if the type and code is matched by the range, 0 otherwise */
- static inline int
- icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
-@@ -1327,6 +2318,9 @@ icmp_checkentry(const char *tablename,
- /* The built-in targets: standard (NULL) and error. */
- static struct ipt_target ipt_standard_target = {
- .name = IPT_STANDARD_TARGET,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat_ipt_standard_fn,
-+#endif
- };
-
- static struct ipt_target ipt_error_target = {
-@@ -1348,43 +2342,101 @@ static struct ipt_match icmp_matchstruct
- .name = "icmp",
- .match = &icmp_match,
- .checkentry = &icmp_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &icmp_compat,
-+#endif
- };
-
--static int __init init(void)
-+/*
-+ * Register the built-in targets, the icmp match and the AF_INET proto
-+ * support, and remember the standard target in ve_ipt_standard_target.
-+ *
-+ * Returns 0 on success, -EEXIST when ve_ipt_standard_target is already
-+ * set, or the error of the step that failed.  Every step completed before
-+ * a failure is undone and ve_ipt_standard_target is left NULL.
-+ */
-+static int init_iptables(void)
- {
- int ret;
-
-- xt_proto_init(AF_INET);
-+	if (ve_ipt_standard_target != NULL)
-+		return -EEXIST;
-+
-+	ret = xt_register_target(AF_INET, &ipt_standard_target);
-+	if (ret)
-+		goto out;
-+	ve_ipt_standard_target = xt_find_target(AF_INET, IPT_STANDARD_TARGET, 0);
-+	if (IS_ERR(ve_ipt_standard_target) || !ve_ipt_standard_target) {
-+		/*
-+		 * ret is still 0 from the registration above: report a real
-+		 * error, and do not leave an error value in the pointer or a
-+		 * later call would bail out with -EEXIST.
-+		 */
-+		ret = ve_ipt_standard_target ?
-+			PTR_ERR(ve_ipt_standard_target) : -ENOENT;
-+		ve_ipt_standard_target = NULL;
-+		goto out_standard;
-+	}
-+	ret = xt_register_target(AF_INET, &ipt_error_target);
-+	if (ret)
-+		goto out_error;
-+	ret = xt_register_match(AF_INET, &icmp_matchstruct);
-+	if (ret)
-+		goto out_icmp;
-+	ret = xt_proto_init(AF_INET);
-+	if (ret)
-+		goto out_proc;
-+	return 0;
-+
-+out_proc:
-+	xt_unregister_match(AF_INET, &icmp_matchstruct);
-+out_icmp:
-+	xt_unregister_target(AF_INET, &ipt_error_target);
-+out_error:
-+	ve_ipt_standard_target = NULL;
-+out_standard:
-+	xt_unregister_target(AF_INET, &ipt_standard_target);
-+out:
-+	return ret;
-+}
-+
-+/*
-+ * Undo init_iptables(): tear down the AF_INET proto support, unregister
-+ * the icmp match and both built-in targets, and clear
-+ * ve_ipt_standard_target.  Steps run in the reverse order of setup.
-+ */
-+static void fini_iptables(void)
-+{
-+ xt_proto_fini(AF_INET);
-+ xt_unregister_match(AF_INET, &icmp_matchstruct);
-+ xt_unregister_target(AF_INET, &ipt_error_target);
-+ ve_ipt_standard_target = NULL;
-+ xt_unregister_target(AF_INET, &ipt_standard_target);
-+}
-
-- /* Noone else will be downing sem now, so we won't sleep */
-- xt_register_target(AF_INET, &ipt_standard_target);
-- xt_register_target(AF_INET, &ipt_error_target);
-- xt_register_match(AF_INET, &icmp_matchstruct);
-+static int __init init(void)
-+{
-+ int ret;
-+
-+ ret = init_iptables();
-+ if (ret)
-+ goto out;
-
- /* Register setsockopt */
- ret = nf_register_sockopt(&ipt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
-- return ret;
-+ goto out_sockopts;
- }
-
-+ KSYMRESOLVE(init_iptables);
-+ KSYMRESOLVE(fini_iptables);
-+ KSYMRESOLVE(ipt_flush_table);
-+ KSYMMODRESOLVE(ip_tables);
- printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
- return 0;
-+
-+out_sockopts:
-+ fini_iptables();
-+out:
-+ return ret;
- }
-
- static void __exit fini(void)
- {
-+ KSYMMODUNRESOLVE(ip_tables);
-+ KSYMUNRESOLVE(init_iptables);
-+ KSYMUNRESOLVE(fini_iptables);
-+ KSYMUNRESOLVE(ipt_flush_table);
- nf_unregister_sockopt(&ipt_sockopts);
--
-- xt_unregister_match(AF_INET, &icmp_matchstruct);
-- xt_unregister_target(AF_INET, &ipt_error_target);
-- xt_unregister_target(AF_INET, &ipt_standard_target);
--
-- xt_proto_fini(AF_INET);
-+ fini_iptables();
- }
-
- EXPORT_SYMBOL(ipt_register_table);
- EXPORT_SYMBOL(ipt_unregister_table);
- EXPORT_SYMBOL(ipt_do_table);
--module_init(init);
-+#ifdef CONFIG_COMPAT
-+EXPORT_SYMBOL(ipt_match_align_compat);
-+EXPORT_SYMBOL(ipt_target_align_compat);
-+#endif
-+EXPORT_SYMBOL(ipt_flush_table);
-+subsys_initcall(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_LOG.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_LOG.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_LOG.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_LOG.c 2006-04-19 15:02:12.000000000 +0400
-@@ -18,6 +18,7 @@
- #include <net/udp.h>
- #include <net/tcp.h>
- #include <net/route.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-@@ -463,10 +464,25 @@ static int ipt_log_checkentry(const char
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+/*
-+ * Compat hook of the LOG target: passes the difference between the native
-+ * and the compat aligned size of struct ipt_log_info to
-+ * ipt_target_align_compat() and returns its result.
-+ */
-+static int ipt_log_compat(void *target,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int delta = IPT_ALIGN(sizeof(struct ipt_log_info)) -
-+			  COMPAT_IPT_ALIGN(sizeof(struct ipt_log_info));
-+
-+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
-+}
-+#endif
-+
- static struct ipt_target ipt_log_reg = {
- .name = "LOG",
- .target = ipt_log_target,
- .checkentry = ipt_log_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = ipt_log_compat,
-+#endif
- .me = THIS_MODULE,
- };
-
-@@ -476,24 +492,44 @@ static struct nf_logger ipt_log_logger =
- .me = THIS_MODULE,
- };
-
-+int init_iptable_LOG(void)
-+{
-+ return ipt_register_target(&ipt_log_reg);
-+}
-+
-+void fini_iptable_LOG(void)
-+{
-+ ipt_unregister_target(&ipt_log_reg);
-+}
-+
- static int __init init(void)
- {
-- if (ipt_register_target(&ipt_log_reg))
-- return -EINVAL;
-+ int err;
-+
-+ err = init_iptable_LOG();
-+ if (err < 0)
-+ return err;
- if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
-- printk(KERN_WARNING "ipt_LOG: not logging via system console "
-+ ve_printk(VE_LOG, KERN_WARNING "ipt_LOG: not logging via system console "
- "since somebody else already registered for PF_INET\n");
- /* we cannot make module load fail here, since otherwise
- * iptables userspace would abort */
- }
-
-+
-+ KSYMRESOLVE(init_iptable_LOG);
-+ KSYMRESOLVE(fini_iptable_LOG);
-+ KSYMMODRESOLVE(ipt_LOG);
- return 0;
- }
-
- static void __exit fini(void)
- {
-+ KSYMMODUNRESOLVE(ipt_LOG);
-+ KSYMUNRESOLVE(init_iptable_LOG);
-+ KSYMUNRESOLVE(fini_iptable_LOG);
- nf_log_unregister_logger(&ipt_log_logger);
-- ipt_unregister_target(&ipt_log_reg);
-+ fini_iptable_LOG();
- }
-
- module_init(init);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_MASQUERADE.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_MASQUERADE.c 2006-04-19 15:02:12.000000000 +0400
-@@ -120,6 +120,7 @@ masquerade_target(struct sk_buff **pskb,
- return ip_nat_setup_info(ct, &newrange, hooknum);
- }
-
-+#if 0
- static inline int
- device_cmp(struct ip_conntrack *i, void *ifindex)
- {
-@@ -175,6 +176,7 @@ static struct notifier_block masq_dev_no
- static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
- };
-+#endif
-
- static struct ipt_target masquerade = {
- .name = "MASQUERADE",
-@@ -189,12 +191,16 @@ static int __init init(void)
-
- ret = ipt_register_target(&masquerade);
-
-+#if 0
-+/* These notifiers are unnecessary and may
-+ lead to oops in virtual environments */
- if (ret == 0) {
- /* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
- /* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
- }
-+#endif
-
- return ret;
- }
-@@ -202,8 +208,8 @@ static int __init init(void)
- static void __exit fini(void)
- {
- ipt_unregister_target(&masquerade);
-- unregister_netdevice_notifier(&masq_dev_notifier);
-- unregister_inetaddr_notifier(&masq_inet_notifier);
-+/* unregister_netdevice_notifier(&masq_dev_notifier);
-+ unregister_inetaddr_notifier(&masq_inet_notifier); */
- }
-
- module_init(init);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_REDIRECT.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_REDIRECT.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_REDIRECT.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_REDIRECT.c 2006-04-19 15:02:12.000000000 +0400
-@@ -17,6 +17,7 @@
- #include <linux/inetdevice.h>
- #include <net/protocol.h>
- #include <net/checksum.h>
-+#include <linux/nfcalls.h>
- #include <linux/netfilter_ipv4.h>
- #include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-@@ -25,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <core
- MODULE_DESCRIPTION("iptables REDIRECT target module");
-
- #if 0
--#define DEBUGP printk
-+#define DEBUGP ve_printk
- #else
- #define DEBUGP(format, args...)
- #endif
-@@ -119,15 +120,37 @@ static struct ipt_target redirect_reg =
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_REDIRECT(void)
- {
- return ipt_register_target(&redirect_reg);
- }
-
--static void __exit fini(void)
-+void fini_iptable_REDIRECT(void)
- {
- ipt_unregister_target(&redirect_reg);
- }
-
-+/*
-+ * Module entry point: register the REDIRECT target and, unless that
-+ * returned a negative error, run the KSYM* registration macros (defined
-+ * outside this file section) for the init/fini helpers and the module.
-+ */
-+static int __init init(void)
-+{
-+	const int rc = init_iptable_REDIRECT();
-+
-+	if (rc < 0)
-+		return rc;
-+
-+	KSYMRESOLVE(init_iptable_REDIRECT);
-+	KSYMRESOLVE(fini_iptable_REDIRECT);
-+	KSYMMODRESOLVE(ipt_REDIRECT);
-+	return 0;
-+}
-+
-+/*
-+ * Module exit: run the KSYM*UNRESOLVE macros for the names handled in
-+ * init() (presumably undoing those registrations -- the macros are defined
-+ * elsewhere), then unregister the REDIRECT target.
-+ */
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_REDIRECT);
-+ KSYMUNRESOLVE(init_iptable_REDIRECT);
-+ KSYMUNRESOLVE(fini_iptable_REDIRECT);
-+ fini_iptable_REDIRECT();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_REJECT.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_REJECT.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_REJECT.c 2006-04-19 15:02:12.000000000 +0400
-@@ -22,6 +22,7 @@
- #include <net/ip.h>
- #include <net/tcp.h>
- #include <net/route.h>
-+#include <linux/nfcalls.h>
- #include <net/dst.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
- #include <linux/netfilter_ipv4/ipt_REJECT.h>
-@@ -322,22 +323,59 @@ static int check(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+/*
-+ * Compat hook of the REJECT target: passes the difference between the
-+ * native and the compat aligned size of struct ipt_reject_info to
-+ * ipt_target_align_compat() and returns its result.
-+ */
-+static int compat(void *target,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int delta = IPT_ALIGN(sizeof(struct ipt_reject_info)) -
-+			  COMPAT_IPT_ALIGN(sizeof(struct ipt_reject_info));
-+
-+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
-+}
-+#endif
-+
- static struct ipt_target ipt_reject_reg = {
- .name = "REJECT",
- .target = reject,
- .checkentry = check,
-+#ifdef CONFIG_COMPAT
-+ .compat = compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_REJECT(void)
- {
- return ipt_register_target(&ipt_reject_reg);
- }
-
--static void __exit fini(void)
-+void fini_iptable_REJECT(void)
- {
- ipt_unregister_target(&ipt_reject_reg);
- }
-
-+/*
-+ * Module entry point: register the REJECT target and, unless that
-+ * returned a negative error, run the KSYM* registration macros (defined
-+ * outside this file section) for the init/fini helpers and the module.
-+ */
-+static int __init init(void)
-+{
-+	const int rc = init_iptable_REJECT();
-+
-+	if (rc < 0)
-+		return rc;
-+
-+	KSYMRESOLVE(init_iptable_REJECT);
-+	KSYMRESOLVE(fini_iptable_REJECT);
-+	KSYMMODRESOLVE(ipt_REJECT);
-+	return 0;
-+}
-+
-+/*
-+ * Module exit: run the KSYM*UNRESOLVE macros for the names handled in
-+ * init() (presumably undoing those registrations -- the macros are defined
-+ * elsewhere), then unregister the REJECT target.
-+ */
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_REJECT);
-+ KSYMUNRESOLVE(init_iptable_REJECT);
-+ KSYMUNRESOLVE(fini_iptable_REJECT);
-+ fini_iptable_REJECT();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_TCPMSS.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_TCPMSS.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_TCPMSS.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_TCPMSS.c 2006-04-19 15:02:12.000000000 +0400
-@@ -13,6 +13,7 @@
-
- #include <linux/ip.h>
- #include <net/tcp.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter_ipv4/ip_tables.h>
- #include <linux/netfilter_ipv4/ipt_TCPMSS.h>
-@@ -242,22 +243,59 @@ ipt_tcpmss_checkentry(const char *tablen
- return 0;
- }
-
-+#ifdef CONFIG_COMPAT
-+/*
-+ * Compat hook of the TCPMSS target: passes the difference between the
-+ * native and the compat aligned size of struct ipt_tcpmss_info to
-+ * ipt_target_align_compat() and returns its result.
-+ */
-+static int ipt_tcpmss_compat(void *target,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int delta = IPT_ALIGN(sizeof(struct ipt_tcpmss_info)) -
-+			  COMPAT_IPT_ALIGN(sizeof(struct ipt_tcpmss_info));
-+
-+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
-+}
-+#endif
-+
- static struct ipt_target ipt_tcpmss_reg = {
- .name = "TCPMSS",
- .target = ipt_tcpmss_target,
- .checkentry = ipt_tcpmss_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = ipt_tcpmss_compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_TCPMSS(void)
- {
- return ipt_register_target(&ipt_tcpmss_reg);
- }
-
--static void __exit fini(void)
-+void fini_iptable_TCPMSS(void)
- {
- ipt_unregister_target(&ipt_tcpmss_reg);
- }
-
-+/*
-+ * Module entry point: register the TCPMSS target and, unless that
-+ * returned a negative error, run the KSYM* registration macros (defined
-+ * outside this file section) for the init/fini helpers and the module.
-+ */
-+static int __init init(void)
-+{
-+	const int rc = init_iptable_TCPMSS();
-+
-+	if (rc < 0)
-+		return rc;
-+
-+	KSYMRESOLVE(init_iptable_TCPMSS);
-+	KSYMRESOLVE(fini_iptable_TCPMSS);
-+	KSYMMODRESOLVE(ipt_TCPMSS);
-+	return 0;
-+}
-+
-+/*
-+ * Module exit: run the KSYM*UNRESOLVE macros for the names handled in
-+ * init() (presumably undoing those registrations -- the macros are defined
-+ * elsewhere), then unregister the TCPMSS target.
-+ */
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_TCPMSS);
-+ KSYMUNRESOLVE(init_iptable_TCPMSS);
-+ KSYMUNRESOLVE(fini_iptable_TCPMSS);
-+ fini_iptable_TCPMSS();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_TOS.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_TOS.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_TOS.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_TOS.c 2006-04-19 15:02:12.000000000 +0400
-@@ -15,6 +15,7 @@
-
- #include <linux/netfilter_ipv4/ip_tables.h>
- #include <linux/netfilter_ipv4/ipt_TOS.h>
-+#include <linux/nfcalls.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-@@ -83,22 +84,59 @@ checkentry(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+/*
-+ * Compat hook of the TOS target: passes the difference between the native
-+ * and the compat aligned size of struct ipt_tos_target_info to
-+ * ipt_target_align_compat() and returns its result.
-+ */
-+static int compat(void *target,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int delta = IPT_ALIGN(sizeof(struct ipt_tos_target_info)) -
-+			  COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_target_info));
-+
-+	return ipt_target_align_compat(target, dstptr, size, delta, convert);
-+}
-+#endif
-+
- static struct ipt_target ipt_tos_reg = {
- .name = "TOS",
- .target = target,
- .checkentry = checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_TOS(void)
- {
- return ipt_register_target(&ipt_tos_reg);
- }
-
--static void __exit fini(void)
-+void fini_iptable_TOS(void)
- {
- ipt_unregister_target(&ipt_tos_reg);
- }
-
-+/*
-+ * Module entry point: register the TOS target and, unless that returned a
-+ * negative error, run the KSYM* registration macros (defined outside this
-+ * file section) for the init/fini helpers and the module.
-+ */
-+static int __init init(void)
-+{
-+	const int rc = init_iptable_TOS();
-+
-+	if (rc < 0)
-+		return rc;
-+
-+	KSYMRESOLVE(init_iptable_TOS);
-+	KSYMRESOLVE(fini_iptable_TOS);
-+	KSYMMODRESOLVE(ipt_TOS);
-+	return 0;
-+}
-+
-+/*
-+ * Module exit: run the KSYM*UNRESOLVE macros for the names handled in
-+ * init() (presumably undoing those registrations -- the macros are defined
-+ * elsewhere), then unregister the TOS target.
-+ */
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_TOS);
-+ KSYMUNRESOLVE(init_iptable_TOS);
-+ KSYMUNRESOLVE(fini_iptable_TOS);
-+ fini_iptable_TOS();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_multiport.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_multiport.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_multiport.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_multiport.c 2006-04-19 15:02:12.000000000 +0400
-@@ -13,6 +13,7 @@
- #include <linux/types.h>
- #include <linux/udp.h>
- #include <linux/skbuff.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter_ipv4/ipt_multiport.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-@@ -21,6 +22,13 @@ MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
- MODULE_DESCRIPTION("iptables multiple port match module");
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_multiport_match (*(get_exec_env()->_multiport_match))
-+#else
-+#define ve_multiport_match multiport_match
-+#endif
-+
- #if 0
- #define duprintf(format, args...) printk(format , ## args)
- #else
-@@ -174,11 +182,36 @@ checkentry_v1(const char *tablename,
- return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1)));
- }
-
-+#ifdef CONFIG_COMPAT
-+/*
-+ * Compat hook of the revision 0 multiport match: passes the difference
-+ * between the native and the compat aligned size of struct ipt_multiport
-+ * to ipt_match_align_compat() and returns its result.
-+ */
-+static int compat(void *match,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int delta = IPT_ALIGN(sizeof(struct ipt_multiport)) -
-+			  COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport));
-+
-+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
-+}
-+
-+/*
-+ * Compat hook of the revision 1 multiport match: passes the difference
-+ * between the native and the compat aligned size of
-+ * struct ipt_multiport_v1 to ipt_match_align_compat() and returns its
-+ * result.
-+ */
-+static int compat_v1(void *match,
-+		void **dstptr, int *size, int convert)
-+{
-+	const int delta = IPT_ALIGN(sizeof(struct ipt_multiport_v1)) -
-+			  COMPAT_IPT_ALIGN(sizeof(struct ipt_multiport_v1));
-+
-+	return ipt_match_align_compat(match, dstptr, size, delta, convert);
-+}
-+#endif
-+
- static struct ipt_match multiport_match = {
- .name = "multiport",
- .revision = 0,
- .match = &match,
- .checkentry = &checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
-@@ -187,10 +220,13 @@ static struct ipt_match multiport_match_
- .revision = 1,
- .match = &match_v1,
- .checkentry = &checkentry_v1,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat_v1,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_multiport(void)
- {
- int err;
-
-@@ -204,11 +240,33 @@ static int __init init(void)
- return err;
- }
-
--static void __exit fini(void)
-+void fini_iptable_multiport(void)
- {
- ipt_unregister_match(&multiport_match);
- ipt_unregister_match(&multiport_match_v1);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_iptable_multiport();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_iptable_multiport);
-+ KSYMRESOLVE(fini_iptable_multiport);
-+ KSYMMODRESOLVE(ipt_multiport);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_multiport);
-+ KSYMUNRESOLVE(init_iptable_multiport);
-+ KSYMUNRESOLVE(fini_iptable_multiport);
-+ fini_iptable_multiport();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_tos.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_tos.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_tos.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_tos.c 2006-04-19 15:02:12.000000000 +0400
-@@ -10,6 +10,7 @@
-
- #include <linux/module.h>
- #include <linux/skbuff.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter_ipv4/ipt_tos.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-@@ -17,6 +18,13 @@
- MODULE_LICENSE("GPL");
- MODULE_DESCRIPTION("iptables TOS match module");
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_tos_match (*(get_exec_env()->_tos_match))
-+#else
-+#define ve_tos_match tos_match
-+#endif
-+
- static int
- match(const struct sk_buff *skb,
- const struct net_device *in,
-@@ -44,22 +52,59 @@ checkentry(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat(void *match,
-+ void **dstptr, int *size, int convert)
-+{
-+ int off;
-+
-+ off = IPT_ALIGN(sizeof(struct ipt_tos_info)) -
-+ COMPAT_IPT_ALIGN(sizeof(struct ipt_tos_info));
-+ return ipt_match_align_compat(match, dstptr, size, off, convert);
-+}
-+#endif
-+
- static struct ipt_match tos_match = {
- .name = "tos",
- .match = &match,
- .checkentry = &checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_tos(void)
- {
- return ipt_register_match(&tos_match);
- }
-
--static void __exit fini(void)
-+void fini_iptable_tos(void)
- {
- ipt_unregister_match(&tos_match);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_iptable_tos();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_iptable_tos);
-+ KSYMRESOLVE(fini_iptable_tos);
-+ KSYMMODRESOLVE(ipt_tos);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_tos);
-+ KSYMUNRESOLVE(init_iptable_tos);
-+ KSYMUNRESOLVE(fini_iptable_tos);
-+ fini_iptable_tos();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/ipt_ttl.c linux-2.6.16-026test009/net/ipv4/netfilter/ipt_ttl.c
---- linux-2.6.16.orig/net/ipv4/netfilter/ipt_ttl.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/ipt_ttl.c 2006-04-19 15:02:12.000000000 +0400
-@@ -11,6 +11,7 @@
-
- #include <linux/module.h>
- #include <linux/skbuff.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter_ipv4/ipt_ttl.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-@@ -57,22 +58,58 @@ static int checkentry(const char *tablen
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat(void *match,
-+ void **dstptr, int *size, int convert)
-+{
-+ int off;
-+
-+ off = IPT_ALIGN(sizeof(struct ipt_ttl_info)) -
-+ COMPAT_IPT_ALIGN(sizeof(struct ipt_ttl_info));
-+ return ipt_match_align_compat(match, dstptr, size, off, convert);
-+}
-+#endif
-+
- static struct ipt_match ttl_match = {
- .name = "ttl",
- .match = &match,
- .checkentry = &checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_iptable_ttl(void)
- {
- return ipt_register_match(&ttl_match);
- }
-
--static void __exit fini(void)
-+void fini_iptable_ttl(void)
- {
- ipt_unregister_match(&ttl_match);
-+}
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_iptable_ttl();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_iptable_ttl);
-+ KSYMRESOLVE(fini_iptable_ttl);
-+ KSYMMODRESOLVE(ipt_ttl);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(ipt_ttl);
-+ KSYMUNRESOLVE(init_iptable_ttl);
-+ KSYMUNRESOLVE(fini_iptable_ttl);
-+ fini_iptable_ttl();
- }
-
- module_init(init);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/iptable_filter.c linux-2.6.16-026test009/net/ipv4/netfilter/iptable_filter.c
---- linux-2.6.16.orig/net/ipv4/netfilter/iptable_filter.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/iptable_filter.c 2006-04-19 15:02:12.000000000 +0400
-@@ -12,12 +12,20 @@
-
- #include <linux/module.h>
- #include <linux/moduleparam.h>
-+#include <linux/nfcalls.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
- MODULE_DESCRIPTION("iptables filter table");
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_packet_filter (get_exec_env()->_ve_ipt_filter_pf)
-+#else
-+#define ve_packet_filter &packet_filter
-+#endif
-+
- #define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
-
- static struct
-@@ -25,7 +33,7 @@ static struct
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
--} initial_table __initdata
-+} initial_table
- = { { "filter", FILTER_VALID_HOOKS, 4,
- sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- { [NF_IP_LOCAL_IN] = 0,
-@@ -90,7 +98,7 @@ ipt_hook(unsigned int hook,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
-+ return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
- }
-
- static unsigned int
-@@ -108,7 +116,7 @@ ipt_local_out_hook(unsigned int hook,
- return NF_ACCEPT;
- }
-
-- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
-+ return ipt_do_table(pskb, hook, in, out, ve_packet_filter, NULL);
- }
-
- static struct nf_hook_ops ipt_ops[] = {
-@@ -139,56 +147,89 @@ static struct nf_hook_ops ipt_ops[] = {
- static int forward = NF_ACCEPT;
- module_param(forward, bool, 0000);
-
--static int __init init(void)
-+int init_iptable_filter(void)
- {
- int ret;
--
-- if (forward < 0 || forward > NF_MAX_VERDICT) {
-- printk("iptables forward must be 0 or 1\n");
-- return -EINVAL;
-- }
--
-- /* Entry 1 is the FORWARD hook */
-- initial_table.entries[1].target.verdict = -forward - 1;
-+ struct ipt_table *tmp_filter;
-
- /* Register table */
-- ret = ipt_register_table(&packet_filter, &initial_table.repl);
-- if (ret < 0)
-- return ret;
-+ tmp_filter = ipt_register_table(&packet_filter,
-+ &initial_table.repl);
-+ if (IS_ERR(tmp_filter))
-+ return PTR_ERR(tmp_filter);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_packet_filter = tmp_filter;
-+#endif
-
- /* Register hooks */
-- ret = nf_register_hook(&ipt_ops[0]);
-+ ret = virt_nf_register_hook(&ipt_ops[0]);
- if (ret < 0)
- goto cleanup_table;
-
-- ret = nf_register_hook(&ipt_ops[1]);
-+ ret = virt_nf_register_hook(&ipt_ops[1]);
- if (ret < 0)
- goto cleanup_hook0;
-
-- ret = nf_register_hook(&ipt_ops[2]);
-+ ret = virt_nf_register_hook(&ipt_ops[2]);
- if (ret < 0)
- goto cleanup_hook1;
-
- return ret;
-
- cleanup_hook1:
-- nf_unregister_hook(&ipt_ops[1]);
-+ virt_nf_unregister_hook(&ipt_ops[1]);
- cleanup_hook0:
-- nf_unregister_hook(&ipt_ops[0]);
-+ virt_nf_unregister_hook(&ipt_ops[0]);
- cleanup_table:
-- ipt_unregister_table(&packet_filter);
-+ ipt_unregister_table(ve_packet_filter);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_packet_filter = NULL;
-+#endif
-
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_iptable_filter(void)
- {
- unsigned int i;
-
- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
-- nf_unregister_hook(&ipt_ops[i]);
-+ virt_nf_unregister_hook(&ipt_ops[i]);
-
-- ipt_unregister_table(&packet_filter);
-+ ipt_unregister_table(ve_packet_filter);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_packet_filter = NULL;
-+#endif
-+}
-+
-+static int __init init(void)
-+{
-+ int err;
-+
-+ if (forward < 0 || forward > NF_MAX_VERDICT) {
-+ printk("iptables forward must be 0 or 1\n");
-+ return -EINVAL;
-+ }
-+
-+ /* Entry 1 is the FORWARD hook */
-+ initial_table.entries[1].target.verdict = -forward - 1;
-+
-+ err = init_iptable_filter();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_iptable_filter);
-+ KSYMRESOLVE(fini_iptable_filter);
-+ KSYMMODRESOLVE(iptable_filter);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(iptable_filter);
-+ KSYMUNRESOLVE(init_iptable_filter);
-+ KSYMUNRESOLVE(fini_iptable_filter);
-+ fini_iptable_filter();
- }
-
- module_init(init);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/iptable_mangle.c linux-2.6.16-026test009/net/ipv4/netfilter/iptable_mangle.c
---- linux-2.6.16.orig/net/ipv4/netfilter/iptable_mangle.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/iptable_mangle.c 2006-04-19 15:02:12.000000000 +0400
-@@ -17,6 +17,7 @@
- #include <linux/skbuff.h>
- #include <net/sock.h>
- #include <net/route.h>
-+#include <linux/nfcalls.h>
- #include <linux/ip.h>
-
- MODULE_LICENSE("GPL");
-@@ -35,7 +36,7 @@ static struct
- struct ipt_replace repl;
- struct ipt_standard entries[5];
- struct ipt_error term;
--} initial_table __initdata
-+} initial_table
- = { { "mangle", MANGLE_VALID_HOOKS, 6,
- sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
- { [NF_IP_PRE_ROUTING] = 0,
-@@ -112,6 +113,13 @@ static struct ipt_table packet_mangler =
- .af = AF_INET,
- };
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_packet_mangler (get_exec_env()->_ipt_mangle_table)
-+#else
-+#define ve_packet_mangler &packet_mangler
-+#endif
-+
- /* The work comes in here from netfilter.c. */
- static unsigned int
- ipt_route_hook(unsigned int hook,
-@@ -120,7 +128,7 @@ ipt_route_hook(unsigned int hook,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
-+ return ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
- }
-
- static unsigned int
-@@ -149,7 +157,8 @@ ipt_local_hook(unsigned int hook,
- daddr = (*pskb)->nh.iph->daddr;
- tos = (*pskb)->nh.iph->tos;
-
-- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
-+ ret = ipt_do_table(pskb, hook, in, out, ve_packet_mangler, NULL);
-+
- /* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
- && ((*pskb)->nh.iph->saddr != saddr
-@@ -201,60 +210,103 @@ static struct nf_hook_ops ipt_ops[] = {
- },
- };
-
--static int __init init(void)
-+static int mangle_init(struct nf_hook_ops ipt_ops[])
- {
- int ret;
-+ struct ipt_table *tmp_mangler;
-
- /* Register table */
-- ret = ipt_register_table(&packet_mangler, &initial_table.repl);
-- if (ret < 0)
-- return ret;
-+ tmp_mangler = ipt_register_table(&packet_mangler,
-+ &initial_table.repl);
-+ if (IS_ERR(tmp_mangler))
-+ return PTR_ERR(tmp_mangler);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_packet_mangler = tmp_mangler;
-+#endif
-
- /* Register hooks */
-- ret = nf_register_hook(&ipt_ops[0]);
-+ ret = virt_nf_register_hook(&ipt_ops[0]);
- if (ret < 0)
- goto cleanup_table;
-
-- ret = nf_register_hook(&ipt_ops[1]);
-+ ret = virt_nf_register_hook(&ipt_ops[1]);
- if (ret < 0)
- goto cleanup_hook0;
-
-- ret = nf_register_hook(&ipt_ops[2]);
-+ ret = virt_nf_register_hook(&ipt_ops[2]);
- if (ret < 0)
- goto cleanup_hook1;
-
-- ret = nf_register_hook(&ipt_ops[3]);
-+ ret = virt_nf_register_hook(&ipt_ops[3]);
- if (ret < 0)
- goto cleanup_hook2;
-
-- ret = nf_register_hook(&ipt_ops[4]);
-+ ret = virt_nf_register_hook(&ipt_ops[4]);
- if (ret < 0)
- goto cleanup_hook3;
-
- return ret;
-
- cleanup_hook3:
-- nf_unregister_hook(&ipt_ops[3]);
-+ virt_nf_unregister_hook(&ipt_ops[3]);
- cleanup_hook2:
-- nf_unregister_hook(&ipt_ops[2]);
-+ virt_nf_unregister_hook(&ipt_ops[2]);
- cleanup_hook1:
-- nf_unregister_hook(&ipt_ops[1]);
-+ virt_nf_unregister_hook(&ipt_ops[1]);
- cleanup_hook0:
-- nf_unregister_hook(&ipt_ops[0]);
-+ virt_nf_unregister_hook(&ipt_ops[0]);
- cleanup_table:
-- ipt_unregister_table(&packet_mangler);
-+ ipt_unregister_table(ve_packet_mangler);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_packet_mangler = NULL;
-+#endif
-
- return ret;
- }
-
--static void __exit fini(void)
-+static void mangle_fini(struct nf_hook_ops ipt_ops[])
- {
- unsigned int i;
-
-- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
-- nf_unregister_hook(&ipt_ops[i]);
-+ for (i = 0; i < 5; i++)
-+ virt_nf_unregister_hook(&ipt_ops[i]);
-+
-+ ipt_unregister_table(ve_packet_mangler);
-+#ifdef CONFIG_VE_IPTABLES
-+ ve_packet_mangler = NULL;
-+#endif
-+}
-+
-+int init_iptable_mangle(void)
-+{
-+ return mangle_init(ipt_ops);
-+}
-+
-+void fini_iptable_mangle(void)
-+{
-+ mangle_fini(ipt_ops);
-+}
-+
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_iptable_mangle();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_iptable_mangle);
-+ KSYMRESOLVE(fini_iptable_mangle);
-+ KSYMMODRESOLVE(iptable_mangle);
-+ return 0;
-+}
-
-- ipt_unregister_table(&packet_mangler);
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(iptable_mangle);
-+ KSYMUNRESOLVE(init_iptable_mangle);
-+ KSYMUNRESOLVE(fini_iptable_mangle);
-+ fini_iptable_mangle();
- }
-
- module_init(init);
-diff -upr linux-2.6.16.orig/net/ipv4/netfilter/iptable_raw.c linux-2.6.16-026test009/net/ipv4/netfilter/iptable_raw.c
---- linux-2.6.16.orig/net/ipv4/netfilter/iptable_raw.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/netfilter/iptable_raw.c 2006-04-19 15:02:12.000000000 +0400
-@@ -118,12 +118,13 @@ static struct nf_hook_ops ipt_ops[] = {
-
- static int __init init(void)
- {
-+ struct ipt_table *tmp;
- int ret;
-
- /* Register table */
-- ret = ipt_register_table(&packet_raw, &initial_table.repl);
-- if (ret < 0)
-- return ret;
-+ tmp = ipt_register_table(&packet_raw, &initial_table.repl);
-+ if (IS_ERR(tmp))
-+ return PTR_ERR(tmp);
-
- /* Register hooks */
- ret = nf_register_hook(&ipt_ops[0]);
-diff -upr linux-2.6.16.orig/net/ipv4/proc.c linux-2.6.16-026test009/net/ipv4/proc.c
---- linux-2.6.16.orig/net/ipv4/proc.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/proc.c 2006-04-19 15:02:12.000000000 +0400
-@@ -258,11 +258,12 @@ static int snmp_seq_show(struct seq_file
- seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
-
- seq_printf(seq, "\nIp: %d %d",
-- ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
-+ ve_ipv4_devconf.forwarding ? 1 : 2,
-+ sysctl_ip_default_ttl);
-
- for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
-- fold_field((void **) ip_statistics,
-+ fold_field((void **) ve_ip_statistics,
- snmp4_ipstats_list[i].entry));
-
- seq_puts(seq, "\nIcmp:");
-@@ -272,7 +273,7 @@ static int snmp_seq_show(struct seq_file
- seq_puts(seq, "\nIcmp:");
- for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
-- fold_field((void **) icmp_statistics,
-+ fold_field((void **) ve_icmp_statistics,
- snmp4_icmp_list[i].entry));
-
- seq_puts(seq, "\nTcp:");
-@@ -284,11 +285,11 @@ static int snmp_seq_show(struct seq_file
- /* MaxConn field is signed, RFC 2012 */
- if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
- seq_printf(seq, " %ld",
-- fold_field((void **) tcp_statistics,
-+ fold_field((void **) ve_tcp_statistics,
- snmp4_tcp_list[i].entry));
- else
- seq_printf(seq, " %lu",
-- fold_field((void **) tcp_statistics,
-+ fold_field((void **) ve_tcp_statistics,
- snmp4_tcp_list[i].entry));
- }
-
-@@ -299,7 +300,7 @@ static int snmp_seq_show(struct seq_file
- seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
-- fold_field((void **) udp_statistics,
-+ fold_field((void **) ve_udp_statistics,
- snmp4_udp_list[i].entry));
-
- seq_putc(seq, '\n');
-@@ -333,7 +334,7 @@ static int netstat_seq_show(struct seq_f
- seq_puts(seq, "\nTcpExt:");
- for (i = 0; snmp4_net_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
-- fold_field((void **) net_statistics,
-+ fold_field((void **) ve_net_statistics,
- snmp4_net_list[i].entry));
-
- seq_putc(seq, '\n');
-@@ -357,10 +358,10 @@ int __init ip_misc_proc_init(void)
- {
- int rc = 0;
-
-- if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
-+ if (!proc_glob_fops_create("net/netstat", S_IRUGO, &netstat_seq_fops))
- goto out_netstat;
-
-- if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
-+ if (!proc_glob_fops_create("net/snmp", S_IRUGO, &snmp_seq_fops))
- goto out_snmp;
-
- if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
-@@ -368,9 +369,9 @@ int __init ip_misc_proc_init(void)
- out:
- return rc;
- out_sockstat:
-- proc_net_remove("snmp");
-+ remove_proc_glob_entry("net/snmp", NULL);
- out_snmp:
-- proc_net_remove("netstat");
-+ remove_proc_glob_entry("net/netstat", NULL);
- out_netstat:
- rc = -ENOMEM;
- goto out;
-diff -upr linux-2.6.16.orig/net/ipv4/raw.c linux-2.6.16-026test009/net/ipv4/raw.c
---- linux-2.6.16.orig/net/ipv4/raw.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/raw.c 2006-04-19 15:02:12.000000000 +0400
-@@ -114,7 +114,8 @@ struct sock *__raw_v4_lookup(struct sock
- if (inet->num == num &&
- !(inet->daddr && inet->daddr != raddr) &&
- !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
-- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
-+ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) &&
-+ ve_accessible_strict(VE_OWNER_SK(sk), get_exec_env()))
- goto found; /* gotcha */
- }
- sk = NULL;
-@@ -753,8 +754,12 @@ static struct sock *raw_get_first(struct
- struct hlist_node *node;
-
- sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-- if (sk->sk_family == PF_INET)
-+ if (sk->sk_family == PF_INET) {
-+ if (!ve_accessible(VE_OWNER_SK(sk),
-+ get_exec_env()))
-+ continue;
- goto found;
-+ }
- }
- sk = NULL;
- found:
-@@ -768,8 +773,14 @@ static struct sock *raw_get_next(struct
- do {
- sk = sk_next(sk);
- try_again:
-- ;
-- } while (sk && sk->sk_family != PF_INET);
-+ if (!sk)
-+ break;
-+ if (sk->sk_family != PF_INET)
-+ continue;
-+ if (ve_accessible(VE_OWNER_SK(sk),
-+ get_exec_env()))
-+ break;
-+ } while (1);
-
- if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
- sk = sk_head(&raw_v4_htable[state->bucket]);
-@@ -886,13 +897,13 @@ static struct file_operations raw_seq_fo
-
- int __init raw_proc_init(void)
- {
-- if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
-+ if (!proc_glob_fops_create("net/raw", S_IRUGO, &raw_seq_fops))
- return -ENOMEM;
- return 0;
- }
-
- void __init raw_proc_exit(void)
- {
-- proc_net_remove("raw");
-+ remove_proc_glob_entry("net/raw", NULL);
- }
- #endif /* CONFIG_PROC_FS */
-diff -upr linux-2.6.16.orig/net/ipv4/route.c linux-2.6.16-026test009/net/ipv4/route.c
---- linux-2.6.16.orig/net/ipv4/route.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/route.c 2006-04-19 15:02:12.000000000 +0400
-@@ -114,6 +114,8 @@
-
- #define RT_GC_TIMEOUT (300*HZ)
-
-+int ip_rt_src_check = 1;
-+
- static int ip_rt_min_delay = 2 * HZ;
- static int ip_rt_max_delay = 10 * HZ;
- static int ip_rt_max_size;
-@@ -253,11 +255,28 @@ static unsigned int rt_hash_code(u32 dad
- & rt_hash_mask);
- }
-
-+void prepare_rt_cache(void)
-+{
-+#ifdef CONFIG_VE
-+ struct rtable *r;
-+ int i;
-+
-+ for (i = rt_hash_mask; i >= 0; i--) {
-+ spin_lock_bh(rt_hash_lock_addr(i));
-+ for (r = rt_hash_table[i].chain; r; r = r->u.rt_next) {
-+ r->fl.owner_env = get_ve0();
-+ }
-+ spin_unlock_bh(rt_hash_lock_addr(i));
-+ }
-+#endif
-+}
-+
- #ifdef CONFIG_PROC_FS
- struct rt_cache_iter_state {
- int bucket;
- };
-
-+static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r);
- static struct rtable *rt_cache_get_first(struct seq_file *seq)
- {
- struct rtable *r = NULL;
-@@ -270,6 +289,8 @@ static struct rtable *rt_cache_get_first
- break;
- rcu_read_unlock_bh();
- }
-+ if (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()))
-+ r = rt_cache_get_next(seq, r);
- return r;
- }
-
-@@ -277,14 +298,19 @@ static struct rtable *rt_cache_get_next(
- {
- struct rt_cache_iter_state *st = rcu_dereference(seq->private);
-
-- r = r->u.rt_next;
-+start:
-+ do {
-+ r = r->u.rt_next;
-+ } while (r && !ve_accessible_strict(r->fl.owner_env, get_exec_env()));
- while (!r) {
- rcu_read_unlock_bh();
- if (--st->bucket < 0)
-- break;
-+ goto out;
- rcu_read_lock_bh();
- r = rt_hash_table[st->bucket].chain;
- }
-+ goto start;
-+out:
- return r;
- }
-
-@@ -556,7 +582,8 @@ static inline int compare_keys(struct fl
- {
- return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
- fl1->oif == fl2->oif &&
-- fl1->iif == fl2->iif;
-+ fl1->iif == fl2->iif &&
-+ ve_accessible_strict(fl1->owner_env, fl2->owner_env);
- }
-
- #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-@@ -670,26 +697,105 @@ static void rt_check_expire(unsigned lon
- mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
- }
-
-+typedef unsigned long rt_flush_gen_t;
-+
-+#ifdef CONFIG_VE
-+
-+static rt_flush_gen_t rt_flush_gen;
-+
-+/* called under rt_flush_lock */
-+static void set_rt_flush_required(struct ve_struct *env)
-+{
-+ /*
-+ * If the global generation rt_flush_gen is equal to G, then
-+ * the pass considering entries labelled by G is yet to come.
-+ */
-+ env->rt_flush_required = rt_flush_gen;
-+}
-+
-+static spinlock_t rt_flush_lock;
-+static rt_flush_gen_t reset_rt_flush_required(void)
-+{
-+ rt_flush_gen_t g;
-+
-+ spin_lock_bh(&rt_flush_lock);
-+ g = rt_flush_gen++;
-+ spin_unlock_bh(&rt_flush_lock);
-+ return g;
-+}
-+
-+static int check_rt_flush_required(struct ve_struct *env, rt_flush_gen_t gen)
-+{
-+ /* can be checked without the lock */
-+ return env->rt_flush_required >= gen;
-+}
-+
-+#else
-+
-+static void set_rt_flush_required(struct ve_struct *env)
-+{
-+}
-+
-+static rt_flush_gen_t reset_rt_flush_required(void)
-+{
-+ return 0;
-+}
-+
-+#endif
-+
- /* This can run from both BH and non-BH contexts, the latter
- * in the case of a forced flush event.
- */
- static void rt_run_flush(unsigned long dummy)
- {
- int i;
-- struct rtable *rth, *next;
-+ struct rtable * rth, * next;
-+ struct rtable * tail;
-+ rt_flush_gen_t gen;
-
- rt_deadline = 0;
-
- get_random_bytes(&rt_hash_rnd, 4);
-
-+ gen = reset_rt_flush_required();
-+
- for (i = rt_hash_mask; i >= 0; i--) {
-+#ifdef CONFIG_VE
-+ struct rtable ** prev, * p;
-+
-+ spin_lock_bh(rt_hash_lock_addr(i));
-+ rth = rt_hash_table[i].chain;
-+
-+ /* defer releasing the head of the list after spin_unlock */
-+ for (tail = rth; tail; tail = tail->u.rt_next)
-+ if (!check_rt_flush_required(tail->fl.owner_env, gen))
-+ break;
-+ if (rth != tail)
-+ rt_hash_table[i].chain = tail;
-+
-+ /* call rt_free on entries after the tail requiring flush */
-+ prev = &rt_hash_table[i].chain;
-+ for (p = *prev; p; p = next) {
-+ next = p->u.rt_next;
-+ if (!check_rt_flush_required(p->fl.owner_env, gen)) {
-+ prev = &p->u.rt_next;
-+ } else {
-+ *prev = next;
-+ rt_free(p);
-+ }
-+ }
-+
-+#else
- spin_lock_bh(rt_hash_lock_addr(i));
- rth = rt_hash_table[i].chain;
- if (rth)
- rt_hash_table[i].chain = NULL;
-+ tail = NULL;
-+
-+#endif
- spin_unlock_bh(rt_hash_lock_addr(i));
-
-- for (; rth; rth = next) {
-+ for (; rth != tail; rth = next) {
- next = rth->u.rt_next;
- rt_free(rth);
- }
-@@ -728,6 +834,8 @@ void rt_cache_flush(int delay)
- delay = tmo;
- }
-
-+ set_rt_flush_required(get_exec_env());
-+
- if (delay <= 0) {
- spin_unlock_bh(&rt_flush_lock);
- rt_run_flush(0);
-@@ -743,9 +851,30 @@ void rt_cache_flush(int delay)
-
- static void rt_secret_rebuild(unsigned long dummy)
- {
-+ int i;
-+ struct rtable *rth, *next;
- unsigned long now = jiffies;
-
-- rt_cache_flush(0);
-+ spin_lock_bh(&rt_flush_lock);
-+ del_timer(&rt_flush_timer);
-+ spin_unlock_bh(&rt_flush_lock);
-+
-+ rt_deadline = 0;
-+ get_random_bytes(&rt_hash_rnd, 4);
-+
-+ for (i = rt_hash_mask; i >= 0; i--) {
-+ spin_lock_bh(rt_hash_lock_addr(i));
-+ rth = rt_hash_table[i].chain;
-+ if (rth)
-+ rt_hash_table[i].chain = NULL;
-+ spin_unlock_bh(rt_hash_lock_addr(i));
-+
-+ for (; rth; rth = next) {
-+ next = rth->u.rt_next;
-+ rt_free(rth);
-+ }
-+ }
-+
- mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
- }
-
-@@ -1118,7 +1247,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
- struct rtable *rth, **rthp;
- u32 skeys[2] = { saddr, 0 };
- int ikeys[2] = { dev->ifindex, 0 };
-+ struct ve_struct *ve;
-
-+ ve = get_exec_env();
- tos &= IPTOS_RT_MASK;
-
- if (!in_dev)
-@@ -1154,6 +1285,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
- rth->fl.fl4_src != skeys[i] ||
- rth->fl.fl4_tos != tos ||
- rth->fl.oif != ikeys[k] ||
-+#ifdef CONFIG_VE
-+ !ve_accessible_strict(rth->fl.owner_env,
-+ ve) ||
-+#endif
- rth->fl.iif != 0) {
- rthp = &rth->u.rt_next;
- continue;
-@@ -1192,6 +1327,9 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
- rt->u.dst.neighbour = NULL;
- rt->u.dst.hh = NULL;
- rt->u.dst.xfrm = NULL;
-+#ifdef CONFIG_VE
-+ rt->fl.owner_env = ve;
-+#endif
-
- rt->rt_flags |= RTCF_REDIRECTED;
-
-@@ -1631,6 +1769,9 @@ static int ip_route_input_mc(struct sk_b
- #ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= skb->nfmark;
- #endif
-+#ifdef CONFIG_VE
-+ rth->fl.owner_env = get_exec_env();
-+#endif
- rth->fl.fl4_src = saddr;
- rth->rt_src = saddr;
- #ifdef CONFIG_NET_CLS_ROUTE
-@@ -1776,6 +1917,9 @@ static inline int __mkroute_input(struct
- #ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= skb->nfmark;
- #endif
-+#ifdef CONFIG_VE
-+ rth->fl.owner_env = get_exec_env();
-+#endif
- rth->fl.fl4_src = saddr;
- rth->rt_src = saddr;
- rth->rt_gateway = daddr;
-@@ -2021,6 +2165,9 @@ local_input:
- #ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= skb->nfmark;
- #endif
-+#ifdef CONFIG_VE
-+ rth->fl.owner_env = get_exec_env();
-+#endif
- rth->fl.fl4_src = saddr;
- rth->rt_src = saddr;
- #ifdef CONFIG_NET_CLS_ROUTE
-@@ -2100,6 +2247,9 @@ int ip_route_input(struct sk_buff *skb,
- #ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark == skb->nfmark &&
- #endif
-+#ifdef CONFIG_VE
-+ rth->fl.owner_env == get_exec_env() &&
-+#endif
- rth->fl.fl4_tos == tos) {
- rth->u.dst.lastuse = jiffies;
- dst_hold(&rth->u.dst);
-@@ -2226,6 +2376,9 @@ static inline int __mkroute_output(struc
- #ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
- #endif
-+#ifdef CONFIG_VE
-+ rth->fl.owner_env = get_exec_env();
-+#endif
- rth->rt_dst = fl->fl4_dst;
- rth->rt_src = fl->fl4_src;
- rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
-@@ -2399,10 +2552,13 @@ static int ip_route_output_slow(struct r
- ZERONET(oldflp->fl4_src))
- goto out;
-
-- /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-- dev_out = ip_dev_find(oldflp->fl4_src);
-- if (dev_out == NULL)
-- goto out;
-+ if (ip_rt_src_check) {
-+ /* It is equivalent to
-+ inet_addr_type(saddr) == RTN_LOCAL */
-+ dev_out = ip_dev_find(oldflp->fl4_src);
-+ if (dev_out == NULL)
-+ goto out;
-+ }
-
- /* I removed check for oif == dev_out->oif here.
- It was wrong for two reasons:
-@@ -2429,6 +2585,12 @@ static int ip_route_output_slow(struct r
- Luckily, this hack is good workaround.
- */
-
-+ if (dev_out == NULL) {
-+ dev_out = ip_dev_find(oldflp->fl4_src);
-+ if (dev_out == NULL)
-+ goto out;
-+ }
-+
- fl.oif = dev_out->ifindex;
- goto make_route;
- }
-@@ -2575,6 +2737,7 @@ int __ip_route_output_key(struct rtable
- #ifdef CONFIG_IP_ROUTE_FWMARK
- rth->fl.fl4_fwmark == flp->fl4_fwmark &&
- #endif
-+ ve_accessible_strict(rth->fl.owner_env, get_exec_env()) &&
- !((rth->fl.fl4_tos ^ flp->fl4_tos) &
- (IPTOS_RT_MASK | RTO_ONLINK))) {
-
-@@ -2705,7 +2868,7 @@ static int rt_fill_info(struct sk_buff *
- u32 dst = rt->rt_dst;
-
- if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-- ipv4_devconf.mc_forwarding) {
-+ ve_ipv4_devconf.mc_forwarding) {
- int err = ipmr_get_route(skb, r, nowait);
- if (err <= 0) {
- if (!nowait) {
-@@ -2750,7 +2913,10 @@ int inet_rtm_getroute(struct sk_buff *in
- /* Reserve room for dummy headers, this skb can pass
- through good chunk of routing engine.
- */
-- skb->mac.raw = skb->data;
-+ skb->mac.raw = skb->nh.raw = skb->data;
-+
-+ /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
-+ skb->nh.iph->protocol = IPPROTO_ICMP;
- skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
-
- if (rta[RTA_SRC - 1])
-@@ -2853,22 +3019,22 @@ void ip_rt_multicast_event(struct in_dev
- }
-
- #ifdef CONFIG_SYSCTL
--static int flush_delay;
-+int ipv4_flush_delay;
-
--static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
-+int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
- struct file *filp, void __user *buffer,
- size_t *lenp, loff_t *ppos)
- {
- if (write) {
- proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-- rt_cache_flush(flush_delay);
-+ rt_cache_flush(ipv4_flush_delay);
- return 0;
- }
-
- return -EINVAL;
- }
-
--static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
-+int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
- int __user *name,
- int nlen,
- void __user *oldval,
-@@ -2890,7 +3056,7 @@ ctl_table ipv4_route_table[] = {
- {
- .ctl_name = NET_IPV4_ROUTE_FLUSH,
- .procname = "flush",
-- .data = &flush_delay,
-+ .data = &ipv4_flush_delay,
- .maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = &ipv4_sysctl_rtcache_flush,
-@@ -3184,15 +3350,18 @@ int __init ip_rt_init(void)
- #ifdef CONFIG_PROC_FS
- {
- struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-- if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-- !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
-- proc_net_stat))) {
-+
-+ if (!proc_glob_fops_create("net/rt_cache",
-+ S_IRUGO, &rt_cache_seq_fops))
-+ return -ENOMEM;
-+
-+ if (!(rtstat_pde = create_proc_glob_entry("net/stat/rt_cache",
-+ S_IRUGO, NULL)))
- return -ENOMEM;
-- }
- rtstat_pde->proc_fops = &rt_cpu_seq_fops;
- }
- #ifdef CONFIG_NET_CLS_ROUTE
-- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
-+ create_proc_read_entry("net/rt_acct", 0, NULL, ip_rt_acct_read, NULL);
- #endif
- #endif
- #ifdef CONFIG_XFRM
-diff -upr linux-2.6.16.orig/net/ipv4/sysctl_net_ipv4.c linux-2.6.16-026test009/net/ipv4/sysctl_net_ipv4.c
---- linux-2.6.16.orig/net/ipv4/sysctl_net_ipv4.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/sysctl_net_ipv4.c 2006-04-19 15:02:12.000000000 +0400
-@@ -33,22 +33,21 @@ struct ipv4_config ipv4_config;
-
- #ifdef CONFIG_SYSCTL
-
--static
- int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
-- int val = ipv4_devconf.forwarding;
-+ int val = ve_ipv4_devconf.forwarding;
- int ret;
-
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-
-- if (write && ipv4_devconf.forwarding != val)
-+ if (write && ve_ipv4_devconf.forwarding != val)
- inet_forward_change();
-
- return ret;
- }
-
--static int ipv4_sysctl_forward_strategy(ctl_table *table,
-+int ipv4_sysctl_forward_strategy(ctl_table *table,
- int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen,
-diff -upr linux-2.6.16.orig/net/ipv4/tcp.c linux-2.6.16-026test009/net/ipv4/tcp.c
---- linux-2.6.16.orig/net/ipv4/tcp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/tcp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -248,6 +248,7 @@
- */
-
- #include <linux/config.h>
-+#include <linux/kmem_cache.h>
- #include <linux/module.h>
- #include <linux/types.h>
- #include <linux/fcntl.h>
-@@ -263,6 +264,9 @@
- #include <net/xfrm.h>
- #include <net/ip.h>
-
-+#include <ub/ub_orphan.h>
-+#include <ub/ub_net.h>
-+#include <ub/ub_tcp.h>
-
- #include <asm/uaccess.h>
- #include <asm/ioctls.h>
-@@ -321,6 +325,7 @@ unsigned int tcp_poll(struct file *file,
- unsigned int mask;
- struct sock *sk = sock->sk;
- struct tcp_sock *tp = tcp_sk(sk);
-+ int check_send_space;
-
- poll_wait(file, sk->sk_sleep, wait);
- if (sk->sk_state == TCP_LISTEN)
-@@ -335,6 +340,21 @@ unsigned int tcp_poll(struct file *file,
- if (sk->sk_err)
- mask = POLLERR;
-
-+ check_send_space = 1;
-+#ifdef CONFIG_USER_RESOURCE
-+ if (!(sk->sk_shutdown & SEND_SHUTDOWN) && sock_has_ubc(sk)) {
-+ unsigned long size;
-+ size = MAX_TCP_HEADER + tp->mss_cache;
-+ if (size > SOCK_MIN_UBCSPACE)
-+ size = SOCK_MIN_UBCSPACE;
-+ size = skb_charge_size(size);
-+ if (ub_sock_makewres_tcp(sk, size)) {
-+ check_send_space = 0;
-+ ub_sock_sndqueueadd_tcp(sk, size);
-+ }
-+ }
-+#endif
-+
- /*
- * POLLHUP is certainly not done right. But poll() doesn't
- * have a notion of HUP in just one direction, and for a
-@@ -378,7 +398,7 @@ unsigned int tcp_poll(struct file *file,
- sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
- mask |= POLLIN | POLLRDNORM;
-
-- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-+ if (check_send_space && !(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
- mask |= POLLOUT | POLLWRNORM;
- } else { /* send SIGIO later */
-@@ -528,16 +548,23 @@ static ssize_t do_tcp_sendpages(struct s
- int copy, i, can_coalesce;
- int offset = poffset % PAGE_SIZE;
- int size = min_t(size_t, psize, PAGE_SIZE - offset);
-+ unsigned long chargesize = 0;
-
- if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
- new_segment:
-+ chargesize = 0;
- if (!sk_stream_memory_free(sk))
- goto wait_for_sndbuf;
-
-+ chargesize = skb_charge_size(MAX_TCP_HEADER +
-+ tp->mss_cache);
-+ if (ub_sock_getwres_tcp(sk, chargesize) < 0)
-+ goto wait_for_ubspace;
- skb = sk_stream_alloc_pskb(sk, 0, 0,
- sk->sk_allocation);
- if (!skb)
- goto wait_for_memory;
-+ ub_skb_set_charge(skb, sk, chargesize, UB_TCPSNDBUF);
-
- skb_entail(sk, tp, skb);
- copy = size_goal;
-@@ -593,10 +620,14 @@ new_segment:
- wait_for_sndbuf:
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- wait_for_memory:
-+ ub_sock_retwres_tcp(sk, chargesize,
-+ skb_charge_size(MAX_TCP_HEADER + tp->mss_cache));
-+ chargesize = 0;
-+wait_for_ubspace:
- if (copied)
- tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
-
-- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
-+ if ((err = sk_stream_wait_memory(sk, &timeo, chargesize)) != 0)
- goto do_error;
-
- mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-@@ -699,6 +730,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
- while (--iovlen >= 0) {
- int seglen = iov->iov_len;
- unsigned char __user *from = iov->iov_base;
-+ unsigned long chargesize = 0;
-
- iov++;
-
-@@ -709,18 +741,26 @@ int tcp_sendmsg(struct kiocb *iocb, stru
-
- if (!sk->sk_send_head ||
- (copy = size_goal - skb->len) <= 0) {
-+ unsigned long size;
-
- new_segment:
- /* Allocate new segment. If the interface is SG,
- * allocate skb fitting to single page.
- */
-+ chargesize = 0;
- if (!sk_stream_memory_free(sk))
- goto wait_for_sndbuf;
--
-- skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
-- 0, sk->sk_allocation);
-+ size = select_size(sk, tp);
-+ chargesize = skb_charge_size(MAX_TCP_HEADER +
-+ size);
-+ if (ub_sock_getwres_tcp(sk, chargesize) < 0)
-+ goto wait_for_ubspace;
-+ skb = sk_stream_alloc_pskb(sk, size, 0,
-+ sk->sk_allocation);
- if (!skb)
- goto wait_for_memory;
-+ ub_skb_set_charge(skb, sk, chargesize,
-+ UB_TCPSNDBUF);
-
- /*
- * Check whether we can use HW checksum.
-@@ -768,6 +808,7 @@ new_segment:
- } else if (page) {
- if (off == PAGE_SIZE) {
- put_page(page);
-+ ub_sock_tcp_detachpage(sk);
- TCP_PAGE(sk) = page = NULL;
- off = 0;
- }
-@@ -781,6 +822,9 @@ new_segment:
- goto wait_for_memory;
-
- if (!page) {
-+ chargesize = PAGE_SIZE;
-+ if (ub_sock_tcp_chargepage(sk) < 0)
-+ goto wait_for_ubspace;
- /* Allocate new cache page. */
- if (!(page = sk_stream_alloc_page(sk)))
- goto wait_for_memory;
-@@ -812,7 +856,8 @@ new_segment:
- } else if (off + copy < PAGE_SIZE) {
- get_page(page);
- TCP_PAGE(sk) = page;
-- }
-+ } else
-+ ub_sock_tcp_detachpage(sk);
- }
-
- TCP_OFF(sk) = off + copy;
-@@ -843,10 +888,15 @@ new_segment:
- wait_for_sndbuf:
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- wait_for_memory:
-+ ub_sock_retwres_tcp(sk, chargesize,
-+ skb_charge_size(MAX_TCP_HEADER+tp->mss_cache));
-+ chargesize = 0;
-+wait_for_ubspace:
- if (copied)
- tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
-
-- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
-+ if ((err = sk_stream_wait_memory(sk, &timeo,
-+ chargesize)) != 0)
- goto do_error;
-
- mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-@@ -944,7 +994,18 @@ static void cleanup_rbuf(struct sock *sk
- #if TCP_DEBUG
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
-
-- BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
-+ if (!(skb==NULL || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq))) {
-+ printk("KERNEL: assertion: skb==NULL || "
-+ "before(tp->copied_seq, skb->end_seq)\n");
-+ printk("VE%u pid %d comm %.16s\n",
-+ (get_exec_env() ? VEID(get_exec_env()) : 0),
-+ current->pid, current->comm);
-+ printk("copied=%d, copied_seq=%d, rcv_nxt=%d\n", copied,
-+ tp->copied_seq, tp->rcv_nxt);
-+ printk("skb->len=%d, skb->seq=%d, skb->end_seq=%d\n",
-+ skb->len, TCP_SKB_CB(skb)->seq,
-+ TCP_SKB_CB(skb)->end_seq);
-+ }
- #endif
-
- if (inet_csk_ack_scheduled(sk)) {
-@@ -1168,7 +1229,22 @@ int tcp_recvmsg(struct kiocb *iocb, stru
- goto found_ok_skb;
- if (skb->h.th->fin)
- goto found_fin_ok;
-- BUG_TRAP(flags & MSG_PEEK);
-+ if (!(flags & MSG_PEEK)) {
-+ printk("KERNEL: assertion: flags&MSG_PEEK\n");
-+ printk("VE%u pid %d comm %.16s\n",
-+ (get_exec_env() ?
-+ VEID(get_exec_env()) : 0),
-+ current->pid, current->comm);
-+ printk("flags=0x%x, len=%d, copied_seq=%d, "
-+ "rcv_nxt=%d\n", flags, len,
-+ tp->copied_seq, tp->rcv_nxt);
-+ printk("skb->len=%d, *seq=%d, skb->seq=%d, "
-+ "skb->end_seq=%d, offset=%d\n",
-+ skb->len, *seq,
-+ TCP_SKB_CB(skb)->seq,
-+ TCP_SKB_CB(skb)->end_seq,
-+ offset);
-+ }
- skb = skb->next;
- } while (skb != (struct sk_buff *)&sk->sk_receive_queue);
-
-@@ -1231,8 +1307,18 @@ int tcp_recvmsg(struct kiocb *iocb, stru
-
- tp->ucopy.len = len;
-
-- BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
-- (flags & (MSG_PEEK | MSG_TRUNC)));
-+ if (!(tp->copied_seq == tp->rcv_nxt ||
-+ (flags&(MSG_PEEK|MSG_TRUNC)))) {
-+ printk("KERNEL: assertion: tp->copied_seq == "
-+ "tp->rcv_nxt || ...\n");
-+ printk("VE%u pid %d comm %.16s\n",
-+ (get_exec_env() ?
-+ VEID(get_exec_env()) : 0),
-+ current->pid, current->comm);
-+ printk("flags=0x%x, len=%d, copied_seq=%d, "
-+ "rcv_nxt=%d\n", flags, len,
-+ tp->copied_seq, tp->rcv_nxt);
-+ }
-
- /* Ugly... If prequeue is not empty, we have to
- * process it before releasing socket, otherwise
-@@ -1583,7 +1669,7 @@ adjudge_to_death:
- if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
- } else {
-- atomic_inc(sk->sk_prot->orphan_count);
-+ ub_inc_orphan_count(sk);
- tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
- goto out;
- }
-@@ -1591,9 +1677,7 @@ adjudge_to_death:
- }
- if (sk->sk_state != TCP_CLOSE) {
- sk_stream_mem_reclaim(sk);
-- if (atomic_read(sk->sk_prot->orphan_count) > sysctl_tcp_max_orphans ||
-- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
-+ if (ub_too_many_orphans(sk, ub_get_orphan_count(sk))) {
- if (net_ratelimit())
- printk(KERN_INFO "TCP: too many of orphaned "
- "sockets\n");
-@@ -1602,7 +1686,7 @@ adjudge_to_death:
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
- }
- }
-- atomic_inc(sk->sk_prot->orphan_count);
-+ ub_inc_orphan_count(sk);
-
- if (sk->sk_state == TCP_CLOSE)
- inet_csk_destroy_sock(sk);
-@@ -2051,7 +2135,7 @@ void __init tcp_init(void)
- tcp_hashinfo.bind_bucket_cachep =
- kmem_cache_create("tcp_bind_bucket",
- sizeof(struct inet_bind_bucket), 0,
-- SLAB_HWCACHE_ALIGN, NULL, NULL);
-+ SLAB_HWCACHE_ALIGN | SLAB_UBC, NULL, NULL);
- if (!tcp_hashinfo.bind_bucket_cachep)
- panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
-
-diff -upr linux-2.6.16.orig/net/ipv4/tcp_input.c linux-2.6.16-026test009/net/ipv4/tcp_input.c
---- linux-2.6.16.orig/net/ipv4/tcp_input.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/tcp_input.c 2006-04-19 15:02:12.000000000 +0400
-@@ -72,6 +72,8 @@
- #include <linux/ipsec.h>
- #include <asm/unaligned.h>
-
-+#include <ub/ub_tcp.h>
-+
- int sysctl_tcp_timestamps = 1;
- int sysctl_tcp_window_scaling = 1;
- int sysctl_tcp_sack = 1;
-@@ -252,7 +254,7 @@ static void tcp_grow_window(struct sock
- /* Check #1 */
- if (tp->rcv_ssthresh < tp->window_clamp &&
- (int)tp->rcv_ssthresh < tcp_space(sk) &&
-- !tcp_memory_pressure) {
-+ ub_tcp_rmem_allows_expand(sk)) {
- int incr;
-
- /* Check #2. Increase window, if skb with such overhead
-@@ -321,6 +323,8 @@ static void tcp_init_buffer_space(struct
-
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
- tp->snd_cwnd_stamp = tcp_time_stamp;
-+
-+ ub_tcp_update_maxadvmss(sk);
- }
-
- /* 5. Recalculate window clamp after socket hit its memory bounds. */
-@@ -332,7 +336,7 @@ static void tcp_clamp_window(struct sock
-
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-- !tcp_memory_pressure &&
-+ !ub_tcp_memory_pressure(sk) &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
- sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
-@@ -3118,7 +3122,7 @@ queue_and_out:
- !sk_stream_rmem_schedule(sk, skb))) {
- if (tcp_prune_queue(sk) < 0 ||
- !sk_stream_rmem_schedule(sk, skb))
-- goto drop;
-+ goto drop_part;
- }
- sk_stream_set_owner_r(skb, sk);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
-@@ -3162,6 +3166,12 @@ out_of_window:
- drop:
- __kfree_skb(skb);
- return;
-+
-+drop_part:
-+ if (after(tp->copied_seq, tp->rcv_nxt))
-+ tp->rcv_nxt = tp->copied_seq;
-+ __kfree_skb(skb);
-+ return;
- }
-
- /* Out of window. F.e. zero window probe. */
-@@ -3333,6 +3343,10 @@ tcp_collapse(struct sock *sk, struct sk_
- nskb = alloc_skb(copy+header, GFP_ATOMIC);
- if (!nskb)
- return;
-+ if (ub_tcprcvbuf_charge_forced(skb->sk, nskb) < 0) {
-+ kfree_skb(nskb);
-+ return;
-+ }
- skb_reserve(nskb, header);
- memcpy(nskb->head, skb->head, header);
- nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
-@@ -3429,7 +3443,7 @@ static int tcp_prune_queue(struct sock *
-
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- tcp_clamp_window(sk, tp);
-- else if (tcp_memory_pressure)
-+ else if (ub_tcp_memory_pressure(sk))
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
-
- tcp_collapse_ofo_queue(sk);
-@@ -3505,7 +3519,7 @@ static int tcp_should_expand_sndbuf(stru
- return 0;
-
- /* If we are under global TCP memory pressure, do not expand. */
-- if (tcp_memory_pressure)
-+ if (ub_tcp_memory_pressure(sk))
- return 0;
-
- /* If we are under soft global TCP memory pressure, do not expand. */
-@@ -3898,6 +3912,10 @@ int tcp_rcv_established(struct sock *sk,
-
- if ((int)skb->truesize > sk->sk_forward_alloc)
- goto step5;
-+ /* This is OK not to try to free memory here.
-+ * Do this below on slow path. Den */
-+ if (ub_tcprcvbuf_charge(sk, skb) < 0)
-+ goto step5;
-
- NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
-
-diff -upr linux-2.6.16.orig/net/ipv4/tcp_ipv4.c linux-2.6.16-026test009/net/ipv4/tcp_ipv4.c
---- linux-2.6.16.orig/net/ipv4/tcp_ipv4.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/tcp_ipv4.c 2006-04-19 15:02:13.000000000 +0400
-@@ -72,6 +72,8 @@
- #include <net/timewait_sock.h>
- #include <net/xfrm.h>
-
-+#include <ub/ub_tcp.h>
-+
- #include <linux/inet.h>
- #include <linux/ipv6.h>
- #include <linux/stddef.h>
-@@ -705,6 +707,7 @@ struct request_sock_ops tcp_request_sock
- .destructor = tcp_v4_reqsk_destructor,
- .send_reset = tcp_v4_send_reset,
- };
-+EXPORT_SYMBOL_GPL(tcp_request_sock_ops);
-
- static struct timewait_sock_ops tcp_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct tcp_timewait_sock),
-@@ -979,12 +982,15 @@ static int tcp_v4_checksum_init(struct s
- */
- int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
- {
-+ struct user_beancounter *ub;
-+
-+ ub = set_exec_ub(sock_bc(sk)->ub);
- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- TCP_CHECK_TIMER(sk);
- if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
- goto reset;
- TCP_CHECK_TIMER(sk);
-- return 0;
-+ goto restore_context;
- }
-
- if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
-@@ -998,7 +1004,7 @@ int tcp_v4_do_rcv(struct sock *sk, struc
- if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb))
- goto reset;
-- return 0;
-+ goto restore_context;
- }
- }
-
-@@ -1006,6 +1012,9 @@ int tcp_v4_do_rcv(struct sock *sk, struc
- if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
- goto reset;
- TCP_CHECK_TIMER(sk);
-+
-+restore_context:
-+ (void)set_exec_ub(ub);
- return 0;
-
- reset:
-@@ -1017,7 +1026,7 @@ discard:
- * might be destroyed here. This current version compiles correctly,
- * but you have been warned.
- */
-- return 0;
-+ goto restore_context;
-
- csum_err:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-@@ -1302,6 +1311,8 @@ int tcp_v4_destroy_sock(struct sock *sk)
- * If sendmsg cached page exists, toss it.
- */
- if (sk->sk_sndmsg_page) {
-+ /* queue is empty, uncharge */
-+ ub_sock_tcp_detachpage(sk);
- __free_page(sk->sk_sndmsg_page);
- sk->sk_sndmsg_page = NULL;
- }
-@@ -1316,16 +1327,34 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
- #ifdef CONFIG_PROC_FS
- /* Proc filesystem TCP sock list dumping. */
-
--static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
-+static inline struct inet_timewait_sock *tw_head(struct hlist_head *head,
-+ envid_t veid)
- {
-- return hlist_empty(head) ? NULL :
-- list_entry(head->first, struct inet_timewait_sock, tw_node);
-+ struct inet_timewait_sock *tw;
-+ struct hlist_node *pos;
-+
-+ if (hlist_empty(head))
-+ return NULL;
-+ hlist_for_each_entry(tw, pos, head, tw_node) {
-+ if (!ve_accessible_veid(tw->tw_owner_env, veid))
-+ continue;
-+ return tw;
-+ }
-+ return NULL;
- }
-
--static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
-+static inline struct inet_timewait_sock *
-+ tw_next(struct inet_timewait_sock *tw, envid_t veid)
- {
-- return tw->tw_node.next ?
-- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
-+ while (1) {
-+ if (tw->tw_node.next == NULL)
-+ return NULL;
-+ tw = hlist_entry(tw->tw_node.next, typeof(*tw), tw_node);
-+ if (!ve_accessible_veid(tw->tw_owner_env, veid))
-+ continue;
-+ return tw;
-+ }
-+ return NULL; /* make compiler happy */
- }
-
- static void *listening_get_next(struct seq_file *seq, void *cur)
-@@ -1334,7 +1363,9 @@ static void *listening_get_next(struct s
- struct hlist_node *node;
- struct sock *sk = cur;
- struct tcp_iter_state* st = seq->private;
-+ struct ve_struct *ve;
-
-+ ve = get_exec_env();
- if (!sk) {
- st->bucket = 0;
- sk = sk_head(&tcp_hashinfo.listening_hash[0]);
-@@ -1374,6 +1405,8 @@ get_req:
- }
- get_sk:
- sk_for_each_from(sk, node) {
-+ if (!ve_accessible(VE_OWNER_SK(sk), ve))
-+ continue;
- if (sk->sk_family == st->family) {
- cur = sk;
- goto out;
-@@ -1414,7 +1447,9 @@ static void *established_get_first(struc
- {
- struct tcp_iter_state* st = seq->private;
- void *rc = NULL;
-+ struct ve_struct *ve;
-
-+ ve = get_exec_env();
- for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
- struct sock *sk;
- struct hlist_node *node;
-@@ -1425,6 +1460,8 @@ static void *established_get_first(struc
-
- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-+ if (!ve_accessible(VE_OWNER_SK(sk), ve))
-+ continue;
- if (sk->sk_family != st->family) {
- continue;
- }
-@@ -1434,6 +1471,8 @@ static void *established_get_first(struc
- st->state = TCP_SEQ_STATE_TIME_WAIT;
- inet_twsk_for_each(tw, node,
- &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
-+ if (!ve_accessible_veid(tw->tw_owner_env, VEID(ve)))
-+ continue;
- if (tw->tw_family != st->family) {
- continue;
- }
-@@ -1453,16 +1492,17 @@ static void *established_get_next(struct
- struct inet_timewait_sock *tw;
- struct hlist_node *node;
- struct tcp_iter_state* st = seq->private;
-+ struct ve_struct *ve;
-
-+ ve = get_exec_env();
- ++st->num;
-
- if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
- tw = cur;
-- tw = tw_next(tw);
-+ tw = tw_next(tw, VEID(ve));
- get_tw:
-- while (tw && tw->tw_family != st->family) {
-- tw = tw_next(tw);
-- }
-+ while (tw && tw->tw_family != st->family)
-+ tw = tw_next(tw, VEID(ve));
- if (tw) {
- cur = tw;
- goto out;
-@@ -1484,12 +1524,15 @@ get_tw:
- sk = sk_next(sk);
-
- sk_for_each_from(sk, node) {
-+ if (!ve_accessible(VE_OWNER_SK(sk), ve))
-+ continue;
- if (sk->sk_family == st->family)
- goto found;
- }
-
- st->state = TCP_SEQ_STATE_TIME_WAIT;
-- tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
-+ tw = tw_head(&tcp_hashinfo.ehash[st->bucket +
-+ tcp_hashinfo.ehash_size].chain, VEID(ve));
- goto get_tw;
- found:
- cur = sk;
-@@ -1635,7 +1678,12 @@ int tcp_proc_register(struct tcp_seq_afi
- afinfo->seq_fops->llseek = seq_lseek;
- afinfo->seq_fops->release = seq_release_private;
-
-- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
-+ if (*(afinfo->name) == 'n')
-+ p = proc_glob_fops_create(afinfo->name, S_IRUGO,
-+ afinfo->seq_fops);
-+ else
-+ p = proc_net_fops_create(afinfo->name, S_IRUGO,
-+ afinfo->seq_fops);
- if (p)
- p->data = afinfo;
- else
-@@ -1647,7 +1695,10 @@ void tcp_proc_unregister(struct tcp_seq_
- {
- if (!afinfo)
- return;
-- proc_net_remove(afinfo->name);
-+ if (*(afinfo->name) == 'n')
-+ remove_proc_glob_entry(afinfo->name, NULL);
-+ else
-+ proc_net_remove(afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
- }
-
-@@ -1777,7 +1828,7 @@ out:
- static struct file_operations tcp4_seq_fops;
- static struct tcp_seq_afinfo tcp4_seq_afinfo = {
- .owner = THIS_MODULE,
-- .name = "tcp",
-+ .name = "net/tcp",
- .family = AF_INET,
- .seq_show = tcp4_seq_show,
- .seq_fops = &tcp4_seq_fops,
-@@ -1844,6 +1895,86 @@ void __init tcp_v4_init(struct net_proto
- tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
- }
-
-+#if defined(CONFIG_VE_NETDEV) || defined(CONFIG_VE_NETDEV_MODULE)
-+static void tcp_kill_ve_onesk(struct sock *sk)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+
-+ /* Check the assumed state of the socket. */
-+ if (!sock_flag(sk, SOCK_DEAD)) {
-+ static int printed;
-+invalid:
-+ if (!printed)
-+ printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
-+ "wrseq %u unseq %u, wrqu %d.\n",
-+ sock_flag(sk, SOCK_DEAD), sk->sk_state,
-+ tp->write_seq, tp->snd_una,
-+ !skb_queue_empty(&sk->sk_write_queue));
-+ printed = 1;
-+ return;
-+ }
-+
-+ tcp_send_active_reset(sk, GFP_ATOMIC);
-+ switch (sk->sk_state) {
-+ case TCP_FIN_WAIT1:
-+ case TCP_CLOSING:
-+ /* In these 2 states the peer may want us to retransmit
-+ * some data and/or FIN. Entering "resetting mode"
-+ * instead.
-+ */
-+ tcp_time_wait(sk, TCP_CLOSE, 0);
-+ break;
-+ case TCP_FIN_WAIT2:
-+ /* By some reason the socket may stay in this state
-+ * without turning into a TW bucket. Fix it.
-+ */
-+ tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
-+ break;
-+ case TCP_LAST_ACK:
-+ /* Just jump into CLOSED state. */
-+ tcp_done(sk);
-+ break;
-+ default:
-+ /* The socket must be already close()d. */
-+ goto invalid;
-+ }
-+}
-+
-+void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
-+{
-+ struct inet_ehash_bucket *head;
-+ int i;
-+
-+ /* alive */
-+ local_bh_disable();
-+ head = tcp_hashinfo.ehash;
-+ for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
-+ struct sock *sk;
-+ struct hlist_node *node;
-+more_work:
-+ write_lock(&head[i].lock);
-+ sk_for_each(sk, node, &head[i].chain) {
-+ if (ve_accessible_strict(VE_OWNER_SK(sk), envid)) {
-+ sock_hold(sk);
-+ write_unlock(&head[i].lock);
-+
-+ bh_lock_sock(sk);
-+ /* sk might have disappeared from the hash before
-+ * we got the lock */
-+ if (sk->sk_state != TCP_CLOSE)
-+ tcp_kill_ve_onesk(sk);
-+ bh_unlock_sock(sk);
-+ sock_put(sk);
-+ goto more_work;
-+ }
-+ }
-+ write_unlock(&head[i].lock);
-+ }
-+ local_bh_enable();
-+}
-+EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
-+#endif
-+
- EXPORT_SYMBOL(ipv4_specific);
- EXPORT_SYMBOL(tcp_hashinfo);
- EXPORT_SYMBOL(tcp_prot);
-diff -upr linux-2.6.16.orig/net/ipv4/tcp_minisocks.c linux-2.6.16-026test009/net/ipv4/tcp_minisocks.c
---- linux-2.6.16.orig/net/ipv4/tcp_minisocks.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/tcp_minisocks.c 2006-04-19 15:02:12.000000000 +0400
-@@ -29,6 +29,8 @@
- #include <net/inet_common.h>
- #include <net/xfrm.h>
-
-+#include <ub/ub_net.h>
-+
- #ifdef CONFIG_SYSCTL
- #define SYNC_INIT 0 /* let the user enable it */
- #else
-@@ -307,6 +309,8 @@ void tcp_time_wait(struct sock *sk, int
- tw->tw_ipv6only = np->ipv6only;
- }
- #endif
-+ tw->tw_owner_env = VEID(VE_OWNER_SK(sk));
-+
- /* Linkage updates. */
- __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
-
-@@ -355,6 +359,8 @@ struct sock *tcp_create_openreq_child(st
- struct tcp_sock *newtp;
-
- /* Now setup tcp_sock */
-+ SET_VE_OWNER_SK(newsk, VE_OWNER_SK(sk));
-+
- newtp = tcp_sk(newsk);
- newtp->pred_flags = 0;
- newtp->rcv_nxt = treq->rcv_isn + 1;
-diff -upr linux-2.6.16.orig/net/ipv4/tcp_output.c linux-2.6.16-026test009/net/ipv4/tcp_output.c
---- linux-2.6.16.orig/net/ipv4/tcp_output.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/tcp_output.c 2006-04-19 15:02:12.000000000 +0400
-@@ -42,6 +42,9 @@
- #include <linux/module.h>
- #include <linux/smp_lock.h>
-
-+#include <ub/ub_net.h>
-+#include <ub/ub_tcp.h>
-+
- /* People can turn this off for buggy TCP's found in printers etc. */
- int sysctl_tcp_retrans_collapse = 1;
-
-@@ -528,15 +531,23 @@ int tcp_fragment(struct sock *sk, struct
- if (nsize < 0)
- nsize = 0;
-
-- if (skb_cloned(skb) &&
-- skb_is_nonlinear(skb) &&
-- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-- return -ENOMEM;
-+ if (skb_cloned(skb) && skb_is_nonlinear(skb)) {
-+ unsigned long chargesize;
-+ chargesize = skb_bc(skb)->charged;
-+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-+ return -ENOMEM;
-+ ub_sock_retwres_tcp(sk, chargesize, chargesize);
-+ ub_tcpsndbuf_charge_forced(sk, skb);
-+ }
-
- /* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
- if (buff == NULL)
- return -ENOMEM; /* We'll just try again later. */
-+ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
-+ kfree_skb(buff);
-+ return -ENOMEM;
-+ }
- sk_charge_skb(sk, buff);
-
- /* Correct the sequence numbers. */
-@@ -978,6 +989,11 @@ static int tso_fragment(struct sock *sk,
- if (unlikely(buff == NULL))
- return -ENOMEM;
-
-+ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
-+ kfree_skb(buff);
-+ return -ENOMEM;
-+ }
-+
- buff->truesize = nlen;
- skb->truesize -= nlen;
-
-@@ -1281,7 +1297,7 @@ u32 __tcp_select_window(struct sock *sk)
- if (free_space < full_space/2) {
- icsk->icsk_ack.quick = 0;
-
-- if (tcp_memory_pressure)
-+ if (ub_tcp_shrink_rcvbuf(sk))
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
-
- if (free_space < mss)
-@@ -1708,6 +1724,7 @@ void tcp_send_fin(struct sock *sk)
- break;
- yield();
- }
-+ ub_tcpsndbuf_charge_forced(sk, skb);
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_TCP_HEADER);
-@@ -1777,6 +1794,10 @@ int tcp_send_synack(struct sock *sk)
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
- if (nskb == NULL)
- return -ENOMEM;
-+ if (ub_tcpsndbuf_charge(sk, skb) < 0) {
-+ kfree_skb(nskb);
-+ return -ENOMEM;
-+ }
- __skb_unlink(skb, &sk->sk_write_queue);
- skb_header_release(nskb);
- __skb_queue_head(&sk->sk_write_queue, nskb);
-@@ -1928,6 +1949,10 @@ int tcp_connect(struct sock *sk)
- buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
- if (unlikely(buff == NULL))
- return -ENOBUFS;
-+ if (ub_tcpsndbuf_charge(sk, buff) < 0) {
-+ kfree_skb(buff);
-+ return -ENOBUFS;
-+ }
-
- /* Reserve space for headers. */
- skb_reserve(buff, MAX_TCP_HEADER);
-diff -upr linux-2.6.16.orig/net/ipv4/tcp_timer.c linux-2.6.16-026test009/net/ipv4/tcp_timer.c
---- linux-2.6.16.orig/net/ipv4/tcp_timer.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/tcp_timer.c 2006-04-19 15:02:12.000000000 +0400
-@@ -22,6 +22,8 @@
-
- #include <linux/module.h>
- #include <net/tcp.h>
-+#include <ub/ub_orphan.h>
-+#include <ub/ub_tcp.h>
-
- int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
- int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
-@@ -67,7 +69,7 @@ static void tcp_write_err(struct sock *s
- static int tcp_out_of_resources(struct sock *sk, int do_reset)
- {
- struct tcp_sock *tp = tcp_sk(sk);
-- int orphans = atomic_read(&tcp_orphan_count);
-+ int orphans = ub_get_orphan_count(sk);
-
- /* If peer does not open window for long time, or did not transmit
- * anything for long time, penalize it. */
-@@ -78,9 +80,7 @@ static int tcp_out_of_resources(struct s
- if (sk->sk_err_soft)
- orphans <<= 1;
-
-- if (orphans >= sysctl_tcp_max_orphans ||
-- (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
-- atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
-+ if (ub_too_many_orphans(sk, orphans)) {
- if (net_ratelimit())
- printk(KERN_INFO "Out of socket memory\n");
-
-@@ -173,9 +173,12 @@ static int tcp_write_timeout(struct sock
- static void tcp_delack_timer(unsigned long data)
- {
- struct sock *sk = (struct sock*)data;
-+ struct ve_struct *env;
- struct tcp_sock *tp = tcp_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
-
-+ env = set_exec_env(VE_OWNER_SK(sk));
-+
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later. */
-@@ -224,11 +227,12 @@ static void tcp_delack_timer(unsigned lo
- TCP_CHECK_TIMER(sk);
-
- out:
-- if (tcp_memory_pressure)
-+ if (ub_tcp_memory_pressure(sk))
- sk_stream_mem_reclaim(sk);
- out_unlock:
- bh_unlock_sock(sk);
- sock_put(sk);
-+ (void)set_exec_env(env);
- }
-
- static void tcp_probe_timer(struct sock *sk)
-@@ -283,8 +287,11 @@ static void tcp_probe_timer(struct sock
- static void tcp_retransmit_timer(struct sock *sk)
- {
- struct tcp_sock *tp = tcp_sk(sk);
-+ struct ve_struct *env;
- struct inet_connection_sock *icsk = inet_csk(sk);
-
-+ env = set_exec_env(VE_OWNER_SK(sk));
-+
- if (!tp->packets_out)
- goto out;
-
-@@ -381,15 +388,19 @@ out_reset_timer:
- if (icsk->icsk_retransmits > sysctl_tcp_retries1)
- __sk_dst_reset(sk);
-
--out:;
-+out:
-+ (void)set_exec_env(env);
- }
-
- static void tcp_write_timer(unsigned long data)
- {
- struct sock *sk = (struct sock*)data;
-+ struct ve_struct *env;
- struct inet_connection_sock *icsk = inet_csk(sk);
- int event;
-
-+ env = set_exec_env(VE_OWNER_SK(sk));
-+
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later */
-@@ -423,6 +434,7 @@ out:
- out_unlock:
- bh_unlock_sock(sk);
- sock_put(sk);
-+ (void)set_exec_env(env);
- }
-
- /*
-@@ -450,10 +462,13 @@ void tcp_set_keepalive(struct sock *sk,
- static void tcp_keepalive_timer (unsigned long data)
- {
- struct sock *sk = (struct sock *) data;
-+ struct ve_struct *env;
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- __u32 elapsed;
-
-+ env = set_exec_env(VE_OWNER_SK(sk));
-+
- /* Only process if socket is not in use. */
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
-@@ -525,4 +540,5 @@ death:
- out:
- bh_unlock_sock(sk);
- sock_put(sk);
-+ (void)set_exec_env(env);
- }
-diff -upr linux-2.6.16.orig/net/ipv4/udp.c linux-2.6.16-026test009/net/ipv4/udp.c
---- linux-2.6.16.orig/net/ipv4/udp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv4/udp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -127,7 +127,9 @@ static int udp_v4_get_port(struct sock *
- struct hlist_node *node;
- struct sock *sk2;
- struct inet_sock *inet = inet_sk(sk);
-+ struct ve_struct *env;
-
-+ env = VE_OWNER_SK(sk);
- write_lock_bh(&udp_hash_lock);
- if (snum == 0) {
- int best_size_so_far, best, result, i;
-@@ -141,7 +143,7 @@ static int udp_v4_get_port(struct sock *
- struct hlist_head *list;
- int size;
-
-- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-+ list = &udp_hash[udp_hashfn(result, VEID(env))];
- if (hlist_empty(list)) {
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0] +
-@@ -163,7 +165,7 @@ static int udp_v4_get_port(struct sock *
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) &
- (UDP_HTABLE_SIZE - 1));
-- if (!udp_lport_inuse(result))
-+ if (!udp_lport_inuse(result, env))
- break;
- }
- if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-@@ -172,11 +174,12 @@ gotit:
- udp_port_rover = snum = result;
- } else {
- sk_for_each(sk2, node,
-- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-+ &udp_hash[udp_hashfn(snum, VEID(env))]) {
- struct inet_sock *inet2 = inet_sk(sk2);
-
- if (inet2->num == snum &&
- sk2 != sk &&
-+ ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
- !ipv6_only_sock(sk2) &&
- (!sk2->sk_bound_dev_if ||
- !sk->sk_bound_dev_if ||
-@@ -190,7 +193,7 @@ gotit:
- }
- inet->num = snum;
- if (sk_unhashed(sk)) {
-- struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
-+ struct hlist_head *h = &udp_hash[udp_hashfn(snum, VEID(env))];
-
- sk_add_node(sk, h);
- sock_prot_inc_use(sk->sk_prot);
-@@ -228,11 +231,15 @@ static struct sock *udp_v4_lookup_longwa
- struct hlist_node *node;
- unsigned short hnum = ntohs(dport);
- int badness = -1;
-+ struct ve_struct *env;
-
-- sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) {
-+ env = get_exec_env();
-+ sk_for_each(sk, node, &udp_hash[udp_hashfn(hnum, VEID(env))]) {
- struct inet_sock *inet = inet_sk(sk);
-
-- if (inet->num == hnum && !ipv6_only_sock(sk)) {
-+ if (inet->num == hnum &&
-+ ve_accessible_strict(VE_OWNER_SK(sk), env) &&
-+ !ipv6_only_sock(sk)) {
- int score = (sk->sk_family == PF_INET ? 1 : 0);
- if (inet->rcv_saddr) {
- if (inet->rcv_saddr != daddr)
-@@ -1049,7 +1056,8 @@ static int udp_v4_mcast_deliver(struct s
- int dif;
-
- read_lock(&udp_hash_lock);
-- sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
-+ sk = sk_head(&udp_hash[udp_hashfn(ntohs(uh->dest),
-+ VEID(VE_OWNER_SKB(skb)))]);
- dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
- if (sk) {
-@@ -1367,10 +1375,14 @@ static struct sock *udp_get_first(struct
- {
- struct sock *sk;
- struct udp_iter_state *state = seq->private;
-+ struct ve_struct *env;
-
-+ env = get_exec_env();
- for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
- struct hlist_node *node;
- sk_for_each(sk, node, &udp_hash[state->bucket]) {
-+ if (!ve_accessible(VE_OWNER_SK(sk), env))
-+ continue;
- if (sk->sk_family == state->family)
- goto found;
- }
-@@ -1387,8 +1399,13 @@ static struct sock *udp_get_next(struct
- do {
- sk = sk_next(sk);
- try_again:
-- ;
-- } while (sk && sk->sk_family != state->family);
-+ if (!sk)
-+ break;
-+ if (sk->sk_family != state->family)
-+ continue;
-+ if (ve_accessible(VE_OWNER_SK(sk), get_exec_env()))
-+ break;
-+ } while (1);
-
- if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
- sk = sk_head(&udp_hash[state->bucket]);
-@@ -1474,7 +1491,12 @@ int udp_proc_register(struct udp_seq_afi
- afinfo->seq_fops->llseek = seq_lseek;
- afinfo->seq_fops->release = seq_release_private;
-
-- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
-+ if (*(afinfo->name) == 'n')
-+ p = proc_glob_fops_create(afinfo->name, S_IRUGO,
-+ afinfo->seq_fops);
-+ else
-+ p = proc_net_fops_create(afinfo->name, S_IRUGO,
-+ afinfo->seq_fops);
- if (p)
- p->data = afinfo;
- else
-@@ -1486,7 +1508,10 @@ void udp_proc_unregister(struct udp_seq_
- {
- if (!afinfo)
- return;
-- proc_net_remove(afinfo->name);
-+ if (*(afinfo->name) == 'n')
-+ remove_proc_glob_entry(afinfo->name, NULL);
-+ else
-+ proc_net_remove(afinfo->name);
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
- }
-
-@@ -1529,7 +1554,7 @@ static int udp4_seq_show(struct seq_file
- static struct file_operations udp4_seq_fops;
- static struct udp_seq_afinfo udp4_seq_afinfo = {
- .owner = THIS_MODULE,
-- .name = "udp",
-+ .name = "net/udp",
- .family = AF_INET,
- .seq_show = udp4_seq_show,
- .seq_fops = &udp4_seq_fops,
-diff -upr linux-2.6.16.orig/net/ipv6/addrconf.c linux-2.6.16-026test009/net/ipv6/addrconf.c
---- linux-2.6.16.orig/net/ipv6/addrconf.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv6/addrconf.c 2006-04-19 15:02:12.000000000 +0400
-@@ -2153,6 +2153,10 @@ static int addrconf_notify(struct notifi
- struct inet6_dev *idev = __in6_dev_get(dev);
- int run_pending = 0;
-
-+ /* not virtualized yet */
-+ if (!ve_is_super(get_exec_env()))
-+ return NOTIFY_OK;
-+
- switch(event) {
- case NETDEV_UP:
- case NETDEV_CHANGE:
-diff -upr linux-2.6.16.orig/net/ipv6/inet6_hashtables.c linux-2.6.16-026test009/net/ipv6/inet6_hashtables.c
---- linux-2.6.16.orig/net/ipv6/inet6_hashtables.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv6/inet6_hashtables.c 2006-04-19 15:02:12.000000000 +0400
-@@ -33,7 +33,7 @@ struct sock *inet6_lookup_listener(struc
- int score, hiscore = 0;
-
- read_lock(&hashinfo->lhash_lock);
-- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
-+ sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum, 0)]) {
- if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
-
-@@ -173,7 +173,9 @@ int inet6_hash_connect(struct inet_timew
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
-+ struct ve_struct *ve;
-
-+ ve = VE_OWNER_SK(sk);
- if (snum == 0) {
- const int low = sysctl_local_port_range[0];
- const int high = sysctl_local_port_range[1];
-@@ -187,7 +189,8 @@ int inet6_hash_connect(struct inet_timew
- local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
-- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(port,
-+ hinfo->bhash_size, VEID(ve))];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
-@@ -208,7 +211,7 @@ int inet6_hash_connect(struct inet_timew
- }
-
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-- head, port);
-+ head, port, ve);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
-@@ -243,7 +246,7 @@ ok:
- goto out;
- }
-
-- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size, VEID(ve))];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
-
-diff -upr linux-2.6.16.orig/net/ipv6/route.c linux-2.6.16-026test009/net/ipv6/route.c
---- linux-2.6.16.orig/net/ipv6/route.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv6/route.c 2006-04-19 15:02:12.000000000 +0400
-@@ -113,7 +113,6 @@ struct rt6_info ip6_null_entry = {
- .dst = {
- .__refcnt = ATOMIC_INIT(1),
- .__use = 1,
-- .dev = &loopback_dev,
- .obsolete = -1,
- .error = -ENETUNREACH,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-@@ -2121,6 +2120,7 @@ void __init ip6_route_init(void)
- #ifdef CONFIG_XFRM
- xfrm6_init();
- #endif
-+ ip6_null_entry.u.dst.dev = &loopback_dev;
- }
-
- void ip6_route_cleanup(void)
-diff -upr linux-2.6.16.orig/net/ipv6/udp.c linux-2.6.16-026test009/net/ipv6/udp.c
---- linux-2.6.16.orig/net/ipv6/udp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/ipv6/udp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -69,7 +69,9 @@ static int udp_v6_get_port(struct sock *
- {
- struct sock *sk2;
- struct hlist_node *node;
-+ struct ve_struct *env;
-
-+ env = VE_OWNER_SK(sk);
- write_lock_bh(&udp_hash_lock);
- if (snum == 0) {
- int best_size_so_far, best, result, i;
-@@ -83,7 +85,7 @@ static int udp_v6_get_port(struct sock *
- int size;
- struct hlist_head *list;
-
-- list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-+ list = &udp_hash[udp_hashfn(result, VEID(env))];
- if (hlist_empty(list)) {
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0] +
-@@ -105,7 +107,7 @@ static int udp_v6_get_port(struct sock *
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) &
- (UDP_HTABLE_SIZE - 1));
-- if (!udp_lport_inuse(result))
-+ if (!udp_lport_inuse(result, env))
- break;
- }
- if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-@@ -114,9 +116,10 @@ gotit:
- udp_port_rover = snum = result;
- } else {
- sk_for_each(sk2, node,
-- &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) {
-+ &udp_hash[udp_hashfn(snum, VEID(env))]) {
- if (inet_sk(sk2)->num == snum &&
- sk2 != sk &&
-+ ve_accessible_strict(VE_OWNER_SK(sk2), env) &&
- (!sk2->sk_bound_dev_if ||
- !sk->sk_bound_dev_if ||
- sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-@@ -128,7 +131,7 @@ gotit:
-
- inet_sk(sk)->num = snum;
- if (sk_unhashed(sk)) {
-- sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]);
-+ sk_add_node(sk, &udp_hash[udp_hashfn(snum, VEID(env))]);
- sock_prot_inc_use(sk->sk_prot);
- }
- write_unlock_bh(&udp_hash_lock);
-diff -upr linux-2.6.16.orig/net/netfilter/core.c linux-2.6.16-026test009/net/netfilter/core.c
---- linux-2.6.16.orig/net/netfilter/core.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/core.c 2006-04-19 15:02:12.000000000 +0400
-@@ -32,16 +32,24 @@
- * of skbuffs queued for userspace, and not deregister a hook unless
- * this is zero, but that sucks. Now, we simply check when the
- * packets come back: if the hook is gone, the packet is discarded. */
-+static DEFINE_SPINLOCK(nf_hook_lock);
-+
- struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
- EXPORT_SYMBOL(nf_hooks);
--static DEFINE_SPINLOCK(nf_hook_lock);
-+#ifdef CONFIG_VE_IPTABLES
-+#define ve_nf_hooks \
-+ ((struct list_head (*)[NF_MAX_HOOKS])(get_exec_env()->_nf_hooks))
-+#else
-+#define ve_nf_hooks nf_hooks
-+#endif
-+
-
- int nf_register_hook(struct nf_hook_ops *reg)
- {
- struct list_head *i;
-
- spin_lock_bh(&nf_hook_lock);
-- list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
-+ list_for_each(i, &ve_nf_hooks[reg->pf][reg->hooknum]) {
- if (reg->priority < ((struct nf_hook_ops *)i)->priority)
- break;
- }
-@@ -53,6 +61,33 @@ int nf_register_hook(struct nf_hook_ops
- }
- EXPORT_SYMBOL(nf_register_hook);
-
-+int virt_nf_register_hook(struct nf_hook_ops *reg)
-+{
-+ int ret = 0;
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ struct nf_hook_ops *tmp;
-+ ret = -ENOMEM;
-+ tmp = kmalloc(sizeof(struct nf_hook_ops), GFP_KERNEL);
-+ if (!tmp)
-+ goto nomem;
-+ memcpy(tmp, reg, sizeof(struct nf_hook_ops));
-+ reg = tmp;
-+ }
-+
-+ ret = nf_register_hook(reg);
-+ if (ret)
-+ goto out;
-+
-+ return 0;
-+out:
-+ if (!ve_is_super(get_exec_env()))
-+ kfree(reg);
-+nomem:
-+ return ret;
-+}
-+EXPORT_SYMBOL(virt_nf_register_hook);
-+
- void nf_unregister_hook(struct nf_hook_ops *reg)
- {
- spin_lock_bh(&nf_hook_lock);
-@@ -63,6 +98,29 @@ void nf_unregister_hook(struct nf_hook_o
- }
- EXPORT_SYMBOL(nf_unregister_hook);
-
-+int virt_nf_unregister_hook(struct nf_hook_ops *reg)
-+{
-+ struct nf_hook_ops *i;
-+
-+ spin_lock_bh(&nf_hook_lock);
-+ list_for_each_entry(i, &ve_nf_hooks[reg->pf][reg->hooknum], list) {
-+ if (reg->hook == i->hook) {
-+ reg = i;
-+ break;
-+ }
-+ }
-+ spin_unlock_bh(&nf_hook_lock);
-+ if (reg != i)
-+ return -ENOENT;
-+
-+ nf_unregister_hook(reg);
-+
-+ if (!ve_is_super(get_exec_env()))
-+ kfree(reg);
-+ return 0;
-+}
-+EXPORT_SYMBOL(virt_nf_unregister_hook);
-+
- unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
- int hook,
-@@ -120,9 +178,9 @@ int nf_hook_slow(int pf, unsigned int ho
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
-
-- elem = &nf_hooks[pf][hook];
-+ elem = &ve_nf_hooks[pf][hook];
- next_hook:
-- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
-+ verdict = nf_iterate(&ve_nf_hooks[pf][hook], pskb, hook, indev,
- outdev, &elem, okfn, hook_thresh);
- if (verdict == NF_ACCEPT || verdict == NF_STOP) {
- ret = 1;
-@@ -195,13 +253,54 @@ struct proc_dir_entry *proc_net_netfilte
- EXPORT_SYMBOL(proc_net_netfilter);
- #endif
-
--void __init netfilter_init(void)
-+void init_nf_hooks(struct list_head (*nh)[NF_MAX_HOOKS])
- {
- int i, h;
- for (i = 0; i < NPROTO; i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
-- INIT_LIST_HEAD(&nf_hooks[i][h]);
-+ INIT_LIST_HEAD(&ve_nf_hooks[i][h]);
- }
-+}
-+
-+int init_netfilter(void)
-+{
-+#ifdef CONFIG_VE_IPTABLES
-+ struct ve_struct *envid;
-+
-+ envid = get_exec_env();
-+ envid->_nf_hooks = kmalloc(sizeof(nf_hooks), GFP_KERNEL);
-+ if (envid->_nf_hooks == NULL)
-+ return -ENOMEM;
-+
-+ /* FIXME: charge ubc */
-+
-+ init_nf_hooks(envid->_nf_hooks);
-+ return 0;
-+#else
-+ init_nf_hooks(nf_hooks);
-+ return 0;
-+#endif
-+}
-+EXPORT_SYMBOL(init_netfilter);
-+
-+#ifdef CONFIG_VE_IPTABLES
-+void fini_netfilter(void)
-+{
-+ struct ve_struct *envid;
-+
-+ envid = get_exec_env();
-+ if (envid->_nf_hooks != NULL)
-+ kfree(envid->_nf_hooks);
-+ envid->_nf_hooks = NULL;
-+
-+ /* FIXME: uncharge ubc */
-+}
-+EXPORT_SYMBOL(fini_netfilter);
-+#endif
-+
-+void __init netfilter_init(void)
-+{
-+ init_netfilter();
-
- #ifdef CONFIG_PROC_FS
- proc_net_netfilter = proc_mkdir("netfilter", proc_net);
-@@ -214,3 +313,4 @@ void __init netfilter_init(void)
- if (netfilter_log_init() < 0)
- panic("cannot initialize nf_log");
- }
-+
-diff -upr linux-2.6.16.orig/net/netfilter/nf_conntrack_netlink.c linux-2.6.16-026test009/net/netfilter/nf_conntrack_netlink.c
---- linux-2.6.16.orig/net/netfilter/nf_conntrack_netlink.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/nf_conntrack_netlink.c 2006-04-19 15:02:11.000000000 +0400
-@@ -1641,7 +1641,7 @@ static void __exit ctnetlink_exit(void)
- printk("ctnetlink: unregistering from nfnetlink.\n");
-
- #ifdef CONFIG_NF_CONNTRACK_EVENTS
-- nf_conntrack_unregister_notifier(&ctnl_notifier_exp);
-+ nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
- nf_conntrack_unregister_notifier(&ctnl_notifier);
- #endif
-
-diff -upr linux-2.6.16.orig/net/netfilter/nf_queue.c linux-2.6.16-026test009/net/netfilter/nf_queue.c
---- linux-2.6.16.orig/net/netfilter/nf_queue.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/nf_queue.c 2006-04-19 15:02:12.000000000 +0400
-@@ -209,12 +209,12 @@ void nf_reinject(struct sk_buff *skb, st
- /* Drop reference to owner of hook which queued us. */
- module_put(info->elem->owner);
-
-- list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
-+ list_for_each_rcu(i, &ve_nf_hooks[info->pf][info->hook]) {
- if (i == elem)
- break;
- }
-
-- if (i == &nf_hooks[info->pf][info->hook]) {
-+ if (i == &ve_nf_hooks[info->pf][info->hook]) {
- /* The module which sent it to userspace is gone. */
- NFDEBUG("%s: module disappeared, dropping packet.\n",
- __FUNCTION__);
-@@ -235,7 +235,7 @@ void nf_reinject(struct sk_buff *skb, st
-
- if (verdict == NF_ACCEPT) {
- next_hook:
-- verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-+ verdict = nf_iterate(&ve_nf_hooks[info->pf][info->hook],
- &skb, info->hook,
- info->indev, info->outdev, &elem,
- info->okfn, INT_MIN);
-diff -upr linux-2.6.16.orig/net/netfilter/nf_sockopt.c linux-2.6.16-026test009/net/netfilter/nf_sockopt.c
---- linux-2.6.16.orig/net/netfilter/nf_sockopt.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/nf_sockopt.c 2006-04-19 15:02:12.000000000 +0400
-@@ -80,6 +80,12 @@ static int nf_sockopt(struct sock *sk, i
- struct nf_sockopt_ops *ops;
- int ret;
-
-+#ifdef CONFIG_VE_IPTABLES
-+ if (!get_exec_env()->_nf_hooks ||
-+ !get_exec_env()->_ipt_standard_target)
-+ return -ENOPROTOOPT;
-+#endif
-+
- if (down_interruptible(&nf_sockopt_mutex) != 0)
- return -EINTR;
-
-diff -upr linux-2.6.16.orig/net/netfilter/x_tables.c linux-2.6.16-026test009/net/netfilter/x_tables.c
---- linux-2.6.16.orig/net/netfilter/x_tables.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/x_tables.c 2006-04-19 15:02:12.000000000 +0400
-@@ -24,6 +24,10 @@
-
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter_arp.h>
-+#include <linux/nfcalls.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_mem.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-@@ -38,7 +42,13 @@ struct xt_af {
- struct list_head tables;
- };
-
-+#ifdef CONFIG_VE_IPTABLES
-+/* include ve.h and define get_exec_env */
-+#include <linux/sched.h>
-+#define xt (get_exec_env()->_xt)
-+#else
- static struct xt_af *xt;
-+#endif
-
- #ifdef DEBUG_IP_FIREWALL_USER
- #define duprintf(format, args...) printk(format , ## args)
-@@ -52,17 +62,52 @@ enum {
- MATCH,
- };
-
-+#ifdef CONFIG_USER_RESOURCE
-+#define UB_NUMXTENT 23
-+static int charge_xtables(struct user_beancounter *ub, unsigned long size)
-+{
-+ if (ub == NULL)
-+ return 0;
-+ return charge_beancounter(ub, UB_NUMXTENT, size, 1);
-+}
-+static void uncharge_xtables(struct user_beancounter *ub, unsigned long size)
-+{
-+ if (ub == NULL)
-+ return;
-+ uncharge_beancounter(ub, UB_NUMXTENT, size);
-+}
-+#endif /* CONFIG_USER_RESOURCE */
-+
- /* Registration hooks for targets. */
- int
- xt_register_target(int af, struct xt_target *target)
- {
- int ret;
-+ struct module *mod = target->me;
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ struct xt_target *tmp;
-+ __module_get(mod);
-+ ret = -ENOMEM;
-+ tmp = ub_kmalloc(sizeof(struct xt_target), GFP_KERNEL);
-+ if (!tmp)
-+ goto nomem;
-+ memcpy(tmp, target, sizeof(struct xt_target));
-+ target = tmp;
-+ }
-
- ret = down_interruptible(&xt[af].mutex);
- if (ret != 0)
-- return ret;
-+ goto out;
- list_add(&target->list, &xt[af].target);
- up(&xt[af].mutex);
-+ return 0;
-+out:
-+ if (!ve_is_super(get_exec_env())) {
-+ kfree(target);
-+nomem:
-+ module_put(mod);
-+ }
- return ret;
- }
- EXPORT_SYMBOL(xt_register_target);
-@@ -71,8 +116,21 @@ void
- xt_unregister_target(int af, struct xt_target *target)
- {
- down(&xt[af].mutex);
-+ if (!ve_is_super(get_exec_env())) {
-+ target = list_named_find(&xt[af].target, target->name);
-+ if (!target) {
-+ up(&xt[af].mutex);
-+ return;
-+ }
-+ }
-+
- LIST_DELETE(&xt[af].target, target);
- up(&xt[af].mutex);
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ module_put(target->me);
-+ kfree(target);
-+ }
- }
- EXPORT_SYMBOL(xt_unregister_target);
-
-@@ -80,14 +138,33 @@ int
- xt_register_match(int af, struct xt_match *match)
- {
- int ret;
-+ struct module *mod = match->me;
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ struct xt_match *tmp;
-+ __module_get(mod);
-+ ret = -ENOMEM;
-+ tmp = ub_kmalloc(sizeof(struct xt_match), GFP_KERNEL);
-+ if (!tmp)
-+ goto nomem;
-+ memcpy(tmp, match, sizeof(struct xt_match));
-+ match = tmp;
-+ }
-
- ret = down_interruptible(&xt[af].mutex);
- if (ret != 0)
-- return ret;
-+ goto out;
-
- list_add(&match->list, &xt[af].match);
- up(&xt[af].mutex);
-
-+ return 0;
-+out:
-+ if (!ve_is_super(get_exec_env())) {
-+ kfree(match);
-+nomem:
-+ module_put(mod);
-+ }
- return ret;
- }
- EXPORT_SYMBOL(xt_register_match);
-@@ -96,8 +173,21 @@ void
- xt_unregister_match(int af, struct xt_match *match)
- {
- down(&xt[af].mutex);
-+ if (!ve_is_super(get_exec_env())) {
-+ match = list_named_find(&xt[af].match, match->name);
-+ if (!match) {
-+ up(&xt[af].mutex);
-+ return;
-+ }
-+ }
-+
- LIST_DELETE(&xt[af].match, match);
- up(&xt[af].mutex);
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ module_put(match->me);
-+ kfree(match);
-+ }
- }
- EXPORT_SYMBOL(xt_unregister_match);
-
-@@ -246,7 +336,7 @@ struct xt_table_info *xt_alloc_table_inf
- if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages)
- return NULL;
-
-- newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL);
-+ newinfo = kzalloc(sizeof(struct xt_table_info), GFP_KERNEL_UBC);
- if (!newinfo)
- return NULL;
-
-@@ -255,10 +345,10 @@ struct xt_table_info *xt_alloc_table_inf
- for_each_cpu(cpu) {
- if (size <= PAGE_SIZE)
- newinfo->entries[cpu] = kmalloc_node(size,
-- GFP_KERNEL,
-+ GFP_KERNEL_UBC,
- cpu_to_node(cpu));
- else
-- newinfo->entries[cpu] = vmalloc_node(size,
-+ newinfo->entries[cpu] = ub_vmalloc_node(size,
- cpu_to_node(cpu));
-
- if (newinfo->entries[cpu] == NULL) {
-@@ -315,6 +405,9 @@ xt_replace_table(struct xt_table *table,
- int *error)
- {
- struct xt_table_info *oldinfo, *private;
-+#ifdef CONFIG_USER_RESOURCE
-+ struct user_beancounter *old_ub, *new_ub;
-+#endif
-
- /* Do the substitution. */
- write_lock_bh(&table->lock);
-@@ -328,6 +421,21 @@ xt_replace_table(struct xt_table *table,
- return NULL;
- }
- oldinfo = private;
-+
-+#ifdef CONFIG_USER_RESOURCE
-+ new_ub = mem_ub(newinfo);
-+ if (charge_xtables(new_ub, newinfo->number)) {
-+ oldinfo = NULL;
-+ write_unlock_bh(&table->lock);
-+ *error = -ENOMEM;
-+ return NULL;
-+ }
-+ if (num_counters) {
-+ old_ub = mem_ub(oldinfo);
-+ uncharge_xtables(old_ub, oldinfo->number);
-+ }
-+#endif
-+
- table->private = newinfo;
- newinfo->initial_entries = oldinfo->initial_entries;
- write_unlock_bh(&table->lock);
-@@ -355,6 +463,7 @@ int xt_register_table(struct xt_table *t
-
- /* Simplifies replace_table code. */
- table->private = bootstrap;
-+ rwlock_init(&table->lock);
- if (!xt_replace_table(table, 0, newinfo, &ret))
- goto unlock;
-
-@@ -364,7 +473,6 @@ int xt_register_table(struct xt_table *t
- /* save number of initial entries */
- private->initial_entries = private->number;
-
-- rwlock_init(&table->lock);
- list_prepend(&xt[table->af].tables, table);
-
- ret = 0;
-@@ -374,6 +482,39 @@ int xt_register_table(struct xt_table *t
- }
- EXPORT_SYMBOL_GPL(xt_register_table);
-
-+struct xt_table * virt_xt_register_table(struct xt_table *table,
-+ struct xt_table_info *bootstrap,
-+ struct xt_table_info *newinfo)
-+{
-+ int ret;
-+ struct module *mod = table->me;
-+
-+ if (!ve_is_super(get_exec_env())) {
-+ struct xt_table *tmp;
-+ __module_get(mod);
-+ ret = -ENOMEM;
-+ tmp = ub_kmalloc(sizeof(struct xt_table), GFP_KERNEL);
-+ if (!tmp)
-+ goto nomem;
-+ memcpy(tmp, table, sizeof(struct xt_table));
-+ table = tmp;
-+ }
-+
-+ ret = xt_register_table(table, bootstrap, newinfo);
-+ if (ret)
-+ goto out;
-+
-+ return table;
-+out:
-+ if (!ve_is_super(get_exec_env())) {
-+ kfree(table);
-+nomem:
-+ module_put(mod);
-+ }
-+ return ERR_PTR(ret);
-+}
-+EXPORT_SYMBOL_GPL(virt_xt_register_table);
-+
- void *xt_unregister_table(struct xt_table *table)
- {
- struct xt_table_info *private;
-@@ -383,10 +524,27 @@ void *xt_unregister_table(struct xt_tabl
- LIST_DELETE(&xt[table->af].tables, table);
- up(&xt[table->af].mutex);
-
-+#ifdef CONFIG_USER_RESOURCE
-+ uncharge_xtables(mem_ub(private), private->number);
-+#endif
-+
- return private;
- }
- EXPORT_SYMBOL_GPL(xt_unregister_table);
-
-+void *virt_xt_unregister_table(struct xt_table *table)
-+{
-+ void *ret;
-+
-+ ret = xt_unregister_table(table);
-+ if (!ve_is_super(get_exec_env())) {
-+ module_put(table->me);
-+ kfree(table);
-+ }
-+ return ret;
-+}
-+EXPORT_SYMBOL_GPL(virt_xt_unregister_table);
-+
- #ifdef CONFIG_PROC_FS
- static char *xt_proto_prefix[NPROTO] = {
- [AF_INET] = "ip",
-@@ -597,10 +755,13 @@ void xt_proto_fini(int af)
- EXPORT_SYMBOL_GPL(xt_proto_fini);
-
-
--static int __init xt_init(void)
-+int init_xtables(void)
- {
- int i;
-
-+ if (xt)
-+ return -EEXIST;
-+
- xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL);
- if (!xt)
- return -ENOMEM;
-@@ -614,11 +775,34 @@ static int __init xt_init(void)
- return 0;
- }
-
--static void __exit xt_fini(void)
-+void fini_xtables(void)
- {
- kfree(xt);
-+ xt = NULL;
-+}
-+
-+static int __init xt_init(void)
-+{
-+ int err;
-+
-+ err = init_xtables();
-+ if (err)
-+ return err;
-+
-+ KSYMRESOLVE(init_xtables);
-+ KSYMRESOLVE(fini_xtables);
-+ KSYMMODRESOLVE(x_tables);
-+ return 0;
-+}
-+
-+static void __exit xt_fini(void)
-+{
-+ KSYMMODUNRESOLVE(x_tables);
-+ KSYMUNRESOLVE(init_xtables);
-+ KSYMUNRESOLVE(fini_xtables);
-+ fini_xtables();
- }
-
--module_init(xt_init);
-+subsys_initcall(xt_init);
- module_exit(xt_fini);
-
-diff -upr linux-2.6.16.orig/net/netfilter/xt_conntrack.c linux-2.6.16-026test009/net/netfilter/xt_conntrack.c
---- linux-2.6.16.orig/net/netfilter/xt_conntrack.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_conntrack.c 2006-04-19 15:02:12.000000000 +0400
-@@ -20,6 +20,8 @@
-
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter/xt_conntrack.h>
-+#include <linux/netfilter_ipv4/ip_tables.h>
-+#include <linux/nfcalls.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-@@ -213,25 +215,145 @@ static int check(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat_to_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct ipt_entry_match *pm;
-+ struct xt_conntrack_info *pinfo;
-+ struct compat_xt_conntrack_info info;
-+ u_int16_t msize;
-+
-+ pm = (struct ipt_entry_match *)match;
-+ msize = pm->u.user.match_size;
-+ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
-+ return -EFAULT;
-+ pinfo = (struct xt_conntrack_info *)pm->data;
-+ memset(&info, 0, sizeof(struct compat_xt_conntrack_info));
-+ info.statemask = pinfo->statemask;
-+ info.statusmask = pinfo->statusmask;
-+ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
-+ sizeof(struct ip_conntrack_tuple));
-+ memcpy(info.sipmsk, pinfo->sipmsk,
-+ IP_CT_DIR_MAX * sizeof(struct in_addr));
-+ memcpy(info.dipmsk, pinfo->dipmsk,
-+ IP_CT_DIR_MAX * sizeof(struct in_addr));
-+ info.expires_min = pinfo->expires_min;
-+ info.expires_max = pinfo->expires_max;
-+ info.flags = pinfo->flags;
-+ info.invflags = pinfo->invflags;
-+ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
-+ &info, sizeof(struct compat_xt_conntrack_info)))
-+ return -EFAULT;
-+ msize -= off;
-+ if (put_user(msize, (u_int16_t *)*dstptr))
-+ return -EFAULT;
-+ *size -= off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int compat_from_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct compat_ipt_entry_match *pm;
-+ struct ipt_entry_match *dstpm;
-+ struct compat_xt_conntrack_info *pinfo;
-+ struct xt_conntrack_info info;
-+ u_int16_t msize;
-+
-+ pm = (struct compat_ipt_entry_match *)match;
-+ dstpm = (struct ipt_entry_match *)*dstptr;
-+ msize = pm->u.user.match_size;
-+ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
-+ pinfo = (struct compat_xt_conntrack_info *)pm->data;
-+ memset(&info, 0, sizeof(struct xt_conntrack_info));
-+ info.statemask = pinfo->statemask;
-+ info.statusmask = pinfo->statusmask;
-+ memcpy(info.tuple, pinfo->tuple, IP_CT_DIR_MAX *
-+ sizeof(struct ip_conntrack_tuple));
-+ memcpy(info.sipmsk, pinfo->sipmsk,
-+ IP_CT_DIR_MAX * sizeof(struct in_addr));
-+ memcpy(info.dipmsk, pinfo->dipmsk,
-+ IP_CT_DIR_MAX * sizeof(struct in_addr));
-+ info.expires_min = pinfo->expires_min;
-+ info.expires_max = pinfo->expires_max;
-+ info.flags = pinfo->flags;
-+ info.invflags = pinfo->invflags;
-+ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
-+ &info, sizeof(struct xt_conntrack_info));
-+ msize += off;
-+ dstpm->u.user.match_size = msize;
-+ *size += off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int compat(void *match, void **dstptr, int *size, int convert)
-+{
-+ int ret, off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_conntrack_info)) -
-+ COMPAT_XT_ALIGN(sizeof(struct compat_xt_conntrack_info));
-+ switch (convert) {
-+ case COMPAT_TO_USER:
-+ ret = compat_to_user(match, dstptr, size, off);
-+ break;
-+ case COMPAT_FROM_USER:
-+ ret = compat_from_user(match, dstptr, size, off);
-+ break;
-+ case COMPAT_CALC_SIZE:
-+ *size += off;
-+ ret = 0;
-+ break;
-+ default:
-+ ret = -ENOPROTOOPT;
-+ break;
-+ }
-+ return ret;
-+}
-+#endif
-+
- static struct xt_match conntrack_match = {
- .name = "conntrack",
- .match = &match,
- .checkentry = &check,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
-+int init_xt_conntrack_match(void)
-+{
-+ return xt_register_match(AF_INET, &conntrack_match);
-+}
-+
-+void fini_xt_conntrack_match(void)
-+{
-+ xt_unregister_match(AF_INET, &conntrack_match);
-+}
-+
- static int __init init(void)
- {
- int ret;
- need_conntrack();
-- ret = xt_register_match(AF_INET, &conntrack_match);
--
-+ ret = init_xt_conntrack_match();
-+ if (ret < 0)
-+ return ret;
-+
-+ KSYMRESOLVE(init_xt_conntrack_match);
-+ KSYMRESOLVE(fini_xt_conntrack_match);
-+ KSYMMODRESOLVE(xt_conntrack);
- return ret;
- }
-
- static void __exit fini(void)
- {
-- xt_unregister_match(AF_INET, &conntrack_match);
-+ KSYMMODUNRESOLVE(xt_conntrack);
-+ KSYMUNRESOLVE(init_xt_conntrack_match);
-+ KSYMUNRESOLVE(fini_xt_conntrack_match);
-+ fini_xt_conntrack_match();
- }
-
- module_init(init);
-diff -upr linux-2.6.16.orig/net/netfilter/xt_helper.c linux-2.6.16-026test009/net/netfilter/xt_helper.c
---- linux-2.6.16.orig/net/netfilter/xt_helper.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_helper.c 2006-04-19 15:02:12.000000000 +0400
-@@ -24,6 +24,8 @@
- #endif
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter/xt_helper.h>
-+#include <linux/netfilter_ipv4/ip_tables.h>
-+#include <linux/nfcalls.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
-@@ -148,23 +150,107 @@ static int check(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat_to_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct ipt_entry_match *pm;
-+ struct xt_helper_info *pinfo;
-+ struct compat_xt_helper_info info;
-+ u_int16_t msize;
-+
-+ pm = (struct ipt_entry_match *)match;
-+ msize = pm->u.user.match_size;
-+ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
-+ return -EFAULT;
-+ pinfo = (struct xt_helper_info *)pm->data;
-+ memset(&info, 0, sizeof(struct compat_xt_helper_info));
-+ info.invert = pinfo->invert;
-+ memcpy(info.name, pinfo->name, 30);
-+ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
-+ &info, sizeof(struct compat_xt_helper_info)))
-+ return -EFAULT;
-+ msize -= off;
-+ if (put_user(msize, (u_int16_t *)*dstptr))
-+ return -EFAULT;
-+ *size -= off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int compat_from_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct compat_ipt_entry_match *pm;
-+ struct ipt_entry_match *dstpm;
-+ struct compat_xt_helper_info *pinfo;
-+ struct xt_helper_info info;
-+ u_int16_t msize;
-+
-+ pm = (struct compat_ipt_entry_match *)match;
-+ dstpm = (struct ipt_entry_match *)*dstptr;
-+ msize = pm->u.user.match_size;
-+ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
-+ pinfo = (struct compat_xt_helper_info *)pm->data;
-+ memset(&info, 0, sizeof(struct xt_helper_info));
-+ info.invert = pinfo->invert;
-+ memcpy(info.name, pinfo->name, 30);
-+ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
-+ &info, sizeof(struct xt_helper_info));
-+ msize += off;
-+ dstpm->u.user.match_size = msize;
-+ *size += off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int compat(void *match, void **dstptr, int *size, int convert)
-+{
-+ int ret, off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_helper_info)) -
-+ COMPAT_XT_ALIGN(sizeof(struct compat_xt_helper_info));
-+ switch (convert) {
-+ case COMPAT_TO_USER:
-+ ret = compat_to_user(match, dstptr, size, off);
-+ break;
-+ case COMPAT_FROM_USER:
-+ ret = compat_from_user(match, dstptr, size, off);
-+ break;
-+ case COMPAT_CALC_SIZE:
-+ *size += off;
-+ ret = 0;
-+ break;
-+ default:
-+ ret = -ENOPROTOOPT;
-+ break;
-+ }
-+ return ret;
-+}
-+#endif
-+
- static struct xt_match helper_match = {
- .name = "helper",
- .match = &match,
- .checkentry = &check,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
- static struct xt_match helper6_match = {
- .name = "helper",
- .match = &match,
- .checkentry = &check,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_xt_helper(void)
- {
- int ret;
-- need_conntrack();
-
- ret = xt_register_match(AF_INET, &helper_match);
- if (ret < 0)
-@@ -177,12 +263,35 @@ static int __init init(void)
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_xt_helper(void)
- {
- xt_unregister_match(AF_INET, &helper_match);
- xt_unregister_match(AF_INET6, &helper6_match);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ need_conntrack();
-+ err = init_xt_helper();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_xt_helper);
-+ KSYMRESOLVE(fini_xt_helper);
-+ KSYMMODRESOLVE(xt_helper);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(xt_helper);
-+ KSYMUNRESOLVE(init_xt_helper);
-+ KSYMUNRESOLVE(fini_xt_helper);
-+ fini_xt_helper();
-+}
-+
- module_init(init);
- module_exit(fini);
-
-diff -upr linux-2.6.16.orig/net/netfilter/xt_length.c linux-2.6.16-026test009/net/netfilter/xt_length.c
---- linux-2.6.16.orig/net/netfilter/xt_length.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_length.c 2006-04-19 15:02:12.000000000 +0400
-@@ -13,6 +13,7 @@
-
- #include <linux/netfilter/xt_length.h>
- #include <linux/netfilter/x_tables.h>
-+#include <linux/nfcalls.h>
-
- MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
- MODULE_DESCRIPTION("IP tables packet length matching module");
-@@ -63,20 +64,38 @@ checkentry(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat(void *match,
-+ void **dstptr, int *size, int convert)
-+{
-+ int off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_length_info)) -
-+ COMPAT_XT_ALIGN(sizeof(struct xt_length_info));
-+ return ipt_match_align_compat(match, dstptr, size, off, convert);
-+}
-+#endif
-+
- static struct xt_match length_match = {
- .name = "length",
- .match = &match,
- .checkentry = &checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
- static struct xt_match length6_match = {
- .name = "length",
- .match = &match6,
- .checkentry = &checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_xt_length(void)
- {
- int ret;
- ret = xt_register_match(AF_INET, &length_match);
-@@ -89,11 +108,33 @@ static int __init init(void)
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_xt_length(void)
- {
- xt_unregister_match(AF_INET, &length_match);
- xt_unregister_match(AF_INET6, &length6_match);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_xt_length();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_xt_length);
-+ KSYMRESOLVE(fini_xt_length);
-+ KSYMMODRESOLVE(xt_length);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(xt_length);
-+ KSYMUNRESOLVE(init_xt_length);
-+ KSYMUNRESOLVE(fini_xt_length);
-+ fini_xt_length();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/netfilter/xt_limit.c linux-2.6.16-026test009/net/netfilter/xt_limit.c
---- linux-2.6.16.orig/net/netfilter/xt_limit.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_limit.c 2006-04-19 15:02:12.000000000 +0400
-@@ -17,9 +17,11 @@
- #include <linux/skbuff.h>
- #include <linux/spinlock.h>
- #include <linux/interrupt.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter/xt_limit.h>
-+#include <linux/netfilter_ipv4/ip_tables.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
-@@ -27,6 +29,13 @@ MODULE_DESCRIPTION("iptables rate limit
- MODULE_ALIAS("ipt_limit");
- MODULE_ALIAS("ip6t_limit");
-
-+#ifdef CONFIG_VE_IPTABLES
-+#include <linux/sched.h>
-+#define ve_ipt_limit_reg (*(get_exec_env()->_ipt_limit_reg))
-+#else
-+#define ve_ipt_limit_reg ipt_limit_reg
-+#endif
-+
- /* The algorithm used is the Simple Token Bucket Filter (TBF)
- * see net/sched/sch_tbf.c in the linux source tree
- */
-@@ -137,20 +146,108 @@ ipt_limit_checkentry(const char *tablena
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int ipt_limit_compat_to_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct ipt_entry_match *pm;
-+ struct xt_rateinfo *pinfo;
-+ struct compat_xt_rateinfo rinfo;
-+ u_int16_t msize;
-+
-+ pm = (struct ipt_entry_match *)match;
-+ msize = pm->u.user.match_size;
-+ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
-+ return -EFAULT;
-+ pinfo = (struct xt_rateinfo *)pm->data;
-+ memset(&rinfo, 0, sizeof(struct compat_xt_rateinfo));
-+ rinfo.avg = pinfo->avg;
-+ rinfo.burst = pinfo->burst;
-+ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
-+ &rinfo, sizeof(struct compat_xt_rateinfo)))
-+ return -EFAULT;
-+ msize -= off;
-+ if (put_user(msize, (u_int16_t *)*dstptr))
-+ return -EFAULT;
-+ *size -= off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int ipt_limit_compat_from_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct compat_ipt_entry_match *pm;
-+ struct ipt_entry_match *dstpm;
-+ struct compat_xt_rateinfo *pinfo;
-+ struct xt_rateinfo rinfo;
-+ u_int16_t msize;
-+
-+ pm = (struct compat_ipt_entry_match *)match;
-+ dstpm = (struct ipt_entry_match *)*dstptr;
-+ msize = pm->u.user.match_size;
-+ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
-+ pinfo = (struct compat_xt_rateinfo *)pm->data;
-+ memset(&rinfo, 0, sizeof(struct xt_rateinfo));
-+ rinfo.avg = pinfo->avg;
-+ rinfo.burst = pinfo->burst;
-+ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
-+ &rinfo, sizeof(struct xt_rateinfo));
-+ msize += off;
-+ dstpm->u.user.match_size = msize;
-+ *size += off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int ipt_limit_compat(void *match, void **dstptr,
-+ int *size, int convert)
-+{
-+ int ret, off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_rateinfo)) -
-+ COMPAT_XT_ALIGN(sizeof(struct compat_xt_rateinfo));
-+ switch (convert) {
-+ case COMPAT_TO_USER:
-+ ret = ipt_limit_compat_to_user(match,
-+ dstptr, size, off);
-+ break;
-+ case COMPAT_FROM_USER:
-+ ret = ipt_limit_compat_from_user(match,
-+ dstptr, size, off);
-+ break;
-+ case COMPAT_CALC_SIZE:
-+ *size += off;
-+ ret = 0;
-+ break;
-+ default:
-+ ret = -ENOPROTOOPT;
-+ break;
-+ }
-+ return ret;
-+}
-+#endif
-+
- static struct xt_match ipt_limit_reg = {
- .name = "limit",
- .match = ipt_limit_match,
- .checkentry = ipt_limit_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = ipt_limit_compat,
-+#endif
- .me = THIS_MODULE,
- };
- static struct xt_match limit6_reg = {
- .name = "limit",
- .match = ipt_limit_match,
- .checkentry = ipt_limit_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = ipt_limit_compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_xt_limit(void)
- {
- int ret;
-
-@@ -165,11 +262,33 @@ static int __init init(void)
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_xt_limit(void)
- {
- xt_unregister_match(AF_INET, &ipt_limit_reg);
- xt_unregister_match(AF_INET6, &limit6_reg);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_xt_limit();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_xt_limit);
-+ KSYMRESOLVE(fini_xt_limit);
-+ KSYMMODRESOLVE(xt_limit);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(xt_limit);
-+ KSYMUNRESOLVE(init_xt_limit);
-+ KSYMUNRESOLVE(fini_xt_limit);
-+ fini_xt_limit();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/netfilter/xt_state.c linux-2.6.16-026test009/net/netfilter/xt_state.c
---- linux-2.6.16.orig/net/netfilter/xt_state.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_state.c 2006-04-19 15:02:12.000000000 +0400
-@@ -10,9 +10,11 @@
-
- #include <linux/module.h>
- #include <linux/skbuff.h>
-+#include <linux/nfcalls.h>
- #include <net/netfilter/nf_conntrack_compat.h>
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter/xt_state.h>
-+#include <linux/netfilter_ipv4/ip_tables.h>
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-@@ -55,10 +57,90 @@ static int check(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat_to_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct ipt_entry_match *pm;
-+ struct xt_state_info *pinfo;
-+ struct compat_xt_state_info info;
-+ u_int16_t msize;
-+
-+ pm = (struct ipt_entry_match *)match;
-+ msize = pm->u.user.match_size;
-+ if (__copy_to_user(*dstptr, pm, sizeof(struct ipt_entry_match)))
-+ return -EFAULT;
-+ pinfo = (struct xt_state_info *)pm->data;
-+ memset(&info, 0, sizeof(struct compat_xt_state_info));
-+ info.statemask = pinfo->statemask;
-+ if (__copy_to_user(*dstptr + sizeof(struct ipt_entry_match),
-+ &info, sizeof(struct compat_xt_state_info)))
-+ return -EFAULT;
-+ msize -= off;
-+ if (put_user(msize, (u_int16_t *)*dstptr))
-+ return -EFAULT;
-+ *size -= off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int compat_from_user(void *match, void **dstptr,
-+ int *size, int off)
-+{
-+ struct compat_ipt_entry_match *pm;
-+ struct ipt_entry_match *dstpm;
-+ struct compat_xt_state_info *pinfo;
-+ struct xt_state_info info;
-+ u_int16_t msize;
-+
-+ pm = (struct compat_ipt_entry_match *)match;
-+ dstpm = (struct ipt_entry_match *)*dstptr;
-+ msize = pm->u.user.match_size;
-+ memcpy(*dstptr, pm, sizeof(struct compat_ipt_entry_match));
-+ pinfo = (struct compat_xt_state_info *)pm->data;
-+ memset(&info, 0, sizeof(struct xt_state_info));
-+ info.statemask = pinfo->statemask;
-+ memcpy(*dstptr + sizeof(struct compat_ipt_entry_match),
-+ &info, sizeof(struct xt_state_info));
-+ msize += off;
-+ dstpm->u.user.match_size = msize;
-+ *size += off;
-+ *dstptr += msize;
-+ return 0;
-+}
-+
-+static int compat(void *match, void **dstptr, int *size, int convert)
-+{
-+ int ret, off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_state_info)) -
-+ COMPAT_XT_ALIGN(sizeof(struct compat_xt_state_info));
-+ switch (convert) {
-+ case COMPAT_TO_USER:
-+ ret = compat_to_user(match, dstptr, size, off);
-+ break;
-+ case COMPAT_FROM_USER:
-+ ret = compat_from_user(match, dstptr, size, off);
-+ break;
-+ case COMPAT_CALC_SIZE:
-+ *size += off;
-+ ret = 0;
-+ break;
-+ default:
-+ ret = -ENOPROTOOPT;
-+ break;
-+ }
-+ return ret;
-+}
-+#endif
-+
- static struct xt_match state_match = {
- .name = "state",
- .match = &match,
- .checkentry = &check,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
-@@ -66,15 +148,16 @@ static struct xt_match state6_match = {
- .name = "state",
- .match = &match,
- .checkentry = &check,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_xt_state(void)
- {
- int ret;
-
-- need_conntrack();
--
- ret = xt_register_match(AF_INET, &state_match);
- if (ret < 0)
- return ret;
-@@ -86,11 +169,34 @@ static int __init init(void)
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_xt_state(void)
- {
- xt_unregister_match(AF_INET, &state_match);
- xt_unregister_match(AF_INET6, &state6_match);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ need_conntrack();
-+ err = init_xt_state();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_xt_state);
-+ KSYMRESOLVE(fini_xt_state);
-+ KSYMMODRESOLVE(xt_state);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(xt_state);
-+ KSYMUNRESOLVE(init_xt_state);
-+ KSYMUNRESOLVE(fini_xt_state);
-+ fini_xt_state();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/netfilter/xt_tcpmss.c linux-2.6.16-026test009/net/netfilter/xt_tcpmss.c
---- linux-2.6.16.orig/net/netfilter/xt_tcpmss.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_tcpmss.c 2006-04-19 15:02:12.000000000 +0400
-@@ -11,6 +11,7 @@
- #include <linux/module.h>
- #include <linux/skbuff.h>
- #include <net/tcp.h>
-+#include <linux/nfcalls.h>
-
- #include <linux/netfilter/xt_tcpmss.h>
- #include <linux/netfilter/x_tables.h>
-@@ -133,10 +134,25 @@ checkentry6(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int compat(void *match,
-+ void **dstptr, int *size, int convert)
-+{
-+ int off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_tcpmss_match_info)) -
-+ COMPAT_XT_ALIGN(sizeof(struct xt_tcpmss_match_info));
-+ return ipt_match_align_compat(match, dstptr, size, off, convert);
-+}
-+#endif
-+
- static struct xt_match tcpmss_match = {
- .name = "tcpmss",
- .match = &match,
- .checkentry = &checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
-@@ -144,11 +160,14 @@ static struct xt_match tcpmss6_match = {
- .name = "tcpmss",
- .match = &match,
- .checkentry = &checkentry6,
-+#ifdef CONFIG_COMPAT
-+ .compat = &compat,
-+#endif
- .me = THIS_MODULE,
- };
-
-
--static int __init init(void)
-+int init_xt_tcpmss(void)
- {
- int ret;
- ret = xt_register_match(AF_INET, &tcpmss_match);
-@@ -162,11 +181,33 @@ static int __init init(void)
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_xt_tcpmss(void)
- {
- xt_unregister_match(AF_INET6, &tcpmss6_match);
- xt_unregister_match(AF_INET, &tcpmss_match);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_xt_tcpmss();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_xt_tcpmss);
-+ KSYMRESOLVE(fini_xt_tcpmss);
-+ KSYMMODRESOLVE(xt_tcpmss);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(xt_tcpmss);
-+ KSYMUNRESOLVE(init_xt_tcpmss);
-+ KSYMUNRESOLVE(fini_xt_tcpmss);
-+ fini_xt_tcpmss();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/netfilter/xt_tcpudp.c linux-2.6.16-026test009/net/netfilter/xt_tcpudp.c
---- linux-2.6.16.orig/net/netfilter/xt_tcpudp.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netfilter/xt_tcpudp.c 2006-04-19 15:02:12.000000000 +0400
-@@ -5,6 +5,7 @@
- #include <net/ipv6.h>
- #include <net/tcp.h>
- #include <net/udp.h>
-+#include <linux/nfcalls.h>
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter/xt_tcpudp.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-@@ -266,10 +267,35 @@ udp6_checkentry(const char *tablename,
- return 1;
- }
-
-+#ifdef CONFIG_COMPAT
-+static int tcp_compat(void *match,
-+ void **dstptr, int *size, int convert)
-+{
-+ int off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_tcp)) -
-+ COMPAT_XT_ALIGN(sizeof(struct xt_tcp));
-+ return ipt_match_align_compat(match, dstptr, size, off, convert);
-+}
-+
-+static int udp_compat(void *match,
-+ void **dstptr, int *size, int convert)
-+{
-+ int off;
-+
-+ off = XT_ALIGN(sizeof(struct xt_udp)) -
-+ COMPAT_XT_ALIGN(sizeof(struct xt_udp));
-+ return ipt_match_align_compat(match, dstptr, size, off, convert);
-+}
-+#endif
-+
- static struct xt_match tcp_matchstruct = {
- .name = "tcp",
- .match = &tcp_match,
- .checkentry = &tcp_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &tcp_compat,
-+#endif
- .me = THIS_MODULE,
- };
- static struct xt_match tcp6_matchstruct = {
-@@ -283,6 +309,9 @@ static struct xt_match udp_matchstruct =
- .name = "udp",
- .match = &udp_match,
- .checkentry = &udp_checkentry,
-+#ifdef CONFIG_COMPAT
-+ .compat = &udp_compat,
-+#endif
- .me = THIS_MODULE,
- };
- static struct xt_match udp6_matchstruct = {
-@@ -292,7 +321,7 @@ static struct xt_match udp6_matchstruct
- .me = THIS_MODULE,
- };
-
--static int __init init(void)
-+int init_xt_tcpudp(void)
- {
- int ret;
- ret = xt_register_match(AF_INET, &tcp_matchstruct);
-@@ -322,7 +351,7 @@ out_unreg_tcp:
- return ret;
- }
-
--static void __exit fini(void)
-+void fini_xt_tcpudp(void)
- {
- xt_unregister_match(AF_INET6, &udp6_matchstruct);
- xt_unregister_match(AF_INET, &udp_matchstruct);
-@@ -330,5 +359,27 @@ static void __exit fini(void)
- xt_unregister_match(AF_INET, &tcp_matchstruct);
- }
-
-+static int __init init(void)
-+{
-+ int err;
-+
-+ err = init_xt_tcpudp();
-+ if (err < 0)
-+ return err;
-+
-+ KSYMRESOLVE(init_xt_tcpudp);
-+ KSYMRESOLVE(fini_xt_tcpudp);
-+ KSYMMODRESOLVE(xt_tcpudp);
-+ return 0;
-+}
-+
-+static void __exit fini(void)
-+{
-+ KSYMMODUNRESOLVE(xt_tcpudp);
-+ KSYMUNRESOLVE(init_xt_tcpudp);
-+ KSYMUNRESOLVE(fini_xt_tcpudp);
-+ fini_xt_tcpudp();
-+}
-+
- module_init(init);
- module_exit(fini);
-diff -upr linux-2.6.16.orig/net/netlink/af_netlink.c linux-2.6.16-026test009/net/netlink/af_netlink.c
---- linux-2.6.16.orig/net/netlink/af_netlink.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/netlink/af_netlink.c 2006-04-19 15:02:13.000000000 +0400
-@@ -60,27 +60,14 @@
- #include <net/sock.h>
- #include <net/scm.h>
- #include <net/netlink.h>
-+#include <net/netlink_sock.h>
-+
-+#include <ub/beancounter.h>
-+#include <ub/ub_net.h>
-
- #define Nprintk(a...)
- #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
-
--struct netlink_sock {
-- /* struct sock has to be the first member of netlink_sock */
-- struct sock sk;
-- u32 pid;
-- u32 dst_pid;
-- u32 dst_group;
-- u32 flags;
-- u32 subscriptions;
-- u32 ngroups;
-- unsigned long *groups;
-- unsigned long state;
-- wait_queue_head_t wait;
-- struct netlink_callback *cb;
-- spinlock_t cb_lock;
-- void (*data_ready)(struct sock *sk, int bytes);
-- struct module *module;
--};
-
- #define NETLINK_KERNEL_SOCKET 0x1
- #define NETLINK_RECV_PKTINFO 0x2
-@@ -209,7 +196,10 @@ static __inline__ struct sock *netlink_l
- read_lock(&nl_table_lock);
- head = nl_pid_hashfn(hash, pid);
- sk_for_each(sk, node, head) {
-- if (nlk_sk(sk)->pid == pid) {
-+ /* VEs must be able to find sockets created by the kernel */
-+ if ((nlk_sk(sk)->pid == pid) &&
-+ (!pid || ve_accessible_strict(VE_OWNER_SK(sk),
-+ get_exec_env()))){
- sock_hold(sk);
- goto found;
- }
-@@ -309,7 +299,9 @@ static int netlink_insert(struct sock *s
- head = nl_pid_hashfn(hash, pid);
- len = 0;
- sk_for_each(osk, node, head) {
-- if (nlk_sk(osk)->pid == pid)
-+ /* check the iterated socket (osk), not the one being inserted */
-+ if ((nlk_sk(osk)->pid == pid) &&
-+ ve_accessible_strict(VE_OWNER_SK(osk), get_exec_env()))
- break;
- len++;
- }
-@@ -362,6 +354,8 @@ static int __netlink_create(struct socke
- sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
- if (!sk)
- return -ENOMEM;
-+ if (ub_other_sock_charge(sk))
-+ goto out_free;
-
- sock_init_data(sock, sk);
-
-@@ -372,6 +366,10 @@ static int __netlink_create(struct socke
- sk->sk_destruct = netlink_sock_destruct;
- sk->sk_protocol = protocol;
- return 0;
-+
-+out_free:
-+ sk_free(sk);
-+ return -ENOMEM;
- }
-
- static int netlink_create(struct socket *sock, int protocol)
-@@ -477,7 +475,7 @@ static int netlink_autobind(struct socke
- struct hlist_head *head;
- struct sock *osk;
- struct hlist_node *node;
-- s32 pid = current->tgid;
-+ s32 pid = virt_pid(current);
- int err;
- static s32 rover = -4097;
-
-@@ -486,7 +484,9 @@ retry:
- netlink_table_grab();
- head = nl_pid_hashfn(hash, pid);
- sk_for_each(osk, node, head) {
-- if (nlk_sk(osk)->pid == pid) {
-+ if ((nlk_sk(osk)->pid == pid) &&
-+ ve_accessible_strict(VE_OWNER_SK(osk),
-+ get_exec_env())) {
- /* Bind collision, search negative pid values. */
- pid = rover--;
- if (rover > -4097)
-@@ -511,7 +511,7 @@ retry:
- static inline int netlink_capable(struct socket *sock, unsigned int flag)
- {
- return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
-- capable(CAP_NET_ADMIN);
-+ capable(CAP_VE_NET_ADMIN);
- }
-
- static void
-@@ -845,6 +845,9 @@ static inline int do_one_broadcast(struc
- !test_bit(p->group - 1, nlk->groups))
- goto out;
-
-+ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
-+ goto out;
-+
- if (p->failure) {
- netlink_overrun(sk);
- goto out;
-@@ -942,6 +945,9 @@ static inline int do_one_set_err(struct
- !test_bit(p->group - 1, nlk->groups))
- goto out;
-
-+ if (!ve_accessible_strict(get_exec_env(), VE_OWNER_SK(sk)))
-+ goto out;
-+
- sk->sk_err = p->code;
- sk->sk_error_report(sk);
- out:
-@@ -1076,12 +1082,17 @@ static int netlink_sendmsg(struct kiocb
- struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
- struct sock *sk = sock->sk;
- struct netlink_sock *nlk = nlk_sk(sk);
-- struct sockaddr_nl *addr=msg->msg_name;
-+ struct sockaddr_nl *addr = msg->msg_name;
- u32 dst_pid;
-- u32 dst_group;
- struct sk_buff *skb;
- int err;
- struct scm_cookie scm;
-+ struct sock *dstsk;
-+ long timeo;
-+ int no_ubc, no_buf;
-+ unsigned long chargesize;
-+
-+ DECLARE_WAITQUEUE(wait, current);
-
- if (msg->msg_flags&MSG_OOB)
- return -EOPNOTSUPP;
-@@ -1092,17 +1103,16 @@ static int netlink_sendmsg(struct kiocb
- if (err < 0)
- return err;
-
-+ /* Broadcasts from user to kernel are disabled. This is OK
-+ * according to ANK */
- if (msg->msg_namelen) {
- if (addr->nl_family != AF_NETLINK)
- return -EINVAL;
- dst_pid = addr->nl_pid;
-- dst_group = ffs(addr->nl_groups);
-- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
-+ if (addr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
- return -EPERM;
-- } else {
-+ } else
- dst_pid = nlk->dst_pid;
-- dst_group = nlk->dst_group;
-- }
-
- if (!nlk->pid) {
- err = netlink_autobind(sock);
-@@ -1115,12 +1125,12 @@ static int netlink_sendmsg(struct kiocb
- goto out;
- err = -ENOBUFS;
- skb = alloc_skb(len, GFP_KERNEL);
-- if (skb==NULL)
-+ if (skb == NULL)
- goto out;
-
- NETLINK_CB(skb).pid = nlk->pid;
- NETLINK_CB(skb).dst_pid = dst_pid;
-- NETLINK_CB(skb).dst_group = dst_group;
-+ NETLINK_CB(skb).dst_group = 0;
- NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
- memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-
-@@ -1131,25 +1141,88 @@ static int netlink_sendmsg(struct kiocb
- */
-
- err = -EFAULT;
-- if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
-- kfree_skb(skb);
-- goto out;
-- }
-+ if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
-+ goto out_free;
-
- err = security_netlink_send(sk, skb);
-- if (err) {
-- kfree_skb(skb);
-- goto out;
-+ if (err)
-+ goto out_free;
-+
-+ timeo = sock_sndtimeo(sk, msg->msg_flags&MSG_DONTWAIT);
-+retry:
-+ dstsk = netlink_getsockbypid(sk, dst_pid);
-+ if (IS_ERR(dstsk)) {
-+ err = PTR_ERR(dstsk);
-+ goto out_free;
- }
-
-- if (dst_group) {
-- atomic_inc(&skb->users);
-- netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
-+ nlk = nlk_sk(dstsk);
-+#ifdef NL_EMULATE_DEV
-+ if (nlk->handler) {
-+ skb_orphan(skb);
-+ err = nlk->handler(protocol, skb);
-+ goto out_put;
- }
-- err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
-+#endif
-+
-+ /* NOTE: the charge size could be computed once, before the retry loop */
-+ chargesize = skb_charge_fullsize(skb);
-+ no_ubc = ub_sock_getwres_other(sk, chargesize);
-+ no_buf = atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
-+ test_bit(0, &nlk->state);
-+ if (no_ubc || no_buf) {
-+ wait_queue_head_t *sleep;
-+
-+ if (!no_ubc)
-+ ub_sock_retwres_other(sk, chargesize,
-+ SOCK_MIN_UBCSPACE_CH);
-+ err = -EAGAIN;
-+ if (timeo == 0) {
-+ kfree_skb(skb);
-+ goto out_put;
-+ }
-+
-+ /* the wake-up arrives on a different queue in each case */
-+ sleep = no_ubc ? sk->sk_sleep : &nlk->wait;
-+ __set_current_state(TASK_INTERRUPTIBLE);
-+ add_wait_queue(sleep, &wait);
-
-+ /* this check can't be moved earlier: ub_sock_sndqueueadd_other()
-+ * may change the task state to TASK_RUNNING */
-+ if (no_ubc)
-+ ub_sock_sndqueueadd_other(sk, chargesize);
-+
-+ if ((atomic_read(&dstsk->sk_rmem_alloc) > dstsk->sk_rcvbuf ||
-+ test_bit(0, &nlk->state) || no_ubc) &&
-+ !sock_flag(dstsk, SOCK_DEAD))
-+ timeo = schedule_timeout(timeo);
-+
-+ __set_current_state(TASK_RUNNING);
-+ remove_wait_queue(sleep, &wait);
-+ if (no_ubc)
-+ ub_sock_sndqueuedel(sk);
-+ sock_put(dstsk);
-+
-+ if (!signal_pending(current))
-+ goto retry;
-+ err = sock_intr_errno(timeo);
-+ goto out_free;
-+ }
-+
-+ skb_orphan(skb);
-+ skb_set_owner_r(skb, dstsk);
-+ ub_skb_set_charge(skb, sk, chargesize, UB_OTHERSOCKBUF);
-+ skb_queue_tail(&dstsk->sk_receive_queue, skb);
-+ dstsk->sk_data_ready(dstsk, len);
-+ err = len;
-+out_put:
-+ sock_put(dstsk);
- out:
- return err;
-+
-+out_free:
-+ kfree_skb(skb);
-+ return err;
- }
-
- static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
-@@ -1303,6 +1376,10 @@ static int netlink_dump(struct sock *sk)
- skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-+ if (ub_nlrcvbuf_charge(skb, sk) < 0) {
-+ kfree_skb(skb);
-+ return -EACCES;
-+ }
-
- spin_lock(&nlk->cb_lock);
-
-@@ -1471,8 +1548,15 @@ void netlink_run_queue(struct sock *sk,
- *qlen = skb_queue_len(&sk->sk_receive_queue);
-
- for (; *qlen; (*qlen)--) {
-+ int ret;
-+ struct ve_struct *old_env;
- skb = skb_dequeue(&sk->sk_receive_queue);
-- if (netlink_rcv_skb(skb, cb)) {
-+
-+ old_env = set_exec_env(VE_OWNER_SKB(skb));
-+ ret = netlink_rcv_skb(skb, cb);
-+ (void)set_exec_env(old_env);
-+
-+ if (ret) {
- if (skb->len)
- skb_queue_head(&sk->sk_receive_queue, skb);
- else {
-@@ -1740,6 +1824,7 @@ enomem:
-
- sock_register(&netlink_family_ops);
- #ifdef CONFIG_PROC_FS
-+ /* FIXME: virtualize this before giving VEs access to it */
- proc_net_fops_create("netlink", 0, &netlink_seq_fops);
- #endif
- /* The netlink device handler may be needed early. */
-diff -upr linux-2.6.16.orig/net/packet/af_packet.c linux-2.6.16-026test009/net/packet/af_packet.c
---- linux-2.6.16.orig/net/packet/af_packet.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/packet/af_packet.c 2006-04-19 15:02:12.000000000 +0400
-@@ -79,6 +79,8 @@
- #include <linux/module.h>
- #include <linux/init.h>
-
-+#include <ub/ub_net.h>
-+
- #ifdef CONFIG_INET
- #include <net/inet_common.h>
- #endif
-@@ -280,7 +282,8 @@ static int packet_rcv_spkt(struct sk_buf
- * so that this procedure is noop.
- */
-
-- if (skb->pkt_type == PACKET_LOOPBACK)
-+ if (skb->pkt_type == PACKET_LOOPBACK ||
-+ !ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
- goto out;
-
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
-@@ -472,6 +475,9 @@ static int packet_rcv(struct sk_buff *sk
- sk = pt->af_packet_priv;
- po = pkt_sk(sk);
-
-+ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
-+ goto drop;
-+
- skb->dev = dev;
-
- if (dev->hard_header) {
-@@ -531,6 +537,9 @@ static int packet_rcv(struct sk_buff *sk
- if (pskb_trim(skb, snaplen))
- goto drop_n_acct;
-
-+ if (ub_sockrcvbuf_charge(sk, skb))
-+ goto drop_n_acct;
-+
- skb_set_owner_r(skb, sk);
- skb->dev = NULL;
- dst_release(skb->dst);
-@@ -581,6 +590,9 @@ static int tpacket_rcv(struct sk_buff *s
- sk = pt->af_packet_priv;
- po = pkt_sk(sk);
-
-+ if (!ve_accessible(VE_OWNER_SKB(skb), VE_OWNER_SK(sk)))
-+ goto drop;
-+
- if (dev->hard_header) {
- if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
-@@ -630,6 +642,12 @@ static int tpacket_rcv(struct sk_buff *s
- if (snaplen > skb->len-skb->data_len)
- snaplen = skb->len-skb->data_len;
-
-+ if (copy_skb &&
-+ ub_sockrcvbuf_charge(sk, copy_skb)) {
-+ spin_lock(&sk->sk_receive_queue.lock);
-+ goto ring_is_full;
-+ }
-+
- spin_lock(&sk->sk_receive_queue.lock);
- h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
-
-@@ -1010,6 +1028,8 @@ static int packet_create(struct socket *
- sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
- if (sk == NULL)
- goto out;
-+ if (ub_other_sock_charge(sk))
-+ goto out_free;
-
- sock->ops = &packet_ops;
- #ifdef CONFIG_SOCK_PACKET
-@@ -1048,6 +1068,9 @@ static int packet_create(struct socket *
- sk_add_node(sk, &packet_sklist);
- write_unlock_bh(&packet_sklist_lock);
- return(0);
-+
-+out_free:
-+ sk_free(sk);
- out:
- return err;
- }
-@@ -1430,11 +1453,16 @@ static int packet_notifier(struct notifi
- struct sock *sk;
- struct hlist_node *node;
- struct net_device *dev = (struct net_device*)data;
-+ struct ve_struct *ve;
-
-+ ve = get_exec_env();
- read_lock(&packet_sklist_lock);
- sk_for_each(sk, node, &packet_sklist) {
- struct packet_sock *po = pkt_sk(sk);
-
-+ if (!ve_accessible_strict(VE_OWNER_SK(sk), ve))
-+ continue;
-+
- switch (msg) {
- case NETDEV_UNREGISTER:
- #ifdef CONFIG_PACKET_MULTICAST
-@@ -1845,6 +1873,8 @@ static inline struct sock *packet_seq_id
- struct hlist_node *node;
-
- sk_for_each(s, node, &packet_sklist) {
-+ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
-+ continue;
- if (!off--)
- return s;
- }
-@@ -1860,9 +1890,13 @@ static void *packet_seq_start(struct seq
- static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
- {
- ++*pos;
-- return (v == SEQ_START_TOKEN)
-- ? sk_head(&packet_sklist)
-- : sk_next((struct sock*)v) ;
-+ do {
-+ v = (v == SEQ_START_TOKEN)
-+ ? sk_head(&packet_sklist)
-+ : sk_next((struct sock*)v);
-+ } while (v != NULL &&
-+ !ve_accessible(VE_OWNER_SK((struct sock*)v), get_exec_env()));
-+ return v;
- }
-
- static void packet_seq_stop(struct seq_file *seq, void *v)
-@@ -1918,7 +1952,7 @@ static struct file_operations packet_seq
-
- static void __exit packet_exit(void)
- {
-- proc_net_remove("packet");
-+ remove_proc_glob_entry("net/packet", NULL);
- unregister_netdevice_notifier(&packet_netdev_notifier);
- sock_unregister(PF_PACKET);
- proto_unregister(&packet_proto);
-@@ -1933,7 +1967,7 @@ static int __init packet_init(void)
-
- sock_register(&packet_family_ops);
- register_netdevice_notifier(&packet_netdev_notifier);
-- proc_net_fops_create("packet", 0, &packet_seq_fops);
-+ proc_glob_fops_create("net/packet", 0, &packet_seq_fops);
- out:
- return rc;
- }
-diff -upr linux-2.6.16.orig/net/sched/sch_generic.c linux-2.6.16-026test009/net/sched/sch_generic.c
---- linux-2.6.16.orig/net/sched/sch_generic.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/sched/sch_generic.c 2006-04-19 15:02:12.000000000 +0400
-@@ -97,6 +97,7 @@ int qdisc_restart(struct net_device *dev
-
- /* Dequeue packet */
- if ((skb = q->dequeue(q)) != NULL) {
-+ struct ve_struct *envid;
- unsigned nolock = (dev->features & NETIF_F_LLTX);
- /*
- * When the driver has LLTX set it does its own locking
-@@ -107,6 +108,7 @@ int qdisc_restart(struct net_device *dev
- * of lock congestion it should return -1 and the packet
- * will be requeued.
- */
-+ envid = set_exec_env(VE_OWNER_SKB(skb));
- if (!nolock) {
- if (!spin_trylock(&dev->xmit_lock)) {
- collision:
-@@ -121,6 +123,7 @@ int qdisc_restart(struct net_device *dev
- kfree_skb(skb);
- if (net_ratelimit())
- printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
-+ (void)set_exec_env(envid);
- return -1;
- }
- __get_cpu_var(netdev_rx_stat).cpu_collision++;
-@@ -146,6 +149,7 @@ int qdisc_restart(struct net_device *dev
- spin_unlock(&dev->xmit_lock);
- }
- spin_lock(&dev->queue_lock);
-+ (void)set_exec_env(envid);
- return -1;
- }
- if (ret == NETDEV_TX_LOCKED && nolock) {
-@@ -177,6 +181,7 @@ int qdisc_restart(struct net_device *dev
- requeue:
- q->ops->requeue(skb, q);
- netif_schedule(dev);
-+ (void)set_exec_env(envid);
- return 1;
- }
- BUG_ON((int) q->q.qlen < 0);
-@@ -625,3 +630,4 @@ EXPORT_SYMBOL(qdisc_reset);
- EXPORT_SYMBOL(qdisc_restart);
- EXPORT_SYMBOL(qdisc_lock_tree);
- EXPORT_SYMBOL(qdisc_unlock_tree);
-+EXPORT_SYMBOL(dev_shutdown);
-diff -upr linux-2.6.16.orig/net/sched/sch_teql.c linux-2.6.16-026test009/net/sched/sch_teql.c
---- linux-2.6.16.orig/net/sched/sch_teql.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/sched/sch_teql.c 2006-04-19 15:02:12.000000000 +0400
-@@ -189,6 +189,9 @@ static int teql_qdisc_init(struct Qdisc
- struct teql_master *m = (struct teql_master*)sch->ops;
- struct teql_sched_data *q = qdisc_priv(sch);
-
-+ if (!capable(CAP_NET_ADMIN))
-+ return -EPERM;
-+
- if (dev->hard_header_len > m->dev->hard_header_len)
- return -EINVAL;
-
-diff -upr linux-2.6.16.orig/net/socket.c linux-2.6.16-026test009/net/socket.c
---- linux-2.6.16.orig/net/socket.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/socket.c 2006-04-19 15:02:12.000000000 +0400
-@@ -84,6 +84,7 @@
- #include <linux/compat.h>
- #include <linux/kmod.h>
- #include <linux/audit.h>
-+#include <linux/in.h>
-
- #ifdef CONFIG_NET_RADIO
- #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
-@@ -1075,6 +1076,37 @@ int sock_wake_async(struct socket *sock,
- return 0;
- }
-
-+int vz_security_proto_check(int family, int type, int protocol)
-+{
-+#ifdef CONFIG_VE
-+ if (ve_is_super(get_exec_env()))
-+ return 0;
-+
-+ switch (family) {
-+ case PF_UNSPEC:
-+ case PF_PACKET:
-+ case PF_NETLINK:
-+ case PF_UNIX:
-+ break;
-+ case PF_INET:
-+ switch (protocol) {
-+ case IPPROTO_IP:
-+ case IPPROTO_ICMP:
-+ case IPPROTO_TCP:
-+ case IPPROTO_UDP:
-+ case IPPROTO_RAW:
-+ break;
-+ default:
-+ return -EAFNOSUPPORT;
-+ }
-+ break;
-+ default:
-+ return -EAFNOSUPPORT;
-+ }
-+#endif
-+ return 0;
-+}
-+
- static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
- {
- int err;
-@@ -1102,6 +1134,11 @@ static int __sock_create(int family, int
- family = PF_PACKET;
- }
-
-+ /* VZ compatibility layer */
-+ err = vz_security_proto_check(family, type, protocol);
-+ if (err < 0)
-+ return err;
-+
- err = security_socket_create(family, type, protocol, kern);
- if (err)
- return err;
-diff -upr linux-2.6.16.orig/net/sunrpc/clnt.c linux-2.6.16-026test009/net/sunrpc/clnt.c
---- linux-2.6.16.orig/net/sunrpc/clnt.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/sunrpc/clnt.c 2006-04-19 15:02:12.000000000 +0400
-@@ -168,10 +168,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
- }
-
- /* save the nodename */
-- clnt->cl_nodelen = strlen(system_utsname.nodename);
-+ clnt->cl_nodelen = strlen(ve_utsname.nodename);
- if (clnt->cl_nodelen > UNX_MAXNODENAME)
- clnt->cl_nodelen = UNX_MAXNODENAME;
-- memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
-+ memcpy(clnt->cl_nodename, ve_utsname.nodename, clnt->cl_nodelen);
- return clnt;
-
- out_no_auth:
-diff -upr linux-2.6.16.orig/net/sunrpc/sched.c linux-2.6.16-026test009/net/sunrpc/sched.c
---- linux-2.6.16.orig/net/sunrpc/sched.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/sunrpc/sched.c 2006-04-19 15:02:12.000000000 +0400
-@@ -605,7 +605,9 @@ EXPORT_SYMBOL(rpc_exit_task);
- static int __rpc_execute(struct rpc_task *task)
- {
- int status = 0;
-+ struct ve_struct *env;
-
-+ env = set_exec_env(get_ve0());
- dprintk("RPC: %4d rpc_execute flgs %x\n",
- task->tk_pid, task->tk_flags);
-
-@@ -693,6 +695,7 @@ static int __rpc_execute(struct rpc_task
- rpc_mark_complete_task(task);
- /* Release all resources associated with the task */
- rpc_release_task(task);
-+ (void)set_exec_env(env);
- return status;
- }
-
-diff -upr linux-2.6.16.orig/net/sunrpc/svcsock.c linux-2.6.16-026test009/net/sunrpc/svcsock.c
---- linux-2.6.16.orig/net/sunrpc/svcsock.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/sunrpc/svcsock.c 2006-04-19 15:02:12.000000000 +0400
-@@ -361,6 +361,9 @@ svc_sendto(struct svc_rqst *rqstp, struc
- size_t base = xdr->page_base;
- unsigned int pglen = xdr->page_len;
- unsigned int flags = MSG_MORE;
-+ struct ve_struct *old_env;
-+
-+ old_env = set_exec_env(get_ve0());
-
- slen = xdr->len;
-
-@@ -425,6 +428,8 @@ out:
- rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
- rqstp->rq_addr.sin_addr.s_addr);
-
-+ (void)set_exec_env(old_env);
-+
- return len;
- }
-
-@@ -437,9 +442,12 @@ svc_recv_available(struct svc_sock *svsk
- mm_segment_t oldfs;
- struct socket *sock = svsk->sk_sock;
- int avail, err;
-+ struct ve_struct *old_env;
-
- oldfs = get_fs(); set_fs(KERNEL_DS);
-+ old_env = set_exec_env(get_ve0());
- err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
-+ (void)set_exec_env(old_env);
- set_fs(oldfs);
-
- return (err >= 0)? avail : err;
-@@ -454,6 +462,7 @@ svc_recvfrom(struct svc_rqst *rqstp, str
- struct msghdr msg;
- struct socket *sock;
- int len, alen;
-+ struct ve_struct *old_env;
-
- rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
- sock = rqstp->rq_sock->sk_sock;
-@@ -465,7 +474,9 @@ svc_recvfrom(struct svc_rqst *rqstp, str
-
- msg.msg_flags = MSG_DONTWAIT;
-
-+ old_env = set_exec_env(get_ve0()); /* switch to get_ve0() for the receive */
- len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
-+ (void)set_exec_env(old_env); /* restore the saved environment */
-
- /* sock_recvmsg doesn't fill in the name/namelen, so we must..
- * possibly we should cache this in the svc_sock structure
-@@ -761,17 +772,19 @@ svc_tcp_accept(struct svc_sock *svsk)
- const struct proto_ops *ops;
- struct svc_sock *newsvsk;
- int err, slen;
-+ struct ve_struct *old_env;
-
- dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
- if (!sock)
- return;
-
-+ old_env = set_exec_env(get_ve0());
- err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
- if (err) {
- if (err == -ENOMEM)
- printk(KERN_WARNING "%s: no more sockets!\n",
- serv->sv_name);
-- return;
-+ goto restore;
- }
-
- dprintk("svc: tcp_accept %p allocated\n", newsock);
-@@ -865,6 +878,8 @@ svc_tcp_accept(struct svc_sock *svsk)
-
- }
-
-+ (void)set_exec_env(old_env);
-+
- if (serv->sv_stats)
- serv->sv_stats->nettcpconn++;
-
-@@ -872,6 +887,8 @@ svc_tcp_accept(struct svc_sock *svsk)
-
- failed:
- sock_release(newsock);
-+restore:
-+ (void)set_exec_env(old_env);
- return;
- }
-
-@@ -1388,6 +1405,7 @@ svc_create_socket(struct svc_serv *serv,
- struct socket *sock;
- int error;
- int type;
-+ struct ve_struct *old_env;
-
- dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
- serv->sv_program->pg_name, protocol,
-@@ -1401,8 +1419,10 @@ svc_create_socket(struct svc_serv *serv,
- }
- type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
-
-+ old_env = set_exec_env(get_ve0());
-+
- if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
-- return error;
-+ goto restore;
-
- if (sin != NULL) {
- if (type == SOCK_STREAM)
-@@ -1418,12 +1438,16 @@ svc_create_socket(struct svc_serv *serv,
- goto bummer;
- }
-
-- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
-+ if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) {
-+ (void)set_exec_env(old_env);
- return 0;
-+ }
-
- bummer:
- dprintk("svc: svc_create_socket error = %d\n", -error);
- sock_release(sock);
-+restore:
-+ (void)set_exec_env(old_env);
- return error;
- }
-
-diff -upr linux-2.6.16.orig/net/unix/af_unix.c linux-2.6.16-026test009/net/unix/af_unix.c
---- linux-2.6.16.orig/net/unix/af_unix.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/unix/af_unix.c 2006-04-19 15:02:12.000000000 +0400
-@@ -118,6 +118,9 @@
- #include <net/checksum.h>
- #include <linux/security.h>
-
-+#include <ub/ub_net.h>
-+#include <ub/beancounter.h>
-+
- int sysctl_unix_max_dgram_qlen = 10;
-
- struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
-@@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b
- sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
- struct unix_sock *u = unix_sk(s);
-
-+ if (!ve_accessible(VE_OWNER_SK(s), get_exec_env()))
-+ continue;
- if (u->addr->len == len &&
- !memcmp(u->addr->name, sunname, len))
- goto found;
-@@ -439,7 +444,7 @@ static int unix_listen(struct socket *so
- sk->sk_max_ack_backlog = backlog;
- sk->sk_state = TCP_LISTEN;
- /* set credentials so connect can copy them */
-- sk->sk_peercred.pid = current->tgid;
-+ sk->sk_peercred.pid = virt_tgid(current);
- sk->sk_peercred.uid = current->euid;
- sk->sk_peercred.gid = current->egid;
- err = 0;
-@@ -553,6 +558,8 @@ static struct sock * unix_create1(struct
- sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
- if (!sk)
- goto out;
-+ if (ub_other_sock_charge(sk))
-+ goto out_sk_free;
-
- atomic_inc(&unix_nr_socks);
-
-@@ -571,6 +578,9 @@ static struct sock * unix_create1(struct
- unix_insert_socket(unix_sockets_unbound, sk);
- out:
- return sk;
-+out_sk_free:
-+ sk_free(sk);
-+ return NULL;
- }
-
- static int unix_create(struct socket *sock, int protocol)
-@@ -676,7 +686,7 @@ static struct sock *unix_find_other(stru
- err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
- if (err)
- goto fail;
-- err = vfs_permission(&nd, MAY_WRITE);
-+ err = vfs_permission(&nd, MAY_WRITE, NULL);
- if (err)
- goto put_fail;
-
-@@ -932,6 +942,7 @@ static int unix_stream_connect(struct so
- int st;
- int err;
- long timeo;
-+ unsigned long chargesize;
-
- err = unix_mkname(sunaddr, addr_len, &hash);
- if (err < 0)
-@@ -960,6 +971,10 @@ static int unix_stream_connect(struct so
- skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
- if (skb == NULL)
- goto out;
-+ chargesize = skb_charge_fullsize(skb);
-+ if (ub_sock_getwres_other(newsk, chargesize) < 0)
-+ goto out;
-+ ub_skb_set_charge(skb, newsk, chargesize, UB_OTHERSOCKBUF);
-
- restart:
- /* Find listening sock. */
-@@ -1043,7 +1058,7 @@ restart:
- unix_peer(newsk) = sk;
- newsk->sk_state = TCP_ESTABLISHED;
- newsk->sk_type = sk->sk_type;
-- newsk->sk_peercred.pid = current->tgid;
-+ newsk->sk_peercred.pid = virt_tgid(current);
- newsk->sk_peercred.uid = current->euid;
- newsk->sk_peercred.gid = current->egid;
- newu = unix_sk(newsk);
-@@ -1107,7 +1122,7 @@ static int unix_socketpair(struct socket
- sock_hold(skb);
- unix_peer(ska)=skb;
- unix_peer(skb)=ska;
-- ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
-+ ska->sk_peercred.pid = skb->sk_peercred.pid = virt_tgid(current);
- ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
- ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
-
-@@ -1433,6 +1448,16 @@ static int unix_stream_sendmsg(struct ki
-
- size=len-sent;
-
-+ if (msg->msg_flags & MSG_DONTWAIT)
-+ ub_sock_makewres_other(sk, skb_charge_size(size));
-+ if (sock_bc(sk) != NULL &&
-+ sock_bc(sk)->poll_reserv >=
-+ SOCK_MIN_UBCSPACE &&
-+ skb_charge_size(size) >
-+ sock_bc(sk)->poll_reserv)
-+ size = skb_charge_datalen(sock_bc(sk)->poll_reserv);
-+
-+
- /* Keep two messages in the pipe so it schedules better */
- if (size > sk->sk_sndbuf / 2 - 64)
- size = sk->sk_sndbuf / 2 - 64;
-@@ -1444,7 +1469,8 @@ static int unix_stream_sendmsg(struct ki
- * Grab a buffer
- */
-
-- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
-+ skb = sock_alloc_send_skb2(sk, size, SOCK_MIN_UBCSPACE,
-+ msg->msg_flags&MSG_DONTWAIT, &err);
-
- if (skb==NULL)
- goto out_err;
-@@ -1869,6 +1895,7 @@ static unsigned int unix_poll(struct fil
- {
- struct sock *sk = sock->sk;
- unsigned int mask;
-+ int no_ub_res;
-
- poll_wait(file, sk->sk_sleep, wait);
- mask = 0;
-@@ -1879,6 +1906,10 @@ static unsigned int unix_poll(struct fil
- if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
-
-+ no_ub_res = ub_sock_makewres_other(sk, SOCK_MIN_UBCSPACE_CH);
-+ if (no_ub_res)
-+ ub_sock_sndqueueadd_other(sk, SOCK_MIN_UBCSPACE_CH);
-+
- /* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
-@@ -1892,7 +1923,7 @@ static unsigned int unix_poll(struct fil
- * we set writable also when the other side has shut down the
- * connection. This prevents stuck sockets.
- */
-- if (unix_writable(sk))
-+ if (!no_ub_res && unix_writable(sk))
- mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-
- return mask;
-@@ -2044,7 +2075,7 @@ static int __init af_unix_init(void)
-
- sock_register(&unix_family_ops);
- #ifdef CONFIG_PROC_FS
-- proc_net_fops_create("unix", 0, &unix_seq_fops);
-+ proc_glob_fops_create("net/unix", 0, &unix_seq_fops);
- #endif
- unix_sysctl_register();
- out:
-@@ -2055,7 +2086,7 @@ static void __exit af_unix_exit(void)
- {
- sock_unregister(PF_UNIX);
- unix_sysctl_unregister();
-- proc_net_remove("unix");
-+ remove_proc_glob_entry("net/unix", NULL);
- proto_unregister(&unix_proto);
- }
-
-diff -upr linux-2.6.16.orig/net/unix/garbage.c linux-2.6.16-026test009/net/unix/garbage.c
---- linux-2.6.16.orig/net/unix/garbage.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/net/unix/garbage.c 2006-04-19 15:02:13.000000000 +0400
-@@ -76,6 +76,7 @@
- #include <linux/netdevice.h>
- #include <linux/file.h>
- #include <linux/proc_fs.h>
-+#include <linux/module.h>
-
- #include <net/sock.h>
- #include <net/af_unix.h>
-@@ -135,7 +136,7 @@ void unix_notinflight(struct file *fp)
- atomic_dec(&unix_tot_inflight);
- }
- }
--
-+EXPORT_SYMBOL_GPL(unix_notinflight);
-
- /*
- * Garbage Collector Support Functions
-diff -upr linux-2.6.16.orig/security/commoncap.c linux-2.6.16-026test009/security/commoncap.c
---- linux-2.6.16.orig/security/commoncap.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/security/commoncap.c 2006-04-19 15:02:12.000000000 +0400
-@@ -35,7 +35,7 @@ EXPORT_SYMBOL(cap_netlink_send);
-
- int cap_netlink_recv(struct sk_buff *skb)
- {
-- if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
-+ if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_VE_NET_ADMIN))
- return -EPERM;
- return 0;
- }
-@@ -197,7 +197,7 @@ int cap_inode_setxattr(struct dentry *de
- {
- if (!strncmp(name, XATTR_SECURITY_PREFIX,
- sizeof(XATTR_SECURITY_PREFIX) - 1) &&
-- !capable(CAP_SYS_ADMIN))
-+ !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
- return -EPERM;
- return 0;
- }
-@@ -206,7 +206,7 @@ int cap_inode_removexattr(struct dentry
- {
- if (!strncmp(name, XATTR_SECURITY_PREFIX,
- sizeof(XATTR_SECURITY_PREFIX) - 1) &&
-- !capable(CAP_SYS_ADMIN))
-+ !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_ADMIN))
- return -EPERM;
- return 0;
- }
-@@ -312,7 +312,7 @@ void cap_task_reparent_to_init (struct t
-
- int cap_syslog (int type)
- {
-- if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
-+ if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
- return 0;
- }
-diff -upr linux-2.6.16.orig/security/keys/key.c linux-2.6.16-026test009/security/keys/key.c
---- linux-2.6.16.orig/security/keys/key.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/security/keys/key.c 2006-04-19 15:02:11.000000000 +0400
-@@ -785,6 +785,10 @@ key_ref_t key_create_or_update(key_ref_t
-
- key_check(keyring);
-
-+ key_ref = ERR_PTR(-ENOTDIR);
-+ if (keyring->type != &key_type_keyring)
-+ goto error_2;
-+
- down_write(&keyring->sem);
-
- /* if we're going to allocate a new key, we're going to have
-diff -upr linux-2.6.16.orig/security/keys/keyring.c linux-2.6.16-026test009/security/keys/keyring.c
---- linux-2.6.16.orig/security/keys/keyring.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/security/keys/keyring.c 2006-04-19 15:02:11.000000000 +0400
-@@ -437,6 +437,7 @@ EXPORT_SYMBOL(keyring_search);
- /*
- * search the given keyring only (no recursion)
- * - keyring must be locked by caller
-+ * - caller must guarantee that the keyring is a keyring
- */
- key_ref_t __keyring_search_one(key_ref_t keyring_ref,
- const struct key_type *ktype,
-diff -upr linux-2.6.16.orig/security/selinux/hooks.c linux-2.6.16-026test009/security/selinux/hooks.c
---- linux-2.6.16.orig/security/selinux/hooks.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/security/selinux/hooks.c 2006-04-19 15:02:12.000000000 +0400
-@@ -4167,12 +4167,12 @@ static int selinux_setprocattr(struct ta
- struct task_struct *g, *t;
- struct mm_struct *mm = p->mm;
- read_lock(&tasklist_lock);
-- do_each_thread(g, t)
-+ do_each_thread_ve(g, t)
- if (t->mm == mm && t != p) {
- read_unlock(&tasklist_lock);
- return -EPERM;
- }
-- while_each_thread(g, t);
-+ while_each_thread_ve(g, t);
- read_unlock(&tasklist_lock);
- }
-
-diff -upr linux-2.6.16.orig/sound/isa/opti9xx/opti92x-ad1848.c linux-2.6.16-026test009/sound/isa/opti9xx/opti92x-ad1848.c
---- linux-2.6.16.orig/sound/isa/opti9xx/opti92x-ad1848.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/sound/isa/opti9xx/opti92x-ad1848.c 2006-04-19 15:02:11.000000000 +0400
-@@ -2088,9 +2088,11 @@ static int __init alsa_card_opti9xx_init
- int error;
- struct platform_device *device;
-
-+#ifdef CONFIG_PNP
- pnp_register_card_driver(&opti9xx_pnpc_driver);
- if (snd_opti9xx_pnp_is_probed)
- return 0;
-+#endif
- if (! is_isapnp_selected()) {
- error = platform_driver_register(&snd_opti9xx_driver);
- if (error < 0)
-@@ -2102,7 +2104,9 @@ static int __init alsa_card_opti9xx_init
- }
- platform_driver_unregister(&snd_opti9xx_driver);
- }
-+#ifdef CONFIG_PNP
- pnp_unregister_card_driver(&opti9xx_pnpc_driver);
-+#endif
- #ifdef MODULE
- printk(KERN_ERR "no OPTi " CHIP_NAME " soundcard found\n");
- #endif
-@@ -2115,7 +2119,9 @@ static void __exit alsa_card_opti9xx_exi
- platform_device_unregister(snd_opti9xx_platform_device);
- platform_driver_unregister(&snd_opti9xx_driver);
- }
-+#ifdef CONFIG_PNP
- pnp_unregister_card_driver(&opti9xx_pnpc_driver);
-+#endif
- }
-
- module_init(alsa_card_opti9xx_init)
-diff -upr linux-2.6.16.orig/sound/pci/hda/patch_realtek.c linux-2.6.16-026test009/sound/pci/hda/patch_realtek.c
---- linux-2.6.16.orig/sound/pci/hda/patch_realtek.c 2006-04-19 15:02:02.000000000 +0400
-+++ linux-2.6.16-026test009/sound/pci/hda/patch_realtek.c 2006-04-19 15:02:11.000000000 +0400
-@@ -2948,6 +2948,8 @@ static struct hda_board_config alc260_cf
- { .modelname = "basic", .config = ALC260_BASIC },
- { .pci_subvendor = 0x104d, .pci_subdevice = 0x81bb,
- .config = ALC260_BASIC }, /* Sony VAIO */
-+ { .pci_subvendor = 0x152d, .pci_subdevice = 0x0729,
-+ .config = ALC260_BASIC }, /* CTL Travel Master U553W */
- { .modelname = "hp", .config = ALC260_HP },
- { .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP },
- { .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP },