diff options
author | 2006-11-05 10:46:32 +0000 | |
---|---|---|
committer | 2006-11-05 10:46:32 +0000 | |
commit | 1b9715bc80fe0852cf4c0a289fe849c3757490b9 (patch) | |
tree | 22f71b0ce089e46d98f5d2a686ddead05bdfe2e2 | |
parent | Parts from 035_all_optional-vlogin.patch accidently slipped into 030_all_gent... (diff) | |
download | misc-1b9715bc80fe0852cf4c0a289fe849c3757490b9.tar.gz misc-1b9715bc80fe0852cf4c0a289fe849c3757490b9.tar.bz2 misc-1b9715bc80fe0852cf4c0a289fe849c3757490b9.zip |
We are now using UPSTREAMs tarball
svn path=/; revision=523
1059 files changed, 0 insertions, 5785466 deletions
diff --git a/openvz-sources/022.034/openvz-022stab034-core.patch b/openvz-sources/022.034/openvz-022stab034-core.patch deleted file mode 100644 index 9185ee2..0000000 --- a/openvz-sources/022.034/openvz-022stab034-core.patch +++ /dev/null @@ -1,64534 +0,0 @@ -diff -uprN linux-2.6.8.1.orig/arch/alpha/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/alpha/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/alpha/kernel/ptrace.c 2004-08-14 14:56:14.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/alpha/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -354,7 +354,7 @@ do_sys_ptrace(long request, long pid, lo - */ - case PTRACE_KILL: - ret = 0; -- if (child->state == TASK_ZOMBIE) -+ if (child->exit_state == EXIT_ZOMBIE) - break; - child->exit_code = SIGKILL; - /* make sure single-step breakpoint is gone. */ -diff -uprN linux-2.6.8.1.orig/arch/arm/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/arm/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/arm/kernel/ptrace.c 2004-08-14 14:54:49.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/arm/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -677,7 +677,7 @@ static int do_ptrace(int request, struct - /* make sure single-step breakpoint is gone. 
*/ - child->ptrace &= ~PT_SINGLESTEP; - ptrace_cancel_bpt(child); -- if (child->state != TASK_ZOMBIE) { -+ if (child->exit_state != EXIT_ZOMBIE) { - child->exit_code = SIGKILL; - wake_up_process(child); - } -diff -uprN linux-2.6.8.1.orig/arch/arm/kernel/signal.c linux-2.6.8.1-ve022stab034/arch/arm/kernel/signal.c ---- linux-2.6.8.1.orig/arch/arm/kernel/signal.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/arm/kernel/signal.c 2005-09-09 14:39:24.000000000 +0400 -@@ -548,9 +548,10 @@ static int do_signal(sigset_t *oldset, s - if (!user_mode(regs)) - return 0; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (current->ptrace & PT_SINGLESTEP) -diff -uprN linux-2.6.8.1.orig/arch/arm26/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/arm26/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/arm26/kernel/ptrace.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/arm26/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -614,7 +614,7 @@ static int do_ptrace(int request, struct - /* make sure single-step breakpoint is gone. 
*/ - child->ptrace &= ~PT_SINGLESTEP; - ptrace_cancel_bpt(child); -- if (child->state != TASK_ZOMBIE) { -+ if (child->exit_state != EXIT_ZOMBIE) { - child->exit_code = SIGKILL; - wake_up_process(child); - } -diff -uprN linux-2.6.8.1.orig/arch/cris/arch-v10/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/cris/arch-v10/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/cris/arch-v10/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/cris/arch-v10/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -185,7 +185,7 @@ sys_ptrace(long request, long pid, long - case PTRACE_KILL: - ret = 0; - -- if (child->state == TASK_ZOMBIE) -+ if (child->exit_state == EXIT_ZOMBIE) - break; - - child->exit_code = SIGKILL; -diff -uprN linux-2.6.8.1.orig/arch/h8300/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/h8300/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/h8300/kernel/ptrace.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/h8300/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -199,7 +199,7 @@ asmlinkage int sys_ptrace(long request, - case PTRACE_KILL: { - - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - h8300_disable_trace(child); -diff -uprN linux-2.6.8.1.orig/arch/i386/boot/setup.S linux-2.6.8.1-ve022stab034/arch/i386/boot/setup.S ---- linux-2.6.8.1.orig/arch/i386/boot/setup.S 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/boot/setup.S 2005-09-09 14:39:25.000000000 +0400 -@@ -156,7 +156,7 @@ cmd_line_ptr: .long 0 # (Header versio - # can be located anywhere in - # low memory 0x10000 or higher. 
- --ramdisk_max: .long (MAXMEM-1) & 0x7fffffff -+ramdisk_max: .long (__MAXMEM-1) & 0x7fffffff - # (Header version 0x0203 or later) - # The highest safe address for - # the contents of an initrd -diff -uprN linux-2.6.8.1.orig/arch/i386/Kconfig.open_virtuozzo linux-2.6.8.1-ve022stab034/arch/i386/Kconfig.open_virtuozzo ---- linux-2.6.8.1.orig/arch/i386/Kconfig.open_virtuozzo 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.8.1-ve022stab034/arch/i386/Kconfig.open_virtuozzo 2005-09-09 14:39:26.000000000 +0400 -@@ -0,0 +1,44 @@ -+config VE -+ bool "Virtual Environment support" -+ default y -+ help -+ This option adds support of virtual Linux running on the original box -+ with fully supported virtual network driver, tty subsystem and -+ configurable access for hardware and other resources. -+ -+config VE_CALLS -+ tristate "VE calls interface" -+ depends on VE -+ default m -+ help -+ This option controls how to build vzmon code containing VE calls. -+ By default it's build in module vzmon.o -+ -+config VE_SYSFS -+ bool "Enable sysfs support in Virtual Environments" -+ depends on VE -+ default n -+ help -+ This option enables sysfs support in Virtual Environments -+ -+config VE_NETDEV -+ tristate "VE networking" -+ depends on VE -+ default m -+ help -+ This option controls whether to build VE networking code. -+ -+config VE_IPTABLES -+ bool "VE netfiltering" -+ depends on VE && VE_NETDEV && INET && NETFILTER -+ default y -+ help -+ This option controls whether to build VE netfiltering code. -+ -+config VZ_WDOG -+ tristate "VE watchdog module" -+ depends on VE -+ default m -+ help -+ This option controls building of vzwdog module, which dumps -+ a lot of useful system info on console periodically. 
-diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/boot.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/acpi/boot.c ---- linux-2.6.8.1.orig/arch/i386/kernel/acpi/boot.c 2004-08-14 14:56:01.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/acpi/boot.c 2005-09-09 14:39:25.000000000 +0400 -@@ -484,7 +484,7 @@ acpi_scan_rsdp ( - * RSDP signature. - */ - for (offset = 0; offset < length; offset += 16) { -- if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) -+ if (strncmp((char *) __va(start + offset), "RSD PTR ", sig_len)) - continue; - return (start + offset); - } -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/sleep.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/acpi/sleep.c ---- linux-2.6.8.1.orig/arch/i386/kernel/acpi/sleep.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/acpi/sleep.c 2005-09-09 14:39:25.000000000 +0400 -@@ -19,13 +19,29 @@ extern void zap_low_mappings(void); - - extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); - --static void init_low_mapping(pgd_t *pgd, int pgd_limit) -+static void map_low(pgd_t *pgd_base, unsigned long start, unsigned long end) - { -- int pgd_ofs = 0; -- -- while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { -- set_pgd(pgd, *(pgd+USER_PTRS_PER_PGD)); -- pgd_ofs++, pgd++; -+ unsigned long vaddr; -+ pmd_t *pmd; -+ pgd_t *pgd; -+ int i, j; -+ -+ pgd = pgd_base; -+ -+ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { -+ vaddr = i*PGDIR_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ pmd = pmd_offset(pgd, 0); -+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { -+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ if (vaddr < start) -+ continue; -+ set_pmd(pmd, __pmd(_KERNPG_TABLE + _PAGE_PSE + -+ vaddr - start)); -+ } - } - } - -@@ -39,7 +55,9 @@ int acpi_save_state_mem (void) - { - if (!acpi_wakeup_address) - return 1; -- init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); -+ if (!cpu_has_pse) -+ 
return 1; -+ map_low(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE); - memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); - acpi_copy_wakeup_routine(acpi_wakeup_address); - -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/acpi/wakeup.S linux-2.6.8.1-ve022stab034/arch/i386/kernel/acpi/wakeup.S ---- linux-2.6.8.1.orig/arch/i386/kernel/acpi/wakeup.S 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/acpi/wakeup.S 2005-09-09 14:39:25.000000000 +0400 -@@ -67,6 +67,13 @@ wakeup_code: - movw $0x0e00 + 'i', %fs:(0x12) - - # need a gdt -+ #use the gdt copied in this low mem -+ lea temp_gdt_table - wakeup_code, %eax -+ xor %ebx, %ebx -+ movw %ds, %bx -+ shll $4, %ebx -+ addl %ebx, %eax -+ movl %eax, real_save_gdt + 2 - wakeup_code - lgdt real_save_gdt - wakeup_code - - movl real_save_cr0 - wakeup_code, %eax -@@ -89,6 +96,7 @@ real_save_cr4: .long 0 - real_magic: .long 0 - video_mode: .long 0 - video_flags: .long 0 -+temp_gdt_table: .fill GDT_ENTRIES, 8, 0 - - bogus_real_magic: - movw $0x0e00 + 'B', %fs:(0x12) -@@ -231,6 +239,13 @@ ENTRY(acpi_copy_wakeup_routine) - movl %edx, real_save_cr0 - wakeup_start (%eax) - sgdt real_save_gdt - wakeup_start (%eax) - -+ # gdt wont be addressable from real mode in 4g4g split -+ # copying it to the lower mem -+ xor %ecx, %ecx -+ movw saved_gdt, %cx -+ movl saved_gdt + 2, %esi -+ lea temp_gdt_table - wakeup_start (%eax), %edi -+ rep movsb - movl saved_videomode, %edx - movl %edx, video_mode - wakeup_start (%eax) - movl acpi_video_flags, %edx -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/apic.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/apic.c ---- linux-2.6.8.1.orig/arch/i386/kernel/apic.c 2004-08-14 14:56:24.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/apic.c 2005-09-09 14:39:25.000000000 +0400 -@@ -1089,6 +1089,7 @@ inline void smp_local_timer_interrupt(st - void smp_apic_timer_interrupt(struct pt_regs regs) - { - int cpu = smp_processor_id(); -+ struct 
ve_struct *envid; - - /* - * the NMI deadlock-detector uses this. -@@ -1105,9 +1106,11 @@ void smp_apic_timer_interrupt(struct pt_ - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ -+ envid = set_exec_env(get_ve0()); - irq_enter(); - smp_local_timer_interrupt(®s); - irq_exit(); -+ (void)set_exec_env(envid); - } - - /* -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/asm-offsets.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/asm-offsets.c ---- linux-2.6.8.1.orig/arch/i386/kernel/asm-offsets.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/asm-offsets.c 2005-09-09 14:39:25.000000000 +0400 -@@ -61,5 +61,19 @@ void foo(void) - DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - - sizeof(struct tss_struct)); - -+ DEFINE(TI_task, offsetof (struct thread_info, task)); -+ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); -+ DEFINE(TI_flags, offsetof (struct thread_info, flags)); -+ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); -+ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); -+ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); -+ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); -+ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); -+ -+ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, -+ __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); -+ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); - DEFINE(PAGE_SIZE_asm, PAGE_SIZE); -+ DEFINE(task_thread_db7, -+ offsetof (struct task_struct, thread.debugreg[7])); - } -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/common.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/common.c ---- linux-2.6.8.1.orig/arch/i386/kernel/cpu/common.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/common.c 2005-09-09 14:39:25.000000000 +0400 -@@ -554,12 +554,16 @@ void __init cpu_init (void) - 
set_tss_desc(cpu,t); - cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; - load_TR_desc(); -- load_LDT(&init_mm.context); -+ if (cpu) -+ load_LDT(&init_mm.context); - - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); - cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; - -+ if (cpu) -+ trap_init_virtual_GDT(); -+ - /* Clear %fs and %gs. */ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); - -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/intel.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/intel.c ---- linux-2.6.8.1.orig/arch/i386/kernel/cpu/intel.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/intel.c 2005-09-09 14:39:25.000000000 +0400 -@@ -10,6 +10,7 @@ - #include <asm/processor.h> - #include <asm/msr.h> - #include <asm/uaccess.h> -+#include <asm/desc.h> - - #include "cpu.h" - -@@ -19,8 +20,6 @@ - #include <mach_apic.h> - #endif - --extern int trap_init_f00f_bug(void); -- - #ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Alignment at which movsl is preferred for bulk memory copies. 
-@@ -147,7 +146,7 @@ static void __init init_intel(struct cpu - - c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { -- trap_init_f00f_bug(); -+ trap_init_virtual_IDT(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/mtrr/if.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/mtrr/if.c ---- linux-2.6.8.1.orig/arch/i386/kernel/cpu/mtrr/if.c 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/mtrr/if.c 2005-09-09 14:39:25.000000000 +0400 -@@ -358,7 +358,7 @@ static int __init mtrr_if_init(void) - return -ENODEV; - - proc_root_mtrr = -- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); -+ create_proc_entry("mtrr", S_IWUSR | S_IRUGO, NULL); - if (proc_root_mtrr) { - proc_root_mtrr->owner = THIS_MODULE; - proc_root_mtrr->proc_fops = &mtrr_fops; -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/cpu/proc.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/proc.c ---- linux-2.6.8.1.orig/arch/i386/kernel/cpu/proc.c 2004-08-14 14:56:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/cpu/proc.c 2005-09-09 14:39:26.000000000 +0400 -@@ -3,6 +3,8 @@ - #include <linux/string.h> - #include <asm/semaphore.h> - #include <linux/seq_file.h> -+#include <linux/vsched.h> -+#include <linux/fairsched.h> - - /* - * Get CPU information for use by the procfs. 
-@@ -58,11 +60,17 @@ static int show_cpuinfo(struct seq_file - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; -+ unsigned long vcpu_khz; - - #ifdef CONFIG_SMP -- if (!cpu_online(n)) -+ if (!vcpu_online(n)) - return 0; - #endif -+#ifdef CONFIG_VE -+ vcpu_khz = ve_scale_khz(cpu_khz); -+#else -+ vcpu_khz = cpu_khz; -+#endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" -@@ -81,7 +89,7 @@ static int show_cpuinfo(struct seq_file - - if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", -- cpu_khz / 1000, (cpu_khz % 1000)); -+ vcpu_khz / 1000, (vcpu_khz % 1000)); - } - - /* Cache size */ -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/doublefault.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/doublefault.c ---- linux-2.6.8.1.orig/arch/i386/kernel/doublefault.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/doublefault.c 2005-09-09 14:39:25.000000000 +0400 -@@ -8,12 +8,13 @@ - #include <asm/pgtable.h> - #include <asm/processor.h> - #include <asm/desc.h> -+#include <asm/fixmap.h> - - #define DOUBLEFAULT_STACKSIZE (1024) - static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; - #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - --#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) -+#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) - - static void doublefault_fn(void) - { -@@ -39,8 +40,8 @@ static void doublefault_fn(void) - - printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->eax, t->ebx, t->ecx, t->edx); -- printk("esi = %08lx, edi = %08lx\n", -- t->esi, t->edi); -+ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", -+ t->esi, t->edi, t->ebp); - } - } - -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/entry.S linux-2.6.8.1-ve022stab034/arch/i386/kernel/entry.S ---- linux-2.6.8.1.orig/arch/i386/kernel/entry.S 2004-08-14 
14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/entry.S 2005-09-09 14:39:25.000000000 +0400 -@@ -43,8 +43,10 @@ - #include <linux/config.h> - #include <linux/linkage.h> - #include <asm/thread_info.h> -+#include <asm/asm_offsets.h> - #include <asm/errno.h> - #include <asm/segment.h> -+#include <asm/page.h> - #include <asm/smp.h> - #include <asm/page.h> - #include "irq_vectors.h" -@@ -81,7 +83,102 @@ VM_MASK = 0x00020000 - #define resume_kernel restore_all - #endif - --#define SAVE_ALL \ -+#ifdef CONFIG_X86_HIGH_ENTRY -+ -+#ifdef CONFIG_X86_SWITCH_PAGETABLES -+ -+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -+/* -+ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, -+ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is -+ * left stale, so we must check whether to repeat the real stack calculation. -+ */ -+#define repeat_if_esp_changed \ -+ xorl %esp, %ebp; \ -+ testl $-THREAD_SIZE, %ebp; \ -+ jnz 0b -+#else -+#define repeat_if_esp_changed -+#endif -+ -+/* clobbers ebx, edx and ebp */ -+ -+#define __SWITCH_KERNELSPACE \ -+ cmpl $0xff000000, %esp; \ -+ jb 1f; \ -+ \ -+ /* \ -+ * switch pagetables and load the real stack, \ -+ * keep the stack offset: \ -+ */ \ -+ \ -+ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ -+ \ -+ /* GET_THREAD_INFO(%ebp) intermixed */ \ -+0: \ -+ movl %esp, %ebp; \ -+ movl %esp, %ebx; \ -+ andl $(-THREAD_SIZE), %ebp; \ -+ andl $(THREAD_SIZE-1), %ebx; \ -+ orl TI_real_stack(%ebp), %ebx; \ -+ repeat_if_esp_changed; \ -+ \ -+ movl %edx, %cr3; \ -+ movl %ebx, %esp; \ -+1: -+ -+#endif -+ -+ -+#define __SWITCH_USERSPACE \ -+ /* interrupted any of the user return paths? 
*/ \ -+ \ -+ movl EIP(%esp), %eax; \ -+ \ -+ cmpl $int80_ret_start_marker, %eax; \ -+ jb 33f; /* nope - continue with sysexit check */\ -+ cmpl $int80_ret_end_marker, %eax; \ -+ jb 22f; /* yes - switch to virtual stack */ \ -+33: \ -+ cmpl $sysexit_ret_start_marker, %eax; \ -+ jb 44f; /* nope - continue with user check */ \ -+ cmpl $sysexit_ret_end_marker, %eax; \ -+ jb 22f; /* yes - switch to virtual stack */ \ -+ /* return to userspace? */ \ -+44: \ -+ movl EFLAGS(%esp),%ecx; \ -+ movb CS(%esp),%cl; \ -+ testl $(VM_MASK | 3),%ecx; \ -+ jz 2f; \ -+22: \ -+ /* \ -+ * switch to the virtual stack, then switch to \ -+ * the userspace pagetables. \ -+ */ \ -+ \ -+ GET_THREAD_INFO(%ebp); \ -+ movl TI_virtual_stack(%ebp), %edx; \ -+ movl TI_user_pgd(%ebp), %ecx; \ -+ \ -+ movl %esp, %ebx; \ -+ andl $(THREAD_SIZE-1), %ebx; \ -+ orl %ebx, %edx; \ -+int80_ret_start_marker: \ -+ movl %edx, %esp; \ -+ movl %ecx, %cr3; \ -+ \ -+ __RESTORE_ALL_USER; \ -+int80_ret_end_marker: \ -+2: -+ -+#else /* !CONFIG_X86_HIGH_ENTRY */ -+ -+#define __SWITCH_KERNELSPACE -+#define __SWITCH_USERSPACE -+ -+#endif -+ -+#define __SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ -@@ -96,7 +193,7 @@ VM_MASK = 0x00020000 - movl %edx, %ds; \ - movl %edx, %es; - --#define RESTORE_INT_REGS \ -+#define __RESTORE_INT_REGS \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ -@@ -105,29 +202,44 @@ VM_MASK = 0x00020000 - popl %ebp; \ - popl %eax - --#define RESTORE_REGS \ -- RESTORE_INT_REGS; \ --1: popl %ds; \ --2: popl %es; \ --.section .fixup,"ax"; \ --3: movl $0,(%esp); \ -- jmp 1b; \ --4: movl $0,(%esp); \ -- jmp 2b; \ --.previous; \ -+#define __RESTORE_REGS \ -+ __RESTORE_INT_REGS; \ -+ popl %ds; \ -+ popl %es; -+ -+#define __RESTORE_REGS_USER \ -+ __RESTORE_INT_REGS; \ -+111: popl %ds; \ -+222: popl %es; \ -+ jmp 666f; \ -+444: movl $0,(%esp); \ -+ jmp 111b; \ -+555: movl $0,(%esp); \ -+ jmp 222b; \ -+666: \ - .section __ex_table,"a";\ - .align 4; \ -- .long 1b,3b; \ -- .long 2b,4b; \ -+ .long 111b,444b;\ 
-+ .long 222b,555b;\ - .previous - -+#define __RESTORE_ALL_USER \ -+ __RESTORE_REGS_USER \ -+ __RESTORE_IRET -+ -+#ifdef CONFIG_X86_HIGH_ENTRY -+#define __RESTORE_ALL \ -+ __RESTORE_REGS \ -+ __RESTORE_IRET -+#else /* !CONFIG_X86_HIGH_ENTRY */ -+#define __RESTORE_ALL __RESTORE_ALL_USER -+#endif - --#define RESTORE_ALL \ -- RESTORE_REGS \ -+#define __RESTORE_IRET \ - addl $4, %esp; \ --1: iret; \ -+333: iret; \ - .section .fixup,"ax"; \ --2: sti; \ -+666: sti; \ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ -@@ -136,10 +248,18 @@ VM_MASK = 0x00020000 - .previous; \ - .section __ex_table,"a";\ - .align 4; \ -- .long 1b,2b; \ -+ .long 333b,666b;\ - .previous - -+#define SAVE_ALL \ -+ __SAVE_ALL; \ -+ __SWITCH_KERNELSPACE; -+ -+#define RESTORE_ALL \ -+ __SWITCH_USERSPACE; \ -+ __RESTORE_ALL; - -+.section .entry.text,"ax" - - ENTRY(lcall7) - pushfl # We get a different stack layout with call -@@ -240,17 +360,9 @@ sysenter_past_esp: - pushl $(__USER_CS) - pushl $SYSENTER_RETURN - --/* -- * Load the potential sixth argument from user stack. -- * Careful about security. -- */ -- cmpl $__PAGE_OFFSET-3,%ebp -- jae syscall_fault --1: movl (%ebp),%ebp --.section __ex_table,"a" -- .align 4 -- .long 1b,syscall_fault --.previous -+ /* -+ * No six-argument syscall is ever used with sysenter. -+ */ - - pushl %eax - SAVE_ALL -@@ -266,12 +378,34 @@ sysenter_past_esp: - movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx - jne syscall_exit_work -+ -+#ifdef CONFIG_X86_SWITCH_PAGETABLES -+ -+ GET_THREAD_INFO(%ebp) -+ movl TI_virtual_stack(%ebp), %edx -+ movl TI_user_pgd(%ebp), %ecx -+ movl %esp, %ebx -+ andl $(THREAD_SIZE-1), %ebx -+ orl %ebx, %edx -+sysexit_ret_start_marker: -+ movl %edx, %esp -+ movl %ecx, %cr3 -+ /* -+ * only ebx is not restored by the userspace sysenter vsyscall -+ * code, it assumes it to be callee-saved. 
-+ */ -+ movl EBX(%esp), %ebx -+#endif -+ - /* if something modifies registers it must also disable sysexit */ - movl EIP(%esp), %edx - movl OLDESP(%esp), %ecx - sti - sysexit -- -+#ifdef CONFIG_X86_SWITCH_PAGETABLES -+sysexit_ret_end_marker: -+ nop -+#endif - - # system call handler stub - ENTRY(system_call) -@@ -321,6 +455,22 @@ work_notifysig: # deal with pending s - # vm86-space - xorl %edx, %edx - call do_notify_resume -+ -+#if CONFIG_X86_HIGH_ENTRY -+ /* -+ * Reload db7 if necessary: -+ */ -+ movl TI_flags(%ebp), %ecx -+ testb $_TIF_DB7, %cl -+ jnz work_db7 -+ -+ jmp restore_all -+ -+work_db7: -+ movl TI_task(%ebp), %edx; -+ movl task_thread_db7(%edx), %edx; -+ movl %edx, %db7; -+#endif - jmp restore_all - - ALIGN -@@ -358,14 +508,6 @@ syscall_exit_work: - jmp resume_userspace - - ALIGN --syscall_fault: -- pushl %eax # save orig_eax -- SAVE_ALL -- GET_THREAD_INFO(%ebp) -- movl $-EFAULT,EAX(%esp) -- jmp resume_userspace -- -- ALIGN - syscall_badsys: - movl $-ENOSYS,EAX(%esp) - jmp resume_userspace -@@ -376,7 +518,7 @@ syscall_badsys: - */ - .data - ENTRY(interrupt) --.text -+.previous - - vector=0 - ENTRY(irq_entries_start) -@@ -386,7 +528,7 @@ ENTRY(irq_entries_start) - jmp common_interrupt - .data - .long 1b --.text -+.previous - vector=vector+1 - .endr - -@@ -427,12 +569,17 @@ error_code: - movl ES(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) -- movl %esp, %edx - pushl %esi # push the error code -- pushl %edx # push the pt_regs pointer - movl $(__USER_DS), %edx - movl %edx, %ds - movl %edx, %es -+ -+/* clobbers edx, ebx and ebp */ -+ __SWITCH_KERNELSPACE -+ -+ leal 4(%esp), %edx # prepare pt_regs -+ pushl %edx # push pt_regs -+ - call *%edi - addl $8, %esp - jmp ret_from_exception -@@ -523,7 +670,7 @@ nmi_stack_correct: - pushl %edx - call do_nmi - addl $8, %esp -- RESTORE_ALL -+ jmp restore_all - - nmi_stack_fixup: - FIX_STACK(12,nmi_stack_correct, 1) -@@ -600,6 +747,8 @@ ENTRY(spurious_interrupt_bug) - pushl 
$do_spurious_interrupt_bug - jmp error_code - -+.previous -+ - .data - ENTRY(sys_call_table) - .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ -@@ -887,4 +1036,22 @@ ENTRY(sys_call_table) - .long sys_mq_getsetattr - .long sys_ni_syscall /* reserved for kexec */ - -+ .rept 500-(.-sys_call_table)/4 -+ .long sys_ni_syscall -+ .endr -+ .long sys_fairsched_mknod /* 500 */ -+ .long sys_fairsched_rmnod -+ .long sys_fairsched_chwt -+ .long sys_fairsched_mvpr -+ .long sys_fairsched_rate -+ -+ .rept 510-(.-sys_call_table)/4 -+ .long sys_ni_syscall -+ .endr -+ -+ .long sys_getluid /* 510 */ -+ .long sys_setluid -+ .long sys_setublimit -+ .long sys_ubstat -+ - syscall_table_size=(.-sys_call_table) -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/entry_trampoline.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/entry_trampoline.c ---- linux-2.6.8.1.orig/arch/i386/kernel/entry_trampoline.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/entry_trampoline.c 2005-09-09 14:39:25.000000000 +0400 -@@ -0,0 +1,75 @@ -+/* -+ * linux/arch/i386/kernel/entry_trampoline.c -+ * -+ * (C) Copyright 2003 Ingo Molnar -+ * -+ * This file contains the needed support code for 4GB userspace -+ */ -+ -+#include <linux/init.h> -+#include <linux/smp.h> -+#include <linux/mm.h> -+#include <linux/sched.h> -+#include <linux/kernel.h> -+#include <linux/string.h> -+#include <linux/highmem.h> -+#include <asm/desc.h> -+#include <asm/atomic_kmap.h> -+ -+extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; -+ -+void __init init_entry_mappings(void) -+{ -+#ifdef CONFIG_X86_HIGH_ENTRY -+ -+ void *tramp; -+ int p; -+ -+ /* -+ * We need a high IDT and GDT for the 4G/4G split: -+ */ -+ trap_init_virtual_IDT(); -+ -+ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL_EXEC); -+ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL_EXEC); -+ 
tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); -+ -+ printk("mapped 4G/4G trampoline to %p.\n", tramp); -+ BUG_ON((void *)&__start___entry_text != tramp); -+ /* -+ * Virtual kernel stack: -+ */ -+ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK_TOP) & (THREAD_SIZE-1)); -+ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); -+ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); -+ -+ /* -+ * set up the initial thread's virtual stack related -+ * fields: -+ */ -+ for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++) -+ current->thread_info->stack_page[p] = virt_to_page((char *)current->thread_info + (p*PAGE_SIZE)); -+ -+ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP); -+ -+ for (p = 0; p < ARRAY_SIZE(current->thread_info->stack_page); p++) { -+ __kunmap_atomic_type(KM_VSTACK_TOP-p); -+ __kmap_atomic(current->thread_info->stack_page[p], KM_VSTACK_TOP-p); -+ } -+#endif -+ current->thread_info->real_stack = (void *)current->thread_info; -+ current->thread_info->user_pgd = NULL; -+ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; -+} -+ -+ -+ -+void __init entry_trampoline_setup(void) -+{ -+ /* -+ * old IRQ entries set up by the boot code will still hang -+ * around - they are a sign of hw trouble anyway, now they'll -+ * produce a double fault message. -+ */ -+ trap_init_virtual_GDT(); -+} -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/i386_ksyms.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/i386_ksyms.c ---- linux-2.6.8.1.orig/arch/i386/kernel/i386_ksyms.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/i386_ksyms.c 2005-09-09 14:39:25.000000000 +0400 -@@ -92,7 +92,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter - EXPORT_SYMBOL_NOVERS(__down_failed_trylock); - EXPORT_SYMBOL_NOVERS(__up_wakeup); - /* Networking helper routines. 
*/ --EXPORT_SYMBOL(csum_partial_copy_generic); - /* Delay loops */ - EXPORT_SYMBOL(__ndelay); - EXPORT_SYMBOL(__udelay); -@@ -106,13 +105,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); - EXPORT_SYMBOL(strpbrk); - EXPORT_SYMBOL(strstr); - -+#if !defined(CONFIG_X86_UACCESS_INDIRECT) - EXPORT_SYMBOL(strncpy_from_user); --EXPORT_SYMBOL(__strncpy_from_user); -+EXPORT_SYMBOL(__direct_strncpy_from_user); - EXPORT_SYMBOL(clear_user); - EXPORT_SYMBOL(__clear_user); - EXPORT_SYMBOL(__copy_from_user_ll); - EXPORT_SYMBOL(__copy_to_user_ll); - EXPORT_SYMBOL(strnlen_user); -+#else /* CONFIG_X86_UACCESS_INDIRECT */ -+EXPORT_SYMBOL(direct_csum_partial_copy_generic); -+#endif - - EXPORT_SYMBOL(dma_alloc_coherent); - EXPORT_SYMBOL(dma_free_coherent); -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/i387.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/i387.c ---- linux-2.6.8.1.orig/arch/i386/kernel/i387.c 2004-08-14 14:56:24.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/i387.c 2005-09-09 14:39:25.000000000 +0400 -@@ -227,6 +227,7 @@ void set_fpu_twd( struct task_struct *ts - static int convert_fxsr_to_user( struct _fpstate __user *buf, - struct i387_fxsave_struct *fxsave ) - { -+ struct _fpreg tmp[8]; /* 80 bytes scratch area */ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; -@@ -243,23 +244,25 @@ static int convert_fxsr_to_user( struct - if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) - return 1; - -- to = &buf->_st[0]; -+ to = tmp; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long __user *t = (unsigned long __user *)to; - unsigned long *f = (unsigned long *)from; - -- if (__put_user(*f, t) || -- __put_user(*(f + 1), t + 1) || -- __put_user(from->exponent, &to->exponent)) -- return 1; -+ *t = *f; -+ *(t + 1) = *(f+1); -+ to->exponent = from->exponent; - } -+ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) -+ return 1; - return 0; - } - - static int 
convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, - struct _fpstate __user *buf ) - { -+ struct _fpreg tmp[8]; /* 80 bytes scratch area */ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; -@@ -267,6 +270,8 @@ static int convert_fxsr_from_user( struc - - if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) - return 1; -+ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) -+ return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); -@@ -278,15 +283,14 @@ static int convert_fxsr_from_user( struc - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; -- from = &buf->_st[0]; -+ from = tmp; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long *t = (unsigned long *)to; - unsigned long __user *f = (unsigned long __user *)from; - -- if (__get_user(*t, f) || -- __get_user(*(t + 1), f + 1) || -- __get_user(to->exponent, &from->exponent)) -- return 1; -+ *t = *f; -+ *(t + 1) = *(f + 1); -+ to->exponent = from->exponent; - } - return 0; - } -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/init_task.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/init_task.c ---- linux-2.6.8.1.orig/arch/i386/kernel/init_task.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/init_task.c 2005-09-09 14:39:25.000000000 +0400 -@@ -27,7 +27,7 @@ EXPORT_SYMBOL(init_mm); - */ - union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -- { INIT_THREAD_INFO(init_task) }; -+ { INIT_THREAD_INFO(init_task, init_thread_union) }; - - /* - * Initial task structure. -@@ -45,5 +45,5 @@ EXPORT_SYMBOL(init_task); - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ --struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... 
NR_CPUS-1] = INIT_TSS }; -+struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; - -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/irq.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/irq.c ---- linux-2.6.8.1.orig/arch/i386/kernel/irq.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/irq.c 2005-09-09 14:39:25.000000000 +0400 -@@ -45,6 +45,9 @@ - #include <asm/desc.h> - #include <asm/irq.h> - -+#include <ub/beancounter.h> -+#include <ub/ub_task.h> -+ - /* - * Linux has a controller-independent x86 interrupt architecture. - * every controller has a 'controller-template', that is used -@@ -221,15 +224,19 @@ asmlinkage int handle_IRQ_event(unsigned - { - int status = 1; /* Force the "do bottom halves" bit */ - int retval = 0; -+ struct user_beancounter *ub; - - if (!(action->flags & SA_INTERRUPT)) - local_irq_enable(); - -+ ub = set_exec_ub(get_ub0()); - do { - status |= action->flags; - retval |= action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); -+ (void)set_exec_ub(ub); -+ - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - local_irq_disable(); -@@ -429,7 +436,9 @@ asmlinkage unsigned int do_IRQ(struct pt - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; -+ struct ve_struct *envid; - -+ envid = set_exec_env(get_ve0()); - irq_enter(); - - #ifdef CONFIG_DEBUG_STACKOVERFLOW -@@ -513,6 +522,8 @@ asmlinkage unsigned int do_IRQ(struct pt - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; -+ irqctx->tinfo.real_stack = curctx->tinfo.real_stack; -+ irqctx->tinfo.virtual_stack = curctx->tinfo.virtual_stack; - irqctx->tinfo.previous_esp = current_stack_pointer(); - - *--isp = (u32) action; -@@ -568,6 +579,7 @@ out: - spin_unlock(&desc->lock); - - irq_exit(); -+ (void)set_exec_env(envid); - - return 1; - } 
-@@ -1173,6 +1185,8 @@ asmlinkage void do_softirq(void) - curctx = current_thread_info(); - irqctx = softirq_ctx[smp_processor_id()]; - irqctx->tinfo.task = curctx->task; -+ irqctx->tinfo.real_stack = curctx->real_stack; -+ irqctx->tinfo.virtual_stack = curctx->virtual_stack; - irqctx->tinfo.previous_esp = current_stack_pointer(); - - /* build the stack frame on the softirq stack */ -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/ldt.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/ldt.c ---- linux-2.6.8.1.orig/arch/i386/kernel/ldt.c 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/ldt.c 2005-09-09 14:39:25.000000000 +0400 -@@ -2,7 +2,7 @@ - * linux/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds -- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> -+ * Copyright (C) 1999, 2003 Ingo Molnar <mingo@redhat.com> - */ - - #include <linux/errno.h> -@@ -18,6 +18,8 @@ - #include <asm/system.h> - #include <asm/ldt.h> - #include <asm/desc.h> -+#include <linux/highmem.h> -+#include <asm/atomic_kmap.h> - - #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ - static void flush_ldt(void *null) -@@ -29,34 +31,31 @@ static void flush_ldt(void *null) - - static int alloc_ldt(mm_context_t *pc, int mincount, int reload) - { -- void *oldldt; -- void *newldt; -- int oldsize; -+ int oldsize, newsize, i; - - if (mincount <= pc->size) - return 0; -+ /* -+ * LDT got larger - reallocate if necessary. 
-+ */ - oldsize = pc->size; - mincount = (mincount+511)&(~511); -- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) -- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); -- else -- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); -- -- if (!newldt) -- return -ENOMEM; -- -- if (oldsize) -- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); -- oldldt = pc->ldt; -- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); -- pc->ldt = newldt; -- wmb(); -+ newsize = mincount*LDT_ENTRY_SIZE; -+ for (i = 0; i < newsize; i += PAGE_SIZE) { -+ int nr = i/PAGE_SIZE; -+ BUG_ON(i >= 64*1024); -+ if (!pc->ldt_pages[nr]) { -+ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER|__GFP_UBC); -+ if (!pc->ldt_pages[nr]) -+ return -ENOMEM; -+ clear_highpage(pc->ldt_pages[nr]); -+ } -+ } - pc->size = mincount; -- wmb(); -- - if (reload) { - #ifdef CONFIG_SMP - cpumask_t mask; -+ - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); -@@ -67,24 +66,32 @@ static int alloc_ldt(mm_context_t *pc, i - load_LDT(pc); - #endif - } -- if (oldsize) { -- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) -- vfree(oldldt); -- else -- kfree(oldldt); -- } - return 0; - } - - static inline int copy_ldt(mm_context_t *new, mm_context_t *old) - { -- int err = alloc_ldt(new, old->size, 0); -- if (err < 0) -+ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; -+ -+ err = alloc_ldt(new, size, 0); -+ if (err < 0) { -+ new->size = 0; - return err; -- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); -+ } -+ for (i = 0; i < nr_pages; i++) -+ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); - return 0; - } - -+static void free_ldt(mm_context_t *mc) -+{ -+ int i; -+ -+ for (i = 0; i < MAX_LDT_PAGES; i++) -+ if (mc->ldt_pages[i]) -+ __free_page(mc->ldt_pages[i]); -+} -+ - /* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. 
-@@ -96,10 +103,13 @@ int init_new_context(struct task_struct - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; -+ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); -+ if (retval < 0) -+ free_ldt(&mm->context); - up(&old_mm->context.sem); - } - return retval; -@@ -107,23 +117,21 @@ int init_new_context(struct task_struct - - /* - * No need to lock the MM as we are the last user -+ * Do not touch the ldt register, we are already -+ * in the next thread. - */ - void destroy_context(struct mm_struct *mm) - { -- if (mm->context.size) { -- if (mm == current->active_mm) -- clear_LDT(); -- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) -- vfree(mm->context.ldt); -- else -- kfree(mm->context.ldt); -- mm->context.size = 0; -- } -+ int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; -+ -+ for (i = 0; i < nr_pages; i++) -+ __free_page(mm->context.ldt_pages[i]); -+ mm->context.size = 0; - } - - static int read_ldt(void __user * ptr, unsigned long bytecount) - { -- int err; -+ int err, i; - unsigned long size; - struct mm_struct * mm = current->mm; - -@@ -138,8 +146,25 @@ static int read_ldt(void __user * ptr, u - size = bytecount; - - err = 0; -- if (copy_to_user(ptr, mm->context.ldt, size)) -- err = -EFAULT; -+ /* -+ * This is necessary just in case we got here straight from a -+ * context-switch where the ptes were set but no tlb flush -+ * was done yet. We rather avoid doing a TLB flush in the -+ * context-switch path and do it here instead. 
-+ */ -+ __flush_tlb_global(); -+ -+ for (i = 0; i < size; i += PAGE_SIZE) { -+ int nr = i / PAGE_SIZE, bytes; -+ char *kaddr = kmap(mm->context.ldt_pages[nr]); -+ -+ bytes = size - i; -+ if (bytes > PAGE_SIZE) -+ bytes = PAGE_SIZE; -+ if (copy_to_user(ptr + i, kaddr, bytes)) -+ err = -EFAULT; -+ kunmap(mm->context.ldt_pages[nr]); -+ } - up(&mm->context.sem); - if (err < 0) - return err; -@@ -158,7 +183,7 @@ static int read_default_ldt(void __user - - err = 0; - address = &default_ldt[0]; -- size = 5*sizeof(struct desc_struct); -+ size = 5*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - -@@ -200,7 +225,15 @@ static int write_ldt(void __user * ptr, - goto out_unlock; - } - -- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); -+ /* -+ * No rescheduling allowed from this point to the install. -+ * -+ * We do a TLB flush for the same reason as in the read_ldt() path. -+ */ -+ preempt_disable(); -+ __flush_tlb_global(); -+ lp = (__u32 *) ((ldt_info.entry_number << 3) + -+ (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); - - /* Allow LDTs to be cleared by the user. 
*/ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { -@@ -221,6 +254,7 @@ install: - *lp = entry_1; - *(lp+1) = entry_2; - error = 0; -+ preempt_enable(); - - out_unlock: - up(&mm->context.sem); -@@ -248,3 +282,26 @@ asmlinkage int sys_modify_ldt(int func, - } - return ret; - } -+ -+/* -+ * load one particular LDT into the current CPU -+ */ -+void load_LDT_nolock(mm_context_t *pc, int cpu) -+{ -+ struct page **pages = pc->ldt_pages; -+ int count = pc->size; -+ int nr_pages, i; -+ -+ if (likely(!count)) { -+ pages = &default_ldt_page; -+ count = 5; -+ } -+ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; -+ -+ for (i = 0; i < nr_pages; i++) { -+ __kunmap_atomic_type(KM_LDT_PAGE0 - i); -+ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); -+ } -+ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); -+ load_LDT_desc(); -+} -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/Makefile linux-2.6.8.1-ve022stab034/arch/i386/kernel/Makefile ---- linux-2.6.8.1.orig/arch/i386/kernel/Makefile 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/Makefile 2005-09-09 14:39:25.000000000 +0400 -@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.ld - obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ - ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ - pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ -- doublefault.o -+ doublefault.o entry_trampoline.o - - obj-y += cpu/ - obj-y += timers/ -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/mpparse.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/mpparse.c ---- linux-2.6.8.1.orig/arch/i386/kernel/mpparse.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/mpparse.c 2005-09-09 14:39:25.000000000 +0400 -@@ -690,7 +690,7 @@ void __init get_smp_config (void) - * Read the physical hardware table. Anything here will - * override the defaults. 
- */ -- if (!smp_read_mpc((void *)mpf->mpf_physptr)) { -+ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { - smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/nmi.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/nmi.c ---- linux-2.6.8.1.orig/arch/i386/kernel/nmi.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/nmi.c 2005-09-09 14:39:37.000000000 +0400 -@@ -25,13 +25,19 @@ - #include <linux/module.h> - #include <linux/nmi.h> - #include <linux/sysdev.h> -+#include <linux/console.h> - - #include <asm/smp.h> - #include <asm/mtrr.h> - #include <asm/mpspec.h> - #include <asm/nmi.h> - --unsigned int nmi_watchdog = NMI_NONE; -+#ifdef CONFIG_NMI_WATCHDOG -+#define NMI_DEFAULT NMI_IO_APIC -+#else -+#define NMI_DEFAULT NMI_NONE -+#endif -+unsigned int nmi_watchdog = NMI_DEFAULT; - static unsigned int nmi_hz = HZ; - static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ - static unsigned int nmi_p4_cccr_val; -@@ -459,6 +465,21 @@ void touch_nmi_watchdog (void) - alert_counter[i] = 0; - } - -+static spinlock_t show_regs_lock = SPIN_LOCK_UNLOCKED; -+ -+void smp_show_regs(struct pt_regs *regs, void *info) -+{ -+ if (regs == NULL) -+ return; -+ -+ bust_spinlocks(1); -+ spin_lock(&show_regs_lock); -+ printk("----------- IPI show regs -----------"); -+ show_regs(regs); -+ spin_unlock(&show_regs_lock); -+ bust_spinlocks(0); -+} -+ - void nmi_watchdog_tick (struct pt_regs * regs) - { - -@@ -486,10 +507,10 @@ void nmi_watchdog_tick (struct pt_regs * - bust_spinlocks(1); - printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); - show_registers(regs); -- printk("console shuts up ...\n"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); -+ smp_nmi_call_function(smp_show_regs, NULL, 1); - do_exit(SIGSEGV); - } - 
} else { -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/process.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/process.c ---- linux-2.6.8.1.orig/arch/i386/kernel/process.c 2004-08-14 14:54:46.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/process.c 2005-09-09 14:39:25.000000000 +0400 -@@ -36,6 +36,7 @@ - #include <linux/module.h> - #include <linux/kallsyms.h> - #include <linux/ptrace.h> -+#include <linux/faudit.h> - - #include <asm/uaccess.h> - #include <asm/pgtable.h> -@@ -46,6 +47,7 @@ - #include <asm/i387.h> - #include <asm/irq.h> - #include <asm/desc.h> -+#include <asm/atomic_kmap.h> - #ifdef CONFIG_MATH_EMULATION - #include <asm/math_emu.h> - #endif -@@ -219,10 +221,12 @@ __setup("idle=", idle_setup); - void show_regs(struct pt_regs * regs) - { - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; -+ extern int die_counter; - - printk("\n"); -- printk("Pid: %d, comm: %20s\n", current->pid, current->comm); -- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); -+ printk("Pid: %d, comm: %20s, oopses: %d\n", current->pid, current->comm, die_counter); -+ printk("EIP: %04x:[<%08lx>] CPU: %d, VCPU: %d:%d\n",0xffff & regs->xcs,regs->eip, smp_processor_id(), -+ task_vsched_id(current), task_cpu(current)); - print_symbol("EIP is at %s\n", regs->eip); - - if (regs->xcs & 3) -@@ -272,6 +276,13 @@ int kernel_thread(int (*fn)(void *), voi - { - struct pt_regs regs; - -+ /* Don't allow kernel_thread() inside VE */ -+ if (!ve_is_super(get_exec_env())) { -+ printk("kernel_thread call inside VE\n"); -+ dump_stack(); -+ return -EPERM; -+ } -+ - memset(®s, 0, sizeof(regs)); - - regs.ebx = (unsigned long) fn; -@@ -311,6 +322,9 @@ void flush_thread(void) - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); -+#ifdef CONFIG_X86_HIGH_ENTRY -+ clear_thread_flag(TIF_DB7); -+#endif - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - /* - * Forget coprocessor state.. 
-@@ -324,9 +338,8 @@ void release_thread(struct task_struct * - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { -- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", -+ printk("WARNING: dead process %8s still has LDT? <%d>\n", - dead_task->comm, -- dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } -@@ -350,7 +363,7 @@ int copy_thread(int nr, unsigned long cl - { - struct pt_regs * childregs; - struct task_struct *tsk; -- int err; -+ int err, i; - - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; - *childregs = *regs; -@@ -361,7 +374,18 @@ int copy_thread(int nr, unsigned long cl - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - -+ /* -+ * get the two stack pages, for the virtual stack. -+ * -+ * IMPORTANT: this code relies on the fact that the task -+ * structure is an THREAD_SIZE aligned piece of physical memory. -+ */ -+ for (i = 0; i < ARRAY_SIZE(p->thread_info->stack_page); i++) -+ p->thread_info->stack_page[i] = -+ virt_to_page((unsigned long)p->thread_info + (i*PAGE_SIZE)); -+ - p->thread.eip = (unsigned long) ret_from_fork; -+ p->thread_info->real_stack = p->thread_info; - - savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); -@@ -513,10 +537,42 @@ struct task_struct fastcall * __switch_t - - __unlazy_fpu(prev_p); - -+#ifdef CONFIG_X86_HIGH_ENTRY -+{ -+ int i; -+ /* -+ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is -+ * needed because otherwise NMIs could interrupt the -+ * user-return code with a virtual stack and stale TLBs.) 
-+ */ -+ for (i = 0; i < ARRAY_SIZE(next_p->thread_info->stack_page); i++) { -+ __kunmap_atomic_type(KM_VSTACK_TOP-i); -+ __kmap_atomic(next_p->thread_info->stack_page[i], KM_VSTACK_TOP-i); -+ } -+ /* -+ * NOTE: here we rely on the task being the stack as well -+ */ -+ next_p->thread_info->virtual_stack = -+ (void *)__kmap_atomic_vaddr(KM_VSTACK_TOP); -+} -+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -+ /* -+ * If next was preempted on entry from userspace to kernel, -+ * and now it's on a different cpu, we need to adjust %esp. -+ * This assumes that entry.S does not copy %esp while on the -+ * virtual stack (with interrupts enabled): which is so, -+ * except within __SWITCH_KERNELSPACE itself. -+ */ -+ if (unlikely(next->esp >= TASK_SIZE)) { -+ next->esp &= THREAD_SIZE - 1; -+ next->esp |= (unsigned long) next_p->thread_info->virtual_stack; -+ } -+#endif -+#endif - /* - * Reload esp0, LDT and the page table pointer: - */ -- load_esp0(tss, next); -+ load_virtual_esp0(tss, next_p); - - /* - * Load the per-thread Thread-Local Storage descriptor. 
-@@ -578,6 +634,13 @@ struct task_struct fastcall * __switch_t - - asmlinkage int sys_fork(struct pt_regs regs) - { -+ struct faudit_regs_arg arg; -+ -+ arg.regs = ®s; -+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_FORK, &arg) -+ != NOTIFY_DONE) -+ return arg.err; -+ - return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); - } - -@@ -586,6 +649,12 @@ asmlinkage int sys_clone(struct pt_regs - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; -+ struct faudit_regs_arg arg; -+ -+ arg.regs = ®s; -+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_CLONE, &arg) -+ != NOTIFY_DONE) -+ return arg.err; - - clone_flags = regs.ebx; - newsp = regs.ecx; -@@ -608,6 +677,13 @@ asmlinkage int sys_clone(struct pt_regs - */ - asmlinkage int sys_vfork(struct pt_regs regs) - { -+ struct faudit_regs_arg arg; -+ -+ arg.regs = ®s; -+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_VFORK, &arg) -+ != NOTIFY_DONE) -+ return arg.err; -+ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); - } - -@@ -618,6 +694,13 @@ asmlinkage int sys_execve(struct pt_regs - { - int error; - char * filename; -+ struct faudit_regs_arg arg; -+ -+ arg.regs = ®s; -+ if (virtinfo_notifier_call(VITYPE_FAUDIT, VIRTINFO_FAUDIT_EXECVE, &arg) -+ != NOTIFY_DONE) -+ return arg.err; -+ - - filename = getname((char __user *) regs.ebx); - error = PTR_ERR(filename); -@@ -759,6 +842,8 @@ asmlinkage int sys_get_thread_area(struc - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - -+ memset(&info, 0, sizeof(info)); -+ - desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/i386/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/ptrace.c 2005-09-09 14:39:30.000000000 +0400 -@@ 
-253,7 +253,7 @@ asmlinkage int sys_ptrace(long request, - } - ret = -ESRCH; - read_lock(&tasklist_lock); -- child = find_task_by_pid(pid); -+ child = find_task_by_pid_ve(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); -@@ -388,7 +388,7 @@ asmlinkage int sys_ptrace(long request, - long tmp; - - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -@@ -541,8 +541,10 @@ void do_syscall_trace(struct pt_regs *re - return; - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ -+ set_pn_state(current, entryexit ? PN_STOP_LEAVE : PN_STOP_ENTRY); - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); -+ clear_pn_state(current); - - /* - * this isn't the same as continuing with a signal, but it will do -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/reboot.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/reboot.c ---- linux-2.6.8.1.orig/arch/i386/kernel/reboot.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/reboot.c 2005-09-09 14:39:25.000000000 +0400 -@@ -233,12 +233,11 @@ void machine_real_restart(unsigned char - CMOS_WRITE(0x00, 0x8f); - spin_unlock_irqrestore(&rtc_lock, flags); - -- /* Remap the kernel at virtual address zero, as well as offset zero -- from the kernel segment. This assumes the kernel segment starts at -- virtual address PAGE_OFFSET. */ -- -- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, -- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); -+ /* -+ * Remap the first 16 MB of RAM (which includes the kernel image) -+ * at virtual address zero: -+ */ -+ setup_identity_mappings(swapper_pg_dir, 0, LOW_MAPPINGS_SIZE); - - /* - * Use `swapper_pg_dir' as our page directory. 
-diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/setup.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/setup.c ---- linux-2.6.8.1.orig/arch/i386/kernel/setup.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/setup.c 2005-09-09 14:39:25.000000000 +0400 -@@ -39,6 +39,7 @@ - #include <linux/efi.h> - #include <linux/init.h> - #include <linux/edd.h> -+#include <linux/mmzone.h> - #include <video/edid.h> - #include <asm/e820.h> - #include <asm/mpspec.h> -@@ -1073,7 +1074,21 @@ static unsigned long __init setup_memory - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; - initrd_end = initrd_start+INITRD_SIZE; - } -- else { -+ else if ((max_low_pfn << PAGE_SHIFT) < -+ PAGE_ALIGN(INITRD_START + INITRD_SIZE)) { -+ /* GRUB places initrd as high as possible, so when -+ VMALLOC_AREA is bigger than std Linux has, such -+ initrd is inaccessiable in normal zone (highmem) */ -+ -+ /* initrd should be totally in highmem, sorry */ -+ BUG_ON(INITRD_START < (max_low_pfn << PAGE_SHIFT)); -+ -+ initrd_start = (unsigned long) -+ alloc_bootmem_low(PAGE_ALIGN(INITRD_SIZE)); -+ initrd_copy = INITRD_SIZE; -+ initrd_end = INITRD_START + initrd_copy; -+ /* initrd is copied from highmem in initrd_move() */ -+ } else { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - INITRD_START + INITRD_SIZE, -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/signal.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/signal.c ---- linux-2.6.8.1.orig/arch/i386/kernel/signal.c 2004-08-14 14:55:24.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/signal.c 2005-09-09 14:39:30.000000000 +0400 -@@ -42,6 +42,7 @@ sys_sigsuspend(int history0, int history - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; -+ set_sigsuspend_state(current, saveset); - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); -@@ -50,8 +51,10 @@ sys_sigsuspend(int history0, 
int history - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); -- if (do_signal(regs, &saveset)) -+ if (do_signal(regs, &saveset)) { -+ clear_sigsuspend_state(current); - return -EINTR; -+ } - } - } - -@@ -70,6 +73,7 @@ sys_rt_sigsuspend(struct pt_regs regs) - - spin_lock_irq(¤t->sighand->siglock); - saveset = current->blocked; -+ set_sigsuspend_state(current, saveset); - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); -@@ -78,8 +82,10 @@ sys_rt_sigsuspend(struct pt_regs regs) - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); -- if (do_signal(®s, &saveset)) -+ if (do_signal(®s, &saveset)) { -+ clear_sigsuspend_state(current); - return -EINTR; -+ } - } - } - -@@ -132,28 +138,29 @@ sys_sigaltstack(unsigned long ebx) - */ - - static int --restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) -+restore_sigcontext(struct pt_regs *regs, -+ struct sigcontext __user *__sc, int *peax) - { -- unsigned int err = 0; -+ struct sigcontext scratch; /* 88 bytes of scratch area */ - - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; - --#define COPY(x) err |= __get_user(regs->x, &sc->x) -+ if (copy_from_user(&scratch, __sc, sizeof(scratch))) -+ return -EFAULT; -+ -+#define COPY(x) regs->x = scratch.x - - #define COPY_SEG(seg) \ -- { unsigned short tmp; \ -- err |= __get_user(tmp, &sc->seg); \ -+ { unsigned short tmp = scratch.seg; \ - regs->x##seg = tmp; } - - #define COPY_SEG_STRICT(seg) \ -- { unsigned short tmp; \ -- err |= __get_user(tmp, &sc->seg); \ -+ { unsigned short tmp = scratch.seg; \ - regs->x##seg = tmp|3; } - - #define GET_SEG(seg) \ -- { unsigned short tmp; \ -- err |= __get_user(tmp, &sc->seg); \ -+ { unsigned short tmp = scratch.seg; \ - loadsegment(seg,tmp); } - - #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ -@@ -176,27 +183,23 @@ 
restore_sigcontext(struct pt_regs *regs, - COPY_SEG_STRICT(ss); - - { -- unsigned int tmpflags; -- err |= __get_user(tmpflags, &sc->eflags); -+ unsigned int tmpflags = scratch.eflags; - regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_eax = -1; /* disable syscall checks */ - } - - { -- struct _fpstate __user * buf; -- err |= __get_user(buf, &sc->fpstate); -+ struct _fpstate * buf = scratch.fpstate; - if (buf) { - if (verify_area(VERIFY_READ, buf, sizeof(*buf))) -- goto badframe; -- err |= restore_i387(buf); -+ return -EFAULT; -+ if (restore_i387(buf)) -+ return -EFAULT; - } - } - -- err |= __get_user(*peax, &sc->eax); -- return err; -- --badframe: -- return 1; -+ *peax = scratch.eax; -+ return 0; - } - - asmlinkage int sys_sigreturn(unsigned long __unused) -@@ -265,46 +268,47 @@ badframe: - */ - - static int --setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, -+setup_sigcontext(struct sigcontext __user *__sc, struct _fpstate __user *fpstate, - struct pt_regs *regs, unsigned long mask) - { -- int tmp, err = 0; -+ struct sigcontext sc; /* 88 bytes of scratch area */ -+ int tmp; - - tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); -- err |= __put_user(tmp, (unsigned int __user *)&sc->gs); -+ *(unsigned int *)&sc.gs = tmp; - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); -- err |= __put_user(tmp, (unsigned int __user *)&sc->fs); -- -- err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); -- err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); -- err |= __put_user(regs->edi, &sc->edi); -- err |= __put_user(regs->esi, &sc->esi); -- err |= __put_user(regs->ebp, &sc->ebp); -- err |= __put_user(regs->esp, &sc->esp); -- err |= __put_user(regs->ebx, &sc->ebx); -- err |= __put_user(regs->edx, &sc->edx); -- err |= __put_user(regs->ecx, &sc->ecx); -- err |= __put_user(regs->eax, &sc->eax); -- err |= __put_user(current->thread.trap_no, &sc->trapno); -- err |= 
__put_user(current->thread.error_code, &sc->err); -- err |= __put_user(regs->eip, &sc->eip); -- err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs); -- err |= __put_user(regs->eflags, &sc->eflags); -- err |= __put_user(regs->esp, &sc->esp_at_signal); -- err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss); -+ *(unsigned int *)&sc.fs = tmp; -+ *(unsigned int *)&sc.es = regs->xes; -+ *(unsigned int *)&sc.ds = regs->xds; -+ sc.edi = regs->edi; -+ sc.esi = regs->esi; -+ sc.ebp = regs->ebp; -+ sc.esp = regs->esp; -+ sc.ebx = regs->ebx; -+ sc.edx = regs->edx; -+ sc.ecx = regs->ecx; -+ sc.eax = regs->eax; -+ sc.trapno = current->thread.trap_no; -+ sc.err = current->thread.error_code; -+ sc.eip = regs->eip; -+ *(unsigned int *)&sc.cs = regs->xcs; -+ sc.eflags = regs->eflags; -+ sc.esp_at_signal = regs->esp; -+ *(unsigned int *)&sc.ss = regs->xss; - - tmp = save_i387(fpstate); - if (tmp < 0) -- err = 1; -- else -- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); -+ return 1; -+ sc.fpstate = tmp ? fpstate : NULL; - - /* non-iBCS2 extensions.. */ -- err |= __put_user(mask, &sc->oldmask); -- err |= __put_user(current->thread.cr2, &sc->cr2); -+ sc.oldmask = mask; -+ sc.cr2 = current->thread.cr2; - -- return err; -+ if (copy_to_user(__sc, &sc, sizeof(sc))) -+ return 1; -+ return 0; - } - - /* -@@ -443,7 +447,7 @@ static void setup_rt_frame(int sig, stru - /* Create the ucontext. 
*/ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); -- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); -+ err |= __put_user(current->sas_ss_sp, (unsigned long *)&frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->esp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); -@@ -565,9 +569,10 @@ int fastcall do_signal(struct pt_regs *r - if ((regs->xcs & 3) != 3) - return 1; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (!oldset) -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/smpboot.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/smpboot.c ---- linux-2.6.8.1.orig/arch/i386/kernel/smpboot.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/smpboot.c 2005-09-09 14:39:25.000000000 +0400 -@@ -309,6 +309,8 @@ static void __init synchronize_tsc_bp (v - if (!buggy) - printk("passed.\n"); - ; -+ /* TSC reset. kill whatever might rely on old values */ -+ VE_TASK_INFO(current)->wakeup_stamp = 0; - } - - static void __init synchronize_tsc_ap (void) -@@ -334,6 +336,8 @@ static void __init synchronize_tsc_ap (v - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); - } -+ /* TSC reset. kill whatever might rely on old values */ -+ VE_TASK_INFO(current)->wakeup_stamp = 0; - } - #undef NR_LOOPS - -@@ -499,7 +503,7 @@ static struct task_struct * __init fork_ - * don't care about the eip and regs settings since - * we'll never reschedule the forked task. 
- */ -- return copy_process(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0, NULL, NULL); -+ return copy_process(CLONE_VM|CLONE_IDLETASK, 0, ®s, 0, NULL, NULL, 0); - } - - #ifdef CONFIG_NUMA -@@ -810,6 +814,9 @@ static int __init do_boot_cpu(int apicid - - idle->thread.eip = (unsigned long) start_secondary; - -+ /* Cosmetic: sleep_time won't be changed afterwards for the idle -+ * thread; keep it 0 rather than -cycles. */ -+ VE_TASK_INFO(idle)->sleep_time = 0; - unhash_process(idle); - - /* start_eip had better be page-aligned! */ -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/smp.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/smp.c ---- linux-2.6.8.1.orig/arch/i386/kernel/smp.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/smp.c 2005-09-09 14:39:25.000000000 +0400 -@@ -22,6 +22,7 @@ - - #include <asm/mtrr.h> - #include <asm/tlbflush.h> -+#include <asm/nmi.h> - #include <mach_ipi.h> - #include <mach_apic.h> - -@@ -326,10 +327,12 @@ asmlinkage void smp_invalidate_interrupt - - if (flush_mm == cpu_tlbstate[cpu].active_mm) { - if (cpu_tlbstate[cpu].state == TLBSTATE_OK) { -+#ifndef CONFIG_X86_SWITCH_PAGETABLES - if (flush_va == FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); -+#endif - } else - leave_mm(cpu); - } -@@ -395,21 +398,6 @@ static void flush_tlb_others(cpumask_t c - spin_unlock(&tlbstate_lock); - } - --void flush_tlb_current_task(void) --{ -- struct mm_struct *mm = current->mm; -- cpumask_t cpu_mask; -- -- preempt_disable(); -- cpu_mask = mm->cpu_vm_mask; -- cpu_clear(smp_processor_id(), cpu_mask); -- -- local_flush_tlb(); -- if (!cpus_empty(cpu_mask)) -- flush_tlb_others(cpu_mask, mm, FLUSH_ALL); -- preempt_enable(); --} -- - void flush_tlb_mm (struct mm_struct * mm) - { - cpumask_t cpu_mask; -@@ -441,7 +429,10 @@ void flush_tlb_page(struct vm_area_struc - - if (current->active_mm == mm) { - if(current->mm) -- __flush_tlb_one(va); -+#ifndef CONFIG_X86_SWITCH_PAGETABLES -+ __flush_tlb_one(va) -+#endif -+ ; - else 
- leave_mm(smp_processor_id()); - } -@@ -547,6 +538,89 @@ int smp_call_function (void (*func) (voi - return 0; - } - -+static spinlock_t nmi_call_lock = SPIN_LOCK_UNLOCKED; -+static struct nmi_call_data_struct { -+ smp_nmi_function func; -+ void *info; -+ atomic_t started; -+ atomic_t finished; -+ cpumask_t cpus_called; -+ int wait; -+} *nmi_call_data; -+ -+static int smp_nmi_callback(struct pt_regs * regs, int cpu) -+{ -+ smp_nmi_function func; -+ void *info; -+ int wait; -+ -+ func = nmi_call_data->func; -+ info = nmi_call_data->info; -+ wait = nmi_call_data->wait; -+ ack_APIC_irq(); -+ /* prevent from calling func() multiple times */ -+ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called)) -+ return 0; -+ /* -+ * notify initiating CPU that I've grabbed the data and am -+ * about to execute the function -+ */ -+ mb(); -+ atomic_inc(&nmi_call_data->started); -+ /* at this point the nmi_call_data structure is out of scope */ -+ irq_enter(); -+ func(regs, info); -+ irq_exit(); -+ if (wait) -+ atomic_inc(&nmi_call_data->finished); -+ -+ return 0; -+} -+ -+/* -+ * This function tries to call func(regs, info) on each cpu. -+ * Func must be fast and non-blocking. -+ * May be called with disabled interrupts and from any context. 
-+ */ -+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait) -+{ -+ struct nmi_call_data_struct data; -+ int cpus; -+ -+ cpus = num_online_cpus() - 1; -+ if (!cpus) -+ return 0; -+ -+ data.func = func; -+ data.info = info; -+ data.wait = wait; -+ atomic_set(&data.started, 0); -+ atomic_set(&data.finished, 0); -+ cpus_clear(data.cpus_called); -+ /* prevent this cpu from calling func if NMI happens */ -+ cpu_set(smp_processor_id(), data.cpus_called); -+ -+ if (!spin_trylock(&nmi_call_lock)) -+ return -1; -+ -+ nmi_call_data = &data; -+ set_nmi_ipi_callback(smp_nmi_callback); -+ mb(); -+ -+ /* Send a message to all other CPUs and wait for them to respond */ -+ send_IPI_allbutself(APIC_DM_NMI); -+ while (atomic_read(&data.started) != cpus) -+ barrier(); -+ -+ unset_nmi_ipi_callback(); -+ if (wait) -+ while (atomic_read(&data.finished) != cpus) -+ barrier(); -+ spin_unlock(&nmi_call_lock); -+ -+ return 0; -+} -+ - static void stop_this_cpu (void * dummy) - { - /* -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/sysenter.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/sysenter.c ---- linux-2.6.8.1.orig/arch/i386/kernel/sysenter.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/sysenter.c 2005-09-09 14:39:25.000000000 +0400 -@@ -18,13 +18,18 @@ - #include <asm/msr.h> - #include <asm/pgtable.h> - #include <asm/unistd.h> -+#include <linux/highmem.h> - - extern asmlinkage void sysenter_entry(void); - - void enable_sep_cpu(void *info) - { - int cpu = get_cpu(); -+#ifdef CONFIG_X86_HIGH_ENTRY -+ struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; -+#else - struct tss_struct *tss = init_tss + cpu; -+#endif - - tss->ss1 = __KERNEL_CS; - tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/sys_i386.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/sys_i386.c ---- linux-2.6.8.1.orig/arch/i386/kernel/sys_i386.c 2004-08-14 14:56:23.000000000 +0400 -+++ 
linux-2.6.8.1-ve022stab034/arch/i386/kernel/sys_i386.c 2005-09-09 14:39:25.000000000 +0400 -@@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsn - if (!name) - return -EFAULT; - down_read(&uts_sem); -- err=copy_to_user(name, &system_utsname, sizeof (*name)); -+ err=copy_to_user(name, &ve_utsname, sizeof (*name)); - up_read(&uts_sem); - return err?-EFAULT:0; - } -@@ -233,15 +233,15 @@ asmlinkage int sys_olduname(struct oldol - - down_read(&uts_sem); - -- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); -+ error = __copy_to_user(name->sysname,ve_utsname.sysname,__OLD_UTS_LEN); - error |= __put_user(0,name->sysname+__OLD_UTS_LEN); -- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); -+ error |= __copy_to_user(name->nodename,ve_utsname.nodename,__OLD_UTS_LEN); - error |= __put_user(0,name->nodename+__OLD_UTS_LEN); -- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); -+ error |= __copy_to_user(name->release,ve_utsname.release,__OLD_UTS_LEN); - error |= __put_user(0,name->release+__OLD_UTS_LEN); -- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); -+ error |= __copy_to_user(name->version,ve_utsname.version,__OLD_UTS_LEN); - error |= __put_user(0,name->version+__OLD_UTS_LEN); -- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); -+ error |= __copy_to_user(name->machine,ve_utsname.machine,__OLD_UTS_LEN); - error |= __put_user(0,name->machine+__OLD_UTS_LEN); - - up_read(&uts_sem); -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_tsc.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/timers/timer_tsc.c ---- linux-2.6.8.1.orig/arch/i386/kernel/timers/timer_tsc.c 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/timers/timer_tsc.c 2005-09-09 14:39:25.000000000 +0400 -@@ -81,7 +81,7 @@ static int count2; /* counter for mark_o - * Equal to 2^32 * (1 / (clocks per usec) ). 
- * Initialized in time_init. - */ --static unsigned long fast_gettimeoffset_quotient; -+unsigned long fast_gettimeoffset_quotient; - - static unsigned long get_offset_tsc(void) - { -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/traps.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/traps.c ---- linux-2.6.8.1.orig/arch/i386/kernel/traps.c 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/traps.c 2005-09-09 14:39:27.000000000 +0400 -@@ -45,6 +45,7 @@ - #include <asm/desc.h> - #include <asm/i387.h> - #include <asm/nmi.h> -+#include <asm/oops.h> - - #include <asm/smp.h> - #include <asm/arch_hooks.h> -@@ -54,12 +55,8 @@ - - #include "mach_traps.h" - --asmlinkage int system_call(void); --asmlinkage void lcall7(void); --asmlinkage void lcall27(void); -- --struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, -- { 0, 0 }, { 0, 0 } }; -+struct desc_struct default_ldt[] __attribute__((__section__(".data.default_ldt"))) = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } }; -+struct page *default_ldt_page; - - /* Do we ignore FPU interrupts ? 
*/ - char ignore_fpu_irq = 0; -@@ -93,36 +90,27 @@ asmlinkage void machine_check(void); - - static int kstack_depth_to_print = 24; - --static int valid_stack_ptr(struct task_struct *task, void *p) -+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) - { -- if (p <= (void *)task->thread_info) -- return 0; -- if (kstack_end(p)) -- return 0; -- return 1; -+ return p > (void *)tinfo && -+ p < (void *)tinfo + THREAD_SIZE - 3; - } - --#ifdef CONFIG_FRAME_POINTER --static void print_context_stack(struct task_struct *task, unsigned long *stack, -- unsigned long ebp) -+static inline unsigned long print_context_stack(struct thread_info *tinfo, -+ unsigned long *stack, unsigned long ebp) - { - unsigned long addr; - -- while (valid_stack_ptr(task, (void *)ebp)) { -+#ifdef CONFIG_FRAME_POINTER -+ while (valid_stack_ptr(tinfo, (void *)ebp)) { - addr = *(unsigned long *)(ebp + 4); - printk(" [<%08lx>] ", addr); - print_symbol("%s", addr); - printk("\n"); - ebp = *(unsigned long *)ebp; - } --} - #else --static void print_context_stack(struct task_struct *task, unsigned long *stack, -- unsigned long ebp) --{ -- unsigned long addr; -- -- while (!kstack_end(stack)) { -+ while (valid_stack_ptr(tinfo, stack)) { - addr = *stack++; - if (__kernel_text_address(addr)) { - printk(" [<%08lx>]", addr); -@@ -130,8 +118,9 @@ static void print_context_stack(struct t - printk("\n"); - } - } --} - #endif -+ return ebp; -+} - - void show_trace(struct task_struct *task, unsigned long * stack) - { -@@ -140,11 +129,6 @@ void show_trace(struct task_struct *task - if (!task) - task = current; - -- if (!valid_stack_ptr(task, stack)) { -- printk("Stack pointer is garbage, not printing trace\n"); -- return; -- } -- - if (task == current) { - /* Grab ebp right from our regs */ - asm ("movl %%ebp, %0" : "=r" (ebp) : ); -@@ -157,7 +141,7 @@ void show_trace(struct task_struct *task - struct thread_info *context; - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 
1))); -- print_context_stack(task, stack, ebp); -+ ebp = print_context_stack(context, stack, ebp); - stack = (unsigned long*)context->previous_esp; - if (!stack) - break; -@@ -216,9 +200,10 @@ void show_registers(struct pt_regs *regs - ss = regs->xss & 0xffff; - } - print_modules(); -- printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx" -+ printk("CPU: %d, VCPU: %d:%d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx" - " (%s) \n", -- smp_processor_id(), 0xffff & regs->xcs, regs->eip, -+ smp_processor_id(), task_vsched_id(current), task_cpu(current), -+ 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, UTS_RELEASE); - print_symbol("EIP is at %s\n", regs->eip); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", -@@ -227,8 +212,10 @@ void show_registers(struct pt_regs *regs - regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); -- printk("Process %s (pid: %d, threadinfo=%p task=%p)", -- current->comm, current->pid, current_thread_info(), current); -+ printk("Process %s (pid: %d, veid=%d, threadinfo=%p task=%p)", -+ current->comm, current->pid, -+ VEID(VE_TASK_INFO(current)->owner_env), -+ current_thread_info(), current); - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. 
-@@ -244,8 +231,10 @@ void show_registers(struct pt_regs *regs - - for(i=0;i<20;i++) - { -- unsigned char c; -- if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -+ unsigned char c = 0; -+ if ((user_mode(regs) && get_user(c, &((unsigned char*)regs->eip)[i])) || -+ (!user_mode(regs) && __direct_get_user(c, &((unsigned char*)regs->eip)[i]))) { -+ - bad: - printk(" Bad EIP value."); - break; -@@ -269,16 +258,14 @@ static void handle_BUG(struct pt_regs *r - - eip = regs->eip; - -- if (eip < PAGE_OFFSET) -- goto no_bug; -- if (__get_user(ud2, (unsigned short *)eip)) -+ if (__direct_get_user(ud2, (unsigned short *)eip)) - goto no_bug; - if (ud2 != 0x0b0f) - goto no_bug; -- if (__get_user(line, (unsigned short *)(eip + 2))) -+ if (__direct_get_user(line, (unsigned short *)(eip + 4))) - goto bug; -- if (__get_user(file, (char **)(eip + 4)) || -- (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) -+ if (__direct_get_user(file, (char **)(eip + 7)) || -+ __direct_get_user(c, file)) - file = "<bad filename>"; - - printk("------------[ cut here ]------------\n"); -@@ -292,11 +279,18 @@ bug: - printk("Kernel BUG\n"); - } - -+static void inline check_kernel_csum_bug(void) -+{ -+ if (kernel_text_csum_broken) -+ printk("Kernel code checksum mismatch detected %d times\n", -+ kernel_text_csum_broken); -+} -+ - spinlock_t die_lock = SPIN_LOCK_UNLOCKED; -+int die_counter; - - void die(const char * str, struct pt_regs * regs, long err) - { -- static int die_counter; - int nl = 0; - - console_verbose(); -@@ -319,8 +313,11 @@ void die(const char * str, struct pt_reg - if (nl) - printk("\n"); - show_registers(regs); -+ check_kernel_csum_bug(); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); -+ oops_do_jump(); -+ - if (in_interrupt()) - panic("Fatal exception in interrupt"); - -@@ -531,6 +528,7 @@ static int dummy_nmi_callback(struct pt_ - } - - static nmi_callback_t nmi_callback = dummy_nmi_callback; -+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback; - - asmlinkage 
void do_nmi(struct pt_regs * regs, long error_code) - { -@@ -544,9 +542,20 @@ asmlinkage void do_nmi(struct pt_regs * - if (!nmi_callback(regs, cpu)) - default_do_nmi(regs); - -+ nmi_ipi_callback(regs, cpu); - nmi_exit(); - } - -+void set_nmi_ipi_callback(nmi_callback_t callback) -+{ -+ nmi_ipi_callback = callback; -+} -+ -+void unset_nmi_ipi_callback(void) -+{ -+ nmi_ipi_callback = dummy_nmi_callback; -+} -+ - void set_nmi_callback(nmi_callback_t callback) - { - nmi_callback = callback; -@@ -591,10 +600,18 @@ asmlinkage void do_debug(struct pt_regs - if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); - -- /* Mask out spurious debug traps due to lazy DR7 setting */ -+ /* -+ * Mask out spurious debug traps due to lazy DR7 setting or -+ * due to 4G/4G kernel mode: -+ */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg[7]) - goto clear_dr7; -+ if (!user_mode(regs)) { -+ // restore upon return-to-userspace: -+ set_thread_flag(TIF_DB7); -+ goto clear_dr7; -+ } - } - - if (regs->eflags & VM_MASK) -@@ -836,19 +853,52 @@ asmlinkage void math_emulate(long arg) - - #endif /* CONFIG_MATH_EMULATION */ - --#ifdef CONFIG_X86_F00F_BUG --void __init trap_init_f00f_bug(void) -+void __init trap_init_virtual_IDT(void) - { -- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); -- - /* -- * Update the IDT descriptor and reload the IDT so that -- * it uses the read-only mapped virtual address. -+ * "idt" is magic - it overlaps the idt_descr -+ * variable so that updating idt will automatically -+ * update the idt descriptor.. 
- */ -- idt_descr.address = fix_to_virt(FIX_F00F_IDT); -+ __set_fixmap(FIX_IDT, __pa(&idt_table), PAGE_KERNEL_RO); -+ idt_descr.address = __fix_to_virt(FIX_IDT); -+ - __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); - } -+ -+void __init trap_init_virtual_GDT(void) -+{ -+ int cpu = smp_processor_id(); -+ struct Xgt_desc_struct *gdt_desc = cpu_gdt_descr + cpu; -+ struct Xgt_desc_struct tmp_desc = {0, 0}; -+ struct tss_struct * t; -+ -+ __asm__ __volatile__("sgdt %0": "=m" (tmp_desc): :"memory"); -+ -+#ifdef CONFIG_X86_HIGH_ENTRY -+ if (!cpu) { -+ int i; -+ __set_fixmap(FIX_GDT_0, __pa(cpu_gdt_table), PAGE_KERNEL); -+ __set_fixmap(FIX_GDT_1, __pa(cpu_gdt_table) + PAGE_SIZE, PAGE_KERNEL); -+ for(i = 0; i < FIX_TSS_COUNT; i++) -+ __set_fixmap(FIX_TSS_0 - i, __pa(init_tss) + i * PAGE_SIZE, PAGE_KERNEL); -+ } -+ -+ gdt_desc->address = __fix_to_virt(FIX_GDT_0) + sizeof(cpu_gdt_table[0]) * cpu; -+#else -+ gdt_desc->address = (unsigned long)cpu_gdt_table[cpu]; - #endif -+ __asm__ __volatile__("lgdt %0": "=m" (*gdt_desc)); -+ -+#ifdef CONFIG_X86_HIGH_ENTRY -+ t = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu; -+#else -+ t = init_tss + cpu; -+#endif -+ set_tss_desc(cpu, t); -+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; -+ load_TR_desc(); -+} - - #define _set_gate(gate_addr,type,dpl,addr,seg) \ - do { \ -@@ -875,17 +925,17 @@ void set_intr_gate(unsigned int n, void - _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); - } - --static void __init set_trap_gate(unsigned int n, void *addr) -+void __init set_trap_gate(unsigned int n, void *addr) - { - _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); - } - --static void __init set_system_gate(unsigned int n, void *addr) -+void __init set_system_gate(unsigned int n, void *addr) - { - _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); - } - --static void __init set_call_gate(void *a, void *addr) -+void __init set_call_gate(void *a, void *addr) - { - _set_gate(a,12,3,addr,__KERNEL_CS); - } -@@ -907,6 +957,7 @@ void __init 
trap_init(void) - #ifdef CONFIG_X86_LOCAL_APIC - init_apic_mappings(); - #endif -+ init_entry_mappings(); - - set_trap_gate(0,&divide_error); - set_intr_gate(1,&debug); -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vm86.c linux-2.6.8.1-ve022stab034/arch/i386/kernel/vm86.c ---- linux-2.6.8.1.orig/arch/i386/kernel/vm86.c 2004-08-14 14:54:49.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/vm86.c 2005-09-09 14:39:25.000000000 +0400 -@@ -124,7 +124,7 @@ struct pt_regs * fastcall save_v86_state - tss = init_tss + get_cpu(); - current->thread.esp0 = current->thread.saved_esp0; - current->thread.sysenter_cs = __KERNEL_CS; -- load_esp0(tss, &current->thread); -+ load_virtual_esp0(tss, current); - current->thread.saved_esp0 = 0; - put_cpu(); - -@@ -307,7 +307,7 @@ static void do_sys_vm86(struct kernel_vm - tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; - if (cpu_has_sep) - tsk->thread.sysenter_cs = 0; -- load_esp0(tss, &tsk->thread); -+ load_virtual_esp0(tss, tsk); - put_cpu(); - - tsk->thread.screen_bitmap = info->screen_bitmap; -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vmlinux.lds.S linux-2.6.8.1-ve022stab034/arch/i386/kernel/vmlinux.lds.S ---- linux-2.6.8.1.orig/arch/i386/kernel/vmlinux.lds.S 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/vmlinux.lds.S 2005-09-09 14:39:25.000000000 +0400 -@@ -5,13 +5,17 @@ - #include <asm-generic/vmlinux.lds.h> - #include <asm/thread_info.h> - -+#include <linux/config.h> -+#include <asm/page.h> -+#include <asm/asm_offsets.h> -+ - OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") - OUTPUT_ARCH(i386) - ENTRY(startup_32) - jiffies = jiffies_64; - SECTIONS - { -- . = 0xC0000000 + 0x100000; -+ . = __PAGE_OFFSET + 0x100000; - /* read-only */ - _text = .; /* Text and read-only data */ - .text : { -@@ -21,6 +25,19 @@ SECTIONS - *(.gnu.warning) - } = 0x9090 - -+#ifdef CONFIG_X86_4G -+ . = ALIGN(PAGE_SIZE_asm); -+ __entry_tramp_start = .; -+ . 
= FIX_ENTRY_TRAMPOLINE_0_addr; -+ __start___entry_text = .; -+ .entry.text : AT (__entry_tramp_start) { *(.entry.text) } -+ __entry_tramp_end = __entry_tramp_start + SIZEOF(.entry.text); -+ . = __entry_tramp_end; -+ . = ALIGN(PAGE_SIZE_asm); -+#else -+ .entry.text : { *(.entry.text) } -+#endif -+ - _etext = .; /* End of text section */ - - . = ALIGN(16); /* Exception table */ -@@ -36,15 +53,12 @@ SECTIONS - CONSTRUCTORS - } - -- . = ALIGN(4096); -+ . = ALIGN(PAGE_SIZE_asm); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } -- . = ALIGN(4096); -+ . = ALIGN(PAGE_SIZE_asm); - __nosave_end = .; - -- . = ALIGN(4096); -- .data.page_aligned : { *(.data.idt) } -- - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - -@@ -54,7 +68,7 @@ SECTIONS - .data.init_task : { *(.data.init_task) } - - /* will be freed after init */ -- . = ALIGN(4096); /* Init code and data */ -+ . = ALIGN(PAGE_SIZE_asm); /* Init code and data */ - __init_begin = .; - .init.text : { - _sinittext = .; -@@ -93,7 +107,7 @@ SECTIONS - from .altinstructions and .eh_frame */ - .exit.text : { *(.exit.text) } - .exit.data : { *(.exit.data) } -- . = ALIGN(4096); -+ . = ALIGN(PAGE_SIZE_asm); - __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } - __initramfs_end = .; -@@ -101,10 +115,22 @@ SECTIONS - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; -- . = ALIGN(4096); -+ . = ALIGN(PAGE_SIZE_asm); - __init_end = .; - /* freed after init ends here */ -- -+ -+ . = ALIGN(PAGE_SIZE_asm); -+ .data.page_aligned_tss : { *(.data.tss) } -+ -+ . = ALIGN(PAGE_SIZE_asm); -+ .data.page_aligned_default_ldt : { *(.data.default_ldt) } -+ -+ . = ALIGN(PAGE_SIZE_asm); -+ .data.page_aligned_idt : { *(.data.idt) } -+ -+ . 
= ALIGN(PAGE_SIZE_asm); -+ .data.page_aligned_gdt : { *(.data.gdt) } -+ - __bss_start = .; /* BSS */ - .bss : { - *(.bss.page_aligned) -@@ -132,4 +158,6 @@ SECTIONS - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -+ -+ - } -diff -uprN linux-2.6.8.1.orig/arch/i386/kernel/vsyscall-sysenter.S linux-2.6.8.1-ve022stab034/arch/i386/kernel/vsyscall-sysenter.S ---- linux-2.6.8.1.orig/arch/i386/kernel/vsyscall-sysenter.S 2004-08-14 14:55:19.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/kernel/vsyscall-sysenter.S 2005-09-09 14:39:25.000000000 +0400 -@@ -12,6 +12,11 @@ - .type __kernel_vsyscall,@function - __kernel_vsyscall: - .LSTART_vsyscall: -+ cmpl $192, %eax -+ jne 1f -+ int $0x80 -+ ret -+1: - push %ecx - .Lpush_ecx: - push %edx -diff -uprN linux-2.6.8.1.orig/arch/i386/lib/checksum.S linux-2.6.8.1-ve022stab034/arch/i386/lib/checksum.S ---- linux-2.6.8.1.orig/arch/i386/lib/checksum.S 2004-08-14 14:56:00.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/lib/checksum.S 2005-09-09 14:39:25.000000000 +0400 -@@ -280,14 +280,14 @@ unsigned int csum_partial_copy_generic ( - .previous - - .align 4 --.globl csum_partial_copy_generic -+.globl direct_csum_partial_copy_generic - - #ifndef CONFIG_X86_USE_PPRO_CHECKSUM - - #define ARGBASE 16 - #define FP 12 - --csum_partial_copy_generic: -+direct_csum_partial_copy_generic: - subl $4,%esp - pushl %edi - pushl %esi -@@ -422,7 +422,7 @@ DST( movb %cl, (%edi) ) - - #define ARGBASE 12 - --csum_partial_copy_generic: -+direct_csum_partial_copy_generic: - pushl %ebx - pushl %edi - pushl %esi -diff -uprN linux-2.6.8.1.orig/arch/i386/lib/getuser.S linux-2.6.8.1-ve022stab034/arch/i386/lib/getuser.S ---- linux-2.6.8.1.orig/arch/i386/lib/getuser.S 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/lib/getuser.S 2005-09-09 14:39:25.000000000 +0400 -@@ -9,6 +9,7 @@ - * return value. 
- */ - #include <asm/thread_info.h> -+#include <asm/asm_offsets.h> - - - /* -diff -uprN linux-2.6.8.1.orig/arch/i386/lib/usercopy.c linux-2.6.8.1-ve022stab034/arch/i386/lib/usercopy.c ---- linux-2.6.8.1.orig/arch/i386/lib/usercopy.c 2004-08-14 14:54:46.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/lib/usercopy.c 2005-09-09 14:39:25.000000000 +0400 -@@ -9,7 +9,6 @@ - #include <linux/mm.h> - #include <linux/highmem.h> - #include <linux/blkdev.h> --#include <linux/module.h> - #include <asm/uaccess.h> - #include <asm/mmx.h> - -@@ -77,7 +76,7 @@ do { \ - * and returns @count. - */ - long --__strncpy_from_user(char *dst, const char __user *src, long count) -+__direct_strncpy_from_user(char *dst, const char __user *src, long count) - { - long res; - __do_strncpy_from_user(dst, src, count, res); -@@ -103,7 +102,7 @@ __strncpy_from_user(char *dst, const cha - * and returns @count. - */ - long --strncpy_from_user(char *dst, const char __user *src, long count) -+direct_strncpy_from_user(char *dst, const char __user *src, long count) - { - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) -@@ -148,7 +147,7 @@ do { \ - * On success, this will be zero. - */ - unsigned long --clear_user(void __user *to, unsigned long n) -+direct_clear_user(void __user *to, unsigned long n) - { - might_sleep(); - if (access_ok(VERIFY_WRITE, to, n)) -@@ -168,7 +167,7 @@ clear_user(void __user *to, unsigned lon - * On success, this will be zero. - */ - unsigned long --__clear_user(void __user *to, unsigned long n) -+__direct_clear_user(void __user *to, unsigned long n) - { - __do_clear_user(to, n); - return n; -@@ -185,7 +184,7 @@ __clear_user(void __user *to, unsigned l - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. 
- */ --long strnlen_user(const char __user *s, long n) -+long direct_strnlen_user(const char __user *s, long n) - { - unsigned long mask = -__addr_ok(s); - unsigned long res, tmp; -@@ -568,8 +567,7 @@ survive: - return n; - } - --unsigned long --__copy_from_user_ll(void *to, const void __user *from, unsigned long n) -+unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) - { - if (movsl_is_ok(to, from, n)) - __copy_user_zeroing(to, from, n); -@@ -578,53 +576,3 @@ __copy_from_user_ll(void *to, const void - return n; - } - --/** -- * copy_to_user: - Copy a block of data into user space. -- * @to: Destination address, in user space. -- * @from: Source address, in kernel space. -- * @n: Number of bytes to copy. -- * -- * Context: User context only. This function may sleep. -- * -- * Copy data from kernel space to user space. -- * -- * Returns number of bytes that could not be copied. -- * On success, this will be zero. -- */ --unsigned long --copy_to_user(void __user *to, const void *from, unsigned long n) --{ -- might_sleep(); -- if (access_ok(VERIFY_WRITE, to, n)) -- n = __copy_to_user(to, from, n); -- return n; --} --EXPORT_SYMBOL(copy_to_user); -- --/** -- * copy_from_user: - Copy a block of data from user space. -- * @to: Destination address, in kernel space. -- * @from: Source address, in user space. -- * @n: Number of bytes to copy. -- * -- * Context: User context only. This function may sleep. -- * -- * Copy data from user space to kernel space. -- * -- * Returns number of bytes that could not be copied. -- * On success, this will be zero. -- * -- * If some data could not be copied, this function will pad the copied -- * data to the requested size using zero bytes. 
-- */ --unsigned long --copy_from_user(void *to, const void __user *from, unsigned long n) --{ -- might_sleep(); -- if (access_ok(VERIFY_READ, from, n)) -- n = __copy_from_user(to, from, n); -- else -- memset(to, 0, n); -- return n; --} --EXPORT_SYMBOL(copy_from_user); -diff -uprN linux-2.6.8.1.orig/arch/i386/math-emu/fpu_system.h linux-2.6.8.1-ve022stab034/arch/i386/math-emu/fpu_system.h ---- linux-2.6.8.1.orig/arch/i386/math-emu/fpu_system.h 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/math-emu/fpu_system.h 2005-09-09 14:39:25.000000000 +0400 -@@ -15,6 +15,7 @@ - #include <linux/sched.h> - #include <linux/kernel.h> - #include <linux/mm.h> -+#include <asm/atomic_kmap.h> - - /* This sets the pointer FPU_info to point to the argument part - of the stack frame of math_emulate() */ -@@ -22,7 +23,7 @@ - - /* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ --#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) -+#define LDT_DESCRIPTOR(s) (((struct desc_struct *)__kmap_atomic_vaddr(KM_LDT_PAGE0))[(s) >> 3]) - #define SEG_D_SIZE(x) ((x).b & (3 << 21)) - #define SEG_G_BIT(x) ((x).b & (1 << 23)) - #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 
4096 : 1) -diff -uprN linux-2.6.8.1.orig/arch/i386/mm/fault.c linux-2.6.8.1-ve022stab034/arch/i386/mm/fault.c ---- linux-2.6.8.1.orig/arch/i386/mm/fault.c 2004-08-14 14:54:46.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/mm/fault.c 2005-09-09 14:39:25.000000000 +0400 -@@ -26,36 +26,11 @@ - #include <asm/uaccess.h> - #include <asm/hardirq.h> - #include <asm/desc.h> -+#include <asm/tlbflush.h> - - extern void die(const char *,struct pt_regs *,long); - - /* -- * Unlock any spinlocks which will prevent us from getting the -- * message out -- */ --void bust_spinlocks(int yes) --{ -- int loglevel_save = console_loglevel; -- -- if (yes) { -- oops_in_progress = 1; -- return; -- } --#ifdef CONFIG_VT -- unblank_screen(); --#endif -- oops_in_progress = 0; -- /* -- * OK, the message is on the console. Now we call printk() -- * without oops_in_progress set so that printk will give klogd -- * a poke. Hold onto your hats... -- */ -- console_loglevel = 15; /* NMI oopser may have shut the console up */ -- printk(" "); -- console_loglevel = loglevel_save; --} -- --/* - * Return EIP plus the CS segment base. The segment limit is also - * adjusted, clamped to the kernel/user address space (whichever is - * appropriate), and returned in *eip_limit. -@@ -103,8 +78,17 @@ static inline unsigned long get_segment_ - if (seg & (1<<2)) { - /* Must lock the LDT while reading it. */ - down(&current->mm->context.sem); -+#if 1 -+ /* horrible hack for 4/4 disabled kernels. -+ I'm not quite sure what the TLB flush is good for, -+ it's mindlessly copied from the read_ldt code */ -+ __flush_tlb_global(); -+ desc = kmap(current->mm->context.ldt_pages[(seg&~7)/PAGE_SIZE]); -+ desc = (void *)desc + ((seg & ~7) % PAGE_SIZE); -+#else - desc = current->mm->context.ldt; - desc = (void *)desc + (seg & ~7); -+#endif - } else { - /* Must disable preemption while reading the GDT. 
*/ - desc = (u32 *)&cpu_gdt_table[get_cpu()]; -@@ -117,6 +101,9 @@ static inline unsigned long get_segment_ - (desc[1] & 0xff000000); - - if (seg & (1<<2)) { -+#if 1 -+ kunmap((void *)((unsigned long)desc & PAGE_MASK)); -+#endif - up(&current->mm->context.sem); - } else - put_cpu(); -@@ -232,6 +219,20 @@ asmlinkage void do_page_fault(struct pt_ - - tsk = current; - -+#ifdef CONFIG_DEBUG_STACKOVERFLOW -+ /* Debugging check for stack overflow: is there less than 1KB free? */ -+ { -+ long esp; -+ -+ __asm__ __volatile__("andl %%esp,%0" : -+ "=r" (esp) : "0" (THREAD_SIZE - 1)); -+ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { -+ printk("do_page_fault: stack overflow: %ld\n", -+ esp - sizeof(struct thread_info)); -+ dump_stack(); -+ } -+ } -+#endif - info.si_code = SEGV_MAPERR; - - /* -@@ -247,6 +248,17 @@ asmlinkage void do_page_fault(struct pt_ - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 1) == 0. - */ -+#ifdef CONFIG_X86_4G -+ /* -+ * On 4/4 all kernels faults are either bugs, vmalloc or prefetch -+ */ -+ /* If it's vm86 fall through */ -+ if (unlikely(!(regs->eflags & VM_MASK) && ((regs->xcs & 3) == 0))) { -+ if (error_code & 3) -+ goto bad_area_nosemaphore; -+ goto vmalloc_fault; -+ } -+#else - if (unlikely(address >= TASK_SIZE)) { - if (!(error_code & 5)) - goto vmalloc_fault; -@@ -256,6 +268,7 @@ asmlinkage void do_page_fault(struct pt_ - */ - goto bad_area_nosemaphore; - } -+#endif - - mm = tsk->mm; - -@@ -333,7 +346,6 @@ good_area: - goto bad_area; - } - -- survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo -@@ -472,14 +484,14 @@ no_context: - */ - out_of_memory: - up_read(&mm->mmap_sem); -- if (tsk->pid == 1) { -- yield(); -- down_read(&mm->mmap_sem); -- goto survive; -+ if (error_code & 4) { -+ /* -+ * 0-order allocation always success if something really -+ * fatal not happen: beancounter overdraft or OOM. 
Den -+ */ -+ force_sig(SIGKILL, tsk); -+ return; - } -- printk("VM: killing process %s\n", tsk->comm); -- if (error_code & 4) -- do_exit(SIGKILL); - goto no_context; - - do_sigbus: -diff -uprN linux-2.6.8.1.orig/arch/i386/mm/highmem.c linux-2.6.8.1-ve022stab034/arch/i386/mm/highmem.c ---- linux-2.6.8.1.orig/arch/i386/mm/highmem.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/mm/highmem.c 2005-09-09 14:39:25.000000000 +0400 -@@ -41,12 +41,45 @@ void *kmap_atomic(struct page *page, enu - if (!pte_none(*(kmap_pte-idx))) - BUG(); - #endif -- set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); -+ /* -+ * If the page is not a normal RAM page, then map it -+ * uncached to be on the safe side - it could be device -+ * memory that must not be prefetched: -+ */ -+ if (PageReserved(page)) -+ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot_nocache)); -+ else -+ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); - __flush_tlb_one(vaddr); - - return (void*) vaddr; - } - -+/* -+ * page frame number based kmaps - useful for PCI mappings. -+ * NOTE: we map the page with the same mapping as what user is using. 
-+ */ -+void *kmap_atomic_pte(pte_t *pte, enum km_type type) -+{ -+ enum fixed_addresses idx; -+ unsigned long vaddr; -+ -+ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ -+ inc_preempt_count(); -+ -+ idx = type + KM_TYPE_NR*smp_processor_id(); -+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -+#ifdef CONFIG_DEBUG_HIGHMEM -+ if (!pte_none(*(kmap_pte-idx))) -+ BUG(); -+#endif -+ set_pte(kmap_pte-idx, *pte); -+ __flush_tlb_one(vaddr); -+ -+ return (void*) vaddr; -+} -+ -+ - void kunmap_atomic(void *kvaddr, enum km_type type) - { - #ifdef CONFIG_DEBUG_HIGHMEM -diff -uprN linux-2.6.8.1.orig/arch/i386/mm/hugetlbpage.c linux-2.6.8.1-ve022stab034/arch/i386/mm/hugetlbpage.c ---- linux-2.6.8.1.orig/arch/i386/mm/hugetlbpage.c 2004-08-14 14:56:22.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/mm/hugetlbpage.c 2005-09-09 14:39:25.000000000 +0400 -@@ -18,6 +18,8 @@ - #include <asm/tlb.h> - #include <asm/tlbflush.h> - -+#include <ub/ub_vmpages.h> -+ - static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) - { - pgd_t *pgd; -@@ -43,6 +45,7 @@ static void set_huge_pte(struct mm_struc - pte_t entry; - - mm->rss += (HPAGE_SIZE / PAGE_SIZE); -+ ub_unused_privvm_dec(mm_ub(mm), HPAGE_SIZE / PAGE_SIZE, vma); - if (write_access) { - entry = - pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); -@@ -83,6 +86,7 @@ int copy_hugetlb_page_range(struct mm_st - get_page(ptepage); - set_pte(dst_pte, entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); -+ ub_unused_privvm_dec(mm_ub(mm), HPAGE_SIZE / PAGE_SIZE, vma); - addr += HPAGE_SIZE; - } - return 0; -@@ -219,6 +223,7 @@ void unmap_hugepage_range(struct vm_area - put_page(page); - } - mm->rss -= (end - start) >> PAGE_SHIFT; -+ ub_unused_privvm_inc(mm_ub(mm), (end - start) >> PAGE_SIFT, vma); - flush_tlb_range(vma, start, end); - } - -diff -uprN linux-2.6.8.1.orig/arch/i386/mm/init.c linux-2.6.8.1-ve022stab034/arch/i386/mm/init.c ---- linux-2.6.8.1.orig/arch/i386/mm/init.c 2004-08-14 
14:55:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/mm/init.c 2005-09-09 14:39:25.000000000 +0400 -@@ -27,6 +27,7 @@ - #include <linux/slab.h> - #include <linux/proc_fs.h> - #include <linux/efi.h> -+#include <linux/initrd.h> - - #include <asm/processor.h> - #include <asm/system.h> -@@ -39,143 +40,13 @@ - #include <asm/tlb.h> - #include <asm/tlbflush.h> - #include <asm/sections.h> -+#include <asm/desc.h> - - DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - unsigned long highstart_pfn, highend_pfn; - - static int do_test_wp_bit(void); - --/* -- * Creates a middle page table and puts a pointer to it in the -- * given global directory entry. This only returns the gd entry -- * in non-PAE compilation mode, since the middle layer is folded. -- */ --static pmd_t * __init one_md_table_init(pgd_t *pgd) --{ -- pmd_t *pmd_table; -- --#ifdef CONFIG_X86_PAE -- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); -- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); -- if (pmd_table != pmd_offset(pgd, 0)) -- BUG(); --#else -- pmd_table = pmd_offset(pgd, 0); --#endif -- -- return pmd_table; --} -- --/* -- * Create a page table and place a pointer to it in a middle page -- * directory entry. -- */ --static pte_t * __init one_page_table_init(pmd_t *pmd) --{ -- if (pmd_none(*pmd)) { -- pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); -- set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); -- if (page_table != pte_offset_kernel(pmd, 0)) -- BUG(); -- -- return page_table; -- } -- -- return pte_offset_kernel(pmd, 0); --} -- --/* -- * This function initializes a certain range of kernel virtual memory -- * with new bootmem page tables, everywhere page tables are missing in -- * the given range. -- */ -- --/* -- * NOTE: The pagetables are allocated contiguous on the physical space -- * so we can cache the place of the first one and move around without -- * checking the pgd every time. 
-- */ --static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) --{ -- pgd_t *pgd; -- pmd_t *pmd; -- int pgd_idx, pmd_idx; -- unsigned long vaddr; -- -- vaddr = start; -- pgd_idx = pgd_index(vaddr); -- pmd_idx = pmd_index(vaddr); -- pgd = pgd_base + pgd_idx; -- -- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { -- if (pgd_none(*pgd)) -- one_md_table_init(pgd); -- -- pmd = pmd_offset(pgd, vaddr); -- for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { -- if (pmd_none(*pmd)) -- one_page_table_init(pmd); -- -- vaddr += PMD_SIZE; -- } -- pmd_idx = 0; -- } --} -- --static inline int is_kernel_text(unsigned long addr) --{ -- if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end) -- return 1; -- return 0; --} -- --/* -- * This maps the physical memory to kernel virtual address space, a total -- * of max_low_pfn pages, by creating page tables starting from address -- * PAGE_OFFSET. -- */ --static void __init kernel_physical_mapping_init(pgd_t *pgd_base) --{ -- unsigned long pfn; -- pgd_t *pgd; -- pmd_t *pmd; -- pte_t *pte; -- int pgd_idx, pmd_idx, pte_ofs; -- -- pgd_idx = pgd_index(PAGE_OFFSET); -- pgd = pgd_base + pgd_idx; -- pfn = 0; -- -- for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { -- pmd = one_md_table_init(pgd); -- if (pfn >= max_low_pfn) -- continue; -- for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { -- unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; -- -- /* Map with big pages if possible, otherwise create normal page tables. 
*/ -- if (cpu_has_pse) { -- unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; -- -- if (is_kernel_text(address) || is_kernel_text(address2)) -- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); -- else -- set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); -- pfn += PTRS_PER_PTE; -- } else { -- pte = one_page_table_init(pmd); -- -- for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { -- if (is_kernel_text(address)) -- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); -- else -- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); -- } -- } -- } -- } --} -- - static inline int page_kills_ppro(unsigned long pagenr) - { - if (pagenr >= 0x70000 && pagenr <= 0x7003F) -@@ -223,11 +94,8 @@ static inline int page_is_ram(unsigned l - return 0; - } - --#ifdef CONFIG_HIGHMEM - pte_t *kmap_pte; --pgprot_t kmap_prot; - --EXPORT_SYMBOL(kmap_prot); - EXPORT_SYMBOL(kmap_pte); - - #define kmap_get_fixmap_pte(vaddr) \ -@@ -235,29 +103,7 @@ EXPORT_SYMBOL(kmap_pte); - - void __init kmap_init(void) - { -- unsigned long kmap_vstart; -- -- /* cache the first kmap pte */ -- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); -- kmap_pte = kmap_get_fixmap_pte(kmap_vstart); -- -- kmap_prot = PAGE_KERNEL; --} -- --void __init permanent_kmaps_init(pgd_t *pgd_base) --{ -- pgd_t *pgd; -- pmd_t *pmd; -- pte_t *pte; -- unsigned long vaddr; -- -- vaddr = PKMAP_BASE; -- page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); -- -- pgd = swapper_pg_dir + pgd_index(vaddr); -- pmd = pmd_offset(pgd, vaddr); -- pte = pte_offset_kernel(pmd, vaddr); -- pkmap_page_table = pte; -+ kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); - } - - void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) -@@ -272,6 +118,8 @@ void __init one_highpage_init(struct pag - SetPageReserved(page); - } - -+#ifdef CONFIG_HIGHMEM -+ - #ifndef CONFIG_DISCONTIGMEM - void __init set_highmem_pages_init(int bad_ppro) - { -@@ -283,12 +131,9 @@ 
void __init set_highmem_pages_init(int b - #else - extern void set_highmem_pages_init(int); - #endif /* !CONFIG_DISCONTIGMEM */ -- - #else --#define kmap_init() do { } while (0) --#define permanent_kmaps_init(pgd_base) do { } while (0) --#define set_highmem_pages_init(bad_ppro) do { } while (0) --#endif /* CONFIG_HIGHMEM */ -+# define set_highmem_pages_init(bad_ppro) do { } while (0) -+#endif - - unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; - unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; -@@ -299,31 +144,125 @@ unsigned long long __PAGE_KERNEL_EXEC = - extern void __init remap_numa_kva(void); - #endif - --static void __init pagetable_init (void) -+static __init void prepare_pagetables(pgd_t *pgd_base, unsigned long address) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ pgd = pgd_base + pgd_index(address); -+ pmd = pmd_offset(pgd, address); -+ if (!pmd_present(*pmd)) { -+ pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); -+ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); -+ } -+} -+ -+static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) -+{ -+ unsigned long vaddr; -+ -+ for (vaddr = start; vaddr != end; vaddr += PAGE_SIZE) -+ prepare_pagetables(pgd_base, vaddr); -+} -+ -+void setup_identity_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) - { - unsigned long vaddr; -- pgd_t *pgd_base = swapper_pg_dir; -+ pgd_t *pgd; -+ int i, j, k; -+ pmd_t *pmd; -+ pte_t *pte, *pte_base; -+ -+ pgd = pgd_base; - -+ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { -+ vaddr = i*PGDIR_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ pmd = pmd_offset(pgd, 0); -+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { -+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ if (vaddr < start) -+ continue; -+ if (cpu_has_pse) { -+ unsigned long __pe; -+ -+ set_in_cr4(X86_CR4_PSE); -+ boot_cpu_data.wp_works_ok = 1; -+ __pe = _KERNPG_TABLE + _PAGE_PSE + vaddr - start; -+ /* Make it "global" too if 
supported */ -+ if (cpu_has_pge) { -+ set_in_cr4(X86_CR4_PGE); -+#if !defined(CONFIG_X86_SWITCH_PAGETABLES) -+ __pe += _PAGE_GLOBAL; -+ __PAGE_KERNEL |= _PAGE_GLOBAL; -+#endif -+ } -+ set_pmd(pmd, __pmd(__pe)); -+ continue; -+ } -+ if (!pmd_present(*pmd)) -+ pte_base = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); -+ else -+ pte_base = pte_offset_kernel(pmd, 0); -+ pte = pte_base; -+ for (k = 0; k < PTRS_PER_PTE; pte++, k++) { -+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ if (vaddr < start) -+ continue; -+ *pte = mk_pte_phys(vaddr-start, PAGE_KERNEL); -+ } -+ set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); -+ } -+ } -+} -+ -+static void __init pagetable_init (void) -+{ -+ unsigned long vaddr, end; -+ pgd_t *pgd_base; - #ifdef CONFIG_X86_PAE - int i; -- /* Init entries of the first-level page table to the zero page */ -- for (i = 0; i < PTRS_PER_PGD; i++) -- set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); - #endif - -- /* Enable PSE if available */ -- if (cpu_has_pse) { -- set_in_cr4(X86_CR4_PSE); -- } -+ /* -+ * This can be zero as well - no problem, in that case we exit -+ * the loops anyway due to the PTRS_PER_* conditions. -+ */ -+ end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); - -- /* Enable PGE if available */ -- if (cpu_has_pge) { -- set_in_cr4(X86_CR4_PGE); -- __PAGE_KERNEL |= _PAGE_GLOBAL; -- __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; -+ pgd_base = swapper_pg_dir; -+#ifdef CONFIG_X86_PAE -+ /* -+ * It causes too many problems if there's no proper pmd set up -+ * for all 4 entries of the PGD - so we allocate all of them. -+ * PAE systems will not miss this extra 4-8K anyway ... 
-+ */ -+ for (i = 0; i < PTRS_PER_PGD; i++) { -+ pmd_t *pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); -+ set_pgd(pgd_base + i, __pgd(__pa(pmd) + 0x1)); - } -+#endif -+ /* -+ * Set up lowmem-sized identity mappings at PAGE_OFFSET: -+ */ -+ setup_identity_mappings(pgd_base, PAGE_OFFSET, end); - -- kernel_physical_mapping_init(pgd_base); -+ /* -+ * Add flat-mode identity-mappings - SMP needs it when -+ * starting up on an AP from real-mode. (In the non-PAE -+ * case we already have these mappings through head.S.) -+ * All user-space mappings are explicitly cleared after -+ * SMP startup. -+ */ -+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PAE) -+ setup_identity_mappings(pgd_base, 0, 16*1024*1024); -+#endif - remap_numa_kva(); - - /* -@@ -331,22 +270,57 @@ static void __init pagetable_init (void) - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -- page_table_range_init(vaddr, 0, pgd_base); -+ fixrange_init(vaddr, 0, pgd_base); - -- permanent_kmaps_init(pgd_base); -+#ifdef CONFIG_HIGHMEM -+ { -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; - --#ifdef CONFIG_X86_PAE -- /* -- * Add low memory identity-mappings - SMP needs it when -- * starting up on an AP from real-mode. In the non-PAE -- * case we already have these mappings through head.S. -- * All user-space mappings are explicitly cleared after -- * SMP startup. -- */ -- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; -+ /* -+ * Permanent kmaps: -+ */ -+ vaddr = PKMAP_BASE; -+ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); -+ -+ pgd = swapper_pg_dir + pgd_index(vaddr); -+ pmd = pmd_offset(pgd, vaddr); -+ pte = pte_offset_kernel(pmd, vaddr); -+ pkmap_page_table = pte; -+ } - #endif - } - -+/* -+ * Clear kernel pagetables in a PMD_SIZE-aligned range. 
-+ */ -+static void clear_mappings(pgd_t *pgd_base, unsigned long start, unsigned long end) -+{ -+ unsigned long vaddr; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ int i, j; -+ -+ pgd = pgd_base; -+ -+ for (i = 0; i < PTRS_PER_PGD; pgd++, i++) { -+ vaddr = i*PGDIR_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ pmd = pmd_offset(pgd, 0); -+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { -+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE; -+ if (end && (vaddr >= end)) -+ break; -+ if (vaddr < start) -+ continue; -+ pmd_clear(pmd); -+ } -+ } -+ flush_tlb_all(); -+} -+ - #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) - /* - * Swap suspend & friends need this for resume because things like the intel-agp -@@ -365,25 +339,16 @@ static inline void save_pg_dir(void) - } - #endif - --void zap_low_mappings (void) --{ -- int i; - -+void zap_low_mappings(void) -+{ - save_pg_dir(); - -+ printk("zapping low mappings.\n"); - /* - * Zap initial low-memory mappings. -- * -- * Note that "pgd_clear()" doesn't do it for -- * us, because pgd_clear() is a no-op on i386. - */ -- for (i = 0; i < USER_PTRS_PER_PGD; i++) --#ifdef CONFIG_X86_PAE -- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); --#else -- set_pgd(swapper_pg_dir+i, __pgd(0)); --#endif -- flush_tlb_all(); -+ clear_mappings(swapper_pg_dir, 0, 16*1024*1024); - } - - #ifndef CONFIG_DISCONTIGMEM -@@ -454,7 +419,6 @@ static void __init set_nx(void) - } - } - } -- - /* - * Enables/disables executability of a given kernel page and - * returns the previous setting. -@@ -512,7 +476,15 @@ void __init paging_init(void) - set_in_cr4(X86_CR4_PAE); - #endif - __flush_tlb_all(); -- -+ /* -+ * Subtle. SMP is doing it's boot stuff late (because it has to -+ * fork idle threads) - but it also needs low mappings for the -+ * protected-mode entry to work. We zap these entries only after -+ * the WP-bit has been tested. 
-+ */ -+#ifndef CONFIG_SMP -+ zap_low_mappings(); -+#endif - kmap_init(); - zone_sizes_init(); - } -@@ -561,6 +533,34 @@ extern void set_max_mapnr_init(void); - - static struct kcore_list kcore_mem, kcore_vmalloc; - -+#ifdef CONFIG_BLK_DEV_INITRD -+/* -+ * This function move initrd from highmem to normal zone, if needed. -+ * Note, we have to do it before highmem pages are given to buddy allocator. -+ */ -+static void initrd_move(void) -+{ -+ unsigned long i, start, off; -+ struct page *page; -+ void *addr; -+ -+ if (initrd_copy <= 0) -+ return; -+ -+ start = (initrd_end - initrd_copy) & PAGE_MASK; -+ off = (initrd_end - initrd_copy) & ~PAGE_MASK; -+ for (i = 0; i < initrd_copy; i += PAGE_SIZE) { -+ page = pfn_to_page((start + i) >> PAGE_SHIFT); -+ addr = kmap_atomic(page, KM_USER0); -+ memcpy((void *)initrd_start + i, -+ addr, PAGE_SIZE); -+ kunmap_atomic(addr, KM_USER0); -+ } -+ initrd_start += off; -+ initrd_end = initrd_start + initrd_copy; -+} -+#endif -+ - void __init mem_init(void) - { - extern int ppro_with_ram_bug(void); -@@ -604,6 +604,9 @@ void __init mem_init(void) - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; - -+#ifdef CONFIG_BLK_DEV_INITRD -+ initrd_move(); -+#endif - set_highmem_pages_init(bad_ppro); - - codesize = (unsigned long) &_etext - (unsigned long) &_text; -@@ -631,38 +634,57 @@ void __init mem_init(void) - if (boot_cpu_data.wp_works_ok < 0) - test_wp_bit(); - -- /* -- * Subtle. SMP is doing it's boot stuff late (because it has to -- * fork idle threads) - but it also needs low mappings for the -- * protected-mode entry to work. We zap these entries only after -- * the WP-bit has been tested. 
-- */ --#ifndef CONFIG_SMP -- zap_low_mappings(); --#endif -+ entry_trampoline_setup(); -+ default_ldt_page = virt_to_page(default_ldt); -+ load_LDT(&init_mm.context); - } - --kmem_cache_t *pgd_cache; --kmem_cache_t *pmd_cache; -+kmem_cache_t *pgd_cache, *pmd_cache, *kpmd_cache; - - void __init pgtable_cache_init(void) - { -+ void (*ctor)(void *, kmem_cache_t *, unsigned long); -+ void (*dtor)(void *, kmem_cache_t *, unsigned long); -+ - if (PTRS_PER_PMD > 1) { - pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - PTRS_PER_PMD*sizeof(pmd_t), -- 0, -+ SLAB_UBC, - pmd_ctor, - NULL); - if (!pmd_cache) - panic("pgtable_cache_init(): cannot create pmd cache"); -+ -+ if (TASK_SIZE > PAGE_OFFSET) { -+ kpmd_cache = kmem_cache_create("kpmd", -+ PTRS_PER_PMD*sizeof(pmd_t), -+ PTRS_PER_PMD*sizeof(pmd_t), -+ SLAB_UBC, -+ kpmd_ctor, -+ NULL); -+ if (!kpmd_cache) -+ panic("pgtable_cache_init(): " -+ "cannot create kpmd cache"); -+ } - } -+ -+ if (PTRS_PER_PMD == 1 || TASK_SIZE <= PAGE_OFFSET) -+ ctor = pgd_ctor; -+ else -+ ctor = NULL; -+ -+ if (PTRS_PER_PMD == 1 && TASK_SIZE <= PAGE_OFFSET) -+ dtor = pgd_dtor; -+ else -+ dtor = NULL; -+ - pgd_cache = kmem_cache_create("pgd", - PTRS_PER_PGD*sizeof(pgd_t), - PTRS_PER_PGD*sizeof(pgd_t), -- 0, -- pgd_ctor, -- PTRS_PER_PMD == 1 ? 
pgd_dtor : NULL); -+ SLAB_UBC, -+ ctor, -+ dtor); - if (!pgd_cache) - panic("pgtable_cache_init(): Cannot create pgd cache"); - } -diff -uprN linux-2.6.8.1.orig/arch/i386/mm/pageattr.c linux-2.6.8.1-ve022stab034/arch/i386/mm/pageattr.c ---- linux-2.6.8.1.orig/arch/i386/mm/pageattr.c 2004-08-14 14:55:20.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/mm/pageattr.c 2005-09-09 14:39:25.000000000 +0400 -@@ -67,22 +67,21 @@ static void flush_kernel_map(void *dummy - - static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) - { -- struct page *page; -- unsigned long flags; -- - set_pte_atomic(kpte, pte); /* change init_mm */ -- if (PTRS_PER_PMD > 1) -- return; -- -- spin_lock_irqsave(&pgd_lock, flags); -- for (page = pgd_list; page; page = (struct page *)page->index) { -- pgd_t *pgd; -- pmd_t *pmd; -- pgd = (pgd_t *)page_address(page) + pgd_index(address); -- pmd = pmd_offset(pgd, address); -- set_pte_atomic((pte_t *)pmd, pte); -+#ifndef CONFIG_X86_PAE -+ { -+ struct list_head *l; -+ if (TASK_SIZE > PAGE_OFFSET) -+ return; -+ spin_lock(&mmlist_lock); -+ list_for_each(l, &init_mm.mmlist) { -+ struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist); -+ pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address); -+ set_pte_atomic((pte_t *)pmd, pte); -+ } -+ spin_unlock(&mmlist_lock); - } -- spin_unlock_irqrestore(&pgd_lock, flags); -+#endif - } - - /* -diff -uprN linux-2.6.8.1.orig/arch/i386/mm/pgtable.c linux-2.6.8.1-ve022stab034/arch/i386/mm/pgtable.c ---- linux-2.6.8.1.orig/arch/i386/mm/pgtable.c 2004-08-14 14:56:24.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/mm/pgtable.c 2005-09-09 14:39:25.000000000 +0400 -@@ -5,8 +5,10 @@ - #include <linux/config.h> - #include <linux/sched.h> - #include <linux/kernel.h> -+#include <linux/module.h> - #include <linux/errno.h> - #include <linux/mm.h> -+#include <linux/vmalloc.h> - #include <linux/swap.h> - #include <linux/smp.h> - #include <linux/highmem.h> -@@ -21,6 +23,7 @@ - #include 
<asm/e820.h> - #include <asm/tlb.h> - #include <asm/tlbflush.h> -+#include <asm/atomic_kmap.h> - - void show_mem(void) - { -@@ -53,6 +56,7 @@ void show_mem(void) - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); -+ vprintstat(); - } - - /* -@@ -143,9 +147,10 @@ struct page *pte_alloc_one(struct mm_str - struct page *pte; - - #ifdef CONFIG_HIGHPTE -- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); -+ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC| -+ __GFP_HIGHMEM|__GFP_REPEAT, 0); - #else -- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); -+ pte = alloc_pages(GFP_KERNEL_UBC|__GFP_SOFT_UBC|__GFP_REPEAT, 0); - #endif - if (pte) - clear_highpage(pte); -@@ -157,11 +162,20 @@ void pmd_ctor(void *pmd, kmem_cache_t *c - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); - } - -+void kpmd_ctor(void *__pmd, kmem_cache_t *cache, unsigned long flags) -+{ -+ pmd_t *kpmd, *pmd; -+ kpmd = pmd_offset(&swapper_pg_dir[PTRS_PER_PGD-1], -+ (PTRS_PER_PMD - NR_SHARED_PMDS)*PMD_SIZE); -+ pmd = (pmd_t *)__pmd + (PTRS_PER_PMD - NR_SHARED_PMDS); -+ -+ memset(__pmd, 0, (PTRS_PER_PMD - NR_SHARED_PMDS)*sizeof(pmd_t)); -+ memcpy(pmd, kpmd, NR_SHARED_PMDS*sizeof(pmd_t)); -+} -+ - /* -- * List of all pgd's needed for non-PAE so it can invalidate entries -- * in both cached and uncached pgd's; not needed for PAE since the -- * kernel pmd is shared. If PAE were not to share the pmd a similar -- * tactic would be needed. This is essentially codepath-based locking -+ * List of all pgd's needed so it can invalidate entries in both cached -+ * and uncached pgd's. This is essentially codepath-based locking - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. 
-@@ -169,6 +183,12 @@ void pmd_ctor(void *pmd, kmem_cache_t *c - * checks at dup_mmap(), exec(), and other mmlist addition points - * could be used. The locking scheme was chosen on the basis of - * manfred's recommendations and having no core impact whatsoever. -+ * -+ * Lexicon for #ifdefless conditions to config options: -+ * (a) PTRS_PER_PMD == 1 means non-PAE. -+ * (b) PTRS_PER_PMD > 1 means PAE. -+ * (c) TASK_SIZE > PAGE_OFFSET means 4:4. -+ * (d) TASK_SIZE <= PAGE_OFFSET means non-4:4. - * -- wli - */ - spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED; -@@ -194,26 +214,38 @@ static inline void pgd_list_del(pgd_t *p - next->private = (unsigned long)pprev; - } - --void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) -+void pgd_ctor(void *__pgd, kmem_cache_t *cache, unsigned long unused) - { -+ pgd_t *pgd = __pgd; - unsigned long flags; - -- if (PTRS_PER_PMD == 1) -- spin_lock_irqsave(&pgd_lock, flags); -+ if (PTRS_PER_PMD == 1) { -+ if (TASK_SIZE <= PAGE_OFFSET) -+ spin_lock_irqsave(&pgd_lock, flags); -+ else -+ memcpy(&pgd[PTRS_PER_PGD - NR_SHARED_PMDS], -+ &swapper_pg_dir[PTRS_PER_PGD - NR_SHARED_PMDS], -+ NR_SHARED_PMDS*sizeof(pgd_t)); -+ } - -- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, -- swapper_pg_dir + USER_PTRS_PER_PGD, -- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); -+ if (TASK_SIZE <= PAGE_OFFSET) -+ memcpy(&pgd[USER_PTRS_PER_PGD], -+ &swapper_pg_dir[USER_PTRS_PER_PGD], -+ (PTRS_PER_PGD - USER_PTRS_PER_PGD)*sizeof(pgd_t)); - - if (PTRS_PER_PMD > 1) - return; - -- pgd_list_add(pgd); -- spin_unlock_irqrestore(&pgd_lock, flags); -- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); -+ if (TASK_SIZE > PAGE_OFFSET) -+ memset(pgd, 0, (PTRS_PER_PGD - NR_SHARED_PMDS)*sizeof(pgd_t)); -+ else { -+ pgd_list_add(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); -+ } - } - --/* never called when PTRS_PER_PMD > 1 */ -+/* Never called when PTRS_PER_PMD > 1 || TASK_SIZE > PAGE_OFFSET */ - void 
pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) - { - unsigned long flags; /* can be called from interrupt context */ -@@ -231,15 +263,31 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - if (PTRS_PER_PMD == 1 || !pgd) - return pgd; - -+ /* -+ * In the 4G userspace case alias the top 16 MB virtual -+ * memory range into the user mappings as well (these -+ * include the trampoline and CPU data structures). -+ */ - for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ pmd_t *pmd; -+ -+ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) -+ pmd = kmem_cache_alloc(kpmd_cache, GFP_KERNEL); -+ else -+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ - if (!pmd) - goto out_oom; - set_pgd(&pgd[i], __pgd(1 + __pa((u64)((u32)pmd)))); - } -- return pgd; - -+ return pgd; - out_oom: -+ /* -+ * we don't have to handle the kpmd_cache here, since it's the -+ * last allocation, and has either nothing to free or when it -+ * succeeds the whole operation succeeds. -+ */ - for (i--; i >= 0; i--) - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); - kmem_cache_free(pgd_cache, pgd); -@@ -250,10 +298,27 @@ void pgd_free(pgd_t *pgd) - { - int i; - -- /* in the PAE case user pgd entries are overwritten before usage */ -- if (PTRS_PER_PMD > 1) -- for (i = 0; i < USER_PTRS_PER_PGD; ++i) -- kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); - /* in the non-PAE case, clear_page_tables() clears user pgd entries */ -+ if (PTRS_PER_PMD == 1) -+ goto out_free; -+ -+ /* in the PAE case user pgd entries are overwritten before usage */ -+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -+ pmd_t *pmd = __va(pgd_val(pgd[i]) - 1); -+ -+ /* -+ * only userspace pmd's are cleared for us -+ * by mm/memory.c; it's a slab cache invariant -+ * that we must separate the kernel pmd slab -+ * all times, else we'll have bad pmd's. 
-+ if (TASK_SIZE > PAGE_OFFSET && i == USER_PTRS_PER_PGD - 1) -+ kmem_cache_free(kpmd_cache, pmd); -+ else -+ kmem_cache_free(pmd_cache, pmd); -+ } -+out_free: - kmem_cache_free(pgd_cache, pgd); - } -+ -+EXPORT_SYMBOL(show_mem); -diff -uprN linux-2.6.8.1.orig/arch/i386/power/cpu.c linux-2.6.8.1-ve022stab034/arch/i386/power/cpu.c ---- linux-2.6.8.1.orig/arch/i386/power/cpu.c 2004-08-14 14:55:59.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/i386/power/cpu.c 2005-09-09 14:39:25.000000000 +0400 -@@ -83,9 +83,7 @@ do_fpu_end(void) - static void fix_processor_context(void) - { - int cpu = smp_processor_id(); -- struct tss_struct * t = init_tss + cpu; - -- set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ - cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; - - load_TR_desc(); /* This does ltr */ -diff -uprN linux-2.6.8.1.orig/arch/ia64/ia32/binfmt_elf32.c linux-2.6.8.1-ve022stab034/arch/ia64/ia32/binfmt_elf32.c ---- linux-2.6.8.1.orig/arch/ia64/ia32/binfmt_elf32.c 2004-08-14 14:56:22.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ia64/ia32/binfmt_elf32.c 2005-09-09 14:39:25.000000000 +0400 -@@ -84,7 +84,11 @@ ia64_elf32_init (struct pt_regs *regs) - vma->vm_ops = &ia32_shared_page_vm_ops; - down_write(&current->mm->mmap_sem); - { -- insert_vm_struct(current->mm, vma); -+ if (insert_vm_struct(current->mm, vma)) { -+ kmem_cache_free(vm_area_cachep, vma); -+ up_write(&current->mm->mmap_sem); -+ return; -+ } - } - up_write(&current->mm->mmap_sem); - } -@@ -103,7 +107,11 @@ ia64_elf32_init (struct pt_regs *regs) - vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE; - down_write(&current->mm->mmap_sem); - { -- insert_vm_struct(current->mm, vma); -+ if (insert_vm_struct(current->mm, vma)) { -+ kmem_cache_free(vm_area_cachep, vma); -+ up_write(&current->mm->mmap_sem); -+ return; -+ } - } - up_write(&current->mm->mmap_sem); - } -@@ -151,7 +159,7 @@
ia32_setup_arg_pages (struct linux_binpr - unsigned long stack_base; - struct vm_area_struct *mpnt; - struct mm_struct *mm = current->mm; -- int i; -+ int i, ret; - - stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; - mm->arg_start = bprm->p + stack_base; -@@ -186,7 +194,11 @@ ia32_setup_arg_pages (struct linux_binpr - mpnt->vm_flags = VM_STACK_FLAGS; - mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)? - PAGE_COPY_EXEC: PAGE_COPY; -- insert_vm_struct(current->mm, mpnt); -+ if ((ret = insert_vm_struct(current->mm, mpnt))) { -+ up_write(&current->mm->mmap_sem); -+ kmem_cache_free(vm_area_cachep, mpnt); -+ return ret; -+ } - current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - } - -diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/perfmon.c linux-2.6.8.1-ve022stab034/arch/ia64/kernel/perfmon.c ---- linux-2.6.8.1.orig/arch/ia64/kernel/perfmon.c 2004-08-14 14:56:22.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ia64/kernel/perfmon.c 2005-09-09 14:39:25.000000000 +0400 -@@ -2582,7 +2582,7 @@ pfm_task_incompatible(pfm_context_t *ctx - return -EINVAL; - } - -- if (task->state == TASK_ZOMBIE) { -+ if (task->exit_state == EXIT_ZOMBIE) { - DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); - return -EBUSY; - } -diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/ia64/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/ia64/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ia64/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -1393,7 +1393,7 @@ sys_ptrace (long request, pid_t pid, uns - * sigkill. Perhaps it should be put in the status - * that it wants to exit.
- */ -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - goto out_tsk; - child->exit_code = SIGKILL; - -diff -uprN linux-2.6.8.1.orig/arch/ia64/kernel/traps.c linux-2.6.8.1-ve022stab034/arch/ia64/kernel/traps.c ---- linux-2.6.8.1.orig/arch/ia64/kernel/traps.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ia64/kernel/traps.c 2005-09-09 14:39:24.000000000 +0400 -@@ -35,34 +35,6 @@ trap_init (void) - fpswa_interface = __va(ia64_boot_param->fpswa); - } - --/* -- * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock -- * is acquired through the console unblank code) -- */ --void --bust_spinlocks (int yes) --{ -- int loglevel_save = console_loglevel; -- -- if (yes) { -- oops_in_progress = 1; -- return; -- } -- --#ifdef CONFIG_VT -- unblank_screen(); --#endif -- oops_in_progress = 0; -- /* -- * OK, the message is on the console. Now we call printk() without -- * oops_in_progress set so that printk will give klogd a poke. Hold onto -- * your hats... 
-- */ -- console_loglevel = 15; /* NMI oopser may have shut the console up */ -- printk(" "); -- console_loglevel = loglevel_save; --} -- - void - die (const char *str, struct pt_regs *regs, long err) - { -diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/contig.c linux-2.6.8.1-ve022stab034/arch/ia64/mm/contig.c ---- linux-2.6.8.1.orig/arch/ia64/mm/contig.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ia64/mm/contig.c 2005-09-09 14:39:25.000000000 +0400 -@@ -19,6 +19,7 @@ - #include <linux/efi.h> - #include <linux/mm.h> - #include <linux/swap.h> -+#include <linux/module.h> - - #include <asm/meminit.h> - #include <asm/pgalloc.h> -@@ -297,3 +298,5 @@ paging_init (void) - #endif /* !CONFIG_VIRTUAL_MEM_MAP */ - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); - } -+ -+EXPORT_SYMBOL(show_mem); -diff -uprN linux-2.6.8.1.orig/arch/ia64/mm/init.c linux-2.6.8.1-ve022stab034/arch/ia64/mm/init.c ---- linux-2.6.8.1.orig/arch/ia64/mm/init.c 2004-08-14 14:55:19.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ia64/mm/init.c 2005-09-09 14:39:25.000000000 +0400 -@@ -131,7 +131,13 @@ ia64_init_addr_space (void) - vma->vm_end = vma->vm_start + PAGE_SIZE; - vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; - vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP; -- insert_vm_struct(current->mm, vma); -+ down_write(&current->mm->mmap_sem); -+ if (insert_vm_struct(current->mm, vma)) { -+ up_write(&current->mm->mmap_sem); -+ kmem_cache_free(vm_area_cachep, vma); -+ return; -+ } -+ up_write(&current->mm->mmap_sem); - } - - /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ -@@ -143,7 +149,13 @@ ia64_init_addr_space (void) - vma->vm_end = PAGE_SIZE; - vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); - vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; -- insert_vm_struct(current->mm, vma); -+ down_write(&current->mm->mmap_sem); -+ if (insert_vm_struct(current->mm, vma)) { -+
up_write(&current->mm->mmap_sem); -+ kmem_cache_free(vm_area_cachep, vma); -+ return; -+ } -+ up_write(&current->mm->mmap_sem); - } - } - } -diff -uprN linux-2.6.8.1.orig/arch/m68k/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/m68k/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/m68k/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/m68k/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -277,7 +277,7 @@ asmlinkage int sys_ptrace(long request, - long tmp; - - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -diff -uprN linux-2.6.8.1.orig/arch/m68knommu/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/m68knommu/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/m68knommu/kernel/ptrace.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/m68knommu/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -271,7 +271,7 @@ asmlinkage int sys_ptrace(long request, - long tmp; - - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set.
*/ -diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/irixelf.c linux-2.6.8.1-ve022stab034/arch/mips/kernel/irixelf.c ---- linux-2.6.8.1.orig/arch/mips/kernel/irixelf.c 2004-08-14 14:56:25.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/mips/kernel/irixelf.c 2005-09-09 14:39:25.000000000 +0400 -@@ -127,7 +127,9 @@ static void set_brk(unsigned long start, - end = PAGE_ALIGN(end); - if (end <= start) - return; -+ down_write(&current->mm->mmap_sem); - do_brk(start, end - start); -+ up_write(&current->mm->mmap_sem); - } - - -@@ -376,7 +378,9 @@ static unsigned int load_irix_interp(str - - /* Map the last of the bss segment */ - if (last_bss > len) { -+ down_write(&current->mm->mmap_sem); - do_brk(len, (last_bss - len)); -+ up_write(&current->mm->mmap_sem); - } - kfree(elf_phdata); - -@@ -448,7 +452,12 @@ static inline int look_for_irix_interpre - if (retval < 0) - goto out; - -- file = open_exec(*name); -+ /* -+ * I don't understand this loop. -+ * Are we suppose to break the loop after successful open and -+ * read, or close the file, or store it somewhere?
--SAW -+ */ -+ file = open_exec(*name, bprm); - if (IS_ERR(file)) { - retval = PTR_ERR(file); - goto out; -@@ -564,7 +573,9 @@ void irix_map_prda_page (void) - unsigned long v; - struct prda *pp; - -+ down_write(&current->mm->mmap_sem); - v = do_brk (PRDA_ADDRESS, PAGE_SIZE); -+ up_write(&current->mm->mmap_sem); - - if (v < 0) - return; -@@ -855,8 +866,11 @@ static int load_irix_library(struct file - - len = (elf_phdata->p_filesz + elf_phdata->p_vaddr+ 0xfff) & 0xfffff000; - bss = elf_phdata->p_memsz + elf_phdata->p_vaddr; -- if (bss > len) -+ if (bss > len) { -+ down_write(&current->mm->mmap_sem); - do_brk(len, bss-len); -+ up_write(&current->mm->mmap_sem); -+ } - kfree(elf_phdata); - return 0; - } -diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/irixsig.c linux-2.6.8.1-ve022stab034/arch/mips/kernel/irixsig.c ---- linux-2.6.8.1.orig/arch/mips/kernel/irixsig.c 2004-08-14 14:56:00.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/mips/kernel/irixsig.c 2005-09-09 14:39:24.000000000 +0400 -@@ -184,9 +184,10 @@ asmlinkage int do_irix_signal(sigset_t * - if (!user_mode(regs)) - return 1; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (!oldset) -diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/ptrace32.c linux-2.6.8.1-ve022stab034/arch/mips/kernel/ptrace32.c ---- linux-2.6.8.1.orig/arch/mips/kernel/ptrace32.c 2004-08-14 14:55:20.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/mips/kernel/ptrace32.c 2005-09-09 14:39:25.000000000 +0400 -@@ -262,7 +262,7 @@ asmlinkage int sys32_ptrace(int request, - */ - case PTRACE_KILL: - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - wake_up_process(child); -diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/mips/kernel/ptrace.c ---
linux-2.6.8.1.orig/arch/mips/kernel/ptrace.c 2004-08-14 14:56:24.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/mips/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -277,7 +277,7 @@ asmlinkage int sys_ptrace(long request, - */ - case PTRACE_KILL: - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - wake_up_process(child); -diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/signal32.c linux-2.6.8.1-ve022stab034/arch/mips/kernel/signal32.c ---- linux-2.6.8.1.orig/arch/mips/kernel/signal32.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/mips/kernel/signal32.c 2005-09-09 14:39:24.000000000 +0400 -@@ -704,9 +704,10 @@ asmlinkage int do_signal32(sigset_t *old - if (!user_mode(regs)) - return 1; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (!oldset) -diff -uprN linux-2.6.8.1.orig/arch/mips/kernel/signal.c linux-2.6.8.1-ve022stab034/arch/mips/kernel/signal.c ---- linux-2.6.8.1.orig/arch/mips/kernel/signal.c 2004-08-14 14:55:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/mips/kernel/signal.c 2005-09-09 14:39:24.000000000 +0400 -@@ -556,9 +556,10 @@ asmlinkage int do_signal(sigset_t *oldse - if (!user_mode(regs)) - return 1; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (!oldset) -diff -uprN linux-2.6.8.1.orig/arch/parisc/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/parisc/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/parisc/kernel/ptrace.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/parisc/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -303,7 +303,7 @@ long 
sys_ptrace(long request, pid_t pid, - * that it wants to exit. - */ - DBG(("sys_ptrace(KILL)\n")); -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - goto out_tsk; - child->exit_code = SIGKILL; - goto out_wake_notrap; -diff -uprN linux-2.6.8.1.orig/arch/ppc/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/ppc/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/ppc/kernel/ptrace.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ppc/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -377,7 +377,7 @@ int sys_ptrace(long request, long pid, l - */ - case PTRACE_KILL: { - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -diff -uprN linux-2.6.8.1.orig/arch/ppc64/boot/zlib.c linux-2.6.8.1-ve022stab034/arch/ppc64/boot/zlib.c ---- linux-2.6.8.1.orig/arch/ppc64/boot/zlib.c 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ppc64/boot/zlib.c 2005-09-09 14:39:25.000000000 +0400 -@@ -1307,7 +1307,7 @@ local int huft_build( - { - *t = (inflate_huft *)Z_NULL; - *m = 0; -- return Z_OK; -+ return Z_DATA_ERROR; - } - - -@@ -1351,6 +1351,7 @@ local int huft_build( - if ((j = *p++) != 0) - v[x[j]++] = i; - } while (++i < n); -+ n = x[g]; /* set n to length of v */ - - - /* Generate the Huffman codes and for each, make the table entries */ -diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace32.c linux-2.6.8.1-ve022stab034/arch/ppc64/kernel/ptrace32.c ---- linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace32.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ppc64/kernel/ptrace32.c 2005-09-09 14:39:25.000000000 +0400 -@@ -314,7 +314,7 @@ int sys32_ptrace(long request, long pid, - */ - case PTRACE_KILL: { - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == 
EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -diff -uprN linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/ppc64/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/ppc64/kernel/ptrace.c 2004-08-14 14:56:00.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/ppc64/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -182,7 +182,7 @@ int sys_ptrace(long request, long pid, l - */ - case PTRACE_KILL: { - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/compat_exec.c linux-2.6.8.1-ve022stab034/arch/s390/kernel/compat_exec.c ---- linux-2.6.8.1.orig/arch/s390/kernel/compat_exec.c 2004-08-14 14:56:01.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/s390/kernel/compat_exec.c 2005-09-09 14:39:25.000000000 +0400 -@@ -39,7 +39,7 @@ int setup_arg_pages32(struct linux_binpr - unsigned long stack_base; - struct vm_area_struct *mpnt; - struct mm_struct *mm = current->mm; -- int i; -+ int i, ret; - - stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; - mm->arg_start = bprm->p + stack_base; -@@ -68,7 +68,11 @@ int setup_arg_pages32(struct linux_binpr - /* executable stack setting would be applied here */ - mpnt->vm_page_prot = PAGE_COPY; - mpnt->vm_flags = VM_STACK_FLAGS; -- insert_vm_struct(mm, mpnt); -+ if ((ret = insert_vm_struct(mm, mpnt))) { -+ up_write(&mm->mmap_sem); -+ kmem_cache_free(vm_area_cachep, mpnt); -+ return ret; -+ } - mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - } - -diff -uprN linux-2.6.8.1.orig/arch/s390/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/s390/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/s390/kernel/ptrace.c 2004-08-14 14:56:14.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/s390/kernel/ptrace.c 2005-09-09 
14:39:25.000000000 +0400 -@@ -626,7 +626,7 @@ do_ptrace(struct task_struct *child, lon - * perhaps it should be put in the status that it wants to - * exit. - */ -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - return 0; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -diff -uprN linux-2.6.8.1.orig/arch/s390/mm/fault.c linux-2.6.8.1-ve022stab034/arch/s390/mm/fault.c ---- linux-2.6.8.1.orig/arch/s390/mm/fault.c 2004-08-14 14:56:26.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/s390/mm/fault.c 2005-09-09 14:39:24.000000000 +0400 -@@ -61,17 +61,9 @@ void bust_spinlocks(int yes) - if (yes) { - oops_in_progress = 1; - } else { -- int loglevel_save = console_loglevel; - oops_in_progress = 0; - console_unblank(); -- /* -- * OK, the message is on the console. Now we call printk() -- * without oops_in_progress set so that printk will give klogd -- * a poke. Hold onto your hats... -- */ -- console_loglevel = 15; -- printk(" "); -- console_loglevel = loglevel_save; -+ wake_up_klogd(); - } - } - -diff -uprN linux-2.6.8.1.orig/arch/sh/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/sh/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/sh/kernel/ptrace.c 2004-08-14 14:54:49.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sh/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -217,7 +217,7 @@ asmlinkage int sys_ptrace(long request, - */ - case PTRACE_KILL: { - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - wake_up_process(child); -diff -uprN linux-2.6.8.1.orig/arch/sh/kernel/signal.c linux-2.6.8.1-ve022stab034/arch/sh/kernel/signal.c ---- linux-2.6.8.1.orig/arch/sh/kernel/signal.c 2004-08-14 14:56:25.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sh/kernel/signal.c 2005-09-09 14:39:24.000000000 +0400 -@@ -584,9 +584,10 @@ int do_signal(struct pt_regs 
*regs, sigs - if (!user_mode(regs)) - return 1; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (!oldset) -diff -uprN linux-2.6.8.1.orig/arch/sh64/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/sh64/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/sh64/kernel/ptrace.c 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sh64/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -257,7 +257,7 @@ asmlinkage int sys_ptrace(long request, - */ - case PTRACE_KILL: { - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - wake_up_process(child); -diff -uprN linux-2.6.8.1.orig/arch/sh64/kernel/signal.c linux-2.6.8.1-ve022stab034/arch/sh64/kernel/signal.c ---- linux-2.6.8.1.orig/arch/sh64/kernel/signal.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sh64/kernel/signal.c 2005-09-09 14:39:24.000000000 +0400 -@@ -705,10 +705,11 @@ int do_signal(struct pt_regs *regs, sigs - if (!user_mode(regs)) - return 1; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -- } -+ if (unlikely(test_thread_flag(TIF_FREEZE))) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; -+ } - - if (!oldset) - oldset = &current->blocked; -diff -uprN linux-2.6.8.1.orig/arch/sparc/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/sparc/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/sparc/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sparc/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -567,7 +567,7 @@ asmlinkage void do_ptrace(struct pt_regs - * exit. 
- */ - case PTRACE_KILL: { -- if (child->state == TASK_ZOMBIE) { /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) { /* already dead */ - pt_succ_return(regs, 0); - goto out_tsk; - } -diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.8.1-ve022stab034/arch/sparc64/kernel/binfmt_aout32.c ---- linux-2.6.8.1.orig/arch/sparc64/kernel/binfmt_aout32.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sparc64/kernel/binfmt_aout32.c 2005-09-09 14:39:25.000000000 +0400 -@@ -49,7 +49,9 @@ static void set_brk(unsigned long start, - end = PAGE_ALIGN(end); - if (end <= start) - return; -+ down_write(&current->mm->mmap_sem); - do_brk(start, end - start); -+ up_write(&current->mm->mmap_sem); - } - - /* -@@ -246,10 +248,14 @@ static int load_aout32_binary(struct lin - if (N_MAGIC(ex) == NMAGIC) { - loff_t pos = fd_offset; - /* Fuck me plenty... */ -+ down_write(&current->mm->mmap_sem); - error = do_brk(N_TXTADDR(ex), ex.a_text); -+ up_write(&current->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), - ex.a_text, &pos); -+ down_write(&current->mm->mmap_sem); - error = do_brk(N_DATADDR(ex), ex.a_data); -+ up_write(&current->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char __user *)N_DATADDR(ex), - ex.a_data, &pos); - goto beyond_if; -@@ -257,8 +263,10 @@ static int load_aout32_binary(struct lin - - if (N_MAGIC(ex) == OMAGIC) { - loff_t pos = fd_offset; -+ down_write(&current->mm->mmap_sem); - do_brk(N_TXTADDR(ex) & PAGE_MASK, - ex.a_text+ex.a_data + PAGE_SIZE - 1); -+ up_write(&current->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), - ex.a_text+ex.a_data, &pos); - } else { -@@ -272,7 +280,9 @@ static int load_aout32_binary(struct lin - - if (!bprm->file->f_op->mmap) { - loff_t pos = fd_offset; -+ down_write(&current->mm->mmap_sem); - do_brk(0, ex.a_text+ex.a_data); -+ up_write(&current->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, - (char __user *)N_TXTADDR(ex), - ex.a_text+ex.a_data, &pos); -@@ -389,7 
+399,9 @@ static int load_aout32_library(struct fi - len = PAGE_ALIGN(ex.a_text + ex.a_data); - bss = ex.a_text + ex.a_data + ex.a_bss; - if (bss > len) { -+ down_write(&current->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); -+ up_write(&current->mm->mmap_sem); - retval = error; - if (error != start_addr + len) - goto out; -diff -uprN linux-2.6.8.1.orig/arch/sparc64/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/sparc64/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/sparc64/kernel/ptrace.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/sparc64/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -559,7 +559,7 @@ asmlinkage void do_ptrace(struct pt_regs - * exit. - */ - case PTRACE_KILL: { -- if (child->state == TASK_ZOMBIE) { /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) { /* already dead */ - pt_succ_return(regs, 0); - goto out_tsk; - } -diff -uprN linux-2.6.8.1.orig/arch/um/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/um/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/um/kernel/ptrace.c 2004-08-14 14:56:25.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/um/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -163,7 +163,7 @@ int sys_ptrace(long request, long pid, l - */ - case PTRACE_KILL: { - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - wake_up_process(child); -diff -uprN linux-2.6.8.1.orig/arch/um/kernel/tt/process_kern.c linux-2.6.8.1-ve022stab034/arch/um/kernel/tt/process_kern.c ---- linux-2.6.8.1.orig/arch/um/kernel/tt/process_kern.c 2004-08-14 14:56:00.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/um/kernel/tt/process_kern.c 2005-09-09 14:39:25.000000000 +0400 -@@ -65,7 +65,7 @@ void *switch_to_tt(void *prev, void *nex - panic("write of switch_pipe failed, errno = %d", -err); - - reading = 1; -- if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD)) -+ 
if((from->exit_state == EXIT_ZOMBIE) || (from->exit_state == EXIT_DEAD)) - os_kill_process(os_getpid(), 0); - - err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); -diff -uprN linux-2.6.8.1.orig/arch/v850/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/v850/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/v850/kernel/ptrace.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/v850/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -238,7 +238,7 @@ int sys_ptrace(long request, long pid, l - */ - case PTRACE_KILL: - rval = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - wake_up_process(child); -diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_aout.c linux-2.6.8.1-ve022stab034/arch/x86_64/ia32/ia32_aout.c ---- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_aout.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/x86_64/ia32/ia32_aout.c 2005-09-09 14:39:25.000000000 +0400 -@@ -113,7 +113,9 @@ static void set_brk(unsigned long start, - end = PAGE_ALIGN(end); - if (end <= start) - return; -+ down_write(&current->mm->mmap_sem); - do_brk(start, end - start); -+ up_write(&current->mm->mmap_sem); - } - - #if CORE_DUMP -@@ -323,7 +325,10 @@ static int load_aout_binary(struct linux - pos = 32; - map_size = ex.a_text+ex.a_data; - -+ down_write(&current->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); -+ up_write(&current->mm->mmap_sem); -+ - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); - return error; -@@ -359,7 +364,9 @@ static int load_aout_binary(struct linux - - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { - loff_t pos = fd_offset; -+ down_write(&current->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); -+ up_write(&current->mm->mmap_sem); - bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex), - ex.a_text+ex.a_data, &pos); - flush_icache_range((unsigned long) 
N_TXTADDR(ex), -@@ -467,8 +474,9 @@ static int load_aout_library(struct file - error_time = jiffies; - } - #endif -- -+ down_write(&current->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); -+ up_write(&current->mm->mmap_sem); - - file->f_op->read(file, (char *)start_addr, - ex.a_text + ex.a_data, &pos); -@@ -492,7 +500,9 @@ static int load_aout_library(struct file - len = PAGE_ALIGN(ex.a_text + ex.a_data); - bss = ex.a_text + ex.a_data + ex.a_bss; - if (bss > len) { -+ down_write(&current->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); -+ up_write(&current->mm->mmap_sem); - retval = error; - if (error != start_addr + len) - goto out; -diff -uprN linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.8.1-ve022stab034/arch/x86_64/ia32/ia32_binfmt.c ---- linux-2.6.8.1.orig/arch/x86_64/ia32/ia32_binfmt.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/x86_64/ia32/ia32_binfmt.c 2005-09-09 14:39:25.000000000 +0400 -@@ -330,7 +330,7 @@ int setup_arg_pages(struct linux_binprm - unsigned long stack_base; - struct vm_area_struct *mpnt; - struct mm_struct *mm = current->mm; -- int i; -+ int i, ret; - - stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; - mm->arg_start = bprm->p + stack_base; -@@ -364,7 +364,11 @@ int setup_arg_pages(struct linux_binprm - mpnt->vm_flags = vm_stack_flags32; - mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ? 
- PAGE_COPY_EXEC : PAGE_COPY; -- insert_vm_struct(mm, mpnt); -+ if ((ret = insert_vm_struct(mm, mpnt))) { -+ up_write(&mm->mmap_sem); -+ kmem_cache_free(vm_area_cachep, mpnt); -+ return ret; -+ } - mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - } - -diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/ptrace.c linux-2.6.8.1-ve022stab034/arch/x86_64/kernel/ptrace.c ---- linux-2.6.8.1.orig/arch/x86_64/kernel/ptrace.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/x86_64/kernel/ptrace.c 2005-09-09 14:39:25.000000000 +0400 -@@ -393,7 +393,7 @@ asmlinkage long sys_ptrace(long request, - long tmp; - - ret = 0; -- if (child->state == TASK_ZOMBIE) /* already dead */ -+ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ -diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/signal.c linux-2.6.8.1-ve022stab034/arch/x86_64/kernel/signal.c ---- linux-2.6.8.1.orig/arch/x86_64/kernel/signal.c 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/x86_64/kernel/signal.c 2005-09-09 14:39:24.000000000 +0400 -@@ -411,9 +411,10 @@ int do_signal(struct pt_regs *regs, sigs - return 1; - } - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -- goto no_signal; -+ if (test_thread_flag(TIF_FREEZE)) { -+ refrigerator(); -+ if (!signal_pending(current)) -+ goto no_signal; - } - - if (!oldset) -diff -uprN linux-2.6.8.1.orig/arch/x86_64/kernel/traps.c linux-2.6.8.1-ve022stab034/arch/x86_64/kernel/traps.c ---- linux-2.6.8.1.orig/arch/x86_64/kernel/traps.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/x86_64/kernel/traps.c 2005-09-09 14:39:27.000000000 +0400 -@@ -254,10 +254,13 @@ void show_registers(struct pt_regs *regs - - rsp = regs->rsp; - -- printk("CPU %d ", cpu); -+ printk("CPU: %d, VCPU: %d:%d ", cpu, task_vsched_id(current), -+ task_cpu(current)); - __show_regs(regs); -- printk("Process %s (pid: %d, 
threadinfo %p, task %p)\n", -- cur->comm, cur->pid, cur->thread_info, cur); -+ printk("Process %s (pid: %d, veid=%d, threadinfo %p, task %p)\n", -+ cur->comm, cur->pid, -+ VEID(VE_TASK_INFO(current)->owner_env), -+ cur->thread_info, cur); - - /* - * When in-kernel, we also print out the stack and code at the -diff -uprN linux-2.6.8.1.orig/arch/x86_64/mm/fault.c linux-2.6.8.1-ve022stab034/arch/x86_64/mm/fault.c ---- linux-2.6.8.1.orig/arch/x86_64/mm/fault.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/arch/x86_64/mm/fault.c 2005-09-09 14:39:24.000000000 +0400 -@@ -34,27 +34,6 @@ - #include <asm/kdebug.h> - #include <asm-generic/sections.h> - --void bust_spinlocks(int yes) --{ -- int loglevel_save = console_loglevel; -- if (yes) { -- oops_in_progress = 1; -- } else { --#ifdef CONFIG_VT -- unblank_screen(); --#endif -- oops_in_progress = 0; -- /* -- * OK, the message is on the console. Now we call printk() -- * without oops_in_progress set so that printk will give klogd -- * a poke. Hold onto your hats... -- */ -- console_loglevel = 15; /* NMI oopser may have shut the console up */ -- printk(" "); -- console_loglevel = loglevel_save; -- } --} -- - /* Sometimes the CPU reports invalid exceptions on prefetch. - Check that here and ignore. 
- Opcode checker based on code by Richard Brunner */ -diff -uprN linux-2.6.8.1.orig/Documentation/filesystems/Locking linux-2.6.8.1-ve022stab034/Documentation/filesystems/Locking ---- linux-2.6.8.1.orig/Documentation/filesystems/Locking 2004-08-14 14:56:01.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/Documentation/filesystems/Locking 2005-09-09 14:39:25.000000000 +0400 -@@ -90,7 +90,7 @@ prototypes: - void (*destroy_inode)(struct inode *); - void (*read_inode) (struct inode *); - void (*dirty_inode) (struct inode *); -- void (*write_inode) (struct inode *, int); -+ int (*write_inode) (struct inode *, int); - void (*put_inode) (struct inode *); - void (*drop_inode) (struct inode *); - void (*delete_inode) (struct inode *); -diff -uprN linux-2.6.8.1.orig/Documentation/filesystems/vfs.txt linux-2.6.8.1-ve022stab034/Documentation/filesystems/vfs.txt ---- linux-2.6.8.1.orig/Documentation/filesystems/vfs.txt 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/Documentation/filesystems/vfs.txt 2005-09-09 14:39:25.000000000 +0400 -@@ -176,7 +176,7 @@ filesystem. 
As of kernel 2.1.99, the fol - - struct super_operations { - void (*read_inode) (struct inode *); -- void (*write_inode) (struct inode *, int); -+ int (*write_inode) (struct inode *, int); - void (*put_inode) (struct inode *); - void (*drop_inode) (struct inode *); - void (*delete_inode) (struct inode *); -diff -uprN linux-2.6.8.1.orig/Documentation/power/swsusp.txt linux-2.6.8.1-ve022stab034/Documentation/power/swsusp.txt ---- linux-2.6.8.1.orig/Documentation/power/swsusp.txt 2004-08-14 14:54:46.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/Documentation/power/swsusp.txt 2005-09-09 14:39:24.000000000 +0400 -@@ -211,8 +211,8 @@ A: All such kernel threads need to be fi - where it is safe to be frozen (no kernel semaphores should be held at - that point and it must be safe to sleep there), and add: - -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - - Q: What is the difference between between "platform", "shutdown" and - "firmware" in /sys/power/disk? -diff -uprN linux-2.6.8.1.orig/Documentation/ve.txt linux-2.6.8.1-ve022stab034/Documentation/ve.txt ---- linux-2.6.8.1.orig/Documentation/ve.txt 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.8.1-ve022stab034/Documentation/ve.txt 2005-09-09 14:39:26.000000000 +0400 -@@ -0,0 +1,36 @@ -+ Open Virtuozzo Overview -+ ----------------------- -+ (C) SWsoft, 2005, http://www.sw-soft.com, All rights reserved. -+ -+Open Virtuozzo is a virtualization technology which allows to run multiple -+isolated VPSs (Virtual Private Server) on a single operating system. -+It uses a single instance of Linux kernel in memory which efficiently -+manages resources between VPSs. -+ -+Virtual environment (VE) notion used sometimes in kernel is the original -+name of more modern notion of Virtual Private Server (VPS). 
-+ -+From user point of view, every VPS is an isolated operating system with -+private file system, private set of users, private root superuser, -+private set of processes and so on. Every application which does not -+require direct hardware access can't feel the difference between VPS -+and real standalone server. -+ -+From kernel point of view, VPS is an isolated set of processes spawned -+from their private 'init' process. Kernel controls which resources are -+accessible inside VPS and which amount of these resources can be -+consumed/used by VPS processes. Also kernel provides isolation between -+VPSs thus ensuring that one VPS can't use private resources of another -+VPS, make DoS/hack/crash attack on its neighbour and so on. -+ -+main Open Virtuozzo config options: -+ CONFIG_FAIRSCHED=y -+ CONFIG_SCHED_VCPU=y -+ CONFIG_VE=y -+ CONFIG_VE_CALLS=m -+ CONFIG_VE_NETDEV=m -+ CONFIG_VE_IPTABLES=y -+ -+Official product pages: -+ http://www.virtuozzo.com -+ http://openvirtuozzo.org -diff -uprN linux-2.6.8.1.orig/Documentation/vsched.txt linux-2.6.8.1-ve022stab034/Documentation/vsched.txt ---- linux-2.6.8.1.orig/Documentation/vsched.txt 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.8.1-ve022stab034/Documentation/vsched.txt 2005-09-09 14:39:26.000000000 +0400 -@@ -0,0 +1,82 @@ -+Copyright (C) 2005 SWsoft. All rights reserved. -+ -+Hierarchical CPU schedulers -+~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+Hierarchical CPU scheduler is a stack of CPU schedulers which allows -+to organize different policies of scheduling in the system and/or between -+groups of processes. -+ -+Virtuozzo uses a hierarchical Fair CPU scheduler organized as a 2-stage -+CPU scheduler, where the scheduling decisions are made in 2 steps: -+1. On the first step Fair CPU scheduler selects a group of processes -+ which should get some CPU time. -+2. Then standard Linux scheduler chooses a process inside the group. 
-+Such scheduler efficiently allows to isolate one group of processes -+from another and still allows a group to use more than 1 CPU on SMP systems. -+ -+This document describes a new middle layer of Virtuozzo hierarchical CPU -+scheduler which makes decisions after Fair scheduler, but before Linux -+scheduler and which is called VCPU scheduler. -+ -+ -+Where VCPU scheduler comes from? -+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ -+Existing hierarchical CPU scheduler uses isolated algorithms on each stage -+of decision making, i.e. every scheduler makes its decisions without -+taking into account the details of other schedulers. This can lead to a number -+of problems described below. -+ -+On SMP systems there are possible situations when the first CPU scheduler -+in the hierarchy (e.g. Fair scheduler) wants to schedule some group of -+processes on the physical CPU, but the underlying process scheduler -+(e.g. Linux O(1) CPU scheduler) is unable to schedule any processes -+on this physical CPU. Usually this happens due to the fact that Linux -+kernel scheduler uses per-physical CPU runqueues. -+ -+Another problem is that Linux scheduler also knows nothing about -+Fair scheduler and can't balance efficiently without taking into account -+statistics about process groups from Fair scheduler. Without such -+statistics Linux scheduler can concentrate all processes on one physical -+CPU, thus making CPU consuming highly inefficient. -+ -+VCPU scheduler solves these problems by adding a new layer between -+Fair scheduler and Linux scheduler. -+ -+VCPU scheduler -+~~~~~~~~~~~~~~ -+ -+VCPU scheduler is a CPU scheduler which splits notion of -+physical and virtual CPUs (VCPU and PCPU). This means that tasks are -+running on virtual CPU runqueues, while VCPUs are running on PCPUs. -+ -+The Virtuozzo hierarchical fair scheduler becomes 3 stage CPU scheduler: -+1. First, Fair CPU scheduler selects a group of processes. -+2. 
Then VCPU scheduler select a virtual CPU to run (this is actually -+ a runqueue). -+3. Standard Linux scheduler chooses a process from the runqueue. -+ -+For example on the picture below PCPU0 executes tasks from -+VCPU1 runqueue and PCPU1 is idle: -+ -+ virtual | physical | virtual -+ idle CPUs | CPUs | CPUS -+--------------------|------------------------|-------------------------- -+ | | ----------------- -+ | | | virtual sched X | -+ | | | ----------- | -+ | | | | VCPU0 | | -+ | | | ----------- | -+ ------------ | ----------- | ----------- | -+| idle VCPU0 | | | PCPU0 | <---> | | VCPU1 | | -+ ------------ | ----------- | ----------- | -+ | | ----------------- -+ | | -+ | | ----------------- -+ | | | virtual sched Y | -+ ------------ ----------- | | ----------- | -+| idle VCPU1 | <---> | PCPU1 | | | | VCPU0 | | -+ ------------ ----------- | | ----------- | -+ | | ----------------- -+ | | -diff -uprN linux-2.6.8.1.orig/drivers/base/class.c linux-2.6.8.1-ve022stab034/drivers/base/class.c ---- linux-2.6.8.1.orig/drivers/base/class.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/base/class.c 2005-09-09 14:39:25.000000000 +0400 -@@ -71,6 +71,11 @@ static struct kobj_type ktype_class = { - /* Hotplug events for classes go to the class_obj subsys */ - static decl_subsys(class, &ktype_class, NULL); - -+#ifndef CONFIG_VE -+#define visible_class_subsys class_subsys -+#else -+#define visible_class_subsys (*get_exec_env()->class_subsys) -+#endif - - int class_create_file(struct class * cls, const struct class_attribute * attr) - { -@@ -143,7 +148,7 @@ int class_register(struct class * cls) - if (error) - return error; - -- subsys_set_kset(cls, class_subsys); -+ subsys_set_kset(cls, visible_class_subsys); - - error = subsystem_register(&cls->subsys); - if (!error) { -@@ -306,6 +311,11 @@ static struct kset_hotplug_ops class_hot - - static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops); - -+#ifndef CONFIG_VE -+#define 
visible_class_obj_subsys class_obj_subsys -+#else -+#define visible_class_obj_subsys (*get_exec_env()->class_obj_subsys) -+#endif - - static int class_device_add_attrs(struct class_device * cd) - { -@@ -342,7 +352,7 @@ static void class_device_remove_attrs(st - - void class_device_initialize(struct class_device *class_dev) - { -- kobj_set_kset_s(class_dev, class_obj_subsys); -+ kobj_set_kset_s(class_dev, visible_class_obj_subsys); - kobject_init(&class_dev->kobj); - INIT_LIST_HEAD(&class_dev->node); - } -@@ -505,12 +515,19 @@ void class_interface_unregister(struct c - class_put(parent); - } - -- -+void prepare_sysfs_classes(void) -+{ -+#ifdef CONFIG_VE -+ get_ve0()->class_subsys = &class_subsys; -+ get_ve0()->class_obj_subsys = &class_obj_subsys; -+#endif -+} - - int __init classes_init(void) - { - int retval; - -+ prepare_sysfs_classes(); - retval = subsystem_register(&class_subsys); - if (retval) - return retval; -@@ -542,3 +559,6 @@ EXPORT_SYMBOL(class_device_remove_file); - - EXPORT_SYMBOL(class_interface_register); - EXPORT_SYMBOL(class_interface_unregister); -+ -+EXPORT_SYMBOL(class_subsys); -+EXPORT_SYMBOL(class_obj_subsys); -diff -uprN linux-2.6.8.1.orig/drivers/block/floppy.c linux-2.6.8.1-ve022stab034/drivers/block/floppy.c ---- linux-2.6.8.1.orig/drivers/block/floppy.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/block/floppy.c 2005-09-09 14:39:25.000000000 +0400 -@@ -3774,7 +3774,7 @@ static int floppy_open(struct inode *ino - * Needed so that programs such as fdrawcmd still can work on write - * protected disks */ - if (filp->f_mode & 2 -- || permission(filp->f_dentry->d_inode, 2, NULL) == 0) -+ || permission(filp->f_dentry->d_inode, 2, NULL, NULL) == 0) - filp->private_data = (void *)8; - - if (UFDCS->rawcmd == 1) -diff -uprN linux-2.6.8.1.orig/drivers/block/genhd.c linux-2.6.8.1-ve022stab034/drivers/block/genhd.c ---- linux-2.6.8.1.orig/drivers/block/genhd.c 2004-08-14 14:55:32.000000000 +0400 -+++ 
linux-2.6.8.1-ve022stab034/drivers/block/genhd.c 2005-09-09 14:39:25.000000000 +0400 -@@ -18,6 +18,8 @@ - #define MAX_PROBE_HASH 255 /* random */ - - static struct subsystem block_subsys; -+struct subsystem *get_block_subsys(void) {return &block_subsys;} -+EXPORT_SYMBOL(get_block_subsys); - - /* - * Can be deleted altogether. Later. -diff -uprN linux-2.6.8.1.orig/drivers/block/ioctl.c linux-2.6.8.1-ve022stab034/drivers/block/ioctl.c ---- linux-2.6.8.1.orig/drivers/block/ioctl.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/block/ioctl.c 2005-09-09 14:39:25.000000000 +0400 -@@ -219,3 +219,5 @@ int blkdev_ioctl(struct inode *inode, st - } - return -ENOTTY; - } -+ -+EXPORT_SYMBOL_GPL(blkdev_ioctl); -diff -uprN linux-2.6.8.1.orig/drivers/block/ll_rw_blk.c linux-2.6.8.1-ve022stab034/drivers/block/ll_rw_blk.c ---- linux-2.6.8.1.orig/drivers/block/ll_rw_blk.c 2004-08-14 14:54:49.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/block/ll_rw_blk.c 2005-09-09 14:39:24.000000000 +0400 -@@ -2192,7 +2192,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge); - static int __make_request(request_queue_t *q, struct bio *bio) - { - struct request *req, *freereq = NULL; -- int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra; -+ int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra, sync; - sector_t sector; - - sector = bio->bi_sector; -@@ -2238,6 +2238,7 @@ again: - drive_stat_acct(req, nr_sectors, 0); - if (!attempt_back_merge(q, req)) - elv_merged_request(q, req); -+ sync = bio_sync(bio); - goto out; - - case ELEVATOR_FRONT_MERGE: -@@ -2264,6 +2265,7 @@ again: - drive_stat_acct(req, nr_sectors, 0); - if (!attempt_front_merge(q, req)) - elv_merged_request(q, req); -+ sync = bio_sync(bio); - goto out; - - /* -@@ -2329,11 +2331,12 @@ get_rq: - req->rq_disk = bio->bi_bdev->bd_disk; - req->start_time = jiffies; - -+ sync = bio_sync(bio); - add_request(q, req); - out: - if (freereq) - __blk_put_request(q, freereq); -- if (bio_sync(bio)) -+ if (sync) - 
__generic_unplug_device(q); - - spin_unlock_irq(q->queue_lock); -diff -uprN linux-2.6.8.1.orig/drivers/block/scsi_ioctl.c linux-2.6.8.1-ve022stab034/drivers/block/scsi_ioctl.c ---- linux-2.6.8.1.orig/drivers/block/scsi_ioctl.c 2004-08-14 14:56:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/block/scsi_ioctl.c 2005-09-09 14:39:25.000000000 +0400 -@@ -304,7 +304,8 @@ static int sg_scsi_ioctl(struct file *fi - struct gendisk *bd_disk, Scsi_Ioctl_Command __user *sic) - { - struct request *rq; -- int err, in_len, out_len, bytes, opcode, cmdlen; -+ int err; -+ unsigned int in_len, out_len, bytes, opcode, cmdlen; - char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; - - /* -@@ -316,7 +317,7 @@ static int sg_scsi_ioctl(struct file *fi - return -EFAULT; - if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) - return -EINVAL; -- if (get_user(opcode, sic->data)) -+ if (get_user(opcode, (int *)sic->data)) - return -EFAULT; - - bytes = max(in_len, out_len); -diff -uprN linux-2.6.8.1.orig/drivers/char/keyboard.c linux-2.6.8.1-ve022stab034/drivers/char/keyboard.c ---- linux-2.6.8.1.orig/drivers/char/keyboard.c 2004-08-14 14:56:26.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/keyboard.c 2005-09-09 14:39:24.000000000 +0400 -@@ -1063,7 +1063,7 @@ void kbd_keycode(unsigned int keycode, i - sysrq_down = down; - return; - } -- if (sysrq_down && down && !rep) { -+ if ((sysrq_down || sysrq_eat_all()) && down && !rep) { - handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); - return; - } -diff -uprN linux-2.6.8.1.orig/drivers/char/n_tty.c linux-2.6.8.1-ve022stab034/drivers/char/n_tty.c ---- linux-2.6.8.1.orig/drivers/char/n_tty.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/n_tty.c 2005-09-09 14:39:25.000000000 +0400 -@@ -946,13 +946,13 @@ static inline int copy_from_read_buf(str - - { - int retval; -- ssize_t n; -+ size_t n; - unsigned long flags; - - retval = 0; - spin_lock_irqsave(&tty->read_lock, flags); - n = min(tty->read_cnt, 
N_TTY_BUF_SIZE - tty->read_tail); -- n = min((ssize_t)*nr, n); -+ n = min(*nr, n); - spin_unlock_irqrestore(&tty->read_lock, flags); - if (n) { - mb(); -diff -uprN linux-2.6.8.1.orig/drivers/char/pty.c linux-2.6.8.1-ve022stab034/drivers/char/pty.c ---- linux-2.6.8.1.orig/drivers/char/pty.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/pty.c 2005-09-09 14:39:26.000000000 +0400 -@@ -32,22 +32,48 @@ - #include <asm/bitops.h> - #include <linux/devpts_fs.h> - -+#include <ub/ub_misc.h> -+ - #if defined(CONFIG_LEGACY_PTYS) || defined(CONFIG_UNIX98_PTYS) - - #ifdef CONFIG_LEGACY_PTYS - static struct tty_driver *pty_driver, *pty_slave_driver; -+ -+struct tty_driver *get_pty_driver(void) {return pty_driver;} -+struct tty_driver *get_pty_slave_driver(void) {return pty_slave_driver;} -+ -+EXPORT_SYMBOL(get_pty_driver); -+EXPORT_SYMBOL(get_pty_slave_driver); - #endif - - /* These are global because they are accessed in tty_io.c */ - #ifdef CONFIG_UNIX98_PTYS - struct tty_driver *ptm_driver; - struct tty_driver *pts_driver; -+EXPORT_SYMBOL(ptm_driver); -+EXPORT_SYMBOL(pts_driver); -+ -+#ifdef CONFIG_VE -+#define ve_ptm_driver (get_exec_env()->ptm_driver) -+#else -+#define ve_ptm_driver ptm_driver -+#endif -+ -+void prepare_pty(void) -+{ -+#ifdef CONFIG_VE -+ get_ve0()->ptm_driver = ptm_driver; -+ /* don't clean ptm_driver and co. 
here, they are used in vecalls.c */ -+#endif -+} - #endif - - static void pty_close(struct tty_struct * tty, struct file * filp) - { - if (!tty) - return; -+ -+ ub_pty_uncharge(tty); - if (tty->driver->subtype == PTY_TYPE_MASTER) { - if (tty->count > 1) - printk("master pty_close: count = %d!!\n", tty->count); -@@ -67,8 +93,12 @@ static void pty_close(struct tty_struct - if (tty->driver->subtype == PTY_TYPE_MASTER) { - set_bit(TTY_OTHER_CLOSED, &tty->flags); - #ifdef CONFIG_UNIX98_PTYS -- if (tty->driver == ptm_driver) -+ if (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) { -+ struct ve_struct *old_env; -+ old_env = set_exec_env(VE_OWNER_TTY(tty)); - devpts_pty_kill(tty->index); -+ set_exec_env(old_env); -+ } - #endif - tty_vhangup(tty->link); - } -@@ -288,6 +318,8 @@ static int pty_open(struct tty_struct *t - - if (!tty || !tty->link) - goto out; -+ if (ub_pty_charge(tty)) -+ goto out; - - retval = -EIO; - if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) -@@ -455,6 +487,7 @@ static int __init pty_init(void) - panic("Couldn't register Unix98 pts driver"); - - pty_table[1].data = &ptm_driver->refcount; -+ prepare_pty(); - #endif /* CONFIG_UNIX98_PTYS */ - - return 0; -diff -uprN linux-2.6.8.1.orig/drivers/char/random.c linux-2.6.8.1-ve022stab034/drivers/char/random.c ---- linux-2.6.8.1.orig/drivers/char/random.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/random.c 2005-09-09 14:39:25.000000000 +0400 -@@ -1917,7 +1917,7 @@ static int poolsize_strategy(ctl_table * - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) - { -- int len; -+ unsigned int len; - - sysctl_poolsize = random_state->poolinfo.POOLBYTES; - -diff -uprN linux-2.6.8.1.orig/drivers/char/raw.c linux-2.6.8.1-ve022stab034/drivers/char/raw.c ---- linux-2.6.8.1.orig/drivers/char/raw.c 2004-08-14 14:55:34.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/raw.c 2005-09-09 14:39:25.000000000 +0400 -@@ -122,7 +122,7 @@ 
raw_ioctl(struct inode *inode, struct fi - { - struct block_device *bdev = filp->private_data; - -- return ioctl_by_bdev(bdev, command, arg); -+ return blkdev_ioctl(bdev->bd_inode, filp, command, arg); - } - - static void bind_device(struct raw_config_request *rq) -diff -uprN linux-2.6.8.1.orig/drivers/char/sysrq.c linux-2.6.8.1-ve022stab034/drivers/char/sysrq.c ---- linux-2.6.8.1.orig/drivers/char/sysrq.c 2004-08-14 14:56:00.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/sysrq.c 2005-09-09 14:39:25.000000000 +0400 -@@ -31,10 +31,12 @@ - #include <linux/suspend.h> - #include <linux/writeback.h> - #include <linux/buffer_head.h> /* for fsync_bdev() */ -+#include <linux/kallsyms.h> - - #include <linux/spinlock.h> - - #include <asm/ptrace.h> -+#include <asm/uaccess.h> - - extern void reset_vc(unsigned int); - -@@ -131,6 +133,296 @@ static struct sysrq_key_op sysrq_mountro - .action_msg = "Emergency Remount R/O", - }; - -+#ifdef CONFIG_SYSRQ_DEBUG -+/* -+ * Alt-SysRq debugger -+ * Implemented functions: -+ * dumping memory -+ * resolvind symbols -+ * writing memory -+ * quitting :) -+ */ -+ -+/* Memory accessing routines */ -+#define DUMP_LINES 22 -+unsigned long *dumpmem_addr; -+ -+static void dump_mem(void) -+{ -+ unsigned long value[4]; -+ mm_segment_t old_fs; -+ int line, err; -+ -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = 0; -+ for (line = 0; line < DUMP_LINES; line++) { -+ err |= __get_user(value[0], dumpmem_addr++); -+ err |= __get_user(value[1], dumpmem_addr++); -+ err |= __get_user(value[2], dumpmem_addr++); -+ err |= __get_user(value[3], dumpmem_addr++); -+ if (err) { -+ printk("Invalid address 0x%p\n", dumpmem_addr - 4); -+ break; -+ } -+ printk("0x%p: %08lx %08lx %08lx %08lx\n", dumpmem_addr - 4, -+ value[0], value[1], value[2], value[3]); -+ } -+ set_fs(old_fs); -+} -+ -+static unsigned long *writemem_addr; -+ -+static void write_mem(unsigned long val) -+{ -+ mm_segment_t old_fs; -+ unsigned long old_val; -+ -+ old_fs = get_fs(); -+ 
set_fs(KERNEL_DS); -+ if (__get_user(old_val, writemem_addr)) -+ goto err; -+ printk("Changing [0x%p] %08lX to %08lX\n", writemem_addr, old_val, val); -+ __put_user(val, writemem_addr); -+err: -+ set_fs(old_fs); -+} -+ -+/* reading user input */ -+#define NAME_LEN (64) -+static struct { -+ unsigned long hex; -+ char name[NAME_LEN + 1]; -+ void (*entered)(void); -+} debug_input; -+ -+static void debug_read_hex(int key) -+{ -+ static int entered = 0; -+ int val; -+ -+ if (key >= '0' && key <= '9') -+ val = key - '0'; -+ else if (key >= 'a' && key <= 'f') -+ val = key - 'a' + 0xa; -+ else -+ return; -+ -+ entered++; -+ debug_input.hex = (debug_input.hex << 4) + val; -+ printk("%c", key); -+ if (entered != sizeof(unsigned long) * 2) -+ return; -+ -+ printk("\n"); -+ entered = 0; -+ debug_input.entered(); -+} -+ -+static void debug_read_string(int key) -+{ -+ static int pos; -+ static int shift; -+ -+ if (key == 0) { -+ /* actually key == 0 not only for shift */ -+ shift = 1; -+ return; -+ } -+ -+ if (key == 0x0d) /* enter */ -+ goto finish; -+ -+ if (key >= 'a' && key <= 'z') { -+ if (shift) -+ key = key - 'a' + 'A'; -+ goto correct; -+ } -+ if (key == '-') { -+ if (shift) -+ key = '_'; -+ goto correct; -+ } -+ if (key >= '0' && key <= '9') -+ goto correct; -+ return; -+ -+correct: -+ debug_input.name[pos] = key; -+ pos++; -+ shift = 0; -+ printk("%c", key); -+ if (pos != NAME_LEN) -+ return; -+ -+finish: -+ printk("\n"); -+ pos = 0; -+ shift = 0; -+ debug_input.entered(); -+ memset(debug_input.name, 0, NAME_LEN); -+} -+ -+static int sysrq_debug_mode; -+#define DEBUG_SELECT_ACTION 1 -+#define DEBUG_READ_INPUT 2 -+static struct sysrq_key_op *debug_sysrq_key_table[]; -+static void (*handle_debug_input)(int key); -+static void swap_opts(struct sysrq_key_op **); -+#define PROMPT "> " -+ -+int sysrq_eat_all(void) -+{ -+ return sysrq_debug_mode; -+} -+ -+static inline void debug_switch_read_input(void (*fn_read)(int), -+ void (*fn_fini)(void)) -+{ -+ WARN_ON(fn_read == NULL 
|| fn_fini == NULL); -+ debug_input.entered = fn_fini; -+ handle_debug_input = fn_read; -+ sysrq_debug_mode = DEBUG_READ_INPUT; -+} -+ -+static inline void debug_switch_select_action(void) -+{ -+ sysrq_debug_mode = DEBUG_SELECT_ACTION; -+ handle_debug_input = NULL; -+ printk(PROMPT); -+} -+ -+/* handle key press in debug mode */ -+static void __handle_debug(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ if (sysrq_debug_mode == DEBUG_SELECT_ACTION) { -+ __handle_sysrq(key, pt_regs, tty); -+ if (sysrq_debug_mode) -+ printk(PROMPT); -+ } else { -+ __sysrq_lock_table(); -+ handle_debug_input(key); -+ __sysrq_unlock_table(); -+ } -+} -+ -+/* dump memory */ -+static void debug_dumpmem_addr_entered(void) -+{ -+ dumpmem_addr = (unsigned long *)debug_input.hex; -+ dump_mem(); -+ debug_switch_select_action(); -+} -+ -+static void sysrq_handle_dumpmem(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ debug_switch_read_input(debug_read_hex, debug_dumpmem_addr_entered); -+} -+static struct sysrq_key_op sysrq_debug_dumpmem = { -+ .handler = sysrq_handle_dumpmem, -+ .help_msg = "Dump memory\n", -+ .action_msg = "Enter address", -+}; -+ -+static void sysrq_handle_dumpnext(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ dump_mem(); -+} -+static struct sysrq_key_op sysrq_debug_dumpnext = { -+ .handler = sysrq_handle_dumpnext, -+ .help_msg = "dump neXt\n", -+ .action_msg = "", -+}; -+ -+/* resolve symbol */ -+static void debug_resolve_name_entered(void) -+{ -+ unsigned long sym_addr; -+ -+ sym_addr = kallsyms_lookup_name(debug_input.name); -+ printk("%s: %08lX\n", debug_input.name, sym_addr); -+ if (sym_addr) { -+ printk("Now you can dump it via X\n"); -+ dumpmem_addr = (unsigned long *)sym_addr; -+ } -+ debug_switch_select_action(); -+} -+ -+static void sysrq_handle_resolve(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ debug_switch_read_input(debug_read_string, debug_resolve_name_entered); -+} -+static 
struct sysrq_key_op sysrq_debug_resove = { -+ .handler = sysrq_handle_resolve, -+ .help_msg = "Resolve symbol\n", -+ .action_msg = "Enter symbol name", -+}; -+ -+/* write memory */ -+static void debug_writemem_val_entered(void) -+{ -+ write_mem(debug_input.hex); -+ debug_switch_select_action(); -+} -+ -+static void debug_writemem_addr_entered(void) -+{ -+ mm_segment_t old_fs; -+ unsigned long val; -+ -+ writemem_addr = (unsigned long *)debug_input.hex; -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ if (!__get_user(val, writemem_addr)) -+ printk(" [0x%p] = %08lX\n", writemem_addr, val); -+ set_fs(old_fs); -+ debug_switch_read_input(debug_read_hex, debug_writemem_val_entered); -+} -+ -+static void sysrq_handle_writemem(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ debug_switch_read_input(debug_read_hex, debug_writemem_addr_entered); -+} -+static struct sysrq_key_op sysrq_debug_writemem = { -+ .handler = sysrq_handle_writemem, -+ .help_msg = "Write memory\n", -+ .action_msg = "Enter address and then value", -+}; -+ -+/* switch to debug mode */ -+static void sysrq_handle_debug(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ swap_opts(debug_sysrq_key_table); -+ printk("Welcome sysrq debugging mode\n" -+ "Press H for help\n"); -+ debug_switch_select_action(); -+} -+static struct sysrq_key_op sysrq_debug_enter = { -+ .handler = sysrq_handle_debug, -+ .help_msg = "start Degugging", -+ .action_msg = "Select desired action", -+}; -+ -+/* quit debug mode */ -+static void sysrq_handle_quit(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ swap_opts(NULL); -+ sysrq_debug_mode = 0; -+} -+static struct sysrq_key_op sysrq_debug_quit = { -+ .handler = sysrq_handle_quit, -+ .help_msg = "Quit debug mode\n", -+ .action_msg = "Thank you for using debugger", -+}; -+#endif -+ - /* END SYNC SYSRQ HANDLERS BLOCK */ - - -@@ -139,8 +431,13 @@ static struct sysrq_key_op sysrq_mountro - static void sysrq_handle_showregs(int key, 
struct pt_regs *pt_regs, - struct tty_struct *tty) - { -+ bust_spinlocks(1); - if (pt_regs) - show_regs(pt_regs); -+ bust_spinlocks(0); -+#ifdef __i386__ -+ smp_nmi_call_function(smp_show_regs, NULL, 0); -+#endif - } - static struct sysrq_key_op sysrq_showregs_op = { - .handler = sysrq_handle_showregs, -@@ -183,7 +480,7 @@ static void send_sig_all(int sig) - { - struct task_struct *p; - -- for_each_process(p) { -+ for_each_process_all(p) { - if (p->mm && p->pid != 1) - /* Not swapper, init nor kernel thread */ - force_sig(sig, p); -@@ -214,13 +511,26 @@ static struct sysrq_key_op sysrq_kill_op - .action_msg = "Kill All Tasks", - }; - -+#ifdef CONFIG_SCHED_VCPU -+static void sysrq_handle_vschedstate(int key, struct pt_regs *pt_regs, -+ struct tty_struct *tty) -+{ -+ show_vsched(); -+} -+static struct sysrq_key_op sysrq_vschedstate_op = { -+ .handler = sysrq_handle_vschedstate, -+ .help_msg = "showvsChed", -+ .action_msg = "Show Vsched", -+}; -+#endif -+ - /* END SIGNAL SYSRQ HANDLERS BLOCK */ - - - /* Key Operations table and lock */ - static spinlock_t sysrq_key_table_lock = SPIN_LOCK_UNLOCKED; - #define SYSRQ_KEY_TABLE_LENGTH 36 --static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { -+static struct sysrq_key_op *def_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { - /* 0 */ &sysrq_loglevel_op, - /* 1 */ &sysrq_loglevel_op, - /* 2 */ &sysrq_loglevel_op, -@@ -235,8 +545,16 @@ static struct sysrq_key_op *sysrq_key_ta - it is handled specially on the sparc - and will never arrive */ - /* b */ &sysrq_reboot_op, -+#ifdef CONFIG_SCHED_VCPU -+/* c */ &sysrq_vschedstate_op, -+#else - /* c */ NULL, -+#endif -+#ifdef CONFIG_SYSRQ_DEBUG -+/* d */ &sysrq_debug_enter, -+#else - /* d */ NULL, -+#endif - /* e */ &sysrq_term_op, - /* f */ NULL, - /* g */ NULL, -@@ -270,6 +588,29 @@ static struct sysrq_key_op *sysrq_key_ta - /* z */ NULL - }; - -+#ifdef CONFIG_SYSRQ_DEBUG -+static struct sysrq_key_op *debug_sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { -+ [13] = 
&sysrq_debug_dumpmem, /* d */ -+ [26] = &sysrq_debug_quit, /* q */ -+ [27] = &sysrq_debug_resove, /* r */ -+ [32] = &sysrq_debug_writemem, /* w */ -+ [33] = &sysrq_debug_dumpnext, /* x */ -+}; -+ -+static struct sysrq_key_op **sysrq_key_table = def_sysrq_key_table; -+ -+/* call swap_opts(NULL) to restore opts to defaults */ -+static void swap_opts(struct sysrq_key_op **swap_to) -+{ -+ if (swap_to) -+ sysrq_key_table = swap_to; -+ else -+ sysrq_key_table = def_sysrq_key_table; -+} -+#else -+#define sysrq_key_table def_sysrq_key_table -+#endif -+ - /* key2index calculation, -1 on invalid index */ - static int sysrq_key_table_key2index(int key) { - int retval; -@@ -358,6 +699,12 @@ void handle_sysrq(int key, struct pt_reg - { - if (!sysrq_enabled) - return; -+#ifdef CONFIG_SYSRQ_DEBUG -+ if (sysrq_debug_mode) { -+ __handle_debug(key, pt_regs, tty); -+ return; -+ } -+#endif - __handle_sysrq(key, pt_regs, tty); - } - -diff -uprN linux-2.6.8.1.orig/drivers/char/tty_io.c linux-2.6.8.1-ve022stab034/drivers/char/tty_io.c ---- linux-2.6.8.1.orig/drivers/char/tty_io.c 2004-08-14 14:55:34.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/tty_io.c 2005-09-09 14:39:25.000000000 +0400 -@@ -86,6 +86,7 @@ - #include <linux/string.h> - #include <linux/slab.h> - #include <linux/poll.h> -+#include <linux/ve_owner.h> - #include <linux/proc_fs.h> - #include <linux/init.h> - #include <linux/module.h> -@@ -103,6 +104,7 @@ - #include <linux/devfs_fs_kernel.h> - - #include <linux/kmod.h> -+#include <ub/ub_mem.h> - - #undef TTY_DEBUG_HANGUP - -@@ -120,7 +122,12 @@ struct termios tty_std_termios = { /* fo - - EXPORT_SYMBOL(tty_std_termios); - -+/* this lock protects tty_drivers list, this pretty guys do no locking */ -+rwlock_t tty_driver_guard = RW_LOCK_UNLOCKED; -+EXPORT_SYMBOL(tty_driver_guard); -+ - LIST_HEAD(tty_drivers); /* linked list of tty drivers */ -+EXPORT_SYMBOL(tty_drivers); - struct tty_ldisc ldiscs[NR_LDISCS]; /* line disc dispatch table */ - - /* Semaphore to 
protect creating and releasing a tty */ -@@ -130,6 +137,13 @@ DECLARE_MUTEX(tty_sem); - extern struct tty_driver *ptm_driver; /* Unix98 pty masters; for /dev/ptmx */ - extern int pty_limit; /* Config limit on Unix98 ptys */ - static DEFINE_IDR(allocated_ptys); -+#ifdef CONFIG_VE -+#define ve_allocated_ptys (*(get_exec_env()->allocated_ptys)) -+#define ve_ptm_driver (get_exec_env()->ptm_driver) -+#else -+#define ve_allocated_ptys allocated_ptys -+#define ve_ptm_driver ptm_driver -+#endif - static DECLARE_MUTEX(allocated_ptys_lock); - #endif - -@@ -150,11 +164,25 @@ extern void rs_360_init(void); - static void release_mem(struct tty_struct *tty, int idx); - - -+DCL_VE_OWNER(TTYDRV, TAIL_SOFT, struct tty_driver, owner_env, , ()) -+DCL_VE_OWNER(TTY, TAIL_SOFT, struct tty_struct, owner_env, , ()) -+ -+void prepare_tty(void) -+{ -+#ifdef CONFIG_VE -+ get_ve0()->allocated_ptys = &allocated_ptys; -+ /* -+ * in this case, tty_register_driver() setups -+ * owner_env correctly right from the bootup -+ */ -+#endif -+} -+ - static struct tty_struct *alloc_tty_struct(void) - { - struct tty_struct *tty; - -- tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL); -+ tty = ub_kmalloc(sizeof(struct tty_struct), GFP_KERNEL); - if (tty) - memset(tty, 0, sizeof(struct tty_struct)); - return tty; -@@ -307,14 +335,37 @@ struct tty_driver *get_tty_driver(dev_t - { - struct tty_driver *p; - -+ read_lock(&tty_driver_guard); - list_for_each_entry(p, &tty_drivers, tty_drivers) { - dev_t base = MKDEV(p->major, p->minor_start); - if (device < base || device >= base + p->num) - continue; - *index = device - base; -- return p; -+#ifdef CONFIG_VE -+ if (in_interrupt()) -+ goto found; -+ if (p->major!=PTY_MASTER_MAJOR && p->major!=PTY_SLAVE_MAJOR -+#ifdef CONFIG_UNIX98_PTYS -+ && (p->major<UNIX98_PTY_MASTER_MAJOR || -+ p->major>UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) && -+ (p->major<UNIX98_PTY_SLAVE_MAJOR || -+ p->major>UNIX98_PTY_SLAVE_MAJOR+UNIX98_PTY_MAJOR_COUNT-1) -+#endif -+ ) goto 
found; -+ if (ve_is_super(VE_OWNER_TTYDRV(p)) && -+ ve_is_super(get_exec_env())) -+ goto found; -+ if (!ve_accessible_strict(VE_OWNER_TTYDRV(p), get_exec_env())) -+ continue; -+#endif -+ goto found; - } -+ read_unlock(&tty_driver_guard); - return NULL; -+ -+found: -+ read_unlock(&tty_driver_guard); -+ return p; - } - - /* -@@ -410,7 +461,6 @@ void do_tty_hangup(void *data) - struct file * cons_filp = NULL; - struct file *filp, *f = NULL; - struct task_struct *p; -- struct pid *pid; - int closecount = 0, n; - - if (!tty) -@@ -481,8 +531,7 @@ void do_tty_hangup(void *data) - - read_lock(&tasklist_lock); - if (tty->session > 0) { -- struct list_head *l; -- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) { -+ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { - if (p->signal->tty == tty) - p->signal->tty = NULL; - if (!p->signal->leader) -@@ -491,7 +540,7 @@ void do_tty_hangup(void *data) - send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); - if (tty->pgrp > 0) - p->signal->tty_old_pgrp = tty->pgrp; -- } -+ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); - } - read_unlock(&tasklist_lock); - -@@ -563,15 +612,15 @@ void disassociate_ctty(int on_exit) - { - struct tty_struct *tty; - struct task_struct *p; -- struct list_head *l; -- struct pid *pid; - int tty_pgrp = -1; - - lock_kernel(); - -+ down(&tty_sem); - tty = current->signal->tty; - if (tty) { - tty_pgrp = tty->pgrp; -+ up(&tty_sem); - if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) - tty_vhangup(tty); - } else { -@@ -579,6 +628,7 @@ void disassociate_ctty(int on_exit) - kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit); - kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit); - } -+ up(&tty_sem); - unlock_kernel(); - return; - } -@@ -588,14 +638,19 @@ void disassociate_ctty(int on_exit) - kill_pg(tty_pgrp, SIGCONT, on_exit); - } - -+ /* Must lock changes to tty_old_pgrp */ -+ down(&tty_sem); - current->signal->tty_old_pgrp = 0; - tty->session = 0; - tty->pgrp = -1; - -+ /* Now 
clear signal->tty under the lock */ - read_lock(&tasklist_lock); -- for_each_task_pid(current->signal->session, PIDTYPE_SID, p, l, pid) -+ do_each_task_pid_all(current->signal->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; -+ } while_each_task_pid_all(current->signal->session, PIDTYPE_SID, p); - read_unlock(&tasklist_lock); -+ up(&tty_sem); - unlock_kernel(); - } - -@@ -760,27 +815,28 @@ static inline void tty_line_name(struct - * really quite straightforward. The semaphore locking can probably be - * relaxed for the (most common) case of reopening a tty. - */ --static int init_dev(struct tty_driver *driver, int idx, -- struct tty_struct **ret_tty) -+static int init_dev(struct tty_driver *driver, int idx, -+ struct tty_struct *i_tty, struct tty_struct **ret_tty) - { - struct tty_struct *tty, *o_tty; - struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; - struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; -+ struct ve_struct * owner; - int retval=0; - -- /* -- * Check whether we need to acquire the tty semaphore to avoid -- * race conditions. For now, play it safe. 
-- */ -- down(&tty_sem); -+ owner = VE_OWNER_TTYDRV(driver); - -- /* check whether we're reopening an existing tty */ -- if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { -- tty = devpts_get_tty(idx); -- if (tty && driver->subtype == PTY_TYPE_MASTER) -- tty = tty->link; -- } else { -- tty = driver->ttys[idx]; -+ if (i_tty) -+ tty = i_tty; -+ else { -+ /* check whether we're reopening an existing tty */ -+ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { -+ tty = devpts_get_tty(idx); -+ if (tty && driver->subtype == PTY_TYPE_MASTER) -+ tty = tty->link; -+ } else { -+ tty = driver->ttys[idx]; -+ } - } - if (tty) goto fast_track; - -@@ -808,6 +864,7 @@ static int init_dev(struct tty_driver *d - tty->driver = driver; - tty->index = idx; - tty_line_name(driver, idx, tty->name); -+ SET_VE_OWNER_TTY(tty, owner); - - if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { - tp_loc = &tty->termios; -@@ -818,7 +875,7 @@ static int init_dev(struct tty_driver *d - } - - if (!*tp_loc) { -- tp = (struct termios *) kmalloc(sizeof(struct termios), -+ tp = (struct termios *) ub_kmalloc(sizeof(struct termios), - GFP_KERNEL); - if (!tp) - goto free_mem_out; -@@ -826,7 +883,7 @@ static int init_dev(struct tty_driver *d - } - - if (!*ltp_loc) { -- ltp = (struct termios *) kmalloc(sizeof(struct termios), -+ ltp = (struct termios *) ub_kmalloc(sizeof(struct termios), - GFP_KERNEL); - if (!ltp) - goto free_mem_out; -@@ -841,6 +898,7 @@ static int init_dev(struct tty_driver *d - o_tty->driver = driver->other; - o_tty->index = idx; - tty_line_name(driver->other, idx, o_tty->name); -+ SET_VE_OWNER_TTY(o_tty, owner); - - if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { - o_tp_loc = &o_tty->termios; -@@ -852,7 +910,7 @@ static int init_dev(struct tty_driver *d - - if (!*o_tp_loc) { - o_tp = (struct termios *) -- kmalloc(sizeof(struct termios), GFP_KERNEL); -+ ub_kmalloc(sizeof(struct termios), GFP_KERNEL); - if (!o_tp) - goto free_mem_out; - *o_tp = driver->other->init_termios; -@@ -860,7 +918,7 @@ static int 
init_dev(struct tty_driver *d - - if (!*o_ltp_loc) { - o_ltp = (struct termios *) -- kmalloc(sizeof(struct termios), GFP_KERNEL); -+ ub_kmalloc(sizeof(struct termios), GFP_KERNEL); - if (!o_ltp) - goto free_mem_out; - memset(o_ltp, 0, sizeof(struct termios)); -@@ -878,6 +936,10 @@ static int init_dev(struct tty_driver *d - *o_ltp_loc = o_ltp; - o_tty->termios = *o_tp_loc; - o_tty->termios_locked = *o_ltp_loc; -+#ifdef CONFIG_VE -+ if (driver->other->refcount == 0) -+ (void)get_ve(owner); -+#endif - driver->other->refcount++; - if (driver->subtype == PTY_TYPE_MASTER) - o_tty->count++; -@@ -902,6 +964,10 @@ static int init_dev(struct tty_driver *d - *ltp_loc = ltp; - tty->termios = *tp_loc; - tty->termios_locked = *ltp_loc; -+#ifdef CONFIG_VE -+ if (driver->refcount == 0) -+ (void)get_ve(owner); -+#endif - driver->refcount++; - tty->count++; - -@@ -956,7 +1022,6 @@ success: - - /* All paths come through here to release the semaphore */ - end_init: -- up(&tty_sem); - return retval; - - /* Release locally allocated memory ... 
nothing placed in slots */ -@@ -1010,6 +1075,10 @@ static void release_mem(struct tty_struc - } - o_tty->magic = 0; - o_tty->driver->refcount--; -+#ifdef CONFIG_VE -+ if (o_tty->driver->refcount == 0) -+ put_ve(VE_OWNER_TTY(o_tty)); -+#endif - file_list_lock(); - list_del_init(&o_tty->tty_files); - file_list_unlock(); -@@ -1032,6 +1101,10 @@ static void release_mem(struct tty_struc - - tty->magic = 0; - tty->driver->refcount--; -+#ifdef CONFIG_VE -+ if (tty->driver->refcount == 0) -+ put_ve(VE_OWNER_TTY(tty)); -+#endif - file_list_lock(); - list_del_init(&tty->tty_files); - file_list_unlock(); -@@ -1054,6 +1127,9 @@ static void release_dev(struct file * fi - int devpts_master, devpts; - int idx; - char buf[64]; -+#ifdef CONFIG_UNIX98_PTYS -+ struct idr *idr_alloced; -+#endif - - tty = (struct tty_struct *)filp->private_data; - if (tty_paranoia_check(tty, filp->f_dentry->d_inode, "release_dev")) -@@ -1069,6 +1145,9 @@ static void release_dev(struct file * fi - devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0; - devpts_master = pty_master && devpts; - o_tty = tty->link; -+#ifdef CONFIG_UNIX98_PTYS -+ idr_alloced = tty->owner_env->allocated_ptys; -+#endif - - #ifdef TTY_PARANOIA_CHECK - if (idx < 0 || idx >= tty->driver->num) { -@@ -1152,9 +1231,14 @@ static void release_dev(struct file * fi - * each iteration we avoid any problems. - */ - while (1) { -+ /* Guard against races with tty->count changes elsewhere and -+ opens on /dev/tty */ -+ -+ down(&tty_sem); - tty_closing = tty->count <= 1; - o_tty_closing = o_tty && - (o_tty->count <= (pty_master ? 1 : 0)); -+ up(&tty_sem); - do_sleep = 0; - - if (tty_closing) { -@@ -1190,6 +1274,8 @@ static void release_dev(struct file * fi - * both sides, and we've completed the last operation that could - * block, so it's safe to proceed with closing. 
- */ -+ -+ down(&tty_sem); - if (pty_master) { - if (--o_tty->count < 0) { - printk(KERN_WARNING "release_dev: bad pty slave count " -@@ -1203,7 +1289,8 @@ static void release_dev(struct file * fi - tty->count, tty_name(tty, buf)); - tty->count = 0; - } -- -+ up(&tty_sem); -+ - /* - * We've decremented tty->count, so we need to remove this file - * descriptor off the tty->tty_files list; this serves two -@@ -1235,15 +1322,15 @@ static void release_dev(struct file * fi - */ - if (tty_closing || o_tty_closing) { - struct task_struct *p; -- struct list_head *l; -- struct pid *pid; - - read_lock(&tasklist_lock); -- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) -+ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; -+ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); - if (o_tty) -- for_each_task_pid(o_tty->session, PIDTYPE_SID, p,l, pid) -+ do_each_task_pid_all(o_tty->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; -+ } while_each_task_pid_all(o_tty->session, PIDTYPE_SID, p); - read_unlock(&tasklist_lock); - } - -@@ -1294,7 +1381,7 @@ static void release_dev(struct file * fi - /* Make this pty number available for reallocation */ - if (devpts) { - down(&allocated_ptys_lock); -- idr_remove(&allocated_ptys, idx); -+ idr_remove(idr_alloced, idx); - up(&allocated_ptys_lock); - } - #endif -@@ -1315,7 +1402,7 @@ static void release_dev(struct file * fi - */ - static int tty_open(struct inode * inode, struct file * filp) - { -- struct tty_struct *tty; -+ struct tty_struct *tty, *c_tty; - int noctty, retval; - struct tty_driver *driver; - int index; -@@ -1327,12 +1414,18 @@ retry_open: - noctty = filp->f_flags & O_NOCTTY; - index = -1; - retval = 0; -+ c_tty = NULL; -+ -+ down(&tty_sem); - - if (device == MKDEV(TTYAUX_MAJOR,0)) { -- if (!current->signal->tty) -+ if (!current->signal->tty) { -+ up(&tty_sem); - return -ENXIO; -+ } - driver = current->signal->tty->driver; - index = current->signal->tty->index; -+ c_tty = 
current->signal->tty; - filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ - /* noctty = 1; */ - goto got_driver; -@@ -1341,6 +1434,12 @@ retry_open: - if (device == MKDEV(TTY_MAJOR,0)) { - extern int fg_console; - extern struct tty_driver *console_driver; -+#ifdef CONFIG_VE -+ if (!ve_is_super(get_exec_env())) { -+ up(&tty_sem); -+ return -ENODEV; -+ } -+#endif - driver = console_driver; - index = fg_console; - noctty = 1; -@@ -1348,6 +1447,12 @@ retry_open: - } - #endif - if (device == MKDEV(TTYAUX_MAJOR,1)) { -+#ifdef CONFIG_VE -+ if (!ve_is_super(get_exec_env())) { -+ up(&tty_sem); -+ return -ENODEV; -+ } -+#endif - driver = console_device(&index); - if (driver) { - /* Don't let /dev/console block */ -@@ -1355,6 +1460,7 @@ retry_open: - noctty = 1; - goto got_driver; - } -+ up(&tty_sem); - return -ENODEV; - } - -@@ -1364,29 +1470,33 @@ retry_open: - - /* find a device that is not in use. */ - down(&allocated_ptys_lock); -- if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { -+ if (!idr_pre_get(&ve_allocated_ptys, GFP_KERNEL)) { - up(&allocated_ptys_lock); -+ up(&tty_sem); - return -ENOMEM; - } -- idr_ret = idr_get_new(&allocated_ptys, NULL, &index); -+ idr_ret = idr_get_new(&ve_allocated_ptys, NULL, &index); - if (idr_ret < 0) { - up(&allocated_ptys_lock); -+ up(&tty_sem); - if (idr_ret == -EAGAIN) - return -ENOMEM; - return -EIO; - } - if (index >= pty_limit) { -- idr_remove(&allocated_ptys, index); -+ idr_remove(&ve_allocated_ptys, index); - up(&allocated_ptys_lock); -+ up(&tty_sem); - return -EIO; - } - up(&allocated_ptys_lock); - -- driver = ptm_driver; -- retval = init_dev(driver, index, &tty); -+ driver = ve_ptm_driver; -+ retval = init_dev(driver, index, NULL, &tty); -+ up(&tty_sem); - if (retval) { - down(&allocated_ptys_lock); -- idr_remove(&allocated_ptys, index); -+ idr_remove(&ve_allocated_ptys, index); - up(&allocated_ptys_lock); - return retval; - } -@@ -1398,10 +1508,13 @@ retry_open: - #endif - { - driver = get_tty_driver(device, &index); 
-- if (!driver) -+ if (!driver) { -+ up(&tty_sem); - return -ENODEV; -+ } - got_driver: -- retval = init_dev(driver, index, &tty); -+ retval = init_dev(driver, index, c_tty, &tty); -+ up(&tty_sem); - if (retval) - return retval; - } -@@ -1435,7 +1548,7 @@ got_driver: - #ifdef CONFIG_UNIX98_PTYS - if (index != -1) { - down(&allocated_ptys_lock); -- idr_remove(&allocated_ptys, index); -+ idr_remove(&ve_allocated_ptys, index); - up(&allocated_ptys_lock); - } - #endif -@@ -1566,10 +1679,12 @@ static int tiocswinsz(struct tty_struct - - static int tioccons(struct file *file) - { -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ if (!ve_is_super(get_exec_env())) -+ return -EACCES; - if (file->f_op->write == redirected_tty_write) { - struct file *f; -- if (!capable(CAP_SYS_ADMIN)) -- return -EPERM; - spin_lock(&redirect_lock); - f = redirect; - redirect = NULL; -@@ -1606,8 +1721,6 @@ static int fionbio(struct file *file, in - - static int tiocsctty(struct tty_struct *tty, int arg) - { -- struct list_head *l; -- struct pid *pid; - task_t *p; - - if (current->signal->leader && -@@ -1630,8 +1743,9 @@ static int tiocsctty(struct tty_struct * - */ - - read_lock(&tasklist_lock); -- for_each_task_pid(tty->session, PIDTYPE_SID, p, l, pid) -+ do_each_task_pid_all(tty->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; -+ } while_each_task_pid_all(tty->session, PIDTYPE_SID, p); - read_unlock(&tasklist_lock); - } else - return -EPERM; -@@ -1653,7 +1767,7 @@ static int tiocgpgrp(struct tty_struct * - */ - if (tty == real_tty && current->signal->tty != real_tty) - return -ENOTTY; -- return put_user(real_tty->pgrp, p); -+ return put_user(pid_type_to_vpid(PIDTYPE_PGID, real_tty->pgrp), p); - } - - static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) -@@ -1673,6 +1787,9 @@ static int tiocspgrp(struct tty_struct * - return -EFAULT; - if (pgrp < 0) - return -EINVAL; -+ pgrp = vpid_to_pid(pgrp); -+ if (pgrp < 0) -+ return -EPERM; - if 
(session_of_pgrp(pgrp) != current->signal->session) - return -EPERM; - real_tty->pgrp = pgrp; -@@ -1689,7 +1806,7 @@ static int tiocgsid(struct tty_struct *t - return -ENOTTY; - if (real_tty->session <= 0) - return -ENOTTY; -- return put_user(real_tty->session, p); -+ return put_user(pid_type_to_vpid(PIDTYPE_SID, real_tty->session), p); - } - - static int tiocsetd(struct tty_struct *tty, int __user *p) -@@ -1938,8 +2055,6 @@ static void __do_SAK(void *arg) - #else - struct tty_struct *tty = arg; - struct task_struct *p; -- struct list_head *l; -- struct pid *pid; - int session; - int i; - struct file *filp; -@@ -1952,7 +2067,7 @@ static void __do_SAK(void *arg) - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); - read_lock(&tasklist_lock); -- for_each_task_pid(session, PIDTYPE_SID, p, l, pid) { -+ do_each_task_pid_all(session, PIDTYPE_SID, p) { - if (p->signal->tty == tty || session > 0) { - printk(KERN_NOTICE "SAK: killed process %d" - " (%s): p->signal->session==tty->session\n", -@@ -1979,7 +2094,7 @@ static void __do_SAK(void *arg) - spin_unlock(&p->files->file_lock); - } - task_unlock(p); -- } -+ } while_each_task_pid_all(session, PIDTYPE_SID, p); - read_unlock(&tasklist_lock); - #endif - } -@@ -2303,8 +2418,11 @@ int tty_register_driver(struct tty_drive - - if (!driver->put_char) - driver->put_char = tty_default_put_char; -- -+ -+ SET_VE_OWNER_TTYDRV(driver, get_exec_env()); -+ write_lock_irq(&tty_driver_guard); - list_add(&driver->tty_drivers, &tty_drivers); -+ write_unlock_irq(&tty_driver_guard); - - if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) { - for(i = 0; i < driver->num; i++) -@@ -2331,7 +2449,9 @@ int tty_unregister_driver(struct tty_dri - unregister_chrdev_region(MKDEV(driver->major, driver->minor_start), - driver->num); - -+ write_lock_irq(&tty_driver_guard); - list_del(&driver->tty_drivers); -+ write_unlock_irq(&tty_driver_guard); - - /* - * Free the termios and termios_locked structures because -@@ -2459,6 +2579,7 @@ static int 
__init tty_init(void) - - vty_init(); - #endif -+ prepare_tty(); - return 0; - } - module_init(tty_init); -diff -uprN linux-2.6.8.1.orig/drivers/char/vt.c linux-2.6.8.1-ve022stab034/drivers/char/vt.c ---- linux-2.6.8.1.orig/drivers/char/vt.c 2004-08-14 14:56:00.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/char/vt.c 2005-09-09 14:39:37.000000000 +0400 -@@ -748,6 +748,8 @@ inline int resize_screen(int currcons, i - * [this is to be used together with some user program - * like resize that changes the hardware videomode] - */ -+#define VC_RESIZE_MAXCOL (32767) -+#define VC_RESIZE_MAXROW (32767) - int vc_resize(int currcons, unsigned int cols, unsigned int lines) - { - unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0; -@@ -760,6 +762,9 @@ int vc_resize(int currcons, unsigned int - if (!vc_cons_allocated(currcons)) - return -ENXIO; - -+ if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW) -+ return -EINVAL; -+ - new_cols = (cols ? cols : video_num_columns); - new_rows = (lines ? 
lines : video_num_lines); - new_row_size = new_cols << 1; -@@ -2233,8 +2238,10 @@ void vt_console_print(struct console *co - } - set_cursor(currcons); - -- if (!oops_in_progress) -- poke_blanked_console(); -+ if (!oops_in_progress) { -+ if (!printk_no_wake) -+ poke_blanked_console(); -+ } - - quit: - clear_bit(0, &printing); -diff -uprN linux-2.6.8.1.orig/drivers/ieee1394/ieee1394_core.c linux-2.6.8.1-ve022stab034/drivers/ieee1394/ieee1394_core.c ---- linux-2.6.8.1.orig/drivers/ieee1394/ieee1394_core.c 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/ieee1394/ieee1394_core.c 2005-09-09 14:39:24.000000000 +0400 -@@ -1034,8 +1034,8 @@ static int hpsbpkt_thread(void *__hi) - if (khpsbpkt_kill) - break; - -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -+ if (test_thread_flag(TIF_FREEZE)) { -+ refrigerator(); - continue; - } - -diff -uprN linux-2.6.8.1.orig/drivers/ieee1394/nodemgr.c linux-2.6.8.1-ve022stab034/drivers/ieee1394/nodemgr.c ---- linux-2.6.8.1.orig/drivers/ieee1394/nodemgr.c 2004-08-14 14:55:34.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/ieee1394/nodemgr.c 2005-09-09 14:39:24.000000000 +0400 -@@ -1481,8 +1481,8 @@ static int nodemgr_host_thread(void *__h - - if (down_interruptible(&hi->reset_sem) || - down_interruptible(&nodemgr_serialize)) { -- if (current->flags & PF_FREEZE) { -- refrigerator(0); -+ if (test_thread_flag(TIF_FREEZE)) { -+ refrigerator(); - continue; - } - printk("NodeMgr: received unexpected signal?!\n" ); -diff -uprN linux-2.6.8.1.orig/drivers/input/serio/serio.c linux-2.6.8.1-ve022stab034/drivers/input/serio/serio.c ---- linux-2.6.8.1.orig/drivers/input/serio/serio.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/input/serio/serio.c 2005-09-09 14:39:24.000000000 +0400 -@@ -153,8 +153,8 @@ static int serio_thread(void *nothing) - do { - serio_handle_events(); - wait_event_interruptible(serio_wait, !list_empty(&serio_event_list)); -- if (current->flags & 
PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - } while (!signal_pending(current)); - - printk(KERN_DEBUG "serio: kseriod exiting\n"); -diff -uprN linux-2.6.8.1.orig/drivers/input/serio/serport.c linux-2.6.8.1-ve022stab034/drivers/input/serio/serport.c ---- linux-2.6.8.1.orig/drivers/input/serio/serport.c 2004-08-14 14:56:14.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/input/serio/serport.c 2005-09-09 14:39:25.000000000 +0400 -@@ -66,6 +66,9 @@ static int serport_ldisc_open(struct tty - struct serport *serport; - char name[64]; - -+ if (!capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ - serport = kmalloc(sizeof(struct serport), GFP_KERNEL); - if (unlikely(!serport)) - return -ENOMEM; -diff -uprN linux-2.6.8.1.orig/drivers/md/md.c linux-2.6.8.1-ve022stab034/drivers/md/md.c ---- linux-2.6.8.1.orig/drivers/md/md.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/md/md.c 2005-09-09 14:39:24.000000000 +0400 -@@ -2822,8 +2822,8 @@ int md_thread(void * arg) - - wait_event_interruptible(thread->wqueue, - test_bit(THREAD_WAKEUP, &thread->flags)); -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - - clear_bit(THREAD_WAKEUP, &thread->flags); - -diff -uprN linux-2.6.8.1.orig/drivers/net/8139too.c linux-2.6.8.1-ve022stab034/drivers/net/8139too.c ---- linux-2.6.8.1.orig/drivers/net/8139too.c 2004-08-14 14:54:46.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/8139too.c 2005-09-09 14:39:24.000000000 +0400 -@@ -1624,8 +1624,8 @@ static int rtl8139_thread (void *data) - do { - timeout = interruptible_sleep_on_timeout (&tp->thr_wait, timeout); - /* make swsusp happy with our thread */ -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - } while (!signal_pending (current) && (timeout > 0)); - - if (signal_pending (current)) { -diff -uprN 
linux-2.6.8.1.orig/drivers/net/irda/sir_kthread.c linux-2.6.8.1-ve022stab034/drivers/net/irda/sir_kthread.c ---- linux-2.6.8.1.orig/drivers/net/irda/sir_kthread.c 2004-08-14 14:55:59.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/irda/sir_kthread.c 2005-09-09 14:39:24.000000000 +0400 -@@ -136,8 +136,8 @@ static int irda_thread(void *startup) - remove_wait_queue(&irda_rq_queue.kick, &wait); - - /* make swsusp happy with our thread */ -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - - run_irda_queue(); - } -diff -uprN linux-2.6.8.1.orig/drivers/net/irda/stir4200.c linux-2.6.8.1-ve022stab034/drivers/net/irda/stir4200.c ---- linux-2.6.8.1.orig/drivers/net/irda/stir4200.c 2004-08-14 14:54:52.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/irda/stir4200.c 2005-09-09 14:39:24.000000000 +0400 -@@ -767,7 +767,7 @@ static int stir_transmit_thread(void *ar - && !signal_pending(current)) - { - /* if suspending, then power off and wait */ -- if (current->flags & PF_FREEZE) { -+ if (test_thread_flag(TIF_FREEZE)) { - if (stir->receiving) - receive_stop(stir); - else -@@ -775,7 +775,7 @@ static int stir_transmit_thread(void *ar - - write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD); - -- refrigerator(PF_FREEZE); -+ refrigerator(); - - if (change_speed(stir, stir->speed)) - break; -diff -uprN linux-2.6.8.1.orig/drivers/net/irda/vlsi_ir.h linux-2.6.8.1-ve022stab034/drivers/net/irda/vlsi_ir.h ---- linux-2.6.8.1.orig/drivers/net/irda/vlsi_ir.h 2004-08-14 14:55:34.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/irda/vlsi_ir.h 2005-09-09 14:39:25.000000000 +0400 -@@ -58,7 +58,7 @@ typedef void irqreturn_t; - - /* PDE() introduced in 2.5.4 */ - #ifdef CONFIG_PROC_FS --#define PDE(inode) ((inode)->u.generic_ip) -+#define LPDE(inode) ((inode)->u.generic_ip) - #endif - - /* irda crc16 calculation exported in 2.5.42 */ -diff -uprN linux-2.6.8.1.orig/drivers/net/loopback.c 
linux-2.6.8.1-ve022stab034/drivers/net/loopback.c ---- linux-2.6.8.1.orig/drivers/net/loopback.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/loopback.c 2005-09-09 14:39:30.000000000 +0400 -@@ -127,6 +127,11 @@ static int loopback_xmit(struct sk_buff - { - struct net_device_stats *lb_stats; - -+ if (unlikely(get_exec_env()->disable_net)) { -+ kfree_skb(skb); -+ return 0; -+ } -+ - skb_orphan(skb); - - skb->protocol=eth_type_trans(skb,dev); -@@ -183,6 +188,30 @@ static struct net_device_stats *get_stat - return stats; - } - -+static void loopback_destructor(struct net_device *dev) -+{ -+ kfree(dev->priv); -+ dev->priv = NULL; -+} -+ -+struct net_device templ_loopback_dev = { -+ .name = "lo", -+ .mtu = (16 * 1024) + 20 + 20 + 12, -+ .hard_start_xmit = loopback_xmit, -+ .hard_header = eth_header, -+ .hard_header_cache = eth_header_cache, -+ .header_cache_update = eth_header_cache_update, -+ .hard_header_len = ETH_HLEN, /* 14 */ -+ .addr_len = ETH_ALEN, /* 6 */ -+ .tx_queue_len = 0, -+ .type = ARPHRD_LOOPBACK, /* 0x0001*/ -+ .rebuild_header = eth_rebuild_header, -+ .flags = IFF_LOOPBACK, -+ .features = NETIF_F_SG|NETIF_F_FRAGLIST -+ |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA -+ |NETIF_F_LLTX, -+}; -+ - struct net_device loopback_dev = { - .name = "lo", - .mtu = (16 * 1024) + 20 + 20 + 12, -@@ -212,9 +241,11 @@ int __init loopback_init(void) - memset(stats, 0, sizeof(struct net_device_stats)); - loopback_dev.priv = stats; - loopback_dev.get_stats = &get_stats; -+ loopback_dev.destructor = &loopback_destructor; - } - - return register_netdev(&loopback_dev); - }; - - EXPORT_SYMBOL(loopback_dev); -+EXPORT_SYMBOL(templ_loopback_dev); -diff -uprN linux-2.6.8.1.orig/drivers/net/net_init.c linux-2.6.8.1-ve022stab034/drivers/net/net_init.c ---- linux-2.6.8.1.orig/drivers/net/net_init.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/net_init.c 2005-09-09 14:39:25.000000000 +0400 -@@ -51,6 +51,7 @@ - #include 
<linux/if_ltalk.h> - #include <linux/rtnetlink.h> - #include <net/neighbour.h> -+#include <ub/ub_mem.h> - - /* The network devices currently exist only in the socket namespace, so these - entries are unused. The only ones that make sense are -@@ -83,7 +84,7 @@ struct net_device *alloc_netdev(int size - & ~NETDEV_ALIGN_CONST; - alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; - -- p = kmalloc (alloc_size, GFP_KERNEL); -+ p = ub_kmalloc(alloc_size, GFP_KERNEL); - if (!p) { - printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); - return NULL; -@@ -392,6 +393,10 @@ int register_netdev(struct net_device *d - - out: - rtnl_unlock(); -+ if (err == 0 && dev->reg_state != NETREG_REGISTERED) { -+ unregister_netdev(dev); -+ err = -ENOMEM; -+ } - return err; - } - -diff -uprN linux-2.6.8.1.orig/drivers/net/open_vznet.c linux-2.6.8.1-ve022stab034/drivers/net/open_vznet.c ---- linux-2.6.8.1.orig/drivers/net/open_vznet.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.8.1-ve022stab034/drivers/net/open_vznet.c 2005-09-09 14:39:27.000000000 +0400 -@@ -0,0 +1,188 @@ -+/* -+ * open_vznet.c -+ * -+ * Copyright (C) 2005 SWsoft -+ * All rights reserved. 
-+ * -+ */ -+ -+/* -+ * Virtual Networking device used to change VE ownership on packets -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/module.h> -+#include <linux/seq_file.h> -+ -+#include <linux/inet.h> -+#include <net/ip.h> -+#include <linux/skbuff.h> -+#include <linux/venet.h> -+ -+void veip_stop(struct ve_struct *ve) -+{ -+ struct list_head *p, *tmp; -+ -+ write_lock_irq(&veip_hash_lock); -+ if (ve->veip == NULL) -+ goto unlock; -+ list_for_each_safe(p, tmp, &ve->veip->ip_lh) { -+ struct ip_entry_struct *ptr; -+ ptr = list_entry(p, struct ip_entry_struct, ve_list); -+ ptr->active_env = NULL; -+ list_del(&ptr->ve_list); -+ list_del(&ptr->ip_hash); -+ kfree(ptr); -+ } -+ veip_put(ve->veip); -+ ve->veip = NULL; -+unlock: -+ write_unlock_irq(&veip_hash_lock); -+} -+ -+int veip_start(struct ve_struct *ve) -+{ -+ int err; -+ -+ err = 0; -+ write_lock_irq(&veip_hash_lock); -+ ve->veip = veip_findcreate(ve->veid); -+ if (ve->veip == NULL) -+ err = -ENOMEM; -+ write_unlock_irq(&veip_hash_lock); -+ return err; -+} -+ -+int veip_entry_add(struct ve_struct *ve, struct sockaddr_in *addr) -+{ -+ struct ip_entry_struct *entry, *found; -+ int err; -+ -+ entry = kmalloc(sizeof(struct ip_entry_struct), GFP_KERNEL); -+ if (entry == NULL) -+ return -ENOMEM; -+ -+ memset(entry, 0, sizeof(struct ip_entry_struct)); -+ entry->ip = addr->sin_addr.s_addr; -+ -+ write_lock_irq(&veip_hash_lock); -+ err = -EADDRINUSE; -+ found = ip_entry_lookup(entry->ip); -+ if (found != NULL) -+ goto out_unlock; -+ else { -+ ip_entry_hash(entry, ve->veip); -+ found = entry; -+ entry = NULL; -+ } -+ err = 0; -+ found->active_env = ve; -+out_unlock: -+ write_unlock_irq(&veip_hash_lock); -+ if (entry != NULL) -+ kfree(entry); -+ return err; -+} -+ -+int veip_entry_del(envid_t veid, struct sockaddr_in *addr) -+{ -+ struct ip_entry_struct *found; -+ int err; -+ -+ err = -EADDRNOTAVAIL; -+ write_lock_irq(&veip_hash_lock); -+ found = ip_entry_lookup(addr->sin_addr.s_addr); -+ if (found == NULL) -+ goto 
out; -+ if (found->active_env->veid != veid) -+ goto out; -+ -+ err = 0; -+ found->active_env = NULL; -+ -+ list_del(&found->ip_hash); -+ list_del(&found->ve_list); -+ kfree(found); -+out: -+ write_unlock_irq(&veip_hash_lock); -+ return err; -+} -+ -+static struct ve_struct *venet_find_ve(__u32 ip) -+{ -+ struct ip_entry_struct *entry; -+ -+ entry = ip_entry_lookup(ip); -+ if (entry == NULL) -+ return NULL; -+ -+ return entry->active_env; -+} -+ -+int venet_change_skb_owner(struct sk_buff *skb) -+{ -+ struct ve_struct *ve, *ve_old; -+ struct iphdr *iph; -+ -+ ve_old = skb->owner_env; -+ iph = skb->nh.iph; -+ -+ read_lock(&veip_hash_lock); -+ if (!ve_is_super(ve_old)) { -+ /* from VE to host */ -+ ve = venet_find_ve(iph->saddr); -+ if (ve == NULL) -+ goto out_drop; -+ if (!ve_accessible_strict(ve, ve_old)) -+ goto out_source; -+ skb->owner_env = get_ve0(); -+ } else { -+ /* from host to VE */ -+ ve = venet_find_ve(iph->daddr); -+ if (ve == NULL) -+ goto out_drop; -+ skb->owner_env = ve; -+ } -+ read_unlock(&veip_hash_lock); -+ -+ return 0; -+ -+out_drop: -+ read_unlock(&veip_hash_lock); -+ return -ESRCH; -+ -+out_source: -+ read_unlock(&veip_hash_lock); -+ if (net_ratelimit()) { -+ printk(KERN_WARNING "Dropped packet, source wrong " -+ "veid=%u src-IP=%u.%u.%u.%u " -+ "dst-IP=%u.%u.%u.%u\n", -+ skb->owner_env->veid, -+ NIPQUAD(skb->nh.iph->saddr), -+ NIPQUAD(skb->nh.iph->daddr)); -+ } -+ return -EACCES; -+} -+ -+#ifdef CONFIG_PROC_FS -+int veip_seq_show(struct seq_file *m, void *v) -+{ -+ struct list_head *p; -+ struct ip_entry_struct *entry; -+ char s[16]; -+ -+ p = (struct list_head *)v; -+ if (p == ip_entry_hash_table) { -+ seq_puts(m, "Version: 2.5\n"); -+ return 0; -+ } -+ entry = list_entry(p, struct ip_entry_struct, ip_hash); -+ sprintf(s, "%u.%u.%u.%u", NIPQUAD(entry->ip)); -+ seq_printf(m, "%15s %10u\n", s, 0); -+ return 0; -+} -+#endif -+ -+MODULE_AUTHOR("SWsoft <info@sw-soft.com>"); -+MODULE_DESCRIPTION("Virtuozzo Virtual Network Device"); 
-+MODULE_LICENSE("GPL v2"); -diff -uprN linux-2.6.8.1.orig/drivers/net/ppp_async.c linux-2.6.8.1-ve022stab034/drivers/net/ppp_async.c ---- linux-2.6.8.1.orig/drivers/net/ppp_async.c 2004-08-14 14:55:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/ppp_async.c 2005-09-09 14:39:25.000000000 +0400 -@@ -973,7 +973,7 @@ static void async_lcp_peek(struct asyncp - data += 4; - dlen -= 4; - /* data[0] is code, data[1] is length */ -- while (dlen >= 2 && dlen >= data[1]) { -+ while (dlen >= 2 && dlen >= data[1] && data[1] >= 2) { - switch (data[0]) { - case LCP_MRU: - val = (data[2] << 8) + data[3]; -diff -uprN linux-2.6.8.1.orig/drivers/net/tun.c linux-2.6.8.1-ve022stab034/drivers/net/tun.c ---- linux-2.6.8.1.orig/drivers/net/tun.c 2004-08-14 14:55:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/tun.c 2005-09-09 14:39:25.000000000 +0400 -@@ -44,6 +44,7 @@ - - #include <asm/system.h> - #include <asm/uaccess.h> -+#include <ub/beancounter.h> - - #ifdef TUN_DEBUG - static int debug; -@@ -71,6 +72,7 @@ static int tun_net_close(struct net_devi - static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) - { - struct tun_struct *tun = netdev_priv(dev); -+ struct user_beancounter *ub; - - DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); - -@@ -90,6 +92,19 @@ static int tun_net_xmit(struct sk_buff * - if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) - goto drop; - } -+ -+ ub = netdev_bc(dev)->exec_ub; -+ if (ub && (skb_bc(skb)->charged == 0)) { -+ unsigned long charge; -+ charge = skb_charge_truesize(skb->truesize); -+ if (charge_beancounter(ub, UB_OTHERSOCKBUF, charge, 1)) -+ goto drop; -+ get_beancounter(ub); -+ skb_bc(skb)->ub = ub; -+ skb_bc(skb)->charged = charge; -+ skb_bc(skb)->resource = UB_OTHERSOCKBUF; -+ } -+ - skb_queue_tail(&tun->readq, skb); - - /* Notify and wake up reader process */ -@@ -177,7 +192,7 @@ static __inline__ ssize_t tun_get_user(s - size_t len = count; - - if (!(tun->flags & TUN_NO_PI)) { -- if 
((len -= sizeof(pi)) > len) -+ if ((len -= sizeof(pi)) > count) - return -EINVAL; - - if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi))) -@@ -322,6 +337,7 @@ static ssize_t tun_chr_readv(struct file - - ret = tun_put_user(tun, skb, (struct iovec *) iv, len); - -+ /* skb will be uncharged in kfree_skb() */ - kfree_skb(skb); - break; - } -@@ -383,7 +399,8 @@ static int tun_set_iff(struct file *file - - /* Check permissions */ - if (tun->owner != -1 && -- current->euid != tun->owner && !capable(CAP_NET_ADMIN)) -+ current->euid != tun->owner && -+ !capable(CAP_NET_ADMIN) && !capable(CAP_VE_NET_ADMIN)) - return -EPERM; - } - else if (__dev_get_by_name(ifr->ifr_name)) -diff -uprN linux-2.6.8.1.orig/drivers/net/venet_core.c linux-2.6.8.1-ve022stab034/drivers/net/venet_core.c ---- linux-2.6.8.1.orig/drivers/net/venet_core.c 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.8.1-ve022stab034/drivers/net/venet_core.c 2005-09-09 14:39:30.000000000 +0400 -@@ -0,0 +1,629 @@ -+/* -+ * venet_core.c -+ * -+ * Copyright (C) 2005 SWsoft -+ * All rights reserved. -+ * -+ */ -+ -+/* -+ * Common part for Virtuozzo virtual network devices -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/sched.h> -+#include <linux/interrupt.h> -+#include <linux/fs.h> -+#include <linux/types.h> -+#include <linux/string.h> -+#include <linux/socket.h> -+#include <linux/errno.h> -+#include <linux/fcntl.h> -+#include <linux/in.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/tcp.h> -+#include <linux/proc_fs.h> -+#include <linux/seq_file.h> -+ -+#include <asm/system.h> -+#include <asm/uaccess.h> -+#include <asm/io.h> -+#include <asm/unistd.h> -+ -+#include <linux/inet.h> -+#include <linux/netdevice.h> -+#include <linux/etherdevice.h> -+#include <net/ip.h> -+#include <linux/skbuff.h> -+#include <net/sock.h> -+#include <linux/if_ether.h> /* For the statistics structure. 
*/ -+#include <linux/if_arp.h> /* For ARPHRD_ETHER */ -+#include <linux/venet.h> -+#include <linux/ve_proto.h> -+#include <linux/vzctl.h> -+#include <linux/vzctl_venet.h> -+ -+struct list_head ip_entry_hash_table[VEIP_HASH_SZ]; -+rwlock_t veip_hash_lock = RW_LOCK_UNLOCKED; -+LIST_HEAD(veip_lh); -+ -+#define ip_entry_hash_function(ip) (ntohl(ip) & (VEIP_HASH_SZ - 1)) -+ -+void ip_entry_hash(struct ip_entry_struct *entry, struct veip_struct *veip) -+{ -+ list_add(&entry->ip_hash, -+ ip_entry_hash_table + ip_entry_hash_function(entry->ip)); -+ list_add(&entry->ve_list, &veip->ip_lh); -+} -+ -+void veip_put(struct veip_struct *veip) -+{ -+ if (!list_empty(&veip->ip_lh)) -+ return; -+ if (!list_empty(&veip->src_lh)) -+ return; -+ if (!list_empty(&veip->dst_lh)) -+ return; -+ -+ list_del(&veip->list); -+ kfree(veip); -+} -+ -+struct ip_entry_struct *ip_entry_lookup(u32 addr) -+{ -+ struct ip_entry_struct *entry; -+ struct list_head *tmp; -+ -+ list_for_each(tmp, ip_entry_hash_table + ip_entry_hash_function(addr)) { -+ entry = list_entry(tmp, struct ip_entry_struct, ip_hash); -+ if (entry->ip != addr) -+ continue; -+ return entry; -+ } -+ return NULL; -+} -+ -+struct veip_struct *veip_find(envid_t veid) -+{ -+ struct veip_struct *ptr; -+ list_for_each_entry(ptr, &veip_lh, list) { -+ if (ptr->veid != veid) -+ continue; -+ return ptr; -+ } -+ return NULL; -+} -+ -+struct veip_struct *veip_findcreate(envid_t veid) -+{ -+ struct veip_struct *ptr; -+ -+ ptr = veip_find(veid); -+ if (ptr != NULL) -+ return ptr; -+ -+ ptr = kmalloc(sizeof(struct veip_struct), GFP_ATOMIC); -+ if (ptr == NULL) -+ return NULL; -+ memset(ptr, 0, sizeof(struct veip_struct)); -+ INIT_LIST_HEAD(&ptr->ip_lh); -+ INIT_LIST_HEAD(&ptr->src_lh); -+ INIT_LIST_HEAD(&ptr->dst_lh); -+ list_add(&ptr->list, &veip_lh); -+ ptr->veid = veid; -+ return ptr; -+} -+ -+/* -+ * Device functions -+ */ -+ -+static int venet_open(struct net_device *dev) -+{ -+ if (!try_module_get(THIS_MODULE)) -+ return -EBUSY; -+ return 0; 
-+} -+ -+static int venet_close(struct net_device *master) -+{ -+ module_put(THIS_MODULE); -+ return 0; -+} -+ -+static void venet_destructor(struct net_device *dev) -+{ -+ kfree(dev->priv); -+ dev->priv = NULL; -+} -+ -+/* -+ * The higher levels take care of making this non-reentrant (it's -+ * called with bh's disabled). -+ */ -+static int venet_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct net_device_stats *stats = (struct net_device_stats *)dev->priv; -+ struct net_device *rcv = NULL; -+ struct iphdr *iph; -+ int length; -+ -+ if (unlikely(get_exec_env()->disable_net)) -+ goto outf; -+ -+ /* -+ * Optimise so buffers with skb->free=1 are not copied but -+ * instead are lobbed from tx queue to rx queue -+ */ -+ if (atomic_read(&skb->users) != 1) { -+ struct sk_buff *skb2 = skb; -+ skb = skb_clone(skb, GFP_ATOMIC); /* Clone the buffer */ -+ if (skb == NULL) { -+ kfree_skb(skb2); -+ goto out; -+ } -+ kfree_skb(skb2); -+ } else -+ skb_orphan(skb); -+ -+ if (skb->protocol != __constant_htons(ETH_P_IP)) -+ goto outf; -+ -+ iph = skb->nh.iph; -+ if (MULTICAST(iph->daddr)) -+ goto outf; -+ -+ if (venet_change_skb_owner(skb) < 0) -+ goto outf; -+ -+ if (unlikely(VE_OWNER_SKB(skb)->disable_net)) -+ goto outf; -+ -+ rcv = VE_OWNER_SKB(skb)->_venet_dev; -+ if (!rcv) -+ /* VE going down */ -+ goto outf; -+ -+ dev_hold(rcv); -+ -+ if (!(rcv->flags & IFF_UP)) { -+ /* Target VE does not want to receive packets */ -+ dev_put(rcv); -+ goto outf; -+ } -+ -+ skb->pkt_type = PACKET_HOST; -+ skb->dev = rcv; -+ -+ skb->mac.raw = skb->data; -+ memset(skb->data - dev->hard_header_len, 0, dev->hard_header_len); -+ -+ dst_release(skb->dst); -+ skb->dst = NULL; -+#ifdef CONFIG_NETFILTER -+ nf_conntrack_put(skb->nfct); -+ skb->nfct = NULL; -+#ifdef CONFIG_NETFILTER_DEBUG -+ skb->nf_debug = 0; -+#endif -+#endif -+ length = skb->len; -+ -+ netif_rx(skb); -+ -+ stats->tx_bytes += length; -+ stats->tx_packets++; -+ if (rcv) { -+ struct net_device_stats *rcv_stats = -+ (struct 
net_device_stats *)rcv->priv; -+ rcv_stats->rx_bytes += length; -+ rcv_stats->rx_packets++; -+ dev_put(rcv); -+ } -+ -+ return 0; -+ -+outf: -+ kfree_skb(skb); -+ ++stats->tx_dropped; -+out: -+ return 0; -+} -+ -+static struct net_device_stats *get_stats(struct net_device *dev) -+{ -+ return (struct net_device_stats *)dev->priv; -+} -+ -+/* Initialize the rest of the LOOPBACK device. */ -+int venet_init_dev(struct net_device *dev) -+{ -+ dev->hard_start_xmit = venet_xmit; -+ dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); -+ if (dev->priv == NULL) -+ return -ENOMEM; -+ memset(dev->priv, 0, sizeof(struct net_device_stats)); -+ dev->get_stats = get_stats; -+ dev->open = venet_open; -+ dev->stop = venet_close; -+ dev->destructor = venet_destructor; -+ -+ /* -+ * Fill in the generic fields of the device structure. -+ */ -+ dev->type = ARPHRD_VOID; -+ dev->hard_header_len = ETH_HLEN; -+ dev->mtu = 1500; /* eth_mtu */ -+ dev->tx_queue_len = 0; -+ -+ memset(dev->broadcast, 0xFF, ETH_ALEN); -+ -+ /* New-style flags. 
*/ -+ dev->flags = IFF_BROADCAST|IFF_NOARP|IFF_POINTOPOINT; -+ return 0; -+} -+ -+static void venet_setup(struct net_device *dev) -+{ -+ dev->init = venet_init_dev; -+ /* -+ * No other features, as they are: -+ * - checksumming is required, and nobody else will done our job -+ */ -+ dev->features |= NETIF_F_VENET; -+} -+ -+#ifdef CONFIG_PROC_FS -+static int veinfo_seq_show(struct seq_file *m, void *v) -+{ -+ struct ve_struct *ve = (struct ve_struct *)v; -+ struct list_head *tmp; -+ -+ seq_printf(m, "%10u %5u %5u", ve->veid, -+ ve->class_id, atomic_read(&ve->pcounter)); -+ read_lock(&veip_hash_lock); -+ if (ve->veip == NULL) -+ goto unlock; -+ list_for_each(tmp, &ve->veip->ip_lh) { -+ char ip[16]; -+ struct ip_entry_struct *entry; -+ -+ entry = list_entry(tmp, struct ip_entry_struct, ve_list); -+ if (entry->active_env == NULL) -+ continue; -+ -+ sprintf(ip, "%u.%u.%u.%u", NIPQUAD(entry->ip)); -+ seq_printf(m, " %15s", ip); -+ } -+unlock: -+ read_unlock(&veip_hash_lock); -+ seq_putc(m, '\n'); -+ return 0; -+} -+ -+static void *ve_seq_start(struct seq_file *m, loff_t *pos) -+{ -+ struct ve_struct *ve, *curve; -+ loff_t l; -+ -+ curve = get_exec_env(); -+ read_lock(&ve_list_guard); -+ if (!ve_is_super(curve)) { -+ if (*pos != 0) -+ return NULL; -+ return curve; -+ } -+ for (ve = ve_list_head, l = *pos; -+ ve != NULL && l > 0; -+ ve = ve->next, l--); -+ return ve; -+} -+ -+static void *ve_seq_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ struct ve_struct *ve = (struct ve_struct *)v; -+ -+ if (!ve_is_super(get_exec_env())) -+ return NULL; -+ (*pos)++; -+ return ve->next; -+} -+ -+static void ve_seq_stop(struct seq_file *m, void *v) -+{ -+ read_unlock(&ve_list_guard); -+} -+ -+ -+static struct seq_operations veinfo_seq_op = { -+ start: ve_seq_start, -+ next: ve_seq_next, -+ stop: ve_seq_stop, -+ show: veinfo_seq_show -+}; -+ -+static int veinfo_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &veinfo_seq_op); -+} -+ -+static struct 
file_operations proc_veinfo_operations = { -+ open: veinfo_open, -+ read: seq_read, -+ llseek: seq_lseek, -+ release: seq_release -+}; -+ -+static void *veip_seq_start(struct seq_file *m, loff_t *pos) -+{ -+ loff_t l; -+ struct list_head *p; -+ int i; -+ -+ l = *pos; -+ write_lock_irq(&veip_hash_lock); -+ if (l == 0) -+ return ip_entry_hash_table; -+ for (i = 0; i < VEIP_HASH_SZ; i++) { -+ list_for_each(p, ip_entry_hash_table + i) { -+ if (--l == 0) -+ return p; -+ } -+ } -+ return NULL; -+} -+ -+static void *veip_seq_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ struct list_head *p; -+ -+ p = (struct list_head *)v; -+ while (1) { -+ p = p->next; -+ if (p < ip_entry_hash_table || -+ p >= ip_entry_hash_table + VEIP_HASH_SZ) { -+ (*pos)++; -+ return p; -+ } -+ if (++p >= ip_entry_hash_table + VEIP_HASH_SZ) -+ return NULL; -+ } -+ return NULL; -+} -+ -+static void veip_seq_stop(struct seq_file *m, void *v) -+{ -+ write_unlock_irq(&veip_hash_lock); -+} -+ -+static struct seq_operations veip_seq_op = { -+ start: veip_seq_start, -+ next: veip_seq_next, -+ stop: veip_seq_stop, -+ show: veip_seq_show -+}; -+ -+static int veip_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &veip_seq_op); -+} -+ -+static struct file_operations proc_veip_operations = { -+ open: veip_open, -+ read: seq_read, -+ llseek: seq_lseek, -+ release: seq_release -+}; -+#endif -+ -+int real_ve_ip_map(envid_t veid, int op, struct sockaddr *uservaddr, int addrlen) -+{ -+ int err; -+ struct sockaddr_in addr; -+ struct ve_struct *ve; -+ -+ err = -EPERM; -+ if (!capable(CAP_SETVEID)) -+ goto out; -+ -+ err = -EINVAL; -+ if (addrlen != sizeof(struct sockaddr_in)) -+ goto out; -+ -+ err = move_addr_to_kernel(uservaddr, addrlen, &addr); -+ if (err < 0) -+ goto out; -+ -+ switch (op) -+ { -+ case VE_IP_ADD: -+ ve = get_ve_by_id(veid); -+ err = -ESRCH; -+ if (!ve) -+ goto out; -+ -+ down_read(&ve->op_sem); -+ if (ve->is_running) -+ err = veip_entry_add(ve, &addr); -+ 
up_read(&ve->op_sem); -+ put_ve(ve); -+ break; -+ -+ case VE_IP_DEL: -+ err = veip_entry_del(veid, &addr); -+ break; -+ default: -+ err = -EINVAL; -+ } -+ -+out: -+ return err; -+} -+ -+int venet_ioctl(struct inode *ino, struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err; -+ -+ err = -ENOTTY; -+ switch(cmd) { -+ case VENETCTL_VE_IP_MAP: { -+ struct vzctl_ve_ip_map s; -+ err = -EFAULT; -+ if (copy_from_user(&s, (void *)arg, sizeof(s))) -+ break; -+ err = real_ve_ip_map(s.veid, s.op, s.addr, s.addrlen); -+ } -+ break; -+ } -+ return err; -+} -+ -+static struct vzioctlinfo venetcalls = { -+ type: VENETCTLTYPE, -+ func: venet_ioctl, -+ owner: THIS_MODULE, -+}; -+ -+int venet_dev_start(struct ve_struct *env) -+{ -+ struct net_device *dev_venet; -+ int err; -+ -+ dev_venet = alloc_netdev(0, "venet%d", venet_setup); -+ if (!dev_venet) -+ return -ENOMEM; -+ err = dev_alloc_name(dev_venet, dev_venet->name); -+ if (err<0) -+ goto err; -+ if ((err = register_netdev(dev_venet)) != 0) -+ goto err; -+ env->_venet_dev = dev_venet; -+ return 0; -+err: -+ free_netdev(dev_venet); -+ printk(KERN_ERR "VENET initialization error err=%d\n", err); -+ return err; -+} -+ -+static int venet_start(unsigned int hooknum, void *data) -+{ -+ struct ve_struct *env; -+ int err; -+ -+ env = ((struct ve_hook_init_data *)data)->env; -+ if (env->veip) -+ return -EEXIST; -+ if (!ve_is_super(env) && !try_module_get(THIS_MODULE)) -+ return 0; -+ -+ err = veip_start(env); -+ if (err) -+ goto err; -+ -+ err = venet_dev_start(env); -+ if (err) -+ goto err_free; -+ return 0; -+ -+err_free: -+ veip_stop(env); -+err: -+ if (!ve_is_super(env)) -+ module_put(THIS_MODULE); -+ return err; -+} -+ -+static int venet_stop(unsigned int hooknum, void *data) -+{ -+ struct ve_struct *env; -+ -+ if (hooknum == VE_HOOK_INIT) -+ env = ((struct ve_hook_init_data *)data)->env; -+ else -+ env = (struct ve_struct *)data; -+ veip_stop(env); -+ if (!ve_is_super(env)) -+ module_put(THIS_MODULE); -+ return 0; 
-+} -+ -+#define VE_HOOK_PRI_NET 0 -+ -+static struct ve_hook venet_ve_hook_init = { -+ hook: venet_start, -+ undo: venet_stop, -+ hooknum: VE_HOOK_INIT, -+ priority: VE_HOOK_PRI_NET -+}; -+ -+static struct ve_hook venet_ve_hook_fini = { -+ hook: venet_stop, -+ hooknum: VE_HOOK_FINI, -+ priority: VE_HOOK_PRI_NET -+}; -+ -+__init int venet_init(void) -+{ -+ struct ve_hook_init_data vhd; -+#ifdef CONFIG_PROC_FS -+ struct proc_dir_entry *de; -+#endif -+ int i, err; -+ -+ if (get_ve0()->_venet_dev != NULL) -+ return -EEXIST; -+ -+ for (i = 0; i < VEIP_HASH_SZ; i++) -+ INIT_LIST_HEAD(ip_entry_hash_table + i); -+ -+ vhd.env = get_ve0(); -+ err = venet_start(VE_HOOK_INIT, (void *)&vhd); -+ if (err) -+ return err; -+ -+#ifdef CONFIG_PROC_FS -+ de = create_proc_glob_entry("vz/veinfo", -+ S_IFREG|S_IRUSR, NULL); -+ if (de) -+ de->proc_fops = &proc_veinfo_operations; -+ else -+ printk(KERN_WARNING "venet: can't make veinfo proc entry\n"); -+ -+ de = create_proc_entry("vz/veip", S_IFREG|S_IRUSR, NULL); -+ if (de) -+ de->proc_fops = &proc_veip_operations; -+ else -+ printk(KERN_WARNING "venet: can't make veip proc entry\n"); -+#endif -+ -+ ve_hook_register(&venet_ve_hook_init); -+ ve_hook_register(&venet_ve_hook_fini); -+ vzioctl_register(&venetcalls); -+ return 0; -+} -+ -+__exit void venet_exit(void) -+{ -+ struct net_device *dev_venet; -+ -+ vzioctl_unregister(&venetcalls); -+ ve_hook_unregister(&venet_ve_hook_fini); -+ ve_hook_unregister(&venet_ve_hook_init); -+#ifdef CONFIG_PROC_FS -+ remove_proc_entry("vz/veip", NULL); -+ remove_proc_entry("vz/veinfo", NULL); -+#endif -+ -+ dev_venet = get_ve0()->_venet_dev; -+ if (dev_venet != NULL) { -+ get_ve0()->_venet_dev = NULL; -+ unregister_netdev(dev_venet); -+ free_netdev(dev_venet); -+ } -+ veip_stop(get_ve0()); -+} -+ -+module_init(venet_init); -+module_exit(venet_exit); -diff -uprN linux-2.6.8.1.orig/drivers/net/wireless/airo.c linux-2.6.8.1-ve022stab034/drivers/net/wireless/airo.c ---- 
linux-2.6.8.1.orig/drivers/net/wireless/airo.c 2004-08-14 14:54:49.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/wireless/airo.c 2005-09-09 14:39:24.000000000 +0400 -@@ -2901,8 +2901,8 @@ static int airo_thread(void *data) { - flush_signals(current); - - /* make swsusp happy with our thread */ -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - - if (test_bit(JOB_DIE, &ai->flags)) - break; -diff -uprN linux-2.6.8.1.orig/drivers/net/wireless/strip.c linux-2.6.8.1-ve022stab034/drivers/net/wireless/strip.c ---- linux-2.6.8.1.orig/drivers/net/wireless/strip.c 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/net/wireless/strip.c 2005-09-09 14:39:25.000000000 +0400 -@@ -1977,7 +1977,7 @@ static struct net_device *get_strip_dev( - sizeof(zero_address))) { - struct net_device *dev; - read_lock_bh(&dev_base_lock); -- dev = dev_base; -+ dev = visible_dev_base; - while (dev) { - if (dev->type == strip_info->dev->type && - !memcmp(dev->dev_addr, -diff -uprN linux-2.6.8.1.orig/drivers/pci/probe.c linux-2.6.8.1-ve022stab034/drivers/pci/probe.c ---- linux-2.6.8.1.orig/drivers/pci/probe.c 2004-08-14 14:55:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/pci/probe.c 2005-09-09 14:39:25.000000000 +0400 -@@ -26,6 +26,7 @@ LIST_HEAD(pci_root_buses); - EXPORT_SYMBOL(pci_root_buses); - - LIST_HEAD(pci_devices); -+EXPORT_SYMBOL(pci_devices); - - /* - * PCI Bus Class -diff -uprN linux-2.6.8.1.orig/drivers/pcmcia/cs.c linux-2.6.8.1-ve022stab034/drivers/pcmcia/cs.c ---- linux-2.6.8.1.orig/drivers/pcmcia/cs.c 2004-08-14 14:55:09.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/pcmcia/cs.c 2005-09-09 14:39:24.000000000 +0400 -@@ -724,8 +724,8 @@ static int pccardd(void *__skt) - } - - schedule(); -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - - if (!skt->thread) - break; -diff -uprN 
linux-2.6.8.1.orig/drivers/sbus/char/bbc_envctrl.c linux-2.6.8.1-ve022stab034/drivers/sbus/char/bbc_envctrl.c ---- linux-2.6.8.1.orig/drivers/sbus/char/bbc_envctrl.c 2004-08-14 14:56:26.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/sbus/char/bbc_envctrl.c 2005-09-09 14:39:25.000000000 +0400 -@@ -614,7 +614,7 @@ void bbc_envctrl_cleanup(void) - int found = 0; - - read_lock(&tasklist_lock); -- for_each_process(p) { -+ for_each_process_all(p) { - if (p == kenvctrld_task) { - found = 1; - break; -diff -uprN linux-2.6.8.1.orig/drivers/sbus/char/envctrl.c linux-2.6.8.1-ve022stab034/drivers/sbus/char/envctrl.c ---- linux-2.6.8.1.orig/drivers/sbus/char/envctrl.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/sbus/char/envctrl.c 2005-09-09 14:39:25.000000000 +0400 -@@ -1170,7 +1170,7 @@ static void __exit envctrl_cleanup(void) - int found = 0; - - read_lock(&tasklist_lock); -- for_each_process(p) { -+ for_each_process_all(p) { - if (p == kenvctrld_task) { - found = 1; - break; -diff -uprN linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.8.1-ve022stab034/drivers/scsi/aic7xxx/aic79xx_osm.c ---- linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic79xx_osm.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/scsi/aic7xxx/aic79xx_osm.c 2005-09-09 14:39:24.000000000 +0400 -@@ -2591,7 +2591,6 @@ ahd_linux_dv_thread(void *data) - sprintf(current->comm, "ahd_dv_%d", ahd->unit); - #else - daemonize("ahd_dv_%d", ahd->unit); -- current->flags |= PF_FREEZE; - #endif - unlock_kernel(); - -diff -uprN linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.8.1-ve022stab034/drivers/scsi/aic7xxx/aic7xxx_osm.c ---- linux-2.6.8.1.orig/drivers/scsi/aic7xxx/aic7xxx_osm.c 2004-08-14 14:54:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/scsi/aic7xxx/aic7xxx_osm.c 2005-09-09 14:39:24.000000000 +0400 -@@ -2295,7 +2295,6 @@ ahc_linux_dv_thread(void *data) - sprintf(current->comm, "ahc_dv_%d", ahc->unit); - #else 
- daemonize("ahc_dv_%d", ahc->unit); -- current->flags |= PF_FREEZE; - #endif - unlock_kernel(); - -diff -uprN linux-2.6.8.1.orig/drivers/scsi/scsi_error.c linux-2.6.8.1-ve022stab034/drivers/scsi/scsi_error.c ---- linux-2.6.8.1.orig/drivers/scsi/scsi_error.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/scsi/scsi_error.c 2005-09-09 14:39:25.000000000 +0400 -@@ -558,7 +558,7 @@ static int scsi_request_sense(struct scs - - memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense)); - -- scsi_result = kmalloc(252, GFP_ATOMIC | (scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0); -+ scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0)); - - - if (unlikely(!scsi_result)) { -diff -uprN linux-2.6.8.1.orig/drivers/scsi/scsi_scan.c linux-2.6.8.1-ve022stab034/drivers/scsi/scsi_scan.c ---- linux-2.6.8.1.orig/drivers/scsi/scsi_scan.c 2004-08-14 14:55:59.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/scsi/scsi_scan.c 2005-09-09 14:39:25.000000000 +0400 -@@ -733,7 +733,7 @@ static int scsi_probe_and_add_lun(struct - if (!sreq) - goto out_free_sdev; - result = kmalloc(256, GFP_ATOMIC | -- (host->unchecked_isa_dma) ? __GFP_DMA : 0); -+ ((host->unchecked_isa_dma) ? 
__GFP_DMA : 0)); - if (!result) - goto out_free_sreq; - -diff -uprN linux-2.6.8.1.orig/drivers/usb/core/hub.c linux-2.6.8.1-ve022stab034/drivers/usb/core/hub.c ---- linux-2.6.8.1.orig/drivers/usb/core/hub.c 2004-08-14 14:55:32.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/usb/core/hub.c 2005-09-09 14:39:24.000000000 +0400 -@@ -1922,8 +1922,8 @@ static int hub_thread(void *__unused) - do { - hub_events(); - wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list)); -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - } while (!signal_pending(current)); - - pr_debug ("%s: khubd exiting\n", usbcore_name); -diff -uprN linux-2.6.8.1.orig/drivers/w1/w1.c linux-2.6.8.1-ve022stab034/drivers/w1/w1.c ---- linux-2.6.8.1.orig/drivers/w1/w1.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/drivers/w1/w1.c 2005-09-09 14:39:24.000000000 +0400 -@@ -465,8 +465,8 @@ int w1_control(void *data) - timeout = w1_timeout; - do { - timeout = interruptible_sleep_on_timeout(&w1_control_wait, timeout); -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - } while (!signal_pending(current) && (timeout > 0)); - - if (signal_pending(current)) -@@ -536,8 +536,8 @@ int w1_process(void *data) - timeout = w1_timeout; - do { - timeout = interruptible_sleep_on_timeout(&dev->kwait, timeout); -- if (current->flags & PF_FREEZE) -- refrigerator(PF_FREEZE); -+ if (test_thread_flag(TIF_FREEZE)) -+ refrigerator(); - } while (!signal_pending(current) && (timeout > 0)); - - if (signal_pending(current)) -diff -uprN linux-2.6.8.1.orig/fs/adfs/adfs.h linux-2.6.8.1-ve022stab034/fs/adfs/adfs.h ---- linux-2.6.8.1.orig/fs/adfs/adfs.h 2004-08-14 14:56:22.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/adfs/adfs.h 2005-09-09 14:39:25.000000000 +0400 -@@ -72,7 +72,7 @@ int adfs_get_block(struct inode *inode, - struct buffer_head *bh, int create); 
- struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); - void adfs_read_inode(struct inode *inode); --void adfs_write_inode(struct inode *inode,int unused); -+int adfs_write_inode(struct inode *inode,int unused); - int adfs_notify_change(struct dentry *dentry, struct iattr *attr); - - /* map.c */ -diff -uprN linux-2.6.8.1.orig/fs/adfs/inode.c linux-2.6.8.1-ve022stab034/fs/adfs/inode.c ---- linux-2.6.8.1.orig/fs/adfs/inode.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/adfs/inode.c 2005-09-09 14:39:25.000000000 +0400 -@@ -372,10 +372,11 @@ out: - * The adfs-specific inode data has already been updated by - * adfs_notify_change() - */ --void adfs_write_inode(struct inode *inode, int unused) -+int adfs_write_inode(struct inode *inode, int unused) - { - struct super_block *sb = inode->i_sb; - struct object_info obj; -+ int ret; - - lock_kernel(); - obj.file_id = inode->i_ino; -@@ -386,7 +387,8 @@ void adfs_write_inode(struct inode *inod - obj.attr = ADFS_I(inode)->attr; - obj.size = inode->i_size; - -- adfs_dir_update(sb, &obj); -+ ret = adfs_dir_update(sb, &obj); - unlock_kernel(); -+ return ret; - } - MODULE_LICENSE("GPL"); -diff -uprN linux-2.6.8.1.orig/fs/affs/inode.c linux-2.6.8.1-ve022stab034/fs/affs/inode.c ---- linux-2.6.8.1.orig/fs/affs/inode.c 2004-08-14 14:55:22.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/affs/inode.c 2005-09-09 14:39:25.000000000 +0400 -@@ -181,7 +181,7 @@ bad_inode: - return; - } - --void -+int - affs_write_inode(struct inode *inode, int unused) - { - struct super_block *sb = inode->i_sb; -@@ -194,11 +194,11 @@ affs_write_inode(struct inode *inode, in - - if (!inode->i_nlink) - // possibly free block -- return; -+ return 0; - bh = affs_bread(sb, inode->i_ino); - if (!bh) { - affs_error(sb,"write_inode","Cannot read block %lu",inode->i_ino); -- return; -+ return -EIO; - } - tail = AFFS_TAIL(sb, bh); - if (tail->stype == be32_to_cpu(ST_ROOT)) { -@@ -226,6 +226,7 @@ affs_write_inode(struct 
inode *inode, in - mark_buffer_dirty_inode(bh, inode); - affs_brelse(bh); - affs_free_prealloc(inode); -+ return 0; - } - - int -diff -uprN linux-2.6.8.1.orig/fs/autofs4/autofs_i.h linux-2.6.8.1-ve022stab034/fs/autofs4/autofs_i.h ---- linux-2.6.8.1.orig/fs/autofs4/autofs_i.h 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/autofs4/autofs_i.h 2005-09-09 14:39:25.000000000 +0400 -@@ -91,6 +91,7 @@ struct autofs_wait_queue { - - struct autofs_sb_info { - u32 magic; -+ struct dentry *root; - struct file *pipe; - pid_t oz_pgrp; - int catatonic; -diff -uprN linux-2.6.8.1.orig/fs/autofs4/inode.c linux-2.6.8.1-ve022stab034/fs/autofs4/inode.c ---- linux-2.6.8.1.orig/fs/autofs4/inode.c 2004-08-14 14:55:48.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/autofs4/inode.c 2005-09-09 14:39:25.000000000 +0400 -@@ -16,6 +16,7 @@ - #include <linux/pagemap.h> - #include <linux/parser.h> - #include <asm/bitops.h> -+#include <linux/smp_lock.h> - #include "autofs_i.h" - #include <linux/module.h> - -@@ -76,6 +77,66 @@ void autofs4_free_ino(struct autofs_info - kfree(ino); - } - -+/* -+ * Deal with the infamous "Busy inodes after umount ..." message. -+ * -+ * Clean up the dentry tree. This happens with autofs if the user -+ * space program goes away due to a SIGKILL, SIGSEGV etc. 
-+ */ -+static void autofs4_force_release(struct autofs_sb_info *sbi) -+{ -+ struct dentry *this_parent = sbi->root; -+ struct list_head *next; -+ -+ spin_lock(&dcache_lock); -+repeat: -+ next = this_parent->d_subdirs.next; -+resume: -+ while (next != &this_parent->d_subdirs) { -+ struct dentry *dentry = list_entry(next, struct dentry, d_child); -+ -+ /* Negative dentry - don`t care */ -+ if (!simple_positive(dentry)) { -+ next = next->next; -+ continue; -+ } -+ -+ if (!list_empty(&dentry->d_subdirs)) { -+ this_parent = dentry; -+ goto repeat; -+ } -+ -+ next = next->next; -+ spin_unlock(&dcache_lock); -+ -+ DPRINTK("dentry %p %.*s", -+ dentry, (int)dentry->d_name.len, dentry->d_name.name); -+ -+ dput(dentry); -+ spin_lock(&dcache_lock); -+ } -+ -+ if (this_parent != sbi->root) { -+ struct dentry *dentry = this_parent; -+ -+ next = this_parent->d_child.next; -+ this_parent = this_parent->d_parent; -+ spin_unlock(&dcache_lock); -+ DPRINTK("parent dentry %p %.*s", -+ dentry, (int)dentry->d_name.len, dentry->d_name.name); -+ dput(dentry); -+ spin_lock(&dcache_lock); -+ goto resume; -+ } -+ spin_unlock(&dcache_lock); -+ -+ dput(sbi->root); -+ sbi->root = NULL; -+ shrink_dcache_sb(sbi->sb); -+ -+ return; -+} -+ - static void autofs4_put_super(struct super_block *sb) - { - struct autofs_sb_info *sbi = autofs4_sbi(sb); -@@ -85,6 +146,10 @@ static void autofs4_put_super(struct sup - if ( !sbi->catatonic ) - autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ - -+ /* Clean up and release dangling references */ -+ if (sbi) -+ autofs4_force_release(sbi); -+ - kfree(sbi); - - DPRINTK("shutting down"); -@@ -199,6 +264,7 @@ int autofs4_fill_super(struct super_bloc - - s->s_fs_info = sbi; - sbi->magic = AUTOFS_SBI_MAGIC; -+ sbi->root = NULL; - sbi->catatonic = 0; - sbi->exp_timeout = 0; - sbi->oz_pgrp = process_group(current); -@@ -265,6 +331,13 @@ int autofs4_fill_super(struct super_bloc - sbi->pipe = pipe; - - /* -+ * Take a reference to the root dentry so we get a 
chance to -+ * clean up the dentry tree on umount. -+ * See autofs4_force_release. -+ */ -+ sbi->root = dget(root); -+ -+ /* - * Success! Install the root dentry now to indicate completion. - */ - s->s_root = root; -diff -uprN linux-2.6.8.1.orig/fs/autofs4/root.c linux-2.6.8.1-ve022stab034/fs/autofs4/root.c ---- linux-2.6.8.1.orig/fs/autofs4/root.c 2004-08-14 14:54:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/autofs4/root.c 2005-09-09 14:39:25.000000000 +0400 -@@ -621,7 +621,9 @@ static int autofs4_dir_rmdir(struct inod - spin_unlock(&dcache_lock); - return -ENOTEMPTY; - } -+ spin_lock(&dentry->d_lock); - __d_drop(dentry); -+ spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - - dput(ino->dentry); -diff -uprN linux-2.6.8.1.orig/fs/bfs/inode.c linux-2.6.8.1-ve022stab034/fs/bfs/inode.c ---- linux-2.6.8.1.orig/fs/bfs/inode.c 2004-08-14 14:55:19.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/bfs/inode.c 2005-09-09 14:39:25.000000000 +0400 -@@ -85,7 +85,7 @@ static void bfs_read_inode(struct inode - brelse(bh); - } - --static void bfs_write_inode(struct inode * inode, int unused) -+static int bfs_write_inode(struct inode * inode, int unused) - { - unsigned long ino = inode->i_ino; - struct bfs_inode * di; -@@ -94,7 +94,7 @@ static void bfs_write_inode(struct inode - - if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { - printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); -- return; -+ return -EIO; - } - - lock_kernel(); -@@ -103,7 +103,7 @@ static void bfs_write_inode(struct inode - if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); - unlock_kernel(); -- return; -+ return -EIO; - } - - off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; -@@ -129,6 +129,7 @@ static void bfs_write_inode(struct inode - mark_buffer_dirty(bh); - brelse(bh); - unlock_kernel(); -+ return 0; - } - - static void bfs_delete_inode(struct inode * inode) -diff -uprN linux-2.6.8.1.orig/fs/binfmt_aout.c 
linux-2.6.8.1-ve022stab034/fs/binfmt_aout.c ---- linux-2.6.8.1.orig/fs/binfmt_aout.c 2004-08-14 14:54:51.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/binfmt_aout.c 2005-09-09 14:39:30.000000000 +0400 -@@ -43,13 +43,21 @@ static struct linux_binfmt aout_format = - .min_coredump = PAGE_SIZE - }; - --static void set_brk(unsigned long start, unsigned long end) -+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) -+ -+static int set_brk(unsigned long start, unsigned long end) - { - start = PAGE_ALIGN(start); - end = PAGE_ALIGN(end); -- if (end <= start) -- return; -- do_brk(start, end - start); -+ if (end > start) { -+ unsigned long addr; -+ down_write(¤t->mm->mmap_sem); -+ addr = do_brk(start, end - start); -+ up_write(¤t->mm->mmap_sem); -+ if (BAD_ADDR(addr)) -+ return addr; -+ } -+ return 0; - } - - /* -@@ -318,10 +326,14 @@ static int load_aout_binary(struct linux - loff_t pos = fd_offset; - /* Fuck me plenty... */ - /* <AOL></AOL> */ -+ down_write(¤t->mm->mmap_sem); - error = do_brk(N_TXTADDR(ex), ex.a_text); -+ up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), - ex.a_text, &pos); -+ down_write(¤t->mm->mmap_sem); - error = do_brk(N_DATADDR(ex), ex.a_data); -+ up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), - ex.a_data, &pos); - goto beyond_if; -@@ -341,8 +353,9 @@ static int load_aout_binary(struct linux - pos = 32; - map_size = ex.a_text+ex.a_data; - #endif -- -+ down_write(¤t->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); -+ up_write(¤t->mm->mmap_sem); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); - return error; -@@ -377,7 +390,9 @@ static int load_aout_binary(struct linux - - if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { - loff_t pos = fd_offset; -+ down_write(¤t->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); -+ up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, - (char __user 
*)N_TXTADDR(ex), - ex.a_text+ex.a_data, &pos); -@@ -413,7 +428,11 @@ static int load_aout_binary(struct linux - beyond_if: - set_binfmt(&aout_format); - -- set_brk(current->mm->start_brk, current->mm->brk); -+ retval = set_brk(current->mm->start_brk, current->mm->brk); -+ if (retval < 0) { -+ send_sig(SIGKILL, current, 0); -+ return retval; -+ } - - retval = setup_arg_pages(bprm, EXSTACK_DEFAULT); - if (retval < 0) { -@@ -429,9 +448,11 @@ beyond_if: - #endif - start_thread(regs, ex.a_entry, current->mm->start_stack); - if (unlikely(current->ptrace & PT_PTRACED)) { -- if (current->ptrace & PT_TRACE_EXEC) -+ if (current->ptrace & PT_TRACE_EXEC) { -+ set_pn_state(current, PN_STOP_EXEC); - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); -- else -+ clear_pn_state(current); -+ } else - send_sig(SIGTRAP, current, 0); - } - return 0; -@@ -478,8 +499,9 @@ static int load_aout_library(struct file - file->f_dentry->d_name.name); - error_time = jiffies; - } -- -+ down_write(¤t->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); -+ up_write(¤t->mm->mmap_sem); - - file->f_op->read(file, (char __user *)start_addr, - ex.a_text + ex.a_data, &pos); -@@ -503,7 +525,9 @@ static int load_aout_library(struct file - len = PAGE_ALIGN(ex.a_text + ex.a_data); - bss = ex.a_text + ex.a_data + ex.a_bss; - if (bss > len) { -+ down_write(¤t->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); -+ up_write(¤t->mm->mmap_sem); - retval = error; - if (error != start_addr + len) - goto out; -diff -uprN linux-2.6.8.1.orig/fs/binfmt_elf.c linux-2.6.8.1-ve022stab034/fs/binfmt_elf.c ---- linux-2.6.8.1.orig/fs/binfmt_elf.c 2004-08-14 14:55:23.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/binfmt_elf.c 2005-09-09 14:39:30.000000000 +0400 -@@ -87,7 +87,10 @@ static int set_brk(unsigned long start, - start = ELF_PAGEALIGN(start); - end = ELF_PAGEALIGN(end); - if (end > start) { -- unsigned long addr = do_brk(start, end - start); -+ unsigned long addr; -+ 
down_write(¤t->mm->mmap_sem); -+ addr = do_brk(start, end - start); -+ up_write(¤t->mm->mmap_sem); - if (BAD_ADDR(addr)) - return addr; - } -@@ -102,15 +105,17 @@ static int set_brk(unsigned long start, - be in memory */ - - --static void padzero(unsigned long elf_bss) -+static int padzero(unsigned long elf_bss) - { - unsigned long nbyte; - - nbyte = ELF_PAGEOFFSET(elf_bss); - if (nbyte) { - nbyte = ELF_MIN_ALIGN - nbyte; -- clear_user((void __user *) elf_bss, nbyte); -+ if (clear_user((void __user *) elf_bss, nbyte)) -+ return -EFAULT; - } -+ return 0; - } - - /* Let's use some macros to make this stack manipulation a litle clearer */ -@@ -126,7 +131,7 @@ static void padzero(unsigned long elf_bs - #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) - #endif - --static void -+static int - create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec, - int interp_aout, unsigned long load_addr, - unsigned long interp_load_addr) -@@ -171,7 +176,8 @@ create_elf_tables(struct linux_binprm *b - STACK_ALLOC(p, ((current->pid % 64) << 7)); - #endif - u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); -- __copy_to_user(u_platform, k_platform, len); -+ if (__copy_to_user(u_platform, k_platform, len)) -+ return -EFAULT; - } - - /* Create the ELF interpreter info */ -@@ -233,7 +239,8 @@ create_elf_tables(struct linux_binprm *b - #endif - - /* Now, let's put argc (and argv, envp if appropriate) on the stack */ -- __put_user(argc, sp++); -+ if (__put_user(argc, sp++)) -+ return -EFAULT; - if (interp_aout) { - argv = sp + 2; - envp = argv + argc + 1; -@@ -245,31 +252,35 @@ create_elf_tables(struct linux_binprm *b - } - - /* Populate argv and envp */ -- p = current->mm->arg_start; -+ p = current->mm->arg_end = current->mm->arg_start; - while (argc-- > 0) { - size_t len; - __put_user((elf_addr_t)p, argv++); - len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) -- return; -+ return 0; - p += len; - } -- __put_user(0, 
argv); -+ if (__put_user(0, argv)) -+ return -EFAULT; - current->mm->arg_end = current->mm->env_start = p; - while (envc-- > 0) { - size_t len; - __put_user((elf_addr_t)p, envp++); - len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) -- return; -+ return 0; - p += len; - } -- __put_user(0, envp); -+ if (__put_user(0, envp)) -+ return -EFAULT; - current->mm->env_end = p; - - /* Put the elf_info on the stack in the right place. */ - sp = (elf_addr_t __user *)envp + 1; -- copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)); -+ if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t))) -+ return -EFAULT; -+ return 0; - } - - #ifndef elf_map -@@ -334,14 +345,17 @@ static unsigned long load_elf_interp(str - goto out; - - retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size); -- error = retval; -- if (retval < 0) -+ error = -EIO; -+ if (retval != size) { -+ if (retval < 0) -+ error = retval; - goto out_close; -+ } - - eppnt = elf_phdata; - for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { - if (eppnt->p_type == PT_LOAD) { -- int elf_type = MAP_PRIVATE | MAP_DENYWRITE; -+ int elf_type = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECPRIO; - int elf_prot = 0; - unsigned long vaddr = 0; - unsigned long k, map_addr; -@@ -399,12 +413,18 @@ static unsigned long load_elf_interp(str - * that there are zero-mapped pages up to and including the - * last bss page. 
- */ -- padzero(elf_bss); -+ if (padzero(elf_bss)) { -+ error = -EFAULT; -+ goto out_close; -+ } -+ - elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */ - - /* Map the last of the bss segment */ - if (last_bss > elf_bss) { -+ down_write(¤t->mm->mmap_sem); - error = do_brk(elf_bss, last_bss - elf_bss); -+ up_write(¤t->mm->mmap_sem); - if (BAD_ADDR(error)) - goto out_close; - } -@@ -444,7 +464,9 @@ static unsigned long load_aout_interp(st - goto out; - } - -+ down_write(¤t->mm->mmap_sem); - do_brk(0, text_data); -+ up_write(¤t->mm->mmap_sem); - if (!interpreter->f_op || !interpreter->f_op->read) - goto out; - if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0) -@@ -452,8 +474,11 @@ static unsigned long load_aout_interp(st - flush_icache_range((unsigned long)addr, - (unsigned long)addr + text_data); - -+ -+ down_write(¤t->mm->mmap_sem); - do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), - interp_ex->a_bss); -+ up_write(¤t->mm->mmap_sem); - elf_entry = interp_ex->a_entry; - - out: -@@ -487,25 +512,33 @@ static int load_elf_binary(struct linux_ - unsigned long elf_entry, interp_load_addr = 0; - unsigned long start_code, end_code, start_data, end_data; - unsigned long reloc_func_desc = 0; -- struct elfhdr elf_ex; -- struct elfhdr interp_elf_ex; -- struct exec interp_ex; - char passed_fileno[6]; - struct files_struct *files; - int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT; - unsigned long def_flags = 0; -+ struct { -+ struct elfhdr elf_ex; -+ struct elfhdr interp_elf_ex; -+ struct exec interp_ex; -+ } *loc; -+ -+ loc = kmalloc(sizeof(*loc), GFP_KERNEL); -+ if (!loc) { -+ retval = -ENOMEM; -+ goto out_ret; -+ } - - /* Get the exec-header */ -- elf_ex = *((struct elfhdr *) bprm->buf); -+ loc->elf_ex = *((struct elfhdr *) bprm->buf); - - retval = -ENOEXEC; - /* First of all, some simple consistency checks */ -- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) -+ if (memcmp(loc->elf_ex.e_ident, ELFMAG, 
SELFMAG) != 0) - goto out; - -- if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) -+ if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN) - goto out; -- if (!elf_check_arch(&elf_ex)) -+ if (!elf_check_arch(&loc->elf_ex)) - goto out; - if (!bprm->file->f_op||!bprm->file->f_op->mmap) - goto out; -@@ -513,18 +546,21 @@ static int load_elf_binary(struct linux_ - /* Now read in all of the header information */ - - retval = -ENOMEM; -- if (elf_ex.e_phentsize != sizeof(struct elf_phdr)) -+ if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr)) - goto out; -- if (elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) -+ if (loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) - goto out; -- size = elf_ex.e_phnum * sizeof(struct elf_phdr); -+ size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr); - elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); - if (!elf_phdata) - goto out; - -- retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size); -- if (retval < 0) -+ retval = kernel_read(bprm->file, loc->elf_ex.e_phoff, (char *) elf_phdata, size); -+ if (retval != size) { -+ if (retval >= 0) -+ retval = -EIO; - goto out_free_ph; -+ } - - files = current->files; /* Refcounted so ok */ - retval = unshare_files(); -@@ -553,7 +589,7 @@ static int load_elf_binary(struct linux_ - start_data = 0; - end_data = 0; - -- for (i = 0; i < elf_ex.e_phnum; i++) { -+ for (i = 0; i < loc->elf_ex.e_phnum; i++) { - if (elf_ppnt->p_type == PT_INTERP) { - /* This is the program interpreter used for - * shared libraries - for now assume that this -@@ -561,7 +597,8 @@ static int load_elf_binary(struct linux_ - */ - - retval = -ENOMEM; -- if (elf_ppnt->p_filesz > PATH_MAX) -+ if (elf_ppnt->p_filesz > PATH_MAX || -+ elf_ppnt->p_filesz == 0) - goto out_free_file; - elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, - GFP_KERNEL); -@@ -571,8 +608,16 @@ static int load_elf_binary(struct linux_ - retval = kernel_read(bprm->file, elf_ppnt->p_offset, - 
elf_interpreter, - elf_ppnt->p_filesz); -- if (retval < 0) -+ if (retval != elf_ppnt->p_filesz) { -+ if (retval >= 0) -+ retval = -EIO; - goto out_free_interp; -+ } -+ /* make sure path is NULL terminated */ -+ retval = -EINVAL; -+ if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') -+ goto out_free_interp; -+ - /* If the program interpreter is one of these two, - * then assume an iBCS2 image. Otherwise assume - * a native linux image. -@@ -600,26 +645,29 @@ static int load_elf_binary(struct linux_ - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ -- SET_PERSONALITY(elf_ex, ibcs2_interpreter); -+ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); - -- interpreter = open_exec(elf_interpreter); -+ interpreter = open_exec(elf_interpreter, NULL); - retval = PTR_ERR(interpreter); - if (IS_ERR(interpreter)) - goto out_free_interp; - retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); -- if (retval < 0) -+ if (retval != BINPRM_BUF_SIZE) { -+ if (retval >= 0) -+ retval = -EIO; - goto out_free_dentry; -+ } - - /* Get the exec headers */ -- interp_ex = *((struct exec *) bprm->buf); -- interp_elf_ex = *((struct elfhdr *) bprm->buf); -+ loc->interp_ex = *((struct exec *) bprm->buf); -+ loc->interp_elf_ex = *((struct elfhdr *) bprm->buf); - break; - } - elf_ppnt++; - } - - elf_ppnt = elf_phdata; -- for (i = 0; i < elf_ex.e_phnum; i++, elf_ppnt++) -+ for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) - if (elf_ppnt->p_type == PT_GNU_STACK) { - if (elf_ppnt->p_flags & PF_X) - executable_stack = EXSTACK_ENABLE_X; -@@ -627,19 +675,19 @@ static int load_elf_binary(struct linux_ - executable_stack = EXSTACK_DISABLE_X; - break; - } -- have_pt_gnu_stack = (i < elf_ex.e_phnum); -+ have_pt_gnu_stack = (i < loc->elf_ex.e_phnum); - - /* Some simple consistency checks for the interpreter */ - if (elf_interpreter) { - interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; - - /* Now figure out which format our binary is */ -- if 
((N_MAGIC(interp_ex) != OMAGIC) && -- (N_MAGIC(interp_ex) != ZMAGIC) && -- (N_MAGIC(interp_ex) != QMAGIC)) -+ if ((N_MAGIC(loc->interp_ex) != OMAGIC) && -+ (N_MAGIC(loc->interp_ex) != ZMAGIC) && -+ (N_MAGIC(loc->interp_ex) != QMAGIC)) - interpreter_type = INTERPRETER_ELF; - -- if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) -+ if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - interpreter_type &= ~INTERPRETER_ELF; - - retval = -ELIBBAD; -@@ -655,11 +703,11 @@ static int load_elf_binary(struct linux_ - } - /* Verify the interpreter has a valid arch */ - if ((interpreter_type == INTERPRETER_ELF) && -- !elf_check_arch(&interp_elf_ex)) -+ !elf_check_arch(&loc->interp_elf_ex)) - goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ -- SET_PERSONALITY(elf_ex, ibcs2_interpreter); -+ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); - } - - /* OK, we are done with that, now set up the arg stuff, -@@ -699,8 +747,8 @@ static int load_elf_binary(struct linux_ - - /* Do this immediately, since STACK_TOP as used in setup_arg_pages - may depend on the personality. */ -- SET_PERSONALITY(elf_ex, ibcs2_interpreter); -- if (elf_read_implies_exec(elf_ex, have_pt_gnu_stack)) -+ SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter); -+ if (elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack)) - current->personality |= READ_IMPLIES_EXEC; - - /* Do this so that we can load the interpreter, if need be. We will -@@ -720,7 +768,7 @@ static int load_elf_binary(struct linux_ - the image should be loaded at fixed address, not at a variable - address. 
*/ - -- for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { -+ for(i = 0, elf_ppnt = elf_phdata; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot = 0, elf_flags; - unsigned long k, vaddr; - -@@ -744,7 +792,13 @@ static int load_elf_binary(struct linux_ - nbyte = ELF_MIN_ALIGN - nbyte; - if (nbyte > elf_brk - elf_bss) - nbyte = elf_brk - elf_bss; -- clear_user((void __user *) elf_bss + load_bias, nbyte); -+ /* -+ * This bss-zeroing can fail if the ELF file -+ * specifies odd protections. So we don't check -+ * the return value -+ */ -+ (void)clear_user((void __user *)elf_bss + -+ load_bias, nbyte); - } - } - -@@ -752,12 +806,13 @@ static int load_elf_binary(struct linux_ - if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - -- elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; -+ elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE| -+ MAP_EXECPRIO; - - vaddr = elf_ppnt->p_vaddr; -- if (elf_ex.e_type == ET_EXEC || load_addr_set) { -+ if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { - elf_flags |= MAP_FIXED; -- } else if (elf_ex.e_type == ET_DYN) { -+ } else if (loc->elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the default mmap - base, as well as whatever program they might try to exec. This - is because the brk will follow the loader, and is not movable. 
*/ -@@ -765,13 +820,15 @@ static int load_elf_binary(struct linux_ - } - - error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags); -- if (BAD_ADDR(error)) -- continue; -+ if (BAD_ADDR(error)) { -+ send_sig(SIGKILL, current, 0); -+ goto out_free_dentry; -+ } - - if (!load_addr_set) { - load_addr_set = 1; - load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset); -- if (elf_ex.e_type == ET_DYN) { -+ if (loc->elf_ex.e_type == ET_DYN) { - load_bias += error - - ELF_PAGESTART(load_bias + vaddr); - load_addr += load_bias; -@@ -808,7 +865,7 @@ static int load_elf_binary(struct linux_ - elf_brk = k; - } - -- elf_ex.e_entry += load_bias; -+ loc->elf_ex.e_entry += load_bias; - elf_bss += load_bias; - elf_brk += load_bias; - start_code += load_bias; -@@ -826,14 +883,18 @@ static int load_elf_binary(struct linux_ - send_sig(SIGKILL, current, 0); - goto out_free_dentry; - } -- padzero(elf_bss); -+ if (padzero(elf_bss)) { -+ send_sig(SIGSEGV, current, 0); -+ retval = -EFAULT; /* Nobody gets to see this, but.. */ -+ goto out_free_dentry; -+ } - - if (elf_interpreter) { - if (interpreter_type == INTERPRETER_AOUT) -- elf_entry = load_aout_interp(&interp_ex, -+ elf_entry = load_aout_interp(&loc->interp_ex, - interpreter); - else -- elf_entry = load_elf_interp(&interp_elf_ex, -+ elf_entry = load_elf_interp(&loc->interp_elf_ex, - interpreter, - &interp_load_addr); - if (BAD_ADDR(elf_entry)) { -@@ -848,7 +909,7 @@ static int load_elf_binary(struct linux_ - fput(interpreter); - kfree(elf_interpreter); - } else { -- elf_entry = elf_ex.e_entry; -+ elf_entry = loc->elf_ex.e_entry; - } - - kfree(elf_phdata); -@@ -860,7 +921,7 @@ static int load_elf_binary(struct linux_ - - compute_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; -- create_elf_tables(bprm, &elf_ex, (interpreter_type == INTERPRETER_AOUT), -+ create_elf_tables(bprm, &loc->elf_ex, (interpreter_type == INTERPRETER_AOUT), - load_addr, interp_load_addr); - /* N.B. passed_fileno might not be initialized? 
*/ - if (interpreter_type == INTERPRETER_AOUT) -@@ -898,13 +959,17 @@ static int load_elf_binary(struct linux_ - - start_thread(regs, elf_entry, bprm->p); - if (unlikely(current->ptrace & PT_PTRACED)) { -- if (current->ptrace & PT_TRACE_EXEC) -+ if (current->ptrace & PT_TRACE_EXEC) { -+ set_pn_state(current, PN_STOP_EXEC); - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); -- else -+ clear_pn_state(current); -+ } else - send_sig(SIGTRAP, current, 0); - } - retval = 0; - out: -+ kfree(loc); -+out_ret: - return retval; - - /* error cleanup */ -@@ -933,6 +998,7 @@ out_free_ph: - static int load_elf_library(struct file *file) - { - struct elf_phdr *elf_phdata; -+ struct elf_phdr *eppnt; - unsigned long elf_bss, bss, len; - int retval, error, i, j; - struct elfhdr elf_ex; -@@ -956,43 +1022,52 @@ static int load_elf_library(struct file - /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */ - - error = -ENOMEM; -- elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL); -+ elf_phdata = kmalloc(j, GFP_KERNEL); - if (!elf_phdata) - goto out; - -+ eppnt = elf_phdata; - error = -ENOEXEC; -- retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j); -+ retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j); - if (retval != j) - goto out_free_ph; - - for (j = 0, i = 0; i<elf_ex.e_phnum; i++) -- if ((elf_phdata + i)->p_type == PT_LOAD) j++; -+ if ((eppnt + i)->p_type == PT_LOAD) -+ j++; - if (j != 1) - goto out_free_ph; - -- while (elf_phdata->p_type != PT_LOAD) elf_phdata++; -+ while (eppnt->p_type != PT_LOAD) -+ eppnt++; - - /* Now use mmap to map the library into memory. 
*/ - down_write(¤t->mm->mmap_sem); - error = do_mmap(file, -- ELF_PAGESTART(elf_phdata->p_vaddr), -- (elf_phdata->p_filesz + -- ELF_PAGEOFFSET(elf_phdata->p_vaddr)), -+ ELF_PAGESTART(eppnt->p_vaddr), -+ (eppnt->p_filesz + -+ ELF_PAGEOFFSET(eppnt->p_vaddr)), - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, -- (elf_phdata->p_offset - -- ELF_PAGEOFFSET(elf_phdata->p_vaddr))); -+ (eppnt->p_offset - -+ ELF_PAGEOFFSET(eppnt->p_vaddr))); - up_write(¤t->mm->mmap_sem); -- if (error != ELF_PAGESTART(elf_phdata->p_vaddr)) -+ if (error != ELF_PAGESTART(eppnt->p_vaddr)) - goto out_free_ph; - -- elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz; -- padzero(elf_bss); -+ elf_bss = eppnt->p_vaddr + eppnt->p_filesz; -+ if (padzero(elf_bss)) { -+ error = -EFAULT; -+ goto out_free_ph; -+ } - -- len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1); -- bss = elf_phdata->p_memsz + elf_phdata->p_vaddr; -- if (bss > len) -+ len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1); -+ bss = eppnt->p_memsz + eppnt->p_vaddr; -+ if (bss > len) { -+ down_write(¤t->mm->mmap_sem); - do_brk(len, bss - len); -+ up_write(¤t->mm->mmap_sem); -+ } - error = 0; - - out_free_ph: -@@ -1172,20 +1247,20 @@ static void fill_prstatus(struct elf_prs - prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; - prstatus->pr_sigpend = p->pending.signal.sig[0]; - prstatus->pr_sighold = p->blocked.sig[0]; -- prstatus->pr_pid = p->pid; -- prstatus->pr_ppid = p->parent->pid; -- prstatus->pr_pgrp = process_group(p); -- prstatus->pr_sid = p->signal->session; -+ prstatus->pr_pid = virt_pid(p); -+ prstatus->pr_ppid = virt_pid(p->parent); -+ prstatus->pr_pgrp = virt_pgid(p); -+ prstatus->pr_sid = virt_sid(p); - jiffies_to_timeval(p->utime, &prstatus->pr_utime); - jiffies_to_timeval(p->stime, &prstatus->pr_stime); - jiffies_to_timeval(p->cutime, &prstatus->pr_cutime); - jiffies_to_timeval(p->cstime, &prstatus->pr_cstime); - } - --static 
void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, -- struct mm_struct *mm) -+static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, -+ struct mm_struct *mm) - { -- int i, len; -+ unsigned int i, len; - - /* first copy the parameters from user space */ - memset(psinfo, 0, sizeof(struct elf_prpsinfo)); -@@ -1193,17 +1268,18 @@ static void fill_psinfo(struct elf_prpsi - len = mm->arg_end - mm->arg_start; - if (len >= ELF_PRARGSZ) - len = ELF_PRARGSZ-1; -- copy_from_user(&psinfo->pr_psargs, -- (const char __user *)mm->arg_start, len); -+ if (copy_from_user(&psinfo->pr_psargs, -+ (const char __user *)mm->arg_start, len)) -+ return -EFAULT; - for(i = 0; i < len; i++) - if (psinfo->pr_psargs[i] == 0) - psinfo->pr_psargs[i] = ' '; - psinfo->pr_psargs[len] = 0; - -- psinfo->pr_pid = p->pid; -- psinfo->pr_ppid = p->parent->pid; -- psinfo->pr_pgrp = process_group(p); -- psinfo->pr_sid = p->signal->session; -+ psinfo->pr_pid = virt_pid(p); -+ psinfo->pr_ppid = virt_pid(p->parent); -+ psinfo->pr_pgrp = virt_pgid(p); -+ psinfo->pr_sid = virt_sid(p); - - i = p->state ? ffz(~p->state) + 1 : 0; - psinfo->pr_state = i; -@@ -1215,7 +1291,7 @@ static void fill_psinfo(struct elf_prpsi - SET_GID(psinfo->pr_gid, p->gid); - strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); - -- return; -+ return 0; - } - - /* Here is the structure in which status of each thread is captured. 
*/ -@@ -1344,7 +1420,7 @@ static int elf_core_dump(long signr, str - /* capture the status of all other threads */ - if (signr) { - read_lock(&tasklist_lock); -- do_each_thread(g,p) -+ do_each_thread_ve(g,p) - if (current->mm == p->mm && current != p) { - int sz = elf_dump_thread_status(signr, p, &thread_list); - if (!sz) { -@@ -1353,7 +1429,7 @@ static int elf_core_dump(long signr, str - } else - thread_status_size += sz; - } -- while_each_thread(g,p); -+ while_each_thread_ve(g,p); - read_unlock(&tasklist_lock); - } - -diff -uprN linux-2.6.8.1.orig/fs/binfmt_em86.c linux-2.6.8.1-ve022stab034/fs/binfmt_em86.c ---- linux-2.6.8.1.orig/fs/binfmt_em86.c 2004-08-14 14:55:35.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/binfmt_em86.c 2005-09-09 14:39:25.000000000 +0400 -@@ -82,7 +82,7 @@ static int load_em86(struct linux_binprm - * Note that we use open_exec() as the name is now in kernel - * space, and we don't need to copy it. - */ -- file = open_exec(interp); -+ file = open_exec(interp, bprm); - if (IS_ERR(file)) - return PTR_ERR(file); - -diff -uprN linux-2.6.8.1.orig/fs/binfmt_flat.c linux-2.6.8.1-ve022stab034/fs/binfmt_flat.c ---- linux-2.6.8.1.orig/fs/binfmt_flat.c 2004-08-14 14:54:46.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/binfmt_flat.c 2005-09-09 14:39:25.000000000 +0400 -@@ -774,7 +774,7 @@ static int load_flat_shared_library(int - - /* Open the file up */ - bprm.filename = buf; -- bprm.file = open_exec(bprm.filename); -+ bprm.file = open_exec(bprm.filename, &bprm); - res = PTR_ERR(bprm.file); - if (IS_ERR(bprm.file)) - return res; -diff -uprN linux-2.6.8.1.orig/fs/binfmt_misc.c linux-2.6.8.1-ve022stab034/fs/binfmt_misc.c ---- linux-2.6.8.1.orig/fs/binfmt_misc.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/binfmt_misc.c 2005-09-09 14:39:25.000000000 +0400 -@@ -150,7 +150,8 @@ static int load_misc_binary(struct linux - - /* if the binary is not readable than enforce mm->dumpable=0 - regardless of the interpreter's 
permissions */ -- if (permission(bprm->file->f_dentry->d_inode, MAY_READ, NULL)) -+ if (permission(bprm->file->f_dentry->d_inode, MAY_READ, -+ NULL, NULL)) - bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; - - allow_write_access(bprm->file); -@@ -179,7 +180,7 @@ static int load_misc_binary(struct linux - - bprm->interp = iname; /* for binfmt_script */ - -- interp_file = open_exec (iname); -+ interp_file = open_exec (iname, bprm); - retval = PTR_ERR (interp_file); - if (IS_ERR (interp_file)) - goto _error; -diff -uprN linux-2.6.8.1.orig/fs/binfmt_script.c linux-2.6.8.1-ve022stab034/fs/binfmt_script.c ---- linux-2.6.8.1.orig/fs/binfmt_script.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/binfmt_script.c 2005-09-09 14:39:25.000000000 +0400 -@@ -85,7 +85,7 @@ static int load_script(struct linux_binp - /* - * OK, now restart the process with the interpreter's dentry. - */ -- file = open_exec(interp); -+ file = open_exec(interp, bprm); - if (IS_ERR(file)) - return PTR_ERR(file); - -diff -uprN linux-2.6.8.1.orig/fs/block_dev.c linux-2.6.8.1-ve022stab034/fs/block_dev.c ---- linux-2.6.8.1.orig/fs/block_dev.c 2004-08-14 14:56:24.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/block_dev.c 2005-09-09 14:39:26.000000000 +0400 -@@ -548,9 +548,16 @@ static int do_open(struct block_device * - { - struct module *owner = NULL; - struct gendisk *disk; -- int ret = -ENXIO; -+ int ret; - int part; - -+#ifdef CONFIG_VE -+ ret = get_device_perms_ve(S_IFBLK, bdev->bd_dev, -+ file->f_mode&(FMODE_READ|FMODE_WRITE)); -+ if (ret) -+ return ret; -+#endif -+ ret = -ENXIO; - file->f_mapping = bdev->bd_inode->i_mapping; - lock_kernel(); - disk = get_gendisk(bdev->bd_dev, &part); -@@ -821,7 +828,7 @@ EXPORT_SYMBOL(ioctl_by_bdev); - * namespace if possible and return it. Return ERR_PTR(error) - * otherwise. 
- */ --struct block_device *lookup_bdev(const char *path) -+struct block_device *lookup_bdev(const char *path, int mode) - { - struct block_device *bdev; - struct inode *inode; -@@ -839,6 +846,11 @@ struct block_device *lookup_bdev(const c - error = -ENOTBLK; - if (!S_ISBLK(inode->i_mode)) - goto fail; -+#ifdef CONFIG_VE -+ error = get_device_perms_ve(S_IFBLK, inode->i_rdev, mode); -+ if (error) -+ goto fail; -+#endif - error = -EACCES; - if (nd.mnt->mnt_flags & MNT_NODEV) - goto fail; -@@ -870,12 +882,13 @@ struct block_device *open_bdev_excl(cons - mode_t mode = FMODE_READ; - int error = 0; - -- bdev = lookup_bdev(path); -+ if (!(flags & MS_RDONLY)) -+ mode |= FMODE_WRITE; -+ -+ bdev = lookup_bdev(path, mode); - if (IS_ERR(bdev)) - return bdev; - -- if (!(flags & MS_RDONLY)) -- mode |= FMODE_WRITE; - error = blkdev_get(bdev, mode, 0); - if (error) - return ERR_PTR(error); -diff -uprN linux-2.6.8.1.orig/fs/buffer.c linux-2.6.8.1-ve022stab034/fs/buffer.c ---- linux-2.6.8.1.orig/fs/buffer.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/buffer.c 2005-09-09 14:39:25.000000000 +0400 -@@ -2022,8 +2022,9 @@ static int __block_prepare_write(struct - goto out; - if (buffer_new(bh)) { - clear_buffer_new(bh); -- unmap_underlying_metadata(bh->b_bdev, -- bh->b_blocknr); -+ if (buffer_mapped(bh)) -+ unmap_underlying_metadata(bh->b_bdev, -+ bh->b_blocknr); - if (PageUptodate(page)) { - set_buffer_uptodate(bh); - continue; -@@ -2901,7 +2902,7 @@ drop_buffers(struct page *page, struct b - - bh = head; - do { -- if (buffer_write_io_error(bh)) -+ if (buffer_write_io_error(bh) && page->mapping) - set_bit(AS_EIO, &page->mapping->flags); - if (buffer_busy(bh)) - goto failed; -@@ -3100,7 +3101,7 @@ void __init buffer_init(void) - - bh_cachep = kmem_cache_create("buffer_head", - sizeof(struct buffer_head), 0, -- SLAB_PANIC, init_buffer_head, NULL); -+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL); - for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++) 
- init_waitqueue_head(&bh_wait_queue_heads[i].wqh); - -diff -uprN linux-2.6.8.1.orig/fs/char_dev.c linux-2.6.8.1-ve022stab034/fs/char_dev.c ---- linux-2.6.8.1.orig/fs/char_dev.c 2004-08-14 14:55:10.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/char_dev.c 2005-09-09 14:39:26.000000000 +0400 -@@ -257,6 +257,13 @@ int chrdev_open(struct inode * inode, st - struct cdev *new = NULL; - int ret = 0; - -+#ifdef CONFIG_VE -+ ret = get_device_perms_ve(S_IFCHR, inode->i_rdev, -+ filp->f_mode&(FMODE_READ|FMODE_WRITE)); -+ if (ret) -+ return ret; -+#endif -+ - spin_lock(&cdev_lock); - p = inode->i_cdev; - if (!p) { -diff -uprN linux-2.6.8.1.orig/fs/cifs/cifsfs.c linux-2.6.8.1-ve022stab034/fs/cifs/cifsfs.c ---- linux-2.6.8.1.orig/fs/cifs/cifsfs.c 2004-08-14 14:55:33.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/cifs/cifsfs.c 2005-09-09 14:39:25.000000000 +0400 -@@ -188,7 +188,8 @@ cifs_statfs(struct super_block *sb, stru - return 0; /* always return success? what if volume is no longer available? 
*/ - } - --static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) -+static int cifs_permission(struct inode * inode, int mask, -+ struct nameidata *nd, struct exec_perm *exec_perm) - { - struct cifs_sb_info *cifs_sb; - -@@ -200,7 +201,7 @@ static int cifs_permission(struct inode - on the client (above and beyond ACL on servers) for - servers which do not support setting and viewing mode bits, - so allowing client to check permissions is useful */ -- return vfs_permission(inode, mask); -+ return vfs_permission(inode, mask, exec_perm); - } - - static kmem_cache_t *cifs_inode_cachep; -diff -uprN linux-2.6.8.1.orig/fs/coda/dir.c linux-2.6.8.1-ve022stab034/fs/coda/dir.c ---- linux-2.6.8.1.orig/fs/coda/dir.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/coda/dir.c 2005-09-09 14:39:25.000000000 +0400 -@@ -147,7 +147,8 @@ exit: - } - - --int coda_permission(struct inode *inode, int mask, struct nameidata *nd) -+int coda_permission(struct inode *inode, int mask, struct nameidata *nd, -+ struct exec_perm *perm) - { - int error = 0; - -diff -uprN linux-2.6.8.1.orig/fs/coda/pioctl.c linux-2.6.8.1-ve022stab034/fs/coda/pioctl.c ---- linux-2.6.8.1.orig/fs/coda/pioctl.c 2004-08-14 14:55:47.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/coda/pioctl.c 2005-09-09 14:39:25.000000000 +0400 -@@ -25,7 +25,7 @@ - - /* pioctl ops */ - static int coda_ioctl_permission(struct inode *inode, int mask, -- struct nameidata *nd); -+ struct nameidata *nd, struct exec_perm *); - static int coda_pioctl(struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long user_data); - -@@ -43,7 +43,8 @@ struct file_operations coda_ioctl_operat - - /* the coda pioctl inode ops */ - static int coda_ioctl_permission(struct inode *inode, int mask, -- struct nameidata *nd) -+ struct nameidata *nd, -+ struct exec_perm *exec_perm) - { - return 0; - } -diff -uprN linux-2.6.8.1.orig/fs/compat.c linux-2.6.8.1-ve022stab034/fs/compat.c ---- 
linux-2.6.8.1.orig/fs/compat.c 2004-08-14 14:55:31.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/compat.c 2005-09-09 14:39:26.000000000 +0400 -@@ -429,6 +429,8 @@ asmlinkage long compat_sys_ioctl(unsigne - fn = d_path(filp->f_dentry, - filp->f_vfsmnt, path, - PAGE_SIZE); -+ if (IS_ERR(fn)) -+ fn = "(err)"; - } - - sprintf(buf,"'%c'", (cmd>>24) & 0x3f); -@@ -1375,7 +1377,7 @@ int compat_do_execve(char * filename, - - sched_balance_exec(); - -- file = open_exec(filename); -+ file = open_exec(filename, &bprm); - - retval = PTR_ERR(file); - if (IS_ERR(file)) -diff -uprN linux-2.6.8.1.orig/fs/compat_ioctl.c linux-2.6.8.1-ve022stab034/fs/compat_ioctl.c ---- linux-2.6.8.1.orig/fs/compat_ioctl.c 2004-08-14 14:56:22.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/compat_ioctl.c 2005-09-09 14:39:25.000000000 +0400 -@@ -640,8 +640,11 @@ int siocdevprivate_ioctl(unsigned int fd - /* Don't check these user accesses, just let that get trapped - * in the ioctl handler instead. - */ -- copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ); -- __put_user(data64, &u_ifreq64->ifr_ifru.ifru_data); -+ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], -+ IFNAMSIZ)) -+ return -EFAULT; -+ if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) -+ return -EFAULT; - - return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64); - } -@@ -2336,7 +2339,9 @@ put_dirent32 (struct dirent *d, struct c - __put_user(d->d_ino, &d32->d_ino); - __put_user(d->d_off, &d32->d_off); - __put_user(d->d_reclen, &d32->d_reclen); -- __copy_to_user(d32->d_name, d->d_name, d->d_reclen); -+ if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) -+ return -EFAULT; -+ - return ret; - } - -@@ -2479,7 +2484,8 @@ static int serial_struct_ioctl(unsigned - if (cmd == TIOCSSERIAL) { - if (verify_area(VERIFY_READ, ss32, sizeof(SS32))) - return -EFAULT; -- __copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)); -+ if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base))) -+ return 
-EFAULT; - __get_user(udata, &ss32->iomem_base); - ss.iomem_base = compat_ptr(udata); - __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift); -@@ -2492,7 +2498,8 @@ static int serial_struct_ioctl(unsigned - if (cmd == TIOCGSERIAL && err >= 0) { - if (verify_area(VERIFY_WRITE, ss32, sizeof(SS32))) - return -EFAULT; -- __copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)); -+ if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base))) -+ return -EFAULT; - __put_user((unsigned long)ss.iomem_base >> 32 ? - 0xffffffff : (unsigned)(unsigned long)ss.iomem_base, - &ss32->iomem_base); -diff -uprN linux-2.6.8.1.orig/fs/dcache.c linux-2.6.8.1-ve022stab034/fs/dcache.c ---- linux-2.6.8.1.orig/fs/dcache.c 2004-08-14 14:54:50.000000000 +0400 -+++ linux-2.6.8.1-ve022stab034/fs/dcache.c 2005-09-09 14:39:26.000000000 +0400 -@@ -19,6 +19,7 @@ - #include <linux/mm.h> - #include <linux/fs.h> - #include <linux/slab.h> -+#include <linux/kmem_cache.h> - #include <linux/init.h> - #include <linux/smp_lock.h> - #include <linux/hash.h> -@@ -26,11 +27,15 @@ - #include <linux/module.h> - #include <linux/mount.h> - #include <linux/file.h> -+#include <linux/namei.h> - #include <asm/uaccess.h> - #include <linux/security.h> - #include <linux/seqlock.h> - #include <linux/swap.h> - #include <linux/bootmem.h> -+#include <linux/kernel_stat.h> -+ -+#include <ub/ub_dcache.h> - - /* #define DCACHE_DEBUG 1 */ - -@@ -43,7 +48,10 @@ EXPORT_SYMBOL(dcache_lock); - - static kmem_cache_t *dentry_cache; - --#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) -+unsigned int dentry_memusage(void) -+{ -+ return kmem_cache_memusage(dentry_cache); -+} - - /* - * This is the single most critical data structure when it comes -@@ -70,6 +78,7 @@ static void d_callback(struct rcu_head * - { - struct dentry * dentry = container_of(head, struct dentry, d_rcu); - -+ ub_dentry_free(dentry); - if (dname_external(dentry)) - kfree(dentry->d_name.name); - kmem_cache_free(dentry_cache, dentry); -@@ -109,6 
+118,78 @@ static inline void dentry_iput(struct de - } - } - -+struct dcache_shrinker { -+ struct list_head list; -+ struct dentry *dentry; -+}; -+ -+DECLARE_WAIT_QUEUE_HEAD(dcache_shrinker_wq); -+ -+/* called under dcache_lock */ -+static void dcache_shrinker_add(struct dcache_shrinker *ds, -+ struct dentry *parent, struct dentry *dentry) -+{ -+ if (parent != dentry) { -+ struct super_block *sb; -+ -+ sb = parent->d_sb; -+ ds->dentry = parent; -+ list_add(&ds->list, &sb->s_dshrinkers); -+ } else -+ INIT_LIST_HEAD(&ds->list); -+} -+ -+/* called under dcache_lock */ -+static void dcache_shrinker_del(struct dcache_shrinker *ds) -+{ -+ if (ds == NULL || list_empty(&ds->list)) -+ return; -+ -+ list_del_init(&ds->list); -+ wake_up_all(&dcache_shrinker_wq); -+} -+ -+/* called under dcache_lock, drops inside */ -+static void dcache_shrinker_wait(struct super_block *sb) -+{ -+ DECLARE_WAITQUEUE(wq, current); -+ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ add_wait_queue(&dcache_shrinker_wq, &wq); -+ spin_unlock(&dcache_lock); -+ -+ schedule(); -+ remove_wait_queue(&dcache_shrinker_wq, &wq); -+ __set_current_state(TASK_RUNNING); -+} -+ -+void dcache_shrinker_wait_sb(struct super_block *sb) -+{ -+ /* the root dentry can be held in dput_recursive */ -+ spin_lock(&dcache_lock); -+ while (!list_empty(&sb->s_dshrinkers)) { -+ dcache_shrinker_wait(sb); -+ spin_lock(&dcache_lock); -+ } -+ spin_unlock(&dcache_lock); -+} -+ -+/* dcache_lock protects shrinker's list */ -+static void shrink_dcache_racecheck(struct dentry *parent, int *racecheck) -+{ -+ struct super_block *sb; -+ struct dcache_shrinker *ds; -+ -+ sb = parent->d_sb; -+ list_for_each_entry(ds, &sb->s_dshrinkers, list) { -+ /* is one of dcache shrinkers working on the dentry? 
*/ -+ if (ds->dentry == parent) { -+ *racecheck = 1; -+ break; -+ } -+ } -+} -+ - /* - * This is dput - * -@@ -127,26 +208,26 @@ static inline void dentry_iput(struct de - */ - - /* -- * dput - release a dentry -- * @dentry: dentry to release -+ * dput_recursive - go upward through the dentry tree and release dentries -+ * @dentry: starting dentry -+ * @ds: shrinker to be added to active list (see shrink_dcache_parent) - * - * Release a dentry. This will drop the usage count and if appropriate - * call the dentry unlink method as well as removing it from the queues and - * releasing its resources. If the parent dentries were scheduled for release - * they too may now get deleted. - * -+ * This traverse upward doesn't change d_inuse of any dentry -+ * - * no dcache lock, please. - */ -- --void dput(struct dentry *dentry) -+static void dput_recursive(struct dentry *dentry, struct dcache_shrinker *ds) - { -- if (!dentry) -- return; -- --repeat: - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; -+ dcache_shrinker_del(ds); - -+repeat: - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { - spin_unlock(&dentry->d_lock); -@@ -178,6 +259,7 @@ unhash_it: - - kill_it: { - struct dentry *parent; -+ struct dcache_shrinker lds; - - /* If dentry was on d_lru list - * delete it from there -@@ -187,18 +269,50 @@ kill_it: { - dentry_stat.nr_unused--; - } - list_del(&dentry->d_child); -+ parent = dentry->d_parent; -+ dcache_shrinker_add(&lds, parent, dentry); - dentry_stat.nr_dentry--; /* For d_free, below */ - /*drops the locks, at that point nobody can reach this dentry */ - dentry_iput(dentry); -- parent = dentry->d_parent; - d_free(dentry); -- if (dentry == parent) -+ if (unlikely(dentry == parent)) { -+ spin_lock(&dcache_lock); -+ dcache_shrinker_del(&lds); -+ spin_unlock(&dcache_lock); - return; -+ } - dentry = parent; -- goto repeat; -+ spin_lock(&dcache_lock); -+ dcache_shrinker_del(&lds); -+ if (atomic_dec_and_test(&dentry->d_count)) -+ 
goto repeat; -+ spin_unlock(&dcache_lock); - } - } - -+/* -+ * dput - release a dentry -+ * @dentry: dentry to release -+ * -+ * Release a dentry. This will drop the usage count and if appropriate -+ * call the dentry unlink method as well as removing it from the queues and -+ * releasing its resources. If the parent dentries were scheduled for release -+ * they too may now get deleted. -+ * -+ * no dcache lock, please. -+ */ -+ -+void dput(struct dentry *dentry) -+{ -+ if (!dentry) -+ return; -+ -+ spin_lock(&dcache_lock); -+ ub_dentry_uncharge(dentry); -+ spin_unlock(&dcache_lock); -+ dput_recursive(dentry, NULL); -+} -+ - /** - * d_invalidate - invalidate a dentry - * @dentry: dentry to invalidate -@@ -265,6 +379,8 @@ static inline struct dentry * __dget_loc - dentry_stat.nr_unused--; - list_del_init(&dentry->d_lru); - } -+ -+ ub_dentry_charge_nofail(dentry); - return dentry; - } - -@@ -327,13 +443,16 @@ restart: - tmp = head; - while ((tmp = tmp->next) != head) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); -+ spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); - __d_drop(dentry); -+ spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - dput(dentry); - goto restart; - } -+ spin_unlock(&dentry->d_lock); - } - spin_unlock(&dcache_lock); - } -@@ -344,19 +463,27 @@ restart: - * removed. - * Called with dcache_lock, drops it and then regains. 
- */ --static inline void prune_one_dentry(struct dentry * dentry) -+static void prune_one_dentry(struct dentry * dentry) - { - struct dentry * parent; -+ struct dcache_shrinker ds; - - __d_drop(dentry); - list_del(&dentry->d_child); -+ parent = dentry->d_parent; -+ dcache_shrinker_add(&ds, parent, dentry); - dentry_stat.nr_dentry--; /* For d_free, below */ - dentry_iput(dentry); - parent = dentry->d_parent; - d_free(dentry); - if (parent != dentry) -- dput(parent); -+ /* -+ * dentry is not in use, only child (not outside) -+ * references change, so parent->d_inuse does not change -+ */ -+ dput_recursive(parent, &ds); - spin_lock(&dcache_lock); -+ dcache_shrinker_del(&ds); - } - - /** -@@ -536,13 +663,12 @@ positive: - * whenever the d_subdirs list is non-empty and continue - * searching. - */ --static int select_parent(struct dentry * parent) -+static int select_parent(struct dentry * parent, int * racecheck) - { - struct dentry *this_parent = parent; - struct list_head *next; - int found = 0; - -- spin_lock(&dcache_lock); - repeat: - next = this_parent->d_subdirs.next; - resume: -@@ -575,6 +701,9 @@ dentry->d_parent->d_name.name, dentry->d - #endif - goto repeat; - } -+ -+ if (!found && racecheck != NULL) -+ shrink_dcache_racecheck(dentry, racecheck); - } - /* - * All done at this level ... ascend and resume the search. -@@ -588,7 +717,6 @@ this_parent->d_parent->d_name.name, this - #endif - goto resume; - } -- spin_unlock(&dcache_lock); - return found; - } - -@@ -601,10 +729,66 @@ this_parent->d_parent->d_name.name, this - - void shrink_dcache_parent(struct dentry * parent) - { -- int found; -+ int found, r; - -- while ((found = select_parent(parent)) != 0) -+ while (1) { -+ spin_lock(&dcache_lock); -+ found = select_parent(parent, NULL); -+ if (found) -+ goto found; -+ -+ /* -+ * try again with a dput_recursive() race check. 
-+ * it returns quickly if everything was really shrinked -+ */ -+ r = 0; -+ found = select_parent(parent, &r); -+ if (found) -+ goto found; -+ if (!r) -+ break; -+ -+ /* drops the lock inside */ -+ dcache_shrinker_wait(parent->d_sb); -+ continue; -+ -+found: -+ spin_unlock(&dcache_lock); - prune_dcache(found); -+ } -+ spin_unlock(&dcache_lock); -+} -+ -+/* -+ * Move any unused anon dentries to the end of the unused list. -+ * called under dcache_lock -+ */ -+static int select_anon(struct hlist_head *head, int *racecheck) -+{ -+ struct hlist_node *lp; -+ int found = 0; -+ -+ hlist_for_each(lp, head) { -+ struct dentry *this = hlist_entry(lp, struct dentry, d_hash); -+ if (!list_empty(&this->d_lru)) { -+ dentry_stat.nr_unused--; -+ list_del_init(&this->d_lru); -+ } -+ -+ /* -+ * move only zero ref count dentries to the end -+ * of the unused list for prune_dcache -+ */ -+ if (!atomic_read(&this->d_count)) { -+ list_add_tail(&this->d_lru, &dentry_unused); -+ dentry_stat.nr_unused++; -+ found++; -+ } -+ -+ if (!found && racecheck != NULL) -+ shrink_dcache_racecheck(this, racecheck); -+ } -+ return found; - } - - /** -@@ -617,33 +801,36 @@ void shrink_dcache_parent(struct dentry - * done under dcache_lock. - * - */ --void shrink_dcache_anon(struct hlist_head *head) -+void shrink_dcache_anon(struct super_block *sb) - { -- struct hlist_node *lp; -- int found; -- do { -- found = 0; -+ int found, r; -+ -+ while (1) { - spin_lock(&dcache_lock); -- hlist_for_each(lp, head) { -- struct dentry *this = hlist_entry(lp, struct dentry, d_hash); -- if (!list_empty(&this->d_lru)) { -- dentry_stat.nr_unused--; -- list_del_init(&this->d_lru); -- |