diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 0493e8b..1e91d22 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index a9cd42e..e547830 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -31,6 +31,7 @@ cflags-$(CONFIG_MK8) += $(call cc-option cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) CFLAGS += $(cflags-y) +CFLAGS += -ffixed-r10 CFLAGS += -mno-red-zone CFLAGS += -mcmodel=kernel CFLAGS += -pipe diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S index 483cbb2..1785b57 100644 --- a/arch/x86_64/crypto/aes-x86_64-asm.S +++ b/arch/x86_64/crypto/aes-x86_64-asm.S @@ -43,27 +43,28 @@ #define R7E %ebp #define R8 %r8 #define R9 %r9 -#define R10 %r10 +#define R10 %r12 #define R11 %r11 -#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ +#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r12,r11) \ .global FUNC; \ .type FUNC,@function; \ .align 8; \ -FUNC: movq r1,r2; \ +FUNC: pushq %r12; \ + movq r1,r2; \ movq r3,r4; \ leaq BASE+52(r8),r9; \ - movq r10,r11; \ + movq r12,r11; \ movl (r7),r5 ## E; \ movl 4(r7),r1 ## E; \ movl 8(r7),r6 ## E; \ movl 12(r7),r7 ## E; \ - movl (r8),r10 ## E; \ + movl (r8),r12 ## E; \ xorl -48(r9),r5 ## E; \ xorl -44(r9),r1 ## E; \ xorl -40(r9),r6 ## E; \ xorl -36(r9),r7 ## E; \ - cmpl $24,r10 ## E; \ + cmpl $24,r12 ## E; \ jb B128; \ leaq 32(r9),r9; \ je B192; \ @@ -76,6 +77,7 @@ FUNC: movq r1,r2; \ movl r6 ## E,4(r9); \ movl r7 ## E,8(r9); \ movl r8 ## E,12(r9); \ + popq %r12; \ ret; #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index e0eb0c7..cdb5918 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -99,6 +99,7 @@ sysenter_do_call: cmpl 
$(IA32_NR_syscalls),%eax jae ia32_badsys IA32_ARG_FIXUP 1 + movq %gs:pda_pcurrent,%r10 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) @@ -127,6 +128,7 @@ sysenter_tracesys: CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ + movq %gs:pda_pcurrent,%r10 call syscall_trace_enter LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST @@ -198,6 +200,7 @@ cstar_do_call: cmpl $IA32_NR_syscalls,%eax jae ia32_badsys IA32_ARG_FIXUP 1 + movq %gs:pda_pcurrent,%r10 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) GET_THREAD_INFO(%r10) @@ -220,6 +223,7 @@ cstar_tracesys: CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ + movq %gs:pda_pcurrent,%r10 call syscall_trace_enter LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST @@ -282,6 +286,7 @@ ia32_do_syscall: cmpl $(IA32_NR_syscalls),%eax jae ia32_badsys IA32_ARG_FIXUP + movq %gs:pda_pcurrent,%r10 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX-ARGOFFSET(%rsp) @@ -291,6 +296,7 @@ ia32_tracesys: SAVE_REST movq $-ENOSYS,RAX(%rsp) /* really needed? 
*/ movq %rsp,%rdi /* &pt_regs -> arg1 */ + movq %gs:pda_pcurrent,%r10 call syscall_trace_enter LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST @@ -336,6 +342,7 @@ ENTRY(ia32_ptregs_common) CFI_ADJUST_CFA_OFFSET -8 CFI_REGISTER rip, r11 SAVE_REST + movq %gs:pda_pcurrent,%r10 call *%rax RESTORE_REST jmp ia32_sysret /* misbalances the return cache */ diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index aaa6d38..66ebe60 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c @@ -29,7 +29,7 @@ int main(void) ENTRY(pid); BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry)) +#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct task_struct, thread.info.entry)) ENTRY(flags); ENTRY(addr_limit); ENTRY(preempt_count); diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 9ff4204..8af38c4 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -136,21 +136,19 @@ ENTRY(ret_from_fork) CFI_DEFAULT_STACK call schedule_tail - GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10) jnz rff_trace rff_action: RESTORE_REST testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 
je int_ret_from_sys_call - testl $_TIF_IA32,threadinfo_flags(%rcx) + testl $_TIF_IA32,threadinfo_flags(%r10) jnz int_ret_from_sys_call RESTORE_TOP_OF_STACK %rdi,ARGOFFSET jmp ret_from_sys_call rff_trace: movq %rsp,%rdi call syscall_trace_leave - GET_THREAD_INFO(%rcx) jmp rff_action CFI_ENDPROC @@ -191,17 +189,18 @@ ENTRY(system_call) movq %gs:pda_kernelstack,%rsp sti SAVE_ARGS 8,1 + cmpq $__NR_syscall_max,%rax movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) + movq %r10,%rcx + ja badsys + movq sys_call_table(,%rax,8),%rax CFI_REL_OFFSET rip,RIP-ARGOFFSET - GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) + GET_THREAD_INFO(%r10) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) CFI_REMEMBER_STATE jnz tracesys - cmpq $__NR_syscall_max,%rax - ja badsys - movq %r10,%rcx - call *sys_call_table(,%rax,8) # XXX: rip relative + call *%rax # XXX: rip relative movq %rax,RAX-ARGOFFSET(%rsp) /* * Syscall return path ending with SYSRET (fast path) @@ -212,9 +211,8 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: - GET_THREAD_INFO(%rcx) cli - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz sysret_careful @@ -235,6 +233,7 @@ sysret_careful: sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 + movq %gs:pda_pcurrent,%r10 call schedule popq %rdi CFI_ADJUST_CFA_OFFSET -8 @@ -251,11 +250,13 @@ sysret_signal: leaq do_notify_resume(%rip),%rax leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 + movq %gs:pda_pcurrent,%r10 call ptregscall_common 1: movl $_TIF_NEED_RESCHED,%edi jmp sysret_check badsys: + GET_THREAD_INFO(%r10) movq $-ENOSYS,RAX-ARGOFFSET(%rsp) jmp ret_from_sys_call @@ -271,7 +272,6 @@ tracesys: RESTORE_REST cmpq $__NR_syscall_max,%rax ja 1f - movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) 1: SAVE_REST @@ -309,8 
+309,8 @@ ENTRY(int_ret_from_sys_call) movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%edx + GET_THREAD_INFO(%r10) + movl threadinfo_flags(%r10),%edx andl %edi,%edx jnz int_careful jmp retint_swapgs @@ -324,6 +324,7 @@ int_careful: sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 + movq %gs:pda_pcurrent,%r10 call schedule popq %rdi CFI_ADJUST_CFA_OFFSET -8 @@ -340,6 +341,7 @@ int_very_careful: pushq %rdi CFI_ADJUST_CFA_OFFSET 8 leaq 8(%rsp),%rdi # &ptregs -> arg1 + movq %gs:pda_pcurrent,%r10 call syscall_trace_leave popq %rdi CFI_ADJUST_CFA_OFFSET -8 @@ -352,6 +354,7 @@ int_signal: jz 1f movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 + movq %gs:pda_pcurrent,%r10 call do_notify_resume 1: movl $_TIF_NEED_RESCHED,%edi int_restore_rest: @@ -389,6 +392,7 @@ ENTRY(ptregscall_common) movq %r11, %r15 CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 + movq %gs:pda_pcurrent,%r10 call *%rax RESTORE_TOP_OF_STACK %r11 movq %r15, %r11 @@ -409,9 +413,9 @@ ENTRY(stub_execve) movq %r11, %r15 CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 + movq %gs:pda_pcurrent,%r10 call sys_execve - GET_THREAD_INFO(%rcx) - bt $TIF_IA32,threadinfo_flags(%rcx) + bt $TIF_IA32,threadinfo_flags(%r10) CFI_REMEMBER_STATE jc exec_32bit RESTORE_TOP_OF_STACK %r11 @@ -441,6 +445,7 @@ ENTRY(stub_rt_sigreturn) SAVE_REST movq %rsp,%rdi FIXUP_TOP_OF_STACK %r11 + movq %gs:pda_pcurrent,%r10 call sys_rt_sigreturn movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer RESTORE_REST @@ -494,6 +499,7 @@ ENTRY(stub_rt_sigreturn) je 1f swapgs 1: incl %gs:pda_irqcount # RED-PEN should check preempt count + movq %gs:pda_pcurrent,%r10 movq %gs:pda_irqstackptr,%rax cmoveq %rax,%rsp /*todo This needs CFI annotation! */ pushq %rdi # save old stack @@ -516,19 +522,19 @@ ret_from_intr: #endif leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! 
*/ exit_intr: - GET_THREAD_INFO(%rcx) + GET_THREAD_INFO(%r10) testl $3,CS-ARGOFFSET(%rsp) je retint_kernel /* Interrupt came from user space */ /* * Has a correct top of stack, but a partial stack frame - * %rcx: thread info. Interrupts off. + * %r10: thread info. Interrupts off. */ retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful @@ -559,10 +565,10 @@ retint_careful: sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 + movq %gs:pda_pcurrent,%r10 call schedule popq %rdi CFI_ADJUST_CFA_OFFSET -8 - GET_THREAD_INFO(%rcx) cli jmp retint_check @@ -574,24 +580,25 @@ retint_signal: movq $-1,ORIG_RAX(%rsp) xorl %esi,%esi # oldset movq %rsp,%rdi # &pt_regs + movq %gs:pda_pcurrent,%r10 call do_notify_resume RESTORE_REST cli movl $_TIF_NEED_RESCHED,%edi - GET_THREAD_INFO(%rcx) jmp retint_check #ifdef CONFIG_PREEMPT /* Returning to kernel space. Check if we need preemption */ - /* rcx: threadinfo. interrupts off. */ + /* r10: threadinfo. interrupts off. */ .p2align retint_kernel: - cmpl $0,threadinfo_preempt_count(%rcx) + cmpl $0,threadinfo_preempt_count(%r10) jnz retint_restore_args - bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) + bt $TIF_NEED_RESCHED,threadinfo_flags(%r10) jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? 
*/ jnc retint_restore_args + movq %gs:pda_pcurrent,%r10 call preempt_schedule_irq jmp exit_intr #endif @@ -686,6 +693,7 @@ ENTRY(spurious_interrupt) 1: movq %rsp,%rdi movq ORIG_RAX(%rsp),%rsi movq $-1,ORIG_RAX(%rsp) + movq %gs:pda_pcurrent,%r10 call \sym cli .endm @@ -735,6 +743,7 @@ ENTRY(error_entry) error_swapgs: swapgs error_sti: + movq %gs:pda_pcurrent,%r10 movq %rdi,RDI(%rsp) movq %rsp,%rdi movq ORIG_RAX(%rsp),%rsi /* get error code */ @@ -745,10 +754,9 @@ error_exit: movl %ebx,%eax RESTORE_REST cli - GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful @@ -876,6 +884,7 @@ ENTRY(execve) CFI_STARTPROC FAKE_STACK_FRAME $0 SAVE_ALL + movq %gs:pda_pcurrent,%r10 call sys_execve movq %rax, RAX(%rsp) RESTORE_REST @@ -935,11 +944,11 @@ paranoid_restore: RESTORE_ALL 8 iretq paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%ebx + movl threadinfo_flags(%r10),%ebx andl $_TIF_WORK_MASK,%ebx jz paranoid_swapgs movq %rsp,%rdi /* &pt_regs */ + /* doesn't need current loaded */ call sync_regs movq %rax,%rsp /* switch stack for scheduling */ testl $_TIF_NEED_RESCHED,%ebx diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index a472d62..42531a9 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 9da3edb..28b775f 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index d9b22b6..a5d7e16 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -95,7 +95,7 @@ int save_i387(struct _fpstate __user *bu 
if (!used_math()) return 0; clear_used_math(); /* trigger finit */ - if (tsk->thread_info->status & TS_USEDFPU) { + if (task_thread_info(tsk)->status & TS_USEDFPU) { err = save_i387_checking((struct i387_fxsave_struct __user *)buf); if (err) return err; stts(); diff --git a/arch/x86_64/kernel/ldt.c b/arch/x86_64/kernel/ldt.c index d7e5d0c..9c4792c 100644 --- a/arch/x86_64/kernel/ldt.c +++ b/arch/x86_64/kernel/ldt.c @@ -94,7 +94,7 @@ static inline int copy_ldt(mm_context_t * we do not have to muck with descriptors here, that is * done in switch_mm() as needed. */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +int x86_64_init_new_context(struct mm_struct *mm) { struct mm_struct * old_mm; int retval = 0; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 5afd63e..7237dad 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -185,6 +185,7 @@ static inline void play_dead(void) void cpu_idle (void) { set_thread_flag(TIF_POLLING_NRFLAG); + local_irq_disable(); /* endless idle loop with no priority at all */ while (1) { @@ -205,6 +206,7 @@ void cpu_idle (void) preempt_enable_no_resched(); schedule(); + local_irq_disable(); preempt_disable(); } } @@ -435,14 +437,16 @@ int copy_thread(int nr, unsigned long cl childregs->rax = 0; childregs->rsp = rsp; - if (rsp == ~0UL) + if (rsp == ~0UL) { + childregs->r10 = (long)p; childregs->rsp = (unsigned long)childregs; + } p->thread.rsp = (unsigned long) childregs; p->thread.rsp0 = (unsigned long) (childregs+1); p->thread.userrsp = me->thread.userrsp; - set_ti_thread_flag(p->thread_info, TIF_FORK); + set_ti_thread_flag(task_thread_info(p), TIF_FORK); p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; @@ -479,6 +483,7 @@ int copy_thread(int nr, unsigned long cl out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); + p->thread.io_bitmap_ptr = 0; p->thread.io_bitmap_max = 0; } return err; @@ -568,6 +573,7 @@ __switch_to(struct 
task_struct *prev_p, prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); + current = next_p; write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 06dc354..fe6453f 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -125,23 +125,22 @@ void pda_init(int cpu) pda->cpunumber = cpu; pda->irqcount = -1; - pda->kernelstack = - (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; pda->active_mm = &init_mm; pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; + current = pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; } else { + current = pda->pcurrent; pda->irqstackptr = (char *) __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); if (!pda->irqstackptr) panic("cannot allocate irqstack for cpu %d", cpu); } - - + //__asm__ volatile("movq %0,%%r10" : "=g" (pda->pcurrent) :: "memory"); + pda->kernelstack = (long)current->thread_info + THREAD_SIZE - 64; pda->irqstackptr += IRQSTACKSIZE-64; } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 683c33f..a67160b 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -746,6 +746,8 @@ static int __cpuinit do_boot_cpu(int cpu c_idle.idle = get_idle_for_cpu(cpu); if (c_idle.idle) { + /* set the CPU for stack_smp_processor_id() */ + c_idle.idle->thread_info->cpu = cpu; c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1); init_idle(c_idle.idle, cpu); @@ -785,7 +787,7 @@ do_rest: init_rsp = c_idle.idle->thread.rsp; per_cpu(init_tss,cpu).rsp0 = init_rsp; initial_code = start_secondary; - clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); + clear_ti_thread_flag(task_thread_info(c_idle.idle), TIF_FORK); printk(KERN_INFO "Booting processor %d/%d 
APIC 0x%x\n", cpu, cpus_weight(cpu_present_map), diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index bf337f4..92d3cfd 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -91,7 +91,7 @@ static inline void conditional_sti(struc local_irq_enable(); } -static int kstack_depth_to_print = 10; +static char kstack_depth_to_print = 10; #ifdef CONFIG_KALLSYMS #include @@ -338,7 +338,7 @@ void out_of_line_bug(void) #endif static DEFINE_SPINLOCK(die_lock); -static int die_owner = -1; +static signed char die_owner = -1; unsigned long oops_begin(void) { diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 58b1921..e6422fa 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -194,6 +194,8 @@ SECTIONS #endif } + empty_zero_page_pfn = (empty_zero_page - LOAD_OFFSET) >> PAGE_SHIFT; + STABS_DEBUG DWARF_DEBUG diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S index dfa358b..d9dc08c 100644 --- a/arch/x86_64/lib/copy_user.S +++ b/arch/x86_64/lib/copy_user.S @@ -15,11 +15,11 @@ .globl copy_to_user .p2align 4 copy_to_user: - GET_THREAD_INFO(%rax) + //GET_THREAD_INFO(%rax) movq %rdi,%rcx addq %rdx,%rcx jc bad_to_user - cmpq threadinfo_addr_limit(%rax),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_to_user 2: .byte 0xe9 /* 32bit jump */ @@ -43,11 +43,11 @@ copy_to_user: .globl copy_from_user .p2align 4 copy_from_user: - GET_THREAD_INFO(%rax) + //GET_THREAD_INFO(%rax) movq %rsi,%rcx addq %rdx,%rcx jc bad_from_user - cmpq threadinfo_addr_limit(%rax),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_from_user /* FALL THROUGH to copy_user_generic */ @@ -95,6 +95,7 @@ copy_user_generic: .previous .Lcug: pushq %rbx + pushq %r12 xorl %eax,%eax /*zero for the exception handler */ #ifdef FIX_ALIGNMENT @@ -117,20 +118,20 @@ copy_user_generic: .Ls1: movq (%rsi),%r11 .Ls2: movq 
1*8(%rsi),%r8 .Ls3: movq 2*8(%rsi),%r9 -.Ls4: movq 3*8(%rsi),%r10 +.Ls4: movq 3*8(%rsi),%r12 .Ld1: movq %r11,(%rdi) .Ld2: movq %r8,1*8(%rdi) .Ld3: movq %r9,2*8(%rdi) -.Ld4: movq %r10,3*8(%rdi) +.Ld4: movq %r12,3*8(%rdi) .Ls5: movq 4*8(%rsi),%r11 .Ls6: movq 5*8(%rsi),%r8 .Ls7: movq 6*8(%rsi),%r9 -.Ls8: movq 7*8(%rsi),%r10 +.Ls8: movq 7*8(%rsi),%r12 .Ld5: movq %r11,4*8(%rdi) .Ld6: movq %r8,5*8(%rdi) .Ld7: movq %r9,6*8(%rdi) -.Ld8: movq %r10,7*8(%rdi) +.Ld8: movq %r12,7*8(%rdi) decq %rdx @@ -169,6 +170,7 @@ copy_user_generic: jnz .Lloop_1 .Lende: + popq %r12 popq %rbx ret diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S index 72fd55e..b3d69e5 100644 --- a/arch/x86_64/lib/csum-copy.S +++ b/arch/x86_64/lib/csum-copy.S @@ -60,12 +60,13 @@ csum_partial_copy_generic: jle .Lignore .Lignore: - subq $7*8,%rsp + subq $8*8,%rsp movq %rbx,2*8(%rsp) movq %r12,3*8(%rsp) movq %r14,4*8(%rsp) movq %r13,5*8(%rsp) - movq %rbp,6*8(%rsp) + movq %r15,6*8(%rsp) + movq %rbp,7*8(%rsp) movq %r8,(%rsp) movq %r9,1*8(%rsp) @@ -84,7 +85,7 @@ csum_partial_copy_generic: /* main loop. 
clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ /* r11: temp3, rdx: temp4, r12 loopcnt */ - /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ + /* r15: temp5, rbp: temp6, r14 temp7, r13 temp8 */ .p2align 4 .Lloop: source @@ -97,7 +98,7 @@ csum_partial_copy_generic: movq 24(%rdi),%rdx source - movq 32(%rdi),%r10 + movq 32(%rdi),%r15 source movq 40(%rdi),%rbp source @@ -112,7 +113,7 @@ csum_partial_copy_generic: adcq %r8,%rax adcq %r11,%rax adcq %rdx,%rax - adcq %r10,%rax + adcq %r15,%rax adcq %rbp,%rax adcq %r14,%rax adcq %r13,%rax @@ -129,7 +130,7 @@ csum_partial_copy_generic: movq %rdx,24(%rsi) dest - movq %r10,32(%rsi) + movq %r15,32(%rsi) dest movq %rbp,40(%rsi) dest @@ -149,7 +150,7 @@ csum_partial_copy_generic: /* do last upto 56 bytes */ .Lhandle_tail: /* ecx: count */ - movl %ecx,%r10d + movl %ecx,%r15d andl $63,%ecx shrl $3,%ecx jz .Lfold @@ -176,7 +177,7 @@ csum_partial_copy_generic: /* do last upto 6 bytes */ .Lhandle_7: - movl %r10d,%ecx + movl %r15d,%ecx andl $7,%ecx shrl $1,%ecx jz .Lhandle_1 @@ -198,7 +199,7 @@ csum_partial_copy_generic: /* handle last odd byte */ .Lhandle_1: - testl $1,%r10d + testl $1,%r15d jz .Lende xorl %ebx,%ebx source @@ -213,8 +214,9 @@ csum_partial_copy_generic: movq 3*8(%rsp),%r12 movq 4*8(%rsp),%r14 movq 5*8(%rsp),%r13 - movq 6*8(%rsp),%rbp - addq $7*8,%rsp + movq 6*8(%rsp),%r15 + movq 7*8(%rsp),%rbp + addq $8*8,%rsp ret /* Exception handlers. 
Very simple, zeroing is done in the wrappers */ diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S index 3844d5e..0a01f59 100644 --- a/arch/x86_64/lib/getuser.S +++ b/arch/x86_64/lib/getuser.S @@ -36,8 +36,8 @@ .p2align 4 .globl __get_user_1 __get_user_1: - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + //GET_THREAD_INFO(%r10) + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_get_user 1: movzb (%rcx),%edx xorl %eax,%eax @@ -46,10 +46,10 @@ __get_user_1: .p2align 4 .globl __get_user_2 __get_user_2: - GET_THREAD_INFO(%r8) + //GET_THREAD_INFO(%r10) addq $1,%rcx jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 20f decq %rcx 2: movzwl (%rcx),%edx @@ -61,10 +61,10 @@ __get_user_2: .p2align 4 .globl __get_user_4 __get_user_4: - GET_THREAD_INFO(%r8) + //GET_THREAD_INFO(%r10) addq $3,%rcx jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 30f subq $3,%rcx 3: movl (%rcx),%edx @@ -76,10 +76,10 @@ __get_user_4: .p2align 4 .globl __get_user_8 __get_user_8: - GET_THREAD_INFO(%r8) + //GET_THREAD_INFO(%r10) addq $7,%rcx jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 40f subq $7,%rcx 4: movq (%rcx),%rdx diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S index 7f55939..d08b152 100644 --- a/arch/x86_64/lib/putuser.S +++ b/arch/x86_64/lib/putuser.S @@ -34,8 +34,8 @@ .p2align 4 .globl __put_user_1 __put_user_1: - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + //GET_THREAD_INFO(%r8) + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_put_user 1: movb %dl,(%rcx) xorl %eax,%eax @@ -44,10 +44,10 @@ __put_user_1: .p2align 4 .globl __put_user_2 __put_user_2: - GET_THREAD_INFO(%r8) + //GET_THREAD_INFO(%r8) addq $1,%rcx jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 20f decq %rcx 2: movw %dx,(%rcx) @@ -59,10 +59,10 @@ __put_user_2: .p2align 4 .globl __put_user_4 __put_user_4: 
- GET_THREAD_INFO(%r8) + //GET_THREAD_INFO(%r8) addq $3,%rcx jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 30f subq $3,%rcx 3: movl %edx,(%rcx) @@ -74,10 +74,10 @@ __put_user_4: .p2align 4 .globl __put_user_8 __put_user_8: - GET_THREAD_INFO(%r8) + //GET_THREAD_INFO(%r8) addq $7,%rcx jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 40f subq $7,%rcx 4: movq %rdx,(%rcx) diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 3a63707..a7e62ca 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -190,7 +190,7 @@ KERN_ERR "******* Disabling USB legacy i static int is_errata93(struct pt_regs *regs, unsigned long address) { - static int warned; + static char warned; if (address != regs->rip) return 0; if ((address >> 32) != 0) @@ -236,7 +236,7 @@ static noinline void pgtable_bad(unsigne * * This assumes no large pages in there. */ -static int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -282,8 +282,8 @@ static int vmalloc_fault(unsigned long a return 0; } -int page_fault_trace = 0; -int exception_trace = 1; +char page_fault_trace = 0; +char exception_trace = 1; /* * This routine handles page faults. 
It determines the address, @@ -299,7 +299,6 @@ int exception_trace = 1; asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { - struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; @@ -321,8 +320,7 @@ asmlinkage void __kprobes do_page_fault( printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - tsk = current; - mm = tsk->mm; + mm = current->mm; info.si_code = SEGV_MAPERR; @@ -339,20 +337,8 @@ asmlinkage void __kprobes do_page_fault( * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 1) == 0. */ - if (unlikely(address >= TASK_SIZE64)) { - if (!(error_code & 5) && - ((address >= VMALLOC_START && address < VMALLOC_END) || - (address >= MODULES_VADDR && address < MODULES_END))) { - if (vmalloc_fault(address) < 0) - goto bad_area_nosemaphore; - return; - } - /* - * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. - */ - goto bad_area_nosemaphore; - } + if (unlikely(address >= TASK_SIZE64)) + goto vmalloc_address; if (unlikely(error_code & (1 << 3))) pgtable_bad(address, regs, error_code); @@ -430,10 +416,10 @@ good_area: */ switch (handle_mm_fault(mm, vma, address, write)) { case VM_FAULT_MINOR: - tsk->min_flt++; + current->min_flt++; break; case VM_FAULT_MAJOR: - tsk->maj_flt++; + current->maj_flt++; break; case VM_FAULT_SIGBUS: goto do_sigbus; @@ -444,6 +430,20 @@ good_area: up_read(&mm->mmap_sem); return; +vmalloc_address: + if (!(error_code & 5) && + ((address >= VMALLOC_START && address < VMALLOC_END) || + (address >= MODULES_VADDR && address < MODULES_END))) { + if (vmalloc_fault(address) < 0) + goto bad_area_nosemaphore; + return; + } + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. 
+ */ + goto bad_area_nosemaphore; + /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. @@ -467,23 +467,23 @@ bad_area_nosemaphore: (address >> 32)) return; - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { + if (exception_trace && unhandled_signal(current, SIGSEGV)) { printk( "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->rip, + current->pid > 1 ? KERN_INFO : KERN_EMERG, + current->comm, current->pid, address, regs->rip, regs->rsp, error_code); } - tsk->thread.cr2 = address; + current->thread.cr2 = address; /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; + current->thread.error_code = error_code | (address >= TASK_SIZE); + current->thread.trap_no = 14; info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void __user *)address; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_info(SIGSEGV, &info, current); return; } @@ -537,7 +537,7 @@ out_of_memory: yield(); goto again; } - printk("VM: killing process %s\n", tsk->comm); + printk("VM: killing process %s\n", current->comm); if (error_code & 4) do_exit(SIGKILL); goto no_context; @@ -549,14 +549,14 @@ do_sigbus: if (!(error_code & 4)) goto no_context; - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; + current->thread.cr2 = address; + current->thread.error_code = error_code; + current->thread.trap_no = 14; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_info(SIGBUS, &info, current); return; } diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 286f6a6..d213e5a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -560,10 
+560,10 @@ int kern_addr_valid(unsigned long addr) #ifdef CONFIG_SYSCTL #include -extern int exception_trace, page_fault_trace; +extern char exception_trace, page_fault_trace; static ctl_table debug_table2[] = { - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, + { 99, "exception-trace", &exception_trace, sizeof(char), 0644, NULL, proc_dointvec }, { 0, } }; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 91dd669..d00fa93 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -633,9 +633,6 @@ static ssize_t read_zero(struct file * f if (!count) return 0; - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - left = count; /* do we want to be clever? Arbitrary cut-off */ diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c diff --git a/fs/aio.c b/fs/aio.c index 5a28b69..01629d8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -296,19 +296,18 @@ static void aio_cancel_all(struct kioctx static void wait_for_all_aios(struct kioctx *ctx) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); + DECLARE_WAITQUEUE(wait, current); if (!ctx->reqs_active) return; add_wait_queue(&ctx->wait, &wait); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_task_state(current, TASK_UNINTERRUPTIBLE); while (ctx->reqs_active) { schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_task_state(current, TASK_UNINTERRUPTIBLE); } - __set_task_state(tsk, TASK_RUNNING); + __set_task_state(current, TASK_RUNNING); remove_wait_queue(&ctx->wait, &wait); } @@ -574,20 +573,19 @@ struct kioctx *lookup_ioctx(unsigned lon static void use_mm(struct mm_struct *mm) { struct mm_struct *active_mm; - struct task_struct *tsk = current; - task_lock(tsk); - tsk->flags |= PF_BORROWED_MM; - active_mm = tsk->active_mm; + task_lock(current); + current->flags |= PF_BORROWED_MM; + active_mm = current->active_mm; atomic_inc(&mm->mm_count); - tsk->mm = mm; - tsk->active_mm = mm; + current->mm = mm; + current->active_mm = mm; /* * Note that on UML this *requires* 
PF_BORROWED_MM to be set, otherwise * it won't work. Update it accordingly if you change it here */ activate_mm(active_mm, mm); - task_unlock(tsk); + task_unlock(current); mmdrop(active_mm); } @@ -605,14 +603,12 @@ static void use_mm(struct mm_struct *mm) */ static void unuse_mm(struct mm_struct *mm) { - struct task_struct *tsk = current; - - task_lock(tsk); - tsk->flags &= ~PF_BORROWED_MM; - tsk->mm = NULL; + task_lock(current); + current->flags &= ~PF_BORROWED_MM; + current->mm = NULL; /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); + enter_lazy_tlb(mm, current); + task_unlock(current); } /* @@ -1109,8 +1105,7 @@ static int read_events(struct kioctx *ct struct timespec __user *timeout) { long start_jiffies = jiffies; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); + DECLARE_WAITQUEUE(wait, current); int ret; int i = 0; struct io_event ent; @@ -1171,7 +1166,7 @@ retry: while (likely(i < nr)) { add_wait_queue_exclusive(&ctx->wait, &wait); do { - set_task_state(tsk, TASK_INTERRUPTIBLE); + set_task_state(current, TASK_INTERRUPTIBLE); ret = aio_read_evt(ctx, &ent); if (ret) break; @@ -1181,14 +1176,14 @@ retry: if (to.timed_out) /* Only check after read evt */ break; schedule(); - if (signal_pending(tsk)) { + if (signal_pending(current)) { ret = -EINTR; break; } /*ret = aio_read_evt(ctx, &ent);*/ } while (1) ; - set_task_state(tsk, TASK_RUNNING); + set_task_state(current, TASK_RUNNING); remove_wait_queue(&ctx->wait, &wait); if (unlikely(ret <= 0)) diff --git a/fs/bad_inode.c b/fs/bad_inode.c index e172180..4b238e9 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -22,7 +22,7 @@ static int return_EIO(void) #define EIO_ERROR ((void *) (return_EIO)) -static struct file_operations bad_file_ops = +struct file_operations bad_file_ops = { .llseek = EIO_ERROR, .aio_read = EIO_ERROR, diff --git a/fs/compat.c b/fs/compat.c index 8e71cdb..a1d6492 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1176,8 +1176,8 @@ static ssize_t 
compat_do_readv_writev(in goto out; } - ret = rw_verify_area(type, file, pos, tot_len); - if (ret) + ret = rw_verify_area(file, type, tot_len, pos); + if (unlikely(ret)) goto out; fnv = NULL; diff --git a/fs/dnotify.c b/fs/dnotify.c index f3b540d..4e2cf21 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -153,12 +153,12 @@ EXPORT_SYMBOL(__inode_dir_notify); * To safely access ->d_parent we need to keep d_move away from it. Use the * dentry's d_lock for this. */ -void dnotify_parent(struct dentry *dentry, unsigned long event) +struct dentry *__dnotify_parent(struct dentry *dentry, unsigned long event) { struct dentry *parent; if (!dir_notify_enable) - return; + return dentry; spin_lock(&dentry->d_lock); parent = dentry->d_parent; @@ -170,6 +170,7 @@ void dnotify_parent(struct dentry *dentr } else { spin_unlock(&dentry->d_lock); } + return dentry; } EXPORT_SYMBOL_GPL(dnotify_parent); diff --git a/fs/exec.c b/fs/exec.c index c466fec..f42f2a6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -183,17 +183,21 @@ static int count(char __user * __user * for (;;) { char __user * p; - if (get_user(p, argv)) - return -EFAULT; + if (unlikely(get_user(p, argv))) + goto out_efault; if (!p) break; argv++; - if(++i > max) - return -E2BIG; + if (unlikely(++i > max)) + goto out_e2big; cond_resched(); } } return i; +out_efault: + return -EFAULT; +out_e2big: + return -E2BIG; } /* @@ -539,20 +543,18 @@ EXPORT_SYMBOL(kernel_read); static int exec_mmap(struct mm_struct *mm) { - struct task_struct *tsk; struct mm_struct * old_mm, *active_mm; /* Notify parent that we're no longer interested in the old VM */ - tsk = current; old_mm = current->mm; - mm_release(tsk, old_mm); + mm_release(current, old_mm); if (old_mm) { /* * Make sure that if there is a core dump in progress * for the old mm, we get out and die instead of going * through with the exec. We must hold mmap_sem around - * checking core_waiters and changing tsk->mm. The + * checking core_waiters and changing current->mm. 
The * core-inducing thread will increment core_waiters for * each thread whose ->mm == old_mm. */ @@ -562,12 +564,12 @@ static int exec_mmap(struct mm_struct *m return -EINTR; } } - task_lock(tsk); - active_mm = tsk->active_mm; - tsk->mm = mm; - tsk->active_mm = mm; + task_lock(current); + active_mm = current->active_mm; + current->mm = mm; + current->active_mm = mm; activate_mm(active_mm, mm); - task_unlock(tsk); + task_unlock(current); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); @@ -828,11 +830,11 @@ void get_task_comm(char *buf, struct tas task_unlock(tsk); } -void set_task_comm(struct task_struct *tsk, char *buf) +void set_current_task_comm(char *buf) { - task_lock(tsk); - strlcpy(tsk->comm, buf, sizeof(tsk->comm)); - task_unlock(tsk); + task_lock(current); + strlcpy(current->comm, buf, sizeof(current->comm)); + task_unlock(current); } int flush_old_exec(struct linux_binprm * bprm) @@ -890,7 +892,7 @@ int flush_old_exec(struct linux_binprm * tcomm[i++] = ch; } tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_current_task_comm(tcomm); current->flags &= ~PF_RANDOMIZE; flush_thread(); @@ -974,18 +976,18 @@ int prepare_binprm(struct linux_binprm * EXPORT_SYMBOL(prepare_binprm); -static inline int unsafe_exec(struct task_struct *p) +static inline int unsafe_exec(void) { int unsafe = 0; - if (p->ptrace & PT_PTRACED) { - if (p->ptrace & PT_PTRACE_CAP) + if (current->ptrace & PT_PTRACED) { + if (current->ptrace & PT_PTRACE_CAP) unsafe |= LSM_UNSAFE_PTRACE_CAP; else unsafe |= LSM_UNSAFE_PTRACE; } - if (atomic_read(&p->fs->count) > 1 || - atomic_read(&p->files->count) > 1 || - atomic_read(&p->sighand->count) > 1) + if (atomic_read(&current->fs->count) > 1 || + atomic_read(&current->files->count) > 1 || + atomic_read(&current->sighand->count) > 1) unsafe |= LSM_UNSAFE_SHARE; return unsafe; @@ -1000,7 +1002,7 @@ void compute_creds(struct linux_binprm * exec_keys(current); task_lock(current); - unsafe = unsafe_exec(current); + unsafe = unsafe_exec(); 
security_bprm_apply_creds(bprm, unsafe); task_unlock(current); security_bprm_post_apply_creds(bprm); @@ -1147,10 +1149,9 @@ int do_execve(char * filename, int i; retval = -ENOMEM; - bprm = kmalloc(sizeof(*bprm), GFP_KERNEL); + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) goto out_ret; - memset(bprm, 0, sizeof(*bprm)); file = open_exec(filename); retval = PTR_ERR(file); @@ -1376,8 +1377,7 @@ static void format_corename(char *corena static void zap_threads (struct mm_struct *mm) { struct task_struct *g, *p; - struct task_struct *tsk = current; - struct completion *vfork_done = tsk->vfork_done; + struct completion *vfork_done = current->vfork_done; int traced = 0; /* @@ -1385,13 +1385,13 @@ static void zap_threads (struct mm_struc * otherwise we can deadlock when we wait on each other */ if (vfork_done) { - tsk->vfork_done = NULL; + current->vfork_done = NULL; complete(vfork_done); } read_lock(&tasklist_lock); do_each_thread(g,p) - if (mm == p->mm && p != tsk) { + if (mm == p->mm && p != current) { force_sig_specific(SIGKILL, p); mm->core_waiters++; if (unlikely(p->ptrace) && @@ -1411,7 +1411,7 @@ static void zap_threads (struct mm_struc */ write_lock_irq(&tasklist_lock); do_each_thread(g,p) { - if (mm == p->mm && p != tsk && + if (mm == p->mm && p != current && p->ptrace && p->parent->mm == mm) { __ptrace_unlink(p); } diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 98e7834..dea04d9 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -107,7 +107,7 @@ force_commit: struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, + .read = generic_file_read, .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = ext3_file_write, diff --git a/fs/file_table.c b/fs/file_table.c index c3a5e2f..0fa6460 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -71,6 +71,7 @@ static inline void file_free(struct file */ struct file *get_empty_filp(void) { + extern struct file_operations bad_file_ops; static int 
old_max; struct file * f; @@ -91,6 +92,7 @@ struct file *get_empty_filp(void) eventpoll_init_file(f); atomic_set(&f->f_count, 1); + f->f_op = &bad_file_ops; f->f_uid = current->fsuid; f->f_gid = current->fsgid; rwlock_init(&f->f_owner.lock); @@ -186,27 +188,27 @@ EXPORT_SYMBOL(fget); * and a flag is returned to be passed to the corresponding fput_light(). * There must not be a cloning between an fget_light/fput_light pair. */ -struct file fastcall *fget_light(unsigned int fd, int *fput_needed) +struct file fastcall *__fget_light(unsigned int fd) { struct file *file; struct files_struct *files = current->files; - *fput_needed = 0; if (likely((atomic_read(&files->count) == 1))) { file = fcheck_files(files, fd); } else { rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (rcuref_inc_lf(&file->f_count)) - *fput_needed = 1; - else + if (!rcuref_inc_lf(&file->f_count)) /* Didn't get the reference, someone's freed */ - file = NULL; + file = ERR_PTR(-EBADF); } rcu_read_unlock(); } + if (!file) + file = ERR_PTR(-EBADF); + return file; } diff --git a/fs/inotify.c b/fs/inotify.c index bf7ce1d..bc957e8 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -935,7 +935,7 @@ asmlinkage long sys_inotify_add_watch(in int mask_add = 0; filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) + if (unlikely(IS_ERR(filp))) return -EBADF; /* verify that this is indeed an inotify instance */ @@ -1005,7 +1005,7 @@ asmlinkage long sys_inotify_rm_watch(int int ret, fput_needed; filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) + if (unlikely(IS_ERR(filp))) return -EBADF; /* verify that this is indeed an inotify instance */ diff --git a/fs/ioctl.c b/fs/ioctl.c index 5692091..6c455b4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -163,7 +163,7 @@ asmlinkage long sys_ioctl(unsigned int f int fput_needed; filp = fget_light(fd, &fput_needed); - if (!filp) + if (IS_ERR(filp)) goto out; error = security_file_ioctl(filp, cmd, arg); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c diff 
--git a/fs/read_write.c b/fs/read_write.c index a091ee4..43aba38 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -126,25 +126,25 @@ EXPORT_SYMBOL(vfs_llseek); asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) { - off_t retval; + off_t ret; struct file * file; - int fput_needed; - retval = -EBADF; file = fget_light(fd, &fput_needed); - if (!file) + if (IS_ERR(file)) { + ret = PTR_ERR(file); goto bad; + } - retval = -EINVAL; + ret = -EINVAL; if (origin <= 2) { loff_t res = vfs_llseek(file, offset, origin); - retval = res; - if (res != (loff_t)retval) - retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ + ret = res; + if (res != (loff_t)ret) + ret = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } fput_light(file, fput_needed); bad: - return retval; + return ret; } #ifdef __ARCH_WANT_SYS_LLSEEK @@ -152,38 +152,37 @@ asmlinkage long sys_llseek(unsigned int unsigned long offset_low, loff_t __user * result, unsigned int origin) { - int retval; + int ret; struct file * file; loff_t offset; - int fput_needed; - retval = -EBADF; + ret = -EBADF; file = fget_light(fd, &fput_needed); - if (!file) + if (IS_ERR(file)) goto bad; - retval = -EINVAL; + ret = -EINVAL; if (origin > 2) goto out_putf; offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, origin); - retval = (int)offset; + ret = (int)offset; if (offset >= 0) { - retval = -EFAULT; + ret = -EFAULT; if (!copy_to_user(result, &offset, sizeof(offset))) - retval = 0; + ret = 0; } out_putf: fput_light(file, fput_needed); bad: - return retval; + return ret; } #endif -int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) +long rw_verify_area(struct file *file, int read_write, size_t count, loff_t *ppos) { struct inode *inode; loff_t pos; @@ -218,6 +217,10 @@ ssize_t do_sync_read(struct file *filp, struct kiocb kiocb; ssize_t ret; + ret = -EINVAL; + if (unlikely(!filp->f_op->aio_read)) + goto out; + 
init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; while (-EIOCBRETRY == @@ -227,6 +230,7 @@ ssize_t do_sync_read(struct file *filp, if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; +out: return ret; } @@ -236,21 +240,21 @@ ssize_t vfs_read(struct file *file, char { ssize_t ret; - if (!(file->f_mode & FMODE_READ)) - return -EBADF; - if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) - return -EINVAL; + if (unlikely(!(file->f_mode & FMODE_READ))) + goto out_ebadf; if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) - return -EFAULT; + goto out_efault; - ret = rw_verify_area(READ, file, pos, count); - if (!ret) { + ret = rw_verify_area(file, READ, count, pos); + if (likely(!ret)) { ret = security_file_permission (file, MAY_READ); - if (!ret) { - if (file->f_op->read) - ret = file->f_op->read(file, buf, count, pos); - else - ret = do_sync_read(file, buf, count, pos); + if (likely(!ret)) { + ssize_t (*read)(struct file *, char __user *, size_t, + loff_t *); + read = file->f_op->read; + if (unlikely(!read)) + read = do_sync_read; + ret = read(file, buf, count, pos); if (ret > 0) { fsnotify_access(file->f_dentry); current->rchar += ret; @@ -260,6 +264,10 @@ ssize_t vfs_read(struct file *file, char } return ret; +out_ebadf: + return -EBADF; +out_efault: + return -EFAULT; } EXPORT_SYMBOL(vfs_read); @@ -269,6 +277,9 @@ ssize_t do_sync_write(struct file *filp, struct kiocb kiocb; ssize_t ret; + ret = -EINVAL; + if (unlikely(!filp->f_op->aio_write)) + goto out; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; while (-EIOCBRETRY == @@ -278,6 +289,7 @@ ssize_t do_sync_write(struct file *filp, if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; +out: return ret; } @@ -287,21 +299,21 @@ ssize_t vfs_write(struct file *file, con { ssize_t ret; - if (!(file->f_mode & FMODE_WRITE)) - return -EBADF; - if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) - return -EINVAL; + if 
(unlikely(!(file->f_mode & FMODE_WRITE))) + goto out_ebadf; if (unlikely(!access_ok(VERIFY_READ, buf, count))) - return -EFAULT; + goto out_efault; - ret = rw_verify_area(WRITE, file, pos, count); + ret = rw_verify_area(file, WRITE, count, pos); if (!ret) { ret = security_file_permission (file, MAY_WRITE); if (!ret) { - if (file->f_op->write) - ret = file->f_op->write(file, buf, count, pos); - else - ret = do_sync_write(file, buf, count, pos); + ssize_t (*write)(struct file *, const char __user *, + size_t, loff_t *); + write = file->f_op->write; + if (unlikely(!write)) + write = do_sync_write; /* Returns -EINVAL if no op */ + ret = write(file, buf, count, pos); if (ret > 0) { fsnotify_modify(file->f_dentry); current->wchar += ret; @@ -311,6 +323,10 @@ ssize_t vfs_write(struct file *file, con } return ret; +out_ebadf: + return -EBADF; +out_efault: + return -EFAULT; } EXPORT_SYMBOL(vfs_write); @@ -328,36 +344,36 @@ static inline void file_pos_write(struct asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) { struct file *file; - ssize_t ret = -EBADF; - int fput_needed; file = fget_light(fd, &fput_needed); - if (file) { + if (!IS_ERR(file)) { + ssize_t ret; loff_t pos = file_pos_read(file); ret = vfs_read(file, buf, count, &pos); file_pos_write(file, pos); fput_light(file, fput_needed); + return ret; } - return ret; + return PTR_ERR(file); } EXPORT_SYMBOL_GPL(sys_read); asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) { struct file *file; - ssize_t ret = -EBADF; - int fput_needed; file = fget_light(fd, &fput_needed); - if (file) { + if (!IS_ERR(file)) { + ssize_t ret; loff_t pos = file_pos_read(file); ret = vfs_write(file, buf, count, &pos); file_pos_write(file, pos); fput_light(file, fput_needed); - } + return ret; + } - return ret; + return PTR_ERR(file); } asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, @@ -365,13 +381,12 @@ asmlinkage ssize_t sys_pread64(unsigned { struct file *file; 
ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; file = fget_light(fd, &fput_needed); - if (file) { + if (!IS_ERR(file)) { ret = -ESPIPE; if (file->f_mode & FMODE_PREAD) ret = vfs_read(file, buf, count, &pos); @@ -386,13 +401,12 @@ asmlinkage ssize_t sys_pwrite64(unsigned { struct file *file; ssize_t ret = -EBADF; - int fput_needed; if (pos < 0) return -EINVAL; file = fget_light(fd, &fput_needed); - if (file) { + if (!IS_ERR(file)) { ret = -ESPIPE; if (file->f_mode & FMODE_PWRITE) ret = vfs_write(file, buf, count, &pos); @@ -496,7 +510,7 @@ static ssize_t do_readv_writev(int type, goto out; } - ret = rw_verify_area(type, file, pos, tot_len); + ret = rw_verify_area(file, type, tot_len, pos); if (ret) goto out; ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); @@ -585,10 +599,9 @@ sys_readv(unsigned long fd, const struct { struct file *file; ssize_t ret = -EBADF; - int fput_needed; file = fget_light(fd, &fput_needed); - if (file) { + if (!IS_ERR(file)) { loff_t pos = file_pos_read(file); ret = vfs_readv(file, vec, vlen, &pos); file_pos_write(file, pos); @@ -606,10 +619,9 @@ sys_writev(unsigned long fd, const struc { struct file *file; ssize_t ret = -EBADF; - int fput_needed; file = fget_light(fd, &fput_needed); - if (file) { + if (!IS_ERR(file)) { loff_t pos = file_pos_read(file); ret = vfs_writev(file, vec, vlen, &pos); file_pos_write(file, pos); @@ -628,91 +640,90 @@ static ssize_t do_sendfile(int out_fd, i struct file * in_file, * out_file; struct inode * in_inode, * out_inode; loff_t pos; - ssize_t retval; - int fput_needed_in, fput_needed_out; + ssize_t ret; /* * Get input file, and verify that it is ok.. 
*/ - retval = -EBADF; + ret = -EBADF; in_file = fget_light(in_fd, &fput_needed_in); - if (!in_file) + if (IS_ERR(in_file)) goto out; if (!(in_file->f_mode & FMODE_READ)) goto fput_in; - retval = -EINVAL; + ret = -EINVAL; in_inode = in_file->f_dentry->d_inode; if (!in_inode) goto fput_in; if (!in_file->f_op || !in_file->f_op->sendfile) goto fput_in; - retval = -ESPIPE; + ret = -ESPIPE; if (!ppos) ppos = &in_file->f_pos; else if (!(in_file->f_mode & FMODE_PREAD)) goto fput_in; - retval = rw_verify_area(READ, in_file, ppos, count); - if (retval) + ret = rw_verify_area(in_file, READ, count, ppos); + if (ret) goto fput_in; - retval = security_file_permission (in_file, MAY_READ); - if (retval) + ret = security_file_permission (in_file, MAY_READ); + if (ret) goto fput_in; /* * Get output file, and verify that it is ok.. */ - retval = -EBADF; + ret = -EBADF; out_file = fget_light(out_fd, &fput_needed_out); - if (!out_file) + if (IS_ERR(out_file)) goto fput_in; if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; - retval = -EINVAL; + ret = -EINVAL; if (!out_file->f_op || !out_file->f_op->sendpage) goto fput_out; out_inode = out_file->f_dentry->d_inode; - retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); - if (retval) + ret = rw_verify_area(out_file, WRITE, count, &out_file->f_pos); + if (ret) goto fput_out; - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) + ret = security_file_permission (out_file, MAY_WRITE); + if (ret) goto fput_out; if (!max) max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); pos = *ppos; - retval = -EINVAL; + ret = -EINVAL; if (unlikely(pos < 0)) goto fput_out; if (unlikely(pos + count > max)) { - retval = -EOVERFLOW; + ret = -EOVERFLOW; if (pos >= max) goto fput_out; count = max - pos; } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); - if (retval > 0) { - current->rchar += 
retval; - current->wchar += retval; + if (ret > 0) { + current->rchar += ret; + current->wchar += ret; } current->syscr++; current->syscw++; if (*ppos > max) - retval = -EOVERFLOW; + ret = -EOVERFLOW; fput_out: fput_light(out_file, fput_needed_out); fput_in: fput_light(in_file, fput_needed_in); out: - return retval; + return ret; } asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index 0866ef6..e25322c 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -31,7 +31,9 @@ typedef struct { volatile int counter; } * * Atomically reads the value of @v. */ -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) ({ atomic_t *a = (v); \ + __asm__ volatile("# atomic read: %0\n" : "+m" (*a) :: "memory"); \ + a->counter; }) /** * atomic_set - set atomic variable diff --git a/include/asm-x86_64/bug.h b/include/asm-x86_64/bug.h index 80ac1fe..95f2dc5 100644 --- a/include/asm-x86_64/bug.h +++ b/include/asm-x86_64/bug.h @@ -2,6 +2,7 @@ #define __ASM_X8664_BUG_H 1 #include +#include /* * Tell the user there is some problem. 
The exception handler decodes @@ -25,7 +26,7 @@ struct bug_frame { asm volatile( \ "ud2 ; pushq $%c1 ; ret $%c0" :: \ "i"(__LINE__), "i" (__FILE__)) -void out_of_line_bug(void); +void out_of_line_bug(void) ATTRIB_NORET; #else static inline void out_of_line_bug(void) { } #endif diff --git a/include/asm-x86_64/calling.h b/include/asm-x86_64/calling.h diff --git a/include/asm-x86_64/current.h b/include/asm-x86_64/current.h index bc8adec..6675f2d 100644 --- a/include/asm-x86_64/current.h +++ b/include/asm-x86_64/current.h @@ -6,13 +6,7 @@ struct task_struct; #include -static inline struct task_struct *get_current(void) -{ - struct task_struct *t = read_pda(pcurrent); - return t; -} - -#define current get_current() +register struct task_struct *current __asm__("%r10"); #else diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index 3376486..ece54d4 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h index aa39cfd..ae8b4c1 100644 --- a/include/asm-x86_64/i387.h +++ b/include/asm-x86_64/i387.h @@ -30,7 +30,7 @@ extern int save_i387(struct _fpstate __u */ #define unlazy_fpu(tsk) do { \ - if ((tsk)->thread_info->status & TS_USEDFPU) \ + if (task_thread_info(tsk)->status & TS_USEDFPU) \ save_init_fpu(tsk); \ } while (0) @@ -46,9 +46,9 @@ static inline void tolerant_fwait(void) } #define clear_fpu(tsk) do { \ - if ((tsk)->thread_info->status & TS_USEDFPU) { \ + if (task_thread_info(tsk)->status & TS_USEDFPU) { \ tolerant_fwait(); \ - (tsk)->thread_info->status &= ~TS_USEDFPU; \ + task_thread_info(tsk)->status &= ~TS_USEDFPU; \ stts(); \ } \ } while (0) @@ -135,7 +135,7 @@ static inline void save_init_fpu( struct { asm volatile( "rex64 ; fxsave %0 ; fnclex" : "=m" (tsk->thread.i387.fxsave)); - tsk->thread_info->status &= ~TS_USEDFPU; + 
task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); } diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h index f604e84..8122e52 100644 --- a/include/asm-x86_64/kdebug.h +++ b/include/asm-x86_64/kdebug.h @@ -1,6 +1,7 @@ #ifndef _X86_64_KDEBUG_H #define _X86_64_KDEBUG_H 1 +#include #include struct pt_regs; @@ -37,8 +38,13 @@ enum die_val { static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig) { - struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig }; - return notifier_call_chain(&die_chain, val, &args); + if (unlikely(die_chain)) { + struct die_args args = { + .regs=regs, .str=str, .err=err, .trapnr=trap, + .signr=sig + }; + return notifier_call_chain(&die_chain, val, &args); + } } extern int printk_address(unsigned long address); diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h index b630d52..8079878 100644 --- a/include/asm-x86_64/mmu_context.h +++ b/include/asm-x86_64/mmu_context.h @@ -12,7 +12,8 @@ /* * possibly do the LDT unload here? */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +int x86_64_init_new_context(struct mm_struct *mm); +#define init_new_context(tsk, mm) x86_64_init_new_context(mm) void destroy_context(struct mm_struct *mm); #ifdef CONFIG_SMP diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index ecf58c7..4a3fd8f 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -30,9 +30,14 @@ extern unsigned long pgkern_mask; /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. + * The variable empty_zero_page_pfn is linker magic to avoid runtime + * calculations of the zero page constant. 
*/ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +extern unsigned long empty_zero_page_pfn; +#define ZERO_PAGE_PFN ((long)&empty_zero_page_pfn) +#define IS_ZERO_PAGE_PFN(pfn,vaddr) ((pfn) == ZERO_PAGE_PFN) +#define ZERO_PAGE(vaddr) (pfn_to_page(ZERO_PAGE_PFN)) /* * PGDIR_SHIFT determines what a top-level page table entry can map @@ -450,4 +455,7 @@ extern int kern_addr_valid(unsigned long #define __HAVE_ARCH_PTE_SAME #include +//#define __mod_page_state(offset, delta) add_pda_offset(per_cpu__page_states,offset,delta) +#define __mod_page_state(offset, delta) (*((unsigned long *)&__get_cpu_var(page_states)) += delta) + #endif /* _X86_64_PGTABLE_H */ diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 4861246..7f24beb 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include #define TF_MASK 0x00000100 #define IF_MASK 0x00000200 @@ -230,6 +231,7 @@ DECLARE_PER_CPU(struct tss_struct,init_t #define ARCH_MIN_TASKALIGN 16 struct thread_struct { + struct thread_info info; unsigned long rsp0; unsigned long rsp; unsigned long userrsp; /* Copy from PDA */ @@ -257,6 +259,7 @@ struct thread_struct { } __attribute__((aligned(16))); #define INIT_THREAD { \ + .info = INIT_THREAD_INFO(init_task), \ .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ } @@ -467,13 +470,6 @@ static inline void __mwait(unsigned long : :"a" (eax), "c" (ecx)); } -#define stack_current() \ -({ \ - struct thread_info *ti; \ - asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->task; \ -}) - #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern unsigned long boot_option_idle_override; diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 3450108..699ff9f 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -96,7 +96,7 @@ extern void 
swiotlb_init(void); extern unsigned long table_start, table_end; -extern int exception_trace; +extern char exception_trace, page_fault_trace; extern int force_iommu, no_iommu; extern int using_apic_timer; extern int disable_apic; diff --git a/include/asm-x86_64/signal.h b/include/asm-x86_64/signal.h diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index 85348e0..21e02bb 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -20,8 +20,8 @@ #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" /* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushfq ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popfq\n\t" +#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\n\t" #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" @@ -33,16 +33,14 @@ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ - LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + LOCK "btr %[tif_fork],%P[ti_flags](%%r10)\n\t" \ "movq %%rax,%%rdi\n\t" \ "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)),\ + [ti_flags] "i" (offsetof(struct task_struct, thread.info.flags)),\ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h index 08eb6e4..e849bcf 100644 --- a/include/asm-x86_64/thread_info.h +++ b/include/asm-x86_64/thread_info.h @@ -57,20 +57,16 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) 
#define init_stack (init_thread_union.stack) -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} - -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) -{ - struct thread_info *ti; - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~(THREAD_SIZE - 1))); - return ti; -} +#define task_thread_info(t) (&(t)->thread.info) +#define current_thread_info() task_thread_info(current) + +#define setup_thread_stack(p, org) ({ \ + task_thread_info(p)->task = (p); \ +}) + +#define end_of_stack(p) ((unsigned long *)(p)->thread_info + 1) + +#define __HAVE_THREAD_FUNCTIONS /* thread information allocation */ #define alloc_thread_info(tsk) \ @@ -79,12 +75,12 @@ static inline struct thread_info *stack_ #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) + #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq %gs:pda_pcurrent,reg #endif diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h index 1bb8b8a..5551666 100644 --- a/include/asm-x86_64/uaccess.h +++ b/include/asm-x86_64/uaccess.h @@ -39,13 +39,39 @@ * Uhhuh, this needs 65-bit arithmetic. We have a carry.. 
*/ #define __range_not_ok(addr,size) ({ \ - unsigned long flag,sum; \ - __chk_user_ptr(addr); \ - asm("# range_ok\n\r" \ - "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ - :"=&r" (flag), "=r" (sum) \ - :"1" (addr),"g" ((long)(size)),"g" (current_thread_info()->addr_limit.seg)); \ - flag; }) + unsigned long flag,sum; \ + __chk_user_ptr(addr); \ + asm("# range_ok\n\r" \ + "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ + :"=&r" (flag), "=r" (sum) \ + :"1" (addr),"g" ((long)(size)),"g" (current_thread_info()->addr_limit.seg)); \ + flag; }) + +static inline int my__range_not_ok(const void __user *_addr, unsigned long size) +{ + unsigned long addr = (unsigned long)_addr; + __chk_user_ptr(addr); + /* this only works for addr_limit == 0x80 << 56 or -1 */ + if ((addr | size | (addr + size)) > current_thread_info()->addr_limit.seg) + return 1; + return 0; +} + +static inline int test__range_not_ok(const void __user *addr, unsigned long size) +{ + int a, b; + + a = __range_not_ok(addr, size); + b = my__range_not_ok(addr, size); + + if ((a != b) || a) { + static unsigned long test; + if (test++ < 5) + printk("test_range_not_ok: 0x%p 0x%08lx limit = 0x%08lx\n %d %d", + addr, size, current_thread_info()->addr_limit.seg, a, b); + } + return a; +} #define access_ok(type, addr, size) (__range_not_ok(addr,size) == 0) diff --git a/include/linux/capability.h b/include/linux/capability.h diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index f134a01..cc8bf26 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -25,7 +25,16 @@ struct dnotify_struct { extern void __inode_dir_notify(struct inode *, unsigned long); extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); -extern void dnotify_parent(struct dentry *, unsigned long); +extern struct dentry *__dnotify_parent(struct dentry *, unsigned long); + +extern int 
dir_notify_enable; + +static inline struct dentry *dnotify_parent(struct dentry *dentry, unsigned long event) +{ + if (!dir_notify_enable) + return dentry; + return __dnotify_parent(dentry, event); +} static inline void inode_dir_notify(struct inode *inode, unsigned long event) { diff --git a/include/linux/file.h b/include/linux/file.h index 418b610..42fbedd 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -47,14 +47,14 @@ struct files_struct { extern void FASTCALL(__fput(struct file *)); extern void FASTCALL(fput(struct file *)); -static inline void fput_light(struct file *file, int fput_needed) -{ - if (unlikely(fput_needed)) - fput(file); -} +#define fput_light(file, fput_needed) do { \ + if (unlikely(atomic_read(&current->files->count) != 1)) \ + fput(file); \ +} while (0) extern struct file * FASTCALL(fget(unsigned int fd)); -extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed)); +extern struct file * FASTCALL(__fget_light(unsigned int fd)); +#define fget_light(fd, needed) __fget_light(fd) extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag)); extern void put_filp(struct file *); extern int get_unused_fd(void); diff --git a/include/linux/fs.h b/include/linux/fs.h index cc35b6a..86a317a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -374,11 +374,9 @@ struct block_device { void * bd_holder; int bd_holders; struct block_device * bd_contains; - unsigned bd_block_size; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - int bd_invalidated; struct gendisk * bd_disk; struct list_head bd_list; struct backing_dev_info *bd_inode_backing_dev_info; @@ -389,6 +387,8 @@ struct block_device { * care to not mess up bd_private for that case. 
*/ unsigned long bd_private; + unsigned bd_invalidated :1; + unsigned short bd_block_size; }; /* @@ -1288,7 +1288,7 @@ static inline int locks_verify_locked(st return 0; } -extern int rw_verify_area(int, struct file *, loff_t *, size_t); +extern long rw_verify_area(struct file *, int, size_t, loff_t *); static inline int locks_verify_truncate(struct inode *inode, struct file *filp, diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 03b8e79..877bea5 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -96,7 +96,7 @@ static inline void fsnotify_access(struc if (S_ISDIR(inode->i_mode)) mask |= IN_ISDIR; - dnotify_parent(dentry, DN_ACCESS); + dentry = dnotify_parent(dentry, DN_ACCESS); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL); } @@ -112,7 +112,7 @@ static inline void fsnotify_modify(struc if (S_ISDIR(inode->i_mode)) mask |= IN_ISDIR; - dnotify_parent(dentry, DN_MODIFY); + dentry = dnotify_parent(dentry, DN_MODIFY); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL); } diff --git a/include/linux/ipc.h b/include/linux/ipc.h diff --git a/include/linux/kernel.h b/include/linux/kernel.h diff --git a/include/linux/list.h b/include/linux/list.h diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f34767c..75bf5b3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -137,7 +137,10 @@ extern void get_page_state(struct page_s extern void get_page_state_node(struct page_state *ret, int node); extern void get_full_page_state(struct page_state *ret); extern unsigned long __read_page_state(unsigned long offset); +#ifndef __mod_page_state extern void __mod_page_state(unsigned long offset, unsigned long delta); +#endif +extern DEFINE_PER_CPU(struct page_state, page_states); #define read_page_state(member) \ __read_page_state(offsetof(struct page_state, 
member)) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb8d2d2..4f16023 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -42,10 +42,7 @@ extern void free_percpu(const void *); static inline void *__alloc_percpu(size_t size, size_t align) { - void *ret = kmalloc(size, GFP_KERNEL); - if (ret) - memset(ret, 0, size); - return ret; + return kzalloc(size, GFP_KERNEL); } static inline void free_percpu(const void *ptr) { diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h diff --git a/include/linux/sched.h b/include/linux/sched.h index 2038bd2..d9932e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -484,7 +484,7 @@ struct signal_struct { * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. */ -#define MAX_USER_RT_PRIO 100 +#define MAX_USER_RT_PRIO 100 /* must fit in a signed char */ #define MAX_RT_PRIO MAX_USER_RT_PRIO #define MAX_PRIO (MAX_RT_PRIO + 40) @@ -690,21 +690,30 @@ struct task_struct { unsigned long flags; /* per process flags, defined below */ unsigned long ptrace; - int lock_depth; /* BKL lock depth */ - #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) int oncpu; #endif - int prio, static_prio; struct list_head run_list; prio_array_t *array; unsigned short ioprio; + int prio; + int static_prio; + + signed char lock_depth; /* BKL lock depth */ + signed char pdeath_signal; /* The signal sent when the parent dies */ + signed char oomkilladj; /* OOM kill score adjustment (bit shift). 
*/ + signed char exit_signal; /* -1 or signal # */ + unsigned short exit_code; + unsigned keep_capabilities:1; + unsigned did_exec:1; + int activated:4; + + long exit_state; unsigned long sleep_avg; unsigned long long timestamp, last_ran; unsigned long long sched_time; /* sched_clock time spent running */ - int activated; unsigned long policy; cpumask_t cpus_allowed; @@ -726,12 +735,8 @@ struct task_struct { /* task state */ struct linux_binfmt *binfmt; - long exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ /* ??? */ unsigned long personality; - unsigned did_exec:1; pid_t pid; pid_t tgid; /* @@ -772,13 +777,11 @@ struct task_struct { gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted; - unsigned keep_capabilities:1; struct user_struct *user; #ifdef CONFIG_KEYS struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif - int oomkilladj; /* OOM kill score adjustment (bit shift). */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -793,6 +796,8 @@ struct task_struct { struct fs_struct *fs; /* open file information */ struct files_struct *files; +/* i/o counters(bytes read/written, #syscalls */ + u64 rchar, wchar, syscr, syscw; /* namespace */ struct namespace *namespace; /* signal handlers */ @@ -840,8 +845,6 @@ struct task_struct { * to a stack based synchronous wait) if its doing sync IO. 
*/ wait_queue_t *io_wait; -/* i/o counters(bytes read/written, #syscalls */ - u64 rchar, wchar, syscr, syscw; #if defined(CONFIG_BSD_PROCESS_ACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ @@ -1159,7 +1162,7 @@ extern int do_execve(char *, char __user extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); task_t *fork_idle(int); -extern void set_task_comm(struct task_struct *tsk, char *from); +extern void set_current_task_comm(char *from); extern void get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index dc89116..61eabc3 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -13,16 +13,13 @@ typedef struct { int mode; } seccomp_t; extern void __secure_computing(int); -static inline void secure_computing(int this_syscall) -{ - if (unlikely(test_thread_flag(TIF_SECCOMP))) - __secure_computing(this_syscall); -} +#define secure_computing(this_syscall) \ +do { \ + if (unlikely(test_thread_flag(TIF_SECCOMP))) \ + __secure_computing(this_syscall); \ +} while (0) -static inline int has_secure_computing(struct thread_info *ti) -{ - return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); -} +#define has_secure_computing(ti) unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)) #else /* CONFIG_SECCOMP */ diff --git a/include/linux/security.h b/include/linux/security.h index f7e0ae0..aa48844 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1120,7 +1120,7 @@ struct security_operations { int (*inode_setsecurity)(struct inode *inode, const char *name, const void *value, size_t size, int flags); int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size); - int (*file_permission) (struct file * file, int mask); + long (*file_permission) (struct file * file, int mask); int (*file_alloc_security) (struct file * file); void 
(*file_free_security) (struct file * file); int (*file_ioctl) (struct file * file, unsigned int cmd, @@ -1637,7 +1637,7 @@ static inline int security_inode_listsec return security_ops->inode_listsecurity(inode, buffer, buffer_size); } -static inline int security_file_permission (struct file *file, int mask) +static inline long security_file_permission (struct file *file, int mask) { return security_ops->file_permission (file, mask); } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h diff --git a/include/linux/shm.h b/include/linux/shm.h diff --git a/include/linux/signal.h b/include/linux/signal.h diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0a8ea8b..f259119 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -476,7 +476,7 @@ static inline void skb_header_release(st * Returns true if more than one person has a reference to this * buffer. */ -static inline int skb_shared(const struct sk_buff *skb) +static inline int skb_shared(struct sk_buff *skb) { return atomic_read(&skb->users) != 1; } diff --git a/include/linux/smp.h b/include/linux/smp.h index 9dfa3ee..a341b4d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -57,19 +57,20 @@ extern int smp_call_function (void (*fun int retry, int wait); /* - * Call a function on all processors + * Call a function on all processors. + * This needs to be a macro to allow for arch specific dependencies on + * sched.h in preempt_*().
*/ -static inline int on_each_cpu(void (*func) (void *info), void *info, - int retry, int wait) -{ - int ret = 0; - - preempt_disable(); - ret = smp_call_function(func, info, retry, wait); - func(info); - preempt_enable(); - return ret; -} +#define on_each_cpu(func, info, retry, wait) \ +({ \ + int ret = 0; \ + \ + preempt_disable(); \ + ret = smp_call_function(func, info, retry, wait); \ + func(info); \ + preempt_enable(); \ + ret; \ +}) #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h diff --git a/include/linux/time.h b/include/linux/time.h diff --git a/include/linux/wait.h b/include/linux/wait.h diff --git a/include/net/sock.h b/include/net/sock.h index 982b4ec..b8a5d10 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1256,7 +1256,7 @@ static inline struct page *sk_stream_all /* * Default write policy as shown to user space via poll/select/SIGIO */ -static inline int sock_writeable(const struct sock *sk) +static inline int sock_writeable(struct sock *sk) { return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } diff --git a/include/net/tcp.h b/include/net/tcp.h index d78025f..b6a3f48 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1106,7 +1106,7 @@ static inline int tcp_win_from_space(int } /* Note: caller must be prepared to deal with negative returns */ -static inline int tcp_space(const struct sock *sk) +static inline int tcp_space(struct sock *sk) { return tcp_win_from_space(sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)); diff --git a/kernel/sched.c b/kernel/sched.c index 6f46c94..ad55569 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -58,7 +58,7 @@ */ #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) -#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) +#define TASK_NICE(p) PRIO_TO_NICE((int)(p)->static_prio) /* * 'User priority' is the nice value 
converted to something we @@ -66,7 +66,7 @@ * it's a [ 0 ... 39 ] range. */ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) -#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) +#define TASK_USER_PRIO(p) USER_PRIO((int)(p)->static_prio) #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) /* @@ -145,7 +145,7 @@ (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA) #define TASK_INTERACTIVE(p) \ - ((p)->prio <= (p)->static_prio - DELTA(p)) + ((p)->prio <= (int)(p)->static_prio - DELTA(p)) #define INTERACTIVE_SLEEP(p) \ (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ @@ -168,10 +168,10 @@ static unsigned int task_timeslice(task_t *p) { - if (p->static_prio < NICE_TO_PRIO(0)) - return SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio); + if ((int)p->static_prio < NICE_TO_PRIO(0)) + return SCALE_PRIO(DEF_TIMESLICE*4, (int)p->static_prio); else - return SCALE_PRIO(DEF_TIMESLICE, p->static_prio); + return SCALE_PRIO(DEF_TIMESLICE, (int)p->static_prio); } #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ < (long long) (sd)->cache_hot_time) @@ -1678,6 +1678,15 @@ task_t * context_switch(runqueue_t *rq, WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; } + { + unsigned long flags; + local_save_flags(flags); + if ((flags & 0x100)) { + static int hit; + if (++hit < 5) + printk("irqs enabled in schedule\n"); + } + } /* Here we just switch the register state and the stack. 
*/ switch_to(prev, next, prev); @@ -3614,7 +3623,7 @@ asmlinkage long sys_nice(int increment) if (increment > 40) increment = 40; - nice = PRIO_TO_NICE(current->static_prio) + increment; + nice = PRIO_TO_NICE((int)current->static_prio) + increment; if (nice < -20) nice = -20; if (nice > 19) diff --git a/kernel/sys.c b/kernel/sys.c index bce933e..70f599a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1837,7 +1837,7 @@ asmlinkage long sys_prctl(int option, un if (strncpy_from_user(ncomm, (char __user *)arg2, sizeof(me->comm)-1) < 0) return -EFAULT; - set_task_comm(me, ncomm); + set_current_task_comm(ncomm); return 0; } case PR_GET_NAME: { diff --git a/mm/filemap.c b/mm/filemap.c diff --git a/mm/memory.c b/mm/memory.c index 2998cfc..a50e453 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -692,7 +692,8 @@ static unsigned long unmap_page_range(st if (details && !details->check_mapping && !details->nonlinear_vma) details = NULL; - BUG_ON(addr >= end); + if (unlikely(addr >= end)) + goto bug; tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -707,6 +708,8 @@ static unsigned long unmap_page_range(st tlb_end_vma(tlb, vma); return addr; +bug: + out_of_line_bug(); } #ifdef CONFIG_PREEMPT @@ -783,7 +786,8 @@ unsigned long unmap_vmas(struct mmu_gath start, end, &zap_work, details); if (zap_work > 0) { - BUG_ON(start != end); + if (unlikely(start != end)) + goto bug; break; } @@ -805,6 +809,8 @@ unsigned long unmap_vmas(struct mmu_gath } out: return start; /* which is now the end (or restart) address */ +bug: + out_of_line_bug(); } /** @@ -1017,7 +1023,7 @@ int get_user_pages(struct task_struct *t case VM_FAULT_OOM: return i ? 
i : -ENOMEM; default: - BUG(); + goto bug; } } if (pages) { @@ -1032,6 +1038,8 @@ int get_user_pages(struct task_struct *t } while (len && start < vma->vm_end); } while (len); return i; +bug: + out_of_line_bug(); } EXPORT_SYMBOL(get_user_pages); @@ -1311,7 +1319,10 @@ static int do_wp_page(struct mm_struct * if (unlikely(anon_vma_prepare(vma))) goto oom; - if (old_page == ZERO_PAGE(address)) { +#ifndef IS_ZERO_PAGE_PFN +#define IS_ZERO_PAGE_PFN(pfn, addr) (old_page == ZERO_PAGE(addr)) +#endif + if (IS_ZERO_PAGE_PFN(pfn, address)) { new_page = alloc_zeroed_user_highpage(vma, address); if (!new_page) goto oom; @@ -1920,6 +1931,10 @@ retry: if (write_access) entry = maybe_mkwrite(pte_mkdirty(entry), vma); set_pte_at(mm, address, page_table, entry); + if (write_access) + prefetchw((void *)address); + else + prefetch((void *)address); if (anon) { inc_mm_counter(mm, anon_rss); lru_cache_add_active(new_page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bd4de59..8a50e20 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1136,7 +1136,7 @@ static void show_node(struct zone *zone) * The result is unavoidably approximate - it can change * during and after execution of this function. */ -static DEFINE_PER_CPU(struct page_state, page_states) = {0}; +DEFINE_PER_CPU(struct page_state, page_states) = {0}; atomic_t nr_pagecache = ATOMIC_INIT(0); EXPORT_SYMBOL(nr_pagecache); @@ -1211,6 +1211,7 @@ unsigned long __read_page_state(unsigned return ret; } +#ifndef __mod_page_state void __mod_page_state(unsigned long offset, unsigned long delta) { unsigned long flags; @@ -1223,6 +1224,7 @@ void __mod_page_state(unsigned long offs } EXPORT_SYMBOL(__mod_page_state); +#endif void __get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free, struct pglist_data *pgdat) diff --git a/mm/slab.c b/mm/slab.c index e5ec26e..0d44419 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2931,9 +2931,9 @@ void *__kmalloc(size_t size, gfp_t flags * functions. 
*/ cachep = __find_general_cachep(size, flags); - if (unlikely(cachep == NULL)) - return NULL; - return __cache_alloc(cachep, flags); + if (likely(cachep != NULL)) + return __cache_alloc(cachep, flags); + return NULL; } EXPORT_SYMBOL(__kmalloc); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dd476b1..d1d9f3f 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -136,8 +136,8 @@ static void ct_seq_stop(struct seq_file static int ct_seq_show(struct seq_file *s, void *v) { - const struct ip_conntrack_tuple_hash *hash = v; - const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); + struct ip_conntrack_tuple_hash *hash = v; + struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); struct ip_conntrack_protocol *proto; ASSERT_READ_LOCK(&ip_conntrack_lock); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fc77443..f037d62 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1018,7 +1018,7 @@ static inline int dentry_has_perm(struct has the same SID as the process. If av is zero, then access to the file is not checked, e.g. for cases where only the descriptor is affected like seek. */ -static inline int file_has_perm(struct task_struct *tsk, +static inline long file_has_perm(struct task_struct *tsk, struct file *file, u32 av) { @@ -2319,7 +2319,7 @@ static int selinux_inode_listsecurity(st /* file security operations */ -static int selinux_file_permission(struct file *file, int mask) +static long selinux_file_permission(struct file *file, int mask) { struct inode *inode = file->f_dentry->d_inode;