Subject: [PATCH] x86-64: optimize GET_THREAD_INFO users to use r10 Now that keeping current in %r10 is in place and working, optimize the x86-64 specific assembly so that it does not reload or recalculate current where unnecessary. --- arch/x86_64/ia32/ia32entry.S | 16 ++++++++-------- arch/x86_64/kernel/entry.S | 39 ++++++++++++++------------------------- arch/x86_64/lib/copy_user.S | 6 ++---- arch/x86_64/lib/getuser.S | 12 ++++-------- arch/x86_64/lib/putuser.S | 12 ++++-------- 5 files changed, 32 insertions(+), 53 deletions(-) applies-to: d8b8d8a196d1608edcd5ea682f2d1c99e209ebc0 d30adbf231158a79cff30c8786bdb4dbd62d76d6 diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index cdb5918..222076a 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -85,13 +85,13 @@ ENTRY(ia32_sysenter_target) CFI_ADJUST_CFA_OFFSET 8 cld SAVE_ARGS 0,0,1 + movq %gs:pda_pcurrent,%r10 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ 1: movl (%rbp),%r9d .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys @@ -99,10 +99,8 @@ sysenter_do_call: cmpl $(IA32_NR_syscalls),%eax jae ia32_badsys IA32_ARG_FIXUP 1 - movq %gs:pda_pcurrent,%r10 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) cli testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) jnz int_ret_from_sys_call @@ -139,6 +137,7 @@ sysenter_tracesys: .section __ex_table,"a" .quad 1b,ia32_badarg .previous + movq %gs:pda_pcurrent,%r10 jmp sysenter_do_call CFI_ENDPROC @@ -192,7 +191,7 @@ ENTRY(ia32_cstar_target) .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) + movq %gs:pda_pcurrent,%r10 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys @@ -200,10 +199,8 @@ cstar_do_call: cmpl 
$IA32_NR_syscalls,%eax jae ia32_badsys IA32_ARG_FIXUP 1 - movq %gs:pda_pcurrent,%r10 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) cli testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) jnz int_ret_from_sys_call @@ -234,10 +231,12 @@ cstar_tracesys: .section __ex_table,"a" .quad 1b,ia32_badarg .previous + movq %gs:pda_pcurrent,%r10 jmp cstar_do_call ia32_badarg: movq $-EFAULT,%rax + movq %gs:pda_pcurrent,%r10 jmp ia32_sysret CFI_ENDPROC @@ -279,14 +278,13 @@ ENTRY(ia32_syscall) /* note the registers are not zero extended to the sf. this could be a problem. */ SAVE_ARGS 0,0,1 - GET_THREAD_INFO(%r10) + movq %gs:pda_pcurrent,%r10 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) jnz ia32_tracesys ia32_do_syscall: cmpl $(IA32_NR_syscalls),%eax jae ia32_badsys IA32_ARG_FIXUP - movq %gs:pda_pcurrent,%r10 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: movq %rax,RAX-ARGOFFSET(%rsp) @@ -300,6 +298,7 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST + movq %gs:pda_pcurrent,%r10 jmp ia32_do_syscall ia32_badsys: @@ -345,6 +344,7 @@ ENTRY(ia32_ptregs_common) movq %gs:pda_pcurrent,%r10 call *%rax RESTORE_REST + movq %gs:pda_pcurrent,%r10 jmp ia32_sysret /* misbalances the return cache */ CFI_ENDPROC diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index b2cec61..5340696 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -136,21 +136,19 @@ ENTRY(ret_from_fork) CFI_DEFAULT_STACK call schedule_tail - GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10) jnz rff_trace rff_action: RESTORE_REST testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 
je int_ret_from_sys_call - testl $_TIF_IA32,threadinfo_flags(%rcx) + testl $_TIF_IA32,threadinfo_flags(%r10) jnz int_ret_from_sys_call RESTORE_TOP_OF_STACK %rdi,ARGOFFSET jmp ret_from_sys_call rff_trace: movq %rsp,%rdi call syscall_trace_leave - GET_THREAD_INFO(%rcx) jmp rff_action CFI_ENDPROC @@ -194,11 +192,10 @@ ENTRY(system_call) movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET - GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) - CFI_REMEMBER_STATE movq %r10,%rcx movq %gs:pda_pcurrent,%r10 + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) + CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -213,9 +210,8 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: - GET_THREAD_INFO(%rcx) cli - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz sysret_careful @@ -271,10 +267,10 @@ tracesys: call syscall_trace_enter LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpq $__NR_syscall_max,%rax - ja 1f movq %r10,%rcx /* fixup for C */ movq %gs:pda_pcurrent,%r10 + cmpq $__NR_syscall_max,%rax + ja 1f call *sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) 1: SAVE_REST @@ -312,8 +308,7 @@ ENTRY(int_ret_from_sys_call) movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx andl %edi,%edx jnz int_careful jmp retint_swapgs @@ -413,8 +408,7 @@ ENTRY(stub_execve) CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call sys_execve - GET_THREAD_INFO(%rcx) - bt $TIF_IA32,threadinfo_flags(%rcx) + bt $TIF_IA32,threadinfo_flags(%r10) CFI_REMEMBER_STATE jc exec_32bit RESTORE_TOP_OF_STACK %r11 @@ -520,7 +514,6 @@ ret_from_intr: #endif leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! 
*/ exit_intr: - GET_THREAD_INFO(%rcx) testl $3,CS-ARGOFFSET(%rsp) je retint_kernel @@ -532,7 +525,7 @@ exit_intr: retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful @@ -566,7 +559,6 @@ retint_careful: call schedule popq %rdi CFI_ADJUST_CFA_OFFSET -8 - GET_THREAD_INFO(%rcx) cli jmp retint_check @@ -582,7 +574,6 @@ retint_signal: RESTORE_REST cli movl $_TIF_NEED_RESCHED,%edi - GET_THREAD_INFO(%rcx) jmp retint_check #ifdef CONFIG_PREEMPT @@ -590,9 +581,9 @@ retint_signal: /* rcx: threadinfo. interrupts off. */ .p2align retint_kernel: - cmpl $0,threadinfo_preempt_count(%rcx) + cmpl $0,threadinfo_preempt_count(%r10) jnz retint_restore_args - bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) + bt $TIF_NEED_RESCHED,threadinfo_flags(%r10) jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args @@ -751,10 +742,9 @@ error_exit: movl %ebx,%eax RESTORE_REST cli - GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel - movl threadinfo_flags(%rcx),%edx + movl threadinfo_flags(%r10),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful @@ -942,8 +932,7 @@ paranoid_restore: RESTORE_ALL 8 iretq paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%ebx + movl threadinfo_flags(%r10),%ebx andl $_TIF_WORK_MASK,%ebx jz paranoid_swapgs movq %rsp,%rdi /* &pt_regs */ diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S index f24497d..f69bdd5 100644 --- a/arch/x86_64/lib/copy_user.S +++ b/arch/x86_64/lib/copy_user.S @@ -15,11 +15,10 @@ .globl copy_to_user .p2align 4 copy_to_user: - GET_THREAD_INFO(%rax) movq %rdi,%rcx addq %rdx,%rcx jc bad_to_user - cmpq threadinfo_addr_limit(%rax),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_to_user 2: .byte 0xe9 /* 32bit jump */ @@ -43,11 +42,10 @@ copy_to_user: .globl copy_from_user .p2align 4 copy_from_user: - 
GET_THREAD_INFO(%rax) movq %rsi,%rcx addq %rdx,%rcx jc bad_from_user - cmpq threadinfo_addr_limit(%rax),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_from_user /* FALL THROUGH to copy_user_generic */ diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S index 3844d5e..f8cbd9f 100644 --- a/arch/x86_64/lib/getuser.S +++ b/arch/x86_64/lib/getuser.S @@ -36,8 +36,7 @@ .p2align 4 .globl __get_user_1 __get_user_1: - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_get_user 1: movzb (%rcx),%edx xorl %eax,%eax @@ -46,10 +45,9 @@ __get_user_1: .p2align 4 .globl __get_user_2 __get_user_2: - GET_THREAD_INFO(%r8) addq $1,%rcx jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 20f decq %rcx 2: movzwl (%rcx),%edx @@ -61,10 +59,9 @@ __get_user_2: .p2align 4 .globl __get_user_4 __get_user_4: - GET_THREAD_INFO(%r8) addq $3,%rcx jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 30f subq $3,%rcx 3: movl (%rcx),%edx @@ -76,10 +73,9 @@ __get_user_4: .p2align 4 .globl __get_user_8 __get_user_8: - GET_THREAD_INFO(%r8) addq $7,%rcx jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 40f subq $7,%rcx 4: movq (%rcx),%rdx diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S index 7f55939..4de4e34 100644 --- a/arch/x86_64/lib/putuser.S +++ b/arch/x86_64/lib/putuser.S @@ -34,8 +34,7 @@ .p2align 4 .globl __put_user_1 __put_user_1: - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae bad_put_user 1: movb %dl,(%rcx) xorl %eax,%eax @@ -44,10 +43,9 @@ __put_user_1: .p2align 4 .globl __put_user_2 __put_user_2: - GET_THREAD_INFO(%r8) addq $1,%rcx jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 20f decq %rcx 2: movw %dx,(%rcx) @@ -59,10 +57,9 @@ __put_user_2: .p2align 4 .globl __put_user_4 __put_user_4: 
- GET_THREAD_INFO(%r8) addq $3,%rcx jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 30f subq $3,%rcx 3: movl %edx,(%rcx) @@ -74,10 +71,9 @@ __put_user_4: .p2align 4 .globl __put_user_8 __put_user_8: - GET_THREAD_INFO(%r8) addq $7,%rcx jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%r10),%rcx jae 40f subq $7,%rcx 4: movq %rdx,(%rcx) --- 0.99.9.GIT