Linux下syscall分析

先看如何增加系統調用

diff --git a/kernel4.4/arch/arm/include/uapi/asm/unistd.h b/kernel4.4/arch/arm/include/uapi/asm/unistd.h
index ede692f..081ab28 100644
--- a/kernel4.4/arch/arm/include/uapi/asm/unistd.h
+++ b/kernel4.4/arch/arm/include/uapi/asm/unistd.h
@@ -417,6 +417,7 @@
 #define __NR_userfaultfd               (__NR_SYSCALL_BASE+388)
 #define __NR_membarrier                (__NR_SYSCALL_BASE+389)
 #define __NR_mlock2                    (__NR_SYSCALL_BASE+390)
+#define __NR_dump                      (__NR_SYSCALL_BASE+391)
 
 /*
  * The following SWIs are ARM private.
diff --git a/kernel4.4/arch/arm/kernel/Makefile b/kernel4.4/arch/arm/kernel/Makefile
index 28b4e38..9b768ca 100644
--- a/kernel4.4/arch/arm/kernel/Makefile
+++ b/kernel4.4/arch/arm/kernel/Makefile
@@ -18,7 +18,7 @@ CFLAGS_REMOVE_return_address.o = -pg
 obj-y          := elf.o entry-common.o irq.o opcodes.o \
                   process.o ptrace.o reboot.o return_address.o \
                   setup.o signal.o sigreturn_codes.o \
-                  stacktrace.o sys_arm.o time.o traps.o
+                  stacktrace.o sys_arm.o time.o traps.o mysyscall.o
 
 obj-$(CONFIG_ATAGS)            += atags_parse.o
 obj-$(CONFIG_ATAGS_PROC)       += atags_proc.o
diff --git a/kernel4.4/arch/arm/kernel/calls.S b/kernel4.4/arch/arm/kernel/calls.S
index ac368bb..dc1bf8a 100644
--- a/kernel4.4/arch/arm/kernel/calls.S
+++ b/kernel4.4/arch/arm/kernel/calls.S
@@ -400,6 +400,7 @@
                CALL(sys_userfaultfd)
                CALL(sys_membarrier)
                CALL(sys_mlock2)
+               CALL(sys_dump)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/kernel4.4/include/linux/syscalls.h b/kernel4.4/include/linux/syscalls.h
index c2b66a2..02ee903 100644
--- a/kernel4.4/include/linux/syscalls.h
+++ b/kernel4.4/include/linux/syscalls.h
@@ -888,5 +888,5 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
 asmlinkage long sys_membarrier(int cmd, int flags);
 
 asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
-
+asmlinkage long sys_dump(void);
 #endif
diff --git a/kernel4.4/kernel/sys_ni.c b/kernel4.4/kernel/sys_ni.c
index 0623787..44d04a7 100644
--- a/kernel4.4/kernel/sys_ni.c
+++ b/kernel4.4/kernel/sys_ni.c
@@ -249,3 +249,4 @@ cond_syscall(sys_execveat);
 
 /* membarrier */
 cond_syscall(sys_membarrier);
+cond_syscall(sys_dump);

kernel4.4/arch/arm/kernel/mysyscall.c 

#include <linux/syscalls.h>
#include <asm-generic/bug.h>
SYSCALL_DEFINE0(dump)
{
	long ret=0;
	WARN(1,"my syscall");
	return ret;
}

 

應用層的測試程序syscall.c

#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#define	sys_dump() __asm__ __volatile__ ("mov r7,#391\n\t" "swi 0\n\t" )
int main()
{
	int ret;
	ret=syscall(391);
	sys_dump();
	printf("sys_dump\n");
	return ret;
}

sys_dump或者syscall兩種方式都可以定義

Android.mk

LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE_TAGS := eng
LOCAL_SRC_FILES:=syscall.c
LOCAL_MODULE:=syscall
LOCAL_CPPFLAGS += -DANDROID
LOCAL_SHARED_LIBRARIES:=libc
LOCAL_STATIC_LIBRARIES := 
include $(BUILD_EXECUTABLE)

 http://androidxref.com/9.0.0_r3/xref/bionic/libc/arch-arm/bionic/syscall.S#31

ENTRY(syscall)
    mov     ip, sp
    stmfd   sp!, {r4, r5, r6, r7}
    .cfi_def_cfa_offset 16
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset r6, 8
    .cfi_rel_offset r7, 12
    mov     r7, r0
    mov     r0, r1
    mov     r1, r2
    mov     r2, r3
    ldmfd   ip, {r3, r4, r5, r6}
    swi     #0
    ldmfd   sp!, {r4, r5, r6, r7}
    .cfi_def_cfa_offset 0
    cmn     r0, #(MAX_ERRNO + 1)
    bxls    lr
    neg     r0, r0
    b       __set_errno_internal
END(syscall)

kernel4.4/arch/arm/include/asm/unistd.h

#define __NR_syscalls (392)

同時這個值按需要進行修改

arch/arm/kernel/entry-common.S

	.equ NR_syscalls,0
#define CALL(x) .equ NR_syscalls,NR_syscalls+1
#include "calls.S"

/*
 * Ensure that the system call table is equal to __NR_syscalls,
 * which is the value the rest of the system sees
 */
.ifne NR_syscalls - __NR_syscalls
.error "__NR_syscalls is not equal to the size of the syscall table"
.endif

 .equ NR_syscall,0  相當與NR_syscall=0

#define CALL(x) .equ NR_syscalls,NR_syscalls+1每一個syscall定義,變量NR_syscall自加1

kernel4.4/arch/arm/kernel/calls.S

/* 385 */	CALL(sys_memfd_create)
		CALL(sys_bpf)
		CALL(sys_execveat)
		CALL(sys_userfaultfd)
		CALL(sys_membarrier)
		CALL(sys_mlock2)
		CALL(sys_dump)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
#endif
.rept syscalls_padding
		CALL(sys_ni_syscall)
.endr

.rept重複次數

     添加CALL(sys_ni_syscall)

比如CALL(sys_mlock2)已經是390,NR_syscalls就是391,((NR_syscalls + 3) & ~3) - NR_syscalls=0

添加CALL(sys_dump),NR_syscalls就是392,((NR_syscalls + 3) & ~3) - NR_syscalls=0

假如再添加一個CALL(sys_dump1),NR_syscalls就是393,((NR_syscalls + 3) & ~3) - NR_syscalls=3,__NR_syscalls就需要定義爲396

 

看下系統調用過程

CONFIG_CPU_V7M/CONFIG_OABI_COMPAT/CONFIG_ARM_THUMB都是沒定義的,只定義CONFIG_AEABI

#undef CALL
#define CALL(x) .long x

/*=============================================================================
 * SWI handler
 *-----------------------------------------------------------------------------
 */
ENTRY(vector_swi)
#ifdef CONFIG_CPU_V7M
	v7m_exception_entry
#else
	sub	sp, sp, #S_FRAME_SIZE
	stmia	sp, {r0 - r12}			@ Calling r0 - r12
 ARM(	add	r8, sp, #S_PC		)
 ARM(	stmdb	r8, {sp, lr}^		)	@ Calling sp, lr
 THUMB(	mov	r8, sp			)
 THUMB(	store_user_sp_lr r8, r10, S_SP	)	@ calling sp, lr
	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
	str	lr, [sp, #S_PC]			@ Save calling PC
	str	r8, [sp, #S_PSR]		@ Save CPSR
	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
#endif
	zero_fp
	alignment_trap r10, ip, __cr_alignment
	enable_irq
	ct_user_exit
	get_thread_info tsk

	/*
	 * Get the system call number.
	 */

#if defined(CONFIG_OABI_COMPAT)

	/*
	 * If we have CONFIG_OABI_COMPAT then we need to look at the swi
	 * value to determine if it is an EABI or an old ABI call.
	 */
#ifdef CONFIG_ARM_THUMB
	tst	r8, #PSR_T_BIT
	movne	r10, #0				@ no thumb OABI emulation
 USER(	ldreq	r10, [lr, #-4]		)	@ get SWI instruction
#else
 USER(	ldr	r10, [lr, #-4]		)	@ get SWI instruction
#endif
 ARM_BE8(rev	r10, r10)			@ little endian instruction

#elif defined(CONFIG_AEABI)

	/*
	 * Pure EABI user space always put syscall number into scno (r7).
	 */
#elif defined(CONFIG_ARM_THUMB)
	/* Legacy ABI only, possibly thumb mode. */
	tst	r8, #PSR_T_BIT			@ this is SPSR from save_user_regs
	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
 USER(	ldreq	scno, [lr, #-4]		)

#else
	/* Legacy ABI only. */
 USER(	ldr	scno, [lr, #-4]		)	@ get SWI instruction
#endif

	uaccess_disable tbl

	adr	tbl, sys_call_table		@ load syscall table pointer

#if defined(CONFIG_OABI_COMPAT)
	/*
	 * If the swi argument is zero, this is an EABI call and we do nothing.
	 *
	 * If this is an old ABI call, get the syscall number into scno and
	 * get the old ABI syscall table address.
	 */
	bics	r10, r10, #0xff000000
	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
	ldrne	tbl, =sys_oabi_call_table
#elif !defined(CONFIG_AEABI)
	bic	scno, scno, #0xff000000		@ mask off SWI op-code
	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
#endif

local_restart:
	ldr	r10, [tsk, #TI_FLAGS]		@ check for syscall tracing
	stmdb	sp!, {r4, r5}			@ push fifth and sixth args

	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
	bne	__sys_trace

	cmp	scno, #NR_syscalls		@ check upper syscall limit
	badr	lr, ret_fast_syscall		@ return address
	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine    

	add	r1, sp, #S_OFF
2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
	bcs	arm_syscall
	mov	why, #0				@ no longer a real syscall
	b	sys_ni_syscall			@ not private func

#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
	/*
	 * We failed to handle a fault trying to access the page
	 * containing the swi instruction, but we're not really in a
	 * position to return -EFAULT. Instead, return back to the
	 * instruction and re-enter the user fault handling path trying
	 * to page it in. This will likely result in sending SEGV to the
	 * current task.
	 */
9001:
	sub	lr, lr, #4
	str	lr, [sp, #S_PC]
	b	ret_fast_syscall
#endif
ENDPROC(vector_swi)
	.type	sys_call_table, #object
ENTRY(sys_call_table)
#include "calls.S"
#undef ABI
#undef OBSOLETE

adr    tbl, sys_call_table        @ load syscall table pointer

badr    lr, ret_fast_syscall        @ return address

ldrcc    pc, [tbl, scno, lsl #2]        @ call sys_* routine調到系統調用

系統調用號存於r7,而r7等同於scno

arch/arm/kernel/entry-header.S

/*
 * These are the registers used in the syscall handler, and allow us to
 * have in theory up to 7 arguments to a function - r0 to r6.
 *
 * r7 is reserved for the system call number for thumb mode.
 *
 * Note that tbl == why is intentional.
 *
 * We must set at least "tsk" and "why" when calling ret_with_reschedule.
 */
scno    .req    r7              @ syscall number
tbl     .req    r8              @ syscall table pointer
why     .req    r8              @ Linux syscall (!= 0)
tsk     .req    r9              @ current thread_info

kernel4.4/include/linux/syscalls.h

#define SYSCALL_DEFINE0(sname)                                  \
        SYSCALL_METADATA(_##sname, 0);                          \
        asmlinkage long sys_##sname(void)
include/linux/syscalls.h
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)

#define SYSCALL_DEFINEx(x, sname, ...)                          \
        SYSCALL_METADATA(sname, x, __VA_ARGS__)                 \
        __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

 看下syscall返回函數ret_fast_syscall

/*
 * This is the fast syscall return path.  We do as little as possible here,
 * such as avoiding writing r0 to the stack.  We only use this path if we
 * have tracing and context tracking disabled - the overheads from those
 * features make this path too inefficient.
 */
ret_fast_syscall:
 UNWIND(.fnstart	)
 UNWIND(.cantunwind	)
	disable_irq_notrace			@ disable interrupts  xxxxxxxxxxxxxxxxxxxxxxxxxxx
	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
	bne	fast_work_pending

	/* perform architecture specific actions before user return */
	arch_ret_to_user r1, lr

	restore_user_regs fast = 1, offset = S_OFF
 UNWIND(.fnend		)
ENDPROC(ret_fast_syscall)

	/* Ok, we need to do extra processing, enter the slow path. */
fast_work_pending:
	str	r0, [sp, #S_R0+S_OFF]!		@ returned r0
	/* fall through to work_pending */
	tst	r1, #_TIF_SYSCALL_WORK
	bne	__sys_trace_return_nosave
slow_work_pending:
	mov	r0, sp				@ 'regs'
	mov	r2, why				@ 'syscall'
	bl	do_work_pending
	cmp	r0, #0
	beq	no_work_pending
	movlt	scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
	ldmia	sp, {r0 - r6}			@ have to reload r0 - r6
	b	local_restart			@ ... and off we go
ENDPROC(ret_fast_syscall)

有標誌,跳到fast_work_pending--->fast_work_pending

/* Checks for any syscall work in entry-common.S */
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
			   _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)

/*
 * Change these and you break ASM code in entry-common.S
 */
#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
{
	/*
	 * The assembly code enters us with IRQs off, but it hasn't
	 * informed the tracing code of that for efficiency reasons.
	 * Update the trace code with the current status.
	 */
	trace_hardirqs_off();
	do {
		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
			schedule();
		} else {
			if (unlikely(!user_mode(regs)))
				return 0;
			local_irq_enable();
			if (thread_flags & _TIF_SIGPENDING) {
				int restart = do_signal(regs, syscall);
				if (unlikely(restart)) {
					/*
					 * Restart without handlers.
					 * Deal with it without leaving
					 * the kernel space.
					 */
					return restart;
				}
				syscall = 0;
			} else if (thread_flags & _TIF_UPROBE) {
				uprobe_notify_resume(regs);
			} else {
				clear_thread_flag(TIF_NOTIFY_RESUME);
				tracehook_notify_resume(regs);
			}
		}
		local_irq_disable();
		thread_flags = current_thread_info()->flags;
	} while (thread_flags & _TIF_WORK_MASK);
	return 0;
}
asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	do {
		preempt_disable();
		__schedule(false);
		sched_preempt_enable_no_resched();
	} while (need_resched());
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章