引言
__libc_open 是 glibc 中 open 函数的底层实现,内部通过 openat 系统调用(以 AT_FDCWD 作为目录参数)完成:针对给定的 file 路径打开文件,生成一个文件描述符 fd。可以创建文件/创建临时文件/打开已有文件。在很多 IO 接口函数中都有使用到,我们来一起看看它的实现原理。
源码分析
代码参考:glibc/sysdeps/unix/sysv/linux/open.c
28 /* Open FILE with access OFLAG. If O_CREAT or O_TMPFILE is in OFLAG,
29 a third argument is the file protection. */
30 int
31 __libc_open (const char *file, int oflag, ...)
32 {
33 int mode = 0;
34
35 if (__OPEN_NEEDS_MODE (oflag))
36 {
37 va_list arg;
38 va_start (arg, oflag);
39 mode = va_arg (arg, int);
40 va_end (arg);
41 }
42
43 return SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
44 }
复制代码
首先根据 oflag(里面是否包含 O_CREAT 或 O_TMPFILE)检查是否需要获取第三个参数 mode(用来指定新建文件的访问权限)。
__OPEN_NEEDS_MODE 宏判定
具体实现里面就是通过位运算检测 oflag 中是否包含__O_TMPFILE 或 O_CREAT
//glibc/io/fcntl.h
37 /* Detect if open needs mode as a third argument (or for openat as a fourth
38 argument). */
39 #ifdef __O_TMPFILE
40 # define __OPEN_NEEDS_MODE(oflag) \
41 (((oflag) & O_CREAT) != 0 || ((oflag) & __O_TMPFILE) == __O_TMPFILE)
42 #else
43 # define __OPEN_NEEDS_MODE(oflag) (((oflag) & O_CREAT) != 0)
44 #endif
复制代码
SYSCALL_CANCEL
在这个宏中实际上最后还是调用 INLINE_SYSCALL_CALL 实现我们的调用,多出来的部分是线程取消(cancellation)相关的处理,这里我们就不展开了。
//glibc/sysdeps/unix/sysdep.h
111 #if IS_IN (rtld)
112 /* All cancellation points are compiled out in the dynamic loader. */
113 # define NO_SYSCALL_CANCEL_CHECKING 1
114 #else
115 # define NO_SYSCALL_CANCEL_CHECKING SINGLE_THREAD_P
116 #endif
117
118 #define SYSCALL_CANCEL(...) \
119 ({ \
120 long int sc_ret; \
121 if (NO_SYSCALL_CANCEL_CHECKING) \
122 sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
123 else \
124 { \
125 int sc_cancel_oldtype = LIBC_CANCEL_ASYNC (); \
126 sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__); \
127 LIBC_CANCEL_RESET (sc_cancel_oldtype); \
128 } \
129 sc_ret; \
130 })
复制代码
INLINE_SYSCALL_CALL
这里就是对 syscall 的相关封装了,实际上就是要从我们之前传入的SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
解析出具体需要执行的汇编指令
103 /* Issue a syscall defined by syscall number plus any other argument
104 required. Any error will be handled using arch defined macros and errno
105 will be set accordingly.
106 It is similar to INLINE_SYSCALL macro, but without the need to pass the
107 expected argument number as second parameter. */
108 #define INLINE_SYSCALL_CALL(...) \
109 __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)
复制代码
宏展开之后__INLINE_SYSCALL_DISP (__INLINE_SYSCALL, openat, AT_FDCWD, file, oflag, mode)
100 #define __INLINE_SYSCALL_DISP(b,...) \
101 __SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
复制代码
展开之后:__SYSCALL_CONCAT (__INLINE_SYSCALL, __INLINE_SYSCALL_NARGS(openat, AT_FDCWD, file, oflag, mode)) (openat, AT_FDCWD, file, oflag, mode)
__INLINE_SYSCALL_NARGS
先具体分析__INLINE_SYSCALL_NARGS 的宏展开方式:
98 #define __INLINE_SYSCALL_NARGS(...) \
99 __INLINE_SYSCALL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)
复制代码
展开如下,在原有参数后面增加了 8 个数字
__INLINE_SYSCALL_NARGS_X (openat, AT_FDCWD, file, oflag, mode,7,6,5,4,3,2,1,0,)
继续展开,可以看到这里是取第九个参数,对应上面这个表达式,也就是 4。可以推出:如果输入有 1 个参数(即只有系统调用名),那么返回 0;输入有 2 个参数,返回 1;依次类推,输入有 8 个参数,返回 7。也就是说,返回值是除系统调用名之外的实际参数个数。
97 #define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n
__SYSCALL_CONCAT
实际上就是把两个记号(token)拼接成一个新的记号,“##”是预处理器中的记号连接运算符。这里多套一层 __SYSCALL_CONCAT_X,是为了让参数先被完全展开,然后再进行拼接
27 #define __SYSCALL_CONCAT_X(a,b) a##b
28 #define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X (a, b)
复制代码
至此,我们的宏展开就应该是这样的
__INLINE_SYSCALL4(openat, AT_FDCWD, file, oflag, mode)
__INLINE_SYSCALL4
对应的定义如下:就应该被展开为INLINE_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)
80 #define __INLINE_SYSCALL0(name) \
81 INLINE_SYSCALL (name, 0)
82 #define __INLINE_SYSCALL1(name, a1) \
83 INLINE_SYSCALL (name, 1, a1)
84 #define __INLINE_SYSCALL2(name, a1, a2) \
85 INLINE_SYSCALL (name, 2, a1, a2)
86 #define __INLINE_SYSCALL3(name, a1, a2, a3) \
87 INLINE_SYSCALL (name, 3, a1, a2, a3)
88 #define __INLINE_SYSCALL4(name, a1, a2, a3, a4) \
89 INLINE_SYSCALL (name, 4, a1, a2, a3, a4)
90 #define __INLINE_SYSCALL5(name, a1, a2, a3, a4, a5) \
91 INLINE_SYSCALL (name, 5, a1, a2, a3, a4, a5)
92 #define __INLINE_SYSCALL6(name, a1, a2, a3, a4, a5, a6) \
93 INLINE_SYSCALL (name, 6, a1, a2, a3, a4, a5, a6)
94 #define __INLINE_SYSCALL7(name, a1, a2, a3, a4, a5, a6, a7) \
95 INLINE_SYSCALL (name, 7, a1, a2, a3, a4, a5, a6, a7)
复制代码
INLINE_SYSCALL
封装调用了 INTERNAL_SYSCALL 宏
INTERNAL_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)
38 /* Define a macro which expands into the inline wrapper code for a system
39 call. It sets the errno and returns -1 on a failure, or the syscall
40 return value otherwise. */
41 #undef INLINE_SYSCALL
42 #define INLINE_SYSCALL(name, nr, args...) \
43 ({ \
44 long int sc_ret = INTERNAL_SYSCALL (name, nr, args); \
45 __glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret)) \
46 ? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret)) \
47 : sc_ret; \
48 })
复制代码
INTERNAL_SYSCALL
这一步的实现因平台而异,这里以 x86_64 平台为例,上面的调用被依次展开为:
internal_syscall4 (SYS_ify (openat), AT_FDCWD, file, oflag, mode)
internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)
//glibc/sysdeps/unix/sysv/linux/x86_64/sysdep.h
29 /* For Linux we can use the system call table in the header file
30 /usr/include/asm/unistd.h
31 of the kernel. But these symbols do not follow the SYS_* syntax
32 so we have to redefine the `SYS_ify' macro here. */
33 #undef SYS_ify
34 #define SYS_ify(syscall_name) __NR_##syscall_name
233 #undef INTERNAL_SYSCALL
234 #define INTERNAL_SYSCALL(name, nr, args...) \
235 internal_syscall##nr (SYS_ify (name), args)
复制代码
最终我们的调用为internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)
依次将 4 个参数装入对应的寄存器中(系统调用号通过 "0" 约束与返回值共用 rax),然后执行 syscall 指令完成调用,这里用到了 GCC 的内联汇编(asm)扩展语法
301 #undef internal_syscall4
302 #define internal_syscall4(number, arg1, arg2, arg3, arg4) \
303 ({ \
304 unsigned long int resultvar; \
305 TYPEFY (arg4, __arg4) = ARGIFY (arg4); \
306 TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
307 TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
308 TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
309 register TYPEFY (arg4, _a4) asm ("r10") = __arg4; \
310 register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
311 register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
312 register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
313 asm volatile ( \
314 "syscall\n\t" \
315 : "=a" (resultvar) \
316 : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) \
317 : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
318 (long int) resultvar; \
319 })
复制代码
我们再看一下该架构下参数最多的宏(x86_64 架构最多只支持 6 个参数)
344 #undef internal_syscall6
345 #define internal_syscall6(number, arg1, arg2, arg3, arg4, arg5, arg6) \
346 ({ \
347 unsigned long int resultvar; \
348 TYPEFY (arg6, __arg6) = ARGIFY (arg6); \
349 TYPEFY (arg5, __arg5) = ARGIFY (arg5); \
350 TYPEFY (arg4, __arg4) = ARGIFY (arg4); \
351 TYPEFY (arg3, __arg3) = ARGIFY (arg3); \
352 TYPEFY (arg2, __arg2) = ARGIFY (arg2); \
353 TYPEFY (arg1, __arg1) = ARGIFY (arg1); \
354 register TYPEFY (arg6, _a6) asm ("r9") = __arg6; \
355 register TYPEFY (arg5, _a5) asm ("r8") = __arg5; \
356 register TYPEFY (arg4, _a4) asm ("r10") = __arg4; \
357 register TYPEFY (arg3, _a3) asm ("rdx") = __arg3; \
358 register TYPEFY (arg2, _a2) asm ("rsi") = __arg2; \
359 register TYPEFY (arg1, _a1) asm ("rdi") = __arg1; \
360 asm volatile ( \
361 "syscall\n\t" \
362 : "=a" (resultvar) \
363 : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4), \
364 "r" (_a5), "r" (_a6) \
365 : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
366 (long int) resultvar; \
367 })
复制代码
寄存器的使用中参数依次装入 rdi,rsi,rdx,r10,r8,r9
支持最多 7 个参数的只有如下的架构:
riscv 架构:glibc/sysdeps/unix/sysv/linux/riscv/sysdep.h 分别使用 a0 到 a6 寄存器装参数
323 # define internal_syscall7(number, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \
324 ({ \
325 long int _sys_result; \
326 long int _arg0 = (long int) (arg0); \
327 long int _arg1 = (long int) (arg1); \
328 long int _arg2 = (long int) (arg2); \
329 long int _arg3 = (long int) (arg3); \
330 long int _arg4 = (long int) (arg4); \
331 long int _arg5 = (long int) (arg5); \
332 long int _arg6 = (long int) (arg6); \
333 \
334 { \
335 register long int __a7 asm ("a7") = number; \
336 register long int __a0 asm ("a0") = _arg0; \
337 register long int __a1 asm ("a1") = _arg1; \
338 register long int __a2 asm ("a2") = _arg2; \
339 register long int __a3 asm ("a3") = _arg3; \
340 register long int __a4 asm ("a4") = _arg4; \
341 register long int __a5 asm ("a5") = _arg5; \
342 register long int __a6 asm ("a6") = _arg6; \
343 __asm__ volatile ( \
344 "scall\n\t" \
345 : "+r" (__a0) \
346 : "r" (__a7), "r" (__a1), "r" (__a2), "r" (__a3), \
347 "r" (__a4), "r" (__a5), "r" (__a6) \
348 : __SYSCALL_CLOBBERS); \
349 _sys_result = __a0; \
350 } \
351 _sys_result; \
352 })
复制代码
mips 架构:glibc/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
315 #define internal_syscall7(v0_init, input, number, err, \
316 arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
317 ({ \
318 union __mips_syscall_return _sc_ret; \
319 _sc_ret.val = __mips_syscall7 ((long int) (arg1), \
320 (long int) (arg2), \
321 (long int) (arg3), \
322 (long int) (arg4), \
323 (long int) (arg5), \
324 (long int) (arg6), \
325 (long int) (arg7), \
326 (long int) (number)); \
327 _sc_ret.reg.v1 != 0 ? -_sc_ret.reg.v0 : _sc_ret.reg.v0; \
328 })
复制代码
总结
从最开始的 SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode) 宏,依次转换为 internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode),然后调用汇编实现底层 syscall 操作。
评论