variable-argument-lists in AARCH64
Table of Contents
Variadic functions
定义: https://en.cppreference.com/w/c/variadic
这类函数的特点在于,参数的个数是不固定的,具体有多少个参数,实际上是由调用者决定的。被调用的函数本身并不知道具体会传递多少个参数.
资料
AArch64的过程调用规范(AAPCS64)中已经对可变参数函数做了详细的说明,下面就以一个实际的例子来看一下具体的过程
通过资料,我们大概可以了解到,可变参数函数在使用时,需要使用va_list这样一个结构体,函数在被调用时,通过一系列的宏来准备和获取参数.
下边是va_arg的伪代码
type va_arg (va_list ap, type)
{
int nreg, offs;
if (type passed in general registers) {
offs = ap.__gr_offs;
if (offs >= 0)
goto on_stack; // reg save area empty
if (alignof(type) > 8)
offs = (offs + 15) & -16; // round up
nreg = (sizeof(type) + 7) / 8;
ap.__gr_offs = offs + (nreg * 8);
if (ap.__gr_offs > 0)
goto on_stack; // overflowed reg save area
#ifdef BIG_ENDIAN
if (classof(type) != "aggregate" && sizeof(type) < 8)
offs += 8 - sizeof(type);
#endif
return *(type *)(ap.__gr_top + offs);
} else if (type is an HFA or an HVA) {
type ha; // treat as "struct {ftype field[n];}"
offs = ap.__vr_offs;
if (offs >= 0)
goto on_stack; // reg save area empty
nreg = sizeof(type) / sizeof(ftype);
ap.__vr_offs = offs + (nreg * 16);
if (ap.__vr_offs > 0)
goto on_stack; // overflowed reg save area
#ifdef BIG_ENDIAN
if (sizeof(ftype) < 16)
offs += 16 - sizeof(ftype);
#endif
for (i = 0; i < nreg; i++, offs += 16)
ha.field[i] = *((ftype *)(ap.__vr_top + offs));
return ha;
} else if (type passed in fp/simd registers) {
offs = ap.__vr_offs;
if (offs >= 0)
goto on_stack; // reg save area empty
nreg = (sizeof(type) + 15) / 16;
ap.__vr_offs = offs + (nreg * 16);
if (ap.__vr_offs > 0)
goto on_stack; // overflowed reg save area
#ifdef BIG_ENDIAN
if (classof(type) != "aggregate" && sizeof(type) < 16)
offs += 16 - sizeof(type);
#endif
return *(type *)(ap.__vr_top + offs);
}
on_stack:
intptr_t arg = ap.__stack;
if (alignof(type) > 8)
arg = (arg + 15) & -16;
ap.__stack = (void *)((arg + sizeof(type) + 7) & -8);
#ifdef BIG_ENDIAN
if (classof(type) != "aggregate" && sizeof(type) < 8)
arg += 8 - sizeof(type);
#endif
return *(type *)arg;
}
例子
本文章使用Linux Kernel中的printk为研究对象
< 7个参数, 以两个参数为例
Caller代码
__mdiobus_register
bus->state = MDIOBUS_REGISTERED;
pr_info("%s: probed\n", bus->name);
Caller disassemble
下面就是调用的地方,从汇编代码上来看,这里并没有什么特别之处: 在调用printk之前,x0为格式化输出字符串的地址,x1是bus->name,和普通函数的参数传递方法完全一致.
419
420 bus->state = MDIOBUS_REGISTERED;
0xffff0000086bb8d0 <+432>: ldr x1, [x20, #8]
0xffff0000086bb8d4 <+436>: mov w0, #0x2 // #2
0xffff0000086bb8d8 <+440>: str w0, [x20, #152]
421 pr_info("%s: probed\n", bus->name);
0xffff0000086bb8dc <+444>: adrp x0, 0xffff000008e97000
0xffff0000086bb8e0 <+448>: add x0, x0, #0xaf0
0xffff0000086bb8e4 <+452>: bl 0xffff000008127454 <printk>
(gdb) x/s $x0
0xffff000008e97af0: "\001\066libphy: %s: probed\n"
(gdb) x/s $x1
0xffff000008e98080: "Fixed MDIO Bus"
(gdb)
上面例子是内核中__mdiobus_register函数的片段
Callee disassemble
(gdb) disassemble /s
Dump of assembler code for function printk:
kernel/printk/printk.c:
1990 {
=> 0xffff000008127454 <+0>: stp x29, x30, [sp, #-176]! sp = 0xffff00000805bcf0
0xffff000008127458 <+4>: mov w8, #0xffffffc8 // #-56 sp = 0xFFFF00000805bc40
0xffff00000812745c <+8>: mov x29, sp x29 = sp, fp = 0xFFFF00000805bc40
0xffff000008127460 <+12>: add x9, sp, #0x70 x9= 0xFFFF00000805bcb0
0xffff000008127464 <+16>: add x10, sp, #0xb0 x10 = 0xffff00000805bcf0 i.e. stack_top
0xffff000008127468 <+20>: str x19, [sp, #16] backup x19 to stack Local Variables
0xffff00000812746c <+24>: adrp x19, 0xffff0000092b9000 <page_wait_table+5376>
0xffff000008127470 <+28>: add x19, x19, #0x6c8 x19 = address of __stack_chk_guard
0xffff000008127474 <+32>: stp x10, x10, [sp, #72] prepare va_list args
0xffff000008127478 <+36>: str x9, [sp, #88] save x9 to 0xFFFF00000805BC98
0xffff00000812747c <+40>: ldr x9, [x19] load stack guard magic value to x9.
0xffff000008127480 <+44>: str x9, [sp, #104] store stack guard magic value to stack on 0xFFFF00000805BCA8.
0xffff000008127484 <+48>: mov x9, #0x0 // #0
0xffff000008127488 <+52>: stp w8, wzr, [sp, #96] 设置初始的__gr_offs
0xffff00000812748c <+56>: ldp x8, x9, [sp, #72]
0xffff000008127490 <+60>: stp x8, x9, [sp, #32]
0xffff000008127494 <+64>: ldp x8, x9, [sp, #88]
0xffff000008127498 <+68>: stp x1, x2, [sp, #120] 将x1 ~ x7放入GP Arg Save Area的区域
0xffff00000812749c <+72>: add x1, sp, #0x20
0xffff0000081274a0 <+76>: stp x8, x9, [sp, #48]
0xffff0000081274a4 <+80>: stp x3, x4, [sp, #136]
0xffff0000081274a8 <+84>: stp x5, x6, [sp, #152]
0xffff0000081274ac <+88>: str x7, [sp, #168] 已经将GP Arg Save Area设置好,va_list args参数已经处理好了。
(gdb) i r pc
pc 0xffff0000081274b0 0xffff0000081274b0 <printk+92>
(gdb) p args
$6 = {
__stack = 0xffff00000805bcf0,
__gr_top = 0xffff00000805bcf0,
__vr_top = 0xffff00000805bcb0,
__gr_offs = -56,
__vr_offs = 0
}
0xffff0000081274b0 <+92>: bl 0xffff000008127e28 <vprintk_func>
1996 va_end(args);
1997
1998 return r;
0xffff0000081274b4 <+96>: ldr x2, [sp, #104]
0xffff0000081274b8 <+100>: ldr x1, [x19]
0xffff0000081274bc <+104>: eor x1, x2, x1
0xffff0000081274c0 <+108>: cbz x1, 0xffff0000081274c8 <printk+116>
0xffff0000081274c4 <+112>: bl 0xffff0000080d3d48 <__stack_chk_fail>
0xffff0000081274c8 <+116>: ldr x19, [sp, #16]
0xffff0000081274cc <+120>: ldp x29, x30, [sp], #176
0xffff0000081274d0 <+124>: ret
End of assembler dump.
从上面的例子中,可以看出来,在参数小于7个时,编译器会将参数全部存入GP Arg Save Area的区域,并传递给vprintk_func来处理.
下边的函数通过va_list args这个参数就可以遍历所有x1 ~ x7的参数了。
参数个数大于7
同样的,取内核中的例子:
Caller 代码
void __init mem_init_print_info(const char *str)
{
pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
#ifdef CONFIG_HIGHMEM
", %luK highmem"
#endif
"%s%s)\n",
nr_free_pages() << (PAGE_SHIFT - 10),
physpages << (PAGE_SHIFT - 10),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
(physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
totalcma_pages << (PAGE_SHIFT - 10),
#ifdef CONFIG_HIGHMEM
totalhigh_pages << (PAGE_SHIFT - 10),
#endif
str ? ", " : "", str ? str : "");
}
这段内存初始化时的打印,传递了非常多的参数,就以这个作为研究对象吧
Callee va_list
上面的例子中已经简单的分析过printk的汇编代码,这里不再赘述,直接去看va_list的值
#0 0xffff0000081274b0 in printk (fmt=0xffff000008e04b68 "\001\066Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved%s%s)\n") at kernel/printk/printk.c:1995
args = {
__stack = 0xffff0000092b3f00,
__gr_top = 0xffff0000092b3f00,
__vr_top = 0xffff0000092b3ec0,
__gr_offs = -56,
__vr_offs = 0
}
r = <optimized out>
#1 0xffff000009196994 in mem_init_print_info (str=0xffff000008e0e090 "") at ./include/linux/vmstat.h:184
physpages = <optimized out>
codesize = <optimized out>
datasize = <optimized out>
rosize = <optimized out>
bss_size = <optimized out>
init_code_size = <optimized out>
init_data_size = <optimized out>
#2 0xffff000009185e9c in mem_init () at arch/arm64/mm/init.c:608
No locals.
#3 0xffff000009180b90 in mm_init () at init/main.c:520
No locals.
#4 start_kernel () at init/main.c:590
command_line = 0xffff000009226028 <boot_command_line> "root=/dev/ram0 console=ttyAMA0"
after_dashes = <optimized out>
#5 0x0000000000000000 in ?? ()
(gdb) i r pc
pc 0xffff0000081274b0 0xffff0000081274b0 <printk+92>
(gdb) x/16gx args.__stack
0xffff0000092b3f00: 0x000000000000c848 0x0000000000008000 分别是第8和第9个参数
0xffff0000092b3f10: 0xffff000008e0e090 0xffff000008e0e090 第10和第11个参数
0xffff0000092b3f20: 0xffff0000092b3f70 0xffff000009185e9c
0xffff0000092b3f30: 0xffff000009407000 0xffff0000092d55b8
0xffff0000092b3f40: 0xffff0000092b96c8 0xffff0000092b96c0
0xffff0000092b3f50: 0xffff800005fffe00 0xffff000009226028
0xffff0000092b3f60: 0x0000000000000000 0x07bd33f332224500
0xffff0000092b3f70: 0xffff0000092b3fa0 0xffff000009180b90
(gdb) x/s 0xffff000008e0e090
0xffff000008e0e090: ""
从上边准备的va_list可以看到,在传递多于7个参数时,寄存器放不下的那部分参数的存放位址还是和正常的函数调用一样,存放在Caller的Stack Arg Area中,由args.__stack指向
打印出来的log
[ 0.000000] Kernel command line: root=/dev/ram0 console=ttyAMA0
[ 0.000000] Memory: 47032K/131072K available (10236K kernel code, 1352K rwdata, 7112K rodata, 1216K init, 379K bss, 51272K reserved, 32768K cma-reserved)
[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1
[ 0.000000] Preemptible hierarchical RCU implementation.
[ 0.000000] RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=2.
[ 0.000000] Tasks RCU enabled.
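从log可以看出,最后4个参数(51272K reserved, 32768K cma-reserved以及两个空字符串)与上面从args.__stack中读到的值(0xc848 = 51272, 0x8000 = 32768, 以及两个指向""的指针)完全一致.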