variable-argument-lists in AARCH64
Table of Contents
Variadic functions
定义: https://en.cppreference.com/w/c/variadic
这类函数的特点在于,参数的个数是不固定的,具体有多少个参数,实际上是由调用者决定的。被调用的函数本身并不知道调用者具体会传递多少个参数.
资料
AARCH64的过程调用标准(AAPCS64)中已经对可变参数函数做了详细的说明,下面就以一个实际的例子来看一下具体的过程
通过资料,我们大概可以了解到,可变参数函数在使用时,需要用到va_list这样一个结构体,函数被调用时,通过一系列的宏(va_start/va_arg/va_end)来准备和获取参数.
下边是va_arg的伪代码
type va_arg (va_list ap, type){int nreg, offs;if (type passed in general registers) {offs = ap.__gr_offs;if (offs >= 0)goto on_stack; // reg save area emptyif (alignof(type) > 8)offs = (offs + 15) & -16; // round upnreg = (sizeof(type) + 7) / 8;ap.__gr_offs = offs + (nreg * 8);if (ap.__gr_offs > 0)goto on_stack; // overflowed reg save area#ifdef BIG_ENDIANif (classof(type) != "aggregate" && sizeof(type) < 8)offs += 8 - sizeof(type);#endifreturn *(type *)(ap.__gr_top + offs);} else if (type is an HFA or an HVA) {type ha; // treat as "struct {ftype field[n];}"offs = ap.__vr_offs;if (offs >= 0)goto on_stack; // reg save area emptynreg = sizeof(type) / sizeof(ftype);ap.__vr_offs = offs + (nreg * 16);if (ap.__vr_offs > 0)goto on_stack; // overflowed reg save area#ifdef BIG_ENDIANif (sizeof(ftype) < 16)offs += 16 - sizeof(ftype);#endiffor (i = 0; i < nreg; i++, offs += 16)ha.field[i] = *((ftype *)(ap.__vr_top + offs));return ha;} else if (type passed in fp/simd registers) {offs = ap.__vr_offs;if (offs >= 0)goto on_stack; // reg save area emptynreg = (sizeof(type) + 15) / 16;ap.__vr_offs = offs + (nreg * 16);if (ap.__vr_offs > 0)goto on_stack; // overflowed reg save area#ifdef BIG_ENDIANif (classof(type) != "aggregate" && sizeof(type) < 16)offs += 16 - sizeof(type);#endifreturn *(type *)(ap.__vr_top + offs);}on_stack:intptr_t arg = ap.__stack;if (alignof(type) > 8)arg = (arg + 15) & -16;ap.__stack = (void *)((arg + sizeof(type) + 7) & -8);#ifdef BIG_ENDIANif (classof(type) != "aggregate" && sizeof(type) < 8)arg += 8 - sizeof(type);#endifreturn *(type *)arg;}
例子
本文以Linux Kernel中的printk作为研究对象
可变参数不超过7个(x0已被fmt占用,可变参数使用x1 ~ x7), 以两个参数为例
Caller代码
__mdiobus_registerbus->state = MDIOBUS_REGISTERED;pr_info("%s: probed\n", bus->name);
Caller disassemble
下面就是调用的地方.从汇编代码上来看,这里并没有什么特别之处:在调用printk之前,x0为格式化输出字符串的地址,x1为bus->name,和普通函数的参数传递方法完全一致.
419420 bus->state = MDIOBUS_REGISTERED;0xffff0000086bb8d0 <+432>: ldr x1, [x20, #8]0xffff0000086bb8d4 <+436>: mov w0, #0x2 // #20xffff0000086bb8d8 <+440>: str w0, [x20, #152]421 pr_info("%s: probed\n", bus->name);0xffff0000086bb8dc <+444>: adrp x0, 0xffff000008e970000xffff0000086bb8e0 <+448>: add x0, x0, #0xaf00xffff0000086bb8e4 <+452>: bl 0xffff000008127454 <printk>(gdb) x/s $x00xffff000008e97af0: "\001\066libphy: %s: probed\n"(gdb) x/s $x10xffff000008e98080: "Fixed MDIO Bus"(gdb)
上面例子是内核中__mdiobus_register函数的片段
Callee disassemble
(gdb) disassemble /sDump of assembler code for function printk:kernel/printk/printk.c:1990 {=> 0xffff000008127454 <+0>: stp x29, x30, [sp, #-176]! sp = 0xffff00000805bcf00xffff000008127458 <+4>: mov w8, #0xffffffc8 // #-56 sp = 0xFFFF00000805bc400xffff00000812745c <+8>: mov x29, sp x29 = sp, fp = 0xFFFF00000805bc400xffff000008127460 <+12>: add x9, sp, #0x70 x9= 0xFFFF00000805bcb00xffff000008127464 <+16>: add x10, sp, #0xb0 x10 = 0xffff00000805bcf0 i.e. stack_top0xffff000008127468 <+20>: str x19, [sp, #16] backup x19 to stack Local Variables0xffff00000812746c <+24>: adrp x19, 0xffff0000092b9000 <page_wait_table+5376>0xffff000008127470 <+28>: add x19, x19, #0x6c8 x19 = address of __stack_chk_guard0xffff000008127474 <+32>: stp x10, x10, [sp, #72] prepare va_list args0xffff000008127478 <+36>: str x9, [sp, #88] save x9 to 0xFFFF00000805BC980xffff00000812747c <+40>: ldr x9, [x19] load stack guard magic value to x9.0xffff000008127480 <+44>: str x9, [sp, #104] store stack guard magic value to stack on 0xFFFF00000805BCA8.0xffff000008127484 <+48>: mov x9, #0x0 // #00xffff000008127488 <+52>: stp w8, wzr, [sp, #96] 设置初始的__gr_offs0xffff00000812748c <+56>: ldp x8, x9, [sp, #72]0xffff000008127490 <+60>: stp x8, x9, [sp, #32]0xffff000008127494 <+64>: ldp x8, x9, [sp, #88]0xffff000008127498 <+68>: stp x1, x2, [sp, #120] 将x1 ~ x7放入GP Arg Save Area的区域0xffff00000812749c <+72>: add x1, sp, #0x200xffff0000081274a0 <+76>: stp x8, x9, [sp, #48]0xffff0000081274a4 <+80>: stp x3, x4, [sp, #136]0xffff0000081274a8 <+84>: stp x5, x6, [sp, #152]0xffff0000081274ac <+88>: str x7, [sp, #168] 已经将GP Arg Save Area设置好,va_list args参数已经处理好了。(gdb) i r pcpc 0xffff0000081274b0 0xffff0000081274b0 <printk+92>(gdb) p args$6 = {__stack = 0xffff00000805bcf0,__gr_top = 0xffff00000805bcf0,__vr_top = 0xffff00000805bcb0,__gr_offs = -56,__vr_offs = 0}0xffff0000081274b0 <+92>: bl 0xffff000008127e28 <vprintk_func>1996 va_end(args);19971998 return r;0xffff0000081274b4 <+96>: ldr x2, [sp, #104]0xffff0000081274b8 <+100>: 
ldr x1, [x19]0xffff0000081274bc <+104>: eor x1, x2, x10xffff0000081274c0 <+108>: cbz x1, 0xffff0000081274c8 <printk+116>0xffff0000081274c4 <+112>: bl 0xffff0000080d3d48 <__stack_chk_fail>0xffff0000081274c8 <+116>: ldr x19, [sp, #16]0xffff0000081274cc <+120>: ldp x29, x30, [sp], #1760xffff0000081274d0 <+124>: retEnd of assembler dump.
从上面的例子中可以看出,printk在入口处会把x1 ~ x7全部存入GP Arg Save Area,因此在可变参数不超过7个时,所有可变参数都位于这块区域中,再通过va_list传递给vprintk_func来处理.
下边的函数通过va_list args这个参数就可以遍历x1 ~ x7中的所有参数了。
参数个数大于7
同样的,取内核中的例子:
Caller 代码
void __init mem_init_print_info(const char *str){pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"#ifdef CONFIG_HIGHMEM", %luK highmem"#endif"%s%s)\n",nr_free_pages() << (PAGE_SHIFT - 10),physpages << (PAGE_SHIFT - 10),codesize >> 10, datasize >> 10, rosize >> 10,(init_data_size + init_code_size) >> 10, bss_size >> 10,(physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),totalcma_pages << (PAGE_SHIFT - 10),#ifdef CONFIG_HIGHMEMtotalhigh_pages << (PAGE_SHIFT - 10),#endifstr ? ", " : "", str ? str : "");}
这段内存初始化时的打印,传递了非常多的参数,就以这个作为研究对象吧
Callee va_list
上面的例子中已经简单地分析过printk的汇编代码,这里不再赘述,直接去看va_list的值
#0 0xffff0000081274b0 in printk (fmt=0xffff000008e04b68 "\001\066Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved%s%s)\n") at kernel/printk/printk.c:1995args = {__stack = 0xffff0000092b3f00,__gr_top = 0xffff0000092b3f00,__vr_top = 0xffff0000092b3ec0,__gr_offs = -56,__vr_offs = 0}r = <optimized out>#1 0xffff000009196994 in mem_init_print_info (str=0xffff000008e0e090 "") at ./include/linux/vmstat.h:184physpages = <optimized out>codesize = <optimized out>datasize = <optimized out>rosize = <optimized out>bss_size = <optimized out>init_code_size = <optimized out>init_data_size = <optimized out>#2 0xffff000009185e9c in mem_init () at arch/arm64/mm/init.c:608No locals.#3 0xffff000009180b90 in mm_init () at init/main.c:520No locals.#4 start_kernel () at init/main.c:590command_line = 0xffff000009226028 <boot_command_line> "root=/dev/ram0 console=ttyAMA0"after_dashes = <optimized out>#5 0x0000000000000000 in ?? ()(gdb) i r pcpc 0xffff0000081274b0 0xffff0000081274b0 <printk+92>(gdb) x/16gx args.__stack0xffff0000092b3f00: 0x000000000000c848 0x0000000000008000 分别是第8和第9个参数0xffff0000092b3f10: 0xffff000008e0e090 0xffff000008e0e090 第10和第11个参数0xffff0000092b3f20: 0xffff0000092b3f70 0xffff000009185e9c0xffff0000092b3f30: 0xffff000009407000 0xffff0000092d55b80xffff0000092b3f40: 0xffff0000092b96c8 0xffff0000092b96c00xffff0000092b3f50: 0xffff800005fffe00 0xffff0000092260280xffff0000092b3f60: 0x0000000000000000 0x07bd33f3322245000xffff0000092b3f70: 0xffff0000092b3fa0 0xffff000009180b90(gdb) x/s 0xffff000008e0e0900xffff000008e0e090: ""
从上边准备好的va_list可以看到,在传递多于7个可变参数时,超出的参数的存放位置和正常的函数调用一样,位于Caller的Stack Arg Area中,由__stack指向
打印出来的log
[ 0.000000] Kernel command line: root=/dev/ram0 console=ttyAMA0[ 0.000000] Memory: 47032K/131072K available (10236K kernel code, 1352K rwdata, 7112K rodata, 1216K init, 379K bss, 51272K reserved, 32768K cma-reserved)[ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1[ 0.000000] Preemptible hierarchical RCU implementation.[ 0.000000] RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=2.[ 0.000000] Tasks RCU enabled.
从log可以看出,log中最后的4个参数(51272K reserved, 32768K cma-reserved以及两个空字符串)与上面栈中的4个值(0xc848, 0x8000和两个指向""的指针)完全一致.