@Eric Postpischil
In fact, the compiler does not know that, as the following example shows.
/* External routines: only declared here, their definitions are not part of this snippet. */
int func1(int *);
int func2(int);
/*
 * Demonstration case: passes a block-local scratch array to func1, copies
 * that array into arr, then passes each of the ten elements of arr to func2.
 * The return values of func1 and func2 are discarded. Always returns 0.
 */
int entry() {
int arr[10];
{
/* Scratch buffer handed to func1; its scope ends with this inner block. */
int _arr[10];
func1(_arr);
/* Copies sizeof(arr) bytes, i.e. the whole 10-int array, from _arr into arr. */
__builtin_memcpy(arr, _arr, sizeof(arr));
}
/* sizeof(arr) / sizeof(int) is the element count of arr (10). */
for (int i = 0; i < sizeof(arr) / sizeof(int); i++) {
func2(arr[i]);
}
return 0;
}
.file "example.c"
# GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu)
# compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables
.text
.p2align 4
.globl entry
.type entry, @function
# entry: GCC -Ofast output (AT&T syntax, x86-64 Linux) for the array-copy
# variant of entry().
# Stack layout after the prologue, as used by the code below:
#    0(%rsp) .. 39(%rsp)   arr  (destination of the copy, read by the loop)
#   48(%rsp) .. 87(%rsp)   _arr (buffer passed to func1)
# Register roles:
#   %rbx = pointer to the current element of arr (starts at &arr[0])
#   %rbp = one-past-the-end pointer of arr (&arr[0] + 40 bytes)
# NOTE(review): the /app/example.c line numbers in the comments below do not
# match the snippet as posted; presumably the compiled file had extra lines.
entry:
# Save the two callee-saved registers used as loop state.
pushq %rbp #
pushq %rbx #
# Reserve the frame for both arrays; with the return address and the two
# pushes, 104 bytes leaves %rsp 16-byte aligned for the calls and for the
# aligned SSE moves below.
subq $104, %rsp #,
# /app/example.c:12: func1(_arr);
# First argument of func1: address of _arr.
leaq 48(%rsp), %rdi #, tmp103
# Loop start/end pointers are set up before the call; %rbx and %rbp are
# callee-saved, so they survive func1 and every func2 call.
movq %rsp, %rbx #, ivtmp.11
leaq 40(%rsp), %rbp #, _19
call func1 #
# /app/example.c:14: __builtin_memcpy(arr, _arr, sizeof(arr));
# The 40-byte copy is expanded inline as 16 + 16 + 8 bytes: two aligned
# 16-byte moves through %xmm0 and one 8-byte move through %rax.
movdqa 48(%rsp), %xmm0 # MEM <unsigned char[40]> [(char * {ref-all})&_arr], MEM <unsigned char[40]> [(char * {ref-all})&_arr]
movq 80(%rsp), %rax # MEM <unsigned char[40]> [(char * {ref-all})&_arr], MEM <unsigned char[40]> [(char * {ref-all})&_arr]
movaps %xmm0, (%rsp) # MEM <unsigned char[40]> [(char * {ref-all})&_arr], MEM <unsigned char[40]> [(char * {ref-all})&arr]
movdqa 64(%rsp), %xmm0 # MEM <unsigned char[40]> [(char * {ref-all})&_arr], MEM <unsigned char[40]> [(char * {ref-all})&_arr]
movq %rax, 32(%rsp) # MEM <unsigned char[40]> [(char * {ref-all})&_arr], MEM <unsigned char[40]> [(char * {ref-all})&arr]
movaps %xmm0, 16(%rsp) # MEM <unsigned char[40]> [(char * {ref-all})&_arr], MEM <unsigned char[40]> [(char * {ref-all})&arr]
# Alignment padding in front of the loop head.
.p2align 4
.p2align 3
# Loop: one iteration per int of arr; runs until %rbx reaches %rbp.
.L2:
# /app/example.c:18: func2(arr[i]);
# Load the current element of the copy (arr, not _arr) as func2's argument.
movl (%rbx), %edi # MEM[(int *)_17], MEM[(int *)_17]
# /app/example.c:17: for (int i = 0; i < sizeof(arr) / sizeof(int); i++) {
# Advance to the next int before the call; %rbx is preserved across it.
addq $4, %rbx #, ivtmp.11
# /app/example.c:18: func2(arr[i]);
call func2 #
# /app/example.c:17: for (int i = 0; i < sizeof(arr) / sizeof(int); i++) {
cmpq %rbp, %rbx # _19, ivtmp.11
jne .L2 #,
# /app/example.c:22: }
# Epilogue: release the frame, set the return value to 0, restore registers.
addq $104, %rsp #,
xorl %eax, %eax #
popq %rbx #
popq %rbp #
ret
.size entry, .-entry
.ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0"
.section .note.GNU-stack,"",@progbits
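As the listing shows, the compiler still allocates stack space for `arr` and copies `_arr` into it.
The ideal optimization would be to skip the copy and read directly from the original buffer each time `func2(arr[i]);` is called.
The compiler is indeed capable of generating that kind of code, as the next example shows.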
/* External routines: only declared in this snippet (func1 is not used below). */
int func1(int *);
int func2(int);
/*
 * The malloc attribute is GCC's hint that the pointer returned by func3
 * does not alias any other pointer that is valid when func3 returns.
 */
__attribute__((malloc)) int *func3();
/*
 * Pointer variant: func3 supplies the buffer and the loop reads ten ints
 * through that pointer directly, with no local array. Always returns 0.
 */
int entry() {
int *arr = func3();
/* No null check: arr is dereferenced exactly as returned by func3. */
for (int i = 0; i < 10; i++) {
func2(arr[i]);
}
return 0;
}
.file "example.c"
# GNU C23 (Compiler-Explorer-Build-gcc--binutils-2.44) version 15.2.0 (x86_64-linux-gnu)
# compiled by GNU C version 11.4.0, GMP version 6.2.1, MPFR version 4.1.0, MPC version 1.2.1, isl version isl-0.24-GMP
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -mtune=generic -march=x86-64 -g -g0 -Ofast -fno-asynchronous-unwind-tables
.text
.p2align 4
.globl entry
.type entry, @function
# entry: GCC -Ofast output (AT&T syntax, x86-64 Linux) for the func3/pointer
# variant of entry(). No array is kept on the stack here; elements are
# loaded straight from the buffer whose address func3 returned.
# Register roles:
#   %rbx = pointer to the current element (starts at func3's return value)
#   %rbp = end pointer, 40 bytes (10 ints) past the returned address
# NOTE(review): the /app/example.c line numbers in the comments below do not
# match the snippet as posted; presumably the compiled file had extra lines.
entry:
# Save the two callee-saved registers used as loop state.
pushq %rbp #
pushq %rbx #
# No locals are stored in these 8 bytes; together with the return address
# and the two pushes they keep %rsp 16-byte aligned at the calls.
subq $8, %rsp #,
# /app/example.c:9: int *arr = func3();
call func3 #
# %rax holds the returned pointer; derive the loop start and end from it.
movq %rax, %rbx # ivtmp.10, ivtmp.10
leaq 40(%rax), %rbp #, _20
# Alignment padding in front of the loop head.
.p2align 4
.p2align 3
# Loop: one iteration per int; runs until %rbx reaches %rbp.
.L2:
# /app/example.c:12: func2(arr[i]);
# Load the current element through the pointer as func2's argument.
movl (%rbx), %edi # MEM[(int *)_18], MEM[(int *)_18]
# /app/example.c:11: for (int i = 0; i < 10; i++) {
# Advance to the next int before the call; %rbx is preserved across it.
addq $4, %rbx #, ivtmp.10
# /app/example.c:12: func2(arr[i]);
call func2 #
# /app/example.c:11: for (int i = 0; i < 10; i++) {
cmpq %rbp, %rbx # _20, ivtmp.10
jne .L2 #,
# /app/example.c:16: }
# Epilogue: drop the padding, set the return value to 0, restore registers.
addq $8, %rsp #,
xorl %eax, %eax #
popq %rbx #
popq %rbp #
ret
.size entry, .-entry
.ident "GCC: (Compiler-Explorer-Build-gcc--binutils-2.44) 15.2.0"
.section .note.GNU-stack,"",@progbits