first working executable!

This commit is contained in:
pommicket 2022-02-10 16:06:17 -05:00
parent b88de92fc7
commit d8bb5b8957
4 changed files with 134 additions and 13 deletions

View file

@ -1,10 +1,10 @@
; CALLING CONVENTION: ; CALLING CONVENTION:
; Here is the process for calling a function: ; Here is the process for calling a function:
; - the caller pushes the arguments on to the stack, from right to left ; - the caller pushes the arguments on to the stack, from right to left
; - the caller subtracts sizeof(return type) from rsp ; - the caller subtracts sizeof(return type) from rsp, rounded up to the nearest 8 bytes
; - the caller calls the function ; - the caller calls the function
; - the caller stores away the return value ; - the caller stores away the return value
; - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp ; - the caller adds (sizeof(return type) + sizeof arg0 + ... + sizeof argn) to rsp - where each sizeof is rounded up to the nearest 8 bytes
; STACK LAYOUT: ; STACK LAYOUT:
; arg n ; arg n
; ... ; ...
@ -117,6 +117,45 @@ function emit_ret
code_output += 1 code_output += 1
return return
; Emit the instruction  mov [rsp+IMM32], rax  (store rax in 8 bytes at rsp+imm32).
; Appends 8 bytes at code_output (4 opcode bytes, then the 4-byte displacement)
; and leaves code_output pointing just past them.
;   imm32 - displacement added to rsp to form the destination address
function emit_mov_qword_rsp_plus_imm32_rax
argument imm32
; encoding: 48 89 84 24 IMM32
; the four opcode bytes are written as one little-endian dword
*4code_output = 0x24848948
code_output += 4
; the 32-bit displacement follows the opcode bytes
*4code_output = imm32
code_output += 4
return
; Emit the instruction  mov rax, [rsp+IMM32]  (load 8 bytes from rsp+imm32 into rax).
; Appends 8 bytes at code_output (4 opcode bytes, then the 4-byte displacement)
; and leaves code_output pointing just past them.
;   imm32 - displacement added to rsp to form the source address
function emit_mov_rax_qword_rsp_plus_imm32
argument imm32
; encoding: 48 8b 84 24 IMM32
; the four opcode bytes are written as one little-endian dword
*4code_output = 0x24848b48
code_output += 4
; the 32-bit displacement follows the opcode bytes
*4code_output = imm32
code_output += 4
return
; Emit the instruction  mov rax, IMM64  (load a full 64-bit constant into rax).
; Appends 10 bytes at code_output (2 opcode bytes, then the 8-byte immediate)
; and leaves code_output pointing just past them.
;   imm64 - the value to load into rax
function emit_mov_rax_imm64
argument imm64
; encoding: 48 b8 IMM64
; the two opcode bytes are written as one little-endian word
*2code_output = 0xb848
code_output += 2
; the 64-bit immediate follows the opcode bytes
*8code_output = imm64
code_output += 8
return
; Emit the instruction  call rax  (indirect call to the address held in rax).
; Appends 2 bytes at code_output and advances code_output past them.
; The target address must already have been loaded into rax by previously emitted code.
function emit_call_rax
; encoding: ff d0 (written as one little-endian word)
*2code_output = 0xd0ff
code_output += 2
return
; Emit the instruction  syscall.
; Appends 2 bytes at code_output and advances code_output past them.
; The syscall number and arguments must already be set up by previously emitted code.
function emit_syscall
; encoding: 0f 05 (written as one little-endian word)
*2code_output = 0x050f
code_output += 2
return
; make sure you put the return value in the proper place before calling this ; make sure you put the return value in the proper place before calling this
function generate_return function generate_return
emit_mov_reg(REG_RSP, REG_RBP) emit_mov_reg(REG_RSP, REG_RBP)
@ -193,7 +232,63 @@ function generate_functions
byte 32 byte 32
byte 0 byte 0
; emit ELF header and code.
function generate_code function generate_code
code_output = output_file_data
emit_qword(0x00010102464c457f) ; elf identifier, 64-bit little endian, ELF version 1
emit_qword(0) ; reserved
emit_word(2) ; executable file
emit_word(0x3e) ; architecture x86-64
emit_dword(1) ; ELF version 1
emit_qword(ENTRY_ADDR) ; entry point
emit_qword(0x40) ; program header table offset
emit_qword(0) ; section header table offset
emit_dword(0) ; flags
emit_word(0x40) ; size of header
emit_word(0x38) ; size of program header
emit_word(3) ; # of program headers = 3 (code, rodata, rwdata)
emit_word(0) ; size of section header
emit_word(0) ; # of section headers
emit_word(0) ; index of .shstrtab
; from /usr/include/elf.h:
;#define PF_X (1 << 0) /* Segment is executable */
;#define PF_W (1 << 1) /* Segment is writable */
;#define PF_R (1 << 2) /* Segment is readable */
; program header 1 (code)
emit_dword(1) ; loadable segment
emit_dword(1) ; execute only
emit_qword(ENTRY_ADDR) ; offset in file
emit_qword(ENTRY_ADDR) ; virtual address
emit_qword(0) ; physical address
emit_qword(TOTAL_CODE_SIZE) ; size in executable file
emit_qword(TOTAL_CODE_SIZE) ; size when loaded into memory
emit_qword(4096) ; alignment
; program header 2 (rodata)
emit_dword(1) ; loadable segment
emit_dword(4) ; read only
emit_qword(RODATA_ADDR) ; offset in file
emit_qword(RODATA_ADDR) ; virtual address
emit_qword(0) ; physical address
emit_qword(RODATA_SIZE) ; size in executable file
emit_qword(RODATA_SIZE) ; size when loaded into memory
emit_qword(4096) ; alignment
; program header 3 (rwdata)
emit_dword(1) ; loadable segment
emit_dword(6) ; read/write
emit_qword(RWDATA_ADDR) ; offset in file
emit_qword(RWDATA_ADDR) ; virtual address
emit_qword(0) ; physical address
emit_qword(RWDATA_SIZE) ; size in executable file
emit_qword(RWDATA_SIZE) ; size when loaded into memory
emit_qword(4096) ; alignment
local p_func local p_func
code_output = output_file_data + FUNCTIONS_ADDR code_output = output_file_data + FUNCTIONS_ADDR
codegen_second_pass = 0 codegen_second_pass = 0
@ -209,9 +304,32 @@ function generate_code
; on entry, we will have: ; on entry, we will have:
; argc = *rsp ; argc = *rsp
; argv = rsp + 8 ; argv = rsp + 8
code_output = output_file_data + ENTRY_ADDR
; add rsp, 8
emit_add_rsp_imm32(8)
; mov rax, rsp (set rax to argv)
emit_mov_reg(REG_RAX, REG_RSP)
; sub rsp, 32 (undo add rsp, 8 from before and add space for argv, argc, return value)
emit_sub_rsp_imm32(32)
; mov [rsp+16], rax (put argv in the right place)
emit_mov_qword_rsp_plus_imm32_rax(16)
; mov rax, [rsp+24] (set rax to argc)
emit_mov_rax_qword_rsp_plus_imm32(24)
; mov [rsp+8], rax (put argc in the right place)
emit_mov_qword_rsp_plus_imm32_rax(8)
; mov rax, main
emit_mov_rax_imm64(main_addr)
; call rax
emit_call_rax()
; mov rax, [rsp]
emit_mov_rax_qword_rsp_plus_imm32(0)
; mov rdi, rax
emit_mov_reg(REG_RDI, REG_RAX)
; mov rax, 0x3c (SYS_exit)
emit_mov_rax_imm64(0x3c)
; syscall
emit_syscall()
; @TODO
return return
:no_main_function :no_main_function
die(.str_no_main_function) die(.str_no_main_function)

View file

@ -1,15 +1,18 @@
; this is the format of the executables we produce: ; this is the format of the executables we produce:
; elf header 2MB addresses 0x000000-0x1fffff (no, it won't actually take up that much space) ; elf header 2MB addresses 0x000000-0x1fffff (no, it won't actually take up that much space)
; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space) ; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space)
; code (functions) 4MB addresses 0x400000-0x7fffff ; functions 4MB addresses 0x400000-0x7fffff
; read-only data 4MB addresses 0x800000-0xbfffff ; read-only data 4MB addresses 0x800000-0xbfffff
; read-write data 4MB addresses 0xc00000-0xffffff ; read-write data 4MB addresses 0xc00000-0xffffff
; note that file offsets and runtime addresses are the same. ; note that file offsets and runtime addresses are the same.
; you should be able to change these constants without breaking anything: ; you should be able to change these constants (in a way that's consistent) without breaking anything:
#define ENTRY_ADDR 0x200000 #define ENTRY_ADDR 0x200000
#define FUNCTIONS_ADDR 0x400000 #define FUNCTIONS_ADDR 0x400000
#define TOTAL_CODE_SIZE 0x600000
#define RODATA_ADDR 0x800000 #define RODATA_ADDR 0x800000
#define RODATA_SIZE 0x400000
#define RWDATA_ADDR 0xc00000 #define RWDATA_ADDR 0xc00000
#define RWDATA_SIZE 0x400000
#define RWDATA_END 0x1000000 #define RWDATA_END 0x1000000
#define EXECUTABLE_SIZE 0x1000000 #define EXECUTABLE_SIZE 0x1000000

View file

@ -13,8 +13,6 @@ struct A { struct B *blah; }
struct B { struct A *blah; } struct B { struct A *blah; }
*/ */
int main(int argc, char **Argv) { int main(int argc, char **argv) {
int i,j; argv+argc;
Argv+argc+i;
j;
} }

View file

@ -130,12 +130,14 @@ ax bx cx dx sp bp si di
│ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │ │ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │
│ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │ │ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │
│ │ │ into rax (note: IMM32 may be negative) │ │ │ │ into rax (note: IMM32 may be negative) │
lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 mov rax, [rsp+IMM32] │ 48 8b 84 24 IMM32 │ load 8 bytes from address rsp+IMM32
lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │ │ into rax (note: IMM32 may be negative)
│ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │ │ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │
│ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │ │ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │
│ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │ │ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │
│ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │ │ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │
│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │
│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │
│ neg rax │ 48 f7 d8 │ set rax to -rax │ │ neg rax │ 48 f7 d8 │ set rax to -rax │
│ add rax, rbx │ 48 01 d8 │ add rbx to rax │ │ add rax, rbx │ 48 01 d8 │ add rbx to rax │
│ sub rax, rbx │ 48 29 d8 │ subtract rbx from rax │ │ sub rax, rbx │ 48 29 d8 │ subtract rbx from rax │