start codegen
This commit is contained in:
parent
d74dc53b0b
commit
3d44eba388
5 changed files with 210 additions and 4 deletions
195
05/codegen.b
Normal file
195
05/codegen.b
Normal file
|
@ -0,0 +1,195 @@
|
||||||
|
; CALLING CONVENTION:
|
||||||
|
; arguments are pushed onto the stack by the caller, from right to left
|
||||||
|
; caller must also reserve space on stack for return value
|
||||||
|
; so the function puts the return value at [rbp+16] (+8 for the caller's rbp saved by the prologue, +8 for the stored return address)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
global code_output ; position where the next emitted byte will be written; the emit_* functions store here and then advance it
|
||||||
|
global codegen_second_pass ; = 0 on first global pass, 1 on second global pass
|
||||||
|
global functions_addresses ; ident list mapping each function name to its address (code_output - output_file_data at the point the function was generated on the first pass)
|
||||||
|
global functions_labels ; ident list mapping each function name to that function's ident list of label addresses
|
||||||
|
global curr_function_labels ; ident list of labels for current function (written to in 1st pass, read from in 2nd pass)
|
||||||
|
|
||||||
|
; register numbers as they are placed into instruction encodings (see emit_mov_reg); note that rbx is 3, not 1
#define REG_RAX 0
|
||||||
|
#define REG_RBX 3
|
||||||
|
#define REG_RCX 1
|
||||||
|
#define REG_RDX 2
|
||||||
|
#define REG_RSP 4
|
||||||
|
#define REG_RBP 5
|
||||||
|
#define REG_RSI 6
|
||||||
|
#define REG_RDI 7
|
||||||
|
|
||||||
|
; Append one byte to the generated code.
;   byte - value to write; only a single byte is stored
function emit_byte
	argument byte
	; store 1 byte at the current output position, then step past it
	*1code_output = byte
	code_output += 1
	return
|
||||||
|
|
||||||
|
; Append a run of bytes to the generated code.
;   bytes - address of the first byte to copy
;   count - number of bytes to copy
function emit_bytes
	argument bytes
	argument count
	; memcpy is defined elsewhere; presumably (destination, source, size) -- confirm
	memcpy(code_output, bytes, count)
	; step the output position past what was just written
	code_output += count
	return
|
||||||
|
|
||||||
|
; Append a 2-byte value to the generated code.
;   word - value to write; only 2 bytes are stored
function emit_word
	argument word
	; store 2 bytes at the current output position, then step past them
	*2code_output = word
	code_output += 2
	return
|
||||||
|
|
||||||
|
; Append a 4-byte value to the generated code.
;   word - value to write; only 4 bytes are stored
function emit_dword
	argument word
	; store 4 bytes at the current output position, then step past them
	*4code_output = word
	code_output += 4
	return
|
||||||
|
|
||||||
|
; Append an 8-byte value to the generated code.
;   word - value to write; all 8 bytes are stored
function emit_qword
	argument word
	; store 8 bytes at the current output position, then step past them
	*8code_output = word
	code_output += 8
	return
|
||||||
|
|
||||||
|
; Emit a 64-bit register-to-register move.
; e.g. emit_mov_reg(REG_RAX, REG_RBX) emits mov rax, rbx
;   dest - REG_* number of the register written to
;   src  - REG_* number of the register read from
function emit_mov_reg
	argument dest
	argument src
	local modrm

	; encoding: 48 89 (DEST|SRC<<3|0xc0)
	; the two fixed bytes 48 89 go out as one 2-byte store (low byte first)
	emit_word(0x8948)
	; build the last byte; `<` is the left-shift operator here (SRC<<3 in the encoding above)
	modrm = src < 3
	modrm |= dest
	modrm |= 0xc0
	emit_byte(modrm)
	return
|
||||||
|
|
||||||
|
|
||||||
|
; Emit sub rsp, IMM32.
;   imm32 - amount to subtract from rsp; written as a 4-byte immediate
function emit_sub_rsp_imm32
	argument imm32
	; encoding: 48 81 ec IMM32
	emit_word(0x8148)
	emit_byte(0xec)
	emit_dword(imm32)
	return
|
||||||
|
|
||||||
|
; Emit mov qword [rsp], rbp  (store rbp at the top of the stack).
function emit_mov_qword_rsp_rbp
	; encoding: 48 89 2c 24, written as a single 4-byte value (low byte first)
	emit_dword(0x242c8948)
	return
|
||||||
|
|
||||||
|
; Emit mov rbp, qword [rsp]  (load rbp from the top of the stack).
function emit_mov_rbp_qword_rsp
	; encoding: 48 8b 2c 24, written as a single 4-byte value (low byte first)
	emit_dword(0x242c8b48)
	return
|
||||||
|
|
||||||
|
; Emit add rsp, IMM32.
;   imm32 - amount to add to rsp; written as a 4-byte immediate
function emit_add_rsp_imm32
	argument imm32
	; encoding: 48 81 c4 IMM32
	emit_word(0x8148)
	emit_byte(0xc4)
	emit_dword(imm32)
	return
|
||||||
|
|
||||||
|
; Emit ret (single byte c3).
function emit_ret
	emit_byte(0xc3)
	return
|
||||||
|
|
||||||
|
; Emit the code that leaves a function: undo the prologue emitted by generate_function, then return.
; make sure you put the return value in the proper place before calling this
function generate_return
	; mov rsp, rbp
	emit_mov_reg(REG_RSP, REG_RBP)
	; mov rbp, [rsp]  -- reload the rbp that the prologue stored there
	emit_mov_rbp_qword_rsp()
	; add rsp, 8  -- release the slot that held the saved rbp
	emit_add_rsp_imm32(8)
	; ret
	emit_ret()
	return
|
||||||
|
|
||||||
|
; Emit code for one statement.
; Not implemented yet: this currently emits nothing and ignores its argument.
;   statement - the statement to generate code for
function generate_statement
	argument statement
	; @TODO
	return
|
||||||
|
|
||||||
|
; Emit the code for one function: prologue, body, then an implicit return.
; Also selects curr_function_labels for the function (created on the first pass, looked up on the second).
;   function_name      - name of the function; key into functions_labels
;   function_statement - the function's body, handed on to generate_statement
function generate_function
	argument function_name
	argument function_statement
	; out0 is not used yet
	local out0

	if codegen_second_pass != 0 goto genf_second_pass
		; first pass: create an empty label list for this function and register it under the function's name
		; ~ 200 labels per function should be plenty
		curr_function_labels = ident_list_create(4000)
		ident_list_add(functions_labels, function_name, curr_function_labels)
		goto genf_cont
	:genf_second_pass
		; second pass: fetch the label list that the first pass registered
		curr_function_labels = ident_list_lookup(functions_labels, function_name)
	:genf_cont

	; prologue:
	;   sub rsp, 8
	;   mov [rsp], rbp
	;   mov rbp, rsp
	emit_sub_rsp_imm32(8)
	emit_mov_qword_rsp_rbp()
	emit_mov_reg(REG_RBP, REG_RSP)

	generate_statement(function_statement)

	; implicit return at end of function
	generate_return()

	return
|
||||||
|
|
||||||
|
; Emit code for every entry in function_statements, in order.
; First pass: the address of each function is recorded in functions_addresses.
; Second pass: the address is compared with the recorded one; on a mismatch an
; error naming the function is written with fputs(2, ...) and the program exits with status 1.
function generate_functions
	local addr
	local expected_addr
	local stmt
	local function_name

	function_name = function_statements

	:genfunctions_loop
		; a name whose first byte is 0 ends the list
		if *1function_name == 0 goto genfunctions_loop_end
		; address of this function
		addr = code_output - output_file_data
		if codegen_second_pass != 0 goto genfs_check_addr
			; first pass; record address of function
			ident_list_add(functions_addresses, function_name, addr)
			goto genfs_cont
		:genfs_check_addr
			expected_addr = ident_list_lookup(functions_addresses, function_name)
			if expected_addr != addr goto function_addr_mismatch
		:genfs_cont
		; find the 0 byte that ends the name; what follows it is passed on as the function's statement
		stmt = memchr(function_name, 0)
		stmt += 1
		; NOTE(review): stmt is the address just past the name, and 8 bytes are skipped below to reach
		; the next name -- so this looks like a pointer to an 8-byte value rather than the value itself.
		; confirm that is what generate_function expects once generate_statement is implemented.
		generate_function(function_name, stmt)
		; the next name starts 8 bytes further on
		function_name = stmt + 8
		goto genfunctions_loop
	:genfunctions_loop_end
	return

	:function_addr_mismatch
		; address of function on 2nd pass doesn't line up with 1st pass
		fputs(2, .str_function_addr_mismatch)
		fputs(2, function_name)
		exit(1)
	:str_function_addr_mismatch
		string Function address on first pass doesn't match 2nd pass:
		byte 32
		byte 0
|
||||||
|
|
||||||
|
; Generate the machine code for all functions.
; generate_functions is run twice, both times starting at output_file_data + FUNCTIONS_ADDR:
; the first run records function addresses and label lists, the second run reads them back.
function generate_code
	; p_func is not used yet
	local p_func

	; first pass
	codegen_second_pass = 0
	code_output = output_file_data + FUNCTIONS_ADDR
	generate_functions()

	; second pass: rewind to the same starting position so the output is written over again
	codegen_second_pass = 1
	code_output = output_file_data + FUNCTIONS_ADDR
	generate_functions()

	; generate code at the entry point of the executable
	; @TODO
	return
|
|
@ -1,10 +1,13 @@
|
||||||
; this is the format of the executables we produce:
|
; this is the format of the executables we produce:
|
||||||
; elf header 4MB addresses 0x000000-0x400000 (no, it won't actually take up that much space)
|
; elf header 2MB addresses 0x000000-0x200000 (no, it won't actually take up that much space)
|
||||||
; code 4MB addresses 0x400000-0x7fffff
|
; entry point 2MB addresses 0x200000-0x3fffff this is where we put the code to call main(), etc. (again, it won't actually take up that much space)
|
||||||
|
; code (functions) 4MB addresses 0x400000-0x7fffff
|
||||||
; read-only data 4MB addresses 0x800000-0xbfffff
|
; read-only data 4MB addresses 0x800000-0xbfffff
|
||||||
; read-write data 4MB addresses 0xc00000-0xffffff
|
; read-write data 4MB addresses 0xc00000-0xffffff
|
||||||
; note that file offsets and runtime addresses are the same.
|
; note that file offsets and runtime addresses are the same.
|
||||||
; you should be able to change these constants without breaking anything:
|
; you should be able to change these constants without breaking anything:
|
||||||
|
#define ENTRY_ADDR 0x200000
|
||||||
|
#define FUNCTIONS_ADDR 0x400000
|
||||||
#define RODATA_ADDR 0x800000
|
#define RODATA_ADDR 0x800000
|
||||||
#define RWDATA_ADDR 0xc00000
|
#define RWDATA_ADDR 0xc00000
|
||||||
#define RWDATA_END 0x1000000
|
#define RWDATA_END 0x1000000
|
||||||
|
|
|
@ -81,6 +81,7 @@ global function_param_has_no_name
|
||||||
#include preprocess.b
|
#include preprocess.b
|
||||||
#include tokenize.b
|
#include tokenize.b
|
||||||
#include parse.b
|
#include parse.b
|
||||||
|
#include codegen.b
|
||||||
|
|
||||||
function types_init
|
function types_init
|
||||||
argument _types
|
argument _types
|
||||||
|
@ -235,6 +236,8 @@ function main
|
||||||
structure_locations = ident_list_create(2000000)
|
structure_locations = ident_list_create(2000000)
|
||||||
global_variables = ident_list_create(400000)
|
global_variables = ident_list_create(400000)
|
||||||
function_statements = ident_list_create(800000)
|
function_statements = ident_list_create(800000)
|
||||||
|
functions_addresses = ident_list_create(800000)
|
||||||
|
functions_labels = ident_list_create(800000)
|
||||||
function_types = ident_list_create(800000)
|
function_types = ident_list_create(800000)
|
||||||
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
|
function_stmt_data = malloc(800000) ; should be at least 40 bytes * max # of functions
|
||||||
|
|
||||||
|
@ -285,6 +288,7 @@ function main
|
||||||
; NOTE: do NOT free pptokens; identifiers still reference them.
|
; NOTE: do NOT free pptokens; identifiers still reference them.
|
||||||
|
|
||||||
parse_tokens(tokens)
|
parse_tokens(tokens)
|
||||||
|
generate_code()
|
||||||
|
|
||||||
p = output_file_data + RODATA_ADDR
|
p = output_file_data + RODATA_ADDR
|
||||||
munmap(output_file_data, RWDATA_END)
|
munmap(output_file_data, RWDATA_END)
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
#include "tests/parse_stb_truetype.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
; @NONSTANDARD:
|
; @NONSTANDARD:
|
||||||
; the following does not work:
|
; the following does not work:
|
||||||
|
@ -14,3 +12,6 @@ This needs to be fixed because otherwise you can't do:
|
||||||
struct A { struct B *blah; }
|
struct A { struct B *blah; }
|
||||||
struct B { struct A *blah; }
|
struct B { struct A *blah; }
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
}
|
||||||
|
|
|
@ -105,6 +105,9 @@ In the table below, `IMM64` means a 64-bit *immediate* (a constant number).
|
||||||
`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`.
|
`rdx:rax` refers to the 128-bit number you get by combining `rdx` and `rax`.
|
||||||
|
|
||||||
```
|
```
|
||||||
|
ax bx cx dx sp bp si di
|
||||||
|
0 3 1 2 4 5 6 7
|
||||||
|
|
||||||
┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
|
┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
|
||||||
│ Instruction │ Encoding │ Description │
|
│ Instruction │ Encoding │ Description │
|
||||||
├──────────────────────┼───────────────────┼────────────────────────────────────────┤
|
├──────────────────────┼───────────────────┼────────────────────────────────────────┤
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue