2022-02-04 19:31:45 -05:00
|
|
|
; @TODO: if we have,
|
|
|
|
; 1 extern int blah;
|
|
|
|
; 2 ...
|
|
|
|
; n int blah;
|
|
|
|
; give `blah` an address on line 1, then ignore declaration on line n
|
|
|
|
|
|
|
|
|
2022-01-07 23:32:27 -05:00
|
|
|
; Program entry stub.
; The seven bytes below are the raw encoding of `add rsp, 40`
; (REX.W prefix 0x48, opcode 0x81, ModRM 0xc4, then a 32-bit little-endian immediate).
; 40 = 24 + 16; moving the stack pointer by that much puts argc and argv
; where main's `argument` declarations expect to find them.
byte 0x48
byte 0x81
byte 0xc4
; imm32 = 0x00000028 = 40
byte 0x28
byte 0
byte 0
byte 0
goto main
|
|
|
|
|
2022-01-08 14:37:39 -05:00
|
|
|
|
|
|
|
global object_macros_size
|
|
|
|
global function_macros_size
|
|
|
|
; these are allocated in main()
|
|
|
|
global object_macros
|
|
|
|
global function_macros
|
|
|
|
|
2022-01-24 13:22:16 -05:00
|
|
|
; powers of 10, stored in the following format:
|
|
|
|
; ulong significand
|
|
|
|
; ulong exponent
|
|
|
|
; where for i = -1023..1023, powers_of_10 + 16*i points to an entry where
|
|
|
|
; 10^i = significand * 2^exponent
|
|
|
|
global powers_of_10
|
|
|
|
|
|
|
|
global types
|
|
|
|
global types_bytes_used
|
|
|
|
; ident list of type IDs
|
|
|
|
global typedefs
|
2022-01-24 20:53:37 -05:00
|
|
|
; ident list of enum values
|
|
|
|
global enumerators
|
2022-01-26 18:00:47 -05:00
|
|
|
; struct/unions
|
|
|
|
; an ident list of pointers to struct data
|
|
|
|
; each struct data is an ident list of 64-bit values, (type << 32) | offset
|
|
|
|
; for unions, offset will always be 0.
|
2022-01-25 18:28:26 -05:00
|
|
|
global structures
|
|
|
|
global structures_bytes_used
|
2022-01-27 18:52:39 -05:00
|
|
|
; file offset/runtime address to write next piece of read-only data; initialized in main
|
|
|
|
global rodata_end_addr
|
2022-01-28 15:07:23 -05:00
|
|
|
; file offset/runtime address to write next piece of read-write data; initialized in main
|
|
|
|
global rwdata_end_addr
|
2022-01-27 18:52:39 -05:00
|
|
|
global output_file_data
|
2022-01-29 11:08:57 -05:00
|
|
|
; ident list of global variables. each one is stored as
|
|
|
|
; (type << 32) | address
|
2022-01-28 15:07:23 -05:00
|
|
|
global global_variables
|
2022-02-06 18:24:52 -05:00
|
|
|
; ident list of functions. each entry is a pointer to two statements
|
|
|
|
; - the first one is a STATEMENT_LOCAL_DECLARATION (with dat2=dat3=dat4=0), which is only there to set rsp properly because of parameters
|
|
|
|
; - the second one is the function body (a STATEMENT_BLOCK)
|
2022-02-03 22:53:38 -05:00
|
|
|
global function_statements
|
|
|
|
; statement_datas[0] = pointer to statement data for block-nesting depth 0 (i.e. function bodies)
|
|
|
|
; statement_datas[1] = pointer to statement data for block-nesting depth 1 (blocks inside functions)
|
|
|
|
; statement_datas[2] = pointer to statement data for block-nesting depth 2 (blocks inside blocks inside functions)
|
2022-02-04 23:21:33 -05:00
|
|
|
; etc. up to statement_datas[BLOCK_DEPTH_LIMIT-1]
|
2022-02-03 22:53:38 -05:00
|
|
|
; these have to be separated for reasons™
|
|
|
|
global statement_datas
|
|
|
|
global statement_datas_ends
|
2022-02-05 18:41:01 -05:00
|
|
|
; ident lists of (type << 32) | address
|
2022-02-04 23:21:33 -05:00
|
|
|
; block_static_variables[0] = static variables inside this function
|
|
|
|
; block_static_variables[1] = static variables inside this block inside this function
|
|
|
|
; etc.
|
|
|
|
global block_static_variables
|
2022-02-05 18:41:01 -05:00
|
|
|
; ident lists of (type << 32) | rbp offset; one per block depth
|
|
|
|
global local_variables
|
2022-02-04 23:21:33 -05:00
|
|
|
global block_depth
|
2022-02-04 19:31:45 -05:00
|
|
|
global expressions
|
|
|
|
global expressions_end
|
2022-02-06 18:24:52 -05:00
|
|
|
; current rbp offset (where rsp is)
|
|
|
|
global local_var_rbp_offset
|
|
|
|
global function_param_names
|
2022-01-24 13:22:16 -05:00
|
|
|
|
|
|
|
#include util.b
|
|
|
|
#include idents.b
|
|
|
|
#include constants.b
|
|
|
|
#include preprocess.b
|
|
|
|
#include tokenize.b
|
|
|
|
#include parse.b
|
|
|
|
|
|
|
|
; Writes the initial entries of a type table.
;   _types            - start of the type table to fill in
;   ptypes_bytes_used - address of an 8-byte value that receives the number of
;                       bytes of the table in use once the initial entries are written
function types_init
	argument _types
	argument ptypes_bytes_used
	local i
	local p
	
	; bytes 0..16 of the table each hold their own index
	i = 0
	p = _types
	:fill_initial_types_loop
		*1p = i
		p += 1
		i += 1
		if i <= 16 goto fill_initial_types_loop
	; two-byte entry at TYPE_POINTER_TO_CHAR: TYPE_POINTER followed by TYPE_CHAR
	p = _types + TYPE_POINTER_TO_CHAR
	*1p = TYPE_POINTER
	p += 1
	*1p = TYPE_CHAR
	p += 1
	; two-byte entry at TYPE_POINTER_TO_VOID: TYPE_POINTER followed by TYPE_VOID
	p = _types + TYPE_POINTER_TO_VOID
	*1p = TYPE_POINTER
	p += 1
	*1p = TYPE_VOID
	p += 1
	
	; measure from the table we were handed rather than from the global `types`,
	; so the result is right for whichever table the caller passes in
	*8ptypes_bytes_used = p - _types
	return
|
|
|
|
|
2022-01-14 13:41:44 -05:00
|
|
|
; Writes the location of `token` to `fd` as the file name, a ':' and the line number.
; Reads a 2-byte file value at token+2 and a 4-byte line value at token+4.
function fprint_token_location
	argument fd
	argument token
	local where
	local line
	
	where = token + 2
	fprint_filename(fd, *2where)
	where += 2
	line = *4where
	fputc(fd, ':)
	fputn(fd, line)
	return
|
|
|
|
|
2022-01-11 00:09:11 -05:00
|
|
|
; Writes a file name to the file descriptor `fd`.
; `file` may be EITHER a file index OR a pointer to a filename:
; values above 65535 (unsigned compare) are treated as a pointer to the name itself,
; anything else is passed through file_get first.
function fprint_filename
	argument fd
	argument file
	if file ] 65535 goto print_filename_string
	file = file_get(file)
	; (fallthrough)
	:print_filename_string
	; write to the descriptor the caller asked for (this used to always write to fd 2,
	; so fprint_token_location split its output across two descriptors)
	fputs(fd, file)
	return
|
|
|
|
|
|
|
|
; Reports a fatal compile error and terminates.
; Writes to fd 2, in order: the file name, ':', the line number, the ": Error: " prefix,
; the message and a newline; then calls exit(1).
;   file    - EITHER a file index OR a pointer to a filename (handed to fprint_filename)
;   line    - line number, printed with fputn
;   message - pointer to the message text, printed with fputs
function compile_error
	argument file
	argument line
	argument message
	
	; "<file>:<line>"
	fprint_filename(2, file)
	fputc(2, ':)
	fputn(2, line)
	; ": Error: <message>" followed by a newline (byte 10)
	fputs(2, .str_error_prefix)
	fputs(2, message)
	fputc(2, 10)
	exit(1)
|
2022-01-09 22:33:33 -05:00
|
|
|
|
2022-01-13 16:12:28 -05:00
|
|
|
; Reports a fatal error at the location recorded in `token`.
; Reads the 2-byte file value at token+2 and the 4-byte line value at token+4,
; then hands both to compile_error together with `message`.
function token_error
	argument token
	argument message
	local where
	local file
	local line
	
	where = token + 2
	file = *2where
	where = token + 4
	line = *4where
	compile_error(file, line, message)
|
|
|
|
|
2022-01-11 00:09:11 -05:00
|
|
|
; Reports a compile warning and returns to the caller.
; Writes to fd 2, in order: the file name, ':', the line number, the ": Warning: " prefix,
; the message and a newline.
;   file    - EITHER a file index OR a pointer to a filename (handed to fprint_filename)
;   line    - line number, printed with fputn
;   message - pointer to the message text, printed with fputs
function compile_warning
	argument file
	argument line
	argument message
	
	; "<file>:<line>"
	fprint_filename(2, file)
	fputc(2, ':)
	fputn(2, line)
	; ": Warning: <message>" followed by a newline (byte 10)
	fputs(2, .str_warning_prefix)
	fputs(2, message)
	fputc(2, 10)
	return

; prefix printed by compile_error; the trailing space (32) and the terminating 0 are given as explicit bytes
:str_error_prefix
	string : Error:
	byte 32
	byte 0

; prefix printed by compile_warning; same layout as above
:str_warning_prefix
	string : Warning:
	byte 32
	byte 0
|
|
|
|
|
2022-01-13 16:12:28 -05:00
|
|
|
|
2022-01-07 23:32:27 -05:00
|
|
|
; Compiler driver: allocates the global tables, picks the input/output file names,
; maps the output file, then runs preprocessing, tokenizing and parsing on the input.
; With no command-line arguments the default file names are used; with exactly two,
; they are taken as the input and output file names; anything else is a usage error.
function main
	; NOTE(review): the arguments appear to be read directly from the initial stack
	; (see the stack-pointer adjustment before `goto main`) - confirm before reordering
	argument argv2
	argument argv1
	argument argv0
	argument argc
	local input_filename
	local output_filename
	local pptokens
	local processed_pptokens
	local tokens
	local ast
	local p
	local q
	local i
	local output_fd
	local memory
	
	; one 4000-byte allocation, carved into 400-byte slices for the per-block-depth tables
	memory = malloc(4000)
	statement_datas = memory
	memory += 400
	statement_datas_ends = memory
	memory += 400
	block_static_variables = memory
	memory += 400
	local_variables = memory
	memory += 400
	function_param_names = memory
	
	; one statement buffer per block depth; each "end" pointer starts at its buffer's start
	p = statement_datas
	q = statement_datas_ends
	i = 0
	:statement_datas_loop
		; supports 100,000 statements at each level
		*8p = malloc(4000000)
		*8q = *8p
		p += 8
		q += 8
		i += 1
		if i < BLOCK_DEPTH_LIMIT goto statement_datas_loop
	
	; one static-variable list per block depth
	p = block_static_variables
	i = 0
	:bsv_alloc_loop
		; more than enough memory to hold static variable names/addresses for a particular block
		*8p = malloc(24000)
		p += 8
		i += 1
		if i < BLOCK_DEPTH_LIMIT goto bsv_alloc_loop
	
	; one local-variable list per block depth
	p = local_variables
	i = 0
	:lv_alloc_loop
		*8p = malloc(100000)
		p += 8
		i += 1
		if i < BLOCK_DEPTH_LIMIT goto lv_alloc_loop
	
	fill_in_powers_of_10()
	
	typedefs = ident_list_create(100000)
	enumerators = ident_list_create(4000000)
	structures = ident_list_create(4000000)
	global_variables = ident_list_create(400000)
	function_statements = ident_list_create(400000)
	
	; should be at least 40 bytes * max # of functions
	function_stmt_data = malloc(800000)
	
	dat_banned_objmacros = 255
	dat_banned_fmacros = 255
	
	file_list = malloc(40000)
	*1file_list = 255
	
	object_macros = malloc(4000000)
	function_macros = malloc(4000000)
	
	expressions = malloc(16000000)
	expressions_end = expressions
	
	types = malloc(16000000)
	types_init(types, &types_bytes_used)
	
	; start from the default file names, then override them if two arguments were given
	input_filename = .str_default_input_filename
	output_filename = .str_default_output_filename
	if argc == 1 goto have_filenames
	if argc != 3 goto usage_error
	input_filename = argv1
	output_filename = argv2
	:have_filenames
	
	; 493 is octal 755 (presumably the mode for the output file - confirm against open_rw)
	output_fd = open_rw(output_filename, 493)
	rodata_end_addr = RODATA_ADDR
	rwdata_end_addr = RWDATA_ADDR
	
	; size the output file, then map all of it so it can be written through memory
	ftruncate(output_fd, RWDATA_END)
	output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0)
	; unsigned compare: results in the top 64KiB of the address space are treated as failure
	if output_file_data ] 0xffffffffffff0000 goto mmap_output_fd_failed
	
	pptokens = split_into_preprocessing_tokens(input_filename)
	;print_pptokens(pptokens)
	;print_separator()
	processed_pptokens = malloc(16000000)
	translation_phase_4(input_filename, pptokens, processed_pptokens)
	free(pptokens)
	pptokens = processed_pptokens
	;print_pptokens(pptokens)
	;print_separator()
	;print_object_macros()
	;print_function_macros()
	
	tokens = malloc(16000000)
	p = tokenize(pptokens, tokens, input_filename, 1)
	print_tokens(tokens, p)
	print_separator()
	; NOTE: do NOT free pptokens; identifiers still reference them.
	
	parse_tokens(tokens)
	
	p = output_file_data + RODATA_ADDR
	munmap(output_file_data, RWDATA_END)
	close(output_fd)
	
	ident_list_printx64(global_variables)
	
	exit(0)
|
|
|
|
|
2022-01-27 18:52:39 -05:00
|
|
|
; Error exits and string data used by main.

; reached from main when mapping the output file fails
:mmap_output_fd_failed
	fputs(2, .str_mmap_output_fd_failed)
	exit(1)
:str_mmap_output_fd_failed
	string Couldn't mmap output file.
	byte 10
	byte 0

; reached from main when the argument count is neither 1 nor 3
:usage_error
	fputs(2, .str_usage_error)
	exit(1)

:str_usage_error
	string Please either specify no arguments or an input and output file.
	; newline and terminating 0: without them the message ran straight on
	; into the bytes of the default input filename that follows
	byte 10
	byte 0

:str_default_input_filename
	string main.c
	byte 0

:str_default_output_filename
	string a.out
	byte 0
|
2022-01-11 17:36:33 -05:00
|
|
|
|
2022-01-11 18:03:09 -05:00
|
|
|
; NOTE: this language doesn't have proper support for floating-point numbers,
|
|
|
|
; but we need to do some float stuff. floats are stored as a 58-bit significand
|
|
|
|
; and an exponent. the significand ranges from 0 (inclusive) to 0x400000000000000 (exclusive)
|
|
|
|
|
2022-01-11 17:36:33 -05:00
|
|
|
; Normalizes a (significand, exponent) pair in place.
;   p_significand - address of the 8-byte significand
;   p_exponent    - address of the 8-byte exponent
; A nonzero significand is shifted until it lies in
; [0x200000000000000, 0x400000000000000), adjusting the exponent by one for each
; shift so that significand * 2^exponent keeps its value (up to bits shifted out on the right).
; A zero significand is left alone and its exponent is set to 0.
function normalize_float
	argument p_significand
	argument p_exponent
	local sig
	local expo
	
	sig = *8p_significand
	if sig != 0 goto normalize_nonzero
	; zero: nothing to shift, just store a 0 exponent
	*8p_exponent = 0
	return
	
	:normalize_nonzero
	expo = *8p_exponent
	
	; too large: halve the significand until it drops below 0x400000000000000 (unsigned compare)
	:float_reduce_loop
		if sig [ 0x400000000000000 goto float_increase_loop
		sig >= 1
		expo += 1
		goto float_reduce_loop
	
	; too small: double the significand until it reaches 0x200000000000000 (unsigned compare)
	:float_increase_loop
		if sig ]= 0x200000000000000 goto float_normalized
		sig <= 1
		expo -= 1
		goto float_increase_loop
	
	:float_normalized
	*8p_significand = sig
	*8p_exponent = expo
	return
|
|
|
|
|
2022-01-11 17:36:33 -05:00
|
|
|
; Allocates and fills the powers_of_10 table.
; The global powers_of_10 is left pointing 20000 bytes into a 40000-byte allocation, so
; that powers_of_10 + 16*i addresses the entry for 10^i for i = -1023..1023.
; Each 16-byte entry is an 8-byte significand followed by an 8-byte exponent,
; both as left by normalize_float.
function fill_in_powers_of_10
	local i
	local p
	local significand
	local exponent
	
	powers_of_10 = malloc(40000)
	powers_of_10 += 20000
	
	; 10^0 = 2^57 * 2^-57; walk upwards, multiplying by 10 each step
	significand = 1 < 57
	exponent = -57
	i = 0
	:pow10_loop_positive
		; entry address = powers_of_10 + 16*i
		p = i < 4
		p += powers_of_10
		*8p = significand
		p += 8
		*8p = exponent
		
		significand *= 10
		normalize_float(&significand, &exponent)
		
		i += 1
		if i < 1024 goto pow10_loop_positive
	
	; start again from 10^0 and walk downwards, dividing by 10 each step
	significand = 1 < 57
	exponent = -57
	i = 0
	:pow10_loop_negative
		; entry address = powers_of_10 + 16*i (i is zero or negative here)
		p = i < 4
		p += powers_of_10
		*8p = significand
		p += 8
		*8p = exponent
		
		; scale the significand up by 32 = 2^5 (taking 5 off the exponent to compensate)
		; before the integer division by 10
		exponent -= 5
		significand *= 32
		significand /= 10
		normalize_float(&significand, &exponent)
		
		i -= 1
		if i > -1024 goto pow10_loop_negative
	return
|
|
|
|
|
|
|
|
; Debugging aid: prints the powers_of_10 entries for i = -325..324, one per line, as
;   10^<i>=<58 binary digits of the significand>*2^<exponent>
function print_powers_of_10
	local i
	local j
	local b
	local p
	local significand
	
	i = -325
	:print_powers_of_10_loop
		; "10^<i>="  (61 is the character '=')
		putc('1)
		putc('0)
		putc('^)
		putn_signed(i)
		putc(61)
		
		; entry address = powers_of_10 + 16*i
		p = i < 4
		p += powers_of_10
		significand = *8p
		
		; bits 57 down to 0 of the significand, most significant first
		j = 57
		:pow10_binary_loop
			b = significand > j
			b &= 1
			b += '0
			putc(b)
			j -= 1
			if j >= 0 goto pow10_binary_loop
		
		; "*2^<exponent>" and a newline; the exponent is the second 8 bytes of the entry
		putc('*)
		putc('2)
		putc('^)
		p += 8
		putn_signed(*8p)
		putc(10)
		
		i += 1
		if i < 325 goto print_powers_of_10_loop
	return
|