switch to using mmap for output file

This commit is contained in:
pommicket 2022-01-27 18:52:39 -05:00
parent b5a498aa52
commit 01b8a4d728
7 changed files with 91 additions and 30 deletions

View file

@ -1,9 +1,13 @@
; this is the format of the executables we produce: ; this is the format of the executables we produce:
; elf header + code 4MB addresses 0x400000-0x7fffff ; elf header 4MB addresses 0x000000-0x400000 (no, it won't actually take up that much space)
; code 4MB addresses 0x400000-0x7fffff
; read-only data 4MB addresses 0x800000-0xbfffff ; read-only data 4MB addresses 0x800000-0xbfffff
; read-write data 4MB addresses 0xc00000-0xffffff ; read-write data 4MB addresses 0xc00000-0xffffff
#define RODATA_OFFSET 0x400000 ; note that file offsets and runtime addresses are the same.
; you should be able to change these constants without breaking anything:
#define RODATA_ADDR 0x800000 #define RODATA_ADDR 0x800000
#define RWDATA_END 0x1000000
#define EXECUTABLE_SIZE 0x1000000
; C OPERATOR PRECEDENCE ; C OPERATOR PRECEDENCE
; lowest ; lowest

View file

@ -8,8 +8,6 @@ byte 0
byte 0 byte 0
goto main goto main
global output_fd
global object_macros_size global object_macros_size
global function_macros_size global function_macros_size
@ -36,6 +34,9 @@ global enumerators
; for unions, offset will always be 0. ; for unions, offset will always be 0.
global structures global structures
global structures_bytes_used global structures_bytes_used
; file offset/runtime address to write next piece of read-only data; initialized in main
global rodata_end_addr
global output_file_data
#include util.b #include util.b
#include idents.b #include idents.b
@ -149,6 +150,8 @@ function main
local ast local ast
local p local p
local i local i
local output_fd
fill_in_powers_of_10() fill_in_powers_of_10()
typedefs = ident_list_create(100000) typedefs = ident_list_create(100000)
@ -173,8 +176,12 @@ function main
input_filename = argv1 input_filename = argv1
output_filename = argv2 output_filename = argv2
:have_filenames :have_filenames
output_fd = open_w(output_filename) output_fd = open_rw(output_filename, 493)
rodata_end_offset = RODATA_OFFSET rodata_end_addr = RODATA_ADDR
ftruncate(output_fd, RWDATA_END)
output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0)
if output_file_data ] 0xffffffffffff0000 goto mmap_output_fd_failed
pptokens = split_into_preprocessing_tokens(input_filename) pptokens = split_into_preprocessing_tokens(input_filename)
;print_pptokens(pptokens) ;print_pptokens(pptokens)
@ -195,8 +202,20 @@ function main
parse_tokens(tokens) parse_tokens(tokens)
p = output_file_data + RODATA_ADDR
munmap(output_file_data, RWDATA_END)
close(output_fd)
exit(0) exit(0)
:mmap_output_fd_failed
fputs(2, .str_mmap_output_fd_failed)
exit(1)
:str_mmap_output_fd_failed
string Couldn't mmap output file.
byte 10
byte 0
:usage_error :usage_error
fputs(2, .str_usage_error) fputs(2, .str_usage_error)
exit(1) exit(1)

View file

@ -24,5 +24,5 @@ typedef union B{
} c; } c;
}B; }B;
typedef int x[sizeof(A)]; typedef int x[sizeof(A)+sizeof"hello"];
typedef int y[sizeof(struct B)]; typedef int y[sizeof(struct B)];

View file

@ -1500,7 +1500,7 @@ function type_sizeof
if c == TYPE_ARRAY goto sizeof_array if c == TYPE_ARRAY goto sizeof_array
if c == TYPE_STRUCT goto sizeof_struct if c == TYPE_STRUCT goto sizeof_struct
fputs(2, .str_sizeof_bad) ; @TODO fputs(2, .str_sizeof_bad)
exit(1) exit(1)
:str_sizeof_bad :str_sizeof_bad
string type_sizeof bad type. string type_sizeof bad type.
@ -1621,7 +1621,6 @@ function evaluate_constant_expression
c = *1expr c = *1expr
if c == EXPRESSION_CONSTANT_INT goto eval_constant_int if c == EXPRESSION_CONSTANT_INT goto eval_constant_int
if c == EXPRESSION_IDENTIFIER goto eval_constant_identifier
if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus
if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus
if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not
@ -1666,14 +1665,6 @@ function evaluate_constant_expression
:str_eval_cast_bad_type :str_eval_cast_bad_type
string Bad type for constant cast (note: floating-point casts are not supported even though they are standard). string Bad type for constant cast (note: floating-point casts are not supported even though they are standard).
byte 0 byte 0
:eval_constant_identifier
; @TODO: enum values
fputs(2, .str_constant_identifier)
exit(1)
:str_constant_identifier
string Constant identifiers not handled (see @TODO).
byte 10
byte 0
:eval_constant_int :eval_constant_int
expr += 8 expr += 8
*8p_value = *8expr *8p_value = *8expr

View file

@ -97,9 +97,6 @@ function get_keyword_str
byte 0 byte 0
; file offset to write next piece of read-only data; initialized in main.b
global rodata_end_offset
; turn pptokens into tokens, written to out. ; turn pptokens into tokens, written to out.
; tokens are 16 bytes and have the following format: ; tokens are 16 bytes and have the following format:
; uchar type ; uchar type
@ -256,26 +253,28 @@ function tokenize
data = c data = c
goto token_output goto token_output
:tokenize_string_literal :tokenize_string_literal
n = rodata_end_offset - RODATA_OFFSET data = rodata_end_addr
n += RODATA_ADDR ; address of string p = output_file_data + rodata_end_addr
lseek(output_fd, rodata_end_offset, SEEK_SET)
:string_literal_loop :string_literal_loop
in += 1 ; skip opening " in += 1 ; skip opening "
:string_literal_char_loop :string_literal_char_loop
if *1in == '" goto string_literal_char_loop_end if *1in == '" goto string_literal_char_loop_end
c = read_c_char(&in) c = read_c_char(&in)
if c ] 255 goto bad_char_in_string if c ] 255 goto bad_char_in_string
fputc(output_fd, c) *1p = c
p += 1
goto string_literal_char_loop goto string_literal_char_loop
:string_literal_char_loop_end :string_literal_char_loop_end
pptoken_skip(&in) ; skip closing " pptoken_skip(&in) ; skip closing "
pptoken_skip_spaces(&in) pptoken_skip_spaces(&in)
if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!" if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!"
fputc(output_fd, 0) ; null terminator *1p = 0 ; null terminator
rodata_end_offset = lseek(output_fd, 0, SEEK_CUR) p += 1
rodata_end_addr = p - output_file_data
*1out = TOKEN_STRING_LITERAL *1out = TOKEN_STRING_LITERAL
out += 2 ; no info out += 2 ; no info
data = n
goto token_output goto token_output
:tokenize_float :tokenize_float
; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0 ; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0

View file

@ -139,12 +139,48 @@ function die
fputs(2, message) fputs(2, message)
exit(1) exit(1)
; set the size of the file referred to by fd to exactly `length` bytes
; (system call 77, which is ftruncate on x86-64 Linux).
; this function only returns on success: if the system call reports
; failure (any nonzero result), an error message is written to
; file descriptor 2 and the program exits with status 1.
function ftruncate
argument fd
argument length
local x
x = syscall(77, fd, length)
; a result of 0 means success; anything else is treated as an error
if x != 0 goto ftruncate_failed
return
:ftruncate_failed
fputs(2, .str_ftruncate_failed)
exit(1)
; error message data (newline- and null-terminated)
:str_ftruncate_failed
string ftruncate failed.
byte 10
byte 0
; thin wrapper around system call 9 (mmap on x86-64 Linux).
; all six arguments are handed to the kernel unchanged, in the same order.
; returns the raw result of the system call; no error checking is done
; here, so the caller is responsible for checking the returned value.
function mmap
argument addr
argument length
argument prot
argument flags
argument fd
argument offset
local mapping
mapping = syscall(9, addr, length, prot, flags, fd, offset)
return mapping
; thin wrapper around system call 11 (munmap on x86-64 Linux).
; addr and length are handed to the kernel unchanged.
; returns the raw result of the system call; no error checking is done here.
function munmap
argument addr
argument length
local result
result = syscall(11, addr, length)
return result
; values for the `prot` argument of mmap (memory protection bits)
#define PROT_READ 1
#define PROT_WRITE 2
; PROT_READ and PROT_WRITE combined (1 | 2)
#define PROT_READ_WRITE 3
; values for the `flags` argument of mmap
#define MAP_SHARED 0x01
; Linux MAP_PRIVATE (0x02) combined with MAP_ANONYMOUS (0x20)
#define MAP_PRIVATE_ANONYMOUS 0x22
function malloc function malloc
argument size argument size
local total_size local total_size
local memory local memory
total_size = size + 8 total_size = size + 8
memory = syscall(9, 0, total_size, 3, 0x22, -1, 0) memory = mmap(0, total_size, PROT_READ_WRITE, MAP_PRIVATE_ANONYMOUS, -1, 0)
if memory ] 0xffffffffffff0000 goto malloc_failed if memory ] 0xffffffffffff0000 goto malloc_failed
*8memory = total_size *8memory = total_size
return memory + 8 return memory + 8
@ -164,7 +200,7 @@ function free
local size local size
psize = memory - 8 psize = memory - 8
size = *8psize size = *8psize
syscall(11, psize, size) munmap(psize, size)
return return
; returns a pointer to a null-terminated string containing the ; returns a pointer to a null-terminated string containing the
@ -568,6 +604,18 @@ function open_w
:open_w_error :open_w_error
file_error(filename) file_error(filename)
return -1 return -1
; open `filename` for both reading and writing, with permission bits `mode`.
; uses system call 2 (open on x86-64 Linux) with flags 0x242, which on Linux
; is O_RDWR (0x2) | O_CREAT (0x40) | O_TRUNC (0x200).
; returns the file descriptor on success.
; if the system call returns a negative value, file_error(filename) is
; called and -1 is returned.
function open_rw
argument filename
argument mode
local handle
handle = syscall(2, filename, 0x242, mode)
if handle < 0 goto open_rw_error
return handle
:open_rw_error
file_error(filename)
return -1
function close function close
argument fd argument fd

View file

@ -96,7 +96,7 @@ I'll leave that to someone else.
## instruction set ## instruction set
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
long! So it makes sense to select only a small subset of it to use. long! To make things simpler, we will only use a small subset.
Here are all the instructions we'll be using. If you're not familiar with Here are all the instructions we'll be using. If you're not familiar with
x86-64 assembly, you might want to look over these (but you don't need to understand everything). x86-64 assembly, you might want to look over these (but you don't need to understand everything).