switch to using mmap for output file
This commit is contained in:
parent
b5a498aa52
commit
01b8a4d728
7 changed files with 91 additions and 30 deletions
|
@ -1,9 +1,13 @@
|
||||||
; this is the format of the executables we produce:
|
; this is the format of the executables we produce:
|
||||||
; elf header + code 4MB addresses 0x400000-0x7fffff
|
; elf header 4MB addresses 0x000000-0x3fffff (no, it won't actually take up that much space)
|
||||||
|
; code 4MB addresses 0x400000-0x7fffff
|
||||||
; read-only data 4MB addresses 0x800000-0xbfffff
|
; read-only data 4MB addresses 0x800000-0xbfffff
|
||||||
; read-write data 4MB addresses 0xc00000-0xffffff
|
; read-write data 4MB addresses 0xc00000-0xffffff
|
||||||
#define RODATA_OFFSET 0x400000
|
; note that file offsets and runtime addresses are the same.
|
||||||
|
; you should be able to change these constants without breaking anything:
|
||||||
#define RODATA_ADDR 0x800000
|
#define RODATA_ADDR 0x800000
|
||||||
|
#define RWDATA_END 0x1000000
|
||||||
|
#define EXECUTABLE_SIZE 0x1000000
|
||||||
|
|
||||||
; C OPERATOR PRECEDENCE
|
; C OPERATOR PRECEDENCE
|
||||||
; lowest
|
; lowest
|
||||||
|
|
27
05/main.b
27
05/main.b
|
@ -8,8 +8,6 @@ byte 0
|
||||||
byte 0
|
byte 0
|
||||||
goto main
|
goto main
|
||||||
|
|
||||||
global output_fd
|
|
||||||
|
|
||||||
|
|
||||||
global object_macros_size
|
global object_macros_size
|
||||||
global function_macros_size
|
global function_macros_size
|
||||||
|
@ -36,6 +34,9 @@ global enumerators
|
||||||
; for unions, offset will always be 0.
|
; for unions, offset will always be 0.
|
||||||
global structures
|
global structures
|
||||||
global structures_bytes_used
|
global structures_bytes_used
|
||||||
|
; file offset/runtime address to write next piece of read-only data; initialized in main
|
||||||
|
global rodata_end_addr
|
||||||
|
global output_file_data
|
||||||
|
|
||||||
#include util.b
|
#include util.b
|
||||||
#include idents.b
|
#include idents.b
|
||||||
|
@ -149,6 +150,8 @@ function main
|
||||||
local ast
|
local ast
|
||||||
local p
|
local p
|
||||||
local i
|
local i
|
||||||
|
local output_fd
|
||||||
|
|
||||||
fill_in_powers_of_10()
|
fill_in_powers_of_10()
|
||||||
|
|
||||||
typedefs = ident_list_create(100000)
|
typedefs = ident_list_create(100000)
|
||||||
|
@ -173,8 +176,12 @@ function main
|
||||||
input_filename = argv1
|
input_filename = argv1
|
||||||
output_filename = argv2
|
output_filename = argv2
|
||||||
:have_filenames
|
:have_filenames
|
||||||
output_fd = open_w(output_filename)
|
output_fd = open_rw(output_filename, 493)
|
||||||
rodata_end_offset = RODATA_OFFSET
|
rodata_end_addr = RODATA_ADDR
|
||||||
|
|
||||||
|
ftruncate(output_fd, RWDATA_END)
|
||||||
|
output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0)
|
||||||
|
if output_file_data ] 0xffffffffffff0000 goto mmap_output_fd_failed
|
||||||
|
|
||||||
pptokens = split_into_preprocessing_tokens(input_filename)
|
pptokens = split_into_preprocessing_tokens(input_filename)
|
||||||
;print_pptokens(pptokens)
|
;print_pptokens(pptokens)
|
||||||
|
@ -195,8 +202,20 @@ function main
|
||||||
|
|
||||||
parse_tokens(tokens)
|
parse_tokens(tokens)
|
||||||
|
|
||||||
|
p = output_file_data + RODATA_ADDR
|
||||||
|
munmap(output_file_data, RWDATA_END)
|
||||||
|
close(output_fd)
|
||||||
|
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
|
:mmap_output_fd_failed
|
||||||
|
fputs(2, .str_mmap_output_fd_failed)
|
||||||
|
exit(1)
|
||||||
|
:str_mmap_output_fd_failed
|
||||||
|
string Couldn't mmap output file.
|
||||||
|
byte 10
|
||||||
|
byte 0
|
||||||
|
|
||||||
:usage_error
|
:usage_error
|
||||||
fputs(2, .str_usage_error)
|
fputs(2, .str_usage_error)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
|
@ -24,5 +24,5 @@ typedef union B{
|
||||||
} c;
|
} c;
|
||||||
}B;
|
}B;
|
||||||
|
|
||||||
typedef int x[sizeof(A)];
|
typedef int x[sizeof(A)+sizeof"hello"];
|
||||||
typedef int y[sizeof(struct B)];
|
typedef int y[sizeof(struct B)];
|
||||||
|
|
11
05/parse.b
11
05/parse.b
|
@ -1500,7 +1500,7 @@ function type_sizeof
|
||||||
if c == TYPE_ARRAY goto sizeof_array
|
if c == TYPE_ARRAY goto sizeof_array
|
||||||
if c == TYPE_STRUCT goto sizeof_struct
|
if c == TYPE_STRUCT goto sizeof_struct
|
||||||
|
|
||||||
fputs(2, .str_sizeof_bad) ; @TODO
|
fputs(2, .str_sizeof_bad)
|
||||||
exit(1)
|
exit(1)
|
||||||
:str_sizeof_bad
|
:str_sizeof_bad
|
||||||
string type_sizeof bad type.
|
string type_sizeof bad type.
|
||||||
|
@ -1621,7 +1621,6 @@ function evaluate_constant_expression
|
||||||
c = *1expr
|
c = *1expr
|
||||||
|
|
||||||
if c == EXPRESSION_CONSTANT_INT goto eval_constant_int
|
if c == EXPRESSION_CONSTANT_INT goto eval_constant_int
|
||||||
if c == EXPRESSION_IDENTIFIER goto eval_constant_identifier
|
|
||||||
if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus
|
if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus
|
||||||
if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus
|
if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus
|
||||||
if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not
|
if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not
|
||||||
|
@ -1666,14 +1665,6 @@ function evaluate_constant_expression
|
||||||
:str_eval_cast_bad_type
|
:str_eval_cast_bad_type
|
||||||
string Bad type for constant cast (note: floating-point casts are not supported even though they are standard).
|
string Bad type for constant cast (note: floating-point casts are not supported even though they are standard).
|
||||||
byte 0
|
byte 0
|
||||||
:eval_constant_identifier
|
|
||||||
; @TODO: enum values
|
|
||||||
fputs(2, .str_constant_identifier)
|
|
||||||
exit(1)
|
|
||||||
:str_constant_identifier
|
|
||||||
string Constant identifiers not handled (see @TODO).
|
|
||||||
byte 10
|
|
||||||
byte 0
|
|
||||||
:eval_constant_int
|
:eval_constant_int
|
||||||
expr += 8
|
expr += 8
|
||||||
*8p_value = *8expr
|
*8p_value = *8expr
|
||||||
|
|
|
@ -97,9 +97,6 @@ function get_keyword_str
|
||||||
byte 0
|
byte 0
|
||||||
|
|
||||||
|
|
||||||
; file offset to write next piece of read-only data; initialized in main.b
|
|
||||||
global rodata_end_offset
|
|
||||||
|
|
||||||
; turn pptokens into tokens, written to out.
|
; turn pptokens into tokens, written to out.
|
||||||
; tokens are 16 bytes and have the following format:
|
; tokens are 16 bytes and have the following format:
|
||||||
; uchar type
|
; uchar type
|
||||||
|
@ -256,26 +253,28 @@ function tokenize
|
||||||
data = c
|
data = c
|
||||||
goto token_output
|
goto token_output
|
||||||
:tokenize_string_literal
|
:tokenize_string_literal
|
||||||
n = rodata_end_offset - RODATA_OFFSET
|
data = rodata_end_addr
|
||||||
n += RODATA_ADDR ; address of string
|
p = output_file_data + rodata_end_addr
|
||||||
lseek(output_fd, rodata_end_offset, SEEK_SET)
|
|
||||||
:string_literal_loop
|
:string_literal_loop
|
||||||
in += 1 ; skip opening "
|
in += 1 ; skip opening "
|
||||||
:string_literal_char_loop
|
:string_literal_char_loop
|
||||||
if *1in == '" goto string_literal_char_loop_end
|
if *1in == '" goto string_literal_char_loop_end
|
||||||
c = read_c_char(&in)
|
c = read_c_char(&in)
|
||||||
if c ] 255 goto bad_char_in_string
|
if c ] 255 goto bad_char_in_string
|
||||||
fputc(output_fd, c)
|
*1p = c
|
||||||
|
p += 1
|
||||||
goto string_literal_char_loop
|
goto string_literal_char_loop
|
||||||
:string_literal_char_loop_end
|
:string_literal_char_loop_end
|
||||||
pptoken_skip(&in) ; skip closing "
|
pptoken_skip(&in) ; skip closing "
|
||||||
pptoken_skip_spaces(&in)
|
pptoken_skip_spaces(&in)
|
||||||
if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!"
|
if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!"
|
||||||
fputc(output_fd, 0) ; null terminator
|
*1p = 0 ; null terminator
|
||||||
rodata_end_offset = lseek(output_fd, 0, SEEK_CUR)
|
p += 1
|
||||||
|
rodata_end_addr = p - output_file_data
|
||||||
|
|
||||||
*1out = TOKEN_STRING_LITERAL
|
*1out = TOKEN_STRING_LITERAL
|
||||||
out += 2 ; no info
|
out += 2 ; no info
|
||||||
data = n
|
|
||||||
goto token_output
|
goto token_output
|
||||||
:tokenize_float
|
:tokenize_float
|
||||||
; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0
|
; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0
|
||||||
|
|
52
05/util.b
52
05/util.b
|
@ -139,12 +139,48 @@ function die
|
||||||
fputs(2, message)
|
fputs(2, message)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
; set the length of the file open on descriptor fd to `length` bytes,
; using system call number 77 (ftruncate on x86-64 Linux).
; returns nothing. if the system call gives a nonzero result, an error
; message is written to file descriptor 2 and the program exits with code 1.
function ftruncate
	argument fd
	argument length
	local x
	x = syscall(77, fd, length)
	; any nonzero result is treated as failure
	if x != 0 goto ftruncate_failed
	return

	:ftruncate_failed
	fputs(2, .str_ftruncate_failed)
	exit(1)
	:str_ftruncate_failed
	string ftruncate failed.
	byte 10
	byte 0
|
||||||
|
|
||||||
|
; thin wrapper around system call number 9 (mmap on x86-64 Linux).
; all six arguments are forwarded unchanged, in the order given.
; returns the raw system call result; no error checking is done here
; (callers compare the result against 0xffffffffffff0000 to detect failure).
function mmap
	argument addr
	argument length
	argument prot
	argument flags
	argument fd
	argument offset
	local mapping
	mapping = syscall(9, addr, length, prot, flags, fd, offset)
	return mapping
|
||||||
|
|
||||||
|
; thin wrapper around system call number 11 (munmap on x86-64 Linux).
; addr and length are forwarded unchanged.
; returns the raw system call result; no error checking is done here.
function munmap
	argument addr
	argument length
	local status
	status = syscall(11, addr, length)
	return status
|
||||||
|
|
||||||
|
#define PROT_READ 1
|
||||||
|
#define PROT_WRITE 2
|
||||||
|
#define PROT_READ_WRITE 3
|
||||||
|
#define MAP_SHARED 0x01
|
||||||
|
#define MAP_PRIVATE_ANONYMOUS 0x22
|
||||||
|
|
||||||
function malloc
|
function malloc
|
||||||
argument size
|
argument size
|
||||||
local total_size
|
local total_size
|
||||||
local memory
|
local memory
|
||||||
total_size = size + 8
|
total_size = size + 8
|
||||||
memory = syscall(9, 0, total_size, 3, 0x22, -1, 0)
|
memory = mmap(0, total_size, PROT_READ_WRITE, MAP_PRIVATE_ANONYMOUS, -1, 0)
|
||||||
if memory ] 0xffffffffffff0000 goto malloc_failed
|
if memory ] 0xffffffffffff0000 goto malloc_failed
|
||||||
*8memory = total_size
|
*8memory = total_size
|
||||||
return memory + 8
|
return memory + 8
|
||||||
|
@ -164,7 +200,7 @@ function free
|
||||||
local size
|
local size
|
||||||
psize = memory - 8
|
psize = memory - 8
|
||||||
size = *8psize
|
size = *8psize
|
||||||
syscall(11, psize, size)
|
munmap(psize, size)
|
||||||
return
|
return
|
||||||
|
|
||||||
; returns a pointer to a null-terminated string containing the
|
; returns a pointer to a null-terminated string containing the
|
||||||
|
@ -568,6 +604,18 @@ function open_w
|
||||||
:open_w_error
|
:open_w_error
|
||||||
file_error(filename)
|
file_error(filename)
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
; open the file called `filename` for both reading and writing, passing `mode`
; as the permission bits (system call number 2 with flags 0x242).
; returns the file descriptor on success.
; if the system call result is negative, file_error(filename) is called, and
; -1 is returned if that call comes back.
function open_rw
	argument filename
	argument mode
	local handle
	handle = syscall(2, filename, 0x242, mode)
	; non-negative result: the open succeeded, hand the descriptor back as-is
	if handle >= 0 goto open_rw_done
	file_error(filename)
	handle = -1
	:open_rw_done
	return handle
|
||||||
|
|
||||||
function close
|
function close
|
||||||
argument fd
|
argument fd
|
||||||
|
|
|
@ -96,7 +96,7 @@ I'll leave that to someone else.
|
||||||
## instruction set
|
## instruction set
|
||||||
|
|
||||||
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
|
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
|
||||||
long! So it makes sense to select only a small subset of it to use.
|
long! To make things simpler, we will only use a small subset.
|
||||||
|
|
||||||
Here are all the instructions we'll be using. If you're not familiar with
|
Here are all the instructions we'll be using. If you're not familiar with
|
||||||
x86-64 assembly, you might want to look over these (but you don't need to understand everything).
|
x86-64 assembly, you might want to look over these (but you don't need to understand everything).
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue