switch to using mmap for output file
This commit is contained in:
parent
b5a498aa52
commit
01b8a4d728
7 changed files with 91 additions and 30 deletions
|
@ -1,9 +1,13 @@
|
|||
; this is the format of the executables we produce:
|
||||
; elf header + code 4MB addresses 0x400000-0x7fffff
|
||||
; elf header        4MB addresses 0x000000-0x3fffff (no, it won't actually take up that much space)
|
||||
; code 4MB addresses 0x400000-0x7fffff
|
||||
; read-only data 4MB addresses 0x800000-0xbfffff
|
||||
; read-write data 4MB addresses 0xc00000-0xffffff
|
||||
#define RODATA_OFFSET 0x400000
|
||||
; note that file offsets and runtime addresses are the same.
|
||||
; you should be able to change these constants without breaking anything:
|
||||
#define RODATA_ADDR 0x800000
|
||||
#define RWDATA_END 0x1000000
|
||||
#define EXECUTABLE_SIZE 0x1000000
|
||||
|
||||
; C OPERATOR PRECEDENCE
|
||||
; lowest
|
||||
|
|
27
05/main.b
27
05/main.b
|
@ -8,8 +8,6 @@ byte 0
|
|||
byte 0
|
||||
goto main
|
||||
|
||||
global output_fd
|
||||
|
||||
|
||||
global object_macros_size
|
||||
global function_macros_size
|
||||
|
@ -36,6 +34,9 @@ global enumerators
|
|||
; for unions, offset will always be 0.
|
||||
global structures
|
||||
global structures_bytes_used
|
||||
; file offset/runtime address to write next piece of read-only data; initialized in main
|
||||
global rodata_end_addr
|
||||
global output_file_data
|
||||
|
||||
#include util.b
|
||||
#include idents.b
|
||||
|
@ -149,6 +150,8 @@ function main
|
|||
local ast
|
||||
local p
|
||||
local i
|
||||
local output_fd
|
||||
|
||||
fill_in_powers_of_10()
|
||||
|
||||
typedefs = ident_list_create(100000)
|
||||
|
@ -173,8 +176,12 @@ function main
|
|||
input_filename = argv1
|
||||
output_filename = argv2
|
||||
:have_filenames
|
||||
output_fd = open_w(output_filename)
|
||||
rodata_end_offset = RODATA_OFFSET
|
||||
output_fd = open_rw(output_filename, 493)
|
||||
rodata_end_addr = RODATA_ADDR
|
||||
|
||||
ftruncate(output_fd, RWDATA_END)
|
||||
output_file_data = mmap(0, RWDATA_END, PROT_READ_WRITE, MAP_SHARED, output_fd, 0)
|
||||
if output_file_data ] 0xffffffffffff0000 goto mmap_output_fd_failed
|
||||
|
||||
pptokens = split_into_preprocessing_tokens(input_filename)
|
||||
;print_pptokens(pptokens)
|
||||
|
@ -195,8 +202,20 @@ function main
|
|||
|
||||
parse_tokens(tokens)
|
||||
|
||||
p = output_file_data + RODATA_ADDR
|
||||
munmap(output_file_data, RWDATA_END)
|
||||
close(output_fd)
|
||||
|
||||
exit(0)
|
||||
|
||||
:mmap_output_fd_failed
|
||||
fputs(2, .str_mmap_output_fd_failed)
|
||||
exit(1)
|
||||
:str_mmap_output_fd_failed
|
||||
string Couldn't mmap output file.
|
||||
byte 10
|
||||
byte 0
|
||||
|
||||
:usage_error
|
||||
fputs(2, .str_usage_error)
|
||||
exit(1)
|
||||
|
|
|
@ -24,5 +24,5 @@ typedef union B{
|
|||
} c;
|
||||
}B;
|
||||
|
||||
typedef int x[sizeof(A)];
|
||||
typedef int x[sizeof(A)+sizeof"hello"];
|
||||
typedef int y[sizeof(struct B)];
|
||||
|
|
11
05/parse.b
11
05/parse.b
|
@ -1500,7 +1500,7 @@ function type_sizeof
|
|||
if c == TYPE_ARRAY goto sizeof_array
|
||||
if c == TYPE_STRUCT goto sizeof_struct
|
||||
|
||||
fputs(2, .str_sizeof_bad) ; @TODO
|
||||
fputs(2, .str_sizeof_bad)
|
||||
exit(1)
|
||||
:str_sizeof_bad
|
||||
string type_sizeof bad type.
|
||||
|
@ -1621,7 +1621,6 @@ function evaluate_constant_expression
|
|||
c = *1expr
|
||||
|
||||
if c == EXPRESSION_CONSTANT_INT goto eval_constant_int
|
||||
if c == EXPRESSION_IDENTIFIER goto eval_constant_identifier
|
||||
if c == EXPRESSION_UNARY_PLUS goto eval_unary_plus
|
||||
if c == EXPRESSION_UNARY_MINUS goto eval_unary_minus
|
||||
if c == EXPRESSION_BITWISE_NOT goto eval_bitwise_not
|
||||
|
@ -1666,14 +1665,6 @@ function evaluate_constant_expression
|
|||
:str_eval_cast_bad_type
|
||||
string Bad type for constant cast (note: floating-point casts are not supported even though they are standard).
|
||||
byte 0
|
||||
:eval_constant_identifier
|
||||
; @TODO: enum values
|
||||
fputs(2, .str_constant_identifier)
|
||||
exit(1)
|
||||
:str_constant_identifier
|
||||
string Constant identifiers not handled (see @TODO).
|
||||
byte 10
|
||||
byte 0
|
||||
:eval_constant_int
|
||||
expr += 8
|
||||
*8p_value = *8expr
|
||||
|
|
|
@ -97,9 +97,6 @@ function get_keyword_str
|
|||
byte 0
|
||||
|
||||
|
||||
; file offset to write next piece of read-only data; initialized in main.b
|
||||
global rodata_end_offset
|
||||
|
||||
; turn pptokens into tokens, written to out.
|
||||
; tokens are 16 bytes and have the following format:
|
||||
; uchar type
|
||||
|
@ -256,26 +253,28 @@ function tokenize
|
|||
data = c
|
||||
goto token_output
|
||||
:tokenize_string_literal
|
||||
n = rodata_end_offset - RODATA_OFFSET
|
||||
n += RODATA_ADDR ; address of string
|
||||
lseek(output_fd, rodata_end_offset, SEEK_SET)
|
||||
data = rodata_end_addr
|
||||
p = output_file_data + rodata_end_addr
|
||||
|
||||
:string_literal_loop
|
||||
in += 1 ; skip opening "
|
||||
:string_literal_char_loop
|
||||
if *1in == '" goto string_literal_char_loop_end
|
||||
c = read_c_char(&in)
|
||||
if c ] 255 goto bad_char_in_string
|
||||
fputc(output_fd, c)
|
||||
*1p = c
|
||||
p += 1
|
||||
goto string_literal_char_loop
|
||||
:string_literal_char_loop_end
|
||||
pptoken_skip(&in) ; skip closing "
|
||||
pptoken_skip_spaces(&in)
|
||||
if *1in == '" goto string_literal_loop ; string concatenation, e.g. "Hello, " "world!"
|
||||
fputc(output_fd, 0) ; null terminator
|
||||
rodata_end_offset = lseek(output_fd, 0, SEEK_CUR)
|
||||
*1p = 0 ; null terminator
|
||||
p += 1
|
||||
rodata_end_addr = p - output_file_data
|
||||
|
||||
*1out = TOKEN_STRING_LITERAL
|
||||
out += 2 ; no info
|
||||
data = n
|
||||
goto token_output
|
||||
:tokenize_float
|
||||
; @NONSTANDARD: this doesn't allow for floats whose integral part is >=2^64, e.g. 1000000000000000000000000.0
|
||||
|
|
52
05/util.b
52
05/util.b
|
@ -139,12 +139,48 @@ function die
|
|||
fputs(2, message)
|
||||
exit(1)
|
||||
|
||||
; set the size of the file referred to by fd to length bytes.
; uses syscall 77 (ftruncate on x86-64 Linux).
; does not return on failure: prints a message to stderr (fd 2) and exits with status 1.
function ftruncate
argument fd
argument length
local x
x = syscall(77, fd, length)
; any nonzero result is treated as an error
if x != 0 goto ftruncate_failed
return

:ftruncate_failed
fputs(2, .str_ftruncate_failed)
exit(1)
:str_ftruncate_failed
string ftruncate failed.
byte 10
byte 0
|
||||
|
||||
; thin wrapper around syscall 9 (mmap on x86-64 Linux).
; all six arguments are forwarded unchanged, and the raw syscall result is returned.
; no error checking is done here; callers compare the result against
; 0xffffffffffff0000 (unsigned) to detect failure.
function mmap
argument addr
argument length
argument prot
argument flags
argument fd
argument offset
local result
result = syscall(9, addr, length, prot, flags, fd, offset)
return result
|
||||
|
||||
; thin wrapper around syscall 11 (munmap on x86-64 Linux).
; forwards addr and length unchanged and returns the raw syscall result.
function munmap
argument addr
argument length
local result
result = syscall(11, addr, length)
return result
|
||||
|
||||
; values passed as mmap's prot argument
#define PROT_READ 1
|
||||
#define PROT_WRITE 2
|
||||
; PROT_READ and PROT_WRITE combined (1 | 2)
#define PROT_READ_WRITE 3
|
||||
; values passed as mmap's flags argument
#define MAP_SHARED 0x01
|
||||
; presumably MAP_PRIVATE (0x02) | MAP_ANONYMOUS (0x20) on Linux -- TODO confirm; malloc passes this with fd = -1
#define MAP_PRIVATE_ANONYMOUS 0x22
|
||||
|
||||
function malloc
|
||||
argument size
|
||||
local total_size
|
||||
local memory
|
||||
total_size = size + 8
|
||||
memory = syscall(9, 0, total_size, 3, 0x22, -1, 0)
|
||||
memory = mmap(0, total_size, PROT_READ_WRITE, MAP_PRIVATE_ANONYMOUS, -1, 0)
|
||||
if memory ] 0xffffffffffff0000 goto malloc_failed
|
||||
*8memory = total_size
|
||||
return memory + 8
|
||||
|
@ -164,7 +200,7 @@ function free
|
|||
local size
|
||||
psize = memory - 8
|
||||
size = *8psize
|
||||
syscall(11, psize, size)
|
||||
munmap(psize, size)
|
||||
return
|
||||
|
||||
; returns a pointer to a null-terminated string containing the
|
||||
|
@ -569,6 +605,18 @@ function open_w
|
|||
file_error(filename)
|
||||
return -1
|
||||
|
||||
; open filename for both reading and writing, using the given permission mode.
; the flags value 0x242 is passed to syscall 2 (open on x86-64 Linux);
; presumably O_RDWR | O_CREAT | O_TRUNC -- confirm against the Linux headers.
; returns the file descriptor on success. on failure, calls file_error(filename)
; and, if that returns, returns -1.
function open_rw
argument filename
argument mode
local fd
fd = syscall(2, filename, 0x242, mode)
; a negative result means the open failed
if fd >= 0 goto open_rw_ok
file_error(filename)
return -1
:open_rw_ok
return fd
|
||||
|
||||
function close
|
||||
argument fd
|
||||
syscall(3, fd)
|
||||
|
|
|
@ -96,7 +96,7 @@ I'll leave that to someone else.
|
|||
## instruction set
|
||||
|
||||
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
|
||||
long! So it makes sense to select only a small subset of it to use.
|
||||
long! To make things simpler, we will only use a small subset.
|
||||
|
||||
Here are all the instructions we'll be using. If you're not familiar with
|
||||
x86-64 assembly, you might want to look over these (but you don't need to understand everything).
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue