full build of tcc with itself - doesn't match gcc :o
This commit is contained in:
parent
59b7931165
commit
9c6b9a1450
11 changed files with 250 additions and 44 deletions
36
05/Makefile
36
05/Makefile
|
@ -1,6 +1,8 @@
|
|||
TCCDIR=tcc-0.9.27
|
||||
TCC=$(TCCDIR)/tcc
|
||||
all: out04 a.out test.out README.html
|
||||
TCC0=$(TCCDIR)/tcc0
|
||||
TCC1=$(TCCDIR)/tcc1
|
||||
TCCINST=/usr/local/lib/tcc-bootstrap
|
||||
all: out04 a.out test.out README.html $(TCCDIR)/lib/libtcc1.a
|
||||
in04: *.b ../04a/out04
|
||||
../04a/out04 main.b in04
|
||||
out04: in04 ../04/out03
|
||||
|
@ -11,13 +13,25 @@ out04: in04 ../04/out03
|
|||
./out04 $< $@
|
||||
a.out: main.c *.h out04
|
||||
./out04
|
||||
test.out: $(TCC) test.s.o test.c.o
|
||||
$(TCC) -static -nostdlib test.s.o test.c.o -o test.out
|
||||
test.s.o: $(TCC) test.s
|
||||
$(TCC) -static -nostdlib -c test.s -o test.s.o
|
||||
test.c.o: $(TCC) test.c
|
||||
$(TCC) -static -nostdlib -c test.c -o test.c.o
|
||||
$(TCC): $(TCCDIR)/*.c $(TCCDIR)/*.h out04
|
||||
cd $(TCCDIR) && ../out04 tcc.c tcc
|
||||
test.out: $(TCC0) test.s.o test.c.o
|
||||
$(TCC0) -static -nostdlib test.s.o test.c.o -o test.out
|
||||
test.s.o: $(TCC0) test.s
|
||||
$(TCC0) -static -nostdlib -c test.s -o test.s.o
|
||||
test.c.o: $(TCC0) test.c
|
||||
$(TCC0) -static -nostdlib -c test.c -o test.c.o
|
||||
$(TCC0): $(TCCDIR)/*.c $(TCCDIR)/*.h out04
|
||||
cd $(TCCDIR) && ../out04 tcc.c tcc0
|
||||
$(TCCDIR)/lib/libtcc1.a: $(TCC0) $(TCCDIR)/lib/*.[cS]
|
||||
$(TCC0) -c $(TCCDIR)/lib/alloca86_64-bt.S -o $(TCCDIR)/lib/alloca86_64-bt.o
|
||||
$(TCC0) -c $(TCCDIR)/lib/alloca86_64.S -o $(TCCDIR)/lib/alloca86_64.o
|
||||
$(TCC0) -c $(TCCDIR)/lib/va_list.c -o $(TCCDIR)/lib/va_list.o
|
||||
$(TCC0) -c $(TCCDIR)/lib/libtcc1.c -o $(TCCDIR)/lib/libtcc1.o
|
||||
$(TCC0) -ar $(TCCDIR)/lib/libtcc1.a $(TCCDIR)/lib/*.o
|
||||
install-tcc0: $(TCCDIR)/lib/libtcc1.a $(TCCDIR)/include/*.h
|
||||
mkdir -p $(TCCINST)/include
|
||||
cp -r $(TCCDIR)/include/*.h $(TCCINST)/include/
|
||||
cp -r $(TCCDIR)/lib/libtcc1.a $(TCCINST)/
|
||||
$(TCC1): $(TCC0) $(TCCINST)/libtcc1.a
|
||||
cd $(TCCDIR) && ./tcc0 tcc.c -o tcc1
|
||||
clean:
|
||||
rm -f out* README.html *.out *.o $(TCC)
|
||||
rm -f out* README.html *.out *.o $(TCC0) $(TCC1) $(TCCDIR)/lib/*.[oa]
|
||||
|
|
145
05/README.md
145
05/README.md
|
@ -9,8 +9,8 @@ make
|
|||
```
|
||||
|
||||
to build our C compiler and TCC. This will take some time (approx. 25 seconds on my computer).
|
||||
A test program, `test.out` will be compiled using `tcc`. If you run
|
||||
it, you should get the output
|
||||
Two test programs will be produced: `a.out`, compiled using our C compiler, and
|
||||
`test.out`, compiled using `tcc`. If you run either one, you should get the output
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
|
@ -107,8 +107,149 @@ Tokens are one of:
|
|||
- A character literal (e.g. `'a'`, `'\n'`)
|
||||
- A floating-point literal (e.g. `3.6`, `5e10`)
|
||||
|
||||
Next, an internal representation of the program is constructed in memory.
|
||||
This is where we read the tokens `if` `(` `a` `)` `printf` `(` `"Hello!\n"` `)` `;`
|
||||
and interpret it as an if statement, whose condition is the variable `a`, and whose
|
||||
body consists of the single statement calling the `printf` function with the argument `"Hello!\n"`.
|
||||
|
||||
Finally, we output the code for every function.
|
||||
|
||||
## executable format
|
||||
|
||||
This compiler's executables are much more sophisticated than the previous ones'.
|
||||
Instead of storing code and data all in one segment, we have three segments: one
|
||||
6MB segment for code (the program's functions are only allowed to use up 4MB of that, though),
|
||||
one 4MB segment for read-only data (strings), and one 4MB segment for read-write data.
|
||||
|
||||
Well, it *should* only be read-write, but unfortunately it also has to be executable...
|
||||
|
||||
## syscalls
|
||||
|
||||
Of course, we need some way of making system calls in C.
|
||||
We do this with a macro, `__syscall`, which you'll find in `stdc_common.h`:
|
||||
|
||||
```
|
||||
static unsigned char __syscall_data[] = {
|
||||
// mov rax, [rsp+24]
|
||||
0x48, 0x8b, 0x84, 0x24, 24, 0, 0, 0,
|
||||
// mov rdi, rax
|
||||
0x48, 0x89, 0xc7,
|
||||
// mov rax, [rsp+32]
|
||||
0x48, 0x8b, 0x84, 0x24, 32, 0, 0, 0,
|
||||
// mov rsi, rax
|
||||
0x48, 0x89, 0xc6,
|
||||
// mov rax, [rsp+40]
|
||||
0x48, 0x8b, 0x84, 0x24, 40, 0, 0, 0,
|
||||
// mov rdx, rax
|
||||
0x48, 0x89, 0xc2,
|
||||
// mov rax, [rsp+48]
|
||||
0x48, 0x8b, 0x84, 0x24, 48, 0, 0, 0,
|
||||
// mov r10, rax
|
||||
0x49, 0x89, 0xc2,
|
||||
// mov rax, [rsp+56]
|
||||
0x48, 0x8b, 0x84, 0x24, 56, 0, 0, 0,
|
||||
// mov r8, rax
|
||||
0x49, 0x89, 0xc0,
|
||||
// mov rax, [rsp+64]
|
||||
0x48, 0x8b, 0x84, 0x24, 64, 0, 0, 0,
|
||||
// mov r9, rax
|
||||
0x49, 0x89, 0xc1,
|
||||
// mov rax, [rsp+16]
|
||||
0x48, 0x8b, 0x84, 0x24, 16, 0, 0, 0,
|
||||
// syscall
|
||||
0x0f, 0x05,
|
||||
// mov [rsp+8], rax
|
||||
0x48, 0x89, 0x84, 0x24, 8, 0, 0, 0,
|
||||
// ret
|
||||
0xc3
|
||||
};
|
||||
|
||||
#define __syscall(no, arg1, arg2, arg3, arg4, arg5, arg6)\
|
||||
(((unsigned long (*)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long))__syscall_data)\
|
||||
(no, arg1, arg2, arg3, arg4, arg5, arg6))
|
||||
```
|
||||
|
||||
The `__syscall_data` array contains machine language instructions which perform a system call, and the
|
||||
`__syscall` macro "calls" the array as if it were a function. This is why we need a read-write-executable data
|
||||
segment -- otherwise we'd need to implement system calls in the compiler.
|
||||
|
||||
## C standard library
|
||||
|
||||
The C89 standard specifies a bunch of "standard library" functions which any implementation has to make available, e.g.
|
||||
`printf()`, `atoi()`, `exit()`.
|
||||
Fortunately, we don't have to write these functions in the 04 language; we can write them in C.
|
||||
|
||||
To use a particular function, a C program needs to include the appropriate header file, e.g.
|
||||
`#include <stdio.h>` lets you use `printf()` and other I/O-related functions. Normally,
|
||||
these header files just declare what types the parameters to the functions should be,
|
||||
but we actually put the function implementations there.
|
||||
|
||||
Let's take a look at the contents of `ctype.h`, which provides the functions `islower`, `isupper`, etc.:
|
||||
```
|
||||
#ifndef _CTYPE_H
|
||||
#define _CTYPE_H
|
||||
|
||||
#include <stdc_common.h>
|
||||
|
||||
int islower(int c) {
|
||||
return c >= 'a' && c <= 'z';
|
||||
}
|
||||
|
||||
int isupper(int c) {
|
||||
return c >= 'A' && c <= 'Z';
|
||||
}
|
||||
|
||||
int isalpha(int c) {
|
||||
return isupper(c) || islower(c);
|
||||
}
|
||||
|
||||
int isalnum(int c) {
|
||||
return isalpha(c) || isdigit(c);
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
#endif
|
||||
```
|
||||
The first two lines and last line prevent problems when the file is included multiple times.
|
||||
We begin by including `stdc_common.h`, which has a bunch of functions and type definitions which all
|
||||
our header files use, and then we define each of the necessary C standard library functions.
|
||||
|
||||
|
||||
## limitations
|
||||
|
||||
There are various minor ways in which this compiler doesn't actually handle all of C89.
|
||||
Here is a list of things we do wrong (this list is probably missing things, though):
|
||||
|
||||
- [trigraphs](https://en.wikipedia.org/wiki/Digraphs_and_trigraphs#C) are not handled
|
||||
- `char[]` string literal initializers can't contain null characters (e.g. `char x[] = "a\0b";` doesn't work)
|
||||
- you can only access members of l-values (e.g. `int x = function_which_returns_struct().member` doesn't work)
|
||||
- no default-int (this is a legacy feature of C, e.g. `main() { }` can technically stand in for `int main() {}`)
|
||||
- the keyword `auto` is not handled (again, a legacy feature of C)
|
||||
- `default:` must be the last label in a switch statement.
|
||||
- external variable declarations are ignored (e.g. `extern int x; int main() { return x; } int x = 5;` doesn't work)
|
||||
- `typedef`s, and `struct`/`union`/`enum` declarations aren't allowed inside functions
|
||||
- conditional expressions aren't allowed inside `case` (horribly, `switch (x) { case 5 ? 6 : 3: ; }` is legal C).
|
||||
- bit-fields aren't handled
|
||||
- Technically, `1[array]` is equivalent to `array[1]`, but we don't handle that.
|
||||
- C89 has *very* weird typing rules about `void*`/`non-void*` inside conditional expressions. We don't handle that properly.
|
||||
- C89 allows calling functions without declaring them, for legacy reasons. We don't handle that.
|
||||
- Floating-point constant expressions are very limited. Only `double` literals and 0 are supported (it was hard enough
|
||||
to parse floating-point literals in a language without floating-point variables!)
|
||||
- Floating-point literals can't have their integer part greater than 2<sup>64</sup>-1.
|
||||
- Redefining a macro is always an error, even if it's the same definition.
|
||||
- You can't have a variable/function/etc. called `defined`.
|
||||
- Various minor details of when macros are expanded in certain contexts are not handled correctly.
|
||||
- `setjmp.h`: `setjmp` isn't actually supported.
|
||||
- `stddef.h`: there is no `wchar_t`.
|
||||
- `stdlib.h`: `MB_CUR_MAX` and the `mbtowc` family of functions aren't defined.
|
||||
- `time.h`: local time is only supported for UTC+0; other timezones aren't handled.
|
||||
time.h: // @NONSTANDARD-ish.
|
||||
|
||||
|
||||
Also, the keywords `signed`, `volatile`, `register`, and `const` are all ignored. This shouldn't have an effect
|
||||
on any legal C program, though.
|
||||
|
||||
## modifications of tcc's source code
|
||||
|
||||
|
||||
|
|
29
05/main.c
29
05/main.c
|
@ -1,26 +1,15 @@
|
|||
/* #define _STDLIB_DEBUG */
|
||||
/* #include <math.h> */
|
||||
#include <stdio.h>
|
||||
/* #include <signal.h> */
|
||||
/* #include <stdlib.h> */
|
||||
/* #include <string.h> */
|
||||
/* #include <time.h> */
|
||||
/* #include <float.h> */
|
||||
/* #include <setjmp.h> */
|
||||
/* */
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int *p = 0x100;
|
||||
p += 1;
|
||||
switch (5) {
|
||||
case 5:
|
||||
switch (6) {
|
||||
default:;
|
||||
}
|
||||
case 6:
|
||||
;
|
||||
}
|
||||
printf("%p\n",p);
|
||||
printf("%p\n", malloc(1024*16));
|
||||
int *list = malloc(1024*4);
|
||||
printf("%p \n",list);
|
||||
list[1023] = 77;
|
||||
list = realloc(list, 1024*64);
|
||||
printf("%p \n",list);
|
||||
printf("%d\n",list[1023]);
|
||||
free(list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
13
05/stdlib.h
13
05/stdlib.h
|
@ -73,12 +73,25 @@ void *realloc(void *ptr, size_t size) {
|
|||
free(ptr);
|
||||
return NULL;
|
||||
}
|
||||
#if 0
|
||||
// this (better) implementation doesn't seem to be copying stuff to the
|
||||
// new mapping properly
|
||||
uint64_t *memory = (char *)ptr - 16;
|
||||
uint64_t old_size = *memory;
|
||||
uint64_t *new_memory = _mremap(memory, old_size, size, MREMAP_MAYMOVE);
|
||||
if ((uint64_t)new_memory > 0xffffffffffff0000) return NULL;
|
||||
*new_memory = size;
|
||||
return (char *)new_memory + 16;
|
||||
#endif
|
||||
|
||||
uint64_t *memory = (char *)ptr - 16;
|
||||
uint64_t old_size = *memory;
|
||||
void *new = malloc(size);
|
||||
char *new_dat = (char *)new + 16;
|
||||
*(uint64_t *)new = size;
|
||||
memcpy(new_dat, ptr, old_size);
|
||||
free(ptr);
|
||||
return new_dat;
|
||||
}
|
||||
|
||||
|
||||
|
|
2
05/tcc-0.9.27/.gitignore
vendored
2
05/tcc-0.9.27/.gitignore
vendored
|
@ -16,6 +16,8 @@
|
|||
a.out
|
||||
tcc_g
|
||||
tcc
|
||||
tcc[0123456789]
|
||||
tcc[0123456789]a
|
||||
*-tcc
|
||||
libtcc*.def
|
||||
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
#define TCC_VERSION "0.9.27"
|
||||
#define CONFIG_TCC_STATIC 1
|
||||
#define CONFIG_TCC_ELFINTERP "/XXX"
|
||||
#define CONFIG_TCC_CRT_PREFIX "/XXX"
|
||||
#define CONFIG_SYSROOT "/XXX"
|
||||
//#define CONFIG_TCC_ELFINTERP "/XXX"
|
||||
//#define CONFIG_TCC_CRT_PREFIX "/XXX"
|
||||
//#define CONFIG_SYSROOT "/XXX"
|
||||
#define inline
|
||||
#define TCC_TARGET_X86_64 1
|
||||
#define ONE_SOURCE 1
|
||||
#define CONFIG_LDDIR "lib/x86_64-linux-gnu"
|
||||
#define CONFIG_TCCDIR "/usr/local/lib/tcc-bootstrap"
|
||||
|
|
|
@ -613,10 +613,17 @@ unsigned long long __fixunsxfdi (long double a1)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Return d with its sign flipped, without applying unary minus to a
 * long double in the common case.
 *
 * The sign is toggled by XOR-ing the top bit of byte 9, i.e. bit 79 of the
 * value. That is the same bit as bit 15 of the second 64-bit word, but it is
 * reached through unsigned char access, so it neither breaks the aliasing
 * rules nor touches bytes past the tenth (long double may be only 12 bytes).
 *
 * Assumes the little-endian x87 80-bit extended layout -- this helper sits
 * in the non-ARM part of the file; TODO confirm for any other target.
 * If <float.h> happens to be visible and reports a different mantissa width,
 * fall back to ordinary negation instead of corrupting the value.
 */
static long double negate_ld(long double d) {
#if defined(LDBL_MANT_DIG) && LDBL_MANT_DIG != 64
    return -d;
#else
    unsigned char *bytes = (unsigned char *)&d;
    bytes[9] ^= 0x80; /* bit 79: sign bit of the 80-bit format */
    return d;
#endif
}
|
||||
|
||||
long long __fixxfdi (long double a1)
|
||||
{
|
||||
long long ret; int s;
|
||||
ret = __fixunsxfdi((s = a1 >= 0) ? a1 : -a1);
|
||||
ret = __fixunsxfdi((s = a1 >= 0) ? a1 : negate_ld(a1));
|
||||
return s ? ret : -ret;
|
||||
}
|
||||
#endif /* !ARM */
|
||||
|
|
|
@ -73,12 +73,25 @@ void *realloc(void *ptr, size_t size) {
|
|||
free(ptr);
|
||||
return NULL;
|
||||
}
|
||||
#if 0
|
||||
// this (better) implementation doesn't seem to be copying stuff to the
|
||||
// new mapping properly
|
||||
uint64_t *memory = (char *)ptr - 16;
|
||||
uint64_t old_size = *memory;
|
||||
uint64_t *new_memory = _mremap(memory, old_size, size, MREMAP_MAYMOVE);
|
||||
if ((uint64_t)new_memory > 0xffffffffffff0000) return NULL;
|
||||
*new_memory = size;
|
||||
return (char *)new_memory + 16;
|
||||
#endif
|
||||
|
||||
uint64_t *memory = (char *)ptr - 16;
|
||||
uint64_t old_size = *memory;
|
||||
void *new = malloc(size);
|
||||
char *new_dat = (char *)new + 16;
|
||||
*(uint64_t *)new = size;
|
||||
memcpy(new_dat, ptr, old_size);
|
||||
free(ptr);
|
||||
return new_dat;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -30,14 +30,15 @@
|
|||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
#ifdef __GNUC__
|
||||
#include <float.h>
|
||||
#if defined __GNUC__ || defined __TINYC__
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#include <setjmp.h>
|
||||
#include <time.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
#ifdef __GNUC__
|
||||
#if defined __GNUC__ || defined __TINYC__
|
||||
# include <unistd.h>
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
@ -211,7 +212,8 @@ extern long double strtold (const char *__nptr, char **__endptr);
|
|||
# define CONFIG_TCC_SYSINCLUDEPATHS \
|
||||
"{B}/include" \
|
||||
":" ALSO_TRIPLET(CONFIG_SYSROOT "/usr/local/include") \
|
||||
":" ALSO_TRIPLET(CONFIG_SYSROOT "/usr/include")
|
||||
":" ALSO_TRIPLET(CONFIG_SYSROOT "/usr/include") \
|
||||
":/usr/include/x86_64-linux-gnu"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -2458,6 +2458,16 @@ static void gen_cast_s(int t)
|
|||
gen_cast(&type);
|
||||
}
|
||||
|
||||
/*
 * Return d with its sign flipped.
 *
 * When long double has a 64-bit mantissa (LDBL_MANT_DIG == 64) the sign is
 * toggled directly in the object representation rather than with unary
 * minus: XOR the top bit of byte 9, which is bit 79 of the value (the same
 * bit as bit 15 of the second 64-bit word). Going through unsigned char
 * keeps the access within the aliasing rules and within the first ten bytes
 * of the object.
 *
 * Assumes that a 64-bit mantissa means the little-endian x87 80-bit layout
 * -- TODO confirm if this is ever built on another host.
 * For every other long double format, ordinary negation is used.
 */
static long double negate_ld(long double d) {
#if LDBL_MANT_DIG == 64
    unsigned char *bytes = (unsigned char *)&d;
    bytes[9] ^= 0x80; /* bit 79: sign bit of the 80-bit format */
    return d;
#else
    return -d;
#endif
}
|
||||
|
||||
static void gen_cast(CType *type)
|
||||
{
|
||||
int sbt, dbt, sf, df, c, p;
|
||||
|
@ -2499,12 +2509,12 @@ static void gen_cast(CType *type)
|
|||
if ((sbt & VT_UNSIGNED) || !(vtop->c.i >> 63))
|
||||
vtop->c.ld = vtop->c.i;
|
||||
else
|
||||
vtop->c.ld = -(long double)-vtop->c.i;
|
||||
vtop->c.ld = negate_ld((long double)-vtop->c.i);
|
||||
} else if(!sf) {
|
||||
if ((sbt & VT_UNSIGNED) || !(vtop->c.i >> 31))
|
||||
vtop->c.ld = (uint32_t)vtop->c.i;
|
||||
else
|
||||
vtop->c.ld = -(long double)-(uint32_t)vtop->c.i;
|
||||
vtop->c.ld = negate_ld((long double)-(uint32_t)vtop->c.i);
|
||||
}
|
||||
|
||||
if (dbt == VT_FLOAT)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue