diff --git a/doc/part-0x08.rst b/doc/part-0x08.rst index 7f938c8..633175b 100644 --- a/doc/part-0x08.rst +++ b/doc/part-0x08.rst @@ -30,3 +30,363 @@ I'll need to provide a few initial pieces: I'll skip the parser at first and hand hack some things, then try to port over my I/O layer from before. + +Also, talking to Steve got me to think about doing this in C99, because +a lot of the fun I've had with computers in the past involved hacking +on C projects. So, C99 it is. + + +Platforms +^^^^^^^^^ + +I've elected to set a new define type, ``PLATFORM_$PLATFORM``. The Makefile +sets this, so it's easier now to test building for different platforms. +Here are the current top-level definitions:: + + #ifndef __KF_DEFS_H__ + #define __KF_DEFS_H__ + + #include <stdbool.h> + #include <stddef.h> + #include <stdint.h> + + #ifdef PLATFORM_pc + #include "pc/defs.h" + #else + #include "default/defs.h" + #endif + +The ``pc/defs.h`` header:: + + #ifndef __KF_PC_DEFS_H__ + #define __KF_PC_DEFS_H__ + + typedef int32_t KF_INT; + typedef uintptr_t KF_ADDR; + + static const size_t DSTACK_SIZE = 65535; + static const size_t RSTACK_SIZE = 65535; + static const size_t DICT_SIZE = 65535; + + #endif /* __KF_PC_DEFS_H__ */ + + #endif /* __KF_DEFS_H__ */ + +The new stack +^^^^^^^^^^^^^ + +I'll start with a much simplified stack interface:: + + #ifndef __KF_STACK_H__ + #define __KF_STACK_H__ + + /* data stack interaction */ + bool dstack_pop(KF_INT *); + bool dstack_push(KF_INT); + bool dstack_get(size_t, KF_INT *); + size_t dstack_size(void); + void dstack_clear(void); + + /* return stack interaction */ + bool rstack_pop(KF_ADDR *); + bool rstack_push(KF_ADDR); + bool rstack_get(size_t, KF_ADDR *); + size_t rstack_size(void); + void rstack_clear(void); + + #endif /* __KF_STACK_H__ */ + +The implementation is simple enough; the ``rstack`` interface is similar +enough to the ``dstack`` that I'll just show the first:: + + #include "defs.h" + #include "stack.h" + + static KF_INT dstack[DSTACK_SIZE] = {0}; + static size_t dstack_len = 
0; + + bool + dstack_pop(KF_INT *a) + { + if (dstack_len == 0) { + return false; + } + + *a = dstack[--dstack_len]; + return true; + } + + bool + dstack_push(KF_INT a) + { + if (dstack_len == DSTACK_SIZE) { + return false; + } + + dstack[dstack_len++] = a; + return true; + } + + bool + dstack_get(size_t i, KF_INT *a) + { + if (i >= dstack_len) { + return false; + } + + *a = dstack[dstack_len - i - 1]; + return true; + } + + size_t + dstack_size() + { + return dstack_len; + } + + void + dstack_clear() + { + dstack_len = 0; + } + +Words +^^^^^ + +Reading TIL has given me some new ideas on how to implement words:: + + #ifndef __KF_WORD_H__ + #define __KF_WORD_H__ + + /* + * Every word in the dictionary starts with a header: + * uint8_t length; + * uint8_t flags; + * char *name; + * uintptr_t next; + * + * The body looks like the following: + * uintptr_t codeword; + * uintptr_t body[]; + * + * The codeword is the interpreter for the body. This is defined in + * eval.c. Note that a native (or builtin) function has only a single + * body element. + * + * The body of a native word points to a function that's compiled in already. + */ + + + /* + * store_native writes a new dictionary entry for a native-compiled + * function. + */ + void store_native(uint8_t *, const char *, const uint8_t, void(*)(void)); + + /* + * match_word returns true if the current dictionary entry matches the + * token being searched for. + */ + bool match_word(uint8_t *, const char *, const uint8_t); + + /* + * word_link returns the offset to the next word. + */ + size_t word_link(uint8_t *); + + size_t word_body(uint8_t *); + + #endif /* __KF_WORD_H__ */ + +The codeword is the big changer here. I've put a native evaluator and +a codeword executor in the ``eval`` files:: + + #ifndef __KF_EVAL_H__ + #define __KF_EVAL_H__ + + #include "defs.h" + + /* + * cwexec is the codeword executor. It assumes that the uintptr_t + * passed into it points to the correct executor (e.g. 
nexec), + * which is called with the next address. + */ + void cwexec(uintptr_t); + + + /* + * nexec is the native executor. + * + * It should take a uintptr_t containing the address of a code block + * and will execute the function starting there. The function should + * have the signature void(*target)(void) - a function returning + * nothing and taking no arguments. + */ + void nexec(uintptr_t); + + static const uintptr_t nexec_p = (uintptr_t)&nexec; + + + #endif /* __KF_EVAL_H__ */ + +The implementations of these are short:: + + #include "defs.h" + #include "eval.h" + + #include <string.h> + +``nexec`` just casts its target to a void function and calls it:: + + void + nexec(uintptr_t target) + { + ((void(*)(void))target)(); + } + +``cwexec`` is the magic part: it reads a pair of addresses; the first +is the executor, and the next is the start of the code body. In the +case of native execution, this is a pointer to a function:: + + void + cwexec(uintptr_t entry) + { + uintptr_t target = 0; + uintptr_t codeword = 0; + + memcpy(&codeword, (void *)entry, sizeof(uintptr_t)); + memcpy(&target, (void *)(entry + sizeof(uintptr_t)), sizeof(uintptr_t)); + ((void(*)(uintptr_t))codeword)(target); + } + + +So I wrote a quick test program to check these out:: + + #include "defs.h" + #include "eval.h" + #include <stdio.h> + #include <string.h> + + static void + hello(void) + { + printf("hello, world\n"); + } + + int + main(void) + { + uintptr_t target = (uintptr_t)hello; + + nexec(hello); + + uint8_t arena[32] = { 0 }; + uintptr_t arena_p = (uintptr_t)arena; + + memcpy(arena, (void *)&nexec_p, sizeof(nexec_p)); + memcpy(arena + sizeof(nexec_p), (void *)&target, sizeof(target)); + + cwexec(arena_p); + } + +But does it work? + +:: + + $ gcc -o eval_test eval_test.c eval.o + $ ./eval_test + hello, world + hello, world + +What magic is this? 
+ +Now I need to write a couple of functions to make this easier:: + + #include "defs.h" + #include "eval.h" + #include "word.h" + + #include <string.h> + + static uint8_t dict[DICT_SIZE] = {0}; + static size_t last = 0; + +The first two functions will operate on the internal dict, and are +intended to be used to maintain the internal dictionary. The first +adds a new word to the dictionary, and the second attempts to look +up a word by name and execute it:: + + void + append_word(const char *name, const uint8_t len, void(*target)(void)) + { + store_native(dict+last, name, len, target); + } + + bool + execute(const char *name, const uint8_t len) + { + size_t offset = 0; + size_t body = 0; + while (true) { + if (!match_word(dict+offset, name, len)) { + if ((offset = word_link(dict+offset)) == 0) { + return false; + } + continue; + } + + body = word_body(dict+offset); + cwexec(dict + body + offset); + return true; + } + } + + void + store_native(uint8_t *entry, const char *name, const uint8_t len, void(*target)(void)) + { + uintptr_t target_p = (uintptr_t)target; + size_t link = 2 + len + (2 * sizeof(uintptr_t)); + + /* write the header */ + entry[0] = len; + entry[1] = 0; // flags aren't used yet + memcpy(entry+2, name, len); + memcpy(entry+2+len, &link, sizeof(link)); + + /* write the native executor codeword and the function pointer */ + memcpy(entry, (uint8_t *)(&nexec_p), sizeof(uintptr_t)); + memcpy(entry + sizeof(uintptr_t), (uint8_t *)(&target_p), sizeof(uintptr_t)); + } + + bool + match_word(uint8_t *entry, const char *name, const uint8_t len) + { + if (entry[0] != len) { + return false; + } + + if (memcmp(entry+2, name, len) != 0) { + return false; + } + + return true; + } + + size_t + word_link(uint8_t *entry) + { + size_t link; + + if (entry[0] == 0) { + return 0; + } + memcpy(&link, entry+2+entry[0], sizeof(link)); + return link; + } + + size_t + word_body(uint8_t *entry) + { + return 2 + entry[0] + sizeof(size_t); + } + diff --git a/kf.c b/kf.c index 4e5e18f..dcc1cbc 
100644 --- a/kf.c +++ b/kf.c @@ -20,7 +20,7 @@ main(void) uint8_t arena[128] = {0}; uintptr_t arena_p = (uintptr_t)arena; - store_native(arena, hello); + store_native(arena, "hello", 5, hello); cwexec(arena_p); diff --git a/word.c b/word.c index fa67e5f..8afd3f3 100644 --- a/word.c +++ b/word.c @@ -27,7 +27,7 @@ execute(const char *name, const uint8_t len) } body = word_body(dict+offset); - cwexec(dict + body + offset); + cwexec((uintptr_t)(dict + body + offset)); return true; } }