From 7fc402e30a90682dca8dc8be9e6728684886bf56 Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Wed, 28 Feb 2018 16:44:43 -0800 Subject: [PATCH] misc/kforth: Filling out more words. --- Makefile | 3 +- TODO.txt | 20 +++ dict.cc | 161 ++++++++++++++++++++++- doc/part-0x06.rst | 322 ++++++++++++++++++++++++++++++++++++++++++++++ io.cc | 31 ++++- io.h | 1 + 6 files changed, 532 insertions(+), 6 deletions(-) create mode 100644 TODO.txt create mode 100644 doc/part-0x06.rst diff --git a/Makefile b/Makefile index b8d3250..ea8521b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CXXSTD := c++14 -CXXFLAGS := -std=$(CXXSTD) -Wall -Werror -Os -static +CXXFLAGS := -std=$(CXXSTD) -Wall -Werror -O0 -g -static LDFLAGS := -static OBJS := linux/io.o \ io.o \ @@ -14,7 +14,6 @@ all: $(TARGET) $(TARGET): $(OBJS) $(CXX) $(CXXFLAGS) -o $@ $(OBJS) - strip $@ clean: rm -f $(OBJS) $(TARGET) diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000..fa5e2de --- /dev/null +++ b/TODO.txt @@ -0,0 +1,20 @@ +nucleus layer: ++ ! ++ +! ++ @ ++ C! ++ C@ ++ CMOVE ++ CMOVE> ++ COUNT ++ FILL ++ EXECUTE ++ EXIT ++ I ++ J ++ >R ++ R> ++ R@ ++ U< ++ UM* ++ UM/MOD diff --git a/dict.cc b/dict.cc index a6a173f..baf7e21 100644 --- a/dict.cc +++ b/dict.cc @@ -24,8 +24,8 @@ pop_long(System *sys, KF_LONG *d) return false; } - *d = a << dshift; - *d += b; + *d = static_cast(a) << dshift; + *d += static_cast(b); sys->status = STATUS_OK; return true; } @@ -1001,6 +1001,22 @@ dlt(System *sys) return true; } +static bool +ddot(System *sys) +{ + KF_LONG da; + + if (!pop_long(sys, &da)) { + // Status is already set. + return false; + } + + write_dnum(sys->interface, da); + sys->interface->newline(); + sys->status = STATUS_OK; + return true; +} + static bool negate(System *sys) { @@ -1023,20 +1039,161 @@ negate(System *sys) return true; } +static bool +dnegate(System *sys) +{ + KF_LONG da; + + if (!pop_long(sys, &da)) { + // Status is already set. + return false; + } + + da = ~da; + da++; + + if (!push_long(sys, da)) { + // Status is already set. + return false; + } + + sys->status = STATUS_OK; + return true; +} + +static bool +pick(System *sys) +{ + KF_INT a, b; + if (!sys->dstack.pop(&a)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + size_t i = sys->dstack.size() - a - 1; + if (!sys->dstack.get(i, b)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + if (!sys->dstack.push(b)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + sys->status = STATUS_OK; + return true; +} + +static bool +min(System *sys) +{ + KF_INT a, b; + if (!sys->dstack.pop(&a)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + if (!sys->dstack.pop(&b)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + if (!sys->dstack.push(a < b ? a : b)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + sys->status = STATUS_OK; + return true; +} + +static bool +max(System *sys) +{ + KF_INT a, b; + if (!sys->dstack.pop(&a)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + if (!sys->dstack.pop(&b)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + if (!sys->dstack.push(a > b ? a : b)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + sys->status = STATUS_OK; + return true; +} + +static bool +exclusive_or(System *sys) +{ + KF_INT a, b; + if (!sys->dstack.pop(&a)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + if (!sys->dstack.pop(&b)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + if (!sys->dstack.push(a ^ b)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + sys->status = STATUS_OK; + return true; +} + +static bool +mod(System *sys) +{ + KF_INT a, b; + if (!sys->dstack.pop(&a)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + if (!sys->dstack.pop(&b)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + if (!sys->dstack.push(b % a)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + sys->status = STATUS_OK; + return true; +} + void init_dict(System *sys) { sys->dict = nullptr; sys->dict = new Builtin((const char *)"SWAP", 4, sys->dict, swap); + sys->dict = new Builtin((const char *)"XOR", 3, sys->dict, exclusive_or); sys->dict = new Builtin((const char *)"ROT", 3, sys->dict, rot); sys->dict = new Builtin((const char *)"ROLL", 4, sys->dict, roll); + sys->dict = new Builtin((const char *)"PICK", 4, sys->dict, pick); sys->dict = new Builtin((const char *)"OVER", 4, sys->dict, over); sys->dict = new Builtin((const char *)"NEGATE", 6, sys->dict, negate); sys->dict = new Builtin((const char *)"OR", 2, sys->dict, lor); + sys->dict = new Builtin((const char *)"MOD", 3, sys->dict, mod); + sys->dict = new Builtin((const char *)"MIN", 3, sys->dict, min); + sys->dict = new Builtin((const char *)"MAX", 3, sys->dict, max); sys->dict = new Builtin((const char *)"DUP", 3, sys->dict, dup); sys->dict = new Builtin((const char *)"DROP", 4, sys->dict, drop); sys->dict = new Builtin((const char *)"DEPTH", 5, sys->dict, depth); sys->dict = new Builtin((const char *)"DEFINITIONS", 11, sys->dict, definitions); + sys->dict = new Builtin((const char *)"DNEGATE", 7, sys->dict, dnegate); + sys->dict = new Builtin((const char *)"D.", 2, sys->dict, ddot); sys->dict = new Builtin((const char *)"D<", 2, sys->dict, dlt); sys->dict = new Builtin((const char *)"D+", 2, sys->dict, dplus); sys->dict = new Builtin((const char *)"BYE", 3, sys->dict, bye); diff --git a/doc/part-0x06.rst b/doc/part-0x06.rst new file mode 100644 index 0000000..dfdb16a --- /dev/null +++ b/doc/part-0x06.rst @@ -0,0 +1,322 @@ +Write You a Forth, 0x06 +----------------------- + +:date: 2018-02-28 22:55 +:tags: wyaf, forth + +Lots of updates last night; SLOC-wise, I added a bunch of new definitions: + ++ ``DEPTH``, ``.`` and ``.S`` to inspect the stack ++ ``/MOD``, ``*/``, and ``*/MOD``, which required adding some idea of a long + type ++ ``0<``, ``0=``, ``0>``, ``<``, ``=``, and ``>`` for conditionals ++ ``DUP`` and ``?DUP`` ++ the logical operators ``AND``, ``OR``, and ``NEGATE`` ++ ``ABS`` ++ ``BYE`` moved from an interpreter hack to a defined word ++ ``D+`` and ``D-`` started me off on the concept of double numbers ++ ``DROP``, ``OVER``, and ``ROLL`` are more stack manipulation functions + +It's starting to feel a lot like a Forth... + +Speaking of SLOC, for shits and grins I decided to see how the code base has +grown: + ++-----------+---------------+--------+----------------------+---------------+ +| revision | lines of code | growth | focus | exec size (b) | ++-----------+---------------+--------+----------------------+---------------+ +| 0x02 | 133 | n/a | starting point | 38368 | ++-----------+---------------+--------+----------------------+---------------+ +| 0x03 | 245 | 1.8x | parsing | 40920 | ++-----------+---------------+--------+----------------------+---------------+ +| 0x04 | 369 | 1.5x | stack / numerics | 48736 | ++-----------+---------------+--------+----------------------+---------------+ +| 0x05 | 677 | 1.8x | initial dictionary | 62896 | ++-----------+---------------+--------+----------------------+---------------+ +| 0x06 | 1436 | 2.1x | expanding vocabulary | 85256 | ++-----------+---------------+--------+----------------------+---------------+ + +Note that the executable is compiled with ``-O0 -g`` on the +``x86_64-linux-gnu`` target. + +It makes sense that expanding the vocabulary is going to be a huge code +expansion. I did do more than that; so, I'm not really going to show most of +the work I did for the new words (a lot of it is repetative and mechanical). + +System updates +^^^^^^^^^^^^^^ + +Before I started expanding the dictionary, though, I made some changes to +the ``System``:: + + $ git diff HEAD^ system.h + diff --git a/system.h b/system.h + index 00f4a34..91aa1fa 100644 + --- a/system.h + +++ b/system.h + @@ -5,11 +5,24 @@ + #include "io.h" + #include "stack.h" + + +typedef enum _SYS_STATUS : uint8_t { + + STATUS_OK = 0, + + STATUS_STACK_OVERFLOW = 1, + + STATUS_STACK_UNDERFLOW = 2, + + STATUS_EXECUTION_FAILURE = 3, + + STATUS_UNKNOWN_WORD = 4 + +} SYS_STATUS; + + + +class Word; + + + typedef struct _System { + Stack dstack; + IO *interface; + - struct Word *dict; + + Word *dict; + + SYS_STATUS status; + } System; + + +void system_clear_error(System *sys); + +void system_write_status(System *sys); + + #endif // __KF_CORE_H__ + \ No newline at end of file + +I've started adding a notion of system state, which I've deliberately kept +separate from the parser state. The new functions aren't particularly +interesting; they just write a string to the ``interface`` field so you +get things like:: + + $ ./kforth + kforth interpreter + ? swap + stack underflow (error code 2). + ? what-word? + unknown word (error code 4). + ? 2 + ok. + +Note that this is separate from the parser errors:: + + $ ./kforth + kforth interpreter + ? AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + ok. + parse error: token too long + ? unknown word (error code 4). + ? + +Though this test does show that the interpreter could be made more robust. + +I/O updates +^^^^^^^^^^^ + +The next thing I did was move the ``write_dstack`` function into ``io.cc``; +this is needed to implement ``.S``. While I was at it, I decided to make +``write_num`` finally work well and correctly, and I think I've got the final +version done:: + + void + write_num(IO *interface, KF_INT n) + { + char buf[nbuflen]; + uint8_t i = nbuflen - 1; + memset(buf, 0, nbuflen); + + if (n < 0) { + interface->wrch('-'); + } + +I'm still not proud of this hack, but it seems to be the best way to deal with +this right now:: + + else if (n == 0) { + interface->wrch('0'); + return; + } + + while (n != 0) { + char x = n % 10; + +This was the magic that finally got it right: negating the digits as they're +going into the buffer. No more trying to invert the whole number, just each +digit:: + + x = x < 0 ? -x : x; + x += '0'; + buf[i--] = x; + n /= 10; + } + + interface->wrbuf(buf+i, nbuflen - i); + } + +My first pass at this wrote the string forwards, then reversed it. I didn't +like that; while performance isn't my first concern, it just seemed like a +fun challenge to get the reversed buffer written correctly. + +While I was in the I/O subsystem, I also decided to update the ``IO`` base +class to include a ``newline`` method; I had a few instances of +``interface->wrch('\n')`` sprinkled throughout, but that won't necessarily be +correct elsewhere. + +Miscellaneous updates +^^^^^^^^^^^^^^^^^^^^^^ + +I add a new definition to the ``defs.h`` files: a ``KF_LONG`` type to prepare +for the double numbers mentioned in the next section, and switched to static +compilation. + +New words! +^^^^^^^^^^ + +Finally, I started adding the new words in. I'm still trying to figure out a +good way to handle the address types (I think I'll just introduce a ``KF_ADDR`` +type) so I've punted on those for now. + +.. _pforth: http://www.softsynth.com/pforth/ +.. _gforth: https://www.gnu.org/software/gforth/ + +One of the interesting challenges is dealing with the double numbers. These are +on the stack as a pair of smaller numbers, e.g. if the double number type is 64 +bits and the standard number type is 32 bits, then you might see something like +this (via pforth_):: + + 0 1 0 1 D+ + ok + Stack<10> 0 2 + +So, how to deal with this? There's a ``D.`` word, which I don't have +implemented yet, that will let me see what pforth_ and gforth_ do:: + + $ pforth -q + Begin AUTO.INIT ------ + 0 1 D. 1 0 D. + 4294967296 1 + ^C + $ gforth + Gforth 0.7.2, Copyright (C) 1995-2008 Free Software Foundation, Inc. + Gforth comes with ABSOLUTELY NO WARRANTY; for details type `license' + Type `bye' to exit + 0 1 D. 1 0 D. 18446744073709551616 1 ok + +So, it looks like the first number on the stack is the low part, and the second +is the high part. This is, once again, pretty straightforward: I'll need to +shift the first number by the appropriate number of bits and then add the +second number to it. + + constexpr size_t dshift = (sizeof(KF_INT) * 8) - 1; + + static bool + pop_long(System *sys, KF_LONG *d) + { + KF_INT a = 0; + KF_INT b = 0; + if (!sys->dstack.pop(&a)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + if (!sys->dstack.pop(&b)) { + sys->status = STATUS_STACK_UNDERFLOW; + return false; + } + + *d = static_cast(a) << dshift; + *d += static_cast(b); + sys->status = STATUS_OK; + return true; + } + +This function also shows off the new status work and how that turns out. I've +kept the exec interface as a boolean to indicate success or failure. + +To push the results back onto the stack, I needed to first write a masking +function to make sure to clear out any lingering bits:: + + static inline KF_INT + mask(size_t bits) + { + KF_INT m = 0; + + for (size_t i = 0; i < bits; i++) { + m += 1 << i; + } + + return m; + } + +I should probably check `Hacker's Delight `_ to see +if there's any tricks for this. + +With the mask available, getting a long into a pair of ints requires shifting +and clearing for the high part and clearing for the low part:: + + static bool + push_long(System *sys, KF_LONG d) + { + KF_INT a = static_cast((d >> dshift) & mask(dshift)); + KF_INT b = static_cast(d & mask(dshift)); + + if (!sys->dstack.push(b)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + if (!sys->dstack.push(a)) { + sys->status = STATUS_STACK_OVERFLOW; + return false; + } + + sys->status = STATUS_OK; + return true; + } + +One of the words that interacts with doubles is ``D+``:: + + static bool + dplus(System *sys) + { + KF_LONG da, db; + + if (!pop_long(sys, &da)) { + // Status is already set. + return false; + } + + if (!pop_long(sys, &db)) { + // Status is already set. + return false; + } + + da += db; + + if (!push_long(sys, da)) { + // Status is already set. + return false; + } + + // Status is already set. + return true; + } + +The only other thing I really did was to add a ``remove`` method to the Stack +class to support ``ROLL``. + +Huge diff, but not as much to say about it --- next up, I think I'm going to +introduce the ``KF_ADDR`` type and start working on some of the address +interaction stuff. I'll also add more of the double number words, too. The +words I still have to implement from the `FORTH-83 standard`_ nuclear layer +are: + ++ ``!``, ``+!``, ``@``, ``C!``, ``C@``, ``CMOVE``, ``CMOVE>``, ``COUNT``, + ``FILL``: memory manipulation words ++ ``DNEGATE``, ``MAX``, ``MIN``, ``MOD``, ``XOR``: more arithmetic words ++ ``EXECUTE``, ``EXIT``, ``I``, ``J``, ``PICK``: various words ++ ``>R``, ``R>``, ``R@``: return stack words ++ ``U<``, ``UM*``, ``UM/MOD``: unsigned math words + +.. _FORTH-83 standard: http://forth.sourceforge.net/standard/fst83/fst83-12.htm + +As before, the snapshot for this update is tagged `part-0x06 +`_. \ No newline at end of file diff --git a/io.cc b/io.cc index bdb27cc..f2521c8 100644 --- a/io.cc +++ b/io.cc @@ -3,11 +3,10 @@ #include -static constexpr size_t nbuflen = 11; - void write_num(IO *interface, KF_INT n) { + static constexpr size_t nbuflen = 11; char buf[nbuflen]; uint8_t i = nbuflen - 1; memset(buf, 0, nbuflen); @@ -31,6 +30,34 @@ write_num(IO *interface, KF_INT n) interface->wrbuf(buf+i, nbuflen - i); } +void +write_dnum(IO *interface, KF_LONG n) +{ + static constexpr size_t dnbuflen = 21; + char buf[dnbuflen]; + uint8_t i = dnbuflen - 1; + memset(buf, 0, dnbuflen); + + if (n < 0) { + interface->wrch('-'); + } + else if (n == 0) { + interface->wrch('0'); + return; + } + + while (n != 0) { + char x = n % 10; + x = x < 0 ? -x : x; + x += '0'; + buf[i--] = x; + n /= 10; + } + + interface->wrbuf(buf+i, dnbuflen - i); +} + + void write_dstack(IO *interface, Stack dstack) { diff --git a/io.h b/io.h index 8604546..6e876a6 100644 --- a/io.h +++ b/io.h @@ -25,6 +25,7 @@ public: }; void write_num(IO *, KF_INT); +void write_dnum(IO *, KF_LONG); void write_dstack(IO *, Stack);