misc/kforth: Interpreter writeup, code cleanups.

This commit is contained in:
Kyle Isom 2018-02-27 08:04:17 -08:00
parent 14dc525084
commit 63e9a0a333
9 changed files with 443 additions and 31 deletions

53
dict.cc
View File

@ -4,6 +4,8 @@
#include "system.h" #include "system.h"
#include "word.h" #include "word.h"
#include <string.h>
static bool static bool
add(System *sys) add(System *sys)
{ {
@ -68,7 +70,7 @@ div(System *sys)
return false; return false;
} }
b *= a; b /= a;
return sys->dstack.push(b); return sys->dstack.push(b);
} }
@ -125,36 +127,61 @@ rot(System *sys)
static bool static bool
definitions(System *sys) definitions(System *sys)
{ {
Word *cursor = dict; Word *cursor = sys->dict;
char buf[MAX_TOKEN_LENGTH]; char buf[MAX_TOKEN_LENGTH];
size_t buflen = 0; char line[72];
size_t buflen = 0, linelen = 0;
bool ready = false;
while (cursor != nullptr) { while (cursor != nullptr) {
if (ready) {
ready = false;
sys->interface->wrln(line, linelen);
linelen = 0;
continue;
}
cursor->getname(buf, &buflen); cursor->getname(buf, &buflen);
sys->interface->wrln(buf, buflen);
// TODO: get rid of magic numbers
if ((buflen + linelen) > 72) {
ready = true;
continue;
}
memcpy(line + linelen, buf, buflen);
linelen += buflen;
if (linelen < 71) {
line[linelen++] = ' ';
}
else {
ready = true;
}
cursor = cursor->next(); cursor = cursor->next();
} }
sys->interface->wrln(line, linelen);
return true; return true;
} }
void void
init_dict() init_dict(System *sys)
{ {
dict = new Builtin((const char *)"DEFINITIONS", 11, dict, definitions); sys->dict = nullptr;
dict = new Builtin((const char *)"+", 1, dict, add); sys->dict = new Builtin((const char *)"DEFINITIONS", 11, sys->dict, definitions);
dict = new Builtin((const char *)"-", 1, dict, sub); sys->dict = new Builtin((const char *)"+", 1, sys->dict, add);
dict = new Builtin((const char *)"*", 1, dict, mul); sys->dict = new Builtin((const char *)"-", 1, sys->dict, sub);
dict = new Builtin((const char *)"/", 1, dict, div); sys->dict = new Builtin((const char *)"*", 1, sys->dict, mul);
dict = new Builtin((const char *)"SWAP", 4, dict, swap); sys->dict = new Builtin((const char *)"/", 1, sys->dict, div);
dict = new Builtin((const char *)"ROT", 3, dict, rot); sys->dict = new Builtin((const char *)"SWAP", 4, sys->dict, swap);
sys->dict = new Builtin((const char *)"ROT", 3, sys->dict, rot);
} }
LOOKUP LOOKUP
lookup(struct Token *token, System *sys) lookup(struct Token *token, System *sys)
{ {
Word *cursor = dict; Word *cursor = sys->dict;
KF_INT n; KF_INT n;
if (parse_num(token, &n)) { if (parse_num(token, &n)) {

4
dict.h
View File

@ -6,15 +6,13 @@
#include "system.h" #include "system.h"
#include "word.h" #include "word.h"
static Word *dict = nullptr;
typedef enum _LOOKUP_ : uint8_t { typedef enum _LOOKUP_ : uint8_t {
LOOKUP_OK = 0, // Lookup executed properly. LOOKUP_OK = 0, // Lookup executed properly.
LOOKUP_NOTFOUND = 1, // The token isn't in the dictionary. LOOKUP_NOTFOUND = 1, // The token isn't in the dictionary.
LOOKUP_FAILED = 2 // The word failed to execute. LOOKUP_FAILED = 2 // The word failed to execute.
} LOOKUP; } LOOKUP;
void init_dict(void); void init_dict(System *);
LOOKUP lookup(struct Token *, System *); LOOKUP lookup(struct Token *, System *);

View File

@ -55,7 +55,7 @@ won't be a target at first, but something to keep in mind as I progress.
Eventually, I'd like to build a zero-allocation Forth that can run on an Eventually, I'd like to build a zero-allocation Forth that can run on an
STM-32 or an MSP430, but the first goal is going to get a minimal Forth STM-32 or an MSP430, but the first goal is going to get a minimal Forth
working. I'll define the stages as working. I'll define the stages tentatively as
Stage 1 Stage 1
~~~~~~~ ~~~~~~~
@ -95,6 +95,15 @@ limiting factor. Fortunately, just a few days before I started this, the TI
wiki was updated_ to note that the latest compilers now support C++11 and wiki was updated_ to note that the latest compilers now support C++11 and
C++14, so I'll target C++14. C++14, so I'll target C++14.
As a reminder to myself: this is not going to be the prettiest or best or most
secure or production ready code. The goal is to have fun writing some software
again and to rekindle some of the joy of computing that I had before. Once I
have something working, I can go back and make an exercise of cleaning it up
and refactoring it. The prose in this series is also not going to be my finest
writing ever --- again, it suffices just to do it. The goal is to have
something to show, not to achieve perfection; it'll mostly going to be hacked
on while I'm on the bus or when I have a bit of downtime here and there.
.. _updated: http://processors.wiki.ti.com/index.php/C%2B%2B_Support_in_TI_Compilers#Status_as_of_February_2018 .. _updated: http://processors.wiki.ti.com/index.php/C%2B%2B_Support_in_TI_Compilers#Status_as_of_February_2018
I don't really know what I'm doing, so in the next section, I'll build out the I don't really know what I'm doing, so in the next section, I'll build out the

View File

@ -4,6 +4,9 @@ Write You a Forth, 0x05
:date: 2018-02-24 12:23 :date: 2018-02-24 12:23
:tags: wyaf, forth :tags: wyaf, forth
NB: Today's update was pretty large, so I don't show all of the code; this is
what ``git`` is for.
Today I need to start actually doing things with tokens. This requires two Today I need to start actually doing things with tokens. This requires two
things: things:
@ -30,6 +33,7 @@ stack-aware, so what I've done is define a ``System`` struct in ``system.h``::
typedef struct _System { typedef struct _System {
Stack<KF_INT> dstack; Stack<KF_INT> dstack;
IO *interface;
} System; } System;
@ -38,9 +42,382 @@ stack-aware, so what I've done is define a ``System`` struct in ``system.h``::
This will let me later add in support for the return stack and other things This will let me later add in support for the return stack and other things
that might be useful. Other ideas: adding something like an ``errno`` that might be useful. Other ideas: adding something like an ``errno``
equivalent to indicate the last error, and storing a dictionary of words. This equivalent to indicate the last error, and storing a dictionary of words. This
will need some restructuring, though. Anyways, this works for now. will need some restructuring, though. I've already moved the I/O into the
system as well. This took some finangling in ``kforth.cc``; I'm eliding the
diff here because it's so long, but it's basically a ``sed -i -e
's/interface./sys->interface.``.
The Word interface The Word interface
^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^
Now I can start defining a Word.h. Now I can start defining a Word.h. Maybe this is a case of when you have an
object-oriented language, everything looks like a class, but I decided to
design an abstract class for a Word and implement the first concrete class,
**Builtin**. What I came up with was::
class Word {
public:
virtual ~Word() {};
The *eval* method takes a ``System`` structure and executes some function.
virtual bool eval(System *) = 0;
The dictionary is a linked list, so next is used to traverse the list.
virtual Word *next(void) = 0;
The ``match`` method is used to determine whether this is the word being
referred to.
virtual bool match(struct Token *) = 0;
Finally, ``getname`` will fill in a ``char[MAX_TOKEN_SIZE]`` buffer with the
word's name.
virtual void getname(char *, size_t *) = 0;
};
With the interface defined, I can implement ``Builtins`` (I've elided the
common interface from the listing below to focus on the implementation)::
class Builtin : public Word {
public:
~Builtin() {};
Builtin(const char *name, size_t namelen, Word *head, bool (*fun)(System *));
private:
char name[MAX_TOKEN_LENGTH];
size_t namelen;
Word *prev;
bool (*fun)(System *);
};
The ``bool`` works as a first pass, but I think I'm going to have add some
notion of system conditions later on to denote why execution failed. One thing
that both ``pforth`` and ``gforth`` do that I don't yet do is to clear the
stack when there's an execution failure. At least, they clear the stack with an
unrecognised word. The implementation is pretty trivial::
#include "defs.h"
#include "parser.h"
#include "system.h"
#include "word.h"
#include <string.h>
Builtin::Builtin(const char *name, size_t namelen, Word *head, bool (*target)(System *))
: prev(head), fun(target)
{
memcpy(this->name, name, namelen);
this->namelen = namelen;
}
bool
Builtin::eval(System *sys)
{
return this->fun(sys);
}
Word *
Builtin::next()
{
return this->prev;
}
bool
Builtin::match(struct Token *token)
{
return match_token(this->name, this->namelen, token->token, token->length);
}
void
Builtin::getname(char *buf, size_t *buflen)
{
memcpy(buf, this->name, this->namelen);
*buflen = namelen;
}
Right. Now to do something with this.
The system dictionary
^^^^^^^^^^^^^^^^^^^^^
The dictionary's interface is minimal::
// dict.h
#ifndef __KF_DICT_H__
#define __KF_DICT_H__
#include "defs.h"
#include "parser.h"
#include "system.h"
#include "word.h"
typedef enum _LOOKUP_ : uint8_t {
LOOKUP_OK = 0, // Lookup executed properly.
LOOKUP_NOTFOUND = 1, // The token isn't in the dictionary.
LOOKUP_FAILED = 2 // The word failed to execute.
} LOOKUP;
void init_dict(System *);
LOOKUP lookup(struct Token *, System *);
#endif // __KF_DICT_H__
There's a modicum of differentiation between "everything worked" and "no it
didn't," and by that I mean the lookup can tell you if the word wasn't found
or if there was a problem executing it.
I added a ``struct Word *dict`` field to the ``System`` struct, since we're
operating on these anyways. I guess it's best to start with the lookup function
first so that when I started adding builtins later it'll be easy to just
recompile and use them.
::
LOOKUP
lookup(struct Token *token, System *sys)
{
Word *cursor = sys->dict;
KF_INT n;
I seem to recall from *Programming a Problem-Oriented Language* that Chuck
Moore advocated checking whether a token was a number before looking it up
in the dictionary, so that's the approach I'll take::
if (parse_num(token, &n)) {
if (sys->dstack.push(n)) {
return LOOKUP_OK;
}
return LOOKUP_FAILED;
}
The remainder is pretty much bog-standard linked list traversal::
while (cursor != nullptr) {
if (cursor->match(token)) {
if (cursor->eval(sys)) {
return LOOKUP_OK;
}
return LOOKUP_FAILED;
}
cursor = cursor->next();
}
return LOOKUP_NOTFOUND;
}
This needs to get hooked up into the interpreter now; this is going to require
some reworking of the ``parser`` function::
static bool
parser(const char *buf, const size_t buflen)
{
static size_t offset = 0;
static struct Token token;
static PARSE_RESULT result = PARSE_FAIL;
static LOOKUP lresult = LOOKUP_FAILED;
static bool stop = false;
offset = 0;
// reset token
token.token = nullptr;
token.length = 0;
while ((result = parse_next(buf, buflen, &offset, &token)) == PARSE_OK) {
lresult = lookup(&token, &sys);
switch (lresult) {
case LOOKUP_OK:
continue;
case LOOKUP_NOTFOUND:
sys.interface->wrln((char *)"word not found", 15);
stop = true;
break;
case LOOKUP_FAILED:
sys.interface->wrln((char *)"execution failed", 17);
stop = true;
break;
default:
sys.interface->wrln((char *)"*** the world is broken ***", 27);
exit(1);
}
if (stop) {
stop = false;
break;
}
}
switch (result) {
case PARSE_OK:
return false;
case PARSE_EOB:
sys.interface->wrbuf(ok, 4);
return true;
case PARSE_LEN:
sys.interface->wrln((char *)"parse error: token too long", 27);
return false;
case PARSE_FAIL:
sys.interface->wrln((char *)"parser failure", 14);
return false;
default:
sys.interface->wrln((char *)"*** the world is broken ***", 27);
exit(1);
}
}
Now I feel like I'm at the part where I can start adding in functionality. The
easiest first builtin: addition. Almost impossible to screw this up, right?
::
static bool
add(System *sys)
{
KF_INT a = 0;
KF_INT b = 0;
if (!sys->dstack.pop(&a)) {
return false;
}
if (!sys->dstack.pop(&b)) {
return false;
}
b += a;
return sys->dstack.push(b);
}
Now this needs to go into the ``init_dict`` function::
void
init_dict(System *sys)
{
sys->dict = nullptr;
sys->dict = new Builtin((const char *)"+", 1, sys->dict, add);
}
And this needs to get added into the ``main`` function::
int
main(void)
{
init_dict(&sys);
#ifdef __linux__
Console interface;
sys.interface = &interface;
#endif
sys.interface->wrbuf(banner, bannerlen);
interpreter();
return 0;
}
The moment of truth
^^^^^^^^^^^^^^^^^^^
Hold on to your pants, let's see what breaks::
$ ./kforth
kforth interpreter
<>
? 2 3 +
ok.
<5>
Oh hey, look, it actually works. Time to add a few more definitions for good
measure:
+ the basic arithmetic operators `-`, `*`, `/`
+ the classic `SWAP` and `ROT` words
+ `DEFINITIONS` to look at all the definitions in the language
These are all pretty simple, fortunately. A few things that tripped me up,
though:
+ The *a* and *b* names kind of threw me off because I fill *a* first. This
means it's the last number on the stack; this didn't matter for addition,
but in subtraction, it means I had to be careful to do ``b -= a`` rather
than the other way around.
+ pforth and gfortran both support case insensitive matching, so I had to
modify the token matcher::
bool
match_token(const char *a, const size_t alen,
const char *b, const size_t blen)
{
if (alen != blen) {
return false;
}
for (size_t i = 0; i < alen; i++) {
if (a[i] == b[i]) {
continue;
}
if (!isalpha(a[i]) || !isalpha(b[i])) {
return false;
}
The XOR by 0x20 is just a neat trick for inverting the case of a letter.
if ((a[i] ^ 0x20) == b[i]) {
continue;
}
if (a[i] == (b[i] ^ 0x20)) {
continue;
}
return false;
}
return true;
}
+ I forgot to include the case for ``PARSE_OK`` in the result switch statement
in the ``parser`` function, so I could get a line of code evaluated but then
it'd die with "the world is broken."
+ When I tried doing some division, I ran into some weird issues::
$ ./kforth
kforth interpreter
<>
? 2 5040 /
ok.
<<3C><>>
It turns out that in ``write_num``, the case where *n = 0* results in nothing
happening, and therefore the buffer just being written. This is a dirty thing,
but I edge cased this::
$ git diff io.cc
diff --git a/io.cc b/io.cc
index 77e0e2a..a86156b 100644
--- a/io.cc
+++ b/io.cc
@@ -24,6 +24,10 @@ write_num(IO *interface, KF_INT n)
n++;
}
}
+ else if (n == 0) {
+ interface->wrch('0');
+ return;
+ }
while (n != 0) {
char ch = (n % 10) + '0';
May the compiler have mercy on my soul and whatnot.
For you sports fans keeping track at home, here's the classic bugs I've
introduced so far:
1. bounds overrun
2. missing case in a switch statement
But now here I am with the interpreter in good shape. Now I can start
implementing the builtins in earnest!
As before, see the tag `part-0x05 <https://github.com/kisom/kforth/tree/part-0x05>`_.

4
io.cc
View File

@ -24,6 +24,10 @@ write_num(IO *interface, KF_INT n)
n++; n++;
} }
} }
else if (n == 0) {
interface->wrch('0');
return;
}
while (n != 0) { while (n != 0) {
char ch = (n % 10) + '0'; char ch = (n % 10) + '0';

View File

@ -11,7 +11,6 @@
#endif // __linux__ #endif // __linux__
static char ok[] = "ok.\n"; static char ok[] = "ok.\n";
static char bye[] = "bye";
static System sys; static System sys;
@ -68,12 +67,6 @@ parser(const char *buf, const size_t buflen)
stop = false; stop = false;
break; break;
} }
// Temporary hack until the interpreter is working further.
if (match_token(token.token, token.length, bye, 3)) {
sys.interface->wrln((char *)"Goodbye!", 8);
exit(0);
}
} }
switch (result) { switch (result) {
@ -116,7 +109,7 @@ const size_t bannerlen = 19;
int int
main(void) main(void)
{ {
init_dict(); init_dict(&sys);
#ifdef __linux__ #ifdef __linux__
Console interface; Console interface;
sys.interface = &interface; sys.interface = &interface;

View File

@ -2,6 +2,7 @@
#include "parser.h" #include "parser.h"
#include "stack.h" #include "stack.h"
#include <ctype.h>
#include <string.h> #include <string.h>
static void static void
@ -24,6 +25,10 @@ match_token(const char *a, const size_t alen,
continue; continue;
} }
if (!isalpha(a[i]) || !isalpha(b[i])) {
return false;
}
if ((a[i] ^ 0x20) == b[i]) { if ((a[i] ^ 0x20) == b[i]) {
continue; continue;
} }

View File

@ -8,6 +8,7 @@
typedef struct _System { typedef struct _System {
Stack<KF_INT> dstack; Stack<KF_INT> dstack;
IO *interface; IO *interface;
struct Word *dict;
} System; } System;

2
word.h
View File

@ -25,7 +25,6 @@ public:
Word *next(void); Word *next(void);
bool match(struct Token *); bool match(struct Token *);
void getname(char *, size_t *); void getname(char *, size_t *);
private: private:
char name[MAX_TOKEN_LENGTH]; char name[MAX_TOKEN_LENGTH];
size_t namelen; size_t namelen;
@ -33,5 +32,4 @@ private:
bool (*fun)(System *); bool (*fun)(System *);
}; };
#endif // __KF_WORD_H__ #endif // __KF_WORD_H__