misc/kforth: Finish part 0x07 writeup.
This commit is contained in:
parent
3991bb353b
commit
7ff4b8d35b
|
@ -11,6 +11,8 @@ Contents:
|
|||
part-0x03
|
||||
part-0x04
|
||||
part-0x05
|
||||
part-0x06
|
||||
part-0x07
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
|
|
@ -205,6 +205,8 @@ is the high part. This is, once again, pretty straightforward: I'll need to
|
|||
shift the first number by the appropriate number of bits and then add the
|
||||
second number to it.
|
||||
|
||||
::
|
||||
|
||||
constexpr size_t dshift = (sizeof(KF_INT) * 8) - 1;
|
||||
|
||||
static bool
|
||||
|
|
|
@ -123,4 +123,238 @@ There's definitely a sense of finangling here.
|
|||
The return stack
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``>R`` series of words requires a
|
||||
The ``>R`` series of words requires a return stack, so I've added a
|
||||
``Stack<KF_ADDR>`` field to the ``System`` structure. The address stack
|
||||
manipulation functions I introduced earlier only operate on the data stack, so
|
||||
these require some extra verbosity; for example::
|
||||
|
||||
static bool
|
||||
to_r(System *sys)
|
||||
{
|
||||
KF_INT a;
|
||||
|
||||
if (!sys->dstack.pop(&a)) {
|
||||
sys->status = STATUS_STACK_UNDERFLOW;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!sys->rstack.push(static_cast<KF_ADDR>(a))) {
|
||||
sys->status = STATUS_RSTACK_OVERFLOW;
|
||||
return false;
|
||||
}
|
||||
|
||||
sys->status = STATUS_OK;
|
||||
return true;
|
||||
}
|
||||
|
||||
Adding the ``rstack`` field also required adding return stack over- and
|
||||
underflow status codes.
|
||||
|
||||
The arena
|
||||
^^^^^^^^^
|
||||
|
||||
As I was reading through the words left to implement, I found I'd have to
|
||||
implement ``COUNT``. This provides some support for counted strings, which
|
||||
are implemented as a byte array where the first byte is the length of the
|
||||
string. In my mind, this has two implications:
|
||||
|
||||
1. There needs to be some area of user memory that's available for storing
|
||||
strings and the like. I've termed this the arena, and it's a field in the
|
||||
``System`` structure now.
|
||||
2. There needs to be a Word type for addresses.
|
||||
|
||||
So now I have this definition for the ``System`` structure::
|
||||
|
||||
typedef struct _System {
|
||||
Stack<KF_INT> dstack;
|
||||
Stack<KF_ADDR> rstack;
|
||||
IO *interface;
|
||||
Word *dict;
|
||||
SYS_STATUS status;
|
||||
uint8_t arena[ARENA_SIZE];
|
||||
} System;
|
||||
|
||||
The ``Address`` type seems like it's easy enough to implement::
|
||||
|
||||
class Address : public Word {
|
||||
public:
|
||||
~Address() {};
|
||||
Address(const char *name, size_t namelen, Word *head, KF_ADDR addr);
|
||||
|
||||
bool eval(System *);
|
||||
Word *next(void);
|
||||
bool match(struct Token *);
|
||||
void getname(char *, size_t *);
|
||||
|
||||
private:
|
||||
char name[MAX_TOKEN_LENGTH];
|
||||
size_t namelen;
|
||||
Word *prev;
|
||||
KF_ADDR addr;
|
||||
};
|
||||
|
||||
And the implementation::
|
||||
|
||||
Address::Address(const char *name, size_t namelen, Word *head, KF_ADDR addr)
|
||||
: prev(head), addr(addr)
|
||||
{
|
||||
memcpy(this->name, name, namelen);
|
||||
this->namelen = namelen;
|
||||
}
|
||||
|
||||
bool
|
||||
Address::eval(System *sys)
|
||||
{
|
||||
KF_INT a;
|
||||
|
||||
a = static_cast<KF_INT>(this->addr & mask(dshift));
|
||||
if (!sys->dstack.push(a)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
a = static_cast<KF_INT>((this->addr >> dshift) & mask(dshift));
|
||||
if (!sys->dstack.push(a)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Word *
|
||||
Address::next(void)
|
||||
{
|
||||
return this->prev;
|
||||
}
|
||||
|
||||
bool
|
||||
Address::match(struct Token *token)
|
||||
{
|
||||
return match_token(this->name, this->namelen, token->token, token->length);
|
||||
}
|
||||
|
||||
void
|
||||
Address::getname(char *buf, size_t *buflen)
|
||||
{
|
||||
memcpy(buf, this->name, this->namelen);
|
||||
*buflen = namelen;
|
||||
}
|
||||
|
||||
It's kind of cool to see this at work::
|
||||
|
||||
$ ./kforth
|
||||
kforth interpreter
|
||||
? arena drop 2+ 0 @ .
|
||||
0
|
||||
ok.
|
||||
? arena drop 2+ 0 4 rot rot ! .
|
||||
stack underflow (error code 2).
|
||||
? arena drop 2+ 0 @ .
|
||||
4
|
||||
ok.
|
||||
|
||||
Unsigned numbers
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
This is really just a bunch of casting::
|
||||
|
||||
static bool
|
||||
u_dot(System *sys)
|
||||
{
|
||||
KF_INT a;
|
||||
KF_UINT b;
|
||||
|
||||
if (!sys->dstack.pop(&a)) {
|
||||
sys->status = STATUS_STACK_UNDERFLOW;
|
||||
return false;
|
||||
}
|
||||
b = static_cast<KF_UINT>(a);
|
||||
|
||||
write_unum(sys->interface, b);
|
||||
sys->interface->newline();
|
||||
sys->status = STATUS_OK;
|
||||
return true;
|
||||
}
|
||||
|
||||
Execute
|
||||
^^^^^^^
|
||||
|
||||
Implementing ``execute`` was fun, but it begins to highlight the limits of my
|
||||
approach so far.
|
||||
|
||||
|
||||
EXECUTE addr -- 79
|
||||
The word definition indicated by addr is executed. An error
|
||||
condition exists if addr is not a compilation address
|
||||
|
||||
For example::
|
||||
|
||||
(gdb) break 83
|
||||
Breakpoint 1 at 0x4077cf: file kforth.cc, line 83.
|
||||
(gdb) run
|
||||
Starting program: /home/kyle/code/kforth/kforth
|
||||
|
||||
Breakpoint 1, main () at kforth.cc:83
|
||||
83 Console interface;
|
||||
(gdb) p sys.dict->next()->next()->next()->next()
|
||||
$1 = (Word *) 0x7e45b0
|
||||
(gdb) p (Builtin) *sys.dict->next()->next()->next()->next()
|
||||
$2 = {<Word> = {_vptr$Word = 0x55f220 <vtable for Builtin+16>}, name = "+", '\000' <repeats 14 times>, namelen = 1, prev = 0x7e4570,
|
||||
fun = 0x406eb0 <add(_System*)>}
|
||||
(gdb) p/u 0x7e45b0
|
||||
$3 = 8275376
|
||||
(gdb) c
|
||||
Continuing.
|
||||
kforth interpreter
|
||||
? 2 3 8275376 0 execute .
|
||||
executing word: +
|
||||
5
|
||||
ok.
|
||||
|
||||
In case the ``gdb`` example wasn't clear, I printed the address of the fifth
|
||||
entry in the dictionary, which happens to be ``+``. I push the numbers 2 and 3
|
||||
onto the stack, then push the address of ``+`` on the stack, then call execute.
|
||||
As the dot function shows, it executes correctly, pushing the resulting 5 onto
|
||||
the stack. Which leads me to the next section, wherein I need to rethink the
|
||||
execution model.
|
||||
|
||||
The execution model
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In most of the Forth implementations I've seen, the dictionary is a list of
|
||||
contiguous pointers to words. That is, something like::
|
||||
|
||||
Word *dict[ARRAY_SIZE] = { 0 };
|
||||
|
||||
dict[0] = new Builtin((const char *)"+", 1, add);
|
||||
dict[1] = new Builtin((const char *)"-", 1, sub);
|
||||
|
||||
And so forth. Or, maybe,
|
||||
|
||||
::
|
||||
|
||||
Word dict[ARRAY_SIZE] = {
|
||||
Builtin((const char *)"+", 1, add),
|
||||
Builtin((const char *)"-", 1, sub)
|
||||
};
|
||||
|
||||
|
||||
So some questions:
|
||||
|
||||
+ How big should this array be?
|
||||
+ How do I handle different word types?
|
||||
+ How do I transfer execution to functions?
|
||||
|
||||
I'm thinking something like:
|
||||
|
||||
+ the parser looks up a word, and pushes the parser function's address onto the
|
||||
return stack.
|
||||
+ the parser jumps to the word's function pointer and executes it.
|
||||
+ the function pointer jumps back to the last address on the return stack.
|
||||
|
||||
The second step could involve chaining multiple functions in there. I don't
|
||||
know how to transfer execution to a random address in memory (maybe ``setjmp``
|
||||
and ``longjmp``), or how I'm going to push the current word's address onto the
|
||||
stack. I guess I'll include some sort of additional fields in the system type.
|
||||
|
||||
This starts to jump into the realm of an operating system or virtual machine;
|
||||
the OS approach makes more sense for an embedded system.
|
||||
|
|
Loading…
Reference in New Issue