misc/kforth: Interpreter writeup, code cleanups.
This commit is contained in:
		
							parent
							
								
									14dc525084
								
							
						
					
					
						commit
						63e9a0a333
					
				
							
								
								
									
										53
									
								
								dict.cc
								
								
								
								
							
							
						
						
									
										53
									
								
								dict.cc
								
								
								
								
							| 
						 | 
					@ -4,6 +4,8 @@
 | 
				
			||||||
#include "system.h"
 | 
					#include "system.h"
 | 
				
			||||||
#include "word.h"
 | 
					#include "word.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool
 | 
					static bool
 | 
				
			||||||
add(System *sys)
 | 
					add(System *sys)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -68,7 +70,7 @@ div(System *sys)
 | 
				
			||||||
		return false;
 | 
							return false;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	b *= a;
 | 
						b /= a;
 | 
				
			||||||
	return sys->dstack.push(b);
 | 
						return sys->dstack.push(b);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -125,36 +127,61 @@ rot(System *sys)
 | 
				
			||||||
static bool
 | 
					static bool
 | 
				
			||||||
definitions(System *sys)
 | 
					definitions(System *sys)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	Word	*cursor = dict;
 | 
						Word	*cursor = sys->dict;
 | 
				
			||||||
	char	 buf[MAX_TOKEN_LENGTH];
 | 
						char	 buf[MAX_TOKEN_LENGTH];
 | 
				
			||||||
	size_t	 buflen = 0;
 | 
						char	 line[72];
 | 
				
			||||||
 | 
						size_t	 buflen = 0, linelen = 0;
 | 
				
			||||||
 | 
						bool	 ready = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	while (cursor != nullptr) {
 | 
						while (cursor != nullptr) {
 | 
				
			||||||
 | 
							if (ready) {
 | 
				
			||||||
 | 
								ready = false;
 | 
				
			||||||
 | 
								sys->interface->wrln(line, linelen);
 | 
				
			||||||
 | 
								linelen = 0;
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		cursor->getname(buf, &buflen);
 | 
							cursor->getname(buf, &buflen);
 | 
				
			||||||
		sys->interface->wrln(buf, buflen);
 | 
					
 | 
				
			||||||
 | 
							// TODO: get rid of magic numbers
 | 
				
			||||||
 | 
							if ((buflen + linelen) > 72) {
 | 
				
			||||||
 | 
								ready = true;
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							memcpy(line + linelen, buf, buflen);
 | 
				
			||||||
 | 
							linelen += buflen;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (linelen < 71) {
 | 
				
			||||||
 | 
								line[linelen++] = ' ';
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							else {
 | 
				
			||||||
 | 
								ready = true;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		cursor = cursor->next();
 | 
							cursor = cursor->next();
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sys->interface->wrln(line, linelen);
 | 
				
			||||||
	return true;
 | 
						return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
init_dict()
 | 
					init_dict(System *sys)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	dict = new Builtin((const char *)"DEFINITIONS", 11, dict, definitions);
 | 
						sys->dict = nullptr;
 | 
				
			||||||
	dict = new Builtin((const char *)"+", 1, dict, add);
 | 
						sys->dict = new Builtin((const char *)"DEFINITIONS", 11, sys->dict, definitions);
 | 
				
			||||||
	dict = new Builtin((const char *)"-", 1, dict, sub);
 | 
						sys->dict = new Builtin((const char *)"+", 1, sys->dict, add);
 | 
				
			||||||
	dict = new Builtin((const char *)"*", 1, dict, mul);
 | 
						sys->dict = new Builtin((const char *)"-", 1, sys->dict, sub);
 | 
				
			||||||
	dict = new Builtin((const char *)"/", 1, dict, div);
 | 
						sys->dict = new Builtin((const char *)"*", 1, sys->dict, mul);
 | 
				
			||||||
	dict = new Builtin((const char *)"SWAP", 4, dict, swap);
 | 
						sys->dict = new Builtin((const char *)"/", 1, sys->dict, div);
 | 
				
			||||||
	dict = new Builtin((const char *)"ROT", 3, dict, rot);
 | 
						sys->dict = new Builtin((const char *)"SWAP", 4, sys->dict, swap);
 | 
				
			||||||
 | 
						sys->dict = new Builtin((const char *)"ROT", 3, sys->dict, rot);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOOKUP
 | 
					LOOKUP
 | 
				
			||||||
lookup(struct Token *token, System *sys)
 | 
					lookup(struct Token *token, System *sys)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	Word	*cursor = dict;
 | 
						Word	*cursor = sys->dict;
 | 
				
			||||||
	KF_INT	 n;
 | 
						KF_INT	 n;
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	if (parse_num(token, &n)) {
 | 
						if (parse_num(token, &n)) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										4
									
								
								dict.h
								
								
								
								
							
							
						
						
									
										4
									
								
								dict.h
								
								
								
								
							| 
						 | 
					@ -6,15 +6,13 @@
 | 
				
			||||||
#include "system.h"
 | 
					#include "system.h"
 | 
				
			||||||
#include "word.h"
 | 
					#include "word.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static	Word	*dict = nullptr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
typedef enum _LOOKUP_ : uint8_t {
 | 
					typedef enum _LOOKUP_ : uint8_t {
 | 
				
			||||||
	LOOKUP_OK = 0,	     // Lookup executed properly.
 | 
						LOOKUP_OK = 0,	     // Lookup executed properly.
 | 
				
			||||||
	LOOKUP_NOTFOUND = 1, // The token isn't in the dictionary.
 | 
						LOOKUP_NOTFOUND = 1, // The token isn't in the dictionary.
 | 
				
			||||||
	LOOKUP_FAILED = 2    // The word failed to execute.
 | 
						LOOKUP_FAILED = 2    // The word failed to execute.
 | 
				
			||||||
} LOOKUP;
 | 
					} LOOKUP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void	init_dict(void);
 | 
					void	init_dict(System *);
 | 
				
			||||||
LOOKUP	lookup(struct Token *, System *);
 | 
					LOOKUP	lookup(struct Token *, System *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -55,7 +55,7 @@ won't be a target at first, but something to keep in mind as I progress.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Eventually, I'd like to build a zero-allocation Forth that can run on an
 | 
					Eventually, I'd like to build a zero-allocation Forth that can run on an
 | 
				
			||||||
STM-32 or an MSP430, but the first goal is going to get a minimal Forth
 | 
					STM-32 or an MSP430, but the first goal is going to get a minimal Forth
 | 
				
			||||||
working. I'll define the stages as
 | 
					working. I'll define the stages tentatively as
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Stage 1
 | 
					Stage 1
 | 
				
			||||||
~~~~~~~
 | 
					~~~~~~~
 | 
				
			||||||
| 
						 | 
					@ -95,6 +95,15 @@ limiting factor. Fortunately, just a few days before I started this, the TI
 | 
				
			||||||
wiki was updated_ to note that the latest compilers now support C++11 and
 | 
					wiki was updated_ to note that the latest compilers now support C++11 and
 | 
				
			||||||
C++14, so I'll target C++14.
 | 
					C++14, so I'll target C++14.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					As a reminder to myself: this is not going to be the prettiest or best or most
 | 
				
			||||||
 | 
					secure or production ready code. The goal is to have fun writing some software
 | 
				
			||||||
 | 
					again and to rekindle some of the joy of computing that I had before. Once I
 | 
				
			||||||
 | 
					have something working, I can go back and make an exercise of cleaning it up
 | 
				
			||||||
 | 
					and refactoring it. The prose in this series is also not going to be my finest
 | 
				
			||||||
 | 
					writing ever --- again, it suffices just to do it. The goal is to have
 | 
				
			||||||
 | 
					something to show, not to achieve perfection; it'll mostly going to be hacked
 | 
				
			||||||
 | 
					on while I'm on the bus or when I have a bit of downtime here and there.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. _updated: http://processors.wiki.ti.com/index.php/C%2B%2B_Support_in_TI_Compilers#Status_as_of_February_2018
 | 
					.. _updated: http://processors.wiki.ti.com/index.php/C%2B%2B_Support_in_TI_Compilers#Status_as_of_February_2018
 | 
				
			||||||
 | 
					
 | 
				
			||||||
I don't really know what I'm doing, so in the next section, I'll build out the
 | 
					I don't really know what I'm doing, so in the next section, I'll build out the
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,6 +4,9 @@ Write You a Forth, 0x05
 | 
				
			||||||
:date: 2018-02-24 12:23
 | 
					:date: 2018-02-24 12:23
 | 
				
			||||||
:tags: wyaf, forth
 | 
					:tags: wyaf, forth
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NB: Today's update was pretty large, so I don't show all of the code; this is
 | 
				
			||||||
 | 
					what ``git`` is for.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Today I need to start actually doing things with tokens. This requires two
 | 
					Today I need to start actually doing things with tokens. This requires two
 | 
				
			||||||
things:
 | 
					things:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,6 +33,7 @@ stack-aware, so what I've done is define a ``System`` struct in ``system.h``::
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        typedef struct _System {
 | 
					        typedef struct _System {
 | 
				
			||||||
                Stack<KF_INT>        dstack;
 | 
					                Stack<KF_INT>        dstack;
 | 
				
			||||||
 | 
					                IO                  *interface;
 | 
				
			||||||
        } System;
 | 
					        } System;
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
| 
						 | 
					@ -38,9 +42,382 @@ stack-aware, so what I've done is define a ``System`` struct in ``system.h``::
 | 
				
			||||||
This will let me later add in support for the return stack and other things
 | 
					This will let me later add in support for the return stack and other things
 | 
				
			||||||
that might be useful. Other ideas: adding something like an ``errno``
 | 
					that might be useful. Other ideas: adding something like an ``errno``
 | 
				
			||||||
equivalent to indicate the last error, and storing a dictionary of words. This
 | 
					equivalent to indicate the last error, and storing a dictionary of words. This
 | 
				
			||||||
will need some restructuring, though. Anyways, this works for now.
 | 
					will need some restructuring, though. I've already moved the I/O into the
 | 
				
			||||||
 | 
					system as well. This took some finangling in ``kforth.cc``; I'm eliding the
 | 
				
			||||||
 | 
					diff here because it's so long, but it's basically a ``sed -i -e
 | 
				
			||||||
 | 
					's/interface./sys->interface.``.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The Word interface
 | 
					The Word interface
 | 
				
			||||||
^^^^^^^^^^^^^^^^^^
 | 
					^^^^^^^^^^^^^^^^^^
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Now I can start defining a Word.h.
 | 
					Now I can start defining a Word.h. Maybe this is a case of when you have an
 | 
				
			||||||
 | 
					object-oriented language, everything looks like a class, but I decided to
 | 
				
			||||||
 | 
					design an abstract class for a Word and implement the first concrete class,
 | 
				
			||||||
 | 
					**Builtin**. What I came up with was::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    class Word {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					            virtual ~Word() {};
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					The *eval* method takes a ``System`` structure and executes some function.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            virtual bool  eval(System *) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The dictionary is a linked list, so next is used to traverse the list.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            virtual Word *next(void) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ``match`` method is used to determine whether this is the word being
 | 
				
			||||||
 | 
					referred to.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            virtual bool  match(struct Token *) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Finally, ``getname`` will fill in a ``char[MAX_TOKEN_SIZE]`` buffer with the
 | 
				
			||||||
 | 
					word's name.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            virtual void  getname(char *, size_t *) = 0;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					With the interface defined, I can implement ``Builtins`` (I've elided the
 | 
				
			||||||
 | 
					common interface from the listing below to focus on the implementation)::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        class Builtin : public Word {
 | 
				
			||||||
 | 
					        public:
 | 
				
			||||||
 | 
					                ~Builtin() {};
 | 
				
			||||||
 | 
					                Builtin(const char *name, size_t namelen, Word *head, bool (*fun)(System *));
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					        private:
 | 
				
			||||||
 | 
					                char		 name[MAX_TOKEN_LENGTH];
 | 
				
			||||||
 | 
					                size_t		 namelen;
 | 
				
			||||||
 | 
					                Word		*prev;
 | 
				
			||||||
 | 
					                bool		(*fun)(System *);
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ``bool`` works as a first pass, but I think I'm going to have add some
 | 
				
			||||||
 | 
					notion of system conditions later on to denote why execution failed. One thing
 | 
				
			||||||
 | 
					that both ``pforth`` and ``gforth`` do that I don't yet do is to clear the
 | 
				
			||||||
 | 
					stack when there's an execution failure. At least, they clear the stack with an
 | 
				
			||||||
 | 
					unrecognised word. The implementation is pretty trivial::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #include "defs.h"
 | 
				
			||||||
 | 
					        #include "parser.h"
 | 
				
			||||||
 | 
					        #include "system.h"
 | 
				
			||||||
 | 
					        #include "word.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Builtin::Builtin(const char *name, size_t namelen, Word *head, bool (*target)(System *))
 | 
				
			||||||
 | 
					                : prev(head), fun(target)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                memcpy(this->name, name, namelen);
 | 
				
			||||||
 | 
					                this->namelen = namelen;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					        bool
 | 
				
			||||||
 | 
					        Builtin::eval(System *sys)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                return this->fun(sys);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Word *
 | 
				
			||||||
 | 
					        Builtin::next()
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                return this->prev;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					        bool
 | 
				
			||||||
 | 
					        Builtin::match(struct Token *token)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                return match_token(this->name, this->namelen, token->token, token->length);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        void
 | 
				
			||||||
 | 
					        Builtin::getname(char *buf, size_t *buflen)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                memcpy(buf, this->name, this->namelen);
 | 
				
			||||||
 | 
					                *buflen = namelen;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Right. Now to do something with this.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The system dictionary
 | 
				
			||||||
 | 
					^^^^^^^^^^^^^^^^^^^^^
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The dictionary's interface is minimal::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // dict.h
 | 
				
			||||||
 | 
					        #ifndef __KF_DICT_H__
 | 
				
			||||||
 | 
					        #define __KF_DICT_H__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #include "defs.h"
 | 
				
			||||||
 | 
					        #include "parser.h"
 | 
				
			||||||
 | 
					        #include "system.h"
 | 
				
			||||||
 | 
					        #include "word.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        typedef enum _LOOKUP_ : uint8_t {
 | 
				
			||||||
 | 
					                LOOKUP_OK = 0,	     // Lookup executed properly.
 | 
				
			||||||
 | 
					                LOOKUP_NOTFOUND = 1, // The token isn't in the dictionary.
 | 
				
			||||||
 | 
					                LOOKUP_FAILED = 2    // The word failed to execute.
 | 
				
			||||||
 | 
					        } LOOKUP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        void	init_dict(System *);
 | 
				
			||||||
 | 
					        LOOKUP	lookup(struct Token *, System *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #endif // __KF_DICT_H__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					There's a modicum of differentiation between "everything worked" and "no it
 | 
				
			||||||
 | 
					didn't," and by that I mean the lookup can tell you if the word wasn't found
 | 
				
			||||||
 | 
					or if there was a problem executing it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					I added a ``struct Word *dict`` field to the ``System`` struct, since we're
 | 
				
			||||||
 | 
					operating on these anyways. I guess it's best to start with the lookup function
 | 
				
			||||||
 | 
					first so that when I started adding builtins later it'll be easy to just
 | 
				
			||||||
 | 
					recompile and use them.
 | 
				
			||||||
 | 
					::
 | 
				
			||||||
 | 
					        LOOKUP
 | 
				
			||||||
 | 
					        lookup(struct Token *token, System *sys)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                Word	*cursor = sys->dict;
 | 
				
			||||||
 | 
					                KF_INT	 n;
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					I seem to recall from *Programming a Problem-Oriented Language* that Chuck
 | 
				
			||||||
 | 
					Moore advocated checking whether a token was a number before looking it up
 | 
				
			||||||
 | 
					in the dictionary, so that's the approach I'll take:: 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if (parse_num(token, &n)) {
 | 
				
			||||||
 | 
					                        if (sys->dstack.push(n)) {
 | 
				
			||||||
 | 
					                                return LOOKUP_OK;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        return LOOKUP_FAILED;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The remainder is pretty much bog-standard linked list traversal::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                while (cursor != nullptr) {
 | 
				
			||||||
 | 
					                        if (cursor->match(token)) {
 | 
				
			||||||
 | 
					                                if (cursor->eval(sys)) {
 | 
				
			||||||
 | 
					                                        return LOOKUP_OK;
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                                return LOOKUP_FAILED;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        cursor = cursor->next();
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                return LOOKUP_NOTFOUND;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This needs to get hooked up into the interpreter now; this is going to require
 | 
				
			||||||
 | 
					some reworking of the ``parser`` function::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        static bool
 | 
				
			||||||
 | 
					        parser(const char *buf, const size_t buflen)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                static size_t		offset = 0;
 | 
				
			||||||
 | 
					                static struct Token	token;
 | 
				
			||||||
 | 
					                static PARSE_RESULT	result = PARSE_FAIL;
 | 
				
			||||||
 | 
					                static LOOKUP		lresult = LOOKUP_FAILED;
 | 
				
			||||||
 | 
					                static bool		stop = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                offset = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                // reset token
 | 
				
			||||||
 | 
					                token.token = nullptr;
 | 
				
			||||||
 | 
					                token.length = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                while ((result = parse_next(buf, buflen, &offset, &token)) == PARSE_OK) {
 | 
				
			||||||
 | 
					                        lresult = lookup(&token, &sys);
 | 
				
			||||||
 | 
					                        switch (lresult) {
 | 
				
			||||||
 | 
					                        case LOOKUP_OK:
 | 
				
			||||||
 | 
					                                continue;
 | 
				
			||||||
 | 
					                        case LOOKUP_NOTFOUND:
 | 
				
			||||||
 | 
					                                sys.interface->wrln((char *)"word not found", 15);
 | 
				
			||||||
 | 
					                                stop = true;
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                        case LOOKUP_FAILED:
 | 
				
			||||||
 | 
					                                sys.interface->wrln((char *)"execution failed", 17);
 | 
				
			||||||
 | 
					                                stop = true;
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                        default:
 | 
				
			||||||
 | 
					                                sys.interface->wrln((char *)"*** the world is broken ***", 27);
 | 
				
			||||||
 | 
					                                exit(1);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					                        if (stop) {
 | 
				
			||||||
 | 
					                                stop = false;
 | 
				
			||||||
 | 
					                                break;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                switch (result) {
 | 
				
			||||||
 | 
					                case PARSE_OK:
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                case PARSE_EOB:
 | 
				
			||||||
 | 
					                        sys.interface->wrbuf(ok, 4);
 | 
				
			||||||
 | 
					                        return true;
 | 
				
			||||||
 | 
					                case PARSE_LEN:
 | 
				
			||||||
 | 
					                        sys.interface->wrln((char *)"parse error: token too long", 27);
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                case PARSE_FAIL:
 | 
				
			||||||
 | 
					                        sys.interface->wrln((char *)"parser failure", 14);
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                default:
 | 
				
			||||||
 | 
					                        sys.interface->wrln((char *)"*** the world is broken ***", 27);
 | 
				
			||||||
 | 
					                        exit(1);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Now I feel like I'm at the part where I can start adding in functionality. The
 | 
				
			||||||
 | 
					easiest first builtin: addition. Almost impossible to screw this up, right?
 | 
				
			||||||
 | 
					::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        static bool
 | 
				
			||||||
 | 
					        add(System *sys)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                KF_INT	a = 0;
 | 
				
			||||||
 | 
					                KF_INT	b = 0;
 | 
				
			||||||
 | 
					                if (!sys->dstack.pop(&a)) {
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                if (!sys->dstack.pop(&b)) {
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                b += a;
 | 
				
			||||||
 | 
					                return sys->dstack.push(b);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Now this needs to go into the ``init_dict`` function::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        void
 | 
				
			||||||
 | 
					        init_dict(System *sys)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                sys->dict = nullptr;
 | 
				
			||||||
 | 
					                sys->dict = new Builtin((const char *)"+", 1, sys->dict, add);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					And this needs to get added into the ``main`` function::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        int
 | 
				
			||||||
 | 
					        main(void)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                init_dict(&sys);
 | 
				
			||||||
 | 
					        #ifdef __linux__
 | 
				
			||||||
 | 
					                Console interface;
 | 
				
			||||||
 | 
					                sys.interface = &interface;
 | 
				
			||||||
 | 
					        #endif
 | 
				
			||||||
 | 
					                sys.interface->wrbuf(banner, bannerlen);
 | 
				
			||||||
 | 
					                interpreter();
 | 
				
			||||||
 | 
					                return 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The moment of truth
 | 
				
			||||||
 | 
					^^^^^^^^^^^^^^^^^^^
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Hold on to your pants, let's see what breaks::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        $ ./kforth
 | 
				
			||||||
 | 
					        kforth interpreter
 | 
				
			||||||
 | 
					        <>
 | 
				
			||||||
 | 
					        ? 2 3 +
 | 
				
			||||||
 | 
					        ok.
 | 
				
			||||||
 | 
					        <5>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Oh hey, look, it actually works. Time to add a few more definitions for good
 | 
				
			||||||
 | 
					measure:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+ the basic arithmetic operators `-`, `*`, `/`
 | 
				
			||||||
 | 
					+ the classic `SWAP` and `ROT` words
 | 
				
			||||||
 | 
					+ `DEFINITIONS` to look at all the definitions in the language
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					These are all pretty simple, fortunately. A few things that tripped me up,
 | 
				
			||||||
 | 
					though:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+ The *a* and *b* names kind of threw me off because I fill *a* first. This
 | 
				
			||||||
 | 
					  means it's the last number on the stack; this didn't matter for addition,
 | 
				
			||||||
 | 
					  but in subtraction, it means I had to be careful to do ``b -= a`` rather
 | 
				
			||||||
 | 
					  than the other way around.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+ pforth and gfortran both support case insensitive matching, so I had to
 | 
				
			||||||
 | 
					  modify the token matcher::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        bool
 | 
				
			||||||
 | 
					        match_token(const char *a, const size_t alen,
 | 
				
			||||||
 | 
					                const char *b, const size_t blen)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					                if (alen != blen) {
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                for (size_t i = 0; i < alen; i++) {
 | 
				
			||||||
 | 
					                        if (a[i] == b[i]) {
 | 
				
			||||||
 | 
					                                continue;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        if (!isalpha(a[i]) || !isalpha(b[i])) {
 | 
				
			||||||
 | 
					                                return false;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					The XOR by 0x20 is just a neat trick for inverting the case of a letter.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        if ((a[i] ^ 0x20) == b[i]) {
 | 
				
			||||||
 | 
					                                continue;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					                        if (a[i] == (b[i] ^ 0x20)) {
 | 
				
			||||||
 | 
					                                continue;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					                        return false;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                return true;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+ I forgot to include the case for ``PARSE_OK`` in the result switch statement
 | 
				
			||||||
 | 
					  in the ``parser`` function, so I could get a line of code evaluated but then
 | 
				
			||||||
 | 
					  it'd die with "the world is broken."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+ When I tried doing some division, I ran into some weird issues::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        $ ./kforth 
 | 
				
			||||||
 | 
					        kforth interpreter
 | 
				
			||||||
 | 
					        <>
 | 
				
			||||||
 | 
					        ? 2 5040 /
 | 
				
			||||||
 | 
					        ok.
 | 
				
			||||||
 | 
					        <<3C><>>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It turns out that in ``write_num``, the case where *n = 0* results in nothing
 | 
				
			||||||
 | 
					happening, and therefore the buffer just being written. This is a dirty thing,
 | 
				
			||||||
 | 
					but I edge cased this::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        $ git diff io.cc 
 | 
				
			||||||
 | 
					        diff --git a/io.cc b/io.cc
 | 
				
			||||||
 | 
					        index 77e0e2a..a86156b 100644
 | 
				
			||||||
 | 
					        --- a/io.cc
 | 
				
			||||||
 | 
					        +++ b/io.cc
 | 
				
			||||||
 | 
					        @@ -24,6 +24,10 @@ write_num(IO *interface, KF_INT n)
 | 
				
			||||||
 | 
					                                n++;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					        +       else if (n == 0) {
 | 
				
			||||||
 | 
					        +               interface->wrch('0');
 | 
				
			||||||
 | 
					        +               return;
 | 
				
			||||||
 | 
					        +       }
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					                while (n != 0) {
 | 
				
			||||||
 | 
					                        char ch = (n % 10) + '0';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					May the compiler have mercy on my soul and whatnot.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For you sports fans keeping track at home, here's the classic bugs I've
 | 
				
			||||||
 | 
					introduced so far:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. bounds overrun
 | 
				
			||||||
 | 
					2. missing case in a switch statement
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					But now here I am with the interpreter in good shape. Now I can start
 | 
				
			||||||
 | 
					implementing the builtins in earnest!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					As before, see the tag `part-0x05 <https://github.com/kisom/kforth/tree/part-0x05>`_.
 | 
				
			||||||
							
								
								
									
										4
									
								
								io.cc
								
								
								
								
							
							
						
						
									
										4
									
								
								io.cc
								
								
								
								
							| 
						 | 
					@ -24,6 +24,10 @@ write_num(IO *interface, KF_INT n)
 | 
				
			||||||
			n++;
 | 
								n++;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						else if (n == 0) {
 | 
				
			||||||
 | 
							interface->wrch('0');
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	while (n != 0) {
 | 
						while (n != 0) {
 | 
				
			||||||
		char ch = (n % 10) + '0';
 | 
							char ch = (n % 10) + '0';
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -11,7 +11,6 @@
 | 
				
			||||||
#endif // __linux__
 | 
					#endif // __linux__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char     ok[] = "ok.\n";
 | 
					static char     ok[] = "ok.\n";
 | 
				
			||||||
static char	bye[] = "bye";
 | 
					 | 
				
			||||||
static System	sys;
 | 
					static System	sys;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,12 +67,6 @@ parser(const char *buf, const size_t buflen)
 | 
				
			||||||
			stop = false;
 | 
								stop = false;
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		
 | 
					 | 
				
			||||||
		// Temporary hack until the interpreter is working further.
 | 
					 | 
				
			||||||
		if (match_token(token.token, token.length, bye, 3)) {
 | 
					 | 
				
			||||||
			sys.interface->wrln((char *)"Goodbye!", 8);
 | 
					 | 
				
			||||||
			exit(0);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	switch (result) {
 | 
						switch (result) {
 | 
				
			||||||
| 
						 | 
					@ -116,7 +109,7 @@ const size_t	bannerlen = 19;
 | 
				
			||||||
int
 | 
					int
 | 
				
			||||||
main(void)
 | 
					main(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	init_dict();
 | 
						init_dict(&sys);
 | 
				
			||||||
#ifdef __linux__
 | 
					#ifdef __linux__
 | 
				
			||||||
	Console interface;
 | 
						Console interface;
 | 
				
			||||||
	sys.interface = &interface;
 | 
						sys.interface = &interface;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,6 +2,7 @@
 | 
				
			||||||
#include "parser.h"
 | 
					#include "parser.h"
 | 
				
			||||||
#include "stack.h"
 | 
					#include "stack.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <ctype.h>
 | 
				
			||||||
#include <string.h>
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
| 
						 | 
					@ -24,6 +25,10 @@ match_token(const char *a, const size_t alen,
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (!isalpha(a[i]) || !isalpha(b[i])) {
 | 
				
			||||||
 | 
								return false;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							
 | 
				
			||||||
		if ((a[i] ^ 0x20) == b[i]) {
 | 
							if ((a[i] ^ 0x20) == b[i]) {
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										1
									
								
								system.h
								
								
								
								
							
							
						
						
									
										1
									
								
								system.h
								
								
								
								
							| 
						 | 
					@ -8,6 +8,7 @@
 | 
				
			||||||
typedef struct _System {
 | 
					typedef struct _System {
 | 
				
			||||||
	Stack<KF_INT>	 dstack;
 | 
						Stack<KF_INT>	 dstack;
 | 
				
			||||||
	IO		*interface;
 | 
						IO		*interface;
 | 
				
			||||||
 | 
						struct Word	*dict;
 | 
				
			||||||
} System;
 | 
					} System;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										2
									
								
								word.h
								
								
								
								
							
							
						
						
									
										2
									
								
								word.h
								
								
								
								
							| 
						 | 
					@ -25,7 +25,6 @@ public:
 | 
				
			||||||
	Word *next(void);
 | 
						Word *next(void);
 | 
				
			||||||
	bool  match(struct Token *);
 | 
						bool  match(struct Token *);
 | 
				
			||||||
	void  getname(char *, size_t *);
 | 
						void  getname(char *, size_t *);
 | 
				
			||||||
	
 | 
					 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
	char		 name[MAX_TOKEN_LENGTH];
 | 
						char		 name[MAX_TOKEN_LENGTH];
 | 
				
			||||||
	size_t		 namelen;
 | 
						size_t		 namelen;
 | 
				
			||||||
| 
						 | 
					@ -33,5 +32,4 @@ private:
 | 
				
			||||||
	bool		(*fun)(System *);
 | 
						bool		(*fun)(System *);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif // __KF_WORD_H__
 | 
					#endif // __KF_WORD_H__
 | 
				
			||||||
		Loading…
	
		Reference in New Issue