From 505d71906c77698b5cdce7c1697b761552f68a9c Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Fri, 23 Feb 2018 19:19:29 -0800 Subject: [PATCH] misc/kforth: Part 0x04 - parsing numerics. --- Makefile | 1 + defs.h | 3 + doc/index.rst | 1 + doc/part-0x04.rst | 313 ++++++++++++++++++++++++++++++++++++++++++++++ io.cc | 27 ++++ io.h | 3 + kforth.cc | 28 +++++ linux.h | 2 - linux/defs.h | 3 + parser.cc | 38 ++++++ parser.h | 5 + stack.h | 57 +++++++++ 12 files changed, 479 insertions(+), 2 deletions(-) create mode 100644 doc/part-0x04.rst create mode 100644 io.cc create mode 100644 stack.h diff --git a/Makefile b/Makefile index 16f194a..6b15ec0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ CXXSTD := c++14 CXXFLAGS := -std=$(CXXSTD) -Wall -Werror -g -O0 OBJS := linux/io.o \ + io.o \ parser.o \ kforth.o TARGET := kforth diff --git a/defs.h b/defs.h index 4dcc540..e070d27 100644 --- a/defs.h +++ b/defs.h @@ -3,6 +3,9 @@ #ifdef __linux__ #include "linux/defs.h" +#else +typedef int KF_INT; +constexpr uint8_t STACK_SIZE = 16; #endif constexpr size_t MAX_TOKEN_LENGTH = 16; diff --git a/doc/index.rst b/doc/index.rst index bd9aa42..d313229 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -9,6 +9,7 @@ Contents: part-0x01 part-0x02 part-0x03 + part-0x04 Indices and tables ================== diff --git a/doc/part-0x04.rst b/doc/part-0x04.rst new file mode 100644 index 0000000..6f5624d --- /dev/null +++ b/doc/part-0x04.rst @@ -0,0 +1,313 @@ +Write You a Forth, 0x04 +----------------------- + +:date: 2018-02-23 19:20 +:tags: wyaf, forth + +So, I lied about words being next. When I thought about it some more, what I +really need to do is start adding the stack in and adding support for parsing +numerics. I'll start with the stack, because it's pretty straightforward. + +I've added a new definition: ``constexpr uint8_t STACK_SIZE = 128``. This goes +in the ``linux/defs.h``, and the ``#else`` in the top ``defs.h`` will set a +smaller stack size for other targets. 
I've also defined a type called ``KF_INT`` +that, on Linux, is an ``int32_t``:: + + index 4dcc540..e070d27 100644 + --- a/defs.h + +++ b/defs.h + @@ -3,6 +3,9 @@ + + #ifdef __linux__ + #include "linux/defs.h" + +#else + +typedef int KF_INT; + +constexpr uint8_t STACK_SIZE = 16; + #endif + + constexpr size_t MAX_TOKEN_LENGTH = 16; + diff --git a/linux/defs.h b/linux/defs.h + index 57cdaeb..3740f5a 100644 + --- a/linux/defs.h + +++ b/linux/defs.h + @@ -4,4 +4,7 @@ + #include + #include + + +typedef int32_t KF_INT; + +constexpr uint8_t STACK_SIZE = 128; + + + #endif + \ No newline at end of file + +It seems useful to be able to adapt the kind of numbers supported; an AVR might do +better with 16-bit integers, for example. + +``stack.h`` +^^^^^^^^^^^ + +The stack is going to be templated, because we'll need a ``double`` stack +for floating point and a return address stack later. This means everything will +go under ``stack.h``. This is a pretty simple implementation that's CS 101 material; +I've opted to have the interface return ``bool``\ s for everything to indicate stack +overflow and underflow and out of bounds:: + + #ifndef __KF_STACK_H__ + #define __KF_STACK_H__ + + #include "defs.h" + + template <typename T> + class Stack { + public: + bool push(T val); + bool pop(T &val); + bool get(size_t, T &); + size_t size(void) { return this->arrlen; }; + private: + T arr[STACK_SIZE]; + size_t arrlen; + }; + + // push returns false if there was a stack overflow. + template <typename T> + bool + Stack<T>::push(T val) + { + if ((this->arrlen + 1) > STACK_SIZE) { + return false; + } + + this->arr[this->arrlen++] = val; + return true; + } + + // pop returns false if there was a stack underflow. + template <typename T> + bool + Stack<T>::pop(T &val) + { + if (this->arrlen == 0) { + return false; + } + + val = this->arr[this->arrlen - 1]; + this->arrlen--; + } + + // get returns false on invalid bounds. 
+ template <typename T> + bool + Stack<T>::get(size_t i, T &val) + { + if (i > this->arrlen) { + return false; + } + + val = this->arr[i]; + return true; + } + + #endif // __KF_STACK_H__ + +I'll put a ``Stack<KF_INT>`` in ``kforth.cc`` later on. For now, this gives me +an interface for the numeric parser to push a number onto the stack. + +``parse_num`` +^^^^^^^^^^^^^ + +It seems like the best place for this is in ``parser.cc`` --- though I might +move it into a token processor later. The declaration for this goes in ``parser.h``, +and the body is in ``parser.cc``:: + + // parse_num tries to parse the token as a signed base 10 number, + // pushing it onto the stack if needed. + bool + parse_num(struct Token *token, Stack<KF_INT> &s) + { + KF_INT n = 0; + uint8_t i = 0; + bool sign = false; + +It turns out you can't parse a zero-length token as a number... +:: + + if (token->length == 0) { + return false; + } + +I'll need to invert the number later if it's negative, so it's worth checking +the first character for a minus sign up front. +:: + + if (token->token[i] == '-') { + i++; + sign = true; + } + +Parsing is done by checking whether each character is within the range of the ASCII +numeral values. Later on, I might add in separate functions for processing base 10 +and base 16 numbers, and decide which to use based on a prefix (like ``0x``). If the +character is between those values, then the working number is multiplied by 10 and +the digit added. +:: + + while (i < token->length) { + if (token->token[i] < '0') { + return false; + } + + if (token->token[i] > '9') { + return false; + } + + n *= 10; + n += (uint8_t)(token->token[i] - '0'); + i++; + } + +If it was a negative number, then the working number has to be inverted:: + + if (sign) { + n *= -1; + } + +Finally, return the result of pushing the number on the stack. One thing that +might come back to get me later is that this makes it impossible to tell if a +failure to parse the number is due to an invalid number or due to a stack +overflow. 
This will be a good candidate for revisiting later. +:: + + return s.push(n); + } + +``io.cc`` +^^^^^^^^^^ + +Conversely, it'll be useful to write a number to an ``IO`` interface. It +*seems* more useful right now to just provide a number → I/O function, but +that'll be easily adapted to a number → buffer function later. This will add +a real function to ``io.h``, which will require a corresponding ``io.cc`` +(which also needs to be added to the ``Makefile``):: + + #include "defs.h" + #include "io.h" + + #include + + void + write_num(IO &interface, KF_INT n) + { + +Through careful scientific study, I have determined that the largest number of digits +that a 32-bit integer needs is 10, at one byte each (sans the sign!). This will absolutely +need to be changed if ``KF_INT`` is ever moved to 64-bit (or larger!) numbers. +There's a TODO in the actual source code that notes this. :: + + char buf[10]; + uint8_t i = 10; + memset(buf, 0, 10); + +Because this is going out to an I/O interface, I don't need to store the sign +in the buffer itself and can just print it and invert the number. Inverting is +important; I ran into a bug earlier where I didn't invert it and my subtractions +below were correspondingly off. +:: + + if (n < 0) { + interface.wrch('-'); + n *= -1; + } + +The buffer has to be filled from the end to the beginning to do the inverse of +the parsing method:: + + while (n != 0) { + char ch = (n % 10) + '0'; + buf[i--] = ch; + n /= 10; + } + +But then it can just be dumped to the interface:: + + interface.wrbuf(buf+i, 11-i); + } + +``kforth.cc`` +^^^^^^^^^^^^^^ + +And now I come to the fun part: adding the stack in. After including ``stack.h``, +I've added a stack instance to the top of the file:: + + // dstack is the data stack. 
+ static Stack<KF_INT> dstack; + +It's kind of useful to be able to print the stack:: + + static void + write_dstack(IO &interface) + { + KF_INT tmp; + interface.wrch('<'); + for (size_t i = 0; i < dstack.size(); i++) { + if (i > 0) { + interface.wrch(' '); + } + + dstack.get(i, tmp); + write_num(interface, tmp); + } + interface.wrch('>'); + } + +Surrounding the stack in angle brackets is a cool stylish sort of thing, I +guess. All this is no good if the interpreter isn't actually hooked up to the +number parser:: + + // The new while loop in the parser function in kforth.cc: + while ((result = parse_next(buf, buflen, &offset, &token)) == PARSE_OK) { + interface.wrbuf((char *)"token: ", 7); + interface.wrbuf(token.token, token.length); + interface.wrln((char *)".", 1); + + if (!parse_num(&token, dstack)) { + interface.wrln((char *)"failed to parse numeric", 23); + } + + // Temporary hack until the interpreter is working further. + if (match_token(token.token, token.length, bye, 3)) { + interface.wrln((char *)"Goodbye!", 8); + exit(0); + } + } + +But does it blend? +^^^^^^^^^^^^^^^^^^ + +Hopefully this works:: + + ~/code/kforth (0) $ make + g++ -std=c++14 -Wall -Werror -g -O0 -c -o linux/io.o linux/io.cc + g++ -std=c++14 -Wall -Werror -g -O0 -c -o io.o io.cc + g++ -std=c++14 -Wall -Werror -g -O0 -c -o parser.o parser.cc + g++ -std=c++14 -Wall -Werror -g -O0 -c -o kforth.o kforth.cc + g++ -o kforth linux/io.o io.o parser.o kforth.o + ~/code/kforth (0) $ ./kforth + kforth interpreter + <> + ? 2 -2 30 1000 -1010 + token: 2. + token: -2. + token: 30. + token: 1000. + token: -1010. + ok. + <2 -2 30 1000 -1010> + ? bye + token: bye. + failed to parse numeric + Goodbye! + ~/code/kforth (0) $ + +So there's that. Okay, next time *for real* I'll do a vocabulary thing. 
+ diff --git a/io.cc b/io.cc new file mode 100644 index 0000000..f9dfd1f --- /dev/null +++ b/io.cc @@ -0,0 +1,27 @@ +#include "defs.h" +#include "io.h" + +#include + +void +write_num(IO &interface, KF_INT n) +{ + + // TODO(kyle): make the size of the buffer depend on the size of + // KF_INT. + char buf[10]; + uint8_t i = 10; + memset(buf, 0, i); + if (n < 0) { + interface.wrch('-'); + n *= -1; + } + + while (n != 0) { + char ch = (n % 10) + '0'; + buf[i--] = ch; + n /= 10; + } + + interface.wrbuf(buf+i, 11-i); +} \ No newline at end of file diff --git a/io.h b/io.h index fa25c5c..4f957a6 100644 --- a/io.h +++ b/io.h @@ -21,4 +21,7 @@ public: virtual void wrln(char *buf, size_t len) = 0; }; +void write_num(IO &, KF_INT); + + #endif // __KF_IO_H__ \ No newline at end of file diff --git a/kforth.cc b/kforth.cc index ebdec1c..77c7320 100644 --- a/kforth.cc +++ b/kforth.cc @@ -1,7 +1,9 @@ #include "io.h" #include "parser.h" +#include "stack.h" #include +#include #ifdef __linux__ #include "linux.h" @@ -10,6 +12,26 @@ static char ok[] = "ok.\n"; static char bye[] = "bye"; +// dstack is the data stack. +static Stack dstack; + + +static void +write_dstack(IO &interface) +{ + KF_INT tmp; + interface.wrch('<'); + for (size_t i = 0; i < dstack.size(); i++) { + if (i > 0) { + interface.wrch(' '); + } + + dstack.get(i, tmp); + write_num(interface, tmp); + } + interface.wrch('>'); +} + static bool parser(IO &interface, const char *buf, const size_t buflen) { @@ -28,6 +50,10 @@ parser(IO &interface, const char *buf, const size_t buflen) interface.wrbuf(token.token, token.length); interface.wrln((char *)".", 1); + if (!parse_num(&token, dstack)) { + interface.wrln((char *)"failed to parse numeric", 23); + } + // Temporary hack until the interpreter is working further. 
if (match_token(token.token, token.length, bye, 3)) { interface.wrln((char *)"Goodbye!", 8); @@ -58,6 +84,8 @@ interpreter(IO &interface) static char linebuf[81]; while (true) { + write_dstack(interface); + interface.wrch('\n'); interface.wrch('?'); interface.wrch(' '); buflen = interface.rdbuf(linebuf, 80, true, '\n'); diff --git a/linux.h b/linux.h index fe69a0e..a75ad15 100644 --- a/linux.h +++ b/linux.h @@ -6,7 +6,5 @@ // build support for linux #include "linux/io.h" -constexpr uint8_t STACK_SIZE = 128; - #endif // __KF_LINUX_H__ diff --git a/linux/defs.h b/linux/defs.h index 57cdaeb..3740f5a 100644 --- a/linux/defs.h +++ b/linux/defs.h @@ -4,4 +4,7 @@ #include #include +typedef int32_t KF_INT; +constexpr uint8_t STACK_SIZE = 128; + #endif \ No newline at end of file diff --git a/parser.cc b/parser.cc index c18af83..6a94687 100644 --- a/parser.cc +++ b/parser.cc @@ -1,5 +1,6 @@ #include "defs.h" #include "parser.h" +#include "stack.h" #include @@ -68,4 +69,41 @@ parse_next(const char *buf, const size_t length, size_t *offset, *offset = cursor; return PARSE_OK; +} + +bool +parse_num(struct Token *token, Stack &s) +{ + KF_INT n = 0; + uint8_t i = 0; + bool sign = false; + + if (token->length == 0) { + return false; + } + + if (token->token[i] == '-') { + i++; + sign = true; + } + + while (i < token->length) { + if (token->token[i] < '0') { + return false; + } + + if (token->token[i] > '9') { + return false; + } + + n *= 10; + n += (uint8_t)(token->token[i] - '0'); + i++; + } + + if (sign) { + n *= -1; + } + + return s.push(n); } \ No newline at end of file diff --git a/parser.h b/parser.h index 1af2adf..4cdef1d 100644 --- a/parser.h +++ b/parser.h @@ -2,6 +2,7 @@ #define __KF_PARSER_H__ #include "defs.h" +#include "stack.h" struct Token { char *token; @@ -18,5 +19,9 @@ typedef enum _PARSE_RESULT_ : uint8_t { bool match_token(const char *, const size_t, const char *, const size_t); PARSE_RESULT parse_next(const char *, const size_t, size_t *, struct Token *); +// 
TODO(kyle): investigate a better return value, e.g. to differentiate between +// stack failures and parse failures. +bool parse_num(struct Token *, Stack &); + #endif // __KF_PARSER_H__ diff --git a/stack.h b/stack.h new file mode 100644 index 0000000..e23fe9a --- /dev/null +++ b/stack.h @@ -0,0 +1,57 @@ +#ifndef __KF_STACK_H__ +#define __KF_STACK_H__ + +#include "defs.h" + +template +class Stack { +public: + bool push(T val); + bool pop(T &val); + bool get(size_t, T &); + size_t size(void) { return this->arrlen; }; +private: + T arr[STACK_SIZE]; + size_t arrlen; +}; + +// push returns false if there was a stack overflow. +template +bool +Stack::push(T val) +{ + if ((this->arrlen + 1) > STACK_SIZE) { + return false; + } + + this->arr[this->arrlen++] = val; + return true; +} + +// pop returns false if there was a stack underflow. +template +bool +Stack::pop(T &val) +{ + if (this->arrlen == 0) { + return false; + } + + val = this->arr[this->arrlen - 1]; + this->arrlen--; +} + +// get returns false on invalid bounds. +template +bool +Stack::get(size_t i, T &val) +{ + if (i > this->arrlen) { + return false; + } + + val = this->arr[i]; + return true; +} + +#endif // __KF_STACK_H__ \ No newline at end of file