misc/kforth: Part 0x04 - parsing numerics.
This commit is contained in:
parent
0ae7d49593
commit
505d71906c
1
Makefile
1
Makefile
|
@ -1,6 +1,7 @@
|
|||
CXXSTD := c++14
|
||||
CXXFLAGS := -std=$(CXXSTD) -Wall -Werror -g -O0
|
||||
OBJS := linux/io.o \
|
||||
io.o \
|
||||
parser.o \
|
||||
kforth.o
|
||||
TARGET := kforth
|
||||
|
|
3
defs.h
3
defs.h
|
@ -3,6 +3,9 @@
|
|||
|
||||
#ifdef __linux__
|
||||
#include "linux/defs.h"
|
||||
#else
|
||||
typedef int KF_INT;
|
||||
constexpr uint8_t STACK_SIZE = 16;
|
||||
#endif
|
||||
|
||||
constexpr size_t MAX_TOKEN_LENGTH = 16;
|
||||
|
|
|
@ -9,6 +9,7 @@ Contents:
|
|||
part-0x01
|
||||
part-0x02
|
||||
part-0x03
|
||||
part-0x04
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
|
|
@ -0,0 +1,313 @@
|
|||
Write You a Forth, 0x04
|
||||
-----------------------
|
||||
|
||||
:date: 2018-02-23 19:20
|
||||
:tags: wyaf, forth
|
||||
|
||||
So, I lied about words being next. When I thought about it some more, what I
|
||||
really need to do is start adding the stack in and adding support for parsing
|
||||
numerics. I'll start with the stack, because it's pretty straightforward.
|
||||
|
||||
I've added a new definition: ``constexpr uint8_t STACK_SIZE = 128``. This goes
|
||||
in ``linux/defs.h``, and the ``#else`` in the top-level ``defs.h`` will set a
|
||||
smaller stack size for other targets. I've also defined a type called ``KF_INT``
|
||||
that, on Linux, is an ``int32_t``::
|
||||
|
||||
index 4dcc540..e070d27 100644
|
||||
--- a/defs.h
|
||||
+++ b/defs.h
|
||||
@@ -3,6 +3,9 @@
|
||||
|
||||
#ifdef __linux__
|
||||
#include "linux/defs.h"
|
||||
+#else
|
||||
+typedef int KF_INT;
|
||||
+constexpr uint8_t STACK_SIZE = 16;
|
||||
#endif
|
||||
|
||||
constexpr size_t MAX_TOKEN_LENGTH = 16;
|
||||
diff --git a/linux/defs.h b/linux/defs.h
|
||||
index 57cdaeb..3740f5a 100644
|
||||
--- a/linux/defs.h
|
||||
+++ b/linux/defs.h
|
||||
@@ -4,4 +4,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
+typedef int32_t KF_INT;
|
||||
+constexpr uint8_t STACK_SIZE = 128;
|
||||
+
|
||||
#endif
|
||||
\ No newline at end of file
|
||||
|
||||
It seems useful to be able to adapt the kind of numbers supported; an AVR might do
|
||||
better with 16-bit integers, for example.
|
||||
|
||||
``stack.h``
|
||||
^^^^^^^^^^^
|
||||
|
||||
The stack is going to be templated, because we'll need a ``double`` stack later
|
||||
for floating point and a return address stack later. This means everything will
|
||||
go under ``stack.h``. This is a pretty simple implementation that's CS 101 material;
|
||||
I've opted to have the interface return ``bool``\ s for everything to indicate stack
|
||||
overflow and underflow and out of bounds::
|
||||
|
||||
#ifndef __KF_STACK_H__
|
||||
#define __KF_STACK_H__
|
||||
|
||||
#include "defs.h"
|
||||
|
||||
template <typename T>
|
||||
class Stack {
|
||||
public:
|
||||
bool push(T val);
|
||||
bool pop(T &val);
|
||||
bool get(size_t, T &);
|
||||
size_t size(void) { return this->arrlen; };
|
||||
private:
|
||||
T arr[STACK_SIZE];
|
||||
size_t arrlen;
|
||||
};
|
||||
|
||||
// push returns false if there was a stack overflow.
|
||||
template <typename T>
|
||||
bool
|
||||
Stack<T>::push(T val)
|
||||
{
|
||||
if ((this->arrlen + 1) > STACK_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this->arr[this->arrlen++] = val;
|
||||
return true;
|
||||
}
|
||||
|
||||
// pop returns false if there was a stack underflow.
|
||||
template <typename T>
|
||||
bool
|
||||
Stack<T>::pop(T &val)
|
||||
{
|
||||
if (this->arrlen == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
val = this->arr[this->arrlen - 1];
|
||||
this->arrlen--;
|
||||
}
|
||||
|
||||
// get returns false on invalid bounds.
|
||||
template <typename T>
|
||||
bool
|
||||
Stack<T>::get(size_t i, T &val)
|
||||
{
|
||||
if (i > this->arrlen) {
|
||||
return false;
|
||||
}
|
||||
|
||||
val = this->arr[i];
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // __KF_STACK_H__
|
||||
|
||||
I'll put a ``Stack<KF_INT>`` in ``kforth.cc`` later on. For now, this gives me
|
||||
an interface for the numeric parser to push a number onto the stack.
|
||||
|
||||
``parse_num``
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
It seems like the best place for this is in ``parser.cc`` --- though I might
|
||||
move it into a token processor later. The declaration for this goes in ``parser.h``,
|
||||
and the body is in ``parser.cc``::
|
||||
|
||||
// parse_num tries to parse the token as a signed base 10 number,
|
||||
// pushing it onto the stack if needed.
|
||||
bool
|
||||
parse_num(struct Token *token, Stack<KF_INT> &s)
|
||||
{
|
||||
KF_INT n = 0;
|
||||
uint8_t i = 0;
|
||||
bool sign = false;
|
||||
|
||||
It turns out you can't parse a zero-length token as a number...
|
||||
::
|
||||
|
||||
if (token->length == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
I'll need to invert the number later if it's negative, but it's worth checking
|
||||
the first character to see if it's negative.
|
||||
::
|
||||
|
||||
if (token->token[i] == '-') {
|
||||
i++;
|
||||
sign = true;
|
||||
}
|
||||
|
||||
Parsing is done by checking whether each character is within the range of the ASCII
|
||||
numeral values. Later on, I might add in separate functions for processing base 10
|
||||
and base 16 numbers, and decide which to use based on a prefix (like ``0x``). If the
|
||||
character is between those values, then the working number is multiplied by 10 and
|
||||
the digit added.
|
||||
::
|
||||
|
||||
while (i < token->length) {
|
||||
if (token->token[i] < '0') {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (token->token[i] > '9') {
|
||||
return false;
|
||||
}
|
||||
|
||||
n *= 10;
|
||||
n += (uint8_t)(token->token[i] - '0');
|
||||
i++;
|
||||
}
|
||||
|
||||
If it was a negative number, then the working number has to be inverted::
|
||||
|
||||
if (sign) {
|
||||
n *= -1;
|
||||
}
|
||||
|
||||
Finally, return the result of pushing the number on the stack. One thing that
|
||||
might come back to get me later is that this makes it impossible to tell if a
|
||||
failure to parse the number is due to an invalid number or due to a stack
|
||||
overflow. This will be a good candidate for revisiting later.
|
||||
::
|
||||
|
||||
return s.push(n);
|
||||
}
|
||||
|
||||
``io.cc``
|
||||
^^^^^^^^^^
|
||||
|
||||
Conversely, it'll be useful to write a number to an ``IO`` interface. It
|
||||
*seems* more useful right now to just provide a number → I/O function, but
|
||||
that'll be easily adapted to a number → buffer function later. This will add
|
||||
a real function to ``io.h``, which will require a corresponding ``io.cc``
|
||||
(which also needs to be added to the ``Makefile``)::
|
||||
|
||||
#include "defs.h"
|
||||
#include "io.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
void
|
||||
write_num(IO &interface, KF_INT n)
|
||||
{
|
||||
|
||||
Through careful scientific study, I have determined that the largest number of digits
|
||||
that a 32-bit integer needs is 10 bytes (sans the sign!). This will absolutely
|
||||
need to be changed if ``KF_INT`` is ever moved to 64-bit (or larger!) numbers.
|
||||
There's a TODO in the actual source code that notes this. ::
|
||||
|
||||
char buf[10];
|
||||
uint8_t i = 10;
|
||||
memset(buf, 0, 10);
|
||||
|
||||
Because this is going out to an I/O interface, I don't need to store the sign
|
||||
in the buffer itself and can just print it and invert the number. Inverting is
|
||||
important; I ran into a bug earlier where I didn't invert it and my subtractions
|
||||
below were correspondingly off.
|
||||
::
|
||||
|
||||
if (n < 0) {
|
||||
interface.wrch('-');
|
||||
n *= -1;
|
||||
}
|
||||
|
||||
The buffer has to be filled from the end to the beginning to do the inverse of
|
||||
the parsing method::
|
||||
|
||||
while (n != 0) {
|
||||
char ch = (n % 10) + '0';
|
||||
buf[i--] = ch;
|
||||
n /= 10;
|
||||
}
|
||||
|
||||
But then it can be just dumped to the interface::
|
||||
|
||||
interface.wrbuf(buf+i, 11-i);
|
||||
}
|
||||
|
||||
``kforth.cc``
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
And now I come to the fun part: adding the stack in. After including ``stack.h``,
|
||||
I've added a stack implementation to the top of the file::
|
||||
|
||||
// dstack is the data stack.
|
||||
static Stack<KF_INT> dstack;
|
||||
|
||||
It's kind of useful to be able to print the stack::
|
||||
|
||||
static void
|
||||
write_dstack(IO &interface)
|
||||
{
|
||||
KF_INT tmp;
|
||||
interface.wrch('<');
|
||||
for (size_t i = 0; i < dstack.size(); i++) {
|
||||
if (i > 0) {
|
||||
interface.wrch(' ');
|
||||
}
|
||||
|
||||
dstack.get(i, tmp);
|
||||
write_num(interface, tmp);
|
||||
}
|
||||
interface.wrch('>');
|
||||
}
|
||||
|
||||
Surrounding the stack in angle brackets is a cool stylish sort of thing, I
|
||||
guess. All this is no good if the interpreter isn't actually hooked up to the
|
||||
number parser::
|
||||
|
||||
// The new while loop in the parser function in kforth.cc:
|
||||
while ((result = parse_next(buf, buflen, &offset, &token)) == PARSE_OK) {
|
||||
interface.wrbuf((char *)"token: ", 7);
|
||||
interface.wrbuf(token.token, token.length);
|
||||
interface.wrln((char *)".", 1);
|
||||
|
||||
if (!parse_num(&token, dstack)) {
|
||||
interface.wrln((char *)"failed to parse numeric", 23);
|
||||
}
|
||||
|
||||
// Temporary hack until the interpreter is working further.
|
||||
if (match_token(token.token, token.length, bye, 3)) {
|
||||
interface.wrln((char *)"Goodbye!", 8);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
But does it blend?
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Hopefully this works::
|
||||
|
||||
~/code/kforth (0) $ make
|
||||
g++ -std=c++14 -Wall -Werror -g -O0 -c -o linux/io.o linux/io.cc
|
||||
g++ -std=c++14 -Wall -Werror -g -O0 -c -o io.o io.cc
|
||||
g++ -std=c++14 -Wall -Werror -g -O0 -c -o parser.o parser.cc
|
||||
g++ -std=c++14 -Wall -Werror -g -O0 -c -o kforth.o kforth.cc
|
||||
g++ -o kforth linux/io.o io.o parser.o kforth.o
|
||||
~/code/kforth (0) $ ./kforth
|
||||
kforth interpreter
|
||||
<>
|
||||
? 2 -2 30 1000 -1010
|
||||
token: 2.
|
||||
token: -2.
|
||||
token: 30.
|
||||
token: 1000.
|
||||
token: -1010.
|
||||
ok.
|
||||
<2 -2 30 1000 -1010>
|
||||
? bye
|
||||
token: bye.
|
||||
failed to parse numeric
|
||||
Goodbye!
|
||||
~/code/kforth (0) $
|
||||
|
||||
So there's that. Okay, next time *for real* I'll do a vocabulary thing.
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#include "defs.h"
|
||||
#include "io.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
void
|
||||
write_num(IO &interface, KF_INT n)
|
||||
{
|
||||
|
||||
// TODO(kyle): make the size of the buffer depend on the size of
|
||||
// KF_INT.
|
||||
char buf[10];
|
||||
uint8_t i = 10;
|
||||
memset(buf, 0, i);
|
||||
if (n < 0) {
|
||||
interface.wrch('-');
|
||||
n *= -1;
|
||||
}
|
||||
|
||||
while (n != 0) {
|
||||
char ch = (n % 10) + '0';
|
||||
buf[i--] = ch;
|
||||
n /= 10;
|
||||
}
|
||||
|
||||
interface.wrbuf(buf+i, 11-i);
|
||||
}
|
3
io.h
3
io.h
|
@ -21,4 +21,7 @@ public:
|
|||
virtual void wrln(char *buf, size_t len) = 0;
|
||||
};
|
||||
|
||||
void write_num(IO &, KF_INT);
|
||||
|
||||
|
||||
#endif // __KF_IO_H__
|
28
kforth.cc
28
kforth.cc
|
@ -1,7 +1,9 @@
|
|||
#include "io.h"
|
||||
#include "parser.h"
|
||||
#include "stack.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include "linux.h"
|
||||
|
@ -10,6 +12,26 @@
|
|||
static char ok[] = "ok.\n";
|
||||
static char bye[] = "bye";
|
||||
|
||||
// dstack is the data stack.
|
||||
static Stack<KF_INT> dstack;
|
||||
|
||||
|
||||
static void
|
||||
write_dstack(IO &interface)
|
||||
{
|
||||
KF_INT tmp;
|
||||
interface.wrch('<');
|
||||
for (size_t i = 0; i < dstack.size(); i++) {
|
||||
if (i > 0) {
|
||||
interface.wrch(' ');
|
||||
}
|
||||
|
||||
dstack.get(i, tmp);
|
||||
write_num(interface, tmp);
|
||||
}
|
||||
interface.wrch('>');
|
||||
}
|
||||
|
||||
static bool
|
||||
parser(IO &interface, const char *buf, const size_t buflen)
|
||||
{
|
||||
|
@ -28,6 +50,10 @@ parser(IO &interface, const char *buf, const size_t buflen)
|
|||
interface.wrbuf(token.token, token.length);
|
||||
interface.wrln((char *)".", 1);
|
||||
|
||||
if (!parse_num(&token, dstack)) {
|
||||
interface.wrln((char *)"failed to parse numeric", 23);
|
||||
}
|
||||
|
||||
// Temporary hack until the interpreter is working further.
|
||||
if (match_token(token.token, token.length, bye, 3)) {
|
||||
interface.wrln((char *)"Goodbye!", 8);
|
||||
|
@ -58,6 +84,8 @@ interpreter(IO &interface)
|
|||
static char linebuf[81];
|
||||
|
||||
while (true) {
|
||||
write_dstack(interface);
|
||||
interface.wrch('\n');
|
||||
interface.wrch('?');
|
||||
interface.wrch(' ');
|
||||
buflen = interface.rdbuf(linebuf, 80, true, '\n');
|
||||
|
|
2
linux.h
2
linux.h
|
@ -6,7 +6,5 @@
|
|||
// build support for linux
|
||||
#include "linux/io.h"
|
||||
|
||||
constexpr uint8_t STACK_SIZE = 128;
|
||||
|
||||
|
||||
#endif // __KF_LINUX_H__
|
||||
|
|
|
@ -4,4 +4,7 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef int32_t KF_INT;
|
||||
constexpr uint8_t STACK_SIZE = 128;
|
||||
|
||||
#endif
|
38
parser.cc
38
parser.cc
|
@ -1,5 +1,6 @@
|
|||
#include "defs.h"
|
||||
#include "parser.h"
|
||||
#include "stack.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
@ -68,4 +69,41 @@ parse_next(const char *buf, const size_t length, size_t *offset,
|
|||
|
||||
*offset = cursor;
|
||||
return PARSE_OK;
|
||||
}
|
||||
|
||||
bool
|
||||
parse_num(struct Token *token, Stack<KF_INT> &s)
|
||||
{
|
||||
KF_INT n = 0;
|
||||
uint8_t i = 0;
|
||||
bool sign = false;
|
||||
|
||||
if (token->length == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (token->token[i] == '-') {
|
||||
i++;
|
||||
sign = true;
|
||||
}
|
||||
|
||||
while (i < token->length) {
|
||||
if (token->token[i] < '0') {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (token->token[i] > '9') {
|
||||
return false;
|
||||
}
|
||||
|
||||
n *= 10;
|
||||
n += (uint8_t)(token->token[i] - '0');
|
||||
i++;
|
||||
}
|
||||
|
||||
if (sign) {
|
||||
n *= -1;
|
||||
}
|
||||
|
||||
return s.push(n);
|
||||
}
|
5
parser.h
5
parser.h
|
@ -2,6 +2,7 @@
|
|||
#define __KF_PARSER_H__
|
||||
|
||||
#include "defs.h"
|
||||
#include "stack.h"
|
||||
|
||||
struct Token {
|
||||
char *token;
|
||||
|
@ -18,5 +19,9 @@ typedef enum _PARSE_RESULT_ : uint8_t {
|
|||
bool match_token(const char *, const size_t, const char *, const size_t);
|
||||
PARSE_RESULT parse_next(const char *, const size_t, size_t *, struct Token *);
|
||||
|
||||
// TODO(kyle): investigate a better return value, e.g. to differentiate between
|
||||
// stack failures and parse failures.
|
||||
bool parse_num(struct Token *, Stack<KF_INT> &);
|
||||
|
||||
|
||||
#endif // __KF_PARSER_H__
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
#ifndef __KF_STACK_H__
|
||||
#define __KF_STACK_H__
|
||||
|
||||
#include "defs.h"
|
||||
|
||||
template <typename T>
|
||||
class Stack {
|
||||
public:
|
||||
bool push(T val);
|
||||
bool pop(T &val);
|
||||
bool get(size_t, T &);
|
||||
size_t size(void) { return this->arrlen; };
|
||||
private:
|
||||
T arr[STACK_SIZE];
|
||||
size_t arrlen;
|
||||
};
|
||||
|
||||
// push returns false if there was a stack overflow.
|
||||
template <typename T>
|
||||
bool
|
||||
Stack<T>::push(T val)
|
||||
{
|
||||
if ((this->arrlen + 1) > STACK_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
this->arr[this->arrlen++] = val;
|
||||
return true;
|
||||
}
|
||||
|
||||
// pop returns false if there was a stack underflow.
|
||||
template <typename T>
|
||||
bool
|
||||
Stack<T>::pop(T &val)
|
||||
{
|
||||
if (this->arrlen == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
val = this->arr[this->arrlen - 1];
|
||||
this->arrlen--;
|
||||
}
|
||||
|
||||
// get returns false on invalid bounds.
|
||||
template <typename T>
|
||||
bool
|
||||
Stack<T>::get(size_t i, T &val)
|
||||
{
|
||||
if (i > this->arrlen) {
|
||||
return false;
|
||||
}
|
||||
|
||||
val = this->arr[i];
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // __KF_STACK_H__
|
Loading…
Reference in New Issue