瀏覽代碼

restructuring + coding style + update README + licensing consistency

Noah Vogt 3 年之前
父節點
當前提交
d8b3387487
共有 10 個文件被更改,包括 286 次插入294 次删除
  1. 1 1
      LICENSE
  2. 1 1
      Makefile
  3. 3 3
      README.md
  4. 29 54
      src/compiler.cpp
  5. 67 0
      src/compiler.h
  6. 21 49
      src/main.cpp
  7. 5 59
      src/tibasic.h
  8. 5 127
      src/token_list.h
  9. 113 0
      src/token_util.cpp
  10. 41 0
      src/token_util.h

+ 1 - 1
LICENSE

@@ -1,4 +1,4 @@
-Copyright (c) 2020 Noah Vogt
+Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
 Copyright (c) 2011 Matthew Iselin
 
 Permission to use, copy, modify, and distribute this software for any

+ 1 - 1
Makefile

@@ -13,7 +13,7 @@ DEPFILES := $(patsubst %.cpp,%.d,$(SRCFILES))
 
 .PHONY: all clean
 
-CXXFLAGS := -g -Wall -pedantic -O -D_FORTIFY_SOURCE=2
+CXXFLAGS := -g -Wall -pedantic -O
 
 CXX := g++
 

+ 3 - 3
README.md

@@ -7,11 +7,11 @@ Being able to type your programs on a real keyboard instead of the calculator's
 
 ## Installation
 #### Unix (GNU/Linux, MacOS, FreeBSD, etc.)
-If you are using Arch, you can just install my [AUR Package](https://aur.archlinux.org/packages/tibasicc-git/) with your prefered aur helper (yay, pacaur, yaourt, etc.):
+If you are using Arch, you can just install my [AUR Package](https://aur.archlinux.org/packages/tibasicc-git/) either with your preferred AUR helper (paru, yay, etc.) or manually:
 
-    yay -S tibasicc-git
+    paru -S tibasicc-git
 
-For other systems, here is my also fairly easy general approach. I have only tested this on GNU/Linux, but it should work on any other unix-like OS like MacOS:
+For other systems, here is my general approach, which is also fairly easy. I have only tested this recently on GNU/Linux, but it should still work on any other Unix-like OS such as macOS:
 
     git clone https://github.com/noahvogt/tibasicc.git
     cd tibasicc

+ 29 - 54
src/tibasic.cpp → src/compiler.cpp

@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
  * Copyright (c) 2011 Matthew Iselin
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -18,35 +19,31 @@
 #include <fstream>
 #include <vector>
 #include <string>
-
 #include <stdio.h>
 #include <string.h>
 
 #include "tibasic.h"
+#include "compiler.h"
 
 using namespace std;
 
 /* \todo More error handling. */
 
-unsigned short Compiler::doChecksum(size_t sum)
-{
+unsigned short Compiler::doChecksum(size_t sum) {
     /* Bottom 16 bits of the sum. */
     return (unsigned short) (sum & 0xFFFF);
 }
 
-size_t Compiler::sumBytes(const char *data, size_t len)
-{
+size_t Compiler::sumBytes(const char *data, size_t len) {
     size_t ret = 0;
     for(size_t i = 0; i < len; i++)
         ret += data[i];
     return ret;
 }
 
-string trim(const string& str)
-{
+string trim(const string& str) {
     size_t first = str.find_first_not_of(' ');
-    if (string::npos == first)
-    {
+    if (string::npos == first) {
         return str;
     }
     size_t last = str.find_last_not_of(' ');
@@ -55,8 +52,7 @@ string trim(const string& str)
 
 extern bool verbose;
 
-bool Compiler::compile(string inFile, string outFile)
-{
+bool Compiler::compile(string inFile, string outFile) {
     ifstream f(inFile.c_str(), ifstream::in);
 
     string tmpLine;
@@ -65,13 +61,11 @@ bool Compiler::compile(string inFile, string outFile)
     vector<token_t> output;
     unsigned short outputSize = 0;
 
-    while(!f.eof())
-    {
+    while(!f.eof()) {
         getline(f, tmpLine, '\n');
 
         /* ignore empty lines */
-        if(!tmpLine.length())
-        {
+        if(!tmpLine.length()) {
             if(verbose)
                 log(Debug, "Empty line detected!");
             continue;
@@ -86,8 +80,7 @@ bool Compiler::compile(string inFile, string outFile)
 
         /* ignore lines with now only whitespaces */
         bool containsSpaces = tmpLine.find_first_not_of(' ') != std::string::npos;
-        if(!containsSpaces)
-        {
+        if(!containsSpaces) {
             if(verbose)
                 log(Debug, "Line with only whitespaces / comments detected!");
             continue;
@@ -97,37 +90,31 @@ bool Compiler::compile(string inFile, string outFile)
         /* Parse. */
         token_t token;
 
-        while(tmpLine.length())
-        {
+        while(tmpLine.length()) {
             /* Grab the longest possible token we can from the input. */
             string s = tmpLine.substr(0, getLongestToken());
 
             bool validToken = false;
-            while(!validToken && s.length())
-            {
+            while(!validToken && s.length()) {
                 validToken = lookupToken(s, token);
                 if(!validToken)
                     s = s.substr(0, s.length() - 1);
             }
 
             /* Special case for alphabet characters */
-            if(!s.length() && isalpha(tmpLine[0]))
-            {
+            if(!s.length() && isalpha(tmpLine[0])) {
                 token.token = toupper(tmpLine[0]);
                 token.sz = 1;
 
                 s = tmpLine.substr(0, 1);
             }
 
-            if(!s.length())
-            {
+            if(!s.length()) {
                 /* Error, asplode! */
                 log(Error, "Invalid token.");
                 f.close();
                 return false;
-            }
-            else
-            {
+            } else {
                 outputSize += token.sz;
                 output.push_back(token);
                 if(verbose)
@@ -138,8 +125,7 @@ bool Compiler::compile(string inFile, string outFile)
 
         /* Output a newline. */
         bool gotNewline = lookupToken("\n", token);
-        if(gotNewline)
-        {
+        if(gotNewline) {
             outputSize += token.sz;
             output.push_back(token);
         }
@@ -151,7 +137,7 @@ bool Compiler::compile(string inFile, string outFile)
     memset(&ventry, 0, sizeof(VariableEntry));
     
     phdr.datalen = sizeof(VariableEntry) + outputSize + sizeof(outputSize);
-    strcpy(phdr.sig, "**TI83F*");
+    strcpy(phdr.sig, "**TI83F*"); /* 8 chars + NUL exactly fill sig[9]; anything longer overflows into extsig */
     phdr.extsig[0] = 0x1A; phdr.extsig[1] = 0x0A; phdr.extsig[2] = 0;
     strcpy(phdr.comment, "Generated by the TI-BASIC Compiler.");
 
@@ -165,8 +151,7 @@ bool Compiler::compile(string inFile, string outFile)
     size_t n = outFile.find_last_of('/');
     if(n == inFile.npos) n = outFile.find_last_of('\\');
     if(n == inFile.npos) n = 0; else n++;
-    for(; (i < 8) && (n < inFile.length() - 4); n++)
-    {
+    for(; (i < 8) && (n < inFile.length() - 4); n++) {
         if(outFile[n] == '.')
             break;
         ventry.name[i++] = toupper(outFile[n]);
@@ -183,10 +168,9 @@ bool Compiler::compile(string inFile, string outFile)
 
     for(vector<token_t>::iterator it = output.begin();
         it != output.end();
-        ++it)
-    {
-        fwrite(&(it->token), it->sz, 1, out);
-        sum += it->token;
+        ++it) {
+            fwrite(&(it->token), it->sz, 1, out);
+            sum += it->token;
     }
 
     /* Perform a checksum and write to file. */
@@ -200,12 +184,10 @@ bool Compiler::compile(string inFile, string outFile)
     return true;
 }
 
-bool Compiler::decompile(string inFile, string outFile)
-{
+bool Compiler::decompile(string inFile, string outFile) {
     /* Parse the file. */
     FILE *fp = fopen(inFile.c_str(), "rb");
-    if(!fp)
-    {
+    if(!fp) {
         log(Error, "Couldn't open input file.");
         return false;
     }
@@ -230,13 +212,11 @@ bool Compiler::decompile(string inFile, string outFile)
 
     bool bAsmProgram = false;
 
-    while((!feof(fp)) && (nBytesRead < tokenLength))
-    {
+    while((!feof(fp)) && (nBytesRead < tokenLength)) {
         fread(&temp, 1, 2, fp);
 
         /* If we're in assembly mode, just copy the bytes straight in a numbers. */
-        if(bAsmProgram)
-        {
+        if(bAsmProgram) {
             if(((temp & 0xFF) == 0x3F))
                 sOutput += "\n";
             sOutput += temp & 0xFF;
@@ -253,26 +233,21 @@ bool Compiler::decompile(string inFile, string outFile)
         if(!bIsFound)
             bIsFound = lookupToken(temp & 0xFF, conv);
 
-        if(!bIsFound)
-        {
+        if(!bIsFound) {
             sOutput += static_cast<char>(temp);
 
             fseek(fp, -1, SEEK_CUR);
             nBytesRead++;
-        }
-        else
-        {
+        } else {
             sOutput += conv;
 
             token_t tokenInfo;
             lookupToken(conv, tokenInfo);
 
-            if(tokenInfo.sz < sizeof(unsigned short))
-            {
+            if(tokenInfo.sz < sizeof(unsigned short)) {
                 fseek(fp, -1, SEEK_CUR);
                 nBytesRead++;
-            }
-            else
+            } else
                 nBytesRead += 2;
 
             if(conv == "AsmPrgm")

+ 67 - 0
src/compiler.h

@@ -0,0 +1,67 @@
+/* 
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
+ * Copyright (c) 2011 Matthew Iselin
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string>
+
+#ifdef _MSC_VER
+#define PACKED
+#else
+#define PACKED __attribute__((packed))
+#endif
+
+/* Compilation class. */
+class Compiler {
+    public:
+        Compiler() {}
+        virtual ~Compiler() {}
+
+        bool compile(std::string inFile, std::string outFile);
+
+        bool decompile(std::string inFile, std::string outFile);
+
+    private:
+        /* Perform a checksum over a region of data. */
+        size_t sumBytes(const char *data, size_t len);
+        unsigned short doChecksum(size_t sum);
+
+#ifdef _MSC_VER
+#pragma pack(push, 1)
+#endif
+
+        /* 8xp file header */
+        struct ProgramHeader {
+            char sig[9];
+            char extsig[3];
+            char comment[42];
+            unsigned short datalen;
+        } PACKED;
+
+        /* Variable entry */
+        struct VariableEntry {
+            unsigned short start;
+            unsigned short length1;
+            unsigned char type;
+            char name[8];
+            char ver;
+            char flags;
+            unsigned short length2;
+        } PACKED;
+
+#ifdef _MSC_VER
+#pragma pack(pop)
+#endif
+};

+ 21 - 49
src/main.cpp

@@ -1,4 +1,5 @@
 /* 
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
  * Copyright (c) 2011 Matthew Iselin
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -14,30 +15,21 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-/* When VC++ compiles in debug mode, it will set _DEBUG. This sets DEBUG */
-/* in order to avoid the VC++-ism. */
-#ifdef _DEBUG
-#define DEBUG
-#endif
-
 #include <iostream>
 #include <string>
-
 #include <string.h>
 
 #include "tibasic.h"
-
 #ifdef _WIN32
 #include "Shlwapi.h"
 #endif
+#include "compiler.h"
 
 using namespace std;
 
 /* Helper function to convert a string to uppercase. */
-char* strtoupper(char* str)
-{
-	for( size_t i = 0; i < strlen( str ); i++ )
-	{
+char* strtoupper(char* str) {
+	for( size_t i = 0; i < strlen( str ); i++ ) {
 		if( ! ( isupper( str[i] ) ) && isalpha( str[i] ) )
 			str[i] = _toupper( str[i] );
 	}
@@ -45,13 +37,11 @@ char* strtoupper(char* str)
 }
 
 /* Logs output from the build */
-void log(LogSeverity severity, const char *out)
-{
+void log(LogSeverity severity, const char *out) {
     cout << severityToString(severity) << ": " << out << endl;
 }
 
-void stripExtension(const char *in, char *out, size_t len)
-{
+void stripExtension(const char *in, char *out, size_t len) {
     if(strrchr(in, '.') == NULL)
         return;
     strncpy(out, in, len);
@@ -70,18 +60,15 @@ Options:\n\
 \t-v\t\tverbose / debug mode\n\
 \t-h, --help\tprint this help message";
 
-int main( int argc, char* argv[] )
-{
+int main( int argc, char* argv[] ) {
 	/* check for valid number of arguments */
-	if((argc < 2) || (argv[1] == NULL))
-	{
+	if((argc < 2) || (argv[1] == NULL)) {
 		/* display error and help message when no arguments given */
         cout << "Error: " << helpMessage << "\n";
 		return 1;
 	}
 
-    if(!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))
-    {
+    if(!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
 		/* display help message when help flag arguments given */
         cout << helpMessage << "\n";
         return 0;
@@ -95,30 +82,23 @@ int main( int argc, char* argv[] )
 
     /* Parse arguments */
     inFile = argv[argc - 1]; /* Last argument is always filename */
-    for(int i = 1; i < argc - 1; i++)
-    {
-        if(!strcmp(argv[i], "-o") && !outFile.length())
-        {
+    for(int i = 1; i < argc - 1; i++) {
+        if(!strcmp(argv[i], "-o") && !outFile.length()) {
             i++; /* Next argument is filename */
             /* Output filename */
-            if(i >= argc - 1)
-            {
+            if(i >= argc - 1) {
                 log(Error, "-o requires a parameter (output filename).");
                 return 1;
             }
             outFile = argv[i];
-        }
-        else if(!strcmp(argv[i], "-v"))
+        } else if(!strcmp(argv[i], "-v"))
             verbose = true;
         else if(!strcmp(argv[i], "-d"))
             bDecompile = true;
-        else if(!strcmp(argv[i], "--help"))
-        {
+        else if(!strcmp(argv[i], "--help")) {
             cout << helpMessage << "\n";
             return 0;
-        }
-        else
-        {
+        } else {
             log(Error, "Unknown option specified");
             return 1;
         }
@@ -127,19 +107,15 @@ int main( int argc, char* argv[] )
     /* If no output was given, rename the input with .8xp instead of .tib and 
      * use that as the output.
      */
-    if(!outFile.length())
-    {
+    if(!outFile.length()) {
         /* check for file extension and strip it if found */
-        if(strchr(inFile.c_str(), '.'))
-        {
+        if(strchr(inFile.c_str(), '.')) {
             char *tmp = new char[inFile.length()];
             stripExtension(inFile.c_str(), tmp, inFile.length());
 
             outFile = tmp;
             delete [] tmp;
-        }
-        else
-        {
+        } else {
             outFile = inFile;
         }
         
@@ -158,22 +134,18 @@ int main( int argc, char* argv[] )
     initialiseTokens();
 
     /* Compile time! */
-    if(inFile.length() && outFile.length())
-    {
+    if(inFile.length() && outFile.length()) {
         bool res = false;
         if(bDecompile)
             res = pCompiler->decompile(inFile, outFile);
         else
             res = pCompiler->compile(inFile, outFile);
 
-        if(!res)
-        {
+        if(!res) {
             log(Error, "Compilation failed.");
             return 1;
         }
-    }
-    else
-    {
+    } else {
         log(Error, "Either an input or output filename was not given.");
         return 1;
     }

+ 5 - 59
src/tibasic.h

@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
  * Copyright (c) 2011 Matthew Iselin
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -19,22 +20,14 @@
 
 #include <string>
 
-#ifdef _MSC_VER
-#define PACKED
-#else
-#define PACKED __attribute__((packed))
-#endif
-
 /* Stores a token to be written and the size of that token. */
-typedef struct
-{
+typedef struct {
     unsigned short token;
     size_t sz;
 } token_t;
 
 /* Log severities */
-enum LogSeverity
-{
+enum LogSeverity {
     Error,
     Info,
     Debug
@@ -49,10 +42,8 @@ size_t getLongestToken();
 bool lookupToken(std::string in, token_t &ret);
 bool lookupToken(unsigned short in, std::string &out);
 
-inline const char *severityToString(LogSeverity s)
-{
-    switch(s)
-    {
+inline const char *severityToString(LogSeverity s) {
+    switch(s) {
         case Error:
             return "Error";
             break;
@@ -71,50 +62,5 @@ inline const char *severityToString(LogSeverity s)
 /* Log function */
 void log(LogSeverity, const char *);
 
-/* Compilation class. */
-class Compiler
-{
-    public:
-        Compiler() {}
-        virtual ~Compiler() {}
-
-        bool compile(std::string inFile, std::string outFile);
-
-        bool decompile(std::string inFile, std::string outFile);
-
-    private:
-        /* Perform a checksum over a region of data. */
-        size_t sumBytes(const char *data, size_t len);
-        unsigned short doChecksum(size_t sum);
-
-#ifdef _MSC_VER
-#pragma pack(push, 1)
-#endif
-
-        /* 8xp file header */
-        struct ProgramHeader
-        {
-            char sig[9];
-            char extsig[3];
-            char comment[42];
-            unsigned short datalen;
-        } PACKED;
-
-        /* Variable entry */
-        struct VariableEntry
-        {
-            unsigned short start;
-            unsigned short length1;
-            unsigned char type;
-            char name[8];
-            char ver;
-            char flags;
-            unsigned short length2;
-        } PACKED;
-
-#ifdef _MSC_VER
-#pragma pack(pop)
-#endif
-};
 
 #endif

+ 5 - 127
src/tokens.cpp → src/token_list.h

@@ -1,4 +1,5 @@
-/*
+/* 
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
  * Copyright (c) 2011 Matthew Iselin
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -14,131 +15,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-#include <map>
-#include <string>
-
-#include <string.h>
-
-#include "tibasic.h"
-
-using namespace std;
-
-/* Describes a potential token to be read by the compiler */
-struct Token {
-    /* The compiled byte for the token */
-	unsigned char data;
-
-    /* The actual text to be converted by the interpreter */
-	const char* text;
-};
-
-/* A two byte token (0xBB, 0x7E and SysVar) */
-struct TwoByte {
-	unsigned short data;
-	const char* text;
-};
-
-/* Direct ASCII character to token conversion. */
-struct ConvertRule {
-	char c;				/* the character */
-	unsigned char tok;	/* the equivalent token */
-};
-
-/* References to lists defined after functions. */
-extern struct Token StandardTokens[200];
-extern struct TwoByte CalcVars[302];
-extern struct ConvertRule Replacements[39];
-
-/* string -> token mapping */
-map<string, token_t> g_TokenLookup;
-
-/* token -> string mapping */
-map<unsigned short, string> g_ReverseLookup;
-
-/* Longest input string possible */
-size_t g_LongestInput = 0;
-
-/* Shiny little template function that returns the size of an array. */
-template <typename T, int N> size_t arrayLen(T(&)[N]){return N;}
-
-/* Initialises the token map */
-void initialiseTokens()
-{
-    /* Iterate the main token list first. */
-    for(size_t i = 0; i < arrayLen(StandardTokens); i++)
-    {
-        token_t value;
-        value.token = StandardTokens[i].data;
-        value.sz = sizeof(StandardTokens[i].data);
-
-        size_t len = strlen(StandardTokens[i].text);
-        if(len > g_LongestInput)
-            g_LongestInput = len;
-
-        string s = string(StandardTokens[i].text);
-        g_TokenLookup[s] = value;
-        g_ReverseLookup[value.token] = s;
-    }
-
-    /* Now iterate the two-byte tokens. */
-    for(size_t i = 0; i < (sizeof(CalcVars) / sizeof(Token)); i++)
-    {
-        token_t value;
-        value.token = CalcVars[i].data;
-        value.sz = sizeof(CalcVars[i].data);
-
-        size_t len = strlen(CalcVars[i].text);
-        if(len > g_LongestInput)
-            g_LongestInput = len;
-        
-        string s = string(CalcVars[i].text);
-        g_TokenLookup[s] = value;
-        g_ReverseLookup[value.token] = s;
-    }
-
-    /* Finally, iterate single-character tokens. */
-    for(size_t i = 0; i < (sizeof(Replacements) / sizeof(ConvertRule)); i++)
-    {
-        token_t value;
-        value.token = Replacements[i].tok;
-        value.sz = sizeof(Replacements[i].tok);
-
-        char c[] = {Replacements[i].c, 0};
-        string s = c;
-
-        g_TokenLookup[s] = value;
-        g_ReverseLookup[value.token] = s;
-    }
-}
-
-size_t getLongestToken()
-{
-    return g_LongestInput;
-}
-
-/* Perform a lookup */
-bool lookupToken(string in, token_t &ret)
-{
-    if(in.length() > g_LongestInput)
-        return false;
-
-    if(g_TokenLookup.find(in) == g_TokenLookup.end())
-        return false;
-
-    ret = g_TokenLookup[in];
-
-    return true;
-}
-
-bool lookupToken(unsigned short in, string &out)
-{
-    if(g_ReverseLookup.find(in) == g_ReverseLookup.end())
-        return false;
-
-    out = g_ReverseLookup[in];
-
-    return true;
-}
+#include "token_util.h"
 
 /* Token List */
 #define TO_DMS			0x01
@@ -1248,7 +1125,8 @@ struct TwoByte CalcVars[] = {
 };
 
 /* Replacements
- * Replacements are rules that define special characters that must be replaced with a token.
+ * Replacements are rules that define special characters that must be replaced
+ * with a token.
  */
 struct ConvertRule Replacements[] = {
 	{ '"', DOUBLEQUOTE },

+ 113 - 0
src/token_util.cpp

@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
+ * Copyright (c) 2011 Matthew Iselin
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <map>
+#include <string>
+#include <string.h>
+
+#include "tibasic.h"
+#include "token_list.h"
+
+using namespace std;
+
+/* References to lists defined after functions. */
+
+/* string -> token mapping */
+map<string, token_t> g_TokenLookup;
+
+/* token -> string mapping */
+map<unsigned short, string> g_ReverseLookup;
+
+/* Longest input string possible */
+size_t g_LongestInput = 0;
+
+/* Shiny little template function that returns the size of an array. */
+template <typename T, int N> size_t arrayLen(T(&)[N]){return N;}
+
+/* Initialises the token map */
+void initialiseTokens() {
+    /* Iterate the main token list first. */
+    for(size_t i = 0; i < arrayLen(StandardTokens); i++) {
+        token_t value;
+        value.token = StandardTokens[i].data;
+        value.sz = sizeof(StandardTokens[i].data);
+
+        size_t len = strlen(StandardTokens[i].text);
+        if(len > g_LongestInput)
+            g_LongestInput = len;
+
+        string s = string(StandardTokens[i].text);
+        g_TokenLookup[s] = value;
+        g_ReverseLookup[value.token] = s;
+    }
+
+    /* Now iterate the two-byte tokens. */
+    for(size_t i = 0; i < arrayLen(CalcVars); i++) { /* element count of the TwoByte array, not sizeof(Token) units */
+        token_t value;
+        value.token = CalcVars[i].data;
+        value.sz = sizeof(CalcVars[i].data);
+
+        size_t len = strlen(CalcVars[i].text);
+        if(len > g_LongestInput)
+            g_LongestInput = len;
+        
+        string s = string(CalcVars[i].text);
+        g_TokenLookup[s] = value;
+        g_ReverseLookup[value.token] = s;
+    }
+
+    /* Finally, iterate single-character tokens. */
+    for(size_t i = 0; i < (sizeof(Replacements) / sizeof(ConvertRule)); i++) {
+        token_t value;
+        value.token = Replacements[i].tok;
+        value.sz = sizeof(Replacements[i].tok);
+
+        char c[] = {Replacements[i].c, 0};
+        string s = c;
+
+        g_TokenLookup[s] = value;
+        g_ReverseLookup[value.token] = s;
+    }
+}
+
+size_t getLongestToken() {
+    return g_LongestInput;
+}
+
+/* Perform a lookup */
+bool lookupToken(string in, token_t &ret) {
+    if(in.length() > g_LongestInput)
+        return false;
+
+    if(g_TokenLookup.find(in) == g_TokenLookup.end())
+        return false;
+
+    ret = g_TokenLookup[in];
+
+    return true;
+}
+
+bool lookupToken(unsigned short in, string &out) {
+    if(g_ReverseLookup.find(in) == g_ReverseLookup.end())
+        return false;
+
+    out = g_ReverseLookup[in];
+
+    return true;
+}
+
+

+ 41 - 0
src/token_util.h

@@ -0,0 +1,41 @@
+/* 
+ * Copyright (c) 2021 Noah Vogt <noah@noahvogt.com>
+ * Copyright (c) 2011 Matthew Iselin
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Describes a potential token to be read by the compiler */
+struct Token {
+    /* The compiled byte for the token */
+	unsigned char data;
+
+    /* The actual text to be converted by the interpreter */
+	const char* text;
+};
+
+/* A two byte token (0xBB, 0x7E and SysVar) */
+struct TwoByte {
+	unsigned short data;
+	const char* text;
+};
+
+/* Direct ASCII character to token conversion. */
+struct ConvertRule {
+	char c;				/* the character */
+	unsigned char tok;	/* the equivalent token */
+};
+
+extern struct Token StandardTokens[200];
+extern struct TwoByte CalcVars[302];
+extern struct ConvertRule Replacements[39];