Skip to content

Commit

Permalink
Finishing up some things. I'm going to soon add the numparse, `strp…
Browse files Browse the repository at this point in the history
…arse`, and `chrparse` functions.
  • Loading branch information
silas-wr committed Jul 27, 2024
1 parent fd1ddae commit 2826326
Show file tree
Hide file tree
Showing 7 changed files with 317 additions and 8 deletions.
Empty file modified compile.sh
100644 → 100755
Empty file.
Binary file modified man/man1/crate.1.gz
Binary file not shown.
Binary file modified src/crate/crate
Binary file not shown.
2 changes: 1 addition & 1 deletion src/crate/crate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ vector<Token> lex(const string);
Program parse(const vector<Token>);
int compile(Program);
int run(string);
int strparse(string);
string strparse(string);
int numparse(string);
void nodes(Node, int);
void prog(Program);
157 changes: 154 additions & 3 deletions src/crate/extralex.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,156 @@
#include "crate.hpp"

// Reports whether `s` contains no character that toupper() would change.
// Characters without an uppercase form (digits, punctuation, ...) are accepted,
// and an empty string yields true.
bool isUpper(const string& s) {
    for (const unsigned char ch : s) {
        // A character that differs from its uppercase mapping is lowercase.
        if (toupper(ch) != ch) {
            return false;
        }
    }
    return true;
}
/* string strparse(string str)
{
// This code assumes that the string is syntactically correct, thanks to the lexer. All it does is produce the actual string value, not the escaped source text that represents it.
// Ex. if I call `strparse("\\0");`, I will get a string containing the NUL character.
// Ex. if I call `strparse("\\u1234");`, I will get a string containing the Unicode character U+1234.
char c;
string out = "";
string unicode = "";
string octal = "";
int uni = 4;
int oct = 3;
int uchar = 0;
int ochar = 0;
bool usequence = false;
bool osequence = false;
bool slash = false;
vector<string> octal {"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\
"\017", "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\027", "\030", "\031", "\032", "\033", "\034", "\035",\
"\036", "\037", "\040", "\041", "\042", "\043", "\044", "\045", "\046", "\047", "\050", "\051", "\052", "\053", "\054",\
"\055", "\056", "\057", "\060", "\061", "\062", "\063", "\064", "\065", "\066", "\067", "\070", "\071", "\072", "\073",\
"\074", "\075", "\076", "\077", "\100", "\101", "\102", "\103", "\104", "\105", "\106", "\107", "\110", "\111", "\112",\
"\113", "\114", "\115", "\116", "\117", "\120", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\
"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\
"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\
"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\
"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\
"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016",\ };
for (int i = 0; i < str.size(); i++) {
c = str[i];
switch (c) {
case '\\':
if (slash) {
out += c;
slash = false;
} else {
slash = true;
}
break;
case '0':
if (slash) {
out += '\0';
slash = false;
} else {
out += c;
}
case 'a':
if (slash) {
out += '\a';
slash = false;
} else {
out += c;
}
case '"':
if (slash) {
out += '\"';
slash = false;
} else {
// this won't run: it'll have been corrected in the lexer.
;
}
case '\'':
if (slash) {
out += '\'';
slash = false;
} else {
out += c;
}
case '?':
if (slash) {
out += '\?';
slash = false;
} else {
out += c;
}
case 'b':
if (slash) {
out += '\b';
slash = false;
} else {
out += c;
}
case 'f':
if (slash) {
out += '\f';
slash = false;
} else {
out += c;
}
case 'n':
if (slash) {
out += '\n';
slash = false;
} else {
out += c;
}
case 'r':
if (slash) {
out += '\r';
slash = false;
} else {
out += c;
}
case 't':
if (slash) {
out += '\t';
slash = false;
} else {
out += c;
}
case 'v':
if (slash) {
out += '\v';
slash = false;
} else {
out += c;
}
case 'u':
if (slash) {
usequence = true;
} else {
out += c;
}
case 'o':
if (slash) {
osequence = true;
} else {
out += c;
}
default:
if (slash & !usequence & !osequence) {
cout << "Invalid escape sequence \\" << c << " .";
exit(1);
} else if (usequence) {
} else if (osequence) {
if (ochars.find(c) != 1) {
oct--;
if (oct == 0) {
oct = 3;
out += ""
}
} else {
cout << "Invalid octal sequence with " << c << " .";
exit(1);
}
} else {
out += c;
}
}
} */
164 changes: 161 additions & 3 deletions src/crate/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ vector<Token> lex(const string src)

bool unicode = false;
bool octal = false;
bool done = false;

int uni = 4;
int oct = 3;
Expand Down Expand Up @@ -155,6 +156,11 @@ vector<Token> lex(const string src)
load_type = "";
ok = false;
slash = false;
} else if (load_type == "char") {
cout << "[" << row << ", " << col << "] Unexpected EOL (unfinished-chr-with-eol)";
load_type = "";
ok = false;
slash = false;
}
col = 0;
eol = true;
Expand All @@ -169,7 +175,6 @@ vector<Token> lex(const string src)
}
// string
else if (load_type == "string") {
load_var += c;
if (c == '\\') {
if (slash) {
slash = false;
Expand Down Expand Up @@ -224,11 +229,99 @@ vector<Token> lex(const string src)
cur.row = row;
cur.col = col;
tlist.push_back(cur);

load_type = "";
load_var = "";
}
}

if (load_type == "") {
// no more string
;
} else {
load_var += c;
}
}
}
// character
else if (load_type == "char") {
if (done == true & c != '\'') {
cout << "[" << row << ", " << col << "] Expected end of character (expect-char-end)";
ok = false;
load_type = "";
load_var = "";
}

if (c == '\\') {
if (slash) {
slash = false;
done = true;
} else {
slash = true;
}
} else {
if (slash) {
if (c == 'u') {
unicode = true;
} else if (c == 'o') {
octal = true;
} else {
if (unicode) {
if (hex.find(c) != -1) {
uni--;
if (uni == 0) {
unicode = false;
uni = 4;
}
} else {
unicode = false;
uni = 4;
load_type = "";
load_var = "";

cout << "[" << row << ", " << col << "] Non-hexadecimal character in unicode sequence (bad-unicode).";
}
} else if (octal) {
if (octl.find(c) != -1) {
oct--;
if (oct == 0) {
octal = false;
oct = 3;
}
} else {
octal = false;
oct = 3;
load_type = "";
load_var = "";

load_var += c;
cout << "[" << row << ", " << col << "] Non-octal character in octal sequence (bad-octal).";
}
} else {
slash = false;
done = true;
}
}
} else {
if (c == '\'') {
cur.ttype = CHR;
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);

done = false;
load_type = "";
load_var = "";
}
}

if (load_type == "") {
// no more string
;
} else {
load_var += c;
}
}
}
// start of comment
else if (c == '$') {
Expand Down Expand Up @@ -361,7 +454,72 @@ vector<Token> lex(const string src)
load_type = "";
load_var = "";
load_type = "string";
load_var += c;
}
// start of character
else if (c == '\'') {
if (load_type == "") {
;
} else if (load_type == "alpha") {
if (keys.find(load_var) != keys.end()) {
cur.ttype = keys[load_var];
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
} else {
cur.ttype = ID;
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
}
} else if (load_type == "int") {
cur.ttype = INT;
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
} else if (load_type == "float") {
cur.ttype = FLOAT;
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
} else if (load_type == "operational") {
if (ops.find(load_var) != ops.end()) {
cur.ttype = ops[load_var];
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
} else {
cout << "[" << row << ", " << col << "] Invalid operator " << load_var << " .\n";
ok = false;
load_type = "";
load_var = "";
}
} else if (load_type == "period") {
cur.ttype = ARGS;
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
} else if (load_type == "splat") {
cur.ttype = KWARGS;
cur.value = load_var;
cur.row = row;
cur.col = col;
tlist.push_back(cur);
} else {
cout << "[" << row << ", " << col << "] we're so sorry. something went wrong with the lexical analyzer. \n\tplease notify me at silas-wr/crate on github.\n";
ok = false; // make it uncompilable
load_type = "";
load_var = "";
}

load_type = "";
load_var = "";
load_type = "char";
}
// alphabetical
else if (alphabet.find(c) != -1) {
Expand Down
2 changes: 1 addition & 1 deletion src/crate/testlex.crate
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ $ params
...

$ objects
$ 'a'
'a'
"hello"
$ 1
$ 12
Expand Down

0 comments on commit 2826326

Please sign in to comment.