// This scanner uses the recursive descent method. // // The char pointers token_str and scan_str are pointers to the input string as // in the following example. // // | g | a | m | m | a | | a | l | p | h | a | // ^ ^ // token_str scan_str // // The char pointer token_buf points to a malloc buffer. // // | g | a | m | m | a | \0 | // ^ // token_buf #include "stdafx.h" #include "defs.h" #define T_INTEGER 1001 #define T_DOUBLE 1002 #define T_SYMBOL 1003 #define T_FUNCTION 1004 #define T_NEWLINE 1006 #define T_STRING 1007 #define T_GTEQ 1008 #define T_LTEQ 1009 #define T_EQ 1010 static void scan_expression(void); static void scan_term(void); static void scan_power(void); static void scan_factor(void); static void scan_symbol(void); static void scan_function_call(void); static void scan_subexpr(void); static void get_next_token(void); static void get_token(void); static void scan_stmt(void); static void scan_string(void); static void scan_relation(void); static void error(char *); static void update_token_buf(char *, char *); static int token, newline_flag, meta_mode; static char *input_str, *scan_str, *token_str, *token_buf; // Returns number of chars scanned and expr on stack. // Returns zero when nothing left to scan. int scan(char *s) { meta_mode = 0; expanding++; input_str = s; scan_str = s; get_next_token(); if (token == 0) { push(symbol(NIL)); expanding--; return 0; } scan_stmt(); expanding--; return (int) (token_str - input_str); } int scan_meta(char *s) { meta_mode = 1; expanding++; input_str = s; scan_str = s; get_next_token(); if (token == 0) { push(symbol(NIL)); expanding--; return 0; } scan_stmt(); expanding--; return (int) (token_str - input_str); } static void scan_stmt(void) { scan_relation(); if (token == '=') { get_next_token(); push_symbol(SETQ); swap(); scan_relation(); list(3); } } static void scan_relation(void) { scan_expression(); switch (token) { case T_EQ: push_symbol(TESTEQ); swap(); get_next_token(); scan_expression(); list(3); break; case T_LTEQ: push_symbol(TESTLE); swap(); get_next_token(); scan_expression(); list(3); break; case T_GTEQ: push_symbol(TESTGE); swap(); get_next_token(); scan_expression(); list(3); break; case '<': push_symbol(TESTLT); swap(); get_next_token(); scan_expression(); list(3); break; case '>': push_symbol(TESTGT); swap(); get_next_token(); scan_expression(); list(3); break; default: break; } } static void scan_expression(void) { int h = tos; switch (token) { case '+': get_next_token(); scan_term(); break; case '-': get_next_token(); scan_term(); negate(); break; default: scan_term(); break; } while (newline_flag == 0 && (token == '+' || token == '-')) { if (token == '+') { get_next_token(); scan_term(); } else { get_next_token(); scan_term(); negate(); } } if (tos - h > 1) { list(tos - h); push_symbol(ADD); swap(); cons(); } } static int is_factor(void) { switch (token) { case '*': case '/': return 1; case '(': case T_SYMBOL: case T_FUNCTION: case T_INTEGER: case T_DOUBLE: case T_STRING: if (newline_flag) { // implicit mul can't cross line scan_str = token_str; // better error display return 0; } else return 1; default: break; } return 0; } static void scan_term(void) { int h = tos; scan_power(); // discard integer 1 if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1)) pop(); while (is_factor()) { if (token == '*') { get_next_token(); scan_power(); } else if (token == '/') { get_next_token(); scan_power(); inverse(); } else scan_power(); // fold constants if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1])) multiply(); // discard integer 1 if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1)) pop(); } if (h == tos) push_integer(1); else if (tos - h > 1) { list(tos - h); push_symbol(MULTIPLY); swap(); cons(); } } static void scan_power(void) { scan_factor(); if (token == '^') { get_next_token(); push_symbol(POWER); swap(); scan_power(); list(3); } } static void scan_factor(void) { int h; h = tos; if (token == '(') scan_subexpr(); else if (token == T_SYMBOL) scan_symbol(); else if (token == T_FUNCTION) scan_function_call(); else if (token == T_INTEGER) { bignum_scan_integer(token_buf); get_next_token(); } else if (token == T_DOUBLE) { bignum_scan_float(token_buf); get_next_token(); } else if (token == T_STRING) scan_string(); else error("syntax error"); // index if (token == '[') { get_next_token(); push_symbol(INDEX); swap(); scan_expression(); while (token == ',') { get_next_token(); scan_expression(); } if (token != ']') error("] expected"); get_next_token(); list(tos - h); } while (token == '!') { get_next_token(); push_symbol(FACTORIAL); swap(); list(2); } } static void scan_symbol(void) { if (token != T_SYMBOL) error("symbol expected"); if (meta_mode && strlen(token_buf) == 1) switch (token_buf[0]) { case 'a': push(symbol(METAA)); break; case 'b': push(symbol(METAB)); break; case 'x': push(symbol(METAX)); break; default: push(usr_symbol(token_buf)); break; } else push(usr_symbol(token_buf)); get_next_token(); } static void scan_string(void) { new_string(token_buf); get_next_token(); } static void scan_function_call(void) { int n = 1; U *p; p = usr_symbol(token_buf); if (p == symbol(SYMBOL_D)) push_symbol(DERIVATIVE); else push(p); get_next_token(); // function name get_next_token(); // left paren if (token != ')') { scan_stmt(); n++; while (token == ',') { get_next_token(); scan_stmt(); n++; } } if (token != ')') error(") expected"); get_next_token(); list(n); } // scan subexpression static void scan_subexpr(void) { int n; if (token != '(') error("( expected"); get_next_token(); scan_stmt(); if (token == ',') { n = 1; while (token == ',') { get_next_token(); scan_stmt(); n++; } build_tensor(n); } if (token != ')') error(") expected"); get_next_token(); } static void error(char *errmsg) { printchar('\n'); // try not to put question mark on orphan line while (input_str != scan_str) { if ((*input_str == '\n' || *input_str == '\r') && input_str + 1 == scan_str) break; printchar(*input_str++); } printstr(" ? "); while (*input_str && (*input_str != '\n' && *input_str != '\r')) printchar(*input_str++); printchar('\n'); stop(errmsg); } // There are n expressions on the stack, possibly tensors. // // This function assembles the stack expressions into a single tensor. // // For example, at the top level of the expression ((a,b),(c,d)), the vectors // (a,b) and (c,d) would be on the stack. void build_tensor(int n) { // int i, j, k, ndim, nelem; int i; U **s; save(); s = stack + tos - n; p2 = alloc_tensor(n); p2->u.tensor->ndim = 1; p2->u.tensor->dim[0] = n; for (i = 0; i < n; i++) p2->u.tensor->elem[i] = s[i]; tos -= n; push(p2); restore(); } static void get_next_token() { newline_flag = 0; while (1) { get_token(); if (token != T_NEWLINE) break; newline_flag = 1; } } static void get_token(void) { // skip spaces while (isspace(*scan_str)) { if (*scan_str == '\n' || *scan_str == '\r') { token = T_NEWLINE; scan_str++; return; } scan_str++; } token_str = scan_str; // end of string? if (*scan_str == 0) { token = 0; return; } // number? if (isdigit(*scan_str) || *scan_str == '.') { while (isdigit(*scan_str)) scan_str++; if (*scan_str == '.') { scan_str++; while (isdigit(*scan_str)) scan_str++; if (*scan_str == 'e' && (scan_str[1] == '+' || scan_str[1] == '-' || isdigit(scan_str[1]))) { scan_str += 2; while (isdigit(*scan_str)) scan_str++; } token = T_DOUBLE; } else token = T_INTEGER; update_token_buf(token_str, scan_str); return; } // symbol? if (isalpha(*scan_str)) { while (isalnum(*scan_str)) scan_str++; if (*scan_str == '(') token = T_FUNCTION; else token = T_SYMBOL; update_token_buf(token_str, scan_str); return; } // string ? if (*scan_str == '"') { scan_str++; while (*scan_str != '"') { if (*scan_str == 0 || *scan_str == '\n' || *scan_str == '\r') error("runaway string"); scan_str++; } scan_str++; token = T_STRING; update_token_buf(token_str + 1, scan_str - 1); return; } // comment? if (*scan_str == '#' || *scan_str == '-' && scan_str[1] == '-') { while (*scan_str && *scan_str != '\n' && *scan_str != '\r') scan_str++; if (*scan_str) scan_str++; token = T_NEWLINE; return; } // relational operator? if (*scan_str == '=' && scan_str[1] == '=') { scan_str += 2; token = T_EQ; return; } if (*scan_str == '<' && scan_str[1] == '=') { scan_str += 2; token = T_LTEQ; return; } if (*scan_str == '>' && scan_str[1] == '=') { scan_str += 2; token = T_GTEQ; return; } // single char token token = *scan_str++; } static void update_token_buf(char *a, char *b) { int n; if (token_buf) free(token_buf); n = (int) (b - a); token_buf = (char *) malloc(n + 1); if (token_buf == 0) stop("malloc failure"); strncpy(token_buf, a, n); token_buf[n] = 0; } static char *s[] = { "a^^b", "a^^ ? b\nStop: syntax error", "(a+b", "(a+b ? \nStop: ) expected", "quote(1/(x*log(a*x)))", // test case A "1/(x*log(a*x))", "\"hello", "\"hello ? \nStop: runaway string", #if 0 // does not work anymore because of "tensor expected" error // implicit mul cannot cross newline "a[b\nc]", "a[b ? \n] expected", "a[b*\nc]", "a[b*c]", "a[b\n*c]", "a[b*c]", #endif // make sure question mark can appear after newlines "a+\nb+\nc+", "a+\nb+\nc+ ? \nStop: syntax error", // this bug fixed in version 30 (used to give one result, 14) "2+2\n(3+3)", "4\n6", // plus and minus cannot cross newline "1\n-1", "1\n-1", "1\n+1", "1\n1", }; void test_scan(void) { test(__FILE__, s, sizeof s / sizeof (char *)); } // Notes: // // Formerly add() and multiply() were used to construct expressions but // this preevaluation caused problems. // // For example, suppose A has the floating point value inf. // // Before, the expression A/A resulted in 1 because the scanner would // divide the symbols. // // After removing add() and multiply(), A/A results in nan which is the // correct result. // // The functions negate() and inverse() are used but they do not cause // problems with preevaluation of symbols.