705 lines
11 KiB
C++
705 lines
11 KiB
C++
// This scanner uses the recursive descent method.
|
|
//
|
|
// The char pointers token_str and scan_str are pointers to the input string as
|
|
// in the following example.
|
|
//
|
|
// | g | a | m | m | a | | a | l | p | h | a |
|
|
// ^ ^
|
|
// token_str scan_str
|
|
//
|
|
// The char pointer token_buf points to a malloc buffer.
|
|
//
|
|
// | g | a | m | m | a | \0 |
|
|
// ^
|
|
// token_buf
|
|
|
|
#include "stdafx.h"
|
|
#include "defs.h"
|
|
|
|
#define T_INTEGER 1001
|
|
#define T_DOUBLE 1002
|
|
#define T_SYMBOL 1003
|
|
#define T_FUNCTION 1004
|
|
#define T_NEWLINE 1006
|
|
#define T_STRING 1007
|
|
#define T_GTEQ 1008
|
|
#define T_LTEQ 1009
|
|
#define T_EQ 1010
|
|
|
|
static void scan_expression(void);
|
|
static void scan_term(void);
|
|
static void scan_power(void);
|
|
static void scan_factor(void);
|
|
static void scan_symbol(void);
|
|
static void scan_function_call(void);
|
|
static void scan_subexpr(void);
|
|
static void get_next_token(void);
|
|
static void get_token(void);
|
|
static void scan_stmt(void);
|
|
static void scan_string(void);
|
|
static void scan_relation(void);
|
|
static void error(char *);
|
|
static void update_token_buf(char *, char *);
|
|
|
|
static int token, newline_flag;
|
|
static char *input_str, *scan_str, *token_str, *token_buf;
|
|
|
|
// Returns number of chars scanned and expr on stack.
|
|
|
|
// Returns zero when nothing left to scan.
|
|
|
|
int
|
|
scan(char *s)
|
|
{
|
|
int x;
|
|
x = expanding;
|
|
expanding = 1;
|
|
input_str = s;
|
|
scan_str = s;
|
|
get_next_token();
|
|
if (token == 0) {
|
|
push(nil);
|
|
expanding = x;
|
|
return 0;
|
|
}
|
|
scan_stmt();
|
|
expanding = x;
|
|
return (int) (token_str - input_str);
|
|
}
|
|
|
|
static void
|
|
scan_stmt(void)
|
|
{
|
|
scan_relation();
|
|
if (token == '=') {
|
|
get_next_token();
|
|
push_symbol(SETQ);
|
|
swap();
|
|
scan_relation();
|
|
list(3);
|
|
}
|
|
}
|
|
|
|
static void
|
|
scan_relation(void)
|
|
{
|
|
scan_expression();
|
|
switch (token) {
|
|
case T_EQ:
|
|
push_symbol(TESTEQ);
|
|
swap();
|
|
get_next_token();
|
|
scan_expression();
|
|
list(3);
|
|
break;
|
|
case T_LTEQ:
|
|
push_symbol(TESTLE);
|
|
swap();
|
|
get_next_token();
|
|
scan_expression();
|
|
list(3);
|
|
break;
|
|
case T_GTEQ:
|
|
push_symbol(TESTGE);
|
|
swap();
|
|
get_next_token();
|
|
scan_expression();
|
|
list(3);
|
|
break;
|
|
case '<':
|
|
push_symbol(TESTLT);
|
|
swap();
|
|
get_next_token();
|
|
scan_expression();
|
|
list(3);
|
|
break;
|
|
case '>':
|
|
push_symbol(TESTGT);
|
|
swap();
|
|
get_next_token();
|
|
scan_expression();
|
|
list(3);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
scan_expression(void)
|
|
{
|
|
int h = tos;
|
|
switch (token) {
|
|
case '+':
|
|
get_next_token();
|
|
scan_term();
|
|
break;
|
|
case '-':
|
|
get_next_token();
|
|
scan_term();
|
|
negate();
|
|
break;
|
|
default:
|
|
scan_term();
|
|
break;
|
|
}
|
|
while (newline_flag == 0 && (token == '+' || token == '-')) {
|
|
if (token == '+') {
|
|
get_next_token();
|
|
scan_term();
|
|
} else {
|
|
get_next_token();
|
|
scan_term();
|
|
negate();
|
|
}
|
|
}
|
|
if (tos - h > 1) {
|
|
list(tos - h);
|
|
push_symbol(ADD);
|
|
swap();
|
|
cons();
|
|
}
|
|
}
|
|
|
|
static int
|
|
is_factor(void)
|
|
{
|
|
switch (token) {
|
|
case '*':
|
|
case '/':
|
|
return 1;
|
|
case '(':
|
|
case T_SYMBOL:
|
|
case T_FUNCTION:
|
|
case T_INTEGER:
|
|
case T_DOUBLE:
|
|
case T_STRING:
|
|
if (newline_flag) { // implicit mul can't cross line
|
|
scan_str = token_str; // better error display
|
|
return 0;
|
|
} else
|
|
return 1;
|
|
default:
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
scan_term(void)
|
|
{
|
|
int h = tos;
|
|
|
|
scan_power();
|
|
|
|
// for test case A
|
|
// added is_factor() for test case sin(1.0), have to keep double
|
|
|
|
if (is_factor() && tos > h && isplusone(stack[tos - 1]))
|
|
pop();
|
|
|
|
while (is_factor()) {
|
|
if (token == '*') {
|
|
get_next_token();
|
|
scan_power();
|
|
} else if (token == '/') {
|
|
get_next_token();
|
|
scan_power();
|
|
inverse();
|
|
} else
|
|
scan_power();
|
|
|
|
// fold constants
|
|
|
|
if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1]))
|
|
multiply();
|
|
|
|
if (tos > h && isplusone(stack[tos - 1]))
|
|
pop();
|
|
}
|
|
|
|
if (h == tos)
|
|
push(_one);
|
|
else if (tos - h > 1) {
|
|
list(tos - h);
|
|
push_symbol(MULTIPLY);
|
|
swap();
|
|
cons();
|
|
}
|
|
}
|
|
|
|
static void
|
|
scan_power(void)
|
|
{
|
|
scan_factor();
|
|
if (token == '^') {
|
|
get_next_token();
|
|
push_symbol(POWER);
|
|
swap();
|
|
scan_power();
|
|
list(3);
|
|
}
|
|
}
|
|
|
|
static void
|
|
scan_factor(void)
|
|
{
|
|
int h;
|
|
|
|
h = tos;
|
|
|
|
if (token == '(')
|
|
scan_subexpr();
|
|
else if (token == T_SYMBOL)
|
|
scan_symbol();
|
|
else if (token == T_FUNCTION)
|
|
scan_function_call();
|
|
else if (token == T_INTEGER) {
|
|
scan_integer(token_buf);
|
|
get_next_token();
|
|
} else if (token == T_DOUBLE) {
|
|
scan_float(token_buf);
|
|
get_next_token();
|
|
} else if (token == T_STRING)
|
|
scan_string();
|
|
else
|
|
error(NULL);
|
|
|
|
// index
|
|
|
|
if (token == '[') {
|
|
get_next_token();
|
|
push_symbol(INDEX);
|
|
swap();
|
|
scan_expression();
|
|
while (token == ',') {
|
|
get_next_token();
|
|
scan_expression();
|
|
}
|
|
if (token != ']')
|
|
error("] expected");
|
|
get_next_token();
|
|
list(tos - h);
|
|
}
|
|
|
|
while (token == '!') {
|
|
get_next_token();
|
|
push_symbol(FACTORIAL);
|
|
swap();
|
|
list(2);
|
|
}
|
|
}
|
|
|
|
static void
|
|
scan_symbol(void)
|
|
{
|
|
if (token != T_SYMBOL)
|
|
error("symbol expected");
|
|
push(get_symbol(token_buf));
|
|
get_next_token();
|
|
}
|
|
|
|
static void
|
|
scan_string(void)
|
|
{
|
|
new_string(token_buf);
|
|
get_next_token();
|
|
}
|
|
|
|
static void
|
|
scan_function_call(void)
|
|
{
|
|
int n = 1;
|
|
U *p;
|
|
p = get_symbol(token_buf);
|
|
if (p == symbol(SYMBOL_D))
|
|
push_symbol(DERIVATIVE);
|
|
else
|
|
push(p);
|
|
get_next_token(); // function name
|
|
get_next_token(); // left paren
|
|
if (token != ')') {
|
|
scan_stmt();
|
|
n++;
|
|
while (token == ',') {
|
|
get_next_token();
|
|
scan_stmt();
|
|
n++;
|
|
}
|
|
}
|
|
if (token != ')')
|
|
error(") expected");
|
|
get_next_token();
|
|
list(n);
|
|
}
|
|
|
|
// scan subexpression
|
|
|
|
static void
|
|
scan_subexpr(void)
|
|
{
|
|
int n;
|
|
if (token != '(')
|
|
error("( expected");
|
|
get_next_token();
|
|
scan_stmt();
|
|
if (token == ',') {
|
|
n = 1;
|
|
while (token == ',') {
|
|
get_next_token();
|
|
scan_stmt();
|
|
n++;
|
|
}
|
|
build_tensor(n);
|
|
}
|
|
if (token != ')')
|
|
error(") expected");
|
|
get_next_token();
|
|
}
|
|
|
|
static void
|
|
error(char *errmsg)
|
|
{
|
|
printchar('\n');
|
|
|
|
// try not to put question mark on orphan line
|
|
|
|
while (input_str != scan_str) {
|
|
if ((*input_str == '\n' || *input_str == '\r') && input_str + 1 == scan_str)
|
|
break;
|
|
printchar(*input_str++);
|
|
}
|
|
|
|
printstr(" ? ");
|
|
|
|
while (*input_str && (*input_str != '\n' && *input_str != '\r'))
|
|
printchar(*input_str++);
|
|
|
|
printchar('\n');
|
|
|
|
stop(errmsg);
|
|
}
|
|
|
|
// There are n expressions on the stack, possibly tensors.
|
|
//
|
|
// This function assembles the stack expressions into a single tensor.
|
|
//
|
|
// For example, at the top level of the expression ((a,b),(c,d)), the vectors
|
|
// (a,b) and (c,d) would be on the stack.
|
|
|
|
void
|
|
build_tensor(int n)
|
|
{
|
|
// int i, j, k, ndim, nelem;
|
|
|
|
int i;
|
|
|
|
U **s;
|
|
|
|
save();
|
|
|
|
s = stack + tos - n;
|
|
|
|
#if 0 // used to do this in scanner, now do tensor promotion in evaluator
|
|
|
|
//---------------------------------------------------------------------
|
|
//
|
|
// all of the expressions should have the same dimensions
|
|
//
|
|
//---------------------------------------------------------------------
|
|
|
|
p1 = s[0];
|
|
|
|
for (i = 1; i < n; i++) {
|
|
p2 = s[i];
|
|
if (!consistent(p1, p2))
|
|
error("inconsistent dimension");
|
|
}
|
|
|
|
if (istensor(p1)) {
|
|
|
|
//-------------------------------------------------------------
|
|
//
|
|
// these are the dimensions of the new tensor
|
|
//
|
|
//-------------------------------------------------------------
|
|
|
|
ndim = p1->u.tensor->ndim + 1;
|
|
|
|
if (ndim >= MAXDIM)
|
|
error("too many indices");
|
|
|
|
nelem = n * p1->u.tensor->nelem;
|
|
|
|
p2 = alloc_tensor(nelem);
|
|
|
|
p2->u.tensor->ndim = ndim;
|
|
|
|
p2->u.tensor->dim[0] = n;
|
|
|
|
for (i = 0; i < p1->u.tensor->ndim; i++)
|
|
p2->u.tensor->dim[i + 1] = p1->u.tensor->dim[i];
|
|
|
|
k = 0;
|
|
for (i = 0; i < n; i++)
|
|
for (j = 0; j < p1->u.tensor->nelem; j++)
|
|
p2->u.tensor->elem[k++] = s[i]->u.tensor->elem[j];
|
|
|
|
} else {
|
|
|
|
p2 = alloc_tensor(n);
|
|
|
|
p2->u.tensor->ndim = 1;
|
|
|
|
p2->u.tensor->dim[0] = n;
|
|
|
|
for (i = 0; i < n; i++)
|
|
p2->u.tensor->elem[i] = s[i];
|
|
}
|
|
#else
|
|
p2 = alloc_tensor(n);
|
|
p2->u.tensor->ndim = 1;
|
|
p2->u.tensor->dim[0] = n;
|
|
for (i = 0; i < n; i++)
|
|
p2->u.tensor->elem[i] = s[i];
|
|
#endif
|
|
|
|
tos -= n;
|
|
|
|
push(p2);
|
|
|
|
restore();
|
|
}
|
|
|
|
int
|
|
consistent(U *p1, U *p2)
|
|
{
|
|
int i;
|
|
|
|
if (istensor(p1) && istensor(p2)) {
|
|
if (p1->u.tensor->ndim != p2->u.tensor->ndim)
|
|
return 0;
|
|
for (i = 0; i < p1->u.tensor->ndim; i++)
|
|
if (p1->u.tensor->dim[i] != p2->u.tensor->dim[i])
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
if (istensor(p1) || istensor(p2))
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void
|
|
get_next_token()
|
|
{
|
|
newline_flag = 0;
|
|
while (1) {
|
|
get_token();
|
|
if (token != T_NEWLINE)
|
|
break;
|
|
newline_flag = 1;
|
|
}
|
|
}
|
|
|
|
static void
|
|
get_token(void)
|
|
{
|
|
// skip spaces
|
|
|
|
while (isspace(*scan_str)) {
|
|
if (*scan_str == '\n' || *scan_str == '\r') {
|
|
token = T_NEWLINE;
|
|
scan_str++;
|
|
return;
|
|
}
|
|
scan_str++;
|
|
}
|
|
|
|
token_str = scan_str;
|
|
|
|
// end of string?
|
|
|
|
if (*scan_str == 0) {
|
|
token = 0;
|
|
return;
|
|
}
|
|
|
|
// number?
|
|
|
|
if (isdigit(*scan_str) || *scan_str == '.') {
|
|
while (isdigit(*scan_str))
|
|
scan_str++;
|
|
if (*scan_str == '.') {
|
|
scan_str++;
|
|
while (isdigit(*scan_str))
|
|
scan_str++;
|
|
if (*scan_str == 'e' && (scan_str[1] == '+' || scan_str[1] == '-' || isdigit(scan_str[1]))) {
|
|
scan_str += 2;
|
|
while (isdigit(*scan_str))
|
|
scan_str++;
|
|
}
|
|
token = T_DOUBLE;
|
|
} else
|
|
token = T_INTEGER;
|
|
update_token_buf(token_str, scan_str);
|
|
return;
|
|
}
|
|
|
|
// symbol?
|
|
|
|
if (isalpha(*scan_str)) {
|
|
while (isalnum(*scan_str))
|
|
scan_str++;
|
|
if (*scan_str == '(')
|
|
token = T_FUNCTION;
|
|
else
|
|
token = T_SYMBOL;
|
|
update_token_buf(token_str, scan_str);
|
|
return;
|
|
}
|
|
|
|
// string ?
|
|
|
|
if (*scan_str == '"') {
|
|
scan_str++;
|
|
while (*scan_str != '"') {
|
|
if (*scan_str == 0 || *scan_str == '\n' || *scan_str == '\r')
|
|
error("\" expected");
|
|
scan_str++;
|
|
}
|
|
scan_str++;
|
|
token = T_STRING;
|
|
update_token_buf(token_str + 1, scan_str - 1);
|
|
return;
|
|
}
|
|
|
|
// comment?
|
|
|
|
if (*scan_str == '#') {
|
|
while (*scan_str && *scan_str != '\n' && *scan_str != '\r')
|
|
scan_str++;
|
|
if (*scan_str)
|
|
scan_str++;
|
|
token = T_NEWLINE;
|
|
return;
|
|
}
|
|
|
|
// relational operator?
|
|
|
|
if (*scan_str == '=' && scan_str[1] == '=') {
|
|
scan_str += 2;
|
|
token = T_EQ;
|
|
return;
|
|
}
|
|
|
|
if (*scan_str == '<' && scan_str[1] == '=') {
|
|
scan_str += 2;
|
|
token = T_LTEQ;
|
|
return;
|
|
}
|
|
|
|
if (*scan_str == '>' && scan_str[1] == '=') {
|
|
scan_str += 2;
|
|
token = T_GTEQ;
|
|
return;
|
|
}
|
|
|
|
// single char token
|
|
|
|
token = *scan_str++;
|
|
}
|
|
|
|
static void
|
|
update_token_buf(char *a, char *b)
|
|
{
|
|
int n;
|
|
|
|
if (token_buf)
|
|
free(token_buf);
|
|
|
|
n = (int) (b - a);
|
|
|
|
token_buf = (char *) malloc(n + 1);
|
|
|
|
if (token_buf == 0)
|
|
stop("malloc failure");
|
|
|
|
strncpy(token_buf, a, n);
|
|
|
|
token_buf[n] = 0;
|
|
}
|
|
|
|
static char *s[] = {
|
|
|
|
"a^^b",
|
|
"a^^ ? b",
|
|
|
|
"(a+b",
|
|
"(a+b ? \n"
|
|
") expected",
|
|
|
|
"quote(1/(x*log(a*x)))", // test case A
|
|
"1/(x*log(a*x))",
|
|
|
|
"\"hello",
|
|
"\"hello ? \n\" expected",
|
|
|
|
#if 0 // does not work anymore because of "tensor expected" error
|
|
|
|
// implicit mul cannot cross newline
|
|
|
|
"a[b\nc]",
|
|
"a[b ? \n] expected",
|
|
|
|
"a[b*\nc]",
|
|
"a[b*c]",
|
|
|
|
"a[b\n*c]",
|
|
"a[b*c]",
|
|
|
|
#endif
|
|
|
|
// make sure question mark can appear after newlines
|
|
|
|
"a+\nb+\nc+",
|
|
"a+\nb+\nc+ ? ",
|
|
|
|
// this bug fixed in version 30 (used to give one result, 14)
|
|
|
|
"2+2\n(3+3)",
|
|
"4\n6",
|
|
|
|
// plus and minus cannot cross newline
|
|
|
|
"1\n-1",
|
|
"1\n-1",
|
|
|
|
"1\n+1",
|
|
"1\n1",
|
|
};
|
|
|
|
void
|
|
test_scan(void)
|
|
{
|
|
test(__FILE__, s, sizeof s / sizeof (char *));
|
|
}
|
|
|
|
// Notes:
|
|
//
|
|
// Formerly add() and multiply() were used to construct expressions but
|
|
// this preevaluation caused problems.
|
|
//
|
|
// For example, suppose A has the floating point value inf.
|
|
//
|
|
// Before, the expression A/A resulted in 1 because the scanner would
|
|
// divide the symbols.
|
|
//
|
|
// After removing add() and multiply(), A/A results in nan which is the
|
|
// correct result.
|
|
//
|
|
// The functions negate() and inverse() are used but they do not cause
|
|
// problems with preevaluation of symbols.
|
|
|