eigenmath/scan.cpp

// This scanner uses the recursive descent method.
//
// The char pointers token_str and scan_str are pointers to the input string as
// in the following example.
//
//	| g | a | m | m | a |   | a | l | p | h | a |
//	  ^                   ^
//	  token_str           scan_str
//
// The char pointer token_buf points to a malloc buffer.
//
//	| g | a | m | m | a | \0 |
//	  ^
//	  token_buf

#include "stdafx.h"
#include "defs.h"

#define T_INTEGER 1001
#define T_DOUBLE 1002
#define T_SYMBOL 1003
#define T_FUNCTION 1004
#define T_NEWLINE 1006
#define T_STRING 1007
#define T_GTEQ 1008
#define T_LTEQ 1009
#define T_EQ 1010

static void scan_expression(void);
static void scan_term(void);
static void scan_power(void);
static void scan_factor(void);
static void scan_symbol(void);
static void scan_function_call(void);
static void scan_subexpr(void);
static void get_next_token(void);
static void get_token(void);
static void scan_stmt(void);
static void scan_string(void);
static void scan_relation(void);
static void error(char *);
static void update_token_buf(char *, char *);

static int token, newline_flag;
static char *input_str, *scan_str, *token_str, *token_buf;

// Returns number of chars scanned and expr on stack.

// Returns zero when nothing left to scan.

int
scan(char *s)
{
	int x;
	x = expanding;
	expanding = 1;
	input_str = s;
	scan_str = s;
	get_next_token();
	if (token == 0) {
		push(nil);
		expanding = x;
		return 0;
	}
	scan_stmt();
	expanding = x;
	return (int) (token_str - input_str);
}

static void
scan_stmt(void)
{
	scan_relation();
	if (token == '=') {
		get_next_token();
		push_symbol(SETQ);
		swap();
		scan_relation();
		list(3);
	}
}

static void
scan_relation(void)
{
	scan_expression();
	switch (token) {
	case T_EQ:
		push_symbol(TESTEQ);
		swap();
		get_next_token();
		scan_expression();
		list(3);
		break;
	case T_LTEQ:
		push_symbol(TESTLE);
		swap();
		get_next_token();
		scan_expression();
		list(3);
		break;
	case T_GTEQ:
		push_symbol(TESTGE);
		swap();
		get_next_token();
		scan_expression();
		list(3);
		break;
	case '<':
		push_symbol(TESTLT);
		swap();
		get_next_token();
		scan_expression();
		list(3);
		break;
	case '>':
		push_symbol(TESTGT);
		swap();
		get_next_token();
		scan_expression();
		list(3);
		break;
	default:
		break;
	}
}

static void
scan_expression(void)
{
	int h = tos;
	switch (token) {
	case '+':
		get_next_token();
		scan_term();
		break;
	case '-':
		get_next_token();
		scan_term();
		negate();
		break;
	default:
		scan_term();
		break;
	}
	while (newline_flag == 0 && (token == '+' || token == '-')) {
		if (token == '+') {
			get_next_token();
			scan_term();
		} else {
			get_next_token();
			scan_term();
			negate();
		}
	}
	if (tos - h > 1) {
		list(tos - h);
		push_symbol(ADD);
		swap();
		cons();
	}
}

static int
is_factor(void)
{
	switch (token) {
	case '*':
	case '/':
		return 1;
	case '(':
	case T_SYMBOL:
	case T_FUNCTION:
	case T_INTEGER:
	case T_DOUBLE:
	case T_STRING:
		if (newline_flag) {	// implicit mul can't cross line
			scan_str = token_str;	// better error display
			return 0;
		} else
			return 1;
	default:
		break;
	}
	return 0;
}

static void
scan_term(void)
{
	int h = tos;

	scan_power();

	// for test case A
	// added is_factor() for test case sin(1.0), have to keep double

	if (is_factor() && tos > h && isplusone(stack[tos - 1]))
		pop();

	while (is_factor()) {
		if (token == '*') {
			get_next_token();
			scan_power();
		} else if (token == '/') {
			get_next_token();
			scan_power();
			inverse();
		} else
			scan_power();

		// fold constants

		if (tos > h + 1 && isnum(stack[tos - 2]) && isnum(stack[tos - 1]))
			multiply();

		if (tos > h && isplusone(stack[tos - 1]))
			pop();
	}

	if (h == tos)
		push(_one);
	else if (tos - h > 1) {
		list(tos - h);
		push_symbol(MULTIPLY);
		swap();
		cons();
	}
}

static void
scan_power(void)
{
	scan_factor();
	if (token == '^') {
		get_next_token();
		push_symbol(POWER);
		swap();
		scan_power();
		list(3);
	}
}

static void
scan_factor(void)
{
	int h;

	h = tos;

	if (token == '(')
		scan_subexpr();
	else if (token == T_SYMBOL)
		scan_symbol();
	else if (token == T_FUNCTION)
		scan_function_call();
	else if (token == T_INTEGER) {
		scan_integer(token_buf);
		get_next_token();
	} else if (token == T_DOUBLE) {
		scan_float(token_buf);
		get_next_token();
	} else if (token == T_STRING)
		scan_string();
	else
		error(NULL);

	// index

	if (token == '[') {
		get_next_token();
		push_symbol(INDEX);
		swap();
		scan_expression();
		while (token == ',') {
			get_next_token();
			scan_expression();
		}
		if (token != ']')
			error("] expected");
		get_next_token();
		list(tos - h);
	}

	while (token == '!') {
		get_next_token();
		push_symbol(FACTORIAL);
		swap();
		list(2);
	}
}

static void
scan_symbol(void)
{
	if (token != T_SYMBOL)
		error("symbol expected");
	push(get_symbol(token_buf));
	get_next_token();
}

static void
scan_string(void)
{
	new_string(token_buf);
	get_next_token();
}

static void
scan_function_call(void)
{
	int n = 1;
	U *p;
	p = get_symbol(token_buf);
	if (p == symbol(SYMBOL_D))
		push_symbol(DERIVATIVE);
	else
		push(p);
	get_next_token();	// function name
	get_next_token();	// left paren
	if (token != ')') {
		scan_stmt();
		n++;
		while (token == ',') {
			get_next_token();
			scan_stmt();
			n++;
		}
	}
	if (token != ')')
		error(") expected");
	get_next_token();
	list(n);
}

// scan subexpression

static void
scan_subexpr(void)
{
	int n;
	if (token != '(')
		error("( expected");
	get_next_token();
	scan_stmt();
	if (token == ',') {
		n = 1;
		while (token == ',') {
			get_next_token();
			scan_stmt();
			n++;
		}
		build_tensor(n);
	}
	if (token != ')')
		error(") expected");
	get_next_token();
}

static void
error(char *errmsg)
{
	printchar('\n');

	// try not to put question mark on orphan line

	while (input_str != scan_str) {
		if ((*input_str == '\n' || *input_str == '\r') && input_str + 1 == scan_str)
			break;
		printchar(*input_str++);
	}

	printstr(" ? ");

	while (*input_str && (*input_str != '\n' && *input_str != '\r'))
		printchar(*input_str++);

	printchar('\n');

	stop(errmsg);
}

// There are n expressions on the stack, possibly tensors.
//
// This function assembles the stack expressions into a single tensor.
//
// For example, at the top level of the expression ((a,b),(c,d)), the vectors
// (a,b) and (c,d) would be on the stack.

void
build_tensor(int n)
{
	// int i, j, k, ndim, nelem;

	int i;

	U **s;

	save();

	s = stack + tos - n;

#if 0	// used to do this in scanner, now do tensor promotion in evaluator

	//---------------------------------------------------------------------
	//
	//	all of the expressions should have the same dimensions
	//
	//---------------------------------------------------------------------

	p1 = s[0];

	for (i = 1; i < n; i++) {
		p2 = s[i];
		if (!consistent(p1, p2))
			error("inconsistent dimension");
	}

	if (istensor(p1)) {

		//-------------------------------------------------------------
		//
		//	these are the dimensions of the new tensor
		//
		//-------------------------------------------------------------

		ndim = p1->u.tensor->ndim + 1;

		if (ndim >= MAXDIM)
			error("too many indices");

		nelem = n * p1->u.tensor->nelem;

		p2 = alloc_tensor(nelem);

		p2->u.tensor->ndim = ndim;

		p2->u.tensor->dim[0] = n;

		for (i = 0; i < p1->u.tensor->ndim; i++)
			p2->u.tensor->dim[i + 1] = p1->u.tensor->dim[i];

		k = 0;
		for (i = 0; i < n; i++)
			for (j = 0; j < p1->u.tensor->nelem; j++)
				p2->u.tensor->elem[k++] = s[i]->u.tensor->elem[j];

	} else {

		p2 = alloc_tensor(n);

		p2->u.tensor->ndim = 1;

		p2->u.tensor->dim[0] = n;

		for (i = 0; i < n; i++)
			p2->u.tensor->elem[i] = s[i];
	}
#else
	p2 = alloc_tensor(n);
	p2->u.tensor->ndim = 1;
	p2->u.tensor->dim[0] = n;
	for (i = 0; i < n; i++)
		p2->u.tensor->elem[i] = s[i];
#endif

	tos -= n;

	push(p2);

	restore();
}

int
consistent(U *p1, U *p2)
{
	int i;

	if (istensor(p1) && istensor(p2)) {
		if (p1->u.tensor->ndim != p2->u.tensor->ndim)
			return 0;
		for (i = 0; i < p1->u.tensor->ndim; i++)
			if (p1->u.tensor->dim[i] != p2->u.tensor->dim[i])
				return 0;
		return 1;
	}

	if (istensor(p1) || istensor(p2))
		return 0;

	return 1;
}

static void
get_next_token()
{
	newline_flag = 0;
	while (1) {
		get_token();
		if (token != T_NEWLINE)
			break;
		newline_flag = 1;
	}
}

static void
get_token(void)
{
	// skip spaces

	while (isspace(*scan_str)) {
		if (*scan_str == '\n' || *scan_str == '\r') {
			token = T_NEWLINE;
			scan_str++;
			return;
		}
		scan_str++;
	}

	token_str = scan_str;

	// end of string?

	if (*scan_str == 0) {
		token = 0;
		return;
	}

	// number?

	if (isdigit(*scan_str) || *scan_str == '.') {
		while (isdigit(*scan_str))
			scan_str++;
		if (*scan_str == '.') {
			scan_str++;
			while (isdigit(*scan_str))
				scan_str++;
			if (*scan_str == 'e' && (scan_str[1] == '+' || scan_str[1] == '-' || isdigit(scan_str[1]))) {
				scan_str += 2;
				while (isdigit(*scan_str))
					scan_str++;
			}
			token = T_DOUBLE;
		} else
			token = T_INTEGER;
		update_token_buf(token_str, scan_str);
		return;
	}

	// symbol?

	if (isalpha(*scan_str)) {
		while (isalnum(*scan_str))
			scan_str++;
		if (*scan_str == '(')
			token = T_FUNCTION;
		else
			token = T_SYMBOL;
		update_token_buf(token_str, scan_str);
		return;
	}

	// string ?

	if (*scan_str == '"') {
		scan_str++;
		while (*scan_str != '"') {
			if (*scan_str == 0 || *scan_str == '\n' || *scan_str == '\r')
				error("\" expected");
			scan_str++;
		}
		scan_str++;
		token = T_STRING;
		update_token_buf(token_str + 1, scan_str - 1);
		return;
	}

	// comment?

	if (*scan_str == '#') {
		while (*scan_str && *scan_str != '\n' && *scan_str != '\r')
			scan_str++;
		if (*scan_str)
			scan_str++;
		token = T_NEWLINE;
		return;
	}

	// relational operator?

	if (*scan_str == '=' && scan_str[1] == '=') {
		scan_str += 2;
		token = T_EQ;
		return;
	}

	if (*scan_str == '<' && scan_str[1] == '=') {
		scan_str += 2;
		token = T_LTEQ;
		return;
	}

	if (*scan_str == '>' && scan_str[1] == '=') {
		scan_str += 2;
		token = T_GTEQ;
		return;
	}

	// single char token

	token = *scan_str++;
}

static void
update_token_buf(char *a, char *b)
{
	int n;

	if (token_buf)
		free(token_buf);

	n = (int) (b - a);

	token_buf = (char *) malloc(n + 1);

	if (token_buf == 0)
		stop("malloc failure");

	strncpy(token_buf, a, n);

	token_buf[n] = 0;
}

static char *s[] = {

	"a^^b",
	"a^^ ? b",

	"(a+b",
	"(a+b ? \n"
	") expected",

	"quote(1/(x*log(a*x)))",	// test case A
	"1/(x*log(a*x))",

	"\"hello",
	"\"hello ? \n\" expected",

#if 0 // does not work anymore because of "tensor expected" error

	// implicit mul cannot cross newline

	"a[b\nc]",
	"a[b ? \n] expected",

	"a[b*\nc]",
	"a[b*c]",

	"a[b\n*c]",
	"a[b*c]",

#endif

	// make sure question mark can appear after newlines

	"a+\nb+\nc+",
	"a+\nb+\nc+ ? ",

	// this bug fixed in version 30 (used to give one result, 14)

	"2+2\n(3+3)",
	"4\n6",

	// plus and minus cannot cross newline

	"1\n-1",
	"1\n-1",

	"1\n+1",
	"1\n1",
};

void
test_scan(void)
{
	test(__FILE__, s, sizeof s / sizeof (char *));
}

//	Notes:
//
//	Formerly add() and multiply() were used to construct expressions but
//	this preevaluation caused problems.
//
//	For example, suppose A has the floating point value inf.
//
//	Before, the expression A/A resulted in 1 because the scanner would
//	divide the symbols.
//
//	After removing add() and multiply(), A/A results in nan which is the
//	correct result.
//
//	The functions negate() and inverse() are used but they do not cause
//	problems with preevaluation of symbols.