Jul 8

THE LITTLE C INTERPRETER

表达式剖析器(THE EXPRESSION PARSER)

   读取和分析表达式的这部分代码叫做表达式剖析器。毫无疑问,表达式剖析器是C解释器中单一的最重要的部分。因为C语言定义表达式的方式比其他语言更加粗鄙,所以用大量的代码组成的C源文件来实现表达式剖析器。

   有几种不同的方式来设计C的表达式剖析器。许多商业的编译器用一种由parser-generator创建的table-driven parser。尽管table-driven parser一般来说要快过其他方式,但却很难手工构建。为了开发简易的C解释器,在这里我们使用递归-继承剖析器(recursive-descent parser.)
  
   一个递归-继承剖析器本质上是一大堆处理表达式的互相递归的函数。如果是在编译器里,那么剖析器将被用来生成与源码相符的标准的目标代码。无论如何,在解释器中,剖析器的目的就是对给定的表达式求值。
  
将源代码变成他们的组成元素
 
   所有解释器的基础是一个读取源码然后返回下一个逻辑符号的特殊的函数。基于历史原因,这些逻辑标记一般与标记(token)相关联。一般而言,计算机语言,特别是C语言,依据标记来定义程序。你可以想象标记是一个不可见的程序单元。比如说,相等运算符==就是一个标记。这两个等于号分开后意义将彻底改变。同理,if 也是一个标记。在C语言中无论是i还是f都没有其他意思。
  
   在ANSI C标准中,标记被定义为属于下列几组:
   keywords    identifiers    constants   
   strings    operators    pinctuation
  
   keywords(关键字)是那些如while的构成C语言的标记。Identifier(识别符)是变量、函数、用户定义类型的名字。而Constants(常量)和 Strings(字符串)是不解自明的,就像operators(运算符)一样。最后Punctuation(标点符号)包括了几个项目,像分号,逗号,大括号,小括号等。
  
   在简易C解释器中,这个被称作get_token()的函数可以从源码中读取标记。

 

/*Get a token*/
get_token()
{
    register char *temp;
    token_type=0;tok=0;
    temp=token;
    *temp='\0';

    /* skip over white space*/
    while(iswhite(*prog)&&*prog)
        ++prog;

    if (*prog=='\r')
    {
        ++prog;
        ++prog;
        while (iswhite(*prog)&&*prog)
            ++prog;
    }

    if (*prog=='\0')
    {
        *token='\0';
        tok=FINISHED;
        return (token_type=DELIMITER);
    }

    if (strchr("{}",*prog))
    {
        *temp=*prog;
        temp++;
        *temp='\0';
        prog++;
        return (token_type=BLOCK);
    }

    /* look for comments*/
    if (*prog=='/')
        if (*(prog+1)=='*')
        {
            prog+=2;
            do
            {
                while(*prog!='*') prog++;
                prog++;
            }while (*prog!='/');
            prog++;
        }

    if (strchr("!<>=",*prog))
    {
        switch (*prog)
        {
            case '=':
                if (*(prog+1)=='=')
                {
                    prog++;prog++;
                    *temp=EQ;
                    temp++; *temp=EQ; temp++;
                    *temp='\0';
                }
                break;
            case '!':
                if (*(prog+1)=='=')
                {
                    prog++;prog++;
                    *temp=NE;
                    temp++; *temp=NE; temp++;
                    *temp='\0';
                }
                break;
            case '<':
                if (*(prog+1)=='=')
                {
                    prog++;prog++;
                    *temp=LE;temp++;*temp=LE;
                }
                else
                {
                    prog++;
                    *temp=LT;
                }
                temp++;
                *temp='\0';
                break;
            case '>':
                if (*(prog+1)=='=')
                {
                    prog++;prog++;
                    *temp=GE;temp++;*temp=GE;
                }
                else
                {
                    prog++;
                    *temp=GT;
                }
                temp++;
                *temp='\0';
                break;
        }
        if (*token) return(token_type=DELIMITER);
    }

    if (strchr("+-*^/%=;(),'",*prog))
    {
        *temp=*prog;
        prog++;
        temp++;
        *temp='\0';
        return (token_type=DELIMITER);
    }

    if (*prog=='"')
    {
        prog++;
        while (*prog!='"' && *prog!='\r')
            *temp++=*prog++;
        if (*prog=='\r') sntx_err(SYNTAX);
        prog++;*temp='\0';
        return (token_type=STRING);
    }

    if (isdigit(*prog))
    {
        while(!isdelim(*prog))
            *temp++=*prog++;
        *temp='\0';
        return(token_type=NUMBER);
    }

    if(isalpha(*prog))
    {
        while (!isdelim(*prog))
            *temp++=*prog++;
        token_type=TEMP;
    }

    *temp='\0';

    /*see if a string is a command or variable*/
    if (token_type==TEMP)
    {
        tok=look_up(token);
        if(tok) token_type=KEYWORD;
        else token_type=IDENTIFIER;
    }
    return token_type;
}

get_token()函数用到了以下的全局变量和枚举类型:

 

extern char *prog;
extern char *p_buf;
extern char token[80];
extern char token_type;
extern char tok;


enum tok_types {DELIMITER,IDENTIFIER,NUMBER,
                KEYWORD,TEMP,STRING,BLOCK};
				
enum double_ops {LT=1,LE,GT,GE,EQ,NE};

enum error_msg{
    SYNTAX,UNBAL_PARENS,NO_EXP,EQUALS_EXPECTED,
    NOT_VAR,PARAM_ERR,SEMI_EXPECTED,
    UNBAL_BRACES,FUNC_UNDEF,TYPE_EXPECTED,
    NEST_FUNC,RET_NOCALL,PAREN_EXPECTED,
    WHILE_EXPECTED,QUOTE_EXPECTED,NOT_TEMP,
    TOO_MANY_LVARS};

   在源码中当前位置由prog指定。p_buf指针无法被解释器所改变而且经常指向正在被解析的程序的开头。get_token()函数开始跳过所有的空白,包括回车和空行。直到没有C标记包含空格,所以的空格必须被跳过。当然get_token()函数也要跳过注释。接下来,将每个标记的字符串表现形式归类到token,它的类型用token_type表达,而且,如果标记是一个关键字,它的内部表示将通过look_up()函数分配给tok。如你所见的get_token()函数,它将C的双字关系运算符转换成对应的枚举值。虽然不是技术需要,但这一步可以简化parser剖析器的实现。最后,如果parser剖析器遇到语法错误,将会调用sntx_err()函数。sntx_err()函数如下:
   

 

/*Display an error message*/
void sntx_err(int error)
{
    char *p,*temp;
    int linecount=0;
    register int i;

    static char *e[]={
        "Syntax error",
        "unbalanced parentheses",
        "no expression present",
        "equals sign expected",
        "not a variable",
        "parameter error",
        "semicolon expected",
        "unbalanced braces",
        "function undefined",
        "type specifier expected",
        "too many nested function calls",
        "return without call",
        "parentheses expected",
        "while expected",
        "closing quoto expected",
        "not a string",
        "too many local variable"
    };
    printf ("%s",e[error]);
    p=p_buf;
    while (p!=prog)
    {
        p++;
        if (*p=='\r')
            linecount++;
    }
    printf (" in line %d\n",linecount);
    temp=p;
    for (i=0;i<20 && p>p_buf && *p!='\n';i++,p--);
    for (i=0;i<30 && p<=temp;i++,p++)
        printf ("%c",*p);
    longjmp (e_buf,1);
}

   值得注意的是sntx_err()也会显示出错语句的行号,最后要注意的是sntx_err()调用longjmp()结束。因为语法错误会在嵌套或递归程序中频繁地出现,最简单的控制错误的方法就是跳到一个安全的地方。

 

   

 

Jul 7

因空间有限,Little C暂只包含5个库函数,分别是:getche(),putch(),puts(),print(),getnum().

 

//
//  lclib.c
//  cinterp
//
//  Created by long on 12. 6. 16..
//  Copyright 2012 __MyCompanyName__. All rights reserved.
//

/*  Internal Library Functions */

#include <stdio.h>
#include <stdlib.h>

extern char *prog;
extern char token[80];
extern char token_type;
extern char tok;

enum token_types{
    DELIMITER, IDENTIFIER, NUMBER, COMMAND, STRING,
    QUOTE, VARIABLE, BLOCK, FUNCTION
};

/*  These are the contants used to call sntx_err() when
    a syntax error occurs. Add more if you like.

    NOTE: SYNTAX is a generic error message used when
    nothing else seems appropriate.
*/

enum error_msg{
    SYNTAX, UNBAL_PARENS, NO_EXP, EQUALS_EXPECTED,
    NOT_VAR, PARAM_ERR, SEMI_EXPECTED,
    UNBAL_BRACES, FUNC_UNDEF, TYPE_EXPECTED,
    NEST_FUNC, RET_NOCALL, PAREN_EXPECTED,
    WHILE_EXPECED, QUOTE_EXPECTED, NOT_TEMP,
    TOO_MANY_LVARS
};


int get_token();
void sntx_err(int error);
void eval_exp(int *result);
void putback();

/*  Get a character from console */
char call_getche()
{
    char ch;
    ch=getchar();
    while(*prog!=')')  
        prog++;
    prog++;     /* advance to end of line */
    return ch;
}

/*  Put acharacter to the display */
int call_putch()
{
    int value;
    
    eval_exp(&value);
    printf("%c",value);
    return value;
}

/*  Call puts */
int call_puts()
{
    get_token();
    if(*token!='(')
        sntx_err(PAREN_EXPECTED);
    
    get_token();
    if(token_type!=QUOTE) 
        sntx_err(QUOTE_EXPECTED);
    
    puts(token);
    get_token();
    if(*token!=')')
        sntx_err(PAREN_EXPECTED);
    
    get_token();
    if(*token!=';')
        sntx_err(SEMI_EXPECTED);
    putback();
    return 0;
    
}


/*  A built-in console output function */
int print()
{
    int i;
    get_token();
    if(*token!='(')
        sntx_err(PAREN_EXPECTED);
    get_token();
    
    if(token_type==QUOTE){
        printf("%s",token);
    }
    else{
        putback();
        eval_exp(&i);
        printf("%d",i);
    }
    
    get_token();
    
    if(*token!=')')
        sntx_err(PAREN_EXPECTED);
    
    get_token();
    
    if(*token!=';')
        sntx_err(SEMI_EXPECTED);
    putback();
    return 0;
}


/*  Read an integer from keyboard */
int getnum()
{
    char s[80];
    
    gets(s);
    while(*prog!=')')
        prog++;
    
    prog++;
    return atoi(s);
}

如用Turbo/Borland C/C++,可用以下命令编译。

tcc -c parser.c

tcc -c lclib.c

tcc littlec.c parser.obj lclib.obj

 

Jul 7

这就是简易的C解释器的主程序代码,可正确编译。

gcc littlec.c parser.o lclib.o

clang littlec.c parser.o lclib.o

//
//  littlec.c
//  cinterp
//
//  Created by long on 12. 6. 16..
//  Copyright 2012 __MyCompanyName__. All rights reserved.
//

#include <stdio.h>
#include <setjmp.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#define NUM_FUNC        100
#define NUM_GLOBAL_VARS 100
#define NUM_LOCAL_VARS  200
#define NUM_BLOCK       100
#define ID_LEN          31
#define FUNC_CALLS      31
#define NUM_PARAMS      31
#define PROG_SIZE       10000
#define LOOP_NEST       31

enum tok_types {
    DELIMITER, IDENTIFIER, NUMBER, KEYWORD,
    TEMP, STRING, BLOCK
};

/* add additional C keyword tokens here */
enum tokens{
    ARG, CHAR, INT, IF, ELSE, FOR, DO, WHILE,
    SWITCH, RETURN, EOL, FINISHED, END
};

/* add additional double operators here */
enum double_ops{LT=1, LE, GT, GE, EQ, NE};


/*  These are the constants used to call sntx_err() when
 a syntax error occurs. Add more if you like.
 
 NOTE: SYNTAX is a generic error message used when 
 nothing else seems appropriate.
 */

enum error_msg{
    SYNTAX, UNBAL_PARENS, NO_EXP, EQUALS_EXPECTED,
    NOT_VAR, PARAM_ERR, SEMI_EXPECTED,
    UNBAL_BRACES, FUNC_UNDEF, TYPE_EXPECTED,
    NEST_FUNC, RET_NOCALL, PAREN_EXPECTED,
    WHILE_EXPECED, QUOTE_EXPECTED, NOT_TEMP,
    TOO_MANY_LVARS
};

char *prog;  /* current location in source code */
char *p_buf; /* points to start of program buffer */
jmp_buf e_buf;   /* hold environment for longjmp() */


/*  An array of these structures will hold the info
 associated with global variables.
 */
struct var_type{
    char var_name[ID_LEN];
    int var_type;
    int value;
}  global_vars[NUM_GLOBAL_VARS];

struct var_type local_var_stack[NUM_LOCAL_VARS];

/* This is the function call stack */
struct func_type{
    char func_name[ID_LEN];
    char *loc;        /* location of function entry point in file */
} func_table[NUM_FUNC];

int call_stack[NUM_FUNC];

/* Keyword lookup table */
struct commands{
    char command[20];
    char tok;
}table[]={  /* Commands must be entered lowercase */
    "if",IF,
    "else",ELSE,
    "for",FOR,
    "do",DO,
    "while",WHILE,
    "char",CHAR,
    "int",INT,
    "return",RETURN,
    
    "end",END,
    "",END
};

char token[80];
char token_type,tok;

int functos;    /* index to top of function call stack */
int func_index; /* index into function table */
int gvar_index; /* index into global variable table */
int lvartos;    /* index into  local variable stack */

int ret_value;  /* function return value */

void print();
void prescan();
void decl_global(),decl_local();
void call();
void putback();
void local_push(struct var_type i);
void eval_exp(int *value);
void sntx_err(int error);
void get_params();
void get_args();
void exec_if();
void exec_for();
void exec_do();
void exec_while();
void find_eob();
void func_push(int i);
void assign_var(char *var_name, int value);
void interp_block();
void func_ret();

int load_program(char *p,char *fname);
int find_var(char *s);
int func_pop();
int is_var(char *s);
int get_token();

char *find_func(char *name);

int main(int argc, char *argv[])
{
    if(argc!=2){
        printf ("usage: littlec <filename>\n");
        exit(1);
    }
    
    /* allocate memory for the program */
    if((p_buf=(char *) malloc(PROG_SIZE))==NULL){
        printf("allocation failure");
        exit(1);
    }
    
    /* load the program to execute */
    if( !load_program(p_buf,argv[1]))
        exit(1);
    if(setjmp(e_buf))   /* initialize long jump buffer */
        exit(1);
    
    /* set program pointer to start of the program buffer */
    prog=p_buf;
    prescan();  /* find the location of all function
                and global variables in the program */
    
    gvar_index=0;   /* initialize global variable index */
    lvartos=0;      /* initialize local variable stack index */
    functos=0;      /* initialize the CALL stack index */
    
    /* setup call to main() */
    prog=find_func("main");
    prog--;         /* abck up to opening ( */
    strcpy(token,"main");
    call();         /* call main() to start interpreting */
}

void interp_block()
{
    int value;
    char block=0;
    
    do{
        token_type=get_token();
        
        /*  If interpreting single statement, return
            on first semicolon.
         */
        
        /* see what kind of token is up */
        if(token_type==IDENTIFIER){
            /* Not a keyword, so process expression */
            putback();  /* restore token to input stream for
                         further processing by eval_exp() */
            eval_exp(&value);   /* process the expression */
            
            if (*token!=';') 
                sntx_err(SEMI_EXPECTED);
            else if(token_type==BLOCK){
                if(*token=='{')
                    block=1;    /* interpreting block, not statement */
                else return;    /* is a },so return */
            }
            else    /* is keyword */
                switch(tok){
                    case CHAR:
                    case INT:
                        putback();
                        decl_local();
                        break;
                    case RETURN:
                        func_ret();
                        return;
                    case IF:
                        exec_if();
                        break;
                    case ELSE:
                        /* find end of else block
                         and continue execution */
                        find_eob();
                        break;
                    case WHILE:
                        exec_while();
                        break;
                    case DO:
                        exec_do();
                        break;
                    case FOR:
                        exec_for();
                        break;
                    case END:
                        exit(0);
                }
        }
    } while(tok != FINISHED && block);
}
    
/* load a program. */
int load_program(char *p,char *fname)
{
    FILE *fp;
    int i=0;
        
    if((fp=fopen(fname,"rb"))==NULL)
        return 0;
    i=0;
    do{
        *p=getc(fp);
        p++;
        i++;
    } while (!feof(fp) && i<PROG_SIZE);
    *(p-2)='\0';    /* null terminate the program */
    fclose(fp);
    return 1;
}

/*  Find the location of all functions in the program
    and store global variables
 */
void prescan()
{
    char *p;
    char temp[32];
    int brace=0;    /*  when 0, this var tells us that
                        current source position is outside
                        of any function.
                     */
    p=prog;
    func_index=0;
    do{
        while(brace){
            get_token();
            if(*token=='{') brace++;
            if(*token=='}') brace--;
        }
        
        get_token();
        
        if(tok==CHAR || tok==INT){  /* is global var */
            putback();
            decl_global();
        }
        else if(token_type==IDENTIFIER){
            strcpy(temp,token);
            get_token();
            if(*token=='('){    /* must be assume a function */
                func_table[func_index].loc=prog;
                strcpy(func_table[func_index].func_name,temp);
                func_index++;
                while( *prog!= ')') prog++;
                prog++;
                /* prog points to opening curly brace of function */
            }
            else putback();
        }
        else if (*token=='{')
            brace++;
    } while(tok!=FINISHED);
        prog=p;
}

/*  Return the entry point of the specified function.
    Return NULL if not found
 */
char *find_func(char *name)
{
    register int i;
    
    for(i=0;i<func_index;i++)
        if(!strcmp(name,func_table[i].func_name))
            return func_table[i].loc;
    
    return NULL;
}

/* Declare a global variable */
void decl_global()
{
    get_token();
    
    global_vars[gvar_index].var_type=tok;
    global_vars[gvar_index].value=0;
    
    do{    /* process comma-separated list */
        get_token();    /* get name */
        strcpy(global_vars[gvar_index].var_name,token);
        get_token();
        gvar_index++;
    } while (*token != ';');
    if(*token!=';')
        sntx_err(SEMI_EXPECTED);
}

/* Declare a local variable. */
void decl_local()
{
    struct var_type i;
    get_token();
    
    i.var_type=tok;
    i.value=0;
    
    do{
        get_token();
        strcpy(i.var_name,token);
        local_push(i);
        get_token();
    } while (*token==',');
    if(*token!=';')
        sntx_err(SEMI_EXPECTED);
}

/* Call a function */
void call()
{
    char *loc,*temp;
    int lvartemp;
    
    loc=find_func(token);
    if(loc==NULL)
        sntx_err(FUNC_UNDEF);
    else{
        lvartemp=lvartos;   /* save local var stack index */
        get_args();
        temp=prog;  /* save return location */
        func_push(lvartemp);
        prog=loc;   /* reset prog to start of function */
        get_params();
        interp_block();
        prog=temp;  /* reset the program pointer */
        lvartos=func_pop();
    }
}

/*  Push the arguments to a function onto the local
    variable stack
 */
void get_args()
{
    int value,count,temp[NUM_PARAMS];
    struct var_type i;
    
    count=0;
    get_token();
    if(*token!='(')
        sntx_err(PAREN_EXPECTED);
    
    /* process a comma-separated list of values */
    do{
        eval_exp(&value);
        temp[count]=value; /* save temporarily */
        get_token();
        count++;
    } while (*token==',');
    count--;
    
    /* now, push on local_var_stack in reverse order */
    for(;count>=0;count--){
        i.value=temp[count];
        i.var_type=ARG;
        local_push(i);
    }
}

/* Get function parameters */
void get_params()
{
    struct var_type *p;
    int i;
    
    i=lvartos-1;
    do{
        get_token();
        p=&local_var_stack[i];
        
        if(*token!=')'){
            if(tok!=INT && tok!=CHAR)
                sntx_err(TYPE_EXPECTED);
            p->var_type=token_type;
            get_token();
            
            /*  link parameter name with argument 
                alreadyb on local var stack
             */
            strcpy(p->var_name,token);
            get_token();
            i--;
        }
        else break;
    } while(*token!=')');
    if(*token!=')')
        sntx_err(PAREN_EXPECTED);
}

/* Return from a function */
void func_ret()
{
    int value=0;
    
    /* get return value, if any */
    eval_exp(&value);
    
    ret_value=value;
}
           

/* Push local variable */
void local_push(struct var_type i)
{
    if(lvartos>NUM_LOCAL_VARS)
        sntx_err(TOO_MANY_LVARS);

    local_var_stack[lvartos]=i;
    lvartos++;
}

/* Pop index into local variable stack */
int func_pop()
{
    functos--;
    if(functos<0) 
        sntx_err(RET_NOCALL);
    return (call_stack[functos]);
            
}
                 
/* Push index of local variable stack */
void func_push(int i)
{
    if(functos > NUM_FUNC)
        sntx_err(NEST_FUNC);
    call_stack[functos]=i;
    
    functos++;
}
                 

/* Assign a value to a variable */
void assign_var(char *var_name, int value)
{
    register int i;
    
    /* first, see if it's a local variable */
    for(i=lvartos-1;i>= call_stack[functos-1];i--);{
        if(!strcmp(local_var_stack[i].var_name,var_name)){
            local_var_stack[i].value=value;
            return;
        }
    }
    if(i<call_stack[functos-1])
        /* if not local, try global var table */
        for (i=0;i < NUM_GLOBAL_VARS;i++)
            if(!strcmp(global_vars[i].var_name,var_name)){
                global_vars[i].value=value;
                return;
            }
    sntx_err(NOT_VAR);
}


/*  Find the value of a variable */
int find_var(char *s)
{
    register int i;
    
    /* first, see if it's a local variable */
    for(i=lvartos-1;i>= call_stack[functos-1];i--)
        if(!strcmp(local_var_stack[i].var_name,token))
            return local_var_stack[i].value;
            
       
    /* otherwise, try global vars */
    for (i=0;i < NUM_GLOBAL_VARS;i++)
        if(!strcmp(global_vars[i].var_name,s))
            return global_vars[i].value;
    
    sntx_err(NOT_VAR);
    return -1;
}


/*  Determine if an identifier is variable.
    Return 1 if var is found;o otherwise
 */
int is_var(char *s)
{
    register int i;
    
    /* first, see if it's a local variable */
    for(i=lvartos-1;i>= call_stack[functos-1];i--)
        if(!strcmp(local_var_stack[i].var_name,token))
            return 1;
    
    
    /* otherwise, try global vars */
    for (i=0;i < NUM_GLOBAL_VARS;i++)
        if(!strcmp(global_vars[i].var_name,s))
            return 1;
    
    return 0;
}

/*  Execute an IF statement */
void exec_if()
{
    int cound;
    
    eval_exp(&cound);   /* get left expression */
    
    if(cound){
        interp_block();
    }
    else{
        find_eob();
        get_token();
        if(tok!=ELSE){
            putback();
            return;
        }
        interp_block();
    }
}

/*  Execute a while loop */
void exec_while()
{
    int cound;
    char *temp;
    
    putback();
    temp=prog;  /* save location of top of while loop */
    
    get_token();
    eval_exp(&cound);   /* check the conditional expression */
    if(cound)
        interp_block();
    else{               /* skip around loop */
        find_eob();
        return;
    }
    prog=temp;          /* loop back to top */
}


/*  Execute a Do loop */
void exec_do()
{
    int cound;
    char *temp;
    
    putback();
    temp=prog;
    
    get_token();
    interp_block();
    get_token();
    if(tok!=WHILE) 
        sntx_err(WHILE_EXPECED);
    eval_exp(&cound);
    
    if(cound)       
        /* if tru loop; otherwise, continue on */
        prog=temp; 
}

/*  Find the end of a block */
void find_eob()
{
    int brace;
    
    get_token();
    brace=1;
    do {
        get_token();
        if(*token=='{')
            brace++;
        else if (*token=='}')
            brace--;
    } while(brace);
}

/*  Execute a for loop */
void exec_for()
{
    int cound;
    char *temp,*temp2;
    int brace;
    
    get_token();
    eval_exp(&cound);   /* initialization expression */
    if(*token!=';')
        sntx_err(SEMI_EXPECTED);
    prog++; /* get past hte ; */
    temp=prog;
    
    for(;;){
        eval_exp(&cound);   /* check the condition */
        if (*token!=';')
            sntx_err(SEMI_EXPECTED);
        prog++;
        temp2=prog;
        
        /* find the start of the block */
        brace=1;
        while(brace){
            get_token();
            if(*token=='(')
                brace++;
            if(*token==')')
                brace--;
        }
        
        if(cound) interp_block();
        else{
            find_eob();
            return;
        }
        prog=temp2;
        eval_exp(&cound);
        prog=temp;
    }
}

Jul 1

这是一个精简的C 解释器的语义剖析器的代码,从《THE CRAFT OF C》上一行一行打下来,可以编译通过,但无法正常运行。若有朋友知道怎样改写,本人将感激不尽!

The entire code for little C recursive-descent parser is shown here,along with some necessary support functions,global data,and data types.

 

//
//  parser.c
//  cinterp
//
//  Created by long on 12. 6. 16..
//  Copyright 2012 __MyCompanyName__. All rights reserved.
//  
//  Recursive descent parser for integer expressions
//  which may include variables and function calls.
//


#include "setjmp.h"
#include "math.h"
#include "ctype.h"
#include "stdlib.h"
#include "string.h"
#include "stdio.h"

#define NUM_FUNC        100
#define NUM_GLOBAL_VARS 100
#define NUM_LOCAL_VARS  200
#define ID_LEN          31
#define FUNC_CALLS      31
#define PROG_SIZE       10000
#define FOR_NEST        31

enum tok_types {
    DELIMITER, IDENTIFIER, NUMBER, KEYWORD,
    TEMP, STRING, BLOCK
};

enum tokens{
    ARG, CHAR, INT, IF, ELSE, FOR, DO, WHILE,
    SWITCH, RETURN, EOL, FINISHED, END
};

enum double_ops{LT=1, LE, GT, GE, EQ, NE};

/*  These are the constants used to call sntx_err() when
    a syntax error occurs. Add more if you like.

    NOTE: SYNTAX is a generic error message used when 
    nothing else seems appropriate.
*/

enum error_msg {
    SYNTAX, UNBAL_PARENS, NO_EXP, EQUALS_EXPECTED,
    NOT_VAR, PARAM_ERR, SEMI_EXPECTED,
    UNBAL_BRACES, FUNC_UNDEF, TYPE_EXPECTED,
    NEST_FUNC, RET_NOCALL, PAREN_EXPECTED,
    WHILE_EXPECED, QUOTE_EXPECTED, NOT_TEMP,
    TOO_MANY_LVARS
};

extern char *prog;  /* current location in source code */
extern char *p_buf; /* points to start of program buffer */
extern jmp_buf e_buf;   /* hold environment for longjmp() */


/*  An array of these structures will hold the info
    associated with global variables.
 */
extern struct var_type{
    char var_name[32];
    int var_type;
    int value;
}  global_vars[NUM_GLOBAL_VARS];


/* This is the function call stack */
extern struct func_type{
    char func_name[32];
    char *loc;        /* location of function entry point in file */
} func_stack[NUM_FUNC];

/* Keyword table */
extern struct commands{
    char command[20];
    char tok;
}table[];

/*  "Standard Library" functions are declared here so
    they can be put into the internal function table that
    follows.
 */
int call_getche(void),call_putch(void);
int call_puts(void),print(void),getnum(void);

struct intern_func_type{
    char *f_name;   /* functionname */
    int (* p)();    /* pointer to the function */
} intern_func[]={
    "getche",call_getche,
    "putch",call_putch,
    "puts",call_puts,
    "print",print,
    "getnum",getnum,
    "",0    /* null terminate the list */
};

extern char token[80];  /* string representation of token */
extern char token_type; /* contains type of token */
extern char tok;        /* internal representation of token */

extern int ret_value; /* function return value */

void eval_exp(int *value);
void eval_exp1(int *value);
void eval_exp2(int *value);
void eval_exp3(int *value);
void eval_exp4(int *value);
void eval_exp5(int *value);
void eval_exp0(int *value);
void atom(int *value);
void sntx_err(int error),putback(void);
void assign_var(char *var_name, int value);
int isdelim(char c),look_up(char *s), iswhite(char c);
int find_var(char *s),get_token(void);
int internal_func(char *s);
int is_var(char *s);
char *find_func(char *name);
void call(void);

/* Entry point into parser. */
void eval_exp(int *value)
{
    get_token();
    if(!*token){
        sntx_err(NO_EXP);
        return;
    }
    if(*token==';'){
        *value=0;   /* empty expression */
        return;
    }
    eval_exp0(value);
    putback();  /* return last token read to input stream */
}


/* Process an assignment expression */
void eval_exp0(int *value)
{
    /* holds name of var receiving the assignment */
    char temp[ID_LEN];

    register int temp_tok;
    
    if(token_type==IDENTIFIER){
        if(is_var(token)){  /* if a var, see if assignment */
            strcpy(temp,token);
            temp_tok=token_type;
            get_token();
            if(*token=='='){    /* is an assignment */
                get_token();
                eval_exp0(value);   /* get value to assign */
                assign_var(temp,*value);    /* assign the value */
                return;
            }
            else{   /* not an assignment */
                putback();  /*restore original token */
                strcpy(token,temp);
                token_type=temp_tok;
            }
        }
    }
    eval_exp1(value);
}


/*  This array is used by eval_exp1(). Because
    some compilers cannot initialize an array within a
    function it is defined as a global variable.
 */
char relops[7]={
    LT, LE, GT, GE, EQ, NE, 0
};

/* Process relational operators. */
void eval_exp1(int *value)
{
    int partial_value;
    register char op;
    
    eval_exp2(value);
    op=*token;
    if(strchr(relops,op)){
        get_token();
        eval_exp2(&partial_value);
        switch(op){     /* perform the relational operation */
            case LT:
                *value=*value < partial_value;
                break;
            case LE:
                *value=*value <= partial_value;
                break;
            case GT:
                *value=*value > partial_value;
                break;
            case GE:
                *value=*value >= partial_value;
                break;
            case EQ:
                *value=*value == partial_value;
                break;
            case NE:
                *value=*value != partial_value;
                break;
        }
    }
}


/* Add or substract two terms. */
void eval_exp2(int *value)
{
    register char op;
    int partial_value;
    
    eval_exp3(value);
    while((op=*token)=='+' || op=='-'){
        get_token();
        eval_exp3(&partial_value);
        switch(op){
            case '-':
                *value=*value-partial_value;
                break;
            case '+':
                *value =*value +partial_value;
                break;
        }
        
    }
}

/* Multiply or divide two factore. */
void eval_exp3(int *value)
{
    register char op;
    int partial_value,t;
    
    eval_exp4(value);
    while((op=*token) == '*' || op=='/' || op=='%'){
        get_token();
        eval_exp4(&partial_value);
        switch(op){
            case '*':
                *value= *value * partial_value;
                break;
            case '/':
                *value= (*value)/ partial_value;
                break;
            case '%':
                t=(*value)/partial_value;
                *value= *value-(t*partial_value);
                break;
        }
    }
}


/* Is a unary + or -. */
void eval_exp4(int *value)
{
    register char op;
    
    op='\0';
    if(*token=='+' || *token=='-'){
        op=*token;
        get_token();
    }
    eval_exp5(value);
    if(op && op=='-') 
        *value = -(*value);
}


/* Process parenthesized expression. */
void eval_exp5(int *value)
{
    if((*token=='(')){
        get_token();
        eval_exp0(value);   /* get subexpression */
        if(*token != ')')
            sntx_err(PAREN_EXPECTED);
        get_token();
    }
    else{
        atom(value);
    }
}

/* Find value of number, variable or function. */
void atom(int *value)
{
    int i;
    
    switch(token_type){
        case IDENTIFIER:
            i=internal_func(token);
            if(i != -1){    /* call "standard library" function */
                *value = (*intern_func[i].p)();
            }
            else if (find_func(token)){ /* call user-defined function */
                call();
                *value =ret_value;
            }
            else *value =find_var(token);   /* get bar's value */
            get_token();
            return;
        case NUMBER:
            *value = atoi(token);
            get_token();
            return;
        case DELIMITER: /* see if character constant */
            if(*token=='\''){
                *value= *prog;
                prog++;
                if(*prog!='\'') sntx_err(QUOTE_EXPECTED);
                prog++;
                get_token();
            }
            return;
        default:
            if( *token== ')') return;   /* process empty expression */
            else sntx_err(SYNTAX);
    }
}


/* Display an error message. */
void sntx_err(int error)
{
    char *p,*temp;
    int linecount=0;
    register int i;
    
    static char *e[]={
        "syntax error",
        "unbalanced parentheses",
        "no expression present",
        "equals sign expected",
        "not a variable",
        "parameter error",
        "semicolon expected",
        "unbalanced braces",
        "function undefined",
        "type specifier expected",
        "too many nested function calls",
        "return without call",
        "parentheses expected",
        "while expected",
        "closing quote expected",
        "not a string",
        "too many local variables"
    };
    
    printf("%s",e[error]);
    p=p_buf;
    while(p!=prog){
        p++;
        if(*p =='\r'){
            linecount++;
        }
    }
    printf(" line %d\n",linecount);
    
    temp=p;
    for(i=0;i<20 && p>p_buf && *p!='\n';i++,p--);
    for(i=0;i<30 && p<=temp;i++,p++)
        printf("%c",*p);
    
    longjmp(e_buf,1);   /* return to save point */
}

/* Get a token */
int get_token()
{
    register char *temp;
    
    token_type=0;
    tok=0;
    
    temp=token;
    *temp='\0';
    
    /* skip over white space */
    while(iswhite(*prog) && *prog) ++prog;
    
    if(*prog=='\r'){
        ++prog;
        ++prog;
        /* skip over white space */
        while(iswhite(*prog) && *prog) ++prog;
    }
    
    if( *prog=='\0'){   /* end of file */
        *token='\0';
        tok=FINISHED;
        return(token_type=DELIMITER);
    }
    
    if(strchr("{}",*prog)){ /* block delimiters */
        *temp= *prog;
        temp++;
        *temp='\0';
        prog++;
        return (token_type=BLOCK);
    }
    
    /* look for comments */
    if(*prog=='/')
        if(*(prog+1)=='*'){
            prog+=2;
            do{
                while (*prog != '*') prog++;
                prog++;
            } while (*prog !='/');
            prog++;
        }
    
    /* is or might be a relation operator */
    
    if(strchr("!<>=",*prog)){
        switch(*prog){
            case '=':
                if(*(prog+1) =='='){
                    prog++;
                    prog++;
                    *temp=EQ;
                    temp++;
                    *temp=EQ;
                    temp++;
                    *temp='\0';
                }
                break;
            case '!':
                if(*(prog+1)=='='){
                    prog++; prog++;
                    *temp=NE;
                    temp++;
                    *temp=NE;temp++;
                    *temp='\0';
                }
                break;
            case '<':
                if(*(prog+1)=='='){
                    prog++; prog++;
                    *temp=LE;
                    temp++;
                    *temp=LE;
                }
                else{
                    prog++;
                    *temp=LT;
                }
                temp++;
                *temp='\0';
                break;
            case '>':
                if(*(prog+1)=='='){
                    prog++; prog++;
                    *temp=GE;
                    temp++;
                    *temp=GE;
                }
                else{
                    prog++;
                    *temp=GT;
                }
                temp++;
                *temp='\0';
                break;
        }
        if( *token) return (token_type=DELIMITER);
    }
    
    if(strchr("+-*/^%=;(),'",*prog)){
        *temp=*prog;
        prog++;     /* advance to next position */
        temp++;
        *temp='\0';
        return (token_type=DELIMITER);
    }
    
    if( *prog=='"'){    /* quoted string */
        prog++;
        while(*prog!='"' && *prog!='\r')
            *temp++ = *prog++;
        if(*prog=='\r') sntx_err(SYNTAX);
        prog++;
        *temp='\0';
        return (token_type=STRING);
    }
    
    if(isdigit(*prog)){
        while(!isdelim(*prog))
            *temp++ = *prog++;
        *temp='\0';
        return (token_type=NUMBER);
    }
    
    if(isalpha(*prog)){ /* var or command */
        while(!isdelim(*prog))
            *temp++ = *prog++;
        token_type=TEMP;
    }
    
    *temp ='\0';
    
    /* see if a string is a command or a variable */
    if(token_type==TEMP){
        tok=look_up(token); /* convert to internal rep */
        if(tok)
            token_type=KEYWORD;
        else token_type=IDENTIFIER;
    }
    return token_type;
}


/* Return a token to input stream */
void putback()
{
    char *t;
    
    t=token;
    for(;*t;t++)
        prog--;
}

/*  Look up a token's internal representation in the
    token table.
 */
int look_up(char *s)
{
    register int i;
    char *p;
    
    /* convert to lowercase */
    p=s;
    while(*p){
        *p=tolower(*p);
        p++;
    }
    
    /* see if token is in table */
    for(i=0; *table[i].command; i++)
        if(!strcmp(table[i].command,s))
            return table[i].tok;
    return 0;   /* unknown command */
}

/*  Return index of internal library function or -1 
    if not found.
 */
int internal_func(char *s)
{
    int i;
    
    for(i=0;intern_func[i].f_name[0];i++){
        if(!strcmp(intern_func[i].f_name,s))
            return i;
    }
    return -1;
}

/* Return true if c is a delimiter. */
int isdelim(char c)
{
    if(strchr(" !;,+-<>'/*%^=()",c) || c==9||
       c=='\r' || c==0)
        return 1;
    return 0;
}

/* Return 1 if c is space or tab. */
int iswhite(char c)
{
    if(c==' ' || c=='\t') return 1;
    else return 0;
}


 

原书上推荐用Turbo/Borland C/C++ 或Microsoft C/C++

命令:tcc -c parser.c

           c1 -c parser.c