Compiler for C-like Project Language

When: 2006
Technologies: C, yacc, lex, compiler design, scanning, parsing, semantic analysis, code generation, optimization, programming language theory, algorithms, data structures

I mention this project, because even though it was a one semester class, it was one of the toughest projects I’ve worked on, and also one of my favorites. Compiler design is really an advanced topic, and only for those with a real interest in the subject, as I quickly learned. It helped me overcome the challenges I faced previously in learned algorithms and data structures in a theoretical context, by applying them on a real, practical project. Only a few in the class finished with a working compiler for the “C minus” language, and mine passed all tests, and produced the fastest assembly, earning me a free steak from the professor 🙂

The compiler itself was written in C, and although I certainly wouldn’t make that choice now, it seemed best with the existing libraries at the time.

Snippet of code generator

case CALLEXPR:
{
    //Procedure call
    //  Prepare arguments
    int numParams=0;
    temp=node->child[0];
    while(temp!=NULL)
    {
        if(temp->nodeKind==VARNODE && temp->kind.var==VARREF && temp->type.type==ARRTYPE)
        {   //Array argument, passed by reference
            symElement* ref = STFind(temp->scope, temp->attr.name);
            if(ref->memLoc<0)
            { //Local array reference
                sprintf(tstr, "%d(%%ebp)", ref->memLoc);
                outputInstr(NULL,"leal",tstr,"%eax");
            }
            else if(ref->memLoc>0)
            { //Parameter array reference
                //  Address calculation necessary
                sprintf(tstr, "%d(%%ebp)", ref->memLoc);
                outputInstr(NULL,"movl",tstr,"%eax");
            }
            else
            { //Global array reference
                outputInstr(NULL,"leal",temp->attr.name,"%eax");
            }
        }
        else
        {   //Calculate argument
            generateCode(temp);
        }

        //Push argument onto stack
        outputInstr(NULL,"pushl","%eax",NULL);

        numParams++;
        temp=temp->sibling;
    }

    //Function call
    sprintf(tstr,"CM_%s",node->attr.name);
    outputInstr(NULL,"call",tstr,NULL);

    if(numParams>0)
    {   //Clear arguments off stack
        sprintf(tstr,"$%d",numParams*4);
        outputInstr(NULL,"addl",tstr,"%esp");
    }
}
break;

Benchmark source

int multiplier;
int increment;
int seed;
int array[10000000];

int random(void) {
	seed = seed * multiplier + increment;
	if (seed < 0)
		seed = seed + 2147483648;
	return seed;
}

void initializeRand(void) {
	seed = 1337;
	multiplier = 1103515245;
	increment = 12345;
}

int partition(int a[], int start, int end) {
    int i;
    int j;
    int x;
    int t;
    
    x = a[start];
    i = start-1;
    j = end+1;
    while (1) {
        j = j - 1;
        while (a[j] > x)
            j = j - 1;
        i =i + 1;
        while (a[i] < x)
            i = i + 1;
        if (i<j) {
            t = a[i];
            a[i] = a[j];
            a[j] = t;
        } else {
            return j;
        }
    }
}

void sort(int a[], int start, int end) {
    int mid;
    if (start < end) {
        mid = partition(a, start, end);
        sort(a, start, mid);
        sort(a, mid+1, end);
    }        
}

int mod(int n, int q) {
	return n - n / q * q;
}

void main(void) {
	int i;
    int count;

	initializeRand();

    count = 0;
    while (count < 10) {
        i = 0;
        while (i < 10000000) {
            array[i] = mod(random(), 1000000);
            i = i + 1;
        }

        sort(array, 0, 10000000-1); 
        count = count + 1;
    }

	output(array[0]);
    output(array[10000000-1]);
}

Generated asm

.file "quickSortBenchmark.cm"
.comm multiplier,4,4
.comm increment,4,4
.comm seed,4,4
.comm array,40000000,4
.globl CM_random
.type CM_random, @function
CM_random:
	enter $0, $0
	pushl increment
	pushl multiplier
	movl seed, %eax
	popl %edx
	imull %edx, %eax
	popl %edx
	addl %edx, %eax
	movl %eax, seed
	pushl $0
	movl seed, %eax
	popl %edx
	cmpl %edx, %eax
	jl LBL1
	movl $0, %eax
	jmp LBL2
LBL1:
	movl $1, %eax
LBL2:
	cmpl $1, %eax
	jnz LBL3
	pushl $2147483647
	movl seed, %eax
	popl %edx
	addl %edx, %eax
	movl %eax, seed
LBL3:
	movl seed, %eax
	jmp LBL0
LBL0:
	leave 
	ret 
.globl CM_initializeRand
.type CM_initializeRand, @function
CM_initializeRand:
	enter $0, $0
	movl $1337, seed
	movl $1103515245, multiplier
	movl $12345, increment
LBL4:
	leave 
	ret 
.globl CM_partition
.type CM_partition, @function
CM_partition:
	enter $16, $0
	movl 12(%ebp), %edx
	movl 8(%ebp), %ebx
	movl (%ebx,%edx,4), %eax
	movl %eax, -8(%ebp)
	pushl $1
	movl 12(%ebp), %eax
	popl %edx
	subl %edx, %eax
	movl %eax, -16(%ebp)
	pushl $1
	movl 16(%ebp), %eax
	popl %edx
	addl %edx, %eax
	movl %eax, -12(%ebp)
LBL6:
	movl $1, %eax
	cmpl $1, %eax
	jnz LBL7
	pushl $1
	movl -12(%ebp), %eax
	popl %edx
	subl %edx, %eax
	movl %eax, -12(%ebp)
LBL8:
	pushl -8(%ebp)
	movl -12(%ebp), %edx
	movl 8(%ebp), %ebx
	movl (%ebx,%edx,4), %eax
	popl %edx
	cmpl %edx, %eax
	jg LBL9
	movl $0, %eax
	jmp LBL10
LBL9:
	movl $1, %eax
LBL10:
	cmpl $1, %eax
	jnz LBL11
	pushl $1
	movl -12(%ebp), %eax
	popl %edx
	subl %edx, %eax
	movl %eax, -12(%ebp)
	jmp LBL8
LBL11:
	pushl $1
	movl -16(%ebp), %eax
	popl %edx
	addl %edx, %eax
	movl %eax, -16(%ebp)
LBL12:
	pushl -8(%ebp)
	movl -16(%ebp), %edx
	movl 8(%ebp), %ebx
	movl (%ebx,%edx,4), %eax
	popl %edx
	cmpl %edx, %eax
	jl LBL13
	movl $0, %eax
	jmp LBL14
LBL13:
	movl $1, %eax
LBL14:
	cmpl $1, %eax
	jnz LBL15
	pushl $1
	movl -16(%ebp), %eax
	popl %edx
	addl %edx, %eax
	movl %eax, -16(%ebp)
	jmp LBL12
LBL15:
	pushl -12(%ebp)
	movl -16(%ebp), %eax
	popl %edx
	cmpl %edx, %eax
	jl LBL16
	movl $0, %eax
	jmp LBL17
LBL16:
	movl $1, %eax
LBL17:
	cmpl $1, %eax
	jnz LBL18
	movl -16(%ebp), %edx
	movl 8(%ebp), %ebx
	movl (%ebx,%edx,4), %eax
	movl %eax, -4(%ebp)
	movl -12(%ebp), %edx
	movl 8(%ebp), %ebx
	pushl (%ebx,%edx,4)
	movl -16(%ebp), %eax
	movl %eax, %edx
	popl %eax
	movl 8(%ebp), %ebx
	movl %eax, (%ebx,%edx,4)
	pushl -4(%ebp)
	movl -12(%ebp), %eax
	movl %eax, %edx
	popl %eax
	movl 8(%ebp), %ebx
	movl %eax, (%ebx,%edx,4)
	jmp LBL19
LBL18:
	movl -12(%ebp), %eax
	jmp LBL5
LBL19:
	jmp LBL6
LBL7:
LBL5:
	leave 
	ret 
.globl CM_sort
.type CM_sort, @function
CM_sort:
	enter $4, $0
	pushl 16(%ebp)
	movl 12(%ebp), %eax
	popl %edx
	cmpl %edx, %eax
	jl LBL21
	movl $0, %eax
	jmp LBL22
LBL21:
	movl $1, %eax
LBL22:
	cmpl $1, %eax
	jnz LBL23
	pushl 16(%ebp)
	pushl 12(%ebp)
	movl 8(%ebp), %eax
	pushl %eax
	call CM_partition
	addl $12, %esp
	movl %eax, -4(%ebp)
	pushl -4(%ebp)
	pushl 12(%ebp)
	pushl 8(%ebp)
	call CM_sort
	addl $12, %esp
	pushl 16(%ebp)
	pushl $1
	movl -4(%ebp), %eax
	popl %edx
	addl %edx, %eax
	pushl %eax
	pushl 8(%ebp)
	call CM_sort
	addl $12, %esp
LBL23:
LBL20:
	leave 
	ret 
.globl CM_mod
.type CM_mod, @function
CM_mod:
	enter $0, $0
	pushl 12(%ebp)
	pushl 12(%ebp)
	movl 8(%ebp), %eax
	popl %ecx
	xorl %edx, %edx
	idivl %ecx
	popl %edx
	imull %edx, %eax
	pushl %eax
	movl 8(%ebp), %eax
	popl %edx
	subl %edx, %eax
	jmp LBL24
LBL24:
	leave 
	ret 
.globl CM_main
.type CM_main, @function
CM_main:
	enter $8, $0
	call CM_initializeRand
	movl $0, -4(%ebp)
LBL26:
	pushl $10
	movl -4(%ebp), %eax
	popl %edx
	cmpl %edx, %eax
	jl LBL27
	movl $0, %eax
	jmp LBL28
LBL27:
	movl $1, %eax
LBL28:
	cmpl $1, %eax
	jnz LBL29
	movl $0, -8(%ebp)
LBL30:
	pushl $10000000
	movl -8(%ebp), %eax
	popl %edx
	cmpl %edx, %eax
	jl LBL31
	movl $0, %eax
	jmp LBL32
LBL31:
	movl $1, %eax
LBL32:
	cmpl $1, %eax
	jnz LBL33
	movl $1000000, %eax
	pushl %eax
	call CM_random
	pushl %eax
	call CM_mod
	addl $8, %esp
	pushl %eax
	movl -8(%ebp), %eax
	movl %eax, %edx
	popl %eax
	leal array, %ebx
	movl %eax, (%ebx,%edx,4)
	pushl $1
	movl -8(%ebp), %eax
	popl %edx
	addl %edx, %eax
	movl %eax, -8(%ebp)
	jmp LBL30
LBL33:
	pushl $1
	movl $10000000, %eax
	popl %edx
	subl %edx, %eax
	pushl %eax
	pushl $0
	leal array, %eax
	pushl %eax
	call CM_sort
	addl $12, %esp
	pushl $1
	movl -4(%ebp), %eax
	popl %edx
	addl %edx, %eax
	movl %eax, -4(%ebp)
	jmp LBL26
LBL29:
	movl $0, %edx
	leal array, %ebx
	pushl (%ebx,%edx,4)
	call CM_output
	addl $4, %esp
	pushl $1
	movl $10000000, %eax
	popl %edx
	subl %edx, %eax
	movl %eax, %edx
	leal array, %ebx
	pushl (%ebx,%edx,4)
	call CM_output
	addl $4, %esp
LBL25:
	leave 
	ret