When: 2006
Technologies: C, yacc, lex, compiler design, scanning, parsing, semantic analysis, code generation, optimization, programming language theory, algorithms, data structures
I mention this project, because even though it was a one semester class, it was one of the toughest projects I’ve worked on, and also one of my favorites. Compiler design is really an advanced topic, and only for those with a real interest in the subject, as I quickly learned. It helped me overcome the challenges I faced previously in learned algorithms and data structures in a theoretical context, by applying them on a real, practical project. Only a few in the class finished with a working compiler for the “C minus” language, and mine passed all tests, and produced the fastest assembly, earning me a free steak from the professor 🙂
The compiler itself was written in C, and although I certainly wouldn’t make that choice now, it seemed best with the existing libraries at the time.
Snippet of code generator
case CALLEXPR: { //Procedure call // Prepare arguments int numParams=0; temp=node->child[0]; while(temp!=NULL) { if(temp->nodeKind==VARNODE && temp->kind.var==VARREF && temp->type.type==ARRTYPE) { //Array argument, passed by reference symElement* ref = STFind(temp->scope, temp->attr.name); if(ref->memLoc<0) { //Local array reference sprintf(tstr, "%d(%%ebp)", ref->memLoc); outputInstr(NULL,"leal",tstr,"%eax"); } else if(ref->memLoc>0) { //Parameter array reference // Address calculation necessary sprintf(tstr, "%d(%%ebp)", ref->memLoc); outputInstr(NULL,"movl",tstr,"%eax"); } else { //Global array reference outputInstr(NULL,"leal",temp->attr.name,"%eax"); } } else { //Calculate argument generateCode(temp); } //Push argument onto stack outputInstr(NULL,"pushl","%eax",NULL); numParams++; temp=temp->sibling; } //Function call sprintf(tstr,"CM_%s",node->attr.name); outputInstr(NULL,"call",tstr,NULL); if(numParams>0) { //Clear arguments off stack sprintf(tstr,"$%d",numParams*4); outputInstr(NULL,"addl",tstr,"%esp"); } } break;
Benchmark source
int multiplier; int increment; int seed; int array[10000000]; int random(void) { seed = seed * multiplier + increment; if (seed < 0) seed = seed + 2147483648; return seed; } void initializeRand(void) { seed = 1337; multiplier = 1103515245; increment = 12345; } int partition(int a[], int start, int end) { int i; int j; int x; int t; x = a[start]; i = start-1; j = end+1; while (1) { j = j - 1; while (a[j] > x) j = j - 1; i =i + 1; while (a[i] < x) i = i + 1; if (i<j) { t = a[i]; a[i] = a[j]; a[j] = t; } else { return j; } } } void sort(int a[], int start, int end) { int mid; if (start < end) { mid = partition(a, start, end); sort(a, start, mid); sort(a, mid+1, end); } } int mod(int n, int q) { return n - n / q * q; } void main(void) { int i; int count; initializeRand(); count = 0; while (count < 10) { i = 0; while (i < 10000000) { array[i] = mod(random(), 1000000); i = i + 1; } sort(array, 0, 10000000-1); count = count + 1; } output(array[0]); output(array[10000000-1]); }
Generated asm
.file "quickSortBenchmark.cm" .comm multiplier,4,4 .comm increment,4,4 .comm seed,4,4 .comm array,40000000,4 .globl CM_random .type CM_random, @function CM_random: enter $0, $0 pushl increment pushl multiplier movl seed, %eax popl %edx imull %edx, %eax popl %edx addl %edx, %eax movl %eax, seed pushl $0 movl seed, %eax popl %edx cmpl %edx, %eax jl LBL1 movl $0, %eax jmp LBL2 LBL1: movl $1, %eax LBL2: cmpl $1, %eax jnz LBL3 pushl $2147483647 movl seed, %eax popl %edx addl %edx, %eax movl %eax, seed LBL3: movl seed, %eax jmp LBL0 LBL0: leave ret .globl CM_initializeRand .type CM_initializeRand, @function CM_initializeRand: enter $0, $0 movl $1337, seed movl $1103515245, multiplier movl $12345, increment LBL4: leave ret .globl CM_partition .type CM_partition, @function CM_partition: enter $16, $0 movl 12(%ebp), %edx movl 8(%ebp), %ebx movl (%ebx,%edx,4), %eax movl %eax, -8(%ebp) pushl $1 movl 12(%ebp), %eax popl %edx subl %edx, %eax movl %eax, -16(%ebp) pushl $1 movl 16(%ebp), %eax popl %edx addl %edx, %eax movl %eax, -12(%ebp) LBL6: movl $1, %eax cmpl $1, %eax jnz LBL7 pushl $1 movl -12(%ebp), %eax popl %edx subl %edx, %eax movl %eax, -12(%ebp) LBL8: pushl -8(%ebp) movl -12(%ebp), %edx movl 8(%ebp), %ebx movl (%ebx,%edx,4), %eax popl %edx cmpl %edx, %eax jg LBL9 movl $0, %eax jmp LBL10 LBL9: movl $1, %eax LBL10: cmpl $1, %eax jnz LBL11 pushl $1 movl -12(%ebp), %eax popl %edx subl %edx, %eax movl %eax, -12(%ebp) jmp LBL8 LBL11: pushl $1 movl -16(%ebp), %eax popl %edx addl %edx, %eax movl %eax, -16(%ebp) LBL12: pushl -8(%ebp) movl -16(%ebp), %edx movl 8(%ebp), %ebx movl (%ebx,%edx,4), %eax popl %edx cmpl %edx, %eax jl LBL13 movl $0, %eax jmp LBL14 LBL13: movl $1, %eax LBL14: cmpl $1, %eax jnz LBL15 pushl $1 movl -16(%ebp), %eax popl %edx addl %edx, %eax movl %eax, -16(%ebp) jmp LBL12 LBL15: pushl -12(%ebp) movl -16(%ebp), %eax popl %edx cmpl %edx, %eax jl LBL16 movl $0, %eax jmp LBL17 LBL16: movl $1, %eax LBL17: cmpl $1, %eax jnz LBL18 movl -16(%ebp), %edx movl 8(%ebp), %ebx movl (%ebx,%edx,4), %eax movl %eax, -4(%ebp) movl -12(%ebp), %edx movl 8(%ebp), %ebx pushl (%ebx,%edx,4) movl -16(%ebp), %eax movl %eax, %edx popl %eax movl 8(%ebp), %ebx movl %eax, (%ebx,%edx,4) pushl -4(%ebp) movl -12(%ebp), %eax movl %eax, %edx popl %eax movl 8(%ebp), %ebx movl %eax, (%ebx,%edx,4) jmp LBL19 LBL18: movl -12(%ebp), %eax jmp LBL5 LBL19: jmp LBL6 LBL7: LBL5: leave ret .globl CM_sort .type CM_sort, @function CM_sort: enter $4, $0 pushl 16(%ebp) movl 12(%ebp), %eax popl %edx cmpl %edx, %eax jl LBL21 movl $0, %eax jmp LBL22 LBL21: movl $1, %eax LBL22: cmpl $1, %eax jnz LBL23 pushl 16(%ebp) pushl 12(%ebp) movl 8(%ebp), %eax pushl %eax call CM_partition addl $12, %esp movl %eax, -4(%ebp) pushl -4(%ebp) pushl 12(%ebp) pushl 8(%ebp) call CM_sort addl $12, %esp pushl 16(%ebp) pushl $1 movl -4(%ebp), %eax popl %edx addl %edx, %eax pushl %eax pushl 8(%ebp) call CM_sort addl $12, %esp LBL23: LBL20: leave ret .globl CM_mod .type CM_mod, @function CM_mod: enter $0, $0 pushl 12(%ebp) pushl 12(%ebp) movl 8(%ebp), %eax popl %ecx xorl %edx, %edx idivl %ecx popl %edx imull %edx, %eax pushl %eax movl 8(%ebp), %eax popl %edx subl %edx, %eax jmp LBL24 LBL24: leave ret .globl CM_main .type CM_main, @function CM_main: enter $8, $0 call CM_initializeRand movl $0, -4(%ebp) LBL26: pushl $10 movl -4(%ebp), %eax popl %edx cmpl %edx, %eax jl LBL27 movl $0, %eax jmp LBL28 LBL27: movl $1, %eax LBL28: cmpl $1, %eax jnz LBL29 movl $0, -8(%ebp) LBL30: pushl $10000000 movl -8(%ebp), %eax popl %edx cmpl %edx, %eax jl LBL31 movl $0, %eax jmp LBL32 LBL31: movl $1, %eax LBL32: cmpl $1, %eax jnz LBL33 movl $1000000, %eax pushl %eax call CM_random pushl %eax call CM_mod addl $8, %esp pushl %eax movl -8(%ebp), %eax movl %eax, %edx popl %eax leal array, %ebx movl %eax, (%ebx,%edx,4) pushl $1 movl -8(%ebp), %eax popl %edx addl %edx, %eax movl %eax, -8(%ebp) jmp LBL30 LBL33: pushl $1 movl $10000000, %eax popl %edx subl %edx, %eax pushl %eax pushl $0 leal array, %eax pushl %eax call CM_sort addl $12, %esp pushl $1 movl -4(%ebp), %eax popl %edx addl %edx, %eax movl %eax, -4(%ebp) jmp LBL26 LBL29: movl $0, %edx leal array, %ebx pushl (%ebx,%edx,4) call CM_output addl $4, %esp pushl $1 movl $10000000, %eax popl %edx subl %edx, %eax movl %eax, %edx leal array, %ebx pushl (%ebx,%edx,4) call CM_output addl $4, %esp LBL25: leave ret