summaryrefslogtreecommitdiff
path: root/examples/clang/clang.rl
blob: 7ecfeefdd6260bb911485496cab2185819086962 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/*
 * A mini C-like language scanner.
 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

%%{
	machine clang;

	# A newline character. The embedded action increments the line counter
	# (curline) whenever one is consumed.
	newline = '\n' @{curline += 1;};

	# Any single character; newlines among them are counted by the action above.
	any_count_line = any | newline;

	# Consume the body of a C comment. main jumps here (fgoto) after matching
	# the comment opener; once the closing star-slash has been read, the
	# action jumps back to main.
	c_comment := any_count_line* :>> '*/' @{fgoto main;};

	# The scanner proper: a list of token patterns, each optionally followed
	# by an action. Within an action the matched text is tokstart..tokend.
	main := |*

	# Alphanumeric characters or underscore.
	alnum_u = alnum | '_';

	# Alphabetic characters or underscore.
	alpha_u = alpha | '_';

	# Symbols: a single punctuation character other than underscore and the
	# two quote characters. The action prints the current line number and
	# the first character of the token.
	( punct - [_'"] ) {
		printf( "symbol(%i): %c\n", curline, tokstart[0] );
	};

	# Identifier: a letter or underscore followed by any number of letters,
	# digits or underscores. The action prints the token text.
	alpha_u alnum_u* {
		printf( "ident(%i): ", curline );
		fwrite( tokstart, 1, tokend-tokstart, stdout );
		printf("\n");
	};

	# Single-quoted literal. A literal character is anything except the quote
	# and the backslash, or a backslash followed by any one character.
	# newline is listed explicitly, presumably so that line breaks inside a
	# literal still trigger the line-counting action.
	sliteralChar = [^'\\] | newline | ( '\\' . any_count_line );
	'\'' . sliteralChar* . '\'' {
		printf( "single_lit(%i): ", curline );
		fwrite( tokstart, 1, tokend-tokstart, stdout );
		printf("\n");
	};

	# Double-quoted literal. Same structure as the single-quoted form, with
	# the double quote as the delimiter.
	dliteralChar = [^"\\] | newline | ( '\\' any_count_line );
	'"' . dliteralChar* . '"' {
		printf( "double_lit(%i): ", curline );
		fwrite( tokstart, 1, tokend-tokstart, stdout );
		printf("\n");
	};

	# Whitespace: any single character outside the range 0x21..0x7e. That
	# covers spaces, newlines and control codes, and also the 0 byte that
	# scanner() appends at end of input. There is no action, so these
	# characters produce no output.
	any_count_line - 0x21..0x7e;

	# C++ style comment: two slashes, then everything up to and including
	# the terminating newline. No action, so the comment produces no output.
	'//' [^\n]* newline;

	# C style comment opener: hand control to the c_comment machine above,
	# which consumes the rest of the comment.
	'/*' { fgoto c_comment; };

	# Match an integer: one or more decimal digits. This overlaps with the
	# prefix of the float and hex patterns below.
	digit+ {
		printf( "int(%i): ", curline );
		fwrite( tokstart, 1, tokend-tokstart, stdout );
		printf("\n");
	};

	# Match a float: digits, a dot, then digits. Digits are required on both
	# sides of the dot.
	digit+ '.' digit+ {
		printf( "float(%i): ", curline );
		fwrite( tokstart, 1, tokend-tokstart, stdout );
		printf("\n");
	};

	# Match a hex number: the prefix 0x (lower-case x only) followed by one
	# or more hexadecimal digits.
	'0x' xdigit+ {
		printf( "hex(%i): ", curline );
		fwrite( tokstart, 1, tokend-tokstart, stdout );
		printf("\n");
	};

	*|;
}%%

/* Ragel directive: emit the generated machine's static data at this point.
 * NOTE(review): the nofinal option should omit the final-state constant,
 * which this program never references -- confirm against the Ragel guide. */
%% write data nofinal;

/* Size in bytes of the scanner's input buffer. A single token (plus the data
 * read after it in the same pass) must fit in this many bytes. */
#define BUFSIZE 128

/*
 * Scan standard input with the clang machine and print one line per token
 * to standard output.
 *
 * Input is read in chunks into a fixed buffer. After each chunk has been run
 * through the machine, any partially matched token at the end of the buffer
 * is moved to the front so the next read can continue it.
 *
 * Terminates the process with exit(1) if a token prefix fills the whole
 * buffer. On a parse error it prints a message to stderr and returns.
 */
void scanner()
{
	static char buf[BUFSIZE];
	/* cs, act, tokstart and tokend are not all used by the C code below;
	 * presumably they are the state variables the Ragel-generated code
	 * refers to by name -- confirm against the generated output.
	 * have: number of bytes at the start of buf carried over from the
	 * previous pass. curline: current line number, updated by the machine's
	 * newline action. */
	int cs, act, have = 0, curline = 1;
	/* NOTE(review): tokstart has no initializer here, yet it is compared
	 * with 0 after the first pass; presumably the generated init code below
	 * sets it -- confirm for the Ragel version in use. */
	char *tokstart, *tokend = 0;
	int done = 0;

	/* Ragel directive: emit the machine's initialization code here. */
	%% write init;

	while ( !done ) {
		/* Read new data in after any preserved prefix. */
		char *p = buf + have, *pe;
		int len, space = BUFSIZE - have;
		
		if ( space == 0 ) {
			/* We've used up the entire buffer storing an already-parsed token
			 * prefix that must be preserved. */
			fprintf(stderr, "OUT OF BUFFER SPACE\n" );
			exit(1);
		}

		len = fread( p, 1, space, stdin );

		/* If this is the last buffer, tack on an EOF. A short read is taken
		 * to mean end of input, and a 0 byte is appended as the end marker;
		 * there is always room for it because len < space.
		 * NOTE(review): a read error also produces a short read and is not
		 * distinguished from end of file here. */
		if ( len < space ) {
			p[len++] = 0;
			done = 1;
		}
			
		/* pe marks the end of the data available to the machine. */
		pe = p + len;
		/* Ragel directive: emit the code that runs the machine over p..pe. */
		%% write exec;

		if ( cs == clang_error ) {
			/* The machine reached its error state: report and stop scanning. */
			fprintf(stderr, "PARSE ERROR\n" );
			break;
		}

		if ( tokstart == 0 )
			/* No token in progress: the whole buffer can be reused. */
			have = 0;
		else {
			/* There is a prefix to preserve, shift it over. memmove is used
			 * because source and destination may overlap. tokend is rebased
			 * by the same offset before tokstart is reset. */
			have = pe - tokstart;
			memmove( buf, tokstart, have );
			tokend = buf + (tokend-tokstart);
			tokstart = buf;
		}
	}
}

/* Program entry point: run the scanner over standard input, then report
 * success to the caller. */
int main()
{
	scanner();

	return EXIT_SUCCESS;
}