c2d.tokenizer source code

1 // This file is part of Visual D
2 //
3 // Visual D integrates the D programming language into Visual Studio
4 // Copyright (c) 2010 by Rainer Schuetze, All Rights Reserved
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt
8 
9 module c2d.tokenizer;
10 
11 import std.ascii;
12 import std.string;
13 import std.utf;
14 
15 version = V2;
16 // version = Java;
17 // version = IDL;
18 version = dollar_in_ident;
19 
// A single lexical token as produced by the tokenizer, together with the
//  table of token types and helpers to classify and print them.
class Token
{
	// very basic C++ tokenizer, interested only in the token types below.
	// The numeric values are defined by the order of the members, so new
	//  members must not be inserted in the middle (the trailing comments
	//  give the value of the member on that line).
	enum {
		// trivia and literals
		Comment,
		Newline,
		Identifier,
		Number,
		String,

		// declaration keywords
		Namespace,
		Struct,
		Class,
		Union,
		Enum,

		Typedef, // 10
		Extern,
		Static,
		Const,
		__In,

		__Out,
		__Body,
		__Asm,
		__Declspec,
		// statement keywords
		If,

		Else,  // 20
		Do,
		While,
		For,
		Return,

		Break,
		Continue,
		Switch,
		Goto,
		Delete,

		// brackets
		BraceL, // 30
		BraceR,
		BracketL,
		BracketR,
		ParenL,
		ParenR,

		// comparison operators
		Equal,
		Unequal,
		LessThan,
		LessEq,

		GreaterThan, // 40
		GreaterEq,
		Unordered,
		LessGreater,
		LessEqGreater,

		UnordGreater,
		UnordGreaterEq,
		UnordLess,
		UnordLessEq,
		UnordEq,

		// other operators and punctuation
		Shl, // 50
		Shr,
		Comma,
		Asterisk,
		Ampersand,

		Assign,
		Dot,
		Elipsis,
		Colon,
		DoubleColon,

		Semicolon, // 60
		Tilde,
		Question,
		Exclamation,
		Deref,

		Plus,
		PlusPlus,
		Minus,
		MinusMinus,
		Div,

		Mod, // 70
		Xor,
		Or,
		OrOr,
		AmpAmpersand,

		// compound assignments
		AddAsgn,
		SubAsgn,
		MulAsgn,
		DivAsgn,
		ModAsgn,

		AndAsgn, // 80
		XorAsgn,
		OrAsgn,
		ShlAsgn,
		ShrAsgn,

		// preprocessor directives
		PPinclude,
		PPdefine,
		PPundef,
		PPif,
		PPifdef,

		PPifndef, // 90
		PPelse,
		PPelif,
		PPendif,
		PPother,
		PPinsert, // helper for reparsing

		Fis,      // "#"
		FisFis,   // "##"
		Macro,
		Other,

		EOF, // 100
		V1Tokens  // first value available to the version specific extensions
	}

version(V2)
{
	// additional token types, numbered from V1Tokens onwards
	enum {
		New = V1Tokens,
		Static_if,
		Mixin,
		Case,
		Default,
		Operator,
		Version,
		Sizeof,
		This,
		Static_cast,
		Dynamic_cast,
		Reinterpret_cast,
		Const_cast,
		Empty,    // helper for unspecified identifier in declaration
		Interface,
		Template,
	}
}
version(Java)
{
	enum {
		Instanceof = V1Tokens,
	}
}
	// Returns true if type is one of the preprocessor directive tokens
	//  (PPinclude ... PPother). PPinsert is not reported as a directive.
	static bool isPPToken(int type)
	{
		switch(type)
		{
			case PPinclude, PPdefine, PPundef, PPif, PPifdef, PPifndef,
			     PPelse, PPelif, PPendif, PPother:
				return true;
			default:
				return false;
		}
	}

	// Returns true for the declaration keywords class, struct, union, enum
	//  and typedef, false for every other token type.
	static bool needsTrailingSemicolon(int type)
	{
		switch(type)
		{
			case Class, Struct, Union, Enum, Typedef:
				return true;
			default:
				return false;
		}
	}

	// Returns the source text of token types that have a fixed spelling and
	//  a placeholder for Identifier, Number, String and EOF.
	// Token types without a representation (Macro, PPinsert, Comment,
	//  PPother, Other and unknown values) trigger an assertion; if
	//  assertions are disabled "<unexpected>" is returned.
	static string toString(int type)
	{
		switch(type)
		{
		case Namespace:	     return "namespace";
		case Struct:	     return "struct";
		case Class:	     return "class";
		case Union:	     return "union";
		case Enum:	     return "enum";
		case Typedef:	     return "typedef";
		case Extern:	     return "extern";
		case Static:	     return "static";
		case Const:	     return "const";
		case __In:	     return "__in";
		case __Out:	     return "__out";
		case __Body:	     return "__body";

		case __Asm:	     return "__asm";
		case __Declspec:     return "__declspec";
		case If:	     return "if";
		case Else:	     return "else";
		case Do:	     return "do";
		case While:	     return "while";
		case For:	     return "for";
		case Return:	     return "return";
		case Break:	     return "break";
		case Continue:	     return "continue";
		case Switch:	     return "switch";
		case Goto:	     return "goto";
		case Delete:	     return "delete";

		case BraceL:	     return "{";
		case BraceR:	     return "}";
		case BracketL:	     return "[";
		case BracketR:	     return "]";
		case ParenL:	     return "(";
		case ParenR:	     return ")";

		case Equal:	     return "==";
		case Unequal:	     return "!=";
		case LessThan:	     return "<";
		case LessEq:	     return "<=";
		case GreaterThan:    return ">";
		case GreaterEq:	     return ">=";

		case Unordered:	     return "!<>=";
		case LessGreater:    return "<>";
		case LessEqGreater:  return "<>=";
		case UnordGreater:   return "!<=";
		case UnordGreaterEq: return "!<";
		case UnordLess:	     return "!>=";
		case UnordLessEq:    return "!>";
		case UnordEq:	     return "!<>";

		case Shl:	     return "<<";
		case Shr:	     return ">>";
		case Comma:	     return ",";
		case Asterisk:	     return "*";
		case Ampersand:	     return "&";
		case Assign:	     return "=";
		case Dot:	     return ".";
		case Elipsis:	     return "...";
		case Colon:	     return ":";
		case DoubleColon:    return "::";
		case Semicolon:	     return ";";
		case Tilde:	     return "~";
		case Question:	     return "?";
		case Exclamation:    return "!";
		case Deref:	     return "->";
		case Plus:	     return "+";
		case PlusPlus:	     return "++";
		case Minus:	     return "-";
		case MinusMinus:     return "--";
		case Div:	     return "/";
		case Mod:	     return "%";
		case Xor:	     return "^";
		case Or:	     return "|";
		case OrOr:	     return "||";
		case AmpAmpersand:   return "&&";
		case AddAsgn:	     return "+=";
		case SubAsgn:	     return "-=";
		case MulAsgn:	     return "*=";
		case DivAsgn:	     return "/=";
		case ModAsgn:	     return "%=";
		case AndAsgn:	     return "&=";
		case XorAsgn:	     return "^=";
		case OrAsgn:	     return "|=";
		case ShlAsgn:	     return "<<=";
		case ShrAsgn:	     return ">>=";

		case PPinclude:	     return "#include";
		case PPdefine:	     return "#define";
		case PPundef:	     return "#undef";
		case PPif:	     return "#if";
		case PPifdef:	     return "#ifdef";
		case PPifndef:	     return "#ifndef";
		case PPelse:	     return "#else";
		case PPelif:	     return "#elif";
		case PPendif:	     return "#endif";

		case Fis:	     return "#";
		case FisFis:	     return "##";

		// Newline belongs to the base token set, so it is handled
		//  independent of the version specific extensions below
		case Newline:	     return "\n";

version(V2)
{
		case New:	return "new";
		case Static_if: return "__static_if";
		case Mixin:	return "__mixin";
		case Case:	return "case";
		case Default:	return "default";
		case Operator:	return "operator";
		case Version:	return "version";
		case Sizeof:	return "sizeof";
		case This:	return "this";
		case Static_cast: return "static_cast";
		case Dynamic_cast: return "dynamic_cast";
		case Reinterpret_cast: return "reinterpret_cast";
		case Const_cast: return "const_cast";
		case Empty:	return "";
		case Interface: return "interface";
		case Template: return "template";

}
version(Java)
{
		case Instanceof: return "instanceof";
}
		case Identifier: return "<identifier>";
		case Number: return "<number>";
		case String: return "<string>";
		case EOF: return "EOF";

		// other types supposed to fail because no representation available
		case Macro:
		case PPinsert:
		case Comment:
		case PPother:
		case Other:
		default:
			// EOF has already returned above, so this condition is never
			//  true here: the assertion always fails when it is compiled in
			assert(type == EOF);
			return "<unexpected>";
		}
	}

	int type;       // one of the enum values above
	int lineno;     // line number associated with the token
	string text;    // text of the token itself
	string pretext; // text preceding the token
}
348 
349 ///////////////////////////////////////////////////////////////////////
350 
// Linear search: returns true if any element of arr compares equal to val.
bool contains(T)(ref T[] arr, T val)
{
	for (size_t idx = 0; idx < arr.length; idx++)
	{
		if (arr[idx] == val)
			return true;
	}
	return false;
}
358 
// Appends val to arr unless an equal element is already present.
void addunique(T)(ref T[] arr, T val)
{
	if (contains(arr, val))
		return;
	arr ~= val;
}
364 
365 ///////////////////////////////////////////////////////////////////////
366 
// Splits a text into Token objects. The text consumed for the current token
//  is accumulated in curText; additional texts can be pushed on a stack to be
//  tokenized before continuing with the current text.
class Tokenizer
{
	// Creates a tokenizer for txt and resets it to the start of the text.
	this(string txt)
	{
		text = txt;
		reinit();
	}

	// Resets position, line number, token counter and all option flags to
	//  their initial values and skips a UTF-8 byte order mark if present.
	// NOTE(review): the stack of pushed texts is not cleared here - confirm
	//  that callers never reinit while a pushed text is active.
	void reinit()
	{
		lastIndent = "";
		countTokens = 0;
		pos = 0;
		if(text.length >= 3 && text[0] == 0xef && text[1] == 0xbb && text[2] == 0xbf)
			pos += 3; // skip utf8 header
		lineno = 1;
		lastCharWasNewline = true;
		skipNewline = true;
		keepBackSlashAtEOL = false;
		enableASMComment = false;
	}

	// Makes txt the text to read from. If the current text is not yet
	//  exhausted, it is saved with its position and resumed by popText().
	// An empty txt is ignored.
	void pushText(string txt)
	{
		if(txt.length > 0)
		{
			if (pos < text.length)
			{
				txtstack ~= text;
				posstack ~= pos;
			}
			text = txt;
			pos = 0;
		}
	}

	// Resumes the text saved last by pushText().
	// Returns false if there is no saved text.
	bool popText()
	{
		if(txtstack.length <= 0)
			return false;
		text = txtstack[$-1];
		pos  = posstack[$-1];

		txtstack.length = txtstack.length - 1;
		posstack.length = posstack.length - 1;
		return true;
	}

	// Returns true if the current text is exhausted and no saved text is left.
	bool eof()
	{
		return pos >= text.length && txtstack.length <= 0;
	}

	// Returns true if there is no character n positions ahead in the current text.
	bool eof(int n)
	{
		// this call is used to check for a close newline, so it does not need to check the text stack
		return pos + n >= text.length;
	}

	// Returns true if the current character is '\n' or '\r'.
	// Must not be called at the end of the text.
	bool isNewline()
	{
		if (text[pos] == '\n' || text[pos] == '\r')
			return true;
		return false;
	}

	// Advances by one character, resuming a saved text when the current one ends.
	void incPos()
	{
		pos++;
		if (pos >= text.length)
			popText();
	}

	// Skips any sequence of line continuations (backslash followed by "\n"
	//  or "\r\n") at the current position, counting the lines. If
	//  keepBackSlashAtEOL is set, "\\\n" is appended to curText for each.
	// Returns true if the sequence was skipped completely, false if there is
	//  no continuation or if skipping stopped at a backslash that is not
	//  followed by a newline.
	bool handleBackSlash()
	{
		if (eof(1) || text[pos] != '\\')
			return false;

		while (!eof(1) && text[pos] == '\\')
		{
			if (text[pos+1] == '\r' && !eof(2) && text[pos+2] == '\n')
			{
				lineno++;
				incPos();
				incPos();
				incPos();
			}
			else if (text[pos+1] == '\n')
			{
				lineno++;
				incPos();
				incPos();
			}
			else
				return false;
			if(keepBackSlashAtEOL)
				curText ~= "\\\n";
		}
		return true;
	}

	// Consumes one character (after skipping line continuations) and appends
	//  it to curText; "\r\n" is consumed as a single '\n'. Updates lineno and
	//  lastCharWasNewline.
	// Returns false if nothing could be consumed or if the end of the input
	//  was reached by consuming the character.
	bool nextChar()
	{
		if (eof())
			return false;

		handleBackSlash();
		// a line continuation might have been the very last thing in the
		//  input, so there is no character left to read
		if (pos >= text.length)
			return false;

		if (text[pos] == '\r' && !eof(1) && text[pos+1] == '\n')
		{
			lineno++;
			incPos();
			lastCharWasNewline = true;
		}
		else if (text[pos] == '\n')
		{
			lineno++;
			lastCharWasNewline = true;
		}
		else
			lastCharWasNewline = false;
		curText ~= text[pos];
		incPos();
		if (eof())
			return false;

		return true;
	}

	// Skips white space and line continuations, stopping in front of a
	//  newline if skipNewline is not set. The white space following the last
	//  newline is collected in lastIndent.
	// Returns the number of lines passed.
	int skipSpace()
	{
		bool collectIndent = lastCharWasNewline;
		if(collectIndent)
			lastIndent = "";

		int lines = lineno;
		handleBackSlash();
	cont_spaces:
		while(!eof() && isWhite(text[pos]))
		{
			if (isNewline())
			{
				if (!skipNewline)
					break;
				else
				{
					collectIndent = true;
					lastIndent = "";
				}
			}
			else if(collectIndent)
				lastIndent ~= text[pos];

			nextChar();
		}
		// the white space can end in front of a line continuation: splice it
		//  here. This must not be done through nextChar(), because that
		//  also consumes the character following the continuation
		if (handleBackSlash())
			goto cont_spaces;
		if (!keepBackSlashAtEOL)
		{
			// a backslash followed by a single '\r' is not spliced by
			//  handleBackSlash(), but is still treated as white space
			if(!eof(2) && text[pos] == '\\' && text[pos+1] == '\r')
			{
				nextChar();
				nextChar();
				goto cont_spaces;
			}
		}

		return lineno - lines;
	}

	// Skips to the end of the line, including the newline if skipNewline is set.
	void skipLine()
	{
		while(!eof() && !isNewline())
			nextChar();
		if(!eof() && skipNewline)
			nextChar();
	}

	// Skips a string or character literal; the current character is used as
	//  the delimiter. A backslash escapes the following character.
	// Returns false if the end of the input is reached before the closing
	//  delimiter. Unless version IDL is set, a newline inside the literal
	//  throws an Exception.
	bool skipString()
	{
		int sep = text[pos];
		nextChar();
		while(!eof() && text[pos] != sep)
		{
version(IDL) {} else {
			if(isNewline())
				throw new Exception("newline in string constant");
}
			if(!handleBackSlash())
			{
				if(text[pos] == '\\')
					nextChar();
				nextChar();
			}
		}
		if (eof())
			return false;
		nextChar();
		return true;
	}

	// Skips an identifier starting with a letter or '_'.
	// Returns false without consuming anything if there is no identifier.
	bool skipIdent()
	{
		if (eof())
			return false;
		if(!isAlpha(text[pos]) && text[pos] != '_')
			return false;
		nextChar();
		return skipAlnum();
	}

	// Skips letters, digits and '_' (and '$' with version dollar_in_ident).
	// Always returns true.
	bool skipAlnum()
	{
		version(dollar_in_ident)
			while(!eof() && (isAlphaNum(text[pos]) || text[pos] == '_' || text[pos] == '$'))
				nextChar();
		else
			while(!eof() && (isAlphaNum(text[pos]) || text[pos] == '_'))
				nextChar();
		return true;
	}

	// Skips a number: the current character, following letters and digits
	//  (covering prefixes and suffixes), an optional fraction and signed
	//  exponents like "e+5" or "p-3".
	// Always returns true.
	bool skipNumber()
	{
		size_t start = curText.length;
		nextChar();
		skipAlnum();

		// without a fraction, 'e' is a digit of a hexadecimal integer
		//  ("0xE+1" is an addition), so the exponent marker depends on the prefix
		string digits = curText[start .. $];
		bool hex = digits.length >= 2 && digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X');
		skipSignedExponent(hex ? "pP" : "eE");

		if(eof() || text[pos] != '.')
			return true;
		// float
		nextChar();
		skipAlnum();
		skipSignedExponent("eEpP");
		return true;
	}

	// Consumes a sign and the following letters and digits if the character
	//  consumed last is one of markers and the current character is '+' or '-'.
	private void skipSignedExponent(string markers)
	{
		if(pos >= text.length || curText.length == 0)
			return;
		if(text[pos] != '+' && text[pos] != '-')
			return;

		// curText is inspected instead of text[pos-1], because pos might
		//  refer to another text after the end of a pushed text
		char last = curText[$-1];
		foreach(char marker; markers)
			if(marker == last)
			{
				nextChar();
				skipAlnum();
				return;
			}
	}

	// Skips a block comment up to and including the closing "*/" or to the
	//  end of the input. Expects the current character to be the '*' of the
	//  opening "/*".
	void skipComment()
	{
		while(nextChar())
		{
			if (text[pos] == '*' && pos + 1 < text.length && text[pos+1] == '/')
			{
				nextChar();
				nextChar();
				break;
			}
		}
	}

	// charTypes is a list of (character, token type) pairs. If the current
	//  character equals the character of a pair, it is consumed and the
	//  token type of that pair is returned, otherwise def is returned.
	int checkChar(int def, charTypes...)()
	{
		int ch = text[pos];
		bool isChar = true; // alternates between character and token type entries
		bool found = false;
		foreach(int ct; charTypes)
		{
			if(isChar)
				found = (ct == ch);
			else if(found)
			{
				nextChar();
				return ct;
			}
			isChar = !isChar;
		}
		return def;
	}

	// Consumes the current character and applies checkChar to the next one.
	// Returns def at the end of the input.
	int checkNextChar(int def, charTypes...)()
	{
		// we were always sitting on a valid character, and we don't want appending "\\\n",
		//  so we do the relevant parts of nextChar() here
		lastCharWasNewline = false;
		curText ~= text[pos];
		incPos();
		if(!eof())
		{
			return checkChar!(def, charTypes)();
		}
		return def;
	}

	// If tok has type iftype, tries to extend it by one more character
	//  according to charTypes (see checkChar). Returns the resulting type.
	int contNextChar(int iftype, charTypes...)(Token tok)
	{
		if(tok.type == iftype && !eof())
		{
			tok.type = checkChar!(iftype, charTypes)();
		}
		return tok.type;
	}

	// Returns the keyword token type for ident or Token.Identifier if ident
	//  is not a keyword.
	static int identifierToKeyword(string ident)
	{
		switch(ident)
		{
		case "namespace": return Token.Namespace;
		case "struct":    return Token.Struct;
		case "class":     return Token.Class;
		case "union":     return Token.Union;
		case "enum":      return Token.Enum;
		case "typedef":   return Token.Typedef;
		case "extern":    return Token.Extern;
		case "static":    return Token.Static;
		case "const":     return Token.Const;
		case "__in":      return Token.__In;
		case "__out":     return Token.__Out;
		case "__body":    return Token.__Body;
		case "_asm":      return Token.__Asm;
		case "__asm":     return Token.__Asm;
		case "__declspec":  return Token.__Declspec;
		case "if":        return Token.If;
		case "else":      return Token.Else;
		case "while":     return Token.While;
		case "do":        return Token.Do;
		case "for":       return Token.For;
		case "switch":    return Token.Switch;
		case "goto":      return Token.Goto;
		case "return":    return Token.Return;
		case "continue":  return Token.Continue;
		case "break":     return Token.Break;
		case "delete":    return Token.Delete;
version(V2)
{
		case "case":      return Token.Case;
		case "default":   return Token.Default;
		case "__static_if": return Token.Static_if;
		case "__mixin":   return Token.Mixin;
		case "__version": return Token.Version;
		case "sizeof":    return Token.Sizeof;
		case "operator":  return Token.Operator;
		case "new":       return Token.New;
		case "this":      return Token.This;
		case "static_cast": return Token.Static_cast;
		case "dynamic_cast": return Token.Dynamic_cast;
		case "reinterpret_cast": return Token.Reinterpret_cast;
		case "const_cast": return Token.Const_cast;
		case "interface": return Token.Interface;
		case "template":  return Token.Template;
}
version(Java)
{
		case "instanceof": return Token.Instanceof;
}
		default:          return Token.Identifier;
		}
	}

	// Reads the next token into tok: pretext receives the skipped white
	//  space, text the token itself, lineno the line of the token.
	// Returns false at the end of the input (tok.type is Token.EOF then).
	// Throws an Exception for ".." not followed by a third '.' and for
	//  errors detected by skipString().
	bool next(Token tok)
	{
		curText = "";
		bool startOfLine = pos <= 0 || text[pos-1] == '\n' || text[pos-1] == '\r';
		if(skipSpace() > 0)
			startOfLine = true;

		tok.pretext = curText;
		tok.lineno = lineno;

		if(eof())
		{
			tok.text = "";
			tok.type = Token.EOF;
			return false;
		}

		curText = "";
		tok.type = Token.Other;

		switch(text[pos])
		{
		case '{':  tok.type = Token.BraceL;      nextChar(); break;
		case '}':  tok.type = Token.BraceR;      nextChar(); break;
		case '[':  tok.type = Token.BracketL;    nextChar(); break;
		case ']':  tok.type = Token.BracketR;    nextChar(); break;
		case '(':  tok.type = Token.ParenL;      nextChar(); break;
		case ')':  tok.type = Token.ParenR;      nextChar(); break;
		case ',':  tok.type = Token.Comma;       nextChar(); break;
		case '~':  tok.type = Token.Tilde;       nextChar(); break;
		case '?':  tok.type = Token.Question;    nextChar(); break;
		case '\r':
		case '\n': tok.type = Token.Newline;     nextChar(); break;

		case '=':  tok.type = checkNextChar!(Token.Assign,      '=', Token.Equal)(); break;
		case '*':  tok.type = checkNextChar!(Token.Asterisk,    '=', Token.MulAsgn)(); break;
		case '%':  tok.type = checkNextChar!(Token.Mod,         '=', Token.ModAsgn)(); break;
		case '^':  tok.type = checkNextChar!(Token.Xor,         '=', Token.XorAsgn)(); break;
		case '&':  tok.type = checkNextChar!(Token.Ampersand,   '=', Token.AndAsgn, '&', Token.AmpAmpersand)(); break;
		case '|':  tok.type = checkNextChar!(Token.Or,          '=', Token.OrAsgn, '|', Token.OrOr)(); break;
		case ':':  tok.type = checkNextChar!(Token.Colon,       ':', Token.DoubleColon)(); break;
		case '-':  tok.type = checkNextChar!(Token.Minus,       '=', Token.SubAsgn, '>', Token.Deref, '-', Token.MinusMinus)(); break;
		case '+':  tok.type = checkNextChar!(Token.Plus,        '=', Token.AddAsgn, '+', Token.PlusPlus)(); break;

		case '<':
			tok.type = checkNextChar!(Token.LessThan,    '=', Token.LessEq, '<', Token.Shl, '>', Token.LessGreater)();
			contNextChar!(Token.Shl, '=', Token.ShlAsgn)(tok);
			contNextChar!(Token.LessGreater, '=', Token.LessEqGreater)(tok);
			break;
		case '>':
			tok.type = checkNextChar!(Token.GreaterThan, '=', Token.GreaterEq, '>', Token.Shr)();
			contNextChar!(Token.Shr, '=', Token.ShrAsgn)(tok);
			break;

		case '!':
			// !  -> != !< !>
			tok.type = checkNextChar!(Token.Exclamation, '=', Token.Unequal, '<', Token.UnordGreaterEq, '>', Token.UnordLessEq)();
			// !< -> !<= !<>
			contNextChar!(Token.UnordGreaterEq, '=', Token.UnordGreater, '>', Token.UnordEq)(tok);
			// !<> -> !<>=
			contNextChar!(Token.UnordEq, '=', Token.Unordered)(tok);
			// !> -> !>=
			contNextChar!(Token.UnordLessEq, '=', Token.UnordLess)(tok);
			break;

		case '.':
			tok.type = checkNextChar!(Token.Dot,         '.', Token.Elipsis)();
			if(tok.type == Token.Elipsis)
			{
				// ".." at the very end of the input is reported like any
				//  other incomplete ellipsis
				if(eof() || text[pos] != '.')
					throw new Exception("missing third '.' for '...'");
				nextChar();
			}
			break;

		case '#':
			nextChar();
			if(!startOfLine)
			{
				// '#' can be the last character of the input
				if(!eof() && text[pos] == '#')
				{
					tok.type = Token.FisFis;
					nextChar();
				}
				else
					tok.type = Token.Fis;
			}
			else if(skipSpace() == 0)
			{
				// take the directive name from the consumed characters (as
				//  done for identifiers below), so it is not affected by
				//  line continuations or by the end of a pushed text
				size_t identStart = curText.length;
				if (skipIdent())
				{
					string ident = curText[identStart .. $];
					switch(ident)
					{
					case "include": tok.type = Token.PPinclude; break;
					case "define":  tok.type = Token.PPdefine;  break;
					case "undef":   tok.type = Token.PPundef;   break;
					case "ifdef":   tok.type = Token.PPifdef;   break;
					case "ifndef":  tok.type = Token.PPifndef;  break;
					case "if":      tok.type = Token.PPif;      break;
					case "elif":    tok.type = Token.PPelif;    break;
					case "else":    tok.type = Token.PPelse;    break;
					case "endif":   tok.type = Token.PPendif;   break;
					default:        tok.type = Token.PPother;   break;
					}
				}
			}
			break;

		case '0','1','2','3','4','5','6','7','8','9':
			skipNumber();
			tok.type = Token.Number;
			break;

		case 'L':
			// L"..." and L'...' are wide string/character literals
			if(nextChar() && (text[pos] == '\"' || text[pos] == '\''))
				goto case '\"';
			skipAlnum();
			tok.type = Token.Identifier;
			break;

		case 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z':
			goto case;
		case 'A','B','C','D','E','F','G','H','I','J','K',    'M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z':
			goto case;
		case '_':
			skipIdent();
			string ident = curText;
			if(ppOnly)
				tok.type = Token.Identifier;
			else
				tok.type = identifierToKeyword(ident);
			break;

		case '$':
			nextChar();
			skipAlnum();
			tok.type = Token.Macro;
			break;
		case ';':
			if (enableASMComment)
			{
				skipLine();
				tok.type = Token.Comment;
			}
			else
			{
				tok.type = Token.Semicolon;
				nextChar();
			}
			break;
		case '/':
			nextChar();
			tok.type = Token.Div;
			if(!eof())
			{
				if(text[pos] == '/')
				{
					skipLine();
					tok.type = Token.Comment;
				}
				else if(text[pos] == '*')
				{
					skipComment();
					tok.type = Token.Comment;
				}
				else if(text[pos] == '=')
				{
					nextChar();
					tok.type = Token.DivAsgn;
				}
			}
			break;

		case '\'':
		case '\"':
			skipString();
			tok.type = Token.String;
			break;

		default:
			tok.type = Token.Other;
			nextChar();
			break;
		}

		countTokens++;
		tok.text = curText;
		return true;
	}

	string lastIndent; // white space found by skipSpace() after the last newline
	string text;       // text currently read from
	string curText;    // characters consumed since it was last cleared by next()

	int[] posstack;    // positions of the texts saved by pushText()
	string[] txtstack; // texts saved by pushText()

	uint pos;          // index of the current character in text
	int lineno;        // current line number, starting with 1
	int countTokens;   // number of tokens returned by next() since reinit()
	bool lastCharWasNewline; // the character consumed last was a newline
	bool skipNewline;        // treat newlines as white space instead of returning Token.Newline
	bool keepBackSlashAtEOL; // add "\\\n" to the token text for line continuations
	bool enableASMComment;   // ';' starts a comment to the end of the line

	static bool ppOnly;      // do not translate identifiers to keyword tokens
}
923