1 // This file is part of Visual D
2 //
3 // Visual D integrates the D programming language into Visual Studio
4 // Copyright (c) 2010 by Rainer Schuetze, All Rights Reserved
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt
8 
9 module c2d.tokutil;
10 
11 import c2d.tokenizer;
12 import c2d.dlist;
13 import c2d.dgutil;
14 
15 import std.string;
16 import std.ascii;
17 import std.array;
18 //static import std.regexp;
19 static import std.regex;
20 static import std.conv;
21 
22 //////////////////////////////////////////////////////////////////////////////
23 alias DList!(c2d.tokenizer.Token) TokenList;
24 alias DListIterator!(c2d.tokenizer.Token) TokenIterator;
25 
26 alias object.AssociativeArray!(string, const(TokenList)) _wa2; // fully instantiate type info for TokenList[string]
27 
28 struct TokenRange
29 {
30 	TokenIterator start;
31 	TokenIterator end;
32 }
33 
34 struct SubMatch
35 {
36 	string ident;
37 	TokenIterator start;
38 	TokenIterator end;
39 }
40 
41 //////////////////////////////////////////////////////////////////////////////
42 Token createToken(string pretext, string text, int type, int lineno)
43 {
44 	Token tok = new Token();
45 	tok.pretext = pretext;
46 	tok.text = text;
47 	tok.type = type;
48 	tok.lineno = lineno;
49 	return tok;
50 }
51 
52 Token createToken(Token tok)
53 {
54 	Token ntok = new Token();
55 	ntok.pretext = tok.pretext;
56 	ntok.text = tok.text;
57 	ntok.type = tok.type;
58 	ntok.lineno = tok.lineno;
59 	return ntok;
60 }
61 
62 bool isCommentToken(Token tok, bool checkPP = true)
63 {
64 	return tok.type == Token.Comment || tok.type == Token.Newline || (checkPP && Token.isPPToken(tok.type));
65 }
66 
67 void skipComments(ref TokenIterator tokIt, bool skipPP = true)
68 {
69 	while (!tokIt.atEnd() && isCommentToken(*tokIt, skipPP))
70 		tokIt.advance();
71 }
72 
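// comment out the rest of the source line starting at tokIt; the commented-out
// text is moved into the pretext of the first token of the following line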
73 void comment_line(ref TokenIterator tokIt)
74 {
75 	TokenIterator it = tokIt + 1;
76 	string txt = tokIt.pretext ~ "// " ~ tokIt.text;
77 	while(!it.atEnd() && it.pretext.indexOf('\n') < 0 && it.type != Token.EOF)
78 	{
79 		txt ~= it.pretext ~ it.text;
80 		it.advance();
81 	}
82 	if(!it.atEnd())
83 	{
84 		tokIt.eraseUntil(it);
85 		tokIt.pretext = txt ~ tokIt.pretext;
86 	}
87 	else
88 		tokIt.text = "// " ~ tokIt.text;
89 }
90 
91 void nextToken(ref TokenIterator tokIt, bool skipPP = true)
92 {
93 	tokIt.advance();
94 	skipComments(tokIt, skipPP);
95 }
96 
97 void checkToken(ref TokenIterator tokIt, int type, bool skipPP = true)
98 {
99 	skipComments(tokIt, skipPP);
100 	
101 	if(tokIt.atEnd() || tokIt.type != type)
102 	{
103 		string txt = tokIt.atEnd() ? "EOF" : tokIt.text;
104 		int lineno = tokIt.atEnd() ? ((tokIt - 1).atEnd() ? -1 : tokIt[-1].lineno) : tokIt.lineno;
105 		throwException(lineno, "expected " ~ Token.toString(type) ~ " instead of " ~ txt);
106 	}
107 	nextToken(tokIt, skipPP);
108 }
109 
110 void checkOperator(ref TokenIterator tokIt)
111 {
112 	// TODO: currently accepts any token as an operator
113 	if(tokIt.type == Token.BracketL)
114 	{
115 		nextToken(tokIt);
116 		checkToken(tokIt, Token.BracketR);
117 	}
118 	else
119 		nextToken(tokIt);
120 }
121 
122 string tokensToIdentifier(TokenIterator start, TokenIterator end)
123 {
124 	string ident;
125 	while(!start.atEnd() && start != end)
126 	{
127 		if(ident.length > 0 && start.text.length > 0)
128 			if(isAlphaNum(ident[$-1]) && isAlphaNum(start.text[0]))
129 				ident ~= " ";
130 		ident ~= start.text;
131 		++start;
132 	}
133 	return ident;
134 }
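
// A minimal usage sketch (assuming the usual tokenization of the snippet below):
// a blank is only inserted between two adjacent alphanumeric tokens.
unittest
{
	TokenList list = scanText("unsigned  int *");
	string ident = tokensToIdentifier(list.begin(), list.end());
	assume(ident == "unsigned int*");
}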
135 
136 void identifierToKeywords(TokenIterator start, TokenIterator end)
137 {
138 	while(!start.atEnd() && start != end)
139 	{
140 		if(start.type == Token.Identifier)
141 			start.type = Tokenizer.identifierToKeyword(start.text);
142 		++start;
143 	}
144 }
145 
146 void identifierToKeywords(TokenList list)
147 {
148 	return identifierToKeywords(list.begin(), list.end());
149 }
150 
151 //////////////////////////////////////////////////////////////////////////////
152 TokenList copyTokenList(TokenIterator start, TokenIterator end, bool cloneTokens = true)
153 {
154 	TokenList tokenList = new TokenList;
155 	for(TokenIterator it = start; it != end; ++it)
156 	{
157 		Token tok = cloneTokens ? createToken(*it) : *it;
158 		tokenList.append(tok);
159 	}
160 	return tokenList;
161 }
162 
163 TokenList copyTokenList(TokenRange range, bool cloneTokens = true)
164 {
165 	return copyTokenList(range.start, range.end, cloneTokens);
166 }
167 
168 TokenList copyTokenList(TokenList tokenList, bool cloneTokens = true)
169 {
170 	return copyTokenList(tokenList.begin(), tokenList.end(), cloneTokens);
171 }
172 
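// insert tokenList in front of insBefore; a trailing EOF token of the inserted
// list is dropped and its pretext merged into insBefore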
173 TokenIterator insertTokenList(TokenIterator insBefore, TokenList tokenList)
174 {
175 	if(tokenList.empty())
176 		return insBefore;
177 	TokenIterator endit = tokenList.end() - 1;
178 	if(endit.type == Token.EOF && !insBefore.atEnd())
179 	{
180 		insBefore.pretext = endit.pretext ~ insBefore.pretext;
181 		endit.erase;
182 	}
183 	return insBefore.insertListBefore(tokenList);
184 }
185 
186 string tokenListToString(TokenIterator start, TokenIterator end, bool checkSpaceBetweenIdentifiers = false,
187 			 bool normalizePreText = false)
188 {
189 	string text;
190 	string prevtext;
191 	for(TokenIterator tokIt = start; tokIt != end; ++tokIt)
192 	{
193 		Token tok = *tokIt;
194 		string txt = normalizePreText ? tok.text : tok.pretext ~ tok.text;
195 		if(checkSpaceBetweenIdentifiers || normalizePreText)
196 		{
197 			if (prevtext == "__")
198 				txt = tok.text;
199 			else if (tok.text == "__")
200 				txt = "";
201 			else if (txt.length && prevtext.length)
202 			{
203 				char prevch = prevtext[$-1];
204 				char ch = txt[0];
205 				if((isAlphaNum(ch) || ch == '_') && (isAlphaNum(prevch) || prevch == '_'))
206 					txt = " " ~ txt;
207 			}
208 			prevtext = tok.text;
209 		}
210 		text ~= txt;
211 	}
212 	return text;
213 }
214 
215 string tokenListToString(TokenList tokenList, bool checkSpaceBetweenIdentifiers = false)
216 {
217 	return tokenListToString(tokenList.begin(), tokenList.end(), checkSpaceBetweenIdentifiers);
218 }
219 
220 bool compareTokenList(TokenIterator start1, TokenIterator end1, TokenIterator start2, TokenIterator end2)
221 {
222 	TokenIterator it1 = start1;
223 	TokenIterator it2 = start2;
224 	for( ; it1 != end1 && it2 != end2; ++it1, ++it2)
225 		if(it1.text != it2.text)
226 			return false;
227 
228 	return it1 == end1 && it2 == end2;
229 }
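
// Minimal sketch (assuming the usual tokenization): compareTokenList only
// compares the token text, so differences in whitespace do not matter.
unittest
{
	TokenList l1 = scanText("int  x ;");
	TokenList l2 = scanText("int x;");
	assume(compareTokenList(l1.begin(), l1.end(), l2.begin(), l2.end()));
}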
230 
231 //////////////////////////////////////////////////////////////////////////////
232 void reindentList(TokenIterator start, TokenIterator end, int indent, int tabsize)
233 {
234 	for(TokenIterator tokIt = start; tokIt != end; ++tokIt)
235 		tokIt.pretext = reindent(tokIt.pretext, indent, tabsize);
236 }
237 
238 void reindentList(TokenList tokenList, int indent, int tabsize)
239 {
240 	return reindentList(tokenList.begin(), tokenList.end(), indent, tabsize);
241 }
242 
243 //////////////////////////////////////////////////////////////////////////////
244 bool isClosingBracket(int type)
245 {
246 	return (type == Token.BraceR || type == Token.BracketR || type == Token.ParenR);
247 }
248 
249 bool isOpeningBracket(int type)
250 {
251 	return (type == Token.BraceL || type == Token.BracketL || type == Token.ParenL);
252 }
253 
254 bool isBracketPair(dchar ch1, dchar ch2)
255 {
256 	switch(ch1)
257 	{
258 		case '{': return ch2 == '}';
259 		case '}': return ch2 == '{';
260 		case '(': return ch2 == ')';
261 		case ')': return ch2 == '(';
262 		case '[': return ch2 == ']';
263 		case ']': return ch2 == '[';
264 		default:  return false;
265 	}
266 }
267 
268 //////////////////////////////////////////////////////////////////////////////
269 // iterator on token after closing bracket
270 bool advanceToClosingBracket(ref TokenIterator it, TokenIterator stopIt)
271 {
272 	TokenIterator prevIt = it; // for debugging
273 	int lineno = it.lineno;
274 	int open = it.type;
275 	int close;
276 	switch(open)
277 	{
278 	case Token.ParenL:
279 		close = Token.ParenR;
280 		break;
281 	case Token.BraceL:
282 		close = Token.BraceR;
283 		break;
284 	case Token.BracketL:
285 		close = Token.BracketR;
286 		break;
287 	default:
288 		throwException(lineno, "opening bracket expected instead of " ~ it.text);
289 	}
290 
291 	int level = 1;
292 	++it;
293 	while (level > 0)
294 	{
295 		if(it == stopIt)
296 			return false;
297 		if(it.atEnd())
298 			throwException(lineno, "end of file while looking for closing bracket");
299 		if(it.type == open)
300 			level++;
301 		else if(it.type == close)
302 			level--;
303 		++it;
304 	}
305 	return true;
306 }
307 
308 bool advanceToClosingBracket(ref TokenIterator it)
309 {
310 	TokenIterator noStop;
311 	return advanceToClosingBracket(it, noStop);
312 }
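
// Minimal usage sketch (assuming the usual tokenization): starting on the
// outer '(', the iterator ends up on the first token after its matching ')'.
unittest
{
	TokenList list = scanText("f ( ( a ) ) b");
	TokenIterator it = list.begin() + 1; // on the outer '('
	assume(advanceToClosingBracket(it));
	assume(it.text == "b");
}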
313 
314 // iterator on token with opening bracket
315 bool retreatToOpeningBracket(ref TokenIterator it, TokenIterator stopIt)
316 {
317 	int lineno = it.lineno;
318 	int open;
319 	int close = it.type;
320 	switch(close)
321 	{
322 	case Token.ParenR:
323 		open = Token.ParenL;
324 		break;
325 	case Token.BraceR:
326 		open = Token.BraceL;
327 		break;
328 	case Token.BracketR:
329 		open = Token.BracketL;
330 		break;
331 	default:
332 		throwException(lineno, "closing bracket expected instead of " ~ it.text);
333 	}
334 
335 	int level = 1;
336 	while (level > 0)
337 	{
338 		--it;
339 		if(it == stopIt)
340 			return false;
341 		if(it.atEnd())
342 			throwException(lineno, "beginning of file while looking for opening bracket");
343 		if(it.type == close)
344 			level++;
345 		else if(it.type == open)
346 			level--;
347 	}
348 	return true;
349 }
350 
351 bool retreatToOpeningBracket(ref TokenIterator it)
352 {
353 	TokenIterator noStop;
354 	return retreatToOpeningBracket(it, noStop);
355 }
356 
357 //////////////////////////////////////////////////////////////////////////////
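// tokenize text and append the tokens to tokenList (works for TokenList, Token[]
// and string[]); comments are accumulated into the pretext of the next token, and
// with combinePP a preprocessor directive and its continuation lines are combined
// into a single token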
358 static void scanAny(TL)(ref TL tokenList, string text, int lineno = 1, bool combinePP = true)
359 {
360 	Tokenizer tokenizer = new Tokenizer(text);
361 	tokenizer.keepBackSlashAtEOL = true;
362 	tokenizer.lineno = lineno;
363 
364 	try
365 	{
366 		string pretext;
367 		Token pptok = new Token;
368 		Token tok;
369 		do
370 		{
371 			tok = new Token;
372 			tokenizer.next(tok);
373 
374 			if(combinePP && Token.isPPToken(tok.type))
375 			{
376 				tokenizer.skipNewline = false;
377 				while(tokenizer.next(pptok) && pptok.type != Token.Newline)
378 					tok.text ~= pptok.pretext ~ pptok.text;
379 				tokenizer.skipNewline = true;
380 				tok.text ~= pptok.pretext;
381 				if(pptok.type == Token.Newline)
382 					tok.text ~= "\n";
383 			}
384 			switch(tok.type)
385 			{
386 			case Token.Comment:
387 				if(startsWith(tok.text, ";")) // asm comment?
388 					pretext ~= tok.pretext ~ "//" ~ tok.text;
389 				else
390 					pretext ~= tok.pretext ~ tok.text;
391 				break;
392 
393 			case Token.__Asm:
394 				tokenizer.enableASMComment = true;
395 				tokenizer.skipNewline = false;
396 				goto default;
397 
398 			case Token.BraceR:
399 				if(tokenizer.enableASMComment)
400 				{
401 					tokenizer.enableASMComment = false;
402 					tokenizer.skipNewline = true;
403 				}
404 				goto default;
405 
406 			default:
407 				tok.pretext = pretext ~ tok.pretext;
408 				static if(is(TL == Token[]))
409 					tokenList ~= tok;
410 				else static if(is(TL == string[]))
411 					tokenList ~= tok.text;
412 				else
413 					tokenList.append(tok);
414 				pretext = "";
415 				break;
416 			}
417 		} 
418 		while (tok.type != Token.EOF);
419 
420 	}
421 	catch(Exception e)
422 	{
423 		e.msg = "(" ~ std.conv.text(tokenizer.lineno) ~ "):" ~ e.msg;
424 		throw e;
425 	}
426 }
427 
428 TokenList scanText(string text, int lineno = 1, bool combinePP = true)
429 {
430 	TokenList tokenList = new TokenList;
431 	scanAny(tokenList, text, lineno, combinePP);
432 	return tokenList;
433 }
434 
435 void scanTextArray(TYPE)(ref TYPE[] tokens, string text, int lineno = 1, bool combinePP = true)
436 {
437 	scanAny(tokens, text, lineno, combinePP);
438 
439 	static if(is(TYPE == string))
440 	{
441 		while(tokens.length > 0 && tokens[$-1].length == 0)
442 			tokens = tokens[0..$-1];
443 	}
444 	else
445 	{
446 		while(tokens.length > 0 && tokens[$-1].text.length == 0)
447 			tokens = tokens[0..$-1];
448 	}
449 }
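
// Minimal sketch (assuming the usual tokenization): scanning into a string[]
// yields just the token texts, with trailing empty (EOF) entries stripped.
unittest
{
	string[] toks;
	scanTextArray!(string)(toks, "a + b");
	assume(toks == ["a", "+", "b"]);
}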
450 
451 ///////////////////////////////////////////////////////////////////////
452 int findSubmatch(ref SubMatch[] submatch, string ident)
453 {
454 	for(int i = 0; i < submatch.length; i++)
455 		if(submatch[i].ident == ident)
456 			return i;
457 	return -1;
458 }
459 
460 ///////////////////////////////////////////////////////////////////////
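// search for the token sequence given by the texts in search, starting at it.
// Placeholders in the search pattern are recorded as submatches:
//   $_num      matches a number literal
//   $_string   matches a string literal
//   $_ident    matches a single identifier
//   $_dotident matches an identifier possibly continued by ".ident" parts
//   $_expr     matches any tokens up to the terminating ';'
//   $_not X    matches a single token that does not match X
//   $_opt X    optionally matches X
//   $name      (any other placeholder) matches the token sequence up to the
//              point where the rest of the pattern matches
// A placeholder that occurs again must match the same text as its first match.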
461 bool findTokenSequence(TokenIterator it, string[] search, bool checkBracketsSearch, bool checkBracketsMatch,
462                        string stopText, ref TokenRange match, ref SubMatch[] submatch)
463 {
464 	if(search.length == 0)
465 	{
466 		match.start = it;
467 		match.end = it;
468 		return true;
469 	}
470 
471 	void addSubmatch(string search, TokenIterator start, TokenIterator end)
472 	{
473 		SubMatch smatch;
474 		smatch.ident = search;
475 		smatch.start = start;
476 		smatch.end = end;
477 		submatch ~= smatch;
478 	}
479 
480 	bool compareTokens(TokenIterator start, TokenIterator end, ref TokenIterator it)
481 	{
482 		for(TokenIterator sit = start; !sit.atEnd() && sit != end; ++sit)
483 		{
484 			string sittext = strip(sit.text);
485 			if(sittext.length == 0)
486 				continue;
487 			while(!it.atEnd() && strip(it.text).length == 0)
488 				++it;
489 			if(it.atEnd())
490 				return false;
491 			if(strip(it.text) != sittext)
492 				return false;
493 			++it;
494 		}
495 		return true;
496 	}
497 	bool compareSubmatch(ref SubMatch sm, string txt)
498 	{
499 		string s = tokenListToString(sm.start, sm.end);
500 		return strip(s) == strip(txt);
501 	}
502 
503 	size_t p = 0;
504 	while(p < search.length && search[p].length == 0)
505 		p++;
506 	if(p >= search.length)
507 		return false;
508 
509 	size_t prevsubmatchLength = submatch.length;
510 
511 	while(!it.atEnd() && (stopText.length == 0 || it.text != stopText || search[p] == stopText))
512 	{
513 		bool dollar = indexOf(search[p], '$') >= 0;
514 		if(strip(it.text) == search[p] || dollar)
515 		{
516 			TokenIterator mit = it + (dollar ? 0 : 1);
517 			size_t i = p + (dollar ? 0 : 1);
518 			while(i < search.length && search[i].length == 0)
519 				i++;
520 			while(!mit.atEnd() && i < search.length)
521 			{
522 				string mittext = strip(mit.text);
523 				if(mittext.length == 0)
524 				{
525 					++mit;
526 					continue;
527 				}
528 				if(startsWith(search[i], "$"))
529 				{
530 					int idx = findSubmatch(submatch, search[i]);
531 					if(idx >= 0)
532 					{
533 						if(!compareTokens(submatch[idx].start, submatch[idx].end, mit))
534 							goto Lnomatch;
535 						goto LnoAdvance;
536 					}
537 					else if(startsWith(search[i], "$_num"))
538 					{
539 						if(mit.type != Token.Number)
540 							break;
541 						addSubmatch(search[i], mit, mit + 1);
542 					}
543 					else if(startsWith(search[i], "$_string"))
544 					{
545 						if(mit.type != Token.String)
546 							break;
547 						addSubmatch(search[i], mit, mit + 1);
548 					}
549 					else if(startsWith(search[i], "$_ident"))
550 					{
551 						if(mit.type != Token.Identifier)
552 							break;
553 						addSubmatch(search[i], mit, mit + 1);
554 					}
555 					else if(startsWith(search[i], "$_dotident"))
556 					{
557 						if(mit.type != Token.Identifier)
558 							break;
559 
560 						TokenIterator start = mit;
561 						while(!(mit + 1).atEnd() && !(mit + 2).atEnd() && 
562 						       mit[1].type == Token.Dot && mit[2].type == Token.Identifier)
563 						{
564 							mit.advance();
565 							mit.advance();
566 						}
567 						addSubmatch(search[i], start, mit + 1);
568 					}
569 					else if(startsWith(search[i], "$_expr"))
570 					{
571 						// ok to allow empty expression?
572 						TokenRange tailmatch;
573 						if (!findTokenSequence(mit, search[i+1 .. $], true, true, ";",
574 								       tailmatch, submatch))
575 						       break;
576 						addSubmatch(search[i], mit, tailmatch.start);
577 						mit = tailmatch.end;
578 						i = search.length;
579 						break;
580 					}
581 					else if(startsWith(search[i], "$_not") && i + 1 < search.length)
582 					{
583 						if(startsWith(search[i + 1], "$_ident"))
584 						{
585 							if(mit.type == Token.Identifier)
586 								break;
587 						}
588 						else if(startsWith(search[i + 1], "$_num"))
589 						{
590 							if(mit.type == Token.Number)
591 								break;
592 						}
593 						else if(startsWith(search[i + 1], "$_string"))
594 						{
595 							if(mit.type == Token.String)
596 								break;
597 						}
598 						else if(mittext == search[i + 1])
599 							break;
600 						addSubmatch(search[i], mit, mit + 1);
601 						i++;
602 					}
603 					else if(startsWith(search[i], "$_opt"))
604 					{
605 						i++;
606 						if(i < search.length && mittext == search[i])
607 							addSubmatch(search[i-1], mit, mit + 1);
608 						else
609 						{
610 							addSubmatch(search[i-1], mit, mit);
611 							goto LnoAdvance; // nothing matched
612 						}
613 					}
614 					else
615 					{
616 						TokenRange tailmatch;
617 						if (!findTokenSequence(mit, search[i+1 .. $], checkBracketsMatch, checkBracketsMatch, 
618 								       stopText, tailmatch, submatch))
619 							break;
620 						addSubmatch(search[i], mit, tailmatch.start);
621 						mit = tailmatch.end;
622 						i = search.length;
623 						break;
624 					}
625 				}
626 				else
627 				{
628 					ptrdiff_t idx = indexOf(search[i], '$');
629 					if(idx < 0)
630 					{
631 						if (mittext != search[i])
632 							break;
633 					}
634 					else if(mittext.length < idx)
635 						break;
636 					else if(mittext[0 .. idx] != search[i][0 .. idx])
637 						break;
638 					else
639 					{
640 						int sidx = findSubmatch(submatch, search[i][idx .. $]);
641 						if(sidx < 0)
642 						{
643 							// create dummy token and list to add a submatch
644 							Token subtok = createToken("", mittext[idx .. $], Token.Identifier, mit.lineno);
645 							TokenList sublist = new TokenList;
646 							sublist.append(subtok);
647 							addSubmatch(search[i][idx .. $], sublist.begin(), sublist.end());
648 						}
649 						else if(!compareSubmatch(submatch[sidx], mittext[idx .. $]))
650 							break;
651 					}
652 				}
653 				++mit;
654 			LnoAdvance:
655 				i++;
656 				while(i < search.length && search[i].length == 0)
657 					i++;
658 			}
659 			if(i >= search.length)
660 			{
661 				match.start = it;
662 				match.end = mit;
663 				return true;
664 			}
665 		Lnomatch:
666 			submatch.length = prevsubmatchLength;
667 		}
668 		if(checkBracketsSearch && isOpeningBracket(it.type))
669 			advanceToClosingBracket(it);
670 		else if(checkBracketsSearch && isClosingBracket(it.type))
671 			break;
672 		else 
673 			it.advance();
674 	}
675 	return false;
676 }
677 
678 TokenList createReplacementTokenList(RTYPE) (RTYPE[] replace, TokenRange match, ref SubMatch[] submatch)
679 {
680 	TokenList tokenList = new TokenList;
681 	for(int i = 0; i < replace.length; i++)
682 	{
683 		string reptext;
684 		string pretext;
685 		int type = Token.PPinsert;
686 		static if (is(RTYPE == Token))
687 		{
688 			reptext = replace[i].text;
689 			pretext = replace[i].pretext;
690 			type = replace[i].type;
691 			if(reptext == "$" && i + 1 < replace.length && replace[i+1].pretext == "")
692 			{
693 				reptext ~= replace[i + 1].text;
694 				i++;
695 			}
696 		}
697 		else
698 		{
699 			reptext = replace[i];
700 		}
701 
702 		if(reptext == "$*")
703 			tokenList.appendList(copyTokenList(match));
704 
705 		else if(startsWith(reptext, "$"))
706 		{
707 			int idx = findSubmatch(submatch, reptext);
708 			if(idx < 0)
709 				throwException("no submatch for " ~ reptext);
710 
711 			TokenList list = copyTokenList(submatch[idx].start, submatch[idx].end);
712 			if(!list.empty && !list.begin().pretext.length) //&& pretext.length)
713 				list.begin().pretext = pretext; // ~ list.begin().pretext;
714 			tokenList.appendList(list);
715 		}
716 		else
717 		{
718 			Token tok = createToken(pretext, reptext, type, 0);
719 			tokenList.append(tok);
720 		}
721 	}
722 	return tokenList;
723 }
724 
725 
726 int _replaceTokenSequence(RTYPE)(TokenList srctoken, string[] search, RTYPE[] replace, bool checkBrackets)
727 {
728 	if(search.length == 0)
729 		return 0;
730 
731 	for(int i = 0; i < search.length; i++)
732 		search[i] = strip(search[i]);
733 
734 	int cntReplacements = 0;
735 	TokenIterator it = srctoken.begin();
736 	for( ; ; )
737 	{
738 		TokenRange match;
739 		SubMatch[] submatch;
740 		if(!findTokenSequence(it, search, false, checkBrackets, "", match, submatch))
741 			break;
742 
743 		string pretext = match.start.pretext;
744 		match.start.pretext = "";
745 		TokenList tokenList = createReplacementTokenList(replace, match, submatch);
746 
747 		if(!tokenList.empty())
748 			tokenList.begin().pretext = pretext ~ tokenList.begin().pretext;
749 
750 		srctoken.remove(match.start, match.end);
751 		srctoken.insertListBefore(match.end, tokenList);
752 		
753 		it = match.end;
754 		// avoid recursing into the replacement?
755 		cntReplacements++;
756 	}
757 	return cntReplacements;
758 }
759 
760 int replaceTokenSequence(TokenList srctoken, string[] search, string[] replace, bool checkBrackets)
761 {
762 	return _replaceTokenSequence(srctoken, search, replace, checkBrackets);
763 }
764 
765 int replaceTokenSequence(TokenList srctoken, string search, string replace, bool checkBrackets)
766 {
767 	string[] searchTokens;
768 	scanTextArray!(string)(searchTokens, search);
769 	Token[] replaceTokens;
770 	scanTextArray!(Token)(replaceTokens, replace);
771 
772 	return _replaceTokenSequence(srctoken, searchTokens, replaceTokens, checkBrackets);
773 }
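
// Minimal usage sketch of the string based overload (assuming the usual
// tokenization): a plain search text replaced without any placeholders.
unittest
{
	TokenList list = scanText("a = foo(1);");
	replaceTokenSequence(list, "foo", "bar", true);
	assume(tokenListToString(list) == "a = bar(1);");
}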
774 
775 ///////////////////////////////////////////////////////////////////////
776 
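// collect the tokens of a single macro argument into a new list, stopping at the
// ',' or ')' that terminates it (nested parentheses are skipped as a whole);
// the iterator is left on the terminating token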
777 TokenList scanArgument(ref TokenIterator it)
778 {
779 	TokenIterator start = it;
780 	
781 	while(!it.atEnd() && it.type != Token.Comma && it.type != Token.ParenR)
782 	{
783 		if(it.type == Token.ParenL)
784 			advanceToClosingBracket(it);
785 		else
786 			it.advance();
787 
788 		if(it.atEnd())
789 			throwException(start.lineno, "unterminated macro invocation");
790 	}
791 
792 	TokenList tokenList = new TokenList;
793 	for( ; start != it; ++start)
794 		tokenList.append(*start);
795 
796 	return tokenList;
797 }
798 
799 void replaceArgument(ref TokenIterator defIt, TokenList list, void delegate(bool, TokenList) expandList)
800 {
801 	// defIt points to the identifier to replace
802 	string pretext = defIt.pretext;
803 	int lineno = 0;
804 	if(!list.empty())
805 		lineno = list.begin().lineno;
806 
807 	if(pretext.length > 0)
808 	{
809 		defIt.insertBefore(createToken(pretext, "", Token.Comment, defIt.lineno));
810 	}
811 	defIt.erase();
812 	if(!defIt.atBegin() && defIt[-1].type == Token.Fis)
813 	{
814 		if(expandList)
815 		{
816 			list = copyTokenList(list);
817 			expandList(true, list);
818 		}
819 		// TODO: should create escape sequences?
820 		string insText = "\"" ~ strip(tokenListToString(list)) ~ "\"";
821 		Token tok = createToken("", insText, Token.String, defIt[-1].lineno);
822 		defIt.retreat();
823 		defIt.insertAfter(tok);
824 		defIt.erase(); // remove '#'
825 	}
826 	else
827 	{
828 		bool org = ((!defIt.atBegin() && defIt[-1].type == Token.FisFis) || (!defIt.atEnd() && defIt.type == Token.FisFis));
829 		TokenList insList = copyTokenList(list);
830 		if(!org && expandList)
831 			expandList(true, insList);
832 
833 		TokenIterator ins = defIt;
834 		insertTokenList(ins, insList);
835 	}
836 }
837 
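// remove '##' (token paste) operators; if any were found, the remaining text is
// rescanned so that the pasted neighbours are merged into single tokens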
838 TokenList removeFisFis(TokenList tokens)
839 {
840 	int cntFisFis = 0;
841 	TokenIterator it = tokens.begin();
842 	while(!it.atEnd())
843 	{
844 		if(it.type == Token.FisFis)
845 		{
846 			it.erase();
847 			if(!it.atEnd())
848 				it.pretext = "";
849 			cntFisFis++;
850 		}
851 		it.advance();
852 	}
853 	if(cntFisFis == 0)
854 		return tokens;
855 	
856 	string text = strip(tokenListToString(tokens));
857 	TokenList newList = scanText(text, tokens.begin().lineno);
858 	return newList;
859 }
860 
861 // returns an iterator after the insertion; the ref argument it is set to an iterator at the beginning of the insertion
862 TokenIterator expandDefine(ref TokenIterator it, TokenList define, void delegate(bool, TokenList) expandList)
863 {
864 	define = copyTokenList(define, true);
865 	TokenIterator srcIt = it;
866 	TokenIterator defIt = define.begin() + 2;
867 	string pretext = srcIt.pretext;
868 	
869 	TokenList[string] args;
870 	checkToken(it, Token.Identifier, false);
871 	if(!defIt.atEnd() && defIt.type == Token.ParenL && defIt.pretext.length == 0)
872 	{
873 		nextToken(defIt, false);
874 		checkToken(it, Token.ParenL, false);
875 		if(defIt.type != Token.ParenR)
876 		{
877 			for( ; ; )
878 			{
879 				string ident = defIt.text;
880 				checkToken(defIt, Token.Identifier, false);
881 				args[ident] = scanArgument(it);
882 				if(defIt.type == Token.ParenR)
883 					break;
884 				checkToken(defIt, Token.Comma, false);
885 				checkToken(it, Token.Comma, false);
886 			}
887 		}
888 		checkToken(defIt, Token.ParenR, false);
889 		checkToken(it, Token.ParenR, false);
890 	}
891 
892 	if(!defIt.atEnd())
893 		defIt.pretext = stripLeft(defIt.pretext);
894 
895 	define.begin().eraseUntil(defIt);
896 	while(!defIt.atEnd())
897 	{
898 		defIt.pretext = replace(defIt.pretext, "\\\n", "\n");
899 		if(defIt.type == Token.Identifier)
900 			if(TokenList* list = defIt.text in args)
901 			{
902 				replaceArgument(defIt, *list, expandList);
903 				continue;
904 			}
905 		defIt.advance();
906 	}
907 
908 	if(!define.empty())
909 	{
910 		define = removeFisFis(define);
911 		srcIt.eraseUntil(it);  // makes srcIt invalid, but it stays valid
912 		srcIt = it;
913 		if(expandList)
914 		{
915 			expandList(false, define);
916 			it = insertTokenList(srcIt, define); // it is after insertion now
917 		}
918 		else
919 			it = insertTokenList(srcIt, define);
920 	}
921 	else
922 	{
923 		srcIt.eraseUntil(it);  // makes srcIt invalid, but it stays valid
924 		srcIt = it;
925 	}
926 	if(!it.atEnd())
927 		it.pretext = pretext ~ it.pretext;
928 	return srcIt;
929 }
930 
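// how a #define is translated: expanded in place, or converted into a string
// mixin used as an expression, a statement or a label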
931 enum MixinMode
932 {
933 	ExpandDefine,
934 	ExpressionMixin,
935 	StatementMixin,
936 	LabelMixin
937 }
938 
939 // expand the defines listed in defines in place (MixinMode.ExpandDefine) or translate them into mixin functions and mixin invocations
940 void expandPPdefines(TokenList srctokens, TokenList[string] defines, MixinMode mixinMode)
941 {
942 	for(TokenIterator it = srctokens.begin(); !it.atEnd(); )
943 	{
944 		if(it.type == Token.PPdefine)
945 		{
946 			string text = strip(it.text);
947 			TokenList defList = scanText(text, it.lineno, false);
948 			TokenIterator tokIt = defList.begin();
949 			assume(tokIt[0].type == Token.PPdefine);
950 			assume(tokIt[1].type == Token.Identifier);
951 
952 			if(TokenList* list = tokIt[1].text in defines)
953 			{
954 				// remove trailing comments
955 				while((defList.end()-1).text.empty())
956 					(defList.end()-1).erase();
957 
958 				*list = defList;
959 				if(mixinMode != MixinMode.ExpandDefine)
960 				{
961 					it.text = createMixinFunction(defList, mixinMode);
962 					it.type = Token.PPinsert;
963 				}
964 				else
965 				{
966 					string pretext = it.pretext;
967 					it.erase();
968 					it.pretext = pretext ~ it.pretext;
969 					continue;
970 				}
971 			}
972 			else
973 			{
974 				// expand content of define
975 				tokIt = tokIt + 2;
976 				if(tokIt.text == "(" && tokIt.pretext == "")
977 					advanceToClosingBracket(tokIt);
978 				bool changed = false;
979 				while(!tokIt.atEnd())
980 				{
981 					if(tokIt.type == Token.Identifier)
982 						if(TokenList* list = tokIt.text in defines)
983 						{
984 							if(*list !is null)
985 							{
986 								if(mixinMode != MixinMode.ExpandDefine)
987 									invokeMixin(tokIt, mixinMode);
988 								else
989 									expandDefine(tokIt, *list, null);
990 								changed = true;
991 								continue;
992 							}
993 						}
994 					tokIt.advance();
995 				}
996 				if(changed)
997 					it.text = tokenListToString(defList) ~ "\n";
998 			}
999 		}
1000 		else if(it.type == Token.PPundef)
1001 		{
1002 			TokenList undefList = scanText(it.text, it.lineno, false);
1003 			TokenIterator tokIt = undefList.begin();
1004 			assume(tokIt[0].type == Token.PPundef);
1005 			assume(tokIt[1].type == Token.Identifier);
1006 	
1007 			if(TokenList* list = tokIt[1].text in defines)
1008 			{
1009 				string pretext = it.pretext;
1010 				*list = null;
1011 				it.erase();
1012 				it.pretext = pretext ~ it.pretext;
1013 				continue;
1014 			}
1015 		}
1016 		else if(it.type == Token.Identifier)
1017 		{
1018 			if(TokenList* list = it.text in defines)
1019 			{
1020 				if(*list !is null)
1021 				{
1022 					if(mixinMode != MixinMode.ExpandDefine)
1023 						invokeMixin(it, mixinMode);
1024 					else
1025 						expandDefine(it, *list, null);
1026 					continue;
1027 				}
1028 			}
1029 		}
1030 		it.advance();
1031 	}
1032 }
1033 
1034 void insertTokenBefore(ref TokenIterator it, Token tok, string tokpretext = "")
1035 {
1036 	it.pretext ~= tok.pretext;
1037 	tok.pretext = tokpretext;
1038 	
1039 	it.insertBefore(tok);
1040 }
1041 
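// rewrite the macro invocation at the given iterator into a mixin expression,
// e.g. X(a) becomes mixin(X(a))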
1042 void invokeMixin(ref TokenIterator it, MixinMode mixinMode)
1043 {
1044 	TokenIterator start = it;
1045 	assume(it.type == Token.Identifier);
1046 	string text = "mixin(" ~ it.text;
1047 
1048 	nextToken(it);
1049 	if(it.type == Token.ParenL && it.pretext.length == 0)
1050 	{
1051 		nextToken(it, false);
1052 		text ~= "(";
1053 		if(it.type != Token.ParenR)
1054 		{
1055 			string sep;
1056 			for( ; ; )
1057 			{
1058 				TokenList arg = scanArgument(it);
1059 				string argtext = strip(tokenListToString(arg));
1060 				//text ~= sep ~ "\"" ~ argtext ~ "\"";
1061 				text ~= sep ~ argtext;
1062 				sep = ", ";
1063 
1064 				if(it.type == Token.ParenR)
1065 					break;
1066 				checkToken(it, Token.Comma, false);
1067 			}
1068 		}
1069 		text ~= ")";
1070 		nextToken(it, false);
1071 	}
1072 	text ~= ")";
1073 	if(mixinMode == MixinMode.StatementMixin && it.type != Token.Semicolon)
1074 		text ~= ";";
1075 	if(mixinMode == MixinMode.LabelMixin && it.type == Token.Colon)
1076 	{
1077 		text ~= ";";
1078 		it.erase();
1079 	}
1080 
1081 	start.insertBefore(createToken(start.pretext, text, Token.PPinsert, it.lineno));
1082 	start.eraseUntil(it);
1083 }
1084 
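// translate the token list of a #define into the source of a D function that
// returns the macro body as a string, so the macro can be used through mixin()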
1085 string createMixinFunction(TokenList tokList, MixinMode mixinMode)
1086 {
1087 	TokenIterator it = tokList.begin();
1088 	checkToken(it, Token.PPdefine, false);
1089 	string ident = it.text;
1090 	checkToken(it, Token.Identifier, false);
1091 
1092 	string text = "static __string " ~ ident ~ "(";
1093 
1094 	int[string] argsUsage;
1095 	if(it.type == Token.ParenL && it.pretext.length == 0)
1096 	{
1097 		nextToken(it);
1098 		if(it.type != Token.ParenR)
1099 		{
1100 			string sep;
1101 			for( ; ; )
1102 			{
1103 				string arg = it.text;
1104 				checkToken(it, Token.Identifier, false);
1105 				argsUsage[arg] = 0;
1106 
1107 				text ~= sep ~ "__string " ~ arg;
1108 				sep = ", ";
1109 
1110 				if(it.type == Token.ParenR)
1111 					break;
1112 				checkToken(it, Token.Comma, false);
1113 			}
1114 		}
1115 		nextToken(it);
1116 	}
1117 	text ~= ") { return \"";
1118 
1119 	if(!it.atEnd())
1120 		it.pretext = stripLeft(it.pretext);
1121 
1122 	while(!it.atEnd())
1123 	{
1124 		if(it.type == Token.Identifier && (it.text in argsUsage))
1125 		{
1126 			text ~= it.pretext ~ "\" ~ " ~ it.text ~ " ~ \"";
1127 			argsUsage[it.text]++;
1128 		}
1129 		else
1130 			text ~= replace(it.pretext ~ it.text, "\"", "\\\"");
1131 		it.advance();
1132 	}
1133 
1134 	if(mixinMode == MixinMode.StatementMixin && !endsWith(text, ";"))
1135 		text ~= ";";
1136 	if(mixinMode == MixinMode.LabelMixin && !endsWith(text, ";"))
1137 	{
1138 		if (!endsWith(text, ":"))
1139 			text ~= ":";
1140 		text ~= ";";
1141 	}
1142 
1143 	text = replace(text, "##", "");
1144 	text ~= "\"; }\n";
1145 	return text;
1146 }
1147 
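// replace each #define whose identifier matches one of the regular expressions in
// defines with the associated replacement text; "$id" in the replacement is
// substituted with the macro name and "$text" with the macro body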
1148 void regexReplacePPdefines(TokenList srctokens, string[string] defines)
1149 {
1150 	for(TokenIterator it = srctokens.begin(); !it.atEnd(); )
1151 	{
1152 		if(it.type == Token.PPdefine)
1153 		{
1154 			string text = strip(it.text);
1155 			TokenList defList = scanText(text, it.lineno, false);
1156 			TokenIterator tokIt = defList.begin();
1157 			assume(tokIt[0].type == Token.PPdefine);
1158 			assume(tokIt[1].type == Token.Identifier);
1159 
1160 			string ident = tokIt[1].text;
1161 			foreach(re, s; defines)
1162 			{
1163 				//if(std.regexp.find(ident, re) >= 0)
1164 				auto rex = std.regex.regex(re);
1165 				if(!std.regex.match(ident, rex).empty())
1166 				{
1167 					// no arguments supported so far
1168 					string posttext = "\n";
1169 					TokenIterator endIt = defList.end();
1170 					while(endIt[-1].type == Token.Newline || endIt[-1].type == Token.EOF || endIt[-1].type == Token.Comment)
1171 					{
1172 						endIt.retreat();
1173 						posttext = endIt.pretext ~ endIt.text ~ posttext;
1174 					}
1175 					string toktext = tokenListToString(tokIt + 2, endIt);
1176 					string txt = s;
1177 					txt = replace(txt, "$id", ident);
1178 					txt = replace(txt, "$text", toktext);
1179 					it.pretext ~= tokIt.pretext;
1180 					it.text = txt ~ posttext;
1181 					it.type = Token.PPinsert;
1182 					break;
1183 				}
1184 			}
1185 		}
1186 		it.advance();
1187 	}
1188 }
1189 
1190 ///////////////////////////////////////////////////////////////////////
1191 
1192 string testDefine(string txt, TokenList[string] defines)
1193 {
1194 	TokenList list = scanText(txt);
1195 	expandPPdefines(list, defines, MixinMode.ExpandDefine);
1196 	// src.fixConditionalCompilation();
1197 	string res = tokenListToString(list);
1198 	return res;
1199 }
1200 
1201 unittest
1202 {
1203 	string txt = 
1204 		  "#define X(a) a\n"
1205 		~ "before X(1) after\n"
1206 		~ "#undef X\n"
1207 		~ "X(2)\n"
1208 		~ "#define X(a)\n"
1209 		~ "X(3)\n"
1210 		;
1211 
1212 	string exp = 
1213 		  "before 1 after\n"
1214 		~ "X(2)\n"
1215 		~ "\n"
1216 		;
1217 
1218 	TokenList[string] defines = [ "X" : null ];
1219 	string res = testDefine(txt, defines);
1220 	assume(res == exp);
1221 }
1222 
1223 unittest
1224 {
1225 	string txt = 
1226 		  "#define X(a) #a\n"
1227 		~ "X(1)\n"
1228 		~ "#undef X\n"
1229 		~ "#define X(a) x(#a)\n"
1230 		~ "X(1+2+3)\n"
1231 		;
1232 
1233 	string exp = 
1234 		  "\"1\"\n"
1235 		~ "x(\"1+2+3\")\n"
1236 		;
1237 
1238 	TokenList[string] defines = [ "X" : null ];
1239 	string res = testDefine(txt, defines);
1240 	assume(res == exp);
1241 }
1242 
1243 unittest
1244 {
1245 	string txt = 
1246 		  "#define X(a) a##1\n"
1247 		~ "X(2)\n"
1248 		;
1249 
1250 	string exp = 
1251 		"21\n"
1252 		;
1253 
1254 	TokenList[string] defines = [ "X" : null ];
1255 	string res = testDefine(txt, defines);
1256 	assume(res == exp);
1257 }
1258 
1259 ///////////////////////////////////////////////////////////////////////
1260 
1261 string testMixin(string txt, TokenList[string] mixins)
1262 {
1263 	TokenList list = scanText(txt);
1264 	expandPPdefines(list, mixins, MixinMode.StatementMixin);
1265 	// src.fixConditionalCompilation();
1266 	string res = tokenListToString(list);
1267 	return res;
1268 }
1269 
1270 unittest
1271 {
1272 	string txt = 
1273 		  "#define X(a) x = a;\n"
1274 		~ "X(b);\n"
1275 		;
1276 
1277 	string exp = 
1278 		  "static __string X(__string a) { return \"x = \" ~ a ~ \";\"; }\n"
1279 		~ "mixin(X(b));\n"
1280 		;
1281 
1282 	TokenList[string] mixins = [ "X" : null ];
1283 	string res = testMixin(txt, mixins);
1284 	assume(res == exp);
1285 }
1286 
1287 ///////////////////////////////////////////////////////////////////////
1288 
1289 string testReplace(string txt, TokenList[string] defines)
1290 {
1291 	TokenList list = scanText(txt);
1292 	expandPPdefines(list, defines, MixinMode.ExpandDefine);
1293 	// src.fixConditionalCompilation();
1294 	string res = tokenListToString(list);
1295 	return res;
1296 }
1297 
1298 unittest
1299 {
1300 	string txt = 
1301 		  "  if (list_freelist) {\n"
1302 		~ "    list--;\n"
1303 		~ "__static_if(MEM_DEBUG) {\n"
1304 		~ "    mem_setnewfileline(list,file,line);\n"
1305 		~ "}\n"
1306 		~ "  } else {\n"
1307 		~ "    list++;\n"
1308 		~ "  }\n"
1309 		;
1310 
1311 	string exp = 
1312 		  "  if (list_freelist) {\n"
1313 		~ "    list--;\n"
1314 		~ "  } else {\n"
1315 		~ "    list++;\n"
1316 		~ "  }\n"
1317 		;
1318 
1319 	TokenList list = scanText(txt);
1320 
1321 	replaceTokenSequence(list, "__static_if(MEM_DEBUG) { $1 } else { $2 }", "$2", true);
1322 	replaceTokenSequence(list, "__static_if(MEM_DEBUG) { $1 }", "", true);
1323 
1324 	string res = tokenListToString(list);
1325 	assume(res == exp);
1326 }
1327 
1328 unittest
1329 {
1330 	string txt = 
1331 		  "#define X(p) \\\n"
1332 		~ "    int p##1(); \\\n"
1333 		~ "    int p##2(); \\\n"
1334 		~ "    int p##3();\n"
1335 		~ "X(a)\n"
1336 		~ "X(b)\n"
1337 		~ "X(c)\n";
1338 
1339 	string exp = 
1340 		  "int a1(); \n"
1341 		~ "    int a2(); \n"
1342 		~ "    int a3();\n"
1343 		~ "int b1(); \n"
1344 		~ "    int b2(); \n"
1345 		~ "    int b3();\n"
1346 		~ "int c1(); \n"
1347 		~ "    int c2(); \n"
1348 		~ "    int c3();\n";
1349 
1350 	TokenList list = scanText(txt);
1351 
1352 	TokenList[string] defines = [ "X" : null ];
1353 	expandPPdefines(list, defines, MixinMode.ExpandDefine);
1354 
1355 	string res = tokenListToString(list);
1356 	assume(res == exp);
1357 }
1358 
1359 unittest 
1360 {
1361 	string txt = "0 a __ b c";
1362 	TokenList list = scanText(txt);
1363 	string ntxt = tokenListToString(list, true);
1364 	assume(ntxt == "0 ab c");
1365 }