//modified by Germain Malenfant (germain.malenfant@searchevolution.com) %{ unit DLex; (* DelphiLex.L: lexical analyzer for Delphi Pascal, adapted to TP Lex, adapted from A.G. Sources by Thierry Coq, 1997 *) %} %{ (* * lex input file for delphi pascal scanner * extensions: to ways to spell "external" and "->" ok for "^". * *) %} %{ (* Note: Keywords are determined by scanning a keyword table, rather than including the keyword patterns in the Lex source which is done in the original version of this file. I prefer this method, because it makes the grammar itself more readable (handling case-insensitive keywords in Lex is quite cumbersome, e.g., you will have to write something like [Aa][Nn][Dd] to match the keyword `and'), and also produces a more (space-) efficient analyzer (184 states and 375 transitions for the keyword pattern version, against only 40 states and 68 transitions for the keyword table version). *) interface uses SysUtils, Classes, YaccUtilities, LexLib; type TDelphiLexer = Class( TCustomLexer) public chaine : string; directiveToken : integer; // utility functions function upper(str : String) : String; function isKeyword(id : string; var token : integer) : boolean; function isDirective(id : string; var token : integer) : boolean; function istype(id : string; var token : integer) : boolean; // Lexer main functions function yylex : Integer; override; procedure yyaction( yyruleno : integer); procedure commenteof; end; // TDelphiLexer; implementation uses Delphi; // implementation of RDD Yaccer. //=============================================== // KeyWords and Directives Arrays, constant. //=============================================== //please respect alphabetic order for binary search const id_len = 20; type Ident = string[id_len]; const (* table of Delphi Pascal keywords: *) no_of_keywords = 80; keyword : array [1..no_of_keywords] of Ident = ( { asm keyword was retired for now } 'AND', 'ARRAY', 'AS', 'ASM', 'AUTOMATED', 'BEGIN', 'CASE', 'CLASS', 'CONST', 'CONSTRUCTOR', 'CONTAINS', 'DEPRECATED', 'DESTRUCTOR', 'DISPINTERFACE', 'DIV', 'DO', 'DOWNTO', 'ELSE', 'END', 'EXCEPT', 'EXPORTS', 'EXTERNAL', 'FILE', 'FINALIZATION', 'FINALLY', 'FOR', 'FORWARD', 'FUNCTION', 'GOTO', 'IF', 'IMPLEMENTATION', 'IMPLEMENTS', 'IN', 'INHERITED', 'INITIALIZATION','INLINE', 'INTERFACE', 'IS', 'LABEL', 'LIBRARY', 'MOD', 'NIL', 'NOT', 'OBJECT', 'OF', 'ON', 'OR', 'OUT', 'PACKAGE', 'PACKED', 'PLATFORM', 'PRIVATE', 'PROCEDURE', 'PROGRAM', 'PROPERTY', 'PROTECTED', 'PUBLIC', 'PUBLISHED', 'RAISE', 'RECORD', 'REPEAT', 'REQUIRES', 'RESOURCESTRING','SET', 'SHL', 'SHR', 'STRING', 'STRINGRESOURCE','THEN', 'THREADVAR', 'TO', 'TRY', 'TYPE', 'UNIT', 'UNTIL', 'USES', 'VAR', 'WHILE', 'WITH', 'XOR' ); keyword_token : array [1..no_of_keywords] of integer = ( _AND_, _ARRAY_, _AS_, _ASM_, _AUTOMATED_, _BEGIN_, _CASE_, _CLASS_, _CONST_, _CONSTRUCTOR_, _CONTAINS_, _DEPRECATED_, _DESTRUCTOR_, _DISPINTERFACE_, _DIV_, _DO_, _DOWNTO_, _ELSE_, _END_, _EXCEPT_, _EXPORTS_, _EXTERNAL_, _FILE_, _FINALIZATION_, _FINALLY_, _FOR_, _FORWARD_, _FUNCTION_, _GOTO_, _IF_, _IMPLEMENTATION_, _IMPLEMENTS_, _IN_, _INHERITED_, _INITIALIZATION_,_INLINE_, _INTERFACE_, _IS_, _LABEL_, _LIBRARY_, _MOD_, _NIL_, _NOT_, _OBJECT_, _OF_, _ON_, _OR_, _OUT_, _PACKAGE_, _PACKED_, _PLATFORM_, _PRIVATE_, _PROCEDURE_, _PROGRAM_, _PROPERTY_, _PROTECTED_, _PUBLIC_, _PUBLISHED_, _RAISE_, _RECORD_, _REPEAT_, _REQUIRES_, _RESOURCESTRING_,_SET_, _SHL_, _SHR_, _STRING_, _STRINGRESOURCE_,_THEN_, _THREADVAR_, _TO_, _TRY_, _TYPE_, _UNIT_, _UNTIL_, _USES_, _VAR_, _WHILE_, _WITH_, _XOR_ ); (* table of Delphi Pascal Directives : *) no_of_directives = 31; directive : array [1..no_of_directives] of Ident = ( 'ABSOLUTE', 'ABSTRACT', 'ASSEMBLER', 'AT' , 'CDECL', 'DEFAULT', 'DISPID', 'DYNAMIC', 'EXPORT', 'FAR', 'INDEX', 'LOCAL', 'MESSAGE', 'NAME', 'NEAR', 'NODEFAULT', 'OVERLOAD', 'OVERRIDE', 'PASCAL', 'READ', 'READONLY', 'REGISTER', 'REINTRODUCE', 'RESIDENT', 'SAFECALL', 'STDCALL', 'STORED', 'VARARGS', 'VIRTUAL', 'WRITE', 'WRITEONLY' ); directive_token : array [1..no_of_directives] of integer = ( _ABSOLUTE_, _ABSTRACT_, _ASSEMBLER_, _AT_, _CDECL_, _DEFAULT_, _DISPID_, _DYNAMIC_, _EXPORT_, _FAR_, _INDEX_, _LOCAL_, _MESSAGE_, _NAME_, _NEAR_, _NODEFAULT_, _OVERLOAD_, _OVERRIDE_, _PASCAL_, _READ_, _READONLY_, _REGISTER_, _REINTRODUCE_, _RESIDENT_, _SAFECALL_, _STDCALL_, _STORED_, _VARARGS_, _VIRTUAL_, _WRITE_, _WRITEONLY_ ); (* table of Delphi Pascal Type : *) no_of_type = 22; (* string is a keyword *) typestr : array [1..no_of_type] of Ident = ( 'ANSISTRING', 'BOOLEAN', 'BYTE', 'CHAR', 'COMP', 'CURRENCY', 'DOUBLE', 'EXTENDED', 'INT64', 'INTEGER', 'LONGINT', 'LONGWORD', 'OLEVARIANT', 'PCHAR', 'REAL48', 'SHORTINT', 'SINGLE', 'SMALLINT', 'VARIANT', 'WIDECHAR', 'WIDESTRING', 'WORD' ); type_token : array [1..no_of_type] of integer = ( _ANSISTRING_, _BOOLEAN_, _BYTE_, _CHAR_, _COMP_, _CURRENCY_, _DOUBLE_, _EXTENDED_, _INT64_, _INTEGER_, _LONGINT_, _LONGWORD_, _OLEVARIANT_, _PCHAR_, _REAL48_, _SHORTINT_, _SINGLE_, _SMALLINT_, _VARIANT_, _WIDECHAR_, _WIDESTRING_, _WORD_ ); procedure TDelphiLexer.commenteof; begin writeln(yyErrorfile, 'unexpected EOF inside comment at line ' +intToStr( yylineno)); end(*commenteof*); function TDelphiLexer.upper(str : String) : String; (* converts str to uppercase *) var i : integer; begin for i := 1 to length(str) do str[i] := upCase(str[i]); upper := str end(*upper*); function TDelphiLexer.istype(id : string; var token : integer) : boolean; (* checks whether id is Pascal predefined type; if so, returns corresponding token number in token *) var m,n,k : integer; begin id:=upper(id); (* binary search: *) m := 1; n := no_of_type; while m<=n do begin k := m+(n-m) div 2; if id=typestr[k] then begin istype := true; token := type_token[k]; exit end else if id>typestr[k] then m := k+1 else n := k-1 end; istype := false end; function TDelphiLexer.isKeyword(id : string; var token : integer) : boolean; (* checks whether id is Pascal keyword; if so, returns corresponding token number in token *) var m, n, k : integer; begin id := upper(id); (* binary search: *) m := 1; n := no_of_keywords; while m<=n do begin k := m+(n-m) div 2; if id=keyword[k] then begin isKeyword := true; token := keyword_token[k]; exit end else if id>keyword[k] then m := k+1 else n := k-1 end; isKeyword := false end(*isKeyword*); function TDelphiLexer.isDirective(id : string; var token : integer) : boolean; (* checks whether id is Pascal directive; if so, returns corresponding token number in token *) var m, n, k : integer; begin id := upper(id); (* binary search: *) m := 1; n := no_of_directives; while m<=n do begin k := m+(n-m) div 2; if id=directive[k] then begin isDirective := true; token := directive_token[k]; directiveToken := token; exit end else if id>directive[k] then m := k+1 else n := k-1 end; isDirective := false end(*isDirective*); %} NQUOTE [^'] %% %{ var c : char; kw : integer; %} [aA][sS][mM] begin repeat c := get_char; if (c='e') or (c='E') then begin c := get_char; if (c='n') or (c='N') then begin c := get_char; if (c='d') or (c='D') then begin c := get_char; if c=';' then begin return(DELPHI._ASM_); exit; end else if c=#0 then begin commenteof; end else begin unget_char(c); unget_char(c); unget_char(c); end; end else if c=#0 then begin commenteof; exit; end else begin unget_char(c); unget_char(c);end; end else if c=#0 then begin commenteof; exit; end else unget_char(c); end else if c=#0 then begin commenteof; exit; end; until false; end; [a-zA-Z_]([a-zA-Z_0-9])* if isKeyword(yytext, kw) then begin writeln( yyOutput, 'keyword : "'+ yyText+ '"'); return(kw) end else if isDirective(yytext, kw) then begin writeln( yyOutput, 'directive : "'+ yyText+ '"'); {case kw of 1: return (DELPHI._ABSOLUTE_); 2: return (DELPHI._ABSTRACT_); 3: return (DELPHI._ASSEMBLER_); 4: return (DELPHI._AT_); 5: return (DELPHI._CDECL_); 6: return (DELPHI._DEFAULT_); 7: return (DELPHI._DISPID_); 8: return (DELPHI._DYNAMIC_); 9: return (DELPHI._EXPORT_); 10: return (DELPHI._FAR_); 11: return (DELPHI._INDEX_); 12: return (DELPHI._LOCAL_); 13: return (DELPHI._MESSAGE_); 14: return (DELPHI._NAME_); 15: return (DELPHI._NEAR_); 16: return (DELPHI._NODEFAULT_); 17: return (DELPHI._OVERLOAD_); 18: return (DELPHI._OVERRIDE_); 19: return (DELPHI._PASCAL_); 20: return (DELPHI._READ_); 21: return (DELPHI._READONLY_); 22: return (DELPHI._REGISTER_); 23: return (DELPHI._REINTRODUCE_); 24: return (DELPHI._RESIDENT_); 25: return (DELPHI._SAFECALL_); 26: return (DELPHI._STDCALL_); 27: return (DELPHI._STORED_); 28: return (DELPHI._VARARGS_); 29: return (DELPHI._VIRTUAL_); 30: return (DELPHI._WRITE_); 31: return (DELPHI._WRITEONLY_); end;} return(kw); end else if istype(yytext, kw) then begin writeln( yyOutput, 'type : "'+ yyText+ '"'); {case kw of 1:return (DELPHI._ANSISTRING_); 2:return (DELPHI._BOOLEAN_); 3:return (DELPHI._BYTE_); 4:return (DELPHI._CHAR_); 5:return (DELPHI._COMP_); 6:return (DELPHI._CURRENCY_); 7:return (DELPHI._DOUBLE_); 8:return (DELPHI._EXTENDED_); 9:return (DELPHI._INT64_); 10:return (DELPHI._INTEGER_); 11:return (DELPHI._LONGINT_); 12:return (DELPHI._LONGWORD_); 13:return (DELPHI._OLEVARIANT_); 14:return (DELPHI._PCHAR_); 15:return (DELPHI._REAL48_); 16:return (DELPHI._SHORTINT_); 17:return (DELPHI._SINGLE_); 18:return (DELPHI._SMALLINT_); 19:return (DELPHI._STRING_); 20:return (DELPHI._VARIANT_); 21:return (DELPHI._WIDECHAR_); 22:return (DELPHI._WIDESTRING_); 23:return (DELPHI._WORD_); end;} return(ID); end else begin writeln( yyOutput, 'ID : "'+ yyText+ '"'); return(ID) end; ":=" return(ASSIGNMENT); '' return(STRING_CONST); '({NQUOTE}|'')+' return(STRING_CONST); (#[0-9]+)|(#\$[0-9a-fA-F]+) return(STRING_CONST); ":" return(COLON); "," return(COMMA); "." return(DOT); ".." return(DOTDOT); "=" return(EQUAL); ">=" return(GE); ">" return(GT); "[" return(LBRAC); "<=" return(LE); "(" return(LPAREN); "<" return(LT); "-" return(MINUS); "<>" return(NOTEQUAL); "+" return(PLUS); "]" return(RBRAC); "$"[0-9a-fA-F]+ return(UNSIGNED_HEXA_INTEGER); [0-9]+ return(UNSIGNED_INTEGER); [0-9]+"."[0-9]+ return(UNSIGNED_REAL); ")" return(RPAREN); ";" return(SEMICOLON); "@" return(AT); "#" return(DIESE); "//" begin repeat c := get_char; case c of #10 : exit; #0 : begin commenteof; exit; end; end; until false end; "/" return(SLASH); "*" return(STAR); "**" return(STARSTAR); "->" | "^" return(UPARROW); "(*" begin repeat c := get_char; case c of '*' : begin c := get_char; if c=')' then exit else unget_char(c) end; #0 : begin commenteof; exit; end; end; until false end; "{" begin repeat c := get_char; case c of '}' : exit; #0 : begin commenteof; exit; end; end; until false end; [ \n\t\f] ; . begin writeln( yyOutput, 'ILLEGAL TEXT : "'+yyText+'"'); return(ILLEGAL); end;