Extract string property values from DFM files (Views: 27)
Problem/Question/Abstract: Does anybody know the name of the routine used in the IDE that enables control characters embedded into a string to be recognised. For example the characters 'Line 1'#13#10'Line 2' are recognised by the compiler as a single string literal. I'd like to build a similar facility into an application. Is a single routine used for this or is it embedded somewhere in the parser of the compiler? Answer: I'm pretty sure that the compiler uses an internal routine for this which is not accessible to us mere mortals. Perhaps you can extract something useful from the unit below. I wrote it to extract string property values from DFM files. It is a work in progress, so if you want to use it for the same purpose be aware that you may have DFMs it will not be able to digest without modifications. unit DFMParser; interface uses classes, sysutils; type TBaseParser = class private FText: string; FCurrent, FAnchor: Integer; FToken: string; protected procedure Error(const S: string); overload; procedure Error(const fmt: string; const A: array of const); overload; procedure DropAnchor; procedure NextToken; procedure NextChar; procedure SkipWhitespace; procedure SkipToEol; procedure SkipTo(ch: Char); procedure SkipToString(const S: string); function EndOfText: Boolean; function IsTokenChar: Boolean; function IsWhiteSpace: Boolean; function CurrentChar: Char; function LastWord: string; function ParseEncodedChar: Char; function ParseQuotedString: string; function ParseStringValue: string; public procedure Parse; virtual; abstract; constructor Create(const S: string); virtual; property Token: string read FToken; end; TParsePropertyEvent = procedure(const aComponentName, aPropertyName, aPropertyValue: string) of object; TDFMParser = class(TBaseParser) private FParsePropertyEvent: TParsePropertyEvent; protected procedure ParseComponent; procedure ParseProperty(const componentName: string); procedure ParsePropertyString(const componentName, propertyName: string); function TokenIsObject: Boolean; function IsEndToken: Boolean; procedure DoPropertyEvent(const componentName, propertyname, propvalue: string); public procedure Parse; override; property OnParseProperty: TParsePropertyEvent read FParsePropertyEvent write FParsePropertyEvent; end; EDFMParserError = class(Exception); TTranslationItemEvent = procedure(const name, value: string) of object; TTranslationParser = class(TBaseParser) private FTranslationItemEvent: TTranslationItemEvent; procedure ParseStringConstant; procedure SkipWhitespaceAndComments; procedure DoTranslationItem(const name, value: string); public constructor Create(const S: string); override; procedure Parse; override; property OnTranslationItem: TTranslationItemEvent read FTranslationItemEvent write FTranslationItemEvent; end; implementation uses charsets; const quote = ''''; constructor TBaseParser.Create(const S: string); begin FText := S; FCurrent := 1; end; function TBaseParser.CurrentChar: Char; begin result := FText[FCurrent]; end; procedure TBaseParser.DropAnchor; begin FAnchor := FCurrent; end; function TBaseParser.EndOfText: Boolean; begin result := FCurrent > Length(FText); end; procedure TBaseParser.Error(const S: string); begin raise EPArserError.Create(S); end; procedure TBaseParser.Error(const fmt: string; const A: array of const); begin Error(Format(fmt, A)); end; function TBaseParser.IsTokenChar: Boolean; begin result := (Currentchar in Charsets.IdentifierChars) or (CurrentChar = '.'); end; function TBaseParser.IsWhiteSpace: Boolean; begin result := Currentchar in [#1..#32]; end; function TBaseParser.LastWord: string; begin Assert(FAnchor <= FCurrent); result := Copy(FText, FAnchor, FCurrent - FAnchor); end; procedure TBaseParser.NextChar; begin Inc(FCurrent); if EndOfText then Error('Unexpected end of text'); end; procedure TBaseParser.NextToken; begin SkipWhitespace; DropAnchor; while not EndOfText and IsTokenChar do Inc(FCurrent); FToken := LastWord; end; procedure TBaseParser.SkipTo(ch: Char); begin while not EndOfText and (Currentchar <> ch) do NextChar; Inc(FCurrent); end; procedure TBaseParser.SkipToString(const S: string); var P: PChar; begin p := StrPos(@FText[FCurrent], Pchar(S)); if Assigned(p) then FCurrent := p - PChar(FText) + 1 + Length(S) else Error('Expected string "%s" not found', [s]); end; procedure TBaseParser.SkipToEol; begin while not EndOfText and (FText[FCurrent] <> #10) do Inc(FCurrent); end; procedure TBaseParser.SkipWhitespace; begin while not EndOfText and IsWhiteSpace do Inc(FCurrent); end; function TBaseParser.ParseQuotedString: string; begin Assert(CurrentChar = quote); Result := ''; repeat NextChar; {skip leading quote} DropAnchor; while CurrentChar <> quote do NextChar; Result := Result + LastWord; NextChar; if CurrentChar = quote then Result := Result + quote; {literal quote} until CurrentChar <> quote; SkipWhitespace; end; function TBaseParser.ParseEncodedChar: Char; var allowed: Charsets.TCharset; n: Integer; begin Assert(CurrentChar = '#'); NextChar; DropAnchor; if CurrentChar = '$' then begin allowed := CHarsets.HexNumerals; NextChar; end else allowed := Charsets.IntegerChars; while CurrentChar in allowed do NextChar; n := StrToInt(LastWord); if n > High(Byte) then Error('Encountered UNICODE character in string, cannot handle that.'); Result := Char(n); end; function TBaseParser.ParseStringValue: string; begin Result := ''; while True do case CurrentChar of quote: Result := Result + ParseQuotedString; '#': Result := Result + ParseEncodedChar; '+': begin NextChar; SkipWhitespace; end; else Break; end; end; { TDFMParser } procedure TDFMParser.DoPropertyEvent(const componentName, propertyname, propvalue: string); begin if Assigned(FParsePropertyEvent) then FParsePropertyEvent(componentName, propertyname, propvalue); end; function TDFMParser.IsEndToken: Boolean; begin result := Token = 'end'; end; procedure TDFMParser.Parse; begin while not EndOfText do begin ParseComponent; SkipWhitespace; end; end; procedure TDFMParser.ParseComponent; var componentName: string; begin if FToken = '' then NextToken; if not TokenIsObject then Error('Expected: inherited or object, found : %s', [Token]); NextToken; componentName := Token; SkipToEol; repeat NextToken; if TokenIsObject then ParseComponent else if not IsEndToken then ParseProperty(componentName); until IsEndToken or EndOfText; if IsEndToken then FToken := ''; end; procedure TDFMParser.ParseProperty(const componentName: string); var propname: string; begin propname := Token; SkipWhitespace; if CurrentChar <> '=' then Error('Expected: =, found %s', [Currentchar]); NextChar; SkipWhitespace; case CurrentChar of '{': SkipTo('}'); '(': SkipTo(')'); '[': SkipTo(']'); quote, '#': ParsePropertyString(componentName, propname); else SkipToEol end; end; procedure TDFMParser.ParsePropertyString(const componentName, propertyName: string); var propvalue: string; begin propvalue := ParseStringValue; if propvalue <> '' then DoPropertyEvent(componentName, propertyname, propvalue); end; function TDFMParser.TokenIsObject: Boolean; begin Result := (Token = 'inherited') or (Token = 'object') end; { TTranslationParser } constructor TTranslationParser.Create(const S: string); const resStr = 'resourcestring'; var lS: string; resourceStringPos: Integer; n1, n2: Integer; begin {Isolate the resourcestring section. We expect only one} lS := LowerCase(S); resourceStringPos := Pos(resStr, lS); if resourceStringPos = 0 then inherited Create('') else begin {look for an $ifdef german} n1 := Pos('{$ifdef german', lS); if n1 > 0 then begin {look for the following $else} Delete(lS, 1, n1 - 1); n2 := Pos('{$else}', lS); if n2 = 0 then Error('Malformed $IFDEF...$ELSE encountered, $ELSE not found'); Delete(lS, 1, n2 - 1); Inc(n1, n2 - 1); {look for the $ENDIF} n2 := Pos('{$endif}', lS); if n2 = 0 then Error('Malformed $IFDEF...$ENDIF encountered, $ENDIF not found'); inherited Create(Copy(S, n1, n2 - 1)); end else begin {look for an $ifndef german} n1 := Pos('{$ifndef german', lS); if n1 = 0 then inherited Create('') else begin {in the $ifndef german construct the resourcestring keyword often comes after the $ifndef.} if n1 < resourceStringPos then n1 := resourceStringPos + Length(resstr); Delete(lS, 1, n1 - 1); {look for the $ENDIF} n2 := Pos('{$endif}', lS); if n2 = 0 then Error('Malformed $IFDEF...$ENDIF encountered, $ENDIF not found'); inherited Create(Copy(S, n1, n2 - 1)); end; end; end; end; procedure TTranslationParser.DoTranslationItem(const name, value: string); begin if Assigned(FTranslationItemEvent) then FTranslationItemEvent(name, value); end; procedure TTranslationParser.Parse; begin while not EndOfText do begin ParseStringConstant; SkipWhitespace; end; end; procedure TTranslationParser.ParseStringConstant; var name, value: string; begin SkipWhitespaceAndComments; if EndOfText then Exit; NextToken; name := Token; SkipWhitespaceAndComments; if EndOfText then Exit; if CurrentChar <> '=' then Error('Expected: =, found "%s"', [CurrentChar]); NextChar; SkipWhitespaceAndComments; if EndOfText then Exit; value := ParseStringValue; SkipWhiteSpace; if not EndOfText and (CurrentChar = ';') then NextChar; DoTranslationItem(name, value); end; procedure TTranslationParser.SkipWhitespaceAndComments; begin while True do begin SkipWhitespace; if not EndOfText then begin case CurrentChar of '/': SkipToEol; { single line comment } '{': SkipTo('}'); { comment } '(': begin NextChar; if CurrentChar = '*' then SkipToString('*)') else Error('Expected: comment or indentifier, found: "(%s"', [CurrentChar]); end; else Break end; end else Break; end; end; end. unit Charsets; interface type TCharSet = set of AnsiChar; const Signs: TCharset = ['-', '+']; Numerals: TCharset = ['0'..'9']; HexNumerals: TCharset = ['A'..'F', 'a'..'f', '0'..'9']; IntegerChars: TCharset = ['0'..'9', '-', '+']; IdentifierChars: TCharset = ['a'..'z', 'A'..'Z', '0'..'9', '_']; var Digits, Letters, LowerCaseLetters, UpperCaseLetters: TCharSet; FloatChars, SciFloatChars: TCharset; AlphaNum, NonAlphaNum: TCharset; { Need to call this again when locale changes. } procedure SetupCharsets; implementation uses Windows, Sysutils; var locale: DWORD = 0; procedure SetupCharsets; var ch: AnsiChar; begin if locale = GetThreadLocale then Exit else Locale := GetThreadLocale; LowerCaseLetters := []; UpperCaseLetters := []; AlphaNum := []; NonAlphaNum := []; Digits := Numerals; for ch := Low(ch) to High(ch) do begin if IsCharAlpha(ch) then if IsCharUpper(ch) then Include(UpperCaseLetters, ch) else Include(LowerCaseLetters, ch); if IsCharAlphanumeric(ch) then Include(AlphaNum, ch) else Include(NonAlphaNum, ch); end; Letters := LowerCaseLetters + UpperCaseLetters; FloatChars := IntegerChars; Include(FloatChars, DecimalSeparator); SciFloatChars := FloatChars + ['e', 'E']; end; initialization SetupCharsets; end. |