How to split up a formatted source string into substrings and integers (Views: 29)
{ Problem/Question/Abstract:
  How to split up a formatted source string into substrings and integers

  Answer: }

function Unformat(const source, pattern: string; const args: array of const): Integer;
{ The opposite of Format: Unformat splits a formatted source string into
  substrings and Integers. It is an alternative to hand-written parsing when
  the layout of the text is known to be fixed.

  pattern is a mix of plain characters and format specifiers:

    %s  extract a string value
    %d  extract an integer value
    %S  skip a string value (nothing is stored, no argument is consumed)
    %D  skip an integer value (nothing is stored, no argument is consumed)

  A string value runs up to (not including) the first occurrence of the
  pattern character that follows the specifier, or to the end of source when
  the specifier is the last thing in the pattern. An integer value is the
  longest run of digits and '-' characters.

  Arguments:
    for each %s  a pointer to a SHORT string variable, followed by an Integer
                 giving the maximum number of characters to store. The
                 destination is written through a PShortString, so long
                 strings must not be used, and the maximum must not exceed
                 the declared capacity of the variable.
    for each %d  a pointer to an Integer variable.

  Plain characters in the pattern are NOT verified against the source: for
  each plain pattern character one source character is skipped. (The check
  that would raise EConvertError 'Pattern mismatch' is disabled in this
  implementation and is kept disabled so existing callers are unaffected.)
  A '%' followed by an unknown specifier character is ignored.

  Scanning stops as soon as the source is exhausted; arguments that were not
  reached are left unmodified, so initialise your variables to default values
  before the call. A %d that finds no digits leaves its variable unmodified
  but is still counted.

  Returns the number of specifiers processed (extracted or skipped).

  Raises EConvertError when
    - there are fewer arguments than the pattern requires,
    - a destination argument is not a pointer,
    - the argument after a string pointer is not an Integer,
    - StrToInt rejects the text collected for a %d (for example '1-2').

  Examples:

    var
      s1, s2: string[31];
      i: Integer;

    Unformat('[abc]123(def)', '[%s]%d(%s)', [@s1, 31, @i, @s2, 31]);
    (* s1 = 'abc', i = 123, s2 = 'def', result = 3 *)

    Unformat('Hello, Universe!!!', '%s, %s%d', [@s1, 31, @s2, 31, @i]);
    (* s1 = 'Hello', s2 = 'Universe!!!', i is untouched, result = 2 *)

    Unformat('How much wood could a woodchuck chuck...',
      '%S %S %s could a %S %s...', [@s1, 31, @s2, 31]);
    (* s1 = 'wood', s2 = 'chuck', result = 5 *)
}
const
  { Declared locally so the routine does not depend on an outer identifier. }
  Digits = ['0'..'9'];
var
  i, j, argindex, start, maxlen: Integer;

  { Smaller of two integers; plain Pascal so it builds on any target. }
  function Min(a, b: Integer): Integer;
  begin
    if a <= b then
      Result := a
    else
      Result := b;
  end;

  { Advances j past a run of digits and '-' characters in source. }
  procedure SkipInteger;
  begin
    while (j <= Length(source)) and
      ((source[j] in Digits) or (source[j] = '-')) do
      Inc(j);
  end;

  { Advances j to the next occurrence of delimiter, or past the end of source. }
  procedure SkipUntil(delimiter: Char);
  begin
    while (j <= Length(source)) and (source[j] <> delimiter) do
      Inc(j);
  end;

begin
  Result := 0;
  argindex := 0;
  i := 1;  { current position in pattern }
  j := 1;  { current position in source }

  { "i < Length" (not "<=") guarantees pattern[i + 1] exists when pattern[i]
    is '%'. As a consequence a trailing plain character is never consumed. }
  while (i < Length(pattern)) and (j <= Length(source)) do
  begin
    if pattern[i] = '%' then
      case pattern[i + 1] of
        'D':
          begin
            Inc(i, 2);
            SkipInteger;
            Inc(Result);
          end;

        'S':
          begin
            Inc(i, 2);
            { Count the value before any early exit, so a trailing %S is
              reported just like one in the middle of the pattern. }
            Inc(Result);
            if i > Length(pattern) then
              Break;  { nothing follows: the rest of source is ignored }
            SkipUntil(pattern[i]);
          end;

        'd':
          begin
            if argindex > High(args) then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex].VType <> vtPointer then
              raise EConvertError.Create('Integer pointer expected');
            Inc(i, 2);
            start := j;
            SkipInteger;
            { Only store when at least one character was collected. }
            if j > start then
              PInteger(args[argindex].VPointer)^ :=
                StrToInt(Copy(source, start, j - start));
            Inc(argindex);
            Inc(Result);
          end;

        's':
          begin
            if argindex > High(args) - 1 then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex].VType <> vtPointer then
              raise EConvertError.Create('String pointer expected');
            if args[argindex + 1].VType <> vtInteger then
              raise EConvertError.Create('No string size specified');
            maxlen := args[argindex + 1].VInteger;
            Inc(i, 2);
            start := j;
            if i > Length(pattern) then
              { Last specifier: take everything that is left. Moving j past
                the end also terminates the loop. }
              j := Length(source) + 1
            else
              SkipUntil(pattern[i]);
            PShortString(args[argindex].VPointer)^ :=
              Copy(source, start, Min(j - start, maxlen));
            Inc(argindex, 2);
            { Counted on every path, including a trailing %s. }
            Inc(Result);
          end;
      else
        { Unknown specifier: drop the '%' and carry on. }
        Inc(i);
      end
    else
    begin
      { Plain character: skipped in both strings without comparison. }
      Inc(i);
      Inc(j);
    end;
  end;
end;