Boyer-Moore string searching (Views: 30)
Problem/Question/Abstract: Boyer-Moore string searching Answer: Solve 1: unit BMSearch; interface type {$IFDEF WINDOWS} size_t = Word; {$ELSE} size_t = LongInt; {$ENDIF} type TTranslationTable = array[char] of char; { translation table } TSearchBM = class(TObject) private FTranslate: TTranslationTable; { translation table } FJumpTable: array[char] of Byte; { Jumping table } FShift_1: integer; FPattern: pchar; FPatternLen: size_t; public procedure Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean); procedure PrepareStr(const Pattern: string; IgnoreCase: Boolean); function Search(Text: pchar; TextLen: size_t): pchar; function Pos(const S: string): integer; end; implementation uses SysUtils; {Ignore Case Table Translation} procedure CreateTranslationTable(var T: TTranslationTable; IgnoreCase: Boolean); var c: char; begin for c := #0 to #255 do T[c] := c; if not IgnoreCase then exit; for c := 'a' to 'z' do T[c] := UpCase(c); { Mapping all accented characters to their uppercase equivalent } T['Á'] := 'A'; T['À'] := 'A'; T['Ä'] := 'A'; T['Â'] := 'A'; T['á'] := 'A'; T['à'] := 'A'; T['ä'] := 'A'; T['â'] := 'A'; T['É'] := 'E'; T['È'] := 'E'; T['Ë'] := 'E'; T['Ê'] := 'E'; T['é'] := 'E'; T['è'] := 'E'; T['ë'] := 'E'; T['ê'] := 'E'; T['Í'] := 'I'; T['Ì'] := 'I'; T['Ï'] := 'I'; T['Î'] := 'I'; T['í'] := 'I'; T['ì'] := 'I'; T['ï'] := 'I'; T['î'] := 'I'; T['Ó'] := 'O'; T['Ò'] := 'O'; T['Ö'] := 'O'; T['Ô'] := 'O'; T['ó'] := 'O'; T['ò'] := 'O'; T['ö'] := 'O'; T['ô'] := 'O'; T['Ú'] := 'U'; T['Ù'] := 'U'; T['Ü'] := 'U'; T['Û'] := 'U'; T['ú'] := 'U'; T['ù'] := 'U'; T['ü'] := 'U'; T['û'] := 'U'; T['ñ'] := 'Ñ'; end; {Preparation of the jumping table} procedure TSearchBM.Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean); var i: integer; c, lastc: char; begin FPattern := Pattern; FPatternLen := PatternLen; if FPatternLen < 1 then FPatternLen := strlen(FPattern); {This algorythm is based on a character set of 256} if FPatternLen > 256 then exit; {1. Preparing translating table} CreateTranslationTable(FTranslate, IgnoreCase); {2. Preparing jumping table} for c := #0 to #255 do FJumpTable[c] := FPatternLen; for i := FPatternLen - 1 downto 0 do begin c := FTranslate[FPattern[i]]; if FJumpTable[c] >= FPatternLen - 1 then FJumpTable[c] := FPatternLen - 1 - i; end; FShift_1 := FPatternLen - 1; lastc := FTranslate[Pattern[FPatternLen - 1]]; for i := FPatternLen - 2 downto 0 do if FTranslate[FPattern[i]] = lastc then begin FShift_1 := FPatternLen - 1 - i; break; end; if FShift_1 = 0 then FShift_1 := 1; end; procedure TSearchBM.PrepareStr(const Pattern: string; IgnoreCase: Boolean); var str: pchar; begin if Pattern <> '' then begin {$IFDEF Windows} str := @Pattern[1]; {$ELSE} str := pchar(Pattern); {$ENDIF} Prepare(str, Length(Pattern), IgnoreCase); end; end; {Searching Last char & scanning right to left} function TSearchBM.Search(Text: pchar; TextLen: size_t): pchar; var shift, m1, j: integer; jumps: size_t; begin result := nil; if FPatternLen > 256 then exit; if TextLen < 1 then TextLen := strlen(Text); m1 := FPatternLen - 1; shift := 0; jumps := 0; {Searching the last character} while jumps <= TextLen do begin Inc(Text, shift); shift := FJumpTable[FTranslate[Text^]]; while shift <> 0 do begin Inc(jumps, shift); if jumps > TextLen then exit; Inc(Text, shift); shift := FJumpTable[FTranslate[Text^]]; end; { Compare right to left FPatternLen - 1 characters } if jumps >= m1 then begin j := 0; while FTranslate[FPattern[m1 - j]] = FTranslate[(Text - j)^] do begin Inc(j); if j = FPatternLen then begin result := Text - m1; exit; end; end; end; shift := FShift_1; Inc(jumps, shift); end; end; function TSearchBM.Pos(const S: string): integer; var str, p: pchar; begin result := 0; if S <> '' then begin {$IFDEF Windows} str := @S[1]; {$ELSE} str := pchar(S); {$ENDIF} p := Search(str, Length(S)); if p <> nil then result := 1 + p - str; end; end; end. Solve 2: Here's a demo program of the Boyer-Moore search algorithm. The basic idea is to first create a Boyer-Moore index table for the string you want to search for, and then call the BMsearch routine. Remember to turn-off Range Checking {$R-} in your finished program, otherwise the BMSearch will take 3-4 times longer than it should. {Public-domain demo of Boyer-Moore search algorithm. Guy McLoughlin - May 1, 1993.} program DemoBMSearch; {Boyer-Moore index table data definition} type BMTable = array[0..127] of byte; {Create a Boyer-Moore index table to search with.} procedure Create_BMTable(Pattern: string; var BMT: BMTable); var Index: byte; begin fillchar(BMT, sizeof(BMT), length(Pattern)); for Index := 1 to length(Pattern) do BMT[ord(Pattern[Index])] := (length(Pattern) - Index) end; {Boyer-Moore Search function. Returns 0 if string is not found. Returns 65,535 if BufferSize is too large, ie: greater than 65,520 bytes.} function BMsearch(var Buffer; BuffSize: word; var BMT: BMTable; Pattern: string): word; var Buffer2: array[1..65520] of char absolute Buffer; Index1, Index2, PatSize: word; begin if (BuffSize > 65520) then begin BMsearch := $FFFF; exit end; PatSize := length(Pattern); Index1 := PatSize; Index2 := PatSize; repeat if (Buffer2[Index1] = Pattern[Index2]) then begin dec(Index1); dec(Index2) end else begin if (succ(PatSize - Index2) > (BMT[ord(Buffer2[Index1])])) then inc(Index1, succ(PatSize - Index2)) else inc(Index1, BMT[ord(Buffer2[Index1])]); Index2 := PatSize end; until (Index2 < 1) or (Index1 > BuffSize); if (Index1 > BuffSize) then BMsearch := 0 else BMsearch := succ(Index1) end; type arby_64K = array[1..65520] of byte; var Index: word; st_Temp: string[10]; Buffer: ^arby_64K; BMT: BMTable; begin new(Buffer); fillchar(Buffer^, sizeof(Buffer^), 0); st_Temp := 'Gumby'; move(st_Temp[1], Buffer^[65516], length(st_Temp)); Create_BMTable(st_Temp, BMT); Index := BMSearch(Buffer^, sizeof(Buffer^), BMT, st_Temp); writeln(st_Temp, ' found at offset ', Index) end. |