Boyer-Moore-Horspool pattern matching (Views: 28)
Problem/Question/Abstract: Boyer-Moore-Horspool pattern matching Answer: Solve 1: function search(pat: PATTERN; text: TEXT): integer; var i, j, k, m, n: integer; skip: array[0..MAXCHAR] of integer; found: boolean; begin found := FALSE; search := 0; m := length(pat); if m = 0 then begin search := 1; found := TRUE; end; for k := 0 to MAXCHAR do skip[k] := m; {Preprocessing} for k := 1 to m - 1 do skip[ord(pat[k])] := m - k; k := m; n := length(text); {Search} while not found and (k < = n) do begin i := k; j := m; while (j = 1) do if text[i] <> pat[j] then j := -1 else begin j := j - 1; i := i - 1; end; if j = 0 then begin search := i + 1; found := TRUE; end; k := k + skip[ord(text[k])]; end; end; Solve 2: unit exbmh; interface uses Windows, SysUtils; procedure BMHInit(const pattern: pchar); function BMHSearch(cstring: pchar; const stringlen: integer): pchar; var found: pchar; implementation {Date last modified: 05-Jul-1997 Case-sensitive Boyer-Moore-Horspool pattern match Public domain by Raymond Gardner 7/92 Limitation: pattern length + string length must be less than 32767 10/21/93 rdg Fixed bug found by Jeff Dunlop} const Large = 32767; type TSkip = array[0..256] of integer; PSkip = ^TSkip; TByteArray = array[0..0] of byte; PByteArray = ^TByteArray; var patlen: integer; skip: TSkip; skip2: integer; pat: pchar; procedure BMHInit1(const pattern: pchar); var i, lastpatchar: integer; begin pat := pattern; patlen := StrLen(pattern); for i := 0 to 255 do skip[i] := patlen; for i := 0 to patlen - 1 do skip[Byte(pat[i])] := patlen - i - 1; lastpatchar := byte(pat[patlen - 1]); skip[lastpatchar] := Large; skip2 := patlen; for i := 0 to patlen - 2 do if byte(pat[i]) = lastpatchar then skip2 := patlen - i - 1; end; function BMHSearch1(cstring: pchar; const stringlen: integer): pchar; var i, j: integer; s: pchar; begin i := patlen - 1 - stringlen; result := nil; if i >= 0 then exit; inc(cstring, stringlen); while true do begin repeat inc(i, skip[byte(cstring[i])]); until i > = 0; if i < (Large - StringLen) then exit; dec(i, Large); j := patlen - 1; s := cstring + (i - j); dec(j); while (j >= 0) and (s[j] = pat[j]) do dec(j); if (j < 0) then begin result := s; exit; end; inc(i, skip2); if (i >= 0) then exit; end; end; procedure BMHInit(const pattern: pchar); var i, lastpatchar: integer; len: integer; skip: PSkip; begin pat := pattern; len := StrLen(pattern); patlen := len; skip := @BMHSearchs.Skip; for i := 0 to 255 do skip[i] := len; for i := 0 to len - 1 do skip[Byte(pattern[i])] := len - i - 1; lastpatchar := byte(pattern[len - 1]); skip[lastpatchar] := Large; skip2 := len; for i := 0 to len - 2 do if byte(pattern[i]) = lastpatchar then skip2 := len - i - 1; end; function inner(i: integer; c: PByteArray): integer; asm push ebx @L1: movzx ebx, byte ptr[edx + eax] add eax, [offset skip + ebx] jl @l1; pop ebx end; function BMHSearch(cstring: pchar; const stringlen: integer): pchar; var i, j: integer; s: pchar; pat: pchar; begin pat := BMHSearchs.pat; i := patlen - 1 - stringlen; result := nil; if i >= 0 then exit; inc(cstring, stringlen); while true do begin repeat inc(i, skip[byte(cstring[i])]); until i >= 0; if i < (Large - StringLen) then exit; dec(i, Large); j := patlen - 1; s := cstring + (i - j); dec(j); while (j >= 0) and (s[j] = pat[j]) do dec(j); if (j < 0) then begin result := s; exit; end; inc(i, skip2); if (i >= 0) then exit; end; end; const data = 'of a procedure to find a pattern in a stringThis is a test of a procedure to find a pattern in a string last This is a test of aprocedure to find a pattern in a string'; initialization BMHInit('last'); found := BMHSearch(data, length(data)); end. |