Mirror

Boyer-Moore-Horspool pattern matching (Views: 707)


Problem/Question/Abstract:

Boyer-Moore-Horspool pattern matching

Answer:

Solve 1:

function search(pat: PATTERN; text: TEXT): integer;
var
  i, j, k, m, n: integer;
  skip: array[0..MAXCHAR] of integer;
  found: boolean;
begin
  found := FALSE;
  search := 0;
  m := length(pat);
  if m = 0 then
  begin
    search := 1;
    found := TRUE;
  end;
  for k := 0 to MAXCHAR do
    skip[k] := m;
  {Preprocessing}
  for k := 1 to m - 1 do
    skip[ord(pat[k])] := m - k;
  k := m;
  n := length(text);
  {Search}
  while not found and (k < = n) do
  begin
    i := k;
    j := m;
    while (j = 1) do
      if text[i] <> pat[j] then
        j := -1
      else
      begin
        j := j - 1;
        i := i - 1;
      end;
    if j = 0 then
    begin
      search := i + 1;
      found := TRUE;
    end;
    k := k + skip[ord(text[k])];
  end;
end;


Solve 2:

unit exbmh;

interface

uses
  Windows, SysUtils;

procedure BMHInit(const pattern: pchar);
function BMHSearch(cstring: pchar; const stringlen: integer): pchar;

var
  found: pchar;

implementation

{Date last modified: 05-Jul-1997
Case-sensitive Boyer-Moore-Horspool pattern match
Public domain by Raymond Gardner 7/92
Limitation: pattern length + string length must be less than 32767
10/21/93 rdg  Fixed bug found by Jeff Dunlop}

const
  Large = 32767;

type
  TSkip = array[0..256] of integer;
  PSkip = ^TSkip;
  TByteArray = array[0..0] of byte;
  PByteArray = ^TByteArray;

var
  patlen: integer;
  skip: TSkip;
  skip2: integer;
  pat: pchar;

procedure BMHInit1(const pattern: pchar);
var
  i, lastpatchar: integer;
begin
  pat := pattern;
  patlen := StrLen(pattern);
  for i := 0 to 255 do
    skip[i] := patlen;
  for i := 0 to patlen - 1 do
    skip[Byte(pat[i])] := patlen - i - 1;
  lastpatchar := byte(pat[patlen - 1]);
  skip[lastpatchar] := Large;
  skip2 := patlen;
  for i := 0 to patlen - 2 do
    if byte(pat[i]) = lastpatchar then
      skip2 := patlen - i - 1;
end;

function BMHSearch1(cstring: pchar; const stringlen: integer): pchar;
var
  i, j: integer;
  s: pchar;
begin
  i := patlen - 1 - stringlen;
  result := nil;
  if i >= 0 then
    exit;
  inc(cstring, stringlen);
  while true do
  begin
    repeat
      inc(i, skip[byte(cstring[i])]);
    until
      i > = 0;
    if i < (Large - StringLen) then
      exit;
    dec(i, Large);
    j := patlen - 1;
    s := cstring + (i - j);
    dec(j);
    while (j >= 0) and (s[j] = pat[j]) do
      dec(j);
    if (j < 0) then
    begin
      result := s;
      exit;
    end;
    inc(i, skip2);
    if (i >= 0) then
      exit;
  end;
end;

procedure BMHInit(const pattern: pchar);
var
  i, lastpatchar: integer;
  len: integer;
  skip: PSkip;
begin
  pat := pattern;
  len := StrLen(pattern);
  patlen := len;
  skip := @BMHSearchs.Skip;
  for i := 0 to 255 do
    skip[i] := len;
  for i := 0 to len - 1 do
    skip[Byte(pattern[i])] := len - i - 1;
  lastpatchar := byte(pattern[len - 1]);
  skip[lastpatchar] := Large;
  skip2 := len;
  for i := 0 to len - 2 do
    if byte(pattern[i]) = lastpatchar then
      skip2 := len - i - 1;
end;

function inner(i: integer; c: PByteArray): integer;
asm
  push ebx
  @L1:
    movzx ebx, byte ptr[edx + eax]
    add eax, [offset skip + ebx]
    jl @l1;
    pop ebx
end;

function BMHSearch(cstring: pchar; const stringlen: integer): pchar;
var
  i, j: integer;
  s: pchar;
  pat: pchar;
begin
  pat := BMHSearchs.pat;
  i := patlen - 1 - stringlen;
  result := nil;
  if i >= 0 then
    exit;
  inc(cstring, stringlen);
  while true do
  begin
    repeat
      inc(i, skip[byte(cstring[i])]);
    until
      i >= 0;
    if i < (Large - StringLen) then
      exit;
    dec(i, Large);
    j := patlen - 1;
    s := cstring + (i - j);
    dec(j);
    while (j >= 0) and (s[j] = pat[j]) do
      dec(j);
    if (j < 0) then
    begin
      result := s;
      exit;
    end;
    inc(i, skip2);
    if (i >= 0) then
      exit;
  end;
end;

const
data = 'of a procedure to find a pattern in a stringThis is a test of a procedure to find a pattern in a string last This is a test of aprocedure to find a pattern in a string';

initialization
  BMHInit('last');
  found := BMHSearch(data, length(data));

end.

<< Back to main page