ボイヤー-ムーア法をHaskellで

昨日書いていたプログラムが、なんとか直せた。原因はIO汚染。Maybe aを返すところをIO(Maybe a)に変更したら怒られなくなった。

というわけで、昨日書いていたのは、ボイヤー-ムーア法。詳しくはリンク先で。で、確かボイヤー-ムーア法って、高速にするためにずらし表の作成方法がいろいろあったと思うんだけど、参考にしたのはアルゴリズム for Ruby

module BMSearch where
import Prelude hiding (lookup)
import Data.HashTable
import Char
import Maybe

mkDelta :: String -> IO (HashTable Char Int)
mkDelta str = do hash <- new (==) (hashInt.ord) :: IO (HashTable Char Int)
                 mapM_ (\(key,val) -> update hash key val) $ reverse $ flip zip [0..] $ reverse str
                 return hash


getMoveIndex :: HashTable Char Int -> String -> Char-> IO Int
getMoveIndex hash s c = do val <- lookup hash c
                           case val of
                                Nothing -> return $ length s
                                Just n  -> return $ n

patternMoveVal :: String -> Int -> Int -> Int
patternMoveVal pattern pi mi | mi >  pindex = mi
                             | otherwise    = pindex
                                              where pindex = flip (-) pi $ length pattern

search :: HashTable Char Int -> String -> String -> Int -> IO (Maybe Int)
search hash pattern text ti | ti >= (length text) = return Nothing
                            | otherwise = do search_ ( length pattern )
                                where 
                                search_ :: Int -> IO (Maybe Int)
                                search_ pi | pi == 0  = return $ Just ti
                                           | (ti + pi  -1 ) >= (length text )  = return Nothing
                                           | (text !! (ti + pi -1 )) == (pattern !! (pi - 1)) = search_ (pi - 1)
                                           | otherwise = do  mi <- getMoveIndex hash pattern $ text !! (ti + pi - 1 )
                                                             search hash pattern text $ (+) ti $ patternMoveVal pattern pi mi
                                      
bmSearch :: String -> String -> IO (Maybe Int)
bmSearch pattern text = do hash <- mkDelta pattern
                           search hash pattern text 0

ずらし表にHashTable使ったら、みごとにIOとつきあわなければいけなくなった。
実行結果はこんな感じ。

Prelude> :l   BMSearch
[1 of 1] Compiling BMSearch         ( BMSearch.hs, interpreted )
Ok, modules loaded: BMSearch.
*BMSearch> bmSearch "oe" "mogehogege"
Nothing
it :: Maybe Int
*BMSearch> bmSearch "oge" "mogehogege"
Just 1
it :: Maybe Int
*BMSearch> bmSearch "hoge" "mogehogege"
Just 4
it :: Maybe Int
*BMSearch> flip bmSearch "On a dark desert highway, cool wind in my hair" "wind"
Just 31
it :: Maybe Int

あと、今回は shelarcyさんのHaskellでのテストの自動化を考えるを参考にしてHUnitで簡単なテストを書いてみた。

module Main where
import Test.HUnit
import Data.HashTable hiding (lookup)
import BMSearch
import Maybe

eqArray [] ys = True
eqArray (x:xs) ys | elem x ys = eqArray xs ys
                  | otherwise = False

testMKDelta (str,result) = do hash <- mkDelta str
                              ary  <- toList hash
                              assertBool  str $ eqArray ary result

checkIndex :: HashTable Char Int -> String -> (Char,Int) -> IO()
checkIndex hash str (c,i) = do ret <- getMoveIndex hash str c
                               assertEqual str i ret
                        
                              
testGetMoveIndex :: (String,[(Char,Int)]) -> IO()
testGetMoveIndex (str,result) = do hash <- mkDelta str
                                   mapM_ (checkIndex hash str ) result

tSearch (pattern,text,expect) = do result <- bmSearch pattern text
                                   assertEqual str result expect
                                   where str = "pattern : " ++ pattern ++ " , text : " ++ text

test_BMSearch = do mapM_ tSearch testList
                   where
                        testList = [("hoge","mooohogessssss",Just 4)
                                   ,("aaa","aaaaaaaaaaaaaaa",Just 0)
                                   ,("zzz","aaaaaaaaaaaaaaa",Nothing)
                                   ,("lal","aaalaalcaaaalal",Just 12)]
                                   

test_map = do flip mapM_  testList 
                  where
                       testList = [ ("lisp", [('l',3),('i',2),('s',1),('p',0)]) 
                                   ,("haskell",[('h',6),('a',5),('s',4),('k',3),('e',2),('l',0)])
                                   ,("aaaa",[('a',0)])]:: [(String,[(Char,Int)])]

main = do runTestTT $ test ["mkDelta " ~: test_map testMKDelta 
                            ,"getMoveIndex" ~: test_map testGetMoveIndex  
                            ,"bmSearch" ~: test_BMSearch ]
: runghc Test_bmSearch.hs 
Cases: 3  Tried: 3  Errors: 0  Failures: 0