ボイヤー-ムーア法をHaskellで
昨日書いていたプログラムが、なんとか直せた。原因はIO汚染。Maybe aを返すところをIO(Maybe a)に変更したら怒られなくなった。
というわけで、昨日書いていたのは、ボイヤー-ムーア法。詳しくはリンク先で。で、確かボイヤー-ムーア法って、高速にするためにずらし表の作成方法がいろいろあったと思うんだけど、参考にしたのはアルゴリズム for Ruby。
module BMSearch where import Prelude hiding (lookup) import Data.HashTable import Char import Maybe mkDelta :: String -> IO (HashTable Char Int) mkDelta str = do hash <- new (==) (hashInt.ord) :: IO (HashTable Char Int) mapM_ (\(key,val) -> update hash key val) $ reverse $ flip zip [0..] $ reverse str return hash getMoveIndex :: HashTable Char Int -> String -> Char-> IO Int getMoveIndex hash s c = do val <- lookup hash c case val of Nothing -> return $ length s Just n -> return $ n patternMoveVal :: String -> Int -> Int -> Int patternMoveVal pattern pi mi | mi > pindex = mi | otherwise = pindex where pindex = flip (-) pi $ length pattern search :: HashTable Char Int -> String -> String -> Int -> IO (Maybe Int) search hash pattern text ti | ti >= (length text) = return Nothing | otherwise = do search_ ( length pattern ) where search_ :: Int -> IO (Maybe Int) search_ pi | pi == 0 = return $ Just ti | (ti + pi -1 ) >= (length text ) = return Nothing | (text !! (ti + pi -1 )) == (pattern !! (pi - 1)) = search_ (pi - 1) | otherwise = do mi <- getMoveIndex hash pattern $ text !! (ti + pi - 1 ) search hash pattern text $ (+) ti $ patternMoveVal pattern pi mi bmSearch :: String -> String -> IO (Maybe Int) bmSearch pattern text = do hash <- mkDelta pattern search hash pattern text 0
ずらし表にHashTable使ったら、みごとにIOとつきあわなければいけなくなった。
実行結果はこんな感じ。
Prelude> :l BMSearch [1 of 1] Compiling BMSearch ( BMSearch.hs, interpreted ) Ok, modules loaded: BMSearch. *BMSearch> bmSearch "oe" "mogehogege" Nothing it :: Maybe Int *BMSearch> bmSearch "oge" "mogehogege" Just 1 it :: Maybe Int *BMSearch> bmSearch "hoge" "mogehogege" Just 4 it :: Maybe Int *BMSearch> flip bmSearch "On a dark desert highway, cool wind in my hair" "wind" Just 31 it :: Maybe Int
あと、今回は shelarcyさんのHaskellでのテストの自動化を考えるを参考にしてHUnitで簡単なテストを書いてみた。
module Main where import Test.HUnit import Data.HashTable hiding (lookup) import BMSearch import Maybe eqArray [] ys = True eqArray (x:xs) ys | elem x ys = eqArray xs ys | otherwise = False testMKDelta (str,result) = do hash <- mkDelta str ary <- toList hash assertBool str $ eqArray ary result checkIndex :: HashTable Char Int -> String -> (Char,Int) -> IO() checkIndex hash str (c,i) = do ret <- getMoveIndex hash str c assertEqual str i ret testGetMoveIndex :: (String,[(Char,Int)]) -> IO() testGetMoveIndex (str,result) = do hash <- mkDelta str mapM_ (checkIndex hash str ) result tSearch (pattern,text,expect) = do result <- bmSearch pattern text assertEqual str result expect where str = "pattern : " ++ pattern ++ " , text : " ++ text test_BMSearch = do mapM_ tSearch testList where testList = [("hoge","mooohogessssss",Just 4) ,("aaa","aaaaaaaaaaaaaaa",Just 0) ,("zzz","aaaaaaaaaaaaaaa",Nothing) ,("lal","aaalaalcaaaalal",Just 12)] test_map = do flip mapM_ testList where testList = [ ("lisp", [('l',3),('i',2),('s',1),('p',0)]) ,("haskell",[('h',6),('a',5),('s',4),('k',3),('e',2),('l',0)]) ,("aaaa",[('a',0)])]:: [(String,[(Char,Int)])] main = do runTestTT $ test ["mkDelta " ~: test_map testMKDelta ,"getMoveIndex" ~: test_map testGetMoveIndex ,"bmSearch" ~: test_BMSearch ]
: runghc Test_bmSearch.hs Cases: 3 Tried: 3 Errors: 0 Failures: 0