绝对最小的解析库可能是:
{- | Input error type.

A single-constructor value that carries no further detail about the failure.
-}
data InputError = InputError
  deriving (Eq, Show)
{- | Combining two input errors always yields 'InputError', the only value of
the type.
-}
instance Semigroup InputError where
  _ <> _ = InputError
{- | The neutral element is 'InputError' itself, the only value of the type. -}
instance Monoid InputError where
  mempty = InputError
{- | The parsing monad.

A @Parser s e a@ wraps a function that takes a list of input elements of type
@s@ and returns either an error of type @e@ or a result of type @a@ paired
with a list of @s@ (the input left for whatever runs next).
-}
newtype Parser s e a = Parser ([s] -> Either e (a, [s]))
  deriving stock Functor
-- Instances.

{- | 'pure' succeeds with the given value and returns the input untouched.

'<*>' runs the function parser first, feeds the input it left over to the
argument parser, and applies the function to the argument. A 'Left' from
either parser aborts the whole computation with that error.
-}
instance Applicative (Parser s e) where
  pure x = Parser $ \input -> Right (x, input)

  pf <*> px = Parser $ \input -> do
    (f, afterF) <- run pf input
    (x, afterX) <- run px afterF
    pure (f x, afterX)
{- | '>>=' runs the first parser, passes its result to the continuation, and
runs the parser the continuation returns on the input the first one left.
A 'Left' from the first parser short-circuits.
-}
instance Monad (Parser s e) where
  p >>= k = Parser $ \input -> do
    (x, rest) <- run p input
    run (k x) rest
{- | 'empty' always fails with 'mempty'.

'<|>' tries the left parser; if it fails, the right parser is run on the
/same/ input. When both fail, their errors are combined with '<>' (left
error first).
-}
instance Monoid e => Alternative (Parser s e) where
  empty = Parser $ \_ -> Left mempty

  p <|> q = Parser $ \input ->
    case run p input of
      -- Left alternative failed: retry with the right one from the same input.
      Left e1 -> either (Left . (e1 <>)) Right (run q input)
      success -> success
{- | Primitive parser getting one element out of the stream.

Succeeds with the first element and the rest of the list; fails with
'InputError' when the input is empty.
-}
one :: Parser s InputError s
one = Parser step
  where
    step []       = Left InputError
    step (x : xs) = Right (x, xs)
{- | Unwrap a parser and apply it to the given input.

Yields either the error or the result paired with the list the wrapped
function returned alongside it.
-}
run :: Parser s e a -> [s] -> Either e (a, [s])
run parser = case parser of
  Parser step -> step
以上代码可以编译(使用 GHC2021 语言;需要补充一些导入)。在 cabal repl 启动的 ghci 中加载它:
ghci> let p = take 2 <$> many one
ghci> run p "0123"
Right ("01","")
这意味着解析器已经消耗了所有的输入——而我期望看到的是 Right ("01","23")。
所以我的问题是:惰性究竟是在哪里被“打断”的?有没有办法恢复它?我所说的“恢复”,是指换一种方式来实现这些实例,使 many 像预期的那样惰性,因为如果我添加
{- | Bounded repetition: run @p@ repeatedly and collect at most @n@ results.

Collection stops once @n@ results have been gathered or as soon as @p@ fails,
whichever happens first. A failure of @p@ ends the collection instead of
failing the whole parser, because each attempt is wrapped in 'optional'.
With @n == 0@ the parser @p@ is never run.
-}
atMostN :: Monoid e => Word -> Parser s e a -> Parser s e [a]
atMostN n p = go n
  where
    -- Budget exhausted: stop without touching the input.
    go 0 = pure []
    -- Try once more; keep going only if this attempt produced a value.
    go m = optional p >>= maybe (pure []) (\x -> (x :) <$> go (pred m))
并将其加载到 ghci 中,我得到了预期的结果:
ghci> let q = atMostN 2 one
ghci> run q "0123"
Right ("01","23")