From 9c45a6cf4e26bf7adf893ed120f374c4c84786f9 Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Wed, 6 May 2026 17:22:21 +0100 Subject: [PATCH 1/7] support user state in types Does not update combinators (none of them type check). --- src/Symparsec/Parser.hs | 70 ++++++++++++++++++++-------------- src/Symparsec/Parser/Common.hs | 16 ++++---- src/Symparsec/Run.hs | 29 +++++++++----- 3 files changed, 68 insertions(+), 47 deletions(-) diff --git a/src/Symparsec/Parser.hs b/src/Symparsec/Parser.hs index 696f31d..852a3c2 100644 --- a/src/Symparsec/Parser.hs +++ b/src/Symparsec/Parser.hs @@ -14,7 +14,7 @@ import Singleraeh.List --import Singleraeh.Symbol -- | Parser state. -data State str n = State +data State str n u = State -- | Remaining input. { remaining :: str @@ -34,23 +34,32 @@ data State str n = State -- -- Overall index. Used for nicer error reporting after parse completion. , index :: n + + -- | User state. + , user :: u } deriving stock Show -- | Promoted 'State'. type PState = State Symbol Natural -- | Singled 'State'. -data SState (s :: PState) where - SState :: SSymbol rem -> SNat len -> SNat idx -> SState ('State rem len idx) +data SState (su :: u -> Type) (pu :: PState u) where + SState :: SSymbol rem -> SNat len -> SNat idx -> su u -> SState su ('State rem len idx user) + +-- | Demoted 'State'. +type DState = State String Natural -- | Demote an 'SState'. -demoteSState :: SState s -> State String Natural -demoteSState (SState srem slen sidx) = - State (fromSSymbol srem) (fromSNat slen) (fromSNat sidx) +demoteSState + :: (forall u. su u -> du) + -> SState su pu + -> DState du +demoteSState demoteSU (SState srem slen sidx suser) = + State (fromSSymbol srem) (fromSNat slen) (fromSNat sidx) (demoteSU suser) -instance Demotable SState where - type Demote SState = State String Natural - demote = demoteSState +instance Demotable su => Demotable (SState su) where + type Demote (SState su) = DState (Demote su) + demote = demoteSState demote {- data Span n = Span @@ -82,29 +91,33 @@ instance Demotable SError where -- -- TODO: megaparsec also returns a bool indicating if any input was consumed. -- Unsure what it's used for. -data Reply str n a = Reply +data Reply str n u a = Reply { result :: Result str n a -- ^ Parse result. - , state :: State str n -- ^ Final parser state. + , state :: State str n u -- ^ Final parser state. } deriving stock Show -- | Promoted 'Reply'. type PReply = Reply Symbol Natural -- | Singled 'Reply'. -data SReply (sa :: a -> Type) (rep :: PReply a) where - SReply :: SResult sa result -> SState state -> SReply sa ('Reply result state) +data SReply (su :: u -> Type) (sa :: a -> Type) (rep :: PReply u a) where + SReply :: SResult sa result -> SState su state -> SReply su sa ('Reply result state) + +-- | Demoted 'Reply'. +type DReply = Reply String Natural -- | Demote an 'SReply. demoteSReply - :: (forall a. sa a -> da) - -> SReply sa rep - -> Reply String Natural da -demoteSReply demoteSA (SReply sresult sstate) = - Reply (demoteSResult demoteSA sresult) (demoteSState sstate) + :: (forall u. su u -> du) + -> (forall a. sa a -> da) + -> SReply su sa rep + -> DReply du da +demoteSReply demoteSU demoteSA (SReply sresult sstate) = + Reply (demoteSResult demoteSA sresult) (demoteSState demoteSU sstate) -instance Demotable sa => Demotable (SReply sa) where - type Demote (SReply sa) = Reply String Natural (Demote sa) - demote = demoteSReply demote +instance (Demotable su, Demotable sa) => Demotable (SReply su sa) where + type Demote (SReply su sa) = DReply (Demote su) (Demote sa) + demote = demoteSReply demote demote -- | Parse result: a value, or an error. data Result str n a = OK a -- ^ Parser succeeded. @@ -114,20 +127,19 @@ data Result str n a = OK a -- ^ Parser succeeded. -- | Promoted 'Result'. type PResult = Result Symbol Natural ---type SState = State ---type SResult :: _ -> Type --- TODO ^ how to do explicit kind signature for GADT? - -- | Singled 'Result'. data SResult (sa :: a -> Type) (res :: PResult a) where SOK :: sa a -> SResult sa (OK a) SErr :: SError e -> SResult sa (Err e) +-- | Demoted 'Result'. +type DResult = Result String Natural + -- | Demote an 'SResult'. demoteSResult :: (forall a. sa a -> da) -> SResult sa res - -> Result String Natural da + -> DResult da demoteSResult demoteSA = \case SOK sa -> OK $ demoteSA sa SErr se -> Err $ demoteSError se @@ -137,14 +149,14 @@ instance Demotable sa => Demotable (SResult sa) where demote = demoteSResult demote -- | A parser is a function on parser state. -type Parser str n a = State str n -> Reply str n a +type Parser str n u a = State str n u -> Reply str n u a -- | Promoted 'Parser': a defunctionalization symbol to a function on promoted -- parser state. -type PParser a = PState ~> PReply a +type PParser u a = PState u ~> PReply u a -- | Singled 'Parser'. -type SParser sa p = Lam SState (SReply sa) p +type SParser su sa p = Lam (SState su) (SReply su sa) p --data SParser (sa :: a -> Type) (p :: PParser a) where -- SParser :: Lam SState (SReply sa) (PParser a) diff --git a/src/Symparsec/Parser/Common.hs b/src/Symparsec/Parser/Common.hs index 6db9507..5851371 100644 --- a/src/Symparsec/Parser/Common.hs +++ b/src/Symparsec/Parser/Common.hs @@ -45,17 +45,17 @@ import GHC.TypeError qualified as TE -- -- If at end of the string, the state is returned untouched, and @len@ is -- guaranteed to be 0. -type UnconsState :: PState -> (Maybe Char, PState) +type UnconsState :: PState u -> (Maybe Char, PState u) type family UnconsState s where - UnconsState ('State rem 0 idx) = '(Nothing, 'State rem 0 idx) - UnconsState ('State rem len idx) = UnconsState' (UnconsSymbol rem) len idx + UnconsState ('State rem 0 idx user) = '(Nothing, 'State rem 0 idx user) + UnconsState ('State rem len idx user) = UnconsState' (UnconsSymbol rem) len idx user type UnconsState' - :: Maybe (Char, Symbol) -> Natural -> Natural -> (Maybe Char, PState) -type family UnconsState' mstr len idx where - UnconsState' (Just '(ch, rem)) len idx = - '(Just ch, 'State rem (len-1) (idx+1)) - UnconsState' Nothing len idx = + :: Maybe (Char, Symbol) -> Natural -> Natural -> u -> (Maybe Char, PState u) +type family UnconsState' mstr len idx user where + UnconsState' (Just '(ch, rem)) len idx user = + '(Just ch, 'State rem (len-1) (idx+1) user) + UnconsState' Nothing len idx user = -- TODO could I change this to a regular parser error? should I? TE.TypeError (TE.Text "unrecoverable parser error: got to end of input string before len=0") diff --git a/src/Symparsec/Run.hs b/src/Symparsec/Run.hs index 930bc2e..f08c131 100644 --- a/src/Symparsec/Run.hs +++ b/src/Symparsec/Run.hs @@ -2,7 +2,7 @@ -- | Running Symparsec parsers. -module Symparsec.Run ( type Run, type RunTest ) where +module Symparsec.Run ( type Run, type Run', type RunTest, type RunTest' ) where import Symparsec.Parser import Data.Type.Symbol qualified as Symbol @@ -17,17 +17,21 @@ import TypeLevelShow.Natural ( type ShowNatDec ) -- -- * On success, returns a tuple of @(result :: a, remaining :: 'Symbol')@. -- * On failure, returns an 'TE.ErrorMessage'. -type Run :: PParser a -> Symbol -> Either TE.ErrorMessage (a, Symbol) -type Run p str = RunEnd str (p @@ StateInit str) +type Run :: PParser u a -> u -> Symbol -> Either TE.ErrorMessage (a, Symbol) +type Run p user str = RunEnd str (p @@ StateInit user str) -type RunEnd :: Symbol -> PReply a -> Either TE.ErrorMessage (a, Symbol) +-- | Run a parser on a 'Symbol'. The parser must have empty '()' user state. +type Run' :: PParser () a -> Symbol -> Either TE.ErrorMessage (a, Symbol) +type Run' p str = Run p '() str + +type RunEnd :: Symbol -> PReply u a -> Either TE.ErrorMessage (a, Symbol) type family RunEnd str rep where - RunEnd str ('Reply (OK a) ('State rem _len _idx)) = + RunEnd str ('Reply (OK a) ('State rem _len _idx _user)) = -- TODO I could return only @len@ of the remaining input @rem@, but -- that's more work than just returning @rem@, and I don't see a way -- this would matter for correct parsers. Right '(a, rem) - RunEnd str ('Reply (Err e) ('State _rem _len idx)) = + RunEnd str ('Reply (Err e) ('State _rem _len idx _user)) = Left (RenderPDoc (PrettyErrorTop idx str e)) -- | Run the given parser on the given 'Symbol', emitting a type error on @@ -36,8 +40,13 @@ type family RunEnd str rep where -- This /would/ be useful for @:k!@ runs, but it doesn't work properly with -- 'TE.TypeError's, printing @= (TypeError ...)@ instead of the error message. -- Alas! Instead, do something like @> Proxy \@(RunTest ...)@. -type RunTest :: PParser a -> Symbol -> (a, Symbol) -type RunTest p str = FromRightTypeError (Run p str) +type RunTest :: PParser u a -> u -> Symbol -> (a, Symbol) +type RunTest p user str = FromRightTypeError (Run p user str) + +-- | Run the given parser on the given 'Symbol', emitting a type error on +-- failure. The parser must have empty '()' user state. +type RunTest' :: PParser () a -> Symbol -> (a, Symbol) +type RunTest' p str = RunTest p '() str type FromRightTypeError :: Either TE.ErrorMessage a -> a type family FromRightTypeError eea where @@ -45,8 +54,8 @@ type family FromRightTypeError eea where FromRightTypeError (Left e) = TE.TypeError e -- | Initial parser state for the given 'Symbol'. -type StateInit :: Symbol -> PState -type StateInit str = 'State str (Symbol.Length str) 0 +type StateInit :: u -> Symbol -> PState u +type StateInit user str = 'State str (Symbol.Length str) 0 user -- | Pretty print a top-level parser error. -- From a053423eac8005d0f33673f9b477162ed0e93bee Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Wed, 6 May 2026 17:35:28 +0100 Subject: [PATCH 2/7] update combinators to use user state --- src/Symparsec/Parser/Alternative.hs | 18 +++++++-------- src/Symparsec/Parser/Applicative.hs | 12 +++++----- src/Symparsec/Parser/Count.hs | 2 +- src/Symparsec/Parser/Ensure.hs | 8 +++---- src/Symparsec/Parser/Eof.hs | 2 +- src/Symparsec/Parser/Functor.hs | 6 ++--- src/Symparsec/Parser/Isolate.hs | 22 +++++++++--------- src/Symparsec/Parser/Literal.hs | 8 +++---- src/Symparsec/Parser/Monad.hs | 4 ++-- src/Symparsec/Parser/Natural.hs | 22 +++++++++--------- src/Symparsec/Parser/Satisfy.hs | 6 ++--- src/Symparsec/Parser/Skip.hs | 8 +++---- src/Symparsec/Parser/Take.hs | 6 ++--- src/Symparsec/Parser/TakeRest.hs | 6 ++--- src/Symparsec/Parser/TakeWhile.hs | 4 ++-- src/Symparsec/Parser/Token.hs | 2 +- src/Symparsec/Parser/Try.hs | 4 ++-- src/Symparsec/Parser/While.hs | 36 ++++++++++++++--------------- 18 files changed, 88 insertions(+), 88 deletions(-) diff --git a/src/Symparsec/Parser/Alternative.hs b/src/Symparsec/Parser/Alternative.hs index 48c40e6..f19dead 100644 --- a/src/Symparsec/Parser/Alternative.hs +++ b/src/Symparsec/Parser/Alternative.hs @@ -22,22 +22,22 @@ import qualified Singleraeh.List as List -- Does not backtrack. Wrap parsers with 'Symparsec.Parser.Try' as needed. -- -- TODO shitty errors -type (<|>) :: PParser a -> PParser a -> PParser a +type (<|>) :: PParser u a -> PParser u a -> PParser u a infixl 3 <|> data (<|>) l r s type instance App (l <|> r) s = Plus r (l @@ s) -type Plus :: PParser a -> PReply a -> PReply a +type Plus :: PParser u a -> PReply u a -> PReply u a type family Plus r rep where Plus r ('Reply (OK a) s) = 'Reply (OK a) s Plus r ('Reply (Err _e) s) = r @@ s -- | 'Control.Alternative.empty' for parsers. Immediately fail with no consumption. -type Empty :: PParser a +type Empty :: PParser u a data Empty s type instance App Empty s = 'Reply (Err (Error1 "called empty parser")) s -- | 'Control.Alternative.optional' for parsers. -type Optional :: PParser a -> PParser (Maybe a) +type Optional :: PParser u a -> PParser u (Maybe a) type Optional p = Con1 Just <$> p <|> Pure Nothing {- Wow, I guess that works. But also, the manual version: @@ -52,7 +52,7 @@ type family OptionalEnd rep where -- | 'Control.Alternative.many' for parsers. -- -- Does not backtrack. Wrap parsers with 'Symparsec.Parser.Try' as needed. -type Many :: PParser a -> PParser [a] +type Many :: PParser u a -> PParser u [a] data Many p s type instance App (Many p) s = Many' p '[] (p @@ s) type family Many' p as rep where @@ -62,21 +62,21 @@ type family Many' p as rep where -- | 'Control.Alternative.some' for parsers. -- -- Does not backtrack. Wrap parsers with 'Symparsec.Parser.Try' as needed. -type Some :: PParser a -> PParser [a] +type Some :: PParser u a -> PParser u [a] type Some p = LiftA2 (Con2 '(:)) p (Many p) -- | @'SepBy' p sep@ parses zero or more occurrences of @p@, separated by @sep@. -- Returns a list of values parsed by @p@. -type SepBy :: PParser a -> PParser sep -> PParser [a] +type SepBy :: PParser u a -> PParser u sep -> PParser u [a] type SepBy p sep = SepBy1 p sep <|> Pure '[] -- | @'SepBy1' p sep@ parses one or more occurrences of @p@, separated by @sep@. -- Returns a list of values parsed by @p@. -type SepBy1 :: PParser a -> PParser sep -> PParser [a] +type SepBy1 :: PParser u a -> PParser u sep -> PParser u [a] type SepBy1 p sep = LiftA2 (Con2 '(:)) p (Many (sep *> p)) -- TODO doesn't backtrack, matching megaparsec. -type Choice :: [PParser a] -> PParser a +type Choice :: [PParser u a] -> PParser u a data Choice pList ps type instance App (Choice pList) ps = ChoiceStart pList ps diff --git a/src/Symparsec/Parser/Applicative.hs b/src/Symparsec/Parser/Applicative.hs index 7528525..f37706b 100644 --- a/src/Symparsec/Parser/Applicative.hs +++ b/src/Symparsec/Parser/Applicative.hs @@ -13,33 +13,33 @@ import Symparsec.Parser.Functor import DeFun.Function ( type IdSym, type ConstSym ) -- | '<*>' for parsers. Sequence two parsers, left to right. -type (<*>) :: PParser (a ~> b) -> PParser a -> PParser b +type (<*>) :: PParser u (a ~> b) -> PParser u a -> PParser u b infixl 4 <*> data (<*>) l r s type instance App (l <*> r) s = ApL r (l @@ s) -type ApL :: PParser a -> PReply (a ~> b) -> PReply b +type ApL :: PParser u a -> PReply u (a ~> b) -> PReply u b type family ApL r rep where ApL r ('Reply (OK fa) s) = (fa <$> r) @@ s ApL r ('Reply (Err e) s) = 'Reply (Err e) s -- | 'pure' for parsers. Non-consuming parser that just returns the given value. -type Pure :: a -> PParser a +type Pure :: a -> PParser u a data Pure a s type instance App (Pure a) s = 'Reply (OK a) s -- | 'liftA2' for parsers. Sequence two parsers, and combine their results with -- a binary type function. -type LiftA2 :: (a ~> b ~> c) -> PParser a -> PParser b -> PParser c +type LiftA2 :: (a ~> b ~> c) -> PParser u a -> PParser u b -> PParser u c type LiftA2 f l r = (f <$> l) <*> r -- | '*>' for parsers. Sequence two parsers left to right, discarding the value -- of the left parser. -type (*>) :: PParser a -> PParser b -> PParser b +type (*>) :: PParser u a -> PParser u b -> PParser u b infixl 4 *> type l *> r = (IdSym <$ l) <*> r -- | '<*' for parsers. Sequence two parsers left to right, discarding the value -- of the right parser. -type (<*) :: PParser a -> PParser b -> PParser a +type (<*) :: PParser u a -> PParser u b -> PParser u a infixl 4 <* type l <* r = LiftA2 ConstSym l r diff --git a/src/Symparsec/Parser/Count.hs b/src/Symparsec/Parser/Count.hs index e35b505..c77a739 100644 --- a/src/Symparsec/Parser/Count.hs +++ b/src/Symparsec/Parser/Count.hs @@ -8,7 +8,7 @@ import qualified Singleraeh.List as List -- TODO Could possibly make more efficient. -- | @'Count' n p@ parses @n@ occurrences of @p@. -type Count :: Natural -> PParser a -> PParser [a] +type Count :: Natural -> PParser u a -> PParser u [a] data Count n p s type instance App (Count n p) s = CountLoop p '[] n s diff --git a/src/Symparsec/Parser/Ensure.hs b/src/Symparsec/Parser/Ensure.hs index a249106..4848c2c 100644 --- a/src/Symparsec/Parser/Ensure.hs +++ b/src/Symparsec/Parser/Ensure.hs @@ -6,11 +6,11 @@ import Symparsec.Parser.Common import Symparsec.Utils ( type IfNatLte ) -- | Assert that there are at least @n@ characters remaining. Non-consuming. -type Ensure :: Natural -> PParser () +type Ensure :: Natural -> PParser u () data Ensure n s type instance App (Ensure n) s = Ensure' n s type family Ensure' n s where - Ensure' n ('State rem len idx) = + Ensure' n ('State rem len idx user) = IfNatLte n len - ('Reply (OK '()) ('State rem len idx)) - ('Reply (Err (Error1 (EStrInputTooShort n len))) ('State rem len idx)) + ('Reply (OK '()) ('State rem len idx user)) + ('Reply (Err (Error1 (EStrInputTooShort n len))) ('State rem len idx user)) diff --git a/src/Symparsec/Parser/Eof.hs b/src/Symparsec/Parser/Eof.hs index a403036..526d94a 100644 --- a/src/Symparsec/Parser/Eof.hs +++ b/src/Symparsec/Parser/Eof.hs @@ -5,7 +5,7 @@ module Symparsec.Parser.Eof ( type Eof ) where import Symparsec.Parser.Common -- | Assert end of input, or fail. -type Eof :: PParser () +type Eof :: PParser u () data Eof s type instance App Eof s = Eof' (UnconsState s) type family Eof' ms where diff --git a/src/Symparsec/Parser/Functor.hs b/src/Symparsec/Parser/Functor.hs index 04c723f..f880168 100644 --- a/src/Symparsec/Parser/Functor.hs +++ b/src/Symparsec/Parser/Functor.hs @@ -10,7 +10,7 @@ import Symparsec.Parser.Common import DeFun.Function ( type ConstSym1 ) -- | '<$>' for parsers. Apply the given type function to the result. -type (<$>) :: (a ~> b) -> PParser a -> PParser b +type (<$>) :: (a ~> b) -> PParser u a -> PParser u b infixl 4 <$> data (<$>) f p s type instance App (f <$> p) s = FmapEnd f (p @@ s) @@ -20,11 +20,11 @@ type family FmapEnd f rep where FmapEnd f ('Reply (Err e) s) = 'Reply (Err e) s -- | '<$' for parsers. Replace the parser result with the given value. -type (<$) :: a -> PParser b -> PParser a +type (<$) :: a -> PParser u b -> PParser u a infixl 4 <$ type a <$ p = ConstSym1 a <$> p -- | 'Data.Functor.$>' for parsers. Flipped t'Symparsec.Parser.Functor.<$'. -type ($>) :: PParser a -> b -> PParser b +type ($>) :: PParser u a -> b -> PParser u b infixl 4 $> type p $> a = ConstSym1 a <$> p diff --git a/src/Symparsec/Parser/Isolate.hs b/src/Symparsec/Parser/Isolate.hs index f51827b..4af2ce7 100644 --- a/src/Symparsec/Parser/Isolate.hs +++ b/src/Symparsec/Parser/Isolate.hs @@ -6,39 +6,39 @@ import Symparsec.Parser.Common import Symparsec.Utils ( type IfNatLte ) -- TODO can use 'Ensure' to help define this -type Isolate :: Natural -> PParser a -> PParser a +type Isolate :: Natural -> PParser u a -> PParser u a data Isolate n p s type instance App (Isolate n p) s = Isolate' n p s type family Isolate' n p s where - Isolate' n p ('State rem len idx) = + Isolate' n p ('State rem len idx user) = -- Could perhaps improve this, since 'OrdCond' probably does similar -- work to @len-n@. IfNatLte n len - (IsolateEnd len n (p @@ ('State rem n idx))) - ('Reply (Err (Error1 (EStrInputTooShort n len))) ('State rem len idx)) + (IsolateEnd len n (p @@ ('State rem n idx user))) + ('Reply (Err (Error1 (EStrInputTooShort n len))) ('State rem len idx user)) --type IsolateEnd :: Natural -> ? -> ? -- TODO are lenRem/lenConsumed actually good names? type family IsolateEnd lenOrig n rep where -- isolated parser succeeded and consumed all input: -- return success with state updated to have actual remaining length - IsolateEnd lenOrig n ('Reply (OK a) ('State rem 0 idx)) = - 'Reply (OK a) ('State rem (lenOrig-n) idx) + IsolateEnd lenOrig n ('Reply (OK a) ('State rem 0 idx user)) = + 'Reply (OK a) ('State rem (lenOrig-n) idx user) -- isolated parser failed - IsolateEnd lenOrig n ('Reply (Err e) ('State rem len idx)) = + IsolateEnd lenOrig n ('Reply (Err e) ('State rem len idx user)) = -- TODO add some isolate meta - 'Reply (Err e) ('State rem (lenOrig-n+len) idx) + 'Reply (Err e) ('State rem (lenOrig-n+len) idx user) -- isolated parser succeeded but didn't consume all input - IsolateEnd lenOrig n ('Reply (OK _a) ('State rem len idx)) = - 'Reply (Err (EIsolateIncomplete len)) ('State rem (lenOrig-n+len) idx) + IsolateEnd lenOrig n ('Reply (OK _a) ('State rem len idx user)) = + 'Reply (Err (EIsolateIncomplete len)) ('State rem (lenOrig-n+len) idx user) type EIsolateIncomplete n = Error1 ( "isolated parser completed without consuming all input (" ++ ShowNatDec n ++ " remaining)" ) -- TODO testing. args flipped because you're more likely to defun the len -type IsolateSym :: PParser a -> Natural ~> PParser a +type IsolateSym :: PParser u a -> Natural ~> PParser u a data IsolateSym p x type instance App (IsolateSym p) n = Isolate n p diff --git a/src/Symparsec/Parser/Literal.hs b/src/Symparsec/Parser/Literal.hs index e4190b7..1aa3ebc 100644 --- a/src/Symparsec/Parser/Literal.hs +++ b/src/Symparsec/Parser/Literal.hs @@ -25,17 +25,17 @@ type EWrongChar lit chExpect chGot = type EEof lit = EDuringLit lit "EOF while still parsing literal" -type Literal :: Symbol -> PParser () +type Literal :: Symbol -> PParser u () data Literal lit s type instance App (Literal lit) s = LiteralCheckLen lit s (Symbol.Length lit) -- now, I could use 'Ensure' here. but we add context to errors here, which I -- quite like. perhaps I should provide an @Ensure'@ that lets you add e detail? type family LiteralCheckLen lit s n where - LiteralCheckLen lit ('State rem len idx) litLen = + LiteralCheckLen lit ('State rem len idx user) litLen = IfNatLte litLen len - (LiteralStep lit ('State rem len idx)) - ('Reply (Err (ETooShort lit litLen len)) ('State rem len idx)) + (LiteralStep lit ('State rem len idx user)) + ('Reply (Err (ETooShort lit litLen len)) ('State rem len idx user)) type LiteralStep lit s = Literal' lit s (UnconsSymbol lit) (UnconsState s) type family Literal' lit sPrev ch ms where diff --git a/src/Symparsec/Parser/Monad.hs b/src/Symparsec/Parser/Monad.hs index 6ae7350..612179c 100644 --- a/src/Symparsec/Parser/Monad.hs +++ b/src/Symparsec/Parser/Monad.hs @@ -8,11 +8,11 @@ import Symparsec.Parser.Common -- | '>>=' for parsers. Sequentially compose two parsers, passing the value from -- the left parser as an argument to the second. -type (>>=) :: PParser a -> (a ~> PParser b) -> PParser b +type (>>=) :: PParser u a -> (a ~> PParser u b) -> PParser u b infixl 1 >>= data (>>=) l r s type instance App (l >>= r) s = BindL r (l @@ s) -type BindL :: (a ~> PParser b) -> PReply a -> PReply b +type BindL :: (a ~> PParser u b) -> PReply u a -> PReply u b type family BindL r rep where BindL r ('Reply (OK a) s) = r @@ a @@ s BindL r ('Reply (Err e) s) = 'Reply (Err e) s diff --git a/src/Symparsec/Parser/Natural.hs b/src/Symparsec/Parser/Natural.hs index cda6a63..a83c2e0 100644 --- a/src/Symparsec/Parser/Natural.hs +++ b/src/Symparsec/Parser/Natural.hs @@ -39,7 +39,7 @@ type NatHex = NatBase 16 ParseDigitHexSym -- -- Returns an error if it parses zero digits, or if any character fails to -- parse. -type NatBase :: Natural -> (Char ~> Maybe Natural) -> PParser Natural +type NatBase :: Natural -> (Char ~> Maybe Natural) -> PParser u Natural data NatBase base parseDigit s type instance App (NatBase base parseDigit) s = NatBaseStart base parseDigit s (UnconsState s) @@ -51,7 +51,7 @@ type family NatBaseStart base parseDigit sCh s where -- | Parse a 'Natural' with the given starting value. -- -- Skips some extra work. May be handy for hand-written parsers. -type NatBase1 :: Natural -> (Char ~> Maybe Natural) -> Natural -> PParser Natural +type NatBase1 :: Natural -> (Char ~> Maybe Natural) -> Natural -> PParser u Natural data NatBase1 base parseDigit digit s type instance App (NatBase1 base parseDigit digit) s = NatBase1' base parseDigit s digit (UnconsState s) @@ -68,13 +68,13 @@ type EInvalidDigit ch base = type NatBaseLoop :: Natural -> (Char ~> Maybe Natural) - -> PState - -> PState + -> PState u + -> PState u -> Natural -> Char -> Maybe Natural - -> (Maybe Char, PState) - -> PReply Natural + -> (Maybe Char, PState u) + -> PReply u Natural type family NatBaseLoop base parseDigit sCh s n chCur mDigit ms where -- parsed digit and have next char NatBaseLoop base parseDigit sCh s n chCur (Just digit) '(Just ch, sNext) = @@ -93,7 +93,7 @@ type family NatBaseLoop base parseDigit sCh s n chCur mDigit ms where -- Returns an error if it parses zero digits, or if the first digit fails to -- parse. Returns success on parsing up to EOF, or just before the first failed -- character parse. (Should match the behaviour of Megaparsec's number parsers.) -type NatBaseWhile :: Natural -> (Char ~> Maybe Natural) -> PParser Natural +type NatBaseWhile :: Natural -> (Char ~> Maybe Natural) -> PParser u Natural data NatBaseWhile base parseDigit s type instance App (NatBaseWhile base parseDigit) s = NatBaseWhileStart base parseDigit s (UnconsState s) @@ -118,13 +118,13 @@ type family NatBaseWhileStart2 base parseDigit sCh s chChur mDigit ms where type NatBaseWhileLoop :: Natural -> (Char ~> Maybe Natural) - -> PState - -> PState + -> PState u + -> PState u -> Natural -> Char -> Maybe Natural - -> (Maybe Char, PState) - -> PReply Natural + -> (Maybe Char, PState u) + -> PReply u Natural type family NatBaseWhileLoop base parseDigit sCh s n chCur mDigit ms where -- parsed digit and have next char NatBaseWhileLoop base parseDigit sCh s n chCur (Just digit) '(Just ch, sNext) = diff --git a/src/Symparsec/Parser/Satisfy.hs b/src/Symparsec/Parser/Satisfy.hs index 536b0aa..06d9be0 100644 --- a/src/Symparsec/Parser/Satisfy.hs +++ b/src/Symparsec/Parser/Satisfy.hs @@ -5,7 +5,7 @@ module Symparsec.Parser.Satisfy ( type Satisfy, type OneOf, type NoneOf ) where import Symparsec.Parser.Common -- may also be defined using @Token@ -type Satisfy :: (Char ~> Bool) -> PParser Char +type Satisfy :: (Char ~> Bool) -> PParser u Char data Satisfy chPred ps type instance App (Satisfy chPred) ps = SatisfyStart chPred ps (UnconsState ps) @@ -19,7 +19,7 @@ type family SatisfyValidate psPrev ps ch res where SatisfyValidate psPrev ps ch False = 'Reply (Err (Error1 "satisfy: char failed predicate")) psPrev -type OneOf :: [Char] -> PParser Char +type OneOf :: [Char] -> PParser u Char type OneOf chs = Satisfy (ElemSym chs) -- TODO put in singleraeh @@ -35,7 +35,7 @@ data ElemSym as a type instance App (ElemSym as) a = Elem a as -- may also be defined using @CompSym2 NotSym (ElemSym chs)@ -type NoneOf :: [Char] -> PParser Char +type NoneOf :: [Char] -> PParser u Char --type NoneOf chs = Satisfy (CompSym2 NotSym (ElemSym chs)) type NoneOf chs = Satisfy (NotElemSym chs) diff --git a/src/Symparsec/Parser/Skip.hs b/src/Symparsec/Parser/Skip.hs index d328d4f..494e3e8 100644 --- a/src/Symparsec/Parser/Skip.hs +++ b/src/Symparsec/Parser/Skip.hs @@ -8,14 +8,14 @@ import Symparsec.Parser.Applicative import Data.Type.Symbol qualified as Symbol -- | Skip forward @n@ characters. Fails if fewer than @n@ characters remain. -type Skip :: Natural -> PParser () +type Skip :: Natural -> PParser u () type Skip n = Ensure n *> SkipUnsafe n -- | Skip forward @n@ characters. @n@ must be less than or equal to the number -- of remaining characters. (Fairly unhelpful; use 'Skip' instead.) -type SkipUnsafe :: Natural -> PParser () +type SkipUnsafe :: Natural -> PParser u () data SkipUnsafe n s type instance App (SkipUnsafe n) s = SkipUnsafe' n s type family SkipUnsafe' n s where - SkipUnsafe' n ('State rem len idx) = - 'Reply (OK '()) ('State (Symbol.Drop n rem) (len-n) (idx+n)) + SkipUnsafe' n ('State rem len idx user) = + 'Reply (OK '()) ('State (Symbol.Drop n rem) (len-n) (idx+n) user) diff --git a/src/Symparsec/Parser/Take.hs b/src/Symparsec/Parser/Take.hs index c475791..ae5ce94 100644 --- a/src/Symparsec/Parser/Take.hs +++ b/src/Symparsec/Parser/Take.hs @@ -6,7 +6,7 @@ import Symparsec.Parser.Common import Singleraeh.Symbol ( type RevCharsToSymbol ) -- | Return the next @n@ characters. -type Take :: Natural -> PParser Symbol +type Take :: Natural -> PParser u Symbol data Take n s type instance App (Take n) s = Take' '[] n s (UnconsState s) type family Take' chs n sPrev s where @@ -19,12 +19,12 @@ type ETakeEnd n = Error1 ( "tried to take " ++ ShowNatDec n ++ " chars from empty string" ) -- | 'Take' defunctionalization symbol. -type TakeSym :: Natural ~> PParser Symbol +type TakeSym :: Natural ~> PParser u Symbol data TakeSym n type instance App TakeSym n = Take n -- | Return the next character. -type Take1 :: PParser Char +type Take1 :: PParser u Char data Take1 s type instance App Take1 s = Take1' s (UnconsState s) type family Take1' sPrev s where diff --git a/src/Symparsec/Parser/TakeRest.hs b/src/Symparsec/Parser/TakeRest.hs index dd186b5..676641a 100644 --- a/src/Symparsec/Parser/TakeRest.hs +++ b/src/Symparsec/Parser/TakeRest.hs @@ -8,12 +8,12 @@ import qualified Data.Type.Symbol as Symbol -- | Consume and return the rest of the input string. -- -- Never fails. May return the empty string. -type TakeRest :: PParser Symbol +type TakeRest :: PParser u Symbol data TakeRest s type instance App TakeRest s = TakeRest' s type family TakeRest' s where - TakeRest' ('State rem len idx) = - 'Reply (OK (Symbol.Take len rem)) ('State (Symbol.Drop len rem) 0 (idx+len)) + TakeRest' ('State rem len idx user) = + 'Reply (OK (Symbol.Take len rem)) ('State (Symbol.Drop len rem) 0 (idx+len) user) {- import GHC.TypeLits diff --git a/src/Symparsec/Parser/TakeWhile.hs b/src/Symparsec/Parser/TakeWhile.hs index 3c22458..59d7bda 100644 --- a/src/Symparsec/Parser/TakeWhile.hs +++ b/src/Symparsec/Parser/TakeWhile.hs @@ -10,7 +10,7 @@ import Singleraeh.Symbol ( type RevCharsToSymbol ) -- May also be defined via -- @'Symparsec.Parser.While.While' chPred 'Symparsec.Parser.TakeRest.TakeRest'@, -- but a custom implementation is more efficient. -type TakeWhile :: (Char ~> Bool) -> PParser Symbol +type TakeWhile :: (Char ~> Bool) -> PParser u Symbol data TakeWhile chPred s type instance App (TakeWhile chPred) s = TakeWhileStart chPred s (UnconsState s) @@ -36,7 +36,7 @@ type family TakeWhileLoop chPred sPrev sCh ch taken res ms where -- | Take one or more 'Char's for which the supplied predicate holds. -- -- Backtracks on failure. Same as megaparsec. -type TakeWhile1 :: (Char ~> Bool) -> PParser Symbol +type TakeWhile1 :: (Char ~> Bool) -> PParser u Symbol data TakeWhile1 chPred ps type instance App (TakeWhile1 chPred) ps = TakeWhile1Start chPred ps (UnconsState ps) diff --git a/src/Symparsec/Parser/Token.hs b/src/Symparsec/Parser/Token.hs index 4b3f6d6..639c436 100644 --- a/src/Symparsec/Parser/Token.hs +++ b/src/Symparsec/Parser/Token.hs @@ -5,7 +5,7 @@ module Symparsec.Parser.Token ( type Token ) where import Symparsec.Parser.Common -- | Should match @token@ from megaparsec. Backtracks. -type Token :: (Char ~> Maybe a) -> PParser a +type Token :: (Char ~> Maybe a) -> PParser u a data Token chParse ps type instance App (Token chParse) ps = TokenStart chParse ps (UnconsState ps) type family TokenStart chParse psPrev mps where diff --git a/src/Symparsec/Parser/Try.hs b/src/Symparsec/Parser/Try.hs index 720ddd1..2a77f75 100644 --- a/src/Symparsec/Parser/Try.hs +++ b/src/Symparsec/Parser/Try.hs @@ -5,10 +5,10 @@ module Symparsec.Parser.Try ( type Try ) where import Symparsec.Parser.Common -- | Run the given parser, backtracking on error. -type Try :: PParser a -> PParser a +type Try :: PParser u a -> PParser u a data Try p s type instance App (Try p) s = Try' s (p @@ s) -type Try' :: PState -> PReply a -> PReply a +type Try' :: PState u -> PReply u a -> PReply u a type family Try' sPrev rep where Try' sPrev ('Reply (OK a) s) = 'Reply (OK a) s Try' sPrev ('Reply (Err e) s) = 'Reply (Err e) sPrev diff --git a/src/Symparsec/Parser/While.hs b/src/Symparsec/Parser/While.hs index f88b580..1b4c389 100644 --- a/src/Symparsec/Parser/While.hs +++ b/src/Symparsec/Parser/While.hs @@ -5,28 +5,28 @@ module Symparsec.Parser.While ( type While ) where import Symparsec.Parser.Common -- | Run the given parser while the given character predicate succeeds. -type While :: (Char ~> Bool) -> PParser a -> PParser a +type While :: (Char ~> Bool) -> PParser u a -> PParser u a data While chPred p s type instance App (While chPred p) s = While' chPred p s type family While' chPred p s where - While' chPred p ('State rem len idx) = - WhileCountStart len rem idx chPred p (UnconsSymbol rem) + While' chPred p ('State rem len idx user) = + WhileCountStart len rem idx user chPred p (UnconsSymbol rem) -type family WhileCountStart len rem idx chPred p mstr where - WhileCountStart len rem idx chPred p (Just '(ch, str)) = - WhileCount len rem idx chPred p 0 (UnconsSymbol str) (chPred @@ ch) - WhileCountStart len rem idx chPred p Nothing = p @@ ('State rem 0 idx) +type family WhileCountStart len rem idx user chPred p mstr where + WhileCountStart len rem idx user chPred p (Just '(ch, str)) = + WhileCount len rem idx user chPred p 0 (UnconsSymbol str) (chPred @@ ch) + WhileCountStart len rem idx user chPred p Nothing = p @@ ('State rem 0 idx user) -type family WhileCount len rem idx chPred p n mstr res where - WhileCount len rem idx chPred p n (Just '(ch, str)) True = - WhileCount len rem idx chPred p (n+1) (UnconsSymbol str) (chPred @@ ch) - WhileCount len rem idx chPred p n (Just '(ch, str)) False = - WhileEnd (len-n) (p @@ ('State rem n idx)) - WhileCount len rem idx chPred p n Nothing True = - WhileEnd (len-(n+1)) (p @@ ('State rem (n+1) idx)) - WhileCount len rem idx chPred p n Nothing False = - WhileEnd (len-n) (p @@ ('State rem n idx)) +type family WhileCount len rem idx user chPred p n mstr res where + WhileCount len rem idx user chPred p n (Just '(ch, str)) True = + WhileCount len rem idx user chPred p (n+1) (UnconsSymbol str) (chPred @@ ch) + WhileCount len rem idx user chPred p n (Just '(ch, str)) False = + WhileEnd (len-n) (p @@ ('State rem n idx user)) + WhileCount len rem idx user chPred p n Nothing True = + WhileEnd (len-(n+1)) (p @@ ('State rem (n+1) idx user)) + WhileCount len rem idx user chPred p n Nothing False = + WhileEnd (len-n) (p @@ ('State rem n idx user)) type family WhileEnd lenRest rep where -- TODO note that we don't require that the inner parser fully consumes. @@ -35,5 +35,5 @@ type family WhileEnd lenRest rep where -- but by not requiring full consumption, we recover char-by-char behaviour! -- and we can still get full consumption by combining with Isolate. -- the inner parser should generally fully consume though, as a design point - WhileEnd lenRest ('Reply res ('State rem len idx)) = - 'Reply res ('State rem (lenRest+len) idx) + WhileEnd lenRest ('Reply res ('State rem len idx user)) = + 'Reply res ('State rem (lenRest+len) idx user) From 62e1398b50ce6b4d30ab335763837c8cf00f1173 Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Wed, 6 May 2026 17:45:10 +0100 Subject: [PATCH 3/7] update examples to use user state --- src/Symparsec/Example/Expr.hs | 12 ++++++------ src/Symparsec/Example/Printf.hs | 11 +++++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/Symparsec/Example/Expr.hs b/src/Symparsec/Example/Expr.hs index b3c4adb..2f6b84f 100644 --- a/src/Symparsec/Example/Expr.hs +++ b/src/Symparsec/Example/Expr.hs @@ -48,15 +48,15 @@ data ExprTok | TokParenL | TokParenR -type PExpr :: PParser (Expr Natural) +type PExpr :: PParser u (Expr Natural) data PExpr s type instance App PExpr s = PExprNext s '[] '[] (UnconsState s) type PExprNext - :: PState + :: PState u -> [ExprTok] -> [Expr Natural] - -> (Maybe Char, PState) - -> PReply (Expr Natural) + -> (Maybe Char, PState u) + -> PReply u (Expr Natural) type family PExprNext sPrev ops exprs s where PExprNext sPrev ops exprs '(Just ch, s) = PExprCh sPrev s ops exprs ch @@ -134,8 +134,8 @@ type family PExprEBOpOpCh ch where PExprEBOpOpCh _ = Nothing type PExprEBOp' - :: PState -> PState -> BOp -> Natural -> [Expr Natural] -> [ExprTok] - -> PReply (Expr Natural) + :: PState u -> PState u -> BOp -> Natural -> [Expr Natural] -> [ExprTok] + -> PReply u (Expr Natural) type family PExprEBOp' sPrev s op prec exprs ops where PExprEBOp' sPrev s op prec exprs (TokBOp opPrev : ops) = IfNatLte prec (BOpPrec opPrev) diff --git a/src/Symparsec/Example/Printf.hs b/src/Symparsec/Example/Printf.hs index e1adf2b..1841565 100644 --- a/src/Symparsec/Example/Printf.hs +++ b/src/Symparsec/Example/Printf.hs @@ -31,7 +31,7 @@ type Number = NatBaseWhile 10 ParseDigitDecSym -- special parser that I probably don't want (surely can just combine) -- backtracks!! -type NotChar :: Char -> PParser Char +type NotChar :: Char -> PParser u Char data NotChar c s type instance App (NotChar c) s = NotChar' c s (UnconsState s) type family NotChar' cNo sPrev s where @@ -71,13 +71,13 @@ data Flags = Flags , fAlternate :: Bool } -type FlagParser :: PParser Flags +type FlagParser :: PParser u Flags data FlagParser s type instance App FlagParser s = PFlags' EmptyFlags s (UnconsState s) type EmptyFlags = 'Flags Nothing Nothing False -type PFlags' :: Flags -> PState -> (Maybe Char, PState) -> PReply Flags +type PFlags' :: Flags -> PState u -> (Maybe Char, PState u) -> PReply u Flags type family PFlags' flags sPrev s where PFlags' ('Flags d i l) sPrev '(Just '-', s) = PFlags' ('Flags (Just (UpdateAdjust d LeftAdjust)) i l) s (UnconsState s) @@ -113,7 +113,10 @@ data FieldFormat = FF } -- copied as-is except for 'Con5' -type FFParser :: PParser FieldFormat +-- TODO with user state introduced, adding a standalone kind signature results +-- in a type error. GHC initialises an inner u0 and doesn't see u ~ u0. +-- Can be fixed by using type abstraction syntax, I think. +--type FFParser :: PParser u FieldFormat type FFParser = Con5 FF <$> FlagParser <*> Optional Number From 69ecf3070679781997ff626e2f2f1c197ecb39aa Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Thu, 7 May 2026 14:19:15 +0100 Subject: [PATCH 4/7] Symparsec: re-export stateless runners --- src/Symparsec.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Symparsec.hs b/src/Symparsec.hs index 548a2b7..2ba6078 100644 --- a/src/Symparsec.hs +++ b/src/Symparsec.hs @@ -13,7 +13,9 @@ module Symparsec ( -- * Base definitions type Run + , type Run' , type RunTest + , type RunTest' -- * Parsers , module Symparsec.Parsers From 0f85240e3fd4dda7f3fd3a0ce82d1ac2b303f46d Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Thu, 7 May 2026 14:19:44 +0100 Subject: [PATCH 5/7] update tests to use stateless runners --- test/Main.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/Main.hs b/test/Main.hs index d49cd06..8e0f714 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -12,10 +12,10 @@ type CstrX_Y = LiftA2 (Con2 '(,)) (Literal "_" *> Isolate 2 NatHex) spec :: Expect - '[ Run (Literal "raehik") "raehik" `Is` Right '( '(), "") - , Run (Literal "raeh") "raehraeh" `Is` Right '( '(), "raeh") - , Run (Skip 3 *> Literal "HI") "...HI" `Is` Right '( '(), "") - , Run (Literal "0x" *> NatHex) "0xfF" `Is` Right '( 255, "") - , Run CstrX_Y "Cstr12_AB" `Is` Right '( '(12, 0xAB), "") + '[ Run' (Literal "raehik") "raehik" `Is` Right '( '(), "") + , Run' (Literal "raeh") "raehraeh" `Is` Right '( '(), "raeh") + , Run' (Skip 3 *> Literal "HI") "...HI" `Is` Right '( '(), "") + , Run' (Literal "0x" *> NatHex) "0xfF" `Is` Right '( 255, "") + , Run' CstrX_Y "Cstr12_AB" `Is` Right '( '(12, 0xAB), "") ] spec = Valid From 304f0bffaf55f4745db9dd03e3b9e6b40d56ab74 Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Sat, 6 Jun 2026 16:34:21 +0200 Subject: [PATCH 6/7] readme: use Run' unit state runner --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cfa9a30..f1f151f 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ type PExample = Skip 1 *> Tuple (Isolate 2 NatHex) (Literal "_" *> TakeRest) Use it to parse a type-level string (in a GHCi session): ```haskell -ghci> :k! Run PExample "xFF_etc" -Run ... +ghci> :k! Run' PExample "xFF_etc" +Run' ... = Right '( '(255, "etc"), "") ``` From f06c093061481b060d8b9fe1cf0dad3947989f6e Mon Sep 17 00:00:00 2001 From: Ben Orchard Date: Sat, 6 Jun 2026 16:38:31 +0200 Subject: [PATCH 7/7] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96cb23b..cce7861 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.1.0 (unreleased) +- Parsers now take user state. + Use `Run'`, `RunTest'` to ignore (run with the empty state `'()`). + ## 2.0.0 (2025-10-11) Full rewrite.