3
F# - Efficient CharStream parser-combinator
source link: http://blog.stermon.com/articles/2022/03/28/fsharp-efficient-charstream-parser-combinator.html
Go to the source link to read the original article, which has the images, any later updates, and better typesetting. If the link is broken, click the button below to view a snapshot taken at the time.
Code Snippet
parser.fsx
#!/usr/bin/env -S dotnet fsi --langversion:6.0 --mlcompatibility --optimize --warnaserror+:25,26
(* This construct is for ML compatibility. The syntax '(typ,...,typ) ident'
is not used in F# code. Consider using 'ident<typ,...,typ>' instead. *)
#nowarn "62"
#time "on"
open System
[<RequireQualifiedAccess>]
module Shuffle =
  (* Random source shared by every call in this module. *)
  let private rand = new Random ()

  (* Exchange the elements at positions i and j of xs, in place. *)
  let private swap (xs:_[]) i j =
    let tv = xs[i]
    xs[i] <- xs[j]
    xs[j] <- tv

  (* Split str on sep, put the pieces in random order and glue them back
     together with the same separator. Strings shorter than two characters
     are returned untouched. *)
  let string (sep:string) (str:string) =
    if 1 < Seq.length str then
      let xs = str.Split(sep)
      (* Fisher-Yates: slot i trades places with a random slot in
         [i, length). *)
      for i in 0 .. xs.Length - 1 do
        swap xs i (rand.Next(i, xs.Length))
      (* String.Join replaces the pairwise sprintf reduce, which re-copied
         the accumulated text once per piece. *)
      String.Join(sep, xs)
    else
      str
module Parser =
type chars = char seq
module Converters =
  [<RequireQualifiedAccess>]
  module Chars =
    (* View a string as a character stream. *)
    let fromString (str:string) : chars = Seq.map id str

    (* Collect a character stream into a string. The characters are copied
       into an array once; the previous fold over (+) allocated a new,
       longer string for every character. *)
    let toString (cs:chars) = System.String(Seq.toArray cs)

    (* Collect the stream into a string and convert that text with int. *)
    let toInt (cs:chars) = cs |> toString |> int
module Combinator =
open Converters
(* Position in the input, counted in characters (run starts at 0 and getP
   advances it by one). *)
type index = int
(* Error messages collected while parsing. *)
type error = string seq
(* Outcome of a parser: on success the new position, the parsed value and
   the characters still to be read; otherwise the error messages. *)
type 'a output = (index * 'a * chars, error) Result
(* A parser takes the current position and the remaining characters. *)
type 'a parser = index -> chars -> 'a output
(* Apply the parser f at position i to the remaining characters cs. *)
let parse f (i:index) (cs:chars) : 'a output =
  cs |> f i
(* Functor *)
(* Transform the value of a successful parse with f. Position, remaining
   input and error messages pass through unchanged. *)
let fmapP (f:'a -> 'b) (p:'a parser) : 'b parser =
  fun i cs ->
    p i cs
    |> Result.map (fun (j, a, rs) -> (j, f a, rs))

(* Infix spelling of fmapP. *)
let (<!>) (f:'a -> 'b) (p:'a parser) : 'b parser =
  fmapP f p
(* Applicative *)
(* Parser that always succeeds with a and leaves position and input as
   they were. *)
let pureP a : 'a parser =
  fun i cs -> Ok (i, a, cs)
(* Run p1, then p2 on what p1 left over, and combine both values with f.
   The first failure is returned as is. *)
let lift2AP f (p1:'a parser) (p2:'b parser) : 'c parser =
  fun i cs ->
    p1 i cs
    |> Result.bind (fun (j, a, xs) ->
        p2 j xs
        |> Result.map (fun (k, b, ys) -> (k, f a b, ys)))
(* Applicative apply: p1 yields a function, p2 (run on what p1 left over)
   yields its argument. The first failure is returned as is. *)
let (<*>) (p1:('a -> 'b) parser) (p2:'a parser) : 'b parser =
  fun i cs ->
    p1 i cs
    |> Result.bind (fun (j, f, xs) ->
        p2 j xs
        |> Result.map (fun (k, a, ys) -> (k, f a, ys)))
(* Run p1, drop its value, and continue with p2 from where p1 stopped. *)
let ( *>) (p1:'a parser) (p2:'b parser) : 'b parser =
  fun i cs ->
    p1 i cs
    |> Result.bind (fun (j, _, rs) -> p2 j rs)
(* Run p1 and then p2, keeping the value of p1 and the position and
   remaining input reached after p2. *)
let (<* ) (p1:'a parser) (p2:'b parser) : 'a parser =
  fun i cs ->
    p1 i cs
    |> Result.bind (fun (j, kept, xs) ->
        p2 j xs
        |> Result.map (fun (k, _, ys) -> (k, kept, ys)))
(* Monad *)
(* Run p, hand its value to f, and continue with the parser f returns
   from where p stopped. A failure of p is returned as is. *)
let bindP (p: 'a parser) (f: 'a -> 'b parser) : 'b parser =
  fun i cs ->
    p i cs
    |> Result.bind (fun (j, a, ts) -> (f a) j ts)
(* Flatten a parser that produces a parser: run the outer one, then run
   the parser it produced on the remaining input. *)
let joinP (pp:'a parser parser) : 'a parser =
  fun i cs ->
    pp i cs
    |> Result.bind (fun (j, inner, rs) -> inner j rs)
(* Infix spelling of bindP. *)
let (>>=) (p:'a parser) (f:'a -> 'b parser) : 'b parser =
  bindP p f
(* Computation-expression builder: let! sequences two parsers, return
   wraps a plain value, return! hands back an existing parser. *)
type ParserBuilder () =
  member _.Bind (p: 'a parser, f: 'a -> 'b parser) : 'b parser =
    p >>= f
  member _.Return a : 'a parser =
    fun i cs -> Ok (i, a, cs)
  member _.ReturnFrom p : 'a parser =
    p

(* Builder instance used by the parser { ... } blocks. *)
let parser = ParserBuilder ()
(* Alternate *)
(* Try p1; if it fails, try p2 on the same position and input. When both
   fail, the messages of p1 are followed by those of p2. *)
let (<|>) (p1:'a parser) (p2:'a parser) : 'a parser =
  fun i cs ->
    match p1 i cs with
    | Ok _ as first ->
      first
    | Error es1 ->
      match p2 i cs with
      | Ok _ as second ->
        second
      | Error es2 ->
        Error (Seq.append es1 es2)
(* Error *)
(* Formats one error report. The arguments, in order: the name of the
   function that failed, the index at which it failed, and the characters
   that were still unparsed. *)
let errmsg =
sprintf
"Parser error\n\
* Function.........: %s\n\
* Index............: %i\n\
* Unparsed chars...: %A\n"
(* Parsers *)
(* Parser that always fails, reporting msg under the failP label together
   with the current index and input. The sequence expression is kept so
   the report text is only rendered when the errors are enumerated. *)
let failP (msg:string) : 'a parser =
  fun i cs ->
    let report = seq { yield errmsg (sprintf "failP > %s" msg) i cs }
    Error report
(* Consume exactly one character and advance the index by one; fails when
   no input is left. *)
let getP : char parser =
  fun i cs ->
    match Seq.tryHead cs with
    | Some c ->
      Ok (i + 1, c, Seq.tail cs)
    | None ->
      Error (seq { yield errmsg "getP" i cs })
(* Consume the longest leading run of characters satisfying f. Fails when
   that run is empty; otherwise the index advances by the run length. *)
let spanP f : chars parser =
  fun i cs ->
    let matched = Seq.takeWhile f cs
    if Seq.isEmpty matched then
      Error (seq { yield errmsg "spanP" i cs })
    else
      let rest = Seq.skipWhile f cs
      Ok (i + Seq.length matched, matched, rest)
(* Apply p repeatedly and collect the values in order. Never fails: as
   soon as p fails, the values gathered so far (possibly none) are
   returned. *)
let rec seqP p : 'a seq parser =
  let oneOrMore =
    p >>= fun x ->
      (* The recursive call sits inside the continuation, so it is only
         made after p has succeeded. *)
      seqP p >>= fun xs ->
        pureP (Seq.append (Seq.singleton x) xs)
  oneOrMore <|> pureP Seq.empty
(* Parse items recognised by p that are separated by sep and return the
   items in order; separator values are discarded. Yields an empty
   sequence when not even the first item parses. *)
let sepBy p sep : 'a seq parser =
  let cons x xs = Seq.append (Seq.singleton x) xs
  let oneOrMore = lift2AP cons p (seqP (sep *> p))
  oneOrMore <|> pureP Seq.empty
(* Consume one character and succeed only if the predicate f accepts it;
   otherwise fail through failP with a message naming the character. *)
let satisfyP f : char parser =
  getP >>= fun x ->
    let r = f x
    if r then
      pureP x
    else
      failP (sprintf "satisfyP > Result `%b` for `%c`" r x)
(* Consume one character that must be equal to c. *)
let charP c : char parser =
  satisfyP (fun x -> c = x)
(* Match the text str literally, one character after another, and return
   the matched characters. An empty str succeeds without consuming
   anything. *)
let stringP (str:string) : chars parser =
  let rec go (pending:chars) : chars parser =
    match Seq.tryHead pending with
    | None ->
      pureP Seq.empty
    | Some expected ->
      charP expected >>= fun x ->
        go (Seq.tail pending) >>= fun xs ->
          pureP (Seq.append (Seq.singleton x) xs)
  go (Chars.fromString str)
(* Parse one token with p, then run the separator parser sep and discard
   its value, and return f applied to the token.
   sep carries its own result type: its value is thrown away, so it does
   not have to produce the same type as p (the former annotation forced
   both to agree).
   Sample separator: spacesP <|> pureP Seq.empty *)
let tokenP (f:'a -> 'b) p (sep:'c parser) : 'b parser =
  parser {
    let! t = p
    let! _ = sep
    return f t
  }
(* Character-class parsers. The singular forms take exactly one character
   through satisfyP; the plural forms take a non-empty run through spanP.
   Classification is delegated to the System.Char predicates. *)

(* One whitespace character. *)
let spaceP : char parser =
  satisfyP (fun c -> Char.IsWhiteSpace c)

(* A non-empty run of whitespace. *)
let spacesP : chars parser =
  spanP (fun c -> Char.IsWhiteSpace c)

(* One letter. *)
let alphaP : char parser =
  satisfyP (fun c -> Char.IsLetter c)

(* A non-empty run of letters. *)
let alphasP : chars parser =
  spanP (fun c -> Char.IsLetter c)

(* One character accepted by Char.IsNumber. *)
let numP : char parser =
  satisfyP (fun c -> Char.IsNumber c)

(* A non-empty run of characters accepted by Char.IsNumber. *)
let numsP : chars parser =
  spanP (fun c -> Char.IsNumber c)

(* One letter or, failing that, one number character. *)
let alphanumP : char parser =
  alphaP <|> numP

(* A non-empty run of letters and number characters. *)
let alphanumsP : chars parser =
  spanP (fun c -> Char.IsNumber c || Char.IsLetter c)
(* Run p over str starting at index 0. The result is Ok only when p
   succeeds and consumes every character. Leftover input is reported with
   a run error message, and a parser failure is returned as all of its
   messages concatenated into one string. *)
let run (p:'a parser) (str:string) =
  match parse p 0 (Seq.map id str) with
  | Ok (i, a, rs) ->
    if Seq.isEmpty rs then
      Ok a
    else
      Error (errmsg "run" i rs)
  | Error e ->
    Error (String.concat "" e)
open Parser
open Parser.Converters
open Parser.Combinator
(* The parts of a postal address recognised by the parsers below; every
   case carries the matched text.
   The order of the cases matters: main sorts the tokens with Seq.sort
   before printing them, so the printed order follows this declaration. *)
type token =
| Street of string
| Number of string
| PostalCode of string
| City of string
| Region of string
| Country of string
(* Optional whitespace: a run of whitespace when there is one, otherwise
   an empty result with nothing consumed. *)
let maybeSpacesP =
  let nothing : chars parser = pureP Seq.empty
  spacesP <|> nothing
(* A non-empty run of letters, number characters and whitespace. *)
let alphanumswsP : chars parser =
  let accepted c =
    Char.IsLetter c || Char.IsNumber c || Char.IsWhiteSpace c
  spanP accepted
(* Parse five number characters, at least one whitespace character and a
   run of letters into a PostalCode token followed by a City token. *)
let postalCodeCityP : token seq parser =
  let fiveDigits : string parser =
    (fun (a:char) b c d e -> System.String([| a; b; c; d; e |]))
    <!> numP <*> numP <*> numP <*> numP <*> numP
  parser {
    let! code = fiveDigits
    let! _ = spacesP
    let! city = alphasP
    return
      seq {
        yield PostalCode code
        yield City (Chars.toString city)
      }
  }
(* The accepted street prefixes, tried in the listed order. *)
let streetHelperP =
  [ "Rúa"; "Calle"; "C/" ]
  |> List.map stringP
  |> List.reduce (<|>)
(* Parse a street prefix, at least one whitespace character and then the
   street name; only the name ends up in the Street token. *)
let streetP : token seq parser =
  (streetHelperP *> spacesP *> alphanumswsP)
  |> fmapP (fun name -> seq { yield Street (Chars.toString name) })
(* Parse a house number: the literal s/n or, failing that, a run of
   number characters. *)
let numberP : token seq parser =
  (stringP "s/n" <|> numsP)
  |> fmapP (fun num -> seq { yield Number (Chars.toString num) })
(* Parse a run of letters, number characters and whitespace into a Region
   token. *)
let regionP : token seq parser =
  alphanumswsP
  |> fmapP (fun reg -> seq { yield Region (Chars.toString reg) })
(* Parse a two-letter country code into a single Country token.
   The code has to be a complete address part: after the two letters only
   the end of the input, or optional whitespace followed by a comma, may
   come. Without that check the first two letters of any longer word were
   accepted (for example "Ga" out of "Galicia"); since partsP tries
   countryP before regionP, the region parser never got its turn and the
   rest of the word was left unparsed.
   The lookahead does not consume anything: on success the position and
   the remaining input are the ones reached right after the two letters. *)
let countryP : token seq parser =
  let twoLetters : token seq parser =
    parser {
      let! x = alphaP
      let! y = alphaP
      return
        seq {
          yield (Country (string x + string y))
        }
    }
  fun i cs ->
    match twoLetters i cs with
    | Ok (_, _, rest) as accepted ->
      let next = Seq.skipWhile Char.IsWhiteSpace rest
      if Seq.isEmpty next || Seq.head next = ',' then
        accepted
      else
        Error <| seq { yield errmsg "countryP" i cs }
    | rejected ->
      rejected
(* Any single address part. The alternatives are tried in the listed
   order and the first one that succeeds wins. *)
let partsP =
  [ postalCodeCityP; streetP; numberP; countryP; regionP ]
  |> List.reduce (<|>)
(* A whole address: address parts separated by a comma that may have
   whitespace on either side. *)
let addressP : token seq seq parser =
  let comma = maybeSpacesP *> charP ',' <* maybeSpacesP
  sepBy partsP comma
(* Run n rounds. Each round shuffles the parts of the sample address and
   prints the shuffled text, then either the parsed tokens in sorted
   order or the failed result, followed by an empty line. The loop stops
   when the counter reaches zero. *)
let main n =
  let mutable remaining = n
  while remaining <> 0 do
    let addr =
      Shuffle.string ", " "Rúa Ponzos, 30, 15404 Ferrol, A Coruña, ES"
    printfn "# %s" addr
    match run addressP addr with
    | Ok xs ->
      for tok in Seq.sort (Seq.concat xs) do
        printfn "* %A" tok
    | Error _ as failure ->
      printfn "%A" failure
    printfn "%s" String.Empty
    remaining <- remaining - 1

(* Script entry point: seven rounds. *)
let _ =
  main 7
Code Output:
[nix-shell:~/code/dotnet/src/parser]$ clear && ./parser.fsx
# ES, A Coruña, Rúa Ponzos, 15404 Ferrol, 30
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
# 15404 Ferrol, 30, ES, A Coruña, Rúa Ponzos
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
# Rúa Ponzos, 15404 Ferrol, 30, A Coruña, ES
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
# 30, Rúa Ponzos, ES, A Coruña, 15404 Ferrol
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
# 30, ES, 15404 Ferrol, Rúa Ponzos, A Coruña
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
# A Coruña, 30, ES, 15404 Ferrol, Rúa Ponzos
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
# Rúa Ponzos, ES, 15404 Ferrol, A Coruña, 30
* Street "Ponzos"
* Number "30"
* PostalCode "15404"
* City "Ferrol"
* Region "A Coruña"
* Country "ES"
Real: 00:00:00.060, CPU: 00:00:00.059, GC gen0: 0, gen1: 0, gen2: 0
[nix-shell:~/code/dotnet/src/parser]$
References:
- Write You a Haskell (Stephen Diehl):
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK