Skip to content

Commit

Permalink
Improve lazy performance of Data.Text.Lazy.inits
Browse files Browse the repository at this point in the history
The previous implementation, itself based on an earlier version of
`Data.List.inits`, inherited the flaw that accessing the i-th element took
quadratic time O(i²). This now takes linear time O(i) as expected.

The current version of `Data.List.inits` uses a banker's queue to
obtain good performance when generating very long lists.
For lazy text, consisting of a few big chunks, that benefit seems
negligible. So I chose a simpler implementation.
  • Loading branch information
Lysxia committed Mar 14, 2024
1 parent 66fa38f commit d3c20b2
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 4 deletions.
2 changes: 2 additions & 0 deletions benchmarks/haskell/Benchmarks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
import qualified Benchmarks.Equality as Equality
import qualified Benchmarks.FileRead as FileRead
import qualified Benchmarks.FoldLines as FoldLines
import qualified Benchmarks.Micro as Micro
import qualified Benchmarks.Multilang as Multilang
import qualified Benchmarks.Pure as Pure
import qualified Benchmarks.ReadNumbers as ReadNumbers
Expand Down Expand Up @@ -61,6 +62,7 @@ main = do
defaultMain
[ Builder.benchmark
, Concat.benchmark
, Micro.benchmark
, bgroup "DecodeUtf8"
[ env (DecodeUtf8.initEnv (tf "libya-chinese.html")) (DecodeUtf8.benchmark "html")
, env (DecodeUtf8.initEnv (tf "yiwiki.xml")) (DecodeUtf8.benchmark "xml")
Expand Down
22 changes: 22 additions & 0 deletions benchmarks/haskell/Benchmarks/Micro.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- | Benchmarks on artificial data.

module Benchmarks.Micro (benchmark) where

import qualified Data.Text.Lazy as TL
import qualified Data.Text as T
import Test.Tasty.Bench (Benchmark, bgroup, bench, nf)

benchmark :: Benchmark
benchmark = bgroup "Micro"
[ -- Accessing i-th element should take O(i) time.
-- The 2k case should run in 2x the time of the 1k case.
bgroup "Lazy.inits"
[ bench "last 1k" $ nf (last . TL.inits) (chunks 1000)
, bench "last 2k" $ nf (last . TL.inits) (chunks 2000)
, bench "map-take1 1k" $ nf (map (TL.take 1) . TL.inits) (chunks 1000)
, bench "map-take1 2k" $ nf (map (TL.take 1) . TL.inits) (chunks 2000)
]
]

chunks :: Int -> TL.Text
chunks n = TL.fromChunks (replicate n (T.pack "a"))
16 changes: 12 additions & 4 deletions src/Data/Text/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1443,10 +1443,18 @@ inits = (NE.toList P.$!) . initsNE
--
-- @since 2.1.2
initsNE :: Text -> NonEmpty Text
initsNE = (Empty NE.:|) . inits'
where inits' Empty = []
inits' (Chunk t ts) = L.map (\t' -> Chunk t' Empty) (NE.tail (T.initsNE t))
++ L.map (Chunk t) (inits' ts)
initsNE ts0 = Empty NE.:| inits' 0 ts0
where
inits' :: Int64 -- Number of previous chunks i
-> Text -- The remainder after dropping i chunks from ts0
-> [Text] -- Prefixes longer than the first i chunks of ts0.
inits' _ Empty = []
inits' i (Chunk t ts) = L.map (takeChunks i ts0) (NE.tail (T.initsNE t))
++ inits' (i + 1) ts

takeChunks :: Int64 -> Text -> T.Text -> Text
takeChunks !i (Chunk t ts) lastChunk | i > 0 = Chunk t (takeChunks (i - 1) ts lastChunk)
takeChunks _ _ lastChunk = Chunk lastChunk Empty

-- | /O(n)/ Return all final segments of the given 'Text', longest
-- first.
Expand Down
1 change: 1 addition & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ benchmark text-benchmarks
Benchmarks.Equality
Benchmarks.FileRead
Benchmarks.FoldLines
Benchmarks.Micro
Benchmarks.Multilang
Benchmarks.Programs.BigTable
Benchmarks.Programs.Cut
Expand Down

0 comments on commit d3c20b2

Please sign in to comment.