Skip to content

Commit

Permalink
hPutBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
BebeSparkelSparkel committed Apr 28, 2024
1 parent 3f2a71b commit 6c49fdd
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 19 deletions.
49 changes: 30 additions & 19 deletions src/Data/Text/Internal/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ module Data.Text.Internal.Builder
-- ** The Builder type
Builder
, LazyTextBuilder
, getTexts
, toLazyText
, toLazyTextWith
, starter

-- ** Constructing Builders
, singleton
Expand Down Expand Up @@ -92,13 +94,20 @@ import GHC.Stack (HasCallStack)
newtype Builder = Builder {
-- Invariant (from Data.Text.Lazy):
-- The lists include no null Texts.
runBuilder :: forall s. (Buffer s -> ST s [S.Text])
runBuilder :: forall s
. (Int -> (Int -> ST s (A.MArray s)) -> Buffer s -> ST s [S.Text])
-> Int -- buffer size
-> (Int -> ST s (A.MArray s)) -- new array
-> Buffer s
-> ST s [S.Text]
}

type LazyTextBuilder = Builder

getTexts :: Int -> (Int -> ST s (A.MArray s)) -> Builder -> ST s [S.Text]
getTexts chunkSize new b =
newBuffer new chunkSize >>= runBuilder (b `append` flush) starter chunkSize new

instance Semigroup Builder where
(<>) = append
{-# INLINE (<>) #-}
Expand Down Expand Up @@ -136,7 +145,7 @@ instance Ord Builder where
-- * @'toLazyText' 'empty' = 'L.empty'@
--
empty :: Builder
empty = Builder (\ k buf -> k buf)
empty = Builder (\ k new buf -> k new buf)
{-# INLINE empty #-}

-- | /O(1)./ A @Builder@ taking a single character, satisfying
Expand All @@ -160,6 +169,7 @@ singleton c = writeAtMost 4 $ \ marr o -> unsafeWrite marr o (safe c)
--
append :: Builder -> Builder -> Builder
append (Builder f) (Builder g) = Builder (f . g)
--append (Builder f) (Builder g) = Builder $ \ k new buf -> f (g k) new buf
{-# INLINE [0] append #-}

-- TODO: Experiment to find the right threshold.
Expand Down Expand Up @@ -196,22 +206,20 @@ fromText t@(Text arr off l)
--
-- @since 1.2.0.0
fromString :: String -> Builder
fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
let loop !marr !o !u !l [] = k (Buffer marr o u l)
fromString str = Builder $ \k chunkSize new (Buffer p0 o0 u0 l0) ->
let loop !marr !o !u !l [] = k chunkSize new (Buffer marr o u l)
loop marr o u l s@(c:cs)
| l <= 3 = do
A.shrinkM marr (o + u)
arr <- A.unsafeFreeze marr
let !t = Text arr o u
marr' <- A.new chunkSize
marr' <- new chunkSize
ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
return $ t : ts
| otherwise = do
n <- unsafeWrite marr (o+u) (safe c)
loop marr o (u+n) (l-n) cs
in loop p0 o0 u0 l0 str
where
chunkSize = smallChunkSize
{-# INLINEABLE fromString #-}

-- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
Expand Down Expand Up @@ -246,18 +254,21 @@ toLazyText = toLazyTextWith smallChunkSize
-- buffers will be the default buffer size.
toLazyTextWith :: Int -> Builder -> L.Text
toLazyTextWith chunkSize m = L.fromChunks (runST $
newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
newBuffer A.new chunkSize >>= runBuilder (m `append` flush) starter smallChunkSize A.new)

starter :: Monad m => a -> b -> c -> m [d]
starter _ _ _ = return []

-- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
-- yielding a new chunk in the result lazy @Text@.
flush :: Builder
flush = Builder $ \ k buf@(Buffer p o u l) ->
flush = Builder $ \ k cs new buf@(Buffer p o u l) ->
if u == 0
then k buf
then k cs new buf
else do arr <- A.unsafeFreeze p
let !b = Buffer p (o+u) 0 l
!t = Text arr o u
ts <- inlineInterleaveST (k b)
ts <- inlineInterleaveST (k cs new b)
return $! t : ts
{-# INLINE [1] flush #-}
-- defer inlining so that flush/flush rule may fire.
Expand All @@ -266,18 +277,18 @@ flush = Builder $ \ k buf@(Buffer p o u l) ->

-- | Sequence an ST operation on the buffer
withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
withBuffer f = Builder $ \k buf -> f buf >>= k
withBuffer f = Builder $ \k cs new buf -> f buf >>= k cs new
{-# INLINE withBuffer #-}

-- | Get the size of the buffer
withSize :: (Int -> Builder) -> Builder
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
runBuilder (f l) k buf
withSize f = Builder $ \ k cs new buf@(Buffer _ _ _ l) ->
runBuilder (f l) k cs new buf
{-# INLINE withSize #-}

-- | Map the resulting list of texts.
mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
mapBuilder f = Builder (fmap f .)
mapBuilder f = Builder $ \ k cs new b -> f <$> k cs new b

------------------------------------------------------------------------

Expand All @@ -286,7 +297,7 @@ ensureFree :: Int -> Builder
ensureFree !n = withSize $ \ l ->
if n <= l
then empty
else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
else flush `append'` Builder (\k chunkSize new _ -> k chunkSize new =<< newBuffer new (max n chunkSize))
{-# INLINE [0] ensureFree #-}

writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
Expand All @@ -305,9 +316,9 @@ writeBuffer f (Buffer p o u l) = do
return $! Buffer p o (u+n) (l-n)
{-# INLINE writeBuffer #-}

newBuffer :: Int -> ST s (Buffer s)
newBuffer size = do
arr <- A.new size
newBuffer :: (Int -> ST s (A.MArray s)) -> Int -> ST s (Buffer s)
newBuffer new size = do
arr <- new size
return $! Buffer arr 0 0 size
{-# INLINE newBuffer #-}

Expand Down
166 changes: 166 additions & 0 deletions src/Data/Text/Lazy/Builder/IO.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -Wno-name-shadowing #-}
module Data.Text.Lazy.Builder.IO
( hPutBuilder
, hPutBuilderUtf8
) where

import Control.Monad (join)
import Control.Monad.ST (runST)
import Data.Array.Byte (ByteArray(ByteArray))
import Data.Foldable (foldrM, for_, traverse_)
import Data.Functor (void)
import Data.IORef (readIORef, writeIORef)
import Data.Text (unpack)
import Data.Text.Array (new, newPinned)
import Data.Text.Internal (Text(Text))
import Data.Text.Internal.Builder (Builder, getTexts)
import Data.Text.Internal.Fusion (stream)
import Data.Text.Internal.Fusion.Types (Step(..), Stream(..))
import Data.Text.Internal.Lazy (smallChunkSize)
import GHC.Exts (byteArrayContents#)
import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBuffer, RawCharBuffer, emptyBuffer, newCharBuffer, writeCharBuf)
import GHC.IO.Encoding (textEncodingName)
import GHC.IO.Handle.Internals (flushWriteBuffer, wantWritableHandle, flushWriteBuffer)
import GHC.IO.Handle.Text (commitBuffer')
import GHC.IO.Handle.Types (Handle, Handle__(..), BufferMode(NoBuffering,LineBuffering,BlockBuffering), BufferList(BufferListCons,BufferListNil), Newline(LF,CRLF))
import GHC.Ptr (Ptr(Ptr))
import System.IO (hPutBuf, hPutChar)

hPutBuilder :: Handle -> Builder -> IO ()
hPutBuilder h b = do
(mode, nl, isUtf8, buf) <- wantWritableHandle "hPutStr" h $ \(Handle__ {..}) -> do
let isUtf8 = maybe False (("UTF-8" ==) . textEncodingName) haCodec
-- modified getSpareBuffer
buf <- case haBufferMode of
NoBuffering -> error "no buffer!"
BlockBuffering _ | haOutputNL == LF && isUtf8 -> error "no buffer!"
_ -> do
bufs <- readIORef haBuffers
buf <- readIORef haCharBuffer
case bufs of
BufferListCons b rest -> do
writeIORef haBuffers rest
return $ emptyBuffer b (bufSize buf) WriteBuffer
BufferListNil -> newCharBuffer (bufSize buf) WriteBuffer
return (haBufferMode, haOutputNL, isUtf8, buf)
case mode of
NoBuffering -> hPutChars h b
LineBuffering -> writeLines h nl buf b
BlockBuffering _
| nl == CRLF -> writeBlocksCRLF h buf b
| isUtf8 -> hPutBuilderUtf8 h b
| otherwise -> writeBlocksRaw h buf b

hPutBuilderUtf8 :: Handle -> Builder -> IO ()
hPutBuilderUtf8 h b = do
flushBytes
-- ????? Does the text ByteArray have a chance of being garbage collected before the flush finishes?
for_ textBuffers $ \(Text (ByteArray a#) _ bufR) -> hPutBuf h (Ptr (byteArrayContents# a#)) bufR
where
flushBytes = wantWritableHandle "hPutBuilder" h flushWriteBuffer
textBuffers = runST $ getTexts bufferSize newPinned b

bufferSize :: Int
bufferSize = 1024 -- I don't know what this should be

hPutChars :: Handle -> Builder -> IO ()
hPutChars h b
= traverse_ (hPutChar h)
. join
. fmap unpack
$ runST (getTexts bufferSize' new b)

writeLines :: Handle -> Newline -> CharBuffer -> Builder -> IO ()
writeLines h nl buf b = do
(Buffer {bufRaw, bufSize}, n) <- foldrM
(\t (buf, n) -> outer (stream t) buf n)
(buf, 0)
(runST $ getTexts bufferSize' new b)
void $ commitBuffer h bufRaw bufSize n Flush Release
where
outer (Stream next s len) buf@(Buffer {bufRaw, bufSize}) !n = inner s n
where
inner !s !n = case next s of
Done -> return (buf, n)
Skip s -> inner s n
Yield c s
| n + 1 >= bufSize -> do
buf <- commitBuffer h bufRaw bufSize n Flush NoRelease
outer (Stream next s len) buf 0
| c == '\n' -> do
n <- if nl == CRLF
then do
n <- writeCharBuf bufRaw n '\r'
writeCharBuf bufRaw n '\n'
else writeCharBuf bufRaw n '\n'
buf <- commitBuffer h bufRaw bufSize n Flush NoRelease
outer (Stream next s len) buf 0
| otherwise -> writeCharBuf bufRaw n c >>= inner s

writeBlocksCRLF :: Handle -> CharBuffer -> Builder -> IO ()
writeBlocksCRLF h buf b = do
(Buffer {bufRaw, bufSize}, n) <- foldrM
(\t (buf, n) -> outer (stream t) buf n)
(buf, 0)
(runST $ getTexts bufferSize' new b)
void $ commitBuffer h bufRaw bufSize n Flush Release
where
outer (Stream next s len) buf@(Buffer {bufRaw, bufSize}) !n = inner s n
where
inner !s !n = case next s of
Done -> return (buf, n)
Skip s -> inner s n
Yield c s
| n + 1 >= bufSize -> do
buf <- commitBuffer h bufRaw bufSize n Flush NoRelease
outer (Stream next s len) buf 0
| c == '\n' -> do
n <- writeCharBuf bufRaw n '\r'
n <- writeCharBuf bufRaw n '\n'
inner s n
| otherwise -> writeCharBuf bufRaw n c >>= inner s

writeBlocksRaw :: Handle -> CharBuffer -> Builder -> IO ()
writeBlocksRaw h buf b = do
(Buffer {bufRaw, bufSize}, n) <- foldrM
(\t (buf, n) -> outer (stream t) buf n)
(buf, 0)
(runST $ getTexts bufferSize' new b)
void $ commitBuffer h bufRaw bufSize n Flush Release
where
outer (Stream next s len) buf@(Buffer {bufRaw, bufSize}) !n = inner s n
where
inner !s !n = case next s of
Done -> return (buf, n)
Skip s -> inner s n
Yield c s
| n >= bufSize -> do
buf <- commitBuffer h bufRaw bufSize n Flush NoRelease
outer (Stream next s len) buf 0
| otherwise -> writeCharBuf bufRaw n c >>= inner s

bufferSize' :: Int
bufferSize' = smallChunkSize


-- This function is completely lifted from GHC.IO.Handle.Text.
commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Flush -> Release
-> IO CharBuffer
commitBuffer hdl !raw !sz !count flush release =
wantWritableHandle "commitAndReleaseBuffer" hdl $
commitBuffer' raw sz count (flushBool flush) (releaseBool release)
{-# INLINE commitBuffer #-}

data Flush = Flush | NoFlush
flushBool :: Flush -> Bool
flushBool Flush = True
flushBool NoFlush = False

data Release = Release | NoRelease
releaseBool :: Release -> Bool
releaseBool Release = True
releaseBool NoRelease = False
1 change: 1 addition & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ library
Data.Text.Lazy
Data.Text.Lazy.Builder
Data.Text.Lazy.Builder.Int
Data.Text.Lazy.Builder.IO
Data.Text.Lazy.Builder.RealFloat
Data.Text.Lazy.Encoding
Data.Text.Lazy.IO
Expand Down

0 comments on commit 6c49fdd

Please sign in to comment.