Skip to content

Commit

Permalink
Added file write benchmarks (#585)
Browse files Browse the repository at this point in the history
* Added file write benchmarks

* added Data.Text.IO.Utf8.hPutStr to benchmarks

* minor improvements

* defined <&> for old versions of base

* import <>

* added small chunks for hPutStr benchmarks

* more flexible FileWrite benchmarks

* whnfAppIO
  • Loading branch information
BebeSparkelSparkel authored May 7, 2024
1 parent f4c2b7e commit bd15593
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 0 deletions.
6 changes: 6 additions & 0 deletions benchmarks/haskell/Benchmarks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import qualified Benchmarks.DecodeUtf8 as DecodeUtf8
import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
import qualified Benchmarks.Equality as Equality
import qualified Benchmarks.FileRead as FileRead
import qualified Benchmarks.FileWrite as FileWrite
import qualified Benchmarks.FoldLines as FoldLines
import qualified Benchmarks.Micro as Micro
import qualified Benchmarks.Multilang as Multilang
Expand Down Expand Up @@ -59,6 +60,9 @@ main = do
let tf = ("benchmarks/text-test-data" </>)
-- Cannot use envWithCleanup, because there is no instance NFData Handle
(sinkFn, sink) <- mkSink
(fileWriteBenchmarks, fileWriteCleanup) <- FileWrite.mkFileWriteBenchmarks $ do
(fp, h) <- mkSink
return (h, rmSink fp)
defaultMain
[ Builder.benchmark
, Concat.benchmark
Expand All @@ -77,6 +81,7 @@ main = do
]
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
, FileRead.benchmark (tf "russian.txt")
, fileWriteBenchmarks
, FoldLines.benchmark (tf "russian.txt")
, Multilang.benchmark
, bgroup "Pure"
Expand All @@ -102,3 +107,4 @@ main = do
]
]
rmSink sinkFn
fileWriteCleanup
132 changes: 132 additions & 0 deletions benchmarks/haskell/Benchmarks/FileWrite.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
-- | Benchmarks simple file writing
--
-- Tested in this benchmark:
--
-- * Writing a file to the disk
--

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}

module Benchmarks.FileWrite
( mkFileWriteBenchmarks
) where

import Control.DeepSeq (NFData, deepseq)
import Data.Bifunctor (first)
import Data.List (intercalate, intersperse)
import Data.Semigroup ((<>))

Check warning on line 18 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

The import of ‘Data.Semigroup’ is redundant

Check warning on line 18 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

The import of ‘Data.Semigroup’ is redundant
import Data.String (fromString)
import Data.Text (StrictText)
import Data.Text.Internal.Lazy (LazyText, defaultChunkSize)
import System.IO (Handle, Newline(CRLF,LF), NewlineMode(NewlineMode), BufferMode(NoBuffering,LineBuffering,BlockBuffering), hSetBuffering, hSetNewlineMode)

Check warning on line 22 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

The import of ‘NoBuffering’ from module ‘System.IO’ is redundant

Check warning on line 22 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

The import of ‘NoBuffering’ from module ‘System.IO’ is redundant
import Test.Tasty.Bench (Benchmark, bgroup, bench, whnfAppIO)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO.Utf8 as Utf8
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L

mkFileWriteBenchmarks :: IO (Handle, IO ()) -> IO (Benchmark, IO ())
mkFileWriteBenchmarks mkSinkNRemove = do
let writeData = L.cycle $ fromString [minBound..maxBound]

#ifdef ExtendedBenchmarks
lengths = [0..5] <> [10,20..100] <> [1000,3000,10000,100000]
#else
lengths = [0,1,100,3000,10000,100000]
#endif

testGroup :: NFData text => (Handle -> text -> IO ()) -> ((String, StrictText -> text)) -> Newline -> BufferMode -> IO (Benchmark, IO ())
testGroup hPutStr (textCharacteristics, select) nl mode = do
(h, removeFile) <- mkSinkNRemove
hSetBuffering h mode
hSetNewlineMode h $ NewlineMode nl nl
pure
( bgroup (intercalate " " [textCharacteristics, show nl, show mode]) $
lengths <&> \n -> let
t = select $ L.toStrict $ L.take n writeData
in bench ("length " <> show n)
$ deepseq t
$ whnfAppIO (hPutStr h) t
, removeFile
)

sequenceGroup "FileWrite hPutStr"
#ifdef ExtendedBenchmarks
[ testGroup T.hPutStr strict LF NoBuffering
, testGroup L.hPutStr lazy LF NoBuffering

, testGroup T.hPutStr strict LF LineBuffering
, testGroup T.hPutStr strict CRLF LineBuffering
, testGroup T.hPutStr strictNewlines LF LineBuffering
, testGroup T.hPutStr strictNewlines CRLF LineBuffering

, testGroup L.hPutStr lazy LF LineBuffering
, testGroup L.hPutStr lazy CRLF LineBuffering
, testGroup L.hPutStr lazySmallChunks LF LineBuffering
, testGroup L.hPutStr lazySmallChunks CRLF LineBuffering
, testGroup L.hPutStr lazyNewlines LF LineBuffering
, testGroup L.hPutStr lazyNewlines CRLF LineBuffering
, testGroup L.hPutStr lazySmallChunksNewlines LF LineBuffering
, testGroup L.hPutStr lazySmallChunksNewlines CRLF LineBuffering

, testGroup T.hPutStr strict LF (BlockBuffering Nothing)
, testGroup T.hPutStr strict CRLF (BlockBuffering Nothing)
, testGroup T.hPutStr strictNewlines LF (BlockBuffering Nothing)
, testGroup T.hPutStr strictNewlines CRLF (BlockBuffering Nothing)

, testGroup L.hPutStr lazy LF (BlockBuffering Nothing)
, testGroup L.hPutStr lazy CRLF (BlockBuffering Nothing)
, testGroup L.hPutStr lazySmallChunks LF (BlockBuffering Nothing)
, testGroup L.hPutStr lazySmallChunks CRLF (BlockBuffering Nothing)
, testGroup L.hPutStr lazyNewlines LF (BlockBuffering Nothing)
, testGroup L.hPutStr lazyNewlines CRLF (BlockBuffering Nothing)
, testGroup L.hPutStr lazySmallChunksNewlines LF (BlockBuffering Nothing)
, testGroup L.hPutStr lazySmallChunksNewlines CRLF (BlockBuffering Nothing)

, sequenceGroup "UTF-8"
[ testGroup Utf8.hPutStr strict LF NoBuffering
, testGroup Utf8.hPutStr strict LF LineBuffering
, testGroup Utf8.hPutStr strict LF (BlockBuffering Nothing)
]
]
#else
[ testGroup T.hPutStr strictNewlines LF LineBuffering
, testGroup T.hPutStr strictNewlines CRLF LineBuffering

, testGroup T.hPutStr strict LF (BlockBuffering Nothing)
, testGroup T.hPutStr strictNewlines CRLF (BlockBuffering Nothing)

, testGroup L.hPutStr lazyNewlines LF LineBuffering
, testGroup L.hPutStr lazyNewlines CRLF LineBuffering

, testGroup L.hPutStr lazy LF (BlockBuffering Nothing)
, testGroup L.hPutStr lazyNewlines CRLF (BlockBuffering Nothing)

, sequenceGroup "UTF-8"
[ testGroup Utf8.hPutStr strict LF LineBuffering
, testGroup Utf8.hPutStr strict LF (BlockBuffering Nothing)
]
]
#endif

where
lazy, lazyNewlines, lazySmallChunks, lazySmallChunksNewlines :: (String, StrictText -> LazyText)
lazy = ("lazy", L.fromChunks . T.chunksOf defaultChunkSize)
lazyNewlines = ("lazy many newlines", snd lazy . snd strictNewlines)
lazySmallChunks = ("lazy small chunks", L.fromChunks . T.chunksOf 10)

Check warning on line 118 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

Defined but not used: ‘lazySmallChunks’

Check warning on line 118 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

Defined but not used: ‘lazySmallChunks’
lazySmallChunksNewlines = ("lazy small chunks many newlines", snd lazySmallChunks . snd strictNewlines)

Check warning on line 119 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

Defined but not used: ‘lazySmallChunksNewlines’

Check warning on line 119 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

Defined but not used: ‘lazySmallChunksNewlines’

strict, strictNewlines :: (String, StrictText -> StrictText)
strict = ("strict", id)
strictNewlines = ("strict many newlines", mconcat . intersperse "\n" . T.chunksOf 5)

sequenceGroup groupName tgs
= first (bgroup groupName)
. foldr (\(b,r) (bs,rs) -> (b:bs,r>>rs)) ([], return ())
<$> sequence tgs

(<&>) :: Functor f => f a -> (a -> b) -> f b
(<&>) = flip fmap

8 changes: 8 additions & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ flag pure-haskell
default: False
manual: True

flag ExtendedBenchmarks
description: Runs extra benchmarks which can be very slow.
default: False
manual: True

library
if arch(javascript) || flag(pure-haskell)
cpp-options: -DPURE_HASKELL
Expand Down Expand Up @@ -312,6 +317,8 @@ benchmark text-benchmarks
ghc-options: -Wall -O2 -rtsopts "-with-rtsopts=-A32m"
if impl(ghc >= 8.6)
ghc-options: -fproc-alignment=64
if flag(ExtendedBenchmarks)
cpp-options: -DExtendedBenchmarks

build-depends: base,
bytestring >= 0.10.4,
Expand All @@ -332,6 +339,7 @@ benchmark text-benchmarks
Benchmarks.EncodeUtf8
Benchmarks.Equality
Benchmarks.FileRead
Benchmarks.FileWrite
Benchmarks.FoldLines
Benchmarks.Micro
Benchmarks.Multilang
Expand Down

0 comments on commit bd15593

Please sign in to comment.