From 9b20ce2e7298abb9efac78728dee941d02e64705 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Mon, 13 Jan 2020 23:13:21 +0100 Subject: [PATCH] Redo file reading API --- hpath-io/hpath-io.cabal | 2 +- hpath-io/src/HPath/IO.hs | 66 +++++++----------- hpath-io/src/Streamly/ByteString.hs | 57 +++++++++++++++ hpath-io/test/HPath/IO/ReadFileEOFSpec.hs | 85 ----------------------- hpath-io/test/Utils.hs | 8 +-- 5 files changed, 85 insertions(+), 133 deletions(-) create mode 100644 hpath-io/src/Streamly/ByteString.hs delete mode 100644 hpath-io/test/HPath/IO/ReadFileEOFSpec.hs diff --git a/hpath-io/hpath-io.cabal b/hpath-io/hpath-io.cabal index 4072b96..648736c 100644 --- a/hpath-io/hpath-io.cabal +++ b/hpath-io/hpath-io.cabal @@ -32,7 +32,7 @@ library System.Posix.FD c-sources: cbits/dirutils.c - -- other-modules: + other-modules: Streamly.ByteString -- other-extensions: build-depends: base >= 4.8 && <5 , IfElse diff --git a/hpath-io/src/HPath/IO.hs b/hpath-io/src/HPath/IO.hs index c6ef399..d1740ea 100644 --- a/hpath-io/src/HPath/IO.hs +++ b/hpath-io/src/HPath/IO.hs @@ -36,6 +36,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE PackageImports #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} module HPath.IO ( @@ -66,7 +67,7 @@ module HPath.IO , moveFile -- * File reading , readFile - , readFileEOF + , readFileStream -- * File writing , writeFile , appendFile @@ -181,8 +182,14 @@ import HPath import HPath.Internal import HPath.IO.Errors import Prelude hiding (appendFile, readFile, writeFile) +import Streamly +import Streamly.ByteString +import qualified Streamly.Data.Fold as FL +import Streamly.Memory.Array import qualified Streamly.FileSystem.Handle as FH +import qualified Streamly.Internal.Data.Unfold as SU import qualified Streamly.Internal.FileSystem.Handle as IFH +import qualified Streamly.Internal.Memory.ArrayStream as AS import qualified Streamly.Prelude as S import qualified System.IO as SIO import System.IO.Error @@ -867,19 +874,14 @@ moveFile from to cm = do -------------------- --- |Read the given file at once into memory as a strict ByteString. +-- |Read the given file *at once* into memory as a lazy ByteString. -- Symbolic links are followed, no sanity checks on file size --- or file type. File must exist. --- --- Note: the size of the file is determined in advance, as to only --- have one allocation. +-- or file type. File must exist. Uses Builders under the hood +-- (hence lazy ByteString). -- -- Safety/reliability concerns: -- --- * since amount of bytes to read is determined in advance, --- the file might be read partially only if something else is --- appending to it while reading --- * the whole file is read into memory! +-- * the whole file is read into memory, this doesn't read lazily -- -- Throws: -- @@ -887,21 +889,15 @@ moveFile from to cm = do -- - `PermissionDenied` if we cannot read the file or the directory -- containting it -- - `NoSuchThing` if the file does not exist -readFile :: Path b -> IO ByteString -readFile (MkPath fp) = - bracket (openFd fp SPI.ReadOnly [] Nothing) (SPI.closeFd) $ \fd -> do - stat <- PF.getFdStatus fd - let fsize = PF.fileSize stat - SPB.fdRead fd (fromIntegral fsize) +readFile :: Path b -> IO L.ByteString +readFile path = do + stream <- readFileStream path + toLazyByteString <$> S.fold FL.mconcat (fmap byteString stream) --- |Read the given file in chunks of size `8192` into memory until --- `fread` returns 0. Returns a lazy ByteString, because it uses --- Builders under the hood. --- --- Safety/reliability concerns: --- --- * the whole file is read into memory! + +-- | Open the given file as a filestream. Once the filestream is +-- exits, the filehandle is cleaned up. -- -- Throws: -- @@ -909,23 +905,13 @@ readFile (MkPath fp) = -- - `PermissionDenied` if we cannot read the file or the directory -- containting it -- - `NoSuchThing` if the file does not exist -readFileEOF :: Path b -> IO L.ByteString -readFileEOF (MkPath fp) = - bracket (openFd fp SPI.ReadOnly [] Nothing) (SPI.closeFd) $ \fd -> - allocaBytes (fromIntegral bufSize) $ \buf -> read' fd buf mempty - where - bufSize :: CSize - bufSize = 8192 - read' :: Fd -> Ptr Word8 -> Builder -> IO L.ByteString - read' fd buf builder = do - size <- SPB.fdReadBuf fd buf bufSize - if size == 0 - then return $ toLazyByteString builder - else do - readBS <- unsafePackCStringFinalizer buf - (fromIntegral size) - (return ()) - read' fd buf (builder <> byteString readBS) +readFileStream :: Path b + -> IO (SerialT IO ByteString) +readFileStream (MkPath fp) = do + fd <- openFd fp SPI.ReadOnly [] Nothing + handle <- SPI.fdToHandle fd + let stream = (S.unfold (SU.finally SIO.hClose FH.readChunks) handle) >>= arrayToByteString + pure stream diff --git a/hpath-io/src/Streamly/ByteString.hs b/hpath-io/src/Streamly/ByteString.hs new file mode 100644 index 0000000..2cee0a1 --- /dev/null +++ b/hpath-io/src/Streamly/ByteString.hs @@ -0,0 +1,57 @@ +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Streamly.ByteString where + +import Control.Monad.IO.Class +import Data.ByteString hiding (length) +import qualified Data.ByteString as BS +import Data.ByteString.Unsafe +import Data.Word (Word8) +import Foreign.ForeignPtr +import Foreign.ForeignPtr.Unsafe +import Foreign.Ptr (castPtr, minusPtr, plusPtr) +import Prelude hiding (length) +import Streamly +import Streamly.Internal.Memory.Array.Types +import Streamly.Memory.Array +import qualified Streamly.Prelude as S + +toByteString :: + forall m. (MonadIO m, MonadAsync m) + => SerialT m (Array Word8) + -> m ByteString +toByteString stream = + let xs = S.mapM arrayToByteString stream + ys = S.foldlM' (\a b -> pure $ a <> b) mempty xs + in ys + +arrayToByteString :: (MonadIO m) => Array Word8 -> m ByteString +arrayToByteString arr + | length arr == 0 = return mempty +arrayToByteString Array {..} = + liftIO $ + withForeignPtr aStart $ \ptr -> + unsafePackCStringFinalizer ptr aLen (return ()) + where + aLen = + let p = unsafeForeignPtrToPtr aStart + in aEnd `minusPtr` p + +byteStringToArray :: (MonadIO m) => ByteString -> m (Array Word8) +byteStringToArray bs = + liftIO $ + unsafeUseAsCStringLen + bs + (\(ptr, _) -> do + let endPtr pr = (castPtr pr `plusPtr` (BS.length bs)) + fptr <- newForeignPtr_ (castPtr ptr) + return $ Array {aStart = fptr, aEnd = endPtr ptr, aBound = endPtr ptr}) + +fromByteString :: + forall m. (MonadIO m) + => ByteString + -> m (Array Word8) +fromByteString bs = byteStringToArray bs diff --git a/hpath-io/test/HPath/IO/ReadFileEOFSpec.hs b/hpath-io/test/HPath/IO/ReadFileEOFSpec.hs deleted file mode 100644 index 6a92b52..0000000 --- a/hpath-io/test/HPath/IO/ReadFileEOFSpec.hs +++ /dev/null @@ -1,85 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} - - -module HPath.IO.ReadFileEOFSpec where - - -import Test.Hspec -import System.IO.Error - ( - ioeGetErrorType - ) -import GHC.IO.Exception - ( - IOErrorType(..) - ) -import Utils - - - -upTmpDir :: IO () -upTmpDir = do - setTmpDir "ReadFileEOFSpec" - createTmpDir - -setupFiles :: IO () -setupFiles = do - createRegularFile' "fileWithContent" - createRegularFile' "fileWithoutContent" - createSymlink' "inputFileSymL" "fileWithContent" - createDir' "alreadyExistsD" - createRegularFile' "noPerms" - noPerms "noPerms" - createDir' "noPermsD" - createRegularFile' "noPermsD/inputFile" - noPerms "noPermsD" - writeFile' "fileWithContent" "Blahfaselgagaga" - - -cleanupFiles :: IO () -cleanupFiles = do - deleteFile' "fileWithContent" - deleteFile' "fileWithoutContent" - deleteFile' "inputFileSymL" - deleteDir' "alreadyExistsD" - normalFilePerms "noPerms" - deleteFile' "noPerms" - normalDirPerms "noPermsD" - deleteFile' "noPermsD/inputFile" - deleteDir' "noPermsD" - - -spec :: Spec -spec = beforeAll_ (upTmpDir >> setupFiles) $ afterAll_ cleanupFiles $ - describe "HPath.IO.readFileEOF" $ do - - -- successes -- - it "readFileEOF (Strict) file with content, everything clear" $ do - out <- readFileEOF' "fileWithContent" - out `shouldBe` "Blahfaselgagaga" - - it "readFileEOF (Strict) symlink, everything clear" $ do - out <- readFileEOF' "inputFileSymL" - out `shouldBe` "Blahfaselgagaga" - - it "readFileEOF (Strict) empty file, everything clear" $ do - out <- readFileEOF' "fileWithoutContent" - out `shouldBe` "" - - - -- posix failures -- - it "readFileEOF (Strict) directory, wrong file type" $ do - readFileEOF' "alreadyExistsD" - `shouldThrow` (\e -> ioeGetErrorType e == InappropriateType) - - it "readFileEOF (Strict) file, no permissions" $ do - readFileEOF' "noPerms" - `shouldThrow` (\e -> ioeGetErrorType e == PermissionDenied) - - it "readFileEOF (Strict) file, no permissions on dir" $ do - readFileEOF' "noPermsD/inputFile" - `shouldThrow` (\e -> ioeGetErrorType e == PermissionDenied) - - it "readFileEOF (Strict) file, no such file" $ do - readFileEOF' "lalala" - `shouldThrow` (\e -> ioeGetErrorType e == NoSuchThing) diff --git a/hpath-io/test/Utils.hs b/hpath-io/test/Utils.hs index c069cde..22627e7 100644 --- a/hpath-io/test/Utils.hs +++ b/hpath-io/test/Utils.hs @@ -27,7 +27,6 @@ import Data.IORef , IORef ) import HPath.IO -import HPath.IO.Errors import Prelude hiding (appendFile, readFile, writeFile) import Data.Maybe ( @@ -281,10 +280,5 @@ allDirectoryContents' ip = readFile' :: ByteString -> IO ByteString {-# NOINLINE readFile' #-} -readFile' p = withTmpDir p readFile - - -readFileEOF' :: ByteString -> IO L.ByteString -{-# NOINLINE readFileEOF' #-} -readFileEOF' p = withTmpDir p readFileEOF +readFile' p = withTmpDir p (fmap L.toStrict . readFile)