Redo file reading API

This commit is contained in:
Julian Ospald 2020-01-13 23:13:21 +01:00
parent 6a1f80bc17
commit 9b20ce2e72
No known key found for this signature in database
GPG Key ID: 511B62C09D50CD28
5 changed files with 85 additions and 133 deletions

View File

@ -32,7 +32,7 @@ library
System.Posix.FD
c-sources: cbits/dirutils.c
-- other-modules:
other-modules: Streamly.ByteString
-- other-extensions:
build-depends: base >= 4.8 && <5
, IfElse

View File

@ -36,6 +36,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module HPath.IO
(
@ -66,7 +67,7 @@ module HPath.IO
, moveFile
-- * File reading
, readFile
, readFileEOF
, readFileStream
-- * File writing
, writeFile
, appendFile
@ -181,8 +182,14 @@ import HPath
import HPath.Internal
import HPath.IO.Errors
import Prelude hiding (appendFile, readFile, writeFile)
import Streamly
import Streamly.ByteString
import qualified Streamly.Data.Fold as FL
import Streamly.Memory.Array
import qualified Streamly.FileSystem.Handle as FH
import qualified Streamly.Internal.Data.Unfold as SU
import qualified Streamly.Internal.FileSystem.Handle as IFH
import qualified Streamly.Internal.Memory.ArrayStream as AS
import qualified Streamly.Prelude as S
import qualified System.IO as SIO
import System.IO.Error
@ -867,19 +874,14 @@ moveFile from to cm = do
--------------------
-- |Read the given file at once into memory as a strict ByteString.
-- |Read the given file *at once* into memory as a lazy ByteString.
-- Symbolic links are followed, no sanity checks on file size
-- or file type. File must exist.
--
-- Note: the size of the file is determined in advance, as to only
-- have one allocation.
-- or file type. File must exist. Uses Builders under the hood
-- (hence lazy ByteString).
--
-- Safety/reliability concerns:
--
-- * since amount of bytes to read is determined in advance,
-- the file might be read partially only if something else is
-- appending to it while reading
-- * the whole file is read into memory!
-- * the whole file is read into memory, this doesn't read lazily
--
-- Throws:
--
@ -887,21 +889,15 @@ moveFile from to cm = do
-- - `PermissionDenied` if we cannot read the file or the directory
-- containting it
-- - `NoSuchThing` if the file does not exist
readFile :: Path b -> IO ByteString
readFile (MkPath fp) =
bracket (openFd fp SPI.ReadOnly [] Nothing) (SPI.closeFd) $ \fd -> do
stat <- PF.getFdStatus fd
let fsize = PF.fileSize stat
SPB.fdRead fd (fromIntegral fsize)
readFile :: Path b -> IO L.ByteString
readFile path = do
stream <- readFileStream path
toLazyByteString <$> S.fold FL.mconcat (fmap byteString stream)
-- |Read the given file in chunks of size `8192` into memory until
-- `fread` returns 0. Returns a lazy ByteString, because it uses
-- Builders under the hood.
--
-- Safety/reliability concerns:
--
-- * the whole file is read into memory!
-- | Open the given file as a filestream. Once the filestream is
-- exits, the filehandle is cleaned up.
--
-- Throws:
--
@ -909,23 +905,13 @@ readFile (MkPath fp) =
-- - `PermissionDenied` if we cannot read the file or the directory
-- containting it
-- - `NoSuchThing` if the file does not exist
readFileEOF :: Path b -> IO L.ByteString
readFileEOF (MkPath fp) =
bracket (openFd fp SPI.ReadOnly [] Nothing) (SPI.closeFd) $ \fd ->
allocaBytes (fromIntegral bufSize) $ \buf -> read' fd buf mempty
where
bufSize :: CSize
bufSize = 8192
read' :: Fd -> Ptr Word8 -> Builder -> IO L.ByteString
read' fd buf builder = do
size <- SPB.fdReadBuf fd buf bufSize
if size == 0
then return $ toLazyByteString builder
else do
readBS <- unsafePackCStringFinalizer buf
(fromIntegral size)
(return ())
read' fd buf (builder <> byteString readBS)
readFileStream :: Path b
-> IO (SerialT IO ByteString)
readFileStream (MkPath fp) = do
fd <- openFd fp SPI.ReadOnly [] Nothing
handle <- SPI.fdToHandle fd
let stream = (S.unfold (SU.finally SIO.hClose FH.readChunks) handle) >>= arrayToByteString
pure stream

View File

@ -0,0 +1,57 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Streamly.ByteString where
import Control.Monad.IO.Class
import Data.ByteString hiding (length)
import qualified Data.ByteString as BS
import Data.ByteString.Unsafe
import Data.Word (Word8)
import Foreign.ForeignPtr
import Foreign.ForeignPtr.Unsafe
import Foreign.Ptr (castPtr, minusPtr, plusPtr)
import Prelude hiding (length)
import Streamly
import Streamly.Internal.Memory.Array.Types
import Streamly.Memory.Array
import qualified Streamly.Prelude as S
toByteString ::
forall m. (MonadIO m, MonadAsync m)
=> SerialT m (Array Word8)
-> m ByteString
toByteString stream =
let xs = S.mapM arrayToByteString stream
ys = S.foldlM' (\a b -> pure $ a <> b) mempty xs
in ys
arrayToByteString :: (MonadIO m) => Array Word8 -> m ByteString
arrayToByteString arr
| length arr == 0 = return mempty
arrayToByteString Array {..} =
liftIO $
withForeignPtr aStart $ \ptr ->
unsafePackCStringFinalizer ptr aLen (return ())
where
aLen =
let p = unsafeForeignPtrToPtr aStart
in aEnd `minusPtr` p
byteStringToArray :: (MonadIO m) => ByteString -> m (Array Word8)
byteStringToArray bs =
liftIO $
unsafeUseAsCStringLen
bs
(\(ptr, _) -> do
let endPtr pr = (castPtr pr `plusPtr` (BS.length bs))
fptr <- newForeignPtr_ (castPtr ptr)
return $ Array {aStart = fptr, aEnd = endPtr ptr, aBound = endPtr ptr})
fromByteString ::
forall m. (MonadIO m)
=> ByteString
-> m (Array Word8)
fromByteString bs = byteStringToArray bs

View File

@ -1,85 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module HPath.IO.ReadFileEOFSpec where
import Test.Hspec
import System.IO.Error
(
ioeGetErrorType
)
import GHC.IO.Exception
(
IOErrorType(..)
)
import Utils
upTmpDir :: IO ()
upTmpDir = do
setTmpDir "ReadFileEOFSpec"
createTmpDir
setupFiles :: IO ()
setupFiles = do
createRegularFile' "fileWithContent"
createRegularFile' "fileWithoutContent"
createSymlink' "inputFileSymL" "fileWithContent"
createDir' "alreadyExistsD"
createRegularFile' "noPerms"
noPerms "noPerms"
createDir' "noPermsD"
createRegularFile' "noPermsD/inputFile"
noPerms "noPermsD"
writeFile' "fileWithContent" "Blahfaselgagaga"
cleanupFiles :: IO ()
cleanupFiles = do
deleteFile' "fileWithContent"
deleteFile' "fileWithoutContent"
deleteFile' "inputFileSymL"
deleteDir' "alreadyExistsD"
normalFilePerms "noPerms"
deleteFile' "noPerms"
normalDirPerms "noPermsD"
deleteFile' "noPermsD/inputFile"
deleteDir' "noPermsD"
spec :: Spec
spec = beforeAll_ (upTmpDir >> setupFiles) $ afterAll_ cleanupFiles $
describe "HPath.IO.readFileEOF" $ do
-- successes --
it "readFileEOF (Strict) file with content, everything clear" $ do
out <- readFileEOF' "fileWithContent"
out `shouldBe` "Blahfaselgagaga"
it "readFileEOF (Strict) symlink, everything clear" $ do
out <- readFileEOF' "inputFileSymL"
out `shouldBe` "Blahfaselgagaga"
it "readFileEOF (Strict) empty file, everything clear" $ do
out <- readFileEOF' "fileWithoutContent"
out `shouldBe` ""
-- posix failures --
it "readFileEOF (Strict) directory, wrong file type" $ do
readFileEOF' "alreadyExistsD"
`shouldThrow` (\e -> ioeGetErrorType e == InappropriateType)
it "readFileEOF (Strict) file, no permissions" $ do
readFileEOF' "noPerms"
`shouldThrow` (\e -> ioeGetErrorType e == PermissionDenied)
it "readFileEOF (Strict) file, no permissions on dir" $ do
readFileEOF' "noPermsD/inputFile"
`shouldThrow` (\e -> ioeGetErrorType e == PermissionDenied)
it "readFileEOF (Strict) file, no such file" $ do
readFileEOF' "lalala"
`shouldThrow` (\e -> ioeGetErrorType e == NoSuchThing)

View File

@ -27,7 +27,6 @@ import Data.IORef
, IORef
)
import HPath.IO
import HPath.IO.Errors
import Prelude hiding (appendFile, readFile, writeFile)
import Data.Maybe
(
@ -281,10 +280,5 @@ allDirectoryContents' ip =
readFile' :: ByteString -> IO ByteString
{-# NOINLINE readFile' #-}
readFile' p = withTmpDir p readFile
readFileEOF' :: ByteString -> IO L.ByteString
{-# NOINLINE readFileEOF' #-}
readFileEOF' p = withTmpDir p readFileEOF
readFile' p = withTmpDir p (fmap L.toStrict . readFile)