954 lines
		
	
	
		
			37 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			954 lines
		
	
	
		
			37 KiB
		
	
	
	
		
			Haskell
		
	
	
	
	
	
| {-# LANGUAGE CPP, RankNTypes, DeriveDataTypeable, BangPatterns #-}
 | |
| #if __GLASGOW_HASKELL__ >= 702
 | |
| {-# LANGUAGE Trustworthy #-}
 | |
| #endif
 | |
| -----------------------------------------------------------------------------
 | |
| -- |
 | |
| -- Copyright   :  (c) 2006-2015 Duncan Coutts
 | |
| -- License     :  BSD-style
 | |
| --
 | |
| -- Maintainer  :  duncan@community.haskell.org
 | |
| --
 | |
| -- Pure and IO stream based interfaces to lower level zlib wrapper
 | |
| --
 | |
| -----------------------------------------------------------------------------
 | |
| module Codec.Compression.Zlib.Internal (
 | |
| 
 | |
|   -- * Pure interface
 | |
|   compress,
 | |
|   decompress,
 | |
| 
 | |
|   -- * Monadic incremental interface
 | |
|   -- $incremental-compression
 | |
| 
 | |
|   -- ** Using incremental compression
 | |
|   -- $using-incremental-compression
 | |
| 
 | |
|   CompressStream(..),
 | |
|   compressST,
 | |
|   compressIO,
 | |
|   foldCompressStream,
 | |
|   foldCompressStreamWithInput,
 | |
| 
 | |
|   -- ** Using incremental decompression
 | |
|   -- $using-incremental-decompression
 | |
| 
 | |
|   DecompressStream(..),
 | |
|   DecompressError(..),
 | |
|   decompressST,
 | |
|   decompressIO,
 | |
|   foldDecompressStream,
 | |
|   foldDecompressStreamWithInput,
 | |
| 
 | |
|   -- * The compression parameter types
 | |
|   CompressParams(..),
 | |
|   defaultCompressParams,
 | |
|   DecompressParams(..),
 | |
|   defaultDecompressParams,
 | |
|   Stream.Format(..),
 | |
|     Stream.gzipFormat,
 | |
|     Stream.zlibFormat,
 | |
|     Stream.rawFormat,
 | |
|     Stream.gzipOrZlibFormat,
 | |
|   Stream.CompressionLevel(..),
 | |
|     Stream.defaultCompression,
 | |
|     Stream.noCompression,
 | |
|     Stream.bestSpeed,
 | |
|     Stream.bestCompression,
 | |
|     Stream.compressionLevel,
 | |
|   Stream.Method(..),
 | |
|     Stream.deflateMethod,
 | |
|   Stream.WindowBits(..),
 | |
|     Stream.defaultWindowBits,
 | |
|     Stream.windowBits,
 | |
|   Stream.MemoryLevel(..),
 | |
|     Stream.defaultMemoryLevel,
 | |
|     Stream.minMemoryLevel,
 | |
|     Stream.maxMemoryLevel,
 | |
|     Stream.memoryLevel,
 | |
|   Stream.CompressionStrategy(..),
 | |
|     Stream.defaultStrategy,
 | |
|     Stream.filteredStrategy,
 | |
|     Stream.huffmanOnlyStrategy,
 | |
| 
 | |
|   ) where
 | |
| 
 | |
| import Prelude hiding (length)
 | |
| import Control.Monad (when)
 | |
| import Control.Exception (Exception, throw, assert)
 | |
| import Control.Monad.ST.Lazy hiding (stToIO)
 | |
| import Control.Monad.ST.Strict (stToIO)
 | |
| #if __GLASGOW_HASKELL__ >= 702
 | |
| import qualified Control.Monad.ST.Unsafe as Unsafe (unsafeIOToST)
 | |
| #else
 | |
| import qualified Control.Monad.ST.Strict as Unsafe (unsafeIOToST)
 | |
| #endif
 | |
| import Data.Typeable (Typeable)
 | |
| import qualified Data.ByteString.Lazy as L
 | |
| import qualified Data.ByteString.Lazy.Internal as L
 | |
| import qualified Data.ByteString          as S
 | |
| import qualified Data.ByteString.Internal as S
 | |
| import Data.Word (Word8)
 | |
| import GHC.IO (noDuplicate)
 | |
| 
 | |
| import qualified Codec.Compression.Zlib.Stream as Stream
 | |
| import Codec.Compression.Zlib.Stream (Stream)
 | |
| 
 | |
| -- | The full set of parameters for compression. The defaults are
 | |
| -- 'defaultCompressParams'.
 | |
| --
 | |
| -- The 'compressBufferSize' is the size of the first output buffer containing
 | |
| -- the compressed data. If you know an approximate upper bound on the size of
 | |
| -- the compressed data then setting this parameter can save memory. The default
 | |
| -- compression output buffer size is @16k@. If your extimate is wrong it does
 | |
| -- not matter too much, the default buffer size will be used for the remaining
 | |
| -- chunks.
 | |
| --
 | |
| data CompressParams = CompressParams {
 | |
|   compressLevel       :: !Stream.CompressionLevel,
 | |
|   compressMethod      :: !Stream.Method,
 | |
|   compressWindowBits  :: !Stream.WindowBits,
 | |
|   compressMemoryLevel :: !Stream.MemoryLevel,
 | |
|   compressStrategy    :: !Stream.CompressionStrategy,
 | |
|   compressBufferSize  :: !Int,
 | |
|   compressDictionary  :: Maybe S.ByteString
 | |
| } deriving Show
 | |
| 
 | |
| -- | The full set of parameters for decompression. The defaults are
 | |
| -- 'defaultDecompressParams'.
 | |
| --
 | |
| -- The 'decompressBufferSize' is the size of the first output buffer,
 | |
| -- containing the uncompressed data. If you know an exact or approximate upper
 | |
| -- bound on the size of the decompressed data then setting this parameter can
 | |
| -- save memory. The default decompression output buffer size is @32k@. If your
 | |
| -- extimate is wrong it does not matter too much, the default buffer size will
 | |
| -- be used for the remaining chunks.
 | |
| --
 | |
| -- One particular use case for setting the 'decompressBufferSize' is if you
 | |
| -- know the exact size of the decompressed data and want to produce a strict
 | |
| -- 'Data.ByteString.ByteString'. The compression and deccompression functions
 | |
| -- use lazy 'Data.ByteString.Lazy.ByteString's but if you set the
 | |
| -- 'decompressBufferSize' correctly then you can generate a lazy
 | |
| -- 'Data.ByteString.Lazy.ByteString' with exactly one chunk, which can be
 | |
| -- converted to a strict 'Data.ByteString.ByteString' in @O(1)@ time using
 | |
| -- @'Data.ByteString.concat' . 'Data.ByteString.Lazy.toChunks'@.
 | |
| --
 | |
| data DecompressParams = DecompressParams {
 | |
|   decompressWindowBits :: !Stream.WindowBits,
 | |
|   decompressBufferSize :: !Int,
 | |
|   decompressDictionary :: Maybe S.ByteString,
 | |
|   decompressAllMembers :: Bool
 | |
| } deriving Show
 | |
| 
 | |
| -- | The default set of parameters for compression. This is typically used with
 | |
| -- the @compressWith@ function with specific parameters overridden.
 | |
| --
 | |
| defaultCompressParams :: CompressParams
 | |
| defaultCompressParams = CompressParams {
 | |
|   compressLevel       = Stream.defaultCompression,
 | |
|   compressMethod      = Stream.deflateMethod,
 | |
|   compressWindowBits  = Stream.defaultWindowBits,
 | |
|   compressMemoryLevel = Stream.defaultMemoryLevel,
 | |
|   compressStrategy    = Stream.defaultStrategy,
 | |
|   compressBufferSize  = defaultCompressBufferSize,
 | |
|   compressDictionary  = Nothing
 | |
| }
 | |
| 
 | |
| -- | The default set of parameters for decompression. This is typically used with
 | |
| -- the @compressWith@ function with specific parameters overridden.
 | |
| --
 | |
| defaultDecompressParams :: DecompressParams
 | |
| defaultDecompressParams = DecompressParams {
 | |
|   decompressWindowBits = Stream.defaultWindowBits,
 | |
|   decompressBufferSize = defaultDecompressBufferSize,
 | |
|   decompressDictionary = Nothing,
 | |
|   decompressAllMembers = True
 | |
| }
 | |
| 
 | |
| -- | The default chunk sizes for the output of compression and decompression
 | |
| -- are 16k and 32k respectively (less a small accounting overhead).
 | |
| --
 | |
| defaultCompressBufferSize, defaultDecompressBufferSize :: Int
 | |
| defaultCompressBufferSize   = 16 * 1024 - L.chunkOverhead
 | |
| defaultDecompressBufferSize = 32 * 1024 - L.chunkOverhead
 | |
| 
 | |
| -- | The unfolding of the decompression process, where you provide a sequence
 | |
| -- of compressed data chunks as input and receive a sequence of uncompressed
 | |
| -- data chunks as output. The process is incremental, in that the demand for
 | |
| -- input and provision of output are interleaved.
 | |
| --
 | |
| -- To indicate the end of the input supply an empty input chunk. Note that
 | |
| -- for 'gzipFormat' with the default 'decompressAllMembers' @True@ you will
 | |
| -- have to do this, as the decompressor will look for any following members.
 | |
| -- With 'decompressAllMembers' @False@ the decompressor knows when the data
 | |
| -- ends and will produce 'DecompressStreamEnd' without you having to supply an
 | |
| -- empty chunk to indicate the end of the input.
 | |
| --
 | |
| data DecompressStream m =
 | |
| 
 | |
|      DecompressInputRequired {
 | |
|          decompressSupplyInput :: S.ByteString -> m (DecompressStream m)
 | |
|        }
 | |
| 
 | |
|    | DecompressOutputAvailable {
 | |
|          decompressOutput :: !S.ByteString,
 | |
|          decompressNext   :: m (DecompressStream m)
 | |
|        }
 | |
| 
 | |
|    -- | Includes any trailing unconsumed /input/ data.
 | |
|    | DecompressStreamEnd {
 | |
|          decompressUnconsumedInput :: S.ByteString
 | |
|        }
 | |
| 
 | |
|    -- | An error code
 | |
|    | DecompressStreamError {
 | |
|          decompressStreamError :: DecompressError
 | |
|        }
 | |
| 
 | |
| -- | The possible error cases when decompressing a stream.
 | |
| --
 | |
| -- This can be 'show'n to give a human readable error message.
 | |
| --
 | |
| data DecompressError =
 | |
|      -- | The compressed data stream ended prematurely. This may happen if the
 | |
|      -- input data stream was truncated.
 | |
|      TruncatedInput
 | |
| 
 | |
|      -- | It is possible to do zlib compression with a custom dictionary. This
 | |
|      -- allows slightly higher compression ratios for short files. However such
 | |
|      -- compressed streams require the same dictionary when decompressing. This
 | |
|      -- error is for when we encounter a compressed stream that needs a
 | |
|      -- dictionary, and it's not provided.
 | |
|    | DictionaryRequired
 | |
| 
 | |
|      -- | If the stream requires a dictionary and you provide one with the
 | |
|      -- wrong 'DictionaryHash' then you will get this error.
 | |
|    | DictionaryMismatch
 | |
| 
 | |
|      -- | If the compressed data stream is corrupted in any way then you will
 | |
|      -- get this error, for example if the input data just isn't a compressed
 | |
|      -- zlib data stream. In particular if the data checksum turns out to be
 | |
|      -- wrong then you will get all the decompressed data but this error at the
 | |
|      -- end, instead of the normal sucessful 'StreamEnd'.
 | |
|    | DataFormatError String
 | |
|   deriving (Eq, Typeable)
 | |
| 
 | |
| instance Show DecompressError where
 | |
|   show TruncatedInput     = modprefix "premature end of compressed data stream"
 | |
|   show DictionaryRequired = modprefix "compressed data stream requires custom dictionary"
 | |
|   show DictionaryMismatch = modprefix "given dictionary does not match the expected one"
 | |
|   show (DataFormatError detail) = modprefix ("compressed data stream format error (" ++ detail ++ ")")
 | |
| 
 | |
| modprefix :: ShowS
 | |
| modprefix = ("Codec.Compression.Zlib: " ++)
 | |
| 
 | |
| instance Exception DecompressError
 | |
| 
 | |
| -- | A fold over the 'DecompressStream' in the given monad.
 | |
| --
 | |
| -- One way to look at this is that it runs the stream, using callback functions
 | |
| -- for the four stream events.
 | |
| --
 | |
| foldDecompressStream :: Monad m
 | |
|                      => ((S.ByteString -> m a) -> m a)
 | |
|                      -> (S.ByteString -> m a -> m a)
 | |
|                      -> (S.ByteString -> m a)
 | |
|                      -> (DecompressError -> m a)
 | |
|                      -> DecompressStream m -> m a
 | |
| foldDecompressStream input output end err = fold
 | |
|   where
 | |
|     fold (DecompressInputRequired next) =
 | |
|       input (\x -> next x >>= fold)
 | |
| 
 | |
|     fold (DecompressOutputAvailable outchunk next) =
 | |
|       output outchunk (next >>= fold)
 | |
| 
 | |
|     fold (DecompressStreamEnd inchunk) = end inchunk
 | |
|     fold (DecompressStreamError derr)  = err derr
 | |
| 
 | |
| -- | A variant on 'foldCompressStream' that is pure rather than operating in a
 | |
| -- monad and where the input is provided by a lazy 'L.ByteString'. So we only
 | |
| -- have to deal with the output, end and error parts, making it like a foldr on
 | |
| -- a list of output chunks.
 | |
| --
 | |
| -- For example:
 | |
| --
 | |
| -- > toChunks = foldDecompressStreamWithInput (:) [] throw
 | |
| --
 | |
| foldDecompressStreamWithInput :: (S.ByteString -> a -> a)
 | |
|                               -> (L.ByteString -> a)
 | |
|                               -> (DecompressError -> a)
 | |
|                               -> (forall s. DecompressStream (ST s))
 | |
|                               -> L.ByteString
 | |
|                               -> a
 | |
| foldDecompressStreamWithInput chunk end err = \s lbs ->
 | |
|     runST (fold s (L.toChunks lbs))
 | |
|   where
 | |
|     fold (DecompressInputRequired next) [] =
 | |
|       next S.empty >>= \strm -> fold strm []
 | |
| 
 | |
|     fold (DecompressInputRequired next) (inchunk:inchunks) =
 | |
|       next inchunk >>= \s -> fold s inchunks
 | |
| 
 | |
|     fold (DecompressOutputAvailable outchunk next) inchunks = do
 | |
|       r <- next >>= \s -> fold s inchunks
 | |
|       return $ chunk outchunk r
 | |
| 
 | |
|     fold (DecompressStreamEnd inchunk) inchunks =
 | |
|       return $ end (L.fromChunks (inchunk:inchunks))
 | |
| 
 | |
|     fold (DecompressStreamError derr) _ =
 | |
|       return $ err derr
 | |
| 
 | |
| 
 | |
| -- $incremental-compression
 | |
| -- The pure 'compress' and 'decompress' functions are streaming in the sense
 | |
| -- that they can produce output without demanding all input, however they need
 | |
| -- the input data stream as a lazy 'L.ByteString'. Having the input data
 | |
| -- stream as a lazy 'L.ByteString' often requires using lazy I\/O which is not
 | |
| -- appropriate in all cicumstances.
 | |
| --
 | |
| -- For these cases an incremental interface is more appropriate. This interface
 | |
| -- allows both incremental input and output. Chunks of input data are supplied
 | |
| -- one by one (e.g. as they are obtained from an input source like a file or
 | |
| -- network source). Output is also produced chunk by chunk.
 | |
| --
 | |
| -- The incremental input and output is managed via the 'CompressStream' and
 | |
| -- 'DecompressStream' types. They represents the unfolding of the process of
 | |
| -- compressing and decompressing. They operates in either the 'ST' or 'IO'
 | |
| -- monads. They can be lifted into other incremental abstractions like pipes or
 | |
| -- conduits, or they can be used directly in the following style.
 | |
| 
 | |
| -- $using-incremental-compression
 | |
| --
 | |
| -- In a loop:
 | |
| --
 | |
| --  * Inspect the status of the stream
 | |
| --
 | |
| --  * When it is 'CompressInputRequired' then you should call the action,
 | |
| --    passing a chunk of input (or 'BS.empty' when no more input is available)
 | |
| --    to get the next state of the stream and continue the loop.
 | |
| --
 | |
| --  * When it is 'CompressOutputAvailable' then do something with the given
 | |
| --    chunk of output, and call the action to get the next state of the stream
 | |
| --    and continue the loop.
 | |
| --
 | |
| --  * When it is 'CompressStreamEnd' then terminate the loop.
 | |
| --
 | |
| -- Note that you cannot stop as soon as you have no more input, you need to
 | |
| -- carry on until all the output has been collected, i.e. until you get to
 | |
| -- 'CompressStreamEnd'.
 | |
| --
 | |
| -- Here is an example where we get input from one file handle and send the
 | |
| -- compressed output to another file handle.
 | |
| --
 | |
| -- > go :: Handle -> Handle -> CompressStream IO -> IO ()
 | |
| -- > go inh outh (CompressInputRequired next) = do
 | |
| -- >    inchunk <- BS.hGet inh 4096
 | |
| -- >    go inh outh =<< next inchunk
 | |
| -- > go inh outh (CompressOutputAvailable outchunk next) =
 | |
| -- >    BS.hPut outh outchunk
 | |
| -- >    go inh outh =<< next
 | |
| -- > go _ _ CompressStreamEnd = return ()
 | |
| --
 | |
| -- The same can be achieved with 'foldCompressStream':
 | |
| --
 | |
| -- > foldCompressStream
 | |
| -- >   (\next -> do inchunk <- BS.hGet inh 4096; next inchunk)
 | |
| -- >   (\outchunk next -> do BS.hPut outh outchunk; next)
 | |
| -- >   (return ())
 | |
| 
 | |
| -- $using-incremental-decompression
 | |
| --
 | |
| -- The use of 'DecompressStream' is very similar to 'CompressStream' but with
 | |
| -- a few differences:
 | |
| --
 | |
| -- * There is the extra possibility of a 'DecompressStreamError'
 | |
| --
 | |
| -- * There can be extra trailing data after a compressed stream, and the
 | |
| --   'DecompressStreamEnd' includes that.
 | |
| --
 | |
| -- Otherwise the same loop style applies, and there are fold functions.
 | |
| 
 | |
| -- | The unfolding of the compression process, where you provide a sequence
 | |
| -- of uncompressed data chunks as input and receive a sequence of compressed
 | |
| -- data chunks as output. The process is incremental, in that the demand for
 | |
| -- input and provision of output are interleaved.
 | |
| --
 | |
| data CompressStream m =
 | |
|      CompressInputRequired {
 | |
|          compressSupplyInput :: S.ByteString -> m (CompressStream m)
 | |
|        }
 | |
| 
 | |
|    | CompressOutputAvailable {
 | |
|         compressOutput :: !S.ByteString,
 | |
|         compressNext   :: m (CompressStream m)
 | |
|       }
 | |
| 
 | |
|    | CompressStreamEnd
 | |
| 
 | |
| -- | A fold over the 'CompressStream' in the given monad.
 | |
| --
 | |
| -- One way to look at this is that it runs the stream, using callback functions
 | |
| -- for the three stream events.
 | |
| --
 | |
| foldCompressStream :: Monad m
 | |
|                    => ((S.ByteString -> m a) -> m a)
 | |
|                    -> (S.ByteString -> m a -> m a)
 | |
|                    -> m a
 | |
|                    -> CompressStream m -> m a
 | |
| foldCompressStream input output end = fold
 | |
|   where
 | |
|     fold (CompressInputRequired next) =
 | |
|       input (\x -> next x >>= fold)
 | |
| 
 | |
|     fold (CompressOutputAvailable outchunk next) =
 | |
|       output outchunk (next >>= fold)
 | |
| 
 | |
|     fold CompressStreamEnd =
 | |
|       end
 | |
| 
 | |
| -- | A variant on 'foldCompressStream' that is pure rather than operating in a
 | |
| -- monad and where the input is provided by a lazy 'L.ByteString'. So we only
 | |
| -- have to deal with the output and end parts, making it just like a foldr on a
 | |
| -- list of output chunks.
 | |
| --
 | |
| -- For example:
 | |
| --
 | |
| -- > toChunks = foldCompressStreamWithInput (:) []
 | |
| --
 | |
| foldCompressStreamWithInput :: (S.ByteString -> a -> a)
 | |
|                             -> a
 | |
|                             -> (forall s. CompressStream (ST s))
 | |
|                             -> L.ByteString
 | |
|                             -> a
 | |
| foldCompressStreamWithInput chunk end = \s lbs ->
 | |
|     runST (fold s (L.toChunks lbs))
 | |
|   where
 | |
|     fold (CompressInputRequired next) [] =
 | |
|       next S.empty >>= \strm -> fold strm []
 | |
| 
 | |
|     fold (CompressInputRequired next) (inchunk:inchunks) =
 | |
|       next inchunk >>= \s -> fold s inchunks
 | |
| 
 | |
|     fold (CompressOutputAvailable outchunk next) inchunks = do
 | |
|       r <- next >>= \s -> fold s inchunks
 | |
|       return $ chunk outchunk r
 | |
| 
 | |
|     fold CompressStreamEnd _inchunks =
 | |
|       return end
 | |
| 
 | |
| 
 | |
| -- | Compress a data stream provided as a lazy 'L.ByteString'.
 | |
| --
 | |
| -- There are no expected error conditions. All input data streams are valid. It
 | |
| -- is possible for unexpected errors to occur, such as running out of memory,
 | |
| -- or finding the wrong version of the zlib C library, these are thrown as
 | |
| -- exceptions.
 | |
| --
 | |
| compress   :: Stream.Format -> CompressParams -> L.ByteString -> L.ByteString
 | |
| 
 | |
| -- | Incremental compression in the 'ST' monad. Using 'ST' makes it possible
 | |
| -- to write pure /lazy/ functions while making use of incremental compression.
 | |
| --
 | |
| compressST :: Stream.Format -> CompressParams -> CompressStream (ST s)
 | |
| 
 | |
| -- | Incremental compression in the 'IO' monad.
 | |
| --
 | |
| compressIO :: Stream.Format -> CompressParams -> CompressStream IO
 | |
| 
 | |
| compress   format params = foldCompressStreamWithInput
 | |
|                              L.Chunk L.Empty
 | |
|                              (compressStreamST format params)
 | |
| compressST format params = compressStreamST  format params
 | |
| compressIO format params = compressStreamIO  format params
 | |
| 
 | |
| compressStream :: Stream.Format -> CompressParams -> S.ByteString
 | |
|                -> Stream (CompressStream Stream)
 | |
| compressStream format (CompressParams compLevel method bits memLevel
 | |
|                                 strategy initChunkSize mdict) =
 | |
| 
 | |
|     \chunk -> do
 | |
|       Stream.deflateInit format compLevel method bits memLevel strategy
 | |
|       setDictionary mdict
 | |
|       case chunk of
 | |
|         _ | S.null chunk ->
 | |
|           fillBuffers 20   --gzip header is 20 bytes, others even smaller
 | |
| 
 | |
|         S.PS inFPtr offset length -> do
 | |
|           Stream.pushInputBuffer inFPtr offset length
 | |
|           fillBuffers initChunkSize
 | |
| 
 | |
|   where
 | |
|     -- we flick between two states:
 | |
|     --   * where one or other buffer is empty
 | |
|     --       - in which case we refill one or both
 | |
|     --   * where both buffers are non-empty
 | |
|     --       - in which case we compress until a buffer is empty
 | |
| 
 | |
|   fillBuffers :: Int -> Stream (CompressStream Stream)
 | |
|   fillBuffers outChunkSize = do
 | |
| #ifdef DEBUG
 | |
|     Stream.consistencyCheck
 | |
| #endif
 | |
| 
 | |
|     -- in this state there are two possabilities:
 | |
|     --   * no outbut buffer space is available
 | |
|     --       - in which case we must make more available
 | |
|     --   * no input buffer is available
 | |
|     --       - in which case we must supply more
 | |
|     inputBufferEmpty <- Stream.inputBufferEmpty
 | |
|     outputBufferFull <- Stream.outputBufferFull
 | |
| 
 | |
|     assert (inputBufferEmpty || outputBufferFull) $ return ()
 | |
| 
 | |
|     when outputBufferFull $ do
 | |
|       outFPtr <- Stream.unsafeLiftIO (S.mallocByteString outChunkSize)
 | |
|       Stream.pushOutputBuffer outFPtr 0 outChunkSize
 | |
| 
 | |
|     if inputBufferEmpty
 | |
|       then return $ CompressInputRequired $ \chunk ->
 | |
|            case chunk of
 | |
|              _ | S.null chunk          -> drainBuffers True
 | |
|              S.PS inFPtr offset length -> do
 | |
|                 Stream.pushInputBuffer inFPtr offset length
 | |
|                 drainBuffers False
 | |
|       else drainBuffers False
 | |
| 
 | |
| 
 | |
|   drainBuffers :: Bool -> Stream (CompressStream Stream)
 | |
|   drainBuffers lastChunk = do
 | |
| 
 | |
|     inputBufferEmpty' <- Stream.inputBufferEmpty
 | |
|     outputBufferFull' <- Stream.outputBufferFull
 | |
|     assert(not outputBufferFull'
 | |
|        && (lastChunk || not inputBufferEmpty')) $ return ()
 | |
|     -- this invariant guarantees we can always make forward progress
 | |
|     -- and that therefore a BufferError is impossible
 | |
| 
 | |
|     let flush = if lastChunk then Stream.Finish else Stream.NoFlush
 | |
|     status <- Stream.deflate flush
 | |
| 
 | |
|     case status of
 | |
|       Stream.Ok -> do
 | |
|         outputBufferFull <- Stream.outputBufferFull
 | |
|         if outputBufferFull
 | |
|           then do (outFPtr, offset, length) <- Stream.popOutputBuffer
 | |
|                   let chunk = S.PS outFPtr offset length
 | |
|                   return $ CompressOutputAvailable chunk $ do
 | |
|                     fillBuffers defaultCompressBufferSize
 | |
|           else do fillBuffers defaultCompressBufferSize
 | |
| 
 | |
|       Stream.StreamEnd -> do
 | |
|         inputBufferEmpty <- Stream.inputBufferEmpty
 | |
|         assert inputBufferEmpty $ return ()
 | |
|         outputBufferBytesAvailable <- Stream.outputBufferBytesAvailable
 | |
|         if outputBufferBytesAvailable > 0
 | |
|           then do (outFPtr, offset, length) <- Stream.popOutputBuffer
 | |
|                   let chunk = S.PS outFPtr offset length
 | |
|                   Stream.finalise
 | |
|                   return $ CompressOutputAvailable chunk (return CompressStreamEnd)
 | |
|           else do Stream.finalise
 | |
|                   return CompressStreamEnd
 | |
| 
 | |
|       Stream.Error code msg -> case code of
 | |
|         Stream.BufferError  -> fail "BufferError should be impossible!"
 | |
|         Stream.NeedDict _   -> fail "NeedDict is impossible!"
 | |
|         _                   -> fail msg
 | |
| 
 | |
|   -- Set the custom dictionary, if we were provided with one
 | |
|   -- and if the format supports it (zlib and raw, not gzip).
 | |
|   setDictionary :: Maybe S.ByteString -> Stream ()
 | |
|   setDictionary (Just dict)
 | |
|     | Stream.formatSupportsDictionary format = do
 | |
|         status <- Stream.deflateSetDictionary dict
 | |
|         case status of
 | |
|           Stream.Ok          -> return ()
 | |
|           Stream.Error _ msg -> fail msg
 | |
|           _                  -> fail "error when setting deflate dictionary"
 | |
|   setDictionary _ = return ()
 | |
| 
 | |
| 
 | |
| -- | Decompress a data stream provided as a lazy 'L.ByteString'.
 | |
| --
 | |
| -- It will throw an exception if any error is encountered in the input data.
 | |
| -- If you need more control over error handling then use one the incremental
 | |
| -- versions, 'decompressST' or 'decompressIO'.
 | |
| --
 | |
| decompress   :: Stream.Format -> DecompressParams -> L.ByteString -> L.ByteString
 | |
| 
 | |
| -- | Incremental decompression in the 'ST' monad. Using 'ST' makes it possible
 | |
| -- to write pure /lazy/ functions while making use of incremental decompression.
 | |
| --
 | |
| decompressST :: Stream.Format -> DecompressParams -> DecompressStream (ST s)
 | |
| 
 | |
| -- | Incremental decompression in the 'IO' monad.
 | |
| --
 | |
| decompressIO :: Stream.Format -> DecompressParams -> DecompressStream IO
 | |
| 
 | |
| decompress   format params = foldDecompressStreamWithInput
 | |
|                                L.Chunk (const L.Empty) throw
 | |
|                                (decompressStreamST format params)
 | |
| decompressST format params = decompressStreamST  format params
 | |
| decompressIO format params = decompressStreamIO  format params
 | |
| 
 | |
| 
 | |
| decompressStream :: Stream.Format -> DecompressParams
 | |
|                  -> Bool -> S.ByteString
 | |
|                  -> Stream (DecompressStream Stream)
 | |
| decompressStream format (DecompressParams bits initChunkSize mdict allMembers)
 | |
|                  resume =
 | |
| 
 | |
|     \chunk -> do
 | |
|       inputBufferEmpty <- Stream.inputBufferEmpty
 | |
|       outputBufferFull <- Stream.outputBufferFull
 | |
|       assert inputBufferEmpty $
 | |
|         if resume then assert (format == Stream.gzipFormat && allMembers) $
 | |
|                        Stream.inflateReset
 | |
|                   else assert outputBufferFull $
 | |
|                        Stream.inflateInit format bits
 | |
|       case chunk of
 | |
|         _ | S.null chunk -> do
 | |
|           -- special case to avoid demanding more input again
 | |
|           -- always an error anyway
 | |
|           when outputBufferFull $ do
 | |
|             let outChunkSize = 1
 | |
|             outFPtr <- Stream.unsafeLiftIO (S.mallocByteString outChunkSize)
 | |
|             Stream.pushOutputBuffer outFPtr 0 outChunkSize
 | |
|           drainBuffers True
 | |
| 
 | |
|         S.PS inFPtr offset length -> do
 | |
|           Stream.pushInputBuffer inFPtr offset length
 | |
|           -- Normally we start with no output buffer (so counts as full) but
 | |
|           -- if we're resuming then we'll usually still have output buffer
 | |
|           -- space available
 | |
|           assert (if not resume then outputBufferFull else True) $ return ()
 | |
|           if outputBufferFull
 | |
|             then fillBuffers initChunkSize
 | |
|             else drainBuffers False
 | |
| 
 | |
|   where
 | |
|     -- we flick between two states:
 | |
|     --   * where one or other buffer is empty
 | |
|     --       - in which case we refill one or both
 | |
|     --   * where both buffers are non-empty
 | |
|     --       - in which case we compress until a buffer is empty
 | |
| 
 | |
|   fillBuffers :: Int
 | |
|               -> Stream (DecompressStream Stream)
 | |
|   fillBuffers outChunkSize = do
 | |
| #ifdef DEBUG
 | |
|     Stream.consistencyCheck
 | |
| #endif
 | |
| 
 | |
|     -- in this state there are two possabilities:
 | |
|     --   * no outbut buffer space is available
 | |
|     --       - in which case we must make more available
 | |
|     --   * no input buffer is available
 | |
|     --       - in which case we must supply more
 | |
|     inputBufferEmpty <- Stream.inputBufferEmpty
 | |
|     outputBufferFull <- Stream.outputBufferFull
 | |
| 
 | |
|     assert (inputBufferEmpty || outputBufferFull) $ return ()
 | |
| 
 | |
|     when outputBufferFull $ do
 | |
|       outFPtr <- Stream.unsafeLiftIO (S.mallocByteString outChunkSize)
 | |
|       Stream.pushOutputBuffer outFPtr 0 outChunkSize
 | |
| 
 | |
|     if inputBufferEmpty
 | |
|       then return $ DecompressInputRequired $ \chunk ->
 | |
|            case chunk of
 | |
|              _ | S.null chunk          -> drainBuffers True
 | |
|              S.PS inFPtr offset length -> do
 | |
|                 Stream.pushInputBuffer inFPtr offset length
 | |
|                 drainBuffers False
 | |
|       else drainBuffers False
 | |
| 
 | |
| 
 | |
|   drainBuffers :: Bool -> Stream (DecompressStream Stream)
 | |
|   drainBuffers lastChunk = do
 | |
| 
 | |
|     inputBufferEmpty' <- Stream.inputBufferEmpty
 | |
|     outputBufferFull' <- Stream.outputBufferFull
 | |
|     assert(not outputBufferFull'
 | |
|        && (lastChunk || not inputBufferEmpty')) $ return ()
 | |
|     -- this invariant guarantees we can always make forward progress or at
 | |
|     -- least if a BufferError does occur that it must be due to a premature EOF
 | |
| 
 | |
|     status <- Stream.inflate Stream.NoFlush
 | |
| 
 | |
|     case status of
 | |
|       Stream.Ok -> do
 | |
|         outputBufferFull <- Stream.outputBufferFull
 | |
|         if outputBufferFull
 | |
|           then do (outFPtr, offset, length) <- Stream.popOutputBuffer
 | |
|                   let chunk = S.PS outFPtr offset length
 | |
|                   return $ DecompressOutputAvailable chunk $ do
 | |
|                     fillBuffers defaultDecompressBufferSize
 | |
|           else do fillBuffers defaultDecompressBufferSize
 | |
| 
 | |
|       Stream.StreamEnd      -> do
 | |
|         -- The decompressor tells us we're done.
 | |
|         -- Note that there may be input bytes still available if the stream is
 | |
|         -- embeded in some other data stream, so we return any trailing data.
 | |
|         inputBufferEmpty <- Stream.inputBufferEmpty
 | |
|         if inputBufferEmpty
 | |
|           then do finish (DecompressStreamEnd S.empty)
 | |
|           else do (inFPtr, offset, length) <- Stream.popRemainingInputBuffer
 | |
|                   let inchunk = S.PS inFPtr offset length
 | |
|                   finish (DecompressStreamEnd inchunk)
 | |
| 
 | |
|       Stream.Error code msg -> case code of
 | |
|         Stream.BufferError  -> finish (DecompressStreamError TruncatedInput)
 | |
|         Stream.NeedDict adler -> do
 | |
|           err <- setDictionary adler mdict
 | |
|           case err of
 | |
|             Just streamErr  -> finish streamErr
 | |
|             Nothing         -> drainBuffers lastChunk
 | |
|         Stream.DataError    -> finish (DecompressStreamError (DataFormatError msg))
 | |
|         _                   -> fail msg
 | |
| 
 | |
|   -- Note even if we end with an error we still try to flush the last chunk if
 | |
|   -- there is one. The user just has to decide what they want to trust.
 | |
|   finish end = do
 | |
|     outputBufferBytesAvailable <- Stream.outputBufferBytesAvailable
 | |
|     if outputBufferBytesAvailable > 0
 | |
|       then do (outFPtr, offset, length) <- Stream.popOutputBuffer
 | |
|               return (DecompressOutputAvailable (S.PS outFPtr offset length) (return end))
 | |
|       else return end
 | |
| 
 | |
|   setDictionary :: Stream.DictionaryHash -> Maybe S.ByteString
 | |
|                 -> Stream (Maybe (DecompressStream Stream))
 | |
|   setDictionary _adler Nothing =
 | |
|     return $ Just (DecompressStreamError DictionaryRequired)
 | |
|   setDictionary _adler (Just dict) = do
 | |
|     status <- Stream.inflateSetDictionary dict
 | |
|     case status of
 | |
|       Stream.Ok -> return Nothing
 | |
|       Stream.Error Stream.DataError _   ->
 | |
|         return $ Just (DecompressStreamError DictionaryMismatch)
 | |
|       _ -> fail "error when setting inflate dictionary"
 | |
| 
 | |
| 
 | |
| ------------------------------------------------------------------------------
 | |
| 
 | |
| mkStateST :: ST s (Stream.State s)
 | |
| mkStateIO :: IO (Stream.State RealWorld)
 | |
| mkStateST = strictToLazyST Stream.mkState
 | |
| mkStateIO = stToIO Stream.mkState
 | |
| 
 | |
| runStreamST :: Stream a -> Stream.State s -> ST s (a, Stream.State s)
 | |
| runStreamIO :: Stream a -> Stream.State RealWorld -> IO (a, Stream.State RealWorld)
 | |
| runStreamST strm zstate = strictToLazyST (Unsafe.unsafeIOToST noDuplicate >> Stream.runStream strm zstate)
 | |
| runStreamIO strm zstate = stToIO (Stream.runStream strm zstate)
 | |
| 
 | |
| compressStreamIO :: Stream.Format -> CompressParams -> CompressStream IO
 | |
| compressStreamIO format params =
 | |
|     CompressInputRequired {
 | |
|       compressSupplyInput = \chunk -> do
 | |
|         zstate <- mkStateIO
 | |
|         let next = compressStream format params
 | |
|         (strm', zstate') <- runStreamIO (next chunk) zstate
 | |
|         return (go strm' zstate')
 | |
|     }
 | |
|   where
 | |
|     go :: CompressStream Stream -> Stream.State RealWorld -> CompressStream IO
 | |
|     go (CompressInputRequired next) zstate =
 | |
|       CompressInputRequired {
 | |
|         compressSupplyInput = \chunk -> do
 | |
|           (strm', zstate') <- runStreamIO (next chunk) zstate
 | |
|           return (go strm' zstate')
 | |
|       }
 | |
| 
 | |
|     go (CompressOutputAvailable chunk next) zstate =
 | |
|       CompressOutputAvailable chunk $ do
 | |
|         (strm', zstate') <- runStreamIO next zstate
 | |
|         return (go strm' zstate')
 | |
| 
 | |
|     go CompressStreamEnd _ = CompressStreamEnd
 | |
| 
 | |
| compressStreamST :: Stream.Format -> CompressParams -> CompressStream (ST s)
 | |
| compressStreamST format params =
 | |
|     CompressInputRequired {
 | |
|       compressSupplyInput = \chunk -> do
 | |
|         zstate <- mkStateST
 | |
|         let next = compressStream format params
 | |
|         (strm', zstate') <- runStreamST (next chunk) zstate
 | |
|         return (go strm' zstate')
 | |
|     }
 | |
|   where
 | |
|     go :: CompressStream Stream -> Stream.State s -> CompressStream (ST s)
 | |
|     go (CompressInputRequired next) zstate =
 | |
|       CompressInputRequired {
 | |
|         compressSupplyInput = \chunk -> do
 | |
|           (strm', zstate') <- runStreamST (next chunk) zstate
 | |
|           return (go strm' zstate')
 | |
|       }
 | |
| 
 | |
|     go (CompressOutputAvailable chunk next) zstate =
 | |
|       CompressOutputAvailable chunk $ do
 | |
|         (strm', zstate') <- runStreamST next zstate
 | |
|         return (go strm' zstate')
 | |
| 
 | |
|     go CompressStreamEnd _ = CompressStreamEnd
 | |
| 
 | |
| 
 | |
| decompressStreamIO :: Stream.Format -> DecompressParams -> DecompressStream IO
 | |
| decompressStreamIO format params =
 | |
|       DecompressInputRequired $ \chunk -> do
 | |
|         zstate <- mkStateIO
 | |
|         let next = decompressStream format params False
 | |
|         (strm', zstate') <- runStreamIO (next chunk) zstate
 | |
|         go strm' zstate' (S.null chunk)
 | |
|   where
 | |
|     go :: DecompressStream Stream -> Stream.State RealWorld -> Bool
 | |
|        -> IO (DecompressStream IO)
 | |
|     go (DecompressInputRequired next) zstate !_ =
 | |
|       return $ DecompressInputRequired $ \chunk -> do
 | |
|         (strm', zstate') <- runStreamIO (next chunk) zstate
 | |
|         go strm' zstate' (S.null chunk)
 | |
| 
 | |
|     go (DecompressOutputAvailable chunk next) zstate !eof =
 | |
|       return $ DecompressOutputAvailable chunk $ do
 | |
|         (strm', zstate') <- runStreamIO next zstate
 | |
|         go strm' zstate' eof
 | |
| 
 | |
|     go (DecompressStreamEnd unconsumed) zstate !eof
 | |
|       | format == Stream.gzipFormat
 | |
|       , decompressAllMembers params
 | |
|       , not eof    = tryFollowingStream unconsumed zstate
 | |
|       | otherwise  = finaliseStreamEnd unconsumed zstate
 | |
| 
 | |
|     go (DecompressStreamError err) zstate !_ = finaliseStreamError err zstate
 | |
| 
 | |
|     tryFollowingStream :: S.ByteString -> Stream.State RealWorld -> IO (DecompressStream IO)
 | |
|     tryFollowingStream chunk zstate = case S.length chunk of
 | |
|       0 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
 | |
|          0 -> finaliseStreamEnd S.empty zstate
 | |
|          1 | S.head chunk' /= 0x1f
 | |
|            -> finaliseStreamEnd chunk' zstate
 | |
|          1 -> return $ DecompressInputRequired $ \chunk'' -> case S.length chunk'' of
 | |
|             0 -> finaliseStreamEnd chunk' zstate
 | |
|             _ -> checkHeaderSplit (S.head chunk') chunk'' zstate
 | |
|          _    -> checkHeader chunk' zstate
 | |
|       1 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
 | |
|          0    -> finaliseStreamEnd chunk zstate
 | |
|          _    -> checkHeaderSplit (S.head chunk) chunk' zstate
 | |
|       _       -> checkHeader chunk zstate
 | |
| 
 | |
|     checkHeaderSplit :: Word8 -> S.ByteString -> Stream.State RealWorld -> IO (DecompressStream IO)
 | |
|     checkHeaderSplit 0x1f chunk zstate
 | |
|       | S.head chunk == 0x8b = do
 | |
|         let resume = decompressStream format params True (S.pack [0x1f, 0x8b])
 | |
|         if S.length chunk > 1
 | |
|           then do
 | |
|             -- have to handle the remaining data in this chunk
 | |
|             (DecompressInputRequired next, zstate') <- runStreamIO resume zstate
 | |
|             (strm', zstate'') <- runStreamIO (next (S.tail chunk)) zstate'
 | |
|             go strm' zstate'' False
 | |
|           else do
 | |
|             -- subtle special case when the chunk tail is empty
 | |
|             -- yay for QC tests
 | |
|             (strm, zstate') <- runStreamIO resume zstate
 | |
|             go strm zstate' False
 | |
|     checkHeaderSplit byte chunk zstate =
 | |
|         finaliseStreamEnd (S.cons byte chunk) zstate
 | |
| 
 | |
|     checkHeader :: S.ByteString -> Stream.State RealWorld -> IO (DecompressStream IO)
 | |
|     checkHeader chunk zstate
 | |
|       | S.index chunk 0 == 0x1f
 | |
|       , S.index chunk 1 == 0x8b = do
 | |
|         let resume = decompressStream format params True chunk
 | |
|         (strm', zstate') <- runStreamIO resume zstate
 | |
|         go strm' zstate' False
 | |
|     checkHeader chunk zstate = finaliseStreamEnd chunk zstate
 | |
| 
 | |
|     finaliseStreamEnd unconsumed zstate = do
 | |
|         _ <- runStreamIO Stream.finalise zstate
 | |
|         return (DecompressStreamEnd unconsumed)
 | |
| 
 | |
|     finaliseStreamError err zstate = do
 | |
|         _ <- runStreamIO Stream.finalise zstate
 | |
|         return (DecompressStreamError err)
 | |
| 
 | |
| 
 | |
| decompressStreamST :: Stream.Format -> DecompressParams -> DecompressStream (ST s)
 | |
| decompressStreamST format params =
 | |
|       DecompressInputRequired $ \chunk -> do
 | |
|         zstate <- mkStateST
 | |
|         let next = decompressStream format params False
 | |
|         (strm', zstate') <- runStreamST (next chunk) zstate
 | |
|         go strm' zstate' (S.null chunk)
 | |
|   where
 | |
|     go :: DecompressStream Stream -> Stream.State s -> Bool
 | |
|        -> ST s (DecompressStream (ST s))
 | |
|     go (DecompressInputRequired next) zstate !_ =
 | |
|       return $ DecompressInputRequired $ \chunk -> do
 | |
|         (strm', zstate') <- runStreamST (next chunk) zstate
 | |
|         go strm' zstate' (S.null chunk)
 | |
| 
 | |
|     go (DecompressOutputAvailable chunk next) zstate !eof =
 | |
|       return $ DecompressOutputAvailable chunk $ do
 | |
|         (strm', zstate') <- runStreamST next zstate
 | |
|         go strm' zstate' eof
 | |
| 
 | |
|     go (DecompressStreamEnd unconsumed) zstate !eof
 | |
|       | format == Stream.gzipFormat
 | |
|       , decompressAllMembers params
 | |
|       , not eof    = tryFollowingStream unconsumed zstate
 | |
|       | otherwise  = finaliseStreamEnd unconsumed zstate
 | |
| 
 | |
|     go (DecompressStreamError err) zstate !_ = finaliseStreamError err zstate
 | |
| 
 | |
| 
 | |
|     tryFollowingStream :: S.ByteString -> Stream.State s -> ST s (DecompressStream (ST s))
 | |
|     tryFollowingStream chunk zstate = 
 | |
|       case S.length chunk of
 | |
|       0 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
 | |
|          0 -> finaliseStreamEnd S.empty zstate
 | |
|          1 | S.head chunk' /= 0x1f
 | |
|            -> finaliseStreamEnd chunk' zstate
 | |
|          1 -> return $ DecompressInputRequired $ \chunk'' -> case S.length chunk'' of
 | |
|             0 -> finaliseStreamEnd chunk' zstate
 | |
|             _ -> checkHeaderSplit (S.head chunk') chunk'' zstate
 | |
|          _    -> checkHeader chunk' zstate
 | |
|       1 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
 | |
|          0    -> finaliseStreamEnd chunk zstate
 | |
|          _    -> checkHeaderSplit (S.head chunk) chunk' zstate
 | |
|       _       -> checkHeader chunk zstate
 | |
| 
 | |
|     checkHeaderSplit :: Word8 -> S.ByteString -> Stream.State s -> ST s (DecompressStream (ST s))
 | |
|     checkHeaderSplit 0x1f chunk zstate
 | |
|       | S.head chunk == 0x8b = do
 | |
|         let resume = decompressStream format params True (S.pack [0x1f, 0x8b])
 | |
|         if S.length chunk > 1
 | |
|           then do
 | |
|             -- have to handle the remaining data in this chunk
 | |
|             (DecompressInputRequired next, zstate') <- runStreamST resume zstate
 | |
|             (strm', zstate'') <- runStreamST (next (S.tail chunk)) zstate'
 | |
|             go strm' zstate'' False
 | |
|           else do
 | |
|             -- subtle special case when the chunk tail is empty
 | |
|             -- yay for QC tests
 | |
|             (strm, zstate') <- runStreamST resume zstate
 | |
|             go strm zstate' False
 | |
|     checkHeaderSplit byte chunk zstate =
 | |
|         finaliseStreamEnd (S.cons byte chunk) zstate
 | |
| 
 | |
|     checkHeader :: S.ByteString -> Stream.State s -> ST s (DecompressStream (ST s))
 | |
|     checkHeader chunk zstate
 | |
|       | S.index chunk 0 == 0x1f
 | |
|       , S.index chunk 1 == 0x8b = do
 | |
|         let resume = decompressStream format params True chunk
 | |
|         (strm', zstate') <- runStreamST resume zstate
 | |
|         go strm' zstate' False
 | |
|     checkHeader chunk zstate = finaliseStreamEnd chunk zstate
 | |
| 
 | |
|     finaliseStreamEnd unconsumed zstate = do
 | |
|         _ <- runStreamST Stream.finalise zstate
 | |
|         return (DecompressStreamEnd unconsumed)
 | |
| 
 | |
|     finaliseStreamError err zstate = do
 | |
|         _ <- runStreamST Stream.finalise zstate
 | |
|         return (DecompressStreamError err)
 |