diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 3445f6a..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "3rdparty/posix-paths"] - path = 3rdparty/posix-paths - url = https://github.com/hasufell/posix-paths.git diff --git a/3rdparty/posix-paths b/3rdparty/posix-paths deleted file mode 160000 index 5338c03..0000000 --- a/3rdparty/posix-paths +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5338c03af0a6efeb3914123e9ff085387c3151f9 diff --git a/README.md b/README.md index 7717cef..5990e61 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # HPath -Support for well-typed paths in Haskell. +Support for well-typed paths in Haskell. Also provides ByteString based filepath +manipulation. ## Motivation @@ -14,9 +15,13 @@ The library that came closest to my needs was but the API turned out to be oddly complicated for my use case, so I decided to fork it. +Similarly, [posix-paths](https://github.com/JohnLato/posix-paths) +was exactly what I wanted for the low-level operations, but upstream seems dead, +so it is forked as well and merged into this library. + ## Differences to 'path' -* doesn't attempt to fake IO-related types into the path, so whether a path points to a file or directory is up to your IO-code to decide... this should be a library that is used _with_ a proper IO File Type +* doesn't attempt to fake IO-related information into the path, so whether a path points to a file or directory is up to your IO-code to decide... * trailing path separators will be preserved if they exist, no messing with that * uses safe ByteString for filepaths under the hood instead of unsafe String * fixes broken [dirname](https://github.com/chrisdone/path/issues/18) @@ -27,3 +32,7 @@ decided to fork it. * allows `~/` as relative path, because on posix level `~` is just a regular filename that does _NOT_ point to `$HOME` * remove TH, it sucks +## Differences to 'posix-paths' + +* `hasTrailingPathSeparator` behaves in the same way as `System.FilePath` +* `dropTrailingPathSeparator` behaves in the same way as `System.FilePath` diff --git a/benchmarks/Bench.hs b/benchmarks/Bench.hs new file mode 100644 index 0000000..1afc52b --- /dev/null +++ b/benchmarks/Bench.hs @@ -0,0 +1,90 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} + +{-# OPTIONS_GHC -Wall #-} +import Control.Applicative +import Control.Monad +import System.Directory +import System.FilePath (()) +import System.Posix.ByteString.FilePath +import System.Posix.Directory.ByteString as PosixBS +import System.Posix.Directory.Traversals +import qualified System.Posix.FilePath as PosixBS +import System.Posix.Files.ByteString + +import Control.Exception +import qualified Data.ByteString.Char8 as BS + +import System.Environment (getArgs, withArgs) +import System.IO.Error +import System.IO.Unsafe +import System.Process (system) +import Criterion.Main + + +-- | Based on code from 'Real World Haskell', at +-- http://book.realworldhaskell.org/read/io-case-study-a-library-for-searching-the-filesystem.html#id620419 +listFilesRecursive :: FilePath -> IO [FilePath] +listFilesRecursive topdir = do + names <- System.Directory.getDirectoryContents topdir + let properNames = filter (`notElem` [".", ".."]) names + paths <- forM properNames $ \name -> do + let path = topdir name + isDir <- doesDirectoryExist path + if isDir + then listFilesRecursive path + else return [path] + return (topdir : concat paths) + +---------------------------------------------------------- + +getDirectoryContentsBS :: RawFilePath -> IO [RawFilePath] +getDirectoryContentsBS path = + modifyIOError ((`ioeSetFileName` (BS.unpack path)) . + (`ioeSetLocation` "getDirectoryContentsBS")) $ do + bracket + (PosixBS.openDirStream path) + PosixBS.closeDirStream + loop + where + loop dirp = do + e <- PosixBS.readDirStream dirp + if BS.null e then return [] else do + es <- loop dirp + return (e:es) + + +-- | similar to 'listFilesRecursive, but uses RawFilePaths +listFilesRecursiveBS :: RawFilePath -> IO [RawFilePath] +listFilesRecursiveBS topdir = do + names <- getDirectoryContentsBS topdir + let properNames = filter (`notElem` [".", ".."]) names + paths <- forM properNames $ \name -> unsafeInterleaveIO $ do + let path = PosixBS.combine topdir name + isDir <- isDirectory <$> getFileStatus path + if isDir + then listFilesRecursiveBS path + else return [path] + return (topdir : concat paths) +---------------------------------------------------------- + + +benchTraverse :: RawFilePath -> IO () +benchTraverse = traverseDirectory (\() p -> BS.putStrLn p) () + +main :: IO () +main = do + args <- getArgs + let (d,otherArgs) = case args of + [] -> ("/usr/local",[]) + x:xs -> (x,xs) + withArgs otherArgs $ defaultMain + [ bench "traverse (FilePath)" $ nfIO $ listFilesRecursive d >>= mapM_ putStrLn + , bench "traverse (RawFilePath)" $ nfIO $ listFilesRecursiveBS (BS.pack d) >>= mapM_ BS.putStrLn + , bench "allDirectoryContents" $ nfIO $ allDirectoryContents (BS.pack d) >>= mapM_ BS.putStrLn + , bench "allDirectoryContents'" $ nfIO $ allDirectoryContents' (BS.pack d) >>= mapM_ BS.putStrLn + , bench "traverseDirectory" $ nfIO $ benchTraverse (BS.pack d) + , bench "unix find" $ nfIO $ void $ system ("find " ++ d) + ] diff --git a/cbits/dirutils.c b/cbits/dirutils.c new file mode 100644 index 0000000..660607d --- /dev/null +++ b/cbits/dirutils.c @@ -0,0 +1,7 @@ +#include "dirutils.h" +unsigned int + __posixdir_d_type(struct dirent* d) + { + return(d -> d_type); + } + diff --git a/cbits/dirutils.h b/cbits/dirutils.h new file mode 100644 index 0000000..fd93b2e --- /dev/null +++ b/cbits/dirutils.h @@ -0,0 +1,13 @@ +#ifndef POSIXPATHS_CBITS_DIRUTILS_H +#define POSIXPATHS_CBITS_DIRUTILS_H + +#include +#include +#include +#include +#include + +extern unsigned int + __posixdir_d_type(struct dirent* d) + ; +#endif diff --git a/doctests.hs b/doctests-hpath.hs similarity index 80% rename from doctests.hs rename to doctests-hpath.hs index d7d8316..d56d5d3 100644 --- a/doctests.hs +++ b/doctests-hpath.hs @@ -2,12 +2,14 @@ module Main where +import Control.Applicative + import Test.DocTest import Test.HUnit main = doctest - [ "-isrc" + ["-isrc" , "-XOverloadedStrings" , "src/HPath.hs" ] diff --git a/doctests-posix.hs b/doctests-posix.hs new file mode 100644 index 0000000..56efe28 --- /dev/null +++ b/doctests-posix.hs @@ -0,0 +1,25 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +import Control.Applicative +import System.Posix.Directory.Traversals + +import Test.DocTest +import Test.HUnit + +main = do + doctest + [ "-isrc" + , "-XOverloadedStrings" + , "System.Posix.FilePath" + ] + runTestTT unitTests + + +unitTests :: Test +unitTests = test + [ TestCase $ do + r <- (==) <$> allDirectoryContents "." <*> allDirectoryContents' "." + assertBool "allDirectoryContents == allDirectoryContents'" r + ] diff --git a/hpath.cabal b/hpath.cabal index 585ee9b..66f73bf 100644 --- a/hpath.cabal +++ b/hpath.cabal @@ -9,34 +9,73 @@ maintainer: Julian Ospald copyright: 2015–2016 FP Complete, Julian Ospald 2016 category: Filesystem build-type: Simple -cabal-version: >=1.8 -extra-source-files: README.md, CHANGELOG +cabal-version: >=1.14 +extra-source-files: README.md + CHANGELOG + cbits/dirutils.h + doctests.hs + benchmarks/*.hs library hs-source-dirs: src/ + default-language: Haskell2010 ghc-options: -Wall -O2 - exposed-modules: HPath, HPath.Internal - build-depends: base >= 4 && <5 - , HUnit - , bytestring + c-sources: cbits/dirutils.c + exposed-modules: HPath, + HPath.Internal, + System.Posix.Directory.Foreign, + System.Posix.Directory.Traversals, + System.Posix.FilePath + build-depends: base >= 4.2 && <5 + , bytestring >= 0.9.2.0 , deepseq , exceptions , hspec - , posix-paths + , unix >= 2.5 , utf8-string , word8 -test-suite doctests +test-suite doctests-hpath + default-language: Haskell2010 type: exitcode-stdio-1.0 ghc-options: -threaded - main-is: doctests.hs + main-is: doctests-hpath.hs build-depends: base , HUnit - , bytestring + , QuickCheck , doctest >= 0.8 , hpath - , posix-paths + +test-suite doctests-posix + default-language: Haskell2010 + type: exitcode-stdio-1.0 + ghc-options: -threaded + main-is: doctests-posix.hs + build-depends: base, + bytestring, + unix, + hpath, + doctest >= 0.8, + HUnit, + QuickCheck + +benchmark bench.hs + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: benchmarks + main-is: Bench.hs + + build-depends: + base, + hpath, + bytestring, + unix, + directory >= 1.1 && < 1.3, + filepath >= 1.2 && < 1.4, + process >= 1.0 && < 1.3, + criterion >= 0.6 && < 0.9 + ghc-options: -O2 source-repository head type: git diff --git a/src/System/Posix/Directory/Foreign.hsc b/src/System/Posix/Directory/Foreign.hsc new file mode 100644 index 0000000..59f22ae --- /dev/null +++ b/src/System/Posix/Directory/Foreign.hsc @@ -0,0 +1,55 @@ +module System.Posix.Directory.Foreign where + +import Data.Bits +import Data.List (foldl') +import Foreign.C.Types + +#include +#include +#include +#include +#include +#include + +newtype DirType = DirType Int deriving (Eq, Show) +data Flags = Flags Int | UnsupportedFlag String deriving (Eq, Show) + +unFlags :: Flags -> Int +unFlags (Flags i) = i +unFlags (UnsupportedFlag name) = error (name ++ " is not supported on this platform") + +-- |Returns @True@ if posix-paths was compiled with support for the provided +-- flag. (As of this writing, the only flag for which this check may be +-- necessary is 'oCloexec'; all other flags will always yield @True@.) +isSupported :: Flags -> Bool +isSupported (Flags _) = True +isSupported _ = False + +-- |@O_CLOEXEC@ is not supported on every POSIX platform. Use +-- @'isSupported' oCloexec@ to determine if support for @O_CLOEXEC@ was +-- compiled into your version of posix-paths. (If not, using @oCloexec@ will +-- throw an exception.) +oCloexec :: Flags +#ifdef O_CLOEXEC +oCloexec = Flags #{const O_CLOEXEC} +#else +{-# WARNING oCloexec + "This version of posix-paths was compiled without @O_CLOEXEC@ support." #-} +oCloexec = UnsupportedFlag "O_CLOEXEC" +#endif + + + +-- If these enum declarations occur earlier in the file, haddock +-- gets royally confused about the above doc comments. +-- Probably http://trac.haskell.org/haddock/ticket/138 + +#{enum DirType, DirType, DT_BLK, DT_CHR, DT_DIR, DT_FIFO, DT_LNK, DT_REG, DT_SOCK, DT_UNKNOWN} + +#{enum Flags, Flags, O_APPEND, O_ASYNC, O_CREAT, O_DIRECTORY, O_EXCL, O_NOCTTY, O_NOFOLLOW, O_NONBLOCK, O_RDONLY, O_WRONLY, O_RDWR, O_SYNC, O_TRUNC} + +pathMax :: Int +pathMax = #{const PATH_MAX} + +unionFlags :: [Flags] -> CInt +unionFlags = fromIntegral . foldl' ((. unFlags) . (.|.)) 0 diff --git a/src/System/Posix/Directory/Traversals.hs b/src/System/Posix/Directory/Traversals.hs new file mode 100644 index 0000000..ef9a6f4 --- /dev/null +++ b/src/System/Posix/Directory/Traversals.hs @@ -0,0 +1,269 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} + +{-# OPTIONS_GHC -Wall #-} +module System.Posix.Directory.Traversals ( + + getDirectoryContents +, getDirectoryContents' + +, allDirectoryContents +, allDirectoryContents' +, traverseDirectory + +-- lower-level stuff +, readDirEnt +, packDirStream +, unpackDirStream +, openFd + +, realpath +) where + +import Control.Applicative +import Control.Monad +import System.Posix.FilePath (()) +import System.Posix.Directory.Foreign + +import qualified System.Posix as Posix +import System.IO.Error +import Control.Exception +import qualified Data.ByteString.Char8 as BS +import System.Posix.ByteString.FilePath +import System.Posix.Directory.ByteString as PosixBS +import System.Posix.Files.ByteString + +import System.IO.Unsafe +import Unsafe.Coerce (unsafeCoerce) +import Foreign.C.Error +import Foreign.C.String +import Foreign.C.Types +import Foreign.Marshal.Alloc (alloca,allocaBytes) +import Foreign.Ptr +import Foreign.Storable + + + + +---------------------------------------------------------- + +-- | Get all files from a directory and its subdirectories. +-- +-- Upon entering a directory, 'allDirectoryContents' will get all entries +-- strictly. However the returned list is lazy in that directories will only +-- be accessed on demand. +allDirectoryContents :: RawFilePath -> IO [RawFilePath] +allDirectoryContents topdir = do + namesAndTypes <- getDirectoryContents topdir + let properNames = filter ((`notElem` [".", ".."]) . snd) namesAndTypes + paths <- forM properNames $ \(typ,name) -> unsafeInterleaveIO $ do + let path = topdir name + case () of + () | typ == dtDir -> allDirectoryContents path + | typ == dtUnknown -> do + isDir <- isDirectory <$> getFileStatus path + if isDir + then allDirectoryContents path + else return [path] + | otherwise -> return [path] + return (topdir : concat paths) + +-- | Get all files from a directory and its subdirectories strictly. +allDirectoryContents' :: RawFilePath -> IO [RawFilePath] +allDirectoryContents' = fmap reverse . traverseDirectory (\acc fp -> return (fp:acc)) [] +-- this uses traverseDirectory because it's more efficient than forcing the +-- lazy version. + +-- | Recursively apply the 'action' to the parent directory and all +-- files/subdirectories. +-- +-- This function allows for memory-efficient traversals. +traverseDirectory :: (s -> RawFilePath -> IO s) -> s -> RawFilePath -> IO s +traverseDirectory act s0 topdir = toploop + where + toploop = do + isDir <- isDirectory <$> getFileStatus topdir + s' <- act s0 topdir + if isDir then actOnDirContents topdir s' loop + else return s' + loop typ path acc = do + isDir <- case () of + () | typ == dtDir -> return True + | typ == dtUnknown -> isDirectory <$> getFileStatus path + | otherwise -> return False + if isDir + then act acc path >>= \acc' -> actOnDirContents path acc' loop + else act acc path + +actOnDirContents :: RawFilePath + -> b + -> (DirType -> RawFilePath -> b -> IO b) + -> IO b +actOnDirContents pathRelToTop b f = + modifyIOError ((`ioeSetFileName` (BS.unpack pathRelToTop)) . + (`ioeSetLocation` "findBSTypRel")) $ do + bracket + (openDirStream pathRelToTop) + (Posix.closeDirStream) + (\dirp -> loop dirp b) + where + loop dirp b' = do + (typ,e) <- readDirEnt dirp + if (e == "") + then return b' + else do + if (e == "." || e == "..") + then loop dirp b' + else f typ (pathRelToTop e) b' >>= loop dirp + + +---------------------------------------------------------- +-- dodgy stuff + +type CDir = () +type CDirent = () + +-- Posix doesn't export DirStream, so to re-use that type we need to use +-- unsafeCoerce. It's just a newtype, so this is a legitimate usage. +-- ugly trick. +unpackDirStream :: DirStream -> Ptr CDir +unpackDirStream = unsafeCoerce + +packDirStream :: Ptr CDir -> DirStream +packDirStream = unsafeCoerce + +-- the __hscore_* functions are defined in the unix package. We can import them and let +-- the linker figure it out. +foreign import ccall unsafe "__hscore_readdir" + c_readdir :: Ptr CDir -> Ptr (Ptr CDirent) -> IO CInt + +foreign import ccall unsafe "__hscore_free_dirent" + c_freeDirEnt :: Ptr CDirent -> IO () + +foreign import ccall unsafe "__hscore_d_name" + c_name :: Ptr CDirent -> IO CString + +foreign import ccall unsafe "__posixdir_d_type" + c_type :: Ptr CDirent -> IO DirType + +foreign import ccall "realpath" + c_realpath :: CString -> CString -> IO CString + +foreign import ccall unsafe "fdopendir" + c_fdopendir :: Posix.Fd -> IO (Ptr ()) + +foreign import ccall unsafe "open" + c_open :: CString -> CInt -> Posix.CMode -> IO CInt + +---------------------------------------------------------- +-- less dodgy but still lower-level + + +readDirEnt :: DirStream -> IO (DirType, RawFilePath) +readDirEnt (unpackDirStream -> dirp) = + alloca $ \ptr_dEnt -> loop ptr_dEnt + where + loop ptr_dEnt = do + resetErrno + r <- c_readdir dirp ptr_dEnt + if (r == 0) + then do + dEnt <- peek ptr_dEnt + if (dEnt == nullPtr) + then return (dtUnknown,BS.empty) + else do + dName <- c_name dEnt >>= peekFilePath + dType <- c_type dEnt + c_freeDirEnt dEnt + return (dType, dName) + else do + errno <- getErrno + if (errno == eINTR) + then loop ptr_dEnt + else do + let (Errno eo) = errno + if (eo == 0) + then return (dtUnknown,BS.empty) + else throwErrno "readDirEnt" + + +getDirectoryContents :: RawFilePath -> IO [(DirType, RawFilePath)] +getDirectoryContents path = + modifyIOError ((`ioeSetFileName` (BS.unpack path)) . + (`ioeSetLocation` "System.Posix.Directory.Traversals.getDirectoryContents")) $ do + bracket + (PosixBS.openDirStream path) + PosixBS.closeDirStream + loop + where + loop dirp = do + t@(_typ,e) <- readDirEnt dirp + if BS.null e then return [] else do + es <- loop dirp + return (t:es) + + +fdOpendir :: Posix.Fd -> IO DirStream +fdOpendir fd = + packDirStream <$> throwErrnoIfNull "fdOpendir" (c_fdopendir fd) + + +getDirectoryContents' :: Posix.Fd -> IO [(DirType, RawFilePath)] +getDirectoryContents' fd = + bracket + (fdOpendir fd) + PosixBS.closeDirStream + loop + where + loop dirp = do + t@(_typ,e) <- readDirEnt dirp + if BS.null e then return [] else do + es <- loop dirp + return (t:es) + + +open_ :: CString + -> Posix.OpenMode + -> [Flags] + -> Maybe Posix.FileMode + -> IO Posix.Fd +open_ str how optional_flags maybe_mode = do + fd <- c_open str all_flags mode_w + return (Posix.Fd fd) + where + all_flags = unionFlags $ optional_flags ++ [open_mode] ++ creat + + + (creat, mode_w) = case maybe_mode of + Nothing -> ([],0) + Just x -> ([oCreat], x) + + open_mode = case how of + Posix.ReadOnly -> oRdonly + Posix.WriteOnly -> oWronly + Posix.ReadWrite -> oRdwr + + +-- |Open and optionally create this file. See 'System.Posix.Files' +-- for information on how to use the 'FileMode' type. +openFd :: RawFilePath + -> Posix.OpenMode + -> [Flags] + -> Maybe Posix.FileMode + -> IO Posix.Fd +openFd name how optional_flags maybe_mode = + withFilePath name $ \str -> + throwErrnoPathIfMinus1Retry "openFd" name $ + open_ str how optional_flags maybe_mode + + +-- | return the canonicalized absolute pathname +-- +-- like canonicalizePath, but uses realpath(3) +realpath :: RawFilePath -> IO RawFilePath +realpath inp = do + allocaBytes pathMax $ \tmp -> do + void $ BS.useAsCString inp $ \cstr -> throwErrnoIfNull "realpath" $ c_realpath cstr tmp + BS.packCString tmp diff --git a/src/System/Posix/FilePath.hs b/src/System/Posix/FilePath.hs new file mode 100644 index 0000000..7c52b7b --- /dev/null +++ b/src/System/Posix/FilePath.hs @@ -0,0 +1,535 @@ +{-# LANGUAGE TupleSections #-} + +{-# OPTIONS_GHC -Wall #-} + +-- | The equivalent of "System.FilePath" on raw (byte string) file paths. +-- +-- Not all functions of "System.FilePath" are implemented yet. Feel free to contribute! +module System.Posix.FilePath ( + + pathSeparator +, isPathSeparator +, searchPathSeparator +, isSearchPathSeparator +, extSeparator +, isExtSeparator + +, splitExtension +, takeExtension +, replaceExtension +, dropExtension +, addExtension +, hasExtension +, (<.>) +, splitExtensions +, dropExtensions +, takeExtensions + +, splitFileName +, takeFileName +, replaceFileName +, dropFileName +, takeBaseName +, replaceBaseName +, takeDirectory +, replaceDirectory +, combine +, () +, splitPath +, joinPath +, normalise +, splitDirectories + +, hasTrailingPathSeparator +, addTrailingPathSeparator +, dropTrailingPathSeparator + +, isRelative +, isAbsolute +, isValid +, equalFilePath + +, module System.Posix.ByteString.FilePath +) where + +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import System.Posix.ByteString.FilePath + +import Data.Maybe (isJust) +import Data.Word8 + +import Control.Arrow (second) + +-- $setup +-- >>> import Data.Char +-- >>> import Test.QuickCheck +-- >>> import Control.Applicative +-- >>> import qualified Data.ByteString as BS +-- >>> import Data.ByteString (ByteString) +-- >>> instance Arbitrary ByteString where arbitrary = BS.pack <$> arbitrary +-- >>> instance CoArbitrary ByteString where coarbitrary = coarbitrary . BS.unpack +-- +-- >>> let _chr :: Word8 -> Char; _chr = chr . fromIntegral + + +-- | Path separator character +pathSeparator :: Word8 +pathSeparator = _slash + +-- | Check if a character is the path separator +-- +-- prop> \n -> (_chr n == '/') == isPathSeparator n +isPathSeparator :: Word8 -> Bool +isPathSeparator = (== pathSeparator) + +-- | Search path separator +searchPathSeparator :: Word8 +searchPathSeparator = _colon + +-- | Check if a character is the search path separator +-- +-- prop> \n -> (_chr n == ':') == isSearchPathSeparator n +isSearchPathSeparator :: Word8 -> Bool +isSearchPathSeparator = (== searchPathSeparator) + +-- | File extension separator +extSeparator :: Word8 +extSeparator = _period + +-- | Check if a character is the file extension separator +-- +-- prop> \n -> (_chr n == '.') == isExtSeparator n +isExtSeparator :: Word8 -> Bool +isExtSeparator = (== extSeparator) + +------------------------ +-- extension stuff + +-- | Split a 'RawFilePath' into a path+filename and extension +-- +-- >>> splitExtension "file.exe" +-- ("file",".exe") +-- >>> splitExtension "file" +-- ("file","") +-- >>> splitExtension "/path/file.tar.gz" +-- ("/path/file.tar",".gz") +-- +-- prop> \path -> uncurry (BS.append) (splitExtension path) == path +splitExtension :: RawFilePath -> (RawFilePath, ByteString) +splitExtension x = if BS.null basename + then (x,BS.empty) + else (BS.append path (BS.init basename),BS.cons extSeparator fileExt) + where + (path,file) = splitFileNameRaw x + (basename,fileExt) = BS.breakEnd isExtSeparator file + +-- | Get the final extension from a 'RawFilePath' +-- +-- >>> takeExtension "file.exe" +-- ".exe" +-- >>> takeExtension "file" +-- "" +-- >>> takeExtension "/path/file.tar.gz" +-- ".gz" +takeExtension :: RawFilePath -> ByteString +takeExtension = snd . splitExtension + +-- | Change a file's extension +-- +-- prop> \path -> let ext = takeExtension path in replaceExtension path ext == path +replaceExtension :: RawFilePath -> ByteString -> RawFilePath +replaceExtension path ext = dropExtension path <.> ext + +-- | Drop the final extension from a 'RawFilePath' +-- +-- >>> dropExtension "file.exe" +-- "file" +-- >>> dropExtension "file" +-- "file" +-- >>> dropExtension "/path/file.tar.gz" +-- "/path/file.tar" +dropExtension :: RawFilePath -> RawFilePath +dropExtension = fst . splitExtension + +-- | Add an extension to a 'RawFilePath' +-- +-- >>> addExtension "file" ".exe" +-- "file.exe" +-- >>> addExtension "file.tar" ".gz" +-- "file.tar.gz" +-- >>> addExtension "/path/" ".ext" +-- "/path/.ext" +addExtension :: RawFilePath -> ByteString -> RawFilePath +addExtension file ext + | BS.null ext = file + | isExtSeparator (BS.head ext) = BS.append file ext + | otherwise = BS.intercalate (BS.singleton extSeparator) [file, ext] + + +-- | Operator version of 'addExtension' +(<.>) :: RawFilePath -> ByteString -> RawFilePath +(<.>) = addExtension + +-- | Check if a 'RawFilePath' has an extension +-- +-- >>> hasExtension "file" +-- False +-- >>> hasExtension "file.tar" +-- True +-- >>> hasExtension "/path.part1/" +-- False +hasExtension :: RawFilePath -> Bool +hasExtension = isJust . BS.elemIndex extSeparator . takeFileName + +-- | Split a 'RawFilePath' on the first extension +-- +-- >>> splitExtensions "/path/file.tar.gz" +-- ("/path/file",".tar.gz") +-- +-- prop> \path -> uncurry addExtension (splitExtensions path) == path +splitExtensions :: RawFilePath -> (RawFilePath, ByteString) +splitExtensions x = if BS.null basename + then (path,fileExt) + else (BS.append path basename,fileExt) + where + (path,file) = splitFileNameRaw x + (basename,fileExt) = BS.break isExtSeparator file + +-- | Remove all extensions from a 'RawFilePath' +-- +-- >>> dropExtensions "/path/file.tar.gz" +-- "/path/file" +dropExtensions :: RawFilePath -> RawFilePath +dropExtensions = fst . splitExtensions + +-- | Take all extensions from a 'RawFilePath' +-- +-- >>> takeExtensions "/path/file.tar.gz" +-- ".tar.gz" +takeExtensions :: RawFilePath -> ByteString +takeExtensions = snd . splitExtensions + +------------------------ +-- more stuff + +-- | Split a 'RawFilePath' into (path,file). 'combine' is the inverse +-- +-- >>> splitFileName "path/file.txt" +-- ("path/","file.txt") +-- >>> splitFileName "path/" +-- ("path/","") +-- >>> splitFileName "file.txt" +-- ("./","file.txt") +-- +-- prop> \path -> uncurry combine (splitFileName path) == path || fst (splitFileName path) == "./" +splitFileName :: RawFilePath -> (RawFilePath, RawFilePath) +splitFileName x = if BS.null path + then (dotSlash, file) + else (path,file) + where + (path,file) = splitFileNameRaw x + dotSlash = _period `BS.cons` (BS.singleton pathSeparator) + + +-- | Get the file name +-- +-- >>> takeFileName "path/file.txt" +-- "file.txt" +-- >>> takeFileName "path/" +-- "" +takeFileName :: RawFilePath -> RawFilePath +takeFileName = snd . splitFileName + +-- | Change the file name +-- +-- prop> \path -> replaceFileName path (takeFileName path) == path +replaceFileName :: RawFilePath -> ByteString -> RawFilePath +replaceFileName x y = fst (splitFileNameRaw x) y + +-- | Drop the file name +-- +-- >>> dropFileName "path/file.txt" +-- "path/" +-- >>> dropFileName "file.txt" +-- "./" +dropFileName :: RawFilePath -> RawFilePath +dropFileName = fst . splitFileName + +-- | Get the file name, without a trailing extension +-- +-- >>> takeBaseName "path/file.tar.gz" +-- "file.tar" +-- >>> takeBaseName "" +-- "" +takeBaseName :: RawFilePath -> ByteString +takeBaseName = dropExtension . takeFileName + +-- | Change the base name +-- +-- >>> replaceBaseName "path/file.tar.gz" "bob" +-- "path/bob.gz" +-- +-- prop> \path -> replaceBaseName path (takeBaseName path) == path +replaceBaseName :: RawFilePath -> ByteString -> RawFilePath +replaceBaseName path name = combineRaw dir (name <.> ext) + where + (dir,file) = splitFileNameRaw path + ext = takeExtension file + +-- | Get the directory, moving up one level if it's already a directory +-- +-- >>> takeDirectory "path/file.txt" +-- "path" +-- >>> takeDirectory "file" +-- "." +-- >>> takeDirectory "/path/to/" +-- "/path/to" +-- >>> takeDirectory "/path/to" +-- "/path" +takeDirectory :: RawFilePath -> RawFilePath +takeDirectory x = case () of + () | x == BS.singleton pathSeparator -> x + | BS.null res && not (BS.null file) -> file + | otherwise -> res + where + res = fst $ BS.spanEnd isPathSeparator file + file = dropFileName x + +-- | Change the directory component of a 'RawFilePath' +-- +-- prop> \path -> replaceDirectory path (takeDirectory path) `equalFilePath` path || takeDirectory path == "." +replaceDirectory :: RawFilePath -> ByteString -> RawFilePath +replaceDirectory file dir = combineRaw dir (takeFileName file) + +-- | Join two paths together +-- +-- >>> combine "/" "file" +-- "/file" +-- >>> combine "/path/to" "file" +-- "/path/to/file" +-- >>> combine "file" "/absolute/path" +-- "/absolute/path" +combine :: RawFilePath -> RawFilePath -> RawFilePath +combine a b | not (BS.null b) && isPathSeparator (BS.head b) = b + | otherwise = combineRaw a b + +-- | Operator version of combine +() :: RawFilePath -> RawFilePath -> RawFilePath +() = combine + +-- | Split a path into a list of components: +-- +-- >>> splitPath "/path/to/file.txt" +-- ["/","path/","to/","file.txt"] +-- +-- prop> \path -> BS.concat (splitPath path) == path +splitPath :: RawFilePath -> [RawFilePath] +splitPath = splitter + where + splitter x + | BS.null x = [] + | otherwise = case BS.elemIndex pathSeparator x of + Nothing -> [x] + Just ix -> case BS.findIndex (not . isPathSeparator) $ BS.drop (ix+1) x of + Nothing -> [x] + Just runlen -> uncurry (:) . second splitter $ BS.splitAt (ix+1+runlen) x + +-- | Like 'splitPath', but without trailing slashes +-- +-- >>> splitDirectories "/path/to/file.txt" +-- ["/","path","to","file.txt"] +-- >>> splitDirectories "" +-- [] +splitDirectories :: RawFilePath -> [RawFilePath] +splitDirectories x + | BS.null x = [] + | isPathSeparator (BS.head x) = let (root,rest) = BS.splitAt 1 x + in root : splitter rest + | otherwise = splitter x + where + splitter = filter (not . BS.null) . BS.split pathSeparator + +-- | Join a split path back together +-- +-- prop> \path -> joinPath (splitPath path) == path +-- +-- >>> joinPath ["path","to","file.txt"] +-- "path/to/file.txt" +joinPath :: [RawFilePath] -> RawFilePath +joinPath = foldr () BS.empty + + +-- |Normalise a file. +-- +-- >>> normalise "/file/\\test////" +-- "/file/\\test/" +-- >>> normalise "/file/./test" +-- "/file/test" +-- >>> normalise "/test/file/../bob/fred/" +-- "/test/file/../bob/fred/" +-- >>> normalise "../bob/fred/" +-- "../bob/fred/" +-- >>> normalise "./bob/fred/" +-- "bob/fred/" +-- >>> normalise "./bob////.fred/./...///./..///#." +-- "bob/.fred/.../../#." +-- >>> normalise "." +-- "." +-- >>> normalise "./" +-- "./" +-- >>> normalise "./." +-- "./" +-- >>> normalise "/./" +-- "/" +-- >>> normalise "/" +-- "/" +-- >>> normalise "bob/fred/." +-- "bob/fred/" +-- >>> normalise "//home" +-- "/home" +normalise :: RawFilePath -> RawFilePath +normalise filepath = + result `BS.append` + (if addPathSeparator + then BS.singleton pathSeparator + else BS.empty) + where + result = let n = f filepath + in if BS.null n + then BS.singleton _period + else n + addPathSeparator = isDirPath filepath && + not (hasTrailingPathSeparator result) + isDirPath xs = hasTrailingPathSeparator xs + || not (BS.null xs) && BS.last xs == _period + && hasTrailingPathSeparator (BS.init xs) + f = joinPath . dropDots . propSep . splitDirectories + propSep :: [ByteString] -> [ByteString] + propSep (x:xs) + | BS.all (== pathSeparator) x = BS.singleton pathSeparator : xs + | otherwise = x : xs + propSep [] = [] + dropDots :: [ByteString] -> [ByteString] + dropDots = filter (BS.singleton _period /=) + + +------------------------ +-- trailing path separators + +-- | Check if the last character of a 'RawFilePath' is '/'. +-- +-- >>> hasTrailingPathSeparator "/path/" +-- True +-- >>> hasTrailingPathSeparator "/" +-- True +-- >>> hasTrailingPathSeparator "/path" +-- False +hasTrailingPathSeparator :: RawFilePath -> Bool +hasTrailingPathSeparator x + | BS.null x = False + | otherwise = isPathSeparator $ BS.last x + +-- | Add a trailing path separator. +-- +-- >>> addTrailingPathSeparator "/path" +-- "/path/" +-- >>> addTrailingPathSeparator "/path/" +-- "/path/" +-- >>> addTrailingPathSeparator "/" +-- "/" +addTrailingPathSeparator :: RawFilePath -> RawFilePath +addTrailingPathSeparator x = if hasTrailingPathSeparator x + then x + else x `BS.snoc` pathSeparator + +-- | Remove a trailing path separator +-- +-- >>> dropTrailingPathSeparator "/path/" +-- "/path" +-- >>> dropTrailingPathSeparator "/path////" +-- "/path" +-- >>> dropTrailingPathSeparator "/" +-- "/" +-- >>> dropTrailingPathSeparator "//" +-- "/" +dropTrailingPathSeparator :: RawFilePath -> RawFilePath +dropTrailingPathSeparator x + | x == BS.singleton pathSeparator = x + | otherwise = if hasTrailingPathSeparator x + then dropTrailingPathSeparator $ BS.init x + else x + +------------------------ +-- Filename/system stuff + +-- | Check if a path is absolute +-- +-- >>> isAbsolute "/path" +-- True +-- >>> isAbsolute "path" +-- False +-- >>> isAbsolute "" +-- False +isAbsolute :: RawFilePath -> Bool +isAbsolute x + | BS.length x > 0 = isPathSeparator (BS.head x) + | otherwise = False + +-- | Check if a path is relative +-- +-- prop> \path -> isRelative path /= isAbsolute path +isRelative :: RawFilePath -> Bool +isRelative = not . isAbsolute + +-- | Is a FilePath valid, i.e. could you create a file like it? +-- +-- >>> isValid "" +-- False +-- >>> isValid "\0" +-- False +-- >>> isValid "/random_ path:*" +-- True +isValid :: RawFilePath -> Bool +isValid filepath + | BS.null filepath = False + | _nul `BS.elem` filepath = False + | otherwise = True + +-- |Equality of two filepaths. The filepaths are normalised +-- and trailing path separators are dropped. +-- +-- >>> equalFilePath "foo" "foo" +-- True +-- >>> equalFilePath "foo" "foo/" +-- True +-- >>> equalFilePath "foo" "./foo" +-- True +-- >>> equalFilePath "foo" "/foo" +-- False +-- >>> equalFilePath "foo" "FOO" +-- False +-- >>> equalFilePath "foo" "../foo" +-- False +-- +-- prop> \p -> equalFilePath p p +equalFilePath :: RawFilePath -> RawFilePath -> Bool +equalFilePath p1 p2 = f p1 == f p2 + where + f x = dropTrailingPathSeparator $ normalise x + +------------------------ +-- internal stuff + +-- Just split the input FileName without adding/normalizing or changing +-- anything. +splitFileNameRaw :: RawFilePath -> (RawFilePath, RawFilePath) +splitFileNameRaw x = BS.breakEnd isPathSeparator x + +-- | Combine two paths, assuming rhs is NOT absolute. +combineRaw :: RawFilePath -> RawFilePath -> RawFilePath +combineRaw a b | BS.null a = b + | BS.null b = a + | isPathSeparator (BS.last a) = BS.append a b + | otherwise = BS.intercalate (BS.singleton pathSeparator) [a, b] +