You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

265 lines
7.9 KiB

  1. -- |
  2. -- Module : System.Posix.Directory.Traversals
  3. -- Copyright : © 2016 Julian Ospald
  4. -- License : BSD3
  5. --
  6. -- Maintainer : Julian Ospald <hasufell@posteo.de>
  7. -- Stability : experimental
  8. -- Portability : portable
  9. --
  10. -- Traversal and read operations on directories.
  11. {-# LANGUAGE CPP #-}
  12. {-# LANGUAGE ForeignFunctionInterface #-}
  13. {-# LANGUAGE OverloadedStrings #-}
  14. {-# LANGUAGE PackageImports #-}
  15. {-# LANGUAGE TupleSections #-}
  16. {-# LANGUAGE ViewPatterns #-}
  17. {-# OPTIONS_GHC -Wall #-}
  18. module System.Posix.Directory.Traversals (
  19. getDirectoryContents
  20. , getDirectoryContents'
  21. , allDirectoryContents
  22. , allDirectoryContents'
  23. , traverseDirectory
  24. -- lower-level stuff
  25. , readDirEnt
  26. , packDirStream
  27. , unpackDirStream
  28. , fdOpendir
  29. , realpath
  30. ) where
  31. #if __GLASGOW_HASKELL__ < 710
  32. import Control.Applicative ((<$>))
  33. #endif
  34. import Control.Monad
  35. import System.Posix.FilePath ((</>))
  36. import System.Posix.Directory.Foreign
  37. import qualified System.Posix as Posix
  38. import System.IO.Error
  39. import Control.Exception
  40. import qualified Data.ByteString.Char8 as BS
  41. import System.Posix.ByteString.FilePath
  42. import System.Posix.Directory.ByteString as PosixBS
  43. import System.Posix.Files.ByteString
  44. import System.IO.Unsafe
  45. import "unix" System.Posix.IO.ByteString (closeFd)
  46. import Unsafe.Coerce (unsafeCoerce)
  47. import Foreign.C.Error
  48. import Foreign.C.String
  49. import Foreign.C.Types
  50. import Foreign.Marshal.Alloc (alloca,allocaBytes)
  51. import Foreign.Ptr
  52. import Foreign.Storable
  53. ----------------------------------------------------------
  54. -- | Get all files from a directory and its subdirectories.
  55. --
  56. -- Upon entering a directory, 'allDirectoryContents' will get all entries
  57. -- strictly. However the returned list is lazy in that directories will only
  58. -- be accessed on demand.
  59. --
  60. -- Follows symbolic links for the input dir.
  61. allDirectoryContents :: RawFilePath -> IO [RawFilePath]
  62. allDirectoryContents topdir = do
  63. namesAndTypes <- getDirectoryContents topdir
  64. let properNames = filter ((`notElem` [".", ".."]) . snd) namesAndTypes
  65. paths <- forM properNames $ \(typ,name) -> unsafeInterleaveIO $ do
  66. let path = topdir </> name
  67. case () of
  68. () | typ == dtDir -> allDirectoryContents path
  69. | typ == dtUnknown -> do
  70. isDir <- isDirectory <$> getFileStatus path
  71. if isDir
  72. then allDirectoryContents path
  73. else return [path]
  74. | otherwise -> return [path]
  75. return (topdir : concat paths)
  76. -- | Get all files from a directory and its subdirectories strictly.
  77. --
  78. -- Follows symbolic links for the input dir.
  79. allDirectoryContents' :: RawFilePath -> IO [RawFilePath]
  80. allDirectoryContents' = fmap reverse . traverseDirectory (\acc fp -> return (fp:acc)) []
  81. -- this uses traverseDirectory because it's more efficient than forcing the
  82. -- lazy version.
  83. -- | Recursively apply the 'action' to the parent directory and all
  84. -- files/subdirectories.
  85. --
  86. -- This function allows for memory-efficient traversals.
  87. --
  88. -- Follows symbolic links for the input dir.
  89. traverseDirectory :: (s -> RawFilePath -> IO s) -> s -> RawFilePath -> IO s
  90. traverseDirectory act s0 topdir = toploop
  91. where
  92. toploop = do
  93. isDir <- isDirectory <$> getFileStatus topdir
  94. s' <- act s0 topdir
  95. if isDir then actOnDirContents topdir s' loop
  96. else return s'
  97. loop typ path acc = do
  98. isDir <- case () of
  99. () | typ == dtDir -> return True
  100. | typ == dtUnknown -> isDirectory <$> getFileStatus path
  101. | otherwise -> return False
  102. if isDir
  103. then act acc path >>= \acc' -> actOnDirContents path acc' loop
  104. else act acc path
  105. actOnDirContents :: RawFilePath
  106. -> b
  107. -> (DirType -> RawFilePath -> b -> IO b)
  108. -> IO b
  109. actOnDirContents pathRelToTop b f =
  110. modifyIOError ((`ioeSetFileName` (BS.unpack pathRelToTop)) .
  111. (`ioeSetLocation` "findBSTypRel")) $
  112. bracket
  113. (openDirStream pathRelToTop)
  114. Posix.closeDirStream
  115. (\dirp -> loop dirp b)
  116. where
  117. loop dirp b' = do
  118. (typ,e) <- readDirEnt dirp
  119. if (e == "")
  120. then return b'
  121. else
  122. if (e == "." || e == "..")
  123. then loop dirp b'
  124. else f typ (pathRelToTop </> e) b' >>= loop dirp
  125. ----------------------------------------------------------
  126. -- dodgy stuff
  127. type CDir = ()
  128. type CDirent = ()
  129. -- Posix doesn't export DirStream, so to re-use that type we need to use
  130. -- unsafeCoerce. It's just a newtype, so this is a legitimate usage.
  131. -- ugly trick.
  132. unpackDirStream :: DirStream -> Ptr CDir
  133. unpackDirStream = unsafeCoerce
  134. packDirStream :: Ptr CDir -> DirStream
  135. packDirStream = unsafeCoerce
  136. -- the __hscore_* functions are defined in the unix package. We can import them and let
  137. -- the linker figure it out.
  138. foreign import ccall unsafe "__hscore_readdir"
  139. c_readdir :: Ptr CDir -> Ptr (Ptr CDirent) -> IO CInt
  140. foreign import ccall unsafe "__hscore_free_dirent"
  141. c_freeDirEnt :: Ptr CDirent -> IO ()
  142. foreign import ccall unsafe "__hscore_d_name"
  143. c_name :: Ptr CDirent -> IO CString
  144. foreign import ccall unsafe "__posixdir_d_type"
  145. c_type :: Ptr CDirent -> IO DirType
  146. foreign import ccall "realpath"
  147. c_realpath :: CString -> CString -> IO CString
  148. foreign import ccall unsafe "fdopendir"
  149. c_fdopendir :: Posix.Fd -> IO (Ptr ())
  150. ----------------------------------------------------------
  151. -- less dodgy but still lower-level
  152. readDirEnt :: DirStream -> IO (DirType, RawFilePath)
  153. readDirEnt (unpackDirStream -> dirp) =
  154. alloca $ \ptr_dEnt -> loop ptr_dEnt
  155. where
  156. loop ptr_dEnt = do
  157. resetErrno
  158. r <- c_readdir dirp ptr_dEnt
  159. if (r == 0)
  160. then do
  161. dEnt <- peek ptr_dEnt
  162. if (dEnt == nullPtr)
  163. then return (dtUnknown,BS.empty)
  164. else do
  165. dName <- c_name dEnt >>= peekFilePath
  166. dType <- c_type dEnt
  167. c_freeDirEnt dEnt
  168. return (dType, dName)
  169. else do
  170. errno <- getErrno
  171. if (errno == eINTR)
  172. then loop ptr_dEnt
  173. else do
  174. let (Errno eo) = errno
  175. if (eo == 0)
  176. then return (dtUnknown,BS.empty)
  177. else throwErrno "readDirEnt"
  178. -- |Gets all directory contents (not recursively).
  179. getDirectoryContents :: RawFilePath -> IO [(DirType, RawFilePath)]
  180. getDirectoryContents path =
  181. modifyIOError ((`ioeSetFileName` (BS.unpack path)) .
  182. (`ioeSetLocation` "System.Posix.Directory.Traversals.getDirectoryContents")) $
  183. bracket
  184. (PosixBS.openDirStream path)
  185. PosixBS.closeDirStream
  186. _dirloop
  187. -- |Binding to @fdopendir(3)@.
  188. fdOpendir :: Posix.Fd -> IO DirStream
  189. fdOpendir fd =
  190. packDirStream <$> throwErrnoIfNull "fdOpendir" (c_fdopendir fd)
  191. -- |Like `getDirectoryContents` except for a file descriptor.
  192. --
  193. -- To avoid complicated error checks, the file descriptor is
  194. -- __always__ closed, even if `fdOpendir` fails. Usually, this
  195. -- only happens on successful `fdOpendir` and after the directory
  196. -- stream is closed. Also see the manpage of @fdopendir(3)@ for
  197. -- more details.
  198. getDirectoryContents' :: Posix.Fd -> IO [(DirType, RawFilePath)]
  199. getDirectoryContents' fd = do
  200. dirstream <- fdOpendir fd `catchIOError` \e -> do
  201. closeFd fd
  202. ioError e
  203. -- closeDirStream closes the filedescriptor
  204. finally (_dirloop dirstream) (PosixBS.closeDirStream dirstream)
  205. _dirloop :: DirStream -> IO [(DirType, RawFilePath)]
  206. {-# INLINE _dirloop #-}
  207. _dirloop dirp = do
  208. t@(_typ,e) <- readDirEnt dirp
  209. if BS.null e then return [] else do
  210. es <- _dirloop dirp
  211. return (t:es)
  212. -- | return the canonicalized absolute pathname
  213. --
  214. -- like canonicalizePath, but uses @realpath(3)@
  215. realpath :: RawFilePath -> IO RawFilePath
  216. realpath inp =
  217. allocaBytes pathMax $ \tmp -> do
  218. void $ BS.useAsCString inp $ \cstr -> throwErrnoIfNull "realpath" $ c_realpath cstr tmp
  219. BS.packCString tmp