ghcup-hs/lib/GHCup/Download.hs

506 lines
17 KiB
Haskell
Raw Normal View History

{-# LANGUAGE CPP #-}
2020-01-11 20:15:05 +00:00
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
2020-03-21 21:19:37 +00:00
{-# LANGUAGE OverloadedStrings #-}
2020-01-11 20:15:05 +00:00
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
2020-03-21 21:19:37 +00:00
{-# LANGUAGE TypeFamilies #-}
2020-01-11 20:15:05 +00:00
2020-07-21 23:08:58 +00:00
{-|
Module : GHCup.Download
Description : Downloading
Copyright : (c) Julian Ospald, 2020
2020-07-30 18:04:02 +00:00
License : LGPL-3.0
2020-07-21 23:08:58 +00:00
Maintainer : hasufell@hasufell.de
Stability : experimental
Portability : POSIX
Module for handling all download related functions.
Generally we support downloading via:
- curl (default)
- wget
- internal downloader (only when compiled)
-}
2020-01-11 20:15:05 +00:00
module GHCup.Download where
#if defined(INTERNAL_DOWNLOADER)
import GHCup.Download.IOStreams
import GHCup.Download.Utils
#endif
2020-01-11 20:15:05 +00:00
import GHCup.Errors
import GHCup.Types
import GHCup.Types.JSON ( )
import GHCup.Types.Optics
import GHCup.Utils
import GHCup.Utils.File
import GHCup.Utils.Prelude
import GHCup.Version
2020-01-11 20:15:05 +00:00
import Control.Applicative
import Control.Exception.Safe
import Control.Monad
2020-04-09 17:53:22 +00:00
#if !MIN_VERSION_base(4,13,0)
import Control.Monad.Fail ( MonadFail )
#endif
2020-01-11 20:15:05 +00:00
import Control.Monad.Logger
import Control.Monad.Reader
import Control.Monad.Trans.Resource
hiding ( throwM )
import Data.Aeson
2020-08-09 15:39:02 +00:00
import Data.Bifunctor
2020-01-11 20:15:05 +00:00
import Data.ByteString ( ByteString )
2020-04-29 17:36:16 +00:00
#if defined(INTERNAL_DOWNLOADER)
2020-01-11 20:15:05 +00:00
import Data.CaseInsensitive ( CI )
2020-04-09 17:53:22 +00:00
#endif
2020-01-11 20:15:05 +00:00
import Data.Maybe
import Data.String.Interpolate
import Data.Time.Clock
import Data.Time.Clock.POSIX
#if defined(INTERNAL_DOWNLOADER)
2020-01-11 20:15:05 +00:00
import Data.Time.Format
2020-04-09 17:53:22 +00:00
#endif
2020-01-11 20:15:05 +00:00
import Data.Versions
2020-04-29 17:12:58 +00:00
import Data.Word8
2020-01-11 20:15:05 +00:00
import GHC.IO.Exception
import HPath
import HPath.IO as HIO hiding ( hideError )
2020-01-11 20:15:05 +00:00
import Haskus.Utils.Variant.Excepts
import Optics
import Prelude hiding ( abs
, readFile
, writeFile
)
import System.IO.Error
2020-04-29 17:12:58 +00:00
import System.Posix.Env.ByteString ( getEnv )
2020-01-11 20:15:05 +00:00
import URI.ByteString
2020-04-09 16:27:07 +00:00
import qualified Crypto.Hash.SHA256 as SHA256
2020-04-29 17:12:58 +00:00
import qualified Data.ByteString as BS
2020-04-09 16:27:07 +00:00
import qualified Data.ByteString.Base16 as B16
2020-01-11 20:15:05 +00:00
import qualified Data.ByteString.Lazy as L
#if defined(INTERNAL_DOWNLOADER)
2020-01-11 20:15:05 +00:00
import qualified Data.CaseInsensitive as CI
import qualified Data.Map.Strict as M
import qualified Data.Text as T
2020-04-09 17:53:22 +00:00
#endif
2020-01-11 20:15:05 +00:00
import qualified Data.Text.Encoding as E
2020-08-09 15:39:02 +00:00
import qualified Data.Yaml as Y
2020-01-11 20:15:05 +00:00
import qualified System.Posix.Files.ByteString as PF
import qualified System.Posix.RawFilePath.Directory
as RD
------------------
--[ High-level ]--
------------------
2020-04-27 21:23:34 +00:00
-- | Like 'getDownloads', but tries to fall back to
2020-08-09 15:39:02 +00:00
-- cached ~/.ghcup/cache/ghcup-<format-ver>.yaml
2020-04-27 21:23:34 +00:00
getDownloadsF :: ( FromJSONKey Tool
, FromJSONKey Version
, FromJSON VersionInfo
, MonadIO m
, MonadCatch m
, MonadLogger m
, MonadThrow m
, MonadFail m
2020-04-29 17:12:58 +00:00
, MonadReader Settings m
2020-04-27 21:23:34 +00:00
)
=> URLSource
-> Excepts
'[JSONError , DownloadFailed , FileDoesNotExistError]
m
GHCupInfo
getDownloadsF urlSource = do
case urlSource of
GHCupURL ->
liftE
$ handleIO (\_ -> readFromCache)
$ catchE @_ @'[JSONError , FileDoesNotExistError]
(\(DownloadFailed _) -> readFromCache)
$ getDownloads urlSource
(OwnSource _) -> liftE $ getDownloads urlSource
(OwnSpec _) -> liftE $ getDownloads urlSource
where
readFromCache = do
Settings {dirs = Dirs {..}} <- lift ask
2020-04-27 21:23:34 +00:00
lift $ $(logWarn)
[i|Could not get download info, trying cached version (this may not be recent!)|]
let path = view pathL' ghcupURL
2020-08-09 15:39:02 +00:00
yaml_file <- (cacheDir </>) <$> urlBaseName path
2020-04-27 21:23:34 +00:00
bs <-
handleIO' NoSuchThing
2020-08-09 15:39:02 +00:00
(\_ -> throwE $ FileDoesNotExistError (toFilePath yaml_file))
2020-04-27 21:23:34 +00:00
$ liftIO
2020-08-09 15:39:02 +00:00
$ readFile yaml_file
lE' JSONDecodeError $ bimap show id $ Y.decodeEither' (L.toStrict bs)
2020-04-27 21:23:34 +00:00
2020-01-11 20:15:05 +00:00
-- | Downloads the download information! But only if we need to ;P
getDownloads :: ( FromJSONKey Tool
, FromJSONKey Version
, FromJSON VersionInfo
, MonadIO m
, MonadCatch m
, MonadLogger m
, MonadThrow m
, MonadFail m
2020-04-29 17:12:58 +00:00
, MonadReader Settings m
2020-01-11 20:15:05 +00:00
)
2020-03-17 17:39:01 +00:00
=> URLSource
2020-04-10 15:36:27 +00:00
-> Excepts '[JSONError , DownloadFailed] m GHCupInfo
2020-03-17 17:39:01 +00:00
getDownloads urlSource = do
2020-01-11 20:15:05 +00:00
lift $ $(logDebug) [i|Receiving download info from: #{urlSource}|]
case urlSource of
GHCupURL -> do
2020-03-09 19:49:10 +00:00
bs <- reThrowAll DownloadFailed $ smartDl ghcupURL
2020-08-09 15:39:02 +00:00
lE' JSONDecodeError $ bimap show id $ Y.decodeEither' (L.toStrict bs)
2020-01-11 20:15:05 +00:00
(OwnSource url) -> do
2020-03-09 19:49:10 +00:00
bs <- reThrowAll DownloadFailed $ downloadBS url
2020-08-09 15:39:02 +00:00
lE' JSONDecodeError $ bimap show id $ Y.decodeEither' (L.toStrict bs)
2020-01-11 20:15:05 +00:00
(OwnSpec av) -> pure $ av
where
-- First check if the json file is in the ~/.ghcup/cache dir
-- and check it's access time. If it has been accessed within the
-- last 5 minutes, just reuse it.
--
-- If not, then send a HEAD request and check for modification time.
-- Only download the file if the modification time is newer
-- than the local file.
--
-- Always save the local file with the mod time of the remote file.
2020-03-09 19:49:10 +00:00
smartDl :: forall m1
2020-04-29 17:12:58 +00:00
. ( MonadCatch m1
, MonadIO m1
, MonadFail m1
, MonadLogger m1
, MonadReader Settings m1
)
2020-03-17 17:39:01 +00:00
=> URI
-> Excepts
'[ FileDoesNotExistError
, HTTPStatusError
, URIParseError
, UnsupportedScheme
, NoLocationHeader
, TooManyRedirs
, ProcessError
2020-03-17 17:39:01 +00:00
]
m1
L.ByteString
2020-03-09 19:49:10 +00:00
smartDl uri' = do
Settings {dirs = Dirs {..}} <- lift ask
2020-01-11 20:15:05 +00:00
let path = view pathL' uri'
2020-03-17 18:16:21 +00:00
json_file <- (cacheDir </>) <$> urlBaseName path
e <- liftIO $ doesFileExist json_file
2020-01-11 20:15:05 +00:00
if e
then do
accessTime <-
PF.accessTimeHiRes
<$> (liftIO $ PF.getFileStatus (toFilePath json_file))
currentTime <- liftIO $ getPOSIXTime
-- access time won't work on most linuxes, but we can try regardless
if (currentTime - accessTime) > 300
then do -- no access in last 5 minutes, re-check upstream mod time
getModTime >>= \case
Just modTime -> do
fileMod <- liftIO $ getModificationTime json_file
if modTime > fileMod
2020-04-27 21:23:34 +00:00
then dlWithMod modTime json_file
2020-01-11 20:15:05 +00:00
else liftIO $ readFile json_file
Nothing -> do
lift $ $(logDebug) [i|Unable to get/parse Last-Modified header|]
2020-04-27 21:23:34 +00:00
dlWithoutMod json_file
2020-01-11 20:15:05 +00:00
else -- access in less than 5 minutes, re-use file
liftIO $ readFile json_file
else do
liftIO $ createDirRecursive' cacheDir
2020-01-11 20:15:05 +00:00
getModTime >>= \case
2020-04-27 21:23:34 +00:00
Just modTime -> dlWithMod modTime json_file
2020-01-11 20:15:05 +00:00
Nothing -> do
2020-04-27 21:23:34 +00:00
-- although we don't know last-modified, we still save
-- it to a file, so we might use it in offline mode
lift $ $(logDebug) [i|Unable to get/parse Last-Modified header|]
2020-04-27 21:23:34 +00:00
dlWithoutMod json_file
2020-01-11 20:15:05 +00:00
where
2020-04-27 21:23:34 +00:00
dlWithMod modTime json_file = do
bs <- liftE $ downloadBS uri'
liftIO $ writeFileWithModTime modTime json_file bs
pure bs
dlWithoutMod json_file = do
bs <- liftE $ downloadBS uri'
liftIO $ hideError doesNotExistErrorType $ deleteFile json_file
liftIO $ writeFileL json_file (Just newFilePerms) bs
liftIO $ setModificationTime json_file (fromIntegral @Int 0)
pure bs
2020-01-11 20:15:05 +00:00
getModTime = do
#if !defined(INTERNAL_DOWNLOADER)
pure Nothing
#else
2020-01-11 20:15:05 +00:00
headers <-
handleIO (\_ -> pure mempty)
$ liftE
$ ( catchAllE
(\_ ->
pure mempty :: Excepts '[] m1 (M.Map (CI ByteString) ByteString)
)
$ getHead uri'
)
pure $ parseModifiedHeader headers
parseModifiedHeader :: (M.Map (CI ByteString) ByteString) -> Maybe UTCTime
parseModifiedHeader headers =
2020-03-21 21:19:37 +00:00
(M.lookup (CI.mk "Last-Modified") headers) >>= \h -> parseTimeM
2020-01-11 20:15:05 +00:00
True
defaultTimeLocale
"%a, %d %b %Y %H:%M:%S %Z"
2020-04-17 07:30:45 +00:00
(T.unpack . decUTF8Safe $ h)
2020-01-11 20:15:05 +00:00
2020-04-09 17:53:22 +00:00
#endif
2020-01-11 20:15:05 +00:00
writeFileWithModTime :: UTCTime -> Path Abs -> L.ByteString -> IO ()
writeFileWithModTime utctime path content = do
let mod_time = utcTimeToPOSIXSeconds utctime
writeFileL path (Just newFilePerms) content
setModificationTimeHiRes path mod_time
2020-04-10 17:27:17 +00:00
getDownloadInfo :: Tool
2020-01-11 20:15:05 +00:00
-> Version
-- ^ tool version
2020-04-10 17:27:17 +00:00
-> PlatformRequest
-> GHCupDownloads
-> Either NoDownload DownloadInfo
getDownloadInfo t v (PlatformRequest a p mv) dls = maybe
2020-01-11 20:15:05 +00:00
(Left NoDownload)
Right
(case p of
-- non-musl won't work on alpine
Linux Alpine -> with_distro <|> without_distro_ver
_ -> with_distro <|> without_distro_ver <|> without_distro)
2020-01-11 20:15:05 +00:00
where
with_distro = distro_preview id id
without_distro_ver = distro_preview id (const Nothing)
without_distro = distro_preview (set _Linux UnknownLinux) (const Nothing)
distro_preview f g =
preview (ix t % ix v % viArch % ix a % ix (f p) % ix (g mv)) dls
-- | Tries to download from the given http or https url
-- and saves the result in continuous memory into a file.
-- If the filename is not provided, then we:
-- 1. try to guess the filename from the url path
-- 2. otherwise create a random file
--
-- The file must not exist.
download :: ( MonadMask m
, MonadReader Settings m
, MonadThrow m
, MonadLogger m
, MonadIO m
)
=> DownloadInfo
-> Path Abs -- ^ destination dir
-> Maybe (Path Rel) -- ^ optional filename
-> Excepts '[DigestError , DownloadFailed] m (Path Abs)
download dli dest mfn
2020-03-21 21:19:37 +00:00
| scheme == "https" = dl
| scheme == "http" = dl
| scheme == "file" = cp
2020-01-11 20:15:05 +00:00
| otherwise = throwE $ DownloadFailed (variantFromValue UnsupportedScheme)
where
scheme = view (dlUri % uriSchemeL' % schemeBSL') dli
cp = do
-- destination dir must exist
liftIO $ createDirRecursive' dest
2020-01-11 20:15:05 +00:00
destFile <- getDestFile
fromFile <- parseAbs path
liftIO $ copyFile fromFile destFile Strict
pure destFile
dl = do
2020-04-17 07:30:45 +00:00
let uri' = decUTF8Safe (serializeURIRef' (view dlUri dli))
2020-01-11 20:15:05 +00:00
lift $ $(logInfo) [i|downloading: #{uri'}|]
-- destination dir must exist
liftIO $ createDirRecursive' dest
2020-01-11 20:15:05 +00:00
destFile <- getDestFile
-- download
flip onException
(liftIO $ hideError doesNotExistErrorType $ deleteFile destFile)
$ catchAllE @_ @'[ProcessError, DownloadFailed, UnsupportedScheme]
(\e ->
(liftIO $ hideError doesNotExistErrorType $ deleteFile destFile)
>> (throwE . DownloadFailed $ e)
) $ do
2020-04-29 17:12:58 +00:00
lift getDownloader >>= \case
Curl -> do
o' <- liftIO getCurlOpts
liftE $ lEM @_ @'[ProcessError] $ liftIO $ exec "curl" True
(o' ++ ["-fL", "-o", toFilePath destFile, serializeURIRef' $ view dlUri dli]) Nothing Nothing
Wget -> do
o' <- liftIO getWgetOpts
liftE $ lEM @_ @'[ProcessError] $ liftIO $ exec "wget" True
(o' ++ ["-O", toFilePath destFile , serializeURIRef' $ view dlUri dli]) Nothing Nothing
#if defined(INTERNAL_DOWNLOADER)
Internal -> do
(https, host, fullPath, port) <- liftE $ uriToQuadruple (view dlUri dli)
liftE $ downloadToFile https host fullPath port destFile
#endif
2020-01-11 20:15:05 +00:00
liftE $ checkDigest dli destFile
pure destFile
-- Manage to find a file we can write the body into.
getDestFile :: MonadThrow m => m (Path Abs)
getDestFile = maybe (urlBaseName path <&> (dest </>)) (pure . (dest </>)) mfn
path = view (dlUri % pathL') dli
-- | Download into tmpdir or use cached version, if it exists. If filename
-- is omitted, infers the filename from the url.
downloadCached :: ( MonadMask m
, MonadResource m
, MonadThrow m
, MonadLogger m
, MonadIO m
, MonadReader Settings m
)
=> DownloadInfo
-> Maybe (Path Rel) -- ^ optional filename
-> Excepts '[DigestError , DownloadFailed] m (Path Abs)
downloadCached dli mfn = do
cache <- lift getCache
case cache of
True -> do
Settings {dirs = Dirs {..}} <- lift ask
2020-01-11 20:15:05 +00:00
fn <- maybe (urlBaseName $ view (dlUri % pathL') dli) pure mfn
let cachfile = cacheDir </> fn
2020-01-11 20:15:05 +00:00
fileExists <- liftIO $ doesFileExist cachfile
if
| fileExists -> do
liftE $ checkDigest dli cachfile
pure $ cachfile
| otherwise -> liftE $ download dli cacheDir mfn
2020-01-11 20:15:05 +00:00
False -> do
tmp <- lift withGHCupTmpDir
liftE $ download dli tmp mfn
------------------
--[ Low-level ]--
------------------
2020-01-11 20:15:05 +00:00
-- | This is used for downloading the JSON.
2020-04-29 17:12:58 +00:00
downloadBS :: (MonadReader Settings m, MonadCatch m, MonadIO m, MonadLogger m)
2020-01-11 20:15:05 +00:00
=> URI
-> Excepts
'[ FileDoesNotExistError
, HTTPStatusError
, URIParseError
, UnsupportedScheme
, NoLocationHeader
, TooManyRedirs
, ProcessError
2020-01-11 20:15:05 +00:00
]
m
L.ByteString
downloadBS uri'
2020-03-21 21:19:37 +00:00
| scheme == "https"
2020-01-11 20:15:05 +00:00
= dl True
2020-03-21 21:19:37 +00:00
| scheme == "http"
2020-01-11 20:15:05 +00:00
= dl False
2020-03-21 21:19:37 +00:00
| scheme == "file"
2020-01-11 20:15:05 +00:00
= liftIOException doesNotExistErrorType (FileDoesNotExistError path)
$ (liftIO $ RD.readFile path)
| otherwise
= throwE UnsupportedScheme
where
scheme = view (uriSchemeL' % schemeBSL') uri'
path = view pathL' uri'
2020-04-29 17:12:58 +00:00
#if defined(INTERNAL_DOWNLOADER)
dl https = do
#else
2020-04-09 17:53:22 +00:00
dl _ = do
2020-04-29 17:12:58 +00:00
#endif
2020-04-27 21:23:34 +00:00
lift $ $(logDebug) [i|downloading: #{serializeURIRef' uri'}|]
2020-04-29 17:12:58 +00:00
lift getDownloader >>= \case
Curl -> do
o' <- liftIO getCurlOpts
let exe = [rel|curl|]
args = o' ++ ["-sSfL", serializeURIRef' uri']
liftIO (executeOut exe args Nothing) >>= \case
CapturedProcess ExitSuccess stdout _ -> do
pure $ L.fromStrict stdout
CapturedProcess (ExitFailure i') _ _ -> throwE $ NonZeroExit i' (toFilePath exe) args
Wget -> do
o' <- liftIO getWgetOpts
let exe = [rel|wget|]
args = o' ++ ["-qO-", serializeURIRef' uri']
liftIO (executeOut exe args Nothing) >>= \case
CapturedProcess ExitSuccess stdout _ -> do
pure $ L.fromStrict stdout
CapturedProcess (ExitFailure i') _ _ -> throwE $ NonZeroExit i' (toFilePath exe) args
#if defined(INTERNAL_DOWNLOADER)
Internal -> do
(_, host', fullPath', port') <- liftE $ uriToQuadruple uri'
liftE $ downloadBS' https host' fullPath' port'
#endif
2020-01-11 20:15:05 +00:00
2020-04-17 07:30:45 +00:00
checkDigest :: (MonadIO m, MonadThrow m, MonadLogger m, MonadReader Settings m)
2020-01-11 20:15:05 +00:00
=> DownloadInfo
-> Path Abs
-> Excepts '[DigestError] m ()
checkDigest dli file = do
verify <- lift ask <&> (not . noVerify)
when verify $ do
2020-04-17 07:30:45 +00:00
p' <- toFilePath <$> basename file
2020-03-17 00:59:23 +00:00
lift $ $(logInfo) [i|verifying digest of: #{p'}|]
2020-01-11 20:15:05 +00:00
c <- liftIO $ readFile file
2020-04-17 07:30:45 +00:00
cDigest <- throwEither . E.decodeUtf8' . B16.encode . SHA256.hashlazy $ c
let eDigest = view dlHash dli
2020-01-11 20:15:05 +00:00
when ((cDigest /= eDigest) && verify) $ throwE (DigestError cDigest eDigest)
2020-04-29 17:12:58 +00:00
-- | Get additional curl args from env. This is an undocumented option.
getCurlOpts :: IO [ByteString]
getCurlOpts =
getEnv "GHCUP_CURL_OPTS" >>= \case
Just r -> pure $ BS.split _space r
Nothing -> pure []
-- | Get additional wget args from env. This is an undocumented option.
getWgetOpts :: IO [ByteString]
getWgetOpts =
getEnv "GHCUP_WGET_OPTS" >>= \case
Just r -> pure $ BS.split _space r
Nothing -> pure []