From fdf45e0fe66721f77f4fda91e3418701d1960dac Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sat, 24 Jul 2021 16:36:31 +0200 Subject: [PATCH] Do etags hashing wrt #193 --- .gitlab/script/ghcup_version.sh | 57 ++++- app/ghcup-gen/Validate.hs | 4 +- lib/GHCup.hs | 2 +- lib/GHCup/Download.hs | 391 +++++++++++++++----------------- lib/GHCup/Download/IOStreams.hs | 125 +++------- lib/GHCup/Errors.hs | 24 +- lib/GHCup/Utils/File/Windows.hs | 14 ++ lib/GHCup/Utils/Prelude.hs | 36 +-- 8 files changed, 314 insertions(+), 339 deletions(-) diff --git a/.gitlab/script/ghcup_version.sh b/.gitlab/script/ghcup_version.sh index cd14874..714732f 100755 --- a/.gitlab/script/ghcup_version.sh +++ b/.gitlab/script/ghcup_version.sh @@ -12,6 +12,10 @@ ecabal() { cabal "$@" } +raw_eghcup() { + ghcup -v -c "$@" +} + eghcup() { if [ "${OS}" = "WINDOWS" ] ; then ghcup -v -c -s file:/$CI_PROJECT_DIR/ghcup-${JSON_VERSION}.yaml "$@" @@ -20,6 +24,12 @@ eghcup() { fi } +if [ "${OS}" = "WINDOWS" ] ; then + GHCUP_DIR="${GHCUP_INSTALL_BASE_PREFIX}"/ghcup +else + GHCUP_DIR="${GHCUP_INSTALL_BASE_PREFIX}"/.ghcup +fi + git describe --always ### build @@ -65,11 +75,7 @@ fi ### cleanup -if [ "${OS}" = "WINDOWS" ] ; then - rm -rf "${GHCUP_INSTALL_BASE_PREFIX}"/ghcup -else - rm -rf "${GHCUP_INSTALL_BASE_PREFIX}"/.ghcup -fi +rm -rf "${GHCUP_DIR}" ### manual cli based testing @@ -88,6 +94,7 @@ cabal --version eghcup debug-info +# also test etags eghcup list eghcup list -t ghc eghcup list -t cabal @@ -155,6 +162,40 @@ if [ "${OS}" = "LINUX" ] ; then fi fi +sha_sum() { + if [ "${OS}" = "FREEBSD" ] ; then + sha256 "$@" + else + sha256sum "$@" + fi + +} + +# test etags +rm -f "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml" +raw_eghcup -s https://www.haskell.org/ghcup/data/ghcup-${JSON_VERSION}.yaml list +# snapshot yaml and etags file +etag=$(cat "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml.etags") +sha=$(sha_sum "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml") +# invalidate access time timer, which is 5minutes, so we re-download +touch -a -m -t '199901010101' "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml" +# redownload same file with some newlines added +raw_eghcup -s https://www.haskell.org/ghcup/exp/ghcup-${JSON_VERSION}.yaml list +# snapshot new yaml and etags file +etag2=$(cat "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml.etags") +sha2=$(sha_sum "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml") +# compare +[ "${etag}" != "${etag2}" ] +[ "${sha}" != "${sha2}" ] +# invalidate access time timer, which is 5minutes, but don't expect a re-download +touch -a -m -t '199901010101' "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml" +# this time, we expect the same hash and etag +raw_eghcup -s https://www.haskell.org/ghcup/exp/ghcup-${JSON_VERSION}.yaml list +etag3=$(cat "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml.etags") +sha3=$(sha_sum "${GHCUP_DIR}/cache/ghcup-${JSON_VERSION}.yaml") +[ "${etag2}" = "${etag3}" ] +[ "${sha2}" = "${sha3}" ] + eghcup upgrade eghcup upgrade -f @@ -162,8 +203,4 @@ eghcup upgrade -f # nuke eghcup nuke -if [ "${OS}" = "WINDOWS" ] ; then - [ ! -e "${GHCUP_INSTALL_BASE_PREFIX}/ghcup" ] -else - [ ! -e "${GHCUP_INSTALL_BASE_PREFIX}/.ghcup" ] -fi +[ ! -e "${GHCUP_DIR}" ] diff --git a/app/ghcup-gen/Validate.hs b/app/ghcup-gen/Validate.hs index a8f414d..34a61b1 100644 --- a/app/ghcup-gen/Validate.hs +++ b/app/ghcup-gen/Validate.hs @@ -256,7 +256,7 @@ validateTarballs (TarballFilter etool versionRegex) dls gt = do case etool of Right (Just GHCup) -> do tmpUnpack <- lift mkGhcupTmpDir - _ <- liftE $ download dli tmpUnpack Nothing + _ <- liftE $ download (_dlUri dli) (Just (_dlHash dli)) tmpUnpack Nothing False pure Nothing Right _ -> do p <- liftE $ downloadCached dli Nothing @@ -266,7 +266,7 @@ validateTarballs (TarballFilter etool versionRegex) dls gt = do $ p Left ShimGen -> do tmpUnpack <- lift mkGhcupTmpDir - _ <- liftE $ download dli tmpUnpack Nothing + _ <- liftE $ download (_dlUri dli) (Just (_dlHash dli)) tmpUnpack Nothing False pure Nothing case r of VRight (Just basePath) -> do diff --git a/lib/GHCup.hs b/lib/GHCup.hs index a95d7ed..2bc9788 100644 --- a/lib/GHCup.hs +++ b/lib/GHCup.hs @@ -2137,7 +2137,7 @@ upgradeGHCup mtarget force' = do dli <- liftE $ getDownloadInfo GHCup latestVer tmp <- lift withGHCupTmpDir let fn = "ghcup" <> exeExt - p <- liftE $ download dli tmp (Just fn) + p <- liftE $ download (_dlUri dli) (Just (_dlHash dli)) tmp (Just fn) False let destDir = takeDirectory destFile destFile = fromMaybe (binDir fn) mtarget lift $ $(logDebug) [i|mkdir -p #{destDir}|] diff --git a/lib/GHCup/Download.hs b/lib/GHCup/Download.hs index c1046bd..16ddc08 100644 --- a/lib/GHCup/Download.hs +++ b/lib/GHCup/Download.hs @@ -55,20 +55,19 @@ import Data.Aeson import Data.Bifunctor import Data.ByteString ( ByteString ) #if defined(INTERNAL_DOWNLOADER) -import Data.CaseInsensitive ( CI ) +import Data.CaseInsensitive ( mk ) #endif import Data.List.Extra import Data.Maybe import Data.String.Interpolate import Data.Time.Clock import Data.Time.Clock.POSIX -#if defined(INTERNAL_DOWNLOADER) -import Data.Time.Format -#endif import Data.Versions -import Data.Word8 -import GHC.IO.Exception +import Data.Word8 hiding ( isSpace ) import Haskus.Utils.Variant.Excepts +#if defined(INTERNAL_DOWNLOADER) +import Network.Http.Client hiding ( URL ) +#endif import Optics import Prelude hiding ( abs , readFile @@ -76,8 +75,11 @@ import Prelude hiding ( abs ) import System.Directory import System.Environment +import System.Exit import System.FilePath import System.IO.Error +import System.IO.Temp +import Text.PrettyPrint.HughesPJClass ( prettyShow ) import URI.ByteString import qualified Crypto.Hash.SHA256 as SHA256 @@ -85,10 +87,8 @@ import qualified Data.ByteString as B import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString.Lazy as L import qualified Data.Map.Strict as M -#if defined(INTERNAL_DOWNLOADER) -import qualified Data.CaseInsensitive as CI -#endif import qualified Data.Text as T +import qualified Data.Text.IO as T import qualified Data.Text.Encoding as E import qualified Data.Yaml as Y @@ -149,19 +149,14 @@ getDownloadsF = do in GHCupInfo tr newDownloads newGlobalTools -readFromCache :: ( MonadReader env m - , HasDirs env - , MonadIO m - , MonadCatch m) - => URI - -> Excepts '[JSONError, FileDoesNotExistError] m L.ByteString -readFromCache uri = do - Dirs{..} <- lift getDirs - let yaml_file = cacheDir (T.unpack . decUTF8Safe . urlBaseName . view pathL' $ uri) - handleIO' NoSuchThing (\_ -> throwE $ FileDoesNotExistError yaml_file) - . liftIO - . L.readFile - $ yaml_file +yamlFromCache :: (MonadReader env m, HasDirs env) => URI -> m FilePath +yamlFromCache uri = do + Dirs{..} <- getDirs + pure (cacheDir (T.unpack . decUTF8Safe . urlBaseName . view pathL' $ uri)) + + +etagsFile :: FilePath -> FilePath +etagsFile = (<.> "etags") getBase :: ( MonadReader env m @@ -174,33 +169,41 @@ getBase :: ( MonadReader env m , MonadMask m ) => URI - -> Excepts '[JSONError , FileDoesNotExistError] m GHCupInfo + -> Excepts '[JSONError] m GHCupInfo getBase uri = do Settings { noNetwork } <- lift getSettings - bs <- if noNetwork - then readFromCache uri - else handleIO (\_ -> warnCache >> readFromCache uri) - . catchE @_ @'[JSONError, FileDoesNotExistError] (\(DownloadFailed _) -> warnCache >> readFromCache uri) - . reThrowAll @_ @_ @'[JSONError, DownloadFailed] DownloadFailed - $ smartDl uri + yaml <- lift $ yamlFromCache uri + unless noNetwork $ + handleIO (\e -> warnCache (displayException e)) + . catchE @_ @_ @'[] (\e@(DownloadFailed _) -> warnCache (prettyShow e)) + . reThrowAll @_ @_ @'[DownloadFailed] DownloadFailed + . smartDl + $ uri liftE - . lE' @_ @_ @'[JSONError] JSONDecodeError - . first show - . Y.decodeEither' - . L.toStrict - $ bs + . onE_ (onError yaml) + . lEM' @_ @_ @'[JSONError] JSONDecodeError + . fmap (first (\e -> [i|#{displayException e} +Consider removing "#{yaml}" manually.|])) + . liftIO + . Y.decodeFileEither + $ yaml where - warnCache = lift $ $(logWarn) - [i|Could not get download info, trying cached version (this may not be recent!)|] + -- On error, remove the etags file and set access time to 0. This should ensure the next invocation + -- may re-download and succeed. + onError :: (MonadLogger m, MonadMask m, MonadCatch m, MonadIO m) => FilePath -> m () + onError fp = do + let efp = etagsFile fp + handleIO (\e -> $(logWarn) [i|Couldn't remove file #{efp}, error was: #{displayException e}|]) + (hideError doesNotExistErrorType $ rmFile efp) + liftIO $ hideError doesNotExistErrorType $ setAccessTime fp (posixSecondsToUTCTime (fromIntegral @Int 0)) + warnCache s = do + lift $ $(logWarn) [i|Could not get download info, trying cached version (this may not be recent!)|] + lift $ $(logDebug) [i|Error was: #{s}|] -- First check if the json file is in the ~/.ghcup/cache dir -- and check it's access time. If it has been accessed within the -- last 5 minutes, just reuse it. -- - -- If not, then send a HEAD request and check for modification time. - -- Only download the file if the modification time is newer - -- than the local file. - -- -- Always save the local file with the mod time of the remote file. smartDl :: forall m1 env1 . ( MonadReader env1 m1 @@ -214,92 +217,32 @@ getBase uri = do ) => URI -> Excepts - '[ FileDoesNotExistError - , HTTPStatusError - , URIParseError - , UnsupportedScheme - , NoLocationHeader - , TooManyRedirs - , ProcessError - , NoNetwork + '[ DownloadFailed + , DigestError ] m1 - L.ByteString + () smartDl uri' = do - Dirs{..} <- lift getDirs - let path = view pathL' uri' - let json_file = cacheDir (T.unpack . decUTF8Safe . urlBaseName $ path) + json_file <- lift $ yamlFromCache uri' e <- liftIO $ doesFileExist json_file + currentTime <- liftIO getCurrentTime if e then do accessTime <- liftIO $ getAccessTime json_file - currentTime <- liftIO getCurrentTime -- access time won't work on most linuxes, but we can try regardless - if (utcTimeToPOSIXSeconds currentTime - utcTimeToPOSIXSeconds accessTime) > 300 - then do -- no access in last 5 minutes, re-check upstream mod time - getModTime >>= \case - Just modTime -> do - fileMod <- liftIO $ getModificationTime json_file - if modTime > fileMod - then dlWithMod modTime json_file - else liftIO $ L.readFile json_file - Nothing -> do - lift $ $(logDebug) [i|Unable to get/parse Last-Modified header|] - dlWithoutMod json_file - else -- access in less than 5 minutes, re-use file - liftIO $ L.readFile json_file - else do - getModTime >>= \case - Just modTime -> dlWithMod modTime json_file - Nothing -> do - -- although we don't know last-modified, we still save - -- it to a file, so we might use it in offline mode - lift $ $(logDebug) [i|Unable to get/parse Last-Modified header|] - dlWithoutMod json_file - + when ((utcTimeToPOSIXSeconds currentTime - utcTimeToPOSIXSeconds accessTime) > 300) $ + -- no access in last 5 minutes, re-check upstream mod time + dlWithMod currentTime json_file + else + dlWithMod currentTime json_file where dlWithMod modTime json_file = do - bs <- liftE $ downloadBS uri' - liftIO $ writeFileWithModTime modTime json_file bs - pure bs - dlWithoutMod json_file = do - bs <- liftE $ downloadBS uri' - lift $ hideError doesNotExistErrorType $ recycleFile json_file - liftIO $ L.writeFile json_file bs - liftIO $ setModificationTime json_file (posixSecondsToUTCTime (fromIntegral @Int 0)) - pure bs - - - getModTime = do -#if !defined(INTERNAL_DOWNLOADER) - pure Nothing -#else - headers <- - handleIO (\_ -> pure mempty) - $ liftE - $ ( catchAllE - (\_ -> - pure mempty :: Excepts '[] m1 (M.Map (CI ByteString) ByteString) - ) - $ getHead uri' - ) - pure $ parseModifiedHeader headers - - parseModifiedHeader :: (M.Map (CI ByteString) ByteString) -> Maybe UTCTime - parseModifiedHeader headers = - (M.lookup (CI.mk "Last-Modified") headers) >>= \h -> parseTimeM - True - defaultTimeLocale - "%a, %d %b %Y %H:%M:%S %Z" - (T.unpack . decUTF8Safe $ h) - -#endif - - writeFileWithModTime :: UTCTime -> FilePath -> L.ByteString -> IO () - writeFileWithModTime utctime path content = do - L.writeFile path content - setModificationTime path utctime + let (dir, fn) = splitFileName json_file + f <- liftE $ download uri' Nothing dir (Just fn) True + liftIO $ setModificationTime f modTime + liftIO $ setAccessTime f modTime + getDownloadInfo :: ( MonadReader env m @@ -359,32 +302,32 @@ download :: ( MonadReader env m , MonadLogger m , MonadIO m ) - => DownloadInfo + => URI + -> Maybe T.Text -- ^ expected hash -> FilePath -- ^ destination dir -> Maybe FilePath -- ^ optional filename + -> Bool -- ^ whether to read an write etags -> Excepts '[DigestError , DownloadFailed] m FilePath -download dli dest mfn +download uri eDigest dest mfn etags | scheme == "https" = dl | scheme == "http" = dl | scheme == "file" = cp | otherwise = throwE $ DownloadFailed (variantFromValue UnsupportedScheme) where - scheme = view (dlUri % uriSchemeL' % schemeBSL') dli + scheme = view (uriSchemeL' % schemeBSL') uri cp = do -- destination dir must exist liftIO $ createDirRecursive' dest - let destFile = getDestFile let fromFile = T.unpack . decUTF8Safe $ path liftIO $ copyFile fromFile destFile pure destFile dl = do - let uri' = decUTF8Safe (serializeURIRef' (view dlUri dli)) + let uri' = decUTF8Safe (serializeURIRef' uri) lift $ $(logInfo) [i|downloading: #{uri'}|] -- destination dir must exist liftIO $ createDirRecursive' dest - let destFile = getDestFile -- download flip onException @@ -399,28 +342,134 @@ download dli dest mfn case downloader of Curl -> do o' <- liftIO getCurlOpts - liftE $ lEM @_ @'[ProcessError] $ exec "curl" - (o' ++ ["-fL", "-o", destFile, (T.unpack . decUTF8Safe) $ serializeURIRef' $ view dlUri dli]) Nothing Nothing + if etags + then do + dh <- liftIO $ emptySystemTempFile "curl-header" + flip finally (try @_ @SomeException $ rmFile dh) $ + flip finally (try @_ @SomeException $ rmFile (destFile <.> "tmp")) $ do + metag <- readETag destFile + liftE $ lEM @_ @'[ProcessError] $ exec "curl" + (o' ++ (if etags then ["--dump-header", dh] else []) + ++ maybe [] (\t -> ["-H", [i|If-None-Match: #{t}|]]) metag + ++ ["-fL", "-o", destFile <.> "tmp", T.unpack uri']) Nothing Nothing + headers <- liftIO $ T.readFile dh + + -- this nonsense is necessary, because some older versions of curl would overwrite + -- the destination file when 304 is returned + case fmap T.words . listToMaybe . fmap T.strip . T.lines $ headers of + Just (http':sc:_) + | sc == "304" + , T.pack "HTTP" `T.isPrefixOf` http' -> $logDebug [i|Status code was 304, not overwriting|] + | T.pack "HTTP" `T.isPrefixOf` http' -> do + $logDebug [i|Status code was #{sc}, overwriting|] + liftIO $ copyFile (destFile <.> "tmp") destFile + _ -> liftE $ throwE @_ @'[DownloadFailed] (DownloadFailed (toVariantAt @0 (MalformedHeaders headers) + :: V '[MalformedHeaders])) + + writeEtags (parseEtags headers) + else + liftE $ lEM @_ @'[ProcessError] $ exec "curl" + (o' ++ ["-fL", "-o", destFile, T.unpack uri']) Nothing Nothing Wget -> do - o' <- liftIO getWgetOpts - liftE $ lEM @_ @'[ProcessError] $ exec "wget" - (o' ++ ["-O", destFile , (T.unpack . decUTF8Safe) $ serializeURIRef' $ view dlUri dli]) Nothing Nothing + destFileTemp <- liftIO $ emptySystemTempFile "wget-tmp" + flip finally (try @_ @SomeException $ rmFile destFileTemp) $ do + o' <- liftIO getWgetOpts + if etags + then do + metag <- readETag destFile + let opts = o' ++ maybe [] (\t -> ["--header", [i|If-None-Match: #{t}|]]) metag + ++ ["-q", "-S", "-O", destFileTemp , T.unpack uri'] + CapturedProcess {_exitCode, _stdErr} <- lift $ executeOut "wget" opts Nothing + case _exitCode of + ExitSuccess -> do + liftIO $ copyFile destFileTemp destFile + writeEtags (parseEtags (decUTF8Safe' _stdErr)) + ExitFailure i' + | i' == 8 + , Just _ <- find (T.pack "304 Not Modified" `T.isInfixOf`) . T.lines . decUTF8Safe' $ _stdErr + -> do + $logDebug "Not modified, skipping download" + writeEtags (parseEtags (decUTF8Safe' _stdErr)) + | otherwise -> throwE (NonZeroExit i' "wget" opts) + else do + let opts = o' ++ ["-O", destFileTemp , T.unpack uri'] + liftE $ lEM @_ @'[ProcessError] $ exec "wget" opts Nothing Nothing + liftIO $ copyFile destFileTemp destFile #if defined(INTERNAL_DOWNLOADER) Internal -> do - (https, host, fullPath, port) <- liftE $ uriToQuadruple (view dlUri dli) - liftE $ downloadToFile https host fullPath port destFile + (https, host, fullPath, port) <- liftE $ uriToQuadruple uri + if etags + then do + metag <- readETag destFile + let addHeaders = maybe mempty (\etag -> M.fromList [ (mk . E.encodeUtf8 . T.pack $ "If-None-Match" + , E.encodeUtf8 etag)]) metag + liftE + $ catchE @HTTPNotModified @'[DownloadFailed] @'[] (\(HTTPNotModified etag) -> lift $ writeEtags (pure $ Just etag)) + $ do + r <- downloadToFile https host fullPath port destFile addHeaders + writeEtags (pure $ decUTF8Safe <$> getHeader r "etag") + else void $ liftE $ catchE @HTTPNotModified + @'[DownloadFailed] + (\e@(HTTPNotModified _) -> + throwE @_ @'[DownloadFailed] (DownloadFailed (toVariantAt @0 e :: V '[HTTPNotModified]))) + $ downloadToFile https host fullPath port destFile mempty #endif - liftE $ checkDigest dli destFile + forM_ eDigest (liftE . flip checkDigest destFile) pure destFile + -- Manage to find a file we can write the body into. - getDestFile :: FilePath - getDestFile = maybe (dest T.unpack (decUTF8Safe (urlBaseName path))) + destFile :: FilePath + destFile = maybe (dest T.unpack (decUTF8Safe (urlBaseName path))) (dest ) mfn - path = view (dlUri % pathL') dli + path = view pathL' uri + + parseEtags :: (MonadLogger m, MonadIO m, MonadThrow m) => T.Text -> m (Maybe T.Text) + parseEtags stderr = do + let mEtag = find (\line -> T.pack "etag:" `T.isPrefixOf` T.toLower line) . fmap T.strip . T.lines $ stderr + case T.words <$> mEtag of + (Just []) -> do + $logDebug "Couldn't parse etags, no input: " + pure Nothing + (Just [_, etag']) -> do + $logDebug [i|Parsed etag: #{etag'}|] + pure (Just etag') + (Just xs) -> do + $logDebug ("Couldn't parse etags, unexpected input: " <> T.unwords xs) + pure Nothing + Nothing -> do + $logDebug "No etags header found" + pure Nothing + + writeEtags :: (MonadLogger m, MonadIO m, MonadThrow m) => m (Maybe T.Text) -> m () + writeEtags getTags = do + getTags >>= \case + Just t -> do + $logDebug [i|Writing etagsFile #{(etagsFile destFile)}|] + liftIO $ T.writeFile (etagsFile destFile) t + Nothing -> + $logDebug [i|No etags files written|] + + readETag :: (MonadLogger m, MonadCatch m, MonadIO m) => FilePath -> m (Maybe T.Text) + readETag fp = do + e <- liftIO $ doesFileExist fp + if e + then do + rE <- try @_ @SomeException $ liftIO $ fmap stripNewline' $ T.readFile (etagsFile fp) + case rE of + (Right et) -> do + $logDebug [i|Read etag: #{et}|] + pure (Just et) + (Left _) -> do + $logDebug [i|Etag file doesn't exist (yet)|] + pure Nothing + else do + $logDebug [i|Skipping and deleting etags file because destination file #{fp} doesn't exist|] + liftIO $ hideError doesNotExistErrorType $ rmFile (etagsFile fp) + pure Nothing -- | Download into tmpdir or use cached version, if it exists. If filename @@ -444,7 +493,7 @@ downloadCached dli mfn = do True -> downloadCached' dli mfn Nothing False -> do tmp <- lift withGHCupTmpDir - liftE $ download dli tmp mfn + liftE $ download (_dlUri dli) (Just (_dlHash dli)) tmp mfn False downloadCached' :: ( MonadReader env m @@ -468,9 +517,9 @@ downloadCached' dli mfn mDestDir = do fileExists <- liftIO $ doesFileExist cachfile if | fileExists -> do - liftE $ checkDigest dli cachfile + liftE $ checkDigest (view dlHash dli) cachfile pure cachfile - | otherwise -> liftE $ download dli destDir mfn + | otherwise -> liftE $ download (_dlUri dli) (Just (_dlHash dli)) destDir mfn False @@ -481,73 +530,6 @@ downloadCached' dli mfn mDestDir = do - --- | This is used for downloading the JSON. -downloadBS :: ( MonadReader env m - , HasSettings env - , MonadCatch m - , MonadIO m - , MonadLogger m - ) - => URI - -> Excepts - '[ FileDoesNotExistError - , HTTPStatusError - , URIParseError - , UnsupportedScheme - , NoLocationHeader - , TooManyRedirs - , ProcessError - , NoNetwork - ] - m - L.ByteString -downloadBS uri' - | scheme == "https" - = dl True - | scheme == "http" - = dl False - | scheme == "file" - = liftIOException doesNotExistErrorType (FileDoesNotExistError $ T.unpack $ decUTF8Safe path) - (liftIO $ L.readFile (T.unpack $ decUTF8Safe path)) - | otherwise - = throwE UnsupportedScheme - - where - scheme = view (uriSchemeL' % schemeBSL') uri' - path = view pathL' uri' -#if defined(INTERNAL_DOWNLOADER) - dl https = do -#else - dl _ = do -#endif - lift $ $(logDebug) [i|downloading: #{serializeURIRef' uri'}|] - Settings{ downloader, noNetwork } <- lift getSettings - when noNetwork $ throwE NoNetwork - case downloader of - Curl -> do - o' <- liftIO getCurlOpts - let exe = "curl" - args = o' ++ ["-sSfL", T.unpack $ decUTF8Safe $ serializeURIRef' uri'] - lift (executeOut exe args Nothing) >>= \case - CapturedProcess ExitSuccess stdout _ -> do - pure stdout - CapturedProcess (ExitFailure i') _ _ -> throwE $ NonZeroExit i' exe args - Wget -> do - o' <- liftIO getWgetOpts - let exe = "wget" - args = o' ++ ["-qO-", T.unpack $ decUTF8Safe $ serializeURIRef' uri'] - lift (executeOut exe args Nothing) >>= \case - CapturedProcess ExitSuccess stdout _ -> do - pure stdout - CapturedProcess (ExitFailure i') _ _ -> throwE $ NonZeroExit i' exe args -#if defined(INTERNAL_DOWNLOADER) - Internal -> do - (_, host', fullPath', port') <- liftE $ uriToQuadruple uri' - liftE $ downloadBS' https host' fullPath' port' -#endif - - checkDigest :: ( MonadReader env m , HasDirs env , HasSettings env @@ -555,10 +537,10 @@ checkDigest :: ( MonadReader env m , MonadThrow m , MonadLogger m ) - => DownloadInfo + => T.Text -- ^ the hash -> FilePath -> Excepts '[DigestError] m () -checkDigest dli file = do +checkDigest eDigest file = do Settings{ noVerify } <- lift getSettings let verify = not noVerify when verify $ do @@ -566,7 +548,6 @@ checkDigest dli file = do lift $ $(logInfo) [i|verifying digest of: #{p'}|] c <- liftIO $ L.readFile file cDigest <- throwEither . E.decodeUtf8' . B16.encode . SHA256.hashlazy $ c - let eDigest = view dlHash dli when ((cDigest /= eDigest) && verify) $ throwE (DigestError cDigest eDigest) diff --git a/lib/GHCup/Download/IOStreams.hs b/lib/GHCup/Download/IOStreams.hs index 8eb94e4..9682a64 100644 --- a/lib/GHCup/Download/IOStreams.hs +++ b/lib/GHCup/Download/IOStreams.hs @@ -9,9 +9,7 @@ module GHCup.Download.IOStreams where import GHCup.Download.Utils import GHCup.Errors -import GHCup.Types.Optics import GHCup.Types.JSON ( ) -import GHCup.Utils.File import GHCup.Utils.Prelude import Control.Applicative @@ -20,7 +18,7 @@ import Control.Monad import Control.Monad.Reader import Data.ByteString ( ByteString ) import Data.ByteString.Builder -import Data.CaseInsensitive ( CI ) +import Data.CaseInsensitive ( CI, original, mk ) import Data.IORef import Data.Maybe import Data.Text.Read @@ -32,7 +30,6 @@ import Prelude hiding ( abs , writeFile ) import System.ProgressBar -import System.IO import URI.ByteString import qualified Data.ByteString as BS @@ -67,7 +64,7 @@ downloadBS' :: MonadIO m downloadBS' https host path port = do bref <- liftIO $ newIORef (mempty :: Builder) let stepper bs = modifyIORef bref (<> byteString bs) - downloadInternal False https host path port stepper + void $ downloadInternal False https host path port stepper (pure ()) mempty liftIO (readIORef bref <&> toLazyByteString) @@ -77,12 +74,17 @@ downloadToFile :: (MonadMask m, MonadIO m) -> ByteString -- ^ path (e.g. "/my/file") including query -> Maybe Int -- ^ optional port (e.g. 3000) -> FilePath -- ^ destination file to create and write to - -> Excepts '[DownloadFailed] m () -downloadToFile https host fullPath port destFile = do - fd <- liftIO $ openFile destFile WriteMode - let stepper = BS.hPut fd - flip finally (liftIO $ hClose fd) - $ reThrowAll DownloadFailed $ downloadInternal True https host fullPath port stepper + -> M.Map (CI ByteString) ByteString -- ^ additional headers + -> Excepts '[DownloadFailed, HTTPNotModified] m Response +downloadToFile https host fullPath port destFile addHeaders = do + let stepper = BS.appendFile destFile + setup = BS.writeFile destFile mempty + catchAllE (\case + (V (HTTPStatusError i headers)) + | i == 304 + , Just e <- M.lookup (mk "etag") headers -> throwE $ HTTPNotModified (decUTF8Safe e) + v -> throwE $ DownloadFailed v + ) $ downloadInternal True https host fullPath port stepper setup addHeaders downloadInternal :: MonadIO m @@ -92,6 +94,8 @@ downloadInternal :: MonadIO m -> ByteString -- ^ path with query -> Maybe Int -- ^ optional port -> (ByteString -> IO a) -- ^ the consuming step function + -> IO a -- ^ setup action + -> M.Map (CI ByteString) ByteString -- ^ additional headers -> Excepts '[ HTTPStatusError , URIParseError @@ -100,19 +104,21 @@ downloadInternal :: MonadIO m , TooManyRedirs ] m - () + Response downloadInternal = go (5 :: Int) where - go redirs progressBar https host path port consumer = do + go redirs progressBar https host path port consumer setup addHeaders = do r <- liftIO $ withConnection' https host port action veitherToExcepts r >>= \case - Just r' -> + Right r' -> if redirs > 0 then followRedirectURL r' else throwE TooManyRedirs - Nothing -> pure () + Left res -> pure res where action c = do - let q = buildRequest1 $ http GET path + let q = buildRequest1 $ do + http GET path + flip M.traverseWithKey addHeaders $ \key val -> setHeader (original key) val sendRequest c q emptyBody @@ -121,28 +127,30 @@ downloadInternal = go (5 :: Int) (\r i' -> runE $ do let scode = getStatusCode r if - | scode >= 200 && scode < 300 -> downloadStream r i' >> pure Nothing + | scode >= 200 && scode < 300 -> liftIO $ downloadStream r i' >> pure (Left r) + | scode == 304 -> throwE $ HTTPStatusError scode (getHeaderMap r) | scode >= 300 && scode < 400 -> case getHeader r "Location" of - Just r' -> pure $ Just r' + Just r' -> pure $ Right r' Nothing -> throwE NoLocationHeader - | otherwise -> throwE $ HTTPStatusError scode + | otherwise -> throwE $ HTTPStatusError scode (getHeaderMap r) ) followRedirectURL bs = case parseURI strictURIParserOptions bs of Right uri' -> do (https', host', fullPath', port') <- liftE $ uriToQuadruple uri' - go (redirs - 1) progressBar https' host' fullPath' port' consumer + go (redirs - 1) progressBar https' host' fullPath' port' consumer setup addHeaders Left e -> throwE e downloadStream r i' = do + void setup let size = case getHeader r "Content-Length" of Just x' -> case decimal $ decUTF8Safe x' of Left _ -> 0 Right (r', _) -> r' Nothing -> 0 - mpb <- if progressBar - then Just <$> liftIO (newProgressBar defStyle 10 (Progress 0 size ())) + (mpb :: Maybe (ProgressBar ())) <- if progressBar + then Just <$> newProgressBar defStyle 10 (Progress 0 size ()) else pure Nothing outStream <- liftIO $ Streams.makeOutputStream @@ -155,79 +163,6 @@ downloadInternal = go (5 :: Int) liftIO $ Streams.connect i' outStream -getHead :: (MonadCatch m, MonadIO m) - => URI - -> Excepts - '[ HTTPStatusError - , URIParseError - , UnsupportedScheme - , NoLocationHeader - , TooManyRedirs - , ProcessError - ] - m - (M.Map (CI ByteString) ByteString) -getHead uri' | scheme == "https" = head' True - | scheme == "http" = head' False - | otherwise = throwE UnsupportedScheme - - where - scheme = view (uriSchemeL' % schemeBSL') uri' - head' https = do - (_, host', fullPath', port') <- liftE $ uriToQuadruple uri' - liftE $ headInternal https host' fullPath' port' - - -headInternal :: MonadIO m - => Bool -- ^ https? - -> ByteString -- ^ host - -> ByteString -- ^ path with query - -> Maybe Int -- ^ optional port - -> Excepts - '[ HTTPStatusError - , URIParseError - , UnsupportedScheme - , TooManyRedirs - , NoLocationHeader - ] - m - (M.Map (CI ByteString) ByteString) -headInternal = go (5 :: Int) - - where - go redirs https host path port = do - r <- liftIO $ withConnection' https host port action - veitherToExcepts r >>= \case - Left r' -> - if redirs > 0 then followRedirectURL r' else throwE TooManyRedirs - Right hs -> pure hs - where - - action c = do - let q = buildRequest1 $ http HEAD path - - sendRequest c q emptyBody - - unsafeReceiveResponse - c - (\r _ -> runE $ do - let scode = getStatusCode r - if - | scode >= 200 && scode < 300 -> do - let headers = getHeaderMap r - pure $ Right headers - | scode >= 300 && scode < 400 -> case getHeader r "Location" of - Just r' -> pure $ Left r' - Nothing -> throwE NoLocationHeader - | otherwise -> throwE $ HTTPStatusError scode - ) - - followRedirectURL bs = case parseURI strictURIParserOptions bs of - Right uri' -> do - (https', host', fullPath', port') <- liftE $ uriToQuadruple uri' - go (redirs - 1) https' host' fullPath' port' - Left e -> throwE e - withConnection' :: Bool -> ByteString diff --git a/lib/GHCup/Errors.hs b/lib/GHCup/Errors.hs index bd803b2..fa039d7 100644 --- a/lib/GHCup/Errors.hs +++ b/lib/GHCup/Errors.hs @@ -27,6 +27,8 @@ import Codec.Archive import qualified Codec.Archive.Tar as Tar #endif import Control.Exception.Safe +import Data.ByteString ( ByteString ) +import Data.CaseInsensitive ( CI ) import Data.String.Interpolate import Data.Text ( Text ) import Data.Versions @@ -35,6 +37,8 @@ import Text.PrettyPrint hiding ( (<>) ) import Text.PrettyPrint.HughesPJClass hiding ( (<>) ) import URI.ByteString +import qualified Data.Map.Strict as M + ------------------------ @@ -180,13 +184,29 @@ instance Pretty DigestError where text [i|Digest error: expected "#{expectedDigest}", but got "#{currentDigest}"|] -- | Unexpected HTTP status. -data HTTPStatusError = HTTPStatusError Int +data HTTPStatusError = HTTPStatusError Int (M.Map (CI ByteString) ByteString) deriving Show instance Pretty HTTPStatusError where - pPrint (HTTPStatusError status) = + pPrint (HTTPStatusError status _) = text [i|Unexpected HTTP status: #{status}|] +-- | Malformed headers. +data MalformedHeaders = MalformedHeaders Text + deriving Show + +instance Pretty MalformedHeaders where + pPrint (MalformedHeaders h) = + text [i|Headers are malformed: #{h}|] + +-- | Unexpected HTTP status. +data HTTPNotModified = HTTPNotModified Text + deriving Show + +instance Pretty HTTPNotModified where + pPrint (HTTPNotModified etag) = + text [i|Remote resource not modifed, etag was: #{etag}|] + -- | The 'Location' header was expected during a 3xx redirect, but not found. data NoLocationHeader = NoLocationHeader deriving Show diff --git a/lib/GHCup/Utils/File/Windows.hs b/lib/GHCup/Utils/File/Windows.hs index dd4db4b..b7b6e17 100644 --- a/lib/GHCup/Utils/File/Windows.hs +++ b/lib/GHCup/Utils/File/Windows.hs @@ -209,6 +209,20 @@ exec exe args chdir env = do pure $ toProcessError exe args exit_code +-- | Thin wrapper around `executeFile`. +execShell :: MonadIO m + => FilePath -- ^ thing to execute + -> [FilePath] -- ^ args for the thing + -> Maybe FilePath -- ^ optionally chdir into this + -> Maybe [(String, String)] -- ^ optional environment + -> m (Either ProcessError ()) +execShell exe args chdir env = do + let cmd = exe <> " " <> concatMap (' ':) args + cp <- createProcessWithMingwPath ((shell cmd) { cwd = chdir, env = env }) + exit_code <- liftIO $ withCreateProcess cp $ \_ _ _ p -> waitForProcess p + pure $ toProcessError cmd [] exit_code + + chmod_755 :: MonadIO m => FilePath -> m () chmod_755 fp = let perm = setOwnerWritable True emptyPermissions diff --git a/lib/GHCup/Utils/Prelude.hs b/lib/GHCup/Utils/Prelude.hs index 259bc40..13033ee 100644 --- a/lib/GHCup/Utils/Prelude.hs +++ b/lib/GHCup/Utils/Prelude.hs @@ -410,12 +410,7 @@ rmPathForcibly :: ( MonadIO m -> m () rmPathForcibly fp = #if defined(IS_WINDOWS) - recovering (fullJitterBackoff 25000 <> limitRetries 10) - [\_ -> Handler (\e -> pure $ isPermissionError e) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == InappropriateType)) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == UnsatisfiedConstraints)) - ] - (\_ -> liftIO $ removePathForcibly fp) + recover (liftIO $ removePathForcibly fp) #else liftIO $ removePathForcibly fp #endif @@ -426,12 +421,7 @@ rmDirectory :: (MonadIO m, MonadMask m) -> m () rmDirectory fp = #if defined(IS_WINDOWS) - recovering (fullJitterBackoff 25000 <> limitRetries 10) - [\_ -> Handler (\e -> pure $ isPermissionError e) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == UnsatisfiedConstraints)) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == InappropriateType)) - ] - (\_ -> liftIO $ removeDirectory fp) + recover (liftIO $ removeDirectory fp) #else liftIO $ removeDirectory fp #endif @@ -469,12 +459,7 @@ rmFile :: ( MonadIO m -> m () rmFile fp = #if defined(IS_WINDOWS) - recovering (fullJitterBackoff 25000 <> limitRetries 10) - [\_ -> Handler (\e -> pure $ isPermissionError e) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == InappropriateType)) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == UnsatisfiedConstraints)) - ] - (\_ -> liftIO $ removeFile fp) + recover (liftIO $ removeFile fp) #else liftIO $ removeFile fp #endif @@ -485,12 +470,7 @@ rmDirectoryLink :: (MonadIO m, MonadMask m, MonadReader env m, HasDirs env) -> m () rmDirectoryLink fp = #if defined(IS_WINDOWS) - recovering (fullJitterBackoff 25000 <> limitRetries 10) - [\_ -> Handler (\e -> pure $ isPermissionError e) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == InappropriateType)) - ,\_ -> Handler (\e -> pure (ioeGetErrorType e == UnsatisfiedConstraints)) - ] - (\_ -> liftIO $ removeDirectoryLink fp) + recover (liftIO $ removeDirectoryLink fp) #else liftIO $ removeDirectoryLink fp #endif @@ -525,6 +505,14 @@ stripNewline s | otherwise = head s : stripNewline (tail s) +-- | Strip @\\r@ and @\\n@ from 'ByteString's +stripNewline' :: T.Text -> T.Text +stripNewline' s + | T.null s = mempty + | T.head s `elem` "\n\r" = stripNewline' (T.tail s) + | otherwise = T.singleton (T.head s) <> stripNewline' (T.tail s) + + isNewLine :: Word8 -> Bool isNewLine w | w == _lf = True