From 44d8ba53db6d5c8dc694e9dab9aeeba102b23c1e Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Thu, 7 Mar 2024 20:48:23 -0300 Subject: [PATCH 01/28] Make `multiQueryStatement_` return a Stream, and count statements We still don't store the count or do anything with it other than printing, but this is a start. --- src/Codd/Internal.hs | 47 ++++++++++----- src/Codd/Internal/MultiQueryStatement.hs | 58 +++++++++++++------ .../SchemaVerificationSpec.hs | 11 ++-- 3 files changed, 81 insertions(+), 35 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index c76811e7..8c4408a5 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -24,6 +24,7 @@ import Codd.Parsing ( AddedSqlMigration(..) , FileStream(..) , ParsedSql(..) , SqlMigration(..) + , SqlPiece(..) , hoistAddedSqlMigration , parseAddedSqlMigration , parseSqlPiecesStreaming @@ -51,7 +52,6 @@ import Control.Monad ( (>=>) , forM , forM_ , unless - , void , when ) import Control.Monad.IO.Class ( MonadIO(..) ) @@ -205,12 +205,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -908,15 +908,22 @@ applySingleMigration conn statementRetryPol afterMigRun isolLvl (AddedSqlMigrati else NotInTransaction statementRetryPol logInfoNoNewline $ "Applying " <> Text.pack fn - appliedMigrationDuration <- - timeAction (multiQueryStatement_ inTxn conn $ migrationSql sqlMig) + (numStatements, appliedMigrationDuration) <- + timeAction + ( Streaming.length_ + $ Streaming.filter countsAsRealStatement + $ multiQueryStatement_ inTxn conn + $ migrationSql sqlMig + ) `onException` logInfo " [failed]" timestamp <- withTransaction isolLvl conn $ afterMigRun fn migTimestamp Nothing appliedMigrationDuration logInfo $ " (" <> prettyPrintDuration appliedMigrationDuration - <> ")" + <> ", " + <> Fmt.sformat Fmt.int numStatements + <> ")" pure AppliedMigration { appliedMigrationName = migrationName sqlMig , appliedMigrationTimestamp = migTimestamp @@ -943,16 +950,30 @@ applySingleMigration conn statementRetryPol afterMigRun isolLvl (AddedSqlMigrati (Fmt.fixed @Double 1) (fromIntegral @Integer (round (10 * dps / pico_1s)) / 10) <> "s" -- e.g. 10.5s + -- | If our goal were only to resume migration application from the last statement that failed, + -- we could count white space and the body and ending of COPY. However, that would be an odd number from a human + -- perspective, and our parser is also more prone to changing how it e.g. groups comments with actual statements, + -- so it's more dangerous to count these things. + countsAsRealStatement = \case + Left _text -> True -- Unparsed SQL counts as a single statement + Right p -> case p of + CommentPiece _ -> False + WhiteSpacePiece _ -> False + CopyFromStdinRows _ -> False + CopyFromStdinEnd _ -> False + _ -> True timeAction f = do before <- liftIO $ getTime Monotonic - void f - after <- liftIO $ getTime Monotonic + ret <- f + after <- liftIO $ getTime Monotonic pure - $ picosecondsToDiffTime + ( ret + , picosecondsToDiffTime $ (pico_1s :: Integer) * fromIntegral (sec after - sec before) + (pico_1ns :: Integer) * fromIntegral (nsec after - nsec before) + ) data MigrationRegistered = MigrationRegistered | MigrationNotRegistered diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index b381ebf8..0ef548d6 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -23,7 +23,11 @@ import qualified Database.PostgreSQL.Simple.Internal import qualified Database.PostgreSQL.Simple.Types as DB import Prelude hiding ( takeWhile ) +import Streaming ( Of + , Stream + ) import qualified Streaming.Prelude as Streaming +import qualified Streaming.Prelude as S import UnliftIO ( Exception , MonadIO , MonadUnliftIO @@ -40,14 +44,19 @@ data SqlStatementException = SqlStatementException instance Exception SqlStatementException + +txnStatus :: MonadIO m => DB.Connection -> m PQ.TransactionStatus +txnStatus conn = liftIO $ PGInternal.withConnection conn PQ.transactionStatus + -- | Runs SQL that could be either a row-returning or count-returning statement. Just like postgresql-simple, throws exceptions in case of SQL errors. -singleStatement_ :: MonadIO m => DB.Connection -> Text -> m () +singleStatement_ + :: MonadIO m => DB.Connection -> Text -> m PQ.TransactionStatus singleStatement_ conn sql = do res <- liftIO $ PGInternal.exec conn $ encodeUtf8 sql status <- liftIO $ PQ.resultStatus res case status of - PQ.CommandOk -> pure () - PQ.TuplesOk -> pure () + PQ.CommandOk -> txnStatus conn + PQ.TuplesOk -> txnStatus conn _ -> liftIO $ handle (throwIO . SqlStatementException sql) @@ -57,6 +66,12 @@ singleStatement_ conn sql = do data InTransaction = InTransaction | NotInTransaction RetryPolicy deriving stock (Eq) +-- data StatementApplied = WhitespaceOrCommentSkipped | StatementApplied PQ.TransactionStatus + +-- isNotCommentOrWhiteSpace :: StatementApplied -> Bool +-- isNotCommentOrWhiteSpace (StatementApplied _) = True +-- isNotCommentOrWhiteSpace _ = False + -- | A bit like singleStatement_, but following these criteria: -- 1. If already in a transaction, then statements will be executed one-by-one. -- 2. If not in a transaction, then statements will be executed one-by-one and @@ -66,32 +81,41 @@ multiQueryStatement_ => InTransaction -> DB.Connection -> ParsedSql m - -> m () + -> Stream (Of (Either Text SqlPiece)) m () multiQueryStatement_ inTxn conn sql = case (sql, inTxn) of - (UnparsedSql t, InTransaction) -> singleStatement_ conn t - (UnparsedSql t, NotInTransaction retryPol) -> - retry_ retryPol $ singleStatement_ conn t + (UnparsedSql t, InTransaction) -> + S.mapM (fmap (const $ Left t) . singleStatement_ conn) (S.yield t) -- There must be some simpler function to avoid mapM and yield.. + (UnparsedSql t, NotInTransaction retryPol) -> S.mapM + (fmap (const $ Left t) . retry_ retryPol . singleStatement_ conn) + (S.yield t) (WellParsedSql stms, NotInTransaction retryPol) -> -- We retry individual statements in no-txn migrations - flip Streaming.mapM_ stms - $ \stm -> retry_ retryPol $ runSingleStatementInternal_ conn stm + flip Streaming.mapM stms $ \stm -> + fmap (const $ Right stm) + $ retry_ retryPol + $ runSingleStatementInternal_ conn stm (WellParsedSql stms, InTransaction) -> -- We don't retry individual statements in in-txn migrations - flip Streaming.mapM_ stms - $ \stm -> runSingleStatementInternal_ conn stm + Streaming.mapM + (\stm -> Right stm <$ runSingleStatementInternal_ conn stm) + stms -runSingleStatementInternal_ :: MonadIO m => DB.Connection -> SqlPiece -> m () -runSingleStatementInternal_ _ (CommentPiece _) = pure () -runSingleStatementInternal_ _ (WhiteSpacePiece _) = pure () +runSingleStatementInternal_ + :: MonadIO m => DB.Connection -> SqlPiece -> m PQ.TransactionStatus +runSingleStatementInternal_ conn (CommentPiece _) = txnStatus conn +runSingleStatementInternal_ conn (WhiteSpacePiece _) = txnStatus conn runSingleStatementInternal_ conn (BeginTransaction s) = singleStatement_ conn s runSingleStatementInternal_ conn (CommitTransaction s) = singleStatement_ conn s runSingleStatementInternal_ conn (RollbackTransaction s) = singleStatement_ conn s runSingleStatementInternal_ conn (OtherSqlPiece s) = singleStatement_ conn s -runSingleStatementInternal_ conn (CopyFromStdinStatement copyStm) = +runSingleStatementInternal_ conn (CopyFromStdinStatement copyStm) = do liftIO $ DB.copy_ conn $ DB.Query (encodeUtf8 copyStm) -runSingleStatementInternal_ conn (CopyFromStdinRows copyRows) = + txnStatus conn +runSingleStatementInternal_ conn (CopyFromStdinRows copyRows) = do liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows -runSingleStatementInternal_ conn (CopyFromStdinEnd _) = + txnStatus conn +runSingleStatementInternal_ conn (CopyFromStdinEnd _) = do liftIO $ void $ DB.putCopyEnd conn + txnStatus conn diff --git a/test/DbDependentSpecs/SchemaVerificationSpec.hs b/test/DbDependentSpecs/SchemaVerificationSpec.hs index a72d14f6..4950c4bb 100644 --- a/test/DbDependentSpecs/SchemaVerificationSpec.hs +++ b/test/DbDependentSpecs/SchemaVerificationSpec.hs @@ -1557,11 +1557,12 @@ spec = do connInfo testConnTimeout $ \conn -> - multiQueryStatement_ - (NotInTransaction - singleTryPolicy - ) - conn + Streaming.effects + $ multiQueryStatement_ + (NotInTransaction + singleTryPolicy + ) + conn $ mkValidSql undoSql hashesAfterUndo <- getHashes emptyDbInfo From 213140630ae791d0911a4634a2fe690683163ff8 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Thu, 7 Mar 2024 21:04:41 -0300 Subject: [PATCH 02/28] Register number of applied statements in codd_schema --- src/Codd/Internal.hs | 72 +++++++++++++++++++++++++++++--------------- src/Codd/Parsing.hs | 9 +++--- 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 8c4408a5..9a009c8c 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -205,12 +205,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -343,7 +343,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -- to create codd_schema. (runAfterMig, newBootCheck) <- case mDefaultConn of Nothing -> pure - ( \_ _ appliedAt _ -> DB.fromOnly <$> unsafeQuery1 + ( \_ _ appliedAt _ _ -> DB.fromOnly <$> unsafeQuery1 conn "SELECT COALESCE(?, now())" (DB.Only appliedAt) @@ -439,6 +439,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn appliedMigrationTimestamp (Just appliedMigrationAt) appliedMigrationDuration + appliedMigrationNumStatements runMigs :: (MonadUnliftIO n, CoddLogger n, CanStartTxn n txn) @@ -449,6 +450,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime + -> Int -> txn UTCTime ) -> n [AppliedMigration] @@ -465,6 +467,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime + -> Int -> txn UTCTime ) -> m (ApplyMigsResult b) @@ -506,27 +509,37 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn conn retryPolicy (allMigs migBlock) - (\fp ts appliedAt duration -> + (\fp ts appliedAt duration numAppliedStmts -> withTransaction txnIsolationLvl conn - $ registerMig fp ts appliedAt duration + $ registerMig + fp + ts + appliedAt + duration + numAppliedStmts ) <*> withTransaction txnIsolationLvl conn (act conn) data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application + | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly deriving stock (Bounded, Enum, Eq, Ord, Show) detectCoddSchema :: MonadIO m => DB.Connection -> m CoddSchemaVersion detectCoddSchema conn = do - cols :: [Text] <- map DB.fromOnly <$> query - conn - "select attname from pg_attribute join pg_class on attrelid=pg_class.oid join pg_namespace on relnamespace=pg_namespace.oid where relname=? AND nspname=? AND attnum >= 1 order by attnum" - ("sql_migrations" :: Text, "codd_schema" :: Text) + cols :: [Text] <- + map DB.fromOnly + <$> query + conn + "select attname from pg_attribute join pg_class on attrelid=pg_class.oid join pg_namespace on relnamespace=pg_namespace.oid where relname='sql_migrations' AND nspname='codd_schema' AND attnum >= 1 order by attnum" + () case cols of [] -> pure CoddSchemaDoesNotExist ["id", "migration_timestamp", "applied_at", "name"] -> pure CoddSchemaV1 ["id", "migration_timestamp", "applied_at", "name", "application_duration"] -> pure CoddSchemaV2 + ["id", "migration_timestamp", "applied_at", "name", "application_duration", "num_applied_statements"] + -> pure CoddSchemaV3 _ -> error $ "Internal codd error. Unless you've manually modified the codd_schema.sql_migrations table, this is a bug in codd. Please report it and include the following as column names in your report: " @@ -579,7 +592,10 @@ createCoddSchema targetVersion txnIsolationLvl conn = CoddSchemaV1 -> execvoid_ conn "ALTER TABLE codd_schema.sql_migrations ADD COLUMN application_duration INTERVAL" - CoddSchemaV2 -> pure () + CoddSchemaV2 -> execvoid_ + conn + "ALTER TABLE codd_schema.sql_migrations ADD COLUMN num_applied_statements INT" + CoddSchemaV3 -> pure () -- `succ` is a partial function, but it should never throw in this context go (succ currentSchemaVersion) @@ -895,6 +911,7 @@ applySingleMigration -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime + -> Int -> txn UTCTime ) -> TxnIsolationLvl @@ -908,7 +925,7 @@ applySingleMigration conn statementRetryPol afterMigRun isolLvl (AddedSqlMigrati else NotInTransaction statementRetryPol logInfoNoNewline $ "Applying " <> Text.pack fn - (numStatements, appliedMigrationDuration) <- + (appliedMigrationNumStatements, appliedMigrationDuration) <- timeAction ( Streaming.length_ $ Streaming.filter countsAsRealStatement @@ -916,19 +933,24 @@ applySingleMigration conn statementRetryPol afterMigRun isolLvl (AddedSqlMigrati $ migrationSql sqlMig ) `onException` logInfo " [failed]" - timestamp <- withTransaction isolLvl conn - $ afterMigRun fn migTimestamp Nothing appliedMigrationDuration + timestamp <- withTransaction isolLvl conn $ afterMigRun + fn + migTimestamp + Nothing + appliedMigrationDuration + appliedMigrationNumStatements logInfo $ " (" <> prettyPrintDuration appliedMigrationDuration <> ", " - <> Fmt.sformat Fmt.int numStatements + <> Fmt.sformat Fmt.int appliedMigrationNumStatements <> ")" - pure AppliedMigration { appliedMigrationName = migrationName sqlMig - , appliedMigrationTimestamp = migTimestamp - , appliedMigrationAt = timestamp + pure AppliedMigration { appliedMigrationName = migrationName sqlMig + , appliedMigrationTimestamp = migTimestamp + , appliedMigrationAt = timestamp , appliedMigrationDuration + , appliedMigrationNumStatements } where pico_1ns = 1_000 @@ -990,12 +1012,13 @@ registerRanMigration -> DB.UTCTimestamp -> Maybe UTCTime -- ^ The time the migration finished being applied. If not supplied, pg's now() will be used -> DiffTime + -> Int -- ^ The number of applied statements -> m UTCTime -registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration +registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration numAppliedStatements = withTransaction @txn isolLvl conn $ DB.fromOnly <$> unsafeQuery1 conn - "INSERT INTO codd_schema.sql_migrations (migration_timestamp, name, applied_at, application_duration) \ - \ VALUES (?, ?, COALESCE(?, now()), ?) \ + "INSERT INTO codd_schema.sql_migrations (migration_timestamp, name, applied_at, application_duration, num_applied_statements) \ + \ VALUES (?, ?, COALESCE(?, now()), ?, ?) \ \ RETURNING applied_at" ( migTimestamp , fn @@ -1005,4 +1028,5 @@ registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDura realToFrac @Double @NominalDiffTime $ fromIntegral (diffTimeToPicoseconds appliedMigrationDuration) / 1_000_000_000_000 + , numAppliedStatements ) diff --git a/src/Codd/Parsing.hs b/src/Codd/Parsing.hs index 110aad4d..e0c57e8b 100644 --- a/src/Codd/Parsing.hs +++ b/src/Codd/Parsing.hs @@ -151,12 +151,13 @@ data AddedSqlMigration m = AddedSqlMigration } data AppliedMigration = AppliedMigration - { appliedMigrationName :: FilePath - , appliedMigrationTimestamp :: DB.UTCTimestamp + { appliedMigrationName :: FilePath + , appliedMigrationTimestamp :: DB.UTCTimestamp -- ^ The migration's timestamp as extracted from its file name. - , appliedMigrationAt :: UTCTime + , appliedMigrationAt :: UTCTime -- ^ When the migration was effectively applied. - , appliedMigrationDuration :: DiffTime + , appliedMigrationDuration :: DiffTime + , appliedMigrationNumStatements :: Int } data FileStream m = FileStream From 1eedf4415773378e41d83079392f4626fec1c018 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Thu, 7 Mar 2024 21:25:11 -0300 Subject: [PATCH 03/28] Also register if a no-txn migration failed This is necessary to allow resuming application of failed no-txn migrations from their point of failure onwards. While this should be extremely rare, if we are in the business of retrying `BEGIN ... COMMIT` blocks of no-txn migrations, we're already in extremely rare territory. It is also behaviour that might be useful in the darkest of hours. We will still need to change how we collect pending migrations, because now a migration's applied state is no longer binary (i.e. applied or not), implement seeking and a bunch of other things. --- src/Codd/Internal.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 9a009c8c..d0a75b86 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -205,12 +205,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -538,7 +538,7 @@ detectCoddSchema conn = do pure CoddSchemaV1 ["id", "migration_timestamp", "applied_at", "name", "application_duration"] -> pure CoddSchemaV2 - ["id", "migration_timestamp", "applied_at", "name", "application_duration", "num_applied_statements"] + ["id", "migration_timestamp", "applied_at", "name", "application_duration", "num_applied_statements", "no_txn_failed_at"] -> pure CoddSchemaV3 _ -> error @@ -594,7 +594,7 @@ createCoddSchema targetVersion txnIsolationLvl conn = "ALTER TABLE codd_schema.sql_migrations ADD COLUMN application_duration INTERVAL" CoddSchemaV2 -> execvoid_ conn - "ALTER TABLE codd_schema.sql_migrations ADD COLUMN num_applied_statements INT" + "ALTER TABLE codd_schema.sql_migrations ADD COLUMN num_applied_statements INT, ADD COLUMN no_txn_failed_at timestamptz, ALTER COLUMN applied_at DROP NOT NULL, ADD CONSTRAINT no_txn_mig_applied_or_failed CHECK ((applied_at IS NULL) <> (no_txn_failed_at IS NULL))" CoddSchemaV3 -> pure () -- `succ` is a partial function, but it should never throw in this context From 40483a16d5b2703ffc27ddaf41853301f3827bdc Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Fri, 8 Mar 2024 21:21:58 -0300 Subject: [PATCH 04/28] Changing code so we can insert into codd_schema even on sql statement errors Right now this amounts to throwing exceptions in different places, and using a new custom exception type to carry information of where the last `BEGIN` was higher up in the call stack (this does not feel great), if there was one at all. But so far we don't transmit that yet. Also, this removes calls to retry statements in no-txn migrations from functions deeper in the call stack, so that retrying is done higher up where it's possible to re-read files and seek the stream to the right statement. And this actually helped clean up some of the code. Still much to do, though. --- src/Codd/Internal.hs | 89 ++++++++------- src/Codd/Internal/MultiQueryStatement.hs | 104 +++++++++--------- test/DbDependentSpecs/ApplicationSpec.hs | 3 +- .../SchemaVerificationSpec.hs | 9 +- 4 files changed, 102 insertions(+), 103 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index d0a75b86..1c0cb34e 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -5,7 +5,7 @@ import Prelude hiding ( readFile ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal.MultiQueryStatement - ( InTransaction(..) + ( SqlStatementException , multiQueryStatement_ ) import Codd.Internal.Retry ( RetryIteration(..) @@ -24,7 +24,6 @@ import Codd.Parsing ( AddedSqlMigration(..) , FileStream(..) , ParsedSql(..) , SqlMigration(..) - , SqlPiece(..) , hoistAddedSqlMigration , parseAddedSqlMigration , parseSqlPiecesStreaming @@ -77,6 +76,7 @@ import qualified Database.PostgreSQL.Simple as DB import qualified Database.PostgreSQL.Simple.Time as DB import qualified Formatting as Fmt +import Streaming ( Of(..) ) import qualified Streaming.Prelude as Streaming import System.Clock ( Clock(Monotonic) , TimeSpec(..) @@ -86,10 +86,10 @@ import System.Exit ( exitFailure ) import System.FilePath ( () , takeFileName ) -import UnliftIO ( MonadUnliftIO +import UnliftIO ( Exception + , MonadUnliftIO , hClose , newIORef - , onException , readIORef , timeout , writeIORef @@ -205,12 +205,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -444,7 +444,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn runMigs :: (MonadUnliftIO n, CoddLogger n, CanStartTxn n txn) => DB.Connection - -> RetryPolicy -> NonEmpty (AddedSqlMigration n) -> ( FilePath -> DB.UTCTimestamp @@ -454,9 +453,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> txn UTCTime ) -> n [AppliedMigration] - runMigs conn withRetryPolicy migs runAfterMig = + runMigs conn migs runAfterMig = fmap NE.toList $ forM migs $ applySingleMigration conn - withRetryPolicy runAfterMig txnIsolationLvl runBlock @@ -475,6 +473,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn if blockInTxn migBlock then do res <- + -- Naturally, we retry entire in-txn block transactions on error, not individual statements retryFold retryPolicy (\previousBlock RetryIteration { tryNumber } -> @@ -492,14 +491,13 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn $ ApplyMigsResult <$> runMigs conn - singleTryPolicy (allMigs (hoistBlockOfMigrations lift blockFinal ) ) - registerMig -- We retry entire transactions, not individual statements + registerMig <*> act conn logInfo "COMMITed transaction" pure res @@ -507,7 +505,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ApplyMigsResult <$> runMigs conn - retryPolicy (allMigs migBlock) (\fp ts appliedAt duration numAppliedStmts -> withTransaction txnIsolationLvl conn @@ -900,13 +897,22 @@ blockCustomConnInfo :: BlockOfMigrations m -> Maybe DB.ConnectInfo blockCustomConnInfo (BlockOfMigrations (AddedSqlMigration { addedSqlMig } :| _) _) = migrationCustomConnInfo addedSqlMig +data NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartFromLastExplicitBegin Int -- ^ No-txn migrations can have explicit BEGIN and COMMIT statements. If a statement inside that BEGIN..COMMIT block fails, it's useless to retry it. In that case, we must retry from the last BEGIN statement, whose statement-number inside the migration is contained here. + | NoTxnMigMustRetryFailedStatement -- ^ When we're not inside an explicit BEGIN..COMMIT block in a no-txn migration, we must retry the statement that failed itself + deriving stock Show +data MigrationApplicationFailure = MigrationApplicationFailure + { sqlStatementEx :: SqlStatementException + , noTxnMigRetryInstructions :: Maybe NoTxnMigFailureRetryInstructions + } + deriving stock Show +instance Exception MigrationApplicationFailure + -- | Applies a single migration and returns the time when it finished being applied. Does not -- itself register that the migration ran, only runs "afterMigRun" after applying the migration. applySingleMigration :: forall m txn . (MonadUnliftIO m, CoddLogger m, CanStartTxn m txn) => DB.Connection - -> RetryPolicy -> ( FilePath -> DB.UTCTimestamp -> Maybe UTCTime @@ -917,22 +923,35 @@ applySingleMigration -> TxnIsolationLvl -> AddedSqlMigration m -> m AppliedMigration -applySingleMigration conn statementRetryPol afterMigRun isolLvl (AddedSqlMigration sqlMig migTimestamp) +applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimestamp) = do - let fn = migrationName sqlMig - inTxn = if migrationInTxn sqlMig - then InTransaction - else NotInTransaction statementRetryPol + let fn = migrationName sqlMig logInfoNoNewline $ "Applying " <> Text.pack fn (appliedMigrationNumStatements, appliedMigrationDuration) <- - timeAction - ( Streaming.length_ - $ Streaming.filter countsAsRealStatement - $ multiQueryStatement_ inTxn conn - $ migrationSql sqlMig - ) - `onException` logInfo " [failed]" + timeAction $ do + (numStmts :> errorOrDone) <- + Streaming.length $ multiQueryStatement_ conn $ migrationSql + sqlMig + case errorOrDone of + Just err -> do + -- TODO: register partially applied no-txn migration + logInfo " [failed]" + logError "GOING TO THROW EXCEPTION" + -- TODO: only for now, throw assuming we're not in an explicit begin..commit block. We'll implement + -- that later. + noTxnMigRetryInstructions <- if migrationInTxn sqlMig + then pure Nothing + else do + logError + "TODO: register partially applied no-txn migration. Use INSERT ON CONFLICT since we might have progressed after a retry!" + pure $ Just NoTxnMigMustRetryFailedStatement + throwIO $ MigrationApplicationFailure + { sqlStatementEx = err + , noTxnMigRetryInstructions + } + Nothing -> pure numStmts + timestamp <- withTransaction isolLvl conn $ afterMigRun fn migTimestamp @@ -972,18 +991,6 @@ applySingleMigration conn statementRetryPol afterMigRun isolLvl (AddedSqlMigrati (Fmt.fixed @Double 1) (fromIntegral @Integer (round (10 * dps / pico_1s)) / 10) <> "s" -- e.g. 10.5s - -- | If our goal were only to resume migration application from the last statement that failed, - -- we could count white space and the body and ending of COPY. However, that would be an odd number from a human - -- perspective, and our parser is also more prone to changing how it e.g. groups comments with actual statements, - -- so it's more dangerous to count these things. - countsAsRealStatement = \case - Left _text -> True -- Unparsed SQL counts as a single statement - Right p -> case p of - CommentPiece _ -> False - WhiteSpacePiece _ -> False - CopyFromStdinRows _ -> False - CopyFromStdinEnd _ -> False - _ -> True timeAction f = do before <- liftIO $ getTime Monotonic ret <- f diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index 0ef548d6..2dcbc73b 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -1,16 +1,13 @@ module Codd.Internal.MultiQueryStatement - ( InTransaction(..) - , SqlStatementException(..) + ( SqlStatementException(..) , multiQueryStatement_ , runSingleStatementInternal_ ) where -import Codd.Internal.Retry ( retry_ ) import Codd.Logging ( CoddLogger ) import Codd.Parsing ( ParsedSql(..) , SqlPiece(..) ) -import Codd.Types ( RetryPolicy ) import Control.Monad ( void ) import Data.Text ( Text ) import Data.Text.Encoding ( encodeUtf8 ) @@ -23,9 +20,10 @@ import qualified Database.PostgreSQL.Simple.Internal import qualified Database.PostgreSQL.Simple.Types as DB import Prelude hiding ( takeWhile ) -import Streaming ( Of +import Streaming ( Of(..) , Stream ) +import qualified Streaming.Internal as S import qualified Streaming.Prelude as Streaming import qualified Streaming.Prelude as S import UnliftIO ( Exception @@ -33,7 +31,6 @@ import UnliftIO ( Exception , MonadUnliftIO , handle , liftIO - , throwIO ) data SqlStatementException = SqlStatementException @@ -41,69 +38,70 @@ data SqlStatementException = SqlStatementException , psimpleError :: DB.SqlError } deriving stock Show - instance Exception SqlStatementException +data StatementApplied = NotACountableStatement + -- ^ Some SQL is comprised only of comments and white space. That is valid and parsed SQL that will make it to the functions in this module, but it's important we recognize them so we don't send them to postgres, and so our callers, who wish to count real applied statements, can do exactly that. + -- This counting is necessary to resume failed no-txn migration application from the right statement if necessary, and while for that end we could count white space and comments, those are much more prone to change in our custom parser than actual individual SQL statements. Plus, they would make no sense to a human manually checking them. + | StatementApplied PQ.TransactionStatus + -- ^ Contains the transaction state _after_ the statement was applied. + | StatementErred SqlStatementException txnStatus :: MonadIO m => DB.Connection -> m PQ.TransactionStatus txnStatus conn = liftIO $ PGInternal.withConnection conn PQ.transactionStatus --- | Runs SQL that could be either a row-returning or count-returning statement. Just like postgresql-simple, throws exceptions in case of SQL errors. -singleStatement_ - :: MonadIO m => DB.Connection -> Text -> m PQ.TransactionStatus +-- | Runs SQL that could be either a row-returning or count-returning statement. Unlike postgresql-simple, this does not throw exceptions in case of SQL errors, but rather returns them. +singleStatement_ :: MonadIO m => DB.Connection -> Text -> m StatementApplied singleStatement_ conn sql = do res <- liftIO $ PGInternal.exec conn $ encodeUtf8 sql status <- liftIO $ PQ.resultStatus res case status of - PQ.CommandOk -> txnStatus conn - PQ.TuplesOk -> txnStatus conn + PQ.CommandOk -> StatementApplied <$> txnStatus conn + PQ.TuplesOk -> StatementApplied <$> txnStatus conn _ -> liftIO - $ handle (throwIO . SqlStatementException sql) + $ handle (pure . StatementErred . SqlStatementException sql) $ - -- Throw to catch and re-throw.. a bit nasty, but should be ok + -- Throw to catch and with that get the statement that failed and error message.. a bit nasty, but should be ok PGInternal.throwResultError "singleStatement_" res status -data InTransaction = InTransaction | NotInTransaction RetryPolicy deriving stock (Eq) - --- data StatementApplied = WhitespaceOrCommentSkipped | StatementApplied PQ.TransactionStatus - --- isNotCommentOrWhiteSpace :: StatementApplied -> Bool --- isNotCommentOrWhiteSpace (StatementApplied _) = True --- isNotCommentOrWhiteSpace _ = False - --- | A bit like singleStatement_, but following these criteria: --- 1. If already in a transaction, then statements will be executed one-by-one. --- 2. If not in a transaction, then statements will be executed one-by-one and --- will be retried according to the retry policy. +-- | Returns a Stream with the transaction status after each applied statement, until either every statement +-- is applied or one of them throws an exception, which will be in the return value of the stream. +-- The returned stream does not contain statements that aren't meant to be counted. multiQueryStatement_ - :: (MonadUnliftIO m, CoddLogger m) - => InTransaction - -> DB.Connection + :: forall m + . (MonadUnliftIO m, CoddLogger m) + => DB.Connection -> ParsedSql m - -> Stream (Of (Either Text SqlPiece)) m () -multiQueryStatement_ inTxn conn sql = case (sql, inTxn) of - (UnparsedSql t, InTransaction) -> - S.mapM (fmap (const $ Left t) . singleStatement_ conn) (S.yield t) -- There must be some simpler function to avoid mapM and yield.. - (UnparsedSql t, NotInTransaction retryPol) -> S.mapM - (fmap (const $ Left t) . retry_ retryPol . singleStatement_ conn) - (S.yield t) - (WellParsedSql stms, NotInTransaction retryPol) -> - -- We retry individual statements in no-txn migrations - flip Streaming.mapM stms $ \stm -> - fmap (const $ Right stm) - $ retry_ retryPol - $ runSingleStatementInternal_ conn stm - (WellParsedSql stms, InTransaction) -> - -- We don't retry individual statements in in-txn migrations - Streaming.mapM - (\stm -> Right stm <$ runSingleStatementInternal_ conn stm) - stms + -> Stream (Of PQ.TransactionStatus) m (Maybe SqlStatementException) +multiQueryStatement_ conn sql = + partitionEithersReturn id + $ S.mapMaybe + (\case + NotACountableStatement -> Nothing + StatementApplied ts -> Just $ Right ts + StatementErred e -> Just $ Left e + ) + $ case sql of + -- There must be some simpler function to avoid mapM and yield.. + UnparsedSql t -> S.mapM (singleStatement_ conn) (S.yield t) + WellParsedSql stms -> + Streaming.mapM (runSingleStatementInternal_ conn) stms + where + -- | Like `S.partitionEithers`, but with `Left` being considered an error and put as the stream's return value if one exists while the `Rights` are streamed. + partitionEithersReturn + :: (a -> Either e s) -> Stream (Of a) m r -> Stream (Of s) m (Maybe e) + partitionEithersReturn f = \case + S.Step (el :> rest) -> case f el of + Left err -> S.Return $ Just err + Right x -> S.Step $ x :> partitionEithersReturn f rest + S.Effect m -> S.Effect $ partitionEithersReturn f <$> m + S.Return _ -> S.Return Nothing runSingleStatementInternal_ - :: MonadIO m => DB.Connection -> SqlPiece -> m PQ.TransactionStatus -runSingleStatementInternal_ conn (CommentPiece _) = txnStatus conn -runSingleStatementInternal_ conn (WhiteSpacePiece _) = txnStatus conn + :: MonadIO m => DB.Connection -> SqlPiece -> m StatementApplied +runSingleStatementInternal_ _ (CommentPiece _) = pure NotACountableStatement +runSingleStatementInternal_ _ (WhiteSpacePiece _) = pure NotACountableStatement runSingleStatementInternal_ conn (BeginTransaction s) = singleStatement_ conn s runSingleStatementInternal_ conn (CommitTransaction s) = singleStatement_ conn s @@ -112,10 +110,10 @@ runSingleStatementInternal_ conn (RollbackTransaction s) = runSingleStatementInternal_ conn (OtherSqlPiece s) = singleStatement_ conn s runSingleStatementInternal_ conn (CopyFromStdinStatement copyStm) = do liftIO $ DB.copy_ conn $ DB.Query (encodeUtf8 copyStm) - txnStatus conn + StatementApplied <$> txnStatus conn runSingleStatementInternal_ conn (CopyFromStdinRows copyRows) = do liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows - txnStatus conn + pure NotACountableStatement runSingleStatementInternal_ conn (CopyFromStdinEnd _) = do liftIO $ void $ DB.putCopyEnd conn - txnStatus conn + pure NotACountableStatement diff --git a/test/DbDependentSpecs/ApplicationSpec.hs b/test/DbDependentSpecs/ApplicationSpec.hs index b61cb14a..87e00aef 100644 --- a/test/DbDependentSpecs/ApplicationSpec.hs +++ b/test/DbDependentSpecs/ApplicationSpec.hs @@ -6,6 +6,7 @@ import Codd ( VerifySchemas(..) ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal ( CoddSchemaVersion(..) + , MigrationApplicationFailure , createCoddSchema , detectCoddSchema , withConnection @@ -697,7 +698,7 @@ spec = do testConnTimeout (const $ pure ()) ) - `shouldThrow` (\(e :: SqlStatementException) -> + `shouldThrow` (\(e :: MigrationApplicationFailure) -> "division by zero" `List.isInfixOf` show e diff --git a/test/DbDependentSpecs/SchemaVerificationSpec.hs b/test/DbDependentSpecs/SchemaVerificationSpec.hs index 4950c4bb..57e4d2c8 100644 --- a/test/DbDependentSpecs/SchemaVerificationSpec.hs +++ b/test/DbDependentSpecs/SchemaVerificationSpec.hs @@ -4,11 +4,7 @@ import Codd ( applyMigrationsNoCheck ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal ( withConnection ) import Codd.Internal.MultiQueryStatement - ( InTransaction - ( NotInTransaction - ) - , multiQueryStatement_ - ) + ( multiQueryStatement_ ) import Codd.Logging ( runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) , EnvVars @@ -1559,9 +1555,6 @@ spec = do $ \conn -> Streaming.effects $ multiQueryStatement_ - (NotInTransaction - singleTryPolicy - ) conn $ mkValidSql undoSql hashesAfterUndo <- getHashes From c7fac6aa89386e1954ee164d5ee4e798b043f3aa Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 9 Mar 2024 22:00:10 -0300 Subject: [PATCH 05/28] Don't group no-txn migs together so retrying the block means retrying a single migration. This invariant is not obvious and so fragile to rely on. This commit also includes a bit more of the plumbing that is necessary to retry no-txn migs appropriately, but there's still much to do. --- src/Codd/Internal.hs | 112 ++++++++++++++++++++++++++----------- src/Codd/Internal/Retry.hs | 43 +++++++------- 2 files changed, 102 insertions(+), 53 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 1c0cb34e..1e57d26d 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -42,10 +42,7 @@ import Codd.Representations ( DbRep , logSchemasComparison , readRepresentationsFromDbWithSettings ) -import Codd.Types ( RetryPolicy(..) - , TxnIsolationLvl(..) - , singleTryPolicy - ) +import Codd.Types ( TxnIsolationLvl(..) ) import Control.Monad ( (>=>) , foldM , forM @@ -205,12 +202,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -474,7 +471,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn then do res <- -- Naturally, we retry entire in-txn block transactions on error, not individual statements - retryFold + retryFold @MigrationApplicationFailure retryPolicy (\previousBlock RetryIteration { tryNumber } -> if tryNumber == 0 @@ -502,20 +499,60 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn logInfo "COMMITed transaction" pure res else - ApplyMigsResult - <$> runMigs - conn - (allMigs migBlock) - (\fp ts appliedAt duration numAppliedStmts -> - withTransaction txnIsolationLvl conn - $ registerMig - fp - ts - appliedAt - duration - numAppliedStmts + -- Retrying in no-txn migrations is much more complicated due to the presence of explicit user-written + -- `BEGIN..COMMIT` blocks. If the failed statement is outside one of those, we retry that statement. + -- Otherwise, we need to ROLLBACK, and retry from the last BEGIN statement onwards. + retryFold @MigrationApplicationFailure + retryPolicy + (\(previousBlock, _) RetryIteration { lastException } -> + case lastException of + Nothing -> pure (previousBlock, Nothing) + Just MigrationApplicationFailure { noTxnMigRetryInstructions } + -> case noTxnMigRetryInstructions of + Nothing -> + error + "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions for that" + Just (NoTxnMigMustRestartFromLastExplicitBegin beginStmtNumber) + -> do + logDebug + "Re-reading migrations of this block from disk" + freshBlock <- reReadBlock + previousBlock + pure + ( freshBlock + , Just beginStmtNumber + ) + Just NoTxnMigMustRetryFailedStatement + -> pure (previousBlock, Nothing) ) - <*> withTransaction txnIsolationLvl conn (act conn) + (migBlock, Nothing) + $ \(blockFinal, skipToNthStatement) -> do + let + streamToApply = case skipToNthStatement of + -- TODO: We don't group no-txn migrations together so this should be the only migration in the block. + -- Still it'd be nice to have this better typed so we don't have a cross-function invariant to maintain/remember + Just n -> + error + "Needs to drop n countable statements" + $ NE.head (allMigs blockFinal) + Nothing -> error + "Still need to implementa all of this" + ApplyMigsResult + <$> runMigs + conn + (allMigs migBlock) + (\fp ts appliedAt duration numAppliedStmts -> + withTransaction txnIsolationLvl conn + $ registerMig + fp + ts + appliedAt + duration + numAppliedStmts + ) + <*> withTransaction txnIsolationLvl + conn + (act conn) data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly @@ -746,12 +783,15 @@ parseMigrationFiles migsCompleted sqlMigrations = do (pure . readFromMemory . listPendingFromMemory) sqlMigrations - -- Group migrations in blocks of consecutive transactions by in-txn/no-txn and custom - -- connection string. + -- Group consecutive in-txn migrations with the same connection string together since they're atomic. in blocks of consecutive transactions by in-txn/no-txn and custom + -- Every other case should be migrations alone in their own groups. pure - $ NE.groupWith - (\(_, AddedSqlMigration m _) -> - (migrationInTxn m, migrationCustomConnInfo m) + $ NE.groupBy + (\(_, AddedSqlMigration m1 _) (_, AddedSqlMigration m2 _) -> + migrationInTxn m1 + && migrationInTxn m2 + && migrationCustomConnInfo m1 + == migrationCustomConnInfo m2 ) pendingParsedMigrations <&> \migs -> BlockOfMigrations { allMigs = snd <$> migs @@ -934,10 +974,8 @@ applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimes Streaming.length $ multiQueryStatement_ conn $ migrationSql sqlMig case errorOrDone of - Just err -> do - -- TODO: register partially applied no-txn migration + Just sqlStatementEx -> do logInfo " [failed]" - logError "GOING TO THROW EXCEPTION" -- TODO: only for now, throw assuming we're not in an explicit begin..commit block. We'll implement -- that later. noTxnMigRetryInstructions <- if migrationInTxn sqlMig @@ -945,9 +983,17 @@ applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimes else do logError "TODO: register partially applied no-txn migration. Use INSERT ON CONFLICT since we might have progressed after a retry!" + logError + $ "After applying " + <> Fmt.sformat Fmt.int numStmts + <> " statements from " + <> Text.pack fn + <> ", the " + <> Fmt.sformat Fmt.ords (numStmts + 1) + <> " failed to be applied." pure $ Just NoTxnMigMustRetryFailedStatement throwIO $ MigrationApplicationFailure - { sqlStatementEx = err + { sqlStatementEx , noTxnMigRetryInstructions } Nothing -> pure numStmts diff --git a/src/Codd/Internal/Retry.hs b/src/Codd/Internal/Retry.hs index 102ad16f..a8bbb120 100644 --- a/src/Codd/Internal/Retry.hs +++ b/src/Codd/Internal/Retry.hs @@ -1,6 +1,5 @@ module Codd.Internal.Retry ( RetryIteration(..) - , retry_ , retryFold ) where @@ -15,50 +14,54 @@ import Data.Maybe ( isNothing ) import qualified Data.Text as Text import UnliftIO ( MonadUnliftIO ) import UnliftIO.Concurrent ( threadDelay ) -import UnliftIO.Exception ( catchAny ) +import UnliftIO.Exception ( Exception + , catch + ) -data RetryIteration = RetryIteration - { isLastTry :: Bool - , tryNumber :: Int +data RetryIteration a = RetryIteration + { isLastTry :: Bool + , tryNumber :: Int -- ^ 0-indexed try number. E.g. 0 is the first try, 1 is the first *retry*. + , lastException :: Maybe a + -- ^ If this is a retry, the exception that caused it } -retry_ :: (MonadUnliftIO m, CoddLogger m) => RetryPolicy -> m c -> m c -retry_ rpol f = retryFold rpol accf () (const f) where accf () _ = pure () - -- | Retries an action as many times and with wait intervals according -- to the supplied `RetryPolicy`, but only retries in case of synchronous -- exceptions. Provides fold-like behavior for an accumulator -- for each try, including the first one. retryFold - :: (MonadUnliftIO m, CoddLogger m) + :: forall e m b a + . (MonadUnliftIO m, CoddLogger m, Exception e) => RetryPolicy - -> (b -> RetryIteration -> m b) + -> (b -> RetryIteration e -> m b) -- ^ Accumulating function. This runs even for the first try. -> b -- ^ Initial value of the accumulator. -> (b -> m a) - -- ^ Action to retry. Any synchronous exceptions are caught and logged. + -- ^ Action to retry. Any exceptions of the chosen type are caught and logged as errors. -- Retries don't happen in case no exceptions are thrown. -> m a -retryFold initialPol accf acc0 f = go initialPol acc0 0 +retryFold initialPol accf acc0 f = go initialPol acc0 0 Nothing where - go rpol previousAcc tryNumber = do + go rpol previousAcc tryNumber lastException = do let mNextPol = retryPolicyIterate rpol - thisIter = - RetryIteration { isLastTry = isNothing mNextPol, tryNumber } + thisIter = RetryIteration { isLastTry = isNothing mNextPol + , tryNumber + , lastException + } thisAcc <- accf previousAcc thisIter case mNextPol of Nothing -> f thisAcc - -- UnliftIO's `catchAny` does not catch async exceptions, - -- which is what we want here. - Just (waitIfFail, nextPol) -> catchAny (f thisAcc) $ \e -> do + Just (waitIfFail, nextPol) -> catch (f thisAcc) $ \(ex :: e) -> do let waitTimeMS :: Int = truncate $ (realToFrac waitIfFail :: Float) * 1000 - logError $ "Got SQL Error: " <> Text.pack (show e) + -- It would be more reasonable if this retryFold function didn't print anything, letting + -- its callers do that. Maybe in the future. + logError $ "Got SQL Error: " <> Text.pack (show ex) logWarn $ "Waiting " <> Text.pack (show waitTimeMS) <> "ms before next try" threadDelay (1000 * waitTimeMS) - go nextPol thisAcc (tryNumber + 1) + go nextPol thisAcc (tryNumber + 1) (Just ex) From b2a3b72ceb6692f03a81c534c6da75635a642ae0 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sun, 10 Mar 2024 11:09:54 -0300 Subject: [PATCH 06/28] Properly compute the statement number of the last `BEGIN` statement --- src/Codd/Internal.hs | 80 ++++++++++++++++++++---- src/Codd/Internal/MultiQueryStatement.hs | 1 + 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 1e57d26d..8d003001 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -7,6 +7,7 @@ import Codd.Environment ( CoddSettings(..) ) import Codd.Internal.MultiQueryStatement ( SqlStatementException , multiQueryStatement_ + , txnStatus ) import Codd.Internal.Retry ( RetryIteration(..) , retryFold @@ -48,6 +49,7 @@ import Control.Monad ( (>=>) , forM , forM_ , unless + , void , when ) import Control.Monad.IO.Class ( MonadIO(..) ) @@ -69,9 +71,11 @@ import Data.Time ( DiffTime , diffTimeToPicoseconds , picosecondsToDiffTime ) +import qualified Database.PostgreSQL.LibPQ as PQ import qualified Database.PostgreSQL.Simple as DB import qualified Database.PostgreSQL.Simple.Time as DB +import Debug.Trace import qualified Formatting as Fmt import Streaming ( Of(..) ) import qualified Streaming.Prelude as Streaming @@ -202,12 +206,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -514,6 +518,11 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions for that" Just (NoTxnMigMustRestartFromLastExplicitBegin beginStmtNumber) -> do + void $ liftIO $ DB.execute_ + conn + "ROLLBACK" + logWarn + "ROLLBACKed last explicitly started transaction before retrying" logDebug "Re-reading migrations of this block from disk" freshBlock <- reReadBlock @@ -967,17 +976,54 @@ applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimes = do let fn = migrationName sqlMig logInfoNoNewline $ "Applying " <> Text.pack fn + initialTxnStatus <- txnStatus conn -- TODO: For no-txn migrations this should be "not in a transaction". Should we assert that? + logInfo $ "BEFORE FIRST: " <> Text.pack (show initialTxnStatus) (appliedMigrationNumStatements, appliedMigrationDuration) <- timeAction $ do - (numStmts :> errorOrDone) <- - Streaming.length $ multiQueryStatement_ conn $ migrationSql - sqlMig + ((numStmts, mLastBegin, _) :> errorOrDone) <- + Streaming.fold + (\(!l, !lastBegin, !lastTxnStatus) txnStatusNow -> + -- TODO: Properly compute lastBegin over fold, with it being Nothing + -- if outside a BEGIN..COMMIT block and a number otherwise + traceShowId + $ (l + 1, , txnStatusNow) + $ case (lastTxnStatus, txnStatusNow) of + (PQ.TransInTrans, PQ.TransInTrans) -> + lastBegin + (PQ.TransIdle, PQ.TransIdle) -> Nothing + (PQ.TransIdle, PQ.TransInTrans) -> + Just (l + 1) + (PQ.TransInTrans, PQ.TransIdle) -> + Nothing + (PQ.TransActive, _) -> + error + "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" + (_, PQ.TransActive) -> + error + "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" + (PQ.TransInError, _) -> + error + "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + (_, PQ.TransInError) -> + error + "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + (PQ.TransUnknown, _) -> + error + "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + (_, PQ.TransUnknown) -> + error + "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + ) + (0, Nothing, initialTxnStatus) + id + $ multiQueryStatement_ conn + $ migrationSql sqlMig + afterTxnStatus <- txnStatus conn + logInfo $ "AFTER LAST: " <> Text.pack (show afterTxnStatus) case errorOrDone of Just sqlStatementEx -> do logInfo " [failed]" - -- TODO: only for now, throw assuming we're not in an explicit begin..commit block. We'll implement - -- that later. noTxnMigRetryInstructions <- if migrationInTxn sqlMig then pure Nothing else do @@ -986,12 +1032,20 @@ applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimes logError $ "After applying " <> Fmt.sformat Fmt.int numStmts - <> " statements from " + <> " statements from no-txn migration " <> Text.pack fn <> ", the " <> Fmt.sformat Fmt.ords (numStmts + 1) <> " failed to be applied." - pure $ Just NoTxnMigMustRetryFailedStatement + case mLastBegin of + Nothing -> + pure $ Just + NoTxnMigMustRetryFailedStatement + Just lastBeginNum -> do + pure + $ Just + $ NoTxnMigMustRestartFromLastExplicitBegin + lastBeginNum throwIO $ MigrationApplicationFailure { sqlStatementEx , noTxnMigRetryInstructions diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index 2dcbc73b..da1352dc 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -2,6 +2,7 @@ module Codd.Internal.MultiQueryStatement ( SqlStatementException(..) , multiQueryStatement_ , runSingleStatementInternal_ + , txnStatus ) where import Codd.Logging ( CoddLogger ) From f8472b80c1195c50d33a7029b69e7c448e9aa435 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Mon, 11 Mar 2024 18:01:51 -0300 Subject: [PATCH 07/28] Precise types for blocks of in/no-txn migrations, cleaner code --- src/Codd/Internal.hs | 359 +++++++++++++---------- src/Codd/Internal/MultiQueryStatement.hs | 11 +- test/ParsingSpec.hs | 5 +- 3 files changed, 212 insertions(+), 163 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 8d003001..b30ae789 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -206,12 +206,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -306,11 +306,9 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn map (hoistBlockOfMigrations lift) pendingMigs isSingleInTxnBlock = case pendingMigs of [] -> True - [block] - | blockInTxn block - && fromMaybe defaultConnInfo (blockCustomConnInfo block) + [block@(BlockInTxn _)] -> + fromMaybe defaultConnInfo (blockCustomConnInfo block) == defaultConnInfo - -> True _ -> False -- Note: We could probably compound this Monad with StateT instead of using an MVar, but IIRC that creates issues @@ -375,16 +373,22 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) ApplyMigsResult justAppliedMigs newSingleBlockResult <- - if isSingleInTxnBlock - then runBlock + case (block, isSingleInTxnBlock) of + (BlockInTxn inTxnBlock, True) -> runInTxnBlock (fmap Just . actionAfter hoistedBlocks) conn - block + inTxnBlock + runAfterMig + (BlockInTxn inTxnBlock, False) -> runInTxnBlock + (const $ pure Nothing) + conn + inTxnBlock + runAfterMig + (BlockNoTxn noTxnBlock, _) -> runNoTxnMig + (const $ pure Nothing) + conn + noTxnBlock runAfterMig - else runBlock (const $ pure Nothing) - conn - block - runAfterMig -- Keep in mind that migrations are applied but might not be registered if -- we still haven't run any default-connection-string migrations. @@ -442,10 +446,10 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn appliedMigrationDuration appliedMigrationNumStatements - runMigs - :: (MonadUnliftIO n, CoddLogger n, CanStartTxn n txn) - => DB.Connection - -> NonEmpty (AddedSqlMigration n) + runInTxnBlock + :: (DB.Connection -> txn b) + -> DB.Connection + -> ConsecutiveInTxnMigrations m -> ( FilePath -> DB.UTCTimestamp -> Maybe UTCTime @@ -453,15 +457,41 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> Int -> txn UTCTime ) - -> n [AppliedMigration] - runMigs conn migs runAfterMig = - fmap NE.toList $ forM migs $ applySingleMigration conn - runAfterMig - txnIsolationLvl - runBlock + -> m (ApplyMigsResult b) + runInTxnBlock act conn migBlock registerMig = do + res <- + -- Naturally, we retry entire in-txn block transactions on error, not individual statements + retryFold @MigrationApplicationFailure + retryPolicy + (\previousBlock RetryIteration { tryNumber } -> + if tryNumber == 0 + then pure previousBlock + else do + logDebug + "Re-reading migrations of this block from disk" + reReadBlock previousBlock + ) + migBlock + $ \blockFinal -> do + logInfo "BEGINning transaction" + withTransaction txnIsolationLvl conn $ do + let hoistedMigs :: NonEmpty (AddedSqlMigration txn) + hoistedMigs = hoistAddedSqlMigration lift + <$> inTxnMigs blockFinal + ranMigs <- + fmap NE.toList + $ forM hoistedMigs + $ applySingleMigration conn + registerMig + txnIsolationLvl + ApplyMigsResult ranMigs <$> act conn + logInfo "COMMITed transaction" + pure res + + runNoTxnMig :: (DB.Connection -> txn b) -> DB.Connection - -> BlockOfMigrations m + -> SingleNoTxnMigration m -> ( FilePath -> DB.UTCTimestamp -> Maybe UTCTime @@ -470,98 +500,62 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> txn UTCTime ) -> m (ApplyMigsResult b) - runBlock act conn migBlock registerMig = do - if blockInTxn migBlock - then do - res <- - -- Naturally, we retry entire in-txn block transactions on error, not individual statements - retryFold @MigrationApplicationFailure - retryPolicy - (\previousBlock RetryIteration { tryNumber } -> - if tryNumber == 0 - then pure previousBlock - else do + runNoTxnMig act conn mig registerMig = do + -- Retrying in no-txn migrations is much more complicated due to the presence of explicit user-written + -- `BEGIN..COMMIT` blocks. If the failed statement is outside one of those, we retry that statement. + -- Otherwise, we need to ROLLBACK, and retry from the last BEGIN statement onwards. + retryFold @MigrationApplicationFailure + retryPolicy + (\(previousMig, _) RetryIteration { lastException } -> + case lastException of + Nothing -> pure (previousMig, Nothing) + Just MigrationApplicationFailure { noTxnMigRetryInstructions } + -> case noTxnMigRetryInstructions of + Nothing -> + error + "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions for that" + Just (NoTxnMigMustRestartFromLastExplicitBegin beginStmtNumber) + -> do + void $ liftIO $ DB.execute_ + conn + "ROLLBACK" + logWarn + "ROLLBACKed last explicitly started transaction before retrying" logDebug - "Re-reading migrations of this block from disk" - reReadBlock previousBlock - ) - migBlock - $ \blockFinal -> do - logInfo "BEGINning transaction" - withTransaction txnIsolationLvl conn - $ ApplyMigsResult - <$> runMigs - conn - (allMigs - (hoistBlockOfMigrations - lift - blockFinal - ) - ) - registerMig - <*> act conn - logInfo "COMMITed transaction" - pure res - else - -- Retrying in no-txn migrations is much more complicated due to the presence of explicit user-written - -- `BEGIN..COMMIT` blocks. If the failed statement is outside one of those, we retry that statement. - -- Otherwise, we need to ROLLBACK, and retry from the last BEGIN statement onwards. - retryFold @MigrationApplicationFailure - retryPolicy - (\(previousBlock, _) RetryIteration { lastException } -> - case lastException of - Nothing -> pure (previousBlock, Nothing) - Just MigrationApplicationFailure { noTxnMigRetryInstructions } - -> case noTxnMigRetryInstructions of - Nothing -> - error - "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions for that" - Just (NoTxnMigMustRestartFromLastExplicitBegin beginStmtNumber) - -> do - void $ liftIO $ DB.execute_ - conn - "ROLLBACK" - logWarn - "ROLLBACKed last explicitly started transaction before retrying" - logDebug - "Re-reading migrations of this block from disk" - freshBlock <- reReadBlock - previousBlock - pure - ( freshBlock - , Just beginStmtNumber - ) - Just NoTxnMigMustRetryFailedStatement - -> pure (previousBlock, Nothing) - ) - (migBlock, Nothing) - $ \(blockFinal, skipToNthStatement) -> do - let - streamToApply = case skipToNthStatement of - -- TODO: We don't group no-txn migrations together so this should be the only migration in the block. - -- Still it'd be nice to have this better typed so we don't have a cross-function invariant to maintain/remember - Just n -> - error - "Needs to drop n countable statements" - $ NE.head (allMigs blockFinal) - Nothing -> error - "Still need to implementa all of this" - ApplyMigsResult - <$> runMigs - conn - (allMigs migBlock) - (\fp ts appliedAt duration numAppliedStmts -> - withTransaction txnIsolationLvl conn - $ registerMig - fp - ts - appliedAt - duration - numAppliedStmts - ) - <*> withTransaction txnIsolationLvl - conn - (act conn) + "Re-reading failed no-txn migration from disk" + -- TODO: Resuming from the Nth statement shall soon be possible not just for a `BEGIN` statement, but also from a fresh invocation of "codd up" after a previous one failed! + logWarn + $ "TODO (NOT IMPLEMENTED): Will skip the first " + <> Fmt.sformat + Fmt.int + (beginStmtNumber - 1) + <> " SQL statements, which have already been applied, and start applying from the " + <> Fmt.sformat Fmt.ords + beginStmtNumber + <> " statement" + freshBlock <- reReadMig previousMig + pure (freshBlock, Just beginStmtNumber) + Just NoTxnMigMustRetryFailedStatement -> + pure (previousMig, Nothing) + ) + (mig, Nothing) + $ \(migFinal, _skipToNthStatement) -> do + ApplyMigsResult + . (: []) + <$> applySingleMigration @m @txn + conn + (\fp ts appliedAt duration numAppliedStmts -> + withTransaction txnIsolationLvl conn + $ registerMig + fp + ts + appliedAt + duration + numAppliedStmts + ) + txnIsolationLvl + (singleNoTxnMig migFinal) + <*> withTransaction txnIsolationLvl conn (act conn) data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly @@ -671,7 +665,7 @@ collectPendingMigrations defaultConnString sqlMigrations txnIsolationLvl connect -- The specific error below isn't necessary at this stage, but it's much more informative -- than the errors we'd generate further ahead. let bootstrapMigBlocks = - takeWhile isDifferentDbMigBlock pendingMigs + takeWhile hasNonDefaultConnectionString pendingMigs when (null bootstrapMigBlocks) $ do logError "The earliest existing migration has no custom connection string or there are no migrations at all. Exiting." @@ -680,11 +674,14 @@ collectPendingMigrations defaultConnString sqlMigrations txnIsolationLvl connect pure $ PendingMigrations pendingMigs bootCheck where - isDifferentDbMigBlock (BlockOfMigrations (m1 :| _) _) = - case migrationCustomConnInfo $ addedSqlMig m1 of - Nothing -> False - Just connInfo -> DB.connectDatabase defaultConnString - /= DB.connectDatabase connInfo + hasNonDefaultConnectionString block = + let mConnInfo = migrationCustomConnInfo $ addedSqlMig $ case block of + BlockInTxn (ConsecutiveInTxnMigrations (m1 :| _) _) -> m1 + BlockNoTxn (SingleNoTxnMigration m _) -> m + in case mConnInfo of + Nothing -> False + Just connInfo -> DB.connectDatabase defaultConnString + /= DB.connectDatabase connInfo collect bootCheck = do logInfoNoNewline "Looking for pending migrations..." migsAlreadyApplied :: [FilePath] <- @@ -792,8 +789,7 @@ parseMigrationFiles migsCompleted sqlMigrations = do (pure . readFromMemory . listPendingFromMemory) sqlMigrations - -- Group consecutive in-txn migrations with the same connection string together since they're atomic. in blocks of consecutive transactions by in-txn/no-txn and custom - -- Every other case should be migrations alone in their own groups. + -- Group consecutive in-txn migrations with the same connection string together for atomic application pure $ NE.groupBy (\(_, AddedSqlMigration m1 _) (_, AddedSqlMigration m2 _) -> @@ -803,9 +799,17 @@ parseMigrationFiles migsCompleted sqlMigrations = do == migrationCustomConnInfo m2 ) pendingParsedMigrations - <&> \migs -> BlockOfMigrations { allMigs = snd <$> migs - , reReadBlock = reRead migs - } + <&> \migs -> + let firstMig = snd $ NE.head migs + in if migrationInTxn (addedSqlMig firstMig) + then BlockInTxn ConsecutiveInTxnMigrations + { inTxnMigs = snd <$> migs + , reReadBlock = reRead migs + } + else BlockNoTxn SingleNoTxnMigration + { singleNoTxnMig = firstMig + , reReadMig = reReadNoTxn migs + } where reRead oldMigsAndPaths = do @@ -816,9 +820,21 @@ parseMigrationFiles migsCompleted sqlMigrations = do (Right fileStream, _) -> closeFileStream fileStream >> pure (filePath fileStream) newMigs <- readFromDisk filePaths - pure BlockOfMigrations { allMigs = snd <$> newMigs - , reReadBlock = reRead newMigs - } + pure ConsecutiveInTxnMigrations { inTxnMigs = snd <$> newMigs + , reReadBlock = reRead newMigs + } + -- | TODO: This is a near duplicate of `reRead`. Improve this. + reReadNoTxn oldMigsAndPaths = do + -- Close handles of all migrations in the block, re-open and read+parse them + filePaths <- forM oldMigsAndPaths $ \case + (Left _memStream, _) -> + error "Re-reading in-memory streams is not yet implemented" + (Right fileStream, _) -> + closeFileStream fileStream >> pure (filePath fileStream) + newMigs <- readFromDisk filePaths + pure SingleNoTxnMigration { singleNoTxnMig = snd $ NE.head newMigs + , reReadMig = reReadNoTxn newMigs + } readFromMemory :: [AddedSqlMigration m] -> [(Either String (FileStream m), AddedSqlMigration m)] @@ -919,10 +935,18 @@ laxCheckLastAction coddSettings expectedReps _blocksOfMigs conn = do -- | A collection of consecutive migrations that has the same (in-txn, db-connection) -- attributes. -data BlockOfMigrations m = BlockOfMigrations - { allMigs :: NonEmpty (AddedSqlMigration m) - , reReadBlock :: m (BlockOfMigrations m) +data BlockOfMigrations m = BlockInTxn (ConsecutiveInTxnMigrations m) | BlockNoTxn (SingleNoTxnMigration m) +data SingleNoTxnMigration m = SingleNoTxnMigration + { singleNoTxnMig :: AddedSqlMigration m + , reReadMig :: m (SingleNoTxnMigration m) + } +data ConsecutiveInTxnMigrations m = ConsecutiveInTxnMigrations + { inTxnMigs :: NonEmpty (AddedSqlMigration m) + , reReadBlock :: m (ConsecutiveInTxnMigrations m) } +allMigs :: BlockOfMigrations m -> NonEmpty (AddedSqlMigration m) +allMigs (BlockInTxn b) = inTxnMigs b +allMigs (BlockNoTxn b) = singleNoTxnMig b :| [] hoistBlockOfMigrations :: forall m n @@ -930,20 +954,31 @@ hoistBlockOfMigrations => (forall x . m x -> n x) -> BlockOfMigrations m -> BlockOfMigrations n -hoistBlockOfMigrations hoist (BlockOfMigrations {..}) = - let hoistedAllMigs = hoistAddedSqlMigration hoist <$> allMigs - hoistedReReadBlock = - hoist $ reReadBlock <&> hoistBlockOfMigrations hoist - in BlockOfMigrations { allMigs = hoistedAllMigs - , reReadBlock = hoistedReReadBlock - } +hoistBlockOfMigrations hoist = \case + BlockInTxn b -> BlockInTxn $ hoistInTxnBlock b + BlockNoTxn b -> BlockNoTxn $ hoistNoTxnBlock b + where + hoistInTxnBlock ConsecutiveInTxnMigrations {..} = + let hoistedAllMigs = hoistAddedSqlMigration hoist <$> inTxnMigs + hoistedReReadBlock = hoist $ reReadBlock <&> hoistInTxnBlock + in ConsecutiveInTxnMigrations { inTxnMigs = hoistedAllMigs + , reReadBlock = hoistedReReadBlock + } + hoistNoTxnBlock SingleNoTxnMigration {..} = + let hoistedAllMigs = hoistAddedSqlMigration hoist singleNoTxnMig + hoistedReReadBlock = hoist $ reReadMig <&> hoistNoTxnBlock + in SingleNoTxnMigration { singleNoTxnMig = hoistedAllMigs + , reReadMig = hoistedReReadBlock + } blockInTxn :: BlockOfMigrations m -> Bool -blockInTxn (BlockOfMigrations (AddedSqlMigration { addedSqlMig } :| _) _) = - migrationInTxn addedSqlMig +blockInTxn (BlockInTxn _) = True +blockInTxn (BlockNoTxn _) = False blockCustomConnInfo :: BlockOfMigrations m -> Maybe DB.ConnectInfo -blockCustomConnInfo (BlockOfMigrations (AddedSqlMigration { addedSqlMig } :| _) _) +blockCustomConnInfo (BlockInTxn (ConsecutiveInTxnMigrations (AddedSqlMigration { addedSqlMig } :| _) _)) + = migrationCustomConnInfo addedSqlMig +blockCustomConnInfo (BlockNoTxn (SingleNoTxnMigration (AddedSqlMigration { addedSqlMig }) _)) = migrationCustomConnInfo addedSqlMig data NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartFromLastExplicitBegin Int -- ^ No-txn migrations can have explicit BEGIN and COMMIT statements. If a statement inside that BEGIN..COMMIT block fails, it's useless to retry it. In that case, we must retry from the last BEGIN statement, whose statement-number inside the migration is contained here. @@ -996,24 +1031,30 @@ applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimes Just (l + 1) (PQ.TransInTrans, PQ.TransIdle) -> Nothing - (PQ.TransActive, _) -> + states@(PQ.TransActive, _) -> error - "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" - (_, PQ.TransActive) -> + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream: " + ++ show states + states@(_, PQ.TransActive) -> error - "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" - (PQ.TransInError, _) -> + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" + ++ show states + states@(PQ.TransInError, _) -> error - "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" - (_, PQ.TransInError) -> + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + ++ show states + states@(_, PQ.TransInError) -> error - "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" - (PQ.TransUnknown, _) -> + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + ++ show states + states@(PQ.TransUnknown, _) -> error - "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." - (_, PQ.TransUnknown) -> + $ "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + ++ show states + states@(_, PQ.TransUnknown) -> error - "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + $ "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + ++ show states ) (0, Nothing, initialTxnStatus) id @@ -1028,7 +1069,7 @@ applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimes then pure Nothing else do logError - "TODO: register partially applied no-txn migration. Use INSERT ON CONFLICT since we might have progressed after a retry!" + "TODO: register partially applied no-txn migration. Use INSERT ON CONFLICT since we might have progressed after a retry! However, we can't do that here as this might be a no-txn migration with a non-default connection string. Ouch!" logError $ "After applying " <> Fmt.sformat Fmt.int numStmts diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index da1352dc..5a89b089 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -111,10 +111,17 @@ runSingleStatementInternal_ conn (RollbackTransaction s) = runSingleStatementInternal_ conn (OtherSqlPiece s) = singleStatement_ conn s runSingleStatementInternal_ conn (CopyFromStdinStatement copyStm) = do liftIO $ DB.copy_ conn $ DB.Query (encodeUtf8 copyStm) - StatementApplied <$> txnStatus conn + -- Unlike every other SqlPiece, COPY does not fit into a single constructor. + -- For counting it doesn't matter if we count `COPY FROM` or the ending of `COPY`. + -- For skipping it doesn't matter either which one we count, as we'll skip N countable + -- statements when necessary and start from N+1, whatever that is. + -- Since the txnStatus here is TransActive (query ongoing), it is simpler + -- if we count the ending of `COPY`, as after that the status is TransIdle, so + -- callers have one fewer state to deal with. + pure NotACountableStatement runSingleStatementInternal_ conn (CopyFromStdinRows copyRows) = do liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows pure NotACountableStatement runSingleStatementInternal_ conn (CopyFromStdinEnd _) = do liftIO $ void $ DB.putCopyEnd conn - pure NotACountableStatement + StatementApplied <$> txnStatus conn diff --git a/test/ParsingSpec.hs b/test/ParsingSpec.hs index 3f916c42..24cc1245 100644 --- a/test/ParsingSpec.hs +++ b/test/ParsingSpec.hs @@ -2,6 +2,7 @@ module ParsingSpec where import Codd.Internal ( BlockOfMigrations(..) + , ConsecutiveInTxnMigrations(..) , parseMigrationFiles ) import Codd.Logging ( runCoddLogger ) @@ -528,7 +529,7 @@ spec = do $ runResourceT @IO $ runCoddLogger $ do - [BlockOfMigrations { allMigs = asqlmig :| [] }] <- + [BlockInTxn ConsecutiveInTxnMigrations { inTxnMigs = asqlmig :| [] }] <- parseMigrationFiles [] $ Left ["test/migrations/normal-parse-test/"] rawFileContents <- @@ -556,7 +557,7 @@ spec = do $ runResourceT @IO $ runCoddLogger $ do - [BlockOfMigrations { allMigs = asqlmig :| [] }] <- + [BlockInTxn ConsecutiveInTxnMigrations { inTxnMigs = asqlmig :| [] }] <- parseMigrationFiles [] $ Left ["test/migrations/no-parse-test/"] rawFileContents <- From 40fdf554f790ec66f1d0378c0c115ed29938553c Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sun, 17 Mar 2024 10:31:36 -0300 Subject: [PATCH 08/28] Correctly skip runnable-countable statements when retrying no-txn migs Also fine-tune error messages, and some other messages, too --- src/Codd/Internal.hs | 343 +++++++++++------- src/Codd/Internal/MultiQueryStatement.hs | 96 +++-- src/Codd/Internal/Retry.hs | 32 +- test/DbDependentSpecs/ApplicationSpec.hs | 121 +----- test/DbDependentSpecs/RetrySpec.hs | 105 ++++++ .../SchemaVerificationSpec.hs | 12 +- 6 files changed, 400 insertions(+), 309 deletions(-) create mode 100644 test/DbDependentSpecs/RetrySpec.hs diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index b30ae789..c2d4e5bf 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -6,14 +6,16 @@ import Prelude hiding ( readFile ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal.MultiQueryStatement ( SqlStatementException + , StatementApplied(..) , multiQueryStatement_ + , singleStatement_ + , skipNonCountableRunnableStatements , txnStatus ) import Codd.Internal.Retry ( RetryIteration(..) , retryFold ) import Codd.Logging ( CoddLogger - , logDebug , logError , logInfo , logInfoNoNewline @@ -75,7 +77,6 @@ import qualified Database.PostgreSQL.LibPQ as PQ import qualified Database.PostgreSQL.Simple as DB import qualified Database.PostgreSQL.Simple.Time as DB -import Debug.Trace import qualified Formatting as Fmt import Streaming ( Of(..) ) import qualified Streaming.Prelude as Streaming @@ -91,6 +92,7 @@ import UnliftIO ( Exception , MonadUnliftIO , hClose , newIORef + , onException , readIORef , timeout , writeIORef @@ -206,12 +208,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -304,12 +306,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn let dbName = Text.pack $ DB.connectDatabase defaultConnInfo hoistedBlocks :: [BlockOfMigrations txn] = map (hoistBlockOfMigrations lift) pendingMigs - isSingleInTxnBlock = case pendingMigs of - [] -> True - [block@(BlockInTxn _)] -> - fromMaybe defaultConnInfo (blockCustomConnInfo block) - == defaultConnInfo - _ -> False -- Note: We could probably compound this Monad with StateT instead of using an MVar, but IIRC that creates issues -- with MonadUnliftIO. @@ -353,7 +349,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn if coddSchemaVersion bootCheck /= maxBound then do logInfo - "Creating or updating codd_schema..." + "Creating or updating codd_schema..." createCoddSchema @txn maxBound txnIsolationLvl @@ -373,7 +369,11 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) ApplyMigsResult justAppliedMigs newSingleBlockResult <- - case (block, isSingleInTxnBlock) of + case + ( block + , isOneShotApplication defaultConnInfo pendingMigs + ) + of (BlockInTxn inTxnBlock, True) -> runInTxnBlock (fmap Just . actionAfter hoistedBlocks) conn @@ -460,31 +460,47 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> m (ApplyMigsResult b) runInTxnBlock act conn migBlock registerMig = do res <- - -- Naturally, we retry entire in-txn block transactions on error, not individual statements + -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations retryFold @MigrationApplicationFailure retryPolicy (\previousBlock RetryIteration { tryNumber } -> if tryNumber == 0 then pure previousBlock else do - logDebug - "Re-reading migrations of this block from disk" + -- logDebug + -- "Re-reading migrations of this block from disk" reReadBlock previousBlock ) migBlock + (\case + Left lastEx -> do + logError + "Failed after all configured retries. Giving up." + throwIO lastEx + Right ret -> pure ret + ) $ \blockFinal -> do logInfo "BEGINning transaction" - withTransaction txnIsolationLvl conn $ do - let hoistedMigs :: NonEmpty (AddedSqlMigration txn) - hoistedMigs = hoistAddedSqlMigration lift - <$> inTxnMigs blockFinal - ranMigs <- - fmap NE.toList - $ forM hoistedMigs - $ applySingleMigration conn - registerMig - txnIsolationLvl - ApplyMigsResult ranMigs <$> act conn + flip + onException + (logInfo + "ROLLBACKed transaction" + ) + $ withTransaction txnIsolationLvl conn + $ do + let hoistedMigs + :: NonEmpty (AddedSqlMigration txn) + hoistedMigs = + hoistAddedSqlMigration lift + <$> inTxnMigs blockFinal + ranMigs <- + fmap NE.toList + $ forM hoistedMigs + $ applySingleMigration conn + registerMig + txnIsolationLvl + 0 + ApplyMigsResult ranMigs <$> act conn logInfo "COMMITed transaction" pure res @@ -501,45 +517,43 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) -> m (ApplyMigsResult b) runNoTxnMig act conn mig registerMig = do - -- Retrying in no-txn migrations is much more complicated due to the presence of explicit user-written - -- `BEGIN..COMMIT` blocks. If the failed statement is outside one of those, we retry that statement. - -- Otherwise, we need to ROLLBACK, and retry from the last BEGIN statement onwards. retryFold @MigrationApplicationFailure retryPolicy (\(previousMig, _) RetryIteration { lastException } -> case lastException of - Nothing -> pure (previousMig, Nothing) + Nothing -> pure (previousMig, 0) Just MigrationApplicationFailure { noTxnMigRetryInstructions } -> case noTxnMigRetryInstructions of Nothing -> error "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions for that" - Just (NoTxnMigMustRestartFromLastExplicitBegin beginStmtNumber) + Just (NoTxnMigMustRestartAfterSkipping numStmtsToSkip) -> do - void $ liftIO $ DB.execute_ - conn - "ROLLBACK" - logWarn - "ROLLBACKed last explicitly started transaction before retrying" - logDebug - "Re-reading failed no-txn migration from disk" - -- TODO: Resuming from the Nth statement shall soon be possible not just for a `BEGIN` statement, but also from a fresh invocation of "codd up" after a previous one failed! + -- logDebug + -- "Re-reading failed no-txn migration from disk" logWarn - $ "TODO (NOT IMPLEMENTED): Will skip the first " - <> Fmt.sformat - Fmt.int - (beginStmtNumber - 1) + $ "Skipping the first " + <> Fmt.sformat Fmt.int + numStmtsToSkip <> " SQL statements, which have already been applied, and start applying from the " - <> Fmt.sformat Fmt.ords - beginStmtNumber + <> Fmt.sformat + Fmt.ords + (numStmtsToSkip + 1) <> " statement" + -- TODO: Resuming from the Nth statement shall soon be possible not just for a `BEGIN` statement, but also from a fresh invocation of "codd up" after a previous one failed! + -- Re-reading from disk only to skip statements is unnecessary work when the failed statement was not in an explicit `BEGIN ... COMMIT` section, but having more than one code path to handle such extremely rare errors is not worth it freshBlock <- reReadMig previousMig - pure (freshBlock, Just beginStmtNumber) - Just NoTxnMigMustRetryFailedStatement -> - pure (previousMig, Nothing) + pure (freshBlock, numStmtsToSkip) ) - (mig, Nothing) - $ \(migFinal, _skipToNthStatement) -> do + (mig, 0) + (\case + Left lastEx -> do + logError + "Failed after all configured retries. Giving up." + throwIO lastEx + Right ret -> pure ret + ) + $ \(migFinal, numStmtsToSkip) -> do ApplyMigsResult . (: []) <$> applySingleMigration @m @txn @@ -554,6 +568,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn numAppliedStmts ) txnIsolationLvl + numStmtsToSkip (singleNoTxnMig migFinal) <*> withTransaction txnIsolationLvl conn (act conn) @@ -712,7 +727,7 @@ collectPendingMigrations defaultConnString sqlMigrations txnIsolationLvl connect UnparsedSql _ -> logWarn $ Text.pack (migrationName $ addedSqlMig mig) - <> " is not to be parsed and thus will be considered in is entirety as in-txn, without support for COPY." + <> " is not to be parsed and thus will be treated in its entirety as an in-txn migration with a single statement, without support for COPY." _ -> pure () pure blocksOfPendingMigs @@ -832,7 +847,7 @@ parseMigrationFiles migsCompleted sqlMigrations = do (Right fileStream, _) -> closeFileStream fileStream >> pure (filePath fileStream) newMigs <- readFromDisk filePaths - pure SingleNoTxnMigration { singleNoTxnMig = snd $ NE.head newMigs + pure SingleNoTxnMigration { singleNoTxnMig = snd (NE.head newMigs) , reReadMig = reReadNoTxn newMigs } readFromMemory @@ -913,9 +928,10 @@ strictCheckLastAction -> ([BlockOfMigrations m] -> DB.Connection -> m ()) strictCheckLastAction coddSettings expectedReps blocksOfMigs conn = do cksums <- readRepresentationsFromDbWithSettings coddSettings conn - unless (all blockInTxn blocksOfMigs) $ do - logWarn - "IMPORTANT: Due to the presence of no-txn or custom-connection migrations, all migrations have been applied. We'll run a schema check." + unless (isOneShotApplication (migsConnString coddSettings) blocksOfMigs) + $ do + logWarn + "Because it wasn't possible to apply all pending migrations in a single transaction, all migrations have been applied. We'll run a schema check." logSchemasComparison cksums expectedReps when (cksums /= expectedReps) $ throwIO $ userError "Exiting. Database's schema differ from expected." @@ -933,8 +949,8 @@ laxCheckLastAction coddSettings expectedReps _blocksOfMigs conn = do logSchemasComparison cksums expectedReps pure cksums --- | A collection of consecutive migrations that has the same (in-txn, db-connection) --- attributes. +-- | A collection of consecutive migrations. Consecutive in-txn migrations with the same connection string are grouped together, +-- no-txn migrations appear alone. data BlockOfMigrations m = BlockInTxn (ConsecutiveInTxnMigrations m) | BlockNoTxn (SingleNoTxnMigration m) data SingleNoTxnMigration m = SingleNoTxnMigration { singleNoTxnMig :: AddedSqlMigration m @@ -971,9 +987,14 @@ hoistBlockOfMigrations hoist = \case , reReadMig = hoistedReReadBlock } -blockInTxn :: BlockOfMigrations m -> Bool -blockInTxn (BlockInTxn _) = True -blockInTxn (BlockNoTxn _) = False +-- | Returns True only if all pending migrations are in-txn and of the same connection string, meaning they'll all be applied +-- in a single transaction. +isOneShotApplication :: DB.ConnectInfo -> [BlockOfMigrations m] -> Bool +isOneShotApplication defaultConnInfo pending = case pending of + [] -> True + [block@(BlockInTxn _)] -> + fromMaybe defaultConnInfo (blockCustomConnInfo block) == defaultConnInfo + _ -> False blockCustomConnInfo :: BlockOfMigrations m -> Maybe DB.ConnectInfo blockCustomConnInfo (BlockInTxn (ConsecutiveInTxnMigrations (AddedSqlMigration { addedSqlMig } :| _) _)) @@ -981,8 +1002,7 @@ blockCustomConnInfo (BlockInTxn (ConsecutiveInTxnMigrations (AddedSqlMigration { blockCustomConnInfo (BlockNoTxn (SingleNoTxnMigration (AddedSqlMigration { addedSqlMig }) _)) = migrationCustomConnInfo addedSqlMig -data NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartFromLastExplicitBegin Int -- ^ No-txn migrations can have explicit BEGIN and COMMIT statements. If a statement inside that BEGIN..COMMIT block fails, it's useless to retry it. In that case, we must retry from the last BEGIN statement, whose statement-number inside the migration is contained here. - | NoTxnMigMustRetryFailedStatement -- ^ When we're not inside an explicit BEGIN..COMMIT block in a no-txn migration, we must retry the statement that failed itself +newtype NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartAfterSkipping Int deriving stock Show data MigrationApplicationFailure = MigrationApplicationFailure { sqlStatementEx :: SqlStatementException @@ -1005,93 +1025,136 @@ applySingleMigration -> txn UTCTime ) -> TxnIsolationLvl + -> Int + -- ^ Number of countable-runnable statements to skip completely. Useful when retrying no-txn migrations from exactly the statements they last failed in. -> AddedSqlMigration m -> m AppliedMigration -applySingleMigration conn afterMigRun isolLvl (AddedSqlMigration sqlMig migTimestamp) +applySingleMigration conn afterMigRun isolLvl numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) = do let fn = migrationName sqlMig logInfoNoNewline $ "Applying " <> Text.pack fn - initialTxnStatus <- txnStatus conn -- TODO: For no-txn migrations this should be "not in a transaction". Should we assert that? - logInfo $ "BEFORE FIRST: " <> Text.pack (show initialTxnStatus) - (appliedMigrationNumStatements, appliedMigrationDuration) <- + ((appliedMigrationNumStatements, errorOrDone, mLastBegin), appliedMigrationDuration) <- timeAction $ do - ((numStmts, mLastBegin, _) :> errorOrDone) <- - Streaming.fold - (\(!l, !lastBegin, !lastTxnStatus) txnStatusNow -> - -- TODO: Properly compute lastBegin over fold, with it being Nothing - -- if outside a BEGIN..COMMIT block and a number otherwise - traceShowId - $ (l + 1, , txnStatusNow) - $ case (lastTxnStatus, txnStatusNow) of - (PQ.TransInTrans, PQ.TransInTrans) -> - lastBegin - (PQ.TransIdle, PQ.TransIdle) -> Nothing - (PQ.TransIdle, PQ.TransInTrans) -> - Just (l + 1) - (PQ.TransInTrans, PQ.TransIdle) -> - Nothing - states@(PQ.TransActive, _) -> - error - $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream: " - ++ show states - states@(_, PQ.TransActive) -> - error - $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" - ++ show states - states@(PQ.TransInError, _) -> - error - $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" - ++ show states - states@(_, PQ.TransInError) -> - error - $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" - ++ show states - states@(PQ.TransUnknown, _) -> - error - $ "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." - ++ show states - states@(_, PQ.TransUnknown) -> - error - $ "Connection to database went sour. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." - ++ show states + case + ( migrationSql sqlMig + , numCountableRunnableStmtsToSkip > 0 ) - (0, Nothing, initialTxnStatus) - id - $ multiQueryStatement_ conn - $ migrationSql sqlMig - afterTxnStatus <- txnStatus conn - logInfo $ "AFTER LAST: " <> Text.pack (show afterTxnStatus) - case errorOrDone of - Just sqlStatementEx -> do - logInfo " [failed]" - noTxnMigRetryInstructions <- if migrationInTxn sqlMig - then pure Nothing - else do + of + (UnparsedSql _, True) -> + error + "Internal error in codd. A migration with no-parse cannot have failed in a statement that is not its 0th statement. Please report this as a bug." + (UnparsedSql unparsedSql, False) -> do + applyResult <- singleStatement_ conn unparsedSql + case applyResult of + NotACountableStatement -> + error + "Internal error in codd. no-parse migration cannot be NotACountableStatement. Please report this as a bug" + StatementErred err -> + pure (0, Just err, Nothing) + StatementApplied _ -> + pure (1, Nothing, Nothing) + (WellParsedSql sqlStream, _) -> do + initialTxnStatus <- txnStatus conn -- TODO: For no-txn migrations this should be "not in a transaction". Should we assert that? + ((numStmts, mLastBegin, _) :> errorOrDone) <- + Streaming.fold + (\(!l, !lastBegin, !lastTxnStatus) txnStatusNow -> + (l + 1, , txnStatusNow) + $ case + (lastTxnStatus, txnStatusNow) + of + (PQ.TransInTrans, PQ.TransInTrans) + -> lastBegin + (PQ.TransIdle, PQ.TransIdle) + -> Nothing + (PQ.TransIdle, PQ.TransInTrans) + -> Just (l + 1) + (PQ.TransInTrans, PQ.TransIdle) + -> Nothing + states@(PQ.TransActive, _) -> + error + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream: " + ++ show states + states@(_, PQ.TransActive) -> + error + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" + ++ show states + states@(PQ.TransInError, _) + -> error + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + ++ show states + states@(_, PQ.TransInError) + -> error + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + ++ show states + states@(PQ.TransUnknown, _) + -> error + $ "Connection to database may have gone bad. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + ++ show states + states@(_, PQ.TransUnknown) + -> error + $ "Connection to database may have gone bad. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + ++ show states + ) + ( numCountableRunnableStmtsToSkip + , Nothing + , initialTxnStatus + ) + id + $ multiQueryStatement_ conn + $ skipNonCountableRunnableStatements + numCountableRunnableStmtsToSkip + sqlStream + pure (numStmts, errorOrDone, mLastBegin) + + case errorOrDone of + Just sqlStatementEx -> do + logInfo " [failed]" + logError $ Text.pack $ show sqlStatementEx + noTxnMigRetryInstructions <- if migrationInTxn sqlMig + then pure Nothing + else do + case mLastBegin of + Nothing -> do logError - "TODO: register partially applied no-txn migration. Use INSERT ON CONFLICT since we might have progressed after a retry! However, we can't do that here as this might be a no-txn migration with a non-default connection string. Ouch!" + $ "After applying " + <> Fmt.sformat + Fmt.int + appliedMigrationNumStatements + <> " statements from no-txn migration " + <> Text.pack fn + <> ", the " + <> Fmt.sformat + Fmt.ords + (appliedMigrationNumStatements + 1) + <> " failed to be applied. Codd will resume the next retry or codd up from it" + pure $ Just $ NoTxnMigMustRestartAfterSkipping + appliedMigrationNumStatements + Just lastBeginNum -> do logError $ "After applying " - <> Fmt.sformat Fmt.int numStmts - <> " statements from no-txn migration " + <> Fmt.sformat + Fmt.int + appliedMigrationNumStatements + <> " statements from no-txn migration " <> Text.pack fn <> ", the " - <> Fmt.sformat Fmt.ords (numStmts + 1) - <> " failed to be applied." - case mLastBegin of - Nothing -> - pure $ Just - NoTxnMigMustRetryFailedStatement - Just lastBeginNum -> do - pure - $ Just - $ NoTxnMigMustRestartFromLastExplicitBegin - lastBeginNum - throwIO $ MigrationApplicationFailure - { sqlStatementEx - , noTxnMigRetryInstructions - } - Nothing -> pure numStmts + <> Fmt.sformat + Fmt.ords + (appliedMigrationNumStatements + 1) + <> " failed to be applied. Since this failed statement is inside an explicitly started transaction in the migration, codd will resume the next retry or codd up from the last BEGIN-like statement, which is the " + <> Fmt.sformat Fmt.ords lastBeginNum + <> " statement in this migration" + void $ liftIO $ DB.execute_ conn "ROLLBACK" + logInfo + "ROLLBACKed last explicitly started transaction" + pure $ Just $ NoTxnMigMustRestartAfterSkipping + (lastBeginNum - 1) + throwIO $ MigrationApplicationFailure + { sqlStatementEx + , noTxnMigRetryInstructions + } + Nothing -> pure () timestamp <- withTransaction isolLvl conn $ afterMigRun fn diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index 5a89b089..a4e9792f 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -1,14 +1,15 @@ module Codd.Internal.MultiQueryStatement ( SqlStatementException(..) + , StatementApplied(..) , multiQueryStatement_ , runSingleStatementInternal_ + , singleStatement_ + , skipNonCountableRunnableStatements , txnStatus ) where import Codd.Logging ( CoddLogger ) -import Codd.Parsing ( ParsedSql(..) - , SqlPiece(..) - ) +import Codd.Parsing ( SqlPiece(..) ) import Control.Monad ( void ) import Data.Text ( Text ) import Data.Text.Encoding ( encodeUtf8 ) @@ -73,7 +74,7 @@ multiQueryStatement_ :: forall m . (MonadUnliftIO m, CoddLogger m) => DB.Connection - -> ParsedSql m + -> Stream (Of SqlPiece) m () -> Stream (Of PQ.TransactionStatus) m (Maybe SqlStatementException) multiQueryStatement_ conn sql = partitionEithersReturn id @@ -83,11 +84,7 @@ multiQueryStatement_ conn sql = StatementApplied ts -> Just $ Right ts StatementErred e -> Just $ Left e ) - $ case sql of - -- There must be some simpler function to avoid mapM and yield.. - UnparsedSql t -> S.mapM (singleStatement_ conn) (S.yield t) - WellParsedSql stms -> - Streaming.mapM (runSingleStatementInternal_ conn) stms + $ Streaming.mapM (runSingleStatementInternal_ conn) sql where -- | Like `S.partitionEithers`, but with `Left` being considered an error and put as the stream's return value if one exists while the `Rights` are streamed. partitionEithersReturn @@ -99,29 +96,62 @@ multiQueryStatement_ conn sql = S.Effect m -> S.Effect $ partitionEithersReturn f <$> m S.Return _ -> S.Return Nothing +isCountableRunnable :: SqlPiece -> Bool +isCountableRunnable = \case + OtherSqlPiece _ -> True + CommentPiece _ -> False + WhiteSpacePiece _ -> False + BeginTransaction _ -> True + CommitTransaction _ -> True + RollbackTransaction _ -> True + CopyFromStdinStatement _ -> False + CopyFromStdinRows _ -> False + CopyFromStdinEnd _ -> True + +-- | Skips the first n non countable-runnable statements from the stream. +-- TODO: Test this function in isolation. E.g. one must never fall in a CopyFromStdinRows after skipping any number of statements. +-- But also test basic cases including COMMIT, BEGIN, ROLLBACK, etc. +skipNonCountableRunnableStatements + :: Monad m => Int -> Stream (Of SqlPiece) m r -> Stream (Of SqlPiece) m r +skipNonCountableRunnableStatements numCountableRunnableToSkip = + S.catMaybes + . S.scan + (\(skipped, _) p -> if skipped >= numCountableRunnableToSkip + then (skipped, Just p) + else if isCountableRunnable p + then (skipped + 1, Nothing) + else (skipped, Nothing) + ) + (0, Nothing) + snd + + runSingleStatementInternal_ :: MonadIO m => DB.Connection -> SqlPiece -> m StatementApplied -runSingleStatementInternal_ _ (CommentPiece _) = pure NotACountableStatement -runSingleStatementInternal_ _ (WhiteSpacePiece _) = pure NotACountableStatement -runSingleStatementInternal_ conn (BeginTransaction s) = singleStatement_ conn s -runSingleStatementInternal_ conn (CommitTransaction s) = - singleStatement_ conn s -runSingleStatementInternal_ conn (RollbackTransaction s) = - singleStatement_ conn s -runSingleStatementInternal_ conn (OtherSqlPiece s) = singleStatement_ conn s -runSingleStatementInternal_ conn (CopyFromStdinStatement copyStm) = do - liftIO $ DB.copy_ conn $ DB.Query (encodeUtf8 copyStm) - -- Unlike every other SqlPiece, COPY does not fit into a single constructor. - -- For counting it doesn't matter if we count `COPY FROM` or the ending of `COPY`. - -- For skipping it doesn't matter either which one we count, as we'll skip N countable - -- statements when necessary and start from N+1, whatever that is. - -- Since the txnStatus here is TransActive (query ongoing), it is simpler - -- if we count the ending of `COPY`, as after that the status is TransIdle, so - -- callers have one fewer state to deal with. - pure NotACountableStatement -runSingleStatementInternal_ conn (CopyFromStdinRows copyRows) = do - liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows - pure NotACountableStatement -runSingleStatementInternal_ conn (CopyFromStdinEnd _) = do - liftIO $ void $ DB.putCopyEnd conn - StatementApplied <$> txnStatus conn +runSingleStatementInternal_ conn p = case p of + CommentPiece _ -> applied + WhiteSpacePiece _ -> applied + BeginTransaction s -> singleStatement_ conn s + CommitTransaction s -> singleStatement_ conn s + RollbackTransaction s -> singleStatement_ conn s + OtherSqlPiece s -> singleStatement_ conn s + CopyFromStdinStatement copyStm -> do + liftIO $ DB.copy_ conn $ DB.Query (encodeUtf8 copyStm) + -- Unlike every other SqlPiece, COPY does not fit into a single constructor. + -- For counting it doesn't matter if we count `COPY FROM` or the ending of `COPY`. + -- For skipping it doesn't matter either which one we count, as we'll skip N countable + -- statements when necessary and start from N+1, whatever that is. + -- Since the txnStatus here is TransActive (query ongoing), it is simpler + -- if we count the ending of `COPY`, as after that the status is TransIdle, so + -- callers have one fewer state to deal with. + applied + CopyFromStdinRows copyRows -> do + liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows + applied + CopyFromStdinEnd _ -> do + liftIO $ void $ DB.putCopyEnd conn + applied + where + applied = if isCountableRunnable p + then StatementApplied <$> txnStatus conn + else pure NotACountableStatement diff --git a/src/Codd/Internal/Retry.hs b/src/Codd/Internal/Retry.hs index a8bbb120..d309eaa0 100644 --- a/src/Codd/Internal/Retry.hs +++ b/src/Codd/Internal/Retry.hs @@ -4,7 +4,6 @@ module Codd.Internal.Retry ) where import Codd.Logging ( CoddLogger - , logError , logWarn ) import Codd.Types ( RetryPolicy(..) @@ -38,11 +37,13 @@ retryFold -- ^ Accumulating function. This runs even for the first try. -> b -- ^ Initial value of the accumulator. + -> (Either e a -> m a) + -- ^ Called after the action succeeds or after all retries fail. -> (b -> m a) -- ^ Action to retry. Any exceptions of the chosen type are caught and logged as errors. -- Retries don't happen in case no exceptions are thrown. -> m a -retryFold initialPol accf acc0 f = go initialPol acc0 0 Nothing +retryFold initialPol accf acc0 final f = go initialPol acc0 0 Nothing where go rpol previousAcc tryNumber lastException = do let mNextPol = retryPolicyIterate rpol @@ -52,16 +53,17 @@ retryFold initialPol accf acc0 f = go initialPol acc0 0 Nothing } thisAcc <- accf previousAcc thisIter case mNextPol of - Nothing -> f thisAcc - Just (waitIfFail, nextPol) -> catch (f thisAcc) $ \(ex :: e) -> do - let waitTimeMS :: Int = - truncate $ (realToFrac waitIfFail :: Float) * 1000 - -- It would be more reasonable if this retryFold function didn't print anything, letting - -- its callers do that. Maybe in the future. - logError $ "Got SQL Error: " <> Text.pack (show ex) - logWarn - $ "Waiting " - <> Text.pack (show waitTimeMS) - <> "ms before next try" - threadDelay (1000 * waitTimeMS) - go nextPol thisAcc (tryNumber + 1) (Just ex) + Nothing -> catch (f thisAcc) (final . Left) + Just (waitIfFail, nextPol) -> do + ret <- catch (f thisAcc) $ \(ex :: e) -> do + let waitTimeMS :: Int = + truncate $ (realToFrac waitIfFail :: Float) * 1000 + -- It would be more reasonable if this retryFold function didn't print anything, letting + -- its callers do that. Maybe in the future. + logWarn + $ "Waiting " + <> Text.pack (show waitTimeMS) + <> "ms before next try" + threadDelay (1000 * waitTimeMS) + go nextPol thisAcc (tryNumber + 1) (Just ex) + final (Right ret) diff --git a/test/DbDependentSpecs/ApplicationSpec.hs b/test/DbDependentSpecs/ApplicationSpec.hs index 87e00aef..ca514dd5 100644 --- a/test/DbDependentSpecs/ApplicationSpec.hs +++ b/test/DbDependentSpecs/ApplicationSpec.hs @@ -6,39 +6,27 @@ import Codd ( VerifySchemas(..) ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal ( CoddSchemaVersion(..) - , MigrationApplicationFailure , createCoddSchema , detectCoddSchema , withConnection ) -import Codd.Internal.MultiQueryStatement - ( SqlStatementException ) -import Codd.Logging ( LoggingT(runLoggingT) - , Newline(..) - , runCoddLogger - ) +import Codd.Logging ( runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) , SqlMigration(..) , hoistAddedSqlMigration ) import Codd.Query ( unsafeQuery1 ) import Codd.Representations.Types ( DbRep(..) ) -import Codd.Types ( RetryBackoffPolicy(..) - , RetryPolicy(..) - , TxnIsolationLvl(..) - ) +import Codd.Types ( TxnIsolationLvl(..) ) import Control.Monad ( forM_ , void ) -import Control.Monad.Reader ( ReaderT(..) ) import Control.Monad.Trans ( lift ) import Control.Monad.Trans.Resource ( MonadThrow ) import qualified Data.Aeson as Aeson -import qualified Data.List as List import qualified Data.Map.Strict as Map import Data.Text ( Text ) import qualified Data.Text as Text -import qualified Data.Text.IO as Text import Data.Time ( CalendarDiffTime(ctTime) , UTCTime , diffUTCTime @@ -60,16 +48,7 @@ import DbUtils ( aroundFreshDatabase import Test.Hspec import Test.QuickCheck import qualified Test.QuickCheck as QC -import UnliftIO ( MonadIO - , hFlush - , liftIO - , stdout - ) -import UnliftIO.Concurrent ( MVar - , modifyMVar_ - , newMVar - , readMVar - ) +import UnliftIO ( liftIO ) placeHoldersMig, selectMig, copyMig :: MonadThrow m => AddedSqlMigration m placeHoldersMig = AddedSqlMigration @@ -664,82 +643,6 @@ spec = do countTxIds `shouldBe` 4 countInterns `shouldBe` 4 totalRows `shouldBe` 10 - context - "Retry Policy retries and backoff work as expected" - $ do - let - testRetries inTxn emptyTestDbInfo = do - -- Kind of ugly.. we test behaviour by analyzing logs and - -- trust that threadDelay is called. - logsmv <- newMVar [] - runMVarLogger - logsmv - (applyMigrationsNoCheck - (emptyTestDbInfo - { retryPolicy = - RetryPolicy - 7 - (ExponentialBackoff - (realToFrac - @Double - 0.001 - ) - ) - , sqlMigrations = - [ if inTxn - then - "test/migrations/retry-policy-test-in-txn/" - else - "test/migrations/retry-policy-test-no-txn/" - ] - } - ) - Nothing - testConnTimeout - (const $ pure ()) - ) - `shouldThrow` (\(e :: MigrationApplicationFailure) -> - "division by zero" - `List.isInfixOf` show - e - - && "SELECT 7/0" - `List.isInfixOf` show - e - ) - logs <- readMVar logsmv - length - (filter - ("before next try" `Text.isInfixOf` - ) - logs - ) - `shouldBe` 7 - -- The last attempt isn't logged because we don't catch exceptions for it - length - (filter - ("division by zero" `Text.isInfixOf` - ) - logs - ) - `shouldBe` 7 - forM_ [1 :: Int, 2, 4, 8, 16, 32, 64] - $ \delay -> - length - (filter - (( "Waiting " - <> Text.pack - (show - delay - ) - <> "ms" - ) `Text.isInfixOf` - ) - logs - ) - `shouldBe` 1 - it "For in-txn migrations" $ testRetries True - it "For no-txn migrations" $ testRetries False describe "Custom connection-string migrations" $ do it @@ -1026,21 +929,3 @@ diversifyAppCheckMigs defaultConnInfo createCoddTestDbMigs = do $ getIncreasingTimestamp 0 ] pure $ DiverseMigrationOrder migsInOrder - - -runMVarLogger :: MonadIO m => MVar [Text] -> LoggingT m a -> m a -runMVarLogger logsmv m = runReaderT - (runLoggingT m) - ( \newline msg -> modifyMVar_ - logsmv - (\l -> do - case newline of - NoNewline -> do - Text.hPutStr stdout msg - hFlush stdout - WithNewline -> Text.hPutStrLn stdout msg - pure $ l ++ [msg] - ) - , const True - , True - ) diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs new file mode 100644 index 00000000..c3565193 --- /dev/null +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -0,0 +1,105 @@ +module DbDependentSpecs.RetrySpec where + +import Codd ( applyMigrationsNoCheck ) +import Codd.Environment ( CoddSettings(..) ) +import Codd.Internal ( MigrationApplicationFailure ) +import Codd.Logging ( LoggingT(runLoggingT) + , Newline(..) + ) +import Codd.Types ( RetryBackoffPolicy(..) + , RetryPolicy(..) + ) +import Control.Monad ( forM_ ) +import Control.Monad.Reader ( ReaderT(..) ) +import qualified Data.List as List +import Data.Text ( Text ) +import qualified Data.Text as Text +import qualified Data.Text.IO as Text +import DbUtils ( aroundFreshDatabase + , testConnTimeout + ) +import Test.Hspec +import UnliftIO ( MonadIO + , hFlush + , stdout + ) +import UnliftIO.Concurrent ( MVar + , modifyMVar_ + , newMVar + , readMVar + ) + +spec :: Spec +spec = do + describe "DbDependentSpecs" $ do + describe "Retry tests" $ aroundFreshDatabase $ do + let + testRetries inTxn emptyTestDbInfo = do + -- Kind of ugly.. we test behaviour by analyzing logs and + -- trust that threadDelay is called. + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + (emptyTestDbInfo + { retryPolicy = RetryPolicy + 7 + (ExponentialBackoff + (realToFrac @Double 0.001) + ) + , sqlMigrations = + [ if inTxn + then + "test/migrations/retry-policy-test-in-txn/" + else + "test/migrations/retry-policy-test-no-txn/" + ] + } + ) + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(e :: MigrationApplicationFailure) -> + "division by zero" + `List.isInfixOf` show e + + && "SELECT 7/0" + `List.isInfixOf` show e + ) + logs <- readMVar logsmv + length (filter ("before next try" `Text.isInfixOf`) logs) + `shouldBe` 7 + -- The last attempt isn't logged because we don't catch exceptions for it + length (filter ("division by zero" `Text.isInfixOf`) logs) + `shouldBe` 8 -- The error appears once before each retry, but also once when the final exception is thrown + forM_ [1 :: Int, 2, 4, 8, 16, 32, 64] $ \delay -> + length + (filter + (( "Waiting " + <> Text.pack (show delay) + <> "ms" + ) `Text.isInfixOf` + ) + logs + ) + `shouldBe` 1 + it "For in-txn migrations" $ testRetries True + it "For no-txn migrations" $ testRetries False + +runMVarLogger :: MonadIO m => MVar [Text] -> LoggingT m a -> m a +runMVarLogger logsmv m = runReaderT + (runLoggingT m) + ( \newline msg -> modifyMVar_ + logsmv + (\l -> do + case newline of + NoNewline -> do + Text.hPutStr stdout msg + hFlush stdout + WithNewline -> Text.hPutStrLn stdout msg + pure $ l ++ [msg] + ) + , const True + , True + ) diff --git a/test/DbDependentSpecs/SchemaVerificationSpec.hs b/test/DbDependentSpecs/SchemaVerificationSpec.hs index 57e4d2c8..1944123a 100644 --- a/test/DbDependentSpecs/SchemaVerificationSpec.hs +++ b/test/DbDependentSpecs/SchemaVerificationSpec.hs @@ -8,6 +8,7 @@ import Codd.Internal.MultiQueryStatement import Codd.Logging ( runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) , EnvVars + , ParsedSql(..) , PureStream(..) , SqlMigration(..) , hoistAddedSqlMigration @@ -25,7 +26,6 @@ import Codd.Representations.Database ( queryServerMajorVersion import Codd.Representations.Types ( ObjName(..) ) import Codd.Types ( SchemaAlgo(..) , SchemaSelection(..) - , singleTryPolicy ) import Control.Monad ( foldM , forM @@ -1548,7 +1548,8 @@ spec = do case mUndoSql of Nothing -> pure expectedHashesAfterUndo Just undoSql -> do - runCoddLogger + void + $ runCoddLogger $ withConnection connInfo testConnTimeout @@ -1556,7 +1557,12 @@ spec = do Streaming.effects $ multiQueryStatement_ conn - $ mkValidSql undoSql + $ (\(WellParsedSql sqlStream) -> + sqlStream + ) + (mkValidSql + undoSql + ) hashesAfterUndo <- getHashes emptyDbInfo let From ea2a7903f6b9a90dba733ad0e03a0ed185b426ca Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Thu, 21 Mar 2024 16:40:24 -0300 Subject: [PATCH 09/28] Make registering applied migrations an MVar to avoid state-updating plumbing --- src/Codd/Internal.hs | 280 +++++++++++++++++++++++-------------------- 1 file changed, 150 insertions(+), 130 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index c2d4e5bf..e9958a1e 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -111,7 +111,7 @@ import UnliftIO.IO ( IOMode(ReadMode) ) import UnliftIO.MVar ( modifyMVar , newMVar - , readMVar + , readMVar, modifyMVar_ ) import UnliftIO.Resource ( MonadResource , ReleaseKey @@ -120,6 +120,7 @@ import UnliftIO.Resource ( MonadResource , release , runResourceT ) +import Data.Bifunctor (second) dbIdentifier :: Text -> DB.Query dbIdentifier s = "\"" <> fromString (Text.unpack s) <> "\"" @@ -208,12 +209,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -310,6 +311,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -- Note: We could probably compound this Monad with StateT instead of using an MVar, but IIRC that creates issues -- with MonadUnliftIO. connsPerInfo <- newMVar (mempty :: [(DB.ConnectInfo, DB.Connection)]) + appliedMigs <- newMVar (mempty :: [(AppliedMigration, MigrationRegistered)]) + coddSchemaExists <- newMVar $ coddSchemaVersion bootstrapCheck /= CoddSchemaDoesNotExist let openConn :: DB.ConnectInfo -> m (ReleaseKey, DB.Connection) openConn cinfo = flip allocate DB.close $ do mConn <- lookup cinfo <$> readMVar connsPerInfo @@ -324,51 +327,96 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn queryConn :: DB.ConnectInfo -> m (Maybe DB.Connection) queryConn cinfo = lookup cinfo <$> readMVar connsPerInfo - (appliedMigs :: [(AppliedMigration, MigrationRegistered)], finalBootCheck :: BootstrapCheck, singleInTxnBlockResult :: Maybe - a) <- + createCoddSchemaAndFlushPendingMigrations :: m () + createCoddSchemaAndFlushPendingMigrations = do + mDefaultConn <- queryConn defaultConnInfo + csExists <- readMVar coddSchemaExists + case mDefaultConn of + Just defaultConn -> do + unless csExists $ do + logInfo + "Creating or updating codd_schema..." + createCoddSchema @txn + maxBound + txnIsolationLvl + defaultConn + modifyMVar_ coddSchemaExists $ const $ pure True + modifyMVar_ appliedMigs $ \apmigs -> do + withTransaction @txn txnIsolationLvl defaultConn $ + forM_ [ am | (am, MigrationNotRegistered) <- apmigs ] + $ \AppliedMigration {..} -> registerRanMigration @txn + defaultConn + txnIsolationLvl + appliedMigrationName + appliedMigrationTimestamp + (Just appliedMigrationAt) + appliedMigrationDuration + appliedMigrationNumStatements + pure $ map (second (const MigrationRegistered)) apmigs + Nothing -> pure () + + -- TODO: Have argument to identify if this is a partially applied no-txn migration + registerAppliedMigIfPossible :: forall t. (CanStartTxn m (t m), MonadIO (t m), MonadUnliftIO (t m), MonadTrans t) => DB.Connection -> FilePath -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime -> Int -> t m UTCTime + registerAppliedMigIfPossible anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements = do + mDefaultConn <- lift $ queryConn defaultConnInfo + (registered, appliedAt) <- + case mDefaultConn of + Nothing -> fmap (MigrationNotRegistered,) $ + case appliedMigrationAt of + Nothing -> + DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT NOW()" () + Just timeApplied -> pure timeApplied + Just defaultConn -> do + timeApplied <- registerRanMigration @(t m) defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements + pure (MigrationRegistered, timeApplied) + + modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName + , appliedMigrationTimestamp + , appliedMigrationAt = appliedAt + , appliedMigrationDuration + , appliedMigrationNumStatements + }, registered)] + + pure appliedAt + + registerAppliedMigIfPossibleNoTxn :: DB.Connection -> FilePath -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime -> Int -> m UTCTime + registerAppliedMigIfPossibleNoTxn anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements = do + mDefaultConn <- queryConn defaultConnInfo + (registered, appliedAt) <- + case mDefaultConn of + Nothing -> fmap (MigrationNotRegistered,) $ + case appliedMigrationAt of + Nothing -> + DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT NOW()" () + Just timeApplied -> pure timeApplied + Just defaultConn -> do + timeApplied <- withTransaction @txn txnIsolationLvl defaultConn $ registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements + pure (MigrationRegistered, timeApplied) + + modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName + , appliedMigrationTimestamp + , appliedMigrationAt = appliedAt + , appliedMigrationDuration + , appliedMigrationNumStatements + }, registered)] + + pure appliedAt + + + singleInTxnBlockResult :: Maybe a <- foldM - (\(previouslyAppliedMigs, bootCheck, _) block -> do + (\_ block -> do let cinfo = fromMaybe defaultConnInfo (blockCustomConnInfo block) (_, conn) <- openConn cinfo - mDefaultConn <- queryConn defaultConnInfo - -- The default connection string is the one which should have permissions - -- to create codd_schema. - (runAfterMig, newBootCheck) <- case mDefaultConn of - Nothing -> pure - ( \_ _ appliedAt _ _ -> DB.fromOnly <$> unsafeQuery1 - conn - "SELECT COALESCE(?, now())" - (DB.Only appliedAt) - , bootCheck - ) - Just defaultConn -> do - newBootCheck <- - if coddSchemaVersion bootCheck /= maxBound - then do - logInfo - "Creating or updating codd_schema..." - createCoddSchema @txn - maxBound - txnIsolationLvl - defaultConn - pure bootCheck - { defaultConnAccessible = True - , coddSchemaVersion = maxBound - } - else pure bootCheck - registerPendingMigrations @txn - defaultConn - previouslyAppliedMigs - pure - ( registerRanMigration @txn defaultConn - txnIsolationLvl - , newBootCheck - ) - - ApplyMigsResult justAppliedMigs newSingleBlockResult <- + -- Create codd_schema and flush previously applied migrations if possible. We do this here + -- since we expect _some_ of the migration blocks to use the default connection string, and after + -- that we can register migrations were applied. + createCoddSchemaAndFlushPendingMigrations + + ApplyMigsResult _justAppliedMigs newSingleBlockResult <- case ( block , isOneShotApplication defaultConnInfo pendingMigs @@ -378,33 +426,20 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn (fmap Just . actionAfter hoistedBlocks) conn inTxnBlock - runAfterMig + (registerAppliedMigIfPossible conn) (BlockInTxn inTxnBlock, False) -> runInTxnBlock (const $ pure Nothing) conn inTxnBlock - runAfterMig + (registerAppliedMigIfPossible conn) (BlockNoTxn noTxnBlock, _) -> runNoTxnMig - (const $ pure Nothing) conn noTxnBlock - runAfterMig - - -- Keep in mind that migrations are applied but might not be registered if - -- we still haven't run any default-connection-string migrations. - pure - ( map - ( - , if coddSchemaVersion newBootCheck >= CoddSchemaV1 - then MigrationRegistered - else MigrationNotRegistered - ) - (map fst previouslyAppliedMigs ++ justAppliedMigs) - , newBootCheck - , newSingleBlockResult - ) + (registerAppliedMigIfPossibleNoTxn conn) + + pure newSingleBlockResult ) - ([], bootstrapCheck, Nothing) + Nothing pendingMigs actAfterResult <- case singleInTxnBlockResult of @@ -414,11 +449,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -- In that case, we assume the default-connection-string will be valid after those migrations -- and use that to register all applied migrations and then run "actionAfter". (_, defaultConn) <- openConn defaultConnInfo - when (coddSchemaVersion finalBootCheck /= maxBound) $ do - logInfo "Creating or updating codd_schema..." - createCoddSchema @txn maxBound txnIsolationLvl defaultConn - withTransaction @txn txnIsolationLvl defaultConn - $ registerPendingMigrations @txn defaultConn appliedMigs + createCoddSchemaAndFlushPendingMigrations withTransaction txnIsolationLvl defaultConn $ actionAfter hoistedBlocks defaultConn @@ -429,23 +460,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn return actAfterResult where - registerPendingMigrations - :: forall x n - . (MonadUnliftIO n, MonadIO x, CanStartTxn n x) - => DB.Connection - -> [(AppliedMigration, MigrationRegistered)] - -> n () - registerPendingMigrations defaultConn appliedMigs = - forM_ [ am | (am, MigrationNotRegistered) <- appliedMigs ] - $ \AppliedMigration {..} -> registerRanMigration @x - defaultConn - txnIsolationLvl - appliedMigrationName - appliedMigrationTimestamp - (Just appliedMigrationAt) - appliedMigrationDuration - appliedMigrationNumStatements - runInTxnBlock :: (DB.Connection -> txn b) -> DB.Connection @@ -455,7 +469,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> Maybe UTCTime -> DiffTime -> Int - -> txn UTCTime + -> txn UTCTime -- ^ This `txn` is a hoax; this should run in `m` to be correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` will be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. This is a flaw of our transaction tracking type-level interface. ) -> m (ApplyMigsResult b) runInTxnBlock act conn migBlock registerMig = do @@ -466,10 +480,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn (\previousBlock RetryIteration { tryNumber } -> if tryNumber == 0 then pure previousBlock - else do - -- logDebug - -- "Re-reading migrations of this block from disk" - reReadBlock previousBlock + else reReadBlock previousBlock ) migBlock (\case @@ -498,25 +509,23 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn $ forM hoistedMigs $ applySingleMigration conn registerMig - txnIsolationLvl 0 ApplyMigsResult ranMigs <$> act conn logInfo "COMMITed transaction" pure res runNoTxnMig - :: (DB.Connection -> txn b) - -> DB.Connection + :: DB.Connection -> SingleNoTxnMigration m -> ( FilePath -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime -> Int - -> txn UTCTime + -> m UTCTime -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. ) - -> m (ApplyMigsResult b) - runNoTxnMig act conn mig registerMig = do + -> m (ApplyMigsResult (Maybe x)) + runNoTxnMig conn mig registerMig = do retryFold @MigrationApplicationFailure retryPolicy (\(previousMig, _) RetryIteration { lastException } -> @@ -526,11 +535,18 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> case noTxnMigRetryInstructions of Nothing -> error - "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions for that" - Just (NoTxnMigMustRestartAfterSkipping numStmtsToSkip) + "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions" + Just (NoTxnMigMustRestartAfterSkipping numStmtsToSkip timeSpentApplying) -> do - -- logDebug - -- "Re-reading failed no-txn migration from disk" + let addedMig = + singleNoTxnMig previousMig + migData = addedSqlMig addedMig + void $ registerMig + (migrationName migData) + (addedSqlTimestamp addedMig) + Nothing + timeSpentApplying + numStmtsToSkip logWarn $ "Skipping the first " <> Fmt.sformat Fmt.int @@ -554,23 +570,12 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn Right ret -> pure ret ) $ \(migFinal, numStmtsToSkip) -> do - ApplyMigsResult - . (: []) - <$> applySingleMigration @m @txn - conn - (\fp ts appliedAt duration numAppliedStmts -> - withTransaction txnIsolationLvl conn - $ registerMig - fp - ts - appliedAt - duration - numAppliedStmts - ) - txnIsolationLvl - numStmtsToSkip - (singleNoTxnMig migFinal) - <*> withTransaction txnIsolationLvl conn (act conn) + appliedMig <- applySingleMigration + conn + registerMig + numStmtsToSkip + (singleNoTxnMig migFinal) + pure $ ApplyMigsResult [appliedMig] Nothing data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly @@ -1002,7 +1007,11 @@ blockCustomConnInfo (BlockInTxn (ConsecutiveInTxnMigrations (AddedSqlMigration { blockCustomConnInfo (BlockNoTxn (SingleNoTxnMigration (AddedSqlMigration { addedSqlMig }) _)) = migrationCustomConnInfo addedSqlMig -newtype NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartAfterSkipping Int +data NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartAfterSkipping + Int + -- ^ Number of statements successfully applied, which is also the number of statements to skip in the next attempt + DiffTime + -- ^ Time spent applying statements in this failed attempt deriving stock Show data MigrationApplicationFailure = MigrationApplicationFailure { sqlStatementEx :: SqlStatementException @@ -1011,25 +1020,24 @@ data MigrationApplicationFailure = MigrationApplicationFailure deriving stock Show instance Exception MigrationApplicationFailure --- | Applies a single migration and returns the time when it finished being applied. Does not --- itself register that the migration ran, only runs "afterMigRun" after applying the migration. +-- | Applies a single migration and returns the time when it finished being applied. Calls the supplied +-- `registerMigRan` after applying the migration. applySingleMigration - :: forall m txn - . (MonadUnliftIO m, CoddLogger m, CanStartTxn m txn) + :: forall m + . (MonadUnliftIO m, CoddLogger m) => DB.Connection -> ( FilePath -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime -> Int - -> txn UTCTime + -> m UTCTime ) - -> TxnIsolationLvl -> Int -- ^ Number of countable-runnable statements to skip completely. Useful when retrying no-txn migrations from exactly the statements they last failed in. -> AddedSqlMigration m -> m AppliedMigration -applySingleMigration conn afterMigRun isolLvl numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) +applySingleMigration conn afterMigRun numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) = do let fn = migrationName sqlMig logInfoNoNewline $ "Applying " <> Text.pack fn @@ -1128,8 +1136,14 @@ applySingleMigration conn afterMigRun isolLvl numCountableRunnableStmtsToSkip (A Fmt.ords (appliedMigrationNumStatements + 1) <> " failed to be applied. Codd will resume the next retry or codd up from it" + void $ afterMigRun + fn + migTimestamp + Nothing + appliedMigrationDuration + appliedMigrationNumStatements pure $ Just $ NoTxnMigMustRestartAfterSkipping - appliedMigrationNumStatements + appliedMigrationNumStatements appliedMigrationDuration Just lastBeginNum -> do logError $ "After applying " @@ -1146,17 +1160,23 @@ applySingleMigration conn afterMigRun isolLvl numCountableRunnableStmtsToSkip (A <> Fmt.sformat Fmt.ords lastBeginNum <> " statement in this migration" void $ liftIO $ DB.execute_ conn "ROLLBACK" + void $ afterMigRun + fn + migTimestamp + Nothing + appliedMigrationDuration + appliedMigrationNumStatements logInfo "ROLLBACKed last explicitly started transaction" pure $ Just $ NoTxnMigMustRestartAfterSkipping - (lastBeginNum - 1) + (lastBeginNum - 1) appliedMigrationDuration throwIO $ MigrationApplicationFailure { sqlStatementEx , noTxnMigRetryInstructions } Nothing -> pure () - timestamp <- withTransaction isolLvl conn $ afterMigRun + timestamp <- afterMigRun fn migTimestamp Nothing From 946f9dcca9ba7311352f6167cf14f22a1fd2fa32 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Thu, 21 Mar 2024 17:19:58 -0300 Subject: [PATCH 10/28] Register failing no-txn migrations properly --- src/Codd/Internal.hs | 189 ++++++++++++++++++------------------------- 1 file changed, 77 insertions(+), 112 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index e9958a1e..c2f9fca0 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -349,25 +349,22 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp - (Just appliedMigrationAt) + (SpecificTime appliedMigrationAt) appliedMigrationDuration appliedMigrationNumStatements + MigrationAppliedSuccessfully -- TODO: What if a failing no-txn migration ran just enough to make the default connection string accessible? Then we want to register it properly here, not assume it succeeded. pure $ map (second (const MigrationRegistered)) apmigs Nothing -> pure () - -- TODO: Have argument to identify if this is a partially applied no-txn migration - registerAppliedMigIfPossible :: forall t. (CanStartTxn m (t m), MonadIO (t m), MonadUnliftIO (t m), MonadTrans t) => DB.Connection -> FilePath -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime -> Int -> t m UTCTime - registerAppliedMigIfPossible anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements = do + registerAppliedMigIfPossible :: forall t. (CanStartTxn m (t m), MonadIO (t m), MonadUnliftIO (t m), MonadTrans t) => DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> t m () + registerAppliedMigIfPossible anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do mDefaultConn <- lift $ queryConn defaultConnInfo (registered, appliedAt) <- case mDefaultConn of Nothing -> fmap (MigrationNotRegistered,) $ - case appliedMigrationAt of - Nothing -> - DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT NOW()" () - Just timeApplied -> pure timeApplied + DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT clock_timestamp()" () Just defaultConn -> do - timeApplied <- registerRanMigration @(t m) defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements + timeApplied <- registerRanMigration @(t m) defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus pure (MigrationRegistered, timeApplied) modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName @@ -377,20 +374,16 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn , appliedMigrationNumStatements }, registered)] - pure appliedAt - - registerAppliedMigIfPossibleNoTxn :: DB.Connection -> FilePath -> DB.UTCTimestamp -> Maybe UTCTime -> DiffTime -> Int -> m UTCTime - registerAppliedMigIfPossibleNoTxn anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements = do + -- TODO: This function is a copy of the one above, but runs in a different monad. It'd be nicer to avoid the duplication. + registerAppliedMigIfPossibleNoTxn :: DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m () + registerAppliedMigIfPossibleNoTxn anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do mDefaultConn <- queryConn defaultConnInfo (registered, appliedAt) <- case mDefaultConn of Nothing -> fmap (MigrationNotRegistered,) $ - case appliedMigrationAt of - Nothing -> - DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT NOW()" () - Just timeApplied -> pure timeApplied + DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT clock_timestamp()" () Just defaultConn -> do - timeApplied <- withTransaction @txn txnIsolationLvl defaultConn $ registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp appliedMigrationAt appliedMigrationDuration appliedMigrationNumStatements + timeApplied <- withTransaction @txn txnIsolationLvl defaultConn $ registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus pure (MigrationRegistered, timeApplied) modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName @@ -399,8 +392,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn , appliedMigrationDuration , appliedMigrationNumStatements }, registered)] - - pure appliedAt singleInTxnBlockResult :: Maybe a <- @@ -416,28 +407,26 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -- that we can register migrations were applied. createCoddSchemaAndFlushPendingMigrations - ApplyMigsResult _justAppliedMigs newSingleBlockResult <- - case - ( block - , isOneShotApplication defaultConnInfo pendingMigs - ) + case + ( block + , isOneShotApplication defaultConnInfo pendingMigs + ) of - (BlockInTxn inTxnBlock, True) -> runInTxnBlock - (fmap Just . actionAfter hoistedBlocks) - conn - inTxnBlock - (registerAppliedMigIfPossible conn) - (BlockInTxn inTxnBlock, False) -> runInTxnBlock - (const $ pure Nothing) - conn - inTxnBlock - (registerAppliedMigIfPossible conn) - (BlockNoTxn noTxnBlock, _) -> runNoTxnMig - conn - noTxnBlock - (registerAppliedMigIfPossibleNoTxn conn) + (BlockInTxn inTxnBlock, True) -> runInTxnBlock + (fmap Just . actionAfter hoistedBlocks) + conn + inTxnBlock + (registerAppliedMigIfPossible conn) + (BlockInTxn inTxnBlock, False) -> runInTxnBlock + (const $ pure Nothing) + conn + inTxnBlock + (registerAppliedMigIfPossible conn) + (BlockNoTxn noTxnBlock, _) -> runNoTxnMig + conn + noTxnBlock + (registerAppliedMigIfPossibleNoTxn conn) - pure newSingleBlockResult ) Nothing pendingMigs @@ -464,14 +453,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn :: (DB.Connection -> txn b) -> DB.Connection -> ConsecutiveInTxnMigrations m - -> ( FilePath - -> DB.UTCTimestamp - -> Maybe UTCTime - -> DiffTime - -> Int - -> txn UTCTime -- ^ This `txn` is a hoax; this should run in `m` to be correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` will be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. This is a flaw of our transaction tracking type-level interface. - ) - -> m (ApplyMigsResult b) + -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> txn ()) -- ^ This `txn` is a hoax; this should run in `m` to be correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` will be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. This is a flaw of our transaction tracking type-level interface. + -> m b runInTxnBlock act conn migBlock registerMig = do res <- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations @@ -504,27 +487,19 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn hoistedMigs = hoistAddedSqlMigration lift <$> inTxnMigs blockFinal - ranMigs <- - fmap NE.toList - $ forM hoistedMigs + forM_ hoistedMigs $ applySingleMigration conn registerMig 0 - ApplyMigsResult ranMigs <$> act conn + act conn logInfo "COMMITed transaction" pure res runNoTxnMig :: DB.Connection -> SingleNoTxnMigration m - -> ( FilePath - -> DB.UTCTimestamp - -> Maybe UTCTime - -> DiffTime - -> Int - -> m UTCTime -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. - ) - -> m (ApplyMigsResult (Maybe x)) + -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m ()) -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. + -> m (Maybe x) runNoTxnMig conn mig registerMig = do retryFold @MigrationApplicationFailure retryPolicy @@ -536,17 +511,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn Nothing -> error "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions" - Just (NoTxnMigMustRestartAfterSkipping numStmtsToSkip timeSpentApplying) + Just (NoTxnMigMustRestartAfterSkipping numStmtsToSkip _timeSpentApplying) -> do - let addedMig = - singleNoTxnMig previousMig - migData = addedSqlMig addedMig - void $ registerMig - (migrationName migData) - (addedSqlTimestamp addedMig) - Nothing - timeSpentApplying - numStmtsToSkip logWarn $ "Skipping the first " <> Fmt.sformat Fmt.int @@ -570,12 +536,12 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn Right ret -> pure ret ) $ \(migFinal, numStmtsToSkip) -> do - appliedMig <- applySingleMigration + applySingleMigration conn registerMig numStmtsToSkip (singleNoTxnMig migFinal) - pure $ ApplyMigsResult [appliedMig] Nothing + pure Nothing data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly @@ -917,12 +883,6 @@ parseMigrationFiles migsCompleted sqlMigrations = do } pure (Right fileStreamAgain, asqlmigAgain) -data ApplyMigsResult a = ApplyMigsResult - { migsAppliedAt :: [AppliedMigration] - , actionAfterResult :: a - } - - -- | This can be used as a last-action when applying migrations to -- strict-check schemas, logging differences, success and throwing -- an exception if they mismatch. @@ -1026,18 +986,12 @@ applySingleMigration :: forall m . (MonadUnliftIO m, CoddLogger m) => DB.Connection - -> ( FilePath - -> DB.UTCTimestamp - -> Maybe UTCTime - -> DiffTime - -> Int - -> m UTCTime - ) + -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m ()) -> Int -- ^ Number of countable-runnable statements to skip completely. Useful when retrying no-txn migrations from exactly the statements they last failed in. -> AddedSqlMigration m - -> m AppliedMigration -applySingleMigration conn afterMigRun numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) + -> m () +applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) = do let fn = migrationName sqlMig logInfoNoNewline $ "Applying " <> Text.pack fn @@ -1136,12 +1090,12 @@ applySingleMigration conn afterMigRun numCountableRunnableStmtsToSkip (AddedSqlM Fmt.ords (appliedMigrationNumStatements + 1) <> " failed to be applied. Codd will resume the next retry or codd up from it" - void $ afterMigRun + registerMigRan fn migTimestamp - Nothing appliedMigrationDuration appliedMigrationNumStatements + NoTxnMigrationFailed pure $ Just $ NoTxnMigMustRestartAfterSkipping appliedMigrationNumStatements appliedMigrationDuration Just lastBeginNum -> do @@ -1160,12 +1114,12 @@ applySingleMigration conn afterMigRun numCountableRunnableStmtsToSkip (AddedSqlM <> Fmt.sformat Fmt.ords lastBeginNum <> " statement in this migration" void $ liftIO $ DB.execute_ conn "ROLLBACK" - void $ afterMigRun + registerMigRan fn migTimestamp - Nothing appliedMigrationDuration appliedMigrationNumStatements + NoTxnMigrationFailed logInfo "ROLLBACKed last explicitly started transaction" pure $ Just $ NoTxnMigMustRestartAfterSkipping @@ -1176,25 +1130,17 @@ applySingleMigration conn afterMigRun numCountableRunnableStmtsToSkip (AddedSqlM } Nothing -> pure () - timestamp <- afterMigRun - fn + registerMigRan fn migTimestamp - Nothing appliedMigrationDuration appliedMigrationNumStatements + MigrationAppliedSuccessfully logInfo $ " (" <> prettyPrintDuration appliedMigrationDuration <> ", " <> Fmt.sformat Fmt.int appliedMigrationNumStatements <> ")" - - pure AppliedMigration { appliedMigrationName = migrationName sqlMig - , appliedMigrationTimestamp = migTimestamp - , appliedMigrationAt = timestamp - , appliedMigrationDuration - , appliedMigrationNumStatements - } where pico_1ns = 1_000 pico_1ms = 1_000_000_000 @@ -1229,9 +1175,18 @@ applySingleMigration conn afterMigRun numCountableRunnableStmtsToSkip (AddedSqlM ) data MigrationRegistered = MigrationRegistered | MigrationNotRegistered +data MigrationApplicationStatus = NoTxnMigrationFailed | MigrationAppliedSuccessfully +data MigrationLastStatementAppliedAt = NowInPostgresTime | SpecificTime UTCTime +type RegisterMigrationFunc m = FilePath + -> DB.UTCTimestamp + -> MigrationLastStatementAppliedAt + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> m () -- | Registers in the DB that a migration with supplied name and timestamp --- has been applied and returns the value used for the "applied_at" column. +-- has been either successfully applied or partially failed (the latter only makes sense for no-txn migrations). -- Fails if the codd_schema hasn't yet been created. registerRanMigration :: forall txn m @@ -1241,23 +1196,33 @@ registerRanMigration -> TxnIsolationLvl -> FilePath -> DB.UTCTimestamp - -> Maybe UTCTime -- ^ The time the migration finished being applied. If not supplied, pg's now() will be used + -> MigrationLastStatementAppliedAt -- ^ The time the last statement of the migration was applied or when it failed. -> DiffTime -> Int -- ^ The number of applied statements + -> MigrationApplicationStatus -> m UTCTime -registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration numAppliedStatements - = withTransaction @txn isolLvl conn $ DB.fromOnly <$> unsafeQuery1 - conn - "INSERT INTO codd_schema.sql_migrations (migration_timestamp, name, applied_at, application_duration, num_applied_statements) \ - \ VALUES (?, ?, COALESCE(?, now()), ?, ?) \ - \ RETURNING applied_at" +registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration numAppliedStatements apStatus + = let + (args, timestampValue) = case (appliedAt, apStatus) of + (NowInPostgresTime, NoTxnMigrationFailed) -> ("?, clock_timestamp()", Nothing) + (NowInPostgresTime, MigrationAppliedSuccessfully) -> ("clock_timestamp(), ?", Nothing) + (SpecificTime t, NoTxnMigrationFailed) -> ("NULL, ?", Just t) + (SpecificTime t, MigrationAppliedSuccessfully) -> ("?, NULL", Just t) + + in + withTransaction @txn isolLvl conn $ DB.fromOnly <$> unsafeQuery1 + conn ("INSERT INTO codd_schema.sql_migrations as m (migration_timestamp, name, application_duration, num_applied_statements, applied_at, no_txn_failed_at) \ + \ SELECT ?, ?, ?, ?, " <> args <> " \ + \ ON CONFLICT (name) DO UPDATE \ + \ SET application_duration=EXCLUDED.application_duration + m.application_duration \ + \ , num_applied_statements=EXCLUDED.num_applied_statements \ + \ RETURNING COALESCE(applied_at, no_txn_failed_at)") ( migTimestamp , fn - , appliedAt - , -- postgresql-simple does not have a `ToField DiffTime` instance :( - realToFrac @Double @NominalDiffTime + , realToFrac @Double @NominalDiffTime $ fromIntegral (diffTimeToPicoseconds appliedMigrationDuration) / 1_000_000_000_000 , numAppliedStatements + , timestampValue ) From d08fc02fbdc4cc4871ae29cca66f2a4a4056a10f Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 23 Mar 2024 11:36:42 -0300 Subject: [PATCH 11/28] Add runtime txnStatus check and fix places where it fails This will catch invalid assumptions in the codebase --- src/Codd/Internal.hs | 104 ++++++++++++++++------- src/Codd/Internal/MultiQueryStatement.hs | 10 +-- src/Codd/Query.hs | 29 ++++++- 3 files changed, 103 insertions(+), 40 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index c2f9fca0..a06748d3 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -10,7 +10,6 @@ import Codd.Internal.MultiQueryStatement , multiQueryStatement_ , singleStatement_ , skipNonCountableRunnableStatements - , txnStatus ) import Codd.Internal.Retry ( RetryIteration(..) , retryFold @@ -38,6 +37,7 @@ import Codd.Query ( CanStartTxn , NotInTxn , execvoid_ , query + , txnStatus , unsafeQuery1 , withTransaction ) @@ -319,7 +319,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn case mConn of Just conn -> pure conn Nothing -> modifyMVar connsPerInfo $ \m -> do - -- Need to unliftIO to log in here? -- logInfo $ "Connecting to (TODO: REDACT PASSWORD) " <> Text.pack (show cinfo) conn <- connectWithTimeout cinfo connectTimeout pure ((cinfo, conn) : m, conn) @@ -356,32 +355,30 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn pure $ map (second (const MigrationRegistered)) apmigs Nothing -> pure () - registerAppliedMigIfPossible :: forall t. (CanStartTxn m (t m), MonadIO (t m), MonadUnliftIO (t m), MonadTrans t) => DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> t m () - registerAppliedMigIfPossible anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do - mDefaultConn <- lift $ queryConn defaultConnInfo - (registered, appliedAt) <- - case mDefaultConn of - Nothing -> fmap (MigrationNotRegistered,) $ - DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT clock_timestamp()" () - Just defaultConn -> do - timeApplied <- registerRanMigration @(t m) defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus - pure (MigrationRegistered, timeApplied) + -- | The function used to register applied migrations for when a block of in-txn migrations using the default connection string are to be applied. + -- This will use same transaction as the one used to apply the migrations to insert into codd_schema.sql_migrations. + registerAppliedMigDefaultConnInTxnBlock :: FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> txn () + registerAppliedMigDefaultConnInTxnBlock appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do + (_, defaultConn) <- lift $ openConn defaultConnInfo + timeApplied <- registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName , appliedMigrationTimestamp - , appliedMigrationAt = appliedAt + , appliedMigrationAt = timeApplied , appliedMigrationDuration , appliedMigrationNumStatements - }, registered)] + }, MigrationRegistered)] - -- TODO: This function is a copy of the one above, but runs in a different monad. It'd be nicer to avoid the duplication. - registerAppliedMigIfPossibleNoTxn :: DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m () - registerAppliedMigIfPossibleNoTxn anyConnection appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do + -- | The function used to register applied migrations for when either a no-txn migration or a block of in-txn migrations _not_ using the default connection string are to be applied. + -- This will account for the possibility that the default connection string still isn't accessible by storing in-memory that some migrations were applied but not registered, + -- and also will BEGIN..COMMIT-wrap the insertion using the default connection if it's available. + registerAppliedMigIfPossibleOthers :: DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m () + registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do mDefaultConn <- queryConn defaultConnInfo (registered, appliedAt) <- case mDefaultConn of Nothing -> fmap (MigrationNotRegistered,) $ - DB.fromOnly <$> unsafeQuery1 anyConnection "SELECT clock_timestamp()" () + DB.fromOnly <$> unsafeQuery1 blockConn "SELECT clock_timestamp()" () Just defaultConn -> do timeApplied <- withTransaction @txn txnIsolationLvl defaultConn $ registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus pure (MigrationRegistered, timeApplied) @@ -402,6 +399,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn (blockCustomConnInfo block) (_, conn) <- openConn cinfo + -- Create codd_schema and flush previously applied migrations if possible. We do this here -- since we expect _some_ of the migration blocks to use the default connection string, and after -- that we can register migrations were applied. @@ -412,20 +410,20 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn , isOneShotApplication defaultConnInfo pendingMigs ) of - (BlockInTxn inTxnBlock, True) -> runInTxnBlock + (BlockInTxn inTxnBlock, True) -> runInTxnBlockDefaultConn (fmap Just . actionAfter hoistedBlocks) conn inTxnBlock - (registerAppliedMigIfPossible conn) - (BlockInTxn inTxnBlock, False) -> runInTxnBlock + registerAppliedMigDefaultConnInTxnBlock + (BlockInTxn inTxnBlock, False) -> if cinfo == defaultConnInfo then runInTxnBlockDefaultConn (const $ pure Nothing) conn inTxnBlock registerAppliedMigDefaultConnInTxnBlock else runInTxnBlockNotDefaultConn (const $ pure Nothing) conn inTxnBlock - (registerAppliedMigIfPossible conn) + (registerAppliedMigIfPossibleOthers conn) (BlockNoTxn noTxnBlock, _) -> runNoTxnMig conn noTxnBlock - (registerAppliedMigIfPossibleNoTxn conn) + (registerAppliedMigIfPossibleOthers conn) ) Nothing @@ -449,13 +447,59 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn return actAfterResult where - runInTxnBlock + runInTxnBlockNotDefaultConn + :: (DB.Connection -> txn b) + -> DB.Connection + -> ConsecutiveInTxnMigrations m + -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m ()) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. + -> m b + runInTxnBlockNotDefaultConn act conn migBlock registerMig = do + res <- + -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations + retryFold @MigrationApplicationFailure + retryPolicy + (\previousBlock RetryIteration { tryNumber } -> + if tryNumber == 0 + then pure previousBlock + else reReadBlock previousBlock + ) + migBlock + (\case + Left lastEx -> do + logError + "Failed after all configured retries. Giving up." + throwIO lastEx + Right ret -> pure ret + ) + $ \blockFinal -> do + logInfo "BEGINning transaction" + flip + onException + (logInfo + "ROLLBACKed transaction" + ) + $ withTransaction @txn txnIsolationLvl conn + $ do + let hoistedMigs + :: NonEmpty (AddedSqlMigration txn) + hoistedMigs = + hoistAddedSqlMigration lift + <$> inTxnMigs blockFinal + forM_ hoistedMigs + $ applySingleMigration conn + (\fp ts duration numStmts apStatus -> lift $ registerMig fp ts duration numStmts apStatus) + 0 + act conn + logInfo "COMMITed transaction" + pure res + + runInTxnBlockDefaultConn :: (DB.Connection -> txn b) -> DB.Connection -> ConsecutiveInTxnMigrations m - -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> txn ()) -- ^ This `txn` is a hoax; this should run in `m` to be correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` will be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. This is a flaw of our transaction tracking type-level interface. + -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> txn ()) -- ^ Using the `txn` is right: registering applied migrations happens in the default connection, and so it will happen in the same transaction as the migrations themselves. -> m b - runInTxnBlock act conn migBlock registerMig = do + runInTxnBlockDefaultConn act conn migBlock registerMig = do res <- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations retryFold @MigrationApplicationFailure @@ -1035,19 +1079,19 @@ applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedS -> Nothing states@(PQ.TransActive, _) -> error - $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream: " + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this as a bug: " ++ show states states@(_, PQ.TransActive) -> error - $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this upstream" + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this as a bug" ++ show states states@(PQ.TransInError, _) -> error - $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this as a bug" ++ show states states@(_, PQ.TransInError) -> error - $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this upstream" + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this as a bug" ++ show states states@(PQ.TransUnknown, _) -> error diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index a4e9792f..c1280f48 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -5,11 +5,11 @@ module Codd.Internal.MultiQueryStatement , runSingleStatementInternal_ , singleStatement_ , skipNonCountableRunnableStatements - , txnStatus ) where import Codd.Logging ( CoddLogger ) import Codd.Parsing ( SqlPiece(..) ) +import Codd.Query ( txnStatus ) import Control.Monad ( void ) import Data.Text ( Text ) import Data.Text.Encoding ( encodeUtf8 ) @@ -29,7 +29,6 @@ import qualified Streaming.Internal as S import qualified Streaming.Prelude as Streaming import qualified Streaming.Prelude as S import UnliftIO ( Exception - , MonadIO , MonadUnliftIO , handle , liftIO @@ -49,11 +48,10 @@ data StatementApplied = NotACountableStatement -- ^ Contains the transaction state _after_ the statement was applied. | StatementErred SqlStatementException -txnStatus :: MonadIO m => DB.Connection -> m PQ.TransactionStatus -txnStatus conn = liftIO $ PGInternal.withConnection conn PQ.transactionStatus -- | Runs SQL that could be either a row-returning or count-returning statement. Unlike postgresql-simple, this does not throw exceptions in case of SQL errors, but rather returns them. -singleStatement_ :: MonadIO m => DB.Connection -> Text -> m StatementApplied +singleStatement_ + :: MonadUnliftIO m => DB.Connection -> Text -> m StatementApplied singleStatement_ conn sql = do res <- liftIO $ PGInternal.exec conn $ encodeUtf8 sql status <- liftIO $ PQ.resultStatus res @@ -127,7 +125,7 @@ skipNonCountableRunnableStatements numCountableRunnableToSkip = runSingleStatementInternal_ - :: MonadIO m => DB.Connection -> SqlPiece -> m StatementApplied + :: MonadUnliftIO m => DB.Connection -> SqlPiece -> m StatementApplied runSingleStatementInternal_ conn p = case p of CommentPiece _ -> applied WhiteSpacePiece _ -> applied diff --git a/src/Codd/Query.hs b/src/Codd/Query.hs index 83e92794..a5cab029 100644 --- a/src/Codd/Query.hs +++ b/src/Codd/Query.hs @@ -5,6 +5,7 @@ module Codd.Query , NotInTxn , execvoid_ , query + , txnStatus , unsafeQuery1 , withTransaction ) where @@ -13,13 +14,18 @@ import Codd.Logging ( CoddLogger , LoggingT ) import Codd.Types ( TxnIsolationLvl(..) ) -import Control.Monad ( void ) +import Control.Monad ( void + , when + ) import Control.Monad.Reader ( ReaderT ) import Control.Monad.State ( StateT ) import Control.Monad.Trans ( MonadTrans(..) ) import Control.Monad.Trans.Writer ( WriterT ) import Data.Kind ( Type ) +import qualified Database.PostgreSQL.LibPQ as PQ import qualified Database.PostgreSQL.Simple as DB +import qualified Database.PostgreSQL.Simple.Internal + as PGInternal import UnliftIO ( MonadIO(..) , MonadUnliftIO , onException @@ -84,7 +90,7 @@ newtype InTxnT m a = InTxnT { unTxnT :: m a } instance MonadTrans InTxnT where lift = InTxnT --- 1. First we start with our basic assumptions: `IO` has no open transactions, and `SomeMonadTransformerT IO` also don't. +-- 1. First we start with our basic assumptions: `IO` has no open transactions, and `SomeMonadTransformerT IO` also doesn't. -- We do this for some common monad transformers to increase compatibility. instance NotInTxn IO instance NotInTxn m => NotInTxn (LoggingT m) @@ -105,7 +111,7 @@ instance (InTxn m, Monoid w) => InTxn (WriterT w m) -- 3. Now we want the type-level trickery to let us infer from class constraints if we're inside an `InTxn` monad or not. -- There are possibly many ways to go about this with GHC. I'm not well versed in them. --- My ~first~ second attempt is to use we use multi-parameter classes to avoid duplicate instances (since instance heads are ignored +-- My ~first~ second attempt is to use multi-parameter classes to avoid duplicate instances (since instance heads are ignored -- for instance selection). -- However, in some contexts where the `txn` monad type appears only in class constraints, but not in the function's arguments, using -- `withTransaction` will require enabling AllowAmbiguousTypes and explicit type applications like `withTransaction @txn`. @@ -144,11 +150,26 @@ withTransaction withTransaction isolLvl conn f = do t :: CheckTxnWit m txn <- txnCheck case t of - AlreadyInTxn -> f + AlreadyInTxn -> assertTxnStatus PQ.TransInTrans >> f NotInTxn -> do + assertTxnStatus PQ.TransIdle execvoid_ conn $ beginStatement isolLvl -- Note: once we stop rolling back on exception here, we can relax this function's `MonadUnliftIO` -- constraint to just `MonadIO` v <- unTxnT f `onException` liftIO (DB.rollback conn) liftIO $ DB.commit conn pure v + where + assertTxnStatus :: MonadUnliftIO n => PQ.TransactionStatus -> n () + assertTxnStatus s = do + actualStatus <- txnStatus conn + when (actualStatus /= s) + $ error + $ "Internal error in codd. We were expecting txnStatus " + ++ show s + ++ " but got " + ++ show actualStatus + ++ ". Please report this as a bug" + +txnStatus :: MonadUnliftIO m => DB.Connection -> m PQ.TransactionStatus +txnStatus conn = liftIO $ PGInternal.withConnection conn PQ.transactionStatus From cb237975faabac247a7ecd3189035c58bf864471 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 23 Mar 2024 12:28:16 -0300 Subject: [PATCH 12/28] Comment on hole in type-level sandbox for transaction handling --- src/Codd/Query.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Codd/Query.hs b/src/Codd/Query.hs index a5cab029..1fb06ff4 100644 --- a/src/Codd/Query.hs +++ b/src/Codd/Query.hs @@ -77,7 +77,12 @@ We want to allow functions to specify the following constraints: Of course, any type level constraints we devise must assume the user will not call `execute conn "BEGIN"`, `execute conn "COMMIT"`, or similar statements, but rather that they'll use functions exposed by this module to manage transactions. And with that little bit of discipline, we should be able to moderately achieve goals 1 to 3, where "moderately" means -this is not necessarily an airtight sandbox, but any ways to break out of it might be unlikely in codd's codebase. +this is not necessarily an airtight sandbox, but ways to break out of it should be harder in codd's codebase. + +One existing exception to this airtightness is handling multiple connections at once. It's not hard to do something like +`withTransaction @txn isolLvl conn1 $ someInTxnTFunc conn2` when conn2 is in fact not in a transaction. + +We want to address this hole eventually. -} class Monad m => InTxn (m :: Type -> Type) From e4568c3c939ff51d98f1a663a35ccfc3d27a47ad Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 23 Mar 2024 15:58:44 -0300 Subject: [PATCH 13/28] Add new tests --- codd.cabal | 1 + test/DbDependentSpecs/RetrySpec.hs | 198 +++++++++++++----- test/DbUtils.hs | 96 +++++---- ...2000-01-01-00-00-00-bootstrap-but-fail.sql | 23 ++ ...0-01-01-00-00-00-divide-by-0-migration.sql | 9 +- ...0-01-01-00-00-00-divide-by-0-migration.sql | 9 +- 6 files changed, 246 insertions(+), 90 deletions(-) create mode 100644 test/migrations/bootstrap-no-txn-fails-but-makes-default-conn-accessible/2000-01-01-00-00-00-bootstrap-but-fail.sql diff --git a/codd.cabal b/codd.cabal index 83a65dcc..69e5b9aa 100644 --- a/codd.cabal +++ b/codd.cabal @@ -211,6 +211,7 @@ test-suite codd-test DbDependentSpecs.AnalysisSpec DbDependentSpecs.AppCommandsSpec DbDependentSpecs.ApplicationSpec + DbDependentSpecs.RetrySpec DbDependentSpecs.SchemaVerificationSpec DbDependentSpecs.InvariantsSpec DbUtils diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index c3565193..be866ef0 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -2,24 +2,41 @@ module DbDependentSpecs.RetrySpec where import Codd ( applyMigrationsNoCheck ) import Codd.Environment ( CoddSettings(..) ) -import Codd.Internal ( MigrationApplicationFailure ) +import Codd.Internal ( MigrationApplicationFailure + , connectWithTimeout + , withConnection + ) import Codd.Logging ( LoggingT(runLoggingT) , Newline(..) ) +import Codd.Parsing ( AddedSqlMigration(..) + , ParsedSql(..) + , SqlMigration(..) + ) +import Codd.Query ( unsafeQuery1 ) import Codd.Types ( RetryBackoffPolicy(..) , RetryPolicy(..) ) import Control.Monad ( forM_ ) import Control.Monad.Reader ( ReaderT(..) ) +import Control.Monad.Trans.Resource ( MonadThrow ) import qualified Data.List as List import Data.Text ( Text ) import qualified Data.Text as Text import qualified Data.Text.IO as Text +import Data.Time ( UTCTime ) +import qualified Database.PostgreSQL.Simple as DB import DbUtils ( aroundFreshDatabase + , aroundTestDbInfo + , createTestUserMig + , createTestUserMigPol + , getIncreasingTimestamp + , mkValidSql , testConnTimeout ) import Test.Hspec import UnliftIO ( MonadIO + , SomeException , hFlush , stdout ) @@ -29,63 +46,148 @@ import UnliftIO.Concurrent ( MVar , readMVar ) +createTableMig, addColumnMig :: MonadThrow m => AddedSqlMigration m +createTableMig = AddedSqlMigration + SqlMigration { migrationName = "0001-create-table.sql" + , migrationSql = mkValidSql "CREATE TABLE anytable ();" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 1) +addColumnMig = AddedSqlMigration + SqlMigration + { migrationName = "0002-add-column.sql" + , migrationSql = mkValidSql + "ALTER TABLE anytable ADD COLUMN anycolumn TEXT;" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 2) + spec :: Spec spec = do describe "DbDependentSpecs" $ do - describe "Retry tests" $ aroundFreshDatabase $ do - let - testRetries inTxn emptyTestDbInfo = do - -- Kind of ugly.. we test behaviour by analyzing logs and - -- trust that threadDelay is called. - logsmv <- newMVar [] - runMVarLogger - logsmv - (applyMigrationsNoCheck - (emptyTestDbInfo - { retryPolicy = RetryPolicy - 7 - (ExponentialBackoff - (realToFrac @Double 0.001) + describe "Retry tests" $ do + aroundTestDbInfo + $ it + "Bootstrapping no-txn migration still gets registered if it makes default connection string accessible before failing" + $ \emptyTestDbInfo -> do + -- Migration will make it accessible despite throwing an exception, and thus codd should _still_ + -- register that it was partially applied + withConnection (migsConnString emptyTestDbInfo) + testConnTimeout + (const $ pure ()) + `shouldThrow` (\(e :: SomeException) -> + "database \"codd-test-db\" does not exist" + `List.isInfixOf` show e ) - , sqlMigrations = - [ if inTxn - then - "test/migrations/retry-policy-test-in-txn/" - else - "test/migrations/retry-policy-test-no-txn/" - ] - } + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + (emptyTestDbInfo + { retryPolicy = RetryPolicy + 3 + (ExponentialBackoff + (realToFrac @Double 0.001) + ) + , sqlMigrations = + [ "test/migrations/bootstrap-no-txn-fails-but-makes-default-conn-accessible" + ] + } + ) + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(e :: MigrationApplicationFailure) -> + "division by zero" + `List.isInfixOf` show e + + && "SELECT 1/0" + `List.isInfixOf` show e + ) + + + withConnection (migsConnString emptyTestDbInfo) + testConnTimeout + $ \conn -> do + (apname, apnstmts, apat, apfailedat) <- + unsafeQuery1 + conn + "SELECT name, num_applied_statements, applied_at, no_txn_failed_at FROM codd_schema.sql_migrations" + () + (apname, apnstmts, apat) + `shouldBe` ( "bootstrap-but-fail.sql" :: String + , 5 :: Int + , Nothing :: Maybe UTCTime + ) + apfailedat + `shouldNotBe` (Nothing :: Maybe UTCTime) + + describe "Side-effect-less migrations" $ aroundFreshDatabase $ do + let + testRetries inTxn emptyTestDbInfo = do + -- Kind of ugly.. we test behaviour by analyzing logs and + -- trust that threadDelay is called. + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + (emptyTestDbInfo + { retryPolicy = RetryPolicy + 7 + (ExponentialBackoff + (realToFrac @Double 0.001) + ) + , sqlMigrations = + [ if inTxn + then + "test/migrations/retry-policy-test-in-txn/" + else + "test/migrations/retry-policy-test-no-txn/" + ] + } + ) + Nothing + testConnTimeout + (const $ pure ()) ) - Nothing - testConnTimeout - (const $ pure ()) - ) - `shouldThrow` (\(e :: MigrationApplicationFailure) -> - "division by zero" - `List.isInfixOf` show e + `shouldThrow` (\(e :: MigrationApplicationFailure) -> + "division by zero" + `List.isInfixOf` show e - && "SELECT 7/0" - `List.isInfixOf` show e - ) - logs <- readMVar logsmv - length (filter ("before next try" `Text.isInfixOf`) logs) - `shouldBe` 7 - -- The last attempt isn't logged because we don't catch exceptions for it - length (filter ("division by zero" `Text.isInfixOf`) logs) - `shouldBe` 8 -- The error appears once before each retry, but also once when the final exception is thrown - forM_ [1 :: Int, 2, 4, 8, 16, 32, 64] $ \delay -> + && "SELECT 7/0" + `List.isInfixOf` show e + ) + logs <- readMVar logsmv + length + (filter ("before next try" `Text.isInfixOf`) + logs + ) + `shouldBe` 7 + -- The last attempt isn't logged because we don't catch exceptions for it length (filter - (( "Waiting " - <> Text.pack (show delay) - <> "ms" - ) `Text.isInfixOf` - ) + ("division by zero" `Text.isInfixOf`) logs ) - `shouldBe` 1 - it "For in-txn migrations" $ testRetries True - it "For no-txn migrations" $ testRetries False + `shouldBe` 8 -- The error appears once before each retry, but also once when the final exception is thrown + forM_ [1 :: Int, 2, 4, 8, 16, 32, 64] $ \delay -> + length + (filter + (( "Waiting " + <> Text.pack (show delay) + <> "ms" + ) `Text.isInfixOf` + ) + logs + ) + `shouldBe` 1 + it "For in-txn migrations" $ testRetries True + it "For no-txn migrations" $ testRetries False runMVarLogger :: MonadIO m => MVar [Text] -> LoggingT m a -> m a runMVarLogger logsmv m = runReaderT diff --git a/test/DbUtils.hs b/test/DbUtils.hs index af11e7df..12e8cd38 100644 --- a/test/DbUtils.hs +++ b/test/DbUtils.hs @@ -145,6 +145,7 @@ createTestUserMigPol = do let psql = + -- IMPORTANT: If you change this migration, also change the 2000-01-01-00-00-00-bootstrap-but-fail.sql file and the test that uses it mkValidSql $ "DO\n" <> "$do$\n" @@ -185,10 +186,11 @@ testCoddSettings = do } -- | Doesn't create a Database, doesn't create anything. Just supplies the Test CoddSettings from Env Vars to your test. +-- This does cleanup if you create any databases or tables in the `postgres` DB, though. aroundTestDbInfo :: SpecWith CoddSettings -> Spec aroundTestDbInfo = around $ \act -> do coddSettings <- testCoddSettings - act coddSettings + act coddSettings `finally` cleanupAfterTest aroundFreshDatabase :: SpecWith CoddSettings -> Spec aroundFreshDatabase = aroundDatabaseWithMigs [] @@ -198,9 +200,8 @@ aroundDatabaseWithMigs -> SpecWith CoddSettings -> Spec aroundDatabaseWithMigs startingMigs = around $ \act -> do - coddSettings@CoddSettings { migsConnString } <- testCoddSettings + coddSettings <- testCoddSettings - -- TODO: Reuse withCoddDbAndDrop! runCoddLogger (do @@ -211,43 +212,66 @@ aroundDatabaseWithMigs startingMigs = around $ \act -> do (const $ pure ()) liftIO (act coddSettings) ) - `finally` withConnection - migsConnString { connectUser = "postgres" - , connectDatabase = "postgres" - } - testConnTimeout + `finally` cleanupAfterTest + +cleanupAfterTest :: IO () +cleanupAfterTest = do + CoddSettings { migsConnString } <- testCoddSettings + withConnection + migsConnString { connectUser = "postgres" + , connectDatabase = "postgres" + } + testConnTimeout -- Some things aren't associated to a Schema and not even to a Database; they belong under the entire DB/postgres instance. -- So we reset these things here, with the goal of getting the DB in the same state as it would be before even "createUserTestMig" -- from "testCoddSettings" runs, so that each test is guaranteed the same starting DB environment. - (\conn -> do - execvoid_ conn "ALTER ROLE postgres RESET ALL;" - execvoid_ conn "DROP DATABASE \"codd-test-db\";" + (\conn -> do + execvoid_ conn "ALTER ROLE postgres RESET ALL;" + dbs :: [String] <- + map DB.fromOnly + <$> query + conn + "SELECT datname FROM pg_catalog.pg_database WHERE datname NOT IN ('postgres', 'template0', 'template1')" + () + forM_ dbs $ \db -> + execvoid_ conn $ "DROP DATABASE \"" <> fromString db <> "\"" - allRoles :: [String] <- - map DB.fromOnly - <$> query - conn - "SELECT rolname FROM pg_roles WHERE rolname NOT IN ('postgres') AND rolname NOT LIKE 'pg_%' ORDER BY rolname DESC" - () - forM_ allRoles $ \role -> do - let escapedRole = - fromString ("\"" <> role <> "\"") - execvoid_ conn - $ "DROP OWNED BY " - <> escapedRole - -- <> "; REVOKE ALL ON ALL TABLES IN SCHEMA public FROM " - -- <> escapedRole - -- <> "; REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM " - -- <> escapedRole - -- <> "; REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM " - -- <> escapedRole - -- <> "; REVOKE ALL ON SCHEMA public FROM " - -- <> escapedRole - -- <> "; REVOKE ALL ON DATABASE \"codd-test-db\" FROM " - -- <> escapedRole - <> "; DROP ROLE " - <> escapedRole - ) + allRoles :: [String] <- + map DB.fromOnly + <$> query + conn + "SELECT rolname FROM pg_roles WHERE rolname NOT IN ('postgres') AND rolname NOT LIKE 'pg_%' ORDER BY rolname DESC" + () + forM_ allRoles $ \role -> do + let escapedRole = fromString ("\"" <> role <> "\"") + execvoid_ conn + $ "DROP OWNED BY " + <> escapedRole + -- <> "; REVOKE ALL ON ALL TABLES IN SCHEMA public FROM " + -- <> escapedRole + -- <> "; REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM " + -- <> escapedRole + -- <> "; REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM " + -- <> escapedRole + -- <> "; REVOKE ALL ON SCHEMA public FROM " + -- <> escapedRole + -- <> "; REVOKE ALL ON DATABASE \"codd-test-db\" FROM " + -- <> escapedRole + <> "; DROP ROLE " + <> escapedRole + + createdTables :: [(String, String)] <- query + conn + "SELECT schemaname, tablename FROM pg_catalog.pg_tables WHERE schemaname NOT IN ('pg_catalog', 'information_schema')" + () + forM_ createdTables $ \(schema, tbl) -> + execvoid_ conn + $ "DROP TABLE \"" + <> fromString schema + <> "\".\"" + <> fromString tbl + <> "\" CASCADE" + ) -- | Returns a Postgres UTC Timestamp that increases with its input parameter. getIncreasingTimestamp :: DiffTime -> DB.UTCTimestamp diff --git a/test/migrations/bootstrap-no-txn-fails-but-makes-default-conn-accessible/2000-01-01-00-00-00-bootstrap-but-fail.sql b/test/migrations/bootstrap-no-txn-fails-but-makes-default-conn-accessible/2000-01-01-00-00-00-bootstrap-but-fail.sql new file mode 100644 index 00000000..959dc7f8 --- /dev/null +++ b/test/migrations/bootstrap-no-txn-fails-but-makes-default-conn-accessible/2000-01-01-00-00-00-bootstrap-but-fail.sql @@ -0,0 +1,23 @@ +-- codd: no-txn +-- codd-connection: dbname=postgres host=127.0.0.1 user=postgres port=5434 +-- This migration is used in our retry policy tests + +-- The CREATE TABLE statements ensure we're rolling back and retrying properly, or else they will fail due to +-- tables already existing. + +CREATE TABLE some_table(); +DO +$do$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'codd-test-user') THEN + CREATE USER "codd-test-user"; + END IF; +END +$do$; +CREATE DATABASE "codd-test-db" WITH OWNER="codd-test-user"; +GRANT CONNECT ON DATABASE "codd-test-db" TO "codd-test-user"; + +CREATE TABLE other_table(); +-- The statement below will fail, but the default connection string will be accessible after that happens! +SELECT 1/0; +SELECT 42; diff --git a/test/migrations/retry-policy-test-in-txn/2000-01-01-00-00-00-divide-by-0-migration.sql b/test/migrations/retry-policy-test-in-txn/2000-01-01-00-00-00-divide-by-0-migration.sql index 28a7e657..8c44ac1b 100644 --- a/test/migrations/retry-policy-test-in-txn/2000-01-01-00-00-00-divide-by-0-migration.sql +++ b/test/migrations/retry-policy-test-in-txn/2000-01-01-00-00-00-divide-by-0-migration.sql @@ -11,6 +11,10 @@ -- have more than the first few SQL statements (should only be the -- statements in the first line, really) in memory. +-- The CREATE TABLE statements ensure we're rolling back and retrying properly, or else they will fail due to +-- tables already existing. + +CREATE TABLE any_table(); SELECT 1; SELECT 1; SELECT 1; @@ -23,6 +27,5 @@ SELECT 1; SELECT 1; SELECT 1; SELECT 1; -SELECT 1; -SELECT 1; -SELECT 7/0 \ No newline at end of file +CREATE TABLE other_table(); +SELECT 7/0 diff --git a/test/migrations/retry-policy-test-no-txn/2000-01-01-00-00-00-divide-by-0-migration.sql b/test/migrations/retry-policy-test-no-txn/2000-01-01-00-00-00-divide-by-0-migration.sql index 3ccc1454..c9156780 100644 --- a/test/migrations/retry-policy-test-no-txn/2000-01-01-00-00-00-divide-by-0-migration.sql +++ b/test/migrations/retry-policy-test-no-txn/2000-01-01-00-00-00-divide-by-0-migration.sql @@ -2,6 +2,10 @@ -- This migration is used in our retry policy tests -- See why so many "SELECT 1" in the other "divide-by-0-migration.sql" file. +-- The CREATE TABLE statements ensure we're retrying no-txn migrations from the right statement, or else they will fail due to +-- tables already existing. + +CREATE TABLE any_table(); SELECT 1; SELECT 1; SELECT 1; @@ -13,6 +17,5 @@ SELECT 1; SELECT 1; SELECT 1; SELECT 1; -SELECT 1; -SELECT 1; -SELECT 7/0 \ No newline at end of file +CREATE TABLE other_table(); +SELECT 7/0 From bd351e7f65430a7f73d09bf2f2a6ee1da655b1fe Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 23 Mar 2024 17:44:00 -0300 Subject: [PATCH 14/28] Return error types instead of using exceptions, make new test pass This is a super specific error case of a failed bootstrapping no-txn migration that runs enough statements to make the default connection string accessible. With this commit codd will create codd_schema after this failure and register this migration was partially applied. This is the most unlikely and useless error case I can think of, and the implementation messes with retry timeouts, so it's not clear this is a good idea. Plus, `codd add` might run into it. Since we have to address other issues with `codd add` given the new code anyway, it might not be that big of a problem. At least we can say codd will be helpful even in the most extreme cases. Even if I can't think of a case where this particular feature will do that. --- src/Codd/Internal.hs | 120 ++++++++++++++++++----------- src/Codd/Internal/Retry.hs | 61 +++++++-------- src/Codd/Query.hs | 3 + test/DbDependentSpecs/RetrySpec.hs | 9 +-- 4 files changed, 109 insertions(+), 84 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index a06748d3..205dcc38 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -71,7 +71,7 @@ import Data.Time ( DiffTime , NominalDiffTime , UTCTime , diffTimeToPicoseconds - , picosecondsToDiffTime + , picosecondsToDiffTime, secondsToDiffTime ) import qualified Database.PostgreSQL.LibPQ as PQ import qualified Database.PostgreSQL.Simple as DB @@ -92,10 +92,9 @@ import UnliftIO ( Exception , MonadUnliftIO , hClose , newIORef - , onException , readIORef , timeout - , writeIORef + , writeIORef, try, SomeException ) import UnliftIO.Concurrent ( threadDelay ) import UnliftIO.Directory ( listDirectory ) @@ -374,11 +373,21 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -- and also will BEGIN..COMMIT-wrap the insertion using the default connection if it's available. registerAppliedMigIfPossibleOthers :: DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m () registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do + csExists <- readMVar coddSchemaExists + case (apStatus, csExists) of + (NoTxnMigrationFailed, False) -> do + -- Super duper ultra extra special case: we try to create codd_schema as a partially-run no-txn migration may have applied statements that make the default connection string accessible. The same isn't possible with in-txn migrations. + -- This will increase the delay between retry intervals beyond what the user has specified since we're adding a bit of a timeout to each retry of failed no-txn migrations. Since this is an extremely rare error case, it probably doesn't + -- matter too much what we do. I have questions if we should even support this, to be honest. Hacky stuff below: + void $ try @m @SomeException $ withConnection defaultConnInfo (min 0.3 connectTimeout) $ \_conn -> do + void $ openConn defaultConnInfo + createCoddSchemaAndFlushPendingMigrations + _ -> pure () mDefaultConn <- queryConn defaultConnInfo (registered, appliedAt) <- case mDefaultConn of Nothing -> fmap (MigrationNotRegistered,) $ - DB.fromOnly <$> unsafeQuery1 blockConn "SELECT clock_timestamp()" () + DB.fromOnly <$> unsafeQuery1 blockConn "SELECT clock_timestamp()" () Just defaultConn -> do timeApplied <- withTransaction @txn txnIsolationLvl defaultConn $ registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus pure (MigrationRegistered, timeApplied) @@ -473,23 +482,24 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) $ \blockFinal -> do logInfo "BEGINning transaction" - flip - onException - (logInfo - "ROLLBACKed transaction" - ) - $ withTransaction @txn txnIsolationLvl conn + withTransaction @txn txnIsolationLvl conn $ do let hoistedMigs :: NonEmpty (AddedSqlMigration txn) hoistedMigs = hoistAddedSqlMigration lift <$> inTxnMigs blockFinal - forM_ hoistedMigs - $ applySingleMigration conn - (\fp ts duration numStmts apStatus -> lift $ registerMig fp ts duration numStmts apStatus) - 0 - act conn + errorOrOk <- forMExcept hoistedMigs + $ applySingleMigration conn + (\fp ts duration numStmts apStatus -> lift $ registerMig fp ts duration numStmts apStatus) + 0 + case errorOrOk of + Left e -> do + logInfo + "ROLLBACKed transaction" + pure $ Left e + Right () -> + Right <$> act conn logInfo "COMMITed transaction" pure res @@ -519,23 +529,24 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) $ \blockFinal -> do logInfo "BEGINning transaction" - flip - onException - (logInfo - "ROLLBACKed transaction" - ) - $ withTransaction txnIsolationLvl conn + withTransaction txnIsolationLvl conn $ do let hoistedMigs :: NonEmpty (AddedSqlMigration txn) hoistedMigs = hoistAddedSqlMigration lift <$> inTxnMigs blockFinal - forM_ hoistedMigs + errorOrOk <- forMExcept hoistedMigs $ applySingleMigration conn registerMig 0 - act conn + case errorOrOk of + Left e -> do + logInfo + "ROLLBACKed transaction" + pure $ Left e + Right () -> + Right <$> act conn logInfo "COMMITed transaction" pure res @@ -547,8 +558,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn runNoTxnMig conn mig registerMig = do retryFold @MigrationApplicationFailure retryPolicy - (\(previousMig, _) RetryIteration { lastException } -> - case lastException of + (\(previousMig, _) RetryIteration { lastError } -> + case lastError of Nothing -> pure (previousMig, 0) Just MigrationApplicationFailure { noTxnMigRetryInstructions } -> case noTxnMigRetryInstructions of @@ -573,19 +584,26 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) (mig, 0) (\case - Left lastEx -> do + Left lastErr -> do logError "Failed after all configured retries. Giving up." - throwIO lastEx + -- The warning below would be very nice to have, but we don't want it appearing for new users of codd when they're trying `codd add` for the first time. We should know if this migration application happens during `codd add` and omit the warning below. + -- logWarn "IMPORTANT:\n\ + -- \ If this is a database you care about and can't simply recreate (e.g. a Production database), then read the following _very carefully_:\n\ + -- \ A no-txn migration failed to be applied completely but may have had some of its statements applied. Your database may have been left in an intermediary state.\n\ + -- \ If you think this is a temporary error and that resuming from the exact statement inside the no-txn migration that failed might work, you can just run `codd up` and codd _will resume_ migration application precisely from that last failed statement.\n\ + -- \ If you're going to do that, however, do _not_ edit the migration as changing the position of the failed statement inside it will make codd silently continue from the wrong place.\n\ + -- \ But if this is not going away merely by resuming application from the last failed statement, one other option you have is to look at the last error above to see how many statements codd applied and which statement failed. This should help you pinpoint the precise failing statement inside the migration, even if it might not be entirely obvious how codd counts statements internally. \n\ + -- \ Once you know which statement that is, you can edit the migration and remove that failing statement and all others that come after it. You can then rewrite that part of the migration in a way you think will fix the problem. Just make sure you don't change anything before it. After that you can run `codd up` and codd will resume application after skipping the statements that had been applied, meaning it will resume from the first statement in the rewritten part of the migration." + throwIO lastErr Right ret -> pure ret ) - $ \(migFinal, numStmtsToSkip) -> do - applySingleMigration + $ \(migFinal, numStmtsToSkip) -> + fmap (const Nothing) <$> applySingleMigration conn registerMig numStmtsToSkip (singleNoTxnMig migFinal) - pure Nothing data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly @@ -1024,8 +1042,16 @@ data MigrationApplicationFailure = MigrationApplicationFailure deriving stock Show instance Exception MigrationApplicationFailure --- | Applies a single migration and returns the time when it finished being applied. Calls the supplied --- `registerMigRan` after applying the migration. +forMExcept :: Monad m => NonEmpty a -> (a -> m (Either e ())) -> m (Either e ()) +forMExcept nl f = go (NE.toList nl) + where + go [] = pure $ Right () + go (x:xs) = f x >>= \case + Left e -> pure $ Left e + Right () -> go xs + +-- | Applies a single migration and returns an error if it failed to be applied or `()` otherwise. Calls the supplied +-- `registerMigRan` after the migration is applied or fails. Does not throw exceptions coming from SQL errors. applySingleMigration :: forall m . (MonadUnliftIO m, CoddLogger m) @@ -1034,7 +1060,7 @@ applySingleMigration -> Int -- ^ Number of countable-runnable statements to skip completely. Useful when retrying no-txn migrations from exactly the statements they last failed in. -> AddedSqlMigration m - -> m () + -> m (Either MigrationApplicationFailure ()) applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) = do let fn = migrationName sqlMig @@ -1168,23 +1194,25 @@ applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedS "ROLLBACKed last explicitly started transaction" pure $ Just $ NoTxnMigMustRestartAfterSkipping (lastBeginNum - 1) appliedMigrationDuration - throwIO $ MigrationApplicationFailure + pure $ Left $ MigrationApplicationFailure { sqlStatementEx , noTxnMigRetryInstructions } - Nothing -> pure () + Nothing -> do + registerMigRan fn + migTimestamp + appliedMigrationDuration + appliedMigrationNumStatements + MigrationAppliedSuccessfully + logInfo + $ " (" + <> prettyPrintDuration appliedMigrationDuration + <> ", " + <> Fmt.sformat Fmt.int appliedMigrationNumStatements + <> ")" + pure $ Right () + - registerMigRan fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - MigrationAppliedSuccessfully - logInfo - $ " (" - <> prettyPrintDuration appliedMigrationDuration - <> ", " - <> Fmt.sformat Fmt.int appliedMigrationNumStatements - <> ")" where pico_1ns = 1_000 pico_1ms = 1_000_000_000 diff --git a/src/Codd/Internal/Retry.hs b/src/Codd/Internal/Retry.hs index d309eaa0..6ba19545 100644 --- a/src/Codd/Internal/Retry.hs +++ b/src/Codd/Internal/Retry.hs @@ -13,25 +13,22 @@ import Data.Maybe ( isNothing ) import qualified Data.Text as Text import UnliftIO ( MonadUnliftIO ) import UnliftIO.Concurrent ( threadDelay ) -import UnliftIO.Exception ( Exception - , catch - ) -data RetryIteration a = RetryIteration - { isLastTry :: Bool - , tryNumber :: Int +data RetryIteration e = RetryIteration + { isLastTry :: Bool + , tryNumber :: Int -- ^ 0-indexed try number. E.g. 0 is the first try, 1 is the first *retry*. - , lastException :: Maybe a - -- ^ If this is a retry, the exception that caused it + , lastError :: Maybe e + -- ^ If this is a retry, the error that caused it } -- | Retries an action as many times and with wait intervals according --- to the supplied `RetryPolicy`, but only retries in case of synchronous --- exceptions. Provides fold-like behavior for an accumulator +-- to the supplied `RetryPolicy`, but uses result types to know when to retry, so exceptions +-- will just bubble up - this will not handle them at all. Provides fold-like behavior for an accumulator -- for each try, including the first one. retryFold :: forall e m b a - . (MonadUnliftIO m, CoddLogger m, Exception e) + . (MonadUnliftIO m, CoddLogger m) => RetryPolicy -> (b -> RetryIteration e -> m b) -- ^ Accumulating function. This runs even for the first try. @@ -39,31 +36,35 @@ retryFold -- ^ Initial value of the accumulator. -> (Either e a -> m a) -- ^ Called after the action succeeds or after all retries fail. - -> (b -> m a) - -- ^ Action to retry. Any exceptions of the chosen type are caught and logged as errors. - -- Retries don't happen in case no exceptions are thrown. + -> (b -> m (Either e a)) + -- ^ Action to attempt/retry, which will return a `Left` if it needs to be retried and a `Right` if it succeeded. -> m a retryFold initialPol accf acc0 final f = go initialPol acc0 0 Nothing where - go rpol previousAcc tryNumber lastException = do + go rpol previousAcc tryNumber lastError = do let mNextPol = retryPolicyIterate rpol - thisIter = RetryIteration { isLastTry = isNothing mNextPol + thisIter = RetryIteration { isLastTry = isNothing mNextPol , tryNumber - , lastException + , lastError } thisAcc <- accf previousAcc thisIter case mNextPol of - Nothing -> catch (f thisAcc) (final . Left) + Nothing -> f thisAcc >>= final Just (waitIfFail, nextPol) -> do - ret <- catch (f thisAcc) $ \(ex :: e) -> do - let waitTimeMS :: Int = - truncate $ (realToFrac waitIfFail :: Float) * 1000 - -- It would be more reasonable if this retryFold function didn't print anything, letting - -- its callers do that. Maybe in the future. - logWarn - $ "Waiting " - <> Text.pack (show waitTimeMS) - <> "ms before next try" - threadDelay (1000 * waitTimeMS) - go nextPol thisAcc (tryNumber + 1) (Just ex) - final (Right ret) + ret <- f thisAcc + case ret of + Left e -> do + let + waitTimeMS :: Int = + truncate + $ (realToFrac waitIfFail :: Float) + * 1000 + -- It would be more reasonable if this retryFold function didn't print anything, letting + -- its callers do that. Maybe in the future. + logWarn + $ "Waiting " + <> Text.pack (show waitTimeMS) + <> "ms before next try" + threadDelay (1000 * waitTimeMS) + go nextPol thisAcc (tryNumber + 1) (Just e) + Right a -> final $ Right a diff --git a/src/Codd/Query.hs b/src/Codd/Query.hs index 1fb06ff4..b5d0a365 100644 --- a/src/Codd/Query.hs +++ b/src/Codd/Query.hs @@ -17,6 +17,7 @@ import Codd.Types ( TxnIsolationLvl(..) ) import Control.Monad ( void , when ) +import Control.Monad.Except ( ExceptT ) import Control.Monad.Reader ( ReaderT ) import Control.Monad.State ( StateT ) import Control.Monad.Trans ( MonadTrans(..) ) @@ -102,6 +103,7 @@ instance NotInTxn m => NotInTxn (LoggingT m) instance NotInTxn m => NotInTxn (ResourceT m) instance NotInTxn m => NotInTxn (ReaderT r m) instance NotInTxn m => NotInTxn (StateT s m) +instance NotInTxn m => NotInTxn (ExceptT e m) instance (NotInTxn m, Monoid w) => NotInTxn (WriterT w m) -- 2. Next, if some monad `m` is inside a transaction, `SomeMonadTransformerT m` also is. @@ -112,6 +114,7 @@ instance InTxn m => InTxn (LoggingT m) instance InTxn m => InTxn (ResourceT m) instance InTxn m => InTxn (ReaderT r m) instance InTxn m => InTxn (StateT s m) +instance InTxn m => InTxn (ExceptT e m) instance (InTxn m, Monoid w) => InTxn (WriterT w m) -- 3. Now we want the type-level trickery to let us infer from class constraints if we're inside an `InTxn` monad or not. diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index be866ef0..22fa8d15 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -3,14 +3,12 @@ module DbDependentSpecs.RetrySpec where import Codd ( applyMigrationsNoCheck ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal ( MigrationApplicationFailure - , connectWithTimeout , withConnection ) import Codd.Logging ( LoggingT(runLoggingT) , Newline(..) ) import Codd.Parsing ( AddedSqlMigration(..) - , ParsedSql(..) , SqlMigration(..) ) import Codd.Query ( unsafeQuery1 ) @@ -25,11 +23,8 @@ import Data.Text ( Text ) import qualified Data.Text as Text import qualified Data.Text.IO as Text import Data.Time ( UTCTime ) -import qualified Database.PostgreSQL.Simple as DB import DbUtils ( aroundFreshDatabase , aroundTestDbInfo - , createTestUserMig - , createTestUserMigPol , getIncreasingTimestamp , mkValidSql , testConnTimeout @@ -120,7 +115,7 @@ spec = do "SELECT name, num_applied_statements, applied_at, no_txn_failed_at FROM codd_schema.sql_migrations" () (apname, apnstmts, apat) - `shouldBe` ( "bootstrap-but-fail.sql" :: String + `shouldBe` ( "2000-01-01-00-00-00-bootstrap-but-fail.sql" :: String , 5 :: Int , Nothing :: Maybe UTCTime ) @@ -130,8 +125,6 @@ spec = do describe "Side-effect-less migrations" $ aroundFreshDatabase $ do let testRetries inTxn emptyTestDbInfo = do - -- Kind of ugly.. we test behaviour by analyzing logs and - -- trust that threadDelay is called. logsmv <- newMVar [] runMVarLogger logsmv From b7d2ccfdbdfc88e5359d109adf8681a2fb6c6830 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 23 Mar 2024 18:47:43 -0300 Subject: [PATCH 15/28] Make some error states unrepresentable with associated type families More type applications to the rescue.. but it did remove one code path that was printing "This is an internal error.." --- src/Codd/Internal.hs | 568 ++++++++++++++++------------- test/DbDependentSpecs/RetrySpec.hs | 6 +- 2 files changed, 325 insertions(+), 249 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 205dcc38..d10dbb4b 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BlockArguments, AllowAmbiguousTypes #-} +{-# LANGUAGE DataKinds #-} module Codd.Internal where import Prelude hiding ( readFile ) @@ -58,6 +59,7 @@ import Control.Monad.IO.Class ( MonadIO(..) ) import Control.Monad.Trans ( MonadTrans(..) ) import Control.Monad.Trans.Resource ( MonadThrow ) import Data.Functor ( (<&>) ) +import Data.Kind ( Type ) import qualified Data.List as List import Data.List ( sortOn ) import Data.List.NonEmpty ( NonEmpty(..) ) @@ -71,7 +73,7 @@ import Data.Time ( DiffTime , NominalDiffTime , UTCTime , diffTimeToPicoseconds - , picosecondsToDiffTime, secondsToDiffTime + , picosecondsToDiffTime ) import qualified Database.PostgreSQL.LibPQ as PQ import qualified Database.PostgreSQL.Simple as DB @@ -90,11 +92,13 @@ import System.FilePath ( () ) import UnliftIO ( Exception , MonadUnliftIO + , SomeException , hClose , newIORef , readIORef , timeout - , writeIORef, try, SomeException + , try + , writeIORef ) import UnliftIO.Concurrent ( threadDelay ) import UnliftIO.Directory ( listDirectory ) @@ -109,8 +113,9 @@ import UnliftIO.IO ( IOMode(ReadMode) , openFile ) import UnliftIO.MVar ( modifyMVar + , modifyMVar_ , newMVar - , readMVar, modifyMVar_ + , readMVar ) import UnliftIO.Resource ( MonadResource , ReleaseKey @@ -119,7 +124,6 @@ import UnliftIO.Resource ( MonadResource , release , runResourceT ) -import Data.Bifunctor (second) dbIdentifier :: Text -> DB.Query dbIdentifier s = "\"" <> fromString (Text.unpack s) <> "\"" @@ -208,12 +212,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -310,8 +314,9 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -- Note: We could probably compound this Monad with StateT instead of using an MVar, but IIRC that creates issues -- with MonadUnliftIO. connsPerInfo <- newMVar (mempty :: [(DB.ConnectInfo, DB.Connection)]) - appliedMigs <- newMVar (mempty :: [(AppliedMigration, MigrationRegistered)]) - coddSchemaExists <- newMVar $ coddSchemaVersion bootstrapCheck /= CoddSchemaDoesNotExist + unregisteredButAppliedMigs <- newMVar (mempty :: [AppliedMigration]) + coddSchemaExists <- + newMVar $ coddSchemaVersion bootstrapCheck /= CoddSchemaDoesNotExist let openConn :: DB.ConnectInfo -> m (ReleaseKey, DB.Connection) openConn cinfo = flip allocate DB.close $ do mConn <- lookup cinfo <$> readMVar connsPerInfo @@ -328,21 +333,21 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn createCoddSchemaAndFlushPendingMigrations :: m () createCoddSchemaAndFlushPendingMigrations = do mDefaultConn <- queryConn defaultConnInfo - csExists <- readMVar coddSchemaExists + csExists <- readMVar coddSchemaExists case mDefaultConn of Just defaultConn -> do unless csExists $ do logInfo "Creating or updating codd_schema..." - createCoddSchema @txn - maxBound - txnIsolationLvl - defaultConn + createCoddSchema @txn maxBound + txnIsolationLvl + defaultConn modifyMVar_ coddSchemaExists $ const $ pure True - modifyMVar_ appliedMigs $ \apmigs -> do - withTransaction @txn txnIsolationLvl defaultConn $ - forM_ [ am | (am, MigrationNotRegistered) <- apmigs ] - $ \AppliedMigration {..} -> registerRanMigration @txn + modifyMVar_ unregisteredButAppliedMigs $ \apmigs -> do + withTransaction @txn txnIsolationLvl defaultConn + $ forM_ apmigs + $ \AppliedMigration {..} -> + registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName @@ -350,93 +355,137 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn (SpecificTime appliedMigrationAt) appliedMigrationDuration appliedMigrationNumStatements - MigrationAppliedSuccessfully -- TODO: What if a failing no-txn migration ran just enough to make the default connection string accessible? Then we want to register it properly here, not assume it succeeded. - pure $ map (second (const MigrationRegistered)) apmigs + MigrationAppliedSuccessfully + pure [] Nothing -> pure () - -- | The function used to register applied migrations for when a block of in-txn migrations using the default connection string are to be applied. - -- This will use same transaction as the one used to apply the migrations to insert into codd_schema.sql_migrations. - registerAppliedMigDefaultConnInTxnBlock :: FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> txn () - registerAppliedMigDefaultConnInTxnBlock appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do - (_, defaultConn) <- lift $ openConn defaultConnInfo - timeApplied <- registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus - - modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName - , appliedMigrationTimestamp - , appliedMigrationAt = timeApplied - , appliedMigrationDuration - , appliedMigrationNumStatements - }, MigrationRegistered)] - - -- | The function used to register applied migrations for when either a no-txn migration or a block of in-txn migrations _not_ using the default connection string are to be applied. - -- This will account for the possibility that the default connection string still isn't accessible by storing in-memory that some migrations were applied but not registered, - -- and also will BEGIN..COMMIT-wrap the insertion using the default connection if it's available. - registerAppliedMigIfPossibleOthers :: DB.Connection -> FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m () - registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus = do - csExists <- readMVar coddSchemaExists - case (apStatus, csExists) of - (NoTxnMigrationFailed, False) -> do - -- Super duper ultra extra special case: we try to create codd_schema as a partially-run no-txn migration may have applied statements that make the default connection string accessible. The same isn't possible with in-txn migrations. - -- This will increase the delay between retry intervals beyond what the user has specified since we're adding a bit of a timeout to each retry of failed no-txn migrations. Since this is an extremely rare error case, it probably doesn't - -- matter too much what we do. I have questions if we should even support this, to be honest. Hacky stuff below: - void $ try @m @SomeException $ withConnection defaultConnInfo (min 0.3 connectTimeout) $ \_conn -> do - void $ openConn defaultConnInfo - createCoddSchemaAndFlushPendingMigrations - _ -> pure () - mDefaultConn <- queryConn defaultConnInfo - (registered, appliedAt) <- +-- | The function used to register applied migrations for when a block of in-txn migrations using the default connection string are to be applied. +-- This will use same transaction as the one used to apply the migrations to insert into codd_schema.sql_migrations. + registerAppliedMigDefaultConnInTxnBlock + :: FilePath + -> DB.UTCTimestamp + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> txn () + registerAppliedMigDefaultConnInTxnBlock appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus + = do + (_, defaultConn) <- lift $ openConn defaultConnInfo + void $ registerRanMigration @txn + defaultConn + txnIsolationLvl + appliedMigrationName + appliedMigrationTimestamp + NowInPostgresTime + appliedMigrationDuration + appliedMigrationNumStatements + apStatus + +-- | The function used to register applied migrations for when either a no-txn migration or a block of in-txn migrations _not_ using the default connection string are to be applied. +-- This will account for the possibility that the default connection string still isn't accessible by storing in-memory that some migrations were applied but not registered, +-- and also will BEGIN..COMMIT-wrap the insertion using the default connection if it's available. + registerAppliedMigIfPossibleOthers + :: DB.Connection + -> FilePath + -> DB.UTCTimestamp + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> m () + registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus + = do + csExists <- readMVar coddSchemaExists + case (apStatus, csExists) of + (NoTxnMigrationFailed, False) -> do + -- Super duper ultra extra special case: we try to create codd_schema as a partially-run no-txn migration may have applied statements that make the default connection string accessible. The same isn't possible with in-txn migrations. + -- This will increase the delay between retry intervals beyond what the user has specified since we're adding a bit of a timeout to each retry of failed no-txn migrations. Since this is an extremely rare error case, it probably doesn't + -- matter too much what we do. I have questions if we should even support this, to be honest. Hacky stuff below: + void + $ try @m @SomeException + $ withConnection defaultConnInfo + (min 0.3 connectTimeout) + $ \_conn -> do + void $ openConn defaultConnInfo + createCoddSchemaAndFlushPendingMigrations + _ -> pure () + mDefaultConn <- queryConn defaultConnInfo case mDefaultConn of - Nothing -> fmap (MigrationNotRegistered,) $ - DB.fromOnly <$> unsafeQuery1 blockConn "SELECT clock_timestamp()" () + Nothing -> do + appliedAt <- + DB.fromOnly + <$> unsafeQuery1 + blockConn + "SELECT clock_timestamp()" + () + modifyMVar_ unregisteredButAppliedMigs $ \apmigs -> + pure + $ apmigs + ++ [ AppliedMigration + { appliedMigrationName + , appliedMigrationTimestamp + , appliedMigrationAt = appliedAt + , appliedMigrationDuration + , appliedMigrationNumStatements + } + ] Just defaultConn -> do - timeApplied <- withTransaction @txn txnIsolationLvl defaultConn $ registerRanMigration @txn defaultConn txnIsolationLvl appliedMigrationName appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration appliedMigrationNumStatements apStatus - pure (MigrationRegistered, timeApplied) - - modifyMVar_ appliedMigs $ \apmigs -> pure $ apmigs ++ [(AppliedMigration { appliedMigrationName - , appliedMigrationTimestamp - , appliedMigrationAt = appliedAt - , appliedMigrationDuration - , appliedMigrationNumStatements - }, registered)] - - - singleInTxnBlockResult :: Maybe a <- - foldM - (\_ block -> do - let - cinfo = fromMaybe defaultConnInfo - (blockCustomConnInfo block) - (_, conn) <- openConn cinfo - - - -- Create codd_schema and flush previously applied migrations if possible. We do this here - -- since we expect _some_ of the migration blocks to use the default connection string, and after - -- that we can register migrations were applied. - createCoddSchemaAndFlushPendingMigrations - - case + void + $ withTransaction @txn txnIsolationLvl + defaultConn + $ registerRanMigration @txn + defaultConn + txnIsolationLvl + appliedMigrationName + appliedMigrationTimestamp + NowInPostgresTime + appliedMigrationDuration + appliedMigrationNumStatements + apStatus + + + + singleInTxnBlockResult :: Maybe a <- foldM + (\_ block -> do + let cinfo = + fromMaybe defaultConnInfo (blockCustomConnInfo block) + (_, conn) <- openConn cinfo + + + -- Create codd_schema and flush previously applied migrations if possible. We do this here + -- since we expect _some_ of the migration blocks to use the default connection string, and after + -- that we can register migrations were applied. + createCoddSchemaAndFlushPendingMigrations + + case ( block , isOneShotApplication defaultConnInfo pendingMigs ) - of - (BlockInTxn inTxnBlock, True) -> runInTxnBlockDefaultConn - (fmap Just . actionAfter hoistedBlocks) - conn - inTxnBlock - registerAppliedMigDefaultConnInTxnBlock - (BlockInTxn inTxnBlock, False) -> if cinfo == defaultConnInfo then runInTxnBlockDefaultConn (const $ pure Nothing) conn inTxnBlock registerAppliedMigDefaultConnInTxnBlock else runInTxnBlockNotDefaultConn - (const $ pure Nothing) - conn - inTxnBlock - (registerAppliedMigIfPossibleOthers conn) + of + (BlockInTxn inTxnBlock, True) -> + runInTxnBlockDefaultConn + (fmap Just . actionAfter hoistedBlocks) + conn + inTxnBlock + registerAppliedMigDefaultConnInTxnBlock + (BlockInTxn inTxnBlock, False) -> + if cinfo == defaultConnInfo + then runInTxnBlockDefaultConn + (const $ pure Nothing) + conn + inTxnBlock + registerAppliedMigDefaultConnInTxnBlock + else runInTxnBlockNotDefaultConn + (const $ pure Nothing) + conn + inTxnBlock + (registerAppliedMigIfPossibleOthers conn) (BlockNoTxn noTxnBlock, _) -> runNoTxnMig conn noTxnBlock (registerAppliedMigIfPossibleOthers conn) - - ) - Nothing - pendingMigs + ) + Nothing + pendingMigs actAfterResult <- case singleInTxnBlockResult of Just result -> pure result @@ -460,12 +509,18 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn :: (DB.Connection -> txn b) -> DB.Connection -> ConsecutiveInTxnMigrations m - -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m ()) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. + -> ( FilePath + -> DB.UTCTimestamp + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> m () + ) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. -> m b runInTxnBlockNotDefaultConn act conn migBlock registerMig = do res <- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations - retryFold @MigrationApplicationFailure + retryFold retryPolicy (\previousBlock RetryIteration { tryNumber } -> if tryNumber == 0 @@ -482,24 +537,27 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) $ \blockFinal -> do logInfo "BEGINning transaction" - withTransaction @txn txnIsolationLvl conn - $ do - let hoistedMigs - :: NonEmpty (AddedSqlMigration txn) - hoistedMigs = - hoistAddedSqlMigration lift - <$> inTxnMigs blockFinal - errorOrOk <- forMExcept hoistedMigs - $ applySingleMigration conn - (\fp ts duration numStmts apStatus -> lift $ registerMig fp ts duration numStmts apStatus) - 0 - case errorOrOk of - Left e -> do - logInfo - "ROLLBACKed transaction" - pure $ Left e - Right () -> - Right <$> act conn + withTransaction @txn txnIsolationLvl conn $ do + let hoistedMigs :: NonEmpty (AddedSqlMigration txn) + hoistedMigs = hoistAddedSqlMigration lift + <$> inTxnMigs blockFinal + errorOrOk <- + forMExcept hoistedMigs $ applySingleMigration + conn + (\fp ts duration numStmts apStatus -> + lift $ registerMig fp + ts + duration + numStmts + apStatus + ) + NoSkipStatements + case errorOrOk of + Left e -> do + logInfo + "ROLLBACKed transaction" + pure $ Left e + Right () -> Right <$> act conn logInfo "COMMITed transaction" pure res @@ -507,12 +565,18 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn :: (DB.Connection -> txn b) -> DB.Connection -> ConsecutiveInTxnMigrations m - -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> txn ()) -- ^ Using the `txn` is right: registering applied migrations happens in the default connection, and so it will happen in the same transaction as the migrations themselves. + -> ( FilePath + -> DB.UTCTimestamp + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> txn () + ) -- ^ Using the `txn` is right: registering applied migrations happens in the default connection, and so it will happen in the same transaction as the migrations themselves. -> m b runInTxnBlockDefaultConn act conn migBlock registerMig = do res <- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations - retryFold @MigrationApplicationFailure + retryFold retryPolicy (\previousBlock RetryIteration { tryNumber } -> if tryNumber == 0 @@ -529,58 +593,56 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) $ \blockFinal -> do logInfo "BEGINning transaction" - withTransaction txnIsolationLvl conn - $ do - let hoistedMigs - :: NonEmpty (AddedSqlMigration txn) - hoistedMigs = - hoistAddedSqlMigration lift - <$> inTxnMigs blockFinal - errorOrOk <- forMExcept hoistedMigs - $ applySingleMigration conn - registerMig - 0 - case errorOrOk of - Left e -> do - logInfo - "ROLLBACKed transaction" - pure $ Left e - Right () -> - Right <$> act conn + withTransaction txnIsolationLvl conn $ do + let hoistedMigs :: NonEmpty (AddedSqlMigration txn) + hoistedMigs = hoistAddedSqlMigration lift + <$> inTxnMigs blockFinal + errorOrOk <- + forMExcept hoistedMigs $ applySingleMigration + conn + registerMig + NoSkipStatements + case errorOrOk of + Left e -> do + logInfo + "ROLLBACKed transaction" + pure $ Left e + Right () -> Right <$> act conn logInfo "COMMITed transaction" pure res runNoTxnMig :: DB.Connection -> SingleNoTxnMigration m - -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m ()) -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. + -> ( FilePath + -> DB.UTCTimestamp + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> m () + ) -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. -> m (Maybe x) runNoTxnMig conn mig registerMig = do - retryFold @MigrationApplicationFailure + retryFold retryPolicy (\(previousMig, _) RetryIteration { lastError } -> case lastError of Nothing -> pure (previousMig, 0) - Just MigrationApplicationFailure { noTxnMigRetryInstructions } - -> case noTxnMigRetryInstructions of - Nothing -> - error - "Internal error in codd, please report. This is supposed to be a no-txn migration, yet the internal error does not contain retry instructions" - Just (NoTxnMigMustRestartAfterSkipping numStmtsToSkip _timeSpentApplying) - -> do - logWarn - $ "Skipping the first " - <> Fmt.sformat Fmt.int - numStmtsToSkip - <> " SQL statements, which have already been applied, and start applying from the " - <> Fmt.sformat - Fmt.ords - (numStmtsToSkip + 1) - <> " statement" - -- TODO: Resuming from the Nth statement shall soon be possible not just for a `BEGIN` statement, but also from a fresh invocation of "codd up" after a previous one failed! - -- Re-reading from disk only to skip statements is unnecessary work when the failed statement was not in an explicit `BEGIN ... COMMIT` section, but having more than one code path to handle such extremely rare errors is not worth it - freshBlock <- reReadMig previousMig - pure (freshBlock, numStmtsToSkip) + Just NoTxnMigrationApplicationFailure { noTxnMigAppliedStatements } + -> do + logWarn + $ "Skipping the first " + <> Fmt.sformat Fmt.int + noTxnMigAppliedStatements + <> " SQL statements, which have already been applied, and start applying from the " + <> Fmt.sformat + Fmt.ords + (noTxnMigAppliedStatements + 1) + <> " statement" + -- TODO: Resuming from the Nth statement shall soon be possible not just for a `BEGIN` statement, but also from a fresh invocation of "codd up" after a previous one failed! + -- Re-reading from disk only to skip statements is unnecessary work when the failed statement was not in an explicit `BEGIN ... COMMIT` section, but having more than one code path to handle such extremely rare errors is not worth it + freshBlock <- reReadMig previousMig + pure (freshBlock, noTxnMigAppliedStatements) ) (mig, 0) (\case @@ -600,10 +662,10 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) $ \(migFinal, numStmtsToSkip) -> fmap (const Nothing) <$> applySingleMigration - conn - registerMig - numStmtsToSkip - (singleNoTxnMig migFinal) + conn + registerMig + (SkipStatementsNoTxn numStmtsToSkip) + (singleNoTxnMig migFinal) data CoddSchemaVersion = CoddSchemaDoesNotExist | CoddSchemaV1 | CoddSchemaV2 -- ^ V2 includes duration of each migration's application | CoddSchemaV3 -- ^ V3 includes the number of SQL statements applied per migration, allowing codd to resume application of even failed no-txn migrations correctly @@ -1029,41 +1091,60 @@ blockCustomConnInfo (BlockInTxn (ConsecutiveInTxnMigrations (AddedSqlMigration { blockCustomConnInfo (BlockNoTxn (SingleNoTxnMigration (AddedSqlMigration { addedSqlMig }) _)) = migrationCustomConnInfo addedSqlMig -data NoTxnMigFailureRetryInstructions = NoTxnMigMustRestartAfterSkipping - Int - -- ^ Number of statements successfully applied, which is also the number of statements to skip in the next attempt - DiffTime - -- ^ Time spent applying statements in this failed attempt - deriving stock Show -data MigrationApplicationFailure = MigrationApplicationFailure +data NoTxnMigrationApplicationFailure = NoTxnMigrationApplicationFailure { sqlStatementEx :: SqlStatementException - , noTxnMigRetryInstructions :: Maybe NoTxnMigFailureRetryInstructions + , noTxnMigAppliedStatements :: Int } deriving stock Show -instance Exception MigrationApplicationFailure +instance Exception NoTxnMigrationApplicationFailure -forMExcept :: Monad m => NonEmpty a -> (a -> m (Either e ())) -> m (Either e ()) +-- | Applies the supplied function to each element in the list in order, stopping on the first one that returns a `Left`. +forMExcept + :: Monad m => NonEmpty a -> (a -> m (Either e ())) -> m (Either e ()) forMExcept nl f = go (NE.toList nl) - where - go [] = pure $ Right () - go (x:xs) = f x >>= \case - Left e -> pure $ Left e - Right () -> go xs + where + go [] = pure $ Right () + go (x : xs) = f x >>= \case + Left e -> pure $ Left e + Right () -> go xs + +class SkipStatements a where + type SkipError a :: Type + numStatementsToSkip :: a -> Int + mkSqlError :: SqlStatementException -> Int -> SkipError a + +newtype SkipStatementsNoTxn = SkipStatementsNoTxn Int +data NoSkipStatements = NoSkipStatements +instance SkipStatements SkipStatementsNoTxn where + type SkipError SkipStatementsNoTxn = NoTxnMigrationApplicationFailure -- TODO: Rename to NoTxnNoTxnMigrationApplicationFailure + numStatementsToSkip (SkipStatementsNoTxn n) = n + mkSqlError = NoTxnMigrationApplicationFailure +instance SkipStatements NoSkipStatements where + type SkipError NoSkipStatements = SqlStatementException + numStatementsToSkip NoSkipStatements = 0 + mkSqlError ex _ = ex -- | Applies a single migration and returns an error if it failed to be applied or `()` otherwise. Calls the supplied -- `registerMigRan` after the migration is applied or fails. Does not throw exceptions coming from SQL errors. applySingleMigration - :: forall m - . (MonadUnliftIO m, CoddLogger m) + :: forall m s + . (MonadUnliftIO m, CoddLogger m, SkipStatements s) => DB.Connection - -> (FilePath -> DB.UTCTimestamp -> DiffTime -> Int -> MigrationApplicationStatus -> m ()) - -> Int + -> ( FilePath + -> DB.UTCTimestamp + -> DiffTime + -> Int + -> MigrationApplicationStatus + -> m () + ) + -> s -- ^ Number of countable-runnable statements to skip completely. Useful when retrying no-txn migrations from exactly the statements they last failed in. -> AddedSqlMigration m - -> m (Either MigrationApplicationFailure ()) -applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedSqlMigration sqlMig migTimestamp) + -> m (Either (SkipError s) ()) +applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimestamp) = do - let fn = migrationName sqlMig + let fn = migrationName sqlMig + numCountableRunnableStmtsToSkip = numStatementsToSkip skip logInfoNoNewline $ "Applying " <> Text.pack fn ((appliedMigrationNumStatements, errorOrDone, mLastBegin), appliedMigrationDuration) <- @@ -1143,8 +1224,8 @@ applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedS Just sqlStatementEx -> do logInfo " [failed]" logError $ Text.pack $ show sqlStatementEx - noTxnMigRetryInstructions <- if migrationInTxn sqlMig - then pure Nothing + fmap Left $ if migrationInTxn sqlMig + then pure $ mkSqlError @s sqlStatementEx 0 else do case mLastBegin of Nothing -> do @@ -1160,14 +1241,14 @@ applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedS Fmt.ords (appliedMigrationNumStatements + 1) <> " failed to be applied. Codd will resume the next retry or codd up from it" - registerMigRan - fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - NoTxnMigrationFailed - pure $ Just $ NoTxnMigMustRestartAfterSkipping - appliedMigrationNumStatements appliedMigrationDuration + registerMigRan fn + migTimestamp + appliedMigrationDuration + appliedMigrationNumStatements + NoTxnMigrationFailed + pure $ mkSqlError @s + sqlStatementEx + appliedMigrationNumStatements Just lastBeginNum -> do logError $ "After applying " @@ -1184,33 +1265,28 @@ applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedS <> Fmt.sformat Fmt.ords lastBeginNum <> " statement in this migration" void $ liftIO $ DB.execute_ conn "ROLLBACK" - registerMigRan - fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - NoTxnMigrationFailed + registerMigRan fn + migTimestamp + appliedMigrationDuration + appliedMigrationNumStatements + NoTxnMigrationFailed logInfo "ROLLBACKed last explicitly started transaction" - pure $ Just $ NoTxnMigMustRestartAfterSkipping - (lastBeginNum - 1) appliedMigrationDuration - pure $ Left $ MigrationApplicationFailure - { sqlStatementEx - , noTxnMigRetryInstructions - } + pure $ mkSqlError @s sqlStatementEx + (lastBeginNum - 1) Nothing -> do - registerMigRan fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - MigrationAppliedSuccessfully - logInfo - $ " (" - <> prettyPrintDuration appliedMigrationDuration - <> ", " - <> Fmt.sformat Fmt.int appliedMigrationNumStatements - <> ")" - pure $ Right () + registerMigRan fn + migTimestamp + appliedMigrationDuration + appliedMigrationNumStatements + MigrationAppliedSuccessfully + logInfo + $ " (" + <> prettyPrintDuration appliedMigrationDuration + <> ", " + <> Fmt.sformat Fmt.int appliedMigrationNumStatements + <> ")" + pure $ Right () where @@ -1246,20 +1322,14 @@ applySingleMigration conn registerMigRan numCountableRunnableStmtsToSkip (AddedS * fromIntegral (nsec after - nsec before) ) -data MigrationRegistered = MigrationRegistered | MigrationNotRegistered data MigrationApplicationStatus = NoTxnMigrationFailed | MigrationAppliedSuccessfully +-- | This type exists because bootstrapping migrations can't be registered until codd_schema is created. But we want the time when they were applied to truly reflect when they were +-- applied, so we wouldn't be able to use NowInPostgresTime by the time codd_schema is created. data MigrationLastStatementAppliedAt = NowInPostgresTime | SpecificTime UTCTime -type RegisterMigrationFunc m = FilePath - -> DB.UTCTimestamp - -> MigrationLastStatementAppliedAt - -> DiffTime - -> Int - -> MigrationApplicationStatus - -> m () -- | Registers in the DB that a migration with supplied name and timestamp -- has been either successfully applied or partially failed (the latter only makes sense for no-txn migrations). --- Fails if the codd_schema hasn't yet been created. +-- Will throw an error if codd_schema hasn't yet been created. registerRanMigration :: forall txn m . (MonadUnliftIO m, MonadIO txn, CanStartTxn m txn) @@ -1275,26 +1345,32 @@ registerRanMigration -> m UTCTime registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration numAppliedStatements apStatus = let - (args, timestampValue) = case (appliedAt, apStatus) of - (NowInPostgresTime, NoTxnMigrationFailed) -> ("?, clock_timestamp()", Nothing) - (NowInPostgresTime, MigrationAppliedSuccessfully) -> ("clock_timestamp(), ?", Nothing) - (SpecificTime t, NoTxnMigrationFailed) -> ("NULL, ?", Just t) - (SpecificTime t, MigrationAppliedSuccessfully) -> ("?, NULL", Just t) - + (args, timestampValue) = case (appliedAt, apStatus) of + (NowInPostgresTime, NoTxnMigrationFailed) -> + ("?, clock_timestamp()", Nothing) + (NowInPostgresTime, MigrationAppliedSuccessfully) -> + ("clock_timestamp(), ?", Nothing) + (SpecificTime t, NoTxnMigrationFailed) -> ("NULL, ?", Just t) + (SpecificTime t, MigrationAppliedSuccessfully) -> + ("?, NULL", Just t) in - withTransaction @txn isolLvl conn $ DB.fromOnly <$> unsafeQuery1 - conn ("INSERT INTO codd_schema.sql_migrations as m (migration_timestamp, name, application_duration, num_applied_statements, applied_at, no_txn_failed_at) \ - \ SELECT ?, ?, ?, ?, " <> args <> " \ + withTransaction @txn isolLvl conn $ DB.fromOnly <$> unsafeQuery1 + conn + ("INSERT INTO codd_schema.sql_migrations as m (migration_timestamp, name, application_duration, num_applied_statements, applied_at, no_txn_failed_at) \ + \ SELECT ?, ?, ?, ?, " + <> args + <> " \ \ ON CONFLICT (name) DO UPDATE \ \ SET application_duration=EXCLUDED.application_duration + m.application_duration \ \ , num_applied_statements=EXCLUDED.num_applied_statements \ - \ RETURNING COALESCE(applied_at, no_txn_failed_at)") - ( migTimestamp - , fn + \ RETURNING COALESCE(applied_at, no_txn_failed_at)" + ) + ( migTimestamp + , fn -- postgresql-simple does not have a `ToField DiffTime` instance :( - , realToFrac @Double @NominalDiffTime - $ fromIntegral (diffTimeToPicoseconds appliedMigrationDuration) - / 1_000_000_000_000 - , numAppliedStatements - , timestampValue - ) + , realToFrac @Double @NominalDiffTime + $ fromIntegral (diffTimeToPicoseconds appliedMigrationDuration) + / 1_000_000_000_000 + , numAppliedStatements + , timestampValue + ) diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index 22fa8d15..12b530f8 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -2,7 +2,7 @@ module DbDependentSpecs.RetrySpec where import Codd ( applyMigrationsNoCheck ) import Codd.Environment ( CoddSettings(..) ) -import Codd.Internal ( MigrationApplicationFailure +import Codd.Internal ( NoTxnMigrationApplicationFailure , withConnection ) import Codd.Logging ( LoggingT(runLoggingT) @@ -97,7 +97,7 @@ spec = do testConnTimeout (const $ pure ()) ) - `shouldThrow` (\(e :: MigrationApplicationFailure) -> + `shouldThrow` (\(e :: NoTxnMigrationApplicationFailure) -> "division by zero" `List.isInfixOf` show e @@ -148,7 +148,7 @@ spec = do testConnTimeout (const $ pure ()) ) - `shouldThrow` (\(e :: MigrationApplicationFailure) -> + `shouldThrow` (\(e :: SomeException) -> "division by zero" `List.isInfixOf` show e From 0c829a2062a57325d43f78f096c0c6c399d8133c Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Tue, 26 Mar 2024 21:32:08 -0300 Subject: [PATCH 16/28] Resuming partially applied no-txn migration on `codd up` --- src/Codd/Internal.hs | 337 +++++++++++++++++------------ src/Codd/Parsing.hs | 14 +- test/DbDependentSpecs/RetrySpec.hs | 17 ++ test/ParsingSpec.hs | 4 +- 4 files changed, 225 insertions(+), 147 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index d10dbb4b..f9c0a806 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -25,6 +25,7 @@ import Codd.Parsing ( AddedSqlMigration(..) , AppliedMigration(..) , EnvVars(..) , FileStream(..) + , MigrationApplicationStatus(..) , ParsedSql(..) , SqlMigration(..) , hoistAddedSqlMigration @@ -64,6 +65,8 @@ import qualified Data.List as List import Data.List ( sortOn ) import Data.List.NonEmpty ( NonEmpty(..) ) import qualified Data.List.NonEmpty as NE +import Data.Map.Strict ( Map ) +import qualified Data.Map.Strict as Map import Data.Maybe ( fromMaybe ) import Data.String ( fromString ) import Data.Text ( Text ) @@ -212,12 +215,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -354,8 +357,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn appliedMigrationTimestamp (SpecificTime appliedMigrationAt) appliedMigrationDuration - appliedMigrationNumStatements - MigrationAppliedSuccessfully + appliedMigrationStatus pure [] Nothing -> pure () @@ -365,10 +367,9 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn :: FilePath -> DB.UTCTimestamp -> DiffTime - -> Int -> MigrationApplicationStatus -> txn () - registerAppliedMigDefaultConnInTxnBlock appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus + registerAppliedMigDefaultConnInTxnBlock appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationStatus = do (_, defaultConn) <- lift $ openConn defaultConnInfo void $ registerRanMigration @txn @@ -378,8 +379,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration - appliedMigrationNumStatements - apStatus + appliedMigrationStatus -- | The function used to register applied migrations for when either a no-txn migration or a block of in-txn migrations _not_ using the default connection string are to be applied. -- This will account for the possibility that the default connection string still isn't accessible by storing in-memory that some migrations were applied but not registered, @@ -389,14 +389,13 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> FilePath -> DB.UTCTimestamp -> DiffTime - -> Int -> MigrationApplicationStatus -> m () - registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationNumStatements apStatus + registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationStatus = do csExists <- readMVar coddSchemaExists - case (apStatus, csExists) of - (NoTxnMigrationFailed, False) -> do + case (appliedMigrationStatus, csExists) of + (NoTxnMigrationFailed _, False) -> do -- Super duper ultra extra special case: we try to create codd_schema as a partially-run no-txn migration may have applied statements that make the default connection string accessible. The same isn't possible with in-txn migrations. -- This will increase the delay between retry intervals beyond what the user has specified since we're adding a bit of a timeout to each retry of failed no-txn migrations. Since this is an extremely rare error case, it probably doesn't -- matter too much what we do. I have questions if we should even support this, to be honest. Hacky stuff below: @@ -425,7 +424,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn , appliedMigrationTimestamp , appliedMigrationAt = appliedAt , appliedMigrationDuration - , appliedMigrationNumStatements + , appliedMigrationStatus } ] Just defaultConn -> do @@ -439,8 +438,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn appliedMigrationTimestamp NowInPostgresTime appliedMigrationDuration - appliedMigrationNumStatements - apStatus + appliedMigrationStatus @@ -512,7 +510,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> ( FilePath -> DB.UTCTimestamp -> DiffTime - -> Int -> MigrationApplicationStatus -> m () ) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. @@ -544,12 +541,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn errorOrOk <- forMExcept hoistedMigs $ applySingleMigration conn - (\fp ts duration numStmts apStatus -> - lift $ registerMig fp - ts - duration - numStmts - apStatus + (\fp ts duration apStatus -> + lift $ registerMig fp ts duration apStatus ) NoSkipStatements case errorOrOk of @@ -568,7 +561,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> ( FilePath -> DB.UTCTimestamp -> DiffTime - -> Int -> MigrationApplicationStatus -> txn () ) -- ^ Using the `txn` is right: registering applied migrations happens in the default connection, and so it will happen in the same transaction as the migrations themselves. @@ -617,7 +609,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> ( FilePath -> DB.UTCTimestamp -> DiffTime - -> Int -> MigrationApplicationStatus -> m () ) -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. @@ -627,7 +618,22 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn retryPolicy (\(previousMig, _) RetryIteration { lastError } -> case lastError of - Nothing -> pure (previousMig, 0) + Nothing -> do + let numStmtsApplied = + numStatementsAlreadyApplied mig + when (numStmtsApplied > 0) + $ logWarn + $ "Resuming application of partially applied no-txn migration " + <> Text.pack + (migrationName + (addedSqlMig (singleNoTxnMig mig)) + ) + <> ". Skipping the first " + <> Fmt.sformat Fmt.int numStmtsApplied + <> " SQL statements, which have already been applied, and start applying from the " + <> Fmt.sformat Fmt.ords (numStmtsApplied + 1) + <> " statement" + pure (previousMig, numStmtsApplied) Just NoTxnMigrationApplicationFailure { noTxnMigAppliedStatements } -> do logWarn @@ -639,8 +645,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn Fmt.ords (noTxnMigAppliedStatements + 1) <> " statement" - -- TODO: Resuming from the Nth statement shall soon be possible not just for a `BEGIN` statement, but also from a fresh invocation of "codd up" after a previous one failed! - -- Re-reading from disk only to skip statements is unnecessary work when the failed statement was not in an explicit `BEGIN ... COMMIT` section, but having more than one code path to handle such extremely rare errors is not worth it freshBlock <- reReadMig previousMig pure (freshBlock, noTxnMigAppliedStatements) ) @@ -787,27 +791,39 @@ collectPendingMigrations defaultConnString sqlMigrations txnIsolationLvl connect hasNonDefaultConnectionString block = let mConnInfo = migrationCustomConnInfo $ addedSqlMig $ case block of BlockInTxn (ConsecutiveInTxnMigrations (m1 :| _) _) -> m1 - BlockNoTxn (SingleNoTxnMigration m _) -> m + BlockNoTxn (SingleNoTxnMigration m _ _ ) -> m in case mConnInfo of Nothing -> False Just connInfo -> DB.connectDatabase defaultConnString /= DB.connectDatabase connInfo collect bootCheck = do logInfoNoNewline "Looking for pending migrations..." - migsAlreadyApplied :: [FilePath] <- - if coddSchemaVersion bootCheck == CoddSchemaDoesNotExist - then pure [] - else withConnection - defaultConnString - connectTimeout - (\conn -> - withTransaction @(InTxnT m) txnIsolationLvl conn - $ map DB.fromOnly - <$> query - conn - "SELECT name FROM codd_schema.sql_migrations WHERE applied_at IS NOT NULL" - () - ) + migsAlreadyApplied :: Map FilePath MigrationApplicationStatus <- + Map.fromList + <$> if coddSchemaVersion bootCheck == CoddSchemaDoesNotExist + then pure [] + else do + rows <- withConnection + defaultConnString + connectTimeout + (\conn -> + withTransaction @(InTxnT m) + txnIsolationLvl + conn + $ query + conn + "SELECT name, no_txn_failed_at IS NULL, num_applied_statements FROM codd_schema.sql_migrations" + () + ) + pure $ map + (\(name, succeeded, numStmts) -> if succeeded + then + ( name + , MigrationAppliedSuccessfully numStmts + ) + else (name, NoTxnMigrationFailed numStmts) + ) + rows blocksOfPendingMigs <- parseMigrationFiles migsAlreadyApplied sqlMigrations @@ -863,8 +879,7 @@ closeFileStream (FileStream _ releaseKey _) = do mrkey <- readIORef releaseKey forM_ mrkey release --- | Returns all migrations on the supplied folders (including possibly already applied ones) --- except for the explicitly supplied ones. +-- | Returns all migrations on the supplied folders except for the explicitly supplied ones. listMigrationsFromDisk :: MonadIO m => [FilePath] @@ -887,12 +902,14 @@ parseMigrationFiles , MonadThrow m , EnvVars m ) - => [FilePath] + => Map FilePath MigrationApplicationStatus + -- ^ Migrations fully or partially applied (the latter can only happen with no-txn migrations) and how many statements have been applied per migration -> Either [FilePath] [AddedSqlMigration m] -> m [BlockOfMigrations m] -parseMigrationFiles migsCompleted sqlMigrations = do +parseMigrationFiles migsApplied sqlMigrations = do pendingParsedMigrations :: [ ( Either String (FileStream m) , AddedSqlMigration m + , Maybe Int -- ^ How many statements to skip ) ] <- either (listPendingFromDisk >=> readFromDisk) @@ -902,7 +919,7 @@ parseMigrationFiles migsCompleted sqlMigrations = do -- Group consecutive in-txn migrations with the same connection string together for atomic application pure $ NE.groupBy - (\(_, AddedSqlMigration m1 _) (_, AddedSqlMigration m2 _) -> + (\(_, AddedSqlMigration m1 _, _) (_, AddedSqlMigration m2 _, _) -> migrationInTxn m1 && migrationInTxn m2 && migrationCustomConnInfo m1 @@ -910,102 +927,136 @@ parseMigrationFiles migsCompleted sqlMigrations = do ) pendingParsedMigrations <&> \migs -> - let firstMig = snd $ NE.head migs - in if migrationInTxn (addedSqlMig firstMig) + let (_, firstMig, mNumStmtsApplied) = NE.head migs + in + if migrationInTxn (addedSqlMig firstMig) then BlockInTxn ConsecutiveInTxnMigrations - { inTxnMigs = snd <$> migs + { inTxnMigs = snd3 <$> migs , reReadBlock = reRead migs } else BlockNoTxn SingleNoTxnMigration - { singleNoTxnMig = firstMig - , reReadMig = reReadNoTxn migs + { singleNoTxnMig = firstMig + , reReadMig = reReadNoTxn migs + $ fromMaybe 0 mNumStmtsApplied + , numStatementsAlreadyApplied = fromMaybe + 0 + mNumStmtsApplied } where + snd3 (_, v, _) = v reRead oldMigsAndPaths = do -- Close handles of all migrations in the block, re-open and read+parse them filePaths <- forM oldMigsAndPaths $ \case - (Left _memStream, _) -> + (Left _memStream, _, _) -> error "Re-reading in-memory streams is not yet implemented" - (Right fileStream, _) -> + (Right fileStream, _, _) -> closeFileStream fileStream >> pure (filePath fileStream) newMigs <- readFromDisk filePaths - pure ConsecutiveInTxnMigrations { inTxnMigs = snd <$> newMigs + pure ConsecutiveInTxnMigrations { inTxnMigs = snd3 <$> newMigs , reReadBlock = reRead newMigs } -- | TODO: This is a near duplicate of `reRead`. Improve this. - reReadNoTxn oldMigsAndPaths = do + reReadNoTxn oldMigsAndPaths numStatementsAlreadyApplied = do -- Close handles of all migrations in the block, re-open and read+parse them filePaths <- forM oldMigsAndPaths $ \case - (Left _memStream, _) -> + (Left _memStream, _, _) -> error "Re-reading in-memory streams is not yet implemented" - (Right fileStream, _) -> + (Right fileStream, _, _) -> closeFileStream fileStream >> pure (filePath fileStream) newMigs <- readFromDisk filePaths - pure SingleNoTxnMigration { singleNoTxnMig = snd (NE.head newMigs) - , reReadMig = reReadNoTxn newMigs - } + pure SingleNoTxnMigration + { singleNoTxnMig = snd3 (NE.head newMigs) + , reReadMig = reReadNoTxn newMigs numStatementsAlreadyApplied + , numStatementsAlreadyApplied + } readFromMemory :: [AddedSqlMigration m] - -> [(Either String (FileStream m), AddedSqlMigration m)] + -> [(Either String (FileStream m), AddedSqlMigration m, Maybe Int)] readFromMemory ams = map (\asqlmig@(AddedSqlMigration mig _) -> - (Left $ migrationName mig, asqlmig) + ( Left $ migrationName mig + , asqlmig + , case Map.lookup (migrationName mig) migsApplied of + Just (NoTxnMigrationFailed n) -> Just n + _ -> Nothing + ) ) $ sortOn (\(AddedSqlMigration _ ts) -> ts) ams + migNamesFullyApplied = + [ name + | (name, status) <- Map.toList migsApplied + , case status of + MigrationAppliedSuccessfully _ -> True + NoTxnMigrationFailed _ -> False + ] listPendingFromMemory = filter - (\(AddedSqlMigration mig _) -> migrationName mig `notElem` migsCompleted + (\(AddedSqlMigration mig _) -> + migrationName mig `notElem` migNamesFullyApplied ) - listPendingFromDisk sqlDirs = listMigrationsFromDisk sqlDirs migsCompleted + listPendingFromDisk sqlDirs = + listMigrationsFromDisk sqlDirs migNamesFullyApplied readFromDisk :: forall t . Traversable t => t FilePath - -> m (t (Either String (FileStream m), AddedSqlMigration m)) + -> m + ( t + ( Either String (FileStream m) + , AddedSqlMigration m + , Maybe Int + ) + ) readFromDisk pendingSqlMigrationFiles = forM pendingSqlMigrationFiles $ \pendingMigrationPath -> do fs :: FileStream m <- delayedOpenStreamFile pendingMigrationPath let fn = takeFileName $ filePath fs parsedMig <- parseAddedSqlMigration fn fs - case parsedMig of - Left err -> do - throwIO - $ userError - $ "Fatal error parsing migration '" - <> fn - <> "': " - <> err - Right asqlmig@(AddedSqlMigration mig _) -> do - case migrationSql mig of - UnparsedSql _ -> do - -- We can close the file with peace of mind in this case, as it has been read into memory in its entirety. In fact, - -- it's already closed because the stream was consumed completely, but let's be explicit. - closeFileStream fs - pure (Right fs, asqlmig) - _ -> do - -- Close the file so we don't crash due to the shell's open files limit. The handle will be opened again - -- when the stream is forced next time. - -- This isn't terribly pretty and assumes the file won't change in between now and the moment it'll be opened again for SQL to be effectivelly applied, - -- but that assumption is probably fine and unavoidable if we want to report errors in any pending migrations before we start applying their SQL. - closeFileStream fs - fileStreamAgain :: FileStream m <- - delayedOpenStreamFile pendingMigrationPath - let sqlPiecesStreamAgain = - substituteEnvVarsInSqlPiecesStream - ( migrationEnvVars - $ addedSqlMig asqlmig - ) - $ parseSqlPiecesStreaming - $ fileStream fileStreamAgain - asqlmigAgain = asqlmig - { addedSqlMig = (addedSqlMig asqlmig) - { migrationSql = WellParsedSql - sqlPiecesStreamAgain - } - } - pure (Right fileStreamAgain, asqlmigAgain) + let numAppliedStatementsNoTxn = case Map.lookup fn migsApplied of + Just (NoTxnMigrationFailed n) -> Just n + _ -> Nothing + + + fmap (\(a, b) -> (a, b, numAppliedStatementsNoTxn)) + $ case parsedMig of + Left err -> do + throwIO + $ userError + $ "Fatal error parsing migration '" + <> fn + <> "': " + <> err + Right asqlmig@(AddedSqlMigration mig _) -> do + case migrationSql mig of + UnparsedSql _ -> do + -- We can close the file with peace of mind in this case, as it has been read into memory in its entirety. In fact, + -- it's already closed because the stream was consumed completely, but let's be explicit. + closeFileStream fs + pure (Right fs, asqlmig) + _ -> do + -- Close the file so we don't crash due to the shell's open files limit. The handle will be opened again + -- when the stream is forced next time. + -- This isn't terribly pretty and assumes the file won't change in between now and the moment it'll be opened again for SQL to be effectivelly applied, + -- but that assumption is probably fine and unavoidable if we want to report errors in any pending migrations before we start applying their SQL. + closeFileStream fs + fileStreamAgain :: FileStream m <- + delayedOpenStreamFile pendingMigrationPath + let sqlPiecesStreamAgain = + substituteEnvVarsInSqlPiecesStream + ( migrationEnvVars + $ addedSqlMig asqlmig + ) + $ parseSqlPiecesStreaming + $ fileStream fileStreamAgain + asqlmigAgain = asqlmig + { addedSqlMig = (addedSqlMig asqlmig) + { migrationSql = WellParsedSql + sqlPiecesStreamAgain + } + } + pure (Right fileStreamAgain, asqlmigAgain) -- | This can be used as a last-action when applying migrations to -- strict-check schemas, logging differences, success and throwing @@ -1042,8 +1093,9 @@ laxCheckLastAction coddSettings expectedReps _blocksOfMigs conn = do -- no-txn migrations appear alone. data BlockOfMigrations m = BlockInTxn (ConsecutiveInTxnMigrations m) | BlockNoTxn (SingleNoTxnMigration m) data SingleNoTxnMigration m = SingleNoTxnMigration - { singleNoTxnMig :: AddedSqlMigration m - , reReadMig :: m (SingleNoTxnMigration m) + { singleNoTxnMig :: AddedSqlMigration m + , reReadMig :: m (SingleNoTxnMigration m) + , numStatementsAlreadyApplied :: Int } data ConsecutiveInTxnMigrations m = ConsecutiveInTxnMigrations { inTxnMigs :: NonEmpty (AddedSqlMigration m) @@ -1073,7 +1125,8 @@ hoistBlockOfMigrations hoist = \case let hoistedAllMigs = hoistAddedSqlMigration hoist singleNoTxnMig hoistedReReadBlock = hoist $ reReadMig <&> hoistNoTxnBlock in SingleNoTxnMigration { singleNoTxnMig = hoistedAllMigs - , reReadMig = hoistedReReadBlock + , reReadMig = hoistedReReadBlock + , numStatementsAlreadyApplied } -- | Returns True only if all pending migrations are in-txn and of the same connection string, meaning they'll all be applied @@ -1088,7 +1141,7 @@ isOneShotApplication defaultConnInfo pending = case pending of blockCustomConnInfo :: BlockOfMigrations m -> Maybe DB.ConnectInfo blockCustomConnInfo (BlockInTxn (ConsecutiveInTxnMigrations (AddedSqlMigration { addedSqlMig } :| _) _)) = migrationCustomConnInfo addedSqlMig -blockCustomConnInfo (BlockNoTxn (SingleNoTxnMigration (AddedSqlMigration { addedSqlMig }) _)) +blockCustomConnInfo (BlockNoTxn (SingleNoTxnMigration (AddedSqlMigration { addedSqlMig }) _ _)) = migrationCustomConnInfo addedSqlMig data NoTxnMigrationApplicationFailure = NoTxnMigrationApplicationFailure @@ -1133,7 +1186,6 @@ applySingleMigration -> ( FilePath -> DB.UTCTimestamp -> DiffTime - -> Int -> MigrationApplicationStatus -> m () ) @@ -1241,11 +1293,13 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes Fmt.ords (appliedMigrationNumStatements + 1) <> " failed to be applied. Codd will resume the next retry or codd up from it" - registerMigRan fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - NoTxnMigrationFailed + registerMigRan + fn + migTimestamp + appliedMigrationDuration + (NoTxnMigrationFailed + appliedMigrationNumStatements + ) pure $ mkSqlError @s sqlStatementEx appliedMigrationNumStatements @@ -1265,21 +1319,23 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes <> Fmt.sformat Fmt.ords lastBeginNum <> " statement in this migration" void $ liftIO $ DB.execute_ conn "ROLLBACK" - registerMigRan fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - NoTxnMigrationFailed + registerMigRan + fn + migTimestamp + appliedMigrationDuration + (NoTxnMigrationFailed + appliedMigrationNumStatements + ) logInfo "ROLLBACKed last explicitly started transaction" pure $ mkSqlError @s sqlStatementEx (lastBeginNum - 1) Nothing -> do - registerMigRan fn - migTimestamp - appliedMigrationDuration - appliedMigrationNumStatements - MigrationAppliedSuccessfully + registerMigRan + fn + migTimestamp + appliedMigrationDuration + (MigrationAppliedSuccessfully appliedMigrationNumStatements) logInfo $ " (" <> prettyPrintDuration appliedMigrationDuration @@ -1322,7 +1378,6 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes * fromIntegral (nsec after - nsec before) ) -data MigrationApplicationStatus = NoTxnMigrationFailed | MigrationAppliedSuccessfully -- | This type exists because bootstrapping migrations can't be registered until codd_schema is created. But we want the time when they were applied to truly reflect when they were -- applied, so we wouldn't be able to use NowInPostgresTime by the time codd_schema is created. data MigrationLastStatementAppliedAt = NowInPostgresTime | SpecificTime UTCTime @@ -1340,29 +1395,31 @@ registerRanMigration -> DB.UTCTimestamp -> MigrationLastStatementAppliedAt -- ^ The time the last statement of the migration was applied or when it failed. -> DiffTime - -> Int -- ^ The number of applied statements -> MigrationApplicationStatus -> m UTCTime -registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration numAppliedStatements apStatus - = let - (args, timestampValue) = case (appliedAt, apStatus) of - (NowInPostgresTime, NoTxnMigrationFailed) -> - ("?, clock_timestamp()", Nothing) - (NowInPostgresTime, MigrationAppliedSuccessfully) -> - ("clock_timestamp(), ?", Nothing) - (SpecificTime t, NoTxnMigrationFailed) -> ("NULL, ?", Just t) - (SpecificTime t, MigrationAppliedSuccessfully) -> - ("?, NULL", Just t) +registerRanMigration conn isolLvl fn migTimestamp appliedAt appliedMigrationDuration apStatus + = let (args, numAppliedStatements, timestampValue) = + case (appliedAt, apStatus) of + (NowInPostgresTime, NoTxnMigrationFailed numStmts) -> + ("?, ?, clock_timestamp()", numStmts, Nothing) + (NowInPostgresTime, MigrationAppliedSuccessfully numStmts) -> + ("?, clock_timestamp(), ?", numStmts, Nothing) + (SpecificTime t, NoTxnMigrationFailed numStmts) -> + ("?, NULL, ?", numStmts, Just t) + (SpecificTime t, MigrationAppliedSuccessfully numStmts) -> + ("?, ?, NULL", numStmts, Just t) in withTransaction @txn isolLvl conn $ DB.fromOnly <$> unsafeQuery1 conn ("INSERT INTO codd_schema.sql_migrations as m (migration_timestamp, name, application_duration, num_applied_statements, applied_at, no_txn_failed_at) \ - \ SELECT ?, ?, ?, ?, " + \ SELECT ?, ?, ?, " <> args <> " \ \ ON CONFLICT (name) DO UPDATE \ \ SET application_duration=EXCLUDED.application_duration + m.application_duration \ \ , num_applied_statements=EXCLUDED.num_applied_statements \ + \ , applied_at=EXCLUDED.applied_at \ + \ , no_txn_failed_at=EXCLUDED.no_txn_failed_at \ \ RETURNING COALESCE(applied_at, no_txn_failed_at)" ) ( migTimestamp diff --git a/src/Codd/Parsing.hs b/src/Codd/Parsing.hs index e0c57e8b..30d297ca 100644 --- a/src/Codd/Parsing.hs +++ b/src/Codd/Parsing.hs @@ -25,6 +25,7 @@ module Codd.Parsing , CoddCommentParseResult(..) , EnvVars(..) , FileStream(..) + , MigrationApplicationStatus(..) , SqlPiece(..) , ParsedSql(..) , PureStream(..) @@ -150,14 +151,17 @@ data AddedSqlMigration m = AddedSqlMigration , addedSqlTimestamp :: DB.UTCTimestamp } +-- | Holds applied status and number of applied statements. +data MigrationApplicationStatus = NoTxnMigrationFailed Int | MigrationAppliedSuccessfully Int + data AppliedMigration = AppliedMigration - { appliedMigrationName :: FilePath - , appliedMigrationTimestamp :: DB.UTCTimestamp + { appliedMigrationName :: FilePath + , appliedMigrationTimestamp :: DB.UTCTimestamp -- ^ The migration's timestamp as extracted from its file name. - , appliedMigrationAt :: UTCTime + , appliedMigrationAt :: UTCTime -- ^ When the migration was effectively applied. - , appliedMigrationDuration :: DiffTime - , appliedMigrationNumStatements :: Int + , appliedMigrationDuration :: DiffTime + , appliedMigrationStatus :: MigrationApplicationStatus } data FileStream m = FileStream diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index 12b530f8..7e58b80f 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -65,6 +65,23 @@ spec :: Spec spec = do describe "DbDependentSpecs" $ do describe "Retry tests" $ do + -- aroundTestDbInfo + -- $ it + -- "In-txn migrations in same database get registered in the same transaction even for a different user" + -- $ error "TODO" + -- aroundTestDbInfo + -- $ it + -- "In-txn migrations in different database get registered after all said migrations are committed, not after each one is applied inside a transaction" + -- $ error "TODO" + -- aroundTestDbInfo + -- $ it + -- "No-txn migration with failure in COMMIT statement retries from the right place" + -- $ error "TODO" + -- aroundTestDbInfo + -- $ it + -- "No-txn migration is resumed from the right statement on a new `codd up` invocation" + -- $ error "TODO" + aroundTestDbInfo $ it "Bootstrapping no-txn migration still gets registered if it makes default connection string accessible before failing" diff --git a/test/ParsingSpec.hs b/test/ParsingSpec.hs index 24cc1245..48266e39 100644 --- a/test/ParsingSpec.hs +++ b/test/ParsingSpec.hs @@ -530,7 +530,7 @@ spec = do $ runCoddLogger $ do [BlockInTxn ConsecutiveInTxnMigrations { inTxnMigs = asqlmig :| [] }] <- - parseMigrationFiles [] + parseMigrationFiles mempty $ Left ["test/migrations/normal-parse-test/"] rawFileContents <- liftIO @@ -558,7 +558,7 @@ spec = do $ runCoddLogger $ do [BlockInTxn ConsecutiveInTxnMigrations { inTxnMigs = asqlmig :| [] }] <- - parseMigrationFiles [] + parseMigrationFiles mempty $ Left ["test/migrations/no-parse-test/"] rawFileContents <- liftIO From b3e5d3c570fa3ed27d891bbe3529d56b284f2c9d Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 09:52:24 -0300 Subject: [PATCH 17/28] Apply in-txn migrations on non-default connection atomically when possible --- src/Codd/Internal.hs | 307 ++++++++++++++++------------- test/DbDependentSpecs/RetrySpec.hs | 107 +++++++++- 2 files changed, 274 insertions(+), 140 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index f9c0a806..064246ab 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -215,12 +215,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -326,7 +326,9 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn case mConn of Just conn -> pure conn Nothing -> modifyMVar connsPerInfo $ \m -> do - -- logInfo $ "Connecting to (TODO: REDACT PASSWORD) " <> Text.pack (show cinfo) + -- print + -- $ "Connecting to (TODO: REDACT PASSWORD) " + -- <> Text.pack (show cinfo) conn <- connectWithTimeout cinfo connectTimeout pure ((cinfo, conn) : m, conn) @@ -361,37 +363,64 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn pure [] Nothing -> pure () --- | The function used to register applied migrations for when a block of in-txn migrations using the default connection string are to be applied. --- This will use same transaction as the one used to apply the migrations to insert into codd_schema.sql_migrations. - registerAppliedMigDefaultConnInTxnBlock - :: FilePath +-- | The function used to register applied migrations for in-txn migrations. +-- This will use same transaction as the one used to apply the migrations to insert into codd_schema.sql_migrations as long as the block's connection is on the default database (even under a different user) and codd_schema has been created, and will otherwise only register them in-memory so they're applied at the earliest future opportunity. + registerAppliedInTxnMig + :: DB.Connection + -> DB.ConnectInfo + -> FilePath -> DB.UTCTimestamp -> DiffTime -> MigrationApplicationStatus -> txn () - registerAppliedMigDefaultConnInTxnBlock appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationStatus + registerAppliedInTxnMig blockConn blockConnInfo appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationStatus = do - (_, defaultConn) <- lift $ openConn defaultConnInfo - void $ registerRanMigration @txn - defaultConn - txnIsolationLvl - appliedMigrationName - appliedMigrationTimestamp - NowInPostgresTime - appliedMigrationDuration - appliedMigrationStatus - --- | The function used to register applied migrations for when either a no-txn migration or a block of in-txn migrations _not_ using the default connection string are to be applied. --- This will account for the possibility that the default connection string still isn't accessible by storing in-memory that some migrations were applied but not registered, --- and also will BEGIN..COMMIT-wrap the insertion using the default connection if it's available. - registerAppliedMigIfPossibleOthers + csExists <- readMVar coddSchemaExists + -- We can insert into codd_schema.sql_migrations with any user + if DB.connectDatabase blockConnInfo + == DB.connectDatabase defaultConnInfo + && csExists + then + void $ registerRanMigration @txn + blockConn + txnIsolationLvl + appliedMigrationName + appliedMigrationTimestamp + NowInPostgresTime + appliedMigrationDuration + appliedMigrationStatus + else + do + appliedAt <- + DB.fromOnly + <$> unsafeQuery1 + blockConn + "SELECT clock_timestamp()" + () + modifyMVar_ unregisteredButAppliedMigs $ \apmigs -> + pure + $ apmigs + ++ [ AppliedMigration + { appliedMigrationName + , appliedMigrationTimestamp + , appliedMigrationAt = appliedAt + , appliedMigrationDuration + , appliedMigrationStatus + } + ] + + +-- | The function used to register fully or partially applied no-txn migrations. +-- This will account for the possibility that the default connection string still isn't accessible and that codd_schema still hasn't been created by storing in memory that some migrations were applied but not registered, leaving flushing of those to the database to a later opportunity. + registerAppliedNoTxnMig :: DB.Connection + -> DB.ConnectInfo -> FilePath -> DB.UTCTimestamp -> DiffTime -> MigrationApplicationStatus -> m () - registerAppliedMigIfPossibleOthers blockConn appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationStatus + registerAppliedNoTxnMig blockConn blockConnInfo appliedMigrationName appliedMigrationTimestamp appliedMigrationDuration appliedMigrationStatus = do csExists <- readMVar coddSchemaExists case (appliedMigrationStatus, csExists) of @@ -408,37 +437,57 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn createCoddSchemaAndFlushPendingMigrations _ -> pure () mDefaultConn <- queryConn defaultConnInfo - case mDefaultConn of - Nothing -> do - appliedAt <- - DB.fromOnly - <$> unsafeQuery1 - blockConn - "SELECT clock_timestamp()" - () - modifyMVar_ unregisteredButAppliedMigs $ \apmigs -> - pure - $ apmigs - ++ [ AppliedMigration - { appliedMigrationName - , appliedMigrationTimestamp - , appliedMigrationAt = appliedAt - , appliedMigrationDuration - , appliedMigrationStatus - } - ] - Just defaultConn -> do - void - $ withTransaction @txn txnIsolationLvl - defaultConn - $ registerRanMigration @txn - defaultConn - txnIsolationLvl - appliedMigrationName - appliedMigrationTimestamp - NowInPostgresTime - appliedMigrationDuration - appliedMigrationStatus + case + ( mDefaultConn + , DB.connectDatabase blockConnInfo + == DB.connectDatabase defaultConnInfo + && csExists + ) + of + (Nothing, False) -> do -- No default connection available and migrations running on non-default database + appliedAt <- + DB.fromOnly + <$> unsafeQuery1 + blockConn + "SELECT clock_timestamp()" + () + modifyMVar_ unregisteredButAppliedMigs + $ \apmigs -> + pure + $ apmigs + ++ [ AppliedMigration + { appliedMigrationName + , appliedMigrationTimestamp + , appliedMigrationAt = + appliedAt + , appliedMigrationDuration + , appliedMigrationStatus + } + ] + (Just defaultConn, False) -> do -- Running migrations on non-default database, but default connection is available + void + $ withTransaction @txn txnIsolationLvl + defaultConn + $ registerRanMigration @txn + defaultConn + txnIsolationLvl + appliedMigrationName + appliedMigrationTimestamp + NowInPostgresTime + appliedMigrationDuration + appliedMigrationStatus + (_, True) -> do -- Migrations running on default database means we can register them here if codd_schema exists + void + $ withTransaction @txn txnIsolationLvl + blockConn + $ registerRanMigration @txn + blockConn + txnIsolationLvl + appliedMigrationName + appliedMigrationTimestamp + NowInPostgresTime + appliedMigrationDuration + appliedMigrationStatus @@ -459,28 +508,20 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn , isOneShotApplication defaultConnInfo pendingMigs ) of - (BlockInTxn inTxnBlock, True) -> - runInTxnBlockDefaultConn - (fmap Just . actionAfter hoistedBlocks) - conn - inTxnBlock - registerAppliedMigDefaultConnInTxnBlock - (BlockInTxn inTxnBlock, False) -> - if cinfo == defaultConnInfo - then runInTxnBlockDefaultConn - (const $ pure Nothing) - conn - inTxnBlock - registerAppliedMigDefaultConnInTxnBlock - else runInTxnBlockNotDefaultConn - (const $ pure Nothing) - conn - inTxnBlock - (registerAppliedMigIfPossibleOthers conn) + (BlockInTxn inTxnBlock, True) -> runInTxnBlock + (fmap Just . actionAfter hoistedBlocks) + conn + inTxnBlock + (registerAppliedInTxnMig conn cinfo) + (BlockInTxn inTxnBlock, False) -> runInTxnBlock + (const $ pure Nothing) + conn + inTxnBlock + (registerAppliedInTxnMig conn cinfo) (BlockNoTxn noTxnBlock, _) -> runNoTxnMig conn noTxnBlock - (registerAppliedMigIfPossibleOthers conn) + (registerAppliedNoTxnMig conn cinfo) ) Nothing pendingMigs @@ -503,58 +544,58 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn return actAfterResult where - runInTxnBlockNotDefaultConn - :: (DB.Connection -> txn b) - -> DB.Connection - -> ConsecutiveInTxnMigrations m - -> ( FilePath - -> DB.UTCTimestamp - -> DiffTime - -> MigrationApplicationStatus - -> m () - ) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. - -> m b - runInTxnBlockNotDefaultConn act conn migBlock registerMig = do - res <- - -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations - retryFold - retryPolicy - (\previousBlock RetryIteration { tryNumber } -> - if tryNumber == 0 - then pure previousBlock - else reReadBlock previousBlock - ) - migBlock - (\case - Left lastEx -> do - logError - "Failed after all configured retries. Giving up." - throwIO lastEx - Right ret -> pure ret - ) - $ \blockFinal -> do - logInfo "BEGINning transaction" - withTransaction @txn txnIsolationLvl conn $ do - let hoistedMigs :: NonEmpty (AddedSqlMigration txn) - hoistedMigs = hoistAddedSqlMigration lift - <$> inTxnMigs blockFinal - errorOrOk <- - forMExcept hoistedMigs $ applySingleMigration - conn - (\fp ts duration apStatus -> - lift $ registerMig fp ts duration apStatus - ) - NoSkipStatements - case errorOrOk of - Left e -> do - logInfo - "ROLLBACKed transaction" - pure $ Left e - Right () -> Right <$> act conn - logInfo "COMMITed transaction" - pure res - - runInTxnBlockDefaultConn + -- runInTxnBlockNotDefaultConn + -- :: (DB.Connection -> txn b) + -- -> DB.Connection + -- -> ConsecutiveInTxnMigrations m + -- -> ( FilePath + -- -> DB.UTCTimestamp + -- -> DiffTime + -- -> MigrationApplicationStatus + -- -> m () + -- ) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. + -- -> m b + -- runInTxnBlockNotDefaultConn act conn migBlock registerMig = do + -- res <- + -- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations + -- retryFold + -- retryPolicy + -- (\previousBlock RetryIteration { tryNumber } -> + -- if tryNumber == 0 + -- then pure previousBlock + -- else reReadBlock previousBlock + -- ) + -- migBlock + -- (\case + -- Left lastEx -> do + -- logError + -- "Failed after all configured retries. Giving up." + -- throwIO lastEx + -- Right ret -> pure ret + -- ) + -- $ \blockFinal -> do + -- logInfo "BEGINning transaction" + -- withTransaction @txn txnIsolationLvl conn $ do + -- let hoistedMigs :: NonEmpty (AddedSqlMigration txn) + -- hoistedMigs = hoistAddedSqlMigration lift + -- <$> inTxnMigs blockFinal + -- errorOrOk <- + -- forMExcept hoistedMigs $ applySingleMigration + -- conn + -- (\fp ts duration apStatus -> + -- lift $ registerMig fp ts duration apStatus + -- ) + -- NoSkipStatements + -- case errorOrOk of + -- Left e -> do + -- logInfo + -- "ROLLBACKed transaction" + -- pure $ Left e + -- Right () -> Right <$> act conn + -- logInfo "COMMITed transaction" + -- pure res + + runInTxnBlock :: (DB.Connection -> txn b) -> DB.Connection -> ConsecutiveInTxnMigrations m @@ -563,9 +604,9 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> DiffTime -> MigrationApplicationStatus -> txn () - ) -- ^ Using the `txn` is right: registering applied migrations happens in the default connection, and so it will happen in the same transaction as the migrations themselves. + ) -- ^ Using the `txn` monad is right: registering applied migrations happens in the same connection that applies migrations if that is the default-database connection, and should be (but this is not yet implemented) scheduled to be inserted into codd_schema.sql_migrations in the first future opportunity, meaning when this function is called it's merely an in-memory operation, which can also run in `txn`. -> m b - runInTxnBlockDefaultConn act conn migBlock registerMig = do + runInTxnBlock act conn migBlock registerMig = do res <- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations retryFold @@ -745,7 +786,9 @@ createCoddSchema targetVersion txnIsolationLvl conn = "ALTER TABLE codd_schema.sql_migrations ADD COLUMN application_duration INTERVAL" CoddSchemaV2 -> execvoid_ conn - "ALTER TABLE codd_schema.sql_migrations ADD COLUMN num_applied_statements INT, ADD COLUMN no_txn_failed_at timestamptz, ALTER COLUMN applied_at DROP NOT NULL, ADD CONSTRAINT no_txn_mig_applied_or_failed CHECK ((applied_at IS NULL) <> (no_txn_failed_at IS NULL))" + "ALTER TABLE codd_schema.sql_migrations ADD COLUMN num_applied_statements INT, ADD COLUMN no_txn_failed_at timestamptz, ALTER COLUMN applied_at DROP NOT NULL, ADD CONSTRAINT no_txn_mig_applied_or_failed CHECK ((applied_at IS NULL) <> (no_txn_failed_at IS NULL)); \n\ + \ -- Grant UPDATE so in-txn migrations running under different users can register themselves atomically \n\ + \GRANT UPDATE ON TABLE codd_schema.sql_migrations TO PUBLIC;" CoddSchemaV3 -> pure () -- `succ` is a partial function, but it should never throw in this context diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index 7e58b80f..f8b45819 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -7,6 +7,7 @@ import Codd.Internal ( NoTxnMigrationApplicationFailu ) import Codd.Logging ( LoggingT(runLoggingT) , Newline(..) + , runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) , SqlMigration(..) @@ -19,10 +20,12 @@ import Control.Monad ( forM_ ) import Control.Monad.Reader ( ReaderT(..) ) import Control.Monad.Trans.Resource ( MonadThrow ) import qualified Data.List as List +import Data.Maybe ( fromMaybe ) import Data.Text ( Text ) import qualified Data.Text as Text import qualified Data.Text.IO as Text import Data.Time ( UTCTime ) +import qualified Database.PostgreSQL.Simple as DB import DbUtils ( aroundFreshDatabase , aroundTestDbInfo , getIncreasingTimestamp @@ -41,41 +44,129 @@ import UnliftIO.Concurrent ( MVar , readMVar ) -createTableMig, addColumnMig :: MonadThrow m => AddedSqlMigration m +alwaysPassingMig, createTableMig, addColumnMig, alwaysFailingMig + :: MonadThrow m => AddedSqlMigration m +alwaysPassingMig = AddedSqlMigration + SqlMigration { migrationName = "0001-always-passing.sql" + , migrationSql = mkValidSql "SELECT 99" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 1) createTableMig = AddedSqlMigration - SqlMigration { migrationName = "0001-create-table.sql" + SqlMigration { migrationName = "0002-create-table.sql" , migrationSql = mkValidSql "CREATE TABLE anytable ();" , migrationInTxn = True , migrationCustomConnInfo = Nothing , migrationEnvVars = mempty } - (getIncreasingTimestamp 1) + (getIncreasingTimestamp 2) addColumnMig = AddedSqlMigration SqlMigration - { migrationName = "0002-add-column.sql" + { migrationName = "0003-add-column.sql" , migrationSql = mkValidSql "ALTER TABLE anytable ADD COLUMN anycolumn TEXT;" , migrationInTxn = True , migrationCustomConnInfo = Nothing , migrationEnvVars = mempty } - (getIncreasingTimestamp 2) + (getIncreasingTimestamp 3) +alwaysFailingMig = AddedSqlMigration + SqlMigration { migrationName = "0004-always-failing.sql" + , migrationSql = mkValidSql "SELECT 5/0" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 4) + +changeConnUser + :: CoddSettings -> String -> AddedSqlMigration m -> AddedSqlMigration m +changeConnUser dbInfo newUser mig = mig + { addedSqlMig = (addedSqlMig mig) + { migrationCustomConnInfo = + let cinfo = fromMaybe + (migsConnString dbInfo) + (migrationCustomConnInfo (addedSqlMig mig)) + in Just cinfo { DB.connectUser = newUser } + } + } spec :: Spec spec = do describe "DbDependentSpecs" $ do describe "Retry tests" $ do + aroundFreshDatabase + $ it + "In-txn migrations in same database as the default connection string get registered in the same transaction even for a different user" + $ \dbInfo -> do + -- To test this we put three consecutive in-txn migrations on the default database under a different user, where the last migration always fails. + -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. + runCoddLogger + (applyMigrationsNoCheck + (dbInfo + { retryPolicy = RetryPolicy + 0 + (ExponentialBackoff + (realToFrac @Double 0.001) + ) + } + ) + (Just + [ alwaysPassingMig -- Make sure the default connection is available and yet we're not to use it for registering the 3 migrations below + , changeConnUser dbInfo + "codd-test-user" + createTableMig + , changeConnUser dbInfo + "codd-test-user" + addColumnMig + , changeConnUser dbInfo + "codd-test-user" + alwaysFailingMig + ] + ) + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(_ :: SomeException) -> True) + allRegisteredMigs :: [String] <- + map DB.fromOnly <$> withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name from codd_schema.sql_migrations" + () + ) + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig @IO + createTableMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig @IO addColumnMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig @IO + alwaysFailingMig + ) + ] -- aroundTestDbInfo -- $ it - -- "In-txn migrations in same database get registered in the same transaction even for a different user" + -- "In-txn migrations in non-default database get registered after all said migrations are committed, not after each one is applied inside a transaction" -- $ error "TODO" -- aroundTestDbInfo -- $ it - -- "In-txn migrations in different database get registered after all said migrations are committed, not after each one is applied inside a transaction" + -- "No-txn migration with failure in COMMIT statement retries from the right place" -- $ error "TODO" -- aroundTestDbInfo -- $ it - -- "No-txn migration with failure in COMMIT statement retries from the right place" + -- "No-txn migrations with COPY have countable-runnable statements skipped correctly" -- $ error "TODO" -- aroundTestDbInfo -- $ it From babcf61f59760355b02267e8be4e4623bdec3635 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 10:58:18 -0300 Subject: [PATCH 18/28] Add a test for partially applied no-txn migrations --- src/Codd/Internal.hs | 20 +- test/DbDependentSpecs/RetrySpec.hs | 187 ++++++++++++++++-- ...1-00-00-00-create-table-with-unique-id.sql | 8 + ...-duplicate-inside-explicit-transaction.sql | 26 +++ 4 files changed, 214 insertions(+), 27 deletions(-) create mode 100644 test/migrations/no-txn-partial-application-test/2000-01-01-00-00-00-create-table-with-unique-id.sql create mode 100644 test/migrations/no-txn-partial-application-test/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 064246ab..910d099e 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -215,12 +215,12 @@ checkNeedsBootstrapping connInfo connectTimeout = if isServerNotAvailableError e then Nothing - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -671,7 +671,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn ) <> ". Skipping the first " <> Fmt.sformat Fmt.int numStmtsApplied - <> " SQL statements, which have already been applied, and start applying from the " + <> " SQL statements, which have already been applied, and starting application from the " <> Fmt.sformat Fmt.ords (numStmtsApplied + 1) <> " statement" pure (previousMig, numStmtsApplied) @@ -681,7 +681,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn $ "Skipping the first " <> Fmt.sformat Fmt.int noTxnMigAppliedStatements - <> " SQL statements, which have already been applied, and start applying from the " + <> " SQL statements, which have already been applied, and starting application from the " <> Fmt.sformat Fmt.ords (noTxnMigAppliedStatements + 1) @@ -1366,9 +1366,7 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes fn migTimestamp appliedMigrationDuration - (NoTxnMigrationFailed - appliedMigrationNumStatements - ) + (NoTxnMigrationFailed (lastBeginNum - 1)) logInfo "ROLLBACKed last explicitly started transaction" pure $ mkSqlError @s sqlStatementEx diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index f8b45819..cc95bf79 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -105,14 +105,7 @@ spec = do -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. runCoddLogger (applyMigrationsNoCheck - (dbInfo - { retryPolicy = RetryPolicy - 0 - (ExponentialBackoff - (realToFrac @Double 0.001) - ) - } - ) + dbInfo (Just [ alwaysPassingMig -- Make sure the default connection is available and yet we're not to use it for registering the 3 migrations below , changeConnUser dbInfo @@ -160,18 +153,180 @@ spec = do -- $ it -- "In-txn migrations in non-default database get registered after all said migrations are committed, not after each one is applied inside a transaction" -- $ error "TODO" - -- aroundTestDbInfo - -- $ it - -- "No-txn migration with failure in COMMIT statement retries from the right place" - -- $ error "TODO" + aroundFreshDatabase + $ it + "No-txn migration with failure in COMMIT statement retries from the right place, and so does a new `codd up`" + $ \dbInfo0 -> do + -- We want retries to ensure applied statements are not being applied more than once + let + dbInfo = dbInfo0 + { retryPolicy = RetryPolicy + 2 + (ExponentialBackoff (realToFrac @Double 0.001) + ) + } + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + dbInfo + { sqlMigrations = + [ "test/migrations/no-txn-partial-application-test" + ] + } + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(_ :: SomeException) -> True) + (appliedMigs :: [(String, Int, Bool)], datatable :: [ DB.Only + Int + ], othertableExists :: DB.Only Bool) <- + withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> + (,,) + <$> DB.query + conn + "SELECT name, num_applied_statements, no_txn_failed_at IS NULL from codd_schema.sql_migrations order by id" + () + <*> DB.query + conn + "SELECT id from somedata order by id" + () + <*> unsafeQuery1 + conn + "SELECT EXISTS (SELECT FROM pg_tables WHERE tablename='othertablenotexists')" + () + ) + appliedMigs + `shouldContain` [ ( "2000-01-01-00-00-00-create-table-with-unique-id.sql" + , 2 + , True + ) + , ( "2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql" + , 3 + , False + ) + ] + map DB.fromOnly datatable `shouldBe` [1, 2, 3, 4, 5, 6] + othertableExists `shouldBe` DB.Only False + logs <- readMVar logsmv + + -- We want the statement that failed printed, a line saying how many statements had been applied and also saying from where it will be resumed (these statement numbers _will_ differ in this test) + logs + `shouldSatisfy` any + (\line -> + "COMMIT;" + `Text.isInfixOf` line + && "23505" + `Text.isInfixOf` line + ) + length + (filter + (\line -> + "7 statements" + `Text.isInfixOf` line + && "8th failed" + `Text.isInfixOf` line + && "4th statement" + `Text.isInfixOf` line + ) + logs + ) + `shouldBe` 3 -- Total amount of attempts + + -- Now we resume application with a new `codd up` invocation, changing the failed no-txn migration from the 8th statements onwards so it can complete. If codd tries to apply statements that already have been applied, we'd get duplicate key violation exceptions + logsSecondCoddUp <- newMVar [] + runMVarLogger logsSecondCoddUp $ applyMigrationsNoCheck + dbInfo + (Just + [ AddedSqlMigration + SqlMigration + { migrationName = + "2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql" + , migrationSql = + mkValidSql + -- This needs to match 2001-01-01-insert-duplicate-inside-explicit-transaction.sql up to the statement right before BEGIN + "SELECT 4;\n\ +\\n\ +\-- Some comment\n\ +\COPY somedata FROM STDIN WITH (FORMAT csv);\n\ +\4\n\ +\5\n\ +\6\n\ +\\\.\n\ +\\n\ +\-- Another comment\n\ +\\n\ +\SELECT 7;\n\ +\BEGIN; SELECT 3; CREATE TABLE othertablenowwillexist(); COPY somedata FROM STDIN WITH (FORMAT csv);\n7\n8\n9\n\\.\nCOMMIT;" + , migrationInTxn = False + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 2) + ] + ) + testConnTimeout + (const $ pure ()) + (appliedMigsSecondTime :: [(String, Int, Bool)], datatableSecondTime :: [ DB.Only + Int + ], othertableExistsSecondTime :: DB.Only Bool) <- + withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> + (,,) + <$> DB.query + conn + "SELECT name, num_applied_statements, no_txn_failed_at IS NULL from codd_schema.sql_migrations order by id" + () + <*> DB.query + conn + "SELECT id from somedata order by id" + () + <*> unsafeQuery1 + conn + "SELECT EXISTS (SELECT FROM pg_tables WHERE tablename='othertablenowwillexist')" + () + ) + appliedMigsSecondTime + `shouldContain` [ ( "2000-01-01-00-00-00-create-table-with-unique-id.sql" + , 2 + , True + ) + , ( "2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql" + , 8 + , True + ) + ] + map DB.fromOnly datatableSecondTime + `shouldBe` [1, 2, 3, 4, 5, 6, 7, 8, 9] + othertableExistsSecondTime `shouldBe` DB.Only True + logsSecondTime <- readMVar logsSecondCoddUp + + -- We want the statement that says where we're resuming from to appear exactly once + length + (filter + (\line -> + "Resuming application of partially applied" + `Text.isInfixOf` line + && "Skipping the first 3 SQL statements" + `Text.isInfixOf` line + && "starting application from the 4th statement" + `Text.isInfixOf` line + ) + logsSecondTime + ) + `shouldBe` 1 + + -- aroundTestDbInfo -- $ it -- "No-txn migrations with COPY have countable-runnable statements skipped correctly" -- $ error "TODO" - -- aroundTestDbInfo - -- $ it - -- "No-txn migration is resumed from the right statement on a new `codd up` invocation" - -- $ error "TODO" aroundTestDbInfo $ it diff --git a/test/migrations/no-txn-partial-application-test/2000-01-01-00-00-00-create-table-with-unique-id.sql b/test/migrations/no-txn-partial-application-test/2000-01-01-00-00-00-create-table-with-unique-id.sql new file mode 100644 index 00000000..8940e738 --- /dev/null +++ b/test/migrations/no-txn-partial-application-test/2000-01-01-00-00-00-create-table-with-unique-id.sql @@ -0,0 +1,8 @@ +-- codd: no-txn +CREATE TABLE somedata (id INT NOT NULL, UNIQUE(id) DEFERRABLE INITIALLY DEFERRED); + +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. diff --git a/test/migrations/no-txn-partial-application-test/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql b/test/migrations/no-txn-partial-application-test/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql new file mode 100644 index 00000000..b68cbd8f --- /dev/null +++ b/test/migrations/no-txn-partial-application-test/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql @@ -0,0 +1,26 @@ +-- codd: no-txn +SELECT 4; + +-- Some comment +COPY somedata FROM STDIN WITH (FORMAT csv); +4 +5 +6 +\. + +-- Another comment + +SELECT 7; +BEGIN; +SELECT 3; +CREATE TABLE othertablenotexists(); +-- Yet another one +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. + +-- Last one + +COMMIT; From 3aa3eb16a8abd5d14a8295da479ed35f6a6a8816 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 11:32:05 -0300 Subject: [PATCH 19/28] Move some tests from RetrySpec to ApplicationSpec --- test/DbDependentSpecs/ApplicationSpec.hs | 108 ++++++++++++++++++++++- test/DbDependentSpecs/RetrySpec.hs | 108 ----------------------- 2 files changed, 107 insertions(+), 109 deletions(-) diff --git a/test/DbDependentSpecs/ApplicationSpec.hs b/test/DbDependentSpecs/ApplicationSpec.hs index ca514dd5..d6913005 100644 --- a/test/DbDependentSpecs/ApplicationSpec.hs +++ b/test/DbDependentSpecs/ApplicationSpec.hs @@ -25,6 +25,7 @@ import Control.Monad.Trans ( lift ) import Control.Monad.Trans.Resource ( MonadThrow ) import qualified Data.Aeson as Aeson import qualified Data.Map.Strict as Map +import Data.Maybe ( fromMaybe ) import Data.Text ( Text ) import qualified Data.Text as Text import Data.Time ( CalendarDiffTime(ctTime) @@ -48,7 +49,9 @@ import DbUtils ( aroundFreshDatabase import Test.Hspec import Test.QuickCheck import qualified Test.QuickCheck as QC -import UnliftIO ( liftIO ) +import UnliftIO ( SomeException + , liftIO + ) placeHoldersMig, selectMig, copyMig :: MonadThrow m => AddedSqlMigration m placeHoldersMig = AddedSqlMigration @@ -138,6 +141,56 @@ createCountCheckingMig expectedCount migName = SqlMigration , migrationEnvVars = mempty } +alwaysPassingMig, createTableMig, addColumnMig, alwaysFailingMig + :: MonadThrow m => AddedSqlMigration m +alwaysPassingMig = AddedSqlMigration + SqlMigration { migrationName = "0001-always-passing.sql" + , migrationSql = mkValidSql "SELECT 99" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 1) +createTableMig = AddedSqlMigration + SqlMigration { migrationName = "0002-create-table.sql" + , migrationSql = mkValidSql "CREATE TABLE anytable ();" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 2) +addColumnMig = AddedSqlMigration + SqlMigration + { migrationName = "0003-add-column.sql" + , migrationSql = mkValidSql + "ALTER TABLE anytable ADD COLUMN anycolumn TEXT;" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 3) +alwaysFailingMig = AddedSqlMigration + SqlMigration { migrationName = "0004-always-failing.sql" + , migrationSql = mkValidSql "SELECT 5/0" + , migrationInTxn = True + , migrationCustomConnInfo = Nothing + , migrationEnvVars = mempty + } + (getIncreasingTimestamp 4) + +changeConnUser + :: CoddSettings -> String -> AddedSqlMigration m -> AddedSqlMigration m +changeConnUser dbInfo newUser mig = mig + { addedSqlMig = (addedSqlMig mig) + { migrationCustomConnInfo = + let cinfo = fromMaybe + (migsConnString dbInfo) + (migrationCustomConnInfo (addedSqlMig mig)) + in Just cinfo { DB.connectUser = newUser } + } + } + + -- | A migration that uses many different ways of inputting strings in postgres. In theory we'd only need to -- test the parser, but we do this to sleep well at night too. -- This migration only makes sense with standard_conforming_strings=on. @@ -645,6 +698,59 @@ spec = do totalRows `shouldBe` 10 describe "Custom connection-string migrations" $ do + aroundFreshDatabase + $ it + "In-txn migrations in same database as the default connection string get registered in the same transaction even for a different user" + $ \dbInfo -> do + -- To test this we put three consecutive in-txn migrations on the default database under a different user, where the last migration always fails. + -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. + runCoddLogger + (applyMigrationsNoCheck + dbInfo + (Just + [ alwaysPassingMig -- Make sure the default connection is available and yet we're not to use it for registering the 3 migrations below + , changeConnUser dbInfo + "codd-test-user" + createTableMig + , changeConnUser dbInfo + "codd-test-user" + addColumnMig + , changeConnUser dbInfo + "codd-test-user" + alwaysFailingMig + ] + ) + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(_ :: SomeException) -> True) + allRegisteredMigs :: [String] <- + map DB.fromOnly <$> withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name from codd_schema.sql_migrations" + () + ) + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig @IO + createTableMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig @IO + addColumnMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig @IO + alwaysFailingMig + ) + ] it "applied_at and application_duration registered properly for migrations running before codd_schema is available" $ do diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index cc95bf79..8e0d9958 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -7,7 +7,6 @@ import Codd.Internal ( NoTxnMigrationApplicationFailu ) import Codd.Logging ( LoggingT(runLoggingT) , Newline(..) - , runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) , SqlMigration(..) @@ -18,9 +17,7 @@ import Codd.Types ( RetryBackoffPolicy(..) ) import Control.Monad ( forM_ ) import Control.Monad.Reader ( ReaderT(..) ) -import Control.Monad.Trans.Resource ( MonadThrow ) import qualified Data.List as List -import Data.Maybe ( fromMaybe ) import Data.Text ( Text ) import qualified Data.Text as Text import qualified Data.Text.IO as Text @@ -44,115 +41,10 @@ import UnliftIO.Concurrent ( MVar , readMVar ) -alwaysPassingMig, createTableMig, addColumnMig, alwaysFailingMig - :: MonadThrow m => AddedSqlMigration m -alwaysPassingMig = AddedSqlMigration - SqlMigration { migrationName = "0001-always-passing.sql" - , migrationSql = mkValidSql "SELECT 99" - , migrationInTxn = True - , migrationCustomConnInfo = Nothing - , migrationEnvVars = mempty - } - (getIncreasingTimestamp 1) -createTableMig = AddedSqlMigration - SqlMigration { migrationName = "0002-create-table.sql" - , migrationSql = mkValidSql "CREATE TABLE anytable ();" - , migrationInTxn = True - , migrationCustomConnInfo = Nothing - , migrationEnvVars = mempty - } - (getIncreasingTimestamp 2) -addColumnMig = AddedSqlMigration - SqlMigration - { migrationName = "0003-add-column.sql" - , migrationSql = mkValidSql - "ALTER TABLE anytable ADD COLUMN anycolumn TEXT;" - , migrationInTxn = True - , migrationCustomConnInfo = Nothing - , migrationEnvVars = mempty - } - (getIncreasingTimestamp 3) -alwaysFailingMig = AddedSqlMigration - SqlMigration { migrationName = "0004-always-failing.sql" - , migrationSql = mkValidSql "SELECT 5/0" - , migrationInTxn = True - , migrationCustomConnInfo = Nothing - , migrationEnvVars = mempty - } - (getIncreasingTimestamp 4) - -changeConnUser - :: CoddSettings -> String -> AddedSqlMigration m -> AddedSqlMigration m -changeConnUser dbInfo newUser mig = mig - { addedSqlMig = (addedSqlMig mig) - { migrationCustomConnInfo = - let cinfo = fromMaybe - (migsConnString dbInfo) - (migrationCustomConnInfo (addedSqlMig mig)) - in Just cinfo { DB.connectUser = newUser } - } - } - spec :: Spec spec = do describe "DbDependentSpecs" $ do describe "Retry tests" $ do - aroundFreshDatabase - $ it - "In-txn migrations in same database as the default connection string get registered in the same transaction even for a different user" - $ \dbInfo -> do - -- To test this we put three consecutive in-txn migrations on the default database under a different user, where the last migration always fails. - -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. - runCoddLogger - (applyMigrationsNoCheck - dbInfo - (Just - [ alwaysPassingMig -- Make sure the default connection is available and yet we're not to use it for registering the 3 migrations below - , changeConnUser dbInfo - "codd-test-user" - createTableMig - , changeConnUser dbInfo - "codd-test-user" - addColumnMig - , changeConnUser dbInfo - "codd-test-user" - alwaysFailingMig - ] - ) - testConnTimeout - (const $ pure ()) - ) - `shouldThrow` (\(_ :: SomeException) -> True) - allRegisteredMigs :: [String] <- - map DB.fromOnly <$> withConnection - (migsConnString dbInfo) - testConnTimeout - (\conn -> DB.query - conn - "SELECT name from codd_schema.sql_migrations" - () - ) - allRegisteredMigs - `shouldNotContain` [ migrationName - (addedSqlMig @IO - createTableMig - ) - ] - allRegisteredMigs - `shouldNotContain` [ migrationName - (addedSqlMig @IO addColumnMig - ) - ] - allRegisteredMigs - `shouldNotContain` [ migrationName - (addedSqlMig @IO - alwaysFailingMig - ) - ] - -- aroundTestDbInfo - -- $ it - -- "In-txn migrations in non-default database get registered after all said migrations are committed, not after each one is applied inside a transaction" - -- $ error "TODO" aroundFreshDatabase $ it "No-txn migration with failure in COMMIT statement retries from the right place, and so does a new `codd up`" From cd51b02def183eff09e495bef6faa0b010fb8d7c Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 11:38:23 -0300 Subject: [PATCH 20/28] More tests for how applied migrations are registered --- test/DbDependentSpecs/ApplicationSpec.hs | 249 ++++++++++++++++++----- 1 file changed, 199 insertions(+), 50 deletions(-) diff --git a/test/DbDependentSpecs/ApplicationSpec.hs b/test/DbDependentSpecs/ApplicationSpec.hs index d6913005..104df651 100644 --- a/test/DbDependentSpecs/ApplicationSpec.hs +++ b/test/DbDependentSpecs/ApplicationSpec.hs @@ -190,6 +190,18 @@ changeConnUser dbInfo newUser mig = mig } } +changeConnDb + :: CoddSettings -> String -> AddedSqlMigration m -> AddedSqlMigration m +changeConnDb dbInfo newDb mig = mig + { addedSqlMig = (addedSqlMig mig) + { migrationCustomConnInfo = + let cinfo = fromMaybe + (migsConnString dbInfo) + (migrationCustomConnInfo (addedSqlMig mig)) + in Just cinfo { DB.connectDatabase = newDb } + } + } + -- | A migration that uses many different ways of inputting strings in postgres. In theory we'd only need to -- test the parser, but we do this to sleep well at night too. @@ -699,58 +711,195 @@ spec = do describe "Custom connection-string migrations" $ do aroundFreshDatabase - $ it - "In-txn migrations in same database as the default connection string get registered in the same transaction even for a different user" - $ \dbInfo -> do - -- To test this we put three consecutive in-txn migrations on the default database under a different user, where the last migration always fails. - -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. - runCoddLogger - (applyMigrationsNoCheck - dbInfo - (Just - [ alwaysPassingMig -- Make sure the default connection is available and yet we're not to use it for registering the 3 migrations below - , changeConnUser dbInfo - "codd-test-user" - createTableMig - , changeConnUser dbInfo - "codd-test-user" - addColumnMig - , changeConnUser dbInfo - "codd-test-user" - alwaysFailingMig - ] - ) - testConnTimeout - (const $ pure ()) + $ forM_ [True, False] + $ \addDefaultConnMig -> + it + ("In-txn migrations in non-default database get registered after all said migrations are committed, not after each one is applied inside a transaction. Default-conn mig first: " + ++ show addDefaultConnMig ) - `shouldThrow` (\(_ :: SomeException) -> True) - allRegisteredMigs :: [String] <- - map DB.fromOnly <$> withConnection - (migsConnString dbInfo) - testConnTimeout - (\conn -> DB.query - conn - "SELECT name from codd_schema.sql_migrations" - () + $ \dbInfo -> do + -- To test this we put three consecutive in-txn migrations that run on a non-default database, where the last migration always fails. + -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. In a second time, we run only the first two migrations, + -- and check they were registered. + runCoddLogger + (applyMigrationsNoCheck + dbInfo + ( Just + $ + -- A default-conn mig is interesting because it makes the default connection available if it runs first, but we don't want that default connection to be used too soon + [ alwaysPassingMig + | addDefaultConnMig + ] + ++ [ changeConnDb + dbInfo + "postgres" + createTableMig + , changeConnDb + dbInfo + "postgres" + addColumnMig + , changeConnDb + dbInfo + "postgres" + alwaysFailingMig + ] + ) + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(_ :: SomeException) -> + True + ) + allRegisteredMigs :: [String] <- + map DB.fromOnly <$> withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name from codd_schema.sql_migrations" + () + ) + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + createTableMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + addColumnMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + alwaysFailingMig + ) + ] + + -- If we don't include the third migration, the first two should be applied + runCoddLogger + (applyMigrationsNoCheck + dbInfo + ( Just + $ + -- A default-conn mig is interesting because it makes the default connection available if it runs first, but we don't want that default connection to be used too soon + [ alwaysPassingMig + | addDefaultConnMig + ] + ++ [ changeConnDb + dbInfo + "postgres" + createTableMig + , changeConnDb dbInfo + "postgres" + addColumnMig + ] + ) + testConnTimeout + (const $ pure ()) + ) + allRegisteredMigs2 :: [String] <- + map DB.fromOnly <$> withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name from codd_schema.sql_migrations" + () + ) + allRegisteredMigs2 + `shouldContain` [ migrationName + (addedSqlMig @IO + createTableMig + ) + ] + allRegisteredMigs2 + `shouldContain` [ migrationName + (addedSqlMig @IO + addColumnMig + ) + ] + allRegisteredMigs2 + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + alwaysFailingMig + ) + ] + aroundFreshDatabase + $ forM_ [True, False] + $ \addDefaultConnMig -> + it + ("In-txn migrations in same database as the default connection string get registered in the same transaction even for a different user. Default-conn mig first: " + ++ show addDefaultConnMig ) - allRegisteredMigs - `shouldNotContain` [ migrationName - (addedSqlMig @IO - createTableMig - ) - ] - allRegisteredMigs - `shouldNotContain` [ migrationName - (addedSqlMig @IO - addColumnMig - ) - ] - allRegisteredMigs - `shouldNotContain` [ migrationName - (addedSqlMig @IO - alwaysFailingMig - ) - ] + $ \dbInfo -> do + -- To test this we put three consecutive in-txn migrations on the default database under a different user, where the last migration always fails. + -- Neither of the three migrations should be registered in this scenario, as they were all rolled back. + runCoddLogger + (applyMigrationsNoCheck + dbInfo + ( Just + $ + -- A default-conn mig is interesting because it makes the default connection available if it runs first, but we don't want that default connection to be used regardless + [ alwaysPassingMig + | addDefaultConnMig + ] + ++ [ changeConnUser + dbInfo + "codd-test-user" + createTableMig + , changeConnUser + dbInfo + "codd-test-user" + addColumnMig + , changeConnUser + dbInfo + "codd-test-user" + alwaysFailingMig + ] + ) + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(_ :: SomeException) -> + True + ) + allRegisteredMigs :: [String] <- + map DB.fromOnly <$> withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name from codd_schema.sql_migrations" + () + ) + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + createTableMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + addColumnMig + ) + ] + allRegisteredMigs + `shouldNotContain` [ migrationName + (addedSqlMig + @IO + alwaysFailingMig + ) + ] it "applied_at and application_duration registered properly for migrations running before codd_schema is available" $ do From 19a34d02acc1575654a0af05668f36b8c6ee4483 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 16:42:36 -0300 Subject: [PATCH 21/28] Test statement skipping more thoroughly --- src/Codd/Internal/MultiQueryStatement.hs | 7 ++-- test/DbDependentSpecs/RetrySpec.hs | 5 --- test/ParsingSpec.hs | 49 ++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index c1280f48..1dde26db 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -106,9 +106,9 @@ isCountableRunnable = \case CopyFromStdinRows _ -> False CopyFromStdinEnd _ -> True --- | Skips the first n non countable-runnable statements from the stream. --- TODO: Test this function in isolation. E.g. one must never fall in a CopyFromStdinRows after skipping any number of statements. --- But also test basic cases including COMMIT, BEGIN, ROLLBACK, etc. +-- | Skips the first N countable-runnable statements from the stream and any non-countable-runnable pieces +-- like white space or comments so that the next piece in the stream is the (N+1)th runnable statement +-- in the original stream. skipNonCountableRunnableStatements :: Monad m => Int -> Stream (Of SqlPiece) m r -> Stream (Of SqlPiece) m r skipNonCountableRunnableStatements numCountableRunnableToSkip = @@ -123,7 +123,6 @@ skipNonCountableRunnableStatements numCountableRunnableToSkip = (0, Nothing) snd - runSingleStatementInternal_ :: MonadUnliftIO m => DB.Connection -> SqlPiece -> m StatementApplied runSingleStatementInternal_ conn p = case p of diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index 8e0d9958..789e7c30 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -215,11 +215,6 @@ spec = do `shouldBe` 1 - -- aroundTestDbInfo - -- $ it - -- "No-txn migrations with COPY have countable-runnable statements skipped correctly" - -- $ error "TODO" - aroundTestDbInfo $ it "Bootstrapping no-txn migration still gets registered if it makes default connection string accessible before failing" diff --git a/test/ParsingSpec.hs b/test/ParsingSpec.hs index 48266e39..36221ca5 100644 --- a/test/ParsingSpec.hs +++ b/test/ParsingSpec.hs @@ -5,6 +5,9 @@ import Codd.Internal ( BlockOfMigrations(..) , ConsecutiveInTxnMigrations(..) , parseMigrationFiles ) +import Codd.Internal.MultiQueryStatement + ( skipNonCountableRunnableStatements + ) import Codd.Logging ( runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) , EnvVars(..) @@ -26,6 +29,7 @@ import Codd.Parsing ( AddedSqlMigration(..) import Control.Monad ( forM , forM_ , unless + , when ) import Control.Monad.Identity ( Identity(runIdentity) ) import Control.Monad.Trans.Resource ( MonadThrow(..) ) @@ -41,6 +45,7 @@ import qualified Database.PostgreSQL.Simple as DB import DbUtils ( parseSqlMigrationIO ) import EnvironmentSpec ( ConnStringGen(..) ) import qualified Streaming.Prelude as Streaming +import qualified Streaming.Prelude as S import System.Random ( mkStdGen , randomR ) @@ -200,6 +205,10 @@ validSqlStatements = ] ] +newtype ShuffleOfPieces = ShuffleOfPieces [[SqlPiece]] deriving stock Show +instance Arbitrary ShuffleOfPieces where + arbitrary = ShuffleOfPieces <$> shuffle validSqlStatements + genTextStream :: Monad m => Text -> Gen (PureStream m) genTextStream t = do n <- arbitrary @@ -729,6 +738,46 @@ spec = do "Testing this by selecting txid_current() might be more effective" context "Other important behaviours to test" $ do + it "Countable-runnable statements counted and skipped correctly" + $ property + $ \(ShuffleOfPieces sqlPiecesWithComments, n) -> do + let sqlPieces = filter + (\case + [CommentPiece _] -> False + [WhiteSpacePiece _] -> False + _ -> True + ) + sqlPiecesWithComments + -- Nothing to skip returns same stream + S.toList_ + (skipNonCountableRunnableStatements + 0 + (S.concat $ S.each sqlPieces) + ) + `shouldReturn` concat sqlPieces + + -- We never break COPY apart and comments and white space are ignored up until the first statement not to be skipped + S.toList_ + (skipNonCountableRunnableStatements + n + (S.concat $ S.each sqlPieces) + ) + `shouldReturn` concat (drop n sqlPieces) + + -- Comments and whitespace in the beginning are ignored + when (n > 0) $ do + let clutter = + [ [WhiteSpacePiece " \n"] + , [CommentPiece "-- some comment\n"] + , [CommentPiece "/* comment \n \n \n test */"] + ] + S.toList_ + (skipNonCountableRunnableStatements + n + (S.concat $ S.each $ clutter ++ sqlPieces) + ) + `shouldReturn` concat (drop n sqlPieces) + it "Empty queries detector works well" $ property $ \randomSeed -> do let emptyQueries = From c202286b766e7bc382a27f0211ca0353b99096dc Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 16:53:21 -0300 Subject: [PATCH 22/28] Tidying up a little --- src/Codd/Internal.hs | 93 +++++------------------- src/Codd/Internal/MultiQueryStatement.hs | 6 +- test/ParsingSpec.hs | 8 +- 3 files changed, 26 insertions(+), 81 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 910d099e..6d6e3bac 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE BlockArguments, AllowAmbiguousTypes #-} -{-# LANGUAGE DataKinds #-} +{-# LANGUAGE AllowAmbiguousTypes, DataKinds #-} module Codd.Internal where import Prelude hiding ( readFile ) @@ -10,7 +9,7 @@ import Codd.Internal.MultiQueryStatement , StatementApplied(..) , multiQueryStatement_ , singleStatement_ - , skipNonCountableRunnableStatements + , skipCountableRunnableStatements ) import Codd.Internal.Retry ( RetryIteration(..) , retryFold @@ -203,6 +202,7 @@ data BootstrapCheck = BootstrapCheck , coddSchemaVersion :: CoddSchemaVersion } +-- brittany-disable-next-binding -- | Returns info on what kind of bootstrapping will be necessary, -- waiting up to the time limit for postgres to be up before throwing -- an exception. @@ -212,15 +212,13 @@ checkNeedsBootstrapping connInfo connectTimeout = handleJust (\e -> -- 1. No server available is a big "No", meaning we throw an exception. - if isServerNotAvailableError e - then Nothing - - -- 2. Maybe the default migration connection string doesn't work because: - -- - The DB does not exist. - -- - CONNECT rights not granted. - -- - User doesn't exist. - -- In any case, it's best to be conservative and consider any libpq errors - -- here as errors that might just require bootstrapping. + if isServerNotAvailableError e then Nothing + -- 2. Maybe the default migration connection string doesn't work because: + -- - The DB does not exist. + -- - CONNECT rights not granted. + -- - User doesn't exist. + -- In any case, it's best to be conservative and consider any libpq errors + -- here as errors that might just require bootstrapping. else if isLibPqError e then Just BootstrapCheck { defaultConnAccessible = False @@ -544,57 +542,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn return actAfterResult where - -- runInTxnBlockNotDefaultConn - -- :: (DB.Connection -> txn b) - -- -> DB.Connection - -- -> ConsecutiveInTxnMigrations m - -- -> ( FilePath - -- -> DB.UTCTimestamp - -- -> DiffTime - -- -> MigrationApplicationStatus - -- -> m () - -- ) -- ^ Running in the `m` monad is correct. In-txn migrations can run inside a transaction in a connection that is not the default one, and that's what `txn` would be here: a transaction _possibly_ in a connection different than the one that will be used to apply migrations. - -- -> m b - -- runInTxnBlockNotDefaultConn act conn migBlock registerMig = do - -- res <- - -- -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations - -- retryFold - -- retryPolicy - -- (\previousBlock RetryIteration { tryNumber } -> - -- if tryNumber == 0 - -- then pure previousBlock - -- else reReadBlock previousBlock - -- ) - -- migBlock - -- (\case - -- Left lastEx -> do - -- logError - -- "Failed after all configured retries. Giving up." - -- throwIO lastEx - -- Right ret -> pure ret - -- ) - -- $ \blockFinal -> do - -- logInfo "BEGINning transaction" - -- withTransaction @txn txnIsolationLvl conn $ do - -- let hoistedMigs :: NonEmpty (AddedSqlMigration txn) - -- hoistedMigs = hoistAddedSqlMigration lift - -- <$> inTxnMigs blockFinal - -- errorOrOk <- - -- forMExcept hoistedMigs $ applySingleMigration - -- conn - -- (\fp ts duration apStatus -> - -- lift $ registerMig fp ts duration apStatus - -- ) - -- NoSkipStatements - -- case errorOrOk of - -- Left e -> do - -- logInfo - -- "ROLLBACKed transaction" - -- pure $ Left e - -- Right () -> Right <$> act conn - -- logInfo "COMMITed transaction" - -- pure res - runInTxnBlock :: (DB.Connection -> txn b) -> DB.Connection @@ -1212,7 +1159,7 @@ class SkipStatements a where newtype SkipStatementsNoTxn = SkipStatementsNoTxn Int data NoSkipStatements = NoSkipStatements instance SkipStatements SkipStatementsNoTxn where - type SkipError SkipStatementsNoTxn = NoTxnMigrationApplicationFailure -- TODO: Rename to NoTxnNoTxnMigrationApplicationFailure + type SkipError SkipStatementsNoTxn = NoTxnMigrationApplicationFailure numStatementsToSkip (SkipStatementsNoTxn n) = n mkSqlError = NoTxnMigrationApplicationFailure instance SkipStatements NoSkipStatements where @@ -1263,7 +1210,7 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes StatementApplied _ -> pure (1, Nothing, Nothing) (WellParsedSql sqlStream, _) -> do - initialTxnStatus <- txnStatus conn -- TODO: For no-txn migrations this should be "not in a transaction". Should we assert that? + initialTxnStatus <- txnStatus conn ((numStmts, mLastBegin, _) :> errorOrDone) <- Streaming.fold (\(!l, !lastBegin, !lastTxnStatus) txnStatusNow -> @@ -1285,23 +1232,23 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes ++ show states states@(_, PQ.TransActive) -> error - $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this as a bug" + $ "Internal error in codd. It seems libpq returned a transaction status while another statement was running, which should be impossible. Please report this as a bug: " ++ show states states@(PQ.TransInError, _) -> error - $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this as a bug" + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this as a bug: " ++ show states states@(_, PQ.TransInError) -> error - $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this as a bug" + $ "Internal error in codd. Erring statements should be in stream's return, not as an element of it. Please report this as a bug: " ++ show states states@(PQ.TransUnknown, _) -> error - $ "Connection to database may have gone bad. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + $ "Connection to database may have gone bad. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you: " ++ show states states@(_, PQ.TransUnknown) -> error - $ "Connection to database may have gone bad. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you." + $ "Connection to database may have gone bad. Did someone else kill the connection while codd was applying migrations, perhaps? Codd cannot retry under these circumstances, sadly. Please file a bug report if retrying under such circumstances is important to you: " ++ show states ) ( numCountableRunnableStmtsToSkip @@ -1310,7 +1257,7 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes ) id $ multiQueryStatement_ conn - $ skipNonCountableRunnableStatements + $ skipCountableRunnableStatements numCountableRunnableStmtsToSkip sqlStream pure (numStmts, errorOrDone, mLastBegin) @@ -1380,9 +1327,7 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes logInfo $ " (" <> prettyPrintDuration appliedMigrationDuration - <> ", " - <> Fmt.sformat Fmt.int appliedMigrationNumStatements - <> ")" + <> ")" pure $ Right () diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index 1dde26db..ad221f4d 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -4,7 +4,7 @@ module Codd.Internal.MultiQueryStatement , multiQueryStatement_ , runSingleStatementInternal_ , singleStatement_ - , skipNonCountableRunnableStatements + , skipCountableRunnableStatements ) where import Codd.Logging ( CoddLogger ) @@ -109,9 +109,9 @@ isCountableRunnable = \case -- | Skips the first N countable-runnable statements from the stream and any non-countable-runnable pieces -- like white space or comments so that the next piece in the stream is the (N+1)th runnable statement -- in the original stream. -skipNonCountableRunnableStatements +skipCountableRunnableStatements :: Monad m => Int -> Stream (Of SqlPiece) m r -> Stream (Of SqlPiece) m r -skipNonCountableRunnableStatements numCountableRunnableToSkip = +skipCountableRunnableStatements numCountableRunnableToSkip = S.catMaybes . S.scan (\(skipped, _) p -> if skipped >= numCountableRunnableToSkip diff --git a/test/ParsingSpec.hs b/test/ParsingSpec.hs index 36221ca5..ae9c66b4 100644 --- a/test/ParsingSpec.hs +++ b/test/ParsingSpec.hs @@ -6,7 +6,7 @@ import Codd.Internal ( BlockOfMigrations(..) , parseMigrationFiles ) import Codd.Internal.MultiQueryStatement - ( skipNonCountableRunnableStatements + ( skipCountableRunnableStatements ) import Codd.Logging ( runCoddLogger ) import Codd.Parsing ( AddedSqlMigration(..) @@ -750,7 +750,7 @@ spec = do sqlPiecesWithComments -- Nothing to skip returns same stream S.toList_ - (skipNonCountableRunnableStatements + (skipCountableRunnableStatements 0 (S.concat $ S.each sqlPieces) ) @@ -758,7 +758,7 @@ spec = do -- We never break COPY apart and comments and white space are ignored up until the first statement not to be skipped S.toList_ - (skipNonCountableRunnableStatements + (skipCountableRunnableStatements n (S.concat $ S.each sqlPieces) ) @@ -772,7 +772,7 @@ spec = do , [CommentPiece "/* comment \n \n \n test */"] ] S.toList_ - (skipNonCountableRunnableStatements + (skipCountableRunnableStatements n (S.concat $ S.each $ clutter ++ sqlPieces) ) From 58f4c8270f8f6a6cdbafa71db42b18ffb68cfbb7 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sat, 30 Mar 2024 17:54:14 -0300 Subject: [PATCH 23/28] Avoid extraneous `COMMIT` statements being sent Now we detect if still in a transaction before sending `COMMIT`. This is helpful now that we no longer throw exceptions when an error happens, and instead manually send ROLLBACK. --- src/Codd/Internal.hs | 3 ++- src/Codd/Query.hs | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 6d6e3bac..15b42f14 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -584,6 +584,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn NoSkipStatements case errorOrOk of Left e -> do + liftIO $ DB.rollback conn logInfo "ROLLBACKed transaction" pure $ Left e @@ -1308,7 +1309,7 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes <> " failed to be applied. Since this failed statement is inside an explicitly started transaction in the migration, codd will resume the next retry or codd up from the last BEGIN-like statement, which is the " <> Fmt.sformat Fmt.ords lastBeginNum <> " statement in this migration" - void $ liftIO $ DB.execute_ conn "ROLLBACK" + liftIO $ DB.rollback conn registerMigRan fn migTimestamp diff --git a/src/Codd/Query.hs b/src/Codd/Query.hs index b5d0a365..d8d9067a 100644 --- a/src/Codd/Query.hs +++ b/src/Codd/Query.hs @@ -145,9 +145,7 @@ instance NotInTxn m => CanStartTxn m (InTxnT m) where -- | Runs a function inside a read-write transaction of the desired isolation level, -- BEGINning the transaction if not in one, or just running the supplied function otherwise, -- even if you are in a different isolation level than the one supplied. --- If not in a transaction, commits after running `f`. Does not commit otherwise. --- The first type argument is the desired InTxn monad, as it is helpful for callers to define --- it for better type inference, while the monad `m` not so much. +-- If not in a transaction, commits after running `f`, but only if the transaction is still active. Does not commit otherwise. withTransaction :: forall txn m a . (MonadUnliftIO m, CanStartTxn m txn) @@ -164,8 +162,9 @@ withTransaction isolLvl conn f = do execvoid_ conn $ beginStatement isolLvl -- Note: once we stop rolling back on exception here, we can relax this function's `MonadUnliftIO` -- constraint to just `MonadIO` - v <- unTxnT f `onException` liftIO (DB.rollback conn) - liftIO $ DB.commit conn + v <- unTxnT f `onException` liftIO (DB.rollback conn) + transStatus <- txnStatus conn + when (transStatus == PQ.TransInTrans) $ liftIO $ DB.commit conn pure v where assertTxnStatus :: MonadUnliftIO n => PQ.TransactionStatus -> n () From f911023c079a12f0c1b24ddb2d1aa998fb04e9ee Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sun, 31 Mar 2024 09:30:48 -0300 Subject: [PATCH 24/28] More retry tests, fix cases broken due to COPY related exceptions We had to reimplement `putCopyEnd` from postgresql-simple, and some things still aren't clear. More investigation is necessary. --- src/Codd/Internal.hs | 12 +- src/Codd/Internal/MultiQueryStatement.hs | 44 +++- test/DbDependentSpecs/RetrySpec.hs | 190 +++++++++++++++++- ...1-00-00-00-create-table-with-unique-id.sql | 7 + ...-duplicate-inside-explicit-transaction.sql | 6 + ...1-00-00-00-create-table-with-unique-id.sql | 8 + ...-duplicate-inside-explicit-transaction.sql | 9 + ...1-00-00-00-create-table-with-unique-id.sql | 0 ...-duplicate-inside-explicit-transaction.sql | 0 ...1-00-00-00-create-table-with-unique-id.sql | 8 + ...-duplicate-not-in-explicit-transaction.sql | 8 + 11 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 test/migrations/in-txn-application-error-with-COPY/2000-01-01-00-00-00-create-table-with-unique-id.sql create mode 100644 test/migrations/in-txn-application-error-with-COPY/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql create mode 100644 test/migrations/no-txn-partial-application-error-inside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql create mode 100644 test/migrations/no-txn-partial-application-error-inside-txn/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql rename test/migrations/{no-txn-partial-application-test => no-txn-partial-application-error-on-commit}/2000-01-01-00-00-00-create-table-with-unique-id.sql (100%) rename test/migrations/{no-txn-partial-application-test => no-txn-partial-application-error-on-commit}/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql (100%) create mode 100644 test/migrations/no-txn-partial-application-error-outside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql create mode 100644 test/migrations/no-txn-partial-application-error-outside-txn/2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index 15b42f14..de9dbb24 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -1214,7 +1214,7 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes initialTxnStatus <- txnStatus conn ((numStmts, mLastBegin, _) :> errorOrDone) <- Streaming.fold - (\(!l, !lastBegin, !lastTxnStatus) txnStatusNow -> + (\(!l, !lastBegin, !lastTxnStatus) !txnStatusNow -> (l + 1, , txnStatusNow) $ case (lastTxnStatus, txnStatusNow) @@ -1309,14 +1309,18 @@ applySingleMigration conn registerMigRan skip (AddedSqlMigration sqlMig migTimes <> " failed to be applied. Since this failed statement is inside an explicitly started transaction in the migration, codd will resume the next retry or codd up from the last BEGIN-like statement, which is the " <> Fmt.sformat Fmt.ords lastBeginNum <> " statement in this migration" - liftIO $ DB.rollback conn + + -- ROLLBACK if necessary. Notice that if a COMMIT statement fails, a ROLLBACK is redundant. For any other failing statements that is not the case. + finalTxnStatus <- txnStatus conn + when (finalTxnStatus == PQ.TransInError) $ do + liftIO $ DB.rollback conn + logInfo + "ROLLBACKed last explicitly started transaction" registerMigRan fn migTimestamp appliedMigrationDuration (NoTxnMigrationFailed (lastBeginNum - 1)) - logInfo - "ROLLBACKed last explicitly started transaction" pure $ mkSqlError @s sqlStatementEx (lastBeginNum - 1) Nothing -> do diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index ad221f4d..25185882 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -10,7 +10,9 @@ module Codd.Internal.MultiQueryStatement import Codd.Logging ( CoddLogger ) import Codd.Parsing ( SqlPiece(..) ) import Codd.Query ( txnStatus ) +import Control.Applicative ( (<|>) ) import Control.Monad ( void ) +import Data.Maybe ( fromMaybe ) import Data.Text ( Text ) import Data.Text.Encoding ( encodeUtf8 ) import qualified Database.PostgreSQL.LibPQ as PQ @@ -29,6 +31,7 @@ import qualified Streaming.Internal as S import qualified Streaming.Prelude as Streaming import qualified Streaming.Prelude as S import UnliftIO ( Exception + , IOException , MonadUnliftIO , handle , liftIO @@ -146,9 +149,46 @@ runSingleStatementInternal_ conn p = case p of liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows applied CopyFromStdinEnd _ -> do - liftIO $ void $ DB.putCopyEnd conn - applied + -- postgresql-simple's putCopyEnd throws a weird exception, so we resort to libpq internals + -- This is terrible and we should really be using postgresql-simple as much as possible. TODO: Try again. + liftIO $ PGInternal.withConnection conn $ \pqconn -> do + copyResult <- PQ.putCopyEnd pqconn Nothing + mmsg <- consumeResults pqconn Nothing + case copyResult of + PQ.CopyInOk -> do + -- CopyInOk is a possible result even when COPY fails.. oh why? + case mmsg of + Nothing -> + StatementApplied <$> PQ.transactionStatus pqconn + Just msg -> + pure $ StatementErred $ SqlStatementException + "" + DB.SqlError { DB.sqlState = "" + , DB.sqlExecStatus = DB.FatalError + , DB.sqlErrorMsg = msg + , DB.sqlErrorDetail = "" + , DB.sqlErrorHint = "" + } + _ -> do + pure $ StatementErred $ SqlStatementException + "" + DB.SqlError { DB.sqlState = "" + , DB.sqlExecStatus = DB.FatalError + , DB.sqlErrorMsg = fromMaybe "" mmsg + , DB.sqlErrorDetail = "" + , DB.sqlErrorHint = "" + } where + -- `consumeResults` taken from postgresql-simple's codebase and modified to return the error message + consumeResults pqconn !mmsg = do + mres <- PQ.getResult pqconn + mmsgAfter <- PQ.errorMessage pqconn + let mmsgFinal = + mmsg <|> (if mmsgAfter == Just "" then Nothing else mmsgAfter) + case mres of + Nothing -> pure mmsgFinal + Just _ -> consumeResults pqconn mmsgFinal + applied :: MonadUnliftIO n => n StatementApplied applied = if isCountableRunnable p then StatementApplied <$> txnStatus conn else pure NotACountableStatement diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index 789e7c30..02b57298 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -5,6 +5,8 @@ import Codd.Environment ( CoddSettings(..) ) import Codd.Internal ( NoTxnMigrationApplicationFailure , withConnection ) +import Codd.Internal.MultiQueryStatement + ( SqlStatementException ) import Codd.Logging ( LoggingT(runLoggingT) , Newline(..) ) @@ -45,6 +47,187 @@ spec :: Spec spec = do describe "DbDependentSpecs" $ do describe "Retry tests" $ do + aroundFreshDatabase + $ it + "In-txn migrations with failure in COPY statement are handled nicely" + $ \dbInfo0 -> do + -- This test might seem useless, but postgresql-simple support for COPY errors is not great, + -- and we do quite a bit of sqlError exception handling that means we might forget to ROLLBACK + -- in some cases + let + dbInfo = dbInfo0 + { retryPolicy = RetryPolicy + 2 + (ExponentialBackoff (realToFrac @Double 0.001) + ) + } + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + dbInfo + { sqlMigrations = + [ "test/migrations/in-txn-application-error-with-COPY" + ] + } + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(e :: SqlStatementException) -> + "duplicate key" + `List.isInfixOf` show e + ) + nonBootstrapAppliedMigs :: [(String, Int, Bool)] <- + withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name, num_applied_statements, no_txn_failed_at IS NULL from codd_schema.sql_migrations order by id OFFSET 1 -- Skip the bootstrap migration" + () + ) + nonBootstrapAppliedMigs `shouldBe` [] + logs <- readMVar logsmv + length (filter ("ROLLBACK" `Text.isInfixOf`) logs) + `shouldBe` 3 + length (filter ("BEGIN" `Text.isInfixOf`) logs) + `shouldBe` 3 + length (filter ("COMMIT" `Text.isInfixOf`) logs) + `shouldBe` 0 + + aroundFreshDatabase + $ it + "No-txn migration with failure in statement not in explicit transaction block retries from the right place" + $ \dbInfo0 -> do + -- We want retries to ensure applied statements are not being applied more than once + let + dbInfo = dbInfo0 + { retryPolicy = RetryPolicy + 2 + (ExponentialBackoff (realToFrac @Double 0.001) + ) + } + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + dbInfo + { sqlMigrations = + [ "test/migrations/no-txn-partial-application-error-outside-txn" + ] + } + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(e :: NoTxnMigrationApplicationFailure) -> + "duplicate key" + `List.isInfixOf` show e + ) + (appliedMigs :: [(String, Int, Bool)], datatable :: [ DB.Only + Int + ]) <- + withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> + (,) + <$> DB.query + conn + "SELECT name, num_applied_statements, no_txn_failed_at IS NULL from codd_schema.sql_migrations order by id" + () + <*> DB.query + conn + "SELECT id from somedata order by id" + () + ) + logs <- readMVar logsmv + appliedMigs + `shouldContain` [ ( "2000-01-01-00-00-00-create-table-with-unique-id.sql" + , 2 + , True + ) + , ( "2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql" + , 1 + , False + ) + ] + map DB.fromOnly datatable `shouldBe` [1, 2, 3, 4] + length + (filter + ("duplicate key value violates unique constraint" `Text.isInfixOf` + ) + logs + ) + `shouldBe` 3 + + aroundFreshDatabase + $ it + "No-txn migration with failure in statement inside BEGIN..COMMIT retries from the right place" + $ \dbInfo0 -> do + -- We want retries to ensure applied statements are not being applied more than once + let + dbInfo = dbInfo0 + { retryPolicy = RetryPolicy + 2 + (ExponentialBackoff (realToFrac @Double 0.001) + ) + } + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + dbInfo + { sqlMigrations = + [ "test/migrations/no-txn-partial-application-error-inside-txn" + ] + } + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(e :: NoTxnMigrationApplicationFailure) -> + "duplicate key" + `List.isInfixOf` show e + ) + (appliedMigs :: [(String, Int, Bool)], datatable :: [ DB.Only + Int + ]) <- + withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> + (,) + <$> DB.query + conn + "SELECT name, num_applied_statements, no_txn_failed_at IS NULL from codd_schema.sql_migrations order by id" + () + <*> DB.query + conn + "SELECT id from somedata order by id" + () + ) + logs <- readMVar logsmv + appliedMigs + `shouldContain` [ ( "2000-01-01-00-00-00-create-table-with-unique-id.sql" + , 2 + , True + ) + , ( "2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql" + , 1 + , False + ) + ] + map DB.fromOnly datatable `shouldBe` [1, 2, 3, 4] + length + (filter + ("duplicate key value violates unique constraint" `Text.isInfixOf` + ) + logs + ) + `shouldBe` 3 + aroundFreshDatabase $ it "No-txn migration with failure in COMMIT statement retries from the right place, and so does a new `codd up`" @@ -63,14 +246,17 @@ spec = do (applyMigrationsNoCheck dbInfo { sqlMigrations = - [ "test/migrations/no-txn-partial-application-test" + [ "test/migrations/no-txn-partial-application-error-on-commit" ] } Nothing testConnTimeout (const $ pure ()) ) - `shouldThrow` (\(_ :: SomeException) -> True) + `shouldThrow` (\(e :: NoTxnMigrationApplicationFailure) -> + "duplicate key" + `List.isInfixOf` show e + ) (appliedMigs :: [(String, Int, Bool)], datatable :: [ DB.Only Int ], othertableExists :: DB.Only Bool) <- diff --git a/test/migrations/in-txn-application-error-with-COPY/2000-01-01-00-00-00-create-table-with-unique-id.sql b/test/migrations/in-txn-application-error-with-COPY/2000-01-01-00-00-00-create-table-with-unique-id.sql new file mode 100644 index 00000000..f4c8f282 --- /dev/null +++ b/test/migrations/in-txn-application-error-with-COPY/2000-01-01-00-00-00-create-table-with-unique-id.sql @@ -0,0 +1,7 @@ +CREATE TABLE somedata (id INT NOT NULL, UNIQUE(id)); + +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. diff --git a/test/migrations/in-txn-application-error-with-COPY/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql b/test/migrations/in-txn-application-error-with-COPY/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql new file mode 100644 index 00000000..87921fe5 --- /dev/null +++ b/test/migrations/in-txn-application-error-with-COPY/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql @@ -0,0 +1,6 @@ +INSERT INTO somedata(id) VALUES (4); +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. diff --git a/test/migrations/no-txn-partial-application-error-inside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql b/test/migrations/no-txn-partial-application-error-inside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql new file mode 100644 index 00000000..2c7d4e59 --- /dev/null +++ b/test/migrations/no-txn-partial-application-error-inside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql @@ -0,0 +1,8 @@ +-- codd: no-txn +CREATE TABLE somedata (id INT NOT NULL, UNIQUE(id)); + +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. diff --git a/test/migrations/no-txn-partial-application-error-inside-txn/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql b/test/migrations/no-txn-partial-application-error-inside-txn/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql new file mode 100644 index 00000000..17e5fe4a --- /dev/null +++ b/test/migrations/no-txn-partial-application-error-inside-txn/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql @@ -0,0 +1,9 @@ +-- codd: no-txn +INSERT INTO somedata(id) VALUES (4); -- This statement would fail if codd tries to rerun it +BEGIN; +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. +COMMIT; diff --git a/test/migrations/no-txn-partial-application-test/2000-01-01-00-00-00-create-table-with-unique-id.sql b/test/migrations/no-txn-partial-application-error-on-commit/2000-01-01-00-00-00-create-table-with-unique-id.sql similarity index 100% rename from test/migrations/no-txn-partial-application-test/2000-01-01-00-00-00-create-table-with-unique-id.sql rename to test/migrations/no-txn-partial-application-error-on-commit/2000-01-01-00-00-00-create-table-with-unique-id.sql diff --git a/test/migrations/no-txn-partial-application-test/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql b/test/migrations/no-txn-partial-application-error-on-commit/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql similarity index 100% rename from test/migrations/no-txn-partial-application-test/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql rename to test/migrations/no-txn-partial-application-error-on-commit/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql diff --git a/test/migrations/no-txn-partial-application-error-outside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql b/test/migrations/no-txn-partial-application-error-outside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql new file mode 100644 index 00000000..2c7d4e59 --- /dev/null +++ b/test/migrations/no-txn-partial-application-error-outside-txn/2000-01-01-00-00-00-create-table-with-unique-id.sql @@ -0,0 +1,8 @@ +-- codd: no-txn +CREATE TABLE somedata (id INT NOT NULL, UNIQUE(id)); + +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. diff --git a/test/migrations/no-txn-partial-application-error-outside-txn/2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql b/test/migrations/no-txn-partial-application-error-outside-txn/2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql new file mode 100644 index 00000000..533d38ec --- /dev/null +++ b/test/migrations/no-txn-partial-application-error-outside-txn/2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql @@ -0,0 +1,8 @@ +-- codd: no-txn +INSERT INTO somedata(id) VALUES (4); -- This statement would fail if codd tries to rerun it + +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. From a8a785d9d13b5c74bd260bd97ef369857fc18218 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sun, 31 Mar 2024 10:28:26 -0300 Subject: [PATCH 25/28] Use postgresql-simple's COPY functions and exception catching This is a nicer way of avoiding libpq internals --- src/Codd/Internal/MultiQueryStatement.hs | 102 ++++++++++------------- 1 file changed, 42 insertions(+), 60 deletions(-) diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index 25185882..330b196c 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -10,10 +10,9 @@ module Codd.Internal.MultiQueryStatement import Codd.Logging ( CoddLogger ) import Codd.Parsing ( SqlPiece(..) ) import Codd.Query ( txnStatus ) -import Control.Applicative ( (<|>) ) -import Control.Monad ( void ) import Data.Maybe ( fromMaybe ) import Data.Text ( Text ) +import qualified Data.Text as Text import Data.Text.Encoding ( encodeUtf8 ) import qualified Database.PostgreSQL.LibPQ as PQ import qualified Database.PostgreSQL.Simple as DB @@ -33,7 +32,10 @@ import qualified Streaming.Prelude as S import UnliftIO ( Exception , IOException , MonadUnliftIO + , SomeException + , fromException , handle + , handleJust , liftIO ) @@ -129,65 +131,45 @@ skipCountableRunnableStatements numCountableRunnableToSkip = runSingleStatementInternal_ :: MonadUnliftIO m => DB.Connection -> SqlPiece -> m StatementApplied runSingleStatementInternal_ conn p = case p of - CommentPiece _ -> applied - WhiteSpacePiece _ -> applied - BeginTransaction s -> singleStatement_ conn s - CommitTransaction s -> singleStatement_ conn s - RollbackTransaction s -> singleStatement_ conn s - OtherSqlPiece s -> singleStatement_ conn s - CopyFromStdinStatement copyStm -> do - liftIO $ DB.copy_ conn $ DB.Query (encodeUtf8 copyStm) - -- Unlike every other SqlPiece, COPY does not fit into a single constructor. - -- For counting it doesn't matter if we count `COPY FROM` or the ending of `COPY`. - -- For skipping it doesn't matter either which one we count, as we'll skip N countable - -- statements when necessary and start from N+1, whatever that is. - -- Since the txnStatus here is TransActive (query ongoing), it is simpler - -- if we count the ending of `COPY`, as after that the status is TransIdle, so - -- callers have one fewer state to deal with. - applied - CopyFromStdinRows copyRows -> do - liftIO $ DB.putCopyData conn $ encodeUtf8 copyRows - applied - CopyFromStdinEnd _ -> do - -- postgresql-simple's putCopyEnd throws a weird exception, so we resort to libpq internals - -- This is terrible and we should really be using postgresql-simple as much as possible. TODO: Try again. - liftIO $ PGInternal.withConnection conn $ \pqconn -> do - copyResult <- PQ.putCopyEnd pqconn Nothing - mmsg <- consumeResults pqconn Nothing - case copyResult of - PQ.CopyInOk -> do - -- CopyInOk is a possible result even when COPY fails.. oh why? - case mmsg of - Nothing -> - StatementApplied <$> PQ.transactionStatus pqconn - Just msg -> - pure $ StatementErred $ SqlStatementException - "" - DB.SqlError { DB.sqlState = "" - , DB.sqlExecStatus = DB.FatalError - , DB.sqlErrorMsg = msg - , DB.sqlErrorDetail = "" - , DB.sqlErrorHint = "" - } - _ -> do - pure $ StatementErred $ SqlStatementException - "" - DB.SqlError { DB.sqlState = "" - , DB.sqlExecStatus = DB.FatalError - , DB.sqlErrorMsg = fromMaybe "" mmsg - , DB.sqlErrorDetail = "" - , DB.sqlErrorHint = "" - } + CommentPiece _ -> applied + WhiteSpacePiece _ -> applied + BeginTransaction s -> singleStatement_ conn s + CommitTransaction s -> singleStatement_ conn s + RollbackTransaction s -> singleStatement_ conn s + OtherSqlPiece s -> singleStatement_ conn s + CopyFromStdinStatement copyStm -> + liftIO + $ handleCopyErrors (Just copyStm) + $ DB.copy_ conn (DB.Query (encodeUtf8 copyStm)) + >> applied + CopyFromStdinRows copyRows -> + -- I haven't seen errors coming from sending rows to the server yet; it seems they only happen + -- on `putCopyEnd` or the initial `copy_`. Still, let's be cautious and prepare for it. + liftIO + $ handleCopyErrors Nothing + $ DB.putCopyData conn (encodeUtf8 copyRows) + >> applied + CopyFromStdinEnd _ -> + liftIO $ handleCopyErrors Nothing $ DB.putCopyEnd conn >> applied where - -- `consumeResults` taken from postgresql-simple's codebase and modified to return the error message - consumeResults pqconn !mmsg = do - mres <- PQ.getResult pqconn - mmsgAfter <- PQ.errorMessage pqconn - let mmsgFinal = - mmsg <|> (if mmsgAfter == Just "" then Nothing else mmsgAfter) - case mres of - Nothing -> pure mmsgFinal - Just _ -> consumeResults pqconn mmsgFinal + handleCopyErrors (fromMaybe "" -> stmt) = handleJust + (\(e :: SomeException) -> + -- In my tests, COPY errors are of type `IOException` for `putCopyEnd` and of type `SqlError` for `copy_`. + -- Sadly the ones of type `IOException` don't contain e.g. error codes, but at least their message shows the failed statement. + -- We transform those into `SqlError` here since all of the codebase is prepared for that. + case () of + () + | Just sqlEx <- fromException @DB.SqlError e -> Just sqlEx + | Just ioEx <- fromException @IOException e -> Just DB.SqlError + { DB.sqlState = "" + , DB.sqlExecStatus = DB.FatalError + , DB.sqlErrorMsg = encodeUtf8 $ Text.pack $ show ioEx + , DB.sqlErrorDetail = "" + , DB.sqlErrorHint = "" + } + | otherwise -> Nothing -- Let it blow up if we don't expect it + ) + (pure . StatementErred . SqlStatementException stmt) applied :: MonadUnliftIO n => n StatementApplied applied = if isCountableRunnable p then StatementApplied <$> txnStatus conn From 0d162708457f920fd1068fe7c630ede3a8d14c60 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Sun, 31 Mar 2024 11:06:47 -0300 Subject: [PATCH 26/28] Amending documentation to include retry examples --- README.md | 38 ++++--- docs/SQL-MIGRATIONS.md | 138 ++++++++++++++++++----- src/Codd/Internal/MultiQueryStatement.hs | 30 +++-- 3 files changed, 153 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index d28f2ff4..b2d2976e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,21 @@ # What is Codd? + +- [What is Codd?](#what-is-codd) + - [Installing Codd](#installing-codd) + - [1. Self-contained executable](#1-self-contained-executable) + - [2. Nix](#2-nix) + - [3. Docker](#3-docker) + - [Get codd up and running in 15 minutes](#get-codd-up-and-running-in-15-minutes) + - [Start using codd with an existing database](#start-using-codd-with-an-existing-database) + - [Safety considerations](#safety-considerations) + - [Frequently Asked Questions](#frequently-asked-questions) + - [Why does taking and restoring a database dump affect my expected codd schema?](#why-does-taking-and-restoring-a-database-dump-affect-my-expected-codd-schema) + - [Will codd run out of memory or system resources if my migration files are too large or too many?](#will-codd-run-out-of-memory-or-system-resources-if-my-migration-files-are-too-large-or-too-many) + - [Will codd handle SQL errors nicely?](#will-codd-handle-sql-errors-nicely) + + Codd is a CLI tool that applies plain SQL migrations atomically (when PostgreSQL allows it) and includes schema equality checks that practically ensure your development database's schema matches the database schema in every other environment, checking table columns' names, types, order, available functions, roles, table privileges, object ownership, row security policies, database encoding [and much more](/docs/DATABASE-EQUALITY.md). These schema equality checks happen automatically; you only need to write .sql files and `codd add migration-file.sql` them. No configuration files, JSON, or YAML; just 3 environment variables and .sql files and you can use codd. @@ -89,23 +104,6 @@ Automatic merge failed; fix conflicts and then commit the result. ¹ Some SQL must run without explicit transactions; single-transaction application only works when none of that is present. ² There can be false positives and false negatives in some cases. - -- [What is Codd?](#what-is-codd) - - [Installing Codd](#installing-codd) - - [1. Self-contained executable](#1-self-contained-executable) - - [2. Nix](#2-nix) - - [3. Docker](#3-docker) - - [Get codd up and running in 15 minutes](#get-codd-up-and-running-in-15-minutes) - - [Start using codd with an existing database](#start-using-codd-with-an-existing-database) - - [Safety considerations](#safety-considerations) - - [Frequently Asked Questions](#frequently-asked-questions) - - - - ## Installing Codd ### 1. Self-contained executable @@ -205,3 +203,9 @@ We recommend following these instructions closely to catch as many possible issu 1. ### Why does taking and restoring a database dump affect my expected codd schema? `pg_dump` does not dump all of the schema state that codd checks. A few examples include (at least with PG 13) role related state, the database's default transaction isolation level and deferredness, among possibly others. So check that it isn't the case that you get different schemas when that happens. We recommend using `pg_dumpall` to preserve more when possible instead. If you've checked with `psql` and everything looks to be the same please report a bug in codd. + +2. ### Will codd run out of memory or system resources if my migration files are too large or too many? + Most likely not. Codd reads migrations from disk in streaming fashion and keeps in memory only a single statement at a time (modulo the garbage collector, but our CI-run benchmarks show no more than 4MB RAM are ever used on my x86 linux machine even for 1 million statements). For `COPY` statements, codd uses a constant-size buffer to stream read the contents to achieve bounded memory usage while staying fast. Also, codd does not open more than one migration file simultaneously to stay well below typical file handle limits imposed by the shell or operating system, and that is also assured through an automated test that runs in CI with `strace`. + +3. ### Will codd handle SQL errors nicely? + Codd tries to do the "best possible thing" even in rather unusual situations. It will retry sets of consecutive in-txn migrations atomically so as not to leave your database in an intermediary state. Even for no-txn migrations, codd will retry the failing statement instead of entire migrations, and _even_ if you write explicit `BEGIN..COMMIT` sections in no-txn migrations, codd will be smart enough to retry from the `BEGIN` if a statement inside that section fails. See the [retry examples](/docs/SQL-MIGRATIONS.md#examples) if you're interested. What codd currently cannot handle well is having its connection killed by an external agent while it's applying a _no-txn_ migration, a scenario which should be extremely rare. Basically, we hope you should be able to write your migrations however you want and rely comfortably on the fact that codd should do the reasonable thing when handling errors. diff --git a/docs/SQL-MIGRATIONS.md b/docs/SQL-MIGRATIONS.md index b2c00fb3..c67c195c 100644 --- a/docs/SQL-MIGRATIONS.md +++ b/docs/SQL-MIGRATIONS.md @@ -2,33 +2,32 @@ Most of the time, you'll be able to simply add migrations and things should work well. However, there are cases where things get tricky, so this guide should help if you're in such a situation. - + - [SQL Migrations and Codd](#sql-migrations-and-codd) - - [Configurability](#configurability) - - [Migrations that can't run in transactions](#migrations-that-cant-run-in-transactions) - - [Migrations that need to run in a custom database connection](#migrations-that-need-to-run-in-a-custom-database-connection) - - [Templating environment variables into migrations](#templating-environment-variables-into-migrations) - - [Special care with no-txn or custom-connection migrations](#special-care-with-no-txn-or-custom-connection-migrations) - - [Unsupported SQL inside migrations](#unsupported-sql-inside-migrations) - - [Retry Policies](#retry-policies) - - - + - [Configurability](#configurability) + - [Migrations that can't run in transactions](#migrations-that-cant-run-in-transactions) + - [Migrations that need to run in a custom database connection](#migrations-that-need-to-run-in-a-custom-database-connection) + - [Templating environment variables into migrations](#templating-environment-variables-into-migrations) + - [Special care with no-txn or custom-connection migrations](#special-care-with-no-txn-or-custom-connection-migrations) + - [Unsupported SQL inside migrations](#unsupported-sql-inside-migrations) + - [Retry Policies](#retry-policies) + - [Examples:](#examples) + - [Set of consecutive in-txn migrations](#set-of-consecutive-in-txn-migrations) + - [No-txn migration failing in the middle](#no-txn-migration-failing-in-the-middle) + - [No-txn migration with statement failing inside an explicit BEGIN..COMMIT section of the migration](#no-txn-migration-with-statement-failing-inside-an-explicit-begincommit-section-of-the-migration) + ## Configurability -_Codd_ handles plain SQL migrations, but allows some level of configurability per +Codd handles plain SQL migrations, but allows some level of configurability per migration by parsing comments at the top of migrations, i.e. comments with special syntax before any statements. -There are currently three different special comments that are meaningful to _codd_, +There are currently three different special comments that are meaningful to codd, which we describe in the next three subsections. ### Migrations that can't run in transactions -Just add `-- codd: no-txn` before any statements to your SQL file and _codd_ will run +Just add `-- codd: no-txn` before any statements to your SQL file and codd will run the statements in your migration outside a transaction. Example: @@ -39,10 +38,14 @@ ALTER TYPE experience ADD VALUE 'intern' BEFORE 'junior'; UPDATE employee SET employee_experience='intern'; ```` +Avoid these migrations as much as possible because they are dangerous. Read +[Special care with no-txn or custom-connection migrations](#special-care-with-no-txn-or-custom-connection-migrations) +for more information why. + ### Migrations that need to run in a custom database connection Just add `-- codd-connection: postgres://...` (or in keyword value pairs format) before -any statements to your SQL file and _codd_ will use that connection string to apply that +any statements to your SQL file and codd will use that connection string to apply that migration. The format is the same as the one used for the `CODD_CONNECTION` environment variable. @@ -70,10 +73,10 @@ and interpolate their values by referencing them through `${VAR1}`, `${VAR2}` et You can reference environment variables from anywhere in the migration, including other `-- codd` directives in the header. -Please be aware that _codd_'s environment variable templating is extremely primitive. Do not +Please be aware that codd's environment variable templating is extremely primitive. Do not put entire statements inside your environment variables. Also, if one variable contains `${OTHERVAR}` inside, then there is a chance it can be doubly replaced depending on the internal -order _codd_ applies its replacements. This order is undefined, so don't rely on that behaviour. +order codd applies its replacements. This order is undefined, so don't rely on that behaviour. No escaping is performed, that is on you as well. Example: @@ -89,13 +92,13 @@ CREATE DATABASE "${PGDATABASE}" TEMPLATE template0 OWNER ${PGUSER} ENCODING UTF8 ## Special care with no-txn or custom-connection migrations -By using `no-txn` migrations or migrations with a custom connection string, you're taking great risk with the possibility of a migration failing when deploying and leaving the database in an intermediary state that is not compatible with the previously deployed application nor the to-be-deployed one. It is recommended that you avoid these at great costs and plan carefully when adding even one of them. +By using `no-txn` migrations or migrations with a custom connection string, you're taking great risk with the possibility of a migration failing when deploying and leaving the database in an intermediary state that is not compatible with the previously deployed application nor the to-be-deployed one. Codd can resume application from the precise statement that last failed (see [Retry Policies](#retry-policies) for an example) but it is recommended that you avoid these at great costs and plan carefully when adding even one of them. -_Codd_ will always run each block of consecutive `in-txn` migrations with the same connection string in a single transaction. If there are `in-txn` migrations intertwined with `no-txn` migrations or migrations with custom connection strings, every block of consecutive `in-txn` and `same-connection-string` migrations runs in the same transaction, but other migrations run separately. Also, if even one `no-txn` migration or one migration with a custom connection string exists, _codd_ will apply and commit every pending migration and verify schemas only after that. +Codd will always run each block of consecutive `in-txn` migrations with the same connection string in a single transaction. If there are `in-txn` migrations intertwined with `no-txn` migrations or migrations with custom connection strings, every block of consecutive `in-txn` and `same-connection-string` migrations runs in the same transaction, but other migrations run separately. Also, if even one `no-txn` migration or one migration with a custom connection string exists, codd will apply and commit every pending migration and verify schemas only after that. ## Unsupported SQL inside migrations -_Codd_ does not support all SQL statements inside a migration. This is an incomplete list of things that we know _codd_ does not support. +Codd does not support all SQL statements inside a migration. This is an incomplete list of things that we know codd does not support. 1. `COPY FROM STDIN` is supported but other forms of `COPY` are not. 2. psql's meta commands, including `\COPY`, are not supported. @@ -104,10 +107,91 @@ If you find a problem, please let us know. ## Retry Policies -A migration can fail for a variety of reasons, including unhandled data and serializability errors when using Serializable transactions. For this reason, _codd_ comes with a default Retry Policy of 3 tries (at most 2 retries), the first retry attempt 1 second after the first fails, and the second retry attempt 2 seconds after the second one fails. This can be configured with the `CODD_RETRY_POLICY` environment variable as in [CONFIGURATION.md](CONFIGURATION.md). Important observations are: +A migration can fail for a variety of reasons, including unhandled data and serializability errors when using Serializable transactions. For this reason, codd comes with a default Retry Policy of 3 tries (at most 2 retries), the first retry attempt 1 second after the first fails, and the second retry attempt 2 seconds after the second one fails. This can be configured with the `CODD_RETRY_POLICY` environment variable as in [CONFIGURATION.md](CONFIGURATION.md). Important observations are: -- When faced with a block of consecutive `in-txn` migrations, _codd_ retries the blocks whole. - - For these, the retry count and intervals are "reset" for each block. -- For `no-txn` migrations, _codd_ retries individual statements, not even entire migrations. +- When faced with a set of consecutive `in-txn` migrations, codd retries all the pending migrations in the block. + - For these, the retry count and intervals are "reset" for each block that is retried. +- For `no-txn` migrations, codd retries individual statements, not even entire migrations. - Otherwise retries would lead to possibly inconsistent data. - The retry count and intervals are also "reset" for each statement. + - Codd stores how many statements it applied for a no-txn migration and is able to resume application from the exact statement that failed in the no-txn migration. + +### Examples: + +#### Set of consecutive in-txn migrations + +This shows how codd will retry all consecutive in-txn migrations atomically. + +```` +Checking if database codd-test-db is accessible with the configured connection string... (waiting up to 5sec) +Looking for pending migrations... [2 found] +BEGINning transaction +Applying 2000-01-01-00-00-00-create-table-with-unique-id.sql (0.94ms) +Applying 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +ROLLBACKed transaction +Warn: Waiting 1ms before next try +BEGINning transaction +Applying 2000-01-01-00-00-00-create-table-with-unique-id.sql (0.56ms) +Applying 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +ROLLBACKed transaction +Warn: Waiting 2ms before next try +BEGINning transaction +Applying 2000-01-01-00-00-00-create-table-with-unique-id.sql (0.60ms) +Applying 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +ROLLBACKed transaction +Error: Failed after all configured retries. Giving up. +```` + +#### No-txn migration failing in the middle + +This is how codd will retry if a migration has a `COPY` statement in a no-txn migration that fails after another statement in that same migration: + +```` +Checking if database codd-test-db is accessible with the configured connection string... (waiting up to 5sec) +Looking for pending migrations... [2 found] +Applying 2000-01-01-00-00-00-create-table-with-unique-id.sql (0.95ms) +Applying 2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +Error: After applying 1 statements from no-txn migration 2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql, the 2nd failed to be applied. Codd will resume the next retry or codd up from it +Warn: Waiting 1ms before next try +Warn: Skipping the first 1 SQL statements, which have already been applied, and starting application from the 2nd statement +Applying 2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +Error: After applying 1 statements from no-txn migration 2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql, the 2nd failed to be applied. Codd will resume the next retry or codd up from it +Warn: Waiting 2ms before next try +Warn: Skipping the first 1 SQL statements, which have already been applied, and starting application from the 2nd statement +Applying 2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +Error: After applying 1 statements from no-txn migration 2001-01-01-00-00-00-insert-duplicate-not-in-explicit-transaction.sql, the 2nd failed to be applied. Codd will resume the next retry or codd up from it +Error: Failed after all configured retries. Giving up. +```` + +#### No-txn migration with statement failing inside an explicit BEGIN..COMMIT section of the migration + +You are free to add your explicit `BEGIN..COMMIT` sections inside your no-txn migrations to minimize the number of possible intermediary states should application fail, and codd is smart enough to retry them appropriately: + +```` +Checking if database codd-test-db is accessible with the configured connection string... (waiting up to 5sec) +Looking for pending migrations... [2 found] +Applying 2000-01-01-00-00-00-create-table-with-unique-id.sql (0.87ms) +Applying 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +Error: After applying 2 statements from no-txn migration 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql, the 3rd failed to be applied. Since this failed statement is inside an explicitly started transaction in the migration, codd will resume the next retry or codd up from the last BEGIN-like statement, which is the 2nd statement in this migration +ROLLBACKed last explicitly started transaction +Warn: Waiting 1ms before next try +Warn: Skipping the first 1 SQL statements, which have already been applied, and starting application from the 2nd statement +Applying 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +Error: After applying 2 statements from no-txn migration 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql, the 3rd failed to be applied. Since this failed statement is inside an explicitly started transaction in the migration, codd will resume the next retry or codd up from the last BEGIN-like statement, which is the 2nd statement in this migration +ROLLBACKed last explicitly started transaction +Warn: Waiting 2ms before next try +Warn: Skipping the first 1 SQL statements, which have already been applied, and starting application from the 2nd statement +Applying 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql [failed] +Error: SqlStatementException {sqlStatement = "", psimpleError = SqlError {sqlState = "", sqlExecStatus = FatalError, sqlErrorMsg = "duplicate key value violates unique constraint \"somedata_id_key\"\nDETAIL: Key (id)=(1) already exists.\nCONTEXT: COPY somedata, line 1\n)", sqlErrorDetail = "", sqlErrorHint = ""}} +Error: After applying 2 statements from no-txn migration 2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql, the 3rd failed to be applied. Since this failed statement is inside an explicitly started transaction in the migration, codd will resume the next retry or codd up from the last BEGIN-like statement, which is the 2nd statement in this migration +ROLLBACKed last explicitly started transaction +Error: Failed after all configured retries. Giving up. +```` diff --git a/src/Codd/Internal/MultiQueryStatement.hs b/src/Codd/Internal/MultiQueryStatement.hs index 330b196c..80706c0d 100644 --- a/src/Codd/Internal/MultiQueryStatement.hs +++ b/src/Codd/Internal/MultiQueryStatement.hs @@ -156,18 +156,30 @@ runSingleStatementInternal_ conn p = case p of (\(e :: SomeException) -> -- In my tests, COPY errors are of type `IOException` for `putCopyEnd` and of type `SqlError` for `copy_`. -- Sadly the ones of type `IOException` don't contain e.g. error codes, but at least their message shows the failed statement. + -- They also _sometimes_ contain an internal postgresql-simple error concatenated to the actual database error, which isn't great, so we remove it if it's there. + -- We should file a bug report to postgresql-simple. -- We transform those into `SqlError` here since all of the codebase is prepared for that. case () of () - | Just sqlEx <- fromException @DB.SqlError e -> Just sqlEx - | Just ioEx <- fromException @IOException e -> Just DB.SqlError - { DB.sqlState = "" - , DB.sqlExecStatus = DB.FatalError - , DB.sqlErrorMsg = encodeUtf8 $ Text.pack $ show ioEx - , DB.sqlErrorDetail = "" - , DB.sqlErrorHint = "" - } - | otherwise -> Nothing -- Let it blow up if we don't expect it + | Just sqlEx <- fromException @DB.SqlError e + -> Just sqlEx + | Just ioEx <- fromException @IOException e + -> let fullError = Text.pack $ show ioEx + in + Just DB.SqlError + { DB.sqlState = "" + , DB.sqlExecStatus = DB.FatalError + , DB.sqlErrorMsg = + encodeUtf8 + $ fromMaybe fullError + $ Text.stripPrefix + "user error (Database.PostgreSQL.Simple.Copy.putCopyEnd: failed to parse command status\nConnection error: ERROR: " + fullError + , DB.sqlErrorDetail = "" + , DB.sqlErrorHint = "" + } + | otherwise + -> Nothing -- Let it blow up if we don't expect it ) (pure . StatementErred . SqlStatementException stmt) applied :: MonadUnliftIO n => n StatementApplied From 6ba3513597045c2fb93b845ca05b1047f18fc007 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Mon, 1 Apr 2024 16:57:07 -0300 Subject: [PATCH 27/28] Add test for in-txn migrations failing on COMMIT and fix this scenario --- src/Codd/Internal.hs | 96 ++++++++++--------- test/DbDependentSpecs/RetrySpec.hs | 53 ++++++++++ ...1-00-00-00-create-table-with-unique-id.sql | 7 ++ ...-duplicate-inside-explicit-transaction.sql | 6 ++ 4 files changed, 119 insertions(+), 43 deletions(-) create mode 100644 test/migrations/in-txn-application-error-on-COMMIT/2000-01-01-00-00-00-create-table-with-unique-id.sql create mode 100644 test/migrations/in-txn-application-error-on-COMMIT/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql diff --git a/src/Codd/Internal.hs b/src/Codd/Internal.hs index de9dbb24..38d08c23 100644 --- a/src/Codd/Internal.hs +++ b/src/Codd/Internal.hs @@ -5,7 +5,7 @@ import Prelude hiding ( readFile ) import Codd.Environment ( CoddSettings(..) ) import Codd.Internal.MultiQueryStatement - ( SqlStatementException + ( SqlStatementException(..) , StatementApplied(..) , multiQueryStatement_ , singleStatement_ @@ -495,7 +495,6 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn fromMaybe defaultConnInfo (blockCustomConnInfo block) (_, conn) <- openConn cinfo - -- Create codd_schema and flush previously applied migrations if possible. We do this here -- since we expect _some_ of the migration blocks to use the default connection string, and after -- that we can register migrations were applied. @@ -527,8 +526,8 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn actAfterResult <- case singleInTxnBlockResult of Just result -> pure result Nothing -> do - -- It is possible to have only non-default-connection-string migrations. - -- In that case, we assume the default-connection-string will be valid after those migrations + -- It is possible to have only non-default-connection-string migrations, or to have in-txn migrations running on a non-default database last. + -- In those cases, we assume the default-connection-string will be valid after those migrations -- and use that to register all applied migrations and then run "actionAfter". (_, defaultConn) <- openConn defaultConnInfo createCoddSchemaAndFlushPendingMigrations @@ -551,46 +550,57 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> DiffTime -> MigrationApplicationStatus -> txn () - ) -- ^ Using the `txn` monad is right: registering applied migrations happens in the same connection that applies migrations if that is the default-database connection, and should be (but this is not yet implemented) scheduled to be inserted into codd_schema.sql_migrations in the first future opportunity, meaning when this function is called it's merely an in-memory operation, which can also run in `txn`. + ) -- ^ Using the `txn` monad is right: registering applied migrations happens in the same connection that applies migrations if that is a default-database connection, and otherwise will be scheduled to be inserted into codd_schema.sql_migrations in the first future opportunity, meaning when this function is called it's merely an in-memory operation, which can also run in `txn`. -> m b - runInTxnBlock act conn migBlock registerMig = do - res <- + runInTxnBlock act conn migBlock registerMig = -- Naturally, we retry entire in-txn block transactions on error, not individual statements or individual migrations - retryFold - retryPolicy - (\previousBlock RetryIteration { tryNumber } -> - if tryNumber == 0 - then pure previousBlock - else reReadBlock previousBlock - ) - migBlock - (\case - Left lastEx -> do - logError - "Failed after all configured retries. Giving up." - throwIO lastEx - Right ret -> pure ret - ) - $ \blockFinal -> do - logInfo "BEGINning transaction" - withTransaction txnIsolationLvl conn $ do - let hoistedMigs :: NonEmpty (AddedSqlMigration txn) - hoistedMigs = hoistAddedSqlMigration lift - <$> inTxnMigs blockFinal - errorOrOk <- - forMExcept hoistedMigs $ applySingleMigration - conn - registerMig - NoSkipStatements - case errorOrOk of - Left e -> do - liftIO $ DB.rollback conn - logInfo - "ROLLBACKed transaction" - pure $ Left e - Right () -> Right <$> act conn - logInfo "COMMITed transaction" - pure res + retryFold + retryPolicy + (\previousBlock RetryIteration { tryNumber } -> + if tryNumber == 0 + then pure previousBlock + else reReadBlock previousBlock + ) + migBlock + (\case + Left lastEx -> do + logError + "Failed after all configured retries. Giving up." + throwIO lastEx + Right ret -> pure ret + ) + $ \blockFinal -> do + logInfo "BEGINning transaction" + withTransaction txnIsolationLvl conn $ do + let hoistedMigs :: NonEmpty (AddedSqlMigration txn) + hoistedMigs = hoistAddedSqlMigration lift + <$> inTxnMigs blockFinal + errorOrOk <- forMExcept hoistedMigs $ applySingleMigration + conn + registerMig + NoSkipStatements + case errorOrOk of + Left e -> do + liftIO $ DB.rollback conn + logInfo + "ROLLBACKed transaction" + pure $ Left e + Right () -> do + res <- act conn + -- Also catch exceptions on COMMIT so they're treated as a retriable error + commitE <- try $ liftIO $ DB.execute_ conn + "COMMIT" + case commitE of + Left e -> do + let sqlEx = + SqlStatementException "COMMIT" e + logError (Text.pack $ show sqlEx) + logError "COMMIT failed" + pure $ Left sqlEx + Right _ -> do + logInfo + "COMMITed transaction" + pure $ Right res runNoTxnMig :: DB.Connection @@ -600,7 +610,7 @@ applyCollectedMigrations actionAfter CoddSettings { migsConnString = defaultConn -> DiffTime -> MigrationApplicationStatus -> m () - ) -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. See reasons presented there. + ) -- ^ This is `m` instead of `txn` and is correct, unlike in `runInTxnBlock`. The reason is that there is no transaction opened by codd for no-txn migrations, and so the function that registers applied migrations needs to start its own transaction. -> m (Maybe x) runNoTxnMig conn mig registerMig = do retryFold diff --git a/test/DbDependentSpecs/RetrySpec.hs b/test/DbDependentSpecs/RetrySpec.hs index 02b57298..5bbb4f3b 100644 --- a/test/DbDependentSpecs/RetrySpec.hs +++ b/test/DbDependentSpecs/RetrySpec.hs @@ -96,6 +96,59 @@ spec = do length (filter ("COMMIT" `Text.isInfixOf`) logs) `shouldBe` 0 + aroundFreshDatabase + $ it + "In-txn migrations with failure in COMMIT are handled nicely" + $ \dbInfo0 -> do + let + dbInfo = dbInfo0 + { retryPolicy = RetryPolicy + 2 + (ExponentialBackoff (realToFrac @Double 0.001) + ) + } + logsmv <- newMVar [] + runMVarLogger + logsmv + (applyMigrationsNoCheck + dbInfo + { sqlMigrations = + [ "test/migrations/in-txn-application-error-on-COMMIT" + ] + } + Nothing + testConnTimeout + (const $ pure ()) + ) + `shouldThrow` (\(e :: SqlStatementException) -> + "duplicate key" + `List.isInfixOf` show e + ) + nonBootstrapAppliedMigs :: [(String, Int, Bool)] <- + withConnection + (migsConnString dbInfo) + testConnTimeout + (\conn -> DB.query + conn + "SELECT name, num_applied_statements, no_txn_failed_at IS NULL from codd_schema.sql_migrations order by id OFFSET 1 -- Skip the bootstrap migration" + () + ) + nonBootstrapAppliedMigs `shouldBe` [] + logs <- readMVar logsmv + length (filter ("ROLLBACK" `Text.isInfixOf`) logs) + `shouldBe` 0 -- Instead of ROLLBACK, we see "COMMIT failed" messages + length (filter ("COMMIT failed" `Text.isInfixOf`) logs) + `shouldBe` 3 + length + (filter + ("duplicate key value violates unique constraint" `Text.isInfixOf` + ) + logs + ) + `shouldBe` 3 + length (filter ("BEGIN" `Text.isInfixOf`) logs) + `shouldBe` 3 + aroundFreshDatabase $ it "No-txn migration with failure in statement not in explicit transaction block retries from the right place" diff --git a/test/migrations/in-txn-application-error-on-COMMIT/2000-01-01-00-00-00-create-table-with-unique-id.sql b/test/migrations/in-txn-application-error-on-COMMIT/2000-01-01-00-00-00-create-table-with-unique-id.sql new file mode 100644 index 00000000..1e46d6ca --- /dev/null +++ b/test/migrations/in-txn-application-error-on-COMMIT/2000-01-01-00-00-00-create-table-with-unique-id.sql @@ -0,0 +1,7 @@ +CREATE TABLE somedata (id INT NOT NULL, UNIQUE(id) DEFERRABLE INITIALLY DEFERRED); + +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. diff --git a/test/migrations/in-txn-application-error-on-COMMIT/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql b/test/migrations/in-txn-application-error-on-COMMIT/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql new file mode 100644 index 00000000..87921fe5 --- /dev/null +++ b/test/migrations/in-txn-application-error-on-COMMIT/2001-01-01-00-00-00-insert-duplicate-inside-explicit-transaction.sql @@ -0,0 +1,6 @@ +INSERT INTO somedata(id) VALUES (4); +COPY somedata FROM STDIN WITH (FORMAT csv); +1 +2 +3 +\. From 4dc0983d64c20c019ee54de936a946fd30ebdd41 Mon Sep 17 00:00:00 2001 From: Marcelo Zabani Date: Tue, 2 Apr 2024 16:52:26 -0300 Subject: [PATCH 28/28] Remove `--no-apply` from `codd add` --- app/Main.hs | 19 ++++--------- src/Codd/AppCommands/AddMigration.hs | 40 ++++++++++------------------ 2 files changed, 19 insertions(+), 40 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 98ecf10c..de75c961 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,9 +1,7 @@ module Main where import qualified Codd -import Codd.AppCommands.AddMigration ( AddMigrationOptions(..) - , addMigration - ) +import Codd.AppCommands.AddMigration ( addMigration ) import Codd.AppCommands.VerifySchema ( verifySchema ) import Codd.AppCommands.WriteSchema ( WriteSchemaOpts(..) , writeSchema @@ -24,7 +22,7 @@ import Options.Applicative import qualified System.IO as IO import qualified Text.Read as Text -data Cmd = Up (Maybe Codd.VerifySchemas) DiffTime | Add AddMigrationOptions (Maybe FilePath) (LogLevel -> Bool) SqlFilePath | WriteSchema WriteSchemaOpts | VerifySchema (LogLevel -> Bool) Bool +data Cmd = Up (Maybe Codd.VerifySchemas) DiffTime | Add (Maybe FilePath) (LogLevel -> Bool) SqlFilePath | WriteSchema WriteSchemaOpts | VerifySchema (LogLevel -> Bool) Bool cmdParser :: Parser Cmd cmdParser = hsubparser @@ -101,13 +99,7 @@ upParser = addParser :: Parser Cmd addParser = Add - <$> (AddMigrationOptions <$> switch - ( long "no-apply" - <> help - "Do not apply any pending migrations, including the one being added." - ) - ) - <*> optionalStrOption + <$> optionalStrOption ( long "dest-folder" <> help "Specify the folder path where the .sql migration shall be put. If unspecified, the first folder in the 'CODD_MIGRATION_DIRS' environment variable will be used" @@ -191,9 +183,8 @@ doWork dbInfo (Up mCheckSchemas connectTimeout) = Nothing connectTimeout checkSchemas -doWork dbInfo (Add addOpts destFolder verbosity fp) = - runCoddLoggerLevelFilter verbosity - $ addMigration dbInfo addOpts destFolder fp +doWork dbInfo (Add destFolder verbosity fp) = + runCoddLoggerLevelFilter verbosity $ addMigration dbInfo destFolder fp doWork dbInfo (VerifySchema verbosity fromStdin) = runCoddLoggerLevelFilter verbosity $ verifySchema dbInfo fromStdin doWork dbInfo (WriteSchema opts) = writeSchema dbInfo opts diff --git a/src/Codd/AppCommands/AddMigration.hs b/src/Codd/AppCommands/AddMigration.hs index a155b9a2..8d0224cd 100644 --- a/src/Codd/AppCommands/AddMigration.hs +++ b/src/Codd/AppCommands/AddMigration.hs @@ -1,6 +1,5 @@ module Codd.AppCommands.AddMigration - ( AddMigrationOptions(..) - , addMigration + ( addMigration ) where import qualified Codd @@ -15,7 +14,6 @@ import Codd.Internal ( delayedOpenStreamFile ) import Codd.Logging ( CoddLogger , logError - , logInfo , logInfoAlways ) import Codd.Parsing ( EnvVars @@ -51,19 +49,14 @@ import UnliftIO.Exception ( SomeException ) import UnliftIO.Resource ( runResourceT ) -newtype AddMigrationOptions = AddMigrationOptions - { dontApply :: Bool - } - addMigration :: forall m . (MonadUnliftIO m, CoddLogger m, MonadThrow m, EnvVars m, NotInTxn m) => CoddSettings - -> AddMigrationOptions -> Maybe FilePath -> SqlFilePath -> m () -addMigration dbInfo@Codd.CoddSettings { onDiskReps, migsConnString, sqlMigrations } AddMigrationOptions { dontApply } destFolder sqlFp@(SqlFilePath fp) +addMigration dbInfo@Codd.CoddSettings { onDiskReps, migsConnString, sqlMigrations } destFolder sqlFp@(SqlFilePath fp) = do finalDir <- case (destFolder, sqlMigrations) of (Just f, _) -> pure f @@ -131,25 +124,20 @@ addMigration dbInfo@Codd.CoddSettings { onDiskReps, migsConnString, sqlMigration finalMigFile <- timestampAndCopyMigrationFile sqlFp finalDir addE <- try $ do - unless dontApply $ do - databaseSchemas <- Codd.applyMigrationsNoCheck - dbInfo - Nothing - (secondsToDiffTime 5) - (readRepresentationsFromDbWithSettings dbInfo) - persistRepsToDisk databaseSchemas onDiskRepsDir + databaseSchemas <- Codd.applyMigrationsNoCheck + dbInfo + Nothing + (secondsToDiffTime 5) + (readRepresentationsFromDbWithSettings dbInfo) + persistRepsToDisk databaseSchemas onDiskRepsDir - logInfoAlways - $ "New migration applied and added to " - <> Text.pack finalMigFile - logInfoAlways - $ "Updated expected DB schema representations in the " - <> Text.pack onDiskRepsDir - <> " folder" - when dontApply - $ logInfo - $ "Migration was NOT applied, but was added to " + logInfoAlways + $ "New migration applied and added to " <> Text.pack finalMigFile + logInfoAlways + $ "Updated expected DB schema representations in the " + <> Text.pack onDiskRepsDir + <> " folder" case addE of Right _ -> do -- Remove original file