接前文,初步学习pg_control文件之十一,再来看下面这个字段:
XLogRecPtr minRecoveryPoint;
看其注释:
* minRecoveryPoint is updated to the latest replayed LSN whenever we
* flush a data change during archive recovery. That guards against
* starting archive recovery, aborting it, and restarting with an earlier
* stop location. If we've already flushed data changes from WAL record X
* to disk, we mustn't start up until we reach X again. Zero when not
* doing archive recovery.
看来,是为了防止归档恢复被中断之后,又以更早的停止位置重新启动:如果WAL记录X带来的数据变更已经刷到磁盘,那么必须重新回放到X之后才允许启动。它强调的是在 archive recovery 的情况下;不处于 archive recovery 时该值为零。
启动时,如果处于Recovery状态,则要进行设置:
/*
* StartupXLOG
*
* This must be called ONCE during postmaster or standalone-backend startup
*
* NOTE: this listing is an excerpt. Every "…" line stands for code that is
* not shown here; only the control-file sanity check and the initialization
* of ControlFile->minRecoveryPoint are visible.
*/
void
StartupXLOG(void)
{
…
CheckPoint checkPoint;
…
/*
* Read control file and check XLOG status looks valid.
* Note: in most control paths, *ControlFile is already valid and we need
* not do ReadControlFile() here, but might as well do it to be sure.
*
* The test below reports FATAL when the state lies outside the range
* DB_SHUTDOWNED..DB_IN_PRODUCTION, or when the checkpoint's xrecoff does
* not pass XRecOffIsValid().
*/
ReadControlFile(); if (ControlFile->state < DB_SHUTDOWNED ||
ControlFile->state > DB_IN_PRODUCTION ||
!XRecOffIsValid(ControlFile->checkPoint.xrecoff))
ereport(FATAL,
(errmsg("control file contains invalid data")));
… /* REDO */
if (InRecovery)
{
…
/*
* In archive recovery, never leave minRecoveryPoint behind the redo
* pointer of the checkpoint held in the local 'checkPoint'.
*/
if (InArchiveRecovery)
{
/* initialize minRecoveryPoint if not set yet */
if (XLByteLT(ControlFile->minRecoveryPoint, checkPoint.redo))
ControlFile->minRecoveryPoint = checkPoint.redo;
}
…
}
…
}
在xlog_redo函数中得到处理:
/*
 * XLOG resource manager's routines
 *
 * Definitions of info values are in include/catalog/pg_control.h, though
 * not all record types are related to control file updates.
 *
 * xlog_redo
 *
 * lsn:    WAL position associated with the record being replayed; it is
 *         compared against, and may be stored into,
 *         ControlFile->minRecoveryPoint.
 * record: the XLOG record being replayed.
 *
 * NOTE: this listing is an excerpt. Every "…" line stands for code that is
 * not shown here (including the declarations of 'info', 'startpoint' and
 * 'xlrec'); only the branches that touch minRecoveryPoint are visible.
 */
void
xlog_redo(XLogRecPtr lsn, XLogRecord *record)
{
    …
    if (info == XLOG_NEXTOID)
    {
        …
    }
    …
    else if (info == XLOG_BACKUP_END)
    {
        …
        if (XLByteEQ(ControlFile->backupStartPoint, startpoint))
        {
            /*
             * We have reached the end of base backup, the point where
             * pg_stop_backup() was done. The data on disk is now consistent.
             * Reset backupStartPoint, and update minRecoveryPoint to make
             * sure we don't allow starting up at an earlier point even if
             * recovery is stopped and restarted soon after this.
             */
            …
            LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);

            /* Only ever move minRecoveryPoint forward. */
            if (XLByteLT(ControlFile->minRecoveryPoint, lsn))
                ControlFile->minRecoveryPoint = lsn;

            /* Zero-fill backupStartPoint to mark it as reset. */
            MemSet(&ControlFile->backupStartPoint, 0, sizeof(XLogRecPtr));
            UpdateControlFile();

            LWLockRelease(ControlFileLock);
        }
    }
    else if (info == XLOG_PARAMETER_CHANGE)
    {
        …
        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
        ControlFile->MaxConnections = xlrec.MaxConnections;
        ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts;
        ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
        ControlFile->wal_level = xlrec.wal_level;

        /*
         * Update minRecoveryPoint to ensure that if recovery is aborted, we
         * recover back up to this point before allowing hot standby again.
         * This is particularly important if wal_level was set to 'archive'
         * before, and is now 'hot_standby', to ensure you don't run queries
         * against the WAL preceding the wal_level change. Same applies to
         * decreasing max_* settings.
         *
         * A minRecoveryPoint of 0/0 is left untouched; it is only advanced
         * when it is already set and lies before 'lsn'.
         */
        minRecoveryPoint = ControlFile->minRecoveryPoint;
        if ((minRecoveryPoint.xlogid != 0 || minRecoveryPoint.xrecoff != 0)
            && XLByteLT(minRecoveryPoint, lsn))
        {
            ControlFile->minRecoveryPoint = lsn;
        }

        UpdateControlFile();
        LWLockRelease(ControlFileLock);
        …
    }
}
再就是checkpoint发生的时候,也做了处理:
CreateCheckPoint --> XLogFlush --> UpdateMinRecoveryPoint(仅当 XLogInsertAllowed() 返回假时,XLogFlush 才会转而调用 UpdateMinRecoveryPoint)
/*
* Perform a checkpoint --- either during shutdown, or on-the-fly
*
* flags is a bitwise OR of the following:
* CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
* CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
* CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
* ignoring checkpoint_completion_target parameter.
* CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occurred
* since the last one (implied by CHECKPOINT_IS_SHUTDOWN or
* CHECKPOINT_END_OF_RECOVERY).
*
* Note: flags contains other bits, of interest here only for logging purposes.
* In particular note that this routine is synchronous and does not pay
* attention to CHECKPOINT_WAIT.
*
* NOTE: this listing is an excerpt. Every "…" line stands for code that is
* not shown here (including the declarations of 'shutdown', 'rdata' and
* 'recptr'); only the insert-and-flush step is visible.
*/
void
CreateCheckPoint(int flags)
{
…
/*
* Insert the checkpoint record (the shutdown or the online variant,
* depending on 'shutdown'), then call XLogFlush() on the position that
* XLogInsert() returned.
*/
recptr = XLogInsert(RM_XLOG_ID,
shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
XLOG_CHECKPOINT_ONLINE,
&rdata); XLogFlush(recptr);
…
}
/*
* Ensure that all XLOG data through the given position is flushed to disk.
*
* NOTE: this differs from XLogWrite mainly in that the WALWriteLock is not
* already held, and we try to avoid acquiring it if possible.
*
* record: the WAL position through which data must be flushed.
*
* NOTE: this listing is an excerpt. The trailing "…" stands for the rest of
* the function, which is not shown here.
*/
void
XLogFlush(XLogRecPtr record)
{
XLogRecPtr WriteRqstPtr;
XLogwrtRqst WriteRqst; /*
* During REDO, we are reading not writing WAL. Therefore, instead of
* trying to flush the WAL, we should update minRecoveryPoint instead. We
* test XLogInsertAllowed(), not InRecovery, because we need the bgwriter
* to act this way too, and because when the bgwriter tries to write the
* end-of-recovery checkpoint, it should indeed flush.
*
* 'force' is passed as false, so 'record' is the position being
* requested as the new minimum; nothing is flushed on this path.
*/
if (!XLogInsertAllowed())
{
UpdateMinRecoveryPoint(record, false);
return;
} /* Quick exit if already known flushed */
if (XLByteLE(record, LogwrtResult.Flush))
return; …
}
/*
 * Advance minRecoveryPoint in control file.
 *
 * If we crash during recovery, we must reach this point again before the
 * database is consistent.
 *
 * lsn:   requested minimum recovery position; ignored when 'force' is true.
 * force: when true, update regardless of 'lsn'. Otherwise minRecoveryPoint
 *        is only updated if it's not already greater than or equal to 'lsn'.
 *
 * NOTE: this listing is an excerpt. The "…" at the top of the body stands
 * for code that is not shown here; presumably it contains the
 * ControlFileLock acquisition that matches the LWLockRelease() at the end,
 * and the refresh of the local minRecoveryPoint copy -- confirm against the
 * full source.
 */
static void
UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
{
    …

    /*
     * An invalid minRecoveryPoint means that we need to recover all the WAL,
     * i.e., we're doing crash recovery.  We never modify the control file's
     * value in that case, so we can short-circuit future checks here too.
     */
    if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
        updateMinRecoveryPoint = false;
    else if (force || XLByteLT(minRecoveryPoint, lsn))
    {
        /* use volatile pointer to prevent code rearrangement */
        volatile XLogCtlData *xlogctl = XLogCtl;
        XLogRecPtr  newMinRecoveryPoint;

        /*
         * To avoid having to update the control file too often, we update it
         * all the way to the last record being replayed, even though 'lsn'
         * would suffice for correctness.  This also allows the 'force' case
         * to not need a valid 'lsn' value.
         *
         * Another important reason for doing it this way is that the passed
         * 'lsn' value could be bogus, i.e., past the end of available WAL, if
         * the caller got it from a corrupted heap page.  Accepting such a
         * value as the min recovery point would prevent us from coming up at
         * all.  Instead, we just log a warning and continue with recovery.
         * (See also the comments about corrupt LSNs in XLogFlush.)
         */
        SpinLockAcquire(&xlogctl->info_lck);
        newMinRecoveryPoint = xlogctl->replayEndRecPtr;
        SpinLockRelease(&xlogctl->info_lck);

        if (!force && XLByteLT(newMinRecoveryPoint, lsn))
            elog(WARNING,
                 "xlog min recovery request %X/%X is past current point %X/%X",
                 lsn.xlogid, lsn.xrecoff,
                 newMinRecoveryPoint.xlogid, newMinRecoveryPoint.xrecoff);

        /* update control file */
        if (XLByteLT(ControlFile->minRecoveryPoint, newMinRecoveryPoint))
        {
            ControlFile->minRecoveryPoint = newMinRecoveryPoint;
            UpdateControlFile();
            /* keep the local copy in step with what was just written */
            minRecoveryPoint = newMinRecoveryPoint;

            ereport(DEBUG2,
                    (errmsg("updated min recovery point to %X/%X",
                            minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
        }
    }
    LWLockRelease(ControlFileLock);
}