先说 heap_insert 函数:
/*
* heap_insert - insert tuple into a heap
*
* The new tuple is stamped with current transaction ID and the specified
* command ID.
*
* If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
* logged in WAL, even for a non-temp relation. Safe usage of this behavior
* requires that we arrange that all new tuples go into new pages not
* containing any tuples from other transactions, and that the relation gets
* fsync'd before commit. (See also heap_sync() comments)
*
* The HEAP_INSERT_SKIP_FSM option is passed directly to
* RelationGetBufferForTuple, which see for more info.
*
* Note that these options will be applied when inserting into the heap's
* TOAST table, too, if the tuple requires any out-of-line data.
*
* The BulkInsertState object (if any; bistate can be NULL for default
* behavior) is also just passed through to RelationGetBufferForTuple.
*
* The return value is the OID assigned to the tuple (either here or by the
* caller), or InvalidOid if no OID. The header fields of *tup are updated
* to match the stored tuple; in particular tup->t_self receives the actual
* TID where the tuple was stored. But note that any toasting of fields
* within the tuple data is NOT reflected into *tup.
*/
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
            int options, BulkInsertState bistate)
{
    /**
    Form_pg_class tmprel = relation->rd_rel;
    NameData tmprelname = tmprel->relname;
    fprintf(stderr,"Insert into: %s\n", tmprelname.data);
    fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
    */
    TransactionId xid = GetCurrentTransactionId();
    HeapTuple   heaptup;
    Buffer      buffer;
    bool        all_visible_cleared = false;

    if (relation->rd_rel->relhasoids)
    {
#ifdef NOT_USED
        /* this is redundant with an Assert in HeapTupleSetOid */
        Assert(tup->t_data->t_infomask & HEAP_HASOID);
#endif

        /*
         * If the object id of this tuple has already been assigned, trust the
         * caller.  There are a couple of ways this can happen.  At initial db
         * creation, the backend program sets oids for tuples. When we define
         * an index, we set the oid.  Finally, in the future, we may allow
         * users to set their own object ids in order to support a persistent
         * object store (objects need to contain pointers to one another).
         */
        if (!OidIsValid(HeapTupleGetOid(tup)))
            HeapTupleSetOid(tup, GetNewOid(relation));
    }
    else
    {
        /* check there is not space for an OID */
        Assert(!(tup->t_data->t_infomask & HEAP_HASOID));
    }

    /* Reset transaction-status bits and stamp the tuple with xid/cid */
    tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
    tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
    tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
    HeapTupleHeaderSetXmin(tup->t_data, xid);
    HeapTupleHeaderSetCmin(tup->t_data, cid);
    HeapTupleHeaderSetXmax(tup->t_data, 0);     /* for cleanliness */
    tup->t_tableOid = RelationGetRelid(relation);

    /*
     * If the new tuple is too big for storage or contains already toasted
     * out-of-line attributes from some other relation, invoke the toaster.
     *
     * Note: below this point, heaptup is the data we actually intend to store
     * into the relation; tup is the caller's original untoasted data.
     */
    if (relation->rd_rel->relkind != RELKIND_RELATION)
    {
        /* toast table entries should never be recursively toasted */
        Assert(!HeapTupleHasExternal(tup));
        heaptup = tup;
    }
    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
        heaptup = toast_insert_or_update(relation, tup, NULL, options);
    else
        heaptup = tup;

    /*
     * We're about to do the actual insert -- but check for conflict first,
     * to avoid possibly having to roll back work we've just done.
     *
     * For a heap insert, we only need to check for table-level SSI locks.
     * Our new tuple can't possibly conflict with existing tuple locks, and
     * heap page locks are only consolidated versions of tuple locks; they do
     * not lock "gaps" as index page locks do.  So we don't need to identify
     * a buffer before making the call.
     */
    CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);

    /* Find buffer to insert this tuple into */
    buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
                                       InvalidBuffer, options, bistate);

    /* NO EREPORT(ERROR) from here till changes are logged */
    START_CRIT_SECTION();

    RelationPutHeapTuple(relation, buffer, heaptup);

    if (PageIsAllVisible(BufferGetPage(buffer)))
    {
        all_visible_cleared = true;
        PageClearAllVisible(BufferGetPage(buffer));
    }

    /*
     * XXX Should we set PageSetPrunable on this page ?
     *
     * The inserting transaction may eventually abort thus making this tuple
     * DEAD and hence available for pruning. Though we don't want to optimize
     * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
     * aborted tuple will never be pruned until next vacuum is triggered.
     *
     * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
     */

    MarkBufferDirty(buffer);

    /* XLOG stuff */
    if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
    {
        xl_heap_insert xlrec;
        xl_heap_header xlhdr;
        XLogRecPtr  recptr;
        /* three chained segments: insert record, tuple header, tuple body */
        XLogRecData rdata[3];
        Page        page = BufferGetPage(buffer);
        uint8       info = XLOG_HEAP_INSERT;

        xlrec.all_visible_cleared = all_visible_cleared;
        xlrec.target.node = relation->rd_node;
        xlrec.target.tid = heaptup->t_self;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = SizeOfHeapInsert;
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);

        xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
        xlhdr.t_infomask = heaptup->t_data->t_infomask;
        xlhdr.t_hoff = heaptup->t_data->t_hoff;

        /*
         * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
         * to write the whole page to the xlog, we don't need to store
         * xl_heap_header in the xlog.
         */
        rdata[1].data = (char *) &xlhdr;
        rdata[1].len = SizeOfHeapHeader;
        rdata[1].buffer = buffer;
        rdata[1].buffer_std = true;
        rdata[1].next = &(rdata[2]);

        /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
        rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
        rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
        rdata[2].buffer = buffer;
        rdata[2].buffer_std = true;
        rdata[2].next = NULL;

        /*
         * If this is the single and first tuple on page, we can reinit the
         * page instead of restoring the whole thing.  Set flag, and hide
         * buffer references from XLogInsert.
         */
        if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
            PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
        {
            info |= XLOG_HEAP_INIT_PAGE;
            rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
        }

        recptr = XLogInsert(RM_HEAP_ID, info, rdata);

        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    UnlockReleaseBuffer(buffer);

    /* Clear the bit in the visibility map if necessary */
    if (all_visible_cleared)
        visibilitymap_clear(relation,
                            ItemPointerGetBlockNumber(&(heaptup->t_self)));

    /*
     * If tuple is cachable, mark it for invalidation from the caches in case
     * we abort.  Note it is OK to do this after releasing the buffer, because
     * the heaptup data structure is all in local memory, not in the shared
     * buffer.
     */
    CacheInvalidateHeapTuple(relation, heaptup);

    pgstat_count_heap_insert(relation);

    /*
     * If heaptup is a private copy, release it.  Don't forget to copy t_self
     * back to the caller's image, too.
     */
    if (heaptup != tup)
    {
        tup->t_self = heaptup->t_self;
        heap_freetuple(heaptup);
    }

    return HeapTupleGetOid(tup);
}
如果我执行一条普通的 SQL 语句,则可以加入这样的调试代码,来看看我是否确实向我想要的表中插入了数据:
/**
Form_pg_class tmprel = relation->rd_rel;
NameData tmprelname = tmprel->relname;
fprintf(stderr,"Insert into: %s\n", tmprelname.data);
fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
*/
当我执行 create tablespace的时候,我想它是要写入数据字典的。
但是上述代码反映出来的 relNode 是不正确的,而 relname 也是空值。
然后我从更高层的调用层面来观察:
当我执行 create tablespace的时候,调用关系如下:
PostgresMain-->exec_simple_query-->PortalRun-->PortalRunMulti-->PortalRunUtility-->CreateTableSpace
-->simple_heap_insert-->heap_insert
再看 CreateTableSpace 函数:
/*
* Create a table space
*
* Only superusers can create a tablespace. This seems a reasonable restriction
* since we're determining the system layout and, anyway, we probably have
* root if we're doing this kind of activity
*/
void
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
#ifdef HAVE_SYMLINK
    Relation    rel;
    Datum       values[Natts_pg_tablespace];
    bool        nulls[Natts_pg_tablespace];
    HeapTuple   tuple;
    Oid         tablespaceoid;
    char       *location;
    Oid         ownerId;

    /* Must be super user */
    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 errmsg("permission denied to create tablespace \"%s\"",
                        stmt->tablespacename),
                 errhint("Must be superuser to create a tablespace.")));

    /* However, the eventual owner of the tablespace need not be */
    if (stmt->owner)
        ownerId = get_role_oid(stmt->owner, false);
    else
        ownerId = GetUserId();

    /* Unix-ify the offered path, and strip any trailing slashes */
    location = pstrdup(stmt->location);
    canonicalize_path(location);

    /* disallow quotes, else CREATE DATABASE would be at risk */
    if (strchr(location, '\''))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_NAME),
                 errmsg("tablespace location cannot contain single quotes")));

    /*
     * Allowing relative paths seems risky
     *
     * this also helps us ensure that location is not empty or whitespace
     */
    if (!is_absolute_path(location))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("tablespace location must be an absolute path")));

    /*
     * Check that location isn't too long. Remember that we're going to append
     * 'PG_XXX/<dboid>/<relid>.<nnn>'.  FYI, we never actually reference the
     * whole path, but mkdir() uses the first two parts.
     *
     * Each "+ 1" accounts for one separator character between components.
     */
    if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
        OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS > MAXPGPATH)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("tablespace location \"%s\" is too long",
                        location)));

    /*
     * Disallow creation of tablespaces named "pg_xxx"; we reserve this
     * namespace for system purposes.
     */
    if (!allowSystemTableMods && IsReservedName(stmt->tablespacename))
        ereport(ERROR,
                (errcode(ERRCODE_RESERVED_NAME),
                 errmsg("unacceptable tablespace name \"%s\"",
                        stmt->tablespacename),
                 errdetail("The prefix \"pg_\" is reserved for system tablespaces.")));

    /*
     * Check that there is no other tablespace by this name.  (The unique
     * index would catch this anyway, but might as well give a friendlier
     * message.)
     */
    if (OidIsValid(get_tablespace_oid(stmt->tablespacename, true)))
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
                 errmsg("tablespace \"%s\" already exists",
                        stmt->tablespacename)));

    /*
     * Insert tuple into pg_tablespace.  The purpose of doing this first is to
     * lock the proposed tablename against other would-be creators. The
     * insertion will roll back if we find problems below.
     */
    rel = heap_open(TableSpaceRelationId, RowExclusiveLock);

    MemSet(nulls, false, sizeof(nulls));

    /* Anum_* attribute numbers are 1-based; the arrays are 0-based */
    values[Anum_pg_tablespace_spcname - 1] =
        DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
    values[Anum_pg_tablespace_spcowner - 1] =
        ObjectIdGetDatum(ownerId);
    values[Anum_pg_tablespace_spclocation - 1] =
        CStringGetTextDatum(location);
    nulls[Anum_pg_tablespace_spcacl - 1] = true;
    nulls[Anum_pg_tablespace_spcoptions - 1] = true;

    tuple = heap_form_tuple(rel->rd_att, values, nulls);

    tablespaceoid = simple_heap_insert(rel, tuple);

    CatalogUpdateIndexes(rel, tuple);

    heap_freetuple(tuple);

    /* Record dependency on owner */
    recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId);

    /* Post creation hook for new tablespace */
    InvokeObjectAccessHook(OAT_POST_CREATE,
                           TableSpaceRelationId, tablespaceoid, 0);

    create_tablespace_directories(location, tablespaceoid);

    /* Record the filesystem change in XLOG */
    {
        xl_tblspc_create_rec xlrec;
        /* two chained segments: fixed record header, then the path string */
        XLogRecData rdata[2];

        xlrec.ts_id = tablespaceoid;
        rdata[0].data = (char *) &xlrec;
        rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
        rdata[0].buffer = InvalidBuffer;
        rdata[0].next = &(rdata[1]);

        rdata[1].data = (char *) location;
        rdata[1].len = strlen(location) + 1;    /* include terminating NUL */
        rdata[1].buffer = InvalidBuffer;
        rdata[1].next = NULL;

        (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata);
    }

    /*
     * Force synchronous commit, to minimize the window between creating the
     * symlink on-disk and marking the transaction committed.  It's not great
     * that there is any window at all, but definitely we don't want to make
     * it larger than necessary.
     */
    ForceSyncCommit();

    pfree(location);

    /* We keep the lock on pg_tablespace until commit */
    heap_close(rel, NoLock);
#else                           /* !HAVE_SYMLINK */
    ereport(ERROR,
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
             errmsg("tablespaces are not supported on this platform")));
#endif   /* HAVE_SYMLINK */
}
/*
* simple_heap_insert - insert a tuple
*
* Currently, this routine differs from heap_insert only in supplying
* a default command ID and not allowing access to the speedup options.
*
* This should be used rather than using heap_insert directly in most places
* where we are modifying system catalogs.
*/
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
    /*
     * Delegate to heap_insert with the current command ID, no option flags
     * (0), and no bulk-insert state (NULL).
     */
    return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
}
我把它简练化,看看它都干了什么:
/*
* Create a table space
*
* Only superusers can create a tablespace. This seems a reasonable restriction
* since we're determining the system layout and, anyway, we probably have
* root if we're doing this kind of activity
*/
void
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
/* Abbreviated excerpt: each "......" stands for code omitted from the full function. */
......
/*
* Insert tuple into pg_tablespace. The purpose of doing this first is to
* lock the proposed tablename against other would-be creators. The
* insertion will roll back if we find problems below.
*/
/* Open the relation identified by TableSpaceRelationId with RowExclusiveLock. */
rel = heap_open(TableSpaceRelationId, RowExclusiveLock);
......
/* Insert the new row via simple_heap_insert; its Oid return value is kept as tablespaceoid. */
tablespaceoid = simple_heap_insert(rel, tuple);
......
}
而 heap_open(TableSpaceRelationId, RowExclusiveLock) 这一句,
里面的 TableSpaceRelationId其实是宏:
/* ----------------
* pg_tablespace definition. cpp turns this into
* typedef struct FormData_pg_tablespace
* ----------------
*/
/* Identifier passed to heap_open() to open pg_tablespace; 1213::regclass resolves to pg_tablespace. */
#define TableSpaceRelationId 1213
而如果想要看到值,可以运行下面的语句,恰好 1213 对应的就是 pg_tablespace 表。
[pgsql@localhost bin]$ ./psql
psql (9.1.)
Type "help" for help.

pgsql=# select 1213::regclass;
   regclass
---------------
 pg_tablespace
(1 row)

pgsql=#
但是,实际上,pg_tablespace是数据字典,而数据库的目录中,并不存在一个单独的1213文件与之对应。
如果我用上述的:
/**
Form_pg_class tmprel = relation->rd_rel;
NameData tmprelname = tmprel->relname;
fprintf(stderr,"Insert into: %s\n", tmprelname.data);
fprintf(stderr,"In heap_insert,going to insert into table:%d \n\n",relation->rd_node.relNode );
*/
来看,就会知道 relNode是 12587。
我可以在 global目录下,找到这个 12587文件。
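1213 对应着 12587文件。这是一个比较怪异的事情。
(其实原因在于:1213 是 pg_tablespace 这张表的 OID,而磁盘上的文件名是它的 relfilenode,两者并不要求相等。pg_tablespace 是共享系统表,属于"映射关系"(mapped relation),它在 pg_class 中的 relfilenode 为 0,真正的文件号保存在 global 目录下的 pg_filenode.map 中,可以用 select pg_relation_filenode('pg_tablespace'); 查到。)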