您好,登錄后才能下訂單哦!
本節(jié)繼續(xù)介紹排序的實現(xiàn),上一節(jié)介紹了ExecSort,本節(jié)介紹該函數(shù)中調(diào)用的排序的具體實現(xiàn)函數(shù),本節(jié)是第一部分,包括tuplesort_begin_heap/tuplesort_puttupleslot.
SortState
排序運行期狀態(tài)信息
/* ----------------
* SortState information
* 排序運行期狀態(tài)信息
* ----------------
*/
typedef struct SortState
{
//基類
ScanState ss; /* its first field is NodeTag */
//是否需要隨機訪問排序輸出?
bool randomAccess; /* need random access to sort output? */
//結(jié)果集是否存在邊界?
bool bounded; /* is the result set bounded? */
//如存在邊界,需要多少個元組?
int64 bound; /* if bounded, how many tuples are needed */
//是否已完成排序?
bool sort_Done; /* sort completed yet? */
//是否使用有界值?
bool bounded_Done; /* value of bounded we did the sort with */
//使用的有界值?
int64 bound_Done; /* value of bound we did the sort with */
//tuplesort.c的私有狀態(tài)
void *tuplesortstate; /* private state of tuplesort.c */
//是否worker?
bool am_worker; /* are we a worker? */
//每個worker對應(yīng)一個條目
SharedSortInfo *shared_info; /* one entry per worker */
} SortState;
/* ----------------
* Shared memory container for per-worker sort information
* per-worker排序信息的共享內(nèi)存容器
* ----------------
*/
typedef struct SharedSortInfo
{
//worker個數(shù)?
int num_workers;
//排序機制
TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
} SharedSortInfo;
TuplesortInstrumentation
報告排序統(tǒng)計的數(shù)據(jù)結(jié)構(gòu).
/*
* Data structures for reporting sort statistics. Note that
* TuplesortInstrumentation can't contain any pointers because we
* sometimes put it in shared memory.
* 報告排序統(tǒng)計的數(shù)據(jù)結(jié)構(gòu).
* 注意TuplesortInstrumentation不能包含指針因為有時候會把該結(jié)構(gòu)體放在共享內(nèi)存中.
*/
typedef enum
{
SORT_TYPE_STILL_IN_PROGRESS = 0,//仍然在排序中
SORT_TYPE_TOP_N_HEAPSORT,//TOP N 堆排序
SORT_TYPE_QUICKSORT,//快速排序
SORT_TYPE_EXTERNAL_SORT,//外部排序
SORT_TYPE_EXTERNAL_MERGE//外部排序后的合并
} TuplesortMethod;//排序方法
typedef enum
{
SORT_SPACE_TYPE_DISK,//需要用上磁盤
SORT_SPACE_TYPE_MEMORY//使用內(nèi)存
} TuplesortSpaceType;
typedef struct TuplesortInstrumentation
{
//使用的排序算法
TuplesortMethod sortMethod; /* sort algorithm used */
//排序使用空間類型
TuplesortSpaceType spaceType; /* type of space spaceUsed represents */
//空間消耗(以K為單位)
long spaceUsed; /* space consumption, in kB */
} TuplesortInstrumentation;
tuplesort_begin_heap和tuplesort_puttupleslot都是準(zhǔn)備工作,把tuple放到數(shù)組(堆)中,為后續(xù)的實際排序?qū)崿F(xiàn)作準(zhǔn)備.
tuplesort_begin_heap
Tuplesortstate *
tuplesort_begin_heap(TupleDesc tupDesc,//元組描述符
int nkeys, //排序鍵個數(shù)
AttrNumber *attNums,//屬性編號
Oid *sortOperators, //排序操作符
Oid *sortCollations,//排序規(guī)則
bool *nullsFirstFlags,//標(biāo)記
int workMem, //內(nèi)存大小
SortCoordinate coordinate,//協(xié)調(diào)器
bool randomAccess)//是否隨機訪問
{
//獲取排序狀態(tài)
Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
randomAccess);
MemoryContext oldcontext;
int i;
oldcontext = MemoryContextSwitchTo(state->sortcontext);
AssertArg(nkeys > 0);
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG,
"begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
nkeys, workMem, randomAccess ? 't' : 'f');
#endif
state->nKeys = nkeys;
TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
false, /* no unique check */
nkeys,
workMem,
randomAccess,
PARALLEL_SORT(state));
//設(shè)置運行狀態(tài)
state->comparetup = comparetup_heap;
state->copytup = copytup_heap;
state->writetup = writetup_heap;
state->readtup = readtup_heap;
//假定不需要拷貝元組描述符
state->tupDesc = tupDesc; /* assume we need not copy tupDesc */
state->abbrevNext = 10;
/* Prepare SortSupport data for each column */
//為每一列準(zhǔn)備SortSupport數(shù)據(jù)(分配內(nèi)存空間)
state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
for (i = 0; i < nkeys; i++)
{
//------- 遍歷排序鍵
//排序鍵
SortSupport sortKey = state->sortKeys + i;
AssertArg(attNums[i] != 0);
AssertArg(sortOperators[i] != 0);
//設(shè)置SortSupport
sortKey->ssup_cxt = CurrentMemoryContext;
sortKey->ssup_collation = sortCollations[i];
sortKey->ssup_nulls_first = nullsFirstFlags[i];
sortKey->ssup_attno = attNums[i];
/* Convey if abbreviation optimization is applicable in principle */
//縮寫優(yōu)化是否原則上可用?
sortKey->abbreviate = (i == 0);
//設(shè)置
PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
}
/*
* The "onlyKey" optimization cannot be used with abbreviated keys, since
* tie-breaker comparisons may be required. Typically, the optimization
* is only of value to pass-by-value types anyway, whereas abbreviated
* keys are typically only of value to pass-by-reference types.
* 對于縮寫優(yōu)化"onlyKey"優(yōu)化不能使用,因為可能需要tie-breaker比較.
* 典型的,優(yōu)化器只對按值傳遞類型有價值,而縮寫建通常只對引用傳遞類型有價值.
*/
if (nkeys == 1 && !state->sortKeys->abbrev_converter)
state->onlyKey = state->sortKeys;
MemoryContextSwitchTo(oldcontext);
return state;
}
tuplesort_puttupleslot
接收一個元組(一行)
/*
* Accept one tuple while collecting input data for sort.
* 接收一個元組(一行)
*
* Note that the input data is always copied; the caller need not save it.
* 注意輸入數(shù)據(jù)通常會被拷貝,調(diào)用者不需要保存這些數(shù)據(jù).
*/
void
tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
{
MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
SortTuple stup;
/*
* Copy the given tuple into memory we control, and decrease availMem.
* Then call the common code.
* 拷貝給定的元組在我們控制的內(nèi)存中,同時減少可用內(nèi)存.
* 然后調(diào)用puttuple_common函數(shù).
*/
//#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup))
COPYTUP(state, &stup, (void *) slot);
puttuple_common(state, &stup);
MemoryContextSwitchTo(oldcontext);
}
/*
* Shared code for tuple and datum cases.
* 元組和datum共享的代碼.
*/
static void
puttuple_common(Tuplesortstate *state, SortTuple *tuple)
{
Assert(!LEADER(state));
switch (state->status)
{
case TSS_INITIAL://初始化
/*
* Save the tuple into the unsorted array. First, grow the array
* as needed. Note that we try to grow the array when there is
* still one free slot remaining --- if we fail, there'll still be
* room to store the incoming tuple, and then we'll switch to
* tape-based operation.
* 在未排序的數(shù)組中存儲元組.
* 首先,如需要則擴展數(shù)組大小.注意在只有1個slot剩下來的時候才嘗試擴展數(shù)組.
* 假如擴展失敗,仍有空間可用于存儲輸入的元組,然后將切換至tape-based(基于磁帶)操作.
*/
if (state->memtupcount >= state->memtupsize - 1)
{
(void) grow_memtuples(state);
Assert(state->memtupcount < state->memtupsize);
}
state->memtuples[state->memtupcount++] = *tuple;//在數(shù)組中保存元組
/*
* Check if it's time to switch over to a bounded heapsort. We do
* so if the input tuple count exceeds twice the desired tuple
* count (this is a heuristic for where heapsort becomes cheaper
* than a quicksort), or if we've just filled workMem and have
* enough tuples to meet the bound.
* 檢查是否切換至有界heapsort.
* 在輸入元組個數(shù)超出期望元組個數(shù)兩倍的情況下執(zhí)行該檢查
* (在堆排序比快速排序成本更低時所獲得的洞見),
* 或者如果我們已經(jīng)填充了workMem并且有足夠的元組滿足bound時也執(zhí)行該檢查.
*
* Note that once we enter TSS_BOUNDED state we will always try to
* complete the sort that way. In the worst case, if later input
* tuples are larger than earlier ones, this might cause us to
* exceed workMem significantly.
* 注意我們一旦進入TSS_BOUNDED狀態(tài),將嘗試按指定的方式完成排序.
* 在最壞的情況下,如果后續(xù)的輸入元組比早前的要大,這可能會導(dǎo)致內(nèi)存大小會超出workMem.
*/
if (state->bounded &&
(state->memtupcount > state->bound * 2 ||
(state->memtupcount > state->bound && LACKMEM(state))))
{
#ifdef TRACE_SORT
if (trace_sort)
elog(LOG, "switching to bounded heapsort at %d tuples: %s",
state->memtupcount,
pg_rusage_show(&state->ru_start));
#endif
//切換至堆排序
make_bounded_heap(state);
return;
}
/*
* Done if we still fit in available memory and have array slots.
* 如果內(nèi)存空間可用并且數(shù)組有位置存儲,則已完成任務(wù)!
*/
if (state->memtupcount < state->memtupsize && !LACKMEM(state))
return;
/*
* Nope; time to switch to tape-based operation.
* 切換至tape-base操作
*/
inittapes(state, true);
/*
* Dump all tuples.
* dump所有元組
*/
dumptuples(state, false);
break;
case TSS_BOUNDED://有界堆排序
/*
* We don't want to grow the array here, so check whether the new
* tuple can be discarded before putting it in. This should be a
* good speed optimization, too, since when there are many more
* input tuples than the bound, most input tuples can be discarded
* with just this one comparison. Note that because we currently
* have the sort direction reversed, we must check for <= not >=.
* 不希望在這里擴展數(shù)組,因此檢查在放到數(shù)組前檢查是否可用廢棄某些新元組.
* 這將是一個很好的性能優(yōu)化,因為存在比邊界要大得多的輸入元組,
* 大多數(shù)輸入元組會通過對比被廢棄.
*/
if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0)
{
/* new tuple <= top of the heap, so we can discard it */
// 新元組 <= 堆頂,可以廢棄
free_sort_tuple(state, tuple);
CHECK_FOR_INTERRUPTS();
}
else
{
/* discard top of heap, replacing it with the new tuple */
//廢棄堆頂,使用新元組替換
free_sort_tuple(state, &state->memtuples[0]);
tuplesort_heap_replace_top(state, tuple);
}
break;
case TSS_BUILDRUNS://構(gòu)建運行期信息
/*
* Save the tuple into the unsorted array (there must be space)
* 保存元組到未排序數(shù)組中(存在空閑空間)
*/
state->memtuples[state->memtupcount++] = *tuple;
/*
* If we are over the memory limit, dump all tuples.
* 如果已超出內(nèi)存限制,則dump所有元組
*/
dumptuples(state, false);
break;
default:
elog(ERROR, "invalid tuplesort state");
break;
}
}
N/A
N/A
免責(zé)聲明:本站發(fā)布的內(nèi)容(圖片、視頻和文字)以原創(chuàng)、轉(zhuǎn)載和分享為主,文章觀點不代表本網(wǎng)站立場,如果涉及侵權(quán)請聯(lián)系站長郵箱:is@yisu.com進行舉報,并提供相關(guān)證據(jù),一經(jīng)查實,將立刻刪除涉嫌侵權(quán)內(nèi)容。