PostgreSQL 源碼解讀（72）- 查詢語句#57（make_one_rel函數(shù)#22-...

發(fā)布時間：2020-08-18 20:07:39 來源：ITPUB博客閱讀：207 作者：husthxd 欄目：關(guān)系型數(shù)據(jù)庫

本節(jié)大體介紹了遺傳算法(geqo函數(shù))的實現(xiàn)，在參與連接的關(guān)系大于等于12(默認(rèn)值)個時，PG使用遺傳算法生成連接訪問路徑，構(gòu)建最終的連接關(guān)系。
遺傳算法簡介
遺傳算法是借鑒生物科學(xué)而產(chǎn)生的搜索算法，在這個算法中會用到一些生物科學(xué)的相關(guān)知識，下面是PG遺傳算法中所使用的的一些術(shù)語：
1、染色體(Chromosome)：染色體又可稱為基因型個體(individuals)，一個染色體可以視為一個解(一個合法的連接訪問路徑)。
2、種群（Pool）：一定數(shù)量的個體（染色體）組成了群體(pool/population)，群體中個體的數(shù)量叫做群體大?。╬opulation size）。
3、基因(Gene)：基因是染色體中的元素，用于表示個體的特征。例如有一個串（即染色體）S=1011，則其中的1，0，1，1這4個元素分別稱為基因。在PG中，基因是參與連接的關(guān)系。
4、適應(yīng)度(Fitness)：各個個體對環(huán)境的適應(yīng)程度叫做適應(yīng)度(fitness)。為了體現(xiàn)染色體的適應(yīng)能力，引入了對問題中的每一個染色體都能進(jìn)行度量的函數(shù)，叫適應(yīng)度函數(shù)。這個函數(shù)通常會被用來計算個體在群體中被使用的概率。在PG中適應(yīng)度是連接訪問路徑的總成本。

一、數(shù)據(jù)結(jié)構(gòu)


/*
 * Private state for a GEQO run --- accessible via root->join_search_private
 */
typedef struct
{
    List       *initial_rels;   /* 參與連接的關(guān)系鏈表;the base relations we are joining */
    unsigned short random_state[3]; /* 無符號短整型數(shù)組(隨機數(shù));state for pg_erand48() */
} GeqoPrivateData;

/* we presume that int instead of Relid
   is o.k. for Gene; so don't change it! */
typedef int Gene;//基因(整型)

typedef struct Chromosome//染色體
{
    Gene       *string;//基因
    Cost        worth;//成本
} Chromosome;

typedef struct Pool//種群
{
    Chromosome *data;//染色體數(shù)組
    int         size;//大小
    int         string_length;//長度
} Pool;
 
 /* A "clump" of already-joined relations within gimme_tree */
 typedef struct
 {
     RelOptInfo *joinrel;        /* joinrel for the set of relations */
     int         size;           /* number of input relations in clump */
 } Clump;

二、源碼解讀

geqo函數(shù)實現(xiàn)了遺傳算法，構(gòu)建多表（≥12）的連接訪問路徑。


//----------------------------------------------------------------------- geqo
/*
 * geqo
 *    solution of the query optimization problem
 *    similar to a constrained Traveling Salesman Problem (TSP)
 *     遺傳算法:可參考TSP的求解算法.
 *    TSP-旅行推銷員問題（最短路徑問題）:
 *       給定一系列城市和每對城市之間的距離，求解訪問每一座城市一次并回到起始城市的最短回路。
 */

RelOptInfo *
geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
{
    GeqoPrivateData private;//遺傳算法私有的數(shù)據(jù),包括參與連接的關(guān)系和隨機數(shù)
    int         generation;
    Chromosome *momma;//染色體-母親數(shù)組
    Chromosome *daddy;//染色體-父親數(shù)組
    Chromosome *kid;//染色體-孩子數(shù)組
    Pool       *pool;//種群指針
    int         pool_size,//種群大小
                number_generations;//進(jìn)化代數(shù),使用最大迭代次數(shù)(進(jìn)化代數(shù))作為停止準(zhǔn)則

#ifdef GEQO_DEBUG
    int         status_interval;
#endif
    Gene       *best_tour;
    RelOptInfo *best_rel;//最優(yōu)解

#if defined(ERX)
    Edge       *edge_table;     /* 邊界鏈表;list of edges */
    int         edge_failures = 0;
#endif
#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
    City       *city_table;     /* 城市鏈表;list of cities */
#endif
#if defined(CX)
    int         cycle_diffs = 0;
    int         mutations = 0;
#endif

/* 配置私有信息;set up private information */
    root->join_search_private = (void *) &private;
    private.initial_rels = initial_rels;

/* 初始化種子值;initialize private number generator */
    geqo_set_seed(root, Geqo_seed);

/* 設(shè)置遺傳算法參數(shù);set GA parameters */
    pool_size = gimme_pool_size(number_of_rels);//種群大小
    number_generations = gimme_number_generations(pool_size);//迭代次數(shù)
#ifdef GEQO_DEBUG
    status_interval = 10;
#endif

/* 申請內(nèi)存;allocate genetic pool memory */
    pool = alloc_pool(root, pool_size, number_of_rels);

/* 隨機初始化種群;random initialization of the pool */
    random_init_pool(root, pool);

/* 對種群進(jìn)行排序,成本最低的保留;sort the pool according to cheapest path as fitness */
    sort_pool(root, pool);      /* we have to do it only one time, since all
                                 * kids replace the worst individuals in
                                 * future (-> geqo_pool.c:spread_chromo ) */

#ifdef GEQO_DEBUG
    elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f",
         pool_size,
         pool->data[0].worth,
         pool->data[pool_size - 1].worth);
#endif

/* 申請染色體內(nèi)存(母親&父親);allocate chromosome momma and daddy memory */
    momma = alloc_chromo(root, pool->string_length);
    daddy = alloc_chromo(root, pool->string_length);

#if defined (ERX)
#ifdef GEQO_DEBUG
    elog(DEBUG2, "using edge recombination crossover [ERX]");
#endif
/* allocate edge table memory */
  //申請邊界表內(nèi)存
    edge_table = alloc_edge_table(root, pool->string_length);
#elif defined(PMX)
#ifdef GEQO_DEBUG
    elog(DEBUG2, "using partially matched crossover [PMX]");
#endif
/* 申請孩子染色體內(nèi)存;allocate chromosome kid memory */
    kid = alloc_chromo(root, pool->string_length);
#elif defined(CX)
#ifdef GEQO_DEBUG
    elog(DEBUG2, "using cycle crossover [CX]");
#endif
/* allocate city table memory */
    kid = alloc_chromo(root, pool->string_length);
    city_table = alloc_city_table(root, pool->string_length);
#elif defined(PX)
#ifdef GEQO_DEBUG
    elog(DEBUG2, "using position crossover [PX]");
#endif
/* allocate city table memory */
    kid = alloc_chromo(root, pool->string_length);//申請內(nèi)存
    city_table = alloc_city_table(root, pool->string_length);
#elif defined(OX1)
#ifdef GEQO_DEBUG
    elog(DEBUG2, "using order crossover [OX1]");
#endif
/* allocate city table memory */
    kid = alloc_chromo(root, pool->string_length);
    city_table = alloc_city_table(root, pool->string_length);
#elif defined(OX2)
#ifdef GEQO_DEBUG
    elog(DEBUG2, "using order crossover [OX2]");
#endif
/* allocate city table memory */
    kid = alloc_chromo(root, pool->string_length);
    city_table = alloc_city_table(root, pool->string_length);
#endif


/* my pain main part: */
/* 迭代式優(yōu)化.iterative optimization */

    for (generation = 0; generation < number_generations; generation++)//開始迭代
    {
        /* SELECTION: using linear bias function */
    //選擇:利用線性偏差(bias)函數(shù),從中選出momma&daddy
        geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);

#if defined (ERX)
        /* EDGE RECOMBINATION CROSSOVER */
    //交叉遺傳
        gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table);

        kid = momma;

        /* are there any edge failures ? */
    //遍歷邊界
        edge_failures += gimme_tour(root, edge_table, kid->string, pool->string_length);
#elif defined(PMX)
        /* PARTIALLY MATCHED CROSSOVER */
        pmx(root, momma->string, daddy->string, kid->string, pool->string_length);
#elif defined(CX)
        /* CYCLE CROSSOVER */
        cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
        /* mutate the child */
        if (cycle_diffs == 0)
        {
            mutations++;
            geqo_mutation(root, kid->string, pool->string_length);
        }
#elif defined(PX)
        /* POSITION CROSSOVER */
        px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
#elif defined(OX1)
        /* ORDER CROSSOVER */
        ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
#elif defined(OX2)
        /* ORDER CROSSOVER */
        ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
#endif


        /* EVALUATE FITNESS */
    //計算適應(yīng)度
        kid->worth = geqo_eval(root, kid->string, pool->string_length);

        /* push the kid into the wilderness of life according to its worth */
    //把遺傳產(chǎn)生的染色體放到野外以進(jìn)行下一輪的進(jìn)化
        spread_chromo(root, kid, pool);


#ifdef GEQO_DEBUG
        if (status_interval && !(generation % status_interval))
            print_gen(stdout, pool, generation);
#endif

    }


#if defined(ERX) && defined(GEQO_DEBUG)
    if (edge_failures != 0)
        elog(LOG, "[GEQO] failures: %d, average: %d",
             edge_failures, (int) number_generations / edge_failures);
    else
        elog(LOG, "[GEQO] no edge failures detected");
#endif

#if defined(CX) && defined(GEQO_DEBUG)
    if (mutations != 0)
        elog(LOG, "[GEQO] mutations: %d, generations: %d",
             mutations, number_generations);
    else
        elog(LOG, "[GEQO] no mutations processed");
#endif

#ifdef GEQO_DEBUG
    print_pool(stdout, pool, 0, pool_size - 1);
#endif

#ifdef GEQO_DEBUG
    elog(DEBUG1, "GEQO best is %.2f after %d generations",
         pool->data[0].worth, number_generations);
#endif


    /*
     * got the cheapest query tree processed by geqo; first element of the
     * population indicates the best query tree
     */
    best_tour = (Gene *) pool->data[0].string;

    best_rel = gimme_tree(root, best_tour, pool->string_length);

    if (best_rel == NULL)
        elog(ERROR, "geqo failed to make a valid plan");

    /* DBG: show the query plan */
#ifdef NOT_USED
    print_plan(best_plan, root);
#endif

    /* ... free memory stuff */
    free_chromo(root, momma);
    free_chromo(root, daddy);

#if defined (ERX)
    free_edge_table(root, edge_table);
#elif defined(PMX)
    free_chromo(root, kid);
#elif defined(CX)
    free_chromo(root, kid);
    free_city_table(root, city_table);
#elif defined(PX)
    free_chromo(root, kid);
    free_city_table(root, city_table);
#elif defined(OX1)
    free_chromo(root, kid);
    free_city_table(root, city_table);
#elif defined(OX2)
    free_chromo(root, kid);
    free_city_table(root, city_table);
#endif

    free_pool(root, pool);

    /* ... clear root pointer to our private storage */
    root->join_search_private = NULL;

    return best_rel;
}


//--------------------------------------------------------------------------- geqo_pool.c
static int  compare(const void *arg1, const void *arg2);

/*
 * alloc_pool
 *      allocates memory for GA pool
 */
Pool *
alloc_pool(PlannerInfo *root, int pool_size, int string_length)
{
    Pool       *new_pool;
    Chromosome *chromo;
    int         i;

    /* pool */
    new_pool = (Pool *) palloc(sizeof(Pool));
    new_pool->size = (int) pool_size;
    new_pool->string_length = (int) string_length;

    /* all chromosome */
    new_pool->data = (Chromosome *) palloc(pool_size * sizeof(Chromosome));

    /* all gene */
    chromo = (Chromosome *) new_pool->data; /* vector of all chromos */
    for (i = 0; i < pool_size; i++)
        chromo[i].string = palloc((string_length + 1) * sizeof(Gene));

    return new_pool;
}

/*
 * free_pool
 *      deallocates memory for GA pool
 */
void
free_pool(PlannerInfo *root, Pool *pool)
{
    Chromosome *chromo;
    int         i;

    /* all gene */
    chromo = (Chromosome *) pool->data; /* vector of all chromos */
    for (i = 0; i < pool->size; i++)
        pfree(chromo[i].string);

    /* all chromosome */
    pfree(pool->data);

    /* pool */
    pfree(pool);
}

/*
 * random_init_pool
 *      initialize genetic pool
 */
void
random_init_pool(PlannerInfo *root, Pool *pool)
{
    Chromosome *chromo = (Chromosome *) pool->data;
    int         i;
    int         bad = 0;

    /*
     * We immediately discard any invalid individuals (those that geqo_eval
     * returns DBL_MAX for), thereby not wasting pool space on them.
   * 立即丟棄所有無效的個體(那些geqo_eval返回DBL_MAX的)，因此不會在它們上浪費內(nèi)存空間。
     *
     * If we fail to make any valid individuals after 10000 tries, give up;
     * this probably means something is broken, and we shouldn't just let
     * ourselves get stuck in an infinite loop.
   * 如果在10000次嘗試后仍然沒有產(chǎn)生任何有效的個體，那么放棄是最好的選擇;
   * 這可能意味著有個地方存在問題，因此不應(yīng)該陷入死循環(huán)。
     */
    i = 0;
    while (i < pool->size)
    {
        init_tour(root, chromo[i].string, pool->string_length);
        pool->data[i].worth = geqo_eval(root, chromo[i].string,
                                        pool->string_length);
        if (pool->data[i].worth < DBL_MAX)
            i++;
        else
        {
            bad++;
            if (i == 0 && bad >= 10000)
                elog(ERROR, "geqo failed to make a valid plan");
        }
    }

#ifdef GEQO_DEBUG
    if (bad > 0)
        elog(DEBUG1, "%d invalid tours found while selecting %d pool entries",
             bad, pool->size);
#endif
}

/*
 * sort_pool
 *   sorts input pool according to worth, from smallest to largest
 *
 *   maybe you have to change compare() for different ordering ...
 */
void
sort_pool(PlannerInfo *root, Pool *pool)
{
    qsort(pool->data, pool->size, sizeof(Chromosome), compare);
}

/*
 * compare
 *   qsort comparison function for sort_pool
 */
static int
compare(const void *arg1, const void *arg2)
{
    const Chromosome *chromo1 = (const Chromosome *) arg1;
    const Chromosome *chromo2 = (const Chromosome *) arg2;

    if (chromo1->worth == chromo2->worth)
        return 0;
    else if (chromo1->worth > chromo2->worth)
        return 1;
    else
        return -1;
}

/* alloc_chromo
 *    allocates a chromosome and string space
 */
Chromosome *
alloc_chromo(PlannerInfo *root, int string_length)
{
    Chromosome *chromo;

    chromo = (Chromosome *) palloc(sizeof(Chromosome));
    chromo->string = (Gene *) palloc((string_length + 1) * sizeof(Gene));

    return chromo;
}

/* free_chromo
 *    deallocates a chromosome and string space
 */
void
free_chromo(PlannerInfo *root, Chromosome *chromo)
{
    pfree(chromo->string);
    pfree(chromo);
}

/* spread_chromo
 *   inserts a new chromosome into the pool, displacing worst gene in pool
 *   assumes best->worst = smallest->largest
 */
void
spread_chromo(PlannerInfo *root, Chromosome *chromo, Pool *pool)
{
    int         top,
                mid,
                bot;
    int         i,
                index;
    Chromosome  swap_chromo,
                tmp_chromo;

    /* new chromo is so bad we can't use it */
    if (chromo->worth > pool->data[pool->size - 1].worth)
        return;

    /* do a binary search to find the index of the new chromo */

    top = 0;
    mid = pool->size / 2;
    bot = pool->size - 1;
    index = -1;

    while (index == -1)
    {
        /* these 4 cases find a new location */

        if (chromo->worth <= pool->data[top].worth)
            index = top;
        else if (chromo->worth == pool->data[mid].worth)
            index = mid;
        else if (chromo->worth == pool->data[bot].worth)
            index = bot;
        else if (bot - top <= 1)
            index = bot;


        /*
         * these 2 cases move the search indices since a new location has not
         * yet been found.
         */

        else if (chromo->worth < pool->data[mid].worth)
        {
            bot = mid;
            mid = top + ((bot - top) / 2);
        }
        else
        {                       /* (chromo->worth > pool->data[mid].worth) */
            top = mid;
            mid = top + ((bot - top) / 2);
        }
    }                           /* ... while */

    /* now we have index for chromo */

    /*
     * move every gene from index on down one position to make room for chromo
     */

    /*
     * copy new gene into pool storage; always replace worst gene in pool
     */

    geqo_copy(root, &pool->data[pool->size - 1], chromo, pool->string_length);

    swap_chromo.string = pool->data[pool->size - 1].string;
    swap_chromo.worth = pool->data[pool->size - 1].worth;

    for (i = index; i < pool->size; i++)
    {
        tmp_chromo.string = pool->data[i].string;
        tmp_chromo.worth = pool->data[i].worth;

        pool->data[i].string = swap_chromo.string;
        pool->data[i].worth = swap_chromo.worth;

        swap_chromo.string = tmp_chromo.string;
        swap_chromo.worth = tmp_chromo.worth;
    }
}

 /*
  * init_tour
  *
  *   Randomly generates a legal "traveling salesman" tour
  *   (i.e. where each point is visited only once.)
  *   隨機生成TSP路徑(每個點只訪問一次)
  */
 void
 init_tour(PlannerInfo *root, Gene *tour, int num_gene)
 {
     int         i,
                 j;
 
     /*
      * We must fill the tour[] array with a random permutation of the numbers
      * 1 .. num_gene.  We can do that in one pass using the "inside-out"
      * variant of the Fisher-Yates shuffle algorithm.  Notionally, we append
      * each new value to the array and then swap it with a randomly-chosen
      * array element (possibly including itself, else we fail to generate
      * permutations with the last city last).  The swap step can be optimized
      * by combining it with the insertion.
      */
     if (num_gene > 0)
         tour[0] = (Gene) 1;
 
     for (i = 1; i < num_gene; i++)
     {
         j = geqo_randint(root, i, 0);
         /* i != j check avoids fetching uninitialized array element */
         if (i != j)
             tour[i] = tour[j];
         tour[j] = (Gene) (i + 1);
     }
 }
 

//----------------------------------------------------------- geqo_eval
 /*
  * geqo_eval
  *
  * Returns cost of a query tree as an individual of the population.
  * 返回該此進(jìn)化的適應(yīng)度。
  * 
  * If no legal join order can be extracted from the proposed tour,
  * returns DBL_MAX.
  * 如無合適的連接順序,返回DBL_MAX
  */
 Cost
 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
 {
     MemoryContext mycontext;
     MemoryContext oldcxt;
     RelOptInfo *joinrel;
     Cost        fitness;
     int         savelength;
     struct HTAB *savehash;
 
     /*
      * Create a private memory context that will hold all temp storage
      * allocated inside gimme_tree().
      *
      * Since geqo_eval() will be called many times, we can't afford to let all
      * that memory go unreclaimed until end of statement.  Note we make the
      * temp context a child of the planner's normal context, so that it will
      * be freed even if we abort via ereport(ERROR).
      */
     mycontext = AllocSetContextCreate(CurrentMemoryContext,
                                       "GEQO",
                                       ALLOCSET_DEFAULT_SIZES);
     oldcxt = MemoryContextSwitchTo(mycontext);
 
     /*
      * gimme_tree will add entries to root->join_rel_list, which may or may
      * not already contain some entries.  The newly added entries will be
      * recycled by the MemoryContextDelete below, so we must ensure that the
      * list is restored to its former state before exiting.  We can do this by
      * truncating the list to its original length.  NOTE this assumes that any
      * added entries are appended at the end!
      *
      * We also must take care not to mess up the outer join_rel_hash, if there
      * is one.  We can do this by just temporarily setting the link to NULL.
      * (If we are dealing with enough join rels, which we very likely are, a
      * new hash table will get built and used locally.)
      *
      * join_rel_level[] shouldn't be in use, so just Assert it isn't.
      */
     savelength = list_length(root->join_rel_list);
     savehash = root->join_rel_hash;
     Assert(root->join_rel_level == NULL);
 
     root->join_rel_hash = NULL;
 
     /* construct the best path for the given combination of relations */
   //給定的關(guān)系組合,構(gòu)造最佳的訪問路徑
     joinrel = gimme_tree(root, tour, num_gene);
 
     /*
      * compute fitness, if we found a valid join
    * 如找到一個有效的連接,計算其適應(yīng)度
      *
      * XXX geqo does not currently support optimization for partial result
      * retrieval, nor do we take any cognizance of possible use of
      * parameterized paths --- how to fix?
    * 遺傳算法目前不支持部分結(jié)果檢索的優(yōu)化，目前也不知道是否可能使用參數(shù)化路徑——如何修復(fù)?
      */
     if (joinrel)
     {
         Path       *best_path = joinrel->cheapest_total_path;//獲取生成的關(guān)系的最優(yōu)路徑
 
         fitness = best_path->total_cost;//適應(yīng)度=該路徑的總成本
     }
     else
         fitness = DBL_MAX;//連接無效,適應(yīng)度為DBL_MAX,下一輪迭代會丟棄
 
     /*
      * Restore join_rel_list to its former state, and put back original
      * hashtable if any.
    * 將join_rel_list恢復(fù)到原來的狀態(tài).如存在hash表，則把原來的哈希表放回去。
      */
     root->join_rel_list = list_truncate(root->join_rel_list,
                                         savelength);
     root->join_rel_hash = savehash;
 
     /* release all the memory acquired within gimme_tree */
   //釋放資源
     MemoryContextSwitchTo(oldcxt);
     MemoryContextDelete(mycontext);
 
     return fitness;
 }
 
//------------------------------------------- gimme_tree
/*
 * gimme_tree
 *    Form planner estimates for a join tree constructed in the specified
 *    order.
 *    給定順序構(gòu)造連接樹,由優(yōu)化器估算成本. 
 *
 *   'tour' is the proposed join order, of length 'num_gene'
 *   tour-建議的連接順序,長度為num_gene
 *
 * Returns a new join relation whose cheapest path is the best plan for
 * this join order.  NB: will return NULL if join order is invalid and
 * we can't modify it into a valid order.
 * 返回一個新的連接關(guān)系，其成本最低的路徑是此連接順序的最佳計劃。
 * 如果join order無效，而且不能將其修改為有效的order，則返回NULL。
 *
 * The original implementation of this routine always joined in the specified
 * order, and so could only build left-sided plans (and right-sided and
 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
 * It could never produce a "bushy" plan.  This had a couple of big problems,
 * of which the worst was that there are situations involving join order
 * restrictions where the only valid plans are bushy.
 * 這個處理過程的初始實現(xiàn)總是按照指定的順序連接，因此只能構(gòu)建左側(cè)計劃
 * (以及右側(cè)和混合計劃，這是make_join_rel()是對稱的這一事實的副產(chǎn)品)。
 * 它永遠(yuǎn)不會產(chǎn)生一個“bushy”(N+M,其中N≥2,M≥2)的計劃。
 * 這有幾個大問題，其中最糟糕的是涉及到連接順序限制的情況，其中唯一有效的計劃是bushy的。
 *
 * The present implementation takes the given tour as a guideline, but
 * postpones joins that are illegal or seem unsuitable according to some
 * heuristic rules.  This allows correct bushy plans to be generated at need,
 * and as a nice side-effect it seems to materially improve the quality of the
 * generated plans.  Note however that since it's just a heuristic, it can
 * still fail in some cases.  (In particular, we might clump together
 * relations that actually mustn't be joined yet due to LATERAL restrictions;
 * since there's no provision for un-clumping, this must lead to failure.)
 * 目前的實施以給定的路線(tour)為指導(dǎo)，但根據(jù)一些啟發(fā)式規(guī)則，延遲了非法或看似不合適的基因加入。
 * 這允許在需要時生成正確的bushy計劃，這帶來了額外的好處，似乎實質(zhì)性地提高了生成計劃的質(zhì)量。
 * 但是請注意，由于它只是一個啟發(fā)式的做法，在某些情況下它仍然可能失敗。
 * (特別是，我們可能會將由于橫向限制而實際上還不能被連接的關(guān)系組合在一起;由于沒有關(guān)于非LATERAL的規(guī)定，這肯定會導(dǎo)致失敗。)
 */
RelOptInfo *
gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
{
    GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
    List       *clumps;
    int         rel_count;

    /*
     * Sometimes, a relation can't yet be joined to others due to heuristics
     * or actual semantic restrictions.  We maintain a list of "clumps" of
     * successfully joined relations, with larger clumps at the front. Each
     * new relation from the tour is added to the first clump it can be joined
     * to; if there is none then it becomes a new clump of its own. When we
     * enlarge an existing clump we check to see if it can now be merged with
     * any other clumps.  After the tour is all scanned, we forget about the
     * heuristics and try to forcibly join any remaining clumps.  If we are
     * unable to merge all the clumps into one, fail.
     * 有時，由于啟發(fā)式或?qū)嶋H的語義限制，關(guān)系還不能連接到其他關(guān)系。
   * 因此保留了一個成功連接關(guān)系的“clumps”(聚類)鏈表，在此前有更大的clumps(聚類)。
   * 每個新關(guān)系從tour添加到第一個clump(聚類)，它可以加入;如果沒有的話，它自己會構(gòu)成一個clump(聚類)。
   * 當(dāng)擴大現(xiàn)有的clump(聚類)時，需要檢查它現(xiàn)在是否可以與其他clumps(聚類)合并。
   * 在所有的tour基因掃描之后，這時候不使用啟發(fā)式規(guī)則，并試圖強行加入任何剩余的clumps(聚類)中。
   * 如果我們不能把所有的聚類合并成一個種群，則失敗。
     */
    clumps = NIL;

    for (rel_count = 0; rel_count < num_gene; rel_count++)//遍歷基因即參與連接的關(guān)系
    {
        int         cur_rel_index;//當(dāng)前索引
        RelOptInfo *cur_rel;//當(dāng)前的關(guān)系
        Clump      *cur_clump;//當(dāng)前的clump聚類

        /* 獲取下一個輸入的關(guān)系.Get the next input relation */
        cur_rel_index = (int) tour[rel_count];
        cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
                                          cur_rel_index - 1);

        /* 放在一個單獨的聚類clump中;Make it into a single-rel clump */
        cur_clump = (Clump *) palloc(sizeof(Clump));
        cur_clump->joinrel = cur_rel;
        cur_clump->size = 1;

        /* Merge it into the clumps list, using only desirable joins */
    //使用期望的連接方式(force=F)將它合并到clump(聚類)鏈表中
        clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
    }

    if (list_length(clumps) > 1)//聚類鏈表>1
    {
        /* Force-join the remaining clumps in some legal order */
    //以傳統(tǒng)的順序加入到剩余的聚類中
        List       *fclumps;//鏈表
        ListCell   *lc;//元素

        fclumps = NIL;
        foreach(lc, clumps)
        {
            Clump      *clump = (Clump *) lfirst(lc);
      //(force=T)
            fclumps = merge_clump(root, fclumps, clump, num_gene, true);
        }
        clumps = fclumps;
    }

    /* Did we succeed in forming a single join relation? */
    if (list_length(clumps) != 1)//無法形成最終的結(jié)果關(guān)系,返回NULL
        return NULL;

    return ((Clump *) linitial(clumps))->joinrel;//成功,則返回結(jié)果Relation
}


//------------------------------ merge_clump
/*
 * Merge a "clump" into the list of existing clumps for gimme_tree.
 * 將某個clump聚類合并到gimme_tree中生成的現(xiàn)存clumps聚類群中
 *
 * We try to merge the clump into some existing clump, and repeat if
 * successful.  When no more merging is possible, insert the clump
 * into the list, preserving the list ordering rule (namely, that
 * clumps of larger size appear earlier).
 * 嘗試將clump合并到現(xiàn)有的clumps中，如果成功，則重復(fù)。
 * 當(dāng)不再可能合并時，將clump插入到鏈表中，保留鏈表排序規(guī)則(即，更大的clump出現(xiàn)在前面)。
 * 
 * If force is true, merge anywhere a join is legal, even if it causes
 * a cartesian join to be performed.  When force is false, do only
 * "desirable" joins.
 * 如果force為true，則在連接合法的位置進(jìn)行合并，即使這會導(dǎo)致執(zhí)行笛卡爾連接。當(dāng)力force為F時，只做“合適的”連接。
 */
static List *
merge_clump(PlannerInfo *root,//優(yōu)化信息 
      List *clumps, //聚類鏈表
      Clump *new_clump, //新的聚類
      int num_gene,//基因格式
            bool force)//是否強制加入
{
    ListCell   *prev;
    ListCell   *lc;

    /* Look for a clump that new_clump can join to */
  //驗證新聚類能否加入
    prev = NULL;
    foreach(lc, clumps)//遍歷鏈表
    {
        Clump      *old_clump = (Clump *) lfirst(lc);//原有的聚類

        if (force ||
            desirable_join(root, old_clump->joinrel, new_clump->joinrel))//如強制加入或者可按要求加入
        {
            RelOptInfo *joinrel;//

            /*
             * Construct a RelOptInfo representing the join of these two input
             * relations.  Note that we expect the joinrel not to exist in
             * root->join_rel_list yet, and so the paths constructed for it
             * will only include the ones we want.
       * 構(gòu)造一個RelOptInfo，表示這兩個輸入關(guān)系的連接。
       * 注意，預(yù)期joinrel不會存在于root->join_rel_list中，因此為它構(gòu)造的路徑將只包含我們期望的路徑。
             */
            joinrel = make_join_rel(root,
                                    old_clump->joinrel,
                                    new_clump->joinrel);//構(gòu)造連接新關(guān)系RelOptInfo

            /* 如連接順序無效,繼續(xù)搜索;Keep searching if join order is not valid */
            if (joinrel)
            {
                /* Create paths for partitionwise joins. */
        //創(chuàng)建partitionwise連接
                generate_partitionwise_join_paths(root, joinrel);

                /*
                 * Except for the topmost scan/join rel, consider gathering
                 * partial paths.  We'll do the same for the topmost scan/join
                 * rel once we know the final targetlist (see
                 * grouping_planner).
         * 除了最上面的掃描/連接的關(guān)系，嘗試gather partial(并行)訪問路徑。
         * 一旦我們知道最終的targetlist(參見grouping_planner)，將對最頂層的掃描/連接關(guān)系執(zhí)行相同的操作。
                 */
                if (old_clump->size + new_clump->size < num_gene)
                    generate_gather_paths(root, joinrel, false);

                /* Find and save the cheapest paths for this joinrel */
        //設(shè)置成本最低的路徑
                set_cheapest(joinrel);

                /* Absorb new clump into old */
        //把新的clump吸納到舊的clump中,釋放new_clump
                old_clump->joinrel = joinrel;
                old_clump->size += new_clump->size;
                pfree(new_clump);

                /* Remove old_clump from list */
        //從鏈表中刪除old_clump
                clumps = list_delete_cell(clumps, lc, prev);

                /*
                 * Recursively try to merge the enlarged old_clump with
                 * others.  When no further merge is possible, we'll reinsert
                 * it into the list.
         * 遞歸地嘗試將逐步擴大的old_clump與其他clump合并。
         * 當(dāng)不能進(jìn)一步合并時，我們將把它重新插入到鏈表中。
                 */
                return merge_clump(root, clumps, old_clump, num_gene, force);
            }
        }
        prev = lc;
    }

    /*
     * No merging is possible, so add new_clump as an independent clump, in
     * proper order according to size.  We can be fast for the common case
     * where it has size 1 --- it should always go at the end.
   * 不可能合并，因此按照大小的適當(dāng)順序?qū)ew_clump添加為獨立的clumps中。
   * 一般情況下，可以快速處理它的大小為1——總是在鏈表的最后。
     */
    if (clumps == NIL || new_clump->size == 1)
        return lappend(clumps, new_clump);//直接添加

    /* Check if it belongs at the front */
  //檢查是否屬于前面的clump
    lc = list_head(clumps);
    if (new_clump->size > ((Clump *) lfirst(lc))->size)
        return lcons(new_clump, clumps);

    /* Else search for the place to insert it */
  //搜索位置,插入之
    for (;;)
    {
        ListCell   *nxt = lnext(lc);

        if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
            break;              /* it belongs after 'lc', before 'nxt' */
        lc = nxt;
    }
    lappend_cell(clumps, lc, new_clump);

    return clumps;
}

三、跟蹤分析

測試表(13張表)和數(shù)據(jù):

drop table if exists t01;
drop table if exists t02;
drop table if exists t03;
drop table if exists t04;
drop table if exists t05;
drop table if exists t06;
drop table if exists t07;
drop table if exists t08;
drop table if exists t09;
drop table if exists t10;
drop table if exists t11;
drop table if exists t12;
drop table if exists t13;

create table t01(c1 int,c2 varchar(20));
create table t02(c1 int,c2 varchar(20));
create table t03(c1 int,c2 varchar(20));
create table t04(c1 int,c2 varchar(20));
create table t05(c1 int,c2 varchar(20));
create table t06(c1 int,c2 varchar(20));
create table t07(c1 int,c2 varchar(20));
create table t08(c1 int,c2 varchar(20));
create table t09(c1 int,c2 varchar(20));
create table t10(c1 int,c2 varchar(20));
create table t11(c1 int,c2 varchar(20));
create table t12(c1 int,c2 varchar(20));
create table t13(c1 int,c2 varchar(20));

insert into t01 select generate_series(1,100),'TEST'||generate_series(1,100);
insert into t02 select generate_series(1,1000),'TEST'||generate_series(1,1000);
insert into t03 select generate_series(1,10000),'TEST'||generate_series(1,10000);
insert into t04 select generate_series(1,200),'TEST'||generate_series(1,200);
insert into t05 select generate_series(1,4000),'TEST'||generate_series(1,4000);
insert into t06 select generate_series(1,100000),'TEST'||generate_series(1,100000);
insert into t07 select generate_series(1,100),'TEST'||generate_series(1,100);
insert into t08 select generate_series(1,1000),'TEST'||generate_series(1,1000);
insert into t09 select generate_series(1,10000),'TEST'||generate_series(1,10000);
insert into t10 select generate_series(1,200),'TEST'||generate_series(1,200);
insert into t11 select generate_series(1,4000),'TEST'||generate_series(1,4000);
insert into t12 select generate_series(1,100000),'TEST'||generate_series(1,100000);
insert into t13 select generate_series(1,100),'TEST'||generate_series(1,100);

create index idx_t01_c1 on t01(c1);
create index idx_t06_c1 on t06(c1);
create index idx_t12_c1 on t12(c1);

測試SQL語句與執(zhí)行計劃如下:

testdb=# explain verbose select * 
from t01,t02,t03,t04,t05,t06,t07,t08,t09,t10,t11,t12,t13
where t01.c1 = t02.c1 
and t02.c1 = t03.c1
and t03.c1 = t04.c1
and t04.c1 = t05.c1
and t05.c1 = t06.c1
and t06.c1 = t07.c1
and t07.c1 = t08.c1
and t08.c1 = t09.c1
and t09.c1 = t10.c1
and t10.c1 = t11.c1
and t11.c1 = t12.c1
and t12.c1 = t13.c1;
                                           QUERY PLAN                                                                                        
                
-------------------------------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=404.93..597.44 rows=1 width=148)
   Output: t01.c1, t01.c2, t02.c1, t02.c2, t03.c1, t03.c2, t04.c1, t04.c2, t05.c1, t05.c2, t06.c1, t06.c2, t07.c1, t07.c2, t08.c1, t08.c2, t09.c1, t09.c2, t10.c1, t10.c2, t11.c1, t11.c2, t12.c1, t12.c2,
 t13.c1, t13.c2
   Hash Cond: (t03.c1 = t01.c1)
   ->  Seq Scan on public.t03  (cost=0.00..155.00 rows=10000 width=12)
         Output: t03.c1, t03.c2
   ->  Hash  (cost=404.92..404.92 rows=1 width=136)
         Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2, t06.c1, t06.c2, t01.c1, t
01.c2
         ->  Nested Loop  (cost=327.82..404.92 rows=1 width=136)
               Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2, t06.c1, t06.c2, t01
.c1, t01.c2
               Join Filter: (t02.c1 = t01.c1)
               ->  Nested Loop  (cost=327.68..404.75 rows=1 width=126)
                     Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2, t06.c1, t06.c
2
                     Join Filter: (t02.c1 = t06.c1)
                     ->  Hash Join  (cost=327.38..404.39 rows=1 width=113)
                           Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2
                           Hash Cond: (t05.c1 = t02.c1)
                           ->  Seq Scan on public.t05  (cost=0.00..62.00 rows=4000 width=12)
                                 Output: t05.c1, t05.c2
                           ->  Hash  (cost=327.37..327.37 rows=1 width=101)
                                 Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2
                                 ->  Hash Join  (cost=307.61..327.37 rows=1 width=101)
                                       Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2
                                       Hash Cond: (t08.c1 = t02.c1)
                                       ->  Seq Scan on public.t08  (cost=0.00..16.00 rows=1000 width=11)
                                             Output: t08.c1, t08.c2
                                       ->  Hash  (cost=307.60..307.60 rows=1 width=90)
                                             Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2
                                             ->  Hash Join  (cost=230.59..307.60 rows=1 width=90)
                                                   Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2
                                                   Hash Cond: (t11.c1 = t02.c1)
                                                   ->  Seq Scan on public.t11  (cost=0.00..62.00 rows=4000 width=12)
                                                         Output: t11.c1, t11.c2
                                                   ->  Hash  (cost=230.58..230.58 rows=1 width=78)
                                                         Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2
                                                         ->  Nested Loop  (cost=37.71..230.58 rows=1 width=78)
                                                               Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2
                                                               Join Filter: (t02.c1 = t12.c1)
                                                               ->  Hash Join  (cost=37.42..229.93 rows=1 width=65)
                                                                     Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2
                                                                     Hash Cond: (t09.c1 = t02.c1)
                                                                     ->  Seq Scan on public.t09  (cost=0.00..155.00 rows=10000 width=12)
                                                                           Output: t09.c1, t09.c2
                                                                     ->  Hash  (cost=37.41..37.41 rows=1 width=53)
                                                                           Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2
                                                                           ->  Hash Join  (cost=32.65..37.41 rows=1 width=53)
                                                                                 Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2
                                                                                 Hash Cond: (t04.c1 = t02.c1)
                                                                                 ->  Seq Scan on public.t04  (cost=0.00..4.00 rows=200 width=11)
                                                                                       Output: t04.c1, t04.c2
                                                                                 ->  Hash  (cost=32.62..32.62 rows=2 width=42)
                                                                                       Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2
                                                                                       ->  Hash Join  (cost=27.85..32.62 rows=2 width=42)
                                                                                             Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2
                                                                                             Hash Cond: (t10.c1 = t02.c1)
                                                                                             ->  Seq Scan on public.t10  (cost=0.00..4.00 rows=200 width=11)
                                                                                                   Output: t10.c1, t10.c2
                                                                                             ->  Hash  (cost=27.73..27.73 rows=10 width=31)
                                                                                                   Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2
                                                                                                   ->  Hash Join  (cost=6.50..27.73 rows=10 width=31)
                                                                                                         Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2
                                                                                                         Hash Cond: (t02.c1 = t13.c1)
                                                                                                         ->  Hash Join  (cost=3.25..24.00 rows=100 width=21)
                                                                                                               Output: t02.c1, t02.c2, t07.c1, t07.c2
                                                                                                               Hash Cond: (t02.c1 = t07.c1)
                                                                                                               ->  Seq Scan on public.t02  (cost=0.00..16.00 rows=1000 width=11)
                                                                                                                     Output: t02.c1, t02.c2
                                                                                                               ->  Hash  (cost=2.00..2.00 rows=100 width=10)
                                                                                                                     Output: t07.c1, t07.c2
                                                                                                                     ->  Seq Scan on public.t07  (cost=0.00..2.00 rows=100 width=10)
                                                                                                                           Output: t07.c1, t07.c2
                                                                                                         ->  Hash  (cost=2.00..2.00 rows=100 width=10)
                                                                                                               Output: t13.c1, t13.c2
                                                                                                               ->  Seq Scan on public.t13  (cost=0.00..2.00 rows=100 width=10)
                                                                                                                     Output: t13.c1, t13.c2
                                                               ->  Index Scan using idx_t12_c1 on public.t12  (cost=0.29..0.64 rows=1 width=13)
                                                                     Output: t12.c1, t12.c2
                                                                     Index Cond: (t12.c1 = t09.c1)
                     ->  Index Scan using idx_t06_c1 on public.t06  (cost=0.29..0.34 rows=1 width=13)
                           Output: t06.c1, t06.c2
                           Index Cond: (t06.c1 = t12.c1)
               ->  Index Scan using idx_t01_c1 on public.t01  (cost=0.14..0.16 rows=1 width=10)
                     Output: t01.c1, t01.c2
                     Index Cond: (t01.c1 = t06.c1)
(83 rows)

testdb=#

啟動gdb,設(shè)置斷點

(gdb) b geqo
Breakpoint 1 at 0x793ec6: file geqo_main.c, line 86.
(gdb) c
Continuing.

Breakpoint 1, geqo (root=0x1fbf0f8, number_of_rels=13, initial_rels=0x1f0f698) at geqo_main.c:86
86    int     edge_failures = 0;

輸入?yún)?shù):root為優(yōu)化器信息,一共有13個參與連接的關(guān)系,initial_rels是13個參與連接關(guān)系的鏈表

(gdb) p *initial_rels
$1 = {type = T_List, length = 13, head = 0x1f0f670, tail = 0x1f0f888}

初始化遺傳算法私有數(shù)據(jù)

86    int     edge_failures = 0;
(gdb) n
97    root->join_search_private = (void *) &private;
(gdb) 
98    private.initial_rels = initial_rels;

設(shè)置種子值

(gdb) n
101   geqo_set_seed(root, Geqo_seed);

計算種群大小/迭代代數(shù)

104   pool_size = gimme_pool_size(number_of_rels);
(gdb) p Geqo_seed
$2 = 0
(gdb) n
105   number_generations = gimme_number_generations(pool_size);
(gdb) p pool_size
$6 = 250
(gdb) n
111   pool = alloc_pool(root, pool_size, number_of_rels);
(gdb) p number_generations
$7 = 250

隨機初始化種群,pool->data數(shù)組存儲了組成該種群的染色體

(gdb) n
114   random_init_pool(root, pool);
(gdb) n
117   sort_pool(root, pool);    /* we have to do it only one time, since all
(gdb) p *pool
$20 = {data = 0x1f0f8d8, size = 250, string_length = 13}
(gdb) p pool->data[0]->string
$23 = (Gene *) 0x1f108f0
(gdb) p *pool->data[0]->string
$24 = 8
(gdb) p pool->data[0].worth
$50 = 635.99087618977478
(gdb) p *pool->data[1]->string
$25 = 7
(gdb) p *pool->data[2]->string
$26 = 6
(gdb) p *pool->data[249].string
$48 = 13
(gdb) p pool->data[249].worth
$49 = 601.3463999999999
...

開始進(jìn)行迭代進(jìn)化

(gdb) n
129   momma = alloc_chromo(root, pool->string_length);
(gdb) 
130   daddy = alloc_chromo(root, pool->string_length);
(gdb) 
137   edge_table = alloc_edge_table(root, pool->string_length);
(gdb) 
178   for (generation = 0; generation < number_generations; generation++)
(gdb) p number_generations
$52 = 250

利用線性偏差(bias)函數(shù)選擇,然后交叉遺傳

181     geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
(gdb) n
185     gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table);
(gdb) n
187     kid = momma;
(gdb) p *momma
$1 = {string = 0x1f30460, worth = 637.36587618977478}
(gdb) p *momma->string
$2 = 11
(gdb) p *daddy->string
$3 = 8
(gdb) p *daddy
$4 = {string = 0x1f304e0, worth = 635.57048404744364}

遍歷邊界表,計算kid的成本,把kid放到種群中,進(jìn)一步進(jìn)化

(gdb) 
216     kid->worth = geqo_eval(root, kid->string, pool->string_length);
(gdb) p *kid
$5 = {string = 0x1f30460, worth = 637.36587618977478}
(gdb) n
219     spread_chromo(root, kid, pool);
(gdb) p *kid
$6 = {string = 0x1f30460, worth = 663.22435850797251}
(gdb) n
178   for (generation = 0; generation < number_generations; generation++)

下面考察進(jìn)化過程中的geqo_eval函數(shù),進(jìn)入該函數(shù),13個基因,tour為2

(gdb) 
216     kid->worth = geqo_eval(root, kid->string, pool->string_length);
(gdb) step
geqo_eval (root=0x1fbf0f8, tour=0x1f30460, num_gene=13) at geqo_eval.c:75
75    mycontext = AllocSetContextCreate(CurrentMemoryContext,
(gdb) p *tour
$7 = 2

賦值/保存狀態(tài)

(gdb) n
78    oldcxt = MemoryContextSwitchTo(mycontext);
(gdb) 
95    savelength = list_length(root->join_rel_list);
(gdb) 
96    savehash = root->join_rel_hash;
(gdb) 
97    Assert(root->join_rel_level == NULL);
(gdb) 
99    root->join_rel_hash = NULL;

進(jìn)入geqo_eval->gimme_tree函數(shù)

(gdb) 
102   joinrel = gimme_tree(root, tour, num_gene);
(gdb) step
gimme_tree (root=0x1fbf0f8, tour=0x1f30460, num_gene=13) at geqo_eval.c:165
165   GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;

鏈表clumps初始化為NULL,開始循環(huán),執(zhí)行連接操作,tour數(shù)組保存了RTE的順序

(gdb) n
180   clumps = NIL;
(gdb) 
182   for (rel_count = 0; rel_count < num_gene; rel_count++)
(gdb) n
189     cur_rel_index = (int) tour[rel_count];
(gdb) p tour[0]
$9 = 2
(gdb) p tour[1]
$10 = 12

循環(huán)添加到clumps中,直至所有的表都加入到clumps中或者無法產(chǎn)生有效的連接

(gdb) n
190     cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
(gdb) 
194     cur_clump = (Clump *) palloc(sizeof(Clump));
(gdb) 
195     cur_clump->joinrel = cur_rel;
(gdb) 
196     cur_clump->size = 1;
(gdb) 
199     clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
(gdb) 
(gdb) 
182   for (rel_count = 0; rel_count < num_gene; rel_count++)

完成循環(huán)調(diào)用

(gdb) b geqo_eval.c:218
Breakpoint 2 at 0x793bf9: file geqo_eval.c, line 218.
(gdb) c
Continuing.

Breakpoint 2, gimme_tree (root=0x1fbf0f8, tour=0x1f30460, num_gene=13) at geqo_eval.c:219
219   if (list_length(clumps) != 1)

clumps鏈表中只有一個元素,該元素為13張表成功連接的訪問路徑

(gdb) p *clumps
$11 = {type = T_List, length = 1, head = 0x1ea2e20, tail = 0x1ea2e20}
$12 = {joinrel = 0x1ee4ef8, size = 13}
(gdb) p *((Clump *)clumps->head->data.ptr_value)->joinrel
$13 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x1ee34b0, rows = 100, consider_startup = false, 
  consider_param_startup = false, consider_parallel = true, reltarget = 0x1ee5110, pathlist = 0x1ee5a78, ppilist = 0x0, 
  partial_pathlist = 0x0, cheapest_startup_path = 0x1ee5ee0, cheapest_total_path = 0x1ee5ee0, cheapest_unique_path = 0x0, 
  cheapest_parameterized_paths = 0x1ee5fa0, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0, 
  reltablespace = 0, rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0, 
  lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0, 
  subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = -1, serverid = 0, userid = 0, useridiscurrent = false, 
  fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0, 
  baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 4294967295, joininfo = 0x0, 
  has_eclass_joins = false, consider_partitionwise_join = false, top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, 
  boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0, partexprs = 0x0, nullable_partexprs = 0x0, 
  partitioned_child_rels = 0x0}

geqo_eval->gimme_tree函數(shù)返回

(gdb) n
222   return ((Clump *) linitial(clumps))->joinrel;

回到geqo_eval函數(shù),設(shè)置適應(yīng)度,還原現(xiàn)場等

(gdb) 
113     Path     *best_path = joinrel->cheapest_total_path;
(gdb) n
115     fitness = best_path->total_cost;
(gdb) 
124   root->join_rel_list = list_truncate(root->join_rel_list,
(gdb) 
126   root->join_rel_hash = savehash;
(gdb) 
129   MemoryContextSwitchTo(oldcxt);
(gdb) 
130   MemoryContextDelete(mycontext);
(gdb) 
132   return fitness;
(gdb) p fitness
$14 = 680.71399161308523

回到函數(shù)geqo,繼續(xù)迭代

geqo (root=0x1fbf0f8, number_of_rels=13, initial_rels=0x1f0f698) at geqo_main.c:219
219     spread_chromo(root, kid, pool);
(gdb) 
178   for (generation = 0; generation < number_generations; generation++)

完成循環(huán)迭代

(gdb) b geqo_main.c:229
Breakpoint 3 at 0x79407a: file geqo_main.c, line 229.
(gdb) c
Continuing.
Breakpoint 3, geqo (root=0x1fbf0f8, number_of_rels=13, initial_rels=0x1f0f698) at geqo_main.c:260
260   best_tour = (Gene *) pool->data[0].string;

最佳的訪問節(jié)點路徑(存儲在best_tour數(shù)組中)

(gdb) p best_tour[0]
$17 = 2
(gdb) p best_tour[1]
$18 = 7
(gdb) p best_tour[12]
$19 = 3
(gdb) p best_tour[13]

最佳的最終結(jié)果關(guān)系

(gdb) p *best_rel
$21 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x1f3d098, rows = 1, consider_startup = false, 
  consider_param_startup = false, consider_parallel = true, reltarget = 0x1f3d7e0, pathlist = 0x1f3e148, ppilist = 0x0, 
  partial_pathlist = 0x0, cheapest_startup_path = 0x1f3e550, cheapest_total_path = 0x1f3e550, cheapest_unique_path = 0x0, 
  cheapest_parameterized_paths = 0x1f3e670, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0, 
  reltablespace = 0, rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0, 
  lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0, 
  subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = -1, serverid = 0, userid = 0, useridiscurrent = false, 
  fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0, 
  baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 4294967295, joininfo = 0x0, 
  has_eclass_joins = false, consider_partitionwise_join = false, top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, 
  boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0, partexprs = 0x0, nullable_partexprs = 0x0, 
  partitioned_child_rels = 0x0}

清理現(xiàn)場,并返回

(gdb) n
274   free_chromo(root, daddy);
(gdb) 
277   free_edge_table(root, edge_table);
(gdb) 
294   free_pool(root, pool);
(gdb) 
297   root->join_search_private = NULL;
(gdb) 
299   return best_rel;
(gdb) 
300 }

DONE!

四、參考資料

allpaths.c
cost.h
costsize.c
PG Document:Query Planning
十分鐘搞懂遺傳算法
你和遺傳算法的距離也許只差這一文
智能優(yōu)化方法及其應(yīng)用-遺傳算法

向AI問一下細(xì)節(jié)

PostgreSQL 源碼解讀（72）- 查詢語句#57（make_one_rel函數(shù)#22-...

一、數(shù)據(jù)結(jié)構(gòu)

二、源碼解讀

三、跟蹤分析

四、參考資料

猜你喜歡

最新資訊

相關(guān)推薦

相關(guān)標(biāo)簽

四、參考資料