C++聚類算法中的聚類趨勢預測

發布時間：2024-11-11 13:54:11 來源：億速云 閱讀：79 作者：小樊 欄目：編程語言

在C++中實現聚類算法時，聚類趨勢預測是一個重要的任務。聚類趨勢預測可以幫助我們了解數據的分布和潛在結構，從而為後續的數據分析和決策提供依據。以下是一些常用的聚類和趨勢預測算法，以及如何在C++中實現它們的基本思路：

1. K-means聚類

K-means是一種廣泛使用的聚類算法，它將數據劃分為K個簇，每個簇的中心是該簇內所有數據點的均值。

基本步驟：

1. 初始化：隨機選擇K個數據點作為初始質心。
2. 分配數據點到簇：將每個數據點分配到距離最近的質心所在的簇。
3. 更新質心：計算每個簇內所有數據點的均值，更新質心的位置。
4. 迭代：重複步驟2和3，直到質心不再發生變化或達到最大迭代次數。

C++實現示例：

#include <iostream>
#include <vector>
#include <cmath>
#include <random>

using namespace std;

// A location in the 2-D plane. Kept as a plain aggregate so it can be
// brace-initialised as {x, y}.
struct Point {
    double x;  // horizontal coordinate
    double y;  // vertical coordinate
};

// Returns the Euclidean (straight-line) distance between points a and b.
double distance(const Point& a, const Point& b) {
    const double dx = a.x - b.x;
    const double dy = a.y - b.y;
    return std::sqrt(dx * dx + dy * dy);
}

vector<Point> kmeans(const vector<Point>& points, int k, int max_iterations = 100) {
    vector<Point> centroids(k);
    random_device rd;
    mt19937 gen(rd());
    uniform_int_distribution<> dis(0, points.size() - 1);

    // Initialize centroids
    for (int i = 0; i < k; ++i) {
        centroids[i] = points[dis(gen)];
    }

    for (int iter = 0; iter < max_iterations; ++iter) {
        vector<Point> clusters(k);
        vector<int> cluster_counts(k, 0);

        // Assign points to clusters
        for (const auto& point : points) {
            double min_dist = DBL_MAX;
            int min_cluster = -1;
            for (int i = 0; i < k; ++i) {
                double dist = distance(point, centroids[i]);
                if (dist < min_dist) {
                    min_dist = dist;
                    min_cluster = i;
                }
            }
            clusters[min_cluster].push_back(point);
            cluster_counts[min_cluster]++;
        }

        // Update centroids
        for (int i = 0; i < k; ++i) {
            if (cluster_counts[i] > 0) {
                Point centroid = {0, 0};
                for (const auto& point : clusters[i]) {
                    centroid.x += point.x;
                    centroid.y += point.y;
                }
                centroid.x /= cluster_counts[i];
                centroid.y /= cluster_counts[i];
                centroids[i] = centroid;
            }
        }

        // Check for convergence
        bool converged = true;
        for (int i = 0; i < k; ++i) {
            if (cluster_counts[i] > 0) {
                Point prev_centroid = centroids[i];
                for (const auto& point : clusters[i]) {
                    double dist = distance(point, prev_centroid);
                    if (dist > 1e-6) { // Arbitrary small threshold
                        converged = false;
                        break;
                    }
                }
                if (!converged) break;
                centroids[i] = prev_centroid; // Revert to previous centroid for this iteration
            }
        }

        if (converged) break;
    }

    return centroids;
}

int main() {
    vector<Point> points = {{1, 2}, {1, 4}, {1, 0}, {10, 2}, {10, 4}, {10, 0}};
    int k = 2;
    vector<Point> centroids = kmeans(points, k);

    for (const auto& centroid : centroids) {
        cout << "Centroid: (" << centroid.x << ", " << centroid.y << ")" << endl;
    }

    return 0;
}

2. DBSCAN聚類

DBSCAN（Density-Based Spatial Clustering of Applications with Noise）是一種基於密度的聚類算法，它能夠發現任意形狀的簇，並識別噪聲點。

基本步驟：

1. 初始化：選擇一個點作為核心點，並設置一個鄰域半徑和最小點數。
2. 擴展簇：從核心點開始，不斷擴展簇，直到沒有更多的點可以加入。
3. 標記噪聲點：未被分配到任何簇的點被認為是噪聲點。

C++實現示例：

#include <iostream>
#include <vector>
#include <queue>
#include <cmath>
#include <random>

using namespace std;

// A location in the 2-D plane. Kept as a plain aggregate so it can be
// brace-initialised as {x, y}.
struct Point {
    double x;  // horizontal coordinate
    double y;  // vertical coordinate
};

// Returns the Euclidean (straight-line) distance between points a and b.
double distance(const Point& a, const Point& b) {
    const double dx = a.x - b.x;
    const double dy = a.y - b.y;
    return std::sqrt(dx * dx + dy * dy);
}

vector<Point> dbscan(const vector<Point>& points, double eps, int min_samples) {
    vector<Point> clusters;
    vector<bool> visited(points.size(), false);
    random_device rd;
    mt19937 gen(rd());
    uniform_int_distribution<> dis(0, points.size() - 1);

    for (int i = 0; i < points.size(); ++i) {
        if (!visited[i]) {
            vector<Point> cluster;
            queue<int> q;
            q.push(i);
            visited[i] = true;

            while (!q.empty()) {
                int point_index = q.front();
                q.pop();
                cluster.push_back(points[point_index]);

                for (const auto& neighbor : points) {
                    if (!visited[neighbor.first] && distance(points[point_index], neighbor) <= eps) {
                        visited[neighbor.first] = true;
                        q.push(neighbor.first);
                    }
                }
            }

            if (cluster.size() >= min_samples) {
                clusters.push_back(cluster);
            }
        }
    }

    return clusters;
}

int main() {
    vector<Point> points = {{1, 2}, {1, 4}, {1, 0}, {10, 2}, {10, 4}, {10, 0}};
    double eps = 2;
    int min_samples = 2;
    vector<Point> clusters = dbscan(points, eps, min_samples);

    for (const auto& cluster : clusters) {
        cout << "Cluster:" << endl;
        for (const auto& point : cluster) {
            cout << "(" << point.x << ", " << point.y << ")" << endl;
        }
    }

    return 0;
}

3. 高斯混合模型（GMM）

高斯混合模型是一種基於概率的聚類方法，它假設數據是由多個高斯分布生成的。

基本步驟：

1. 估計參數：通過EM算法（Expectation-Maximization）估計每個高斯分布的參數（均值、協方差和混合係數）。
2. 聚類：根據每個數據點屬於各個高斯分布的概率進行聚類。

C++實現示例：

#include <iostream>
#include <vector>
#include <cmath>
#include <random>

using namespace std;

// A location in the 2-D plane. Kept as a plain aggregate so it can be
// brace-initialised as {x, y}.
struct Point {
    double x;  // horizontal coordinate
    double y;  // vertical coordinate
};

double multivariate_normal_pdf(const Point& x, const vector<Point>& mean, const vector<vector<double>>& covariance) {
    double exponent = 0.0;
    for (size_t i = 0; i < x.size(); ++i) {
        exponent += pow(x[i] - mean[i], 2) / covariance[i][i];
    }
    return exp(-0.5 * exponent) / sqrt((2 * M_PI) * pow(covariance[0][0], covariance.size()));
}

vector<vector<Point>> gmm(const vector<Point>& points, int num_components, double max_iterations = 100) {
    vector<vector<Point>> clusters(num_components);
    vector<Point> means(num_components);
    vector<vector<double>> covariances(num_components);
    vector<double> weights(num_components, 1.0 / num_components);

    // Initialize means and covariances randomly
    random_device rd;
    mt19937 gen(rd());
    uniform_int_distribution<> dis(0, points.size() - 1);

    for (int i = 0; i < num_components; ++i) {
        int index = dis(gen);
        means[i] = points[index];
        covariances[i] = {{1, 0}, {0, 1}}; // Identity matrix
    }

    for (int iter = 0; iter < max_iterations; ++iter) {
        vector<double> log_likelihood(num_components, 0.0);

        // E-step: Compute posterior probabilities
        for (size_t i = 0; i < points.size(); ++i) {
            double max_prob = -1.0;
            int max_cluster = -1;
            for (int j = 0; j < num_components; ++j) {
                double prob = multivariate_normal_pdf(points[i], means[j], covariances[j]) * weights[j];
                if (prob > max_prob) {
                    max_prob = prob;
                    max_cluster = j;
                }
            }
            log_likelihood[max_cluster] += log(max_prob);
        }

        // M-step: Update parameters
        double total_log_likelihood = accumulate(log_likelihood.begin(), log_likelihood.end(), 0.0);
        for (int j = 0; j < num_components; ++j) {
            weights[j] = exp(log_likelihood[j] - total_log_likelihood);
            weights[j] /= accumulate(weights.begin(), weights.end(), 0.0);

            Point sum = {0, 0};
            for (size_t i = 0; i < points.size(); ++i) {
                if (j == max_cluster) {
                    sum.x += points[i].x;
                    sum.y += points[i].y;
                }
            }
            means[j] = sum / points.size();

            vector<double> sum_cov(4, 0.0);
            for (size_t i = 0; i < points.size(); ++i) {
                if (j == max_cluster) {
                    sum_cov[0] += pow(points[i].x - means[j].x, 2);
                    sum_cov[1] += pow(points[i].y - means[j].y, 2);
                    sum_cov[2] += (points[i].x - means[j].x) * (points[i].y - means[j].y);
                    sum_cov[3] += pow(points[i].x - means[j].x, 2) * pow(points[i].y - means[j].y, 2);
                }
            }
            double n = points.size();
            covariances[j][0] = sum_cov[0] / n;
            covariances[j][1] = sum_cov[1] / n;
            covariances[j][2] = sum_cov[2] / n;
            covariances[j][3] = sum_cov[3] / n;
        }
    }

    // Assign points to clusters based on posterior probabilities
    for (size_t i = 0; i < points.size(); ++i) {
        double max_prob = -1.0;
        int max_cluster = -1;
        for (int j = 0; j < num_components; ++j) {
            double prob = multivariate_normal_pdf(points[i], means[j], covariances[j]) * weights[j];
            if (prob > max_prob) {
                max_prob = prob;
                max_cluster = j;
            }
        }
        clusters[max_cluster].push_back(points[i]);
    }

    return clusters;
}

int main() {
    vector<Point> points = {{1, 2}, {1, 4}, {1, 0}, {10, 2}, {10, 4}, {10, 0}};
    int num_components = 2;
    vector<vector<Point>> clusters = gmm(points, num_components);

    for (const auto& cluster : clusters) {
        cout << "Cluster:" << endl;
        for (const auto& point : cluster) {
            cout << "(" << point.x << ", " << point.y << ")" << endl;
        }
    }

    return 0;
}

這些算法只是聚類和趨勢預測的一部分方法，實際應用中可能需要根據具體需求選擇合適的算法並進行調整。希望這些示例能幫助你理解如何在C++中實現這些算法。

向AI問一下細節

C++聚類算法中的聚類趨勢預測

1. K-means聚類

基本步驟：

C++實現示例：

2. DBSCAN聚類

基本步驟：

C++實現示例：

3. 高斯混合模型（GMM）

基本步驟：

C++實現示例：

猜你喜歡

最新資訊

相關推薦

相關標籤