zoukankan      html  css  js  c++  java
  • 简易kmeans-c++版本

    typedef double dtype;
    主要接口:
    void Kmeans(const vector<vector<dtype> > &d,int k,string distype,vector<vector<int> > &kset,map<int,int> &category,vector<vector<dtype> > &kcenter)
    
    参数:
    vector<vector<dtype> > &d: 数据矩阵mxn
    int k:聚类类别数目
    string distype:距离类别,"sqeuclidean"表示欧式距离,"cosine"表示余弦距离,"hamming"表示海明距
    vector<vector<int> > &kset:初始化K个中心
    map<int,int> &category:返回的类别号
    vector<vector<dtype> > &kcenter:返回的类别中心
    
    double SSE(const vector<vector<dtype> > &matrix,const vector<vector<dtype> > &kcenter,map<int,int> &category,string distype,map<int,double> &EverySSE)
    返回总的误差平方和,map<int,double> &EverySSE返回每个类的误差平方和
    #include<iostream>
    #include<vector>
    #include<map>
    #include<algorithm>
    #include<cmath>
    #include<fstream>
    #include<string>
    #include<assert.h>
    #include<stdlib.h>
    #include<time.h> 
    int const N = 1000;
    int const eps = 1e-3;
    using namespace std;
    typedef double dtype;
    typedef vector<int>::size_type vt;
    double disfun(const vector<dtype> &a,const vector<dtype> &b,string distype)
    {
        double dis_sum=0;
        if(distype=="sqeuclidean")
        {
            for(vt i=0;i<a.size();++i)  
                dis_sum+=pow(a[i]-b[i],2);
        }
        if(distype=="cosine")
        {
            double len_a=0,len_b=0,in_product=0;
            for(vt i=0;i<a.size();++i)
            {
                len_a+=pow(a[i],2);
                len_b+=pow(b[i],2);
                in_product+=a[i]*b[i];
            }   
            dis_sum = 1-in_product*1.0/(sqrt(len_a)*sqrt(len_b));       
        }
        if(distype=="hamming")
        {
            int count=0;
            for(vt i=0;i<a.size();++i)
            {
                if(a[i]!=b[i])count++;  
            }
            dis_sum = count*1.0/a.size();
        }
        return dis_sum;
    }
    vector<int> random_chooseK(const int n,int k)
    {
        map<int,int> mvis;
        vector<int> vec;
        int i=0;
        cout<<"n:"<<n<<"k:"<<k<<endl;
        cout<<"random choose K ok"<<endl;
        srand((unsigned)time(NULL));
        while(i<k)
        {
            int tmp=rand()%(n);
            if(!mvis.count(tmp)){
                mvis[tmp]=1;
                vec.push_back(tmp);
                i++;
            }       
        }
        for(int &i:vec)cout<<i<<"->";
        cout<<"random choose K over"<<endl;
        return vec;     
    } 
    bool nochange(const vector<vector<dtype> > &kcenter,const vector<vector<dtype> > &prekcenter)
    {
        for(int i=0;i<kcenter.size();++i)
        {
            string st("sqeuclidean");
            if(disfun(kcenter[i],prekcenter[i],st)>eps)return false;
        }   
        return true;
    }
    vector<dtype> sum_vector(const vector<dtype> &a,const vector<dtype> &b)
    {
        vector<dtype> avec;
        assert(a.size()==b.size());
        for(int i=0;i<a.size();++i)
        {
            avec.push_back(a[i]+b[i]);
        }
        return avec;
    }
    void setZero(vector<dtype> &vec)
    {
        for(int i=0;i<vec.size();++i)
        {
            vec[i]=0;
        }   
    }
    void AlterKcenter(const vector<vector<dtype> > &d,vector<vector<dtype> > &kcenter,const vector<vector<int> > &kset,string distype)
    {
        for(int i=0;i<kcenter.size();++i)
        {
            setZero(kcenter[i]);
            cout<<i<<":"<<kset[i].size()<<endl;
            for(int j=0;j<kset[i].size();++j)
            {
                kcenter[i] = sum_vector(d[kset[i][j]],kcenter[i]);  
            }
            for(int p=0;p<kcenter[i].size();++p)
            {
                if(distype=="hamming")
                    kcenter[i][p]=int(kcenter[i][p]*1.0/kset[i].size()+0.5);
                else kcenter[i][p]=kcenter[i][p]*1.0/kset[i].size();
            }
        }
    }
    void Kmeans(const vector<vector<dtype> > &d,int k,string distype,vector<vector<int> > &kset,map<int,int> &category,vector<vector<dtype> > &kcenter)
    {
        vector<int> kgram=random_chooseK(d.size(),k);
        vector<vector<dtype> > prekcenter;
        //vector<vector<int> > kset; 
        for(int i=0;i<k;++i){
            vector<int> vitmp;
            kset.push_back(vitmp);
        }
        cout<<"kset"<<endl;
        for(vt i=0;i<kgram.size();++i)kcenter.push_back(d[kgram[i]]);
        cout<<"kset over"<<endl;
        bool flag =false;
        int count=0;
        while(!flag)
        {
            for(int x=0;x<k;++x)kset[x].clear();
            for(int j=0;j<d.size();++j)
            {
                double min_dis= 1e308;
                int index = -1;
                for(int m =0;m <k;++m)
                {
                    double dis=disfun(d[j],kcenter[m],distype);
                    if(dis<min_dis)
                    {
                        min_dis=dis;
                        index = m;
                    }
                }
                kset[index].push_back(j);
                category[j]=index;  
            }
            prekcenter = kcenter;
            count++;
            cout<<"alterkcenter begin"<<endl; 
            AlterKcenter(d,kcenter,kset,distype);
            cout<<"alterkcenter finished"<<endl; 
            flag = nochange(kcenter,prekcenter);
            cout<<"µÚ"<<count<<"´Îµü´ú"<<endl; 
        }                       
    }
    void printmatrix(const vector<vector<dtype> > &matrix)
    {
        for(int i=0;i<matrix.size();++i)
        {
            for(int j=0;j<matrix[i].size();++j)
                cout<<matrix[i][j]<<"  ";
            cout<<endl;     
        }
    }
    double SSE(const vector<vector<dtype> > &matrix,const vector<vector<dtype> > &kcenter,map<int,int> &category,string distype,map<int,double> &EverySSE)
    {
        double TotalSSE=0;
        //map<int,double> EverySSE;
        for(int i=0;i<matrix.size();++i)
        {
            double tmpsse=0;
            tmpsse=disfun(matrix[i],kcenter[category[i]],distype);
            if(EverySSE.count(category[i]))
                EverySSE[category[i]]+=tmpsse;
            else EverySSE[category[i]]=tmpsse;
            TotalSSE+=tmpsse;
        }   
        return TotalSSE;            
    }
    void saveresult(const vector<vector<dtype> > &matrix,vector<vector<int> > &kset)
    {
        fstream f("result.txt",ios::out);
        map<int,int> category;
        cout<<"kset size:"<<kset.size()<<endl;
        f<<"total:"<<endl;
        for(int i=0;i<kset.size();++i){
            f<<"µÚ"<<i<<"Àà:"<<kset[i].size()<<endl; 
        }
        for(int i=0;i<kset.size();++i)
        {
            cout<<kset[i].size()<<endl;
            for(int j=0;j<kset[i].size();++j)
                category[kset[i][j]]=i;
        }
        for(int i=0;i<matrix.size();++i)
        {
            f<<i<<": "<<category[i]<<endl; 
        } 
        for(int i=0;i<matrix.size();++i)
        {
            f<<"----------------------------------"<<category[i]<<"---------------------------------"<<endl;
            for(int j=0;j<matrix[i].size();++j)
                f<<matrix[i][j]<<"  ";
            f<<endl;    
        }
        f.close();
    
    }
    int main()
    {
        vector<vector<dtype> > matrix;
        int all=0;
        srand((unsigned)time(0));
        while(all<600)
        {
            vector<dtype> row;
            for(int n=1;n<=600;++n)
            {
                int tmp=rand()%(2);
                if(rand()/double(RAND_MAX)>0.6)tmp=0;
                //cout<<tmp<<endl;
                row.push_back(tmp); 
            }
            matrix.push_back(row);
            all++;
        }
        //printmatrix(matrix);
        vector<vector<int> > kset;
        vector<vector<dtype> > kcenter;
        map<int,int> category;
        string st("hamming");
        Kmeans(matrix,30,st,kset,category,kcenter);
        saveresult(matrix,kset);
        map<int,double> EverySSE;
        cout<<SSE(matrix,kcenter,category,st,EverySSE)<<endl;
        map<int,double>:: const_iterator it=EverySSE.begin();
        while(it!=EverySSE.end()){
            cout<<it->first<<":"<<it->second<<endl; 
            it++;
        }
        system("pause");
        return 0;
    }
  • 相关阅读:
    第15次Scrum会议(10/27)【欢迎来怼】
    C语言--第三周作业评分和总结(5班)
    例行报告(20171020-20171025)
    MongoDB主从复制
    副本集
    MongoDB索引
    聚合框架
    MongoDB查询操作
    MongoDB增删改
    MongoDB基本安装
  • 原文地址:https://www.cnblogs.com/freeopen/p/5483033.html
Copyright © 2011-2022 走看看