https://www.douban.com/note/275544555/
題目:給出N個(gè)無序的數(shù)玄呛,然后找出其中最大的k個(gè)數(shù)
解題思路:
?????????首先測(cè)試數(shù)據(jù)有可能會(huì)有一億個(gè)數(shù)件缸,數(shù)據(jù)量特別的大基括,數(shù)據(jù)庫(kù)不可能存儲(chǔ)這么多的數(shù)據(jù)杨赤。如果直接sort排序,NlogN時(shí)間復(fù)雜度實(shí)在是太高句葵,大于10^9。我們可以考慮對(duì)數(shù)據(jù)進(jìn)行分塊讀取兢仰,每次讀取的數(shù)據(jù)塊大小應(yīng)大于k乍丈。
?????????不如先假設(shè)第一次讀取的數(shù)據(jù)塊前k個(gè)數(shù)最大,然后把k個(gè)數(shù)建成最小二叉堆把将。然后從第k+1個(gè)數(shù)開始轻专,每個(gè)數(shù)都與堆頂?shù)臄?shù)值進(jìn)行比較,如果數(shù)字i大于堆頂則把堆頂?shù)脑氐脑靥鎿Q成i察蹲,再調(diào)整一次堆请垛。最后讀取完數(shù)據(jù)之后,這個(gè)二叉堆里面的元素就是從小到大排序好的最大k個(gè)數(shù)洽议。
時(shí)間復(fù)雜度:O(NlogK)
空間復(fù)雜度:O(K)
證明過程:
?????????為什么求最大的k個(gè)用的不是最大堆宗收,而是最小堆?最大堆堆頂?shù)脑厥亲畲蟮难切郑碌淖訕湓絹碓叫』旎袾個(gè)數(shù)建成最大堆,那么堆頂往下的k個(gè)數(shù)就是最大的k個(gè)數(shù)审胚。但是時(shí)間復(fù)雜度O(NlogN)和空間復(fù)雜度O(N)太高匈勋!
?????????排序時(shí)間復(fù)雜度很高,是因?yàn)檫M(jìn)行了很多沒有用的判斷膳叨,我們只需要取最大的k個(gè)數(shù)洽洁,而排序則把N個(gè)數(shù)都從小到大排序好了。建立一個(gè)k個(gè)數(shù)的最小堆菲嘴,假設(shè)堆里面的元素是最大的饿自,當(dāng)然只是假設(shè)碎浇。如果從M+1到N這些數(shù)只要有數(shù)大于最小堆堆頂?shù)臄?shù),那么假設(shè)就不成立璃俗,堆頂那個(gè)數(shù)就不符合奴璃,自然把它去掉,把新的數(shù)加進(jìn)來城豁,再重新調(diào)整堆苟穆,使得堆頂?shù)脑刈钚 ?/p>
?????????為什么要用最小堆呢?因?yàn)槊看尾檎疫@k個(gè)數(shù)里面的最小的那個(gè)數(shù)就是堆頂唱星,時(shí)間復(fù)雜度是O(1)雳旅。如果直接用數(shù)組來存儲(chǔ)這k個(gè)數(shù),雖然查找的時(shí)間復(fù)雜度是logN间聊,但是當(dāng)把這個(gè)數(shù)插入數(shù)組的時(shí)候攒盈,數(shù)組比它小其他元素還需要往前平移,所以時(shí)間復(fù)雜度遠(yuǎn)遠(yuǎn)大于logN哎榴。由于每次調(diào)整堆的時(shí)間復(fù)雜度是logN型豁。所以最小堆的做法的時(shí)間復(fù)雜度是
O(NlogK),而空間復(fù)雜度只有O(K)尚蝌。
代碼1? C++的STL庫(kù)優(yōu)先隊(duì)列實(shí)現(xiàn)二叉堆:
#include
#include
#include
#include
#include
using namespace std;
struct cmp{
???bool operator ()(int a,int b)
???{
???????return a>b;
???}
};
#define MAX 11000
int a[MAX];
using namespace std;
priority_queue,cmp>q;
int main()
{
???int n,i,k,m,top;
???scanf("%d%d",&n,&m);
?????for(i=1;i<=n;i++)
?????{
??????????scanf("%d",&k);
??????????if(i<=m)? //前m個(gè)數(shù)入 隊(duì)列
??????????{
???????????????q.push(k);
???????????????if(i==m)? //紀(jì)錄前m個(gè)數(shù)中最小的數(shù)
????????????????????top=q.top();
??????????}
??????????else
??????????{
???????????????if(k>top)? //如果新加入的數(shù)大于隊(duì)列中最小的數(shù)則出隊(duì)
???????????????{
????????????????????q.pop();
????????????????????q.push(k);
????????????????????top=q.top();
???????????????}
??????????}
?????}
?????k=0;
?????while(!q.empty())? //這樣處理是為了最后一個(gè)數(shù)打印時(shí)沒有空格
?????{
??????????a[k++]=q.top();
??????????q.pop();
?????}
?????for(i=0;i
?????{
??????????printf("%d",a[i]);
??????????if(i==k-1)
???????????????printf("\n");
??????????else
???????????????printf(" ");
?????}
???return 0;
}
代碼2 (數(shù)組實(shí)現(xiàn)堆):
#include
#define MAX 10001
int a[MAX];
void HeapAdjust(int R[],int s,int t)? //篩選函數(shù)1
{
???int i,j,temp;
???temp=R[s];
???i=s;
???for(j=2*i;j<=t;j=2*j)
???{
???????if(j
???????????j++;
???????if(temp>R[j]) break;
???????R[i]=R[j];
???????i=j;
???}
???R[i]=temp;
}
void HeapSort(int R[],int n)? //堆排
{
???int i;
???for(i=n/2;i>0;i--)
???{
???????HeapAdjust(R,i,n);
???}
???for(i=n;i>1;i--)
???{
???????R[1]^=R[i];
???????R[i]^=R[1];
???????R[1]^=R[i];
???????HeapAdjust(R,1,i-1);
???}
}
void HeapAdjust2(int R[],int s,int t)? //篩選函數(shù)2
{
???int i,j,temp;
???temp=R[s];
???i=s;
???for(j=2*i;j<=t;j=2*j)
???{
???????if(jR[j+1])
???????????j++;
???????if(temp
???????R[i]=R[j];
???????i=j;
???}
???R[i]=temp;
}
int main()
{
???int i,k,n,m;
???scanf("%d%d",&n,&m);
???for(i=1;i<=m;i++)
???{
???????scanf("%d",&a[i]);
???}
???HeapSort(a,m);
???for(i=m+1;i<=n;i++)
???{
???????scanf("%d",&k);
???????if(k>a[1])? ? ? ? //新元素大于堆中最小元素則加入堆
???????{
???????????a[1]=k;
???????????HeapAdjust2(a,1,m);? //從根節(jié)點(diǎn)開始重新篩選一次
???????}
???}
???HeapSort(a,m);
???for(i=1;i<=m;i++)
???{
???????printf("%d",a[i]);
???????if(i==m)
???????????printf("\n");
???????else
???????????printf(" ");
???}
???return 0;
}