題目
中位數計算。選擇你熟悉的編程語言實現教材P31上公式(2.3)的算法,用於估算大量數據的中位數。下載測試數據文件(1百萬條數據,中位數爲49899),選擇不同的區間大小(width),考察計算結果與真實中位數的誤差。
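公式
$$\mathrm{median} = L_1 + \left(\frac{N/2 - \left(\sum \mathrm{freq}\right)_l}{\mathrm{freq}_{\mathrm{median}}}\right)\times \mathrm{width}$$
其中 $L_1$ 是中位數所在區間的下界,$N$ 是數據總個數,$\left(\sum \mathrm{freq}\right)_l$ 是低於中位數區間的所有區間的頻數之和,$\mathrm{freq}_{\mathrm{median}}$ 是中位數區間的頻數,$\mathrm{width}$ 是區間寬度。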
代碼實現
matlab
% Estimate the median of a large data set from grouped (binned) frequencies
% using the interpolation formula
%     median = L1 + ((N/2 - sigma) / freqMedian) * width
% where L1 is the lower bound of the bin containing the median, sigma is the
% total frequency of all bins below it and freqMedian is that bin's frequency.
%
% Input : rand1m.txt, whitespace-separated integers.
% Output: workspace variable medianEst (the estimate, truncated toward zero).
%
% The variables are deliberately NOT called max/min/median: assigning to
% those names shadows the built-in functions for the rest of the session.
clear all;
clc;
width = 100; % bin width
[a] = textread('rand1m.txt','%d');
a = double(a(:)); % column vector of doubles so the divisions below are not integer-rounded
N = numel(a);     % take the sample size from the data instead of hard-coding it
if N == 0
    error('rand1m.txt contains no data.');
end
% median(a) could be used here directly to obtain the exact median.
maxVal = max(a);
minVal = min(a);

% Number of bins. floor(...)+1 guarantees that the bin index of the largest
% value (computed below with the same expression) is always <= num; the
% previous round() could come up one bin short.
num = floor((maxVal - minVal) / width) + 1;

% Count how many samples fall into each bin (1-based bin index).
binIdx = floor((a - minVal) / width) + 1;
count = accumarray(binIdx, 1, [num 1]);

% Locate the bin that contains the median: the first bin whose cumulative
% frequency reaches N/2. sigma is the cumulative frequency of the bins
% strictly before it.
cumCount = cumsum(count);
index = find(cumCount >= N/2, 1, 'first');
sigma = cumCount(index) - count(index);

% Apply the interpolation formula.
L1 = minVal + width*(index-1); % lower bound of the median bin
medianEst = L1 + ((N/2 - sigma)/count(index)) * width;
medianEst = fix(medianEst);
C
#include<stdio.h>
const int width=100; // bin width used to group the data

/*
 * Estimates the median of the integers stored in "rand1m.txt" from binned
 * frequencies, using the interpolation formula
 *     median = L1 + ((N/2 - sigma) / freq_median) * width
 * where L1 is the lower bound of the bin containing the median, sigma is the
 * total frequency of the bins below it and freq_median is that bin's count.
 *
 * The file is streamed twice (once to find min/max/N, once to fill the bins)
 * so the data set never has to be held in memory.
 *
 * Prints L1 and the estimated median. Returns 0 on success, 1 if the file
 * cannot be opened or contains no readable numbers.
 */
int main(){
    FILE *fp;
    int a,max=0,min=0,N=0,median;

    if( (fp = fopen("rand1m.txt","r"))==NULL ){
        printf("error");
        return 1; // do not fall through and read from a NULL stream
    }

    // Pass 1: find the extremes and the sample count.
    // max/min are seeded from the first value read rather than a magic constant.
    while(fscanf(fp,"%d%*[^0-9]",&a)>0){
        if(N==0 || a>max)
            max = a;
        if(N==0 || a<min)
            min = a;
        N++;
    }
    if(N==0){
        // Nothing to bin; the scan below would otherwise index count[-1].
        fclose(fp);
        printf("error");
        return 1;
    }

    int num = (max - min) / width + 1; // number of bins
    int* count = new int[num]();       // per-bin frequencies, zero-initialised

    // Pass 2: re-read the same stream and count the values per bin.
    rewind(fp);
    while(fscanf(fp,"%d%*[^0-9]",&a)>0){
        count[(a-min)/width]++;
    }
    fclose(fp);

    // Find the median bin: the first bin whose cumulative frequency reaches
    // N/2. sigma accumulates the frequencies of the bins strictly before it.
    // N/2.0 avoids truncating the half-count when N is odd, and the index
    // guard keeps the scan inside the array.
    const double half = N / 2.0;
    int sigma = 0;
    int index = 0;
    while(index < num-1 && sigma + count[index] < half){
        sigma += count[index];
        index ++;
    }

    // Apply the formula. count[index] is non-zero here because this bin is
    // the one that lifts the cumulative frequency from below half to >= half.
    int L1 = min + width*index; // lower bound of the median bin
    double rate = (half - sigma) / count[index];
    median = (int)(L1 + rate * width);
    printf("L1=%d\nmedian=%d\n",L1,median);

    delete[] count;
    return 0;
}