序列壓縮中基於Markov預測模型的Finite Context Model源碼實現,還未實現位數非常大時的加減乘除運算,更新中…
#include "stdio.h"
#include "iostream"
#include "map"
#include <string.h>
#define K 5 // order of the finite-context model: number of preceding symbols used as context
using namespace std;

/**
 * One dictionary entry of the finite-context model.
 *
 * `str` holds an observed string (K context symbols followed by the symbol
 * that came next) and `num` counts how many times it has been seen.
 */
struct dct
{
public:
    /** Creates an empty entry (count 0, empty string). */
    dct() : num(0) { str[0] = '\0'; }

    /**
     * Creates an entry with count `x` and a copy of the C string `y`.
     * The copy is bounded: anything beyond K+1 characters is dropped so the
     * stored string is always NUL-terminated.
     */
    dct(int x, const char *y) : num(x)
    {
        strncpy(str, y, sizeof(str) - 1);
        str[sizeof(str) - 1] = '\0';
    }

    int num;
    char str[K + 2]; // K context symbols + 1 following symbol + terminating NUL
};

map<int, dct> mymap;         // global dictionary: insertion number -> entry
map<int, dct>::iterator it;  // shared cursor used by the sch* lookups below
// Key given to the most recently inserted entry. Deliberately not called
// `index`: that name clashes with the index() function some C libraries
// declare through <string.h>.
int entry_index = 0;

/**
 * Records one observation of `input1` in the dictionary.
 *
 * If an entry whose string equals `input1` exists, its counter is incremented;
 * otherwise a new entry with counter 1 is inserted.
 *
 * @param input1 NUL-terminated string (context + symbol) to record.
 * @return the counter value *before* this observation (0 if the string was new).
 */
int sch1(const char *input1)
{
    for (it = mymap.begin(); it != mymap.end(); ++it)
    {
        if (strcmp(it->second.str, input1) == 0)
        {
            const int previous = it->second.num;
            it->second.num += 1;
            return previous;
        }
    }
    mymap.insert(make_pair(++entry_index, dct(1, input1)));
    return 0;
}

/**
 * Counts how often the context of `input2` has been observed.
 *
 * Sums the counters of every dictionary entry whose first K characters equal
 * the first K characters of `input2`.
 *
 * @param input2 NUL-terminated string whose first K characters are the context.
 * @return total count over all entries sharing that context.
 */
int sch2(const char *input2)
{
    int count2 = 0;
    for (it = mymap.begin(); it != mymap.end(); ++it)
    {
        // strncmp bounds the comparison to the context; no temporary,
        // unterminated copies are needed.
        if (strncmp(it->second.str, input2, K) == 0)
        {
            count2 += it->second.num;
        }
    }
    return count2;
}

/**
 * Looks up the counter of the entry whose string equals `input3` exactly.
 *
 * @param input3 NUL-terminated string (context + symbol).
 * @return the entry's counter, or 0 when no such entry exists.
 */
int sch3(const char *input3)
{
    for (it = mymap.begin(); it != mymap.end(); ++it)
    {
        if (strcmp(it->second.str, input3) == 0)
        {
            return it->second.num;
        }
    }
    return 0;
}

/** Maps a symbol to its row in the interval table: A=0, T=1, C=2, anything else=3 (G). */
static int symbol_slot(char c)
{
    switch (c)
    {
    case 'A': return 0;
    case 'T': return 1;
    case 'C': return 2;
    default:  return 3;
    }
}

/** Narrows the coding interval `rng` to the sub-interval `sub` (both given as [low, high]). */
static void narrow_interval(double rng[2], const double sub[2])
{
    const double low = rng[0];
    const double range = rng[1] - rng[0];
    rng[0] = low + range * sub[0];
    rng[1] = low + range * sub[1];
}

/** Reads the next character from `f`, skipping line breaks; returns EOF at end of input. */
static int next_symbol(FILE *f)
{
    int ch;
    do
    {
        ch = fgetc(f);
    } while (ch == '\n' || ch == '\r');
    return ch;
}

/**
 * Arithmetic-codes the symbol sequence in "test.txt" with an order-K
 * finite-context model and prints the coding interval after every symbol.
 *
 * The first K symbols are coded with fixed probabilities of 0.25 each. Every
 * later symbol s with context c is coded with
 *     p(s|c) = (n(s|c) + alpha) / (n(c) + 4*alpha)
 * where the counts come from the dictionary, which is updated after each symbol.
 *
 * Limitation: the interval is kept in plain doubles, so it loses all precision
 * after a few dozen symbols; arbitrary-precision arithmetic is not implemented.
 *
 * @return 0 on success, 1 if the input file cannot be opened.
 */
int main()
{
    static const char symbols[4] = {'A', 'T', 'C', 'G'};
    // Code-space partition, one [low, high] row per symbol in the order above.
    double interval[4][2] = {{0, 0.25}, {0.25, 0.5}, {0.5, 0.75}, {0.75, 1}};
    double rng[2] = {0, 1};  // current coding interval
    char buff[K + 2];        // K context symbols + current symbol + NUL
    char buff_copy[K + 2];   // scratch copy used to probe each candidate symbol
    const double aofa = 0.25; // alpha, the smoothing constant of the estimator

    FILE *f = fopen("test.txt", "r");
    if (f == NULL)
    {
        fprintf(stderr, "cannot open test.txt\n");
        return 1;
    }

    // Warm-up: no full context exists yet, so the first K symbols are coded
    // with the initial uniform partition.
    int ch = EOF;
    int filled = 0;
    while (filled < K && (ch = next_symbol(f)) != EOF)
    {
        buff[filled++] = static_cast<char>(ch);
        narrow_interval(rng, interval[symbol_slot(static_cast<char>(ch))]);
        cout << '[' << rng[0] << ',' << rng[1] << ']' << '\n';
    }
    buff[K + 1] = '\0';

    // Main loop: buff[0..K-1] is the context, buff[K] the symbol being coded.
    // It only runs when a full context was read.
    while (filled == K && (ch = next_symbol(f)) != EOF)
    {
        buff[K] = static_cast<char>(ch);

        // n(c): how often this context has been seen so far.
        const int n_c = sch2(buff);

        // n(s|c) for each candidate symbol s.
        int n_sym[4];
        memcpy(buff_copy, buff, sizeof(buff));
        for (int s = 0; s < 4; ++s)
        {
            buff_copy[K] = symbols[s];
            n_sym[s] = sch3(buff_copy);
        }

        // Record the current observation. This happens after the counts are
        // read so the symbol is coded with statistics from the past only.
        sch1(buff);

        // Rebuild the partition from the estimated probabilities. The last
        // row is closed at exactly 1 so rounding cannot leave a gap.
        double upper = 0;
        for (int s = 0; s < 3; ++s)
        {
            interval[s][0] = upper;
            upper += (n_sym[s] + aofa) / (n_c + aofa * 4);
            interval[s][1] = upper;
        }
        interval[3][0] = upper;
        interval[3][1] = 1;

        // Code the current symbol and print the resulting interval.
        narrow_interval(rng, interval[symbol_slot(buff[K])]);
        cout << "[" << rng[0] << "," << rng[1] << "]" << '\n';

        // Slide the window: drop the oldest context symbol so the symbol just
        // coded becomes the newest one.
        memmove(buff, buff + 1, K);
    }

    fclose(f);
    return 0;
}