Time Limit: 1000MS | Memory Limit: 65536K
Total Submissions: 15090 | Accepted: 6696
Description
As an IBM researcher, you have been tasked with writing a program that will find commonalities amongst given snippets of DNA that can be correlated with individual survey information to identify new genetic markers.
A DNA base sequence is noted by listing the nitrogen bases in the order in which they are found in the molecule. There are four bases: adenine (A), thymine (T), guanine (G), and cytosine (C). A 6-base DNA sequence could be represented as TAGACC.
Given a set of DNA base sequences, determine the longest series of bases that occurs in all of the sequences.
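Input
The input begins with a line containing a single integer: the number of datasets. Each dataset consists of the following two components: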
- A single positive integer m (2 <= m <= 10) indicating the number of base sequences in this dataset.
- m lines each containing a single base sequence consisting of 60 bases.
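Output
For each dataset, output the longest run of consecutive bases that is common to all of the given sequences. If that run is shorter than three bases, output "no significant commonalities" instead. If several runs share the longest length, output the one that comes first in alphabetical order.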
Sample Input
3
2
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
3
GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA
GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA
3
CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
Sample Output
no significant commonalities
AGATAC
CATCATCAT
題意就是求最多十個字符串的最長公共子串(必須是連續的一段)的問題;若最長公共子串長度不足 3,則輸出 no significant commonalities,長度相同時取字典序最小的一個。
考慮用枚舉後綴的思路求解:從第一個字符串的第一個字符開始依次向後移動,取其後綴作爲模式串,然後用 KMP 拿到其餘每個字符串中進行匹配,記錄該後綴在每個字符串中能匹配到的最長前綴長度,取這些長度的最小值,最後取所有後綴中該最小值的最大值作爲結果。
一共最多只有十個字符串,且每個字符串都只有60個字符,所以這麼做是完全可以的。
代碼如下:
/*************************************************************************
> File Name: Blue_Jeans.cpp
> Author: Zhanghaoran
> Mail: [email protected]
> Created Time: Thu 26 Nov 2015 06:21:10 PM CST
************************************************************************/
#include <iostream>
#include <algorithm>
#include <cstring>
#include <cstdio>
#include <cstdlib>
using namespace std;
// Sequences of the current dataset, one NUL-terminated string per row (filled by scanf in main).
char str[10][150];
// KMP failure table for the pattern held in cmp; rebuilt by preKMP().
int nexti[150];
// Current pattern: a suffix of str[0], copied in by main before each Kmp_Count() call.
char cmp[150];
// Length of the pattern in cmp (main sets it to 60 minus the suffix start offset).
int len;
// Number of datasets still to be processed; read once in main and counted down.
int T;
// Number of sequences in the current dataset.
int n;
// Result of Kmp_Count(): the smallest, over str[1..n-1], of the longest prefix of cmp found in that sequence.
int ans;
// Fills nexti[0..len] with the KMP failure table of the pattern cmp[0..len).
//
// nexti[0] is the sentinel -1. For every later position the table holds the
// state to fall back to after a mismatch at that position. When the character
// at the fallback position equals the character at the current position, the
// fallback's own entry is stored instead, since retrying the same character
// would only mismatch again.
//
// Reads the globals cmp and len; writes the global nexti.
void preKMP(){
    int pos = 0;        // index in cmp whose table entry is being prepared
    int border = -1;    // candidate fallback state for pos, -1 meaning "none"
    nexti[0] = -1;

    while (pos < len) {
        // Candidate does not extend: shrink it and try again.
        if (border != -1 && cmp[pos] != cmp[border]) {
            border = nexti[border];
            continue;
        }

        // Either there is no candidate (border == -1) or it extends by one.
        ++pos;
        ++border;
        nexti[pos] = (cmp[pos] == cmp[border]) ? nexti[border] : border;
    }
}
// Computes how many leading characters of the pattern cmp[0..len) occur as a
// contiguous run in every sequence str[1..n-1], and stores that length in the
// global ans.
//
// For each sequence the pattern is matched with KMP and the largest number of
// pattern characters matched at any point is recorded; ans is the minimum of
// those per-sequence maxima. If a prefix of some length occurs in a sequence,
// every shorter prefix does too, so the minimum is a length found in all of
// them.
//
// Reads the globals str, n, cmp, len and nexti (rebuilt here through
// preKMP()); writes the global ans.
void Kmp_Count(){
    // Number of characters scanned in each sequence.
    const int kSequenceLength = 60;

    preKMP();

    // No prefix can be longer than the pattern itself, so len is a tight upper
    // bound. It replaces the former sentinel 110, which leaked out as a length
    // greater than len whenever there was no other sequence to compare against
    // (n < 2). For n >= 2 the result is unchanged, because every per-sequence
    // maximum is already <= len.
    ans = len;

    for (int seq = 1; seq < n; ++seq) {
        int text_pos = 0;   // next character of str[seq] to examine
        int matched = 0;    // KMP state: pattern characters currently matched
        int longest = 0;    // largest state reached in this sequence

        while (text_pos < kSequenceLength && matched < len) {
            if (matched == -1 || str[seq][text_pos] == cmp[matched]) {
                // Match, or the -1 sentinel: consume one text character.
                ++text_pos;
                ++matched;
            } else {
                // Mismatch: fall back without consuming text.
                matched = nexti[matched];
            }
            if (matched > longest)
                longest = matched;
        }

        if (longest < ans)
            ans = longest;
    }
}
int main(void){
scanf("%d", &T);
char ss[150];
while(T --){
scanf("%d", &n);
for(int i = 0; i < n; i ++)
scanf("%s", str[i]);
int res = 0;
for(int i = 0; i <= 57; i ++){
strcpy(cmp, str[0] + i);
len = 60 - i;
Kmp_Count();
if(res < ans){
res = ans;
strncpy(ss, str[0] + i, res);
ss[res] = '\0';
}
else if(ans == res){
char tt[150];
strncpy(tt, str[0] + i, res);
tt[res] = '\0';
if(strcmp(tt, ss) < 0)
strcpy(ss, tt);
}
}
if(res >= 3)
cout << ss << endl;
else
cout << "no significant commonalities" << endl;
}
return 0;
}