1985年由K. A. Dill建立的模型[Dil85]。在该模型中,氨基酸分为两类:疏水(H)和亲水(P)。最佳折叠是使在网格上相邻、但并非共价相连的H-H对数量最大的折叠,以此模拟疏水效应。下面是一个例子:
下面给出了HPHHHHPPHPPH串的折叠。折叠的分数是4(见红色邻接)。
H - P P - P
- - -
H H H - P
- - -
H - H H - P
我正在尝试用C++编写一个算法,使得分最大化并返回最佳构型,但我的代码无法正常运行,所以想请大家帮忙。
这是我的代码:
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <math.h>
using namespace std;
// Marker stored in every grid cell that holds no residue; solve() compares cells against it.
string EMPTY = " ";
/**
 * One step of an exhaustive HP-model fold search on a square lattice.
 *
 * Moves one cell from (posX, posY) in `direction`, places residue
 * protein[sequenceIndex] there if the cell is on the grid and empty, adds the
 * new non-covalent H-H contacts to `score`, and then recurses in all four
 * directions for the next residue.
 *
 * @param protein        sequence of 'H' / 'P' residues
 * @param bestScore      best score of any complete fold found so far
 * @param sequenceIndex  index of the residue to place in this step
 * @param currentGrid    lattice of cells; empty cells hold EMPTY. It is
 *                       modified while the search runs and restored to its
 *                       entry state before returning.
 * @param posX, posY     cell of the previously placed residue
 * @param direction      'R' (posY+1), 'D' (posX+1), 'L' (posY-1) or 'U' (posX-1)
 * @param score          H-H contacts of the partial fold built so far
 * @param pDirection     moves of the partial fold built so far; restored to
 *                       its entry state before returning
 * @return the larger of `bestScore` and the best complete fold reachable
 *         through this step
 */
int solve(string protein,int bestScore,unsigned int sequenceIndex, vector <vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection)
{
    const char allDirections[4] = {'R','D','L','U'};
    // Row / column offsets of the four lattice neighbours (R, D, L, U).
    const int stepX[4] = {0, 1, 0, -1};
    const int stepY[4] = {1, 0, -1, 0};

    // Nothing left to place: the score reached so far is final.
    if (sequenceIndex >= protein.size())
    {
        return (score > bestScore) ? score : bestScore;
    }

    // Remember the predecessor's cell: it is the covalent neighbour and must
    // not be counted as a contact.
    const int prevX = posX;
    const int prevY = posY;

    if (direction == 'R')
    {
        posY += 1;
    }
    else if (direction == 'D')
    {
        posX += 1;
    }
    else if (direction == 'L')
    {
        posY -= 1;
    }
    else if (direction == 'U')
    {
        posX -= 1;
    }

    // Dead end: the step leaves the grid or lands on an occupied cell.
    if (posX < 0 || posX >= static_cast<int>(currentGrid.size()))
    {
        return bestScore;
    }
    if (posY < 0 || posY >= static_cast<int>(currentGrid[posX].size()))
    {
        return bestScore;
    }
    if (currentGrid[posX][posY] != EMPTY)
    {
        return bestScore;
    }

    // Place the residue in place; it is removed again before returning, so no
    // per-call copy of the grid is needed.
    const char residue = protein[sequenceIndex];
    currentGrid[posX][posY] = residue;
    pDirection.push_back(direction);

    if (residue == 'H')
    {
        // Every H neighbour other than the predecessor is an earlier,
        // non-consecutive residue, so each contact is counted exactly once
        // (when its later partner is placed).
        for (int k = 0; k < 4; k++)
        {
            const int nx = posX + stepX[k];
            const int ny = posY + stepY[k];
            if (nx == prevX && ny == prevY)
            {
                continue;
            }
            if (nx < 0 || nx >= static_cast<int>(currentGrid.size()))
            {
                continue;
            }
            if (ny < 0 || ny >= static_cast<int>(currentGrid[nx].size()))
            {
                continue;
            }
            if (currentGrid[nx][ny] == "H")
            {
                score += 1;
            }
        }
    }

    if (sequenceIndex + 1 < protein.size())
    {
        // Try every direction for the next residue.
        for (int i = 0; i < 4; i++)
        {
            bestScore = solve(protein, bestScore, sequenceIndex + 1, currentGrid, posX, posY, allDirections[i], score, pDirection);
        }
    }
    else if (score > bestScore)
    {
        // Last residue placed: this is a complete fold.
        bestScore = score;
    }

    // Backtrack so the caller sees the grid and the move list unchanged.
    currentGrid[posX][posY] = EMPTY;
    pDirection.pop_back();
    return bestScore;
}
/**
 * Folds a fixed HP sequence: prints the sequence, its length, and the best
 * score returned by solve().
 */
int main()
{
    // Residue 0 is placed on the grid below, so the search starts at residue 1.
    unsigned int sequenceIndex = 1;
    int score = 0;
    int bestScore = 0;
    vector<char> pDirection;
    string protein = "HPPHPPH";

    // The chain starts in the centre cell (n, n). A chain of n residues reaches
    // at most n-1 cells away and neighbour lookups read one cell further, so a
    // side of 2n+1 (indices 0..2n) keeps every access on the grid.
    int posX = protein.size();
    int posY = protein.size();
    int gridSize = protein.size()*2 + 1;
    vector <vector<string> > currentGrid(gridSize, vector<string>(gridSize, EMPTY));

    // Place only the first residue (not the whole sequence) in the centre cell.
    currentGrid[posX][posY] = protein.at(0);

    cout<<protein<<endl;
    cout<<protein.size()<<endl;
    // The first move is fixed to 'R'; the other three starts are rotations of it.
    cout<<solve(protein,bestScore,sequenceIndex, currentGrid,posX,posY,'R',score,pDirection)<<endl;
}
答案 0 :(得分:1)
网上搜索证实了我在媒体上听到的说法——一般形式的蛋白质折叠问题是很难的。http://www.brown.edu/Research/Istrail_Lab/papers/10.1.1.110.3139.pdf 描述了针对与您所提问题类似的问题的近似解法。
答案 1 :(得分:0)
对于偶然看到这个问题的人:我已经修复了这段代码。不过它运行得非常慢,因为它会尝试所有可能的结构。代码如下:
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <math.h>
#include <unistd.h>
using namespace std;
// Marker stored in every grid cell that holds no residue.
string EMPTY = "_";
// Running count of complete folds; solve() increments it each time the last residue is placed.
int numfolds = 0;
// Forward declarations of the functions defined below.
void printGrid(vector<vector<string> > grid);
int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection);
/**
 * Prints the grid to standard output, one row per line, with the cells of a
 * row written back to back (no separators).
 *
 * The parameter stays by-value so the definition keeps matching the forward
 * declaration earlier in the file.
 *
 * @param grid  rows of cell strings; rows may differ in length
 */
void printGrid(std::vector<std::vector<std::string> > grid)
{
    for (unsigned int i = 0; i < grid.size(); i++)
    {
        // Use each row's own length rather than the row count, so non-square
        // grids are printed completely and never read out of range.
        for (unsigned int j = 0; j < grid[i].size(); j++)
        {
            std::cout << grid[i][j];
        }
        std::cout << '\n';
    }
    // Flush once for the whole grid instead of once per row.
    std::cout << std::flush;
}
/**
 * One step of an exhaustive HP-model fold search on a square lattice.
 *
 * Moves one cell from (posX, posY) in `direction`, places residue
 * protein[sequenceIndex] there if the cell is on the grid and empty, adds the
 * new non-covalent H-H contacts to `score`, and then recurses in all four
 * directions for the next residue. When the last residue has been placed the
 * fold is counted in `numfolds`, and printed if it beats `bestScore`.
 *
 * @param protein        sequence of 'H' / 'P' residues
 * @param bestScore      best score of any complete fold found so far
 * @param sequenceIndex  index of the residue to place in this step
 * @param currentGrid    lattice of cells; empty cells hold EMPTY. It is
 *                       modified while the search runs and restored to its
 *                       entry state before returning.
 * @param posX, posY     cell of the previously placed residue
 * @param direction      'R' (posY+1), 'D' (posX+1), 'L' (posY-1) or 'U' (posX-1)
 * @param score          H-H contacts of the partial fold built so far
 * @param pDirection     moves of the partial fold built so far; restored to
 *                       its entry state before returning
 * @return the larger of `bestScore` and the best complete fold reachable
 *         through this step
 */
int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection)
{
    const char allDirections[4] = {'R','D','L','U'};
    // Row / column offsets of the four lattice neighbours (R, D, L, U).
    const int stepX[4] = {0, 1, 0, -1};
    const int stepY[4] = {1, 0, -1, 0};

    // Nothing left to place: the score reached so far is final.
    if (sequenceIndex >= protein.size())
        return max(score, bestScore);

    // Remember the predecessor's cell: it is the covalent neighbour and must
    // not be counted as a contact.
    const int prevX = posX;
    const int prevY = posY;

    if (direction == 'R')
        posY += 1;
    else if (direction == 'D')
        posX += 1;
    else if (direction == 'L')
        posY -= 1;
    else if (direction == 'U')
        posX -= 1;

    // Dead end: the step leaves the grid or lands on an occupied cell.
    if (posX < 0 || posX >= static_cast<int>(currentGrid.size()))
        return bestScore;
    if (posY < 0 || posY >= static_cast<int>(currentGrid[posX].size()))
        return bestScore;
    if (currentGrid[posX][posY] != EMPTY)
        return bestScore;

    // Place the residue in place; it is removed again before returning, so no
    // per-call copy of the grid is needed.
    const char residue = protein[sequenceIndex];
    currentGrid[posX][posY] = residue;
    pDirection.push_back(direction);

    if (residue == 'H')
    {
        // Every H neighbour other than the predecessor is an earlier,
        // non-consecutive residue, so each contact is counted exactly once
        // (when its later partner is placed).
        for (int k = 0; k < 4; k++)
        {
            const int nx = posX + stepX[k];
            const int ny = posY + stepY[k];
            if (nx == prevX && ny == prevY)
                continue;
            if (nx < 0 || nx >= static_cast<int>(currentGrid.size()))
                continue;
            if (ny < 0 || ny >= static_cast<int>(currentGrid[nx].size()))
                continue;
            if (currentGrid[nx][ny] == "H")
                score += 1;
        }
    }

    if (sequenceIndex + 1 < protein.size())
    {
        // Try every direction for the next residue.
        for (int i = 0; i < 4; i++)
        {
            bestScore = solve(protein, bestScore, sequenceIndex + 1, currentGrid, posX, posY, allDirections[i], score, pDirection);
        }
    }
    else
    {
        // Last residue placed: this is a complete fold.
        numfolds++;
        if (score > bestScore)
        {
            cout << "Improved fold with score: " << score << "\t(" << numfolds << "th fold)" << endl;
            printGrid(currentGrid);
        }
        bestScore = max(score, bestScore);
    }

    // Backtrack so the caller sees the grid and the move list unchanged.
    currentGrid[posX][posY] = EMPTY;
    pDirection.pop_back();
    return bestScore;
}
/**
 * Folds a fixed HP sequence: prints the sequence, the best score returned by
 * solve(), and the number of complete folds that were examined.
 */
int main()
{
    // Residue 0 is placed on the grid below, so the search starts at residue 1.
    unsigned int sequenceIndex = 1;
    int score = 0;
    int bestScore = 0;
    vector<char> pDirection;
    string protein = "HHHPPHPPPPHPHP";

    // The chain starts in the centre cell (n, n).
    int posX = protein.size();
    int posY = protein.size();
    cout << "Protein to fold " << protein << endl;

    // A chain of n residues reaches at most n-1 cells from the centre, and the
    // contact check reads one cell further. A side of 2n+1 (indices 0..2n)
    // keeps every such access on the grid; a side of 2n is one cell short for
    // a fully stretched chain.
    int gridsize = (protein.size()*2) + 1;
    vector<vector<string> > currentGrid(gridsize, vector<string>(gridsize, EMPTY));

    // Place the first residue in the centre cell.
    currentGrid[posX][posY] = protein.at(0);

    // The first move is fixed to 'R'; the other three starts are rotations of it.
    cout << solve(protein,bestScore,sequenceIndex,currentGrid,posX,posY,'R',score,pDirection) << endl;
    cout << numfolds << endl;
}