hp模型蛋白质折叠

时间:2017-03-22 01:18:21

标签: c++ algorithm vector

1985年由K. A. Dill建立的模型[Dil85]。在该模型中,氨基酸分为两类:疏水(H)和亲水(P)。  最佳折叠是使相邻但非共价结合的H-H对的数量最大化的折叠,从而允许疏水效应。  这是一个例子:

下面给出了HPHHHHPPHPPH串的折叠。折叠的分数是4(见红色邻接)。

               H - P   P - P
                   -   -   -
                   H   H   H - P
                   -   -       -
                   H - H   H - P

我正在尝试用c ++编写一个算法来最大化得分并返回最佳配置,但我的代码运行得不好。所以我在寻求你的帮助。

这是我的代码:

#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <math.h>
using namespace std;

string EMPTY = " ";

int solve(string protein,int bestScore,unsigned int sequenceIndex, vector        <vector<string> > & currentGrid, int posX, int posY, char direction,int    score,vector<char> & pDirection)
{
      char  allDirections[4] = {'R','D','L','U'};

    if (direction=='R')
    {
        posY +=1;
    }
    else if (direction == 'D')
    {
        posX +=1;
    }
    else if (direction == 'L')
    {
        posY -=1;
    }
    else if (direction == 'U')
    {
        posX -=1;
    }

    // copy currentGrid into newGrid.
    vector <vector<string> >  newGrid ;

    if (currentGrid[posX][posY] == EMPTY)
    {
        for (unsigned int i=0; i < (protein.size()*2-1)/2;i++)
        {
            vector<string> temp;
            newGrid.insert(newGrid.begin(),temp);

            for (unsigned int j=0; j < ((protein.size()*2-1)/2);j++)

            {
                newGrid[j].push_back(currentGrid[i][j]);

            }
        }

        //replace the first subelement of protein.
        newGrid[posX][posY] = protein[sequenceIndex];
        // Add new direction to the vector pdirection.
        pDirection.push_back(direction);

        //str.at(i)
        if (protein[sequenceIndex] == 'H')
        {
            if ((pDirection[0] == 'L' ) && (newGrid[posX+1][posY]=="H" || newGrid[posX-1][posY] == "H"))
            {
                score += 1;
            }
            else if ( (pDirection[0] == 'U'  ) && (newGrid[posX][posY+1] == "H" || newGrid[posX][posY-1] == "H"))
            {
                score += 1;
            }
            else if ((pDirection[0] == 'R') && (newGrid[posX+1][posY] == "H" || newGrid[posX-1][posY] == "H"))
            {
                score += 1;
            }
            else if ((pDirection[0] == 'D' ) && (newGrid[posX][posY+1] =="H" || newGrid[posX][posY-1] == "H"))
            {
                score += 1;
            }
        }

        sequenceIndex += 1;
    }
    if (sequenceIndex != protein.size())
    //repeat this for all directions
    {
        for (int i = 0; i < 4; i++)
        {
            direction = allDirections[i];

            bestScore = solve(protein, bestScore,sequenceIndex,newGrid,posX,posY,direction,score,pDirection);
        }
    }
    else
    {
        if (score > bestScore)
        {
            bestScore = score;

        }
        else if (score == bestScore)
        {

            bestScore = score;
        }
    }

    return bestScore;
}

int main()
{
    unsigned int  sequenceIndex = 0;
    int score = 0;
    int bestScore = 0;
    vector<char> pDirection;
    string protein = "HPPHPPH";
    vector <vector<string> > currentGrid; //a*[a*[' ']] avec a=len(protein)*2-1

    int posX = protein.size();
    int posY = protein.size();

    //int posX = ceil((protein.size()*2-1)/2));
    //int posY = ceil((protein.size()*2-1)/2));


    // create an empty 2D vector
    for (unsigned int i=0; i < (protein.size()*2-1)/2;i++)
    {
        vector<string> temp;
        currentGrid.insert(currentGrid.begin(),temp);

        for (unsigned int j=0; j < ((protein.size()*2-1)/2);j++)

        {
            currentGrid[j].push_back(EMPTY);

        }
    }
    // place the first element of protein in the middle of currentGrid
    currentGrid[posX][posY] = protein.substr(0);


    cout<<protein<<endl;
    cout<<protein.size()<<endl;
    cout<<solve(protein,bestScore,sequenceIndex, currentGrid,posX,posY,'R',score,pDirection)<<endl;
}

2 个答案:

答案 0 :(得分:1)

网络搜索确认了我在媒体上所听到的内容 - 正如一般所述,蛋白质折叠问题很难 - http://www.brown.edu/Research/Istrail_Lab/papers/10.1.1.110.3139.pdf描述了类似于您提出的问题的近似解决方案。

答案 1 :(得分:0)

对于那些迷迷糊糊的人,我已经修复了此代码。但是,它非常缓慢,因为它正在尝试所有可能的结构。无论如何,你去了:

#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <math.h>
#include <unistd.h>

using namespace std;

string EMPTY = "_";
int numfolds = 0;

void printGrid(vector<vector<string> > grid);
int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection);

void printGrid(vector<vector<string> > grid)
{
    int gridsize = grid.size();
    for (int i=0; i < gridsize;i++)
    {
        for (int j=0; j < gridsize;j++)
        {
            cout << grid[i][j];

        }
        cout << endl;
    }
}


int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection)
{
    char  allDirections[4] = {'R','D','L','U'};

    if (direction=='R')
    posY +=1;
    else if (direction == 'D')
    posX +=1;
    else if (direction == 'L')
    posY -=1;
    else if (direction == 'U')
    posX -=1;

    // copy currentGrid into newGrid

    vector<vector<string> > newGrid(currentGrid.size(), vector<string>(currentGrid.size(), EMPTY));
    for (unsigned int i=0; i < currentGrid.size();i++)
        for (unsigned int j=0; j < currentGrid.size();j++)
            newGrid[i][j] = currentGrid[i][j];
    if (currentGrid[posX][posY] == EMPTY)
    {

        // Copy old state into the newGrid
        //replace the first subelement of protein.
        newGrid[posX][posY] = protein[sequenceIndex];
        //printGrid(newGrid);
        // Add new direction to the vector pdirection.
        pDirection.push_back(direction);

        //str.at(i)
        if (protein[sequenceIndex] == 'H')
        {
            if ((pDirection[0] == 'L' ) && (newGrid[posX+1][posY]=="H" || newGrid[posX-1][posY] == "H"))
            {
                score += 1;
            }
            else if ( (pDirection[0] == 'U'  ) && (newGrid[posX][posY+1] == "H" || newGrid[posX][posY-1] == "H"))
            {
                score += 1;
            }
            else if ((pDirection[0] == 'R') && (newGrid[posX+1][posY] == "H" || newGrid[posX-1][posY] == "H"))
            {
                score += 1;
            }
            else if ((pDirection[0] == 'D' ) && (newGrid[posX][posY+1] =="H" || newGrid[posX][posY-1] == "H"))
            {
                score += 1;
            }
        }
        sequenceIndex += 1;
    }
    else
    {
        return bestScore;
    }
    // Recursively repeat for all directions
    if (sequenceIndex < protein.size())
    {
        for (int i = 0; i < 4; i++)
        {
            direction = allDirections[i];
            bestScore = solve(protein, bestScore,sequenceIndex,newGrid,posX,posY,direction,score,pDirection);
        }
    }
    else
    {
        numfolds++;

        //printGrid(newGrid);
        //cout << endl;
        if(score > bestScore)
        {
            cout << "Improved fold with score: " << score << "\t(" << numfolds << "th fold)" << endl;
            printGrid(newGrid);
        }
        //else if(numfolds%1000==0)
        //{
            //cout << "Currently folded score: " << score << "\t(" << numfolds << "th fold)" << endl;
            //printGrid(newGrid);
        //}
        bestScore = max(score,bestScore);
    }
    return bestScore;
}


int main()
{
    unsigned int  sequenceIndex = 1;
    int score = 0;
    int bestScore = 0;
    vector<char> pDirection;
    string protein = "HHHPPHPPPPHPHP";
    // vector <vector<string> > currentGrid;

    int posX = protein.size();
    int posY = protein.size();

    cout << "Protein to fold " << protein << endl;

    // create an empty 2D vector
    int gridsize = (protein.size()*2);
    vector<vector<string> > currentGrid(gridsize, vector<string>(gridsize, EMPTY));
    // place the first element of protein in the middle of currentGrid
    currentGrid[posX][posY] = protein.at(0);

    //printGrid(currentGrid);

    cout << solve(protein,bestScore,sequenceIndex,currentGrid,posX,posY,'R',score,pDirection) << endl;
    cout << numfolds << endl;
}