使用Python requests库发送POST请求时遇到的问题

时间:2020-05-22 20:49:21

标签: python python-requests

我再次尝试向RNA折叠服务器发送请求,但无法获得响应。上一次,我通过补上最初遗漏的必需参数解决了问题。这次,我使用网络嗅探器检查了POST请求数据,并确认所有必填参数均已填写。但是,服务器(RNAStructure,mfold)仍然没有返回结果。这是我用于RNAStructure服务器的代码:

import requests 

class RequestToRNAStruct:
    """Post an example RNA sequence to the RNAstructure web server and save the reply."""

    def __init__(self):
        super().__init__()

    def request_rnastructure(self):
        """Send the form fields with a POST request and store the raw response.

        The response body is written unchanged to ``RNAStructure_req.html``
        in the current working directory.
        """
        example_seq = 'GGGCUAUUAGCUCAGUUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGCUGAUUCGAAUUCAGCAUAGCCCA'
        # NOTE(review): this URL ends in .html, i.e. it looks like the form
        # page itself rather than the script the form submits to -- confirm
        # the form's action target before relying on the response.
        rnastruct_url = 'https://rna.urmc.rochester.edu/RNAstructureWeb/Servers/Predict1/Predict1.html'

        # Form fields as captured from the browser's POST request.
        payload = {'seqTitle': 'example',
                   'seqBox': example_seq,
                   'Acids': 'RNA',
                   'gamma': '1',
                   'intercept': '-0.6',
                   'iterations': '1',
                   'loop': '30',
                   'max': '2',
                   'minHelix': '3',
                   'percent': '10',
                   'slope': '1,8',
                   'structures': '20',
                   'temperature': '310.15',
                   'window': '3'}
        r = requests.post(rnastruct_url, data=payload)

        # Binary mode: store the bytes exactly as the server sent them.
        with open('RNAStructure_req.html', 'wb') as request_file:
            request_file.write(r.content)

我遗漏了什么?

1 个答案:

答案 0 :(得分:0)

正如@marmeladze指出的那样,您使用的端点是错误的。您正在尝试将POST请求发送到一个HTML页面。正确的端点是表单中指定的Runner.php PHP脚本的URL:

https://rna.urmc.rochester.edu/RNAstructureWeb/Servers/Predict1/Runner.php

您实际上可以发送请求,以使端点评估RNA序列。但是,响应中不包含评估结果(请参见下图),因此存储HTML页面将无济于事。

Capture of the HTML text of the response

我改进了代码,将请求发送到正确的端点,等待结果,最后将结果以PDF,PS和CT格式分别下载到不同的文件中。希望这能帮助您得到所需的结果。

import requests 
import time

class RequestToRNAStruct:
    """Submit an RNA sequence to the RNAstructure "Predict1" form and download the results.

    The workflow, driven by :meth:`request_rnastructure_results`, is:

    1. POST the form fields in ``self.options`` to ``self.form_url``.
    2. Parse the link to the results page out of the HTML reply.
    3. Poll that page until it no longer reports a calculation in progress.
    4. Download the result files linked from the results page.
    """

    # Sequence submitted when the caller does not supply one.
    DEFAULT_RNA_SEQ = 'GGGCUAUUAGCUCAGUUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGCUGAUUCGAAUUCAGCAUAGCCCA'

    def __init__(self, rna_seq=None, options=None):
        """Prepare the form fields and endpoints.

        Args:
            rna_seq: Sequence to submit; defaults to ``DEFAULT_RNA_SEQ``.
            options: Optional mapping of form fields that overrides or
                extends the defaults below. Overriding ``'seqBox'`` here
                does not change ``self.rna_seq``.
        """
        super().__init__()

        self.rna_seq = self.DEFAULT_RNA_SEQ if rna_seq is None else rna_seq
        self.options = {
            'seqTitle': 'example',
            'seqBox': self.rna_seq,
            'Acids': 'RNA',
            'gamma': '1',
            'intercept': '-0.6',
            'iterations': '1',
            'loop': '30',
            'max': '2',
            'minHelix': '3',
            'percent': '10',
            # NOTE(review): decimal comma here vs. decimal point in
            # 'intercept' -- confirm which form the server expects.
            'slope': '1,8',
            'structures': '20',
            'temperature': '310.15',
            'window': '3',
        }
        if options:
            self.options.update(options)

        self.form_url = 'https://rna.urmc.rochester.edu/RNAstructureWeb/Servers/Predict1/Runner.php'
        # NOTE(review): plain http while form_url uses https -- confirm
        # whether the results pages can be fetched over https as well.
        self.rochester_edu_host = 'http://rna.urmc.rochester.edu'
        # Seconds to wait between two polls of the results page.
        self.attempt_sleep_time = 5
        # Timeout in seconds passed to every HTTP request, so a stalled
        # connection cannot hang the script forever.
        self.request_timeout = 60

        # Filled in by extract_results_url(); defined here so that reading
        # them earlier yields None instead of raising AttributeError.
        self.server = None
        self.key = None

    @staticmethod
    def _query_value(subpath, name):
        """Return the raw value of query parameter ``name`` in ``subpath``.

        Raises:
            ValueError: If ``subpath`` has no ``name=value`` pair.
        """
        # Use the part after '?' when present, otherwise treat the whole
        # string as the query.
        query = subpath.partition('?')[2] or subpath
        for pair in query.split('&'):
            param, separator, value = pair.partition('=')
            if separator and param == name:
                return value
        raise ValueError(
            'parameter "{name}" not found in {subpath!r}'.format(
                name=name, subpath=subpath))

    def extract_server(self, subpath):
        """Return the value of the ``server`` query parameter of ``subpath``.

        Raises:
            ValueError: If the parameter is missing.
        """
        return self._query_value(subpath, 'server')

    def extract_key(self, subpath):
        """Return the value of the ``key`` query parameter of ``subpath``.

        Raises:
            ValueError: If the parameter is missing.
        """
        return self._query_value(subpath, 'key')

    def extract_results_url(self, content):
        """Build the results-page URL from the HTML returned by the form.

        Also stores the ``server`` and ``key`` query parameters of that
        link in ``self.server`` and ``self.key``.

        Args:
            content: HTML text of the form response.

        Returns:
            ``self.rochester_edu_host`` followed by the link found after
            ``Click <a href="``, with ``&amp;`` replaced by ``&``.

        Raises:
            ValueError: If the link, or its ``server``/``key`` parameters,
                cannot be found.
        """
        # string just before link to results
        string_key = 'Click <a href="'
        marker_index = content.find(string_key)
        if marker_index == -1:
            raise ValueError('link to the results page not found in the response')

        url_start = marker_index + len(string_key)
        url_end = content.find('"', url_start)
        if url_end == -1:
            raise ValueError('unterminated link to the results page in the response')

        # The href is HTML-escaped; turn it back into a usable URL.
        url_subpath = content[url_start:url_end].replace('&amp;', '&')

        self.server = self.extract_server(url_subpath)
        self.key = self.extract_key(url_subpath)

        return self.rochester_edu_host + url_subpath

    def request_calculations(self):
        """POST the form fields and return the HTML text of the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        with requests.post(self.form_url, data=self.options,
                           timeout=self.request_timeout) as r:
            r.raise_for_status()
            return r.text

    def request_results_page(self, wait_for_results_url, max_attempts=None):
        """Poll the results page until the calculation has finished.

        Args:
            wait_for_results_url: URL returned by :meth:`extract_results_url`.
            max_attempts: Maximum number of polls; ``None`` (the default)
                polls without limit.

        Returns:
            HTML text of the first response that does not contain the
            "calculation in progress" notice.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            TimeoutError: If ``max_attempts`` polls all report a
                calculation in progress.
        """
        in_progress_search_string = 'Calculation in progress. Please wait.'

        # attempts to get results page URL
        print('Sending request to server "{server}":'.format(server=self.server))
        print(wait_for_results_url)

        attempt = 0
        while True:
            attempt += 1
            print('\n > Attempt: ' + str(attempt))

            with requests.get(wait_for_results_url,
                              timeout=self.request_timeout) as r:
                # Without this, an error page (which lacks the in-progress
                # notice) would be mistaken for finished results.
                r.raise_for_status()
                result_page_text = r.text

            if in_progress_search_string not in result_page_text:
                print('Results ready!')
                return result_page_text

            print(in_progress_search_string)
            if max_attempts is not None and attempt >= max_attempts:
                raise TimeoutError(
                    'results not ready after {n} attempts'.format(n=attempt))
            time.sleep(self.attempt_sleep_time)

    def _extract_download_urls(self, result_page_text, count=3):
        """Return up to ``count`` download URLs found in the results page.

        Links are searched after the ``id="1downloadsAll"`` marker; each is
        the text between ``<a href="../../`` and ``" download``, appended
        to ``<host>/RNAstructureWeb/Servers/<self.server>/``.

        Raises:
            ValueError: If the marker or the first link cannot be found.
        """
        results_url_first_slice = self.rochester_edu_host + '/RNAstructureWeb/Servers'
        search_string = '<a href="../../'

        cursor = result_page_text.find('id="1downloadsAll"')
        if cursor == -1:
            raise ValueError('download section not found in the results page')

        urls = []
        for _ in range(count):
            link_start = result_page_text.find(search_string, cursor)
            if link_start == -1:
                break
            params_start = link_start + len(search_string)
            params_end = result_page_text.find('" download', params_start)
            if params_end == -1:
                break

            urls.append('{host}/{server}/{url_params}'.format(
                host=results_url_first_slice,
                server=self.server,
                url_params=result_page_text[params_start:params_end],
            ))
            # Continue the search after the link that was just consumed.
            cursor = params_start

        if not urls:
            raise ValueError('no download links found in the results page')
        return urls

    def save_results_to_disk(self, result_page_text):
        """Download the result files linked from the results page.

        Each file is written to the current working directory as
        ``<self.key>.<extension>``, where the extension is whatever follows
        the last ``.`` of its URL.

        Raises:
            ValueError: If no download links can be found.
            requests.HTTPError: If a download answers with an error status.
        """
        print('\nSaving results with key "{key}" to files...'.format(key=self.key))

        # Parse every link before downloading so a malformed page fails
        # before any file is written.
        result_urls = self._extract_download_urls(result_page_text)

        for step, single_result_url in enumerate(result_urls, start=1):
            extension = single_result_url[single_result_url.rfind('.') + 1:]
            filename = '{key}.{extension}'.format(
                key=self.key,
                extension=extension
            )

            print('({step}/{total}) {extension}...'.format(
                step=step,
                total=len(result_urls),
                extension=extension.upper()
            ))

            with requests.get(single_result_url,
                              timeout=self.request_timeout) as r:
                # Do not save an error page under a result file name.
                r.raise_for_status()
                with open(filename, 'wb') as result_file:
                    result_file.write(r.content)

    def request_rnastructure_results(self):
        """Run the whole workflow: submit, wait for, and download the results."""
        # send request with RNA sequence and selected options to perform the calculations
        form_response_content = self.request_calculations()
        # extract results URL from HTML content
        wait_for_results_url = self.extract_results_url(form_response_content)
        # wait until calculations have been made
        result_page_text = self.request_results_page(wait_for_results_url)
        # save results in three different files (different formats)
        self.save_results_to_disk(result_page_text)

        print('\nDone!')


# call main function only when executed as a script, so that importing this
# module does not send any network requests
if __name__ == '__main__':
    RequestToRNAStruct().request_rnastructure_results()