我再次尝试向RNA折叠服务器发送请求,但无法获得答案。上一次我通过添加我最初没有编写的必需参数来解决它。这次,我使用网络嗅探器检查了POST请求数据,并确保所有必填参数均已填写。但是,服务器(RNAStructure和mfold)仍然没有答案。这是我用于RNAStructure服务器的代码:
import requests
class RequestToRNAStruct:
    """Send one example sequence to the RNAstructure web page.

    The form data is POSTed to the ``Predict1.html`` page URL and the raw
    response body is stored in ``RNAStructure_req.html``.
    """

    def __init__(self):
        super().__init__()

    def request_rnastructure(self):
        """POST the example sequence and dump the response to disk."""
        target_url = 'https://rna.urmc.rochester.edu/RNAstructureWeb/Servers/Predict1/Predict1.html'
        sequence = 'GGGCUAUUAGCUCAGUUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGCUGAUUCGAAUUCAGCAUAGCCCA'

        # Form fields, in the same order as they are submitted.
        form_fields = {
            'seqTitle': 'example',
            'seqBox': sequence,
            'Acids': 'RNA',
            'gamma': '1',
            'intercept': '-0.6',
            'iterations': '1',
            'loop': '30',
            'max': '2',
            'minHelix': '3',
            'percent': '10',
            'slope': '1,8',
            'structures': '20',
            'temperature': '310.15',
            'window': '3',
        }

        response = requests.post(target_url, data=form_fields)

        # Keep the untouched response bytes so the page can be inspected.
        with open('RNAStructure_req.html', 'wb') as dump_file:
            dump_file.write(response.content)
我遗漏了什么?
答案 0 :(得分:0)
正如@marmeladze指出的那样,您使用的端点是错误的:您把POST请求发送到了一个HTML页面。正确的端点是表单中指定的PHP脚本Runner.php的URL:
https://rna.urmc.rochester.edu/RNAstructureWeb/Servers/Predict1/Runner.php
您实际上可以发送请求,以使端点评估RNA序列。但是,响应中不包含评估结果(请参见下图),因此存储HTML页面将无济于事。
我改进了代码,使其将请求发送到正确的端点,等待结果,最后把结果以PDF、PS和CT三种格式分别下载到不同的文件中。希望这能帮助您得到所需的结果。
import requests
import time
class RequestToRNAStruct:
    """Client for the RNAstructure "Predict1" web server form.

    Workflow (see ``request_rnastructure_results``): POST the sequence and
    folding options to ``Runner.php``, extract the link to the results page
    from the response, poll that page until the calculation has finished and
    download the three result files it links to.
    """

    # Seconds to wait for the server before a single HTTP request is aborted.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        super().__init__()
        self.rna_seq = 'GGGCUAUUAGCUCAGUUGGUUAGAGCGCACCCCUGAUAAGGGUGAGGUCGCUGAUUCGAAUUCAGCAUAGCCCA'
        # Form fields submitted to the server.
        # NOTE(review): 'slope' uses a decimal comma ('1,8'); the server may
        # expect '1.8' -- confirm against the web form before changing it.
        self.options = {
            'seqTitle': 'example',
            'seqBox': self.rna_seq,
            'Acids': 'RNA',
            'gamma': '1',
            'intercept': '-0.6',
            'iterations': '1',
            'loop': '30',
            'max': '2',
            'minHelix': '3',
            'percent': '10',
            'slope': '1,8',
            'structures': '20',
            'temperature': '310.15',
            'window': '3'
        }
        self.form_url = 'https://rna.urmc.rochester.edu/RNAstructureWeb/Servers/Predict1/Runner.php'
        self.rochester_edu_host = 'http://rna.urmc.rochester.edu'
        # Seconds to sleep between two polls of the results page.
        self.attempt_sleep_time = 5
        # Both are filled in by extract_results_url().
        self.server = None
        self.key = None

    @staticmethod
    def _query_value(subpath, name):
        """Return the text following ``name=`` in *subpath* up to the next '&'.

        Raises:
            ValueError: if ``name=`` does not occur in *subpath*.
        """
        marker = name + '='
        start = subpath.find(marker)
        if start == -1:
            raise ValueError(
                'parameter "{name}" not found in "{subpath}"'.format(
                    name=name, subpath=subpath))
        start += len(marker)
        end = subpath.find('&', start)
        if end == -1:
            # Last parameter of the query string: take everything left.
            end = len(subpath)
        return subpath[start:end]

    def extract_server(self, subpath):
        """Return the value of the ``server`` parameter of *subpath*."""
        return self._query_value(subpath, 'server')

    def extract_key(self, subpath):
        """Return the value of the ``key`` parameter of *subpath*."""
        return self._query_value(subpath, 'key')

    def extract_results_url(self, content):
        """Extract the results-page URL from the form response HTML.

        Also stores the ``server`` and ``key`` parameters of that URL in
        ``self.server`` and ``self.key``.

        Args:
            content: HTML text returned by ``request_calculations``.

        Returns:
            ``self.rochester_edu_host`` followed by the extracted link.

        Raises:
            ValueError: if the link, or its ``server``/``key`` parameters,
                cannot be found in *content*.
        """
        # string just before link to results
        string_key = 'Click <a href="'
        init_index = content.find(string_key)
        if init_index == -1:
            raise ValueError('link to the results page not found in the server response')
        start = init_index + len(string_key)
        end_index = content.find('"', start)
        if end_index == -1:
            raise ValueError('unterminated link to the results page in the server response')
        # The href is HTML-escaped; turn '&amp;' back into a plain '&' so the
        # query string can be parsed and requested as-is.
        url_subpath = content[start:end_index].replace('&amp;', '&')
        wait_for_results_url = self.rochester_edu_host + url_subpath
        self.server = self.extract_server(url_subpath)
        self.key = self.extract_key(url_subpath)
        return wait_for_results_url

    def request_calculations(self):
        """POST the sequence and options to the form URL; return the HTML text.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        with requests.post(self.form_url, data=self.options,
                           timeout=self.REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            return r.text

    def request_results_page(self, wait_for_results_url, max_attempts=None):
        """Poll the results page until the calculation is no longer in progress.

        Args:
            wait_for_results_url: URL returned by ``extract_results_url``.
            max_attempts: optional upper bound on the number of polls;
                ``None`` (the default) polls indefinitely.

        Returns:
            The HTML text of the finished results page.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            TimeoutError: if *max_attempts* polls all report a calculation
                in progress.
        """
        in_progress_search_string = 'Calculation in progress. Please wait.'
        print('Sending request to server "{server}":'.format(server=self.server))
        print(wait_for_results_url)
        attempt = 0
        while True:
            attempt += 1
            print('\n > Attempt: ' + str(attempt))
            with requests.get(wait_for_results_url,
                              timeout=self.REQUEST_TIMEOUT) as r:
                r.raise_for_status()
                result_page_text = r.text
            if in_progress_search_string not in result_page_text:
                print('Results ready!')
                return result_page_text
            print(in_progress_search_string)
            if max_attempts is not None and attempt >= max_attempts:
                raise TimeoutError(
                    'results not ready after {n} attempts'.format(n=attempt))
            time.sleep(self.attempt_sleep_time)

    def save_results_to_disk(self, result_page_text):
        """Download the three result files linked from the results page.

        Each file is written to the current directory as
        ``<key>.<extension>``, where the extension is taken from the link.

        Raises:
            ValueError: if the download section or one of the three links
                cannot be found in *result_page_text*.
            requests.HTTPError: if a download answers with an error status.
        """
        results_url_first_slice = self.rochester_edu_host + '/RNAstructureWeb/Servers'
        search_string = '<a href="../../'
        end_marker = '" download'
        # Links are searched for only after this anchor in the page.
        position = result_page_text.find('id="1downloadsAll"')
        if position == -1:
            raise ValueError('download section not found in the results page')
        print('\nSaving results with key "{key}" to files...'.format(key=self.key))
        for step in range(1, 4):
            link_start = result_page_text.find(search_string, position)
            if link_start == -1:
                raise ValueError(
                    'download link {step}/3 not found in the results page'.format(step=step))
            # Continue the next search after the link that was just found.
            position = link_start + len(search_string)
            link_end = result_page_text.find(end_marker, position)
            if link_end == -1:
                raise ValueError(
                    'download link {step}/3 is malformed in the results page'.format(step=step))
            url_params = result_page_text[position:link_end]
            single_result_url = '{host}/{server}/{url_params}'.format(
                host=results_url_first_slice,
                server=self.server,
                url_params=url_params,
            )
            extension = single_result_url.rpartition('.')[2]
            filename = '{key}.{extension}'.format(
                key=self.key,
                extension=extension
            )
            print('({step}/3) {extension}...'.format(
                step=step,
                extension=extension.upper()
            ))
            with requests.get(single_result_url,
                              timeout=self.REQUEST_TIMEOUT) as r:
                r.raise_for_status()
                with open(filename, 'wb') as result_file:
                    result_file.write(r.content)

    def request_rnastructure_results(self):
        """Run the whole workflow: submit, wait for the results, download."""
        # send request with RNA sequence and selected options to perform the calculations
        form_response_content = self.request_calculations()
        # extract results URL from HTML content
        wait_for_results_url = self.extract_results_url(form_response_content)
        # wait until calculations have been made
        result_page_text = self.request_results_page(wait_for_results_url)
        # save results in three different files (different formats)
        self.save_results_to_disk(result_page_text)
        print('\nDone!')
# Script entry: build the client and run the complete request/download workflow.
rnastructure_client = RequestToRNAStruct()
rnastructure_client.request_rnastructure_results()