尝试优化Python API读取脚本

时间:2016-12-23 22:22:05

标签: python api loops optimization

我写了一个脚本,用来与我们的资产管理 API 服务器通信并检索一些信息。我发现脚本中总耗时最长的部分是:

{method 'read' of '_ssl._SSLSocket' objects}

目前我们正在提取大约25个左右资产的信息,而这个特定部分需要18.89秒。

有没有办法优化这一部分,使得我们处理全部 2700 台电脑时不必花费 45 分钟?

如果有帮助,我可以提供实际代码的副本。

import urllib2
import base64
import json
import csv

# Maximum number of assets handled per run (kept small for testing).  Meant to
# be replaced by a value derived from the number of computers added to the
# list.
Count_Stop = 25

# Module-level accumulator; add_to_master_list() appends records to it.
final_output_list = []


def get_creds():
    """Return the credential string stored in the ``.cred`` file.

    The file is opened relative to the current working directory and is
    expected to hold ``username:pw``.

    Returns:
        The file content with trailing whitespace (including the final
        newline) stripped.

    Raises:
        IOError: If ``.cred`` cannot be opened (``OSError`` on Python 3).
    """
    # The value is a secret, so it is deliberately never printed or logged.
    with open('.cred') as cred_file:
        return cred_file.read().rstrip()


def get_all_assets():
    """Fetch the computer list and start processing a limited set of IDs.

    Requests ``JSSResource/computers`` with HTTP Basic authentication, reads
    the ``computers`` list from the JSON response, collects the ``id`` of at
    most ``Count_Stop`` entries and passes them to ``announce_all_assets``
    exactly once.  Nothing is announced when the server returns no computers.
    """
    request = urllib2.Request('jss'
                              'JSSResource/computers')
    creds = get_creds()
    request.add_header('Authorization', 'Basic ' + base64.b64encode(creds))
    response = urllib2.urlopen(request).read()
    computer_set = json.loads(response)['computers']
    # Slicing caps the run at Count_Stop assets, stops walking the rest of the
    # list, and still works when fewer than Count_Stop computers exist.
    # Remove the slice to process every asset.
    computer_ids = [computer['id'] for computer in computer_set[:Count_Stop]]
    if computer_ids:
        announce_all_assets(computer_ids, len(computer_ids))


def announce_all_assets(computer_ids, count):
    """Print the ID list and the total, then request extension attributes.

    Args:
        computer_ids: IDs to display and forward.
        count: Number displayed as the total and forwarded unchanged.
    """
    review_line = 'Final list of ID\'s for review: ' + str(computer_ids)
    print(review_line)
    total_line = 'Total number of computers to check against JSS: ' + str(count)
    print(total_line)
    extension_attribute_request(computer_ids, count)


def extension_attribute_request(computer_ids, count):
    """Fetch the extension attributes of every computer and record them.

    For each ID the URL ``base_url + str(id) + '/subset/extensionattributes'``
    is requested with HTTP Basic authentication, and the
    ``['computer']['extension_attributes']`` part of the JSON response is
    passed to ``retrieve_all_ext``.

    Args:
        computer_ids: Iterable of computer IDs to query.
        count: Number of IDs; not used here, kept so existing callers work.
    """
    base_url = 'jss'
    what_we_want = '/subset/extensionattributes'
    # Loop-invariant: read the credentials and build the header value once.
    auth_header = 'Basic ' + base64.b64encode(get_creds())
    print('Extension attribute function starts now:')
    for computer_id in computer_ids:
        request = urllib2.Request(base_url + str(computer_id) + what_we_want)
        request.add_header('Authorization', auth_header)
        response = urllib2.urlopen(request).read()
        ext_att_json = json.loads(response)['computer']['extension_attributes']
        # Must run once per computer: after the loop only the last response
        # would be recorded, and an empty ID list would leave the name unbound.
        retrieve_all_ext(ext_att_json)


def retrieve_all_ext(ext_att_json):
    """Collapse one computer's extension attributes into a single record.

    Args:
        ext_att_json: Iterable of mappings, each with a ``name`` and a
            ``value`` key.

    The resulting ``{str(name): value}`` dict is passed to
    ``add_to_master_list`` exactly once, even when there are no attributes.
    """
    # TODO: also store the computer's id/name; they are not passed in yet.
    new_computer = {
        str(attribute['name']): attribute['value']
        for attribute in ext_att_json
    }
    # Register the finished record once; doing so per attribute would append
    # the same dict object repeatedly.
    add_to_master_list(new_computer)


def add_to_master_list(new_computer):
    """Append ``new_computer`` to ``final_output_list`` and print the list."""
    master_list = final_output_list
    master_list.append(new_computer)
    print(master_list)


def main():
    """Entry point: run get_all_assets()."""
    get_all_assets()

if __name__ == '__main__':
    # Executed as a script: main() in turn calls get_all_assets().
    main()

2 个答案:

答案 0 :(得分:1)

我强烈推荐使用 `requests` 模块来代替 `urllib2`。它会替你处理很多事情,能让你省去不少麻烦。

我相信它也会带来更好的性能,不过我很想听听你的实测结果。

下面是改用 requests 后的代码。(我添加了空行以突出我的改动。请注意内置的 .json() 解码器。):

# Requires requests module be installed.:
# `pip install requests` or `pip3 install requests`
# https://pypi.python.org/pypi/requests/
import requests

import base64
import json
import csv

# Maximum number of assets handled per run (kept small for testing).  Meant to
# be replaced by a value derived from the number of computers added to the
# list.
Count_Stop = 25

# Module-level accumulator; add_to_master_list() appends records to it.
final_output_list = []

def get_creds():
    """Return the credential string stored in the ``.cred`` file.

    The file is opened relative to the current working directory and is
    expected to hold ``username:pw``.

    Returns:
        The file content with trailing whitespace (including the final
        newline) stripped.

    Raises:
        IOError: If ``.cred`` cannot be opened (``OSError`` on Python 3).
    """
    # The value is a secret, so it is deliberately never printed or logged.
    with open('.cred') as cred_file:
        return cred_file.read().rstrip()

def get_all_assets():
    """Fetch the computer list and start processing a limited set of IDs.

    Requests ``JSSResource/computers`` with HTTP Basic authentication, reads
    the ``computers`` list from the JSON response, collects the ``id`` of at
    most ``Count_Stop`` entries and passes them to ``announce_all_assets``
    exactly once.  Nothing is announced when the server returns no computers.
    """
    # NOTE: 'jss' is a placeholder (presumably the real server address was
    # anonymized), so the concatenated string is not a valid URL as written.
    base_url = 'jss'
    what_we_want = 'JSSResource/computers'
    request_url = base_url + what_we_want

    creds = get_creds()
    headers = {
        'Authorization': 'Basic ' + base64.b64encode(creds),
    }
    # Pass headers by keyword: the second positional parameter of
    # requests.get() is ``params``, which would put the credentials into the
    # query string and send no Authorization header at all.
    response = requests.get(request_url, headers=headers)
    # requests decodes the JSON body itself; no json.loads() needed.
    computer_set = response.json()['computers']
    # Slicing caps the run at Count_Stop assets, stops walking the rest of the
    # list, and still works when fewer than Count_Stop computers exist.
    # Remove the slice to process every asset.
    computer_ids = [computer['id'] for computer in computer_set[:Count_Stop]]
    if computer_ids:
        announce_all_assets(computer_ids, len(computer_ids))

def announce_all_assets(computer_ids, count):
    """Print the ID list and the total, then request extension attributes.

    Args:
        computer_ids: IDs to display and forward.
        count: Number displayed as the total and forwarded unchanged.
    """
    review_line = 'Final list of ID\'s for review: ' + str(computer_ids)
    print(review_line)
    total_line = 'Total number of computers to check against JSS: ' + str(count)
    print(total_line)
    extension_attribute_request(computer_ids, count)

def extension_attribute_request(computer_ids, count):
    """Fetch the extension attributes of every computer and record them.

    For each ID the URL ``base_url + str(id) + '/subset/extensionattributes'``
    is requested with HTTP Basic authentication, and the
    ``['computer']['extension_attributes']`` part of the JSON response is
    passed to ``retrieve_all_ext``.

    Args:
        computer_ids: Iterable of computer IDs to query.
        count: Number of IDs; not used here, kept so existing callers work.
    """
    base_url = 'jss'
    what_we_want = '/subset/extensionattributes'
    creds = get_creds()
    print('Extension attribute function starts now:')
    # One Session for all requests: it keeps the connection to the host open,
    # so the TLS handshake is not repeated for every computer.
    with requests.Session() as session:
        # Session-level headers are sent with every request; passing the dict
        # positionally to get() would bind it to ``params`` instead.
        session.headers['Authorization'] = 'Basic ' + base64.b64encode(creds)
        for computer_id in computer_ids:
            request_url = base_url + str(computer_id) + what_we_want
            parsed_ext_json = session.get(request_url).json()
            ext_att_json = parsed_ext_json['computer']['extension_attributes']
            # Must run once per computer: after the loop only the last
            # response would be recorded.
            retrieve_all_ext(ext_att_json)

def retrieve_all_ext(ext_att_json):
    """Collapse one computer's extension attributes into a single record.

    Args:
        ext_att_json: Iterable of mappings, each with a ``name`` and a
            ``value`` key.

    The resulting ``{str(name): value}`` dict is passed to
    ``add_to_master_list`` exactly once, even when there are no attributes.
    """
    # TODO: also store the computer's id/name; they are not passed in yet.
    new_computer = {
        str(attribute['name']): attribute['value']
        for attribute in ext_att_json
    }
    # Register the finished record once; doing so per attribute would append
    # the same dict object repeatedly.
    add_to_master_list(new_computer)

def add_to_master_list(new_computer):
    """Append ``new_computer`` to ``final_output_list`` and print the list."""
    master_list = final_output_list
    master_list.append(new_computer)
    print(master_list)

def main():
    """Entry point: run get_all_assets()."""
    get_all_assets()

if __name__ == '__main__':
    # Executed as a script: main() in turn calls get_all_assets().
    main()

请告诉我,与原来 25 个资产耗时 18.89 秒相比,现在的耗时是多少!我非常好奇。

答案 1 :(得分:1)

单从代码整洁的角度来看,我仍然推荐我另一个回答中(在下面?)使用 requests 模块的做法(requests 用起来非常简洁),但我也意识到它未必能解决你最初的问题。

如果你想试试可能会对原始问题有所改善的 PyCurl,下面是用该方式实现的同一份代码:

# Requires pycurl module be installed.:
# `pip install pycurl` or `pip3 install pycurl`
# https://pypi.python.org/pypi/pycurl/7.43.0
# NOTE: The syntax used herein for pycurl is python 3 compliant.
# Not python 2 compliant.
import pycurl

import base64
import json
import csv

def pycurl_data(url, headers):
    """GET ``url`` with pycurl and return the decoded JSON body.

    Args:
        url: Address to request.
        headers: List of ``'Name: value'`` strings set as ``HTTPHEADER``.

    Returns:
        The object produced by ``json.loads`` on the UTF-8 decoded body.

    Raises:
        pycurl.error: If the transfer fails.
        ValueError: If the body is not valid JSON.
    """
    # Imported locally because the listing never imports BytesIO at file
    # level, which made every call fail with NameError.
    from io import BytesIO

    body_buffer = BytesIO()
    connection = pycurl.Curl()
    try:
        connection.setopt(connection.URL, url)
        connection.setopt(pycurl.HTTPHEADER, headers)
        connection.setopt(connection.WRITEDATA, body_buffer)
        connection.perform()
    finally:
        # Release the handle even when perform() raises.
        connection.close()

    # NOTE: assumes the server answers with UTF-8 encoded bytes.
    return json.loads(body_buffer.getvalue().decode('utf8'))

# Maximum number of assets handled per run (kept small for testing).  Meant to
# be replaced by a value derived from the number of computers added to the
# list.
Count_Stop = 25

# Module-level accumulator; add_to_master_list() appends records to it.
final_output_list = []

def get_creds():
    """Return the credential string stored in the ``.cred`` file.

    The file is opened relative to the current working directory and is
    expected to hold ``username:pw``.

    Returns:
        The file content with trailing whitespace (including the final
        newline) stripped.

    Raises:
        OSError: If ``.cred`` cannot be opened.
    """
    # The value is a secret, so it is deliberately never printed or logged.
    with open('.cred') as cred_file:
        return cred_file.read().rstrip()

def get_all_assets():
    """Fetch the computer list and start processing a limited set of IDs.

    Requests ``JSSResource/computers`` through ``pycurl_data`` with HTTP Basic
    authentication, reads the ``computers`` list from the decoded JSON,
    collects the ``id`` of at most ``Count_Stop`` entries and passes them to
    ``announce_all_assets`` exactly once.  Nothing is announced when the
    server returns no computers.
    """
    # NOTE: 'jss' is a placeholder (presumably the real server address was
    # anonymized), so the concatenated string is not a valid URL as written.
    base_url = 'jss'
    what_we_want = 'JSSResource/computers'
    request_url = base_url + what_we_want

    creds = get_creds()
    # b64encode() works on bytes under Python 3 and returns bytes, so encode
    # the credentials first and decode the token before joining it to a str.
    token = base64.b64encode(creds.encode('utf-8')).decode('ascii')
    headers = ['Authorization: Basic ' + token]
    # pycurl_data() already returns the parsed JSON object; re-serializing it
    # with json.dumps() would yield a str that cannot be indexed by key.
    parsed_ids_json = pycurl_data(request_url, headers)
    computer_set = parsed_ids_json['computers']
    # Slicing caps the run at Count_Stop assets, stops walking the rest of the
    # list, and still works when fewer than Count_Stop computers exist.
    # Remove the slice to process every asset.
    computer_ids = [computer['id'] for computer in computer_set[:Count_Stop]]
    if computer_ids:
        announce_all_assets(computer_ids, len(computer_ids))

def announce_all_assets(computer_ids, count):
    """Print the ID list and the total, then request extension attributes.

    Args:
        computer_ids: IDs to display and forward.
        count: Number displayed as the total and forwarded unchanged.
    """
    review_line = 'Final list of ID\'s for review: ' + str(computer_ids)
    print(review_line)
    total_line = 'Total number of computers to check against JSS: ' + str(count)
    print(total_line)
    extension_attribute_request(computer_ids, count)

def extension_attribute_request(computer_ids, count):
    """Fetch the extension attributes of every computer and record them.

    For each ID the URL ``base_url + str(id) + '/subset/extensionattributes'``
    is requested through ``pycurl_data`` with HTTP Basic authentication, and
    the ``['computer']['extension_attributes']`` part of the decoded JSON is
    passed to ``retrieve_all_ext``.

    Args:
        computer_ids: Iterable of computer IDs to query.
        count: Number of IDs; not used here, kept so existing callers work.
    """
    base_url = 'jss'
    what_we_want = '/subset/extensionattributes'
    creds = get_creds()
    # Loop-invariant header.  b64encode() works on bytes under Python 3 and
    # returns bytes, hence the encode/decode round trip.
    token = base64.b64encode(creds.encode('utf-8')).decode('ascii')
    headers = ['Authorization: Basic ' + token]
    print('Extension attribute function starts now:')
    for computer_id in computer_ids:
        request_url = base_url + str(computer_id) + what_we_want
        # pycurl_data() already returns the parsed JSON object, so it is
        # indexed directly rather than re-serialized with json.dumps().
        parsed_ext_json = pycurl_data(request_url, headers)
        ext_att_json = parsed_ext_json['computer']['extension_attributes']
        # Must run once per computer: after the loop only the last response
        # would be recorded, and an empty ID list would leave the name unbound.
        retrieve_all_ext(ext_att_json)

def retrieve_all_ext(ext_att_json):
    """Collapse one computer's extension attributes into a single record.

    Args:
        ext_att_json: Iterable of mappings, each with a ``name`` and a
            ``value`` key.

    The resulting ``{str(name): value}`` dict is passed to
    ``add_to_master_list`` exactly once, even when there are no attributes.
    """
    # TODO: also store the computer's id/name; they are not passed in yet.
    new_computer = {
        str(attribute['name']): attribute['value']
        for attribute in ext_att_json
    }
    # Register the finished record once; doing so per attribute would append
    # the same dict object repeatedly.
    add_to_master_list(new_computer)

def add_to_master_list(new_computer):
    """Append ``new_computer`` to ``final_output_list`` and print the list."""
    master_list = final_output_list
    master_list.append(new_computer)
    print(master_list)

def main():
    """Entry point: run get_all_assets()."""
    get_all_assets()

if __name__ == '__main__':
    # Executed as a script: main() in turn calls get_all_assets().
    main()