在Python中使用最小化来估算参数并加快此过程

时间:2018-10-20 14:38:03

标签: python-3.x minimization

我正在尝试使用最小化来查找参数估计。我写的代码行得通,但是有两个问题:

  1. 我只找到一个局部最小值。我试图通过使用盆地跳跃来解决这个问题。
  2. 要花很长时间才能得到结果,并且由于我必须将这种最小化进行大约1000次,所以这成为一个大问题。

所以我的问题是:

  1. 您知道我该如何优化代码,使最小化运行得更快吗?

  2. 有什么办法可以修改盆地跳跃(basinhopping)部分,使其运行得更快?例如把 niter 设置得更小,或者使用其他我不知道的方法。我试过按下面的方式运行它,但十个小时后,1000 次盆地跳跃优化中连一次都没有返回结果。

  3. 还有找到全局最小值的另一种方法吗?

请随时提出其他问题。

我的代码:

import numpy as np
from scipy.optimize import minimize
from scipy.optimize import basinhopping
from scipy.integrate import odeint
import pickle
import os
import pandas as pd
import datetime
import numpy.random as npr
import csv


path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python"
os.chdir(path)
###IDS
df = pd.read_csv('1_Youtuber_SingleNrSheet_Comedy.csv', sep = ";", skipinitialspace=True)                                                    ######Change Name
YoutuberID = df["Channel_ID"].tolist()


##print(YoutuberID)



with open("9_p_q_m_Fun_ExtendedBass_VIEWS_Comedy_test.csv", "w" ,newline='',encoding='utf-8') as csv_file2:                                                        ######Change Name
    csv_writer2 = csv.writer(csv_file2, delimiter=';')

    csv_writer2.writerow(["Type","p", "q", "m","Functionvalue"])



    count = 0    
    for ID in YoutuberID[0:]:                                                                                                                      ###Change

        try:

            path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python"
            os.chdir(path)

            ###ALL INFO
            Days = pd.read_csv('3_API_Call_ALL_info_Comedy_v2.csv', sep = ";", skipinitialspace=True)

            views_path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python\Daily_Views_Comedy"                                               ######Change Name
            os.chdir(views_path)

            SVR = pd.read_csv("4_COMEDY_DailyViews_Clean_" + str(count) + "_" + ID + ".csv", sep = ";", parse_dates=True,  dayfirst=True)  ######Change Name

##            print(SVR[SVR.columns[0]])

            SVR = SVR[SVR[SVR.columns[0]]< "2018-05-01"]                                                       ####CHANGE DATE FOR DIF CAT
##            print(SVR)

            #####SV Input
            SV = np.array(SVR["Daily Views"])


##            print(SV)
            Days = Days[Days["channelId"] == ID]

##            print(Days)

            Days["publishedAt"] = pd.to_datetime(Days.publishedAt)

            Days = Days[Days["publishedAt"] > "2015-01-08"] ##"2015-01-10"


##            print(Days)

            ##### Timedelta #####

            start_date = pd.to_datetime("2015-06-08")
            ##print(start_date)

            video_upload_day =[]

            for video_date in Days["publishedAt"]:
                TimeDelta = video_date - start_date
                video_upload_day.append(TimeDelta.days)

            ##print(video_upload_day)
            ##print(videoT)

            nvideos = len(video_upload_day)
            ndays = len(SV)


            videoT = np.array(video_upload_day)

##            print(videoT,nvideos,ndays)



            def objective(x):
                """Return the sum of squared errors of the modelled daily views.

                Parameters
                ----------
                x : sequence of three floats
                    ``x[0]`` is ``p``, ``x[1]`` is ``q`` and ``x[2]`` is ``m``.

                Returns
                -------
                float
                    ``sum((SV - estimateSV) ** 2)``, where ``estimateSV[t]`` is
                    the modelled number of views on day ``t`` summed over all
                    videos.

                Notes
                -----
                Reads ``ndays``, ``nvideos``, ``videoT`` and ``SV`` from the
                enclosing scope. For a video ``v`` with ``videoT[v] <= t`` the
                views on day ``t`` are::

                    p*m + (q - p)*S - (q/m)*S**2

                with ``S`` the views that video collected on all earlier days;
                before its upload day a video contributes nothing.

                ``S`` is carried along as a running total instead of being
                re-summed from day 0 for every (day, video) pair, so one call
                costs O(ndays * nvideos) rather than O(ndays**2 * nvideos), and
                the loop over videos is done by NumPy. Results match the
                straightforward double loop up to floating-point rounding.
                """
                p = x[0]
                q = x[1]
                m = x[2]

                upload_day = np.asarray(videoT)

                # views_so_far[v]: modelled views of video v on all days < t.
                views_so_far = np.zeros(nvideos)
                estimateSV = np.zeros(ndays)

                for t in range(ndays):
                    released = upload_day <= t
                    # Videos not yet released stay at 0 for this day (and
                    # their running total therefore stays 0 as well).
                    views_today = np.where(
                        released,
                        p*m + (q-p)*views_so_far - (q/m)*views_so_far**2,
                        0.0,
                    )
                    estimateSV[t] = views_today.sum()
                    views_so_far += views_today

                return np.sum((SV - estimateSV)**2)

这是最小化部分。我写了两个版本:一个是普通的最小化,另一个是盆地跳跃(basinhopping);后者用 ## 注释掉了。

            ###### MINIMIZATION #######

            mguess = round(sum(SV)/(nvideos*2),0)

            print(sum(SV),mguess)

            x0 = np.array([0.001, 0.01, mguess])   ####Make it less volatile to first guess? Make bigger steps for m?

            b1 = (0.00001,0.5)
            b2 = (10**4,10**7)              
            bnds = (b1,b1,b2)


##            minimizer_kwargs = dict(method="L-BFGS-B",bounds=bnds)
##            res = basinhopping(objective, x0,niter=20, minimizer_kwargs=minimizer_kwargs)

            res = minimize(objective, x0,bounds = bnds)
            print(res)

            csv_writer2.writerow(["COMEDY",res.x[0], res.x[1],res.x[2],res.fun])                     ###CHANNGE CAT

            print("CURRERNT YOUTUBER IS:",count)
            count += 1

        except:

            print("PROBLEM",count)
            count += 1
    ##        print(res,res.x[0],res.x[1],res.x[2],res.fun)

0 个答案:

没有答案