Python-Flask CSV文件上载错误处理

时间:2018-03-03 23:15:07

标签: python pandas flask

基本上我能够用我当前的代码执行我想要的操作,即上传csv文件,使用pandas操作它,然后更新MSQL数据库。我想以某种方式添加错误处理。基本上,上传功能仅适用于一个特定文件,并为所有其他文件抛出不同的错误。

有没有办法可以捕获多个错误并向用户返回错误消息?

可能类似于对输入csv文件列标题的检查。

@app.route('/upload', methods =['GET', 'POST'])
def csv_input():
tempfile_path = tempfile.NamedTemporaryFile().name
#file.save(tempfile_path)
#sheet = pd.read_csv(tempfile_path)
if request.method == 'POST':
    file = request.files['file']
    if file: #and allowed_filename(file.filename):
        #filename = secure_filename(file.filename)
        file.save(tempfile_path)
        input_csv = pd.read_csv(tempfile_path,sep=",", engine='python')

        #### Data Cleansing From Uploded Data
        col_titles = ['id','title','vote_average','w_average','vote_count','year','runtime',
          'budget','revenue','profit']
        # Only Keep Data where the Original Language is English
        input_csv = input_csv[input_csv['original_language']=='en']
        # New Dataframe that only contains data with vote count > 10 
        input_csv = input_csv[input_csv['vote_count'] >= 10]
        # Fill all NA values to 0 - Needed to set datatypes
        input_csv = input_csv.fillna(0)
        # Remove all Rows with no Runtime
        input_csv = input_csv[input_csv['runtime']!=0]
        # Revmove all duplciate Rows
        input_csv = input_csv.drop_duplicates()

        input_csv['vote_average'] = input_csv.vote_average.astype(float).round(1)
        input_csv.vote_average.round(1)
        input_csv['runtime'] = input_csv.runtime.astype(int)
        input_csv['vote_count'] = input_csv.vote_count.astype(int)
        input_csv['revenue'] = input_csv.revenue.astype('int64')
        input_csv['budget'] = input_csv.budget.astype('int64')

        profit_cal(input_csv,'revenue','budget','profit')

        input_csv['profit']=input_csv.profit.astype('int64')
        input_csv['profit']=input_csv.profit.replace(0,'No Data')

        #reorder_data = pd.DataFrame(input_csv)
        # Year Cleaning
        input_csv['year'] = pd.to_datetime(input_csv['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)
        #C = reorder_data['vote_average'].mean()
        #m = reorder_data['vote_count'].quantile(0.10)
        #w_average = org_data.copy().loc[reorder_data['vote_count'] >= m]

        #### IMDB Data Calculation
        V = input_csv['vote_count']
        R = input_csv['vote_average']
        C = input_csv['vote_average'].mean()
        m = input_csv['vote_count'].quantile(0.10)
        input_csv['w_average'] = (V/(V+m) * R) + (m/(m+V) * C)


        #C = input_csv['vote_average'].mean()
        #m = input_csv['vote_count'].quantile(0.10)

        #input_csv['w_average'] = input_csv.apply(weighted_rating, axis = 1)
        input_csv['w_average'] = input_csv.w_average.astype(float).round(1)



        reorder_data = input_csv[col_titles]
        reorder_data.to_sql(name='title_data', con=engine, if_exists = 'replace', index=False)    
        # Reorder the data and output in the correct order

        ##### Genre Loads == DataFrame 2
        df = input_csv
        v = df.genres.apply(json.loads)

        df = pd.DataFrame(
        {
            'id' : df['id'].values.repeat(v.str.len(), axis=0),
            'genre' : np.concatenate(v.tolist())
        })

        df['genre'] = df['genre'].map(lambda x: x.get('name'))

        genre_data = df.genre.str.get_dummies().sum(level=0)

        genre_data = df.loc[(df!=0).any(1)]
        #genre_data = genre_data.set_index('id')

        genre_order = ['id','genre']

        ## Dataframw to SQL
        genre_data[genre_order].to_sql(name='genre_data', con=engine, if_exists = 'replace', index=False) 
        ####### Keyword Search ### Dataframe

        #genre_data.to_csv("genre_data.csv")

        #return genre_data[genre_order].to_html()

        flash('Database has been updated successfully','success')
        #return reorder_data[col_titles].to_html()
        #stream = io.StringIO(file.stream.read().decode("UTF8"), newline=None)
        #csv_input = csv.reader(stream)
        #return reorder_data.to_html(index=False)
        #flash('File Uploaded Successfully')
        #return redirect(url_for('index'))
return render_template('upload.html')

1 个答案:

答案 0 :(得分:0)

有几种方法:

  1. Python方式,只需添加try:except以及相关的异常类。

    try:
        # parsing & processing logic here
        pass
    except EmptyDataError as ex:
        # tell user we don't except empty data
        pass
    except ParserError as ex:
        # tell user we failed to parse their input
        pass
    except Exception as ex:
        # tell user that something went wrong.
        pass
    
  2. Flask方式,为烧瓶注册错误处理程序以获取特定异常(这会影响整个烧瓶应用程序):

    @app.errorhandler(pandas.errors.EmptyDataError)
    def handle_empty_data():
        return 'Failed parsing Input', 200