我是python的新手,对堆栈溢出来说是全新的:)
我有一个问题&我希望你能提出建议。
下面,我有一个HTML表单的片段。它调用我的文件backend.py(其脚本略低)。
出于某种原因,我运行脚本&它不会生成文件&不打印变量。我没有SSH访问,因为我正在使用共享主机,我真的不知道如何解决脚本/查看日志。有人可以建议吗?
如果有什么我正在做的那显然是错的,请告诉我。我认为它不会从表单中获取变量。如果我在本地对变量进行硬编码,则会运行,因此实际脚本没有问题,但可能使用CGI位。
py脚本会打印HTML代码!
<form action="../cgi-bin/backend.py" method="post" multipart id="form" class="tab-wizard wizard-circle">
<h6><font color="#2eb9f7">Data Source Info</font></h6>
<section>
<div class="row">
<div class="col-md-6">
<div class="form-group">
<label for="origindatabase">Hive Database Name:</label>
<input type="text" class="form-control" id="origindatabase"> </div>
</div>
backend.cgi包括以下内容:
#!/usr/bin/python
import cgitb; cgitb.enable()
import cgi
import sys
def getData():
formData = cgi.FieldStorage()
database = formData.getvalue('database')
table = formData.getvalue('origintable')
select_fields = formData.getvalue('select')
aggrequired = formData.getvalue('aggrequired')
aggtype = formData.getvalue('aggtype')
aggfield = formData.getvalue('aggfield')
aggalias = formData.getvalue('aggalias')
agg2required = formData.getvalue('agg2required')
aggtype2 = formData.getvalue('agg2type')
aggfield2= formData.getvalue('agg2field')
aggalias2 = formData.getvalue('agg2alias')
grouprequired = formData.getvalue('group')
group_fields = formData.getvalue('group_fields')
selectfields = formData.getvalue('finalselect')
hivesavedb = formData.getvalue('destinationdb')
hivesavetable = formData.getvalue('destinationtable')
whererequired = formData.getvalue('whererequired')
where1field = formData.getvalue('where1field')
where1operator = formData.getvalue('where1operator')
where1criteria = formData.getvalue('where1criteria')
where2required = formData.getvalue('where2')
where2type = formData.getvalue('typeofwhere')
where2field = formData.getvalue('where2field')
where2operator = formData.getvalue('where2operator')
where2criteria = formData.getvalue('where2criteria')
joinrequired = formData.getvalue('joinrequired')
jointable = formData.getvalue('jointable')
joinfield1 = formData.getvalue('joinfield1')
joinfield2 = formData.getvalue('joinfield2')
joinlocation = formData.getvalue('whenjoin')
join_field_selection = formData.getvalue('where2criteria')
return database
return table
return select_fields
return aggrequired
return aggtype
return aggfield
return aggalias
return agg2required
return aggtype2
return aggfield2
return aggalias2
return grouprequired
return group_fields
return selectfields
return hivesavedb
return hivesavetable
return whererequired
return where1field
return where1operator
return where1criteria
return where2required
return where2type
return where2field
return where2operator
return where2criteria
return joinrequired
return jointable
return joinfield1
return joinfield2
return joinlocation
return join_field_selection
def index():
try:
database = getData()
table = getData()
select_fields = getData()
aggrequired = getData()
aggtype = getData()
aggfield = getData()
aggalias = getData()
agg2required = getData()
aggtype2 = getData()
aggfield2 = getData()
aggalias2 = getData()
grouprequired = getData()
group_fields = getData()
selectfields = getData()
hivesavedb = getData()
hivesavetable = getData()
whererequired = getData()
where1field = getData()
where1operator = getData()
where1criteria = getData()
where2required = getData()
where2type = getData()
where2field = getData()
where2operator = getData()
where2criteria = getData()
joinrequired = getData()
jointable = getData()
joinfield1 = getData()
joinfield2 = getData()
joinlocation = getData()
join_field_selection = getData()
file = open("testfile1.txt","a")
#LIBRARY IMPORTS#
file.write('from pyspark.sql import SparkSession\nimport argparse, sys\nimport pyspark.sql.functions as sqlfunc\nfrom pyspark.sql import *\nfrom pyspark.sql.functions import *\nfrom datetime import datetime\n')
#CREATE SPARK SESSION#
file.write('def create_session(appname):\n')
file.write('\tspark_session = SparkSession\n')
file.write('.builder\n')
file.write('.appName(appname)\n')
file.write('.master(\'yarn\')\n')
file.write('.config("hive.metastore.uris","thrift://uds-far1-mn1.dab.o2.net:9083")\n')
file.write('.enableHiveSupport()\n')
file.write('.getOrCreate()\n')
file.write('\treturn spark_session\n')
#MAIN CLASS#
file.write('if __name__ == \'__main__\':\n')
file.write('\tspark_session = create_session(\'roamingdata\')\n')
print("opened main class")
#CREATE DF1 / DF2 FROM DB#
file.write('\tdf = spark_session.table(\'' + database + '.' + table + '\')\n')
if joinrequired == 'YES' or joinrequired == 'yes' or joinrequired == 'Yes':
file.write('\tdf2 = spark_session.table(\'' + database2 + '.' + table2 + '\')\n')
print("created dataframes")
#DF1 FILTERS AND OPTIONS#
file.write('\tfiltereddf = df.coalesce(1000)')
if whererequired == 'YES' and where2required == 'NO':
file.write('.filter((df.' + where1field + where1operator + where1criteria + '))')
if whererequired == 'YES' and where2required == 'YES':
file.write('.filter((df.[' + where1field + ']' + where1operator + where1criteria + ')'+ where2type + '(df.[' + where2field + ']' + where2operator + where2criteria + '))')
file.write('.select('+ select_fields+')')
print("applied dataframe1 filters")
#DF2 FILTERS AND OPTIONS#
if joinrequired == 'YES' or joinrequired == 'yes' or joinrequired == 'Yes':
file.write('\tfiltereddf2 = df2.coalesce(1000)')
if df2whererequired == 'YES' and df2whererequired == 'NO':
file.write('.filter((df2.' + df2where1field + df2where1operator + df2where1criteria + '))')
if df2whererequired == 'YES' and df2where2required == 'YES':
file.write('.filter((df2.[' + df2where1field + ']' + df2where1operator + df2where1criteria + ')'+ df2where2type + '(df2.[' + df2where2field + ']' + df2where2operator + df2where2criteria + '))')
file.write('.select('+ select_fields+')')
file.write('finaldf = filtereddf')
print("applied dataframe2 filters")
if joinrequired == 'YES' or joinrequired == 'yes' or joinrequired == 'Yes':
file.write('.join(filtereddf2 , filtereddf.'+ joinfield1 + '== filtereddf2.' + joinfield2 + ')')
print("joined dataframes")
file.write('.select('+ join_field_selection+')')
file.write('.groupBy('+ group_fields + ')')
if aggrequired == 'YES' or aggrequired == 'yes' or aggrequired == 'Yes':
file.write('.agg(sqlfunc.'+aggtype+'(df.'+ aggfield+').alias(\''+aggalias+'\')')
if agg2required == 'YES' or agg2required == 'yes' or agg2required == 'Yes':
file.write(',sqlfunc.'+aggtype2+'(df.'+ aggfield2+').alias(\''+aggalias2+'\'))')
file.write('.select('+selectfields+')\n')
print("did aggregations on joined dataframes")
file.write('df_agg.createOrReplaceTempView("temporarytable")\n')
file.write('finaldf = spark_session.sql("INSERT INTO table ' + hivesavedb + '.' + hivesavetable + ' select * from temporarytable")');
file.close()
except:
print('no')
print ("Content-type:text/html\r\n\r\n")
print ('<html>')
print ('<head>')
print ('<title>Response </title>')
print ('</head>')
print ('<body>')
print ('<h2>Hello.</h2>')
print ('</body>')
print ('</html')