ImportError:没有名为openpyxl的模块(No module named openpyxl)

时间:2019-01-29 22:44:45

标签: spark-submit

我收到 ImportError: No module named openpyxl(没有名为 openpyxl 的模块)。用于执行的命令:

spark-submit --driver-memory 5g --jars /tmp/spark-csv_2.10-1.5.0.jar,/tmp/commons-csv-1.6.jar /app/T6M0_app/T6M0_app_Copy/QA_Automation/myspark.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys, datetime
from openpyxl import load_workbook
from openpyxl.compat import range
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
from pyspark.sql import *
from pyspark import SparkConf, SparkContext, SQLContext
from pyspark.sql.types import *
import os
import subprocess
import re


def compare_source_target_values(b_date, revid):
    """Walk the control workbook and run the Hive export once per data row.

    Opens ``/app/T6M0_app/T6M0_app_Copy/Data_Count_SQL.xlsx`` (cached values
    only, ``data_only=True``), takes its first worksheet and, for every row
    from 2 through the last row, reads columns 2, 3 and 4 as the source SQL,
    destination path and result file name.  For each such row a query is run
    through a ``HiveContext`` and the result is written as a single-partition
    CSV (with header, overwrite mode) to ``/tmp/sample3``.

    Args:
        b_date: accepted for the caller's benefit; not read by this body.
        revid: accepted for the caller's benefit; not read by this body.
    """
    print("Hello spark")
    wb = load_workbook('/app/T6M0_app/T6M0_app_Copy/Data_Count_SQL.xlsx', data_only=True)

    try:
        ws = wb.worksheets[0]
        maxrow = ws.max_row
        # Both dimensions come from the same sheet; the original referenced an
        # undefined ``ws1`` here.
        maxcol = ws.max_column

        # max_row / max_column are integers, so convert before concatenating.
        print('Max Row:' + str(maxrow))
        print('Max Col:' + str(maxcol))

        conf1 = SparkConf().setAppName("ExportFromHDFS")
        sc = SparkContext(conf=conf1)

        sqlContext = HiveContext(sc)

        # Row 1 is skipped, so iteration starts at row 2.
        for curr_row in range(2, maxrow + 1):
            source_sql = str(ws.cell(row=curr_row, column=2).value)
            dest_path = str(ws.cell(row=curr_row, column=3).value)
            result_fname = str(ws.cell(row=curr_row, column=4).value)

            print('Executing*************' + source_sql)
            print('Destination Path' + dest_path)
            print('Result File Name' + result_fname)

            # NOTE(review): the query and the output location below are
            # hard-coded; source_sql, dest_path and result_fname are read and
            # printed but not yet used, so every row re-runs the same query and
            # overwrites /tmp/sample3.  Confirm whether the per-row values
            # should be substituted here.
            df = sqlContext.sql("select * from qa_lbn0_cz.vw_yqi0_ebr_core where     BUSINESSEFFECTIVEDATE = '2018-01-31' and rev=2 limit 5")
            df.coalesce(1).write.format("com.databricks.spark.csv").mode("overwrite").option("header", "true").save("/tmp/sample3")
    finally:
        # Close the workbook that was actually opened (the original called
        # close() on an undefined ``wb1``), even if the Spark work fails.
        wb.close()

if __name__ == "__main__":
    # record start time
    startTime = datetime.datetime.now()
    result = False
    # Exactly two command-line arguments are required after the script name.
    if len(sys.argv) != 3:
        print("Error:  Need 2 input parameter business effective date and rev id.")
        # Exit with a non-zero status so the caller can detect the usage
        # error; sys.exit is used instead of the site-provided exit() helper.
        sys.exit(1)
    f_businessdate = sys.argv[1]
    rev_id = sys.argv[2]
    #get_master_control_table_data_hive(f_businessdate,rev_id)
    #format_master_control_data()
    compare_source_target_values(f_businessdate, rev_id)

该脚本从HDFS导出数据并将其保存到CSV文件。导出查询将从Excel中读取。需要将放置在HDFS位置(location)中的结果.deflate文件转换为.CSV,并将其存储在本地系统中。

0 个答案:

没有答案