Bigquery CSV文件加载失败

时间:2019-01-11 17:45:05

标签: python-3.x google-bigquery export-to-csv

google.api_core.exceptions.BadRequest:400读取数据时出错,错误消息:CSV表遇到太多错误,放弃了。行数:1;错误:1.请查看错误流以获取更多详细信息。

我正在尝试运行一个先将查询结果导出到 CSV、再把该 CSV 加载回 BigQuery 的 Python 脚本，但出现了上面的错误。谁能解释一下这个错误的原因？

   import csv
#Imports the Google Cloud BigQuery client library
from google.cloud import bigquery
from google.cloud.bigquery import Dataset
from google.cloud.bigquery import Table
from google.cloud.bigquery import LoadJobConfig
from google.cloud.bigquery import SchemaField

filename = 'events.csv'  # destination CSV file for exported query results
idNeeded=0  # 1 -> prepend a 1-based sequence-number column when writing the CSV
#Instantiates a client (reads credentials from the environment)
bigquery_client = bigquery.Client()

#Runs a query from BigQuery
#Runs a query from BigQuery
def runBigQueryQuery( query, filename, idNeeded ):
  """Run *query* on BigQuery and write every result row to *filename* as CSV.

  Args:
    query: SQL string to execute.
    filename: path of the CSV file to create/overwrite.
    idNeeded: when 1, a 1-based sequence number is prepended to each row.

  Each row is also echoed to stdout, mirroring the CSV content.
  """
  # Attribute order must match the SELECT aliases; accessing a missing
  # attribute raises AttributeError, which is caught below.
  columns = ('EventId', 'ScheduleId', 'Date', 'TimeFrom', 'Description',
             'TimeTo', 'ResourceId', 'EmployeeId', 'MovementTypeId',
             'Capacity', 'CanBook', 'NonMemberFlag', 'MemberAmount',
             'NonMemberAmount', 'Attendance')

  query_job = bigquery_client.query(query)
  results = query_job.result()  # blocks until the query finishes

  with open (filename, 'w', newline='') as f: #Create CSV file
    # Explicit '\n' terminator so every record ends with a newline,
    # which the subsequent CSV load job requires to see multiple rows.
    write = csv.writer(f,dialect='excel',lineterminator='\n')
    try:
      for i, row in enumerate(results, start=1):
        values = [getattr(row, name) for name in columns]
        print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(*values))
        if idNeeded == 1:
          write.writerow([i] + values) #write Rows to CSV, with sequence id
        else:
          write.writerow(values) #write Rows to CSV
    except AttributeError as error:
      print('An error occured: {0}'.format(error))

  return

#Creates a dataset in BigQuery
#Creates a dataset in BigQuery
def createDataset(datasetname):
  """Create a new BigQuery dataset named *datasetname* in the US location."""
  ref = bigquery_client.dataset(datasetname)
  new_dataset = Dataset(ref)
  new_dataset.location = 'US'
  bigquery_client.create_dataset(new_dataset)
  return

def getDataset(datasetname):
  """Return a reference to the dataset named *datasetname*."""
  return bigquery_client.dataset(datasetname)

def createTable(tablename, global_dataset_ref):
  """Create table *tablename* inside the given dataset (empty schema for now)."""
  schema = [
    #Enter Schema here.
    # SchemaField('url', 'STRING', mode='required'),
    # SchemaField('views', 'INTEGER', mode='required')
  ]

  new_table = Table(global_dataset_ref.table(tablename), schema=schema)
  created = bigquery_client.create_table(new_table)

  # Sanity check: the server-side table id must match what we asked for.
  assert created.table_id == tablename

  return

def getTable(tablename, global_dataset_ref):
  """Fetch *tablename* from the dataset, print its schema, and return it."""
  table = bigquery_client.get_table(global_dataset_ref.table(tablename))
  print(table.schema)
  return table


def getTableSchema(tablename, global_dataset_ref):
  """Return the schema (list of SchemaField) of *tablename* in the dataset."""
  ref = global_dataset_ref.table(tablename)
  return bigquery_client.get_table(ref).schema

def loadDataFromCSV(tablename, global_dataset_ref, filename):
  """Load the local CSV file *filename* into BigQuery table *tablename*.

  Uses the destination table's existing schema for the load. Blocks until
  the load job completes, then prints the number of rows loaded.

  Raises:
    google.api_core.exceptions.BadRequest: if BigQuery rejects the CSV
      (e.g. column count/type mismatch against the schema).
  """
  schema = getTableSchema(tablename, global_dataset_ref)

  table_ref = global_dataset_ref.table(tablename)

  load_config = LoadJobConfig()
  load_config.source_format = bigquery.SourceFormat.CSV
  load_config.schema = schema
  # NOTE: do NOT also set autodetect=True here. Autodetect re-infers the
  # schema from the file and can conflict with the explicit schema above,
  # which makes the load fail with "CSV table encountered too many errors".
  load_config.allow_quoted_newlines = True

  with open (filename, 'rb') as readable:
      job = bigquery_client.load_table_from_file(readable, table_ref, location='US', job_config=load_config)

  job.result()  # wait for the load job to finish (raises on failure)

  print('Loaded {} rows into {}:{}.'.format(job.output_rows, global_dataset_ref, table_ref.table_id)) 

  return

# Testing
if __name__ == '__main__':
  datasetname = 'Data_Layer'
  tablename = 'Events'

  sqlquery = '''SELECT 
  null as EventId,
  sc.scheduleid AS ScheduleId,
  NULL AS Description,
  sc.scheduledatefrom AS Date,
  sc.timestart AS TimeFrom,
  sc.timeduration AS TimeTo,
  r.resourceid AS ResourceId,
  sp.employeeid AS EmployeeId,
  NULL AS MovementTypeId,
  r.configheight AS Capacity,
  CASE
    WHEN st.schedulestatus IN (1,  3) THEN '1'
    ELSE '0'
  END CanBook,
  CASE
    WHEN sv.nonmembermayenroll = TRUE THEN '1'
    ELSE '0'
  END NonMemberFlag,
  COALESCE(ProgramPrice.pricemember,
    ServicePrice.pricemember,
    0) AS MemberAmount,
  COALESCE(ProgramPrice.pricenonmember,
    ServicePrice.pricenonmember,
    0) AS NonMemberAmount,
  'N/A' AS Attendance
FROM
  AloomaTest.SCSESSIONS s
LEFT JOIN
  AloomaTest.SCSESSION_PROVIDERS sp
ON
  sp.sessionid = s.sessionid
LEFT JOIN
  AloomaTest.SCSESSION_RESOURCES sr
ON
  sr.sessionid = s.sessionid
LEFT JOIN
  AloomaTest.SCSCHEDULES sc
ON
  sc.scheduleid = s.scheduleid
LEFT JOIN
  AloomaTest._SCSCHEDULESTATUS ST
ON
  ST.schedulestatus = sc.schedulestatus
LEFT JOIN
  AloomaTest.SCRESOURCES r
ON
  r.resourceid = sr.resourceid
LEFT JOIN
  AloomaTest.SCSERVICES sv
ON
  sv.serviceid = sc.serviceid
LEFT JOIN
  AloomaTest.SCPROGREG_SEMCOURSES semc
ON
  semc.serviceid = sc.serviceid
  AND semc.semesterid = sc.semesterid
LEFT JOIN
  AloomaTest.SCPROGREG_PRICES ProgramPrice
ON
  ProgramPrice.scheduleid = sc.scheduleid
LEFT JOIN
  AloomaTest.SCPROGREG_PRICES ServicePrice
ON
  ServicePrice.semcourseid = semc.semcourseid
WHERE
  COALESCE(ProgramPrice.feetypeid,
    0) = 0
  AND COALESCE(ServicePrice.feetypeid,
    0)= 0
    and  sc.scheduleid in(31207,
25936,
5761094,
832794,
9825,
17912)
'''

  #createDataset(datasetname) #Successfully tested this code 2018-09-24
global_dataset_ref = getDataset(datasetname) #Successfully tested this code 2018-09-24

  #createTable(tablename, global_dataset_ref) #Successfully tested this code 2018-09-24
getTable(tablename, global_dataset_ref) #Successfully tested this code 2018-09-24

runBigQueryQuery(sqlquery,filename,idNeeded) #Successfully tested this code 2018-09-24

loadDataFromCSV(tablename, global_dataset_ref,filename) #Successfully tested this code 2018-09-24

样本数据

,25936,2009-06-01 18:30:00,1110,M1PO - M1 PT Full,60,,254,,,1,0,0,0,N/A
,17912,2009-04-22 06:15:00,375,Pil Ptnr - Pilates Partner,60,47,398,,10,1,1,0,0,N/A
,31207,2009-06-22 19:00:00,1140,D390-2 - 1 1/2 Hour Massage,90,107,548,,20,0,0,0,0,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,583,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,591,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,585,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,584,2349,,20,0,1,20,50,N/A
,832794,2012-02-21 14:30:00,870,Comp Member One/One,60,,2963,,,1,0,0,0,N/A

1 个答案:

答案 0 :(得分:0)

错误消息表明 BigQuery 在加载时只解析到了 1 行数据——很可能是生成 CSV 文件时丢失了换行符，导致所有记录被挤在同一行里而解析失败。请检查生成的文件，确认每条记录都以换行符结尾；另外，在显式传入 schema 的同时又设置 autodetect=True 也可能引起模式冲突，建议去掉 autodetect。