运行此代码可以在一夜之间获得课堂提交的信息。这样的另一段代码运行良好,但是似乎在一批之后在随机位置随机停止。我正在使用cloud sql存储分配,以便在遍历时无问题地循环遍历,因为对循环进行计数会显示所有提交。
它绕着第12到第17批循环运行,并且毫无例外地停止并显示它已经完成。这种行为似乎没有理由吗?我们大约有18万份作业,但尚未完成。
from sqlalchemy import create_engine
import psycopg2
from google.oauth2 import service_account
import json
import base64
# import httplib2
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd
from pandas.io.json import json_normalize
from google.auth.transport.requests import AuthorizedSession
from apiclient.http import BatchHttpRequest
import httplib2
import time
from apiclient.discovery import build
import pandas as pd
import random
from googleapiclient.errors import HttpError
import email, smtplib, ssl
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import socket
socket.setdefaulttimeout(100000)
start = time.time()
http = httplib2.Http()
log = []
array = []
batch_array = []
assignments = []
listwithtokens = []
listwithtokenstokens = []
credentials = service_account.Credentials.from_service_account_info(
{
},
scopes=['https://www.googleapis.com/auth/admin.directory.user', 'https://www.googleapis.com/auth/classroom.courses',
'https://www.googleapis.com/auth/classroom.rosters',
'https://www.googleapis.com/auth/classroom.coursework.students'
]
)
email_sub = 'sa-gsync2@academiesenterprisetrust.org'
delegated_credentials = credentials.with_subject(email_sub)
service_two = build('classroom', 'v1', credentials=delegated_credentials)
engine = create_engine('postgresql+psycopg2://USER:PASSWORD@IP:PORT/DQ')
sql = '''SELECT "courseId", id FROM public.classroom_assignments;'''
def callback(request_id, response, exception):
ids = request_id.split('|')
if exception is not None:
print('Error getting assignments "{0}" for course: "{1}"'.format(request_id, exception))
tokendata = {'courseId': ids[0], 'id' :ids[1],'token': None}
listwithtokens.append(tokendata)
else:
assignments.extend(response.get("studentSubmissions", []))
nextPageToken = response.get("nextPageToken", None)
if nextPageToken:
tokendata = {'courseId': ids[0], 'id' :ids[1],'token': None}
listwithtokens.append(tokendata)
else:
pass
with engine.connect() as con:
try:
rs = con.execute(sql)
bcount = 0
i = 0
batch_n = 0
batch = service_two.new_batch_http_request(callback=callback)
start_time = time.time()
for row in rs:
i += 1
response = service_two.courses().courseWork().studentSubmissions().list(pageToken=None, courseId=row[0] , courseWorkId =row[1], pageSize=500)
ids = f"{row[0]}|{row[1]}"
batch.add(response, request_id=ids)
bcount += 1
if bcount == 999:
batch_n += 1
print(batch_n)
batch.execute()
batch = service_two.new_batch_http_request(callback=callback)
bcount = 0
end = time.time() - start_time
print('batch count',i)
if end < 60:
diff = 60 - end
print(diff, 'seconds to sleep')
time.sleep(60)
start_time = time.time()
if bcount > 0:
batch.execute()
except HttpError as err:
if err.resp.get('content-type', '').startswith('application/json'):
print(err.content)
这是错误<HttpError 502 when requesting classroom.googleapis.com/batch returned "Bad Gateway">