I created a function that uses logservice.fetch() to retrieve the last 5 minutes' worth of logs from Google App Engine. It then builds a dictionary of the logs so I can pass them to ML Engine online prediction. The problem I'm running into is that the function only seems to return one log. I've confirmed that there is more than one log in the 5-minute window, so I think the problem is in how I wrote the function. I'm still new to Python and have run out of ideas for making this work. How can I receive all the logs within the 5-minute window?
Code:
import datetime
import logging

from google.appengine.api import logservice
import user_agents

# GlobalSettings, upload_logs and get_param_from_url are defined elsewhere in the app.

# retrieve and store timestamp for bigquery query
def timestamp():
    global_settings = GlobalSettings.all().get()
    logsml_last_updated = global_settings.logsml_last_updated
    if not logsml_last_updated:
        logsml_last_updated = datetime.datetime.now() - datetime.timedelta(minutes=5)
    ret_logs = logs(logsml_last_updated, offset=None)
    results = upload_logs(ret_logs, logsml_last_updated)
    global_settings.logsml_last_updated = datetime.datetime.now()
    global_settings.put()
    return results
# retrieve logs from logservice
def logs(timestamp, offset=None):
    CSV_COLUMNS = ('resource,place_id,status,end_time,device,device_os,device_os_version,'
                   'latency,megacycles,cost,device_brand,device_family,browser_version,'
                   'app,ua_parse').split(',')
    start_time = timestamp
    end_time = start_time + datetime.timedelta(minutes=5)
    # MAX_LOGS_TO_READ = 500
    logging.info("start_time")
    logging.info(start_time)
    logging.info(start_time.strftime('%s'))
    ret_logs = logservice.fetch(
        start_time=long(start_time.strftime('%s')),
        end_time=long(end_time.strftime('%s')),
        offset=offset,
        minimum_log_level=logservice.LOG_LEVEL_INFO,
        include_app_logs=True)
    for line in ret_logs:
        combined = ""
        splitted = line.combined.split('"')
        if len(splitted) > 3:
            splitted_again = splitted[3].split('/')
            if len(splitted_again) > 1:
                combined = splitted_again[1].split(' ')[0]
        user_agent = user_agents.parse(line.user_agent or "")
        row_data = [line.resource.split('?')[0][1:], get_param_from_url(line.resource, 'place_id'), line.status,
                    datetime.datetime.fromtimestamp(line.end_time),
                    user_agent.device.model, user_agent.os.family, user_agent.os.version_string,
                    line.latency, line.mcycles, line.cost,
                    user_agent.device.brand, user_agent.device.family,
                    user_agent.browser.version_string,
                    get_param_from_url(line.resource, 'session_id'),
                    line.version_id or "", combined]
        row_string = [x if isinstance(x, basestring) else '' if not x else str(x) for x in row_data]
        logging.info(row_string)
        l1 = dict(zip(CSV_COLUMNS, row_string))
        logging.info(l1)
        l1.update({str(k): float(v) if k == 'megacycles' else v for k, v in l1.items()})
        l1.update({str(k): float(v) if k == 'latency' else v for k, v in l1.items()})
        l1.update({k: v if v != '' else '0' for k, v in l1.items()})
        l1['key'] = "%s-%s-%s" % (l1['megacycles'], l1['end_time'], l1['latency'])
        ret = {'instances': []}
        ret['orig'] = []
        ret['orig'].append(dict(l1))
        l1.pop('place_id')
        l1.pop('resource')
        l1.pop('status')
        ret['instances'].append(l1)
    logging.info(ret)
    return ret
Thanks in advance.
Answer 0 (score: 1)
You may well have found a solution to the problem already, but anyway...
I can't find %s anywhere in the strftime docs (note the lowercase s). There is a %S, which returns the second as a decimal number in the range [00,61], which in most cases is far too small to represent the time since the beginning of the Unix epoch, as required by logservice.fetch():
    Args:
      start_time: The earliest request completion or last-update time that
        results should be fetched for, in seconds since the Unix epoch.
      end_time: The latest request completion or last-update time that
        results should be fetched for, in seconds since the Unix epoch.
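To illustrate the difference, here is a minimal sketch (the example datetime and the time.mktime() conversion are my own additions, not part of the original answer):

    import datetime
    import time

    dt = datetime.datetime(2018, 5, 1, 12, 30, 45)

    # '%S' is a documented format code: just the seconds field, zero-padded.
    print(dt.strftime('%S'))                  # '45', not seconds since the epoch

    # A documented way to get epoch seconds from a naive local datetime:
    print(int(time.mktime(dt.timetuple())))   # a large epoch value; depends on local timezone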
time.time(), on the other hand, does return:

    the time in seconds since the epoch
So just replace strftime('%s') with time().
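Applied to the question's logs() function, that could look something like the sketch below. Note that time.time() always returns "now", while the question computes its window from a stored datetime, so this sketch converts those datetime objects to epoch seconds with time.mktime() instead (my substitution; the rest mirrors the question's call):

    import datetime
    import time

    from google.appengine.api import logservice

    def fetch_window(start_dt, offset=None):
        # start_dt is a naive local datetime, as in the question's logs()
        end_dt = start_dt + datetime.timedelta(minutes=5)
        return logservice.fetch(
            start_time=time.mktime(start_dt.timetuple()),  # seconds since the epoch
            end_time=time.mktime(end_dt.timetuple()),
            offset=offset,
            minimum_log_level=logservice.LOG_LEVEL_INFO,
            include_app_logs=True)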