This question has been asked many times before, and I know about the datastore's 1 MB per-entity storage limit, as described here.
Still, I am stuck...
Here is the code I use to defer the task (mostly taken from Google's sample Mapper code here):
import logging
import pprint
import sys
import traceback

from google.appengine.datastore.datastore_query import Cursor
from google.appengine.ext import deferred
from google.appengine.ext import ndb


class Mapper(object):
    # Subclasses should replace this with a model class (e.g. model.Person).
    KIND = None

    # Subclasses can replace this with a list of (property, value) tuples to filter by.
    FILTERS = []

    def __init__(self):
        self.to_put = []
        self.to_delete = []

    def map(self, entity):
        """Updates a single entity.

        Implementers should return a tuple containing two iterables (to_update, to_delete).
        """
        return ([], [])

    def finish(self):
        """Called when the mapper has finished, to allow for any final work to be done."""
        pass

    def get_query(self):
        """Returns a query over the specified kind, with any appropriate filters applied."""
        q = self.KIND.query()
        for prop, value in self.FILTERS:
            q = q.filter(prop == value)
        return q

    def run(self, act_urlkey=None, batch_size=20):
        """Starts the mapper running."""
        self._continue(None, batch_size)

    def _batch_write(self):
        """Writes updates and deletes entities in a batch."""
        if self.to_put:
            ndb.put_multi(self.to_put)
            self.to_put = []
        if self.to_delete:
            ndb.delete_multi(self.to_delete)
            self.to_delete = []

    def _continue(self, curs_str=None, batch_size=20):
        logging.debug("entering _continue with curs_str: %s" % pprint.pformat(curs_str))
        q = self.get_query()
        # If we're resuming, pick up where we left off last time.
        if curs_str is not None:
            curs = Cursor.from_websafe_string(curs_str)
            entities, next_curs, more = q.fetch_page(batch_size, start_cursor=curs)
        else:
            entities, next_curs, more = q.fetch_page(batch_size)
        try:
            # Step over the results, mapping each entity.
            for entity in entities:
                self.map(entity)
            if next_curs and more:
                logging.debug(
                    "Mapper._continue - sys.getsizeof(self): %d, "
                    "sys.getsizeof(next_curs.to_websafe_string()): %d, "
                    "sys.getsizeof(batch_size): %d" % (
                        sys.getsizeof(self),
                        sys.getsizeof(next_curs.to_websafe_string()),
                        sys.getsizeof(batch_size)))
                deferred.defer(self._continue, next_curs.to_websafe_string(), batch_size)
            else:
                self.finish()
        except:
            exc_type, exc_value, exc_tb = sys.exc_info()
            logging.exception(traceback.format_exception(exc_type, exc_value, exc_tb))
The code runs fine and iterates over the query pages for quite a while, but after some time it inevitably fails with this exception:
Traceback (most recent call last):
File "/base/data/home/apps/s~xxx-test/backendadmin:beta-0-11-9.388453478982695515/bp_content/themes/xxx/handlers/mappers.py", line 90, in _continue
deferred.defer(self._continue, next_curs.to_websafe_string(), batch_size)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/deferred/deferred.py", line 272, in defer
key = _DeferredTaskEntity(data=pickled).put()
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/db/__init__.py", line 1077, in put
return datastore.Put(self._entity, **kwargs)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/datastore.py", line 605, in Put
return PutAsync(entities, **kwargs).get_result()
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/apiproxy_stub_map.py", line 613, in get_result
return self.__get_result_hook(self)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/datastore/datastore_rpc.py", line 1881, in __put_hook
self.check_rpc_success(rpc)
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/datastore/datastore_rpc.py", line 1371, in check_rpc_success
rpc.check_success()
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/apiproxy_stub_map.py", line 579, in check_success
self.__rpc.CheckSuccess()
File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/apiproxy_rpc.py", line 134, in CheckSuccess
raise self.exception
RequestTooLargeError: The request to API call datastore_v3.Put() was too large.
The log output showing the sizes of the arguments passed to deferred.defer before the error occurs looks like this (the sizes are small and never increase):
Mapper._continue - sys.getsizeof(self): 32, sys.getsizeof(next_curs.to_websafe_string()): 85, sys.getsizeof(batch_size): 12
Mapper._continue - sys.getsizeof(self): 32, sys.getsizeof(next_curs.to_websafe_string()): 85, sys.getsizeof(batch_size): 12
Where/how can I find out which object the datastore is trying to store that is too large?
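One way to check: the traceback points at where the size actually matters. deferred.defer is storing the pickled call as a _DeferredTaskEntity, typically because the payload is too big to ride along in the task itself, and it is that datastore Put() that fails. A rough check is to serialize approximately what deferred would serialize and log the byte count. The sketch below is only an approximation: log_deferred_payload_size is a hypothetical helper, and the exact wrapping deferred uses internally may differ, but for a bound method the instance (self) is part of what gets pickled, which is the part that matters here.

import logging
import pickle

def log_deferred_payload_size(bound_method, *args, **kwargs):
    """Hypothetical helper: approximate the payload deferred.defer will pickle.

    For a bound method, the instance itself (plus the method name and the
    arguments) ends up in the payload, so everything hanging off `self` is
    serialized too. This is only an order-of-magnitude check.
    """
    payload = pickle.dumps(
        (bound_method.im_self, bound_method.__name__, args, kwargs),
        pickle.HIGHEST_PROTOCOL)
    logging.debug("approx deferred payload size: %d bytes", len(payload))

# e.g. just before the defer call in _continue:
# log_deferred_payload_size(self._continue, next_curs.to_websafe_string(), batch_size)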
Answer (score: 1):
It was an instance variable of the Mapper class that kept growing as the recursion got deeper and deeper. I removed it, and now things run smoothly.
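To illustrate why a growing instance attribute inflates the deferred payload even though the explicit arguments stay tiny: deferring a bound method means the instance itself gets pickled, so whatever it has accumulated is serialized again on every hop. A minimal standalone sketch (the Accumulator class is purely hypothetical):

import pickle

class Accumulator(object):
    """Hypothetical stand-in for a mapper that accumulates state on self."""

    def __init__(self):
        self.seen = []  # grows on every step, like a leftover instance variable

    def step(self, cursor):
        self.seen.append("x" * 10000)
        return cursor

acc = Accumulator()
for i in range(5):
    acc.step(i)
    # Pickling the instance together with the next call's arguments, roughly what
    # deferring a bound method implies: the payload grows with self.seen.
    print(len(pickle.dumps((acc, "step", (i,), {}), pickle.HIGHEST_PROTOCOL)))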
sys.getsizeof misled me into believing that my object was not growing in size. See this for an explanation of what is wrong with getsizeof and why the object's size appeared not to change even though it actually did.
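As a concrete illustration of the getsizeof pitfall (a minimal sketch, unrelated to App Engine): sys.getsizeof reports only an object's own shallow size, not the objects it references, so a container whose contents balloon still reports a tiny, almost constant number, while the serialized size, which is what the write actually pays for, keeps growing.

import pickle
import sys

payload = []
for _ in range(3):
    payload.append("x" * 100000)
    # Shallow size of the list object itself: stays small and barely moves.
    print(sys.getsizeof(payload))
    # Serialized size, i.e. what a deferred/datastore write would see: grows.
    print(len(pickle.dumps(payload, pickle.HIGHEST_PROTOCOL)))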