我在Django中通过Tweepy使用Twitter API。
我使用下面这个辅助函数来保存推文。它大约有50%的时间能正常工作,其余时间会抛出"DataError: integer out of range"(整数超出范围)异常——目前下面的代码会捕获该异常并把出错的推文记录下来(以便稍后解析!),但现在是时候把它修好了。
def read_tweet(tweet_data, current_user):
    """Store a Tweepy status as a ``Tweet`` row, or fetch the existing row.

    The row is looked up by the tweet's primary key only; every other
    attribute is supplied through ``defaults`` so it is used when inserting
    but never as a filter.  If the insert with the full column set raises
    ``DataError``, a second attempt is made with a reduced set of columns.

    Args:
        tweet_data: Status object whose attributes (``id``, ``text``,
            ``_json``, ...) are copied onto the model.
        current_user: Object stored in the ``author`` foreign key.

    Returns:
        ``(tweet, created)`` as returned by ``get_or_create``; ``created`` is
        ``False`` when a row with this id already existed.  ``(None, None)``
        when both insert attempts raised ``DataError``.
    """
    import logging

    # Kept as function-level imports, exactly where the original had them.
    from coreapp.models import Tweet
    from django.db import DataError

    logger = logging.getLogger('django')

    # Columns written by the reduced (fallback) insert.
    core_fields = {
        'text': tweet_data.text,
        'author': current_user,
        '_json': tweet_data._json,
        'user': tweet_data.user,
        'created_at': tweet_data.created_at,
    }

    # Everything the first attempt writes in addition to the core columns.
    full_fields = dict(core_fields)
    full_fields.update({
        'truncated': tweet_data.truncated,
        'in_reply_to_status_id': tweet_data.in_reply_to_status_id,
        'favorite_count': tweet_data.favorite_count,
        'source': tweet_data.source,
        'retweeted': tweet_data.retweeted,
        'coordinates': tweet_data.coordinates,
        'entities': tweet_data.entities,
        'in_reply_to_screen_name': tweet_data.in_reply_to_screen_name,
        'id_str': tweet_data.id_str,
        'retweet_count': tweet_data.retweet_count,
        'in_reply_to_user_id': tweet_data.in_reply_to_user_id,
        'favorited': tweet_data.favorited,
        'geo': tweet_data.geo,
        'in_reply_to_user_id_str': tweet_data.in_reply_to_user_id_str,
        'lang': tweet_data.lang,
        'place': tweet_data.place,
    })

    # NOTE(review): DataError is raised by the database on INSERT, so the
    # offending value is whatever reaches an integer column.  Presumably the
    # real column types (including the ``author`` foreign key) should be
    # checked against the BigIntegerField declarations -- TODO confirm.
    for defaults in (full_fields, core_fields):
        try:
            # Keyed on ``id`` alone: an existing tweet is returned as
            # ``(tweet, False)`` instead of the author object.
            return Tweet.objects.get_or_create(
                id=tweet_data.id, defaults=defaults)
        except DataError:
            continue

    # Both attempts failed: keep the raw payload so it can be replayed later.
    errormessage = {"errortweet": tweet_data._json}
    print(errormessage)
    logger.error(str(errormessage))
    return None, None
Models.py
class Tweet(models.Model):
    """Database row holding one tweet, keyed by the tweet's own id.

    Integer identifiers and counters use ``BigIntegerField``.  Several
    attributes are kept as text; ``user`` holds a serialised copy of the
    user dictionary from the API response.
    """

    truncated = models.BooleanField(default=False)
    text = models.TextField(max_length=140)
    in_reply_to_status_id = models.BigIntegerField(blank=True, null=True)
    # The tweet id doubles as the primary key (no auto-generated pk).
    id = models.BigIntegerField(primary_key=True)
    favorite_count = models.BigIntegerField(default=0)
    author = models.ForeignKey(User)
    _json = models.TextField()
    source = models.CharField(max_length=25)
    retweeted = models.BooleanField(default=False)
    coordinates = models.TextField(blank=True, null=True)
    entities = models.TextField(blank=True, null=True)
    in_reply_to_screen_name = models.CharField(
        max_length=25, blank=True, null=True)
    id_str = models.CharField(max_length=25)
    retweet_count = models.BigIntegerField(default=0)
    in_reply_to_user_id = models.BigIntegerField(blank=True, null=True)
    favorited = models.BooleanField(default=False)
    retweeted_status = models.TextField()
    # The response carries the user as a dictionary; a serialised form is
    # stored here.
    user = models.TextField(blank=True, null=True)
    geo = models.TextField(blank=True, null=True)
    in_reply_to_user_id_str = models.CharField(
        max_length=25, blank=True, null=True)
    possibly_sensitive = models.BooleanField(default=False)
    lang = models.CharField(max_length=5)
    created_at = models.DateTimeField()
    in_reply_to_status_id_str = models.CharField(
        max_length=25, blank=True, null=True)
    place = models.TextField(blank=True, null=True)
如果我禁用最后一个 try/except,让错误在 Django 调试模式下直接抛出,会得到以下回溯:
Environment:
Request Method: GET
Request URL: http://127.0.0.1:8001/cursorsearch/surveillance/
Django Version: 1.6
Python Version: 2.7.5
Installed Applications:
('django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'coreapp',
'silk',
'south')
Installed Middleware:
('silk.middleware.SilkyMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware')
Traceback:
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/core/handlers/base.py" in get_response
114. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/Users/dev/als/coreapp/views.py" in cursor_search
92. current_tweet, created = read_tweet(tweet, current_user)
File "/Users/dev/als/coreapp/tools.py" in read_tweet
173. created_at = tweet_data.created_at,
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/manager.py" in get_or_create
154. return self.get_queryset().get_or_create(**kwargs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/query.py" in get_or_create
388. six.reraise(*exc_info)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/query.py" in get_or_create
380. obj.save(force_insert=True, using=self.db)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in save
545. force_update=force_update, update_fields=update_fields)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in save_base
573. updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in _save_table
654. result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/base.py" in _do_insert
687. using=using, raw=raw)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/manager.py" in _insert
232. return insert_query(self.model, objs, fields, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/query.py" in insert_query
1511. return query.get_compiler(using=using).execute_sql(return_id)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/models/sql/compiler.py" in execute_sql
898. cursor.execute(sql, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/backends/util.py" in execute
69. return super(CursorDebugWrapper, self).execute(sql, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/backends/util.py" in execute
53. return self.cursor.execute(sql, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/utils.py" in __exit__
99. six.reraise(dj_exc_type, dj_exc_value, traceback)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/django/db/backends/util.py" in execute
53. return self.cursor.execute(sql, params)
Exception Type: DataError at /cursorsearch/surveillance/
Exception Value: integer out of range
正如您所看到的,出问题的字段都是BigIntegerField,因此对于较大的Twitter ID应该没有问题——Twitter文档指出这些ID需要64位有符号整数,所以我们应该是满足要求的。我也试过在shell中显式转换为Long类型,但没有任何效果。我还尝试换用不同的用户模型,也没有任何区别(用户模型之前保存时没有问题,我可以像往常一样通过shell或查询检索它们)。
我使用Silk检查了SQL。在保存成功与保存失败的推文之间,我能看到的主要区别是:成功的SQL中显式地把NULL赋给了整数字段(例如'in_reply_to_user_id')等——但这应该不是原因吧?如果我保存时不给这些字段赋值,而模型中又指定了null=True,应该没有问题才对。
我已经花了将近一天的时间试图解决这个问题。 :)
编辑:为了以防万一,我也尝试给这些字段加上default=None,但也没有用,还是同样的错误。
第二次编辑:保存成功的id的一个例子是524539416799617024,保存失败的一个是524539525209808896,所以数值范围应该没有问题。实际上,我刚刚在命令行中用一个之前保存过的tweet.id重试,也抛出了同样的异常,所以显然是别的原因——但我不知道是什么!
更多示例:成功保存推文ID - 524822288437633024,524822389821939714 推文失败:524822248499060736,524823331368091648
我已经通过分配在命令行中对这些进行了测试
u = User.object.get_or_create(id = id_from_tweet)
t = Tweet(id = 524823331368091648,user = current_user),然后执行t.save()——出现同样的错误。
我还简单地检查了整数的范围:
a = -9223372036854775808,b = id,c = 9223372036854775808,a < b < c 的结果为True。
我还在命令行上分配int之前明确强制int为Long,但也没有任何区别。
最终编辑:我没有得到明确的答案。看起来它可能与JSON/Snowflake问题有关——ID在传递途中被解析时可能被改动。由于这是一个间歇性的错误,我不清楚为什么有些推文没事而有些不行,我也不能100%确定这就是原因,但看起来很有可能。
答案 0 :(得分:2)
您检查过BigIntegerField类接受哪些值吗?请确保在初始化时传给它的所有值都是它可以接受的。
来自documentation:
class BigIntegerField([** options])
一个64位整数,非常类似于IntegerField,但它保证适合从-9223372036854775808到9223372036854775807的数字。
看起来NULL或None可能是不被接受的……我看到你曾尝试把其中一些字段的默认值设为零,但并不是全部。