我用django开发了一个网站,该网站包含大约25000个子网址(sub-URL)。 我需要某种办法来列出网站上的所有网址,并定期检查这些链接是否失效,所以我更希望得到一个可以作为脚本运行的方案。
我应该遵循哪种方法?有什么想法吗?
答案 0 :(得分:10)
这是一个基于 @sneawo 的优秀答案改进而来的测试类。功能包括:
settings.ROOT_URLCONF
; 欢迎改进。
from django import test
from django.core.urlresolvers import reverse
from django.conf import settings
import importlib
class UrlsTest(test.TestCase):
    """Smoke test that GETs every named URL pattern found in the root urlconf."""

    def test_responses(self, allowed_http_codes=(200, 302, 405),
                       credentials=None, logout_url="", default_kwargs=None,
                       quiet=False):
        """
        Test all patterns in the root urlconf and the included ones.

        Do GET requests only.
        A pattern is skipped if any of these conditions applies:
            - pattern has no name in urlconf
            - pattern expects any positional parameters
            - pattern expects keyword parameters that are not specified
              in @default_kwargs

        If the response code is not in @allowed_http_codes, fail the test.

        If the @credentials dict is specified (e.g. username and password),
        log in before running the tests.

        If @logout_url is specified, then check if we accidentally logged out
        the client while testing, and log in again.

        Specify @default_kwargs to be used for patterns that expect keyword
        parameters, e.g. if you specify default_kwargs={'username': 'testuser'},
        then for pattern url(r'^accounts/(?P<username>[\\.\\w-]+)/$'
        the url /accounts/testuser/ will be tested.

        If @quiet=False, print all the urls checked. If the status code of the
        response is not 200, print the status code.
        """
        # None sentinels instead of `{}` defaults: a mutable default object
        # would be shared between every call of this method.
        if credentials is None:
            credentials = {}
        if default_kwargs is None:
            default_kwargs = {}

        module = importlib.import_module(settings.ROOT_URLCONF)
        if credentials:
            self.client.login(**credentials)

        def check_urls(urlpatterns, prefix=''):
            """Recursively check `urlpatterns`; `prefix` is the namespace path so far."""
            for pattern in urlpatterns:
                if hasattr(pattern, 'url_patterns'):
                    # this is an included urlconf
                    new_prefix = prefix
                    if pattern.namespace:
                        new_prefix = prefix + (":" if prefix else "") + pattern.namespace
                    check_urls(pattern.url_patterns, prefix=new_prefix)
                    # No `continue` here: the include entry itself is also
                    # run through the checks below.
                params = {}
                skip = False
                regex = pattern.regex
                if regex.groups > 0:
                    # the url expects parameters
                    # use default_kwargs supplied
                    named_groups = set(regex.groupindex)
                    if (regex.groups > len(named_groups)
                            or named_groups - set(default_kwargs)):
                        # there are positional parameters OR
                        # keyword parameters that are not supplied in default_kwargs
                        # so we skip the url
                        skip = True
                    else:
                        # every named group has a value in default_kwargs
                        params = {key: default_kwargs[key] for key in named_groups}
                if hasattr(pattern, "name") and pattern.name:
                    name = pattern.name
                else:
                    # if pattern has no name, skip it
                    skip = True
                    name = ""
                fullname = (prefix + ":" + name) if prefix else name
                if not skip:
                    url = reverse(fullname, kwargs=params)
                    response = self.client.get(url)
                    self.assertIn(response.status_code, allowed_http_codes)
                    # print status code if it is not 200
                    status = "" if response.status_code == 200 else str(response.status_code) + " "
                    if not quiet:
                        print(status + url)
                    if url == logout_url and credentials:
                        # if we just tested logout, then login again
                        self.client.login(**credentials)
                else:
                    if not quiet:
                        print("SKIP " + regex.pattern + " " + fullname)

        check_urls(module.urlpatterns)
答案 1 :(得分:7)
使用django-extensions中的show_urls命令作为起点。 (documentation)
python manage.py show_urls
答案 2 :(得分:4)
对于没有参数的简单网址,您可以使用此类测试:
from django import test
from django.core.urlresolvers import reverse
from foo.urls import urlpatterns
class UrlsTest(test.TestCase):
    """Check that every pattern in ``urlpatterns`` answers a GET with HTTP 200."""

    def test_responses(self):
        """Reverse each pattern by its name, GET the resulting URL, expect 200."""
        for entry in urlpatterns:
            # reverse() is called with the name only, so this suits
            # patterns that take no arguments.
            target = reverse(entry.name)
            reply = self.client.get(target)
            self.assertEqual(reply.status_code, 200)
答案 3 :(得分:1)
另一种方法是接入一个日志/错误收集工具,例如Sentry(配合Raven),并启用其contrib中的404 middleware(或者直接编写自己的自定义404处理程序)
答案 4 :(得分:1)
如果您的网页已上传到网络服务器,则零编码解决方案是使用免费的W3C Link Checker。它将尝试在页面中找到的每个链接,并提供一个很好的摘要。
答案 5 :(得分:0)
我采用的方法与使用reverse的方法稍有不同:实际加载网站页面,找出其中所有的'href',再逐个访问这些链接,并依此递归下去。下面的代码会把所有请求以层级结构打印出来。目前它断言响应代码为200(在跟随重定向之后);如果您要测试25000个子站点,那么只记录响应代码、事后再搜索输出可能更合适。
from django.conf import settings
from django.test.testcases import TestCase
import re
from urlparse import urlsplit, urljoin
class GenericTestCase(TestCase):
    """Crawl the site through the Django test client, starting at '/'.

    Every ``href`` found in a response is requested in turn, and each final
    response is asserted to have status 200. The visited URLs are printed as
    an indented hierarchy.

    NOTE: written for Python 2 -- the file imports ``urlparse`` and relies
    on the trailing-comma ``print`` form to stay on the same output line.
    """

    fixtures = []

    def test_links(self):
        """Compile the href patterns, reset the visited set and start the crawl."""
        # Two patterns: one for double-quoted and one for single-quoted hrefs.
        self.p1 = re.compile(r'href="([^"]*)"')
        self.p2 = re.compile(r"href='([^']*)'")
        self.visited_urls = set()
        self.visit('/', 0)

    def visit(self, url, depth):
        """Request `url`, assert a 200 response, then recurse into its links.

        `depth` is the recursion level; it only controls how many '-'
        characters prefix the printed URL.
        """
        # Trailing comma (Python 2 print statement): keep the cursor on this
        # line so that ' => <redirect target>' can be appended below.
        print('-' * depth + url),
        self.visited_urls.add(url)
        response = self.client.get(url, follow=True)
        if response.redirect_chain:
            # Continue with the path of the last redirect in the chain.
            url = urlsplit(response.redirect_chain[-1][0]).path
            print(' => ' + url)
            if url in self.visited_urls:
                return
            self.visited_urls.add(url)
        else:
            # No redirect: just terminate the output line.
            print('')
        # assertEqual, not assertEquals: the latter is a deprecated alias
        # that was removed in Python 3.12.
        self.assertEqual(response.status_code, 200)
        refs = self.get_refs(response.content)
        for relative_url in refs:
            absolute_url = urljoin(url, relative_url)
            if not self.skip_url(absolute_url, relative_url):
                self.visit(absolute_url, depth + 1)

    def skip_url(self, absolute_url, relative_url):
        """Return True if the link must not be visited.

        A link is skipped when it was already visited, contains a ':',
        starts with ``settings.STATIC_URL``, or is a '#' fragment link.
        """
        return absolute_url in self.visited_urls \
            or ':' in absolute_url \
            or absolute_url.startswith(settings.STATIC_URL) \
            or relative_url.startswith('#')

    def get_refs(self, text):
        """Return the set of href values found in `text` by both patterns."""
        urls = set()
        urls.update(self.p1.findall(text))
        urls.update(self.p2.findall(text))
        return urls
答案 6 :(得分:0)
在Django 2.2.x中,我不得不使用@sneawo出色答案的这个经过稍微修改的版本:
from django import test
from django.urls import reverse, URLPattern
from myapp.urls import urlpatterns
class MyAppUrlsTest(test.SimpleTestCase):
    """GET every named, argument-free URLPattern in ``urlpatterns``; expect 200."""

    def test_responses(self):
        """Issue GET requests only, skipping entries that cannot be reversed by name alone."""
        for entry in urlpatterns:
            # Anything that is not a plain URLPattern is ignored.
            if not isinstance(entry, URLPattern):
                continue
            # A pattern whose regex has groups needs arguments; ignore for now.
            if entry.pattern.regex.groups:
                continue
            # Patterns without a name are skipped.
            if not entry.name:
                continue
            target = reverse(entry.name)
            reply = self.client.get(target, follow=True)
            self.assertEqual(reply.status_code, 200)
请注意,我同样忽略了需要参数的视图。对于我这个特定的、简单的用例,这还使我可以通过不在urlpatterns中给视图指定name来排除它们。
另请参阅https://github.com/encode/django-rest-framework/pull/5500#issue-146618375。