我正在尝试使用Google云端存储(GCS),Python2.7和Ferris框架创建文档管理系统。我能够将多种类型的文件上传到云存储中,并且我能够以编程方式将CSV和TXT推送到客户端浏览器,以便下载,没有任何问题。但如果文件是Microsoft Word文档或PDF或任何其他mime类型,我会收到以下错误:
'ascii' codec can't decode byte 0xe2 in position X
如果用户尝试下载CSV文件,则以下示例有效:
@route
def test_get_csv_file(self):
    """Send a CSV file stored in Google Cloud Storage to the browser as a download.

    Returns:
        The file content decoded from UTF-8 (at most 32 MiB is read), or the
        string "it failed" when the object does not exist in the bucket.
    """
    # the file in google cloud storage
    thefilename = '/mydomain.appspot.com/my_csv_file.csv'
    try:
        with gcs.open(thefilename, "r") as the_file:
            # The disposition type is a bare token (RFC 6266); wrapping it in
            # quotes makes the header malformed. The filename is the part that
            # may be quoted.
            self.response.headers["Content-Disposition"] = 'attachment; filename="my_csv_file.csv"'
            # CSV is text, so decoding it as UTF-8 is appropriate here.
            return the_file.read(32*1024*1024).decode("utf-8")
    except gcs.NotFoundError:
        return "it failed"
以下是尝试推送Word文档的示例,它会因上述错误而失败:
@route
def test_get_word_file(self):
    """Send a Word document stored in Google Cloud Storage to the browser as a download.

    A .doc file is binary data, so its bytes are written to the response body
    unchanged instead of being decoded as text and returned as a string.

    Returns:
        The response object carrying the raw file bytes (at most 32 MiB is
        read), or the string "it failed" when the object does not exist in
        the bucket.
    """
    # the file in google cloud storage
    thefilename = '/mydomain.appspot.com/my_word_file.doc'
    try:
        with gcs.open(thefilename, "r") as the_file:
            # Keep the raw bytes: decoding binary content as UTF-8 (or any
            # other text encoding) either fails or corrupts the document.
            payload = the_file.read(32*1024*1024)
    except gcs.NotFoundError:
        return "it failed"

    self.response.headers["Content-Type"] = "application/msword"
    # The disposition type is a bare token (RFC 6266); only the filename is quoted.
    self.response.headers["Content-Disposition"] = 'attachment; filename="my_word_file.doc"'
    self.response.body = payload
    # NOTE(review): assumes the framework passes a returned response object
    # through to the client unchanged -- confirm against the Ferris response
    # handlers.
    return self.response
对文件的访问必须限制在域帐户内,因此我无法将存储桶的默认ACL设置为public-read;否则我可以直接使用storage.googleapis.com/yadda/yadda这样的URL作为下载地址,问题也就解决了。我也尝试过将解码方式改为Latin-1,但浏览器只得到一个空白文件。我不明白为什么这对CSV文件有效,而对其他类型的文件无效。非常感谢任何帮助。
答案 0 :(得分:0)
这个回答并不能直接解决你的问题,但另一种方法是使用签名URL(signed URLs)。这样文件将直接由云存储提供,而生成的URL只在有限的时间内有效。
我使用下面的python模块。它有一些用于url签名的实用方法和类。
import datetime
import time
import urllib
from urlparse import urlparse
__author__ = 'fabio'
__all__ = ['sign', 'PolicyDocument', 'CloudStorageURLSigner']
from google.appengine.api import app_identity
from base64 import b64encode
import json
def sign(string_to_sign):
    """Sign a string via the app identity service and base64-encode the result.

    Args:
        string_to_sign: Value handed unchanged to ``app_identity.sign_blob``.

    Returns:
        The base64 encoding of the second element of the pair returned by
        ``app_identity.sign_blob``; the first element is discarded.
    """
    _key_name, raw_signature = app_identity.sign_blob(string_to_sign)
    encoded_signature = b64encode(raw_signature)
    return encoded_signature
class PolicyDocument:
    """A policy that can be rendered as a dict, as JSON, or as base64 JSON.

    Attributes:
        content_type: Value emitted for the "content-type" condition.
        success_action_redirect: When truthy, emitted as the
            "success_action_redirect" condition instead of the status one.
        key: Value emitted for the "key" condition.
        bucket: Value emitted for the "bucket" condition.
        expiration: Value stored under "expiration" in the policy dict.
        acl: Value emitted for the "acl" condition.
        success_action_status: Converted with ``str`` and emitted as the
            "success_action_status" condition when no redirect is set.
    """

    # Names of the condition fields.
    ACL = "acl"
    SUCCESS_ACTION_REDIRECT = "success_action_redirect"
    SUCCESS_ACTION_STATUS = "success_action_status"
    KEY = "key"
    BUCKET = "bucket"
    CONTENT_TYPE = "content-type"

    # Values available for the "acl" condition.
    ACL_PUBLIC_READ = "public-read"
    ACL_PROJECT_PRIVATE = "project-private"

    def __init__(self, content_type=None, success_action_redirect=None, key=None, bucket=None, expiration=None,
                 success_action_status=201, acl=ACL_PROJECT_PRIVATE):
        """Store every argument on the instance under the same name."""
        self.acl = acl
        self.bucket = bucket
        self.key = key
        self.content_type = content_type
        self.expiration = expiration
        self.success_action_redirect = success_action_redirect
        self.success_action_status = success_action_status

    def as_dict(self):
        """Return the policy as ``{"expiration": ..., "conditions": [...]}``."""
        exact_matches = (
            (self.ACL, self.acl),
            (self.BUCKET, self.bucket),
            (self.KEY, self.key),
            (self.CONTENT_TYPE, self.content_type),
        )
        conditions = [{field: value} for field, value in exact_matches]
        # This prefix condition is always emitted, whatever content_type is.
        conditions.append(["starts-with", "$content-type", 'image/'])

        # TODO (from the original author): investigate why it's not working --
        # presumably this refers to the redirect condition below; confirm.
        if self.success_action_redirect:
            outcome = {self.SUCCESS_ACTION_REDIRECT: self.success_action_redirect}
        else:
            outcome = {self.SUCCESS_ACTION_STATUS: str(self.success_action_status)}
        conditions.append(outcome)

        return {"expiration": self.expiration, "conditions": conditions}

    def as_json(self):
        """Return the policy dict serialized with ``json.dumps``."""
        policy = self.as_dict()
        return json.dumps(policy)

    def as_json_b64encode(self):
        """Return the JSON form of the policy passed through ``b64encode``."""
        serialized = self.as_json()
        return b64encode(serialized)
class CloudStorageURLSigner(object):
    """Builds signed URLs for objects in Google Cloud Storage."""

    DEFAULT_GCS_API_ENDPOINT = 'https://storage.googleapis.com'

    def __init__(self, gcs_api_endpoint=None, expiration=None):
        """Set up the endpoint, the expiry timestamp and the signing identity.

        Args:
            gcs_api_endpoint: Base URL for the GCS API. A falsy value selects
                DEFAULT_GCS_API_ENDPOINT.
            expiration: A datetime.datetime at which signed URLs should
                expire. A falsy value selects one day after
                datetime.datetime.now().
        """
        if not gcs_api_endpoint:
            gcs_api_endpoint = self.DEFAULT_GCS_API_ENDPOINT
        self.gcs_api_endpoint = gcs_api_endpoint

        if not expiration:
            expiration = datetime.datetime.now() + datetime.timedelta(days=1)
        # Stored as an integer timestamp produced by time.mktime.
        self.expiration = int(time.mktime(expiration.timetuple()))

        self.client_id_email = app_identity.get_service_account_name()

    def __make_signature_string(self, verb, path, content_md5, content_type):
        """Return the newline-separated string that gets signed.

        The fields appear in this order: verb, content MD5, content type,
        expiration, resource path.
        """
        template = ('{verb}\n'
                    '{content_md5}\n'
                    '{content_type}\n'
                    '{expiration}\n'
                    '{resource}')
        fields = {
            'verb': verb,
            'content_md5': content_md5,
            'content_type': content_type,
            'expiration': self.expiration,
            'resource': path,
        }
        return template.format(**fields)

    def signed_url(self, verb, path, content_type='', content_md5=''):
        """Return the endpoint + path URL with the signing query parameters appended."""
        to_sign = self.__make_signature_string(verb, path, content_md5,
                                               content_type)
        # The base64 signature is escaped with quote_plus before it is placed
        # in the query string.
        escaped_signature = urllib.quote_plus(sign(to_sign))
        target = '%s%s' % (self.gcs_api_endpoint, path)
        expires = str(self.expiration)
        return "{}?GoogleAccessId={}&Expires={}&Signature={}".format(target, self.client_id_email,
                                                                     expires, escaped_signature)

    def signed_download_url(self, url):
        """Return a signed GET URL for `url`, or `url` unchanged if it is not a GCS URL."""
        if not self.is_stored_on_google_cloud_storage(url):
            return url
        object_path = urlparse(url).path
        return self.signed_url('GET', object_path)

    @staticmethod
    def is_stored_on_google_cloud_storage(url):
        """Return whether `url` contains the GCS host name as a substring."""
        gcs_host = "storage.googleapis.com"
        return gcs_host in url