我尝试在Python中通过subprocess模块执行一条简单的命令来访问HBase。以下代码给出了错误的输出:
import json
import re
import onedrivesdk
import requests
from onedrivesdk.helpers.resource_discovery import ResourceDiscoveryRequest, \
ServiceInfo
# --- OAuth application settings (placeholder values, not the originals) ---
# Redirect URI handed to the auth provider.
redirect_uri = "https://example.ourdomain.net/"
# Client id of the application.
client_id = "a1234567-1ab2-1234-a123-ab1234abc123"
# Client secret of the application.
# NOTE(review): a real secret presumably should not be kept in source control.
client_secret = "ABCaDEFGbHcd0e1I2fghJijkL3mn4M5NO67P8Qopq+r="

# --- OAuth endpoints ---
# Resource requested when authenticating with the pasted code.
resource = "https://api.office.com/discovery/"
auth_server_url = "https://login.microsoftonline.com/common/oauth2/authorize"
auth_token_url = "https://login.microsoftonline.com/common/oauth2/token"

# --- SharePoint target (placeholder tenant and site, not the originals) ---
sharepoint_base_url = "https://{tenant}.sharepoint.com/"
sharepoint_site_url = sharepoint_base_url + "sites/{site}"

# --- Upload settings ---
# Local path of the file to send, and the name it gets in the target folder.
file_to_upload = "C:/test.xlsx"
target_filename = "test.xlsx"
class AnyVersionResourceDiscoveryRequest(ResourceDiscoveryRequest):
    """Discovery request that keeps services of every API version."""

    def get_all_service_info(self, access_token, sharepoint_base_url):
        """Return the discovered services whose resource id equals a URL.

        Args:
            access_token: Bearer token sent in the ``Authorization`` header.
            sharepoint_base_url: Value each service's ``service_resource_id``
                is compared against.

        Returns:
            A list of ``ServiceInfo`` objects; empty when nothing matches.

        Raises:
            requests.HTTPError: The discovery service answered with an HTTP
                error status.
        """
        headers = {'Authorization': 'Bearer ' + access_token}
        response = requests.get(self._discovery_service_url, headers=headers)
        # Surface authentication/authorization failures as HTTP errors instead
        # of letting an error body fail later with KeyError: 'value'.
        response.raise_for_status()
        payload = json.loads(response.text)
        service_info_list = [ServiceInfo(x) for x in payload['value']]
        # Get all services, not just the ones with service_api_version 'v2.0'
        # Filter only on service_resource_id
        return [si for si in service_info_list
                if si.service_resource_id == sharepoint_base_url]
# Authenticate against the SharePoint site (reusing a saved session when one
# exists) and upload ``file_to_upload`` into the 'Documents' drive.
# NOTE(review): the pasted original had no indentation; the authenticate /
# redeem / save steps are assumed to belong to the browser-auth branch.
http = onedrivesdk.HttpProvider()
auth = onedrivesdk.AuthProvider(http_provider=http, client_id=client_id,
                                auth_server_url=auth_server_url,
                                auth_token_url=auth_token_url)

# Characters accepted in an authorization code.
_AUTH_CODE_RE = re.compile(r'[a-zA-Z0-9_-]+')
# Finds the ``code=...`` parameter inside a pasted redirect URL.
_AUTH_CODE_IN_URL_RE = re.compile(r'code=([a-zA-Z0-9_-]+)')

should_authenticate_via_browser = False
try:
    # Look for a saved session. If not found, we'll have to
    # authenticate by opening the browser.
    auth.load_session()
    auth.refresh_token()
except FileNotFoundError:
    should_authenticate_via_browser = True

if should_authenticate_via_browser:
    auth_url = auth.get_auth_url(redirect_uri)
    code = ''
    # fullmatch: the *entire* input must look like a code. A prefix match
    # would accept e.g. a pasted URL that carries no code= parameter.
    while not _AUTH_CODE_RE.fullmatch(code):
        # Ask for the code
        print('Paste this URL into your browser, approve the app\'s access.')
        print('Copy the resulting URL and paste it below.')
        print(auth_url)
        code = input('Paste code here: ').strip()
        # Parse code from URL if necessary
        url_match = _AUTH_CODE_IN_URL_RE.search(code)
        if url_match:
            code = url_match.group(1)
    auth.authenticate(code, redirect_uri, client_secret, resource=resource)
    services = AnyVersionResourceDiscoveryRequest().get_all_service_info(
        auth.access_token, sharepoint_base_url)
    if not services:
        raise RuntimeError(
            'No discovered service has resource id %r' % sharepoint_base_url)
    # Only the first matching service is used.
    service_info = services[0]
    auth.redeem_refresh_token(service_info.service_resource_id)
    auth.save_session()

client = onedrivesdk.OneDriveClient(sharepoint_site_url + '/_api/v2.0/',
                                    auth, http)

# Get the drive ID of the Documents folder.
documents_drive_id = next(
    (drive['id']
     for drive in client.drives.get()._prop_list
     if drive['name'] == 'Documents'),
    None)
if documents_drive_id is None:
    raise RuntimeError("No drive named 'Documents' was found on the site")
items = client.item(drive=documents_drive_id, id='root')

# Upload file
uploaded_file_info = items.children[target_filename].upload(file_to_upload)
它没有列出HBase中的表,而是产生了非预期的输出。我使用的代码如下:
import subprocess
# Launch `hbase shell list` as a child process; the exit status returned by
# subprocess.call() is discarded here.
# NOTE(review): 'list' is passed as a command-line argument rather than on the
# shell's standard input. The answers below instead pipe the command into
# `hbase shell`, which is presumably why this form does not list the tables.
cmd=['hbase','shell','list']
subprocess.call(cmd)
应该如何把命令传递给子进程?
答案 0 :(得分:0)
如果您需要从Python访问HBase,我强烈建议您查看 happybase 模块。
过去4年我一直在使用它 - 它简化了我们的ETL任务。
它默认面向Python 2.X,但只需几分钟的工作,您就可以将它升级到Python 3(如果数据是UTF-8,这会非常有用)。
答案 1 :(得分:0)
我找到办法了。我创建了一个包含以下命令的shell文件:
# Send the `list` command to the HBase shell on its standard input.
printf '%s\n' 'list' | hbase shell -n
然后在Python中调用这个shell文件。
答案 2 :(得分:0)
以下命令对我有用。
import subprocess

# Pipe an HBase command into `hbase shell` through the system shell and print
# what was written to stdout. check_output() raises CalledProcessError when
# the pipeline exits with a non-zero status.
# universal_newlines=True makes check_output() return str instead of bytes, so
# print() shows the text itself rather than a b'...' literal on Python 3.
print(subprocess.check_output(
    "echo \"list_regions 'table_name'\" | hbase shell",
    shell=True, universal_newlines=True))
或
# Run the pipeline through the system shell, capturing stdout and stderr
# separately as text, then print both.
command = "echo \"list_regions 'table_name'\" | hbase shell"
process = subprocess.Popen(
    command,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    universal_newlines=True,
)
# communicate() waits for the process to finish and returns (stdout, stderr).
output, error = process.communicate()
print(output)
print(error)
这基本上等价于下面的shell命令:
hbase_command | hbase shell