我从一个有40亿+行的巨大
<?php
/**
 * My Orders
 *
 * Shows recent orders on the account page, one row per order with the
 * first line item's product ID, order date, status, total and actions.
 *
 * Expects $order_count (maximum number of orders to show) to be in scope,
 * passed in by the caller via wc_get_template().
 *
 * @author WooThemes
 * @package WooCommerce/Templates
 * @version 2.3.10
 */
if ( ! defined( 'ABSPATH' ) ) {
	exit; // Exit if accessed directly
}

// Most recent orders belonging to the logged-in customer, across every
// order type/status that is viewable from the account page.
$customer_orders = get_posts( apply_filters( 'woocommerce_my_account_my_orders_query', array(
	'numberposts' => $order_count,
	'meta_key'    => '_customer_user',
	'meta_value'  => get_current_user_id(),
	'post_type'   => wc_get_order_types( 'view-orders' ),
	'post_status' => array_keys( wc_get_order_statuses() )
) ) );

if ( $customer_orders ) : ?>

	<h2><?php echo apply_filters( 'woocommerce_my_account_my_orders_title', __( 'Recent Orders', 'woocommerce' ) ); ?></h2>

	<table class="shop_table shop_table_responsive my_account_orders">
		<thead>
			<tr>
				<th class="order-number"><span class="nobr"><?php _e( 'Order', 'woocommerce' ); ?></span></th>
				<th class="order-number"><span class="nobr"><?php _e( 'Product ID', 'woocommerce' ); ?></span></th>
				<th class="order-date"><span class="nobr"><?php _e( 'Date', 'woocommerce' ); ?></span></th>
				<th class="order-status"><span class="nobr"><?php _e( 'Status', 'woocommerce' ); ?></span></th>
				<th class="order-total"><span class="nobr"><?php _e( 'Total', 'woocommerce' ); ?></span></th>
				<th class="order-actions"> </th>
			</tr>
		</thead>

		<tbody><?php
			foreach ( $customer_orders as $customer_order ) {
				$order = wc_get_order( $customer_order );
				$order->populate( $customer_order );
				$item_count = $order->get_item_count();

				// FIX: WC_Order has no product_id() method (calling it is a
				// fatal error). Take the product ID of the order's first line
				// item instead; 0 when the order has no items.
				$order_items = $order->get_items();
				$first_item  = reset( $order_items );
				$_product_id = $first_item ? $first_item['product_id'] : 0;
				?><tr class="order">
					<td class="order-number" data-title="<?php esc_attr_e( 'Order Number', 'woocommerce' ); ?>">
						<a href="<?php echo esc_url( $order->get_view_order_url() ); ?>">
							<?php echo _x( '#', 'hash before order number', 'woocommerce' ) . $order->get_order_number(); ?>
						</a>
					</td>
					<td class="order-number" data-title="<?php esc_attr_e( 'Product ID', 'woocommerce' ); ?>">
						<?php // FIX: get_post_permalink() was called without echo (no output)
						      // and without a post ID (wrong post). ?>
						<a href="<?php echo esc_url( get_post_permalink( $_product_id ) ); ?>">
							<?php echo _x( 'Product ID # ', 'hash before product ID', 'woocommerce' ) . absint( $_product_id ); ?>
						</a>
					</td>
					<td class="order-date" data-title="<?php esc_attr_e( 'Date', 'woocommerce' ); ?>">
						<time datetime="<?php echo date( 'Y-m-d', strtotime( $order->order_date ) ); ?>" title="<?php echo esc_attr( strtotime( $order->order_date ) ); ?>"><?php echo date_i18n( get_option( 'date_format' ), strtotime( $order->order_date ) ); ?></time>
					</td>
					<td class="order-status" data-title="<?php esc_attr_e( 'Status', 'woocommerce' ); ?>" style="text-align:left; white-space:nowrap;">
						<?php echo wc_get_order_status_name( $order->get_status() ); ?>
					</td>
					<td class="order-total" data-title="<?php esc_attr_e( 'Total', 'woocommerce' ); ?>">
						<?php echo sprintf( _n( '%s for %s item', '%s for %s items', $item_count, 'woocommerce' ), $order->get_formatted_order_total(), $item_count ); ?>
					</td>
					<td class="order-actions">
						<?php
							// Action buttons: pay (unpaid orders), cancel
							// (pending/failed, filterable), and view (always).
							$actions = array();

							if ( $order->needs_payment() ) {
								$actions['pay'] = array(
									'url'  => $order->get_checkout_payment_url(),
									'name' => __( 'Pay', 'woocommerce' )
								);
							}

							if ( in_array( $order->get_status(), apply_filters( 'woocommerce_valid_order_statuses_for_cancel', array( 'pending', 'failed' ), $order ) ) ) {
								$actions['cancel'] = array(
									'url'  => $order->get_cancel_order_url( wc_get_page_permalink( 'myaccount' ) ),
									'name' => __( 'Cancel', 'woocommerce' )
								);
							}

							$actions['view'] = array(
								'url'  => $order->get_view_order_url(),
								'name' => __( 'View', 'woocommerce' )
							);

							$actions = apply_filters( 'woocommerce_my_account_my_orders_actions', $actions, $order );

							if ( $actions ) {
								foreach ( $actions as $key => $action ) {
									echo '<a href="' . esc_url( $action['url'] ) . '" class="button ' . sanitize_html_class( $key ) . '">' . esc_html( $action['name'] ) . '</a>';
								}
							}
						?>
					</td>
				</tr><?php
			}
		?></tbody>
	</table>

<?php endif; ?>
表中抽取数据的一个进程。我正在尝试从Redshift卸载到S3
存储桶中,然后将其复制回另一个表。问题是,它运行了3个小时左右,然后失败了。奇怪的是,当我查看存储桶时,我可以看到59个切片和一个清单文件。但在进程终止之前,这些文件并不会出现在那里(上次我记得得到的错误是"服务器意外关闭"之类的)。有没有办法优化这种类型的事务,还是有更好的方法来执行这种类型的卸载/复制?我想知道为什么这个过程会停止并挂起,但从存储桶中文件的时间戳来看,它其实在几小时前就已经把文件上传到S3了。在运行一段时间后,我是否需要某种代码来自动杀掉它?这是我的代码:
from datetime import datetime
import logging
import boto3
import psycopg2 as ppg2
from inst_utils import aws
from inst_config import config3
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] - %(message)s')
if __name__ == '__main__':
    # ---- Unload step -------------------------------------------------
    # Unload last month's aggregated request data from Redshift to S3,
    # then (below) COPY it back into the reporting table.
    timestamp = datetime.now()
    month = timestamp.month
    year = timestamp.year

    s3_sesh = boto3.session.Session(**config3.S3_INFO)
    s3 = s3_sesh.resource('s3')

    fname = 'load_{}_{:02d}'.format(year, month)
    bucket_url = ('canvas_logs/agg_canvas_logs_user_agent_types/'
                  '{}/'.format(fname))
    unload_url = ('s3://{}/{}'.format(config3.S3_BUCKET, bucket_url))
    # Zero-byte "folder" key so the prefix is visible in the S3 console
    # before the unload slices arrive.
    s3.Bucket(config3.S3_BUCKET).put_object(Key=bucket_url)

    # FIX: the source is the *previous* month's requests table. The old
    # 'month - 1' arithmetic produced month 00 (and the wrong year) every
    # January; roll back to December of the prior year instead.
    if month == 1:
        prev_year, prev_month = year - 1, 12
    else:
        prev_year, prev_month = year, month - 1
    table_name = 'requests_{}_{:02d}'.format(prev_year, prev_month)

    logging.info('Starting unload.')
    try:
        with ppg2.connect(**config3.REQUESTS_POSTGRES_INFO) as conn:
            # Context-managed cursor so it is closed even on failure.
            with conn.cursor() as cur:
                # TODO add sql the sql folder to clean up this program.
                # NOTE: raw string — the \' sequences reach Redshift as
                # escaped quotes inside the UNLOAD's inner SELECT string.
                unload = r'''
                unload ('select
                  user_id
                  ,course_id
                  ,request_month
                  ,user_agent_type
                  ,count(session_id)
                  ,\'DEV\' etl_requests_usage
                  ,CONVERT_TIMEZONE(\'MST\', getdate()) etl_datetime_local
                  ,\'agg_canvas_logs_user_agent_types\' etl_transformation_name
                  ,\'N/A\' etl_pdi_version
                  ,\'N/A\' etl_pdi_build_version
                  ,null etl_pdi_hostname
                  ,null etl_pdi_ipaddress
                  ,null etl_checksum_md5
                from
                  (select distinct
                    user_id
                    ,context_id as course_id
                    ,date_trunc(\'month\', request_timestamp) request_month
                    ,session_id
                    ,case
                      when user_agent like \'%CanvasAPI%\' then \'api\'
                      when user_agent like \'%candroid%\' then \'mobile_app_android\'
                      when user_agent like \'%iCanvas%\' then \'mobile_app_ios\'
                      when user_agent like \'%CanvasKit%\' then \'mobile_app_ios\'
                      when user_agent like \'%Windows NT%\' then \'desktop\'
                      when user_agent like \'%MacBook%\' then \'desktop\'
                      when user_agent like \'%iPhone%\' then \'mobile\'
                      when user_agent like \'%iPod Touch%\' then \'mobile\'
                      when user_agent like \'%iPad%\' then \'mobile\'
                      when user_agent like \'%iOS%\' then \'mobile\'
                      when user_agent like \'%CrOS%\' then \'desktop\'
                      when user_agent like \'%Android%\' then \'mobile\'
                      when user_agent like \'%Linux%\' then \'desktop\'
                      when user_agent like \'%Mac OS%\' then \'desktop\'
                      when user_agent like \'%Macintosh%\' then \'desktop\'
                      else \'other_unknown\'
                    end as user_agent_type
                  from {}
                  where context_type = \'Course\')
                group by
                  user_id
                  ,course_id
                  ,request_month
                  ,user_agent_type')
                to '{}'
                credentials 'aws_access_key_id={};aws_secret_access_key={}'
                manifest
                gzip
                delimiter '|'
                '''.format(
                    table_name, unload_url, config3.S3_ACCESS, config3.S3_SECRET)
                cur.execute(unload)
                conn.commit()
    except ppg2.Error as e:
        logging.critical('Error occurred during transaction: {}'.format(e))
        # FIX: re-raise the original exception instead of wrapping it in a
        # bare Exception — keeps the type and the full traceback.
        raise

    # ---- Copy step ---------------------------------------------------
    logging.info('Starting copy process.')
    schema_name = 'ods_canvas_logs'
    table_name = 'agg_canvas_logs_user_agent_types'
    # UNLOAD ... manifest writes '<unload_url>manifest' alongside the slices.
    manifest_url = unload_url + 'manifest'
    logging.info('Manifest url: {}'.format(manifest_url))

    load = aws.RedshiftLoad(schema_name,
                            table_name,
                            manifest_url,
                            config3.S3_INFO,
                            config3.REDSHIFT_POSTGRES_INFO_PROD,
                            config3.REDSHIFT_POSTGRES_INFO,
                            safe_load=True,
                            truncate=True
                            )
    load.execute()
RedshiftLoad 对象只是我创建的一个包装类,用于简化从S3复制文件,因为这在我的工作中非常常见。