如何按关键字条件拆分文本文件中的内容?

时间:2013-09-10 04:19:15

标签: shell text split sh

我有文字文件。它是数据库转储文件。

实施例,

--
-- Greenplum Database database dump
--

SET statement_timeout = 0;
SET client_encoding = 'WIN874';
SET standard_conforming_strings = off;
SET check_function_bodies = false;
SET client_min_messages = warning;
SET escape_string_warning = off;

SET search_path = prod_db_cdr, pg_catalog;

SET default_with_oids = false;

--
-- Name: execution_seq; Type: SEQUENCE SET; Schema: prod_db_cdr; Owner: prod_operation_cdr
--

SELECT pg_catalog.setval('execution_seq', 2231081, false);


--
-- Name: mas_file_seq; Type: SEQUENCE SET; Schema: prod_db_cdr; Owner: prod_operation_cdr
--

SELECT pg_catalog.setval('mas_file_seq', 39591902, false);

COPY backup_gprs_usg (access_method_identifier, call_start_date, call_start_time, call_sequence_number, highest_priority_disc_type_cd, access_point_node_code, actual_cell_site_identifier, usg_data_rec_transfer_size_amt, usage_data_record_downlink_vol, usage_data_record_uplink_vol, download_volume, usage_qos_reliability, usage_qos_peak_rate, usage_qos_delay, usage_qos_mean_rate, call_event_sequence_number, call_duration, call_minute, call_gross_revenue_amount, call_net_revenue_amount, cost_band_code, call_cost_code, switch_identifier, call_service_type_code, wireless_airtime_period_code, call_type_code, call_rating_tariff_code, original_account_identifier, billing_account_identifier, original_msisdn, call_terminating_number, hotspot_id, file_id, execution_id, row_num, managed_file_id, mas_file_name, mas_file_date, unl_file_name, unl_file_date, invoicing_company, roaming_flag, unl_execution_id, charge_download_volume, rat_type, package_name, package_eff_dttm, service_package_id, test_indicator_yn, current_speed, charging_id, call_charge_duration, revenue_code) FROM stdin;
1-70SH-484  2012-11-06  13:45:25    3447641960  4474    internet    \N  13741   0   0   13741   0   0   0   0   121204000   2400    40  40.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-6SSJ55Z   66817   \N  \N  \N  2223187 19285   5548108 gprs63_20121106_026327_061120121605.mas 2012-11-06 16:44:34 ungprs63_20121106_5549276_071120120302.mas  \N  AIS 0   2227110 13741   \N  \N  \N  \N  \N  \N  \N  \N  \N
1-70SH-484  2012-10-09  08:57:05    454058762   4474    internet    \N  8949    0   0   8949    0   0   0   0   121104000   1440    24  24.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-6SSJ55Z   66817   \N  \N  \N  2167261 5791    5426588 gprs63_20121009_004286_131020120312.mas 2012-10-13 03:12:53 ungprs63_20121009_5549276_071120120302.mas  \N  AIS 0   2227110 8949    \N  \N  \N  \N  \N  \N  \N  \N  \N
1-8CJY-356  2012-10-22  21:59:56    867800963   1968    internet    \N  153677  0   0   153677  0   0   0   0   121104000   1920    32  32.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-699AVWS   66818   \N  \N  \N  2193344 1483    5486759 gprs63_20121022_029163_221020122317.mas 2012-10-22 23:21:45 ungprs63_20121022_5549280_071120120302.mas  \N  AIS 0   2227110 153677  \N  \N  \N  \N  \N  \N  \N  \N  \N
1-8CJY-356  2012-10-30  10:48:50    2796459017  1968    internet    \N  8522    0   0   8522    0   0   0   0   121104000   3840    64  64.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-699AVWS   66818   \N  \N  \N  2222995 4676    5520363 gprs63_20121030_027310_301020121302.mas 2012-10-30 13:08:48 ungprs63_20121030_5549280_071120120302.mas  \N  AIS 0   2227110 8522    \N  \N  \N  \N  \N  \N  \N  \N  \N
\.

--
-- Data for Name: wireless_voice_smsdf_call_hist_1_prt_p201309; Type: TABLE DATA; Schema: prod_db_cdr; Owner: gpadmin
--

COPY wireless_voice_smsdf_call_hist_1_prt_p201309 (access_method_identifier, call_start_date, call_start_time, call_sequence_number, call_event_sequence_number, call_terminating_number, terminating_network_identifier, terminating_cellular_network_i, call_duration, call_chargeable_duration, call_chargeable_second, wireless_airtime_period_code, call_net_revenue_amount, call_gross_revenue_amount, call_rating_tariff_code, original_account_identifier, billing_account_identifier, cost_band_code, call_cost_code, highest_priority_disc_type_cd, call_service_type_code, call_enhanced_svc_type_code, call_type_code, switch_identifier, terminating_zone_geography_id, originating_zone_geography_id, orig_province_geography_id, term_province_geography_id, originating_cell_site_id, terminating_cell_site_id, actual_cell_site_identifier, call_pay_flag, call_pay_by_number, cp_booked_qty, cp_attended_qty, call_vpn_service_type_cd, call_term_nbr_cos_id, file_id, execution_id, managed_file_id, row_num, invoicing_company, roaming_flag, originating_operator, terminating_operator, created_date, source_filename) FROM stdin;
\.


--
-- Data for Name: wl_voice_smsdf_call_hist_err; Type: TABLE DATA; Schema: prod_db_cdr; Owner: gpadmin
--

COPY wl_voice_smsdf_call_hist_err (access_method_number, access_method_identifier, account_number, account_identifier, call_event_sequence_number, call_type_code, event_dtm, call_start_date, call_start_time, call_net_revenue_amount, call_rating_tariff_code, call_originating_number, call_cost_code, call_service_type_code, call_terminating_number, originating_cell_site_id, terminating_cell_site_id, call_chargeable_duration, call_chargeable_second, call_enhanced_svc_type_code, switch_identifier, call_duration, wireless_airtime_period_code, wireless_airtime_period_desc, term_province_geography_id, term_province_geography_nm, orig_province_geography_id, orig_province_geography_nm, terminating_zone_geography_id, terminating_zone_geography_nm, originating_zone_geography_id, originating_zone_geography_nm, terminating_cellular_network_i, terminating_network_identifier, cost_band_code, cost_band_name, cost_band_desc, actual_cell_site_identifier, call_gross_revenue_amount, highest_priority_disc_type_cd, cp_booked_qty, cp_attended_qty, call_pay_flag, call_pay_by_number, call_vpn_service_type_cd, original_account_identifier, original_account_number, call_sequence_number, file_id, execution_id, managed_file_id, row_num, invoicing_company, roaming_flag, originating_operator, terminating_operator, created_date, source_filename) FROM stdin;
0817218057  1-5OEM-231  31300003445661  \N  130920000   1   20130828225815  2013-08-28  22:58:15    0.00    169 \N  66923020889 LOCAL   0819232299  M19025  66923020889 5   300 N   66923011161 269 4   Nationwide  OTH AIS3G   BKK Bangkok OT  AWN-MG01    CB  AWN-MG01    AWN-MG01:AWN    3GPost-paid 2532    AWN Local   A001Non-VAS 669230208   7.50    386 \N  \N  \N  \N  \N  \N  \N  66033863618 \N  3391729 6603386 3618    AWN 0   \N  \N  2013-08-28 23:20:14 gsm1_20130828_018731_280820132317.mas
0817314643  0817314643  31300003417046  \N  130924000   65  20130828091802  2013-08-28  09:18:02    0.00    365 \N  68053   ADJACENT    0868767187  L01212  68053   2   120 N1  SCPvpn1 117 4   Nationwide  OTH DTC3    RYG Rayong  XL  W18-MW83    CE  GSM-MG013   W18-MW83:W18    DTAC    5   Others Adjacent Zone    A002Non-VAS \N  3.00    3836    \N  \N  0   0817314643  1   \N  \N  6600042647  \N  3386946 6600042 647 AWN 1   \N  \N  2013-08-28 10:21:20 vpn_20130828_015884_280820131021.mas
0817314643  0817314643  31300003417046  \N  130924000   65  20130829135623  2013-08-29  13:56:23    0.00    365 \N  023835641   LOCAL   023835641   L01212  023835641   1   60  N1  SCPvpn1 41  4   Nationwide  BKK Bangkok RYG Rayong  CB  TOT-LO01    CE  GSM-MG013   TOT-LO01:LANDLI TOT 1647    Landline Local  A001Non-VAS \N  1.50    3836    \N  \N  0   0817314643  1   \N  \N  660575518805    \N  3391766 6605755 18805   AWN 1   \N  \N  2013-08-29 15:33:05 vpn_20130829_008870_290820131459.mas
0818458854  1-GI3-182   31300003462832  \N  130904000   1   20130829112115  2013-08-29  11:21:15    0.00    169 \N  90099   VAS 90099   AWN61   90099   1   60  F   66818900702 1   1   Constant    OTH VMB OTH AIS OT  \N  OT  AWN-MG01    \N  OTH 1825    Voice Mail 1    B008Network Services    90099   0.00    \N  \N  \N  \N  \N  \N  \N  \N  66052865699 \N  3391729 6605286 5699    AWN 1   \N  \N  2013-08-29 12:04:34 gsm1_20130829_015829_290820131157.mas
0818849405  1-9HI8-410  31300003428854  \N  130916000   1   20130827124233  2013-08-27  12:42:33    0.00    329 \N  68041   LOCAL   0813183223  M04575  68041   3   180 N   66923014101 125 93  05:00-16:59 OTH AIS CRI Chiang Rai  CB  DPC-MD011   NU  AWN-MG03    DPC-MD011:DPC   DPC 772 DPC Local   A001Non-VAS 68041   4.00    3408    \N  \N  \N  \N  \N  \N  \N  659587910141    \N  3383845 6595879 10141   AWN 0   \N  \N  2013-08-27 13:20:21 gsm1_20130827_013376_270820131315.mas
\.


--
-- Greenplum Database database dump complete
--

我想按关键字条件拆分内容。自“COPY”一词到段落结尾。将每个内容拆分为新文件。并以表名命名。

实施例

backup_gprs_usg.txt

COPY backup_gprs_usg (access_method_identifier, call_start_date, call_start_time, call_sequence_number, highest_priority_disc_type_cd, access_point_node_code, actual_cell_site_identifier, usg_data_rec_transfer_size_amt, usage_data_record_downlink_vol, usage_data_record_uplink_vol, download_volume, usage_qos_reliability, usage_qos_peak_rate, usage_qos_delay, usage_qos_mean_rate, call_event_sequence_number, call_duration, call_minute, call_gross_revenue_amount, call_net_revenue_amount, cost_band_code, call_cost_code, switch_identifier, call_service_type_code, wireless_airtime_period_code, call_type_code, call_rating_tariff_code, original_account_identifier, billing_account_identifier, original_msisdn, call_terminating_number, hotspot_id, file_id, execution_id, row_num, managed_file_id, mas_file_name, mas_file_date, unl_file_name, unl_file_date, invoicing_company, roaming_flag, unl_execution_id, charge_download_volume, rat_type, package_name, package_eff_dttm, service_package_id, test_indicator_yn, current_speed, charging_id, call_charge_duration, revenue_code) FROM stdin;
1-70SH-484  2012-11-06  13:45:25    3447641960  4474    internet    \N  13741   0   0   13741   0   0   0   0   121204000   2400    40  40.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-6SSJ55Z   66817   \N  \N  \N  2223187 19285   5548108 gprs63_20121106_026327_061120121605.mas 2012-11-06 16:44:34 ungprs63_20121106_5549276_071120120302.mas  \N  AIS 0   2227110 13741   \N  \N  \N  \N  \N  \N  \N  \N  \N
1-70SH-484  2012-10-09  08:57:05    454058762   4474    internet    \N  8949    0   0   8949    0   0   0   0   121104000   1440    24  24.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-6SSJ55Z   66817   \N  \N  \N  2167261 5791    5426588 gprs63_20121009_004286_131020120312.mas 2012-10-13 03:12:53 ungprs63_20121009_5549276_071120120302.mas  \N  AIS 0   2227110 8949    \N  \N  \N  \N  \N  \N  \N  \N  \N
1-8CJY-356  2012-10-22  21:59:56    867800963   1968    internet    \N  153677  0   0   153677  0   0   0   0   121104000   1920    32  32.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-699AVWS   66818   \N  \N  \N  2193344 1483    5486759 gprs63_20121022_029163_221020122317.mas 2012-10-22 23:21:45 ungprs63_20121022_5549280_071120120302.mas  \N  AIS 0   2227110 153677  \N  \N  \N  \N  \N  \N  \N  \N  \N
1-8CJY-356  2012-10-30  10:48:50    2796459017  1968    internet    \N  8522    0   0   8522    0   0   0   0   121104000   3840    64  64.000  0.000   1253    202099  OPAvscp1    VAS 55  63  126 \N  1-699AVWS   66818   \N  \N  \N  2222995 4676    5520363 gprs63_20121030_027310_301020121302.mas 2012-10-30 13:08:48 ungprs63_20121030_5549280_071120120302.mas  \N  AIS 0   2227110 8522    \N  \N  \N  \N  \N  \N  \N  \N  \N
\. 

wireless_voice_smsdf_call_hist_1_prt_p201309.txt

COPY wireless_voice_smsdf_call_hist_1_prt_p201309 (access_method_identifier, call_start_date, call_start_time, call_sequence_number, call_event_sequence_number, call_terminating_number, terminating_network_identifier, terminating_cellular_network_i, call_duration, call_chargeable_duration, call_chargeable_second, wireless_airtime_period_code, call_net_revenue_amount, call_gross_revenue_amount, call_rating_tariff_code, original_account_identifier, billing_account_identifier, cost_band_code, call_cost_code, highest_priority_disc_type_cd, call_service_type_code, call_enhanced_svc_type_code, call_type_code, switch_identifier, terminating_zone_geography_id, originating_zone_geography_id, orig_province_geography_id, term_province_geography_id, originating_cell_site_id, terminating_cell_site_id, actual_cell_site_identifier, call_pay_flag, call_pay_by_number, cp_booked_qty, cp_attended_qty, call_vpn_service_type_cd, call_term_nbr_cos_id, file_id, execution_id, managed_file_id, row_num, invoicing_company, roaming_flag, originating_operator, terminating_operator, created_date, source_filename) FROM stdin;
\.

wl_voice_smsdf_call_hist_err.txt

COPY wl_voice_smsdf_call_hist_err (access_method_number, access_method_identifier, account_number, account_identifier, call_event_sequence_number, call_type_code, event_dtm, call_start_date, call_start_time, call_net_revenue_amount, call_rating_tariff_code, call_originating_number, call_cost_code, call_service_type_code, call_terminating_number, originating_cell_site_id, terminating_cell_site_id, call_chargeable_duration, call_chargeable_second, call_enhanced_svc_type_code, switch_identifier, call_duration, wireless_airtime_period_code, wireless_airtime_period_desc, term_province_geography_id, term_province_geography_nm, orig_province_geography_id, orig_province_geography_nm, terminating_zone_geography_id, terminating_zone_geography_nm, originating_zone_geography_id, originating_zone_geography_nm, terminating_cellular_network_i, terminating_network_identifier, cost_band_code, cost_band_name, cost_band_desc, actual_cell_site_identifier, call_gross_revenue_amount, highest_priority_disc_type_cd, cp_booked_qty, cp_attended_qty, call_pay_flag, call_pay_by_number, call_vpn_service_type_cd, original_account_identifier, original_account_number, call_sequence_number, file_id, execution_id, managed_file_id, row_num, invoicing_company, roaming_flag, originating_operator, terminating_operator, created_date, source_filename) FROM stdin;
0817218057  1-5OEM-231  31300003445661  \N  130920000   1   20130828225815  2013-08-28  22:58:15    0.00    169 \N  66923020889 LOCAL   0819232299  M19025  66923020889 5   300 N   66923011161 269 4   Nationwide  OTH AIS3G   BKK Bangkok OT  AWN-MG01    CB  AWN-MG01    AWN-MG01:AWN    3GPost-paid 2532    AWN Local   A001Non-VAS 669230208   7.50    386 \N  \N  \N  \N  \N  \N  \N  66033863618 \N  3391729 6603386 3618    AWN 0   \N  \N  2013-08-28 23:20:14 gsm1_20130828_018731_280820132317.mas
0817314643  0817314643  31300003417046  \N  130924000   65  20130828091802  2013-08-28  09:18:02    0.00    365 \N  68053   ADJACENT    0868767187  L01212  68053   2   120 N1  SCPvpn1 117 4   Nationwide  OTH DTC3    RYG Rayong  XL  W18-MW83    CE  GSM-MG013   W18-MW83:W18    DTAC    5   Others Adjacent Zone    A002Non-VAS \N  3.00    3836    \N  \N  0   0817314643  1   \N  \N  6600042647  \N  3386946 6600042 647 AWN 1   \N  \N  2013-08-28 10:21:20 vpn_20130828_015884_280820131021.mas
0817314643  0817314643  31300003417046  \N  130924000   65  20130829135623  2013-08-29  13:56:23    0.00    365 \N  023835641   LOCAL   023835641   L01212  023835641   1   60  N1  SCPvpn1 41  4   Nationwide  BKK Bangkok RYG Rayong  CB  TOT-LO01    CE  GSM-MG013   TOT-LO01:LANDLI TOT 1647    Landline Local  A001Non-VAS \N  1.50    3836    \N  \N  0   0817314643  1   \N  \N  660575518805    \N  3391766 6605755 18805   AWN 1   \N  \N  2013-08-29 15:33:05 vpn_20130829_008870_290820131459.mas
0818458854  1-GI3-182   31300003462832  \N  130904000   1   20130829112115  2013-08-29  11:21:15    0.00    169 \N  90099   VAS 90099   AWN61   90099   1   60  F   66818900702 1   1   Constant    OTH VMB OTH AIS OT  \N  OT  AWN-MG01    \N  OTH 1825    Voice Mail 1    B008Network Services    90099   0.00    \N  \N  \N  \N  \N  \N  \N  \N  66052865699 \N  3391729 6605286 5699    AWN 1   \N  \N  2013-08-29 12:04:34 gsm1_20130829_015829_290820131157.mas
0818849405  1-9HI8-410  31300003428854  \N  130916000   1   20130827124233  2013-08-27  12:42:33    0.00    329 \N  68041   LOCAL   0813183223  M04575  68041   3   180 N   66923014101 125 93  05:00-16:59 OTH AIS CRI Chiang Rai  CB  DPC-MD011   NU  AWN-MG03    DPC-MD011:DPC   DPC 772 DPC Local   A001Non-VAS 68041   4.00    3408    \N  \N  \N  \N  \N  \N  \N  659587910141    \N  3383845 6595879 10141   AWN 0   \N  \N  2013-08-27 13:20:21 gsm1_20130827_013376_270820131315.mas
\.

你有没有想过编写shell脚本?

1 个答案:

答案 0 :(得分:0)

awk和perl都非常擅长一次读取一个段落:

awk '$1=="COPY" {f = $2 ".txt"; print > f}' RS='' dbdump.txt
perl -MIO::File -000 -ne '
    if (/^COPY (\S+)/) {my $fh = IO::File->new("$1.txt", ">"); $fh->print($_)}
' dbdump.txt