Perl - 按每个维度转置CSV数据

时间:2017-10-21 08:28:22

标签: perl csv transpose

我想请大家帮忙完成以下任务:我需要用Perl按若干个维度对CSV数据进行转置。

我的解析器生成以下内容:

metric_timestamp,metric_name,logical_cpus,virtual_cpus,PID,pct_CPU,pct_Usr,pct_Sys,Size,ResSet,ResText,ResData,ShdLib,MinorFault,MajorFault,Command,Threads,IOwaitTime
1508571992,top,8,8,0022309,5.10,4.73,0.37,883068,205532,3744,437988,133848,218,0,update-manager,4,0
1508571992,top,8,8,0019621,4.75,3.82,0.93,1583120,154436,12,1052056,32676,6,0,compiz,12,0
1508571992,top,8,8,0022969,3.84,3.58,0.25,6806384,124468,38608,6743892,30840,287,0,splunkd,8,0
1508571992,top,8,8,0024821,3.60,2.54,1.06,996964,437244,38608,926092,39856,3932,0,splunkd,65,0
1508571992,top,8,8,0018921,3.48,1.50,1.98,607744,75992,2288,241472,59520,135,0,Xorg,4,83
1508571992,top,8,8,0002234,3.43,3.18,0.25,44692,4084,212,2372,2032,5,0,dbus-daemon,1,1
1508571992,top,8,8,0000001,2.47,1.57,0.90,37992,5956,1392,1876,3932,169,0,systemd,1,0
1508571992,top,8,8,0019826,1.89,1.64,0.25,244172,3496,3744,90560,1436,0,0,hp-systray,2,0
1508571992,top,8,8,0018387,1.12,0.00,1.12,0,0,0,0,0,0,0,irq/137-brcmf_p,1,0
1508571992,top,8,8,0019112,1.01,0.93,0.08,6825396,136828,38608,6762904,31856,78,0,splunkd,9,0
1508571992,top,8,8,0019667,1.01,0.71,0.30,513568,7672,220,298512,5188,37,0,indicator-multi,4,0
1508571992,top,8,8,0019460,0.88,0.79,0.08,652552,43828,72,387284,15828,0,0,unity-panel-ser,5,0
1508571992,top,8,8,0017365,0.78,0.00,0.78,0,0,0,0,0,0,0,kworker/0:0,1,11
1508571992,top,8,8,0021906,0.76,0.00,0.76,0,0,0,0,0,0,0,kworker/2:5,1,22
1508571992,top,8,8,0019373,0.64,0.47,0.17,32920,764,140,492,560,0,0,upstart-dbus-br,1,0
1508571992,top,8,8,0018182,0.61,0.00,0.61,0,0,0,0,0,0,0,kworker/u16:6,1,4
1508571992,top,8,8,0018246,0.56,0.00,0.56,0,0,0,0,0,0,0,kworker/u16:65,1,0
1508571992,top,8,8,0024853,0.47,0.25,0.22,2464304,381020,23044,523220,332228,9,0,mongod,80,0
1508571992,top,8,8,0000651,0.44,0.00,0.44,0,0,0,0,0,0,0,kworker/u16:0,1,0
1508571992,top,8,8,0020568,0.44,0.37,0.07,296200,109540,3744,121404,62012,7,0,aptd,2,0
1508571992,top,8,8,0019321,0.39,0.29,0.10,44908,3808,212,2588,1536,0,0,dbus-daemon,1,0
1508571992,top,8,8,0024373,0.39,0.00,0.39,0,0,0,0,0,0,0,kworker/u16:5,1,0
1508571992,top,8,8,0018194,0.37,0.00,0.37,0,0,0,0,0,0,0,kworker/u16:12,1,9
1508571992,top,8,8,0018191,0.35,0.00,0.35,0,0,0,0,0,0,0,kworker/u16:9,1,0
1508571992,top,8,8,0002315,0.34,0.12,0.22,32524,5736,580,4364,1544,18,0,systemd-logind,1,0
1508571992,top,8,8,0026328,0.34,0.29,0.05,1088852,136536,1852,674588,37648,0,0,shutter,4,0
1508571992,top,8,8,0000339,0.32,0.00,0.32,0,0,0,0,0,0,0,dmcrypt_write,1,37
1508571992,top,8,8,0000402,0.32,0.00,0.32,0,0,0,0,0,0,0,jbd2/dm-1-8,1,849
1508571992,top,8,8,0019528,0.30,0.27,0.03,476884,3012,40,295680,2420,0,0,indicator-appli,3,0

维度是字段“Command”(命令调用的名称)和“PID”。

目标是把每个指标转换成单独的一行,即每个(Command、PID)组合的每个指标对应一个值:

注意:CSV数据中维度的位置可以是动态的,理想情况下我希望自动检测它

metric_timestamp,metric_name,dimension_Command,dimension_PID,_value
1508573159,IOwaitTime,java,7736,0
1508573159,MajorFault,java,7736,0
1508573159,MinorFault,java,7736,28
1508573159,ResData,java,7736,6539320
1508573159,ResSet,java,7736,957420
1508573159,ResText,java,7736,4
1508573159,ShdLib,java,7736,38092
1508573159,Size,java,7736,6764384
1508573159,Threads,java,7736,58
1508573159,logical_cpus,java,7736,8
1508573159,pct_CPU,java,7736,105.91
1508573159,pct_Sys,java,7736,0.88
1508573159,pct_Usr,java,7736,105.03
1508573159,virtual_cpus,java,7736,8
1508573159,IOwaitTime,Xorg,2674,56
1508573159,MajorFault,Xorg,2674,0
1508573159,MinorFault,Xorg,2674,4
1508573159,ResData,Xorg,2674,235788
1508573159,ResSet,Xorg,2674,73252
1508573159,ResText,Xorg,2674,2288
1508573159,ShdLib,Xorg,2674,58636
1508573159,Size,Xorg,2674,580760
1508573159,Threads,Xorg,2674,4
1508573159,logical_cpus,Xorg,2674,8
1508573159,pct_CPU,Xorg,2674,2.86
1508573159,pct_Sys,Xorg,2674,1.56
1508573159,pct_Usr,Xorg,2674,1.3
1508573159,virtual_cpus,Xorg,2674,8

理想情况下,我只想使用Perl核心模块,因为程序必须尽可能便携,避免需要安装模块。 如果实在做不到,我也可以接受使用非核心Perl模块的解决方案。

在Python中,我使用csv模块完成了这个任务,遗憾的是在用Perl实现相同的目标时我完全卡住了......

非常感谢您的帮助!

编辑1:

我先用Text::CSV_XS模块写了一段代码;接下来的问题似乎在于如何按维度来处理或遍历数据,而这一点我还不知道该怎么做 ;-)

#!/usr/bin/perl

# Read a CSV file and regroup it by column: the result is a hash keyed by the
# cell found on the first line of each column (the header), whose value is an
# array ref holding the remaining cells of that column in row order.
# The structure is printed with Data::Dumper.
#
# Usage: script.pl <csvfile>

use Text::CSV_XS;
use strict;
use warnings;

my $csvfile = shift or die "No filename specified";

my $csv = Text::CSV_XS->new();

# $columns[$i] accumulates every cell of column $i, header cell first.
my @columns;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the handle is not a package global.
open(my $fh, '<', $csvfile) or die "Can't open $csvfile: $!";
while (my $line = <$fh>) {
    $csv->parse($line) or die "parse() failed: " . $csv->error_input();
    my @data = $csv->fields();

    for my $i (0..$#data) {
        push @{$columns[$i]}, $data[$i];
    }
}
close($fh);

# Remove the header cell from each column and use it as the hash key; the
# array ref that remains (now holding only data cells) becomes the value.
my %hash = map {shift @$_ => $_} @columns;

use Data::Dumper;
print Dumper(\%hash);

这会产生:

$VAR1 = {
          'PID' => [
                     '0007736',
                     '0007402'
                   ],
          'metric_name' => [
                             'os.unix.nmon.processes.top',
                             'os.unix.nmon.processes.top'
                           ],
          'Threads' => [
                         '51',
                         '44'
                       ],
          'ResText' => [
                         '4',
                         '130084'
                       ],
          'logical_cpus' => [
                              '8',
                              '8'
                            ],
          'MinorFault' => [
                            '69',
                            '174'
                          ],
          'MajorFault' => [
                            '0',
                            '0'
                          ],
          'ResData' => [
                         '6531132',
                         '3772776'
                       ],
          'pct_CPU' => [
                         '25.34',
                         '7.05'
                       ],
          'ResSet' => [
                        '687028',
                        '547372'
                      ],
          'metric_timestamp' => [
                                  '1508572327',
                                  '1508572327',
                                  ''
                                ],
          'IOwaitTime' => [
                            '0',
                            '0'
                          ],
          'virtual_cpus' => [
                              '8',
                              '8'
                            ],
          'Size' => [
                      '6756256',
                      '4596936'
                    ],
          'Command' => [
                         'java',
                         'chromium-browse'
                       ],
          'pct_Sys' => [
                         '0.35',
                         '1.14'
                       ],
          'pct_Usr' => [
                         '24.99',
                         '5.90'
                       ],
          'ShdLib' => [
                        '37688',
                        '185404'
                      ]
        };

如何按我的维度遍历可用字段?

1 个答案:

答案 0 :(得分:1)

#!/usr/bin/perl

# Unpivot a "wide" CSV into one output row per (input row, metric column).
#
# The input must start with a header line. The columns metric_timestamp,
# Command and PID are copied onto every output row (Command and PID under the
# names dimension_Command and dimension_PID); every other column except
# metric_name is emitted as its own row, with the column name written to
# metric_name and the cell written to _value. Columns are located by header
# name, so their position in the input does not matter.
#
# Usage: script.pl <csvfile>      (result is written to STDOUT)

use Data::Dumper;
use Text::CSV;
use strict;
use warnings;

my $csvfile = shift or die "No filename specified";
# Three-argument open: the file name can never be interpreted as an open mode.
open(my $FILE, '<', $csvfile) or die "Can't open $csvfile: $!";

my $out_file=\*STDOUT;
# open($out_file,">",....);

# Output columns, in output order.
my @out_head = qw(metric_timestamp metric_name dimension_Command dimension_PID _value);

# Input columns that are not metrics; listed in the order matching the first
# four entries of @out_head.
my @key_cols = qw(metric_timestamp metric_name Command PID);

my $csv     = Text::CSV->new();
my $out_csv = Text::CSV->new();

$out_csv->column_names(@out_head);
$out_csv->combine(@out_head);
print {$out_file} $out_csv->string, "\n";

# getline returns undef when there is no line to read, so fail with a clear
# message instead of dereferencing undef.
my $head_row = $csv->getline($FILE)
    or die "Can't read header line from $csvfile\n";
my @head = @{$head_row};
$csv->column_names(\@head);

# Exact-name lookup: a metric column whose name merely contains "PID" or
# "Command" (e.g. "PPID") must still be treated as a metric.
my %is_key  = map { $_ => 1 } @key_cols;
my @metrics = grep { !$is_key{$_} } @head;

while (my $row = $csv->getline_hr($FILE)) {
    my %out;
    # Fills the first four output columns; _value stays undef until the
    # per-metric loop below sets it.
    @out{@out_head} = @{$row}{@key_cols};

    for my $metric (@metrics) {
        $out{metric_name} = $metric;
        $out{_value}      = $row->{$metric};
        $out_csv->combine(map { $out{$_} } @out_head);
        print {$out_file} $out_csv->string, "\n";
    }
}

# getline_hr also returns undef on a parse error; without this check a
# malformed line would silently truncate the output.
if (!$csv->eof) {
    die "Failed to parse $csvfile: " . $csv->error_diag . "\n";
}

close($FILE);