合并2个数据集与多个变量(不能只使用类似的变量)

时间:2017-03-18 18:26:39

标签: python mysql matlab sas weka

我正在尝试合并两个数据集(分别有150,000条和50,000条记录),每个数据集大约有50个变量,其中只有一部分能够相互对应。两个数据集共有的变量是“事件日期”,但我不能只依靠它来合并,因为其中一个数据集在同一天可能有约300起事件(每起事件记录了地址、城市、县、邮政编码,以及紧急医疗服务(EMS)接到通知的时间)。另一个数据集记录了事件发生的确切时间、地址、城市、县、邮政编码和其他一些字段,但如果信息未知或未记录,这些字段可能为空。

我想逐个字段、带容差(缓冲)地连接这两个数据集。例如,首先比较事件发生的日期(该字段没有缺失值);如果日期相同,下一步再检查事件是否发生在同一个县、市等(其中某些值可能为空)。最后比较的字段是EMS接到通知的时间(事件发生后30–60分钟内)。如果其余字段都匹配,最终就取决于这30–60分钟的时间缓冲。这将是多对一合并(50,000对150,000)。

什么程序可以让我这样做?是否有某些代码?

我添加了两个数据集的片段(https://filedropper.com/filemanager/public.php?service=files&t=0f2d129b1622901fafc8c9e678433623&download)和(https://filedropper.com/filemanager/public.php?service=files&t=642c840bc3e431c3d4d839a71bb66944&download)。

datasets

预期输出看起来像这样

expected output

使用的代码是:

% Merge crash records (dataset 2) into EMS incident records (dataset 1).
% For every row of T2, each not-yet-augmented row of T1 is compared on
% date, street, city, county and notification time. A field that is empty
% on either side is treated as matching. When all five checks pass, seven
% T2 columns are copied into the T1 row.
T1 = readtable('dataset1.csv');   % semicolons added: do not dump whole tables
T2 = readtable('dataset2.csv');
LT1 = size(T1,1);
LT2 = size(T2,1);

% Columns taken from T2 and appended (initially empty) to T1.
addedcols = {'County_Name', 'City_Name', 'Town_Name', 'CrashTime', ...
    'SecondaryLocation', 'RouteName', 'PostalCityName'};
T1 = [T1, cell2table(repmat({''}, LT1, 7), 'VariableNames', addedcols)];

augmented = false(LT1,1);           % true once a T1 row has received T2 data
dtstr = 'MM/dd/yyyy HH:mm';         % format used to parse date-time strings
trange = duration([0,0,0;1,0,0]);   % accepted (notified - crash) delay: 0 h .. 1 h

for tt2 = 1:LT2
    % ---- normalise the T2 row once, outside the inner loop ----
    cdate2 = T2.CrashDate{tt2};
    crasht2 = T2.CrashDateTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = datetime([cdate2, ' ', crasht2],'InputFormat',dtstr);

    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD');
        strtaddr2 = strtaddr2(isletter(strtaddr2));   % keep letters only
    end

    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
    end

    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = upper(countyn2);
        countyn2 = strrep(countyn2,'COUNTY','');
    end

    for tt1 = 1:LT1
        if augmented(tt1)   % this T1 row already has a match
            continue
        end

        % One flag per criterion; stays true when a field is missing.
        matchvec = true(5,1);

        % (1) incident date
        cdate1 = T1.IncidentDate{tt1};
        matchvec(1) = strcmp(cdate1, cdate2);

        % (2) street name
        strtaddr1 = upper(T1.AddressStreet{tt1});
        if ~isempty(strtaddr2) && ~isempty(strtaddr1)
            strtaddr1 = strrep(strtaddr1,'ROAD','RD');
            strtaddr1 = strtaddr1(isletter(strtaddr1));
            matchvec(2) = strcmp(strtaddr1,strtaddr2);
        end

        % (3) city name
        pcityn1 = upper(T1.AddressCityIncident{tt1});
        pcityn1 = pcityn1(isletter(pcityn1));
        if ~isempty(pcityn2) && ~isempty(pcityn1)
            matchvec(3) = strcmp(pcityn1,pcityn2);
        end

        % (4) county name. 'COUNTY' is stripped here as well, because it
        % is stripped from the T2 value above; without this the two sides
        % (e.g. 'FAIRFAXCOUNTY' vs 'FAIRFAX') could never compare equal.
        countyn1 = upper(T1.AddressCountyIncident{tt1});
        countyn1 = countyn1(isletter(countyn1));
        countyn1 = strrep(countyn1,'COUNTY','');
        if ~isempty(countyn2) && ~isempty(countyn1)
            matchvec(4) = strcmp(countyn1,countyn2);
        end

        % (5) notification time: every available timestamp must fall in
        % the window trange after the crash. Each string is parsed before
        % it is subtracted from the crash date-time.
        crashdt1u = T1.UnitNotified{tt1};
        crashdt1d = T1.Date12_DispatchNotified{tt1};
        tmatch = true(2,1);
        if ~isempty(crashdt1u)
            difcrdt1u = datetime(crashdt1u,'InputFormat',dtstr) - crashdt2;
            tmatch(1) = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
        end
        if ~isempty(crashdt1d)
            difcrdt1d = datetime(crashdt1d,'InputFormat',dtstr) - crashdt2;
            tmatch(2) = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
        end
        matchvec(5) = all(tmatch);

        if all(matchvec)
            % copy the T2 columns into the matched T1 row
            T1{tt1,addedcols} = table2cell( T2(tt2,addedcols) );
            augmented(tt1) = true;
        else
            % debug output: show the pair and which criteria failed
            T1(tt1,:)
            T2(tt2,:)
            matchvec
        end
    end
end
T1

1 个答案:

答案 0 :(得分:0)

编辑:优化了代码性能,以应对大量数据。

致OP:您的原始数据存在许多错误。csv文件的实际数据中任何位置都不应出现逗号。某些字符串(我发现有1个“单位通知时间”)不符合预定义的格式。代码中的try块只处理了这一种特定情况;如果所有字段都可能存在有缺陷的数据,则应对所有字段都使用try。所有这些问题都应在合并之前解决。

clear;clc;close all

% ---- load the two datasets ----
T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
% Work on a leading subset while testing. The row limits are clamped to
% the table size so that inputs with fewer rows do not raise an
% index-out-of-range error.
T1 = T1(1:min(1000,size(T1,1)),:);
T2 = T2(1:min(900,size(T2,1)),:);
LT1 = size(T1,1);
LT2 = size(T2,1);
% Append seven empty columns to T1; they receive the values of the
% matching T2 row later on.
T1 = [T1, cell2table(repmat({''}, LT1, 7), ...
    'VariableNames', {'County_Name', 'City_Name', 'Town_Name', ...
    'CrashTime', 'SecondaryLocation', 'RouteName', 'PostalCityName'})];

augmented = false(LT1,1); % true once a T1 row has been given T2 data
dtstr = 'MM/dd/yyyy HH:mm'; % format used to parse date-time strings
% Accepted window for (notification time - crash time): -1 h to +1 h.
% NOTE(review): the question describes notification happening after the
% crash; use duration([0,0,0;1,0,0]) to enforce a 0 h to 1 h window.
trange = duration([-1,0,0;1,0,0]);
% strtaddrcmpf(c1,c2): c1 and c2 are cell arrays of street-name segments.
% Returns one logical per element of c2 that is true when that segment
% contains, and is contained in, some segment of c1 (i.e. the two
% non-empty strings are equal). The inner result is reduced with any()
% so that the outer cellfun always receives a scalar, also when c1 holds
% more than one segment.
strtaddrcmpf = @(c1,c2) cellfun(@(s2) ...
    any(cellfun(@(s1) ...
    ~(isempty(strfind(s1,s2)) || isempty(strfind(s2,s1))), ...
    c1)), ...
    c2);
% Buffer the normalised fields once so the matching loop does not redo
% the string processing for every pair of rows.
fprintf('Pre-processing started at %s \n', datestr(datetime('now')))
% T1B: parsed notification times, normalised street/city/county, and one
% flag per field that is set to false when the field is missing/unusable.
T1B = cell2table([repmat({''}, LT1, 5), repmat({true}, LT1, 4)], ...
    'VariableNames', {'CrashDTU','CrashDTD',  ...
    'StrtAdd','PoCityN', 'CountyN', ...
    'CrashDTFlg', 'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});
% T2B: parsed crash date-time, normalised street/city/county and flags.
T2B = cell2table([repmat({''}, LT2, 4), repmat({true}, LT2, 3)], ...
    'VariableNames', {'CrashDT', 'StrtAdd', 'PoCityN', 'CountyN', ...
    'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});

% ---- pre-process dataset 2 (first half of the progress bar) ----
fprintf('Progress:        ')
for tt2 = 1:LT2
    % erase the previous progress text, then print the new percentage
    fprintf('%s',repmat(sprintf('\b'),1,length('Progress:        ')))
    fprintf('Progress: %6.2f%%', tt2/LT2*50);

    % Crash date and time are mandatory; combine and parse them.
    cdate2 = T2.CrashDate{tt2};
    crasht2 = T2.CrashTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = [cdate2, ' ', crasht2];
    T2B.CrashDT{tt2} = datetime(crashdt2,'InputFormat',dtstr);

    % Street: upper-case, abbreviate, split intersections on '/'.
    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD'); % repeat for HWY ST etc
        strtaddr2 = strsplit(strtaddr2,'/');
        % Interstate names contain digits, so they must not go through
        % the letters-only filter. A segment that starts with the
        % interstate name collapses the whole address to that name,
        % dropping trailing location detail such as 'RAMP'.
        if any(strncmp(strtaddr2,'I95',3))
            strtaddr2 = {'I95'};
        elseif any(strncmp(strtaddr2,'I495',4))
            strtaddr2 = {'I495'};
        else
            % ordinary street: keep letters only (drops house numbers)
            strtaddr2 = cellfun(@(s) s(isletter(s)), ...
                strtaddr2, 'UniformOutput',false);
        end
        T2B.StrtAdd{tt2} = strtaddr2;
    else
        T2B.StrtAddFlg(tt2) = false; % missing: skipped when matching
    end

    % Postal city: upper-case, letters only.
    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
        T2B.PoCityN{tt2} = pcityn2;
    else
        T2B.PoCityNFlg(tt2) = false;
    end

    % County: upper-case, letters only, without the word 'COUNTY'.
    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = upper(countyn2);
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = strrep(countyn2,'COUNTY','');
        T2B.CountyN{tt2} = countyn2;
    else
        T2B.CountyNFlg(tt2) = false;
    end
end
% ---- pre-process dataset 1 (second half of the progress bar) ----
for tt1 = 1:LT1
    % erase the previous progress text, then print the new percentage
    fprintf('%s',repmat(sprintf('\b'),1,length('Progress:        ')))
    fprintf('Progress: %6.2f%%', tt1/LT1*50+50);

    % Street: upper-case, abbreviate, split intersections on '/'.
    strtaddr1 = upper(T1.AddressStreet{tt1});
    if ~isempty(strtaddr1)
        strtaddr1 = strrep(strtaddr1,'ROAD','RD');
        strtaddr1 = strsplit(strtaddr1,'/');
        % Interstate names contain digits, so they must not go through
        % the letters-only filter. A segment that starts with the
        % interstate name collapses the whole address to that name,
        % dropping trailing location detail.
        if any(strncmp(strtaddr1,'I95',3))
            strtaddr1 = {'I95'};
        elseif any(strncmp(strtaddr1,'I495',4))
            strtaddr1 = {'I495'};
        else
            % ordinary street: keep letters only (drops house numbers)
            strtaddr1 = cellfun(@(s) s(isletter(s)), ...
                strtaddr1, 'UniformOutput',false);
        end
        T1B.StrtAdd{tt1} = strtaddr1;
    else
        T1B.StrtAddFlg(tt1) = false; % missing: skipped when matching
    end

    % City: upper-case, letters only.
    pcityn1 = upper(T1.AddressCityIncident{tt1});
    if ~isempty(pcityn1)
        pcityn1 = pcityn1(isletter(pcityn1));
        T1B.PoCityN{tt1} = pcityn1;
    else
        T1B.PoCityNFlg(tt1) = false;
    end

    % County: upper-case, letters only, without the word 'COUNTY'.
    countyn1 = upper(T1.AddressCountyIncident{tt1});
    if ~isempty(countyn1)
        countyn1 = countyn1(isletter(countyn1));
        countyn1 = strrep(countyn1,'COUNTY','');
        T1B.CountyN{tt1} = countyn1;
    else
        T1B.CountyNFlg(tt1) = false;
    end

    % Notification times: parse whichever of the two is present.
    crashdt1u = T1.UnitNotified{tt1};
    crashdt1d = T1.DispatchNotified{tt1};
    if ~isempty(crashdt1u) || ~isempty(crashdt1d)
        % The strings must carry both date and time in the dtstr format.
        % If either present string fails to parse, the time criterion is
        % disabled for this row instead of aborting the whole run.
        try
            if ~isempty(crashdt1u)
                crashdt1u = datetime(crashdt1u,'InputFormat',dtstr);
                T1B.CrashDTU{tt1} = crashdt1u;
            end
            if ~isempty(crashdt1d)
                crashdt1d = datetime(crashdt1d,'InputFormat',dtstr);
                T1B.CrashDTD{tt1} = crashdt1d;
            end
        catch
            T1B.CrashDTFlg(tt1) = false;
        end
    else
        T1B.CrashDTFlg(tt1) = false; % no notification time recorded
    end
end
% erase the progress text and report completion
fprintf('%s',repmat(sprintf('\b'),1,length('Progress:        ')))
fprintf('Pre-processing finished at %s \n', ...
    datestr(datetime('now')))

fprintf('Matching started at %s \n', datestr(datetime('now')))

% ---- match every T2 row against every not-yet-augmented T1 row ----
% A pair is accepted when the incident date is identical and every other
% criterion (street, city, county, notification time) either matches or
% cannot be evaluated because the field is missing on one side.
% Columns copied from the matching T2 row into T1:
copycols = {'County_Name', 'City_Name', 'Town_Name', ...
    'CrashTime', 'SecondaryLocation', 'RouteName', ...
    'PostalCityName'};

fprintf('Progress:        ')
for tt2 = 1:LT2
    % erase the previous progress text, then print the new percentage
    fprintf('%s',repmat(sprintf('\b'),1,length('Progress:        ')))
    fprintf('Progress: %6.2f%%', tt2/LT2*100);
    % Fetch the buffered fields of the current T2 row. The crash date is
    % read here for the current row; it must not be taken from a variable
    % left over from the pre-processing loop.
    cdate2 = T2.CrashDate{tt2};
    crashdt2 = T2B.CrashDT{tt2};
    strtaddr2 = T2B.StrtAdd{tt2};
    pcityn2 = T2B.PoCityN{tt2};
    countyn2 = T2B.CountyN{tt2};

    for tt1 = 1:LT1
        if augmented(tt1) % match already found, skip
            continue
        end

        % (1) incident date: always present, must be identical
        if ~strcmp(T1.IncidentDate{tt1}, cdate2)
            continue
        end

        % (2) street name: checked only when both sides have one;
        % any shared segment counts as a match
        if T2B.StrtAddFlg(tt2) && T1B.StrtAddFlg(tt1) % put 2 first: faster
            if ~any(strtaddrcmpf(strtaddr2, T1B.StrtAdd{tt1}))
                continue
            end
        end

        % (3) postal city name: checked only when both sides have one
        if T2B.PoCityNFlg(tt2) && T1B.PoCityNFlg(tt1)
            if ~strcmp(T1B.PoCityN{tt1}, pcityn2)
                continue
            end
        end

        % (4) county name: both buffered values are already normalised
        if T2B.CountyNFlg(tt2) && T1B.CountyNFlg(tt1)
            if ~strcmp(T1B.CountyN{tt1}, countyn2)
                continue
            end
        end

        % (5) notification time: each available timestamp must lie
        % within trange of the crash time; a missing one counts as a match
        if T1B.CrashDTFlg(tt1)
            crashdt1u = T1B.CrashDTU{tt1};
            crashdt1d = T1B.CrashDTD{tt1};
            tmatch1 = true;
            tmatch2 = true;
            if ~isempty(crashdt1u)
                difcrdt1u = crashdt1u-crashdt2;
                tmatch1 = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
            end
            if ~isempty(crashdt1d)
                difcrdt1d = crashdt1d-crashdt2;
                tmatch2 = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
            end
            if ~(tmatch1 && tmatch2)
                continue
            end
        end

        % all criteria passed: append the T2 row's columns to the T1 row
        T1{tt1,copycols} = table2cell( T2(tt2,copycols) );
        augmented(tt1) = true;
        % No break here: several T1 rows may belong to the same T2 crash
        % (many-to-one merge). Add a break only if matches are unique.
    end
end

% erase the progress text and report the result
fprintf('%s',repmat(sprintf('\b'),1,length('Progress:        ')))
fprintf('Matching finished at %s \nTotalling %d matches. \n', ...
    datestr(datetime('now')), sum(augmented))

编辑:根据OP新上传的数据集,覆盖了更多情形。

  • 'GEORGETOWN PIKE/CENTRILLION DR'等道路交叉应与'GEORGETOWN PIKE''CENTRILLION DR'匹配。
  • 'I95'这样的州际名称在其名称中有数字,应与街道数字区分开来。
  • 州际名称有时包含应忽略的详细位置。 (并查看其他信息)

添加进度显示。

编辑:我忘了使用augmented记录加快速度。此外,最后添加了调试部分,以查看匹配期间未满足的条件。

这是使用Matlab中的table类的解决方案。由于它是一个非常新的功能,因此在不同版本的Matlab中编程可能会有所不同。我正在使用R2015b。

关键点:

  1. 对于数据集2中的每一行,查找数据集1中所有行的匹配。
  2. 如果有任何已记录的字段不匹配,则跳过;否则认为它们属于同一事件。
  3. 将数据集2中的其他内容附加到1。
  4. 带注释的示例代码:

    (obsolete)
    

    我从Matlab收到此消息

      

    警告:变量名称已被修改,以使其成为有效的MATLAB标识符。

    因此,您可能需要根据需要更改表中的列名称。

    这些是从csv文件导入的原始数据集

    (obsolete)
    

    示例输出:

    (obsolete)
    

    新数据集和输出:

    >> T1
    
    T1 = 
    
        IncidentDate                   AddressStreet                   AddressCityIncident    AddressCountyIncident    AddressState    IncidentPostalCode    DispatchNotified      UnitNotified  
        ____________    ___________________________________________    ___________________    _____________________    ____________    __________________    ________________    ________________
    
        '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33' 
        '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33' 
        '1/1/2014'      'I95 SB TO OLD KEENE MILL RD'                  'SPRINGFIELD'          'Fairfax County'         'VA'            22150                 '1/1/2014 2:00'     '1/1/2014 2:00' 
        '1/1/2014'      'SYDENSTRICKER RD/OLD KEENE MILL RD'           'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 4:54'     '1/1/2014 4:54' 
        '1/1/2014'      'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB'    'CHANTILLY'            'Fairfax County'         'VA'            20151                 '1/1/2014 12:28'    '1/1/2014 12:28'
        '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'
        '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'
        '1/1/2014'      'CENTREVILLE RD/BRADENTON DR'                  'CENTREVILLE'          'Fairfax County'         'VA'            20121                 '1/1/2014 13:41'    '1/1/2014 13:41'
        '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:45'    '1/1/2014 16:45'
        '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:42'    '1/1/2014 16:42'
        '1/1/2014'      '8526 GEORGETOWN PIKE'                         'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:49'    '1/1/2014 16:49'
        '1/1/2014'      'OX RD/BRADDOCK RD'                            'FAIRFAX'              'Fairfax County'         'VA'            22032                 '1/1/2014 22:32'    '1/1/2014 22:32'
    
    >> T2
    
    T2 = 
    
        CrashDate       County_Name       City_Name    Town_Name    CrashTime        SecondaryLocation              RouteName         PostalCityName
        __________    ________________    _________    _________    _________    __________________________    ___________________    ______________
    
        '1/1/2014'    'Fairfax County'    NaN          NaN          '6:35'       ''                            'I95'                  'LORTON'      
        '1/1/2014'    'Fairfax County'    NaN          NaN          '5:19'       ''                            'I95 RAMP'             'SPRINGFIELD' 
        '1/1/2014'    'Fairfax County'    NaN          NaN          '10:23'      ''                            'I495'                 'ANNANDALE'   
        '1/1/2014'    'Fairfax County'    NaN          NaN          '2:08'       ''                            'BUILDERS RD'          'HERNDON'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '16:42'      ''                            'GEORGETOWN PIKE'      'MCLEAN'      
        '1/1/2014'    'Fairfax County'    NaN          NaN          '20:55'      'LEESBURG PIKE'               'WILSON BLVD'          'FALLS CHURCH'
        '1/1/2014'    'Fairfax County'    NaN          NaN          '4:54'       ''                            'SYDENSTRICKER RD'     'BURKE'       
        '1/1/2014'    'Fairfax County'    NaN          NaN          '2:34'       'BEACON HILL RD'              'RICHMOND HWY'         'ALEXANDRIA'  
        '1/1/2014'    'Fairfax County'    NaN          NaN          '2:00'       ''                            'COAT RIDGE RD'        'HERNDON'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '13:17'      ''                            'OLD KEENE MILL RD'    'BURKE'       
        '1/1/2014'    'Fairfax County'    NaN          NaN          '5:19'       'MCLEAREN RD'                 'CENTREVILLE RD'       'HERNDON'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '21:48'      'VIRGINIA CENTER BLVD'        'VADEN DR'             'VIENNA'      
        '1/1/2014'    'Fairfax County'    NaN          NaN          '19:59'      'FAIRFAX COUNTY PKWY RAMP'    'LEE HWY RAMP'         'FAIRFAX'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '2:36'       ''                            'I95'                  'SPRINGFIELD' 
        '1/1/2014'    'Fairfax County'    NaN          NaN          '20:36'      'MOUNT GILEAD RD'             'BRADDOCK RD'          'CENTREVILLE' 
        '1/1/2014'    'Fairfax County'    NaN          NaN          '1:46'       ''                            'I95'                  'LORTON'      
        '1/1/2014'    'Fairfax County'    NaN          NaN          '18:45'      ''                            'I495'                 'HAMPTON'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '13:40'      'BRADENTON DR'                'CENTREVILLE RD'       'CENTREVILLE' 
        '1/1/2014'    'Fairfax County'    NaN          NaN          '17:24'      'SHREVE HILL RD'              'IDYLWOOD RD'          'DUNN LORING' 
        '1/1/2014'    'Fairfax County'    NaN          NaN          '17:46'      'SACRAMENTO DR'               'RICHMOND HWY'         'ALEXANDRIA'  
        '1/1/2014'    'Fairfax County'    NaN          NaN          '1:40'       ''                            'WINBOURNE RD'         'BURKE'       
        '1/1/2014'    'Fairfax County'    NaN          NaN          '1:33'       ''                            'BURKE LAKE RD'        'BURKE'       
        '1/1/2014'    'Fairfax County'    NaN          NaN          '15:44'      'TELEGRAPH RD'                'FRANCONIA RD'         'ALEXANDRIA'  
        '1/1/2014'    'Fairfax County'    NaN          NaN          '22:19'      'OX RD'                       'BRADDOCK RD'          'FAIRFAX'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '12:27'      ''                            'SULLY RD'             'HERNDON'     
        '1/1/2014'    'Fairfax County'    NaN          NaN          '11:25'      'MONUMENT DR'                 'LEE HWY'              'FAIRFAX'     
    
    
    
    T1 = 
    
        IncidentDate                   AddressStreet                   AddressCityIncident    AddressCountyIncident    AddressState    IncidentPostalCode    DispatchNotified      UnitNotified        County_Name       City_Name    Town_Name    CrashTime    SecondaryLocation        RouteName         PostalCityName
        ____________    ___________________________________________    ___________________    _____________________    ____________    __________________    ________________    ________________    ________________    _________    _________    _________    _________________    __________________    ______________
    
        '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33'     'Fairfax County'    [NaN]        [NaN]        '1:33'       ''                   'BURKE LAKE RD'       'BURKE'       
        '1/1/2014'      'BURKE LAKE RD/BURKE RD'                       'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 1:33'     '1/1/2014 1:33'     'Fairfax County'    [NaN]        [NaN]        '1:33'       ''                   'BURKE LAKE RD'       'BURKE'       
        '1/1/2014'      'I95 SB TO OLD KEENE MILL RD'                  'SPRINGFIELD'          'Fairfax County'         'VA'            22150                 '1/1/2014 2:00'     '1/1/2014 2:00'     ''                  ''           ''           ''           ''                   ''                    ''            
        '1/1/2014'      'SYDENSTRICKER RD/OLD KEENE MILL RD'           'BURKE'                'Fairfax County'         'VA'            22015                 '1/1/2014 4:54'     '1/1/2014 4:54'     'Fairfax County'    [NaN]        [NaN]        '4:54'       ''                   'SYDENSTRICKER RD'    'BURKE'       
        '1/1/2014'      'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB'    'CHANTILLY'            'Fairfax County'         'VA'            20151                 '1/1/2014 12:28'    '1/1/2014 12:28'    ''                  ''           ''           ''           ''                   ''                    ''            
        '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'    ''                  ''           ''           ''           ''                   ''                    ''            
        '1/1/2014'      '11700 SWARTS DR'                              'FAIRFAX'              'Fairfax County'         'VA'            22030                 '1/1/2014 13:07'    '1/1/2014 13:07'    ''                  ''           ''           ''           ''                   ''                    ''            
        '1/1/2014'      'CENTREVILLE RD/BRADENTON DR'                  'CENTREVILLE'          'Fairfax County'         'VA'            20121                 '1/1/2014 13:41'    '1/1/2014 13:41'    'Fairfax County'    [NaN]        [NaN]        '13:40'      'BRADENTON DR'       'CENTREVILLE RD'      'CENTREVILLE' 
        '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:45'    '1/1/2014 16:45'    'Fairfax County'    [NaN]        [NaN]        '16:42'      ''                   'GEORGETOWN PIKE'     'MCLEAN'      
        '1/1/2014'      'GEORGETOWN PIKE/CENTRILLION DR'               'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:42'    '1/1/2014 16:42'    'Fairfax County'    [NaN]        [NaN]        '16:42'      ''                   'GEORGETOWN PIKE'     'MCLEAN'      
        '1/1/2014'      '8526 GEORGETOWN PIKE'                         'MCLEAN'               'Fairfax County'         'VA'            22102                 '1/1/2014 16:49'    '1/1/2014 16:49'    'Fairfax County'    [NaN]        [NaN]        '16:42'      ''                   'GEORGETOWN PIKE'     'MCLEAN'      
        '1/1/2014'      'OX RD/BRADDOCK RD'                            'FAIRFAX'              'Fairfax County'         'VA'            22032                 '1/1/2014 22:32'    '1/1/2014 22:32'    'Fairfax County'    [NaN]        [NaN]        '22:19'      'OX RD'              'BRADDOCK RD'         'FAIRFAX'