我正在尝试合并两个数据集(分别有150,000条和50,000条记录),每个数据集大约有50个变量,其中一些变量可以相互匹配。两个数据集共有的一个变量是“事件日期”,但不能仅凭它来合并,因为其中一个数据集在同一天可能有多达300起事件(另有地址、城市、县、邮编以及紧急医疗服务(EMS)接到通知的时间等字段)。另一个数据集包含事件发生的确切时间、地址、城市、县、邮政编码和其他一些字段,但如果信息未知或未记录,这些字段可能为空。
我想按字段逐级比较,并借助一个时间缓冲区来连接这两个数据集。例如,首先比较事件发生的日期(该字段没有缺失值);如果日期相同,下一步再检查它们是否发生在同一个县、市等(某些值可能为空)。最后比较的字段是EMS接到通知的时间(事件发生后30-60分钟)。如果其他字段都匹配,最终就取决于这30-60分钟的时间缓冲区。这将是一个多对一的合并(50,000对150,000)。
什么程序可以让我这样做?是否有某些代码?
我添加了两个数据集的片段(https://filedropper.com/filemanager/public.php?service=files&t=0f2d129b1622901fafc8c9e678433623&download)和(https://filedropper.com/filemanager/public.php?service=files&t=642c840bc3e431c3d4d839a71bb66944&download)
预期输出看起来像这样
使用的代码是:
% Merge crash records (dataset2.csv -> T2) into incident records
% (dataset1.csv -> T1).
%
% For every T2 row, every not-yet-augmented T1 row is tested on five criteria:
%   1) incident date, 2) street name, 3) postal city, 4) county,
%   5) notification time falling inside `trange` after the crash time.
% A criterion whose data is missing on either side is treated as matching.
% When all five hold, seven T2 columns are copied into the T1 row and the row
% is flagged in `augmented` so it is never overwritten by a later T2 row.
T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
LT1 = size(T1,1);
LT2 = size(T2,1);

% Columns copied from T2 into T1 on a successful match.
addcols = {'County_Name', 'City_Name', 'Town_Name', 'CrashTime', ...
    'SecondaryLocation', 'RouteName', 'PostalCityName'};
T1 = [T1, cell2table(repmat({''}, LT1, numel(addcols)), ...
    'VariableNames', addcols)];

augmented = false(LT1,1);           % true once a T1 row has received T2 data
dtstr = 'MM/dd/yyyy HH:mm';         % datetime input format of the csv strings
trange = duration([0,0,0;1,0,0]);   % accepted delay: 0 h .. 1 h after crash

for tt2 = 1:LT2
    % --- normalise the T2 row once per outer iteration ---
    cdate2 = T2.CrashDate{tt2};
    % NOTE(review): column names CrashDateTime / Date12_DispatchNotified are
    % kept as written; confirm they match the names readtable produced.
    crasht2 = T2.CrashDateTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = [cdate2, ' ', crasht2];
    crashdt2 = datetime(crashdt2,'InputFormat',dtstr);

    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD');
        strtaddr2 = strtaddr2(isletter(strtaddr2));   % letters only
    end

    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
    end

    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = upper(countyn2);
        countyn2 = strrep(countyn2,'COUNTY','');
    end

    for tt1 = 1:LT1
        if augmented(tt1)
            continue
        end
        % One flag per criterion; stays true when data is missing.
        matchvec = true(5,1);

        % 1) incident date (raw string comparison)
        cdate1 = T1.IncidentDate{tt1};
        matchvec(1) = strcmp(cdate1, cdate2);

        % 2) street name
        strtaddr1 = upper(T1.AddressStreet{tt1});
        if ~isempty(strtaddr2) && ~isempty(strtaddr1)
            strtaddr1 = strrep(strtaddr1,'ROAD','RD');
            strtaddr1 = strtaddr1(isletter(strtaddr1));
            matchvec(2) = strcmp(strtaddr1,strtaddr2);
        end

        % 3) postal city
        pcityn1 = upper(T1.AddressCityIncident{tt1});
        pcityn1 = pcityn1(isletter(pcityn1));
        if ~isempty(pcityn2) && ~isempty(pcityn1)
            matchvec(3) = strcmp(pcityn1,pcityn2);
        end

        % 4) county -- strip 'COUNTY' exactly as is done for the T2 side,
        % otherwise 'FAIRFAXCOUNTY' would never equal 'FAIRFAX'.
        countyn1 = upper(T1.AddressCountyIncident{tt1});
        countyn1 = countyn1(isletter(countyn1));
        countyn1 = strrep(countyn1,'COUNTY','');
        if ~isempty(countyn2) && ~isempty(countyn1)
            matchvec(4) = strcmp(countyn1,countyn2);
        end

        % 5) notification times: each available timestamp must fall inside
        % trange relative to the crash time; a missing one is ignored.
        crashdt1u = T1.UnitNotified{tt1};
        crashdt1d = T1.Date12_DispatchNotified{tt1};
        if ~isempty(crashdt1u) || ~isempty(crashdt1d)
            tmatch = true(2,1);
            if ~isempty(crashdt1u)
                crashdt1u = datetime(crashdt1u,'InputFormat',dtstr);
                difcrdt1u = crashdt1u - crashdt2;
                tmatch(1) = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
            end
            if ~isempty(crashdt1d)
                crashdt1d = datetime(crashdt1d,'InputFormat',dtstr);
                difcrdt1d = crashdt1d - crashdt2;
                tmatch(2) = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
            end
            matchvec(5) = all(tmatch);
        end

        if all(matchvec)
            % copy the T2 columns into the matched T1 row
            T1{tt1,addcols} = table2cell( T2(tt2,addcols) );
            augmented(tt1)=true;
        else
            % debug dump: show the pair and which criteria failed
            T1(tt1,:)
            T2(tt2,:)
            matchvec
        end
    end
end
T1
答案 0 :(得分:0)
编辑:优化了代码性能,以应对大量数据。
请OP注意:您的原始数据存在许多错误。csv文件的实际数据中任何位置都不应出现逗号。某些字符串(我发现有1个“单位通知时间”)不符合预定义的格式。代码中的 `try` 块只处理了这一种特定情况;如果所有字段都可能含有缺陷数据,则应对所有字段都使用 `try`。所有这些问题都应在合并之前解决。
% Merge crash records (dataset2.csv -> T2) into incident records
% (dataset1.csv -> T1), in two phases:
%   1) Pre-processing: normalise street / city / county strings and parse
%      timestamps once per row, storing the results in buffer tables T2B and
%      T1B together with "data present" flags.
%   2) Matching: for every T2 row, scan the not-yet-augmented T1 rows and
%      compare date, street, city, county and notification time in turn.
%      A criterion with missing data on either side counts as a match.
%      On a full match seven T2 columns are copied into the T1 row.
clear;clc;close all
T1 = readtable('dataset1.csv');
T2 = readtable('dataset2.csv');
% Work on a leading subset only; clamp so files with fewer rows do not error.
T1 = T1(1:min(1000,size(T1,1)),:);
T2 = T2(1:min(900,size(T2,1)),:);
LT1 = size(T1,1);
LT2 = size(T2,1);

% Columns copied from T2 into T1 on a successful match.
addcols = {'County_Name', 'City_Name', 'Town_Name', ...
    'CrashTime', 'SecondaryLocation', 'RouteName', 'PostalCityName'};
% expand T1 with empty columns that will receive the T2 values
T1 = [T1, cell2table(repmat({''}, LT1, numel(addcols)), ...
    'VariableNames', addcols)];
augmented = false(LT1,1);               % true once a T1 row received T2 data
dtstr = 'MM/dd/yyyy HH:mm';             % datetime input format of the strings
trange = duration([-1,0,0;1,0,0]);      % accepted offset: -1 h .. +1 h

% Street comparison. c1 and c2 are cell arrays of street-name segments.
% For each segment s2 of c2 the result is true when some segment s1 of c1
% both contains s2 and is contained in s2. The inner result is reduced with
% any() so that c1 may hold more than one segment.
strtaddrcmpf = @(c1,c2) cellfun(@(s2) ...
    any(cellfun(@(s1) ...
        ~(isempty(strfind(s1,s2)) | isempty(strfind(s2,s1))), ...
        c1)), ...
    c2);

% buffer normalised values to speed up the matching phase
fprintf('Pre-processing started at %s \n', datestr(datetime('now')))
T1B = cell2table([repmat({''}, LT1, 5), repmat({true}, LT1, 4)], ...
    'VariableNames', {'CrashDTU','CrashDTD', ...
    'StrtAdd','PoCityN', 'CountyN', ...
    'CrashDTFlg', 'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});
T2B = cell2table([repmat({''}, LT2, 4), repmat({true}, LT2, 3)], ...
    'VariableNames', {'CrashDT', 'StrtAdd', 'PoCityN', 'CountyN', ...
    'StrtAddFlg', 'PoCityNFlg', 'CountyNFlg'});

bs = sprintf('\b');     % backspace character used to redraw the progress
nback = 0;              % length of the progress text currently on screen

% ---- pre-process T2 (first half of the progress bar) ----
for tt2 = 1:LT2
    progmsg = sprintf('Progress: %6.2f%%', tt2/LT2*50);
    fprintf('%s%s', repmat(bs,1,nback), progmsg);
    nback = length(progmsg);

    cdate2 = T2.CrashDate{tt2};
    crasht2 = T2.CrashTime{tt2};
    assert(~isempty(cdate2) && ~isempty(crasht2),'Major data missing')
    crashdt2 = [cdate2, ' ', crasht2];
    T2B.CrashDT{tt2} = datetime(crashdt2,'InputFormat',dtstr);

    strtaddr2 = T2.RouteName{tt2};
    if ~isempty(strtaddr2)
        strtaddr2 = upper(strtaddr2);
        strtaddr2 = strrep(strtaddr2,'ROAD','RD'); % repeat for HWY ST etc
        strtaddr2 = strsplit(strtaddr2,'/');       % intersections -> segments
        % Interstate names keep their digits; other names keep letters only.
        % NOTE(review): strfind on a cell returns a cell of index vectors, so
        % these cases appear to fire only when a segment starts with the
        % interstate name (index 1 == true) -- confirm.
        switch true
            case strfind(strtaddr2,'I95')
                strtaddr2 = {'I95'};
            case strfind(strtaddr2,'I495')
                strtaddr2 = {'I495'};
            otherwise
                strtaddr2 = cellfun(@(s) s(isletter(s)), ...
                    strtaddr2, 'Uniform',false);
        end
        T2B.StrtAdd{tt2} = strtaddr2;
    else
        T2B.StrtAddFlg(tt2) = false;
    end

    pcityn2 = T2.PostalCityName{tt2};
    if ~isempty(pcityn2)
        pcityn2 = upper(pcityn2);
        pcityn2 = pcityn2(isletter(pcityn2));
        T2B.PoCityN{tt2} = pcityn2;
    else
        T2B.PoCityNFlg(tt2) = false;
    end

    countyn2 = T2.County_Name{tt2};
    if ~isempty(countyn2)
        countyn2 = upper(countyn2);
        countyn2 = countyn2(isletter(countyn2));
        countyn2 = strrep(countyn2,'COUNTY','');
        T2B.CountyN{tt2} = countyn2;
    else
        T2B.CountyNFlg(tt2) = false;
    end
end

% ---- pre-process T1 (second half of the progress bar) ----
for tt1 = 1:LT1
    progmsg = sprintf('Progress: %6.2f%%', tt1/LT1*50+50);
    fprintf('%s%s', repmat(bs,1,nback), progmsg);
    nback = length(progmsg);

    strtaddr1 = upper(T1.AddressStreet{tt1});
    if ~isempty(strtaddr1)
        strtaddr1 = strrep(strtaddr1,'ROAD','RD');
        strtaddr1 = strsplit(strtaddr1,'/');
        % same interstate handling as for T2 above (see NOTE there)
        switch true
            case strfind(strtaddr1,'I95')
                strtaddr1 = {'I95'};
            case strfind(strtaddr1,'I495')
                strtaddr1 = {'I495'};
            otherwise
                strtaddr1 = cellfun(@(s) s(isletter(s)), ...
                    strtaddr1, 'Uniform',false);
        end
        T1B.StrtAdd{tt1} = strtaddr1;
    else
        T1B.StrtAddFlg(tt1) = false;
    end

    pcityn1 = upper(T1.AddressCityIncident{tt1});
    if ~isempty(pcityn1)
        pcityn1 = pcityn1(isletter(pcityn1));
        T1B.PoCityN{tt1} = pcityn1;
    else
        T1B.PoCityNFlg(tt1) = false;
    end

    countyn1 = upper(T1.AddressCountyIncident{tt1});
    if ~isempty(countyn1)
        countyn1 = countyn1(isletter(countyn1));
        countyn1 = strrep(countyn1,'COUNTY','');
        T1B.CountyN{tt1} = countyn1;
    else
        T1B.CountyNFlg(tt1) = false;
    end

    crashdt1u = T1.UnitNotified{tt1};
    crashdt1d = T1.DispatchNotified{tt1};
    if ~isempty(crashdt1u) || ~isempty(crashdt1d)
        % a little dirty here, need both date and time:
        % any timestamp that fails to parse disables the time check
        % for this row instead of aborting the whole run
        try
            if ~isempty(crashdt1u)
                crashdt1u = datetime(crashdt1u,'InputFormat',dtstr);
                T1B.CrashDTU{tt1} = crashdt1u;
            end
            if ~isempty(crashdt1d)
                crashdt1d = datetime(crashdt1d,'InputFormat',dtstr);
                T1B.CrashDTD{tt1} = crashdt1d;
            end
        catch
            T1B.CrashDTFlg(tt1) = false;
        end
    else
        T1B.CrashDTFlg(tt1) = false;
    end
end
fprintf('%s',repmat(bs,1,nback))
fprintf('Pre-processing finished at %s \n', ...
    datestr(datetime('now')))

fprintf('Matching started at %s \n', datestr(datetime('now')))
% process data
incdate1 = T1.IncidentDate;     % hoisted: this column is never modified below
nback = 0;
for tt2 = 1:LT2
    progmsg = sprintf('Progress: %6.2f%%', tt2/LT2*100);
    fprintf('%s%s', repmat(bs,1,nback), progmsg);
    nback = length(progmsg);

    % extract a row for comparison
    cdate2 = T2.CrashDate{tt2};     % must be refreshed for every T2 row
    crashdt2 = T2B.CrashDT{tt2};
    strtaddr2 = T2B.StrtAdd{tt2};
    pcityn2 = T2B.PoCityN{tt2};
    countyn2 = T2B.CountyN{tt2};

    for tt1 = 1:LT1
        if augmented(tt1) % match already found, skip
            continue
        end

        % Boolean comparison: treat missing data as identical, i.e. every
        % criterion defaults to true and is only overwritten when both
        % sides actually carry data.

        % incident date
        if ~strcmp(incdate1{tt1}, cdate2)
            continue
        end

        % street name match
        match2 = true;
        if T2B.StrtAddFlg(tt2) && T1B.StrtAddFlg(tt1) % put 2 first: faster
            strtaddr1 = T1B.StrtAdd{tt1};
            strtaddr_cmp = strtaddrcmpf(strtaddr2,strtaddr1);
            match2 = any(strtaddr_cmp);
        end
        if ~match2
            continue
        end

        % postal city name match
        match3 = true;
        if T2B.PoCityNFlg(tt2) && T1B.PoCityNFlg(tt1)
            pcityn1 = T1B.PoCityN{tt1};
            match3 = strcmp(pcityn1,pcityn2);
        end
        if ~match3
            continue
        end

        % county name match (buffered value is already normalised)
        match4 = true;
        if T2B.CountyNFlg(tt2) && T1B.CountyNFlg(tt1)
            countyn1 = T1B.CountyN{tt1};
            match4 = strcmp(countyn1,countyn2);
        end
        if ~match4
            continue
        end

        % notification time match: every available timestamp must lie
        % within trange of the crash time; a missing one is ignored
        match5 = true;
        if T1B.CrashDTFlg(tt1)
            crashdt1u = T1B.CrashDTU{tt1};
            crashdt1d = T1B.CrashDTD{tt1};
            tmatch1 = true;
            tmatch2 = true;
            if ~isempty(crashdt1u)
                difcrdt1u = crashdt1u-crashdt2;
                tmatch1 = difcrdt1u >= trange(1) && difcrdt1u <= trange(2);
            end
            if ~isempty(crashdt1d)
                difcrdt1d = crashdt1d-crashdt2;
                tmatch2 = difcrdt1d >= trange(1) && difcrdt1d <= trange(2);
            end
            match5 = tmatch1 && tmatch2;
        end
        if ~match5
            continue
        end

        % append row in T2 to T1
        T1{tt1,addcols} = table2cell( T2(tt2,addcols) );
        augmented(tt1) = true;
        % break % assume unique matching
    end
end
fprintf('%s',repmat(bs,1,nback))
fprintf('Matching finished at %s \nTotalling %d matches. \n', ...
    datestr(datetime('now')), sum(augmented))
编辑:根据OP新上传的数据集,覆盖了更多情形。像 `'GEORGETOWN PIKE/CENTRILLION DR'` 这样的道路交叉口应与 `'GEORGETOWN PIKE'` 或 `'CENTRILLION DR'` 匹配。像 `'I95'` 这样的州际公路名称本身带有数字,应与街道门牌号区分开来。另外添加了进度显示。
编辑:我之前忘了使用 `augmented` 记录来加快速度。此外,在最后添加了调试部分,以查看匹配过程中哪些条件未被满足。
这是使用Matlab中 `table` 类的解决方案。由于它是一个相当新的功能,因此在不同版本的Matlab中写法可能会有所不同。我使用的是R2015b。
关键点:
带注释的示例代码:
(obsolete)
我从Matlab收到此消息
警告:修改了变量名称以使其成为有效的MATLAB 标识符
因此,您可能需要根据需要更改表中的列名称。
这些是从csv文件导入的原始数据集
(obsolete)
示例输出:
(obsolete)
新数据集和输出:
>> T1
T1 =
IncidentDate AddressStreet AddressCityIncident AddressCountyIncident AddressState IncidentPostalCode DispatchNotified UnitNotified
____________ ___________________________________________ ___________________ _____________________ ____________ __________________ ________________ ________________
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33'
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33'
'1/1/2014' 'I95 SB TO OLD KEENE MILL RD' 'SPRINGFIELD' 'Fairfax County' 'VA' 22150 '1/1/2014 2:00' '1/1/2014 2:00'
'1/1/2014' 'SYDENSTRICKER RD/OLD KEENE MILL RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 4:54' '1/1/2014 4:54'
'1/1/2014' 'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB' 'CHANTILLY' 'Fairfax County' 'VA' 20151 '1/1/2014 12:28' '1/1/2014 12:28'
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07'
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07'
'1/1/2014' 'CENTREVILLE RD/BRADENTON DR' 'CENTREVILLE' 'Fairfax County' 'VA' 20121 '1/1/2014 13:41' '1/1/2014 13:41'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:45' '1/1/2014 16:45'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:42' '1/1/2014 16:42'
'1/1/2014' '8526 GEORGETOWN PIKE' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:49' '1/1/2014 16:49'
'1/1/2014' 'OX RD/BRADDOCK RD' 'FAIRFAX' 'Fairfax County' 'VA' 22032 '1/1/2014 22:32' '1/1/2014 22:32'
>> T2
T2 =
CrashDate County_Name City_Name Town_Name CrashTime SecondaryLocation RouteName PostalCityName
__________ ________________ _________ _________ _________ __________________________ ___________________ ______________
'1/1/2014' 'Fairfax County' NaN NaN '6:35' '' 'I95' 'LORTON'
'1/1/2014' 'Fairfax County' NaN NaN '5:19' '' 'I95 RAMP' 'SPRINGFIELD'
'1/1/2014' 'Fairfax County' NaN NaN '10:23' '' 'I495' 'ANNANDALE'
'1/1/2014' 'Fairfax County' NaN NaN '2:08' '' 'BUILDERS RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' 'Fairfax County' NaN NaN '20:55' 'LEESBURG PIKE' 'WILSON BLVD' 'FALLS CHURCH'
'1/1/2014' 'Fairfax County' NaN NaN '4:54' '' 'SYDENSTRICKER RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '2:34' 'BEACON HILL RD' 'RICHMOND HWY' 'ALEXANDRIA'
'1/1/2014' 'Fairfax County' NaN NaN '2:00' '' 'COAT RIDGE RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '13:17' '' 'OLD KEENE MILL RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '5:19' 'MCLEAREN RD' 'CENTREVILLE RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '21:48' 'VIRGINIA CENTER BLVD' 'VADEN DR' 'VIENNA'
'1/1/2014' 'Fairfax County' NaN NaN '19:59' 'FAIRFAX COUNTY PKWY RAMP' 'LEE HWY RAMP' 'FAIRFAX'
'1/1/2014' 'Fairfax County' NaN NaN '2:36' '' 'I95' 'SPRINGFIELD'
'1/1/2014' 'Fairfax County' NaN NaN '20:36' 'MOUNT GILEAD RD' 'BRADDOCK RD' 'CENTREVILLE'
'1/1/2014' 'Fairfax County' NaN NaN '1:46' '' 'I95' 'LORTON'
'1/1/2014' 'Fairfax County' NaN NaN '18:45' '' 'I495' 'HAMPTON'
'1/1/2014' 'Fairfax County' NaN NaN '13:40' 'BRADENTON DR' 'CENTREVILLE RD' 'CENTREVILLE'
'1/1/2014' 'Fairfax County' NaN NaN '17:24' 'SHREVE HILL RD' 'IDYLWOOD RD' 'DUNN LORING'
'1/1/2014' 'Fairfax County' NaN NaN '17:46' 'SACRAMENTO DR' 'RICHMOND HWY' 'ALEXANDRIA'
'1/1/2014' 'Fairfax County' NaN NaN '1:40' '' 'WINBOURNE RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '1:33' '' 'BURKE LAKE RD' 'BURKE'
'1/1/2014' 'Fairfax County' NaN NaN '15:44' 'TELEGRAPH RD' 'FRANCONIA RD' 'ALEXANDRIA'
'1/1/2014' 'Fairfax County' NaN NaN '22:19' 'OX RD' 'BRADDOCK RD' 'FAIRFAX'
'1/1/2014' 'Fairfax County' NaN NaN '12:27' '' 'SULLY RD' 'HERNDON'
'1/1/2014' 'Fairfax County' NaN NaN '11:25' 'MONUMENT DR' 'LEE HWY' 'FAIRFAX'
T1 =
IncidentDate AddressStreet AddressCityIncident AddressCountyIncident AddressState IncidentPostalCode DispatchNotified UnitNotified County_Name City_Name Town_Name CrashTime SecondaryLocation RouteName PostalCityName
____________ ___________________________________________ ___________________ _____________________ ____________ __________________ ________________ ________________ ________________ _________ _________ _________ _________________ __________________ ______________
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33' 'Fairfax County' [NaN] [NaN] '1:33' '' 'BURKE LAKE RD' 'BURKE'
'1/1/2014' 'BURKE LAKE RD/BURKE RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 1:33' '1/1/2014 1:33' 'Fairfax County' [NaN] [NaN] '1:33' '' 'BURKE LAKE RD' 'BURKE'
'1/1/2014' 'I95 SB TO OLD KEENE MILL RD' 'SPRINGFIELD' 'Fairfax County' 'VA' 22150 '1/1/2014 2:00' '1/1/2014 2:00' '' '' '' '' '' '' ''
'1/1/2014' 'SYDENSTRICKER RD/OLD KEENE MILL RD' 'BURKE' 'Fairfax County' 'VA' 22015 '1/1/2014 4:54' '1/1/2014 4:54' 'Fairfax County' [NaN] [NaN] '4:54' '' 'SYDENSTRICKER RD' 'BURKE'
'1/1/2014' 'RT28 SB THRU RAMP/RT28 SB RAMP TO RT50 WB' 'CHANTILLY' 'Fairfax County' 'VA' 20151 '1/1/2014 12:28' '1/1/2014 12:28' '' '' '' '' '' '' ''
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07' '' '' '' '' '' '' ''
'1/1/2014' '11700 SWARTS DR' 'FAIRFAX' 'Fairfax County' 'VA' 22030 '1/1/2014 13:07' '1/1/2014 13:07' '' '' '' '' '' '' ''
'1/1/2014' 'CENTREVILLE RD/BRADENTON DR' 'CENTREVILLE' 'Fairfax County' 'VA' 20121 '1/1/2014 13:41' '1/1/2014 13:41' 'Fairfax County' [NaN] [NaN] '13:40' 'BRADENTON DR' 'CENTREVILLE RD' 'CENTREVILLE'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:45' '1/1/2014 16:45' 'Fairfax County' [NaN] [NaN] '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' 'GEORGETOWN PIKE/CENTRILLION DR' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:42' '1/1/2014 16:42' 'Fairfax County' [NaN] [NaN] '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' '8526 GEORGETOWN PIKE' 'MCLEAN' 'Fairfax County' 'VA' 22102 '1/1/2014 16:49' '1/1/2014 16:49' 'Fairfax County' [NaN] [NaN] '16:42' '' 'GEORGETOWN PIKE' 'MCLEAN'
'1/1/2014' 'OX RD/BRADDOCK RD' 'FAIRFAX' 'Fairfax County' 'VA' 22032 '1/1/2014 22:32' '1/1/2014 22:32' 'Fairfax County' [NaN] [NaN] '22:19' 'OX RD' 'BRADDOCK RD' 'FAIRFAX'