我有一个由整数(年龄)索引的数据帧(DataFrame,330 行 × 11 列)。我想将该数据帧下采样到 97 行。有人能想到一种优雅而简单的方法来实现这一目标吗?
示例数据:
subsidence1.1 subsidence1.11 subsidence1.12 subsidence1.13 subsidence1.14 subsidence1.15 subsidence1.16 subsidence1.17 subsidence1.18 subsidence1.19 subsidence1.2
age
303.982536 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
302.959104 -16.072090 -15.377285 -14.688177 -14.007097 -13.330865 -12.661389 -11.998249 -11.341865 -10.690118 -10.046194 -9.405841
301.947395 -20.178141 -18.767330 -17.372894 -15.990381 -14.623930 -13.268656 -11.927322 -10.599186 -9.284892 -7.981037 -6.691025
300.935685 -24.105568 -21.968602 -19.854874 -17.763423 -15.692894 -13.644213 -11.617834 -9.611091 -7.625165 -5.658335 -3.711900
299.923975 -27.892780 -25.020903 -22.178156 -19.369872 -16.592970 -13.843728 -11.126516 -8.437073 -5.777435 -3.144508 -0.539260
298.912265 -31.562993 -27.947536 -24.375001 -20.842286 -17.351752 -13.900609 -10.488683 -7.114587 -3.778202 -0.477744 2.785826
298.100567 -39.202321 -35.212732 -31.271504 -27.376498 -23.527739 -19.723348 -15.963993 -12.246264 -8.571384 -4.937769 -1.344742
297.888964 -46.722928 -42.359208 -38.049286 -33.791991 -29.585005 -25.427365 -21.320583 -17.259219 -13.245845 -9.279074 -5.356589
296.877373 -50.083382 -44.967299 -39.916886 -34.931039 -30.005863 -25.141908 -20.336060 -15.590047 -10.899534 -6.266525 -1.686869
295.865782 -53.361746 -47.492007 -41.698160 -35.981292 -30.336933 -24.764152 -19.263568 -13.830113 -8.465904 -3.166812 2.067072
294.854191 -56.568517 -49.941151 -43.402492 -36.951362 -30.587567 -24.307742 -18.110565 -11.992692 -5.954436 0.007303 5.892680
293.842600 -59.708739 -52.321161 -45.035074 -37.852737 -30.768427 -23.779991 -16.886848 -10.086845 -3.376732 3.245537 9.781500
292.831009 -62.788359 -54.637342 -46.605512 -38.688092 -30.884265 -23.188478 -15.601343 -8.119917 -0.739700 6.539949 13.720849
291.819418 -65.808873 -56.896600 -48.115167 -39.464029 -30.940535 -22.540796 -14.260980 -6.099704 1.947892 9.881598 17.705631
290.807826 -68.779624 -59.100448 -49.570440 -40.185734 -30.944376 -21.839856 -12.870296 -4.032015 4.680143 13.264523 21.727727
289.796235 -71.698178 -61.255064 -50.976437 -40.859740 -30.899700 -21.092739 -11.434164 -1.919832 7.451046 16.683478 25.783125
288.784644 -74.572221 -63.363381 -52.335479 -41.485496 -30.809202 -20.301245 -9.956993 0.228472 10.256670 20.135201 29.864378
287.773053 -77.400957 -65.426637 -53.650249 -42.071183 -30.678738 -19.471188 -8.442615 2.411406 13.095121 23.612912 33.96932
谢谢
答案 0 :(得分:1)
使用
-- Returns one row per Aggeration_day record joined to its tracking type,
-- media, and media-tracking item, plus campaign details when a campaign
-- item exists for the tracking id (campaign columns are NULL otherwise).
SELECT
    a.Applications,
    a.Clicks,
    ISNULL(a.Datacaptures, 0) AS Datacaptures,  -- NULL is reported as 0
    a.Aggration_day,
    a.MediaUsed_id,
    a.mediatrackingid_fk,
    a.tracking_type,
    t.Tracking_info,
    m.MediaName,
    -- Column qualified with its table alias so the reference stays
    -- unambiguous if another joined table gains a same-named column.
    DATEPART(dayofyear, a.Aggration_day) AS DayofYear,
    c.Title AS Campaigntitle,
    c.campaign_id,
    i.title,
    i.start_date,
    i.end_date,
    i.IO_number
FROM Aggeration_day AS a
INNER JOIN tracking_types AS t
    ON a.tracking_type = t.Tracking_type_id
INNER JOIN MediaUsed AS m
    ON m.media_used_id = a.MediaUsed_id
INNER JOIN MediaTrackingItems AS i
    ON a.mediatrackingid_fk = i.mediatrackingid
LEFT JOIN Campaign_items AS CI
    ON a.mediatrackingid_fk = CI.mediatrackingid_FK
-- This join must be LEFT as well: an inner join on CI.campaign_id_fk
-- discards every row where CI is NULL-extended, which would silently turn
-- the LEFT JOIN above into an INNER JOIN.
LEFT JOIN Campaigns AS c
    ON CI.campaign_id_fk = c.campaign_id
使用 np.linspace 生成等间距的行位置,再用 iloc 选取对应的行。
使用较短数据的示例。
df.iloc[np.linspace(0, len(df) - 1, 97).astype(int)]
答案 1 :(得分:0)
只需使用数据框的 sample 方法(随机抽取 n 行):
# assume df is your dataframe
sample = df.sample(n=97)
答案 2 :(得分:0)
如果您想每隔 10 行取一行(如评论中所述),只需执行:
df[::10]