我正在对 pandas DataFrame 中的几列重新编码,并将结果推送到 R。问题是其中一列“temp_selection”在 pandas DataFrame 中只出现一次,但推送到 R 之后却出现了多次。
参见下文:
from pandas import DataFrame, read_csv
import os # For changing the working directory
import pandas as pd
import sys #only needed to determine Python version number
import numpy as np
import os
from numpy import random
from scipy.stats import stats
from scipy.special import stdtr
from statsmodels.formula.api import ols
%load_ext rpy2.ipython
# Importing R Packages
from rpy2.robjects.packages import importr
import rpy2.robjects.packages as rpackages
import rpy2.interactive as r
import rpy2.interactive.packages # this can take few seconds
# import R's "base" package
base = importr('base')
# import R's "utils" package
utils = importr('utils')
# Reproducible dataframe for StackOverflow: 1200 rows x 11 columns (A..K)
# of random integers in [0, 100).
df = pd.DataFrame(np.random.randint(0, 100, size=(1200, 11)), columns=list('ABCDEFGHIJK'))

# Median split: the median of column D is the threshold used for both
# derived label columns below.
valence_median = df['D'].median()
df['valence_median_split'] = np.where(df['J'] < valence_median, 'Low_Valence', 'High_Valence')
df['Category'] = np.where(df['B'] < valence_median, 'Faces', 'Houses')

# Create the label column as object dtype up front. Starting from a float64
# NaN column and then writing strings into it relies on implicit upcasting,
# which pandas has deprecated for setitem-like operations.
df['temp_selection'] = pd.Series(np.nan, index=df.index, dtype=object)

# Draw two row labels per valence group and tag them. np.random.choice
# samples with replacement by default, so the two draws may coincide.
low = np.random.choice(df.index[df['valence_median_split'] == 'Low_Valence'], size=2)
high = np.random.choice(df.index[df['valence_median_split'] == 'High_Valence'], size=2)
df.loc[low, 'temp_selection'] = 'Low'
df.loc[high, 'temp_selection'] = 'High'

df_temp = df[df.temp_selection.notnull()]
# NOTE(review): this filters `df` again rather than `df_temp`, so the
# notnull() filter on the previous line is discarded and df_temp still holds
# rows whose temp_selection is NaN. Left unchanged on purpose: the R output
# shown after this snippet was produced by exactly this behaviour.
df_temp = df[df.Category != 'Faces']
# Push the Python variable df_temp into the R session (rpy2 IPython magic).
%Rpush df_temp
# Evaluate R's names() on the pushed object to list its column names.
%R names(df_temp)
输出:
array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
'valence_median_split', 'Category', 'temp_selection.4',
'temp_selection.6', 'temp_selection.7', 'temp_selection.10',
'temp_selection.13', 'temp_selection.16', 'temp_selection.17',
'temp_selection.19', 'temp_selection.21', 'temp_selection.22',
'temp_selection.30', 'temp_selection.35', 'temp_selection.42',
'temp_selection.43', 'temp_selection.44', 'temp_selection.46',
'temp_selection.51', 'temp_selection.53', 'temp_selection.54',
'temp_selection.56', 'temp_selection.57', 'temp_selection.58',
'temp_selection.59', 'temp_selection.62', 'temp_selection.65',
'temp_selection.67', 'temp_selection.68', 'temp_selection.69',
'temp_selection.71', 'temp_selection.77', 'temp_selection.78',
'temp_selection.82', 'temp_selection.88', 'temp_selection.89',
'temp_selection.90', 'temp_selection.94', 'temp_selection.96',
'temp_selection.97', 'temp_selection.98', 'temp_selection.99',
'temp_selection.100', 'temp_selection.101', 'temp_selection.102',
'temp_selection.106', 'temp_selection.109', 'temp_selection.115',
'temp_selection.117', 'temp_selection.121', 'temp_selection.122',
'temp_selection.123', 'temp_selection.124', 'temp_selection.125',
'temp_selection.128', 'temp_selection.129', 'temp_selection.132',
'temp_selection.133', 'temp_selection.134', 'temp_selection.138',
'temp_selection.140', 'temp_selection.142', 'temp_selection.143',
'temp_selection.144', 'temp_selection.145', 'temp_selection.148',
'temp_selection.149', 'temp_selection.151', 'temp_selection.152',
'temp_selection.153', 'temp_selection.154', 'temp_selection.156',
'temp_selection.157', 'temp_selection.158', 'temp_selection.160',
'temp_selection.161', 'temp_selection.163', 'temp_selection.165',
'temp_selection.167', 'temp_selection.169', 'temp_selection.171',
'temp_selection.172', 'temp_selection.173', 'temp_selection.176',
'temp_selection.178', 'temp_selection.179', 'temp_selection.180',
'temp_selection.184', 'temp_selection.185', 'temp_selection.186',
'temp_selection.187', 'temp_selection.188', 'temp_selection.190',
'temp_selection.194', 'temp_selection.195', 'temp_selection.197',
'temp_selection.198', 'temp_selection.199', 'temp_selection.201',
'temp_selection.204', 'temp_selection.205', 'temp_selection.207',
'temp_selection.210', 'temp_selection.212', 'temp_selection.214',
'temp_selection.215', 'temp_selection.217', 'temp_selection.218',
'temp_selection.219', 'temp_selection.220', 'temp_selection.225',
'temp_selection.227', 'temp_selection.228', 'temp_selection.229',
'temp_selection.230', 'temp_selection.231', 'temp_selection.234',
'temp_selection.236', 'temp_selection.237', 'temp_selection.238',
'temp_selection.241', 'temp_selection.243', 'temp_selection.248',
'temp_selection.249', 'temp_selection.254', 'temp_selection.256',
'temp_selection.262', 'temp_selection.265', 'temp_selection.266',
'temp_selection.267', 'temp_selection.270', 'temp_selection.271',
'temp_selection.274', 'temp_selection.278', 'temp_selection.280',
'temp_selection.282', 'temp_selection.284', 'temp_selection.286',
'temp_selection.287', 'temp_selection.291', 'temp_selection.293',
'temp_selection.294', 'temp_selection.295', 'temp_selection.297',
'temp_selection.298', 'temp_selection.301', 'temp_selection.302',
'temp_selection.307', 'temp_selection.311', 'temp_selection.312',
'temp_selection.313', 'temp_selection.314', 'temp_selection.317',
'temp_selection.318', 'temp_selection.319', 'temp_selection.320',
'temp_selection.321', 'temp_selection.323', 'temp_selection.327',
'temp_selection.328', 'temp_selection.329', 'temp_selection.331',
'temp_selection.334', 'temp_selection.335', 'temp_selection.336',
'temp_selection.341', 'temp_selection.342', 'temp_selection.345',
'temp_selection.346', 'temp_selection.347', 'temp_selection.348',
'temp_selection.354', 'temp_selection.355', 'temp_selection.357',
'temp_selection.359', 'temp_selection.360', 'temp_selection.363',
'temp_selection.364', 'temp_selection.365', 'temp_selection.366',
'temp_selection.368', 'temp_selection.369', 'temp_selection.370',
'temp_selection.371', 'temp_selection.372', 'temp_selection.373',
'temp_selection.374', 'temp_selection.375', 'temp_selection.376',
'temp_selection.377', 'temp_selection.380', 'temp_selection.382',
'temp_selection.383', 'temp_selection.386', 'temp_selection.389',
'temp_selection.390', 'temp_selection.391', 'temp_selection.394',
'temp_selection.395', 'temp_selection.396', 'temp_selection.397',
'temp_selection.398', 'temp_selection.399', 'temp_selection.402',
'temp_selection.405', 'temp_selection.408', 'temp_selection.410',
'temp_selection.413', 'temp_selection.414', 'temp_selection.415',
'temp_selection.417', 'temp_selection.420', 'temp_selection.422',
'temp_selection.424', 'temp_selection.426', 'temp_selection.428',
'temp_selection.433', 'temp_selection.434', 'temp_selection.437',
'temp_selection.442', 'temp_selection.443', 'temp_selection.445',
'temp_selection.446', 'temp_selection.447', 'temp_selection.448',
'temp_selection.451', 'temp_selection.452', 'temp_selection.455',
'temp_selection.458', 'temp_selection.460', 'temp_selection.461',
'temp_selection.462', 'temp_selection.466', 'temp_selection.474',
'temp_selection.477', 'temp_selection.478', 'temp_selection.482',
'temp_selection.484', 'temp_selection.486', 'temp_selection.487',
'temp_selection.488', 'temp_selection.489', 'temp_selection.492',
'temp_selection.493', 'temp_selection.495', 'temp_selection.496',
'temp_selection.497', 'temp_selection.500', 'temp_selection.502',
'temp_selection.505', 'temp_selection.506', 'temp_selection.507',
'temp_selection.508', 'temp_selection.509', 'temp_selection.510',
'temp_selection.511', 'temp_selection.513', 'temp_selection.518',
'temp_selection.519', 'temp_selection.520', 'temp_selection.522',
'temp_selection.524', 'temp_selection.525', 'temp_selection.526',
'temp_selection.527', 'temp_selection.528', 'temp_selection.529',
'temp_selection.530', 'temp_selection.531', 'temp_selection.532',
'temp_selection.533', 'temp_selection.535', 'temp_selection.537',
'temp_selection.538', 'temp_selection.539', 'temp_selection.542',
'temp_selection.543', 'temp_selection.545', 'temp_selection.547',
'temp_selection.548', 'temp_selection.549', 'temp_selection.551',
'temp_selection.554', 'temp_selection.555', 'temp_selection.557',
'temp_selection.558', 'temp_selection.561', 'temp_selection.562',
'temp_selection.563', 'temp_selection.565', 'temp_selection.567',
'temp_selection.569', 'temp_selection.570', 'temp_selection.571',
'temp_selection.572', 'temp_selection.574', 'temp_selection.576',
'temp_selection.580', 'temp_selection.582', 'temp_selection.583',
'temp_selection.584', 'temp_selection.585', 'temp_selection.587',
'temp_selection.588', 'temp_selection.589', 'temp_selection.592',
'temp_selection.599', 'temp_selection.600', 'temp_selection.601',
'temp_selection.602', 'temp_selection.603', 'temp_selection.606',
'temp_selection.607', 'temp_selection.608', 'temp_selection.609',
'temp_selection.611', 'temp_selection.614', 'temp_selection.616',
'temp_selection.618', 'temp_selection.619', 'temp_selection.621',
'temp_selection.622', 'temp_selection.623', 'temp_selection.631',
'temp_selection.632', 'temp_selection.633', 'temp_selection.636',
'temp_selection.637', 'temp_selection.639', 'temp_selection.640',
'temp_selection.641', 'temp_selection.642', 'temp_selection.643',
'temp_selection.645', 'temp_selection.646', 'temp_selection.647',
'temp_selection.650', 'temp_selection.652', 'temp_selection.654',
'temp_selection.655', 'temp_selection.656', 'temp_selection.657',
'temp_selection.658', 'temp_selection.659', 'temp_selection.660',
'temp_selection.665', 'temp_selection.666', 'temp_selection.668',
'temp_selection.672', 'temp_selection.673', 'temp_selection.675',
'temp_selection.676', 'temp_selection.677', 'temp_selection.678',
'temp_selection.679', 'temp_selection.682', 'temp_selection.685',
'temp_selection.687', 'temp_selection.688', 'temp_selection.689',
'temp_selection.691', 'temp_selection.692', 'temp_selection.694',
'temp_selection.697', 'temp_selection.700', 'temp_selection.703',
'temp_selection.704', 'temp_selection.706', 'temp_selection.707',
'temp_selection.708', 'temp_selection.713', 'temp_selection.715',
'temp_selection.719', 'temp_selection.721', 'temp_selection.723',
'temp_selection.724', 'temp_selection.726', 'temp_selection.727',
'temp_selection.729', 'temp_selection.731', 'temp_selection.732',
'temp_selection.734', 'temp_selection.735', 'temp_selection.738',
'temp_selection.740', 'temp_selection.741', 'temp_selection.743',
'temp_selection.744', 'temp_selection.746', 'temp_selection.747',
'temp_selection.757', 'temp_selection.758', 'temp_selection.760',
'temp_selection.761', 'temp_selection.762', 'temp_selection.764',
'temp_selection.765', 'temp_selection.766', 'temp_selection.769',
'temp_selection.770', 'temp_selection.771', 'temp_selection.773',
'temp_selection.774', 'temp_selection.776', 'temp_selection.777',
'temp_selection.781', 'temp_selection.783', 'temp_selection.786',
'temp_selection.788', 'temp_selection.791', 'temp_selection.792',
'temp_selection.794', 'temp_selection.797', 'temp_selection.798',
'temp_selection.803', 'temp_selection.806', 'temp_selection.807',
'temp_selection.808', 'temp_selection.810', 'temp_selection.814',
'temp_selection.815', 'temp_selection.816', 'temp_selection.817',
'temp_selection.818', 'temp_selection.823', 'temp_selection.824',
'temp_selection.825', 'temp_selection.826', 'temp_selection.827',
'temp_selection.829', 'temp_selection.830', 'temp_selection.831',
'temp_selection.832', 'temp_selection.834', 'temp_selection.837',
'temp_selection.838', 'temp_selection.839', 'temp_selection.840',
'temp_selection.841', 'temp_selection.843', 'temp_selection.844',
'temp_selection.846', 'temp_selection.848', 'temp_selection.849',
'temp_selection.851', 'temp_selection.853', 'temp_selection.854',
'temp_selection.855', 'temp_selection.859', 'temp_selection.860',
'temp_selection.863', 'temp_selection.867', 'temp_selection.871',
'temp_selection.872', 'temp_selection.874', 'temp_selection.875',
'temp_selection.876', 'temp_selection.878', 'temp_selection.879',
'temp_selection.880', 'temp_selection.881', 'temp_selection.882',
'temp_selection.886', 'temp_selection.893', 'temp_selection.899',
'temp_selection.900', 'temp_selection.901', 'temp_selection.902',
'temp_selection.903', 'temp_selection.904', 'temp_selection.907',
'temp_selection.908', 'temp_selection.910', 'temp_selection.914',
'temp_selection.915', 'temp_selection.916', 'temp_selection.917',
'temp_selection.926', 'temp_selection.927', 'temp_selection.933',
'temp_selection.937', 'temp_selection.939', 'temp_selection.941',
'temp_selection.942', 'temp_selection.946', 'temp_selection.947',
'temp_selection.951', 'temp_selection.952', 'temp_selection.953',
'temp_selection.956', 'temp_selection.957', 'temp_selection.958',
'temp_selection.959', 'temp_selection.960', 'temp_selection.962',
'temp_selection.965', 'temp_selection.966', 'temp_selection.968',
'temp_selection.971', 'temp_selection.975', 'temp_selection.977',
'temp_selection.979', 'temp_selection.980', 'temp_selection.982',
'temp_selection.984', 'temp_selection.990', 'temp_selection.991',
'temp_selection.993', 'temp_selection.995', 'temp_selection.996',
'temp_selection.997', 'temp_selection.998', 'temp_selection.999',
'temp_selection.1004', 'temp_selection.1006',
'temp_selection.1009', 'temp_selection.1010',
'temp_selection.1011', 'temp_selection.1012',
'temp_selection.1013', 'temp_selection.1015',
'temp_selection.1017', 'temp_selection.1018',
'temp_selection.1019', 'temp_selection.1020',
'temp_selection.1021', 'temp_selection.1022',
'temp_selection.1023', 'temp_selection.1024',
'temp_selection.1025', 'temp_selection.1026',
'temp_selection.1027', 'temp_selection.1030',
'temp_selection.1031', 'temp_selection.1037',
'temp_selection.1040', 'temp_selection.1043',
'temp_selection.1044', 'temp_selection.1045',
'temp_selection.1047', 'temp_selection.1048',
'temp_selection.1050', 'temp_selection.1053',
'temp_selection.1059', 'temp_selection.1062',
'temp_selection.1066', 'temp_selection.1067',
'temp_selection.1070', 'temp_selection.1073',
'temp_selection.1076', 'temp_selection.1079',
'temp_selection.1080', 'temp_selection.1081',
'temp_selection.1082', 'temp_selection.1085',
'temp_selection.1089', 'temp_selection.1090',
'temp_selection.1091', 'temp_selection.1093',
'temp_selection.1097', 'temp_selection.1099',
'temp_selection.1101', 'temp_selection.1103',
'temp_selection.1104', 'temp_selection.1106',
'temp_selection.1107', 'temp_selection.1109',
'temp_selection.1111', 'temp_selection.1113',
'temp_selection.1114', 'temp_selection.1116',
'temp_selection.1117', 'temp_selection.1119',
'temp_selection.1121', 'temp_selection.1122',
'temp_selection.1131', 'temp_selection.1134',
'temp_selection.1135', 'temp_selection.1136',
'temp_selection.1138', 'temp_selection.1139',
'temp_selection.1140', 'temp_selection.1145',
'temp_selection.1146', 'temp_selection.1148',
'temp_selection.1149', 'temp_selection.1152',
'temp_selection.1153', 'temp_selection.1159',
'temp_selection.1161', 'temp_selection.1164',
'temp_selection.1167', 'temp_selection.1168',
'temp_selection.1171', 'temp_selection.1173',
'temp_selection.1174', 'temp_selection.1178',
'temp_selection.1181', 'temp_selection.1184',
'temp_selection.1185', 'temp_selection.1186',
'temp_selection.1187', 'temp_selection.1188',
'temp_selection.1190', 'temp_selection.1191',
'temp_selection.1196'], dtype='<U20')
问题在于,“temp_selection”列在 R 中出现了多次,而在 pandas DataFrame 中只出现一次。
有什么好办法可以解决这个问题?
答案 0 :(得分:0)
这是解决方案,以防万一有人觉得将来有用。
R 中之所以出现多个重复的列,是因为该列的值被错误地解释了。为防止这种情况发生,我用以下代码在 pandas 中将该列转换为字符串:
# Cast temp_selection to str so the column has a single uniform string type
# before the frame is pushed to R. assign() returns a new frame instead of
# writing a column into the filtered slice of df, which avoids pandas'
# SettingWithCopyWarning.
df_temp = df_temp.assign(temp_selection=df_temp['temp_selection'].astype(str))
这样这些值就能被正确解释,推送到 R 之后问题也就解决了。