我想进行 RFM 分析,并返回属于某个特定分群(Segment)的所有用户 ID。
class RfmTable:
    """Build an RFM (Recency / Frequency / Monetary) table from an event log.

    ``conn_data`` reads rows from a PostgreSQL table, aggregates the
    ``revenue`` events per user, scores each user from 1 to 5 on the three
    RFM axes and maps the three scores to a named segment. ``get`` looks up
    the user ids of one segment in ``self.rfm``.
    """

    # Class-level placeholder. conn_data() works on its own local quintile
    # dict and never updates this attribute.
    quintiles = {}

    def _init(self, rfm):
        """Store an already-built RFM DataFrame on the instance.

        NOTE: this method is named ``_init``, not ``__init__``, so Python
        does not call it on construction. ``RfmTable()`` therefore takes no
        arguments and leaves ``self.rfm`` unset.
        """
        self.rfm = rfm

    @staticmethod
    def _quintile_rank(value, cuts):
        """Return the 1-5 quintile rank of ``value``.

        ``cuts`` maps the quantile levels 0.2, 0.4, 0.6 and 0.8 to their cut
        values. A value at or below the 0.2 cut ranks 1; a value above the
        0.8 cut (or one that compares false against every cut, such as NaN)
        ranks 5.
        """
        for rank, level in enumerate((0.2, 0.4, 0.6, 0.8), start=1):
            if value <= cuts[level]:
                return rank
        return 5

    def conn_data(self, table_name, start_time, end_time):
        """Load events from ``table_name`` and return the scored RFM table.

        Args:
            table_name: Table to read. It is concatenated into the SQL text
                because identifiers cannot be bound as parameters, so it
                must come from a trusted source.
            start_time: Lower bound of the window, a string PostgreSQL can
                cast to ``date`` (the question uses ``'20190610'``).
            end_time: Upper bound of the window, same format.

        Returns:
            A DataFrame with the columns ``user_id``, ``Recency``,
            ``Frequency``, ``Monetary``, ``R``, ``F``, ``M``, ``RFM Score``
            and ``Segment``.

        NOTE: the result is only returned; it is not stored on ``self``.
        Calling ``get`` afterwards fails with ``AttributeError`` unless the
        caller assigns ``self.rfm`` first. This is the error the surrounding
        question asks about, so it is deliberately left as written here.
        """
        # Only the date bounds are bound as parameters; see table_name above.
        query = (
            'select random_user_id,type,revenue,timestamp from ' + table_name
            + ' where timestamp between EXTRACT(EPOCH FROM %s::date)'
            ' AND EXTRACT(EPOCH FROM %s::date)'
        )
        conn = psycopg2.connect(database=database_name, user=user_name,
                                password=password_info, host=host_info,
                                port=port_info)
        try:
            cursor = conn.cursor()
            cursor.execute(query, (start_time, end_time))
            rows = cursor.fetchall()
        finally:
            # Release the connection even when the query raises.
            conn.close()

        df = pd.DataFrame(rows,
                          columns=['user_id', 'type', 'revenue', 'timestamp'])
        # The timestamp column is read as epoch seconds, localized to UTC,
        # shifted to Asia/Shanghai and reduced to a calendar-day string.
        df['date'] = pd.DatetimeIndex(
            pd.to_datetime(df['timestamp'], unit='s')
        ).tz_localize('UTC').tz_convert('Asia/Shanghai')
        df['date'] = df['date'].dt.strftime('%Y-%m-%d')
        df = df.drop(['timestamp'], axis=1)

        purchases = df[df['type'].isin(['revenue'])]
        installs = df[df['type'].isin(['install'])]
        # The inner join keeps only revenue rows of users that also have an
        # install row in the window.
        # NOTE(review): a user with several install rows gets their revenue
        # rows duplicated by this join - confirm installs are unique per user.
        purchases = pd.merge(purchases, installs, on='user_id',
                             suffixes=('', '_i'))
        purchases = purchases.loc[
            :, ['user_id', 'type', 'revenue', 'date']
        ].copy()
        # np.float was removed in NumPy 1.24; the builtin float is the same
        # dtype. Presumably revenue is stored in cents - TODO confirm.
        purchases['revenue'] = purchases['revenue'].astype(float) / 100
        purchases['date'] = pd.to_datetime(purchases['date'])

        # Reference day: one day after the latest purchase in the data.
        now = purchases['date'].max() + timedelta(days=1)
        rfm = purchases.groupby('user_id').agg({
            # Recency: days since the user's most recent purchase.
            'date': lambda days: (now - days.max()).days,
            # Frequency: number of purchase rows.
            'type': lambda kinds: len(kinds),
            # Monetary: total revenue.
            'revenue': lambda amounts: amounts.sum(),
        })
        rfm['date'] = rfm['date'].astype(int)
        rfm = rfm.rename(columns={'date': 'Recency',
                                  'type': 'Frequency',
                                  'revenue': 'Monetary'})

        quintiles = rfm.quantile(q=[0.2, 0.4, 0.6, 0.8]).to_dict()
        # A low recency is good, so R is the inverted rank (5 = most recent).
        rfm['R'] = rfm['Recency'].apply(
            lambda v: 6 - self._quintile_rank(v, quintiles['Recency']))
        rfm['F'] = rfm['Frequency'].apply(
            lambda v: self._quintile_rank(v, quintiles['Frequency']))
        rfm['M'] = rfm['Monetary'].apply(
            lambda v: self._quintile_rank(v, quintiles['Monetary']))
        rfm['RFM Score'] = (rfm['R'].map(str) + rfm['F'].map(str)
                            + rfm['M'].map(str))

        # Each pattern matches the three-digit R/F/M string; together the
        # eight patterns cover every combination of scores 1-5.
        segt_map = {
            r'[4-5][4-5][5]': '重要价值用户',
            r'[4-5][1-3][5]': '重要保持用户',
            r'[1-3][4-5][5]': '重要发展用户',
            r'[1-3][1-3][5]': '重要挽留用户',
            r'[4-5][4-5][1-4]': '一般价值用户',
            r'[4-5][1-3][1-4]': '一般发展用户',
            r'[1-3][4-5][1-4]': '一般保持用户',
            r'[1-3][1-3][1-4]': '一般挽留用户',
        }
        rfm['Segment'] = rfm['RFM Score'].replace(segt_map, regex=True)
        # Turn the user_id group index back into an ordinary column.
        rfm = rfm.reset_index()
        return rfm

    def get(self, segment):
        """Return the ``user_id`` values in ``self.rfm`` whose Segment is ``segment``.

        Raises:
            AttributeError: if ``self.rfm`` has not been assigned.
        """
        in_segment = self.rfm['Segment'].isin([segment])
        return self.rfm[in_segment]['user_id'].values.tolist()
conn_data 方法会创建一个名为 rfm 的 DataFrame,其中包含 user_id、Recency(最近一次消费间隔)、Frequency(消费频率)、Monetary(消费金额)、R、F、M、RFM Score 和 Segment 这些列。
我想用 get 方法返回属于指定分群(Segment)的所有用户 ID。写完之后,我这样调用:
# Create the table object; the class defines no __init__, so no arguments are needed.
data1 = RfmTable()
# Load the events for the date window and compute the RFM table (the return value is discarded here).
data1.conn_data('adjust_logs_lanyue','20190610','20190613')
# Ask for the user ids of one segment; this call reads self.rfm, which nothing above has assigned.
data1.get('重要挽留用户')
结果报错:AttributeError: 'RfmTable' object has no attribute 'rfm'('RfmTable' 对象没有属性 'rfm')。
我该怎样修改才能纠正这个错误?
答案 0 :(得分:1)
与其他面向对象语言不同,Python 没有像 Java 或 C++ 中 this 那样的隐式对象概念。
这意味着当你在 conn_data 中使用 rfm 时,你使用的只是一个局部变量,
它会在函数返回后立即消失。
你必须显式地把它赋值给对象的属性:
def conn_data(self,table_name,start_time,end_time):
    # Sketch of the fix: the code that builds the local `rfm` is elided below.
    ...
    self.rfm = rfm # store the object as an attribute of self
    # Still return it, so callers that use the return value keep working.
    return rfm