forked from ourresearch/jump-api
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser_summary_rules.py
169 lines (145 loc) · 7.2 KB
/
user_summary_rules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import pandas as pd
from datetime import datetime
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
import numpy as np
# inst_level = pd.read_csv("~/Downloads/unsub-institution-level.csv")
# all_institutions_df = pd.read_csv("~/Downloads/unsub-package-level.csv")
today = datetime.now()
one_month_ago = today - relativedelta(months=+1)
two_months_ago = today - relativedelta(months=+2)
three_months_ago = today - relativedelta(months=+3)
four_months_ago = today - relativedelta(months=+4)
six_months_ago = today - relativedelta(months=+6)
one_year_ago = today - relativedelta(years=+1)
fourteen_months_ago = today - relativedelta(months=+14)
more_than_1_year_ago = today - relativedelta(days=+366)
# rules for institutions
def rule_not_paid(df_original):
df = df_original.copy()
df['intercom_last_seen'] = [parse(w) if isinstance(w, str) else w for w in df['intercom_last_seen']]
df['date_last_paid_invoice'] = [parse(w) if isinstance(w, str) else w for w in df['date_last_paid_invoice']]
df = df[~np.isnan(df['intercom_last_seen'])]
# We noticed that you have been using Unsub but you’re payment is not up to date
#- From columns: intercom_last_seen, current_deal, consortia, consortium_account, date_last_paid_invoice, amount_last_paid_invoice, created_sce_last
#- Rule:
# IF last seen on Intercom recently (last 3 mo's)
# AND there’s no current deal
# AND is not likely paid through consortium
# AND hasn’t paid an invoice within the last year
not_paid = df[
(df['intercom_last_seen'] > three_months_ago) &
(~df['current_deal']) &
(df['consortia'] == "No") &
([not isinstance(w, str) for w in df['consortium_account'].to_list()]) &
([z < more_than_1_year_ago if isinstance(z, pd.Timestamp) else True for z in df['date_last_paid_invoice'].to_list()])
]
not_paid = not_paid.assign(not_paid=True)
df_original = df_original.assign(not_paid = df_original['institution_id'].map(not_paid.set_index('institution_id')['not_paid']))
return df_original
def rule_not_using(df_original):
df = df_original.copy()
df['intercom_last_seen'] = [parse(w) if isinstance(w, str) else w for w in df['intercom_last_seen']]
df['date_last_paid_invoice'] = [parse(w) if isinstance(w, str) else w for w in df['date_last_paid_invoice']]
df['created_inst'] = [parse(w) if isinstance(w, str) else w for w in df['created_inst']]
# We noticed that you have not been using Unsub despite having a current subscription
#- From columns: intercom_last_seen, current_deal, consortia, consortium_account, date_last_paid_invoice, amount_last_paid_invoice, created_sce_last
#- Rule:
# IF NOT last seen on Intercom recently (last 6 mo's)
# AND
# there is a recent deal
# OR likely paid through consortium
# OR paid an invoice within the last year
# AND
# It's been more than 2 months since Institution created
not_using = df[
([z < six_months_ago if isinstance(z, pd.Timestamp) else True for z in df['intercom_last_seen'].to_list()]) &
(
df['current_deal'] |
[isinstance(w, str) and w != "No" for w in df['consortia']] |
[isinstance(w, str) for w in df['consortium_account'].to_list()] |
[z > more_than_1_year_ago if isinstance(z, pd.Timestamp) else False for z in df['date_last_paid_invoice'].to_list()]
) &
(df['created_inst'] < two_months_ago)
]
not_using = not_using.assign(not_using=True)
df_original = df_original.assign(not_using = df_original['institution_id'].map(not_using.set_index('institution_id')['not_using']))
return df_original
def rule_new_users(df_original):
df = df_original.copy()
df['created_inst'] = [parse(w) if isinstance(w, str) else w for w in df['created_inst']]
# Follow up with new users: > 1 month & < 6 months
new_users = df[(df['created_inst'] < one_month_ago) & (df['created_inst'] > six_months_ago)]
new_users = new_users.assign(new_users=True)
df_original = df_original.assign(new_users = df_original['institution_id'].map(new_users.set_index('institution_id')['new_users']))
return df_original
def rule_current_users(df_original):
df = df_original.copy()
df['created_inst'] = [parse(w) if isinstance(w, str) else w for w in df['created_inst']]
df['date_last_paid_invoice'] = [parse(w) if isinstance(w, str) else w for w in df['date_last_paid_invoice']]
df['intercom_last_seen'] = [parse(w) if isinstance(w, str) else w for w in df['intercom_last_seen']]
# IF has a current deal
# OR invoice paid within last 14 months
# OR last seen on intercom within last 12 months
# OR institution created within last 3 months
current_users = df[
(df['current_deal']) |
([z > fourteen_months_ago if isinstance(z, pd.Timestamp) else False for z in df['date_last_paid_invoice']]) |
([z > one_year_ago if isinstance(z, pd.Timestamp) else False for z in df['intercom_last_seen']]) |
(df['created_inst'] > three_months_ago)
]
current_users = current_users.assign(current_users=True)
df_original = df_original.assign(current_user = df_original['institution_id'].map(current_users.set_index('institution_id')['current_users']))
return df_original
# rules for packages
def rule_required_data(df_original):
df = df_original.copy()
# We noticed you haven’t uploaded required data
# From columns: is_deleted, is_feeder_package, is_feedback_package, currency, big_deal_cost,
# big_deal_cost_increase, has_complete_counter_data, prices
# Rule:
# IF NOT is_deleted
# AND NOT is_feeder_package
# AND NOT is_feedback_package
# ~~AND is current_user~~ (column doesn't exist yet)
# AND
# currency is empty OR
# prices is FALSE (empty) OR
# big_deal_cost is empty OR
# big_deal_cost_increase is empty OR
# has_complete_counter_data=FALSE
mrd = df[(
([not w if isinstance(w, bool) else False for w in df['is_deleted']]) and
([not w if isinstance(w, bool) else False for w in df['is_feeder_package']]) and
([not w if isinstance(w, bool) else False for w in df['is_feedback_package']]) and
(
df['currency'].isnull() |
df['big_deal_cost'].isnull() |
df['big_deal_cost_increase'].isnull() |
([not w if isinstance(w, bool) else False for w in df['has_complete_counter_data']]) |
([not w if isinstance(w, bool) else False for w in df['prices']])
)
)]
mrd = mrd.assign(missing_required_data=True)
df_original = df_original.merge(mrd[['package_id','ror_id','missing_required_data']], how='left', on=['package_id','ror_id'])
return df_original
def rule_recommended_data(df_original):
df = df_original.copy()
# We noticed you haven’t uploaded recommended data
# From columns: is_deleted, is_feeder_package, is_feedback_package, perpetual_access
# Rule:
# IF NOT is_deleted
# AND NOT is_feeder_package
# AND NOT is_feedback_package
# ~~AND is current_user~~ (column doesn't exist yet)
# AND
# perpetual_access=FALSE
mrecd = df[(
([not w if isinstance(w, bool) else False for w in df['is_deleted']]) and
([not w if isinstance(w, bool) else False for w in df['is_feeder_package']]) and
([not w if isinstance(w, bool) else False for w in df['is_feedback_package']]) and
([not w if isinstance(w, bool) else False for w in df['perpetual_access']])
)]
mrecd = mrecd.assign(missing_recommended_data=True)
df_original = df_original.merge(mrecd[['package_id','ror_id','missing_recommended_data']], how='left', on=['package_id','ror_id'])
return df_original