-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapp.py
432 lines (365 loc) · 18.8 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
from datetime import datetime, date
from multiprocessing import freeze_support
from typing import Dict
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import streamlit as st
from matplotlib import ticker, patches
import bayesianGroundhog
import epsilonRingtail
import randomCrayfish
import segmentJunglefowl
from actionGenerator import get_actions
from customerGenerator import generate_customers, get_products
from rewardCalculator import HlvCalculator
from simulator import TelcoSimulator
# --- Page setup -------------------------------------------------------------
st.set_page_config(layout="wide", page_title="Marketing Policy Simulator", )
# Non-interactive backend: figures are rendered by Streamlit, never in a window.
matplotlib.use("agg")

# One timestamp taken at app start; `today` anchors customer ages and HLV
# calculations for the whole run.
start_ts = datetime.today()
today = start_ts.date()

st.header("Telco Marketing simulator")
row1_col1, row1_col2 = st.columns(2)
with row1_col1:
    st.markdown("""This is a Telecommunications company Marketing Policy simulator
that allows developers to test different Marketing policies.
The simulator generates customer profile on every run
and simulates the customers response to a marketing actions e.g. an outbound call.
The revenue generated by the policy's choice of customer actions is summed and plotted over time
to show the accumulated gains of each policy.""")
# Vertical spacers before the next section.
st.write("##")
st.write("##")
# --- Input data: actions, products, and a freshly generated customer base ---
actions = get_actions()
products, product_market_size = get_products()

customer_h_col1, customer_h_col2, customer_h_col3 = st.columns((1, 2, 1))
with customer_h_col2:
    st.subheader("Customers")

cust_col1, cust_col2, cust_col3 = st.columns((2, 1, 1))
with cust_col1:
    st.write("""Customers are generated on every run using distributions of common first and last names of the
population of the Netherlands.
The date of births of the customers are generated using the age density of telecom service customers.
The products are randomly assigned to customer portfolios with a weighed distribution that reflects a telecom
company with a base that mostly has portfolios with older products and only some with newer products.""")
    # A slider with int bounds returns an int, so annotate as int
    # (the original annotated this as float).
    nr_of_customers: int = st.slider(label="Base Size", min_value=10000, max_value=800000, value=100000, step=10000)
    customers = generate_customers(int(nr_of_customers), today)
    # Show a small sample of the generated base so the user can eyeball it.
    sample_cust = customers[0:8]
    cust_list = [
        {  # "id": c.id,
            "name": c.name,
            "dob": c.dob,
            "billing_address": str(c.billing_address),
            "portfolio": str([str(p) for p in c.portfolio]),
        }
        for c in sample_cust
    ]
    cust_df = pd.DataFrame(cust_list)
    st.dataframe(cust_df)
with cust_col2:
    # Vertical spacers to align the chart with the customer table on the left.
    for _ in range(4):
        st.markdown('')
    # Count customers per (first) portfolio product. Seed with every known
    # product so products nobody owns still appear as zero-height bars.
    portfolio_count: Dict[str, int] = {product.name: 0 for product in products}
    for cust in customers:
        product_name = cust.portfolio[0].name
        # .get() keeps this robust to portfolio products missing from `products`.
        portfolio_count[product_name] = portfolio_count.get(product_name, 0) + 1
    # NOTE: the original also tracked `max_product_name_length` here; it was
    # never read anywhere in the file, so that dead code has been removed.
    fig, ax = plt.subplots()
    names = [product.name for product in products]
    counts = [portfolio_count[name] for name in names]
    ax.bar(names, counts, alpha=0.3)
    ax.tick_params(labelrotation=90)
    ax.set_ylabel("Segment size")
    st.pyplot(fig)
st.write("##")
# --- Products section -------------------------------------------------------
products_h_col1, products_h_col2, products_h_col3 = st.columns((1, 2, 1))
with products_h_col2:
    st.subheader("Products")

products_col1, products_col2, products_col3 = st.columns((2, 1, 1))
with products_col1:
    # Grammar fixes relative to the original copy ("only considered ... to
    # allows", "based in a", "in proportion the").
    st.write("""This simulator only considers Fixed internet services to allow the simulator to finish fast.
The products are based on a Dutch Telco operator Ziggo but are fake products.
The yearly margins on the products are reasonable for a Dutch Telco but are not the actual margins of Ziggo.
Adjusting the Average Price per Unit sold (ARPU) changes the list price of the product
in proportion to the original list price. Since the costs can not change this also changes the margin.
Increasing the ATL Marketing Budget allows for better graphics which increases campaigns effectiveness
due to a better brand image.""")
    arpu: int = st.slider(label="ARPU €", min_value=100, max_value=3000, value=2100, step=100)
    marketing_budget: int = st.slider(label="ATL Marketing Budget (Million €)", min_value=18, max_value=50, value=25,
                                      step=1)
    # The ARPU slider shifts every list price by the same delta; costs are
    # fixed, so the margin shifts by the same delta (2100 is the default ARPU).
    price_delta = arpu - 2100
    prod = [
        {
            "name": p.name,
            "list_price": p.list_price + price_delta,
            # NOTE(review): reads the product's private `_margin` attribute —
            # consider exposing a public accessor on the Product type.
            "margin": p._margin + price_delta,
            "start_date": p.start_date,
            "end_date": p.end_date,
            "download_speed": p.kwargs["download_speed"],
            "upload_speed": p.kwargs["upload_speed"],
        }
        for p in products
    ]
    prod_df = pd.DataFrame(prod)
    st.dataframe(prod_df)
with products_col2:
    # Spacers to line the figure up with the product table.
    for _ in range(4):
        st.markdown('')
    fig, ax = plt.subplots()
    # Two shaded triangles frame the price/quality plane: upper-left reads as
    # over-priced, lower-right as a bargain.
    expensive_x = [0, 0.7, 0, 0]
    expensive_y = [620, 900, 900, 620]
    cheap_x = [0, 1, 1, 0]
    cheap_y = [400, 400, 780, 400]
    # NOTE(review): each triangle is drawn three times, stacking the 0.2-alpha
    # fills darker (and cycling line colors) — presumably intentional; kept.
    for _ in range(3):
        ax.plot(expensive_x, expensive_y)
        ax.fill_between(expensive_x, expensive_y, alpha=0.2)
    ax.text(0.1, 810, "Expensive", fontsize=14,
            horizontalalignment='left',
            verticalalignment='center')
    for _ in range(3):
        ax.plot(cheap_x, cheap_y)
        ax.fill_between(cheap_x, cheap_y, alpha=0.2)
    ax.text(0.7, 480, "Cheap", fontsize=14,
            horizontalalignment='left',
            verticalalignment='center')
    # Scatter the products: x positions are hand-tuned for readability and
    # shift with the marketing budget; y follows the ARPU-adjusted list price.
    base_quality = [0.0, 0.0, 0.0, 0.14, 0.051, 0.48, 0.17, 0.5, 0.5, 0.02, 0.06, 0.05, 0.5, 0.5]
    xs = [q + ((marketing_budget - 25) / 100) for q in base_quality]
    ys = [p.list_price + (arpu - 2100) for p in products]
    ax.scatter(xs, ys, alpha=0.5)
    ax.plot([0, 1], [500, 900], label="Decision bound", alpha=0.2)
    ax.set_xlim(0, 1)
    ax.set_ylim(400, 900)
    ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('€ %.0f'))
    ax.set_ylabel("List Price")
    ax.set_xlabel("Perceived Quality")
    st.pyplot(fig)
st.write("##")
st.write("##")
# --- Gold/Silver/Bronze segmentation policy ---------------------------------
segment_h_col1, segment_h_col2, segment_h_col3 = st.columns((1, 2, 1))
with segment_h_col1:
    st.image(segmentJunglefowl.SegmentJunglefowl.icon, width=100)
with segment_h_col2:
    st.subheader("Gold Silver Bronze segments")

segment_col1, segment_col2, segment_col3 = st.columns((2, 1, 1))
with segment_col1:
    st.write("""The Gold Silver Bronze segments policy uses the traditional marketing segmentation
where we segment the base into High, Medium and Low revenue groups(Gold, Silver, Bronze respectively).
Then for every group we assign actions that try and sell them a product
or service with a price point for that group. This policy is meant to give us a baseline
for what a traditional marketing department would do.""")
    gold_threshold: float = st.slider(label="Gold Segment", min_value=0.0, max_value=8000.0, value=5600.0, step=200.0)
    silver_threshold: float = st.slider(label="Silver Segment", min_value=0.0, max_value=8000.0, value=2800.0,
                                        step=200.0)
with segment_col2:
    hlv_calculator = HlvCalculator()
    # One HLV per customer. The original also called get_hlv(customer, today)
    # without the horizon and discarded the result every iteration — that
    # duplicate (and expensive) call has been removed.
    # The third argument is presumably the horizon (20 periods ≈ the "5 year
    # window" in the axis label) — confirm against HlvCalculator.
    margins = [hlv_calculator.get_hlv(customer, today, 20) for customer in customers]
    fig, ax = plt.subplots()
    ax.hist(margins, bins=20)
    # Translucent rectangles mark the three revenue bands on the histogram.
    gold_patch = patches.Rectangle((gold_threshold, 0), (max(margins) - gold_threshold), 25000, angle=0.0, alpha=0.3,
                                   ec="gray", fc="CornflowerBlue")
    ax.add_patch(gold_patch)
    silver_patch = patches.Rectangle((silver_threshold, 0), (gold_threshold - silver_threshold), 25000, angle=0.0,
                                     alpha=0.3, ec="gray", fc="red")
    ax.add_patch(silver_patch)
    bronze_patch = patches.Rectangle((0, 0), silver_threshold, 25000, angle=0.0, alpha=0.3, ec="gray", fc="green")
    ax.add_patch(bronze_patch)
    ax.set_ylabel('Number of customers')
    ax.set_xlabel('Household Lifetime Value (5 year window)')
    ax.legend(["Gold", "Silver", "Bronze"])
    st.pyplot(fig)
st.write("##")
# --- Epsilon Greedy policy ---------------------------------------------------
epsilon_h_col1, epsilon_h_col2, epsilon_h_col3 = st.columns((1, 2, 1))
with epsilon_h_col1:
    st.image(epsilonRingtail.EpsilonRingtail.icon, width=100)
with epsilon_h_col2:
    st.subheader("Epsilon Greedy")

epsilon_col1, epsilon_col2, epsilon_col3 = st.columns((2, 1, 1))
with epsilon_col1:
    # Typo fix relative to the original copy ("teh" -> "the").
    st.write("""The Epsilon Greedy policy uses a basic Explorer/Exploit ratio to test out new campaigns to better
estimate the conversion rate. Then for every customer the estimated conversion rate is multiplied by the increase in
Household Lifetime value (Delta HLV) to calculate the estimated revenue.
The Epsilon parameter defines the percentage of instances the algorithm will Exploit the campaign that is
estimated to give the highest revenue. The rest of the time (1 - Epsilon) the algorithm will test the newer
campaigns.
This is because we will never have enough chances to calculate the true conversion rate of a campaign.
This is to avoid campaigns that had bad luck to be tested on the difficult people first
still get another chance.""")
    epsilon: float = st.slider(label="Epsilon", min_value=0.1, max_value=0.9, value=0.8, step=0.1)
    resort_batch_size: int = st.slider(label="Batch size", min_value=1, max_value=201, value=51, step=10)
with epsilon_col2:
    # Stacked bar: the exploit share (epsilon) at the bottom, explore on top.
    fig, ax = plt.subplots()
    ax.bar(["Base"], [(1 - epsilon) * 100], 5, bottom=[epsilon * 100], label='Explorer', alpha=0.2)
    ax.bar(["Base"], [epsilon * 100], 5, label='Exploit', alpha=0.2)
    ax.set_ylabel('Percentage Offers')
    ax.yaxis.set_major_formatter(ticker.PercentFormatter())
    ax.legend()
    st.pyplot(fig)
st.write("##")
# --- Bayesian (Thompson sampling) policy -------------------------------------
bayesian_h_col1, bayesian_h_col2, bayesian_h_col3 = st.columns((1, 2, 1))
with bayesian_h_col1:
    st.image(bayesianGroundhog.BayesianGroundhog.icon, width=100)
with bayesian_h_col2:
    st.subheader("Bayesian")

bayesian_col1, bayesian_col2 = st.columns(2)
with bayesian_col1:
    # Copy fixes relative to the original: stray line-leading period,
    # "when ever" -> "whenever", unclosed parenthesis after
    # "Delta Household Lifetime Value".
    st.write("""The Bayesian policy uses Thompson-Sampling to estimate the rewards of serving the customer each campaign.
Each Action reward is defined as a beta distribution that is updated whenever an Action succeeds or fails.
The Action beta distribution is sampled for every new customer to generate the expected rewards for that customer.
The algorithm then chooses the action with the maximum expected reward (Delta Household Lifetime Value).
The plots here show the probability density of the conversion rate of three Action at different simulated update
steps.
The Beta distribution of the Action's conversion rate is updated by the simulated success/fail reward steps.
Here we can test how quickly the Action Beta distributions (Arms) conclude which action is clearly better
(most right peak).
""")
    # "Trails" -> "Trials" typo fixed in the slider label.
    initial_trials: int = st.slider(label="Initial Trials", min_value=0, max_value=500, value=99, step=1)
    initial_wins: int = st.slider(label="Initial Wins", min_value=0, max_value=500, value=1, step=1)
with bayesian_col2:
    # --- Thompson-sampling demo: a 3-armed Bernoulli bandit ------------------
    nb_bandits = 3  # Number of bandits (arms)
    # True (hidden) probability of winning for each bandit
    p_bandits = [0.45, 0.55, 0.60]

    def pull(arm_index):
        """Pull arm `arm_index`; return 1 on a win, else 0."""
        return 1 if np.random.rand() < p_bandits[arm_index] else 0

    # Steps at which the current priors are rendered. Renamed from `plots`
    # to avoid being shadowed by the timeline `plots` later in the file.
    plot_steps = [2, 10, 50, 200, 500, 1000]

    def plot(priors, step_count, ax_of_plot):
        """Plot the pdf of each arm's Beta prior on `ax_of_plot`."""
        plot_x = np.linspace(0.001, .999, 100)
        for prior in priors:
            plot_y = prior.pdf(plot_x)
            ax_of_plot.plot(plot_x, plot_y)
            ax_of_plot.fill_between(plot_x, plot_y, 0, alpha=0.2)
        ax_of_plot.set_xlim([0, 1])
        ax_of_plot.set_ylim(bottom=0)
        ax_of_plot.set_title(f'Priors at step {step_count:d}')

    fig, axs = plt.subplots(2, 3, )
    axs = axs.flat
    # The number of trials and wins represents the prior for each bandit via
    # the Beta distribution.
    trials = [initial_trials] * nb_bandits  # Number of times we tried each bandit
    wins = [initial_wins] * nb_bandits      # Number of wins for each bandit
    n = 1000
    # Run the simulated bandit for `n` steps
    for step in range(1, n + 1):
        # Beta posterior per arm: alpha = 1 + wins, beta = 1 + failures.
        # BUG FIX: the original used b=1 + t (total trials) instead of
        # b=1 + (t - w) (failures), skewing every posterior pessimistically.
        # max(..., 0) guards against slider settings where wins > trials.
        bandit_priors = [
            stats.beta(a=1 + w, b=1 + max(t - w, 0)) for t, w in zip(trials, wins)]
        # Render the priors at the designated steps
        if step in plot_steps:
            plot(bandit_priors, step, next(axs))
        # Sample an expected win-rate per arm and play the argmax arm
        theta_samples = [
            d.rvs(1) for d in bandit_priors
        ]
        chosen_bandit = np.argmax(theta_samples)
        reward = pull(chosen_bandit)
        # Update trials and wins (defines the posterior)
        trials[chosen_bandit] += 1
        wins[chosen_bandit] += reward
    plt.tight_layout()
    st.pyplot(fig)
st.write("##")
st.write("##")
# --- Simulation settings ------------------------------------------------------
st.subheader("Simulator")
sim_col1, sim_col2 = st.columns((2, 2))
with sim_col1:
    # Adjacent string literals are concatenated: each line now ends with a
    # space so the rendered sentences no longer run together. Also fixed
    # "356" -> "365", "her" -> "either", "never" -> "number".
    st.write("To run a simulation you must check the checkbox in the bottom of this section. "
             "Depending on the settings a simulation can take up to 30 minutes (10 sequential runs of 365 days). "
             "We recommend using the default settings so that a simulation can finish in 2 minutes. "
             "The simulation must run multiple sims per policy to estimate the mean revenue per policy. "
             "Running multiple instances can be done by either increasing the number of sim threads per policy or "
             "the number of sequential runs within one thread")
    # Defaults are optimized for Linux; Windows takes a long time to start a
    # thread, so 1 thread per policy with more sequential runs is advised there.
    runs_per_policies = st.slider(label="Threads per policy", min_value=1, max_value=10, value=5, step=1)
    sequential_runs = st.slider(label="Sequential runs per thread", min_value=1, max_value=10, value=1, step=1)
    day_count = st.slider(label="Number of days to simulate", min_value=21, max_value=365, value=50, step=1)
    run = st.checkbox("Run Simulator", value=True)
if __name__ == '__main__':
    # Required for multiprocessing in frozen Windows builds.
    freeze_support()
    # Degenerate slider settings (no gold band, or gold == silver) mean the
    # segmentation policy should fall back to its own defaults: pass None.
    if gold_threshold == 0 or silver_threshold == gold_threshold:
        gold_t = None
        silver_t = None
    else:
        gold_t = gold_threshold
        silver_t = silver_threshold
    simulator = TelcoSimulator()
    chosen_action_logs: Dict[str, Dict[datetime, Dict[str, int]]] = dict()
    if run:
        # Run simulations, one batch per policy implementation
        policies = [randomCrayfish.RandomCrayfish, segmentJunglefowl.SegmentJunglefowl,
                    epsilonRingtail.EpsilonRingtail, bayesianGroundhog.BayesianGroundhog]
        # BUG FIX: the original passed the raw slider values here, which left
        # the gold_t/silver_t fallback above entirely unused (dead code).
        # TODO(review): confirm SegmentJunglefowl accepts None thresholds.
        keywords = {'epsilon': epsilon, 'resort_batch_size': resort_batch_size, "initial_trials": initial_trials,
                    "initial_conversions": initial_wins, "current_base": customers,
                    "gold_threshold": gold_t, "silver_threshold": silver_t}
        all_logs, chosen_action_logs = simulator.do_simulations(policies, keywords, runs_per_policies,
                                                                sequential_runs, customers, actions, day_count,
                                                                start_ts)
        # Plot cumulative-revenue performance next to the simulator settings
        sim_col2.pyplot(simulator.plot_performance(all_logs, show=False, save=False))

    # --- Per-policy action timelines (empty if the simulation did not run) ---
    st.subheader("Policy Timelines")
    timeline_col1, timeline_col2, timeline_col3 = st.columns((2, 1, 1))
    # Several typos in the displayed copy are fixed below ("chose",
    # "SementJunglefolw", "a some", "a Epsilon", "the it", "Baysian",
    # "equality", "that come").
    timeline_col1.write("Here we see the amount of times the policy chooses each action during the simulation "
                        "where each color represents an action.")
    timeline_col1.write("")
    timeline_col1.markdown("**RandomCrayfish**")
    timeline_col1.write("The RandomCrayfish policy is meant to be a reference for the worst possible rational "
                        "policy where an action is chosen at random. "
                        "The timeline displayed what you would expect from this, in that, "
                        "each action is chosen about as many times as any other for the duration of the "
                        "simulation")
    timeline_col1.write("")
    timeline_col1.markdown("**SegmentJunglefowl**")
    timeline_col1.write("This is an implementation of a Gold, Silver, Bronze based marketing strategy. "
                        "The timeline is very similar to what we see in company data. "
                        "There are a few big campaigns, and some smaller campaigns with very short runs")
    timeline_col1.write("")
    timeline_col1.markdown("**EpsilonRingtail**")
    timeline_col1.write("This is an implementation of an Epsilon Greedy policy "
                        "that does not consider the customer or product context. "
                        "The timeline is a bit more complex since it is a learning algorithm "
                        "based on maximizing Delta Household lifetime value. "
                        "In the first few days it changes what it thinks is the best campaign a lot "
                        "as samples are being collected. After some time this starts to stabilize "
                        "between a few good options that are close to each other in terms of HLV. "
                        "If the simulation runs long enough it would stabilize even more "
                        "and stick to one or two campaigns")
    timeline_col1.write("")
    timeline_col1.markdown("**BayesianGroundhog**")
    timeline_col1.write("This is an implementation of a Bayesian Bandit or Thompson sampling. "
                        "The timeline of this policy is in some way simpler than the Epsilon Greedy policy. "
                        "In the beginning the policy will try each campaign more or less equally. "
                        "At some point it will have enough information to come to a conclusion "
                        "that one or two campaigns are usually the best.")
    # Alternate the per-policy timeline figures across the two right columns.
    plots = simulator.plot_timelines(chosen_action_logs, actions, show=False, save=False)
    for i, (policy_name, fig) in enumerate(plots.items()):
        col = timeline_col2 if i % 2 == 0 else timeline_col3
        col.subheader(policy_name)
        col.pyplot(fig)