forked from xyla-io/bot_appsflyer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathappsflyer_processor.py
85 lines (66 loc) · 3.09 KB
/
appsflyer_processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
import numpy as np
from datetime import date
from pathlib import Path
from typing import Optional, Dict
class AppsFlyerProcessor:
    """Reshape AppsFlyer cohort CSV exports into one long-format table.

    Every column of a source CSV that is not one of the known fixed columns
    is treated as a per-day value column. For each such column the fixed
    identifying columns are repeated and tagged with ``event_day``,
    ``cohort_revenue``, ``platform`` and ``install``.

    Attributes are documented with comments below; ``processed_data`` is
    ``None`` until :meth:`process` has run.
    """

    # Columns copied unchanged onto every output row.
    _ID_COLUMNS = ('Cohort Day', 'Media Source', 'Ltv Country', 'Campaign Id')
    # Columns that are never treated as per-day value columns.
    _NON_DAY_COLUMNS = frozenset(_ID_COLUMNS + ('Users', 'Cost', 'Average eCPI'))

    # Directory that contains one sub-directory of CSV files per app.
    source_directory_path: Path
    # Maps a platform label to the sub-directory name holding its CSV files.
    platform_directory_map: Dict[str, str]
    # Result of the most recent ``process`` call.
    processed_data: Optional[pd.DataFrame] = None

    def __init__(self, source_directory_path: Path, platform_directory_map: Dict[str, str]):
        """Store the source directory and the platform -> sub-directory map."""
        self.source_directory_path = source_directory_path
        self.platform_directory_map = platform_directory_map

    @classmethod
    def _day_columns(cls, df: pd.DataFrame) -> list:
        """Return the columns of ``df`` that are not known fixed columns."""
        return [column for column in df.columns if column not in cls._NON_DAY_COLUMNS]

    @classmethod
    def _cohort_frame(cls, df: pd.DataFrame, day_column: str, event_day: str, platform: str) -> pd.DataFrame:
        """Build the long-format rows for a single per-day column of ``df``."""
        # Explicit copy: assigning into a bare column slice is chained
        # assignment and triggers SettingWithCopyWarning.
        frame = df[list(cls._ID_COLUMNS)].copy()
        # Wrap Campaign Id in literal double quotes so it is read as a string.
        frame['Campaign Id'] = '"' + frame['Campaign Id'].astype(str) + '"'
        frame['event_day'] = event_day
        frame['cohort_revenue'] = df[day_column]
        frame['platform'] = platform
        frame['install'] = df['Users']
        return frame

    @staticmethod
    def _parse_revenue(value):
        """Turn an ``'a/b'`` string into ``float(a) / float(b)``; pass other values through."""
        if isinstance(value, str) and '/' in value:
            parts = value.split('/')
            return float(parts[0]) / float(parts[1])
        return value

    @staticmethod
    def _combine(frames: list) -> pd.DataFrame:
        """Concatenate ``frames`` row-wise with alphabetically sorted columns.

        Returns an empty DataFrame when ``frames`` is empty, because
        ``pd.concat`` raises on an empty sequence.
        """
        if not frames:
            return pd.DataFrame()
        # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
        return pd.concat(frames, sort=False).sort_index(axis=1)

    def process(self):
        """Read every ``*.csv`` under each mapped directory and store the result.

        The combined long-format table is written to ``self.processed_data``;
        nothing is returned. Files are visited in sorted path order so the
        row order is deterministic. Within one file, rows for later day
        columns come before rows for earlier ones.
        """
        frames = []
        for platform, app_id in self.platform_directory_map.items():
            files_path = self.source_directory_path / app_id
            for path in sorted(files_path.glob('*.csv')):
                df = pd.read_csv(path)
                file_frames = []
                for day_column in self._day_columns(df):
                    tokens = day_column.split(' ')
                    event_day = tokens[-1]
                    if event_day == 'partial':
                        # Header ends in "... <day> - partial": the day value
                        # is the third token from the end.
                        event_day = tokens[-3]
                    frame = self._cohort_frame(df, day_column, event_day, platform)
                    frame['cohort_revenue'] = frame['cohort_revenue'].apply(self._parse_revenue)
                    file_frames.append(frame)
                # Reverse so the most recently built frame comes first.
                frames.extend(reversed(file_frames))
        self.processed_data = self._combine(frames)

    def process_old(self):
        """Interactively convert a single CSV and export the result to disk.

        Prompts for a file name (without the ``.csv`` extension), derives the
        platform from whether the name contains ``'ios'`` or ``'android'``
        (otherwise ``'error'``), reshapes the file and writes
        ``'AF Total Revenue Data Lot - <today>.csv'`` to the working directory.
        Unlike :meth:`process`, this method does not special-case "partial"
        headers or convert ``'a/b'`` values.
        """
        today = date.today()
        file_name = input('Please enter file name: ')
        if 'ios' in file_name:
            platform = 'ios'
        elif 'android' in file_name:
            platform = 'android'
        else:
            platform = 'error'
        df = pd.read_csv('{}.csv'.format(file_name))
        day_frames = [
            self._cohort_frame(df, day_column, day_column.split(' ')[-1], platform)
            for day_column in self._day_columns(df)
        ]
        # Reverse so the most recently built frame comes first.
        df_final = self._combine(day_frames[::-1])
        df_final.to_csv('AF Total Revenue Data Lot - {}.csv'.format(today), index=False)
        print('Exported CSV')