Untitled

From Kotya, 10 Months ago, written in Plain Text, viewed 207 times.
URL http://codebin.org/view/c406520e Embed
Download Paste or View Raw
  1. # сводная таблица
  2.  
  3. group = (logs.
  4.           groupby(['event_name', 'group_id']).
  5.           agg({'user_id': 'nunique'}).
  6.           reset_index().
  7.           rename(columns={'user_id' : 'total_users'}).
  8.           sort_values(by=['group_id','total_users'], ascending=False))
  9.  
  10. group =group[group['event_name'] != 'Tutorial']
  11. group
  12.  
  13. # функция
  14.  
  15. def hyp (df1, df2, event, alpha, n):
  16.     bonf_alpha = alpha / n
  17.     n_users = np.array([df1['user_id'].nunique(),
  18.                         df2['user_id'].nunique()])
  19.     success = np.array([df1[df1['event_name'] == event_name]['user_id'].nunique(),
  20.                         df2[df2['event_name'] == event_name]['user_id'].nunique()])
  21.     p1 = success[0]/n_users[0]
  22.     p2 = success[1]/n_users[1]
  23.     p_combined = (success[0] + success[1]) / (n_users[0] + n_users[1])
  24.     difference = p1 - p2
  25.     z_value = difference /  np.sqrt(p_combined * (1 - p_combined) * (1/n_users[0] + 1/n_users[1]))
  26.     distr = st.norm(0, 1)  
  27.     p_value = (1 - distr.cdf(abs(z_value))) * 2
  28.     print('Событие:', event)
  29.     print('p-значение: ', p_value)
  30.     if p_value < bonf_alpha:
  31.         print('Отвергаем нулевую гипотезу: между долями есть разница')
  32.     else:
  33.         print(
  34.         'Не получилось отвергнуть нулевую гипотезу, нет оснований считать доли разными')
  35.  
  36. # Потом передаешь каждую группу функции:
  37.  
  38. for event_name in group['event_name'].unique():
  39.     hyp(logs[logs['group_id'] == 246], logs[logs['group_id'] == 247], event_name,.05, 4)
  40.     print()
  41.  
  42. for event_name in group['event_name'].unique():
  43.     hyp(logs[logs['group_id'] == 246], logs[logs['group_id'] == 248], event_name,.05, 12)
  44.     print()
  45.  
  46. for event_name in group['event_name'].unique():
  47.     hyp(logs[logs['group_id'] == 247], logs[logs['group_id'] == 248], event_name,.05,12)
  48.     print()
  49.  
  50. for event_name in group['event_name'].unique():
  51.     hyp(logs[logs['group_id'] != 248], logs[logs['group_id'] == 248], event_name,.05, 12)
  52.     print()

Reply to "Untitled"

Here you can reply to the paste above