Re: Untitled

From Innocent Marten, 4 Months ago, written in Plain Text, viewed 79 times. This paste is a reply to Untitled from Funky Teal - view diff
URL http://codebin.org/view/15568d27 Embed
Download Paste or View Raw
  1. count_users_group = df_clear.pivot_table(index = 'group', values = 'user_id', aggfunc = ['nunique']).reset_index()
  2. count_users_group.columns = ['group', 'n_user']
  3. count_users_group
  4. events_group_users = df_clear.pivot_table(index = 'events', columns = 'group', values = 'user_id',\
  5.                                           aggfunc = 'nunique')\
  6.                              .sort_values(by = 'A1', ascending = False)\
  7.                              .reset_index().drop(4)
  8. events_group_users['count_users'] =  events_group_users['A1'] + events_group_users['A2'] + events_group_users['B']
  9. events_group_users['sum_A1_A2'] = events_group_users['A1'] + events_group_users['A2']
  10. events_group_users['proportionA1'] = events_group_users['A1'] / count_users_group.iloc[0,1] * 100
  11. events_group_users['proportionA2'] = events_group_users['A2'] / count_users_group.iloc[1,1] * 100
  12. events_group_users['proportionB'] = events_group_users['B'] / count_users_group.iloc[2,1] * 100
  13. events_group_users['proportionA1_A2'] = ((events_group_users['A1'] + events_group_users['A2'])\
  14.                                         / (count_users_group.iloc[0,1] + count_users_group.iloc[1,1])) * 100
  15.  
  16. #создадим функцию
  17. def test(successes1, successes2, trials1, trials2):  
  18.     alpha = .05
  19.     p1 = successes1/ trials1
  20.     p2 = successes2 / trials2
  21.     p_combined = (successes1 + successes2) / (trials1 + trials2)
  22.     difference = p1 - p2
  23.    
  24.     z_value = difference / mth.sqrt(p_combined * (1 - p_combined) * (1/trials1 + 1/trials2))
  25.     distr = stats.norm(0, 1)
  26.     p_value = (1 - distr.cdf(abs(z_value))) * 2
  27.    
  28.     print('Событие:', event_name)
  29.     print('p-значение: ', p_value)
  30.  
  31.     if p_value < alpha:
  32.         print('Отвергаем нулевую гипотезу: между долями есть значимая разница')
  33.     else:
  34.         print('Не получилось отвергнуть нулевую гипотезу, нет оснований считать доли разными')
  35.  
  36. # теперь проверим группы A1 и A2 по всем событиям:
  37. #for index, row in events_group_users.iterrows():
  38. #    test(row["A1"], row["A2"], count_users_group['n_user'][0],count_users_group['n_user'][1])
  39. #    print()
  40. events_group_users['events'] = events_group_users.apply(lambda row: test(row['A1'], row['A2'], count_users_group['n_user'][0], count_users_group['n_user'][1]), axis=1)

Reply to "Re: Untitled"

Here you can reply to the paste above