# Columns selected from the booking frames (via ``df[features]``) when the
# model input matrices are assembled.
features = [
    'lead_time',
    'arrival_date_month',
    'stays_in_weekend_nights',
    'adults',
    'has_children',
    'has_babies',
    'meal',
    'country',
    'distribution_channel',
    'is_repeated_guest',
    'previous_cancellations',
    'previous_bookings_not_canceled',
    'reserved_room_type',
    'booking_changes',
    'days_in_waiting_list',
    'customer_type',
    'need_car_parking_space',
    'total_of_special_requests',
    'total_nights',
]

# Subset of ``features`` flagged as categorical.
# NOTE(review): the one-hot encoding step that follows picks its columns by
# object dtype and does not read this list; presumably it is consumed by a
# later model that accepts categorical column names -- confirm.
cat_features = [
    'arrival_date_month',
    'stays_in_weekend_nights',
    'meal',
    'country',
    'distribution_channel',
    'is_repeated_guest',
    'previous_cancellations',
    'previous_bookings_not_canceled',
    'reserved_room_type',
    'booking_changes',
    'customer_type',
    'need_car_parking_space',
    'total_of_special_requests',
    'total_nights',
]

# Name of the column used as the prediction target.
target = 'is_canceled'
# Split the selected features by dtype, using the TRAINING frame as the single
# source of truth so that the train and test matrices always end up with the
# same columns in the same order.
categorical_columns = df_train[features].select_dtypes(include='object').columns.to_list()
numeric_columns = df_train[features].select_dtypes(exclude='object').columns.to_list()

# Fit the encoder on training data only. With handle_unknown='ignore',
# categories that appear only in the test frame are encoded as all-zero rows
# instead of raising.
encoder = OneHotEncoder(handle_unknown='ignore')
encoder.fit(df_train[categorical_columns])

# Names of the generated indicator columns; identical for both frames because
# they come from the fitted encoder.
encoded_feature_names = encoder.get_feature_names_out()

# Dense one-hot frames. Built from a bare array, they carry a fresh default
# 0..n-1 index, so no index reset is needed on them.
df_train_categorical = pd.DataFrame(
    encoder.transform(df_train[categorical_columns]).toarray(),
    columns=encoded_feature_names,
)
df_test_categorical = pd.DataFrame(
    encoder.transform(df_test[categorical_columns]).toarray(),
    columns=encoded_feature_names,
)

# Final matrices: non-object columns followed by the one-hot columns. The
# source frames' indexes are dropped so rows align positionally with the
# encoded frames during the column-wise concat.
df_for_learn = pd.concat(
    [df_train[numeric_columns].reset_index(drop=True), df_train_categorical],
    axis=1,
)
df_for_test = pd.concat(
    [df_test[numeric_columns].reset_index(drop=True), df_test_categorical],
    axis=1,
)