"""Prepare the flights dataset: one-hot encode, split, and scale features.

Reads ``/datasets/flights.csv``, one-hot encodes the frame, separates the
``Arrival Delay`` target from the features, makes a 75/25 train/validation
split with a fixed seed, and standardizes the listed numeric columns with a
scaler fitted on the training part only. Finally prints the shapes of the
training and validation feature frames.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('/datasets/flights.csv')

# drop_first=True drops one indicator column per encoded feature, which
# avoids the dummy-variable trap (perfectly collinear indicator columns).
data_ohe = pd.get_dummies(data, drop_first=True)

# Split the encoded frame into the feature matrix and the target column.
features = data_ohe.drop(['Arrival Delay'], axis=1)
target = data_ohe['Arrival Delay']

# 25% of the rows go to validation; random_state fixes the shuffle.
features_train, features_valid, target_train, target_valid = train_test_split(
    features, target, test_size=0.25, random_state=12345)

# Take explicit copies before assigning columns below. Depending on the
# pandas / scikit-learn versions, the frames returned by the split may be
# flagged as slices of ``features``, and writing into them would emit
# SettingWithCopyWarning.
features_train = features_train.copy()
features_valid = features_valid.copy()

# Columns that are standardized; every other column is left as is.
numeric = ['Day', 'Day Of Week', 'Origin Airport Delay Rate',
           'Destination Airport Delay Rate', 'Scheduled Time', 'Distance',
           'Scheduled Departure Hour', 'Scheduled Departure Minute']

# Fit on the training rows only, so validation rows do not influence the
# statistics used for scaling.
scaler = StandardScaler()
scaler.fit(features_train[numeric])

# Apply the same fitted transformation to the training set...
features_train[numeric] = scaler.transform(features_train[numeric])
# ...and to the validation set.
features_valid[numeric] = scaler.transform(features_valid[numeric])

print(features_train.shape)
print(features_valid.shape)
Re: Untitled
From Ivory Duck, 11 Months ago, written in Plain Text, viewed 188 times.
This paste is a reply to Untitled from Little Finch
- view diff
URL http://codebin.org/view/51071481
Embed
Download Paste or View Raw
— Expand Paste to full width of browser