One-Hot Encoding with Python
In [1]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
In [16]:
# Category labels to sample from. 'cat4' is listed so the generated data agrees
# with the recorded encoder output in this notebook, which contains a
# cat_value_cat4 column.
cat_list = ['cat1', 'cat2', 'cat3', 'cat4']
# 20 random integers; randint's upper bound is exclusive, so values are 1..99.
value = np.random.randint(1, 100, size=20)
# 20 labels drawn uniformly (with replacement) from cat_list.
cat_value = np.random.choice(cat_list, size=20)
In [18]:
# Combine the random integers and the sampled labels into a two-column frame.
df = pd.DataFrame(dict(value=value, cat_value=cat_value))
# Preview the first five rows.
print(df.head(n=5))
   value cat_value
0     74      cat1
1     47      cat3
2     73      cat2
3     79      cat1
4     57      cat1
In [27]:
# One-hot encode every object-dtype column of `df` and swap the originals for
# their indicator columns.
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()

# sparse_output=False makes fit_transform return a dense array that can be
# wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
x_encoded = encoder.fit_transform(df[categorical_columns])

# Reuse df's index: pd.concat(axis=1) aligns rows on index labels, so a fresh
# RangeIndex would misalign rows and introduce NaNs whenever df is not
# indexed 0..n-1 (e.g. after filtering or a train/test split).
x_encoded_df = pd.DataFrame(
    x_encoded,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=df.index,
)

# Drop the raw categorical columns, then append the encoded ones on the right.
df_encoded = pd.concat(
    [df.drop(columns=categorical_columns), x_encoded_df],
    axis=1,
)
print(df_encoded.head())
   value  cat_value_cat1  cat_value_cat2  cat_value_cat3  cat_value_cat4
0     74             1.0             0.0             0.0             0.0
1     47             0.0             0.0             1.0             0.0
2     73             0.0             1.0             0.0             0.0
3     79             1.0             0.0             0.0             0.0
4     57             1.0             0.0             0.0             0.0