import pandas as pd # to import the data and doing data loading, cleaning, manipulation, analysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
Load Required Libraries
Load The Dataset
# Load the Credit Card Fraud Detection dataset from 'creditcard.csv'
# (path is relative to the current working directory).
data = pd.read_csv('creditcard.csv')

# Preview the first three rows to confirm the file was parsed as expected.
data.head(3)
| | Time | V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | ... | V21 | V22 | V23 | V24 | V25 | V26 | V27 | V28 | Amount | Class |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | -1.359807 | -0.072781 | 2.536347 | 1.378155 | -0.338321 | 0.462388 | 0.239599 | 0.098698 | 0.363787 | ... | -0.018307 | 0.277838 | -0.110474 | 0.066928 | 0.128539 | -0.189115 | 0.133558 | -0.021053 | 149.62 | 0 |
1 | 0.0 | 1.191857 | 0.266151 | 0.166480 | 0.448154 | 0.060018 | -0.082361 | -0.078803 | 0.085102 | -0.255425 | ... | -0.225775 | -0.638672 | 0.101288 | -0.339846 | 0.167170 | 0.125895 | -0.008983 | 0.014724 | 2.69 | 0 |
2 | 1.0 | -1.358354 | -1.340163 | 1.773209 | 0.379780 | -0.503198 | 1.800499 | 0.791461 | 0.247676 | -1.514654 | ... | 0.247998 | 0.771679 | 0.909412 | -0.689281 | -0.327642 | -0.139097 | -0.055353 | -0.059752 | 378.66 | 0 |
3 rows × 31 columns
Data Preprocessing
# Split the dataset into features (X) and target variable (y):
# X is every column except 'Class'; y is the 'Class' column itself.
X = data.drop('Class', axis=1)
y = data['Class']
# Data preprocessing: standardize the feature columns.
# NOTE(review): the scaler is fitted on every row of X, i.e. before the
# train/test split performed later in this script, so its statistics also
# reflect rows that end up in the test set.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
Dimensionality reduction with Principal component analysis (PCA)
# Apply PCA for dimensionality reduction, keeping 10 components of the
# scaled feature matrix.
# NOTE(review): like the scaler, PCA is fitted on all rows before the
# train/test split that follows.
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X_scaled)
Split the dataset into training and testing sets
# Split the preprocessed dataset into training and testing sets:
# 20% of the rows are held out for testing, and random_state=42 keeps the
# shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.2, random_state=42
)
Finally, apply two classification algorithms: Support Vector Machine (SVM) and Random Forest
# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the SVM classifier on the training split
svm.fit(X_train, y_train)

# Make predictions on the test set using the SVM classifier
svm_predictions = svm.predict(X_test)

# Calculate the accuracy of the SVM classifier on the held-out test set.
# NOTE(review): if 'Class' is heavily imbalanced (common for fraud data),
# accuracy alone can look high even when few positives are detected --
# confirm the class balance and consider precision/recall as well.
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)
# Create a Random Forest classifier with 100 trees; random_state=42 makes
# the fitted forest reproducible between runs.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the Random Forest classifier on the training split
rf.fit(X_train, y_train)

# Make predictions on the test set using the Random Forest classifier
rf_predictions = rf.predict(X_test)

# Calculate the accuracy of the Random Forest classifier on the held-out
# test set.
# NOTE(review): if 'Class' is heavily imbalanced (common for fraud data),
# accuracy alone can look high even when few positives are detected --
# confirm the class balance and consider precision/recall as well.
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
SVM Accuracy: 0.9992802219023208
Random Forest Accuracy: 0.9995259997893332
No matching items