End to End Projects - Credit Card Fraud Detection with SVM and RF - అజ్ఞాత క్రెడిట్ కార్డ్ లావాదేవీలు మోసపూరితమైనవ లేదా నిజమైనవ ముందుగానే నిర్ణయించడం

Load Required Libraries

import pandas as pd  # data loading, cleaning, manipulation, and analysis
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

Load The Dataset

# Read the credit-card transactions CSV (path is relative to the working
# directory) into a DataFrame, then preview its first three rows.
data = pd.read_csv(filepath_or_buffer='creditcard.csv')
data.head(n=3)

	Time	V1	V2	V3	V4	V5	V6	V7	V8	V9	...	V21	V22	V23	V24	V25	V26	V27	V28	Amount
0	0.0	-1.359807	-0.072781	2.536347	1.378155	-0.338321	0.462388	0.239599	0.098698	0.363787	...	-0.018307	0.277838	-0.110474	0.066928	0.128539	-0.189115	0.133558	-0.021053	149.62
1	0.0	1.191857	0.266151	0.166480	0.448154	0.060018	-0.082361	-0.078803	0.085102	-0.255425	...	-0.225775	-0.638672	0.101288	-0.339846	0.167170	0.125895	-0.008983	0.014724	2.69
2	1.0	-1.358354	-1.340163	1.773209	0.379780	-0.503198	1.800499	0.791461	0.247676	-1.514654	...	0.247998	0.771679	0.909412	-0.689281	-0.327642	-0.139097	-0.055353	-0.059752	378.66

3 rows × 31 columns

Data Preprocessing

# Separate the label from the predictors: the 'Class' column is the target
# (y); every remaining column is kept as a feature (X).
y = data['Class']
X = data.drop(columns='Class')

# Standardize each feature column to zero mean and unit variance.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed later in this file, so statistics of the
# held-out rows influence the scaling. Consider fitting on the training
# rows only.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

Dimensionality Reduction with Principal Component Analysis (PCA)

# Project the standardized features onto their first 10 principal components.
# random_state is pinned to the same seed the rest of this file uses: with
# the default svd_solver='auto', scikit-learn may (depending on its version
# and the data shape) select the randomized SVD solver, whose output would
# otherwise differ from run to run.
# NOTE(review): PCA is fitted on all rows, before the train/test split done
# later in this file, so the held-out rows help shape the projection.
# Consider fitting on the training rows only.
pca = PCA(n_components=10, random_state=42)
X_pca = pca.fit_transform(X_scaled)

Split the dataset into training and testing sets

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
# stratify=y keeps the class proportions identical in the training and test
# partitions, so a rare class cannot end up over- or under-represented in
# the held-out set by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X_pca, y, test_size=0.2, random_state=42, stratify=y
)

Finally, Apply Classification Algorithms: Support Vector Machine (SVM) and Random Forest

def _fit_and_evaluate(label, model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and report test-set performance.

    Prints the accuracy, the confusion matrix and the per-class
    precision/recall/F1 report, each prefixed with ``label``.

    Args:
        label: Human-readable model name used in the printed output.
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        X_train: Training features.
        X_test: Held-out features.
        y_train: Training labels.
        y_test: Held-out labels.

    Returns:
        A ``(predictions, accuracy)`` tuple for the held-out split.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print(f"{label} Accuracy:", accuracy)

    # Accuracy alone says little when one class dominates (a model that
    # never predicts the minority class can still score very high), so also
    # show where the errors fall and the per-class precision/recall/F1.
    print(f"{label} Confusion Matrix:")
    print(confusion_matrix(y_test, predictions))
    print(f"{label} Classification Report:")
    print(classification_report(y_test, predictions, digits=4))

    return predictions, accuracy


# Support Vector Machine with a linear kernel.
svm = SVC(kernel='linear')
svm_predictions, svm_accuracy = _fit_and_evaluate(
    "SVM", svm, X_train, X_test, y_train, y_test
)

# Random Forest with 100 trees and a fixed seed for repeatable results.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_predictions, rf_accuracy = _fit_and_evaluate(
    "Random Forest", rf, X_train, X_test, y_train, y_test
)

SVM Accuracy: 0.9992802219023208
Random Forest Accuracy: 0.9995259997893332