Salary Dataset Assignment - Ipynb
Salary Dataset Assignment - Ipynb
"cells": [
{
"cell_type": "markdown",
"id": "d7bbc9d7-7c25-47c7-93f1-28d2537acb2d",
"metadata": {},
"source": [
"# Naive Bayes Assignment"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "73a04236-f548-4aae-8247-6dc70877da11",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import keras\n",
"from nltk.corpus import stopwords\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_extraction.text import TfidfTransformer\n",
"\n",
"from sklearn.naive_bayes import MultinomialNB as MB\n",
"from sklearn.naive_bayes import GaussianNB as GB"
]
},
{
"cell_type": "markdown",
"id": "c4d6d173-4e4d-4c59-bccc-3118c32dd376",
"metadata": {},
"source": [
"## Salary Dataset"
]
},
{
"cell_type": "markdown",
"id": "d36f681f-fa2f-432d-b42f-16fe036eddd0",
"metadata": {},
"source": [
"### Train Dataset:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b8395358-3254-4c46-8f40-89651d3d9a0a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>workclass</th>\n",
" <th>education</th>\n",
" <th>educationno</th>\n",
" <th>maritalstatus</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>sex</th>\n",
" <th>capitalgain</th>\n",
" <th>capitalloss</th>\n",
" <th>hoursperweek</th>\n",
" <th>native</th>\n",
" <th>Salary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>39</td>\n",
" <td>State-gov</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>2174</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>50</td>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>38</td>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Divorced</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>53</td>\n",
" <td>Private</td>\n",
" <td>11th</td>\n",
" <td>7</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Husband</td>\n",
" <td>Black</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28</td>\n",
" <td>Private</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>Cuba</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30156</th>\n",
" <td>27</td>\n",
" <td>Private</td>\n",
" <td>Assoc-acdm</td>\n",
" <td>12</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Tech-support</td>\n",
" <td>Wife</td>\n",
" <td>White</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>38</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30157</th>\n",
" <td>40</td>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Machine-op-inspct</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>>50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30158</th>\n",
" <td>58</td>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Widowed</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Unmarried</td>\n",
" <td>White</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30159</th>\n",
" <td>22</td>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Own-child</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>20</td>\n",
" <td>United-States</td>\n",
" <td><=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30160</th>\n",
" <td>52</td>\n",
" <td>Self-emp-inc</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Wife</td>\n",
" <td>White</td>\n",
" <td>Female</td>\n",
" <td>15024</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>>50K</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>30161 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" age workclass education educationno
maritalstatus \\\n",
"0 39 State-gov Bachelors 13 Never-
married \n",
"1 50 Self-emp-not-inc Bachelors 13 Married-civ-
spouse \n",
"2 38 Private HS-grad 9
Divorced \n",
"3 53 Private 11th 7 Married-civ-
spouse \n",
"4 28 Private Bachelors 13 Married-civ-
spouse \n",