from cchs import*
import matplotlib.pyplot as plt
%matplotlib inline
# Read the CCHS extract into a structured array: the first (header) row is
# skipped and DATA_COLUMNS (from the cchs module) supplies the column dtypes.
CCHS = np.genfromtxt(
    'CCHSX.csv',
    delimiter=',',
    skip_header=1,
    dtype=DATA_COLUMNS,
)

# Recode 'biosex' from '1'/'2' to 'M'/'F'; the analysis below filters on
# these letter labels. (Presumably done in place by the helper -- confirm
# against cchs.replace_nominal_codes.)
replace_nominal_codes(
    CCHS,
    'biosex',
    (('1', 'M'), ('2', 'F')),
)

# Treat the code 996 in 'alcoweek' as missing (presumably replaced by NaN in
# place, going by the helper's name -- confirm against the cchs module).
replace_missing_with_nan(
    CCHS,
    'alcoweek',
    (996,),
)
# Histogram of BMI over all respondents, drawn with 10 bins on a wide figure.
fig, ax = plt.subplots(figsize=(10, 4))
ax.hist(CCHS['bmi'], bins=10, color='y')
ax.set_title('Histogram of bmi')
ax.set_xlabel('bmi')
ax.set_ylabel('Frequency')
plt.show()
The distribution is slightly right-skewed. The highest frequency is in the BMI range 22-26.
# Split the respondents by the recoded 'biosex' label and compare BMI.
CCHS_FEMALE_POP = CCHS[CCHS['biosex'] == 'F']  # rows for women only
CCHS_MALE_POP = CCHS[CCHS['biosex'] == 'M']  # rows for men only

# Order matters: it must match the tick labels set below (Male, Female).
CCHS_POP = [CCHS_MALE_POP['bmi'], CCHS_FEMALE_POP['bmi']]

ax = plt.figure(1, figsize=(10, 4)).add_subplot(111)
# A default boxplot only marks the median. showmeans=True adds a marker at
# each group's mean so the figure actually supports the statement about the
# means that is printed below.
ax.boxplot(CCHS_POP, showmeans=True)
ax.set_xticklabels(['Male', 'Female'])
plt.title('Bmi by Gender')
plt.show()

# Report the group means numerically alongside the figure.
print('Mean bmi for men is:', np.mean(CCHS_MALE_POP['bmi']), 'while Mean bmi for women is:', np.mean(CCHS_FEMALE_POP['bmi']))
print ('The Boxplot shows that the Mean BMI for Men is higher.')
The mean, median, minimum, 1st quartile and 3rd quartile values are higher for men. However, women have a higher maximum and more extreme outliers.
# BMI of women grouped by the 'alcofreq' code:
#   never  -> code equal to 1
#   low    -> code in (1, 3]
#   medium -> code in (3, 5]
#   high   -> code above 5
# NOTE(review): the high group is open-ended, so any special or missing-value
# codes above 5 in 'alcofreq' would land in it -- confirm against the codebook.
women = CCHS['biosex'] == 'F'
freq_code = CCHS['alcofreq']

CCHS_FEMALE_NEVERDRINK = CCHS[women & (freq_code == 1)]
CCHS_FEMALE_LOWDRINK = CCHS[women & (freq_code > 1) & (freq_code <= 3)]
CCHS_FEMALE_MIDDRINK = CCHS[women & (freq_code > 3) & (freq_code <= 5)]
CCHS_FEMALE_HIGHDRINK = CCHS[women & (freq_code > 5)]

# BMI columns in the same order as the tick labels used for the boxplot.
CCHS_FEMALE = [
    group['bmi']
    for group in (
        CCHS_FEMALE_NEVERDRINK,
        CCHS_FEMALE_LOWDRINK,
        CCHS_FEMALE_MIDDRINK,
        CCHS_FEMALE_HIGHDRINK,
    )
]

fig = plt.figure(1, figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)
ax.boxplot(CCHS_FEMALE)
ax.set_xticklabels(['Never', 'Low', 'Medium', 'High'])
plt.title('bmi by alcohol consumption frequency for women')
plt.show()

# Print the mean BMI of each drinking-frequency group, never -> high.
women_messages = (
    'Mean bmi for women-never drinking:',
    'Mean bmi for women-low drinking frequency:',
    'Mean bmi for women-medium drinking frequency:',
    'Mean bmi for women-high drinking frequency:',
)
for message, bmi_values in zip(women_messages, CCHS_FEMALE):
    print(message, np.mean(bmi_values))
For women, the high drinking frequency group has the lowest mean, median, 1st quartile, 3rd quartile and maximum BMI.
# BMI of men grouped by the 'alcofreq' code:
#   never  -> code equal to 1
#   low    -> code in (1, 3]
#   medium -> code in (3, 5]
#   high   -> code above 5
# NOTE(review): the high group is open-ended, so any special or missing-value
# codes above 5 in 'alcofreq' would land in it -- confirm against the codebook.
men = CCHS['biosex'] == 'M'
freq_code = CCHS['alcofreq']

CCHS_MALE_NEVERDRINK = CCHS[men & (freq_code == 1)]
CCHS_MALE_LOWDRINK = CCHS[men & (freq_code > 1) & (freq_code <= 3)]
CCHS_MALE_MIDDRINK = CCHS[men & (freq_code > 3) & (freq_code <= 5)]
CCHS_MALE_HIGHDRINK = CCHS[men & (freq_code > 5)]

# BMI columns in the same order as the tick labels used for the boxplot.
CCHS_MALE = [
    group['bmi']
    for group in (
        CCHS_MALE_NEVERDRINK,
        CCHS_MALE_LOWDRINK,
        CCHS_MALE_MIDDRINK,
        CCHS_MALE_HIGHDRINK,
    )
]

fig = plt.figure(1, figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)
ax.boxplot(CCHS_MALE)
ax.set_xticklabels(['Never', 'Low', 'Medium', 'High'])
plt.title('bmi by alcohol consumption frequency for men')
plt.show()

# Print the mean BMI of each drinking-frequency group, never -> high.
men_messages = (
    'Mean bmi for men-never drinking:',
    'Mean bmi for men-low drinking frequency:',
    'Mean bmi for men-medium drinking frequency:',
    'Mean bmi for men-high drinking frequency:',
)
for message, bmi_values in zip(men_messages, CCHS_MALE):
    print(message, np.mean(bmi_values))
For men, the never drinking group has the lowest mean, median, and 1st quartile BMI.
The results obtained in this report demonstrate that women in all 4 categories (never drinking, low drinking, medium drinking, high drinking) have a lower BMI than men.