## Author: Haleema Atobiloye

## Research Working Title: Sociological Indicators, Drinking Behaviors, and Vaping Habits of OU Undergrads
#### Author: 
  #          Haleema B. Atobiloye
#### Class: SOC 5500 - Data Analysis
#### Project start date: Feb 28, 2022 
#### Project status: Data analysis complete
#### Project completion date: Apr 22, 2022 
#### Programming: R (R-Studio) 
##--------------------------------------------------------- ##

##--------------------------------------------------------- ##
##            WORKING DIRECTORY 
##--------------------------------------------------------- ##
# Relative paths used later in the script (e.g. the exported regression
# text files) resolve against this project folder.
setwd("~/Downloads/MA IDS - Semester 4 - Final Semester iA!/SOC 5500")

##--------------------------------------------------------- ##
##            RESET WORKSPACE
##--------------------------------------------------------- ##
# Remove every object from the current environment, including hidden
# (dot-prefixed) ones, so the script starts from a clean slate.
rm(list = ls(all.names = TRUE))

##--------------------------------------------------------- ##
##            PACKAGES/ LIBRARIES NEEDED 
##--------------------------------------------------------- ##
library(psych)
library(tidyverse)
library(googlesheets4)
library(janitor)
library(QCA)

##--------------------------------------------------------- ##
##            LOAD GOOGLE SHEETS DATA 
##--------------------------------------------------------- ##
# Read the project data from Google Sheets into `dd`; the first row of the
# sheet supplies the column names.
# The URL is passed bare: Markdown-style "<...>" delimiters are not part of
# the address and must not be included in the string.
dd <- read_sheet(
  "https://docs.google.com/spreadsheets/d/1oPtTdl7LeicP8TRAx7WTAt7nqmxC4zxIzN-6hJD6SiE/edit?usp=sharing",
  col_names = TRUE
)

##--------------------------------------------------------- ##
##                       RECODES 
##--------------------------------------------------------- ##

##------------------------------- ##
#### GROUP 1 - Demography
# Bare `dd$...` lines only display the column when run at top level.
# NOTE(review): the recode() calls below use "old=new; ...; else=..." rule
# strings, which is not dplyr::recode() syntax. They presumably rely on a
# rule-string recode() (e.g. from QCA/admisc) masking dplyr's because QCA is
# attached after tidyverse -- confirm before changing the library() order.
dd$ou.sample  # displayed only; source comment: no recode needed, binary indicator

dd$age
# age.r: value 10 becomes 0; every other value is copied unchanged.
dd$age.r <- recode(dd$age, 
                   "10=0; else=copy")

dd$sex.category
# female: 1 when sex.category is 2; 0 when it is 1, 3 or 4; NA otherwise.
dd$female <- recode(dd$sex.category, 
                    "2=1; 
                     1=0; 
                     3:4=0; 
                     else=NA")

# masculinity: sex.category 2 -> 1, 3 or 4 -> 2, 1 -> 3; NA otherwise.
dd$masculinity <- recode(dd$sex.category, 
                         "2=1; 
                          3:4=2;                      
                          1 =3;
                          else=NA")

##------------------------------- ##
#### GROUP 2 - Early Context
# Bare `dd$...` lines only display the column when run at top level.
dd$ses  # displayed only; used as-is (no recode)

dd$pol.lean.pa  # displayed only; used as-is (no recode)

dd$start.soc.media
# start.soc.media.r: 1 -> 0, and codes 2..6 are reversed onto 5..1
# (6 -> 1, 5 -> 2, 4 -> 3, 3 -> 4, 2 -> 5).
# NOTE(review): there is no "else" rule, so what happens to any other value
# depends on the recode() implementation in use -- TODO confirm.
dd$start.soc.media.r <- recode(dd$start.soc.media, 
                               "1=0; 
                               6=1;
                               5=2;
                               4=3;
                               3=4;
                               2=5")

##------------------------------- ##
#### GROUP 3 - Current Social Context 
#### NO RECODE NEEDED FOR dd$type.school VARIABLES, binary indicators ####
# Display the raw inputs for this group (bare expressions print at top level).
dd$type.coll.state
dd$type.coll.private
dd$type.coll.community
dd$type.coll.relig
dd$soc.circle.drinking

# Build all Group 3 derived columns in one pass, in this order:
#   type.coll              sum of the four college-type indicators
#   soc.circle.drinking.r  soc.circle.drinking shifted down by 1
#   read.*.r / write.*.r   each social-media item shifted down by 1
#   sm.use                 sum of the 16 shifted social-media items
dd <- dd %>%
  mutate(
    type.coll = type.coll.state + type.coll.private +
      type.coll.community + type.coll.relig,
    soc.circle.drinking.r = soc.circle.drinking - 1,
    across(
      all_of(c(
        "read.insta", "read.snap", "read.twitter", "read.fb",
        "read.reddit", "read.breit", "read.tinder", "read.group.whats.app",
        "write.insta", "write.snap", "write.twitter", "write.fb",
        "write.reddit", "write.breit", "write.tinder",
        "write.group.whats.app"
      )),
      function(item) item - 1,
      .names = "{.col}.r"
    ),
    sm.use = read.insta.r + read.snap.r + read.twitter.r + read.fb.r +
      read.reddit.r + read.breit.r + read.tinder.r +
      read.group.whats.app.r +
      write.insta.r + write.snap.r + write.twitter.r + write.fb.r +
      write.reddit.r + write.breit.r + write.tinder.r +
      write.group.whats.app.r
  )

##------------------------------- ##
#### GROUP 4 - Key Causal Variables
# Display the raw inputs for this group (bare expressions print at top level).
dd$in.relationship
dd$rel.not.looking
dd$rel.casual
dd$rel.dating
dd$rel.potentil.long.t
dd$rel.serious
dd$rel.open

# Build all Group 4 derived columns in one pass, in this order:
#   in.relationship.r   in.relationship shifted down by 1
#   rel.search.status   sum of the six relationship-search indicators
#   col.*.r             each college-climate item shifted down by 1
#   col.intense.lvl     sum of the three shifted college-climate items
dd <- dd %>%
  mutate(
    in.relationship.r = in.relationship - 1,
    rel.search.status = rel.not.looking + rel.casual + rel.dating +
      rel.potentil.long.t + rel.serious + rel.open,
    across(
      all_of(c("col.acad.demanding", "col.sport.crazy", "col.party.school")),
      function(item) item - 1,
      .names = "{.col}.r"
    ),
    col.intense.lvl = col.acad.demanding.r + col.sport.crazy.r +
      col.party.school.r
  )

##------------------------------- ##
#### DEPENDENT VARIABLES
# Display the two raw outcome columns (bare expressions print at top level).
dd$amount.drink.drunk
dd$vape.how.often.college

# Shift both outcomes down by 1 into new ".r" columns.
dd <- dd %>%
  mutate(
    amount.drink.drunk.r = amount.drink.drunk - 1,
    vape.how.often.college.r = vape.how.often.college - 1
  )

#########------------------------------------------#################
##### GROUPINGS FOR OUR DESCRIPTIVE STATS ANALYSIS
# Predictor matrices, one per variable group, consumed by the pairs.panels()
# charts and the cor() calls further down. Columns are named so the plots and
# correlation output are labelled.
# group.demography is referenced by the charts/correlations and therefore has
# to exist; it holds the Group 1 (demography) variables.
group.demography <- cbind(
  ou.sample   = dd$ou.sample,
  age.r       = dd$age.r,
  female      = dd$female,
  masculinity = dd$masculinity
)
# Name must be exactly `group.early.contxt` -- that is what later code uses.
group.early.contxt <- cbind(
  ses               = dd$ses,
  pol.lean.pa       = dd$pol.lean.pa,
  start.soc.media.r = dd$start.soc.media.r
)
group.cur.soc.contxt <- cbind(
  type.coll             = dd$type.coll,
  soc.circle.drinking.r = dd$soc.circle.drinking.r,
  sm.use                = dd$sm.use
)
group.key.causal.varbls <- cbind(
  in.relationship.r = dd$in.relationship.r,
  rel.search.status = dd$rel.search.status,
  col.intense.lvl   = dd$col.intense.lvl
)

######################################################################
##--------------------------------------------------------- ##
##                       BASIC ANALYSIS 
##--------------------------------------------------------- ##

# Five-number summary plus frequency table for every analysis variable,
# organised by the same variable groups used in the recode section.

####### GROUP 1 - Demography
summary(dd$age.r)
table(dd$age.r)

summary(dd$masculinity)
table(dd$masculinity)

summary(dd$female)
table(dd$female)

####### GROUP 2 - Early Context
# ses, pol.lean.pa and start.soc.media.r are the Early Context variables;
# soc.circle.drinking.r belongs to Group 3 and is summarised there.
summary(dd$ses)
table(dd$ses)

summary(dd$pol.lean.pa)
table(dd$pol.lean.pa)

summary(dd$start.soc.media.r)
table(dd$start.soc.media.r)

###### GROUP 3 - Current Social Context
summary(dd$soc.circle.drinking.r)
table(dd$soc.circle.drinking.r)

summary(dd$sm.use)
table(dd$sm.use)

summary(dd$type.coll)
table(dd$type.coll)

###### GROUP 4 - Key Causal Variables
summary(dd$in.relationship.r)
table(dd$in.relationship.r)

summary(dd$rel.search.status)
table(dd$rel.search.status)

summary(dd$col.acad.demanding.r)
table(dd$col.acad.demanding.r)

summary(dd$col.sport.crazy.r)
table(dd$col.sport.crazy.r)

summary(dd$col.party.school.r)
table(dd$col.party.school.r)

summary(dd$col.intense.lvl)
table(dd$col.intense.lvl)

##### DEPENDENT VARIABLES
summary(dd$amount.drink.drunk.r)
table(dd$amount.drink.drunk.r)

summary(dd$vape.how.often.college.r)
table(dd$vape.how.often.college.r)

#### CHARTS
# Scatterplot-matrix / correlation panels for each predictor group.
pairs.panels(group.demography)
pairs.panels(group.early.contxt)
pairs.panels(group.cur.soc.contxt)
pairs.panels(group.key.causal.varbls)

###### CORRELATIONS ####
# Each predictor group is correlated with BOTH outcomes: drinking first,
# then vaping. Pairwise deletion keeps a single missing value (the sex
# recodes can yield NA) from turning a whole correlation into NA.
cor(group.demography, dd$amount.drink.drunk.r,
    use = "pairwise.complete.obs")
cor(group.demography, dd$vape.how.often.college.r,
    use = "pairwise.complete.obs")

cor(group.early.contxt, dd$amount.drink.drunk.r,
    use = "pairwise.complete.obs")
cor(group.early.contxt, dd$vape.how.often.college.r,
    use = "pairwise.complete.obs")

cor(group.cur.soc.contxt, dd$amount.drink.drunk.r,
    use = "pairwise.complete.obs")
cor(group.cur.soc.contxt, dd$vape.how.often.college.r,
    use = "pairwise.complete.obs")

cor(group.key.causal.varbls, dd$amount.drink.drunk.r,
    use = "pairwise.complete.obs")
cor(group.key.causal.varbls, dd$vape.how.often.college.r,
    use = "pairwise.complete.obs")

##--------------------------------------------------------- ##
##                LINEAR REGRESSION 
##--------------------------------------------------------- ##
# All models read their variables from `dd` via `data = dd` instead of
# attach(dd): attach() puts a copy of the columns on the search path that is
# never detached and silently goes stale if `dd` changes afterwards.
# NOTE(review): the models use the raw outcomes (amount.drink.drunk,
# vape.how.often.college), not the ".r" versions shifted by 1; that only moves
# the intercept -- confirm this is intended.

# Snapshot of the variables considered for modelling.
model.vars <- as.data.frame(dd[, c(
  "amount.drink.drunk", "vape.how.often.college",
  "ou.sample", "age.r", "female", "masculinity", "race",
  "ses", "pol.lean.pa", "start.soc.media.r",
  "soc.circle.drinking.r", "sm.use", "type.coll",
  "in.relationship.r", "rel.search.status", "col.intense.lvl"
)])

##### Dependent Variable 1: Drinking frequency:
# Nested models: each step adds the next variable group
# (demography -> early context -> current social context -> key causal).
model1 <- lm(amount.drink.drunk ~
               ou.sample + age.r + female + masculinity,
             data = dd)
summary(model1)

model2 <- lm(amount.drink.drunk ~
               ou.sample + age.r + female + masculinity +
               ses + pol.lean.pa + start.soc.media.r,
             data = dd)
summary(model2)

model3 <- lm(amount.drink.drunk ~
               ou.sample + age.r + female + masculinity +
               ses + pol.lean.pa + start.soc.media.r +
               soc.circle.drinking.r + sm.use + type.coll,
             data = dd)
summary(model3)

model4 <- lm(amount.drink.drunk ~
               ou.sample + age.r + female + masculinity +
               ses + pol.lean.pa + start.soc.media.r +
               soc.circle.drinking.r + sm.use + type.coll +
               in.relationship.r + rel.search.status + col.intense.lvl,
             data = dd)
summary(model4)

###### Exporting Dependent Variable 1 Regression Output
# Written relative to the current working directory.
capture.output(summary(model1), summary(model2),
               summary(model3), summary(model4),
               file = "modelvarsdv1.txt")

##### Dependent Variable 2: Vaping frequency:
# Same four nested specifications, with vaping as the outcome.
model5 <- lm(vape.how.often.college ~
               ou.sample + age.r + female + masculinity,
             data = dd)
summary(model5)

model6 <- lm(vape.how.often.college ~
               ou.sample + age.r + female + masculinity +
               ses + pol.lean.pa + start.soc.media.r,
             data = dd)
summary(model6)

model7 <- lm(vape.how.often.college ~
               ou.sample + age.r + female + masculinity +
               ses + pol.lean.pa + start.soc.media.r +
               soc.circle.drinking.r + sm.use + type.coll,
             data = dd)
summary(model7)

model8 <- lm(vape.how.often.college ~
               ou.sample + age.r + female + masculinity +
               ses + pol.lean.pa + start.soc.media.r +
               soc.circle.drinking.r + sm.use + type.coll +
               in.relationship.r + rel.search.status + col.intense.lvl,
             data = dd)
summary(model8)

###### Exporting Dependent Variable 2 Regression Output
capture.output(summary(model5), summary(model6),
               summary(model7), summary(model8),
               file = "modelvarsdv2.txt")

##--------------------------------------------------------- ##
##                CORRELATION MATRIX 
##--------------------------------------------------------- ##
# Both outcomes plus every predictor that appears in the full models.
group1 <- as.data.frame(dd[, c(
  "amount.drink.drunk", "vape.how.often.college",
  "ou.sample", "age.r", "female", "masculinity",
  "ses", "pol.lean.pa", "start.soc.media.r",
  "soc.circle.drinking.r", "sm.use", "type.coll",
  "in.relationship.r", "rel.search.status", "col.intense.lvl"
)])

# Pairwise deletion so a missing value in one column does not blank out
# every correlation that involves it.
round(cor(group1, use = "pairwise.complete.obs"), digits = 4)

# Per-variable descriptives; vapply() guarantees one number per column and
# na.rm = TRUE keeps missing values from turning the statistic into NA.
round(vapply(group1, min, numeric(1), na.rm = TRUE), digits = 4)
round(vapply(group1, mean, numeric(1), na.rm = TRUE), digits = 4)
round(vapply(group1, max, numeric(1), na.rm = TRUE), digits = 4)
round(vapply(group1, sd, numeric(1), na.rm = TRUE), digits = 4)

##--------------------------------------------------------- ##
##              VARIABLES USED FOR ANALYSIS 
##--------------------------------------------------------- ##
# Print, one after another, every column that feeds the analysis:
# predictors first (in group order), then the two recoded outcomes.
invisible(lapply(
  c(
    "ou.sample", "age.r", "female", "masculinity",
    "ses", "pol.lean.pa", "start.soc.media.r",
    "type.coll", "soc.circle.drinking.r", "sm.use",
    "in.relationship.r", "rel.search.status", "col.intense.lvl",
    "amount.drink.drunk.r", "vape.how.often.college.r"
  ),
  function(column) print(dd[[column]])
))