R Orientation
# A vector holding the value 1 repeated 100 times.
rep(1, times = 100)

# The even numbers from 0 to 50, kept under a name for reuse.
my_object <- seq(from = 0, to = 50, by = 2)

# Plot the stored values against their position in the vector.
plot(my_object)
# Show the current working directory.
getwd()

# setwd() requires a path; calling it with no argument is an error.
# Replace the value below with your own folder, e.g. "C:/Users/me/project".
# Use forward slashes even on Windows.
project_dir <- getwd()
setwd(project_dir)
# Each reader package is installed only when it is missing, so re-running the
# script does not download it again.

# Excel workbooks: readxl
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
read_excel("data.xlsx")

# CSV files: base R, no extra package needed
read.csv("data.csv")

# Stata .dta files: foreign
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}
library(foreign)
read.dta("data.dta")

# SPSS .sav files: haven
if (!requireNamespace("haven", quietly = TRUE)) {
  install.packages("haven")
}
library(haven)
read_sav("data.sav")
# Open the help page for sample().
help("sample")

# Draw 100 values from {0, 1} with replacement.
sample(x = c(0, 1), size = 100, replace = TRUE)
Import Data
# Read the CSV and display it; header = TRUE means the first line of the file
# supplies the column names.
read.csv(file = "data.csv", header = TRUE)

# Read the same file with the general-purpose reader, naming the comma
# separator explicitly, and keep the result for the steps that follow.
sample_csv <- read.table(file = "data.csv", header = TRUE, sep = ",")
Assess Data
# Quick inspection of the imported data frame `sample_csv`.
summary(sample_csv)   # per-column summary
nrow(sample_csv)      # number of rows

# max() and min() accept a data frame whose columns are all numeric and return
# a single overall value.
max(sample_csv)
min(sample_csv)

# mean() and median() do not work on a whole data frame (mean() returns NA
# with a warning, median() stops with an error), so compute them per column.
colMeans(sample_csv)
vapply(sample_csv, median, numeric(1))

# Inspect the single column E.
unique(sample_csv$E)
sort(sample_csv$E)
length(sample_csv$E)

# `==` tests equality; a single `=` inside a call is not a comparison.
which(sample_csv$E == 0)   # positions of the zeros
sum(sample_csv$E == 0)     # number of zeros (length() would count every element)

# Check which kind of object we are holding.
is.matrix(sample_csv)
is.data.frame(sample_csv)
Manipulate Data
# Convert the data frame into a matrix.
as.matrix(x = sample_csv)
# Build a 2 x 3 matrix of ones and store it as `a`; every example below uses it.
a <- matrix(1, nrow = 2, ncol = 3)

as.data.frame(a)       # convert a matrix into a data frame
t(a)                   # transpose
a[1, ]                 # select the first row
a[-1, ]                # everything except the first row (`a` itself is unchanged)
rbind(a, c(2, 2, 2))   # add a row (one value per column)
cbind(a, c(2, 3))      # add a column (one value per row)
Sampling, for-loops, and the apply function
# --- Sampling from a vector --------------------------------------------------
states <- c("Cairo", "New York", "Nairobi")
sample(x = states, size = 10, replace = TRUE)   # with replacement: repeats allowed
sample(x = states, size = 2, replace = FALSE)   # without replacement: no repeats

# Draws from the standard normal distribution, and a density plot of many draws.
rnorm(n = 100)
plot(density(rnorm(n = 100000)))

# --- A for-loop: 100 coin tosses ---------------------------------------------
coin <- c("Heads", "Tails")
toss <- character(100)   # allocate the result once instead of growing it
for (i in seq_along(toss)) {
  toss[i] <- sample(x = coin, size = 1)
}
table(toss)

# --- Filling a matrix row by row ---------------------------------------------
marital <- c("married", "single")
income <- 1:4
results <- matrix(nrow = 100, ncol = 3, data = NA)
colnames(results) <- c("marital", "income", "state")
head(results)
for (i in seq_len(nrow(results))) {
  results[i, 1] <- sample(marital, size = 1)
  # A matrix holds a single type, so the income codes are stored as text.
  results[i, 2] <- sample(income, size = 1)
  results[i, 3] <- sample(states, size = 1)
}
head(results)

# --- apply(): tabulate every column (MARGIN = 2 means "by column") ------------
apply(X = results, MARGIN = 2, FUN = table)
Introduction to the Class lm
# Toy data: three observations of a predictor and a response.
height <- c(1, 2, 3)
bodymass <- c(5, 6, 7)

# In a model formula the response is on the left of `~`, the predictor on the right.
fit <- lm(bodymass ~ height)

# Inspect the fitted object: its class, the regression summary, and the
# components it stores.
class(fit)
summary(fit)
names(fit)

# In plot() the order is reversed: predictor (x) first, response (y) second.
plot(x = height, y = bodymass)

# Overlay the fitted regression line in red.
abline(reg = fit, col = "red")
Run IV regression using ivreg (from AER package)
# Load the AER package
library(AER)
# General IV regression syntax:
# NOTE: this is a template. The variable names, `your_data_frame` and the `...`
# are placeholders to be replaced with real column names and data before running.
iv_model <- ivreg(
# Left of `|`: the outcome regressed on the endogenous variable plus controls.
formula = dependent_var ~ endogenous_var + control_var1 + control_var2 + ... |
# Right of `|`: the instrument(s), with the same controls listed again.
instrument_var + control_var1 + control_var2 + ...,
data = your_data_frame
)
# Summary of the model
summary(iv_model)
# The same formula layout in short form, shown for reference only.
dependent_var ~ endogenous_var + controls | instruments + controls
# as.factor() converts a numeric or character vector into a factor, which is
# R's way of handling categorical variables (discrete groups, not continuous
# numbers).
x <- c(1980, 1981, 1982, 1980, 1981)
y <- as.factor(x)

# Print the factor: the values are shown together with the distinct levels.
y
#> [1] 1980 1981 1982 1980 1981
#> Levels: 1980 1981 1982
Comments
Post a Comment