CONTENT
- WAP to create a vector and perform task on that in R:
- WAP to create a list and perform following task on that in R:
- WAP to create a 4X4 matrix and perform following task on that in R:
- WAP to create an array and perform following task on that in R:
- WAP to create a data frame and perform following task on that in R:
- WAP to calculate the summary of any dataset in R.
- WAP to create factors and perform all possible tasks on factors in R.
- WAP to calculate statistical measures (mean, median, min and max) in R.
- WAP to import a data set from the internet directly or through a file in R.
- WAP to find the outliers in R.
- WAP to implement data visualization using Histogram and Box plot in R.
- WAP to implement the data manipulation with dplyr package in R.
- WAP to implement the data visualization with ggplot2.
- WAP to develop an application to perform classification using a decision tree in R.
- WAP to perform classification using KNN in R.
- WAP to perform clustering using K-means in R.
- WAP to fill missing values in a dataset using mean in R.
- WAP to extract association rules using Apriori Algorithm in R.
- WAP to develop an application to design star schemas from fact tables in R.
1.
Will be written soon...
2.
Will be written soon...
3.
Will be written soon...
4.
Will be written soon...
5.
Will be written soon...
6.
Will be written soon...
7.
Will be written soon...
8.A: Write a program to create a vector by combining numerical values and by generating numerical values in a sequence.
Will be written soon...
8.B: Write a program to create a vector and find the length of the vector, sort the vector, and access a particular index value in the vector.
Will be written soon...
9.A: Write a program to create a list and access lists, change item value, list length, check if item exists.
# Print every item of `l` on the current line, each one followed by spaces.
for_print <- function(l) {
  for (item in l) {
    cat(item, " ")
  }
}

# Build a six-item list and display it.
lst <- list(1, 2, 3, 4, 5, 6)
cat("list : ")
for_print(lst)

# Replace the second item, then display the list again with its length.
cat("\nAfter changing lst[2] = 10\nList : ")
lst[2] <- 10
for_print(lst)
cat("\nList length : ", length(lst))

# Read a value from the user and report whether the list contains it.
n <- as.integer(readline(prompt = "Enter a list element : "))
is_member <- n %in% lst
if (!is_member) {
  cat("Element is not present in list")
} else {
  cat("Element is present in list")
}
9.B: Write a program to create a list and add list items, remove list items.
# Print every item of `l` on the current line, each one followed by spaces.
for_print <- function(l) {
  for (item in l) {
    cat(item, " ")
  }
}

# Start from a six-item list.
lst <- list(1, 2, 3, 4, 5, 6)
cat("list : ")
for_print(lst)

# Add one item at the end of the list.
cat("\nAfter adding a item in the list : ")
lst <- c(lst, 20)
for_print(lst)

# Remove the last item again.
cat("\nAfter deleting an item from the list : ")
lst <- head(lst, -1)
for_print(lst)
9.C: Write a program to create a list and range of indexes, loop through a list, join two lists.
# Print every item of `l` on the current line, each one followed by spaces.
for_print <- function(l) {
  for (item in l) {
    cat(item, " ")
  }
}

# Build the first list and display it.
lst <- list(1, 2, 3, 4, 5, 6)
cat("list : ")
for_print(lst)

# Report the valid index range, then walk through the list.
cat("\nList index ranges : 1 to", length(lst))
cat("\nLoop through a list : ")
for_print(lst)

# Join a second list onto the end of the first one.
cat("\nAfter adding another list in above list : ")
lst2 <- list(20, 21, 22)
lst <- c(lst, lst2)
for_print(lst)
10.A: Write a program to create a 4X4 matrix and access matrix items, access more than one row, and access more than one column.
Will be written soon...
10.B: Write a program to create a 4X4 matrix and add rows and columns, remove rows and columns.
Will be written soon...
10.C: Write a program to create a 4X4 matrix and check if an item exists, amount of rows and columns, matrix length.
# Create the 4x4 matrix the exercise asks for (values 1 to 16).
x <- matrix(1:16, nrow = 4, ncol = 4)
print(x)

# Read a value from the user and report whether it occurs in the matrix.
n <- as.integer(readline(prompt = "Enter any matrix element : "))
if (n %in% x) {
  cat(n, "is present in matrix")
} else {
  cat(n, "is not present in matrix")
}

# Report the dimensions and the total number of cells.
cat("\nRows in matrix :", nrow(x))
cat("\nColumns in matrix :", ncol(x))
cat("\nMatrix length :", length(x))
10.D: Write a program to create a 4X4 matrix and loop through a matrix, combine two matrices.
# Create the 4x4 matrix the exercise asks for (values 1 to 16).
x <- matrix(1:16, nrow = 4, ncol = 4)

# Walk the matrix row by row with nested for loops.
# seq_len() is used so the loops are skipped if a dimension is ever zero.
cat("Display matrix using for loop :\n")
for (i in seq_len(nrow(x))) {
  for (j in seq_len(ncol(x))) {
    cat(x[i, j], " ")
  }
  cat("\n")
}

# Same traversal written with while loops.
cat("\nDisplay matrix using while loop :\n")
i <- 1
while (i <= nrow(x)) {
  j <- 1
  while (j <= ncol(x)) {
    cat(x[i, j], " ")
    j <- j + 1
  }
  i <- i + 1
  cat("\n")
}

# Build a second 4x4 matrix and join the two side by side (column-wise).
y <- matrix(17:32, nrow = 4, ncol = 4)
cat("\nCombining X and Y matrix :\n")
print(cbind(x, y))
11.A: Write a program to create an array and access array items, check if an item exists.
# Build a 3x3 array holding the values 1 to 9 and display it.
x <- array(1:9, dim = c(3, 3))
cat("Array X :\n")
print(x)

# Read a value from the user and report whether it occurs in the array.
n <- as.integer(readline(prompt = "Enter array element : "))
is_member <- n %in% x
if (!is_member) {
  cat(n, "is not present in array")
} else {
  cat(n, "is present in array")
}

# Access a single item by its row and column index.
cat("\nElement at 2nd row and 2 column in array :", x[2, 2])
11.B: Write a program to create an array and amount of rows and columns, array length, loop through an array.
# Build a 3x3 array holding the values 1 to 9.
x <- array(1:9, dim = c(3, 3))

# Print the array one row per line, visiting each row's values in order.
cat("Display array using loop :\n")
for (row_index in seq_len(nrow(x))) {
  for (value in x[row_index, ]) {
    cat(value, " ")
  }
  cat("\n")
}

# Report the dimensions and the total number of items.
cat("\nTotal rows :", nrow(x), "and total columns :", ncol(x))
cat("\nLength of array :", length(x))
12.A: Write a program to create a data frame and summarize the data, access items, add rows, add columns.
# Build a small data frame of students.
students <- data.frame(
  Name = c("Parvej Ali", "Rihan Ali", "Abc"),
  Enrollment = c(56, 56, 100),
  Program = c("B.Tech", "B.Tech", "B.Tech")
)

# Summarize the data and access a single column.
cat("Summary of dataframe :\n")
print(summary(students))
cat("\nNames column from dataframe :\n")
print(students$Name)

# Add a column.
cat("\nAdd one more column :\n")
students$Semester <- c(6, 7, 5)
print(students)

# Add a row. The new row is supplied as a one-row data frame so that each
# value keeps its own type; binding a plain c("Def", 101, ...) vector would
# turn the numeric Enrollment and Semester columns into character columns.
cat("\nAdd one more row :\n")
new_student <- data.frame(
  Name = "Def",
  Enrollment = 101,
  Program = "B.Tech",
  Semester = 4
)
students <- rbind(students, new_student)
print(students)
12.B: Write a program to create a data frame and remove rows and columns, amount of rows and columns.
# Build a small data frame of students and display it.
students <- data.frame(
  Name = c("Parvej Ali", "Rihan Ali", "Abc"),
  Enrollment = c(56, 56, 100),
  Program = c("B.Tech", "B.Tech", "B.Tech")
)
print(students)

# Drop the third row first, then the third column.
cat("\nRemoving 3rd row and 3rd column from dataframe :\n")
students <- students[-3, ]
students <- students[, -3]
print(students)

# Report the remaining dimensions.
cat("\nTotal rows :", nrow(students), "and total columns :", ncol(students))
12.C: Write a program to create a data frame and data frame length, combining data frames.
# Build a small data frame of students and display it.
students <- data.frame(
  Name = c("Parvej Ali", "Rihan Ali", "Abc"),
  Enrollment = c(56, 56, 100),
  Program = c("B.Tech", "B.Tech", "B.Tech")
)
print(students)

# length() of a data frame is its number of columns.
cat("\nDataframe length :", length(students))

# Join a second data frame on Enrollment, keeping unmatched rows from both.
cat("\n\nCombine two dataframes :\n")
students2 <- data.frame(
  Enrollment = c(56, 101, 80),
  Semester = c(6, 7, 5)
)
students <- merge(students, students2, by = "Enrollment", all = TRUE)
print(students)
13. Write a program to calculate the summary of any dataset.
# Take the built-in `attitude` data set, preview its first rows, and print
# the per-column summary.
x <- datasets::attitude
first_rows <- head(x)
print(first_rows)
column_summary <- summary(x)
print(column_summary)
14. Write a program to create factors and perform all possible tasks on factors.
# Print the values followed by whether they are stored as a factor.
describe_values <- function(values) {
  cat(values, "\nIs this a factor :", is.factor(values))
}

# Start with a plain numeric vector.
v <- c(1, 2, 3, 4, 3, 2, 4, 5, 5)
cat("Original vector.\n")
describe_values(v)

# Convert it to a factor and inspect it again.
cat("\n\nAfter making it factor :\n")
v <- factor(v)
describe_values(v)

# Show the levels and the internal structure of the factor.
cat("\nLevels of factor of the said vector.\n")
cat(levels(v), "\n\n")
str(v)

# Append one extra level to the existing levels.
cat("\nAfter adding one more level :\n")
levels(v) <- c(levels(v), "0")
str(v)
15. WAP to calculate statistical measures (mean, median, min and max) in R.
# Compute the basic statistical measures of a numeric vector, then print them.
x <- c(1.4, 5.66, 7.13, 9.21)
largest <- max(x)
smallest <- min(x)
average <- mean(x)
middle <- median(x)
cat("Max :", largest)
cat("\nMin :", smallest)
cat("\nMean :", average)
cat("\nMedian :", middle)
16. WAP to import a data set from the internet directly or through a file in R.
# Import a data set from a local CSV file and preview its first four rows.
cat("Student data which is stored offline...\n")
# Switch to the folder that holds the CSV file before reading it.
setwd("C:\\Users\\Parvej Ali\\Documents")
student <- read.csv(file = "Student_data.csv")
print(head(student, n = 4))
# Alternative (disabled): read a CSV file directly from a URL.
#cat("\n\nNational labour force projection data which is online...\n")
#df <- read.csv("https://www.stats.govt.nz/assets/Uploads/National-labour-force-projections/National-labour-force-projections-2020base2073/Download-data/National-labour-force-projections-2020base-2073.csv.csv")
#print(head(df))
17. WAP to find the outliers in R.
# Find the outliers of the sale prices. boxplot() draws the plot and returns
# its statistics; the `out` component holds the points lying beyond the
# whiskers.
setwd("C:\\Users\\Parvej Ali\\Documents")
df <- read.csv("Raw_Housing_Prices.csv")
OutVals <- boxplot(df$Sale.Price)$out
print(OutVals)

# Scatter plot of a second data set so unusual points can be spotted by eye.
# plot() is called only for its drawing side effect: it returns NULL, so
# storing and printing its result would just print "NULL".
setwd("D:\\Parvej Ali\\B.Tech(C.S)\\Data Warehousing & Data Mining With R - Programming\\Assignment 3")
df <- read.csv("abc.csv")
plot(df$House_Price, df$Area)
18. WAP to implement data visualization using Histogram and Box plot in R.
# Draw a box plot and a histogram of the Sale.Price column side by side.
# Switch to the folder that holds the CSV file, then load it.
setwd("C:\\Users\\Parvej Ali\\Documents")
df <- read.csv("Raw_Housing_Prices.csv")
# Split the plotting device into one row with two panels.
par(mfrow=c(1,2))
# Left panel: box plot; right panel: histogram of the same column.
boxplot(df$Sale.Price)
hist(df$Sale.Price)
19. WAP to implement the data manipulation with dplyr package in R.
# Data manipulation on the built-in iris data set with dplyr.
library("dplyr")
data("iris")

# Overall summary of every column.
cat("Summary of iris :\n")
print(summary(iris))

# Three random rows picked by dplyr.
cat("\nSample_n of iris :\n")
print(sample_n(iris, 3))

# Three random rows picked by sampling row numbers in base R.
index <- sample(seq_len(nrow(iris)), 3)
cat("\nRandom Sample data from iris:\n")
print(iris[index, ])

# How many rows each species has.
cat("\nFrequency table for species:\n")
print(table(iris$Species))

# First three rows of the setosa species only.
cat("\nFilter data by species = setosa:\n")
setosa_rows <- iris %>%
  filter(Species == "setosa") %>%
  head(3)
print(setosa_rows)
20. WAP to implement the data visualization with ggplot2.
# Scatter plot of horsepower against miles per gallon for mtcars, coloured by
# cylinder count, with a red linear-model smoothing line on top.
library(ggplot2)
library(dplyr)
scatter <- ggplot(data = mtcars, aes(x = hp, y = mpg, col = factor(cyl)))
scatter <- scatter + geom_point()
scatter <- scatter + stat_smooth(method = lm, col = "red")
print(scatter)
21. WAP to develop an application to perform classification using a decision tree in R.
# Classification with a conditional inference tree on readingSkills.
library(datasets)
library(caTools)
library(party)
library(dplyr)
library(magrittr)

data("readingSkills")
print(head(readingSkills))

# sample.split() expects the vector of labels, one entry per observation.
# Passing the whole data frame yields one flag per column, which subset()
# then recycles over the rows instead of producing a per-row split.
sample_data <- sample.split(readingSkills$nativeSpeaker, SplitRatio = 0.8)
train_data <- subset(readingSkills, sample_data == TRUE)
test_data <- subset(readingSkills, sample_data == FALSE)

# Fit the tree on the training rows and draw it.
model <- ctree(nativeSpeaker ~ ., train_data)
plot(model)
22. WAP to perform classification using KNN in R.
# K-nearest-neighbour classification of iris for several values of k.
library(e1071)
library(caTools)
library(class)

data(iris)
cat("# Iris dataset :\n\n")
print(head(iris))

# sample.split() expects the vector of labels, one entry per observation.
# Passing the whole data frame yields one flag per column, which is then
# recycled over the rows instead of producing a per-row split.
split <- sample.split(iris$Species, SplitRatio = 0.7)
train_cl <- subset(iris, split == TRUE)
test_cl <- subset(iris, split == FALSE)

# Scale the training features, then apply the SAME centre and scale to the
# test features so both sets live in one coordinate system.
train_scale <- scale(train_cl[, 1:4])
test_scale <- scale(
  test_cl[, 1:4],
  center = attr(train_scale, "scaled:center"),
  scale = attr(train_scale, "scaled:scale")
)

# Fit one classifier per k, in the listed order.
k_values <- c(1, 3, 5, 7, 15, 19)
predictions <- lapply(k_values, function(k) {
  knn(
    train = train_scale,
    test = test_scale,
    cl = train_cl$Species,
    k = k
  )
})

# Confusion matrix for the first classifier (k = 1).
classifier_knn <- predictions[[1]]
cm <- table(test_cl$Species, classifier_knn)
cat("\n# Confusiin Matrix :\n\n")
print(cm)

# Accuracy for every k.
cat("\n\n# Accuracy :\n\n")
for (position in seq_along(k_values)) {
  classifier_knn <- predictions[[position]]
  misClassError <- mean(classifier_knn != test_cl$Species)
  print(paste0("Accuracy (K = ", k_values[position], ") = ", 1 - misClassError))
}
23. WAP to perform clustering using K-means in R.
# K-means clustering of the iris measurements into three clusters.
library(ClusterR)
library(cluster)

data(iris)

# Drop the fifth column (Species) so only the measurements are clustered.
iris_1 <- iris[, -5]

# Fixed seed, then k-means with 3 centres and 20 random starts.
set.seed(240)
kmeans.re <- kmeans(iris_1, centers = 3, nstart = 20)

# Two plots side by side.
par(mfrow = c(1, 2))

# Cross-tabulate the species against the assigned cluster.
cm <- table(iris$Species, kmeans.re$cluster)
cat("Confusion Matrix :\n")
print(cm)

# Left plot: sepal length/width coloured by cluster, with the cluster
# centres overlaid.
plot(
  iris_1[c("Sepal.Length", "Sepal.Width")],
  col = kmeans.re$cluster,
  main = "K-means with 3 clusters"
)
points(
  kmeans.re$centers[, c("Sepal.Length", "Sepal.Width")],
  col = 1:3,
  pch = 8,
  cex = 3
)

# Right plot: cluster plot of the same two columns.
y_kmeans <- kmeans.re$cluster
clusplot(
  iris_1[, c("Sepal.Length", "Sepal.Width")],
  y_kmeans,
  lines = 0,
  shade = TRUE,
  color = TRUE,
  labels = 2,
  plotchar = FALSE,
  span = TRUE,
  main = "Cluster iris",
  xlab = "Sepal.Length",
  ylab = "Sepal.Width"
)
24. WAP to fill missing values in a dataset using mean in R.
# Build a data frame that has missing values in every column.
data <- data.frame(
  marks1 = c(NA, 22, NA, 49, 75),
  marks2 = c(81, 14, NA, 61, 12),
  marks3 = c(78.5, 19.325, NA, 28, 48.002)
)
cat("# Dataframe with missing values :\n\n")
print(data)

# Column means computed over the non-missing values only.
mean_val <- colMeans(data, na.rm = TRUE)

# Replace each column's missing entries with that column's mean.
for (column in names(data)) {
  missing_rows <- is.na(data[[column]])
  data[[column]][missing_rows] <- mean_val[[column]]
}

cat("\n\n# Dataframe without missing values :\n\n")
print(data)
25. WAP to extract association rules using Apriori Algorithm in R.
# Mine association rules from the Groceries transactions with Apriori.
library(arules)
library(RColorBrewer)

data("Groceries")

# Keep rules with support >= 0.01 and confidence >= 0.2.
rules <- apriori(
  Groceries,
  parameter = list(supp = 0.01, conf = 0.2)
)

# inspect() prints the rules itself, so it is not wrapped in print();
# printing its return value would only add duplicate or NULL output.
cat("\n\nRules :\n\n")
inspect(rules[1:10])

# Bar chart of the 20 most frequent items.
arules::itemFrequencyPlot(
  Groceries,
  topN = 20,
  col = brewer.pal(8, "Pastel2"),
  main = "Relative Item Frequency Plot",
  type = "relative",
  ylab = "Item Frequency (Relative)"
)
26. WAP to develop an application to design star schemas from fact tables in R.