There are multiple tools for getting started with R. I explored Anaconda because it gives me the flexibility of using Python in the same IDE. Within Anaconda you can either install RStudio or use a Jupyter notebook.
Once you have installed Anaconda, go to the command prompt and create a new environment:
conda env create -f requirements/my-environment.yml
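The .yml file simply lists the channels and packages the new environment should contain. Here is a minimal sketch of what such a file might look like; the channels and packages shown are assumptions for illustration (r-essentials is a conda bundle of R plus commonly used R packages and the Jupyter R kernel):

name: my-environment
channels:
  - r
  - defaults
dependencies:
  - r-base        # the R interpreter (assumed)
  - r-essentials  # common R packages plus the IRkernel for Jupyter (assumed)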
After that, activate the environment (on newer versions of conda, "conda activate my-environment" also works):
source activate my-environment
Then, if you prefer Jupyter Notebook over RStudio, create a notebook:
jupyter notebook test_R_notebook.ipynb
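Jupyter will only offer R in its kernel list if an R kernel is installed in the active environment. If your environment file did not include one, a minimal way to add it (assuming the r-essentials conda package, which bundles R together with the IRkernel) is:

conda install -c r r-essentials

Alternatively, if the IRkernel package is already installed, running IRkernel::installspec() from inside an R session registers that R installation with Jupyter.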
Once you have R up and running, either in RStudio or a Jupyter notebook, here are a few basic commands to get started.
# Read a file
mydata <- read.csv("path/filename.csv")
# or, if the first row contains column names
mydata <- read.csv("path/filename.csv", header = TRUE)

# Print the data
mydata

# Convert a text column to integers
employeeDataNum$department <- as.numeric(employeeDataNum$department)

# Remove rows with NA values
mydata <- na.omit(mydata)
mydata

# Replace NA with the column average
mydata$column[is.na(mydata$column)] <- round(mean(mydata$column, na.rm = TRUE))

# Plot bars for a set of yes/no columns
data_subset <- mydata[c(7, 8:20)]
data_subset <- ifelse(data_subset == "yes", 1, 0)
barplot(data_subset)

# Box plot
boxplot(mydata$column)

# Check your library paths
Sys.getenv("R_LIBS_USER")

# Install a package
install.packages("AER")
# With dependencies
install.packages("AER", dependencies = TRUE)

# Load a library
library(dplyr)

# Get specific columns
datanew <- mydata[, c(7, 8, 9, 10)]

# Divide the data set into training and test sets
set.seed(4)
inTraining <- sample(2, nrow(mydata), prob = c(0.7, 0.3), replace = TRUE)
trainset <- mydata[inTraining == 1, ]
testset  <- mydata[inTraining == 2, ]

# Fit a linear model on the training data
linearmodel <- lm(Other_players ~ ., data = trainset)
linearmodel

# Predict on the test data
predicted <- predict(linearmodel, testset)

# Plot actual values against predictions
testsubset <- testset[1:100, ]
plot(testsubset$Other_players[1:100], type = "l")
lines(predicted[1:100], col = "red")

# Find correlations among columns
correlation <- cor(mydata)
install.packages("corrplot", dependencies = TRUE)
library(corrplot)
corrplot(correlation, type = "lower")

# Subset data based on a condition
employee_left <- subset(employeeData, left == 1)
employee_left

# More plotting
plot(employeeData$salary)
hist(employeeData$last_evaluation)

# Summary statistics
summary(employeeData)

# Create a decision tree (replace formulacolumn with your target column)
library(rpart)
my_tree <- rpart(formula = formulacolumn ~ ., data = traindata)
plot(my_tree, margin = 0.1)
text(my_tree, pretty = TRUE, cex = 0.7)

# Confusion matrix
predtree <- predict(my_tree, testdata, type = "class")
install.packages("e1071", dependencies = TRUE)  # caret's confusionMatrix needs e1071
library(caret)
confusionMatrix(table(predtree, testdata$left))

# Using random forest for analysis
library(randomForest)
employee_forest <- randomForest(left ~ ., data = traindata)
predforest <- predict(employee_forest, testdata, type = "class")
confusionMatrix(table(predforest, testdata$left))

# Using naive Bayes
library(e1071)
employee_naive <- naiveBayes(left ~ ., data = traindata)
pred_naive <- predict(employee_naive, testdata, type = "class")
confusionMatrix(table(pred_naive, testdata$left))

# Using SVM
employee_svm <- svm(left ~ ., data = traindata)
pred_svm <- predict(employee_svm, testdata, type = "class")
confusionMatrix(table(pred_svm, testdata$left))