Data Science

Monday, 31 October 2016

Hacker Rank > 30 Days of Code > Solutions in R

HackerRank introduced the 30 Days of Code challenge to help you brush up your skills for interviews.
This post contains solutions for the problems that can be submitted in R.
Better solutions than the ones below are welcome.

#Day -2 Problem - https://www.hackerrank.com/challenges/30-operators
#Solution -
# Read the meal cost, the tip percentage and the tax percentage (one value
# per line) from standard input and report the rounded total bill.
input <- file("stdin", "r")
read_amount <- function() as.double(readLines(input, n = 1))
meal_cost <- read_amount()
tip_percent <- read_amount()
tax_percent <- read_amount()

# Tip and tax are both percentages of the base meal cost.
tip_amount <- (meal_cost * tip_percent) / 100
tax_amount <- (meal_cost * tax_percent) / 100
total_cost <- round(meal_cost + tip_amount + tax_amount)

cat("The total meal cost is", total_cost, "dollars.")

#Day -3 Problem - https://www.hackerrank.com/challenges/30-conditional-statements
#Solution -
# Read one integer from standard input and classify it as "Weird" or
# "Not Weird".
input <- file("stdin", "r")
n <- as.integer(readLines(input, n = 1))

# Odd numbers are always weird; among the even numbers only those in the
# inclusive range 6..20 are weird.
is_odd <- (n %% 2) == 1
if (is_odd || (n >= 6 && n <= 20)) {
  cat("Weird")
} else {
  cat("Not Weird")
}

#Day - 6 Problem - https://www.hackerrank.com/challenges/30-review-loop
#Solution -
# The first input line holds the number of test strings; each following line
# is one string. For every string, print the characters at positions
# 1, 3, 5, ... followed by a tab and then the characters at positions
# 2, 4, 6, ...
input <- file("stdin", "r")
lines_in <- readLines(input)
last_row <- as.integer(lines_in[1]) + 1
for (word in lines_in[2:last_row]) {
  chars <- strsplit(word, "")[[1]]
  # Logical index vectors are recycled, so these pick every other character.
  odd_chars <- chars[c(TRUE, FALSE)]
  even_chars <- chars[c(FALSE, TRUE)]
  cat(paste(odd_chars, collapse = ""), "\t",
      paste(even_chars, collapse = ""), "\n", sep = "")
}

#Day - 7 Problem - https://www.hackerrank.com/challenges/30-arrays
#Solution -
# Line 1: element count; line 2: the space-separated integers.
# Print the elements in reverse order, separated by single spaces.
input <- file("stdin", "r")
raw_lines <- readLines(input)
n_items <- as.integer(raw_lines[1])
values <- as.integer(strsplit(raw_lines[2], " ")[[1]])

# array() recycles or truncates `values` to exactly n_items entries before
# the order is reversed.
cat(rev(array(values, c(1, n_items))))

#Day - 9 Problem - https://www.hackerrank.com/challenges/30-recursion
#Solution -
# Open standard input and read its first line as the integer whose factorial
# is wanted.
input <- file('stdin','r')
x <- as.integer(readLines(input, n=1))
#' Print the factorial of a non-negative whole number, computed recursively.
#'
#' @param n A single non-negative whole number.
#' @return Invisibly, the factorial of `n` as a double. The value is also
#'   printed, which is what callers of this script rely on.
recur_factorial <- function(n) {
  stopifnot(length(n) == 1, !is.na(n), n >= 0)

  # The recursive worker is local, so no accumulator has to live in the
  # global environment under the name `factorial` (which would mask
  # base::factorial for everything that runs afterwards).
  fact <- function(k) {
    if (k <= 1) {
      return(1)
    }
    k * fact(k - 1)
  }

  print(fact(n))
}

# Print the factorial of x.
recur_factorial(x)

#Day - 10 Problem - https://www.hackerrank.com/challenges/30-binary-numbers
#Solution -
# Read a non-negative integer from standard input and print the length of the
# longest run of consecutive 1s in its binary representation.
input <- file("stdin", "r")
x <- as.integer(readLines(input, n = 1))
close(input)
stopifnot(length(x) == 1, !is.na(x), x >= 0)

# intToBits() yields the 32 binary digits of x (least significant first), so
# no digit vector has to be grown inside a loop. rle() then reports the
# length of every run of identical digits.
runs <- rle(as.integer(intToBits(x)))

# max(0L, ...) covers x == 0, where there is no run of 1s at all; looping
# over 1:length(digits) would fail there because the digit vector is empty.
maxcount <- max(0L, runs$lengths[runs$values == 1L])
print(maxcount)

#Day - 11 Problem - https://www.hackerrank.com/challenges/30-2d-arrays
#Solution -
# Read a grid of integers (the challenge uses 6 x 6) from standard input and
# print the largest "hourglass" sum. An hourglass anchored at cell (i, j) is
# the 7-cell pattern
#   a b c
#     d
#   e f g
input <- file("stdin", "r")
rows <- readLines(input)
close(input)

# Drop blank lines (e.g. a stray trailing newline) so they are not mistaken
# for grid rows, and size the matrix from the data instead of assuming that
# exactly six lines were read.
rows <- trimws(rows)
rows <- rows[nzchar(rows)]
grid <- do.call(rbind, lapply(strsplit(rows, "[[:space:]]+"), as.integer))
stopifnot(is.matrix(grid), nrow(grid) >= 3, ncol(grid) >= 3)

# Start below every attainable sum rather than at the constant -9 * 7, which
# is only a valid lower bound while every cell is >= -9.
best_sum <- -Inf
for (i in seq_len(nrow(grid) - 2)) {
  for (j in seq_len(ncol(grid) - 2)) {
    hourglass <- sum(grid[i, j:(j + 2)],
                     grid[i + 1, j + 1],
                     grid[i + 2, j:(j + 2)])
    if (hourglass > best_sum) {
      best_sum <- hourglass
    }
  }
}
cat(best_sum)

#Day - 20 Problem - https://www.hackerrank.com/challenges/30-sorting
#Solution -
# Bubble-sort the integers given on the second input line (the first line
# holds their count) and report the total number of swaps performed together
# with the first and last elements of the sorted array.
input <- file("stdin", "r")
x <- readLines(input)
close(input)
y <- as.integer(x[1])
a <- as.integer(strsplit(x[2], " ")[[1]])

numberOfSwaps <- 0
# Sorting is only attempted for sizes inside the bounds 2..600.
if (y >= 2 && y <= 600) {
  for (pass in seq_len(y)) {
    # Swaps made during this traversal only. Testing the running total
    # instead would make the early exit below unreachable as soon as a single
    # swap has happened, forcing all y passes.
    swapsThisPass <- 0
    for (j in seq_len(y - 1)) {
      # Swap adjacent elements if they are in decreasing order
      if (a[j] > a[j + 1]) {
        a[c(j, j + 1)] <- a[c(j + 1, j)]
        swapsThisPass <- swapsThisPass + 1
      }
    }
    numberOfSwaps <- numberOfSwaps + swapsThisPass
    # If no elements were swapped during a traversal, the array is sorted
    if (swapsThisPass == 0) {
      break
    }
  }
}
cat("Array is sorted in", numberOfSwaps, "swaps.\n")
cat("First Element:", a[1], "\n")
cat("Last Element:", a[y], "\n")

#Day 26 Problem - https://www.hackerrank.com/challenges/30-nested-logic
#Solution -
# Line 1: actual return date, line 2: expected return date, each given as
# "day month year". Prints the fine for a late return.
input <- file("stdin", "r")
x <- readLines(input)
close(input)
a <- as.integer(strsplit(x[1], " ")[[1]])  # returned: day, month, year
b <- as.integer(strsplit(x[2], " ")[[1]])  # expected: day, month, year

# TRUE when every supplied value lies within [lower, upper].
in_range <- function(values, lower, upper) {
  all(values >= lower & values <= upper)
}

fine <- 0
# Dates outside these bounds are not evaluated and leave the fine at 0.
if (in_range(c(a[1], b[1]), 1, 31) &&
    in_range(c(a[2], b[2]), 1, 12) &&
    in_range(c(a[3], b[3]), 1, 3000)) {
  if (a[3] > b[3]) {
    # Returned in a later calendar year: flat fine.
    fine <- 10000
  } else if (a[3] == b[3] && a[2] > b[2]) {
    # Same year, later month: 500 per month late.
    fine <- (a[2] - b[2]) * 500
  } else if (a[3] == b[3] && a[2] == b[2] && a[1] > b[1]) {
    # Same month and year, later day: 15 per day late.
    fine <- (a[1] - b[1]) * 15
  }
}
cat(fine)

#Day -27 Problem - https://www.hackerrank.com/challenges/30-testing
#Solution -
# Emit the hand-written test data: first the number of test cases, then for
# each case one line with two integers followed by one line with a list of
# integers. Every line keeps its trailing space before the newline.
test_lines <- c(
  "5 ",
  "4 3 ",
  "0 -3 4 2 ",
  "5 2 ",
  "0 -3 4 2 2 ",
  "6 3 ",
  "0 -3 4 2 1 1 ",
  "7 2 ",
  "0 -3 1 1 1 1 1 ",
  "3 3 ",
  "0 -3 4 "
)
writeLines(test_lines)

#Day - 28 Problem - https://www.hackerrank.com/challenges/30-regex-patterns
#Solution -
# Line 1 holds the number of records; each following line is
# "<first name> <email>". Print, in sorted order, the first names whose
# address ends in "@gmail.com", one per line.
input <- file("stdin", "r")
x <- readLines(input)
close(input)
y <- as.integer(x[1])

records <- strsplit(x[seq_len(y) + 1], " ")
first_names <- vapply(records, function(r) r[1], character(1))
emails <- vapply(records, function(r) r[2], character(1))

# Match against the e-mail field only, with the dot escaped and the pattern
# anchored at the end, so neither a name nor an address such as
# "user@gmailer.org" can produce a hit.
z <- sort(first_names[grepl("@gmail\\.com$", emails)])

# seq_along() yields no iterations when nothing matched; 1:length(z) would
# run over c(1, 0) and print "NA".
for (i in seq_along(z)) {
  cat(z[i], "\n")
}

#Day - 29 Problem - https://www.hackerrank.com/challenges/30-bitwise-and
#Solution -
# Line 1 holds the number of test cases; each following line holds two
# integers n and k. For every case print the largest value of (A & B) that is
# strictly less than k over all pairs 1 <= A < B <= n, or 0 if there is none.

#' Largest A & B below k for 1 <= A < B <= n, computed in constant time.
#'
#' @param n Upper bound of the pair (integer).
#' @param k Exclusive upper bound for the AND result (integer).
#' @return The best attainable value, or 0 when no pair qualifies.
max_and_below <- function(n, k) {
  # A & B <= A <= n - 1, so any bound above n is equivalent to k == n.
  k <- min(k, n)
  if (n < 2 || k < 1) {
    return(0)
  }
  # k - 1 can only be produced as (k - 1) & B, where B is the smallest number
  # above k - 1 that contains all of its bits, i.e. (k - 1) | k. When that B
  # exceeds n, k is necessarily even and (k - 2) & (k - 1) == k - 2 is the
  # best remaining candidate.
  if (bitwOr(k - 1L, k) <= n) k - 1L else k - 2L
}

input <- file("stdin", "r")
a <- readLines(input)
close(input)
b <- as.integer(a[1])
for (i in seq_len(b)) {
  pair <- as.integer(strsplit(a[i + 1], " ")[[1]])
  cat(max_and_below(pair[1], pair[2]), "\n")
}

Thursday, 27 October 2016

Analysis of face emotions using R and Microsoft api

#Install r and rstudio to run the following r script.
https://www.rstudio.com/products/rstudio/download/
https://cran.rstudio.com/

# Install relevant packages
install.packages("httr")
install.packages("XML")
install.packages("stringr")
install.packages("ggplot2")

#Load relevant packages
library("httr")
library("XML")
library("stringr")
library("ggplot2")

# NOTE: the workspace is deliberately not cleared with rm(list = ls()) here.
# The script defines every object it uses, and wiping the reader's session
# would silently destroy unrelated work.

# Define the image to analyse
img.url <- "http://images.christianpost.com/full/49973/actress-megan-fox-arrives-on-the-red-carpet-for-the-film-friends-with-kids-during-the-36th-toronto-international-film-festival-tiff-in-toronto-september-9-2011.jpg"

# Define Microsoft API URL to request data
URL.emoface <- 'https://api.projectoxford.ai/emotion/v1.0/recognize'

# Define access key (access key is available via: https://www.microsoft.com/cognitive-services/en-us/emotion-api)
emotionKEY <- 'xxx'

# Request body: the API is given the image by URL
mybody <- list(url = img.url)

# Request data from Microsoft
faceEMO <- POST(
  url = URL.emoface,
  content_type('application/json'),
  add_headers(.headers = c('Ocp-Apim-Subscription-Key' = emotionKEY)),
  body = mybody,
  encode = 'json'
)

# Show request results (if Status=200, request is okay)
faceEMO

# Turn an unsuccessful HTTP response (bad key, quota, unreachable image, ...)
# into an R error here instead of failing later with an obscure message.
stop_for_status(faceEMO)

# Request results from face analysis: use the first face in the response
faces <- content(faceEMO)
if (length(faces) == 0) {
  stop("The API response does not contain any detected face.", call. = FALSE)
}
megan <- faces[[1]]

# Put the scores into a one-column data frame (row names = emotion names)
o <- as.data.frame(as.matrix(megan$scores))

# Convert the scores to numeric, name the column and expose the emotion
# names as a regular column for plotting
o$V1 <- as.numeric(o$V1)
colnames(o)[1] <- "Level"
o$Emotion <- rownames(o)

# Make plot of different emotions
ggplot(data = o, aes(x = Emotion, y = Level)) + geom_bar(stat = "identity")

Saturday, 22 October 2016

Word Cloud Generation using R

Go to https://dev.twitter.com/ and log in with your Twitter Account.
Go to https://apps.twitter.com/
Click on create new app.
Use unique name for the application as it might be used by other user.

Go to key and access tokens.
Generate the access token and key.

#Install R studio and R from below link to run the below R script.

#Write the r code to connect the twitter and generate the wordcloud

install.packages("twitteR")
install.packages("RCurl")
install.packages("wordcloud")
install.packages("SnowballC")
install.packages("tm")
install.packages("plyr")

# One library() call per line: two calls on the same line without a
# separator do not parse.
library(twitteR)
library(RCurl)
library(wordcloud)
library(SnowballC)
library(tm)
library(plyr)
library(dplyr)

# NOTE: the workspace is deliberately not cleared with rm(list = ls()) here;
# the script defines every object it uses.

# Twitter application credentials (replace the placeholders with your own)
consumer_key <- 'your consumer key'
consumer_secret <- 'your consumer secret key'
access_token <- 'your access token key'
access_secret <- 'your access secret key'
setup_twitter_oauth(consumer_key = consumer_key,
                    consumer_secret = consumer_secret,
                    access_token = access_token,
                    access_secret = access_secret)

# Fetch up to 2000 recent English tweets for the search query
Donald_tweets <- searchTwitter("Donald+trumph", n = 2000, lang = "en",
                               resultType = "recent")

# Extract the text of every tweet; vapply() guarantees a character vector
# even when the search returns nothing
donald_tweets_text <- vapply(Donald_tweets, function(x) x$getText(),
                             character(1))

donald_tweets_text_df <- as.data.frame(donald_tweets_text)

donald_corpus <- Corpus(VectorSource(donald_tweets_text))

inspect(donald_corpus[10])

#remove punctuation
donald_clean <- tm_map(donald_corpus, removePunctuation)

inspect(donald_clean[10])

#converting everything to lower cases
donald_clean <- tm_map(donald_clean, content_transformer(tolower))

#stopword are words like of, the, a, as..
donald_clean <- tm_map(donald_clean, removeWords, stopwords("english"))
donald_clean <- tm_map(donald_clean, removeNumbers)
donald_clean <- tm_map(donald_clean, stripWhitespace)

# Remove the search terms themselves: they were used in the query, so they
# would otherwise dominate the cloud. The text has already been lower-cased
# and stripped of punctuation, so the terms are listed as separate
# lower-case words; a capitalised "Donald+Trumph" cannot match anything here.
donald_clean <- tm_map(donald_clean, removeWords, c("donald", "trumph"))

# Reduce the remaining words to their stems
# (try stemDocument("Viewing") to see the effect on a single word)
donald_clean <- tm_map(donald_clean, stemDocument)

#wordcloud(donald_clean) #basic wordcloud
wordcloud(donald_clean, random.order = FALSE, max.words = 200,
          scale = c(3.5, 1), random.color = TRUE, colors = rainbow(10),
          min.freq = 3)

Wednesday, 6 May 2015

Hadoop Single Node Cluster Installation

This tutorial has been tested with the following software versions

· Ubuntu Linux 12.04

· Hadoop 1.0.4

· jdk7

1.1 Install Ubuntu 12.04 on your system either with windows or Ubuntu 12.04 individually.

1.2 Configuring SSH

ssh-keygen -t rsa -P ""

cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys

ssh localhost

1.3 Install Hadoop-1.0.4 on your system.

Download Hadoop-1.0.4 tar file and untar it and place it in home directory.

sudo tar xzf hadoop-1.0.4.tar.gz

-to install java on the system.

sudo apt-get install openjdk-7-jdk

-to check java is currently installed on the system

java -version

Set the configuration of the Hadoop file.

Open the conf file and change the following files.

1.3.1 core-site.xml

<?xml version="1.0"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>

<name>fs.default.name</name>

<value>hdfs://localhost:9000</value>

<description>The name of the default file system. A URI whose

scheme and authority determine the FileSystem implementation. The

uri's scheme determines the config property (fs.SCHEME.impl) naming the

FileSystem implementation class. The uri's authority is used to

determine the host, port, etc. for a filesystem.</description>

</property>

</configuration>

1.3.2 hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386

1.3.3 hdfs-site.xml

<?xml version="1.0"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>

<name>dfs.replication</name>

<value>1</value>

<description>Default block replication.

The actual number of replications can be specified when the file is created.

The default is used if replication is not specified in create time.

</description>

</property>

</configuration>

1.3.4 mapred-site.xml

<?xml version="1.0"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<property>

<name>mapred.job.tracker</name>

<value>localhost:9001</value>

<description>The host and port that the MapReduce job tracker runs

at. If "local", then jobs are run in-process as a single map

and reduce task.

</description>

</property>

</configuration>

1.3.5 masters

master

1.3.6 slaves

master

1.3.7 .bashrc

export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386

export HADOOP_HOME=/home/harshit/hadoop-1.0.4

export TOMCAT_HOME=/home/harshit/apache-tomcat-7.0.39

export PATH=$PATH:$JAVA_HOME/bin

1.4 to Start the Hadoop cluster

-- change the directory to hadoop-1.0.4

cd hadoop-1.0.4

--format the namenode

bin/hadoop namenode -format

--start namenode

bin/hadoop-daemons.sh start namenode

--start datanode

bin/hadoop-daemons.sh start datanode

--start secondarynamenode

bin/hadoop-daemons.sh start secondarynamenode

--start tasktracker

bin/hadoop-daemons.sh start tasktracker

--start jobtracker

bin/hadoop-daemons.sh start jobtracker

--check all processes are running

jps

--to run all processes at once

bin/start-all.sh

--to stop all the processes

bin/stop-all.sh

--stop all processes individually by changing the start to stop.

1.5 Copy the data from local to hdfs.

--check whether any data is present in hdfs

cd hadoop-1.0.4

bin/hadoop fs -ls

--transfer file from local to hdfs

bin/hadoop fs -put filename.txt filename.txt

--check whether file is present

bin/hadoop fs -ls

1.6 Run the sample wordcount program from hadoop-examples-1.0.4.jar

bin/hadoop jar hadoop-examples-1.0.4.jar wordcount filename.txt filenameop.txt

--check the output generated

bin/hadoop fs -get filenameop.txt filenameop.txt