Comcast Telecom Complaints Machine Learning Project in R.

Comcast Telecom Complaints Machine Learning Project in R.

Simplilearn Project

Comcast is an American global telecommunications company. The firm has been providing terrible customer service and continues to fall short despite repeated promises to improve. Only last month (October 2016), the authority fined the company $2.3 million after receiving over 1000 consumer complaints. The existing database will serve as a repository of public customer complaints filed against Comcast. It will help to pin down what is wrong with Comcast's customer service.

Importing data and library into R environment.

library(tidyverse)
library(stringi)
#library(lubridate)
library(dplyr)
library(ggplot2)
library(ggpubr)
# List the files available in the input directory (the listing is shown when
# the script is run interactively or in a notebook).
list.files(path = "../input")

# Load the complaints data set; the first row of the CSV holds the column names.
comcast_data <- read.csv(
  "../input/comcast/Comcast Telecom Complaints data.csv",
  header = TRUE
)

# Manipulating column names: strip every "." from the column names, so a name
# such as "Customer.Complaint" becomes "CustomerComplaint".
# A fixed-string match is used on purpose: the previous regex literal "\." is
# not a valid R string escape and prevented the script from parsing.
names(comcast_data) <- stri_replace_all_fixed(names(comcast_data), ".", "")
head(comcast_data)

Provide the trend chart for the number of complaints at monthly and daily granularity levels.

# Monthly trend: a line chart of ticket counts with a labelled point per month
# and one x-axis break for every month present in the data.
# NOTE(review): `monthly_count` is not created in the visible part of this
# script; it has to provide `Month` and `Count` columns - confirm where it is
# built.
ggplot(monthly_count, aes(x = Month, y = Count, label = Count)) +
  geom_line() +
  geom_point(size = 0.8) +
  geom_text() +
  scale_x_continuous(breaks = monthly_count$Month) +
  labs(
    title = "Monthly Ticket Count",
    x = "Months",
    y = "No. of Tickets"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

Provide a table with the frequency of complaint types. Which complaint types are the most frequent, i.e., those around internet, network issues, or other domains?

# Classify every complaint by the keywords found in its free-text description.
#
# The text is lower-cased once so that the keyword search is case-insensitive.
# Each *_tickets vector holds the row numbers whose complaint text contains the
# keyword as a literal substring (no regular-expression interpretation).
complaint_text <- tolower(comcast_data$CustomerComplaint)

network_tickets <- grep("network", complaint_text, fixed = TRUE)
internet_tickets <- grep("internet", complaint_text, fixed = TRUE)
billing_tickets <- grep("bill", complaint_text, fixed = TRUE)
email_tickets <- grep("email", complaint_text, fixed = TRUE)
charges_ticket <- grep("charge", complaint_text, fixed = TRUE)

# Start with every row labelled "Others" so the column always has one value per
# row, even when a keyword (or every keyword) matches nothing.
comcast_data$ComplaintType <- rep("Others", nrow(comcast_data))

# Later assignments overwrite earlier ones, so a complaint that matches several
# keywords ends up with the last matching label in this order.
comcast_data$ComplaintType[internet_tickets] <- "Internet"
comcast_data$ComplaintType[network_tickets] <- "Network"
comcast_data$ComplaintType[billing_tickets] <- "Billing"
comcast_data$ComplaintType[email_tickets] <- "Email"
comcast_data$ComplaintType[charges_ticket] <- "Charges"

# Frequency table of the complaint types.
table(comcast_data$ComplaintType)

Create a new categorical variable with the values Open and Closed: Open and Pending are to be categorized as Open, and Closed and Solved are to be categorized as Closed.

# Count the tickets for every State / ComplaintStatus combination.
# The grouping lives only inside this pipeline and is dropped at the end, so
# `comcast_data` itself is no longer overwritten with a grouped tibble.
# NOTE(review): `ComplaintStatus` is not created in the visible part of this
# script - confirm it exists before this chunk runs.
chart_data <- comcast_data %>%
  group_by(State, ComplaintStatus) %>%
  summarise(Count = n()) %>%
  ungroup()

# Stacked bar chart: one bar per state, split by complaint status.
ggplot(as.data.frame(chart_data), mapping = aes(State, Count)) +
  geom_col(aes(fill = ComplaintStatus), width = 0.95) +
  theme(
    # Rotate the state names so they stay readable on the x-axis.
    axis.text.x = element_text(angle = 90),
    axis.title.y = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    title = element_text(size = 16, colour = "#0073C2FF"),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Ticket Status Stacked Bar Chart ",
    x = "States",
    y = "No of Tickets",
    fill = "Status"
  )

Provide the state-wise status of complaints in a stacked bar chart, using the categorized variable from Q3. Provide insights on: (a) which state has the maximum number of complaints, and (b) which state has the highest percentage of unresolved complaints.

# Fraction of all tickets that falls into each complaint status.
resolved_data <- comcast_data %>%
  group_by(ComplaintStatus)
total_resloved <- resolved_data %>%
  summarise(percentage = n() / nrow(resolved_data))

# Fraction of all tickets per (channel, status) pair. The denominator is the
# total number of tickets, not the number of tickets within the channel.
resolved_data <- comcast_data %>%
  group_by(ReceivedVia, ComplaintStatus)
Category_resloved <- resolved_data %>%
  summarise(percentage = n() / nrow(resolved_data))

Provide the percentage of complaints resolved till date, which were received through the Internet and customer care calls.

# Pie Chart for overall Ticket Status.
# A pie chart is a single stacked bar drawn in polar coordinates; each slice is
# labelled with its share rounded to a whole percent.
# The side-by-side layout is produced by ggarrange() at the end; the former
# par(mfrow = ...) call only affects base graphics, not ggplot2 output, and was
# dropped.
total <- ggplot(
  total_resloved,
  aes(x = "", y = percentage, fill = ComplaintStatus)
) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(round(percentage * 100), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Pie Chart for Category wise Ticket Status.
# Each slice is labelled with the channel the ticket was received through and
# its share rounded to a whole percent.
category <- ggplot(
  Category_resloved,
  aes(x = "", y = percentage, fill = ComplaintStatus)
) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  geom_text(
    aes(label = paste0(ReceivedVia, "-", round(percentage * 100), "%")),
    position = position_stack(vjust = 0.5)
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Show both pies next to each other in a single row.
ggarrange(total, category, nrow = 1, ncol = 2)

GitHub Repo