library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.8 v dplyr 1.0.10
## v tidyr 1.2.1 v stringr 1.4.1
## v readr 2.1.3 v forcats 0.5.2
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stats)
# Read the accidents-at-work report into a data frame.
# NOTE(review): the path is absolute and machine-specific, and dec = ","
# together with sep = "," looks unintended -- confirm both against the CSV.
Accidentsatwork <- read.csv(
  "C:/Users/Waleed pc/Desktop/R Homework/report.csv",
  header = TRUE,
  sep = ",",
  quote = "\"",
  dec = ",",
  stringsAsFactors = FALSE,
  fill = TRUE
)
print(Accidentsatwork)  # show the loaded table

# a: What is the structure of the data?
# str() lists every column with its type and first values.
str(Accidentsatwork)
## 'data.frame': 8 obs. of 20 variables:
## $ Indicator: chr "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" ...
## $ Sex : chr "Males" "Males" "Males" "Males" ...
## $ Age.group: chr "Age groups total" "Under 25" "25-34" "35-44" ...
## $ X2005 : int 2089 462 545 439 397 203 246 43
## $ X2006 : int 2517 563 633 499 483 293 339 46
## $ X2007 : int 2536 536 643 482 516 310 359 49
## $ X2008 : int 2688 584 714 527 495 305 368 63
## $ X2009 : int 1855 380 528 340 344 227 263 36
## $ X2010 : int 2073 427 566 411 356 276 313 37
## $ X2011 : int 2416 508 702 495 387 284 324 40
## $ X2012 : int 2553 552 727 486 440 295 348 53
## $ X2013 : int 2635 562 782 467 431 339 393 54
## $ X2014 : int 2988 723 858 523 470 351 414 63
## $ X2015 : int 3162 684 959 601 479 362 439 77
## $ X2016 : int 3327 798 980 570 532 382 447 65
## $ X2017 : int 3367 788 998 660 464 379 457 78
## $ X2018 : int 3375 730 958 608 544 422 535 113
## $ X2019 : int 2783 461 811 583 449 400 479 79
## $ X2020 : int 2370 365 709 510 387 333 399 66
## $ X2021 : int 2611 409 770 518 481 354 433 79
#b: How many variables are in the data set?
#there are 20 variables
#c: How many observations are there?
#there are 8 observations
#d: Which years has the data been taken from, and which age groups?
#the data is taken from the years 2005 to 2021, from under 25 to 65 and older
#e: Is the data equally distributed across 2005 to 2021?
#no, it is not equally distributed
#f: Has the number of incidents increased over the years?
#yes, overall there is an upward trend: the total rose from 2089 in 2005 to 2611 in 2021, peaking at 3375 in 2018
# 3. Provide brief descriptive statistical analysis of your data set (like measures of central tendency and dispersion).
# 4. Include at least one plot into your report.
# If ggplot2 is too complicated for you now, create a plot with R base functions.
# Per-column summary: min, quartiles, median, mean and max for the numeric
# year columns; length/class/mode for the character columns.
print(summary(Accidentsatwork))
## Indicator Sex Age.group X2005
## Length:8 Length:8 Length:8 Min. : 43.0
## Class :character Class :character Class :character 1st Qu.: 235.2
## Mode :character Mode :character Mode :character Median : 418.0
## Mean : 553.0
## 3rd Qu.: 482.8
## Max. :2089.0
## X2006 X2007 X2008 X2009
## Min. : 46.0 Min. : 49.0 Min. : 63.0 Min. : 36.0
## 1st Qu.: 327.5 1st Qu.: 346.8 1st Qu.: 352.2 1st Qu.: 254.0
## Median : 491.0 Median : 499.0 Median : 511.0 Median : 342.0
## Mean : 671.6 Mean : 678.9 Mean : 718.0 Mean : 496.6
## 3rd Qu.: 580.5 3rd Qu.: 562.8 3rd Qu.: 616.5 3rd Qu.: 417.0
## Max. :2517.0 Max. :2536.0 Max. :2688.0 Max. :1855.0
## X2010 X2011 X2012 X2013
## Min. : 37.0 Min. : 40.0 Min. : 53.0 Min. : 54.0
## 1st Qu.: 303.8 1st Qu.: 314.0 1st Qu.: 334.8 1st Qu.: 379.5
## Median : 383.5 Median : 441.0 Median : 463.0 Median : 449.0
## Mean : 557.4 Mean : 644.5 Mean : 681.8 Mean : 707.9
## 3rd Qu.: 461.8 3rd Qu.: 556.5 3rd Qu.: 595.8 3rd Qu.: 617.0
## Max. :2073.0 Max. :2416.0 Max. :2553.0 Max. :2635.0
## X2014 X2015 X2016 X2017
## Min. : 63.0 Min. : 77.0 Min. : 65.0 Min. : 78.0
## 1st Qu.: 398.2 1st Qu.: 419.8 1st Qu.: 430.8 1st Qu.: 437.5
## Median : 496.5 Median : 540.0 Median : 551.0 Median : 562.0
## Mean : 798.8 Mean : 845.4 Mean : 887.6 Mean : 898.9
## 3rd Qu.: 756.8 3rd Qu.: 752.8 3rd Qu.: 843.5 3rd Qu.: 840.5
## Max. :2988.0 Max. :3162.0 Max. :3327.0 Max. :3367.0
## X2018 X2019 X2020 X2021
## Min. : 113.0 Min. : 79.0 Min. : 66.0 Min. : 79.0
## 1st Qu.: 506.8 1st Qu.: 436.8 1st Qu.: 357.0 1st Qu.: 395.2
## Median : 576.0 Median : 470.0 Median : 393.0 Median : 457.0
## Mean : 910.6 Mean : 755.6 Mean : 642.4 Mean : 706.9
## 3rd Qu.: 787.0 3rd Qu.: 640.0 3rd Qu.: 559.8 3rd Qu.: 581.0
## Max. :3375.0 Max. :2783.0 Max. :2370.0 Max. :2611.0
# Central tendency and dispersion for the first (2005) and last (2021) year.
# NOTE(review): the first row is "Age groups total", so these statistics mix
# the total with the per-age-group counts -- confirm that this is intended.
mean(Accidentsatwork$X2005)
## [1] 553
#output 553
mean(Accidentsatwork$X2021)
## [1] 706.875
#output 706.875
# Quartiles of 2005 compared with 2021.
minyear <- Accidentsatwork$X2005
maxyear <- Accidentsatwork$X2021
# The first call must use minyear; calling quantile(maxyear) twice compared
# 2021 with itself.
quantile(minyear)
## 0% 25% 50% 75% 100%
## 43.00 235.25 418.00 482.75 2089.00
quantile(maxyear)
## 0% 25% 50% 75% 100%
## 79.00 395.25 457.00 581.00 2611.00
# Standard deviation of the first and last year.
sd(minyear)
## [1] 641.6298
sd(maxyear)
## [1] 792.7218
# Re-inspect the structure before plotting.
str(Accidentsatwork)
## 'data.frame': 8 obs. of 20 variables:
## $ Indicator: chr "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" ...
## $ Sex : chr "Males" "Males" "Males" "Males" ...
## $ Age.group: chr "Age groups total" "Under 25" "25-34" "35-44" ...
## $ X2005 : int 2089 462 545 439 397 203 246 43
## $ X2006 : int 2517 563 633 499 483 293 339 46
## $ X2007 : int 2536 536 643 482 516 310 359 49
## $ X2008 : int 2688 584 714 527 495 305 368 63
## $ X2009 : int 1855 380 528 340 344 227 263 36
## $ X2010 : int 2073 427 566 411 356 276 313 37
## $ X2011 : int 2416 508 702 495 387 284 324 40
## $ X2012 : int 2553 552 727 486 440 295 348 53
## $ X2013 : int 2635 562 782 467 431 339 393 54
## $ X2014 : int 2988 723 858 523 470 351 414 63
## $ X2015 : int 3162 684 959 601 479 362 439 77
## $ X2016 : int 3327 798 980 570 532 382 447 65
## $ X2017 : int 3367 788 998 660 464 379 457 78
## $ X2018 : int 3375 730 958 608 544 422 535 113
## $ X2019 : int 2783 461 811 583 449 400 479 79
## $ X2020 : int 2370 365 709 510 387 333 399 66
## $ X2021 : int 2611 409 770 518 481 354 433 79
# Visual comparison of 2005 (minyear) and 2021 (maxyear).
# Each vector holds one value per row of Accidentsatwork, i.e. one per
# Age.group entry (the first row is "Age groups total").

# Histograms: distribution of the counts within each year.
hist(minyear)

hist(maxyear)

# Bar plots: one bar per row, in data-set order.
barplot(minyear)

barplot(maxyear)

# Index plots of the raw counts.
plot(minyear)

plot(maxyear)

# It can be observed that fewer accidents happened to older people and more
# to younger people.
# The x axis is the row (age group), not a month, so the ticks are labelled
# with the Age.group names instead of bare indices.
age_labels <- Accidentsatwork$Age.group
plot(minyear, type = "o", col = "red", xlab = "Age group", ylab = "Accidents",
     main = "Accidents in 2005 for all age groups", xaxt = "n")
axis(1, at = seq_along(minyear), labels = age_labels, cex.axis = 0.7)

plot(maxyear, type = "o", col = "red", xlab = "Age group", ylab = "Accidents",
     main = "Accidents in 2021 for all age groups", xaxt = "n")
axis(1, at = seq_along(maxyear), labels = age_labels, cex.axis = 0.7)

# Box plots: spread and outliers (the total row shows up as the outlier).
boxplot(minyear)

boxplot(maxyear)
