library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.5      v purrr   0.3.4 
## v tibble  3.1.8      v dplyr   1.0.10
## v tidyr   1.2.1      v stringr 1.4.1 
## v readr   2.1.3      v forcats 0.5.2
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stats)

# Load the accidents-at-work report into a data frame and show it in full.
# NOTE(review): the path is absolute and machine-specific, and dec = ","
# only matters if the file contains decimal values - confirm against the CSV.
Accidentsatwork <- read.csv(
  file = "C:/Users/Waleed pc/Desktop/R Homework/report.csv",
  header = TRUE,
  sep = ",",
  quote = "\"",
  dec = ",",
  stringsAsFactors = FALSE,
  fill = TRUE
)
Accidentsatwork

# a: What is the structure of the data?
# str() lists every column with its type and first values.
str(object = Accidentsatwork)
## 'data.frame':    8 obs. of  20 variables:
##  $ Indicator: chr  "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" ...
##  $ Sex      : chr  "Males" "Males" "Males" "Males" ...
##  $ Age.group: chr  "Age groups total" "Under 25" "25-34" "35-44" ...
##  $ X2005    : int  2089 462 545 439 397 203 246 43
##  $ X2006    : int  2517 563 633 499 483 293 339 46
##  $ X2007    : int  2536 536 643 482 516 310 359 49
##  $ X2008    : int  2688 584 714 527 495 305 368 63
##  $ X2009    : int  1855 380 528 340 344 227 263 36
##  $ X2010    : int  2073 427 566 411 356 276 313 37
##  $ X2011    : int  2416 508 702 495 387 284 324 40
##  $ X2012    : int  2553 552 727 486 440 295 348 53
##  $ X2013    : int  2635 562 782 467 431 339 393 54
##  $ X2014    : int  2988 723 858 523 470 351 414 63
##  $ X2015    : int  3162 684 959 601 479 362 439 77
##  $ X2016    : int  3327 798 980 570 532 382 447 65
##  $ X2017    : int  3367 788 998 660 464 379 457 78
##  $ X2018    : int  3375 730 958 608 544 422 535 113
##  $ X2019    : int  2783 461 811 583 449 400 479 79
##  $ X2020    : int  2370 365 709 510 387 333 399 66
##  $ X2021    : int  2611 409 770 518 481 354 433 79
#b:How many variables are in the data set

#there are 20 variables


#c:How many observations are there
#there are 8 observations (rows)



#d: Which years have the data been taken from and what age groups
#the data is taken from the years 2005 to 2021, from under 25 to 65 and older


#e: is the data equally distributed across 2005 to 2021
#no, it is not equally distributed


#f: has the number of incidents increased over the years
#yes, overall: the age-groups total rose from 2089 in 2005 to a peak of 3375 in 2018, then fell to 2611 in 2021
# 3. Provide brief descriptive statistical analysis of your data set (like measures of central tendency and dispersion).
# 4. Include at least one plot into your report. 
# If ggplot2 is too complicated for you now, create a plot with R base functions.

# Per-column summary: length/class for the text columns, and
# min, quartiles, median, mean and max for every year column.
summary(object = Accidentsatwork)
##   Indicator             Sex             Age.group             X2005       
##  Length:8           Length:8           Length:8           Min.   :  43.0  
##  Class :character   Class :character   Class :character   1st Qu.: 235.2  
##  Mode  :character   Mode  :character   Mode  :character   Median : 418.0  
##                                                           Mean   : 553.0  
##                                                           3rd Qu.: 482.8  
##                                                           Max.   :2089.0  
##      X2006            X2007            X2008            X2009       
##  Min.   :  46.0   Min.   :  49.0   Min.   :  63.0   Min.   :  36.0  
##  1st Qu.: 327.5   1st Qu.: 346.8   1st Qu.: 352.2   1st Qu.: 254.0  
##  Median : 491.0   Median : 499.0   Median : 511.0   Median : 342.0  
##  Mean   : 671.6   Mean   : 678.9   Mean   : 718.0   Mean   : 496.6  
##  3rd Qu.: 580.5   3rd Qu.: 562.8   3rd Qu.: 616.5   3rd Qu.: 417.0  
##  Max.   :2517.0   Max.   :2536.0   Max.   :2688.0   Max.   :1855.0  
##      X2010            X2011            X2012            X2013       
##  Min.   :  37.0   Min.   :  40.0   Min.   :  53.0   Min.   :  54.0  
##  1st Qu.: 303.8   1st Qu.: 314.0   1st Qu.: 334.8   1st Qu.: 379.5  
##  Median : 383.5   Median : 441.0   Median : 463.0   Median : 449.0  
##  Mean   : 557.4   Mean   : 644.5   Mean   : 681.8   Mean   : 707.9  
##  3rd Qu.: 461.8   3rd Qu.: 556.5   3rd Qu.: 595.8   3rd Qu.: 617.0  
##  Max.   :2073.0   Max.   :2416.0   Max.   :2553.0   Max.   :2635.0  
##      X2014            X2015            X2016            X2017       
##  Min.   :  63.0   Min.   :  77.0   Min.   :  65.0   Min.   :  78.0  
##  1st Qu.: 398.2   1st Qu.: 419.8   1st Qu.: 430.8   1st Qu.: 437.5  
##  Median : 496.5   Median : 540.0   Median : 551.0   Median : 562.0  
##  Mean   : 798.8   Mean   : 845.4   Mean   : 887.6   Mean   : 898.9  
##  3rd Qu.: 756.8   3rd Qu.: 752.8   3rd Qu.: 843.5   3rd Qu.: 840.5  
##  Max.   :2988.0   Max.   :3162.0   Max.   :3327.0   Max.   :3367.0  
##      X2018            X2019            X2020            X2021       
##  Min.   : 113.0   Min.   :  79.0   Min.   :  66.0   Min.   :  79.0  
##  1st Qu.: 506.8   1st Qu.: 436.8   1st Qu.: 357.0   1st Qu.: 395.2  
##  Median : 576.0   Median : 470.0   Median : 393.0   Median : 457.0  
##  Mean   : 910.6   Mean   : 755.6   Mean   : 642.4   Mean   : 706.9  
##  3rd Qu.: 787.0   3rd Qu.: 640.0   3rd Qu.: 559.8   3rd Qu.: 581.0  
##  Max.   :3375.0   Max.   :2783.0   Max.   :2370.0   Max.   :2611.0
# Mean count for the first (2005) and last (2021) year, taken over all
# 8 rows of the data (the "Age groups total" row is included).
mean(Accidentsatwork[["X2005"]])
## [1] 553
mean(Accidentsatwork[["X2021"]])
## [1] 706.875

# Quantiles and standard deviation of the first (2005) and last (2021) year.
# minyear / maxyear are reused by the plots further down in this script.
# NOTE(review): both vectors hold all 8 rows, including the
# "Age groups total" row - confirm that is intended for these statistics.
minyear <- Accidentsatwork$X2005
maxyear <- Accidentsatwork$X2021

# Compare 2005 against 2021 (previously the 2021 quantiles were printed twice
# and the 2005 quantiles never shown).
quantile(minyear)
##      0%     25%     50%     75%    100% 
##   43.00  235.25  418.00  482.75 2089.00
quantile(maxyear)
##      0%     25%     50%     75%    100% 
##   79.00  395.25  457.00  581.00 2611.00

# Standard deviation of the 2005 and 2021 counts
sd(minyear)
## [1] 641.6298
sd(maxyear)
## [1] 792.7218
# Show the column layout once more before plotting.
str(object = Accidentsatwork)
## 'data.frame':    8 obs. of  20 variables:
##  $ Indicator: chr  "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" "Registered accidents at work" ...
##  $ Sex      : chr  "Males" "Males" "Males" "Males" ...
##  $ Age.group: chr  "Age groups total" "Under 25" "25-34" "35-44" ...
##  $ X2005    : int  2089 462 545 439 397 203 246 43
##  $ X2006    : int  2517 563 633 499 483 293 339 46
##  $ X2007    : int  2536 536 643 482 516 310 359 49
##  $ X2008    : int  2688 584 714 527 495 305 368 63
##  $ X2009    : int  1855 380 528 340 344 227 263 36
##  $ X2010    : int  2073 427 566 411 356 276 313 37
##  $ X2011    : int  2416 508 702 495 387 284 324 40
##  $ X2012    : int  2553 552 727 486 440 295 348 53
##  $ X2013    : int  2635 562 782 467 431 339 393 54
##  $ X2014    : int  2988 723 858 523 470 351 414 63
##  $ X2015    : int  3162 684 959 601 479 362 439 77
##  $ X2016    : int  3327 798 980 570 532 382 447 65
##  $ X2017    : int  3367 788 998 660 464 379 457 78
##  $ X2018    : int  3375 730 958 608 544 422 535 113
##  $ X2019    : int  2783 461 811 583 449 400 479 79
##  $ X2020    : int  2370 365 709 510 387 333 399 66
##  $ X2021    : int  2611 409 770 518 481 354 433 79
# Visual comparison of 2005 (minyear) and 2021 (maxyear).
# Each vector has one value per row of the data, in row order; the first
# row is the "Age groups total" entry.

# Distribution of the counts
hist(minyear)

hist(maxyear)

# Counts per row as bars
barplot(minyear)

barplot(maxyear)

# Counts per row as points
plot(minyear)

plot(maxyear)

# It can be observed that fewer accidents happened to older people and more to younger people.

# The x axis is the row position (one row per age group), not a month,
# so it is labelled accordingly.
plot(minyear, type = "o", col = "red", xlab = "Age group (row order)",
     ylab = "Accidents", main = "Accidents in 2005 for all age groups")

plot(maxyear, type = "o", col = "red", xlab = "Age group (row order)",
     ylab = "Accidents", main = "Accidents in 2021 for all age groups")

# Spread of the counts
boxplot(minyear)

boxplot(maxyear)