10: Social media post performance

library(gamlss)
library(gamlss.ggplots)
library(gamlss.add)
library(ggplot2)
library(dplyr)
library(gamboostLSS)
data(fbdata)

Figure 10.1

Facebook data: histogram of the number of shares, truncated for display at 100 and overlaid with density estimate.

fbdata %>% dplyr::select(share) %>%
  filter(share<101) %>%              ## truncate at 100
  ggplot(aes(share)) +
  geom_histogram(aes(y=after_stat(density)), colour="darkblue", fill="lightblue", binwidth=1)  +
  theme_bw(base_size=12) +
  xlab("Number of shares") +
  ylab("Density") +
  stat_density(alpha=.2, fill="#FF6666", bw=1.8, outline.type="upper", colour="black")+
  theme(axis.text.x=element_text(size=14))+
  theme(axis.text.y=element_text(size=14))+ 
  theme(axis.title.x = element_text(size=14),
      axis.title.y = element_text(size=14))

Model building

Select the response distribution

  • The gamlss cyclic P-spline function pbc() has been used for the smooth cyclic terms for Post.Hour, Post.Weekday and Post.Month.

  • Because of numerical instability of the pbc(Post.Month) term, the number of break points (knots) was set at 5 for this term (inter=5).

m0 <- gamlss(share ~ Category + pbc(Post.Month, control=pbc.control(inter=5)) + pbc(Post.Weekday),
        sigma.formula = ~ pbc(Post.Month, control=pbc.control(inter=5)) +
             Category + pbc(Post.Weekday),
        nu.formula = ~ Category,
        tau.formula = ~ Category,
        data = fbdata, family=PO)
GAMLSS-RS iteration 1: Global Deviance = 14158.62 
GAMLSS-RS iteration 2: Global Deviance = 14158.62 
T1 <- chooseDist(m0, type="count")
minimum GAIC(k= 2 ) family: ZASICHEL 
minimum GAIC(k= 3.84 ) family: ZASICHEL 
minimum GAIC(k= 6.2 ) family: SI 
T1[order(T1[,1])[1:5],]   ## top 5 in order for AIC
                2     3.84      6.2
ZASICHEL 4087.826 4117.266 4155.026
ZISICHEL 4092.669 4122.109 4159.869
ZABNB    4092.753 4122.193 4159.953
SI       4095.530 4119.450 4150.130
SICHEL   4097.770 4121.690 4152.370
T1[order(T1[,3])[1:5],]   ## top 5 in order for BIC
                2     3.84      6.2
SI       4095.530 4119.450 4150.130
SICHEL   4097.770 4121.690 4152.370
ZASICHEL 4087.826 4117.266 4155.026
BNB      4102.049 4125.969 4156.649
ZISICHEL 4092.669 4122.109 4159.869

Select covariates

The gamlss function stepGAICAll.A() was used for covariate selection, with the AIC as criterion (the default). The lengthy output of stepGAICAll.A() shown below can be suppressed with the argument trace=FALSE.

# starting model
m1 <- gamlss(share~1, data = fbdata, family=SICHEL, method=mixed(20,100), trace=FALSE)

m.SICHEL <- stepGAICAll.A(m1,
                scope=list(lower=~1,
                upper = ~ Category + Type + pbc(Post.Weekday) +
                pbc(Post.Month, control=pbc.control(inter=5)) +
                pbc(Post.Hour) + Paid))
--------------------------------------------------- 
Distribution parameter:  mu 
Start:  AIC= 4280 
 share ~ 1 

                                                        Df    AIC
+ Category                                          2.0000 4186.3
+ pbc(Post.Hour)                                    8.3218 4252.8
+ pbc(Post.Weekday)                                 1.0000 4266.6
+ Type                                              2.0000 4272.2
+ Paid                                              1.0000 4275.0
+ pbc(Post.Month, control = pbc.control(inter = 5)) 1.0000 4278.9
<none>                                                     4280.0

Step:  AIC= 4186.32 
 share ~ Category 

                                                        Df    AIC
+ pbc(Post.Weekday)                                 1.0000 4168.5
+ pbc(Post.Hour)                                    1.0003 4184.4
+ pbc(Post.Month, control = pbc.control(inter = 5)) 1.0000 4184.6
<none>                                                     4186.3
+ Paid                                              1.0000 4186.4
+ Type                                              2.0000 4189.0

Step:  AIC= 4168.46 
 share ~ Category + pbc(Post.Weekday) 

                                                        Df    AIC
+ pbc(Post.Month, control = pbc.control(inter = 5)) 1.0000 4166.3
+ pbc(Post.Hour)                                    1.0003 4167.0
<none>                                                     4168.5
+ Paid                                              1.0000 4170.1
+ Type                                              2.0000 4171.0

Step:  AIC= 4166.25 
 share ~ Category + pbc(Post.Weekday) + pbc(Post.Month, control = pbc.control(inter = 5)) 

                     Df    AIC
+ pbc(Post.Hour) 4.5673 4156.3
<none>                  4166.3
+ Paid           1.0000 4167.5
+ Type           2.0000 4167.8

Step:  AIC= 4156.28 
 share ~ Category + pbc(Post.Weekday) + pbc(Post.Month, control = pbc.control(inter = 5)) +  
    pbc(Post.Hour) 

           Df    AIC
+ Type 2.2164 4155.3
<none>        4156.3
+ Paid 1.0240 4157.8

Step:  AIC= 4155.31 
 share ~ Category + pbc(Post.Weekday) + pbc(Post.Month, control = pbc.control(inter = 5)) +  
    pbc(Post.Hour) + Type 

            Df    AIC
<none>         4155.3
+ Paid 0.97896 4156.8
--------------------------------------------------- 
Distribution parameter:  sigma 
Start:  AIC= 4155.31 
 ~1 

                                                         Df    AIC
+ Category                                          1.14140 4134.8
+ pbc(Post.Month, control = pbc.control(inter = 5)) 1.17363 4147.4
+ pbc(Post.Weekday)                                 1.01504 4151.0
+ pbc(Post.Hour)                                    0.62380 4151.8
<none>                                                      4155.3
+ Paid                                              0.97953 4157.2
+ Type                                              1.94709 4157.6

Step:  AIC= 4134.76 
 ~Category 

                                                         Df    AIC
+ pbc(Post.Month, control = pbc.control(inter = 5)) 1.64616 4127.2
+ pbc(Post.Weekday)                                 0.85811 4131.8
+ Paid                                              1.18304 4133.7
+ pbc(Post.Hour)                                    0.84724 4134.4
<none>                                                      4134.8
+ Type                                              1.95036 4135.9

Step:  AIC= 4127.24 
 share ~ Category + pbc(Post.Month, control = pbc.control(inter = 5)) 

                         Df    AIC
+ pbc(Post.Hour)    0.76260 4118.3
+ pbc(Post.Weekday) 0.89129 4123.1
<none>                      4127.2
+ Paid              0.98746 4127.9
+ Type              1.89642 4130.1

Step:  AIC= 4118.3 
 share ~ Category + pbc(Post.Month, control = pbc.control(inter = 5)) +  
    pbc(Post.Hour) 

                         Df    AIC
+ pbc(Post.Weekday) 0.93208 4113.8
<none>                      4118.3
+ Paid              0.98401 4119.1
+ Type              1.99804 4121.5

Step:  AIC= 4113.8 
 share ~ Category + pbc(Post.Month, control = pbc.control(inter = 5)) +  
    pbc(Post.Hour) + pbc(Post.Weekday) 

            Df    AIC
<none>         4113.8
+ Paid 0.96737 4114.6
+ Type 1.99660 4117.7
--------------------------------------------------- 
Distribution parameter:  nu 
Start:  AIC= 4113.8 
 ~1 

                                                         Df    AIC
+ pbc(Post.Month, control = pbc.control(inter = 5)) -2.2657 4089.5
+ Category                                          -1.2657 4090.7
+ pbc(Post.Hour)                                    -2.2656 4098.6
+ pbc(Post.Weekday)                                 -2.2657 4099.8
+ Paid                                              -2.2656 4108.5
+ Type                                               2.4285 4110.0
<none>                                                      4113.8

Step:  AIC= 4089.55 
 share ~ pbc(Post.Month, control = pbc.control(inter = 5)) 

                                                         Df    AIC
+ Category                                           2.0001 4069.6
+ Type                                               2.0000 4081.0
+ Paid                                               1.0000 4085.5
+ pbc(Post.Weekday)                                  1.0000 4088.3
+ pbc(Post.Hour)                                     1.0000 4089.3
<none>                                                      4089.5
- pbc(Post.Month, control = pbc.control(inter = 5)) -2.2657 4113.8

Step:  AIC= 4069.59 
 share ~ pbc(Post.Month, control = pbc.control(inter = 5)) + Category 

                                                         Df    AIC
+ Type                                              1.99999 4061.9
<none>                                                      4069.6
+ pbc(Post.Hour)                                    1.00002 4070.1
+ pbc(Post.Weekday)                                 1.00000 4070.6
+ Paid                                              0.99994 4072.0
- Category                                          2.00008 4089.5
- pbc(Post.Month, control = pbc.control(inter = 5)) 1.00010 4090.7

Step:  AIC= 4061.92 
 share ~ pbc(Post.Month, control = pbc.control(inter = 5)) + Category +  
    Type 

                                                         Df    AIC
+ pbc(Post.Weekday)                                 0.99998 4058.8
+ pbc(Post.Hour)                                    1.00010 4060.3
<none>                                                      4061.9
+ Paid                                              0.99995 4064.9
- Type                                              1.99999 4069.6
- Category                                          2.00004 4081.0
- pbc(Post.Month, control = pbc.control(inter = 5)) 1.00010 4085.9

Step:  AIC= 4058.83 
 share ~ pbc(Post.Month, control = pbc.control(inter = 5)) + Category +  
    Type + pbc(Post.Weekday) 

                                                         Df    AIC
+ pbc(Post.Hour)                                    1.00008 4058.3
<none>                                                      4058.8
- pbc(Post.Weekday)                                 0.99998 4061.9
+ Paid                                              0.99997 4066.3
- Category                                          2.00000 4069.0
- Type                                              1.99997 4070.6
- pbc(Post.Month, control = pbc.control(inter = 5)) 1.00008 4082.3

Step:  AIC= 4058.34 
 share ~ pbc(Post.Month, control = pbc.control(inter = 5)) + Category +  
    Type + pbc(Post.Weekday) + pbc(Post.Hour) 

                                                         Df    AIC
<none>                                                      4058.3
- pbc(Post.Hour)                                    1.00008 4058.8
- pbc(Post.Weekday)                                 0.99996 4060.3
+ Paid                                              0.99993 4066.3
- Category                                          2.00005 4069.3
- pbc(Post.Month, control = pbc.control(inter = 5)) 1.00005 4070.3
- Type                                              2.00003 4071.3

Final SICHEL model

summary(m.SICHEL)
******************************************************************
Family:  c("SICHEL", "Sichel") 

Call:  gamlss(formula = share ~ Category + pbc(Post.Weekday) +  
    pbc(Post.Month, control = pbc.control(inter = 5)) +  
    pbc(Post.Hour) + Type, sigma.formula = ~Category +  
    pbc(Post.Month, control = pbc.control(inter = 5)) +  
    pbc(Post.Hour) + pbc(Post.Weekday), nu.formula = ~pbc(Post.Month,  
    control = pbc.control(inter = 5)) + Category +  
    Type + pbc(Post.Weekday) + pbc(Post.Hour), family = SICHEL,  
    data = fbdata, method = mixed(20, 100), trace = FALSE) 

Fitting method: mixed(20, 100) 

------------------------------------------------------------------
Mu link function:  log
Mu Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)      2.85863    0.19238  14.860  < 2e-16 ***
Category2        0.37931    0.10547   3.597 0.000356 ***
Category3        0.52690    0.09557   5.513  5.8e-08 ***
TypePhoto/Video  0.29027    0.19541   1.485 0.138094    
TypeStatus       0.63849    0.22728   2.809 0.005172 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Sigma link function:  log
Sigma Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.9660     0.2794   7.037 6.96e-12 ***
Category2    -0.3813     0.4483  -0.850  0.39555    
Category3    -1.1623     0.4073  -2.854  0.00451 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
Nu link function:  identity 
Nu Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)       1.4902     0.3444   4.327 1.85e-05 ***
Category2        -3.6145     0.3292 -10.981  < 2e-16 ***
Category3        -3.1493     0.2565 -12.280  < 2e-16 ***
TypePhoto/Video  -1.3031     0.3560  -3.660  0.00028 ***
TypeStatus        0.5194     0.5144   1.010  0.31321    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

------------------------------------------------------------------
NOTE: Additive smoothing terms exist in the formulas: 
 i) Std. Error for smoothers are for the linear effect only. 
ii) Std. Error for the linear terms maybe are not accurate. 
------------------------------------------------------------------
No. of observations in the fit:  495 
Degrees of Freedom for the fit:  22.00045
      Residual Deg. of Freedom:  472.9996 
                      at cycle:  9 
 
Global Deviance:     4014.34 
            AIC:     4058.341 
            SBC:     4150.843 
******************************************************************

Figure 10.3

Facebook data: Worm plot, Sichel response distribution model (10.1). Twenty realizations of randomized quantile residuals are shown.

rqres.plot(m.SICHEL, howmany=20, plot.type="all", ylim=c(-1.5,1.5))

Table 10.2: Prediction scores

The prediction scores in Table 10.2 (MSEP, LS, CRPS, MAPE) are all based on \(K\)-fold cross-validation. The random number generation for obtaining the folds is platform-dependent, so the reader may get results which are numerically slightly different to those obtained here (and reported in Table 10.2).

Alternative models

Neural network: m.NN

# predictors are standardized for more stable results and faster convergence in nn():
fbdata <-  mutate(fbdata,
                  type3 = as.numeric(Type),
                  Category.s = as.numeric(Category)/3,
                  Post.Weekday.s = Post.Weekday/7,
                  Post.Month.s = Post.Month/12,
                  Post.Hour.s = Post.Hour/24,
                  type3.s = type3/3)
set.seed(1494)
m.NN <- gamlss(share ~ nn(~ Category.s + Post.Weekday.s +  Post.Month.s +
                            Post.Hour.s + type3, size=3, decay=0.1),
    sigma.formula = ~ nn(~ Category.s + Post.Weekday.s +  Post.Month.s +
                           Post.Hour.s + type3, size=3, decay=0.1),
    nu.formula = nn(~Category.s + Post.Weekday.s +  Post.Month.s +
                      Post.Hour.s + type3, size=3, decay=0.1),
    family = SICHEL, data = fbdata, n.cyc=100)

Boosting: m.boost

m.boost <- gamboostLSS(share ~ bols(Category) + bbs(Post.Weekday) +  bbs(Post.Month) +
                         bbs(Post.Hour) + bols(Type),
                         data = fbdata,
                         families = as.families("SICHEL"),
                         method = "noncyclic",
                         control = boost_control(nu = 0.1))

Functions for computing the prediction scores

kfold <- function(data, K, seed=2346){
  ## sets up vector of indices for K-fold cross-validation
 set.seed(seed)
 n <- nrow(data)
 m <- floor(n/K)
 rem <- n%%K
 data$row <- 1:n
 CV <- data.frame(y=data$share, pred=NA)
 I <- sample(c(rep(1:K, each=m), sample(K, rem)))
 return(I)
}


kCV <- function(model, data, K, seed=123)
{
  ## produces model predictions using K-fold cross-validation
 data$row <- 1:nrow(data)
 CV <- data.frame(y=model$y, pred=NA)
 I <- kfold(data, K, seed) # vector of indices for K folds

 for(k in 1:K)
   {
   Itrain <- I!=k
   training.data <- data[Itrain,]
   testdata <- data[!Itrain,]
   mtrain <- update(model, data=training.data)
   CV$pred[testdata$row] <- predict(mtrain, newdata=testdata, type="response", data=training.data)
   }
 return(CV)
}

kCV.boost <- function(model, data, K, seed=123)
{
 ## produces model predictions using K-fold cross-validation
 data$row <- 1:nrow(data)
 CV <- data.frame(y=model$mu$response, pred=NA)
 I <- kfold(data, K, seed)  # vector of indices for K folds

 for(k in 1:K)
 {
   Itrain <- I!=k
   training.data <- data[Itrain,]
   testdata <- data[!Itrain,]
   ## The boosting model m.boost has to be hard-coded as it is not
   ## possible for the update() function to work for boosting 
   mtrain <- gamboostLSS(share ~ bols(Category) + bbs(Post.Weekday) + bbs(Post.Month) +
                           bbs(Post.Hour) + bols(Type),
                           data = training.data,
                           families = as.families("SICHEL"), method = "noncyclic",
                           control = boost_control(nu = 0.1))

   cvr <- cvrisk(mtrain, grid = 1:200)
   mstop(mtrain) <- mstop(cvr)
   CV$pred[testdata$row] <- predict(mtrain, newdata=testdata, parameter="mu", type="response",
                                    data=training.data)
 }
return(CV)
}

Evaluation of MAPE and MSEP for model m.SICHEL:

kfold.gamlss <- kCV(model=m.SICHEL, data=fbdata, K=10, seed=2345)
new prediction 
new prediction 
new prediction 
new prediction 
new prediction 
new prediction 
new prediction 
new prediction 
new prediction 
new prediction 
MAPE <- median(100*abs((kfold.gamlss$pred-kfold.gamlss$y)/(kfold.gamlss$y+1)))
MSEP <- mean((kfold.gamlss$pred-kfold.gamlss$y)^2)
MAPE
[1] 59.66588
MSEP
[1] 1810.404

Evaluation of MAPE and MSEP for model m.boost:

kfold.boost <- kCV.boost(model=m.boost, data=fbdata, K=10, seed=2345)
Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...Starting cross-validation...
MAPE <- median(100*abs((kfold.boost$pred-kfold.boost$y)/(kfold.boost$y+1)))
MSEP <- mean((kfold.boost$pred-kfold.boost$y)^2)
MAPE
[1] 67.87735
MSEP
[1] 1784.676

Evaluation of MAPE and MSEP for model m.NN:

kfold.NN <- kCV(model=m.NN, data=fbdata, K=10, seed=2345)

We encountered a problem in that the command

mtrain <- update(model, data=training.data)

does not work for the m.NN model (even though it worked for m.SICHEL). Even more strange, it worked when stepping through the commands manually. We will attempt to fix this, but in the meantime kfold.NN was obtained by stepping through kCV() manually.

MAPE <- median(100*abs((kfold.NN$pred-kfold.NN$y)/(kfold.NN$y+1)))
MSEP <- mean((kfold.NN$pred-kfold.NN$y)^2)
MAPE
[1] 59.62527
MSEP
[1] 1785.128

CRPS

kCRPS <- function(model, data, K, seed, ymax=150)
{
 I <- kfold(data, K, seed)  # vector of indices for K folds
 data$row <- 1:nrow(data)
 y <- 0:ymax
 predicted <- data.frame(row=1:nrow(data), y=model$y, mu=NA, sigma=NA, nu=NA)
 predicted[,6:(6+ymax)] <- NA

 for(k in 1:K)
 {
   Itrain <- I!=k
   training.data <- data[Itrain,]
   test.data <- data[!Itrain,]
   mtrain <- try(update(model, data=training.data))  # doesn't work for NN model
   predtest <- predictAll(mtrain, data=training.data, newdata=test.data, type="response")
   predicted[test.data$row, "mu"] <- predtest$mu
   predicted[test.data$row, "sigma"] <- predtest$sigma
   predicted[test.data$row, "nu"] <- predtest$nu
   }

 for(yi in y) predicted[,6+yi] <-
   (pSICHEL(yi, predicted$mu, predicted$sigma, predicted$nu)-as.numeric(yi>=predicted$y))^2
 predicted$CRPS <- -rowSums(predicted[,-(1:5)])
 return(sum(predicted$CRPS))
}
crps <- kCRPS(model=m.SICHEL, data=fbdata, K=10, seed=2345, ymax=791)
crps
[1] -6259.246

Again the cross-validation did not work for model m.NN and CRPS had to be obtained manually.

Log score

We use the gamlss function gamlssCV() to extract the log scores.

Model m.SICHEL

m.SICHEL.CV <- gamlssCV(formula = share ~ Category + Type + pbc(Post.Weekday) +
                 pbc(Post.Month, control = pbc.control(inter = 5)) +
                 pbc(Post.Hour),
                 sigma.formula = ~Category +
                 pbc(Post.Month, control = pbc.control(inter = 5)) +
                 pbc(Post.Hour) + pbc(Post.Weekday),
                 nu.formula = ~ Category +  Type +
                 pbc(Post.Month, control = pbc.control(inter = 5)) +
                 pbc(Post.Weekday) + pbc(Post.Hour),
                 family = SICHEL, data = fbdata, method = mixed(20, 100),
                 K.fold=10, set.seed=123)
m.SICHEL.CV$CV/(-2) # log likelihood
[1] -2064.038

Model m.NN

The gamlssCV() function does not work for models including nn() terms:

m2 <- gamlssCV(share ~ nn(~ Category.s + Post.Weekday.s +  Post.Month.s +
                            Post.Hour.s + type3, size=3, decay=0.1),
                            sigma.formula = ~ nn(~ Category.s + Post.Weekday.s +
                            Post.Month.s + Post.Hour.s + type3, size=3, decay=0.1),
                            nu.formula = nn(~Category.s + Post.Weekday.s +  Post.Month.s +
                            Post.Hour.s + type3, size=3, decay=0.1),
                            family = SICHEL, data = fbdata, n.cyc=200,
                            K.fold=10, set.seed=123)

At present we have to step through the computations manually. (Hopefully this will be fixed.)

Figure 10.4: Term plots for Category and Post.Month

term.plot(m.SICHEL, what="mu", pages=1, ylim="common",
          terms=c("Category", "pbc(Post.Month, control = pbc.control(inter = 5))"),
          ylab=c("Partial for Category","Partial for pbc(Post.Month)"), mar=c(4, 1, 1, 2) + 0.1)

term.plot(m.SICHEL, what="sigma", pages=1, ylim="common",
          terms=c("Category", "pbc(Post.Month, control = pbc.control(inter = 5))"),
          ylab=c("Partial for Category","Partial for pbc(Post.Month)"))

term.plot(m.SICHEL, what="nu", pages=1, ylim="common",
          terms=c("Category", "pbc(Post.Month, control = pbc.control(inter = 5))"),
          ylab=c("Partial for Category","Partial for pbc(Post.Month)"))

Figure 10.5: Plots of fitted response distribution

library(distreg.vis)

These plots were created using the distreg.vis package, which is interactive. The following is the code generated by the package.

(a) Category

The following scenarios were selected: Post.Month = 10 (October), Post.Weekday = 6 (Friday), Post.Hour = 3, Type = “Photo/Video”, Category = 1, 2, 3 (“action”, “product”, “inspiration”).

covariate_data <- structure(list(Post.Month = c(10L, 10L, 10L),
                                 Post.Weekday = c(6L, 6L, 6L),
                                 Post.Hour = c(3L, 3L, 3L),
                                 Category = structure(c(1L, 2L, 3L),
                                    levels = c("1", "2", "3"), class = "factor"),
                                 Type = c("Photo/Video", "Photo/Video", "Photo/Video")),
                            class = "data.frame",
                            row.names = c("1 action", "2 product", "3 inspiration"))
pred_data <- preds(model = m.SICHEL, newdata = covariate_data)
new prediction 
new prediction 
new prediction 
plot_dist(model = m.SICHEL, pred_params = pred_data, type = "pdf", palette = "Set1") +
  lims(x=c(0,150)) +
  labs(title=NULL) +
  guides(fill=guide_legend(title='Category')) +
  theme(text = element_text(size=14))

(b) Month

The following scenarios were selected: Post.Month = 1, 5, 10 (January, May, October), Post.Weekday = 6 (Friday), Post.Hour = 3, Category = 1 (“action”), Type = “Photo/Video”.

covariate_data <- structure(list(Post.Month = c(1L, 5L, 10L),
                                 Post.Weekday = c(6L, 6L, 6L),
                                 Post.Hour = c(3L, 3L, 3L),
                                 Category = structure(c(1L, 1L, 1L),
                                      levels = c("1", "2", "3"), class = "factor"),
                                 Type = c("Photo/Video", "Photo/Video", "Photo/Video")),
                            class = "data.frame",
                            row.names = c("January", "May", "October"))
pred_data <- preds(model = m.SICHEL, newdata = covariate_data)
new prediction 
new prediction 
new prediction 
plot_dist(model = m.SICHEL, pred_params = pred_data, type = "pdf", palette = "Set1") +
  lims(x=c(0,150)) +
  labs(title=NULL) +
  guides(fill=guide_legend(title='Month')) +
  theme(text = element_text(size=14))