data_mismatch.cpp

View page source

Random Effects Variance May Cause Data Mismatch

Model

\[\B{p}( z | \theta ) \sim \B{N} ( \theta , \sigma_z^2 )\]
\[\B{p}( y | \theta ) \sim \B{N} [ \exp( u ) \theta , \sigma_y^2 ]\]
\[\B{p}( u | \theta ) \sim \B{N} ( 0 , \sigma_u^2 )\]

The fixed likelihood g(theta) is

\[g( \theta ) = \frac{1}{2} \left[ \log ( 2 \pi \sigma_z^2 ) + ( z - \theta )^2 \right]\]

The random likelihood f(theta, u) is

\[f(\theta , u ) = \frac{1}{2} \left[ \log ( 2 \pi \sigma_u^2 ) + u^2 / \sigma_u^2 + \log ( 2 \pi \sigma_y^2 ) + [ y - \exp(u) \theta ]^2 / \sigma_y^2 \right]\]

Mismatch

In the case where \(y = z\), one might expect the solution \(\theta = z\) and \(u = 0\) because all the data residuals are zero, \(\B{p}(u | \theta )\) is maximal, and there is no prior for \(\theta\). This example demonstrates that \(\theta = z\) and \(u = 0\) may not be optimal for the this case. To be specific it shows that the derivative of L(theta) may be non-zero.

Theory

See the Laplace Approximation for Mixed Effects Models section for the theory behind the calculations below:

Derivatives

\begin{eqnarray} g_\theta ( \theta ) & = & ( \theta - z ) \sigma_z^{-2} \\ f_\theta ( \theta , u ) & = & [ \exp(u) \theta - y ] \exp(u) \sigma_y^{-2} \\ f_u ( \theta , u ) & = & u / \sigma_u^2 + [ \exp(u) \theta - y ] \exp(u) \theta \sigma_y^{-2} \\ f_{u,u} ( \theta , u ) & = & \sigma_u^{-2} + [ 2 \exp(u) \theta - y ] \exp(u) \theta \sigma_y^{-2} \\ f_{u,\theta} ( \theta , u ) & = & [ 2 \exp(u) \theta - y ] \exp(u) \sigma_y^{-2} \\ \hat{u}_\theta ( \theta ) & = & - f_{u,\theta} [ \theta , \hat{u} ( \theta ) ] / f_{u,u} [ \theta , \hat{u} ( \theta ) ] \\ f_{u,u,u} ( \theta , u ) & = & [ 4 \exp(u) \theta - y ] \exp(u) \theta \sigma_y^{-2} \\ f_{u,u,\theta} ( \theta , u ) & = & [ 4 \exp(u) \theta - y ] \exp(u) \sigma_y^{-2} \end{eqnarray}

Objective

\begin{eqnarray} h( \theta , u ) & = & \frac{1}{2} \log f_{u,u} ( \theta , u ) + f( \theta , u ) - \log( 2 \pi ) \\ h_\theta ( \theta , u ) & = & \frac{1}{2} f_{u,u,\theta} ( \theta , u ) / f_{u,u} ( \theta , u ) + f_\theta ( \theta , u ) \\ h_u ( \theta , u ) & = & \frac{1}{2} f_{u,u,u} ( \theta , u ) / f_{u,u} ( \theta , u ) + f_u ( \theta , u ) \\ L( \theta ) & = & h [ \theta , \hat{u} ( \theta ) ] + g ( \theta ) \\ L_\theta ( \theta ) & = & h_\theta [ \theta , \hat{u} ( \theta ) ] + h_u [ \theta , \hat{u} ( \theta ) ] \hat{u}_\theta ( \theta ) + g_\theta ( \theta ) \end{eqnarray}
# include <cppad/cppad.hpp>
# include <cppad/mixed/cppad_mixed.hpp>

namespace {
   using CppAD::log;
   using CppAD::exp;
   using CppAD::AD;
   //
   using CppAD::mixed::d_sparse_rcv;
   using CppAD::mixed::a1_double;
   using CppAD::mixed::d_vector;
   using CppAD::mixed::a1_vector;

   class mixed_derived : public cppad_mixed {
   private:
      const double y_, z_;
      const double sigma_u_, sigma_y_, sigma_z_;
   public:
      // constructor
      mixed_derived(
         size_t                 n_fixed       ,
         size_t                 n_random      ,
         double                 y             ,
         double                 z             ,
         double                 sigma_u       ,
         double                 sigma_y       ,
         double                 sigma_z       ) :
         cppad_mixed(n_fixed, n_random) ,
         y_(y)                          ,
         z_(z)                          ,
         sigma_u_(sigma_u)              ,
         sigma_y_(sigma_y)              ,
         sigma_z_(sigma_z )
      {  assert( n_fixed == 1 );
         assert( n_random == 1 );
      }
      // implementation of fix_likelihood as p(z|theta) * p(theta)
      a1_vector fix_likelihood(
         const a1_vector&         fixed_vec  ) override
      {
         a1_double theta = fixed_vec[0];

         // initialize log-density
         a1_vector vec(1);
         vec[0] = 0.0;

         // compute this factor once
         // sqrt_2pi = CppAD::sqrt( 8.0 * CppAD::atan(1.0) );

         // Data term p(z|theta)
         a1_double res  = (z_ - theta) / sigma_z_;
         vec[0]    += res * res / 2.0;
         // the following term does not depend on fixed effects
         // vec[0]    += log(sqrt_2pi * sigma_z_ );

         // prior term p(theta)

         return vec;
      }
      // ------------------------------------------------------------------
      // implementation of ran_likelihood as p(y|theta, u) * p(u|theta)
      a1_vector ran_likelihood(
         const a1_vector&         fixed_vec  ,
         const a1_vector&         random_vec ) override
      {
         a1_double theta = fixed_vec[0];
         a1_double u     = random_vec[0];

         // initialize log-density
         a1_vector vec(1);
         vec[0] = 0.0;

         // compute this factors once
         // sqrt_2pi = CppAD::sqrt( 8.0 * CppAD::atan(1.0) );

         // Data term p(y|theta,u)
         a1_double res  = (y_ - exp(u) * theta) / sigma_y_;
         vec[0]    += res * res / 2.0;
         // the following term does not depend on fixed or random effects
         // vec[0]    += log(sqrt_2pi * sigma_y_ );

         // prior term p(u|theta)
         res        = u / sigma_u_;
         vec[0]    += res * res / 2.0;
         // the following term does not depend on fixed or random effects
         // vec[0]    += log(sqrt_2pi * sigma_u_ );

         return vec;
      }
      // ==================================================================
      // Routines used to check that objective derivative is zero at solution
      // g(thata)    = (theta - z)^2           / (2.0 * sigma_z * sigma_z)
      // f(theta, u) = ( exp(u) * theta - y)^2 / (2.0 * sigma_y * sigma_y )
      //             + u^2  / (2.0 * sigma_u * sigma_u);
      double g_theta(double theta)
      {  return (theta - z_) / (sigma_z_ * sigma_z_); }
      //
      double f_theta(double theta, double u)
      {  double num_theta    = 2.0 * (exp(u) * theta - y_) * exp(u);
         double ratio_theta  = num_theta / (2.0 * sigma_y_ * sigma_y_);
         return ratio_theta;
      }
      double f_u(double theta, double u)
      {  double num_u    = 2.0 * (exp(u) * theta - y_) * exp(u) * theta;
         double ratio_u  = num_u / (2.0 * sigma_y_ * sigma_y_);
         return ratio_u + u / (sigma_u_ * sigma_u_);
      }
      double f_uu(double theta, double u)
      {  double term     = exp(u) * theta;
         double num_uu   = 2.0 * (term * term + (term - y_) * term);
         double ratio_uu = num_uu / (2.0 * sigma_y_ * sigma_y_);
         return ratio_uu + 1.0 / (sigma_u_ * sigma_u_);
      }
      double f_utheta(double theta, double u)
      {  double num_utheta    = 2.0 * (exp(u) * theta - y_) * exp(u);
         num_utheta          += 2.0 * exp(u) * exp(u) * theta;
         double ratio_utheta  = num_utheta / (2.0 * sigma_y_ * sigma_y_);
         return ratio_utheta;
      }
      double uhat_theta(double theta, double uhat)
      {  double ret = - f_utheta(theta, uhat) / f_uu(theta, uhat);
         return ret;
      }
      double f_uuu(double theta, double u)
      {  double term = exp(u) * theta;
         // term_u = term
         // num_uu = 2.0 * term * (2.0 * term  - y)
         //        = 4.0 * term^2 - 2.0 * term * y
         double num_uuu  = 8.0 * term * term  - 2.0 * term * y_;
         double ratio_uuu = num_uuu / (2.0 * sigma_y_ * sigma_y_);
         return ratio_uuu;
      }
      double f_uutheta(double theta, double u)
      {  double term = exp(u) * theta;
         // term_theta = exp(u)
         // num_uu = 2.0 * term * (2.0 * term  - y)
         //        = 4.0 * term^2 - 2.0 * term * y
         double num_uutheta = 8.0 * term * exp(u) - 2.0 * y_ * exp(u);
         double ratio_uutheta = num_uutheta / (2.0 * sigma_y_ * sigma_y_);
         return ratio_uutheta;
      }
      double h_theta(double theta, double u)
      {  double ret = 0.5 * f_uutheta(theta, u) / f_uu(theta, u);
         ret       += f_theta(theta, u);
         return ret;
      }
      double h_u(double theta, double u)
      {  double ret = 0.5 * f_uuu(theta, u) / f_uu(theta, u);
         ret       += f_u(theta, u);
         return ret;
      }
      double L_theta(double theta , double uhat)
      {  double ret = h_theta(theta, uhat);
         ret       += h_u(theta, uhat) * uhat_theta(theta, uhat);
         ret       += g_theta(theta);
         return ret;
      }
   };

}

bool data_mismatch_xam(void)
{
   bool   ok = true;
   double inf = std::numeric_limits<double>::infinity();

   size_t n_fixed  = 1;
   size_t n_random = 1;
   double z        = 0.05;
   double y        = z;
   double sigma_u  = 0.1;
   double sigma_y  = 0.1 * y;
   double sigma_z  = 0.1 * z;
   d_vector fixed_in(n_fixed), random_in(n_random);
   d_vector fixed_lower(n_fixed), fixed_upper(n_fixed);
   fixed_lower[0] = -inf;
   fixed_in[0]    = z;
   fixed_upper[0] = + inf;
   random_in[0]   = 0.0;
   //
   // no constraints
   d_vector fix_constraint_lower(0), fix_constraint_upper(0);
   //
   //
   // object that is derived from cppad_mixed
   mixed_derived mixed_object(
      n_fixed, n_random,
      y, z, sigma_u, sigma_y, sigma_z

   );
   mixed_object.initialize(fixed_in, random_in);
   //
   // compute the derivative of the objective at the starting point
   double theta_in   = fixed_in[0];
   double u_in       = random_in[0];
   double L_theta_in = mixed_object.L_theta(theta_in, u_in);
   //
   // derivative corresponding to theta = z = y is greater than 1
   ok &= fabs( L_theta_in ) > 1.0;
   //
   // optimize the fixed effects using quasi-Newton method
   std::string fixed_ipopt_options =
      "Integer print_level               0\n"
      "String  sb                        yes\n"
      "String  derivative_test           adaptive\n"
      "Numeric tol                       1e-8\n"
      "Integer max_iter                  15\n"
   ;
   std::string random_ipopt_options =
      "Integer print_level               0\n"
      "String  sb                        yes\n"
      "String  derivative_test           second-order\n"
      "Numeric tol                       1e-8\n"
   ;
   //
   d_vector random_lower(n_random), random_upper(n_random);
   for(size_t i = 0; i < n_random; i++)
   {  random_lower[i] = -inf;
      random_upper[i] = +inf;
   }
   d_vector fixed_scale = fixed_in;
   CppAD::mixed::fixed_solution solution = mixed_object.optimize_fixed(
      fixed_ipopt_options,
      random_ipopt_options,
      fixed_lower,
      fixed_upper,
      fix_constraint_lower,
      fix_constraint_upper,
      fixed_scale,
      fixed_in,
      random_lower,
      random_upper,
      random_in
   );
   d_vector fixed_out = solution.fixed_opt;
   //
   d_vector random_out = mixed_object.optimize_random(
      random_ipopt_options, fixed_out, random_lower, random_upper, random_in
   );
   //
   // compute the derivative of the objective at the final point
   double theta_out   = fixed_out[0];
   double u_out       = random_out[0];
   double L_theta_out = mixed_object.L_theta(theta_out, u_out);
   //
   // derivative corresponding to solution is less that 1e-7
   ok &= fabs( L_theta_out ) <= 1e-7;
   //
   // Now demonstrate that the solution is still close to the expected values
   ok &= fabs( theta_out / z - 1.0 ) <= 1e-2;
   ok &= fabs( u_out ) <= 1e-2;
   //
   return ok;
}