ReFRACtor
connor_solver.cc
Go to the documentation of this file.
1 #include "connor_solver.h"
2 #include "linear_algebra.h"
3 #include "fp_exception.h"
4 #include "logger.h"
5 #include "ifstream_cs.h"
6 #include <fstream>
7 
8 using namespace FullPhysics;
9 using namespace blitz;
10 
11 #ifdef HAVE_LUA
12 #include "register_lua.h"
14 .def(luabind::constructor<const boost::shared_ptr<CostFunction>&,
16  double>())
17 .def(luabind::constructor<const boost::shared_ptr<CostFunction>&,
19  double, std::string>())
21 #endif
22 
23 // Note that we have the various equations here in Latex. This is
24 // fairly unreadable as text comments, but if you view the doxgyen
25 // documentation created by this it gives a nicely formatted output.
26 
27 //-----------------------------------------------------------------------
30 //-----------------------------------------------------------------------
31 
32 double ConnorSolver::rcond = 1e-12;
33 
34 //-----------------------------------------------------------------------
40 //-----------------------------------------------------------------------
41 
43 {
45  (new ConnorSolverState(x_i, x_a, apriori_cov_scaled, sa_m1_scaled,
46  sigma_ap, gamma, gamma_last_step_, gamma_initial,
47  residual_, se, k, kt_se_m1_k, dx, fstat));
48 }
49 
50 //-----------------------------------------------------------------------
54 //-----------------------------------------------------------------------
55 
57 {
58  x_i.reference(S.x_i().copy());
59  x_a.reference(S.x_a().copy());
60  apriori_cov_scaled.reference(S.apriori_cov_scaled().copy());
61  sa_m1_scaled.reference(S.sa_m1_scaled().copy());
62  sigma_ap.reference(S.sigma_ap().copy());
63  gamma = S.gamma();
64  gamma_last_step_ = S.gamma_last_step();
65  gamma_initial = S.gamma_initial();
66  residual_.reference(S.residual().copy());
67  se.reference(S.se().copy());
68  k.reference(S.k().copy());
69  kt_se_m1_k.reference(S.kt_se_m1_k().copy());
70  dx.reference(S.dx().copy());
71  fstat = S.fstat();
72 }
73 
74 //-----------------------------------------------------------------------
79 //-----------------------------------------------------------------------
80 
81 void ConnorSolver::to_stream(std::ostream& Os) const
82 {
83  Os << "# X_i\n"
84  << x_i << "\n\n"
85  << "# X_a\n"
86  << x_a << "\n\n"
87  << "# apriori_cov_scaled\n"
88  << apriori_cov_scaled << "\n\n"
89  << "# sa_m1_scaled\n"
90  << sa_m1_scaled << "\n\n"
91  << "# sigma_ap\n"
92  << sigma_ap << "\n\n"
93  << "# gamma\n"
94  << gamma << "\n\n"
95  << "# gamma_last_step\n"
96  << gamma_last_step_ << "\n\n"
97  << "# gamma_intial\n"
98  << gamma_initial << "\n\n"
99  << "# residual\n"
100  << residual_ << "\n\n"
101  << "# se\n"
102  << se << "\n\n"
103  << "# k \n"
104  << k << "\n\n"
105  << "#kt_se_m1_k\n"
106  << kt_se_m1_k << "\n\n"
107  << "# dx\n"
108  << dx << "\n\n"
109  << "# Fstat\n";
110  fstat.to_stream(Os);
111  Os << "\n";
112 }
113 
114 //-----------------------------------------------------------------------
116 //-----------------------------------------------------------------------
117 
118 void ConnorSolver::from_stream(std::istream& is)
119 {
120  is >> x_i
121  >> x_a
122  >> apriori_cov_scaled
123  >> sa_m1_scaled
124  >> sigma_ap
125  >> gamma
126  >> gamma_last_step_
127  >> gamma_initial
128  >> residual_
129  >> se
130  >> k
131  >> kt_se_m1_k
132  >> dx
133  >> fstat;
134 }
135 
136 //-----------------------------------------------------------------------
148 //-----------------------------------------------------------------------
149 
150 bool ConnorSolver::solve(const blitz::Array<double, 1>& Initial_guess,
151  const blitz::Array<double, 1>& Apriori,
152  const blitz::Array<double, 2>& Apriori_cov)
153 
154 {
155  using namespace blitz;
156  firstIndex i1; secondIndex i2;
157 
158 // Check for consistent data.
159  if(Initial_guess.rows() != Apriori.rows())
160  throw Exception("Intitial guess and Apriori must be the same size");
161  if(Apriori.rows() != Apriori_cov.rows() ||
162  Apriori.rows() != Apriori_cov.cols())
163  throw Exception("Apriori and Apriori_cov must be the same size");
164 
165 // Initialize variables from input data
166 
167  x_i.reference(Initial_guess.copy());
168  x_a.reference(Apriori.copy());
169  sigma_ap.resize(Apriori_cov.rows());
170  sigma_ap = sqrt(Apriori_cov(i1, i1));
171  gamma = gamma_initial;
172 
173  apriori_cov_scaled.resize(Apriori_cov.shape());
174  apriori_cov_scaled = Apriori_cov(i1, i2) / (sigma_ap(i1) * sigma_ap(i2));
175 
176 // Do Levenberg-Marquardt solution.
177 
178  bool has_converged = false;
179  convergence_check()->initialize_check();
180  FitStatistic fnew;
181  fstat = fnew;
182  Array<double, 1> x_i_last, se_last, residual_last;
183  Array<double, 2> k_last;
184  FitStatistic fstat_last;
185  bool first = true;
186  while(!has_converged) {
187  fstat.number_iteration += 1;
188  cost_function()->cost_function(x_i, residual_, se, k);
189  // To aid in testing, save state if requested.
190  if(first && save_test_data_ != "") {
191  std::ofstream save(save_test_data_.c_str());
192  save.precision(12);
193  to_stream(save);
194  }
195  first = false;
196  do_inversion();
197 
198  bool step_diverged, convergence_failed;
199  gamma_last_step_ = gamma;
200  convergence_check()->convergence_check(fstat_last, fstat,
201  has_converged,
202  convergence_failed,
203  gamma, step_diverged);
204  if(convergence_failed) { // Return failure to converge if check
205  // tells us to.
206  fstat.fit_succeeded = false;
207  notify_update_do(*this);
208  return false;
209  }
210  if(step_diverged) {
211  // Redo last step, but with new gamma calculated by
212  // convergence_test
213  x_i.resize(x_i_last.shape());
214  se.resize(se_last.shape());
215  residual_.resize(residual_last.shape());
216  k.resize(k_last.shape());
217  x_i = x_i_last; se = se_last, residual_ = residual_last, k = k_last;
218  do_inversion();
219  fstat.number_iteration -= 1;
220  fstat.number_divergent += 1;
221  } else {
222  // Stash data, in case we need to backup the next step.
223  x_i_last.resize(x_i.shape());
224  se_last.resize(se.shape());
225  residual_last.resize(residual_.shape());
226  k_last.resize(k.shape());
227  x_i_last = x_i; se_last = se; residual_last = residual_; k_last = k;
228  fstat_last = fstat;
229  }
230 
231  x_i += dx;
232  notify_update_do(*this);
233  }
234  fstat.fit_succeeded = true;
235  convergence_check()->evaluate_quality(fstat, residual_, se);
236  return true;
237 }
238 
239 //-----------------------------------------------------------------------
242 //-----------------------------------------------------------------------
243 
244 void ConnorSolver::test_do_inversion(const std::string& Fname,
245  blitz::Array<double, 1>& Dx,
246  blitz::Array<double, 2>& Kt_se_m1_k)
247 {
248  firstIndex i1; secondIndex i2; thirdIndex i3; fourthIndex i4;
249  IfstreamCs in(Fname);
250  from_stream(in);
251 
252  do_inversion();
253  Dx.reference(dx);
254  Kt_se_m1_k.reference(kt_se_m1_k);
255 }
256 
257 
258 /****************************************************************/
295 {
296  using namespace blitz;
297  firstIndex i1; secondIndex i2; thirdIndex i3;
298 
299 //-----------------------------------------------------------------------
300 // Calculate kt_se_m1_k, which we keep around to use in the error
301 // analysis calculation
302 //-----------------------------------------------------------------------
303 
304  kt_se_m1_k.resize(k.cols(), k.cols());
305  kt_se_m1_k = sum(k(i3,i1) / se(i3) * k(i3, i2), i3);
306 
307 //-----------------------------------------------------------------------
308 // The Jacobian may have a column that is all zero (e.g., it is
309 // ignoring a parameter). In that case we want the entire lhs of the
310 // equation to be zero for that row/col, so we don't update that
311 // parameter. Without this, the least squares solution would tend to
312 // force us to move the parameter to the apriori value.
313 //-----------------------------------------------------------------------
314 
315  Array<double, 1> zero_unused_parm_v = zero_unused_parm();
316 
317 //-----------------------------------------------------------------------
318 // Calculate the left hand side of the equation.
319 //-----------------------------------------------------------------------
320 
321  Array<double, 2> lhs(sigma_ap.rows(), sigma_ap.rows());
322  Array<double, 1> rhs(lhs.rows());
323  sa_m1_scaled.resize(apriori_cov_scaled.shape());
324  Array<bool, 1> zero_unused_flag(zero_unused_parm_v.shape());
325  zero_unused_flag = (zero_unused_parm_v < 1e-8);
326  sa_m1_scaled = generalized_inverse(apriori_cov_scaled, zero_unused_flag);
327  lhs = ( ((1 + gamma) * sa_m1_scaled(i1, i2) +
328  sigma_ap(i1) * kt_se_m1_k(i1, i2) * sigma_ap(i2)) *
329  zero_unused_parm_v(i1) * zero_unused_parm_v(i2));
330 
331 //-----------------------------------------------------------------------
332 // Calculate the right hand side of the equation. Note the "-"
333 // before residual is because the residual is F(x) - y, but we have
334 // y - F(x) in our equation.
335 //-----------------------------------------------------------------------
336 
337  rhs = sigma_ap(i1) * sum(k(i2, i1) * (-residual_(i2)) / se(i2),i2) +
338  sum(sa_m1_scaled(i1, i2) * (x_a(i2) - x_i(i2)) / sigma_ap(i2), i2);
339 
340 //-----------------------------------------------------------------------
341 // Solve equation, and scale back to give the final answer.
342 //-----------------------------------------------------------------------
343 
344  dx.resize(lhs.cols());
345  Array<double, 1> dxscale = solve_least_squares(lhs, rhs, rcond);
346  dx = sigma_ap * dxscale;
347 
348 //-----------------------------------------------------------------------
349 // Calculate the fit statistics.
350 //-----------------------------------------------------------------------
351 
352  fstat.d_sigma_sq = sum(dxscale * rhs);
353  // The jacobian may have degenerate rows, if so we don't want to
354  // count them for the d_sigma_sq calculation, because this is
355  // defined in terms of parameters that actually can vary.
356  int d_sigma_sq_numrow = count(max(abs(kt_se_m1_k), i2) > 1e-20);
357  fstat.d_sigma_sq_scaled = fstat.d_sigma_sq / d_sigma_sq_numrow;
358  fstat.chisq_apriori = sum(((x_a - x_i) / sigma_ap(i1) *
359  sum(sa_m1_scaled(i1, i2) *
360  (x_a(i2) - x_i(i2)) / sigma_ap(i2), i2))
361  * zero_unused_parm_v(i1));
362  fstat.chisq_measured = sum(residual_ * residual_ / se);
363 
364 //-----------------------------------------------------------------------
365 // Forecast what residual and state vector will be in next
366 // iteration, assuming cost function is fully linear.
367 //-----------------------------------------------------------------------
368 
369  Array<double, 1> residual_fc = residual_.copy();
370  residual_fc += sum(k(i1, i2) * dx(i2), i2);
371  Array<double, 1> x_i_fc = x_i.copy();
372  x_i_fc += dx;
373 
374  fstat.chisq_apriori_fc = sum(((x_a - x_i_fc) / sigma_ap(i1) *
375  sum(sa_m1_scaled(i1, i2) *
376  (x_a(i2) - x_i_fc(i2)) / sigma_ap(i2), i2))
377  * zero_unused_parm_v(i1));
378  fstat.chisq_measured_fc = sum(residual_fc * residual_fc / se);
379 }
380 
381 //-----------------------------------------------------------------------
384 //-----------------------------------------------------------------------
385 
387 {
388  firstIndex i1; secondIndex i2;
389  Array<double, 2> t(sa_m1_scaled.shape());
390  Array<double, 1> zero_unused_parm_v = zero_unused_parm();
391  Array<bool, 1> zero_unused_flag(zero_unused_parm_v.shape());
392  zero_unused_flag = (zero_unused_parm_v < 1e-8);
393  t = sa_m1_scaled + sigma_ap(i1) * kt_se_m1_k * sigma_ap(i2);
394  Array<double, 2> res(t.shape());
395  res = generalized_inverse(t, zero_unused_flag);
396  return res;
397 }
398 
399 //-----------------------------------------------------------------------
401 //-----------------------------------------------------------------------
402 
403 Array<double, 2> ConnorSolver::aposteriori_covariance() const
404 {
405  firstIndex i1; secondIndex i2;
406  Array<double, 2> ascale(aposteriori_covariance_scaled());
407  Array<double, 2> res(ascale.shape());
408  res = ascale * sigma_ap(i1) * sigma_ap(i2);
409  return res;
410 }
411 
412 //-----------------------------------------------------------------------
416 //-----------------------------------------------------------------------
417 
418 blitz::Array<double, 1> ConnorSolver::x_solution_zero_unused() const
419 {
420  Array<double, 1> res(x_i * zero_unused_parm());
421  return res;
422 }
423 
424 //-----------------------------------------------------------------------
426 //-----------------------------------------------------------------------
427 
428 Array<double, 2> ConnorSolver::averaging_kernel() const
429 {
430  firstIndex i1; secondIndex i2; thirdIndex i3;
431  Array<double, 2> res(sa_m1_scaled.shape());
432 
433  res = zero_unused_parm()(i1) * sigma_ap(i1) *
434  sum(aposteriori_covariance_scaled()(i1, i3) *
435  sigma_ap(i3) * kt_se_m1_k(i3, i2), i3) * zero_unused_parm()(i2);
436  return res;
437 }
438 
439 //-----------------------------------------------------------------------
442 //-----------------------------------------------------------------------
443 
444 blitz::Array<double, 1> ConnorSolver::x_solution_uncertainty() const
445 {
446  blitz::Array<double, 2> cov = aposteriori_covariance();
447  blitz::Array<double, 1> res(cov.rows());
448  Array<double, 1> zero_unused_parm_v = zero_unused_parm();
449  for(int i = 0; i < res.rows(); ++i)
450  // Due to roundoff, the covariance matrix might be very close to 0
451  // but negative. To avoid nan, just set these to 0
452  if(cov(i, i) > 0)
453  res(i) = sqrt(cov(i, i)) * zero_unused_parm_v(i);
454  else
455  res(i) = 0.0;
456  return res;
457 }
458 
459 //-----------------------------------------------------------------------
461 //-----------------------------------------------------------------------
462 
463 Array<double, 1> ConnorSolver::zero_unused_parm() const
464 {
465  firstIndex i1; secondIndex i2; thirdIndex i3;
466  Array<double, 1> res(sigma_ap.rows());
467  res = where(max(abs(kt_se_m1_k), i2) <= 1e-20, 0, 1);
468  return res;
469 }
470 
471 //-----------------------------------------------------------------------
473 //-----------------------------------------------------------------------
474 
475 Array<double, 2> ConnorSolver::apriori_covariance() const
476 {
477  firstIndex i1; secondIndex i2; thirdIndex i3;
478  Array<double, 2> res(apriori_cov_scaled.shape());
479  res = apriori_cov_scaled * sigma_ap(i1) * sigma_ap(i2);
480  return res;
481 }
blitz::Array< double, 1 > zero_unused_parm() const
Array that is 0 where we are not using a parameter, and 1 elsewhere.
const blitz::Array< double, 1 > & x_i() const
const blitz::Array< double, 2 > & kt_se_m1_k() const
blitz::Array< double, 1 > solve_least_squares(const blitz::Array< double, 2 > &A, const blitz::Array< double, 1 > &B, double Rcond=1e-12)
This solves the least squares system A*x = b, returning the minimum norm solution.
This class holds various parameters describing how good of a fit we have.
virtual bool solve(const blitz::Array< double, 1 > &Initial_guess, const blitz::Array< double, 1 > &Apriori, const blitz::Array< double, 2 > &Apriori_cov)
This solves the least squares problem starting at the initial guess.
boost::shared_ptr< ConnorSolverState > state() const
Save state to a ConnorSolverState object.
const blitz::Array< double, 1 > & sigma_ap() const
const FitStatistic & fstat() const
const blitz::Array< double, 2 > & k() const
Class that saves the state of a ConnorSolver.
const blitz::Array< double, 1 > & residual() const
This is the base of the exception hierarchy for Full Physics code.
Definition: fp_exception.h:16
void test_do_inversion(const std::string &Fname, blitz::Array< double, 1 > &Dx, blitz::Array< double, 2 > &Kt_se_m1_k)
Restore state previously saved, and run do_inversion.
blitz::Array< double, 1 > x_solution_zero_unused() const
Return the solution to the last problem solved.
#define REGISTER_LUA_CLASS(X)
Definition: register_lua.h:116
Apply value function to a blitz array.
virtual blitz::Array< double, 2 > aposteriori_covariance() const
Return a posteriori covariance matrix for last problem solved.
const blitz::Array< double, 2 > & apriori_cov_scaled() const
const blitz::Array< double, 1 > & dx() const
blitz::Array< double, 2 > apriori_covariance() const
Apriori covariance matrix.
blitz::Array< double, 2 > generalized_inverse(const blitz::Array< double, 2 > &A, double Rcond=std::numeric_limits< double >::epsilon())
This returns the generalized inverse of the given matrix.
It can be convenient to allow comments in a text data file that are ignored by the program reading it...
Definition: ifstream_cs.h:17
void to_stream(std::ostream &os) const
Save state of object.
blitz::Array< double, 1 > x_solution_uncertainty() const
Return the uncertainty of x_solution, this is just the sqrt of the diagonal of the full aposteriori_c...
virtual blitz::Array< double, 2 > aposteriori_covariance_scaled() const
Return a scaled a posteriori covariance matrix for last problem solved.
const blitz::Array< double, 1 > & x_a() const
void from_stream(std::istream &is)
Restore state of object previous saved with "to_stream".
Contains classes to abstract away details in various Spurr Radiative Transfer software.
Definition: doxygen_python.h:1
#define REGISTER_LUA_END()
Definition: register_lua.h:134
static double rcond
Factor to determine if we treat a singular factor as 0.
virtual blitz::Array< double, 2 > averaging_kernel() const
Return averaging kernel for last problem solved.
int number_iteration
Number of iterations.
const blitz::Array< double, 2 > & sa_m1_scaled() const
void do_inversion()
This does an inversion step.
const blitz::Array< double, 1 > & se() const

Copyright © 2017, California Institute of Technology.
ALL RIGHTS RESERVED.
U.S. Government Sponsorship acknowledged.
Generated Fri Aug 24 2018 15:44:08