do not allocate xtol_abs unless needed (#268)

* do not allocate xtol_abs unless needed

  As discussed in #183, it is beneficial to avoid allocating potentially huge buffers of size `n` unless `x`-tolerance criteria are used.

* bobyqa: handle xtol_abs == NULL
* cdirect: handle xtol_abs == NULL
* cdirect/hybrid: handle xtol_abs == NULL
* cobyla: handle xtol_abs == NULL
* sbplx: handle xtol_abs == NULL
* newuoa: handle xtol_abs == NULL
* praxis: handle xtol_abs == NULL
* optimize.c: handle xtol_abs == NULL

do not allocate xtol_abs unless needed (#268)
* do not allocate xtol_abs unless needed

  As discussed in #183, it is beneficial to avoid allocating potentially huge buffers of size `n` unless `x`-tolerance criteria are used.

* bobyqa: handle xtol_abs == NULL
* cdirect: handle xtol_abs == NULL
* cdirect/hybrid: handle xtol_abs == NULL
* cobyla: handle xtol_abs == NULL
* sbplx: handle xtol_abs == NULL
* newuoa: handle xtol_abs == NULL
* praxis: handle xtol_abs == NULL
* optimize.c: handle xtol_abs == NULL
bc5f39e6 · aitap · GitHub · f21d2111 · bc5f39e6 · bc5f39e6
10 changed files
--- a/src/algs/bobyqa/bobyqa.c
+++ b/src/algs/bobyqa/bobyqa.c
@@ -3123,9 +3123,11 @@ nlopt_result bobyqa(int n, int npt, double *x,

    /* SGJ, 2009: compute rhoend from NLopt stop info */
    rhoend = stop->xtol_rel * (rhobeg);
-    for (j = 0; j < n; ++j)
-	 if (rhoend < stop->xtol_abs[j] / fabs(s[j]))
-	      rhoend = stop->xtol_abs[j] / fabs(s[j]);
+    if (stop->xtol_abs) {
+        for (j = 0; j < n; ++j)
+         if (rhoend < stop->xtol_abs[j] / fabs(s[j]))
+              rhoend = stop->xtol_abs[j] / fabs(s[j]);
+    }


 /*     This subroutine seeks the least value of a function of many variables, */

--- a/src/algs/cdirect/cdirect.c
+++ b/src/algs/cdirect/cdirect.c
@@ -383,7 +383,7 @@ static int small(double *w, params *p)
 {
     int i;
     for (i = 0; i < p->n; ++i)
-	  if (w[i] > p->stop->xtol_abs[i] &&
+	  if (w[i] > (p->stop->xtol_abs ? p->stop->xtol_abs[i] : 0) &&
 	      w[i] > (p->ub[i] - p->lb[i]) * p->stop->xtol_rel)
 	       return 0;
     return 1;
@@ -575,21 +575,24 @@ nlopt_result cdirect(int n, nlopt_func f, void *f_data,
 {
     cdirect_uf_data d;
     nlopt_result ret;
-     const double *xtol_abs_save;
+     const double *xtol_abs_save = NULL;
     int i;

     d.f = f; d.f_data = f_data; d.lb = lb; d.ub = ub;
-     d.x = (double *) malloc(sizeof(double) * n*4);
+     d.x = (double *) malloc(sizeof(double) * n * (stop->xtol_abs ? 4 : 3));
     if (!d.x) return NLOPT_OUT_OF_MEMORY;
     
     for (i = 0; i < n; ++i) {
 	  x[i] = (x[i] - lb[i]) / (ub[i] - lb[i]);
 	  d.x[n+i] = 0;
 	  d.x[2*n+i] = 1;
-	  d.x[3*n+i] = stop->xtol_abs[i] / (ub[i] - lb[i]);
     }
-     xtol_abs_save = stop->xtol_abs;
-     stop->xtol_abs = d.x + 3*n;
+     if (stop->xtol_abs) {
+       for (i = 0; i < n; ++i)
+         d.x[3*n+i] = stop->xtol_abs[i] / (ub[i] - lb[i]);
+       xtol_abs_save = stop->xtol_abs;
+       stop->xtol_abs = d.x + 3*n;
+     }
     ret = cdirect_unscaled(n, cdirect_uf, &d, d.x+n, d.x+2*n, x, minf, stop,
 			    magic_eps, which_alg);
     stop->xtol_abs = xtol_abs_save;

--- a/src/algs/cdirect/hybrid.c
+++ b/src/algs/cdirect/hybrid.c
@@ -152,7 +152,7 @@ static nlopt_result divide_largest(params *p)
     /* check xtol */
     for (i = 0; i < n; ++i)
 	  if (w[i] > p->stop->xtol_rel * (ub[i] - lb[i])
-	      && w[i] > p->stop->xtol_abs[i])
+	      && w[i] > (p->stop->xtol_abs ? p->stop->xtol_abs[i] : 0))
 	       break;
     if (i == n) return NLOPT_XTOL_REACHED;

@@ -316,21 +316,24 @@ nlopt_result cdirect_hybrid(int n, nlopt_func f, void *f_data,
 {
     cdirect_uf_data d;
     nlopt_result ret;
-     const double *xtol_abs_save;
+     const double *xtol_abs_save = NULL;
     int i;

     d.f = f; d.f_data = f_data; d.lb = lb; d.ub = ub;
-     d.x = (double *) malloc(sizeof(double) * n*4);
+     d.x = (double *) malloc(sizeof(double) * n * (stop->xtol_abs ? 4 : 3));
     if (!d.x) return NLOPT_OUT_OF_MEMORY;

     for (i = 0; i < n; ++i) {
 	  x[i] = (x[i] - lb[i]) / (ub[i] - lb[i]);
 	  d.x[n+i] = 0;
 	  d.x[2*n+i] = 1;
-	  d.x[3*n+i] = stop->xtol_abs[i] / (ub[i] - lb[i]);
     }
-     xtol_abs_save = stop->xtol_abs;
-     stop->xtol_abs = d.x + 3*n;
+     if (stop->xtol_abs) {
+         for (i = 0; i < n; ++i)
+          d.x[3*n+i] = stop->xtol_abs[i] / (ub[i] - lb[i]);
+         xtol_abs_save = stop->xtol_abs;
+         stop->xtol_abs = d.x + 3*n;
+     }
     ret = cdirect_hybrid_unscaled(n, cdirect_uf, &d, d.x+n, d.x+2*n,
 				   x, minf, stop, local_alg, local_maxeval,
 				   randomized_div);

--- a/src/algs/cobyla/cobyla.c
+++ b/src/algs/cobyla/cobyla.c
@@ -219,9 +219,10 @@ nlopt_result cobyla_minimize(unsigned n, nlopt_func f, void *f_data,
     /* SGJ, 2008: compute rhoend from NLopt stop info */
     rhobeg = fabs(dx[0] / s.scale[0]);
     rhoend = stop->xtol_rel * (rhobeg);
-     for (j = 0; j < n; ++j)
-	  if (rhoend < stop->xtol_abs[j] / fabs(s.scale[j]))
-	       rhoend = stop->xtol_abs[j] / fabs(s.scale[j]);
+     if (stop->xtol_abs)
+      for (j = 0; j < n; ++j)
+	   if (rhoend < stop->xtol_abs[j] / fabs(s.scale[j]))
+	        rhoend = stop->xtol_abs[j] / fabs(s.scale[j]);

     /* each equality constraint gives two inequality constraints */
     m = nlopt_count_constraints(m, fc) + 2 * nlopt_count_constraints(p, h);

--- a/src/algs/neldermead/sbplx.c
+++ b/src/algs/neldermead/sbplx.c
@@ -195,7 +195,7 @@ nlopt_result sbplx_minimize(int n, nlopt_func f, void *f_data,
 		  the step size is too large (in early iterations),
 		  the inner Nelder-Mead may not make much progress */
 	       for (j = 0; j < n; ++j)
-		    if (fabs(xstep[j]) * psi > stop->xtol_abs[j]
+		    if (fabs(xstep[j]) * psi > (stop->xtol_abs ? stop->xtol_abs[j] : 0)
 			&& fabs(xstep[j]) * psi > stop->xtol_rel * fabs(x[j]))
 			 break;
 	       if (j == n) {

--- a/src/algs/newuoa/newuoa.c
+++ b/src/algs/newuoa/newuoa.c
@@ -1626,9 +1626,10 @@ static nlopt_result newuob_(int *n, int *npt, double *x,

 /* SGJ, 2008: compute rhoend from NLopt stop info */
    rhoend = stop->xtol_rel * (*rhobeg);
-    for (j = 0; j < *n; ++j)
-	 if (rhoend < stop->xtol_abs[j])
-	      rhoend = stop->xtol_abs[j];
+    if (stop->xtol_abs)
+     for (j = 0; j < *n; ++j)
+	  if (rhoend < stop->xtol_abs[j])
+	       rhoend = stop->xtol_abs[j];

 /* The arguments N, NPT, X, RHOBEG, RHOEND, IPRINT and MAXFUN are identical */
 /*   to the corresponding arguments in SUBROUTINE NEWUOA. */

--- a/src/algs/praxis/praxis.c
+++ b/src/algs/praxis/praxis.c
@@ -189,7 +189,8 @@ nlopt_result praxis_(double t0, double machep, double h0,
 	 t_old = small + t0;
    else {
 	 t_old = 0;
-	 for (i__ = 0; i__ < n; ++i__)
+	 if (stop->xtol_abs)
+	  for (i__ = 0; i__ < n; ++i__)
 	      if (stop->xtol_abs[i__] > t_old)
 		   t_old = stop->xtol_abs[i__];
 	 t_old += small;

--- a/src/api/optimize.c
+++ b/src/api/optimize.c
@@ -622,7 +622,7 @@ static nlopt_result nlopt_optimize_(nlopt_opt opt, double *x, double *minf)
            }
            if (opt->dx)
                nlopt_set_initial_step(local_opt, opt->dx);
-            for (i = 0; i < n && stop.xtol_abs[i] > 0; ++i);
+            for (i = 0; i < n && stop.xtol_abs && stop.xtol_abs[i] > 0; ++i);
            if (local_opt->ftol_rel <= 0 && local_opt->ftol_abs <= 0 && local_opt->xtol_rel <= 0 && i < n) {
                /* it is not sensible to call MLSL without *some*
                   nonzero tolerance for the local search */

--- a/src/api/options.c
+++ b/src/api/options.c
@@ -116,12 +116,8 @@ nlopt_opt NLOPT_STDCALL nlopt_create(nlopt_algorithm algorithm, unsigned n)
            opt->ub = (double *) calloc(n, sizeof(double));
            if (!opt->ub)
                goto oom;
-            opt->xtol_abs = (double *) calloc(n, sizeof(double));
-            if (!opt->xtol_abs)
-                goto oom;
            nlopt_set_lower_bounds1(opt, -HUGE_VAL);
            nlopt_set_upper_bounds1(opt, +HUGE_VAL);
-            nlopt_set_xtol_abs1(opt, 0.0);
        }
    }

@@ -163,9 +159,11 @@ nlopt_opt NLOPT_STDCALL nlopt_copy(const nlopt_opt opt)
            nopt->ub = (double *) malloc(sizeof(double) * (opt->n));
            if (!opt->ub)
                goto oom;
-            nopt->xtol_abs = (double *) malloc(sizeof(double) * (opt->n));
-            if (!opt->xtol_abs)
-                goto oom;
+            if (opt->xtol_abs) {
+                nopt->xtol_abs = (double *) malloc(sizeof(double) * (opt->n));
+                if (!opt->xtol_abs)
+                    goto oom;
+            }
            if (opt->x_weights) {
                nopt->x_weights = (double *) malloc(sizeof(double) * (opt->n));
                if (!opt->x_weights)
@@ -175,7 +173,9 @@ nlopt_opt NLOPT_STDCALL nlopt_copy(const nlopt_opt opt)

            memcpy(nopt->lb, opt->lb, sizeof(double) * (opt->n));
            memcpy(nopt->ub, opt->ub, sizeof(double) * (opt->n));
-            memcpy(nopt->xtol_abs, opt->xtol_abs, sizeof(double) * (opt->n));
+            if (opt->xtol_abs) {
+                memcpy(nopt->xtol_abs, opt->xtol_abs, sizeof(double) * (opt->n));
+            }
        }

        if (opt->m) {
@@ -691,6 +691,10 @@ GETSET(ftol_rel, double, ftol_rel) GETSET(ftol_abs, double, ftol_abs) GETSET(xto
 {
    if (opt) {
        nlopt_unset_errmsg(opt);
+        if (!opt->xtol_abs && opt->n > 0) {
+            opt->xtol_abs = (double *) calloc(opt->n, sizeof(double));
+            if (!opt->xtol_abs) return NLOPT_OUT_OF_MEMORY;
+        }
        memcpy(opt->xtol_abs, xtol_abs, opt->n * sizeof(double));
        return NLOPT_SUCCESS;
    }
@@ -702,6 +706,10 @@ nlopt_result NLOPT_STDCALL nlopt_set_xtol_abs1(nlopt_opt opt, double xtol_abs)
    if (opt) {
        unsigned i;
        nlopt_unset_errmsg(opt);
+        if (!opt->xtol_abs && opt->n > 0) {
+            opt->xtol_abs = (double *) calloc(opt->n, sizeof(double));
+            if (!opt->xtol_abs) return NLOPT_OUT_OF_MEMORY;
+        }
        for (i = 0; i < opt->n; ++i)
            opt->xtol_abs[i] = xtol_abs;
        return NLOPT_SUCCESS;
@@ -713,7 +721,13 @@ nlopt_result NLOPT_STDCALL nlopt_get_xtol_abs(const nlopt_opt opt, double *xtol_
 {
    nlopt_unset_errmsg(opt);
    if (opt && (opt->n == 0 || xtol_abs)) {
-        memcpy(xtol_abs, opt->xtol_abs, opt->n * sizeof(double));
+        if (opt->xtol_abs) {
+            memcpy(xtol_abs, opt->xtol_abs, sizeof(double) * (opt->n));
+        } else {
+            unsigned i;
+            for (i = 0; i < opt->n; ++i)
+                xtol_abs[i] = 0;
+        }
        return NLOPT_SUCCESS;
    }
    return NLOPT_INVALID_ARGS;

--- a/src/util/stop.c
+++ b/src/util/stop.c
@@ -100,6 +100,7 @@ int nlopt_stop_x(const nlopt_stopping * s, const double *x, const double *oldx)
    unsigned i;
    if (diff_norm(s->n, x, oldx, s->x_weights, NULL, NULL) < s->xtol_rel * vector_norm(s->n, x, s->x_weights, NULL, NULL))
        return 1;
+    if (!s->xtol_abs) return 0;
    for (i = 0; i < s->n; ++i)
        if (fabs(x[i] - oldx[i]) >= s->xtol_abs[i])
            return 0;
@@ -111,6 +112,7 @@ int nlopt_stop_dx(const nlopt_stopping * s, const double *x, const double *dx)
    unsigned i;
    if (vector_norm(s->n, dx, s->x_weights, NULL, NULL) < s->xtol_rel * vector_norm(s->n, x, s->x_weights, NULL, NULL))
        return 1;
+    if (!s->xtol_abs) return 0;
    for (i = 0; i < s->n; ++i)
        if (fabs(dx[i]) >= s->xtol_abs[i])
            return 0;
@@ -124,6 +126,7 @@ int nlopt_stop_xs(const nlopt_stopping * s, const double *xs, const double *oldx
    unsigned i;
    if (diff_norm(s->n, xs, oldxs, s->x_weights, scale_min, scale_max) < s->xtol_rel * vector_norm(s->n, xs, s->x_weights, scale_min, scale_max))
        return 1;
+    if (!s->xtol_abs) return 0;
    for (i = 0; i < s->n; ++i)
        if (fabs(sc(xs[i], scale_min[i], scale_max[i]) - sc(oldxs[i], scale_min[i], scale_max[i])) >= s->xtol_abs[i])
            return 0;