Algorithms_in_C++  1.0.0
Set of algorithms implemented in C++.
kohonen_som_trace.cpp File Reference

Kohonen self organizing map (data tracing) More...

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <valarray>
#include <vector>
Include dependency graph for kohonen_som_trace.cpp:

Namespaces

 machine_learning
 Machine learning algorithms.
 

Functions

double _random (double a, double b)
 
int save_nd_data (const char *fname, const std::vector< std::valarray< double >> &X)
 
void machine_learning::update_weights (const std::valarray< double > &x, std::vector< std::valarray< double >> *W, std::valarray< double > *D, double alpha, int R)
 
void machine_learning::kohonen_som_tracer (const std::vector< std::valarray< double >> &X, std::vector< std::valarray< double >> *W, double alpha_min)
 
void test_circle (std::vector< std::valarray< double >> *data)
 
void test1 ()
 
void test_lamniscate (std::vector< std::valarray< double >> *data)
 
void test2 ()
 
void test_3d_classes (std::vector< std::valarray< double >> *data)
 
void test3 ()
 
double get_clock_diff (clock_t start_t, clock_t end_t)
 
int main (int argc, char **argv)
 

Detailed Description

Kohonen self organizing map (data tracing)

This example implements a powerful self organizing map algorithm. The algorithm creates a connected network of weights that closely follows the given data points. This this creates a chain of nodes that resembles the given input shape.

Author
Krishna Vedala
Note
This C++ version of the program is considerable slower than its C counterpart
The compiled code is much slower when compiled with MS Visual C++ 2019 than with GCC on windows
See also
kohonen_som_topology.cpp

Function Documentation

◆ get_clock_diff()

double get_clock_diff ( clock_t  start_t,
clock_t  end_t 
)

Convert clock cycle difference to time in seconds

Parameters
[in]start_tstart clock
[in]end_tend clock
Returns
time difference in seconds
452  {
453  return static_cast<double>(end_t - start_t) / CLOCKS_PER_SEC;
454 }

◆ main()

int main ( int  argc,
char **  argv 
)

Main function

457  {
458 #ifdef _OPENMP
459  std::cout << "Using OpenMP based parallelization\n";
460 #else
461  std::cout << "NOT using OpenMP based parallelization\n";
462 #endif
463 
464  std::srand(std::time(nullptr));
465 
466  std::clock_t start_clk = std::clock();
467  test1();
468  auto end_clk = std::clock();
469  std::cout << "Test 1 completed in " << get_clock_diff(start_clk, end_clk)
470  << " sec\n";
471 
472  start_clk = std::clock();
473  test2();
474  end_clk = std::clock();
475  std::cout << "Test 2 completed in " << get_clock_diff(start_clk, end_clk)
476  << " sec\n";
477 
478  start_clk = std::clock();
479  test3();
480  end_clk = std::clock();
481  std::cout << "Test 3 completed in " << get_clock_diff(start_clk, end_clk)
482  << " sec\n";
483 
484  std::cout
485  << "(Note: Calculated times include: creating test sets, training "
486  "model and writing files to disk.)\n\n";
487  return 0;
488 }
Here is the call graph for this function:

◆ test1()

void test1 ( )

Test that creates a random set of points distributed near the circumference of a circle and trains an SOM that finds that circular pattern. The following CSV files are created to validate the execution:

  • test1.csv: random test samples points with a circular pattern
  • w11.csv: initial random map
  • w12.csv: trained SOM map

The outputs can be readily plotted in gnuplot using the following snippet

set datafile separator ','
plot "test1.csv" title "original", \
"w11.csv" title "w1", \
"w12.csv" title "w2"

Sample execution
output

233  {
234  int j = 0, N = 500;
235  int features = 2;
236  int num_out = 50;
239  for (int i = 0; i < std::max(num_out, N); i++) {
240  // loop till max(N, num_out)
241  if (i < N) { // only add new arrays if i < N
242  X[i] = std::valarray<double>(features);
243  }
244  if (i < num_out) { // only add new arrays if i < num_out
245  W[i] = std::valarray<double>(features);
246 
247 #ifdef _OPENMP
248 #pragma omp for
249 #endif
250  for (j = 0; j < features; j++) {
251  // preallocate with random initial weights
252  W[i][j] = _random(-1, 1);
253  }
254  }
255  }
256 
257  test_circle(&X); // create test data around circumference of a circle
258  save_nd_data("test1.csv", X); // save test data points
259  save_nd_data("w11.csv", W); // save initial random weights
260  kohonen_som_tracer(X, &W, 0.1); // train the SOM
261  save_nd_data("w12.csv", W); // save the resultant weights
262 }
Here is the call graph for this function:

◆ test2()

void test2 ( )

Test that creates a random set of points distributed near the locus of the Lamniscate of Gerono and trains an SOM that finds that circular pattern. The following CSV files are created to validate the execution:

  • test2.csv: random test samples points with a lamniscate pattern
  • w21.csv: initial random map
  • w22.csv: trained SOM map

The outputs can be readily plotted in gnuplot using the following snippet

set datafile separator ','
plot "test2.csv" title "original", \
"w21.csv" title "w1", \
"w22.csv" title "w2"

Sample execution
output

315  {
316  int j = 0, N = 500;
317  int features = 2;
318  int num_out = 20;
321  for (int i = 0; i < std::max(num_out, N); i++) {
322  // loop till max(N, num_out)
323  if (i < N) { // only add new arrays if i < N
324  X[i] = std::valarray<double>(features);
325  }
326  if (i < num_out) { // only add new arrays if i < num_out
327  W[i] = std::valarray<double>(features);
328 
329 #ifdef _OPENMP
330 #pragma omp for
331 #endif
332  for (j = 0; j < features; j++) {
333  // preallocate with random initial weights
334  W[i][j] = _random(-1, 1);
335  }
336  }
337  }
338 
339  test_lamniscate(&X); // create test data around the lamniscate
340  save_nd_data("test2.csv", X); // save test data points
341  save_nd_data("w21.csv", W); // save initial random weights
342  kohonen_som_tracer(X, &W, 0.01); // train the SOM
343  save_nd_data("w22.csv", W); // save the resultant weights
344 }
Here is the call graph for this function:

◆ test3()

void test3 ( )

Test that creates a random set of points distributed in six clusters in 3D space. The following CSV files are created to validate the execution:

  • test3.csv: random test samples points with a circular pattern
  • w31.csv: initial random map
  • w32.csv: trained SOM map

The outputs can be readily plotted in gnuplot using the following snippet

set datafile separator ','
plot "test3.csv" title "original", \
"w31.csv" title "w1", \
"w32.csv" title "w2"

Sample execution
output

414  {
415  int j = 0, N = 200;
416  int features = 3;
417  int num_out = 20;
420  for (int i = 0; i < std::max(num_out, N); i++) {
421  // loop till max(N, num_out)
422  if (i < N) { // only add new arrays if i < N
423  X[i] = std::valarray<double>(features);
424  }
425  if (i < num_out) { // only add new arrays if i < num_out
426  W[i] = std::valarray<double>(features);
427 
428 #ifdef _OPENMP
429 #pragma omp for
430 #endif
431  for (j = 0; j < features; j++) {
432  // preallocate with random initial weights
433  W[i][j] = _random(-1, 1);
434  }
435  }
436  }
437 
438  test_3d_classes(&X); // create test data around the lamniscate
439  save_nd_data("test3.csv", X); // save test data points
440  save_nd_data("w31.csv", W); // save initial random weights
441  kohonen_som_tracer(X, &W, 0.01); // train the SOM
442  save_nd_data("w32.csv", W); // save the resultant weights
443 }
Here is the call graph for this function:

◆ test_3d_classes()

void test_3d_classes ( std::vector< std::valarray< double >> *  data)

Creates a random set of points distributed in six clusters in 3D space with centroids at the points

  • \({0.5, 0.5, 0.5}\)
  • \({0.5, 0.5, -0.5}\)
  • \({0.5, -0.5, 0.5}\)
  • \({0.5, -0.5, -0.5}\)
  • \({-0.5, 0.5, 0.5}\)
  • \({-0.5, 0.5, -0.5}\)
  • \({-0.5, -0.5, 0.5}\)
  • \({-0.5, -0.5, -0.5}\)
Parameters
[out]datamatrix to store data in
359  {
360  const int N = data->size();
361  const double R = 0.1; // radius of cluster
362  int i = 0;
363  const int num_classes = 8;
364  const std::array<const std::array<double, 3>, num_classes> centres = {
365  // centres of each class cluster
366  std::array<double, 3>({.5, .5, .5}), // centre of class 0
367  std::array<double, 3>({.5, .5, -.5}), // centre of class 1
368  std::array<double, 3>({.5, -.5, .5}), // centre of class 2
369  std::array<double, 3>({.5, -.5, -.5}), // centre of class 3
370  std::array<double, 3>({-.5, .5, .5}), // centre of class 4
371  std::array<double, 3>({-.5, .5, -.5}), // centre of class 5
372  std::array<double, 3>({-.5, -.5, .5}), // centre of class 6
373  std::array<double, 3>({-.5, -.5, -.5}) // centre of class 7
374  };
375 
376 #ifdef _OPENMP
377 #pragma omp for
378 #endif
379  for (i = 0; i < N; i++) {
380  int cls =
381  std::rand() % num_classes; // select a random class for the point
382 
383  // create random coordinates (x,y,z) around the centre of the class
384  data[0][i][0] = _random(centres[cls][0] - R, centres[cls][0] + R);
385  data[0][i][1] = _random(centres[cls][1] - R, centres[cls][1] + R);
386  data[0][i][2] = _random(centres[cls][2] - R, centres[cls][2] + R);
387 
388  /* The follosing can also be used
389  for (int j = 0; j < 3; j++)
390  data[0][i][j] = _random(centres[cls][j] - R, centres[cls][j] + R);
391  */
392  }
393 }
Here is the call graph for this function:

◆ test_circle()

void test_circle ( std::vector< std::valarray< double >> *  data)

Creates a random set of points distributed near the circumference of a circle and trains an SOM that finds that circular pattern. The generating function is

\begin{eqnarray*} r &\in& [1-\delta r, 1+\delta r)\\ \theta &\in& [0, 2\pi)\\ x &=& r\cos\theta\\ y &=& r\sin\theta \end{eqnarray*}

Parameters
[out]datamatrix to store data in
196  {
197  const int N = data->size();
198  const double R = 0.75, dr = 0.3;
199  double a_t = 0., b_t = 2.f * M_PI; // theta random between 0 and 2*pi
200  double a_r = R - dr, b_r = R + dr; // radius random between R-dr and R+dr
201  int i = 0;
202 
203 #ifdef _OPENMP
204 #pragma omp for
205 #endif
206  for (i = 0; i < N; i++) {
207  double r = _random(a_r, b_r); // random radius
208  double theta = _random(a_t, b_t); // random theta
209  data[0][i][0] = r * cos(theta); // convert from polar to cartesian
210  data[0][i][1] = r * sin(theta);
211  }
212 }
Here is the call graph for this function:

◆ test_lamniscate()

void test_lamniscate ( std::vector< std::valarray< double >> *  data)

Creates a random set of points distributed near the locus of the Lamniscate of Gerono.

\begin{eqnarray*} \delta r &=& 0.2\\ \delta x &\in& [-\delta r, \delta r)\\ \delta y &\in& [-\delta r, \delta r)\\ \theta &\in& [0, \pi)\\ x &=& \delta x + \cos\theta\\ y &=& \delta y + \frac{\sin(2\theta)}{2} \end{eqnarray*}

Parameters
[out]datamatrix to store data in
277  {
278  const int N = data->size();
279  const double dr = 0.2;
280  int i = 0;
281 
282 #ifdef _OPENMP
283 #pragma omp for
284 #endif
285  for (i = 0; i < N; i++) {
286  double dx = _random(-dr, dr); // random change in x
287  double dy = _random(-dr, dr); // random change in y
288  double theta = _random(0, M_PI); // random theta
289  data[0][i][0] = dx + cos(theta); // convert from polar to cartesian
290  data[0][i][1] = dy + sin(2. * theta) / 2.f;
291  }
292 }
Here is the call graph for this function:
std::srand
T srand(T... args)
std::clock_t
std::cos
T cos(T... args)
std::vector
STL class.
test_3d_classes
void test_3d_classes(std::vector< std::valarray< double >> *data)
Definition: kohonen_som_trace.cpp:359
save_nd_data
int save_nd_data(const char *fname, const std::vector< std::valarray< double >> &X)
Definition: kohonen_som_trace.cpp:58
get_clock_diff
double get_clock_diff(clock_t start_t, clock_t end_t)
Definition: kohonen_som_trace.cpp:452
test1
void test1()
Definition: kohonen_som_trace.cpp:233
std::clock
T clock(T... args)
std::cout
std::array
STL class.
std::valarray< double >
test2
void test2()
Definition: kohonen_som_trace.cpp:315
std::rand
T rand(T... args)
test_lamniscate
void test_lamniscate(std::vector< std::valarray< double >> *data)
Definition: kohonen_som_trace.cpp:277
std::sin
T sin(T... args)
data
int data[MAX]
test data
Definition: hash_search.cpp:24
test3
void test3()
Definition: kohonen_som_trace.cpp:414
std::time
T time(T... args)
std::max
T max(T... args)
_random
double _random(double a, double b)
Definition: kohonen_som_topology.cpp:53
machine_learning::kohonen_som_tracer
void kohonen_som_tracer(const std::vector< std::valarray< double >> &X, std::vector< std::valarray< double >> *W, double alpha_min)
Definition: kohonen_som_trace.cpp:149
test_circle
void test_circle(std::vector< std::valarray< double >> *data)
Definition: kohonen_som_trace.cpp:196