提交 8b7d0b51 编写于 作者: sahduashufa's avatar sahduashufa

add autodiff

上级 dedb2de4
CXX = g++
BIN = bin
LIB = lib
OBJS_MAIN = root/obj/dual.o root/obj/gradient.o root/obj/main.o
all : main
main : $(BIN)
$(MAKE) -C root obj obj/dual.o obj/gradient.o obj/main.o
$(CXX) -o $(BIN)/main $(OBJS_MAIN) $(LIBS)
$(BIN) :
if [ ! -d $(BIN) ]; then mkdir $(BIN); fi
$(LIB) :
if [ ! -d $(LIB) ]; then mkdir $(LIB); fi
clean :
$(MAKE) -C root clean
if [ -d $(BIN) ]; then rm $(BIN) -r; fi
install : $(LIB)
ar rcs $(LIB)/libgradient.a root/obj/dual.o root/obj/gradient.o
sudo mv $(LIB)/libgradient.a /usr/local/lib/mylibs
sudo ln -sf /usr/local/lib/mylibs/libgradient.a /usr/local/lib/libgradient.a
sudo cp root/include/gradient.h /usr/local/include/mylibs
.PHONY : all
.PHONY : install
.PHONY : main
.PHONY : clean
# Automatic Differentiation
Very simple automatic differentiation tool, implemented using dual numbers and operator overloading.
This code is part of my article on **Medium** : **[Automatic Differentiation](https://medium.com/@omaraflak/automatic-differentiation-4d26d03b7508)**.
# Example
```c++
// Dual(value, derivative=0)
Dual x(5, 1); // derivative=1 means we are going to derive with respect to this variable.
Dual y(6);
Dual f = pow(x,2)*y; // the derivative is calculated when the function is computed.
std::cout << f.getDerivative() << std::endl; // get the derivative of y*x^2 with respect to x, evaluated at (x=5,y=6).
```
CXX = g++
ODIR = obj
CXXFLAGS = -std=c++14
OBJS = $(ODIR)/gradient.o $(ODIR)/dual.o $(ODIR)/main.o
all : $(ODIR) $(OBJS)
$(ODIR)/gradient.o : src/gradient.cpp include/gradient.h
$(CXX) -c $< -o $@ $(CXXFLAGS)
$(ODIR)/dual.o : src/dual.cpp include/dual.h
$(CXX) -c $< -o $@ $(CXXFLAGS)
$(ODIR)/main.o : src/main.cpp include/dual.h include/gradient.h
$(CXX) -c $< -o $@ $(CXXFLAGS)
$(ODIR) :
if [ ! -d $(ODIR) ]; then mkdir $(ODIR); fi
clean :
if [ -d $(ODIR) ]; then rm $(ODIR) -r; fi
.PHONY : all
.PHONY : clean
#ifndef DUAL
#define DUAL
#include <iostream>
#include <cmath>
class Dual {
private:
double val;
double der;
public:
Dual();
Dual(double val);
Dual(double val, double der);
void setValue(double value);
void setDerivative(double derivative);
double getValue() const;
double getDerivative() const;
// operators
friend Dual operator+(const Dual& u, const Dual& v);
friend Dual operator-(const Dual& u, const Dual& v);
friend Dual operator*(const Dual& u, const Dual& v);
friend Dual operator/(const Dual& u, const Dual& v);
Dual operator+=(const Dual& u);
Dual operator-=(const Dual& u);
Dual operator*=(const Dual& u);
Dual operator/=(const Dual& u);
friend std::ostream& operator<<(std::ostream& os, const Dual& a);
// maths
friend Dual sin(Dual d);
friend Dual cos(Dual d);
friend Dual exp(Dual d);
friend Dual log(Dual d);
friend Dual abs(Dual d);
friend Dual pow(Dual d, double p);
};
#endif
#ifndef GRADIENT
#define GRADIENT
#include "dual.h"
#include <vector>
namespace dual{
namespace{
typedef std::vector<Dual> Vector;
typedef std::vector<Vector> Matrix;
}
void resetDerivatives(Vector& input);
Vector gradient(Dual (*f)(Vector&), Vector& input);
Matrix jacobian(Vector (*f)(Vector&), Vector& input);
}
#endif
#include "../include/dual.h"
Dual::Dual(){
this->val = 0;
this->der = 0;
}
Dual::Dual(double val){
this->val = val;
this->der = 0;
}
Dual::Dual(double val, double der){
this->val = val;
this->der = der;
}
void Dual::setValue(double value){
this->val = value;
}
void Dual::setDerivative(double derivative){
this->der = derivative;
}
double Dual::getValue() const{
return val;
}
double Dual::getDerivative() const{
return der;
}
// operators
Dual operator+(const Dual& u, const Dual& v){
return Dual(u.val+v.val, u.der+v.der);
}
Dual operator-(const Dual& u, const Dual& v){
return Dual(u.val-v.val, u.der-v.der);
}
Dual operator*(const Dual& u, const Dual& v){
return Dual(u.val*v.val, u.der*v.val+u.val*v.der);
}
Dual operator/(const Dual& u, const Dual& v){
return Dual(u.val/v.val, (u.der*v.val-u.val*v.der)/(v.val*v.val));
}
Dual Dual::operator+=(const Dual& u){
*this = *this+u;
return *this;
}
Dual Dual::operator-=(const Dual& u){
*this = *this-u;
return *this;
}
Dual Dual::operator*=(const Dual& u){
*this = *this*u;
return *this;
}
Dual Dual::operator/=(const Dual& u){
*this = *this/u;
return *this;
}
std::ostream& operator<<(std::ostream& os, const Dual& a){
os << a.val;
return os;
}
// maths
Dual sin(Dual d){
return Dual(::sin(d.val), d.der*::cos(d.val));
}
Dual cos(Dual d){
return Dual(::cos(d.val), -d.der*::sin(d.val));
}
Dual exp(Dual d){
return Dual(::exp(d.val), d.der*::exp(d.val));
}
Dual log(Dual d){
return Dual(::log(d.val), d.der/d.val);
}
Dual abs(Dual d){
int sign = d.val==0 ? 0 : d.val/::abs(d.val);
return Dual(::abs(d.val), d.der*sign);
}
Dual pow(Dual d, double p){
return Dual(::pow(d.val, p), p*d.der*::pow(d.val, p-1));
}
#include "../include/gradient.h"
void dual::resetDerivatives(dual::Vector& input){
for(auto& e : input){
e.setDerivative(0);
}
}
dual::Vector dual::gradient(Dual (*f)(dual::Vector&), dual::Vector& input){
resetDerivatives(input);
dual::Vector grad(input.size());
for(size_t i=0 ; i<grad.size() ; i++){
input[i].setDerivative(1);
grad[i] = f(input).getDerivative();
input[i].setDerivative(0);
}
return grad;
}
dual::Matrix dual::jacobian(dual::Vector (*f)(dual::Vector&), dual::Vector& input){
resetDerivatives(input);
dual::Matrix jac(f(input).size(), dual::Vector(input.size()));
for(size_t i=0 ; i<jac.size() ; i++){
for(size_t j=0 ; j<jac[i].size() ; j++){
input[j].setDerivative(1);
jac[i][j] = f(input)[i].getDerivative();
input[j].setDerivative(0);
}
}
return jac;
}
#include <iostream>
#include "../include/dual.h"
#include "../include/gradient.h"
typedef std::vector<Dual> Vector;
std::ostream& operator<<(std::ostream& os, const Vector& v){
os << "[";
for(size_t i=0 ; i<v.size() ; i++){
os << v[i] << (i<v.size()-1?", ":"");
}
os << "]";
return os;
}
Dual function(Vector& x){
return x[0]*x[0]+x[1]*x[1]; // f(x1,x2) = x1^2 + x2^2
}
int main() {
Vector x = {50, 70}; // random default values
// hyperparameters
int epochs = 30;
float learningRate = 0.3;
// gradient descent
for(size_t i=0 ; i<epochs ; i++){
Vector gradient = dual::gradient(function, x);
for(size_t j=0 ; j<x.size() ; j++){
x[j] -= learningRate*gradient[j];
}
}
// check new values
std::cout << "x = " << x << std::endl;
std::cout << "f(x) = " << function(x) << std::endl;
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册