From 3eb7f5e4dbd1835e18207d2aa5b55057bfd1e54a Mon Sep 17 00:00:00 2001 From: HypoX64 Date: Sun, 30 Aug 2020 21:48:52 +0800 Subject: [PATCH] Better Data Augmentation --- README.md | 84 +++--- README_CN.md | 74 +++-- data/__init__.py | 0 data/augmenter.py | 123 ++++++++- {util => data}/dataloader.py | 13 +- data/noise.py | 168 +++++++++++ {util => data}/statistics.py | 6 +- data/surrogates.py | 321 ++++++++++++++++++++++ util/transformer.py => data/transforms.py | 82 +----- examples/show_example.npy | Bin 0 -> 16128 bytes models/core.py | 16 +- models/net_1d/lstm.py | 4 +- simple_test.py | 9 +- tools/server.py | 4 +- train.py | 10 +- util/array_operation.py | 11 +- util/options.py | 20 +- util/plot.py | 2 +- 18 files changed, 779 insertions(+), 168 deletions(-) create mode 100644 data/__init__.py rename {util => data}/dataloader.py (94%) create mode 100644 data/noise.py rename {util => data}/statistics.py (97%) create mode 100644 data/surrogates.py rename util/transformer.py => data/transforms.py (50%) create mode 100644 examples/show_example.npy diff --git a/README.md b/README.md index 7a40454..190702f 100644 --- a/README.md +++ b/README.md @@ -6,25 +6,69 @@ | English | [中文版](./README_CN.md) |

A time series signal analysis and classification framework.
-It contain multiple network and provide data preprocessing, reading, training, evaluation, testing and other functions.
+It contains multiple networks and provides data preprocessing, data augmentation, training, evaluation, testing and other functions.<br>
Some output examples: [heatmap](./image/heatmap_eg.png) [running_loss](./image/running_loss_eg.png) [log.txt](./docs/log_eg.txt)
-Supported network:
+## Feature
+### Data preprocessing
+General signal preprocessing methods.
+* Normalize
+5_95 | maxmin | None
+* filter
+fft | fir | iir | wavelet | None
+
+### Data augmentation
+Various data augmentation methods.<br>
[[Time Series Data Augmentation for Deep Learning: A Survey]](https://arxiv.org/pdf/2002.12478.pdf)
+* Base
+scale, warp, app, aaft, iaaft, filp, crop
+* Noise
+spike, step, slope, white, pink, blue, brown, violet
+* Gan
+dcgan
+
+### Network
+Various networks for evaluation.
 >1d
 >
->>lstm, cnn_1d, resnet18_1d, resnet34_1d, multi_scale_resnet_1d, micro_multi_scale_resnet_1d
-
+>>lstm, cnn_1d, resnet18_1d, resnet34_1d, multi_scale_resnet_1d, micro_multi_scale_resnet_1d, autoencoder, mlp
 >2d(stft spectrum)
 >
 >>mobilenet, resnet18, resnet50, resnet101, densenet121, densenet201, squeezenet, dfcnn, multi_scale_resnet,
+### K-fold
+Use k-fold to make the results more reliable.
+```--k_fold```&```--fold_index```<br>
+
+* --k_fold
+```python
+# fold_num of k-fold. If 0 or 1, k-fold is disabled: 80% of the data is used to train and the rest to eval.
+```
+* --fold_index
+```python
+"""--fold_index
+When --k_fold != 0 or 1:
+Cut the dataset into sub-sets at the given indices, then run k-fold over the sub-sets.
+If input 'auto', it will shuffle the dataset and then cut it into equal parts.
+If input: [2,4,6,7]
+when len(dataset) == 10
+sub-set: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:]
+-------
+When --k_fold == 0 or 1:
+If input 'auto', it will shuffle the dataset and then cut 80% of it to train and the rest to eval.
+If input: [5]
+when len(dataset) == 10
+train-set : dataset[0:5] eval-set : dataset[5:]
+"""
+```
+
 ## An example: Use EEG to classify sleep stage [sleep-edfx](https://github.com/HypoX64/candock/tree/f24cc44933f494d2235b3bf965a04cde5e6a1ae9)<br>
Thanks to [@swalltail99](https://github.com/swalltail99) for reporting this bug. In order to load the sleep-edfx dataset, please install mne==0.18.0<br>
```bash
pip install mne==0.18.0
```
+
 ## Getting Started
 ### Prerequisites
 - Linux, Windows, macOS
@@ -32,11 +76,11 @@ pip install mne==0.18.0
 - Python 3
 - PyTorch 1.0+
 ### Dependencies
-This code depends on torchvision, numpy, scipy , matplotlib, available via pip install.
+This code depends on torchvision, numpy, scipy, pywt, and matplotlib, all available via pip install.<br>
For example:
```bash -pip3 install matplotlib +pip install matplotlib ``` ### Clone this repo: ```bash @@ -64,7 +108,7 @@ python3 simple_test.py --label 50 --input_nc 1 --model_name micro_multi_scale_re ## Training with your own dataset * step1: Generate signals.npy and labels.npy in the following format. ```python -#1.type:numpydata signals:np.float64 labels:np.int64 +#1.type:numpydata signals:np.float32 labels:np.int64 #2.shape signals:[num,ch,length] labels:[num] #num:samples_num, ch :channel_num, length:length of each sample #for example: @@ -73,28 +117,4 @@ labels = np.array([0,0,0,0,0,1,1,1,1,1]) #0->class0 1->class1 ``` * step2: input ```--dataset_dir "your_dataset_dir"``` when running code. -### About k-fold -```--k_fold```&```--fold_index```
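
To make step 1 above concrete, here is a minimal generation sketch. The file names (signals.npy, labels.npy), the [num,ch,length] shape, and the float32/int64 dtypes are the format required above; the script name, output directory, and sine/noise content are only illustrative placeholders:

```python
# make_dataset.py -- hypothetical helper for step 1 (toy data, for illustration only)
import os
import numpy as np

num, ch, length = 100, 1, 2000                            # samples, channels, points per sample
signals = np.zeros((num, ch, length), dtype=np.float32)   # signals: np.float32
labels = np.zeros(num, dtype=np.int64)                    # labels: np.int64

t = np.linspace(0, 1, length)
for i in range(num):
    freq = 5 if i % 2 == 0 else 20                        # two toy classes: 5 Hz vs 20 Hz sine
    signals[i, 0] = np.sin(2 * np.pi * freq * t) + 0.1 * np.random.randn(length)
    labels[i] = i % 2

os.makedirs('./datasets/my_dataset', exist_ok=True)
np.save(os.path.join('./datasets/my_dataset', 'signals.npy'), signals)
np.save(os.path.join('./datasets/my_dataset', 'labels.npy'), labels)
```

The resulting directory is then what step 2 passes as ```--dataset_dir "./datasets/my_dataset"```.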
- -* k_fold -```python -# fold_num of k-fold. If 0 or 1, no k-fold and cut 0.8 to train and other to eval. -``` -* fold_index -```python - """--fold_index - 5-fold: - Cut dataset into sub-set using index , and then run k-fold with sub-set - If input 'auto', it will shuffle dataset and then cut dataset equally - If input: [2,4,6,7] - when len(dataset) == 10 - sub-set: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:] - --------------------------------------------------------------- - No-fold: - If input 'auto', it will shuffle dataset and then cut 80% dataset to train and other to eval - If input: [5] - when len(dataset) == 10 - train-set : dataset[0:5] eval-set : dataset[5:] - """ -``` -### [ More options](./util/options.py). \ No newline at end of file +### [ More training options](./util/options.py). \ No newline at end of file diff --git a/README_CN.md b/README_CN.md index 2e0ee9e..234a74c 100644 --- a/README_CN.md +++ b/README_CN.md @@ -6,18 +6,61 @@ | [English](./README.md) | 中文版 |

一个通用的一维时序信号分析,分类框架.
-它将包含多种网络结构,并提供数据预处理,读取,训练,评估,测试等功能.
+它将包含多种网络结构,并提供数据预处理,数据增强,训练,评估,测试等功能.
一些训练时的输出样例: [heatmap](./image/heatmap_eg.png) [running_loss](./image/running_loss_eg.png) [log.txt](./docs/log_eg.txt)
-目前支持的网络结构:
+
+## 支持的功能
+### 数据预处理
+通用的数据预处理方法
+* Normalize
+5_95 | maxmin | None
+* filter
+fft | fir | iir | wavelet | None
+
+### 数据增强
+多种多样的数据增强方法.注意:使用时应该结合数据的物理特性进行选择.<br>
[[Time Series Data Augmentation for Deep Learning: A Survey]](https://arxiv.org/pdf/2002.12478.pdf)
+* Base
+scale, warp, app, aaft, iaaft, filp, crop
+* Noise
+spike, step, slope, white, pink, blue, brown, violet
+* Gan
+dcgan
+
+### 网络
+提供多种用于评估的网络.
 >1d
 >
->>lstm, cnn_1d, resnet18_1d, resnet34_1d, multi_scale_resnet_1d, micro_multi_scale_resnet_1d
-
+>>lstm, cnn_1d, resnet18_1d, resnet34_1d, multi_scale_resnet_1d, micro_multi_scale_resnet_1d, autoencoder, mlp
 >2d(stft spectrum)
 >
 >>mobilenet, resnet18, resnet50, resnet101, densenet121, densenet201, squeezenet, dfcnn, multi_scale_resnet,
+### K-fold
+使用K-fold使得结果更加可靠.
+```--k_fold```&```--fold_index```<br>
+
+* --k_fold
+```python
+# fold_num of k-fold. If 0 or 1, k-fold is disabled: 80% of the data is used to train and the rest to eval.
+```
+* --fold_index
+```python
+"""--fold_index
+When --k_fold != 0 or 1:
+Cut the dataset into sub-sets at the given indices, then run k-fold over the sub-sets.
+If input 'auto', it will shuffle the dataset and then cut it into equal parts.
+If input: [2,4,6,7]
+when len(dataset) == 10
+sub-set: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:]
+-------
+When --k_fold == 0 or 1:
+If input 'auto', it will shuffle the dataset and then cut 80% of it to train and the rest to eval.
+If input: [5]
+when len(dataset) == 10
+train-set : dataset[0:5] eval-set : dataset[5:]
+"""
+```
 ## 关于EEG睡眠分期数据的实例
 为了适应新的项目,代码已被大幅更改,不能正常运行如sleep-edfx等睡眠数据集,如果仍然需要运行,请参照下文按照输入格式标准自行加载数据,如果有时间我会修复这个问题。
 当然,如果需要加载睡眠数据集也可以直接使用[老的版本](https://github.com/HypoX64/candock/tree/f24cc44933f494d2235b3bf965a04cde5e6a1ae9)<br>
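
Putting the ```--k_fold``` and ```--fold_index``` options above together, a hypothetical 5-fold training run could look like the following. train.py and every flag used here are defined in this repository (see util/options.py), but the dataset path, model, and augmentation choices are just examples:

```bash
python3 train.py --dataset_dir ./datasets/simple_test \
                 --k_fold 5 --fold_index auto \
                 --model_name micro_multi_scale_resnet_1d \
                 --augment scale,white
```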
@@ -75,27 +118,4 @@ labels = np.array([0,0,0,0,0,1,1,1,1,1]) #0->class0 1->class1 ``` * step2: 输入 ```--dataset_dir "your_dataset_dir"``` 当运行代码的时候. -### 关于 k-fold -```--k_fold```&```--fold_index```
-* k_fold -```python -# fold_num of k-fold. If 0 or 1, no k-fold and cut 0.8 to train and other to eval -``` -* fold_index -```python - """--fold_index - 5-fold: - Cut dataset into sub-set using index , and then run k-fold with sub-set - If input 'auto', it will shuffle dataset and then cut dataset equally - If input: [2,4,6,7] - when len(dataset) == 10 - sub-set: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:] - --------------------------------------------------------------- - No-fold: - If input 'auto', it will shuffle dataset and then cut 80% dataset to train and other to eval - If input: [5] - when len(dataset) == 10 - train-set : dataset[0:5] eval-set : dataset[5:] - """ -``` ### [ More options](./util/options.py). \ No newline at end of file diff --git a/data/__init__.py b/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/data/augmenter.py b/data/augmenter.py index 7bc9664..dfbff52 100644 --- a/data/augmenter.py +++ b/data/augmenter.py @@ -2,24 +2,29 @@ import os import time import numpy as np +import scipy.signal +import scipy.fftpack as fftpack +import pywt + import torch from torch import nn, optim from multiprocessing import Process, Queue import matplotlib -matplotlib.use('Agg') import matplotlib.pyplot as plt -# import torch.multiprocessing as mp import warnings warnings.filterwarnings("ignore") import sys sys.path.append("..") -from util import util,transformer,dataloader,statistics,plot,options +from util import util,plot,options,dsp +from util import array_operation as arr +from . import transforms,dataloader,statistics,surrogates,noise + from models.net_1d.gan import Generator,Discriminator,GANloss,weights_init_normal from models.core import show_paramsnumber -def gan(opt,signals,labels): +def dcgan(opt,signals,labels): print('Augment dataset using gan...') if opt.gpu_id != -1: os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu_id) @@ -118,9 +123,113 @@ def gan(opt,signals,labels): # return signals,labels return out_signals,out_labels - -def base(opt,signals,labels): - pass +def base1d(opt,data,test_flag): + """ + data : batchsize,ch,length + """ + batchsize,ch,length = data.shape + random_list = np.random.rand(15) + threshold = 1/(len(opt.augment)+1) + + if test_flag: + move = int((length-opt.finesize)*0.5) + result = data[:,:,move:move+opt.finesize] + else: + result = np.zeros((batchsize,ch,opt.finesize)) + + for i in range(batchsize): + for j in range(ch): + signal = data[i][j] + _length = length + # Time Domain + if 'scale' in opt.augment and random_list[0]>threshold: + beta = np.clip(np.random.normal(1, 0.1),0.8,1.2) + signal = arr.interp(signal, int(_length*beta)) + _length = signal.shape[0] + + + if 'warp' in opt.augment and random_list[1]>threshold: + pos = np.sort(np.random.randint(0, _length, 2)) + if pos[1]-pos[0]>10: + beta = np.clip(np.random.normal(1, 0.1),0.8,1.2) + signal = np.concatenate((signal[:pos[0]], arr.interp(signal[pos[0]:pos[1]], int((pos[1]-pos[0])*beta)) , signal[pos[1]:])) + _length = signal.shape[0] + + # Noise + if 'spike' in opt.augment and random_list[2]>threshold: + std = np.std(signal) + spike_indexs = np.random.randint(0, _length, int(_length*np.clip(np.random.uniform(0,0.05),0,1))) + for index in spike_indexs: + signal[index] = signal[index] + std*np.random.randn()*opt.augment_noise_lambda + + if 'step' in opt.augment and random_list[3]>threshold: + std = np.std(signal) + step_indexs = np.random.randint(0, _length, int(_length*np.clip(np.random.uniform(0,0.01),0,1))) + for index in step_indexs: + 
signal[index:] = signal[index:] + std*np.random.randn()*opt.augment_noise_lambda
+
+            if 'slope' in opt.augment and random_list[4]>threshold:
+                slope = np.linspace(-1, 1, _length)*np.random.randn()
+                signal = signal+slope*opt.augment_noise_lambda
+
+            if 'white' in opt.augment and random_list[5]>threshold:
+                signal = signal+noise.noise(_length,'white')*(np.std(signal)*np.random.randn()*opt.augment_noise_lambda)
+
+            if 'pink' in opt.augment and random_list[6]>threshold:
+                signal = signal+noise.noise(_length,'pink')*(np.std(signal)*np.random.randn()*opt.augment_noise_lambda)
+
+            if 'blue' in opt.augment and random_list[7]>threshold:
+                signal = signal+noise.noise(_length,'blue')*(np.std(signal)*np.random.randn()*opt.augment_noise_lambda)
+
+            if 'brown' in opt.augment and random_list[8]>threshold:
+                signal = signal+noise.noise(_length,'brown')*(np.std(signal)*np.random.randn()*opt.augment_noise_lambda)
+
+            if 'violet' in opt.augment and random_list[9]>threshold:
+                signal = signal+noise.noise(_length,'violet')*(np.std(signal)*np.random.randn()*opt.augment_noise_lambda)
+
+            # Frequency Domain
+            if 'app' in opt.augment and random_list[10]>threshold:
+                # amplitude and phase perturbations
+                signal = surrogates.app(signal)
+
+            if 'aaft' in opt.augment and random_list[11]>threshold:
+                # Amplitude Adjusted Fourier Transform
+                signal = surrogates.aaft(signal)
+
+            if 'iaaft' in opt.augment and random_list[12]>threshold:
+                # Iterative Amplitude Adjusted Fourier Transform
+                signal = surrogates.iaaft(signal,10)[0]
+
+            # crop and flip
+            if 'filp' in opt.augment and random_list[13]>threshold:
+                signal = signal[::-1]
+
+            # crop (or pad) to the output size
+            if _length >= opt.finesize:
+                move = int((_length-opt.finesize)*np.random.random())
+                signal = signal[move:move+opt.finesize]
+            else:
+                signal = arr.pad(signal, opt.finesize-_length, mod = 'repeat')
+
+            result[i,j] = signal
+    return result
+
+def base2d(img,finesize = (224,244),test_flag = True):
+    # note: np.random is used throughout, since this module does not import the stdlib random module
+    h,w = img.shape[:2]
+    if test_flag:
+        # center crop
+        h_move = int((h-finesize[0])*0.5)
+        w_move = int((w-finesize[1])*0.5)
+        result = img[h_move:h_move+finesize[0],w_move:w_move+finesize[1]]
+    else:
+        # random crop
+        h_move = int((h-finesize[0])*np.random.random())
+        w_move = int((w-finesize[1])*np.random.random())
+        result = img[h_move:h_move+finesize[0],w_move:w_move+finesize[1]]
+        # random flip
+        if np.random.random()<0.5:
+            result = result[:,::-1]
+        # random amp
+        result = result*np.random.uniform(0.9,1.1)+np.random.uniform(-0.05,0.05)
+    return result
 
 def augment(opt,signals,labels):
     pass
diff --git a/util/dataloader.py b/data/dataloader.py
similarity index 94%
rename from util/dataloader.py
rename to data/dataloader.py
index b2f3cc1..af7b6e0 100644
--- a/util/dataloader.py
+++ b/data/dataloader.py
@@ -5,8 +5,11 @@ import random
 import scipy.io as sio
 import numpy as np
-from . import dsp,transformer,statistics
-from . import array_operation as arr
+import sys
+sys.path.append("..")
+from . 
import transforms,statistics +from util import dsp +from util import array_operation as arr def del_labels(signals,labels,dels): @@ -23,7 +26,7 @@ def del_labels(signals,labels,dels): def segment_traineval_dataset(signals,labels,a=0.8,random=True): length = len(labels) if random: - transformer.shuffledata(signals, labels) + transforms.shuffledata(signals, labels) signals_train = signals[:int(a*length)] labels_train = labels[:int(a*length)] signals_eval = signals[int(a*length):] @@ -123,6 +126,6 @@ def loaddataset(opt): signals = new_signals if opt.fold_index == 'auto': - transformer.shuffledata(signals,labels) + transforms.shuffledata(signals,labels) - return signals,labels \ No newline at end of file + return signals.astype(np.float32),labels.astype(np.int64) \ No newline at end of file diff --git a/data/noise.py b/data/noise.py new file mode 100644 index 0000000..1eff835 --- /dev/null +++ b/data/noise.py @@ -0,0 +1,168 @@ +""" +clone from +https://github.com/scivision/soothing-sounds +https://github.com/python-acoustics +LICENSE: GPL-3.0 +""" + +""" +# Generator + +The generator module provides signal generators. + +The following functions calculate `N` samples and return an array containing the samples. + +For indefinitely long iteration over the samples, consider using the output of these functions in `itertools.cycle`. + +## Noise + +In general, noise with spectrum S(f) is generated by taking uniform white noise +and filtering with filter response H(f) to get the desired noise spectrum. + +Color | Power/octave | Power density/octave +-------|-------|-------------- +White | +3 dB | 0 dB +Pink | 0 dB | -3 dB +Blue | +6 dB | +3 dB +Brown | -3 dB | -6 dB +Violet | +9 dB | +6 dB +-------|-------|-------------- + +""" +import numpy as np +import scipy +from scipy.fftpack import rfft, irfft + + +def ms(x: np.ndarray) -> np.ndarray: + return (np.abs(x)**2).mean() + +def rms(x: np.ndarray) -> np.ndarray: + return np.sqrt(ms(x)) + +def normalise(y: np.ndarray, x: np.ndarray = 1.) -> np.ndarray: + return y * np.sqrt(ms(x) / ms(y)) + +def noise(N: int, color: str = 'white') -> np.ndarray: + """Noise generator. + + * N: Amount of samples. + * color: Color of noise. + noise_generators = { + 'white': white, + 'pink': pink, + 'blue': blue, + 'brown': brown, + 'violet': violet + } + + https://github.com/python-acoustics + """ + noise_generators = { + 'white': white, + 'pink': pink, + 'blue': blue, + 'brown': brown, + 'violet': violet + } + + return noise_generators[color](N) + + +def white(N: int) -> np.ndarray: + """ + White noise. + + * N: Amount of samples. + + White noise has a constant power density. + Its narrowband spectrum is therefore flat. + The power in white noise will increase by a factor of two for each octave band, + and therefore increases with 3 dB per octave. + + https://github.com/python-acoustics + """ + return np.random.randn(N).astype(np.float32) + + +def pink(N: int) -> np.ndarray: + """ + Pink noise. + + * N: Amount of samples. + + Pink noise has equal power in bands that are proportionally wide. + Power density decreases with 3 dB per octave. + + https://github.com/python-acoustics + """ + + # This method uses the filter with the following coefficients. + # b = np.array([0.049922035, -0.095993537, 0.050612699, -0.004408786]) + # a = np.array([1, -2.494956002, 2.017265875, -0.522189400]) + # return lfilter(B, A, np.random.randn(N)) + + # Another way would be using the FFT + x = white(N) + X = rfft(x) / N + S = np.sqrt(np.arange(X.size)+1.) 
# +1 to avoid divide by zero
+    y = irfft(X/S).real[:N]
+
+    return normalise(y)  # values are on the order of 1e-9 without normalisation
+
+
+def blue(N: int) -> np.ndarray:
+    """
+    Blue noise.
+
+    * N: Amount of samples.
+
+    Power increases with 6 dB per octave.
+    Power density increases with 3 dB per octave.
+
+    https://github.com/python-acoustics
+    """
+    x = white(N)
+    X = rfft(x) / N
+    S = np.sqrt(np.arange(X.size))  # Filter
+    y = irfft(X*S).real[:N]
+
+    return normalise(y)
+
+
+def brown(N: int) -> np.ndarray:
+    """
+    Brown noise.
+
+    * N: Amount of samples.
+
+    Power decreases with 3 dB per octave.
+    Power density decreases with 6 dB per octave.
+
+    https://github.com/python-acoustics
+    """
+    x = white(N)
+    X = rfft(x) / N
+    S = np.arange(X.size)+1  # Filter
+    y = irfft(X/S).real[:N]
+
+    return normalise(y)
+
+
+def violet(N: int) -> np.ndarray:
+    """
+    Violet noise.
+
+    * N: Amount of samples.
+
+    Power increases with 9 dB per octave.
+    Power density increases with 6 dB per octave.
+
+    https://github.com/python-acoustics
+    """
+    x = white(N)
+    X = rfft(x) / N
+    S = np.arange(X.size)  # Filter
+    y = irfft(X*S).real[0:N]
+
+    return normalise(y)
diff --git a/util/statistics.py b/data/statistics.py
similarity index 97%
rename from util/statistics.py
rename to data/statistics.py
index b44f9b4..5b1c680 100644
--- a/util/statistics.py
+++ b/data/statistics.py
@@ -1,7 +1,9 @@
 import numpy as np
 import os
-from . import plot
-from . import util
+import sys
+sys.path.append("..")
+
+from util import plot,util
 
 def label_statistics(labels):
     labels = (np.array(labels)).astype(np.int64)
diff --git a/data/surrogates.py b/data/surrogates.py
new file mode 100644
index 0000000..07d1551
--- /dev/null
+++ b/data/surrogates.py
@@ -0,0 +1,321 @@
+# -*- coding: utf-8 -*-
+# This code is cloned from https://github.com/manu-mannattil/nolitsa
+# BSD-3 LICENSE : https://github.com/manu-mannattil/nolitsa/blob/master/LICENSE
+# Changed np.fft to scipy.fftpack
+
+"""Functions to generate surrogate series.
+
+This module provides a set of functions to generate surrogate series
+from a given time series using multiple algorithms.
+
+Surrogates Generation
+---------------------
+
+  * ft -- generates Fourier transform surrogates.
+  * aaft -- generates amplitude adjusted Fourier transform surrogates.
+  * iaaft -- generates iterative amplitude adjusted Fourier transform
+    surrogates.
+
+Utilities
+---------
+
+  * mismatch -- finds the segment of a time series with the least
+    end-point mismatch.
+"""
+
+import numpy as np
+import scipy
+
+
+def rescale(x, interval=(0, 1)):
+    """Rescale the given scalar time series into a desired interval.
+
+    Rescales the given scalar time series into a desired interval using
+    a simple linear transformation.
+
+    Parameters
+    ----------
+    x : array_like
+        Scalar time series.
+    interval: tuple, optional (default = (0, 1))
+        Extent of the interval specified as a tuple.
+
+    Returns
+    -------
+    y : array
+        Rescaled scalar time series.
+    """
+    x = np.asarray(x)
+    if interval[1] == interval[0]:
+        raise ValueError('Interval must have a nonzero length.')
+
+    return (interval[0] + (x - np.min(x)) * (interval[1] - interval[0]) /
+            (np.max(x) - np.min(x)))
+
+
+def ft(x):
+    """Return simple Fourier transform surrogates.
+
+    Returns phase randomized (FT) surrogates that preserve the power
+    spectrum (or equivalently the linear correlations), but completely
+    destroy the probability distribution. 
+ + Parameters + ---------- + x : array + Real input array containg the time series. + + Returns + ------- + y : array + Surrogates with the same power spectrum as x. + """ + y = scipy.fftpack.rfft(x) + + phi = 2 * np.pi * np.random.random(len(y)) + + phi[0] = 0.0 + if len(x) % 2 == 0: + phi[-1] = 0.0 + + y = y * np.exp(1j * phi) + return scipy.fftpack.irfft(np.real(y), n=len(x)) + + +def aaft(x): + """Return amplitude adjusted Fourier transform surrogates. + + Returns phase randomized, amplitude adjusted (AAFT) surrogates with + crudely the same power spectrum and distribution as the original + data (Theiler et al. 1992). AAFT surrogates are used in testing + the null hypothesis that the input series is correlated Gaussian + noise transformed by a monotonic time-independent measuring + function. + + Parameters + ---------- + x : array + 1-D input array containg the time series. + + Returns + ------- + y : array + Surrogate series with (crudely) the same power spectrum and + distribution. + """ + # Generate uncorrelated Gaussian random numbers. + y = np.random.normal(size=len(x)) + + # Introduce correlations in the random numbers by rank ordering. + y = np.sort(y)[np.argsort(np.argsort(x))] + y = ft(y) + + return np.sort(x)[np.argsort(np.argsort(y))] + + +def iaaft(x, maxiter=1000, atol=1e-8, rtol=1e-10): + """Return iterative amplitude adjusted Fourier transform surrogates. + + Returns phase randomized, amplitude adjusted (IAAFT) surrogates with + the same power spectrum (to a very high accuracy) and distribution + as the original data using an iterative scheme (Schreiber & Schmitz + 1996). + + Parameters + ---------- + x : array + 1-D real input array of length N containing the time series. + maxiter : int, optional (default = 1000) + Maximum iterations to be performed while checking for + convergence. The scheme may converge before this number as + well (see Notes). + atol : float, optional (default = 1e-8) + Absolute tolerance for checking convergence (see Notes). + rtol : float, optional (default = 1e-10) + Relative tolerance for checking convergence (see Notes). + + Returns + ------- + y : array + Surrogate series with (almost) the same power spectrum and + distribution. + i : int + Number of iterations that have been performed. + e : float + Root-mean-square deviation (RMSD) between the absolute squares + of the Fourier amplitudes of the surrogate series and that of + the original series. + + Notes + ----- + To check if the power spectrum has converged, we see if the absolute + difference between the current (cerr) and previous (perr) RMSDs is + within the limits set by the tolerance levels, i.e., if abs(cerr - + perr) <= atol + rtol*perr. This follows the convention used in + the NumPy function numpy.allclose(). + + Additionally, atol and rtol can be both set to zero in which + case the iterations end only when the RMSD stops changing or when + maxiter is reached. + """ + # Calculate "true" Fourier amplitudes and sort the series. + ampl = np.abs(scipy.fftpack.rfft(x)) + sort = np.sort(x) + + # Previous and current error. + perr, cerr = (-1, 1) + + # Start with a random permutation. + t = scipy.fftpack.rfft(np.random.permutation(x)) + + for i in range(maxiter): + # Match power spectrum. + s = np.real(scipy.fftpack.irfft(ampl * t / np.abs(t), n=len(x))) + + # Match distribution by rank ordering. + y = sort[np.argsort(np.argsort(s))] + + t = scipy.fftpack.rfft(y) + cerr = np.sqrt(np.mean((ampl ** 2 - np.abs(t) ** 2) ** 2)) + + # Check convergence. 
+ if abs(cerr - perr) <= atol + rtol * abs(perr): + break + else: + perr = cerr + + # Normalize error w.r.t. mean of the "true" power spectrum. + return y, i, cerr / np.mean(ampl ** 2) + +def app(x,alpha=1.0): + """amplitude and phase perturbations + + Parameters + ---------- + x : array + 1-D real input array of length N containing the time series. + alpha : float + strength of app + + Returns + ------- + y : array + 1-D output time series. + """ + length = x.shape[0] + x_fft = scipy.fftpack.fft(x) + amp = np.abs(x_fft) + phase = np.angle(x_fft) + amp_std = np.std(amp) + phase_std = np.std(phase) + pos_a = np.sort(np.random.randint(0, length, 2)) + pos_p = np.sort(np.random.randint(0, length, 2)) + if pos_a[1]-pos_a[0]>10 and pos_p[1]-pos_p[0]>10: + amp[pos_a[0]:pos_a[1]] = amp[pos_a[0]:pos_a[1]] + np.random.normal(0,alpha,pos_a[1]-pos_a[0])*amp_std + phase[pos_p[0]:pos_p[1]] = phase[pos_p[0]:pos_p[1]] + np.random.normal(0,alpha,pos_p[1]-pos_p[0])*phase_std + fft_re = amp*np.exp(1j*phase) + y = scipy.fftpack.ifft(fft_re) + return np.real(y) + + + +def mismatch(x, length=None, weight=0.5, neigh=3): + """Find the segment that minimizes end-point mismatch. + + Finds the segment in the time series that has minimum end-point + mismatch. To do this we calculate the mismatch between the end + points of all segments of the given length and pick the segment with + least mismatch (Ehlers et al. 1998). We also enforce the + condition that the difference between the first derivatives at the + end points must be a minimum. + + Parameters + ---------- + x : array + Real input array containg the time series. + length : int, optional + Length of segment. By default the largest possible length which + is a power of one of the first five primes is selected. + weight : float, optional (default = 0.5) + Weight given to discontinuity in the first difference of the + time series. Must be between 0 and 1. + neigh : int, optional (default = 3) + Num of end points using which the discontinuity statistic should + be computed. + + Returns + ------- + ends : tuple + Indices of the end points of the segment. + d : float + Discontinuity statistic for the segment. + + Notes + ----- + Both the time series and its first difference are linearly rescaled + to [0, 1]. Thus the discontinuity statistic varies between 0 and 1 + (0 means no discontinuity and 1 means maximum discontinuity). 
+    """
+    # Calculate the first difference of the time series and rescale it
+    # to [0, 1]
+    dx = rescale(np.diff(x))
+    x = rescale(x)[1:]
+    n = len(x)
+
+    if not length:
+        primes = np.array([2, 3, 5, 7, 11])
+        i = np.argmax(primes ** np.floor(np.log(n) / np.log(primes)) - n)
+        length = int(primes[i] ** (np.floor(np.log(n) / np.log(primes[i]))))
+
+    d = np.zeros(n - (length + neigh))
+
+    for i in np.arange(n - (length + neigh)):
+        d[i] = ((1 - weight) * (np.mean((x[i:i + neigh] -
+                x[i + length:i + length + neigh]) ** 2.0)) +
+                weight * (np.mean((dx[i:i + neigh] -
+                dx[i + length:i + length + neigh]) ** 2.0)))
+
+    return (1 + np.argmin(d), 1 + np.argmin(d) + length), np.min(d)
+
+
+##################### My own re-implementation; the iaaft below is wrong #####################
+"""
+def rank_like(src,dst):
+    src = np.sort(src)
+    sort_index = np.argsort(dst)
+    src_new = np.zeros_like(src)
+    src_new[sort_index] = src[:]
+    return src_new
+
+def aaft(signal):
+    # step 1
+    Xs = np.random.randn(len(signal))
+    # step 2
+    Xs = rank_like(Xs, signal)
+    # step 3
+    Xs_fft = fft(Xs)
+    Xs_angle = np.angle(Xs_fft)
+    np.random.shuffle(Xs_angle)
+    Xs_fft_re = np.abs(Xs_fft)*np.exp(1j*Xs_angle)
+    Xs_re = ifft(Xs_fft_re)
+    # step 4
+    signal_new = rank_like(signal, Xs_re)
+    return signal_new
+
+def iaaft(signal,iter=10):
+    Ck = np.argsort(signal)
+    Ak = fft(signal)
+    Ak_abs = np.abs(Ak)
+    #Pk = np.angle(X_fft)
+    Sn = aaft(signal)
+
+    for i in range(iter):
+        Sk = fft(Sn)
+        Sk_angle = np.angle(Sk)
+        Sk_1 = Ak_abs*np.exp(1j*Sk_angle)
+
+        Sn = ifft(Sk_1)
+
+    Sn = rank_like(Sn, signal)
+    return Sn
+"""
\ No newline at end of file
diff --git a/util/transformer.py b/data/transforms.py
similarity index 50%
rename from util/transformer.py
rename to data/transforms.py
index a9ea3ff..410b327 100644
--- a/util/transformer.py
+++ b/data/transforms.py
@@ -2,15 +2,18 @@ import os
 import random
 import numpy as np
 import torch
-from . import dsp
-from . import array_operation as arr
+
+import sys
+sys.path.append("..")
+from util import dsp
+from util import array_operation as arr
+from . 
import augmenter def shuffledata(data,target): state = np.random.get_state() np.random.shuffle(data) np.random.set_state(state) np.random.shuffle(target) - # return data,target def k_fold_generator(length,fold_num,fold_index = 'auto'): sequence = np.linspace(0,length-1,length,dtype='int') @@ -45,93 +48,38 @@ def batch_generator(data,target,sequence,shuffle = True): for i in range(batchsize): out_data[i] = data[sequence[i]] out_target[i] = target[sequence[i]] - return out_data,out_target def ToTensor(data,target=None,gpu_id=0): if target is not None: - data = torch.from_numpy(data).float() - target = torch.from_numpy(target).long() + data = torch.from_numpy(data) + target = torch.from_numpy(target) if gpu_id != -1: data = data.cuda() target = target.cuda() return data,target else: - data = torch.from_numpy(data).float() + data = torch.from_numpy(data) if gpu_id != -1: data = data.cuda() return data -def random_transform_1d(data,opt,test_flag): - batchsize,ch,length = data.shape - - if test_flag: - move = int((length-opt.finesize)*0.5) - result = data[:,:,move:move+opt.finesize] - else: - #random scale - if 'scale' in opt.augment: - length = np.random.randint(opt.finesize, length*1.1, dtype=np.int64) - result = np.zeros((batchsize,ch,length)) - for i in range(batchsize): - for j in range(ch): - result[i][j] = arr.interp(data[i][j], length) - data = result - - #random crop - move = int((length-opt.finesize)*random.random()) - result = data[:,:,move:move+opt.finesize] - - #random flip - if 'flip' in opt.augment: - if random.random()<0.5: - result = result[:,:,::-1] - - #random amp - if 'amp' in opt.augment: - result = result*random.uniform(0.9,1.1) - - #add noise - if 'noise' in opt.augment: - noise = np.random.rand(ch,opt.finesize) - result = result + (noise-0.5)*0.01 - - return result - -def random_transform_2d(img,finesize = (224,244),test_flag = True): - h,w = img.shape[:2] - if test_flag: - h_move = int((h-finesize[0])*0.5) - w_move = int((w-finesize[1])*0.5) - result = img[h_move:h_move+finesize[0],w_move:w_move+finesize[1]] - else: - #random crop - h_move = int((h-finesize[0])*random.random()) - w_move = int((w-finesize[1])*random.random()) - result = img[h_move:h_move+finesize[0],w_move:w_move+finesize[1]] - #random flip - if random.random()<0.5: - result = result[:,::-1] - #random amp - result = result*random.uniform(0.9,1.1)+random.uniform(-0.05,0.05) - return result - -def ToInputShape(data,opt,test_flag = False): - #data = data.astype(np.float32) - _batchsize,_ch,_size = data.shape +def ToInputShape(opt,data,test_flag = False): + if opt.model_type == '1d': - result = random_transform_1d(data, opt, test_flag = test_flag) + result = augmenter.base1d(opt, data, test_flag = test_flag) elif opt.model_type == '2d': + _batchsize,_ch,_size = data.shape result = [] h,w = opt.stft_shape for i in range(_batchsize): for j in range(opt.input_nc): spectrum = dsp.signal2spectrum(data[i][j],opt.stft_size,opt.stft_stride, opt.stft_n_downsample, not opt.stft_no_log) - spectrum = random_transform_2d(spectrum,(h,int(w*0.9)),test_flag=test_flag) + spectrum = augmenter.base2d(spectrum,(h,int(w*0.9)),test_flag=test_flag) result.append(spectrum) result = (np.array(result)).reshape(_batchsize,opt.input_nc,h,int(w*0.9)) - return result + return result.astype(np.float32) diff --git a/examples/show_example.npy b/examples/show_example.npy new file mode 100644 index 0000000000000000000000000000000000000000..551b3e1e17f841f582af8805d0228665d0b8e93f GIT binary patch literal 16128 
zcmbW8cbFID5y!7m4v31x5*x;HiiBWCg3j7dH1-}1N&q!B6qH~L_Gpa80=A$LOH`sk z6f04g6~ux_5kV;ecfbK6aBz3R!QGL4-<{9!Jiq*xdw8B@_qV^@d8d5mJM+$O-8*Q%=P z6g|50MJuM3PSPH2uK3r!-QL#)5AMJ5yeHq(8oi6)q z*X`WxhCR^7sK1B5&*j8>OM*#UFCQM; z(1XK#hTnWJ|M z_{Txpojg|`Ii&W+t8e{Q@73rS8pGiH1#0WlqaIw^g~r_4Xc_ zOD$3JU)Qhsx$c#r)To8uxZdo!dfY9+^FMW5L4&Jw$N8sbmS3aaT@V7TRu3Dxe)EbB8}zka!4vBA(BM87 z*6YYE0hk&z82P!PI_gw^_SIg*5B?!$QJo(#`!sBy7=ZPYNq()VkY4 zfNav9*=mlTxKXdqw9|4#gX`gUF>uzpKE_-e7FMgZ2Zw~TMnl%v_5GC^!{L$|%@()# zrCpZmC*AsP{ovS@`qP>dGDKXZ^V$T@S+4ndmMZpYIU*q1Y>jU4#7xC6xDFmzJb`Bd zZrSIK+vH2~5%CaFU_!N+d=fxz=wYkm>>%#pKgb#H*4%sI)LO}mnMH7e>`@UcyG61f z&z87ZvSgP5;TFk2_~CU2H_No%XCB>u=dIE<2E$f*OEOY0;-=Tik{tiald}T+{-!+HuiI|NHat+C zTx6-QpzAbH&LYV&E1f!!wlL z&hy0ZNzC7!AMuw&P6C^J`o&4)LE=EOB%3YiUA-Mcyzy`F-#N_;`e=#)%4FfmfraEO}+XzD)lV0b;S5A1UvLzr#bw zQ`iAMOP+@hnqI~7p8FBK7e>mIa*!dae*G- zgZL-)617Vgm^cz&L>pplnyp8*bTkwH69I8$%i)o$oyWoFdPj#? zugfP^WXx&3#&WMyt>(O{RSEgsRzLJs`$Y>?wVh8dL_<&z? z9lYfn`H5fb0K!X@h9GI@_AnX|HA+HKK={dftSQDN1lhjpda)BzJV9yE%1goz`wvX;styu zcn|w=UO%C4-h-aFjy&|Ut-t8|8TNkf?}8`bHTjw6frsdee1Kiy^BnO{&cPRp5ASQS zuMaL6zI(r!bs-;dfq!#O{-(bKpDMH~JOF=#XW@^?$vN={pTlnOWbg<3@m%~EeZi;0 z^u=z+Zrh)NPt(7l4-6i{7myoy$v5o3(vPq>a65ww@U+5nli&BzksI75?!Yzt9=j74 zW*_B#^o2dS&U4Wl`UH3VF06o!%!LEiop7+rUcH?=}UzV@_XPyGT#IL|}_!{RNSr2-^Zp0b! zil6a4aFyrseQ+E8YtM83OrFP&*cUTA^8Nt*4C2-HBfM{Jb0NMTN8bY+gfG$u=g4)_ zEBlbXPh;z)UZ>yeb86-v-Uk5Jh#T@h`hqtRhxGOE7yJl(K)?8f^<@P<&2_%VIq}1D z$aCNge1g1S_rVY7%lmmDK9e`FFF3-w@Ne=z@}L*KS8(+GGVDX1v;8Q2Eo?K_F7y2q zn+wu>46H`;IfYgFMHoluRjXUG>8!_3*`V)*ak{5AXg-g!UN4V-KWZFv+Nnu%d5h9K ze-@|teppFra}8-6x9ZU<&Hb)T8pm9QRHXU*MTfLLn~r?!;|^&)Z_^>&I~J1d)7(z& z(p-*;G@tKjo96QaZBmvLsB0cAQ_D92o<9 zvVS)H*!Q^7B;T(qP4YO}_WYY-fw}Fu`MS!J+%MZCx!zhQd41X{iDOB_%JZ?E)n!TU zmu3EZl!I=flEoEH9?B#yOoY4SOkN7jWws749X zw4;yQBo+sL%;$g_CEJ|LQci=|{-fk`UmL{sAI0X+#OABS_94aQuY3-M{VeA7s3Vvc zG2ZHPFE+pEb1v*V+diu_hw+;KEEm3K`9Rng0$<5{MH6}li%wI^iO??pOC-tUw9?Fm$>73IL8cla26DdLoV34JsADc}-34cy^YmBP?_mCl zIbrw?e7dkdN!(yR<~s2&n@4t@!yKv23HdzMrSSpFgzvo+f2OfiIGQ|ScJ=v9@-O++ zaNFlfc^>m#^b6<*k!P6;vpIF|_fp3YZ^S+I3;s(yQ+I=B?6-n5+z%d@A9~-CegH@M z;q3RCAA0`^Ifz5@C3fLR+<~XW19I_w;vZZ=Kj@da75D(xu^;=b;2`x9^&0gebv1Q9 z`Xx`Y4&>%Jmgj`$(-#G2r~^4iU-$vP^I>1z^1J`ug5N38r{IWQ$UDRf`4SvQ->eV4 zf_LyO`U&7BaYLQSd&xi4o8&M0TJ(#^OZWqQR_a;&&fZTyz<UicgS48Fi;$%ptEae>^}2i}8UVK3z4 z-2B7)AMD3izVkjCag2QI&(U|Ke!%b13;7c}vflt-#vkd+;7{;U_&@S;gl7?dTnBfF zJM4o0(}zPocrp8?_yPC9vyqGXp8Yc9=ZL)E1NtQXiBIC5I0SdmA9giA^7$?DE&0Io z3Q@9HGm2YsMVvpaow=AIP4Lw>%8e8@$-qCd-P z{Ql7E7`}(T&;!pW{_r>KYW0Knclass0 1->class1 t1 = time.time() signals,labels = dataloader.loaddataset(opt) if opt.gan: - signals,labels = augmenter.gan(opt,signals,labels) + signals,labels = augmenter.dcgan(opt,signals,labels) label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels) util.writelog('label statistics: '+str(label_cnt),opt,True) opt = options.get_auto_options(opt, signals, labels) -train_sequences,eval_sequences = transformer.k_fold_generator(len(labels),opt.k_fold,opt.fold_index) +train_sequences,eval_sequences = transforms.k_fold_generator(len(labels),opt.k_fold,opt.fold_index) t2 = time.time() print('Cost time: %.2f'% (t2-t1),'s') diff --git a/util/array_operation.py b/util/array_operation.py index bd1c60c..1d22d4b 100644 --- a/util/array_operation.py +++ b/util/array_operation.py @@ -17,11 +17,18 @@ def pad(data,padding,mod='zero'): for i in range(repeat_num): out_data = np.append(out_data, data) pad_data = data[:padding-repeat_num*len(data)] - return np.append(out_data, pad_data) + out_data = np.append(out_data, pad_data) + return out_data elif mod == 
'reflect':
+        length = data.shape[0]
         pad_data = data[::-1][:padding]
-        return np.append(data, pad_data)
+        out_data = np.append(data, pad_data)
+        if padding < length:
+            return out_data
+        else:
+            # one reflection supplies at most `length` samples; recurse for the remainder
+            return pad(out_data,padding-length,mod='reflect')
+
 
 def normliaze(data, mode = 'norm', sigma = 0, dtype=np.float32, truncated = 2):
     '''
diff --git a/util/options.py b/util/options.py
index 500526f..5c1d818 100644
--- a/util/options.py
+++ b/util/options.py
@@ -2,7 +2,11 @@ import argparse
 import os
 import time
 import numpy as np
-from . import util,dsp,plot,statistics
+from . import util,dsp,plot
+
+import sys
+sys.path.append("..")
+from data import statistics
 
 class Options():
     def __init__(self):
@@ -37,9 +41,13 @@
 
         # ------------------------Data Augmentation------------------------
         # base
-        self.parser.add_argument('--augment', type=str, default='all', help='all | scale,filp,amp,noise | scale,filp ....')
+        self.parser.add_argument('--augment', type=str, default='scale',
+            help='all | any comma-separated subset of scale,warp,app,aaft,iaaft,filp,spike,step,slope,white,pink,blue,brown,violet')
+        self.parser.add_argument('--augment_noise_lambda', type=float, default = 0.1, help='noise level (spike,step,slope,white,pink,blue,brown,violet)')
        # fft channel --> use fft to improve frequency domain information.
         self.parser.add_argument('--augment_fft', action='store_true', help='if specified, use fft to improve frequency domain information')
+
+        # self.parser.add_argument('--augment_times', type=float, default=10, help='how many times that will be augmented')
 
         # for gan,it only support when fold_index = 1 or 0 now
         # only support when k_fold =0 or 1
@@ -52,22 +60,22 @@
 
         # ------------------------Dataset------------------------
         """--fold_index
-        5-fold:
+        When --k_fold != 0 or 1:
        Cut dataset into sub-set using index , and then run k-fold with sub-set
        If input 'auto', it will shuffle dataset and then cut dataset equally
        If input: [2,4,6,7]
        when len(dataset) == 10
        sub-set: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:]
        -------
-        No-fold:
+        When --k_fold == 0 or 1:
        If input 'auto', it will shuffle dataset and then cut 80% dataset to train and other to eval
        If input: [5]
        when len(dataset) == 10
        train-set : dataset[0:5] eval-set : dataset[5:]
        """
+        self.parser.add_argument('--k_fold', type=int, default=0,help='fold_num of k-fold. If 0 or 1, no k-fold; cut 80% to train and the rest to eval')
         self.parser.add_argument('--fold_index', type=str, default='auto', help='where to fold, eg. 
when 5-fold and input: [2,4,6,7] -> sub-set: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:]') - self.parser.add_argument('--k_fold', type=int, default=0,help='fold_num of k-fold.If 0 or 1, no k-fold and cut 0.8 to train and other to eval') self.parser.add_argument('--dataset_dir', type=str, default='./datasets/simple_test',help='your dataset path') self.parser.add_argument('--save_dir', type=str, default='./checkpoints/',help='save checkpoints') self.parser.add_argument('--load_thread', type=int, default=8,help='how many threads when load data') @@ -149,7 +157,7 @@ class Options(): self.opt.fold_index = (np.load(os.path.join(self.opt.dataset_dir,'index.npy'))).tolist() if self.opt.augment == 'all': - self.opt.augment = ["scale","filp","amp","noise"] + self.opt.augment = ['scale','warp','spike','step','slope','white','pink','blue','brown','violet','app','aaft','iaaft','filp'] else: self.opt.augment = str2list(self.opt.augment) diff --git a/util/plot.py b/util/plot.py index 0c8f4c6..65c0640 100644 --- a/util/plot.py +++ b/util/plot.py @@ -1,7 +1,7 @@ import os import numpy as np import matplotlib -matplotlib.use('Agg') +# matplotlib.use('Agg') import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D -- GitLab
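
As a quick sanity check of the new data package introduced by this patch, a sketch along the following lines exercises the noise and surrogate generators directly. It assumes the patch has been applied and that you run from the repository root; the test signal and parameter values are arbitrary:

```python
# smoke-test sketch for the new data/ modules -- illustrative, not part of the patch
import numpy as np
import scipy.fftpack  # surrogates.py does only "import scipy"; make sure the submodule is loaded
from data import noise, surrogates

x = np.sin(np.linspace(0, 20 * np.pi, 2000)).astype(np.float32)

pink = noise.noise(len(x), color='pink')       # colored noise from data/noise.py
assert pink.shape == x.shape

y_aaft = surrogates.aaft(x)                    # amplitude adjusted FT surrogate
y_iaaft, n_iter, err = surrogates.iaaft(x, maxiter=10)  # iterative variant
print('iaaft iterations:', n_iter, 'residual:', err)
```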