Logo Coherent WaveBurst  
Reference Guide
Logo
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
WDM.cc
Go to the documentation of this file.
1 // Wavelet Analysis Tool
2 //--------------------------------------------------------------------
3 // V. Necula & S. Klimenko, University of Florida
4 // Implementation of Fast Wilson-Daubechies-Meyer transform
5 // Reference: http://iopscience.iop.org/1742-6596/363/1/012032
6 //--------------------------------------------------------------------
7 
8 #include <strstream>
9 #include <stdexcept>
10 #include <xmmintrin.h>
11 #include "WDM.hh"
12 #include "sseries.hh"
13 #include "wavefft.hh"
14 #include <iostream>
15 #include <stdio.h>
16 #include "TFFTComplexReal.h"
17 #include "TFFTRealComplex.h"
18 #include "TMath.h"
19 #include <complex>
20 
21 using namespace std;
22 
23 
24 extern "C" void sse_dp4();
25 extern "C" void sse_dp5();
26 extern "C" void sse_dp6();
27 extern "C" void sse_dp7();
28 extern "C" void sse_dp8();
29 extern "C" void sse_dp9();
30 extern "C" void sse_dp10();
31 extern "C" void sse_dp11();
32 
33 extern float* watasm_data;
34 extern float* watasm_filter;
35 extern float watasm_xmm0[4];
36 
37 ClassImp(WDM<double>)
38 
39 static const double Pi = 3.14159265358979312;
40 
41 template<class DataType_t> double* WDM<DataType_t>::Cos[MAXBETA];
42 template<class DataType_t> double* WDM<DataType_t>::Cos2[MAXBETA];
43 template<class DataType_t> double* WDM<DataType_t>::SinCos[MAXBETA];
44 template<class DataType_t> double WDM<DataType_t>::CosSize[MAXBETA];
45 template<class DataType_t> double WDM<DataType_t>::Cos2Size[MAXBETA];
46 template<class DataType_t> double WDM<DataType_t>::SinCosSize[MAXBETA];
47 template<class DataType_t> int WDM<DataType_t>::objCounter = 0;
48 
49 // exp^{-...} i.e. negative sign
50 void FFT(double* a, double* b, int n)
51 {
52  wavefft(a, b, n, n, n, -1);
53 }
54 
55 void InvFFT(double* a, double* b, int n)
56 {
57  wavefft(a,b, n,n,n, 1);
58 }
59 
60 // precalculated transform scaling functions
61 template<class DataType_t>
63 {
64  #include "FourierCoefficients.icc"
65 }
66 
67 template<class DataType_t>
69 WaveDWT<DataType_t>(1, 1, 0, B_CYCLE)
70 {
71 // default constructor
72 
73  if(++objCounter==1) initFourier();
74  this->m_WaveType = WDMT;
75  this->m_Heterodine = 1;
76  this->BetaOrder = 4;
77  this->precision = 10;
78  this->m_Level = 0;
79  this->m_Layer = 0;
80  this->KWDM = 0;
81  this->LWDM = 0;
82  wdmFilter.resize(0);
83  this->m_L = 0;
84  this->m_H = 0;
85  TFMap00 = TFMap90 = 0;
86  td_buffer=0;
87 }
88 
89 template<class DataType_t>
90 WDM<DataType_t>::WDM(int M, int K, int iNu, int Precision) :
91 WaveDWT<DataType_t>(1, 1, 0, B_CYCLE)
92 {
93 // constructor
94 // M + 1: number of bands
95 // K : K=n*M, where n is integer. K defines the width of the 'edge'
96 // of the basis function in Fourier domain (see the paper)
97 // larger n, longer the WDM filter, lower the spectral leakage between the bands.
98 // iNu : defines the sharpness of the 'edge' of the basis function in Fourier domain (see paper)
99 // Precison : defines filter length by truncation error quantified by
100 // P = -log10(1 - norm_of_filter) (see the paper)
101 // after forward transformation the structure of the WDM sliced array is the following:
102 // f phase 0 phase 90
103 // M * * ... * * * ... *
104 // ... ... ... ... ... ... ...
105 // 1 * * .... * * * ... *
106 // 0 * * ... * * * ... *
107 // t 1 2 ... n n+1 n+2... 2n
108 // where t/f is the time/frequency index
109 // the global TF index in the linear array is i = t*(M+1)+f
110 
111  if(++objCounter==1) initFourier();
112 
113  this->m_WaveType = WDMT;
114  this->m_Heterodine = 1;
115  this->BetaOrder = iNu;
116  this->precision = Precision;
117  this->m_Level = 0;
118  this->m_Layer = M;
119  this->KWDM = K;
120  this->LWDM = 0;
121 
122  if(iNu<2){
123  printf("iNu too small, reset to 2\n");
124  iNu = 2;
125  }
126  if(iNu>7){
127  printf("iNu too large, reset to 7\n");
128  iNu = 7;
129  }
130 
131  int nMax = 3e5;
132  int M2 = M*2;
133  int N = 1;
134 
135  wavearray<double> filter = this->getFilter(nMax);
136 
137  double* tmp = filter.data;
138  double residual = 1 - tmp[0]*tmp[0];
139  double prec = pow(10., -double(Precision));
140 
141  do {
142  residual -= 2*tmp[N]*tmp[N];
143  N++;
144  //printf("%d %e\n", N, residual);
145  } while(residual>prec || (N-1)%M2 || N/M2<3);
146 
147  printf("Filter length = %d, norm = %.16f\n", N, 1.-residual);
148 
149  wdmFilter.resize(N);
150  wdmFilter.cpf(filter,N);
151  this->m_L = 0;
152  this->m_H = N;
153  TFMap00 = TFMap90 = 0;
154  td_buffer=0;
155 }
156 
157 template<class DataType_t>
159 WaveDWT<DataType_t>(1, 1, 0, B_CYCLE)
160 {
161 // constructor
162 // M + 1: number of bands
163 // K : K=n*M, where n is integer. K defines the width of the 'edge'
164 // of the basis function in Fourier domain (see the paper)
165 // larger n, longer the WDM filter, lower the spectral leakage between the bands.
166 // after forward transformation the structure of the WDM sliced array is the following:
167 // f phase 0 phase 90
168 // M * * ... * * * ... *
169 // ... ... ... ... ... ... ...
170 // 1 * * .... * * * ... *
171 // 0 * * ... * * * ... *
172 // t 1 2 ... n n+1 n+2... 2n
173 // where t/f is the time/frequency index
174 // the global TF index in the linear array is i = t*(M+1)+f
175 
176  if(++objCounter==1) initFourier();
177  int M = abs(m);
178 
179  this->m_WaveType = WDMT;
180  this->m_Heterodine = 1;
181  this->BetaOrder = 2;
182  this->m_Level = 0;
183  this->m_Layer = M;
184  this->KWDM = M;
185  this->LWDM = 0;
186 
187  int nMax = 3e5;
188  int M2 = M*6;
189  int K = m<0 ? M : 2*M-int(92*M/256.); // trancate basis function
190 
191  wavearray<double> filter = this->getFilter(M2+6);
192 
193  double* tmp = filter.data;
194  double residual = 1 - tmp[0]*tmp[0];
195  //cout<<filter.size()<<" "<<M2<<" "<<K<<endl;
196 
197  for(int i=1; i<=M2; i++) {
198  if(i<K) {
199  residual -= 2*tmp[i]*tmp[i];
200  //printf("%d %e %e\n", i, residual, tmp[i]);
201  } else {
202  tmp[i] = 0.;
203  }
204  }
205 
206  //printf("Filter length = %d, norm = %.16f\n", M2+1, 1.-residual);
207 
208  wdmFilter.resize(M2+1);
209  filter *= 1./sqrt(1.-residual);
210  this->precision = -log10(residual);
211  wdmFilter.cpf(filter,M2+1);
212  this->m_L = 0;
213  this->m_H = M2+1;
214  TFMap00 = TFMap90 = 0;
215  td_buffer=0;
216 }
217 
218 template<class DataType_t> WDM<DataType_t>::
220 {
221 // copy constructor
222 // w : other WDM object
223 
224  ++objCounter;
225  this->m_Heterodine = 1;
226  this->m_WaveType = WDMT;
227  this->BetaOrder = w.BetaOrder;
228  this->precision = w.precision;
229  this->m_Level = w.m_Level;
230  this->m_Layer = w.m_Layer;
231  this->m_L = w.m_L;
232  this->m_H = w.m_H;
233  this->KWDM = w.KWDM;
234  this->LWDM = w.LWDM;
235  this->nSTS = w.nSTS; // WaveDWT parent class
236  this->wdmFilter = w.wdmFilter;
237  this->T0 = w.T0;
238  this->Tx = w.Tx;
239  TFMap00 = TFMap90 = 0;
240  sinTD = w.sinTD;
241  cosTD = w.cosTD;
242  sinTDx = w.sinTDx;
243  td_buffer = 0;
244  if(T0.Last()) {
245  initSSEPointers();
246  for(int i=0; i<6; ++i) {
247  td_halo[i].Resize(T0[0].Last());
248  td_halo[i].ZeroExtraElements();
249  }
250  }
251 
252 }
253 
254 template<class DataType_t>
256 {
257 // Destructor
258 
259  if(--objCounter==0) {
260  for(int i=2; i<MAXBETA; ++i){
261  if(Cos[i]) delete [] Cos[i];
262  if(Cos2[i]) delete [] Cos2[i];
263  if(SinCos[i]) delete [] SinCos[i];
264  }
265  }
266 
267  if(TFMap00) delete [] TFMap00;
268  if(TFMap90) delete [] TFMap90;
269  if(td_buffer)free(td_buffer);
270 }
271 
272 //clone
273 template<class DataType_t>
275 {
276 // Clone the object
277 
278  return new WDM<DataType_t>(*this);
279 }
280 
281 
282 template<class DataType_t>
284 {
285 // light-weight clone without TD filters
286 
287  WDM<DataType_t>* pwdm = new WDM<DataType_t>();
288  pwdm->m_Heterodine = this->m_Heterodine;
289  pwdm->m_WaveType = this->m_WaveType;
290  pwdm->BetaOrder = this->BetaOrder;
291  pwdm->precision = this->precision;
292  pwdm->m_Level = this->m_Level;
293  pwdm->m_Layer = this->m_Layer;
294  pwdm->m_L = this->m_L;
295  pwdm->m_H = this->m_H;
296  pwdm->KWDM = this->KWDM;
297  pwdm->LWDM = this->LWDM;
298  pwdm->nSTS = this->nSTS;
299  pwdm->wdmFilter = this->wdmFilter;
300  pwdm->TFMap00 = pwdm->TFMap90 = 0;
301  return pwdm;
302 }
303 
304 template<class DataType_t>
306 {
307 // computes basis function (sampled)
308 // m : frequency index
309 // n : time index
310 // w : where to store the values
311 // return value: indicates the time translation from origin needed by w (n=0 corresponds to t=0)
312 
313  int N = wdmFilter.size() ;
314  int M = this->m_Layer;
315  w.Resize(N-1);
316  if(m==0){
317  if(n%2)for(int i=-N+1; i<N; ++i)w[i] = 0;
318  else{
319  for(int i=1; i<N; ++i)w[-i] = w[i] = wdmFilter[i];
320  w[0] = wdmFilter[0];
321  }
322  }
323  else if(m==M){
324  if( (n-M)%2) for(int i=-N+1; i<N; ++i)w[i] = 0;
325  else {
326  int s = (M%2) ? -1 : 1 ; //sign
327  w[0] = s*wdmFilter[0];
328  for(int i=1; i<N; ++i){
329  s = -s;
330  w[-i] = w[i] = s*wdmFilter[i];
331  }
332  }
333  }
334  else {
335  double ratio = m*Pi/M;
336  double sign = sqrt(2);
337  //if((m*n)%2)sign = -sqrt(2);
338 
339  if((m+n)%2){
340  w[0] = 0;
341  for(int i=1; i<N; ++i){
342  w[i] = - sign*sin(i*ratio)*wdmFilter[i];
343  w[-i] = - w[i];
344  }
345  }
346  else{
347  w[0] = sign*wdmFilter[0];
348  for(int i=1; i<N; ++i)
349  w[i] = w[-i] = sign*cos(i*ratio)*wdmFilter[i];
350  }
351  }
352  return n*M;
353 }
354 
355 template<class DataType_t>
357 {
358 // computes Quadrature basis function (sampled)
359 // m : frequency index
360 // n : time index
361 // w : where to store the values
362 // return value: indicates the time translation from origin needed by w (n=0 corresponds to t=0)
363 
364  int N = wdmFilter.size() ;
365  int M = this->m_Layer;
366  w.Resize(N-1);
367  if(m==0){
368  if(n%2){
369  for(int i=1; i<N; ++i)w[-i] = w[i] = wdmFilter[i];
370  w[0] = wdmFilter[0];
371  }
372  else for(int i=-N+1; i<N; ++i)w[i] = 0;
373  }
374  else if(m==M){
375  if( (n-M)%2){
376  int s = 1;
377  w[0] = wdmFilter[0];
378  for(int i=1; i<N; ++i){
379  s = -s;
380  w[-i] = w[i] = s*wdmFilter[i];
381  }
382  }
383  else for(int i=-N+1; i<N; ++i)w[i] = 0;
384  }
385  else {
386  double ratio = m*Pi/M;
387  double sign = sqrt(2);
388  //if((m*n)%2)sign = -sqrt(2);
389 
390  if((m+n)%2){
391  w[0] = sign*wdmFilter[0];
392  for(int i=1; i<N; ++i)
393  w[i] = w[-i] = sign*cos(i*ratio)*wdmFilter[i];
394  }
395  else{
396  w[0] = 0;
397  for(int i=1; i<N; ++i){
398  w[i] = sign*sin(i*ratio)*wdmFilter[i];
399  w[-i] = - w[i];
400  }
401  }
402  }
403  return n*M;
404 }
405 
406 
407 template<class DataType_t>
409 {
410 // computes basis function (sampled)
411 // j : TF index
412 // w : where to store the values
413 // Quad: true to return Quadrature basis function
414 // return value: indicates the time translation from origin needed by w (j in [0,M] correspond to t=0)
415 
416  int M1 = this->m_Layer+1;
417  int m = j%M1;
418  int n = j/M1;
420  int shift = Quad? getBaseWaveQ(m, n, w2) : getBaseWave(m, n, w2);
421  int nn = w2.Last();
422  w.resize(2*nn+1);
423  for(int i=-nn; i<=nn; ++i) w[nn+i] = w2[i];
424  return shift-nn;
425 }
426 
427 
428 template<class DataType_t>
430 {
431 // not used
432 
433  int M1 = this->m_Layer+1;
434  int N = this->nWWS/2/M1;
435  TFMap00 = new DataType_t*[N];
436  TFMap90 = new DataType_t*[N];
437  for(int i=0; i<N; ++i){
438  TFMap00[i] = this->pWWS + M1*i;
439  TFMap90[i] = TFMap00[i] + M1*N;
440  }
441 }
442 
443 
444 template<class DataType_t>
446 {
447 // access function for a frequency layer described by index
448 // return slice corresponding to the selected frequency band
449 
450  int M = this->m_Level;
451  int N = this->nWWS;
452  int layer = int(fabs(index)+0.001);
453 
454  if(layer>M){
455  printf("WDM::getSlice(): illegal argument %d. Should be no more than %d\n",layer,M);
456  exit(0);
457  }
458 
459  if(!this->allocate()){
460  std::invalid_argument("WDM::getSlice(): data is not allocated");
461  return std::slice(0,1,1);
462  }
463 
464  size_t n = N/(M+1); // number of samples
465  size_t k = N/this->nSTS>1 ? 1 : 0; // power flag when = 0
466  size_t s = M+1; // slice step
467  size_t i = index<0 ? layer+k*N/2 : layer; // first sample
468 
469  n /= k+1; // adjust size
470 
471  if(i+(n-1)*s+1 > this->nWWS){
472  std::invalid_argument("WaveDWT::getSlice(): invalide arguments");
473  return std::slice(0,1,1);
474  }
475 
476  return std::slice(i,n,s);
477 }
478 
479 template<class DataType_t>
481 {
482 // internal WDM function
483 // computes the WDM transformation filter
484 // n - number of the filter coefficients defined by WDM
485 // parameters and precision.
486 
488  double* Fourier = Cos[BetaOrder];
489  int nFourier = CosSize[BetaOrder];
490  int M = this->m_Layer;
491  int K = this->KWDM;
492  double B = Pi/K;
493  double A = (K-M)*Pi/2./K/M;
494  double K2 = K;
495  K2 *= K2;
496  double* filter = tmp.data;
497  double gNorm = sqrt(2*M)/Pi;
498  double fNorm = 0.;
499 
500  double* fourier = new double[nFourier];
501  for(int i=0; i<nFourier; ++i) fourier[i] = Fourier[i];
502 
503  filter[0] = (A + fourier[0]*B)*gNorm; // do n = 0 first
504  fNorm = filter[0]*filter[0];
505  fourier[0] /= sqrt(2); // this line requires to copy Fourier to fourier
506 
507  for(int i=1; i<n; ++i){ // other coefficients
508  double di = i;
509  double i2 = di*di;
510  double sumEven = 0, sumOdd = 0;
511  for(int j=0; j<nFourier; ++j){ // a_j in the Fourier expansion
512  if(i%K);
513  else if(j==i/K)continue;
514  if(j&1)sumOdd += di/(i2 - j*j*K2)*fourier[j];
515  else sumEven += di/(i2 - j*j*K2)*fourier[j];
516  }
517 
518  double intAB = 0; //integral on [A, A+B]
519  if(i%K == 0)
520  if(i/K < nFourier) intAB = fourier[i/K]*B/2*cos(i*A);
521 
522  intAB += 2*(sumEven*sin(i*B/2)*cos(i*Pi/(2*M)) - sumOdd*sin(i*Pi/(2*M))*cos(i*B/2));
523  filter[i] = gNorm* ( sin(i*A)/i + sqrt(2)*intAB ); //sqrt2 b/c of the Four. expansion
524  fNorm += 2*filter[i]*filter[i];
525  }
526 
527  delete [] fourier;
528  return tmp;
529 
530 }
531 
532 template<class DataType_t>
533 wavearray<double> WDM<DataType_t>::getTDFilter1(int n, int L) // ex: L=8 -> tau/8 step
534 {
535 // computes 1st integral needed for TD filters (see the reference)
536 // n : defines the number of TD filter coefficients
537 // L : upsample factor the same as for setTDFilter
538 
539  double* Fourier = Cos2[BetaOrder];
540  int nFourier = Cos2Size[BetaOrder];
541  int M = this->m_Layer;
542  int K = this->KWDM;
543  double B = Pi/K;
544  double A = (K-M)*Pi/2./K/M;
545  double K2 = K*K;
546  double gNorm = 2*M/Pi;
547  wavearray<double> tmp(n*L);
548  double* filter = tmp.data;
549 
550  double* fourier = new double[nFourier];
551  for(int i=0; i<nFourier; ++i) fourier[i] = Fourier[i];
552 
553  filter[0] = (A + fourier[0]*B)*gNorm; // do n = 0 first
554  fourier[0] /= sqrt(2); // this line requires to copy Fourier to fourier
555 
556  for(int i=1; i<n*L; ++i){ // other coefficients
557  double di = i*(1./L);
558  double i2 = di*di;
559  double sumEven = 0, sumOdd = 0;
560  for(int j=0; j<nFourier; ++j){ // a_j in the Fourier expansion
561  if(j*K*L==i)continue;
562  if(j&1)sumOdd += di/(i2 - j*j*K2)*fourier[j];
563  else sumEven += di/(i2 - j*j*K2)*fourier[j];
564  }
565 
566  double intAB = 0; //integral on [A, A+B]
567  if(i%(K*L) == 0)if(i/(K*L) < nFourier)
568  intAB = fourier[i/(K*L)]*B/2*cos(di*A);
569  intAB += 2*(sumEven*sin(di*B/2)*cos(di*Pi/(2*M)) - sumOdd*sin(di*Pi/(2*M))*cos(di*B/2));
570  filter[i] = gNorm* ( sin(di*A)/di + sqrt(2)*intAB ); //sqrt2 b/c of the Four. expansion
571  }
572 
573  delete [] fourier;
574  return tmp;
575 }
576 
577 
578 template<class DataType_t>
580 {
581 // computes 2nd integral needed for TD filters (see paper)
582 // n : defines the number of TD filter coefficients
583 // L : upsample factor the same as for setTDFilter
584 
585  wavearray<double> tmp(n*L);
586  double* res = tmp.data;
587  int M = this->m_Layer;
588  int K = this->KWDM;
589  double B = Pi/K;
590  double K2 = K*K;
591 
592  double* Fourier = SinCos[BetaOrder];
593  int nFourier = SinCosSize[BetaOrder];
594  double gNorm = M/Pi;
595 
596  double aux = Fourier[0];
597  res[0] = aux*B*gNorm;
598  Fourier[0] /= sqrt(2);
599  for(int i=1; i<n*L; ++i){ // psi_i (l in the writeup)
600  double di = i*(1./L);
601  double i2 = di*di;
602  double sum = 0;
603  for(int j=0; j<=nFourier; j+=2){ //j indexing the Fourier coeff
604  if(j*K*L==i)continue;
605  sum += di/(i2 - j*j*K2)*Fourier[j];
606  }
607  sum *= 2*sin(di*B/2);
608  if(i%(2*K*L) == 0)
609  if(i/(K*L) <= nFourier) { // j*K*L = i case (even j)
610  if( (i/(2*K*L)) & 1 ) sum -= Fourier[i/K/L]*B/2;
611  else sum += Fourier[i/K/L]*B/2;
612  }
613  res[i] = gNorm*sqrt(2)*sum; //sqrt2 b/c of the Four. expansion
614  }
615  Fourier[0] = aux;
616  return tmp;
617 }
618 
619 
620 template<class DataType_t>
621 void WDM<DataType_t>::setTDFilter(int nCoeffs, int L)
622 {
623 // initialization of the time delay filters
624 // nCoeffs : define the number of the filter coefficients
625 // L : upsample factor, defines the fundamental time delay step
626 // dt = tau/L , where tau is the sampling interval of the original
627 // time series
628 
629  int M = this->m_Layer;
630  this->LWDM = L;
631  T0.Resize(M*L);
632  Tx.Resize(M*L);
633  for(int i=0; i<6; ++i) {
634  td_halo[i].Resize(nCoeffs);
635  td_halo[i].ZeroExtraElements();
636  }
637 
638  wavearray<double> filt1 = getTDFilter1(M*(nCoeffs+1)+1, L);
639 
640  double* tmp = filt1.data ; //cos2
641  for(int i=M*L; i>=-M*L; --i){
642  T0[i].Resize(nCoeffs);
643  T0[i][0] = tmp[abs(i)];
644  for(int j=1; j<=nCoeffs; ++j){
645  T0[i][j] = tmp[i+ M*L*j];
646  T0[i][-j] = tmp[M*L*j - i];
647  }
648  T0[i].ZeroExtraElements();
649  }
650 
651  wavearray<double> filt2 = getTDFilter2(M*(nCoeffs+1)+1 , L); //sincos
652  tmp = filt2.data;
653  for(int i=M*L; i>=-M*L; --i){
654  Tx[i].Resize(nCoeffs);
655  Tx[i][0] = tmp[abs(i)]/2.; // divide by 2 b/c the filter is actually for sin(2x) = 2*sin*cos
656  for(int j=1; j<=nCoeffs; ++j){
657  Tx[i][j] = tmp[i+ M*L*j]/2.;
658  Tx[i][-j] = tmp[M*L*j - i]/2.;
659  }
660 
661  // change some signs to bring i^l factor to C_l form!!
662  for(int j=1; j<=nCoeffs; ++j)switch(j%4){
663  case 1: Tx[i][-j] *= -1; break;
664  case 2: Tx[i][j] *= -1; Tx[i][-j] *= -1; break;
665  case 3: Tx[i][j] *= -1;
666  }
667  Tx[i].ZeroExtraElements();
668  }
669 
670  initSSEPointers();
671 
672  sinTD.resize(2*L*M);
673  cosTD.resize(2*L*M);
674  for(int i=0; i<2*L*M; ++i){
675  sinTD[i] = sin(i*Pi/(L*M));
676  cosTD[i] = cos(i*Pi/(L*M));
677  }
678  sinTDx.resize(4*L*M);
679  for(int i=0; i<4*L*M; ++i)sinTDx[i] = sin(i*Pi/(2*L*M));
680 }
681 
682 template<class DataType_t>
684 {
685 // required before using SSE instructions to speed up time delay filter calculations
686 
687  if(td_buffer)free(td_buffer);
688  switch(T0[0].SSESize()){
689  case 64: SSE_TDF = sse_dp4; posix_memalign((void**)&td_buffer, 16, 64+16); break;
690  case 80: SSE_TDF = sse_dp5; posix_memalign((void**)&td_buffer, 16, 80+16); break;
691  case 96: SSE_TDF = sse_dp6; posix_memalign((void**)&td_buffer, 16, 96+16); break;
692  case 112: SSE_TDF = sse_dp7; posix_memalign((void**)&td_buffer, 16, 112+16); break;
693  case 128: SSE_TDF = sse_dp8; posix_memalign((void**)&td_buffer, 16, 128+16); break;
694  case 144: SSE_TDF = sse_dp9; posix_memalign((void**)&td_buffer, 16, 144+16); break;
695  case 160: SSE_TDF = sse_dp10; posix_memalign((void**)&td_buffer, 16, 160+16); break;
696  case 176: SSE_TDF = sse_dp11; posix_memalign((void**)&td_buffer, 16, 176+16); break;
697  default: printf("initSSEPointer error, size not found. Contact V.Necula\n"); SSE_TDF=0;
698  }
699  td_data = td_buffer + T0[0].SSESize()/8 + 4;
700 }
701 
702 /*
703 template<class DataType_t>
704 double WDM<DataType_t>::PrintTDFilters(int m, int dt, int nC)
705 { SymmArray<double>& sa0 = T0[dt];
706 SymmArray<double>& sax = Tx[dt];
707 int M = this->m_Layer;
708 double sin0 = sin((m*Pi*dt)/M);
709 double cos0 = cos((m*Pi*dt)/M);
710 double sinP = sin((2*m+1)*dt*Pi/(2.*M));
711 double sinM = sin((2*m-1)*dt*Pi/(2.*M));
712 if(nC<=0 || nC>sa0.Last())nC = sa0.Last();
713 
714 double res = 0, t0, tp, tm;
715 for(int i=-nC; i<=nC; ++i){
716 if(i&1)t0 = sa0[i]*sin0;
717 else t0 = sa0[i]*cos0;
718 tp = sax[i]*sinP;
719 tm = sax[i]*sinM;
720 //printf("i=%3d %+le %+le %+le\n", i, t0, tp, tm);
721 res += t0*t0 + tm*tm + tp*tp;
722 }
723 return res;
724 }
725 */
726 
727 // Quadrature: equivalent to multiplying C_l by -i (plus shifting the 0,M layers)
728 
729 template<class DataType_t>
730 double WDM<DataType_t>::getPixelAmplitude(int m, int n, int dT, bool Quad)
731 {
732 // computes one time dalay for one pixel without SSE instructions
733 // m : frequency index (0...M)
734 // n : time index (in pixels)
735 // dT : time delay (in units of the sampling period or fractions of it,
736 // depending on the TD Filters setting
737 // Quad: true to compute the delayed amplitude for the quadrature [m,n] pixel
738 
739 
740  int M = this->m_Layer;
741  int M1 = M+1;
742  int odd = n&1;
743  bool cancelEven = odd ^ Quad;
744  double dt = dT*(1./this->LWDM);
745  if(m==0 || m==M)if(cancelEven) return 0; // DOESN'T WORK FOR ODD M!!
746  DataType_t* TFMap = this->pWWS;
747  if(Quad) TFMap += this->nWWS/2;
748 
749  //double* tfc = TFMap[m]+n;
750  DataType_t* tfc = TFMap + n*M1 + m;
751  double res, res1;
752 
753  //SymmArray<double>& sa = T0[dT];
754  SymmArraySSE<float>& sa = T0[dT];
755  double sumOdd = 0;
756  double sumEven = tfc[0]*sa[0];
757  for(int i=2; i<=sa.Last(); i+=2) sumEven += (tfc[i*M1]*sa[i] +tfc[-i*M1]*sa[-i]);
758  for(int i=1; i<=sa.Last(); i+=2) sumOdd += (tfc[i*M1]*sa[i] +tfc[-i*M1]*sa[-i]);
759  if(m==0 || m==M) {
760  if(cancelEven)sumEven = 0;
761  else sumOdd = 0;
762  }
763 
764  //res = 3.2*sumEven + sumOdd;
765  if(odd) res = sumEven*cos((m*Pi*dt)/M) - sumOdd*sin((m*Pi*dt)/M);
766  else res = sumEven*cos((m*Pi*dt)/M) + sumOdd*sin((m*Pi*dt)/M);
767 
768  //SymmArray<double>& sax = Tx[dT];
769  SymmArraySSE<float>& sax = Tx[dT];
770  if(m>0){ // m-1 case
771  // tfc = TFMap[m-1]+n;
772  tfc--;
773  sumOdd = 0;
774  sumEven = tfc[0]*sax[0];
775  for(int i=2; i<=sax.Last(); i+=2) sumEven += (tfc[i*M1]*sax[i] +tfc[-i*M1]*sax[-i]);
776  for(int i=1; i<=sax.Last(); i+=2) sumOdd += (tfc[i*M1]*sax[i] + tfc[-i*M1]*sax[-i]);
777 
778  if(m==1) {
779  if(cancelEven)sumEven = 0;
780  else sumOdd = 0;
781  }
782  //res1 = 2.1*sumEven + sumOdd;
783  if(odd) res1 = (sumEven - sumOdd)*sin((2*m-1)*dt*Pi/(2.*M));
784  else res1 = (sumEven + sumOdd)*sin((2*m-1)*dt*Pi/(2.*M));
785  if(m==1 || m == M) res1 *= sqrt(2);
786  if((m+n)&1) res -= res1;
787  else res += res1;
788  }
789 
790  if(m<M){ // m+1 case
791  //tfc = TFMap[m+1]+n;
792  tfc = TFMap + n*M1 + m + 1;
793  sumOdd = 0;
794  sumEven = tfc[0]*sax[0];
795  for(int i=2; i<=sax.Last(); i+=2) sumEven += (tfc[i*M1]*sax[i] + tfc[-i*M1]*sax[-i]);
796  for(int i=1; i<=sax.Last(); i+=2) sumOdd += (tfc[i*M1]*sax[i] + tfc[-i*M1]*sax[-i]);
797 
798  if(m==M-1) {
799  if(cancelEven)sumEven = 0;
800  else sumOdd = 0;
801  }
802  //res1 = 2.3*sumEven + sumOdd;
803  if(odd) res1 = (sumEven + sumOdd)*sin((2*m+1)*dt*Pi/(2.*M));
804  else res1 = (sumEven - sumOdd)*sin((2*m+1)*dt*Pi/(2.*M)); //two (-1)^l cancel
805  if(m==0 || m==M-1) res1 *= sqrt(2);
806  if((m+n)&1) res -= res1;
807  else res += res1;
808  }
809  return res;
810 }
811 
812 
813 // Quadrature: equivalent to multiplying C_l by -i (plus shifting the 0,M layers)
814 template<class DataType_t>
815 double WDM<DataType_t>::getPixelAmplitudeSSEOld(int m, int n, int dT, bool Quad)
816 {
817 // computes one time dalay for one pixel using SSE instructions
818 // m : frequency index (0...M)
819 // n : time index (in pixels)
820 // dT : time delay (in units of the sampling period or fractions of it,
821 // depending on the TD Filters setting
822 // Quad: true to compute the delayed amplitude for the quadrature [m,n] pixel
823 
824 
825  static const double sqrt2 = sqrt(2);
826  int M = this->m_Layer;
827  int M1 = M+1;
828  int L = this->LWDM;
829  int odd = n&1;
830  bool cancelEven = odd ^ Quad;
831 
832  if(m==0 || m==M)if(cancelEven) return 0; // DOESN'T WORK FOR ODD M!!
833  DataType_t* TFMap = this->pWWS;
834  if(Quad) TFMap += this->nWWS/2;
835  DataType_t* tfc = TFMap + n*M1 + m;
836 
837  double res, res1;
838 
839  SymmArraySSE<float>& sa = T0[dT];
840  watasm_data = td_buffer + 4;
841  watasm_filter = sa.SSEPointer();
842 
843  const int last = sa.Last();
844  for(int i=-last, j = -last*M1; i<=last; ++i){td_data[i] = tfc[j] ; j+=M1;}
845  SSE_TDF();
846 
847  float sumEven = watasm_xmm0[0] + watasm_xmm0[2];
848  float sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
849 
850  if(m==0 || m==M) {
851  if(cancelEven)sumEven = 0;
852  else sumOdd = 0;
853  }
854 
855  //res = sumEven + sumOdd;
856  int index = (m*dT)%(2*M*L);
857  if(index<0) index +=2*M*L;
858 
859  if(odd) res = sumEven*cosTD[index] - sumOdd*sinTD[index];
860  else res = sumEven*cosTD[index] + sumOdd*sinTD[index];
861 
862  SymmArraySSE<float>& sax = Tx[dT];
863  watasm_filter = sax.SSEPointer();
864 
865  if(m>0){ // m-1 case
866  tfc--;
867  for(int i=-last, j = -last*M1; i<=last; ++i){td_data[i] = tfc[j] ; j+=M1;}
868  SSE_TDF();
869  sumEven = watasm_xmm0[0] + watasm_xmm0[2];
870  sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
871 
872  if(m==1) {
873  if(cancelEven)sumEven = 0;
874  else sumOdd = 0;
875  }
876 
877  index = (2*m-1)*dT%(4*M*L);
878  if(index<0)index += 4*M*L;
879  if(odd) res1 = (sumEven - sumOdd)*sinTDx[index];
880  else res1 = (sumEven + sumOdd)*sinTDx[index];
881 
882  if(m==1 || m == M) res1 *= sqrt2;
883  if((m+n)&1) res -= res1;
884  else res += res1;
885  }
886 
887  if(m<M){ // m+1 case
888  tfc = TFMap + n*M1 + m + 1;
889  for(int i=-last, j = -last*M1; i<=last; ++i){td_data[i] = tfc[j] ; j+=M1;}
890  SSE_TDF();
891 
892  sumEven = watasm_xmm0[0] + watasm_xmm0[2];
893  sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
894 
895  if(m==M-1) {
896  if(cancelEven)sumEven = 0;
897  else sumOdd = 0;
898  }
899  index = (2*m+1)*dT%(4*M*L);
900  if(index<0)index += 4*M*L;
901  if(odd) res1 = (sumEven + sumOdd)*sinTDx[index];
902  else res1 = (sumEven - sumOdd)*sinTDx[index]; //two (-1)^l cancel
903  if(m==0 || m==M-1) res1 *= sqrt2;
904  if((m+n)&1) res -= res1;
905  else res += res1;
906  }
907  return res;
908 }
909 
910 template<class DataType_t>
911 float WDM<DataType_t>::getPixelAmplitudeSSE(int m, int n, int dT, bool Quad)
912 {
913 // computes one time dalay for one pixel using SSE instructions; requires td_halo be initialized
914 // m : frequency index (0...M)
915 // n : time index (in pixels)
916 // dT : time delay (in units of the sampling period or fractions of it,
917 // depending on the TD Filters setting
918 // Quad: true to compute the delayed amplitude for the quadrature [m,n] pixel
919 
920  static const double sqrt2 = sqrt(2);
921  int M = this->m_Layer;
922  int L = this->LWDM;
923  int odd = n&1;
924  bool cancelEven = odd ^ Quad;
925  if(m==0 || m==M)if(cancelEven)return 0; // DOESN'T WORK FOR ODD M!!
926 
927  int QuadShift = Quad? 3:0;
928  double res, res1;
929 
930  SymmArraySSE<float>& sa = T0[dT];
931  watasm_data = td_halo[QuadShift + 1].SSEPointer();
932  watasm_filter = sa.SSEPointer();
933  SSE_TDF();
934  float sumEven = watasm_xmm0[0] + watasm_xmm0[2];
935  float sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
936 
937  if(m==0 || m==M) {
938  if(cancelEven)sumEven = 0;
939  else sumOdd = 0;
940  }
941 
942  int index = (m*dT)%(2*M*L);
943  if(index<0) index +=2*M*L;
944  if(odd) res = sumEven*cosTD[index] - sumOdd*sinTD[index];
945  else res = sumEven*cosTD[index] + sumOdd*sinTD[index];
946 
947  SymmArraySSE<float>& sax = Tx[dT];
948  watasm_filter = sax.SSEPointer();
949 
950  if(m>0){ // m-1 case
951  watasm_data = td_halo[QuadShift + 0].SSEPointer();
952  SSE_TDF();
953  sumEven = watasm_xmm0[0] + watasm_xmm0[2];
954  sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
955 
956  if(m==1) {
957  if(cancelEven)sumEven = 0;
958  else sumOdd = 0;
959  }
960 
961  index = (2*m-1)*dT%(4*M*L);
962  if(index<0)index += 4*M*L;
963  if(odd) res1 = (sumEven - sumOdd)*sinTDx[index];
964  else res1 = (sumEven + sumOdd)*sinTDx[index];
965 
966  if(m==1 || m == M) res1 *= sqrt2;
967  if((m+n)&1) res -= res1;
968  else res += res1;
969  }
970 
971  if(m<M){ // m+1 case
972  watasm_data = td_halo[QuadShift + 2].SSEPointer();
973  SSE_TDF();
974  sumEven = watasm_xmm0[0] + watasm_xmm0[2];
975  sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
976 
977  if(m==M-1) {
978  if(cancelEven)sumEven = 0;
979  else sumOdd = 0;
980  }
981  index = (2*m+1)*dT%(4*M*L);
982  if(index<0)index += 4*M*L;
983 
984  if(odd) res1 = (sumEven + sumOdd)*sinTDx[index];
985  else res1 = (sumEven - sumOdd)*sinTDx[index]; //two (-1)^l cancel
986  if(m==0 || m==M-1) res1 *= sqrt2;
987  if((m+n)&1) res -= res1;
988  else res += res1;
989  }
990  return res;
991 }
992 
993 template<class DataType_t>
994 void WDM<DataType_t>::getPixelAmplitudeSSE(int m, int n, int t1, int t2, float* r, bool Quad)
995 {
996 // computes time dalays for one pixel
997 // m : frequency index (0...M)
998 // n : time index (in pixels)
999 // t1, t2 : time delays range [t1, t1+1, .. t2]
1000 // r : where to store the time delayed amplitude for pixel [m,n]
1001 // Quad : true to request time delays for the quadrature amplitude
1002 
1003  static const double sqrt2 = sqrt(2);
1004  int M = this->m_Layer;
1005  int L = this->LWDM;
1006  int odd = n&1;
1007  bool cancelEven = odd ^ Quad;
1008  if(m==0 || m==M)if(cancelEven){ // DOESN'T WORK FOR ODD M!!
1009  for(int dT = t1 ; dT<=t2; ++dT)*r++ = 0;
1010  return;
1011  }
1012 
1013  double res, res1;
1014  int QuadShift = Quad? 3:0;
1015  int _2ml = 2*M*L;
1016  int _4ml = 2*_2ml;
1017 
1018  for(int dT = t1 ; dT<=t2; ++dT){
1019 
1020  watasm_data = td_halo[QuadShift + 1].SSEPointer();
1021  watasm_filter = T0[dT].SSEPointer();
1022  SSE_TDF();
1023  float sumEven = watasm_xmm0[0] + watasm_xmm0[2];
1024  float sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
1025 
1026  if(m==0 || m==M) {
1027  if(cancelEven)sumEven = 0;
1028  else sumOdd = 0;
1029  }
1030 
1031  int index = (m*dT)%_2ml;
1032  if(index<0) index +=_2ml;
1033  if(odd) res = sumEven*cosTD[index] - sumOdd*sinTD[index];
1034  else res = sumEven*cosTD[index] + sumOdd*sinTD[index];
1035 
1036  SymmArraySSE<float>& sax = Tx[dT];
1037  watasm_filter = sax.SSEPointer();
1038 
1039  if(m>0){ // m-1 case
1040  watasm_data = td_halo[QuadShift + 0].SSEPointer();
1041  SSE_TDF();
1042  sumEven = watasm_xmm0[0] + watasm_xmm0[2];
1043  sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
1044 
1045  if(m==1) {
1046  if(cancelEven)sumEven = 0;
1047  else sumOdd = 0;
1048  }
1049 
1050  index = (2*m-1)*dT%_4ml;
1051  if(index<0)index += _4ml;
1052  if(odd) res1 = (sumEven - sumOdd)*sinTDx[index];
1053  else res1 = (sumEven + sumOdd)*sinTDx[index];
1054 
1055  if(m==1 || m == M) res1 *= sqrt2;
1056  if((m+n)&1) res -= res1;
1057  else res += res1;
1058  }
1059 
1060  if(m<M){ // m+1 case
1061  watasm_data = td_halo[QuadShift + 2].SSEPointer();
1062  SSE_TDF();
1063  sumEven = watasm_xmm0[0] + watasm_xmm0[2];
1064  sumOdd = watasm_xmm0[1] + watasm_xmm0[3];
1065 
1066  if(m==M-1) {
1067  if(cancelEven)sumEven = 0;
1068  else sumOdd = 0;
1069  }
1070 
1071  index = (2*m+1)*dT%_4ml;
1072  if(index<0)index += _4ml;
1073  if(odd) res1 = (sumEven + sumOdd)*sinTDx[index];
1074  else res1 = (sumEven - sumOdd)*sinTDx[index]; //two (-1)^l cancel
1075  if(m==0 || m==M-1) res1 *= sqrt2;
1076  if((m+n)&1) res -= res1;
1077  else res += res1;
1078  }
1079  *r++ = res;
1080  }
1081 }
1082 
1083 
1084 template<class DataType_t>
1085 float WDM<DataType_t>::getTDamp(int j, int k, char c)
1086 {
1087 // override getTDamp from WaveDWT
1088 // return time-delayed amplitude for delay index k:
1089 // j - global index in the TF map
1090 // k - delay index assuming time delay step 1/(LWDM*rate)
1091 // c - mode: 'a'/'A' - returns 00/90 amplitude, 'p'or'P' - returns power
1092 
1093  int N = (int)this->nWWS/2;
1094  if(this->nWWS/this->nSTS!=2) {
1095  printf("WDM:getTDamp() - time delays can not be produced with this TF data.");
1096  exit(0);
1097  }
1098  if(!this->Last()) {
1099  printf("WDM:getTDamp() - time delay filter is not set");
1100  exit(0);
1101  }
1102  if(j>=N) j -= N;
1103 
1104  int M = this->m_Layer;
1105  int J = M*LWDM; // number of delays in one pixel
1106  int n = j/(M+1); // time index
1107  int m = j%(M+1); // frequency index
1108  int wdmShift = k/J;
1109  k %= J;
1110 
1111  if(n<0 || n>N/(M+1)) {
1112  cout<<"WDM::getTDamp(): index outside TF map"<<endl; exit(1);
1113  }
1114 
1115 
1116  if(c=='a'){
1117  if(wdmShift%2){ // not working well for odd M when m=0/M !!
1118  if((n+m)%2) return -getPixelAmplitude(m, n-wdmShift, k, true);
1119  else return getPixelAmplitude(m, n-wdmShift, k, true);
1120  }
1121  return getPixelAmplitude(m, n-wdmShift, k, false);
1122  }
1123 
1124  if(c=='A'){
1125  if(wdmShift%2){ // not working well for odd M when m=0/M !!
1126  if((n+m)%2) return getPixelAmplitude(m, n-wdmShift, k, false);
1127  else return -getPixelAmplitude(m, n-wdmShift, k, false);
1128  }
1129  return getPixelAmplitude(m, n-wdmShift, k, true);
1130  }
1131 
1132  double a00 = getPixelAmplitude(m, n-wdmShift, k, false);
1133  double a90 = getPixelAmplitude(m, n-wdmShift, k, true);
1134  return float(a00*a00+a90*a90)/2;
1135 }
1136 
1137 
1138 
1139 // set array of time-delay amplitudes/energy
1140 template<class DataType_t>
1142 {
1143 // override getTDAmp from WaveDWT
1144 // return array of time-delayed amplitudes in the format:
1145 // [-K*dt , 0, K*dt, -K*dt, 0, K*dt] - total 2(2K+1) amplitudes
1146 // or array of time-delayed power in the format:
1147 // [-K*dt , 0, K*dt] - total (2K+1) amplitudes
1148 // j - index in TF map
1149 // K - range of unit delays dt
1150 // c - mode: 'a','A' - amplitude, 'p','P' - power
1151 
1152  if(this->nWWS/this->nSTS!=2) {
1153  printf("WDM:getTDAmp() - time delays can not be produced with this TF data.");
1154  exit(0);
1155  }
1156  if(!this->Last()) {
1157  printf("WDM:getTDAmp() - time delay filter is not set");
1158  exit(0);
1159  }
1160  if(j>=(int)this->nWWS/2) j -= this->nWWS/2;
1161 
1162  int mode = (c=='a' || c=='A') ? 2 : 1;
1163  wavearray<float> amp(mode*(2*K+1));
1164 
1165  float* p00 = amp.data;
1166  float* p90 = amp.data + (mode-1)*(2*K+1);
1167  int i = 0;
1168 
1169  for(int k=-K; k<=K; k++) {
1170  if(mode==2) {
1171  p00[i] = float(getTDamp(j, k, 'a'));
1172  p90[i] = float(getTDamp(j, k, 'A'));
1173  }
1174  else p00[i] = getTDamp(j, k, 'p');
1175  i++;
1176  }
1177  return amp;
1178 }
1179 
1180 
1181 // set array of time-delay amplitudes/energy
1182 template<class DataType_t>
1184 {
1185 // return array of time-delayed amplitudes in the format:
1186 // [-K*dt , 0, K*dt, -K*dt, 0, K*dt] - total 2(2K+1) amplitudes
1187 // or array of time-delayed power in the format:
1188 // [-K*dt , 0, K*dt] - total (2K+1) amplitudes
1189 // j - index in TF map
1190 // K - range of unit delays dt
1191 // c - mode: 'a'/'A' - amplitude, 'p','P' - power
1192 
1193  if(!this->Last()) {
1194  printf("WDM:getTDAmp() - time delay filter is not set");
1195  exit(0);
1196  }
1197 
1198  int M = this->m_Layer;
1199  int n = j/(M+1); // time index
1200  int m = j%(M+1); // frequency index
1201 
1202  int J = M*LWDM;
1203  int max_wdmShift = ((K + J)/(2*J))*2;
1204  int max_k = (K + J)%(2*J) - J;
1205  int min_wdmShift = (-K + J)/(2*J);
1206  int min_k = (-K + J)%(2*J);
1207  if(min_k<0){
1208  min_k += 2*J;
1209  --min_wdmShift;
1210  }
1211  min_wdmShift *=2;
1212  min_k -= J;
1213 
1214  //printf("minShift = %d maxShift = %d mink = %d maxk = %d\n",
1215  // min_wdmShift, max_wdmShift, min_k, max_k);
1216 
1217  int mode = (c=='a' || c=='A') ? 2 : 1;
1218  wavearray<float> amp(mode*(2*K+1));
1220  if(mode==1) aux.resize(2*K+1);
1221 
1222  float* p00 = amp.data;
1223  float* p90 = aux.data;
1224  if(mode==2) p90 = amp.data + (mode-1)*(2*K+1);
1225 
1226  if(min_wdmShift==max_wdmShift){
1227  pSS->GetSTFdata(j, td_halo);
1228  getPixelAmplitudeSSE(m, n, min_k, max_k, p00, false);
1229  getPixelAmplitudeSSE(m, n, min_k, max_k, p90, true);
1230  }
1231  else{
1232  pSS->GetSTFdata(j - min_wdmShift*(M+1), td_halo);
1233  getPixelAmplitudeSSE(m, n-min_wdmShift, min_k, J-1, p00, false);
1234  getPixelAmplitudeSSE(m, n-min_wdmShift, min_k, J-1, p90, true);
1235  p00 += J - min_k;
1236  p90 += J - min_k;
1237 
1238  for(int i=min_wdmShift+2; i<max_wdmShift; i+=2){
1239  pSS->GetSTFdata(j - i*(M+1), td_halo);
1240  getPixelAmplitudeSSE(m, n-i, -J, J-1, p00, false);
1241  getPixelAmplitudeSSE(m, n-i, -J, J-1, p90, true);
1242  p00 += 2*J;
1243  p90 += 2*J;
1244  }
1245  pSS->GetSTFdata(j - max_wdmShift*(M+1), td_halo);
1246  getPixelAmplitudeSSE(m, n-max_wdmShift, -J, max_k, p00, false);
1247  getPixelAmplitudeSSE(m, n-max_wdmShift, -J, max_k, p90, true);
1248  }
1249 
1250  if(mode==1){
1251  p00 = amp.data;
1252  p90 = aux.data;
1253  for(int i=0; i<=2*K+1; ++i)p00[i] = (p00[i]*p00[i] + p90[i]*p90[i])/2;
1254  }
1255  return amp;
1256 }
1257 
1258 template<class DataType_t>
1260 {
1261 // fill r with amplitudes needed to compute time-delays (for both quadratures)
1262 // j - global index in the TF map
1263 
1264  if(this->nWWS/this->nSTS!=2) {
1265  printf("WDM:getTDAmp() - time delays can not be produced with this TF data.");
1266  exit(0);
1267  }
1268  if(!this->Last()) {
1269  printf("WDM:getTDAmp() - time delay filter is not set");
1270  exit(0);
1271  }
1272  DataType_t* TFMap = this->pWWS;
1273  if(j>=(int)this->nWWS/2) j -= this->nWWS/2;
1274 
1275  int M = this->m_Layer;
1276  int M1 = M+1;
1277  int L = this->LWDM;
1278  int BlockSize = T0[0].SSESize()/4;
1279  r.resize(6*BlockSize);
1280  float* data = r.data;
1281  data += BlockSize/2;
1282  for(int i=-L; i<=L; ++i)data[i] = TFMap[j + i*M1];
1283  data += BlockSize;
1284  --j;
1285  for(int i=-L; i<=L; ++i)data[i] = TFMap[j + i*M1];
1286  data += BlockSize;
1287  j+=2;
1288  for(int i=-L; i<=L; ++i)data[i] = TFMap[j + i*M1];
1289 
1290  j+= this->nWWS/2 - 1; // 90 degree phase
1291  data += BlockSize;
1292  for(int i=-L; i<=L; ++i)data[i] = TFMap[j + i*M1];
1293  data += BlockSize;
1294  --j;
1295  for(int i=-L; i<=L; ++i)data[i] = TFMap[j + i*M1];
1296  data += BlockSize;
1297  j+=2;
1298  for(int i=-L; i<=L; ++i)data[i] = TFMap[j + i*M1];
1299 }
1300 
1301 
1302 template<class DataType_t>
1304 {
1305 // a rough CPU time test for time delays
1306  double res = 0;
1307  int M = this->m_Layer;
1308  int N = this->nWWS/2/(M+1);
1309  int nCoeff = T0[0].Last();
1310  for(int k=0; k<100; ++k)
1311  for(int i=nCoeff; i<N-nCoeff; ++i)
1312  for(int j=0; j<=M; ++j) res += getPixelAmplitude(j, i, dt);
1313  return res;
1314 }
1315 
1316 
1317 template<class DataType_t>
1319 {
1320 // a rough CPU time test for time delays using SSE instructions
1321 
1322  double res;
1323  int M = this->m_Layer;
1324  int N = this->nWWS/2/(M+1);
1325  int nCoeff = T0[0].Last();
1326  for(int k=0; k<100; ++k)
1327  for(int i=nCoeff; i<N-nCoeff; ++i)
1328  for(int j=0; j<=M; ++j) res += getPixelAmplitudeSSEOld(j, i, dt);
1329  return res;
1330 }
1331 
1332 
1333 template<class DataType_t>
1335 {
1336 // legacy function
1337 
1338  return this->m_Level;
1339 }
1340 
1341 template<class DataType_t>
1342 int WDM<DataType_t>::getOffset(int level, int layer)
1343 {
1344 // legacy function
1345 
1346  return layer;
1347 }
1348 
1349 template<class DataType_t>
1350 void WDM<DataType_t>::forward(int level, int layer)
1351 {
1352 // legacy function, should not be called
1353 
1354  printf("ERROR, WDM::forward called\n");
1355 }
1356 
1357 template<class DataType_t>
1358 void WDM<DataType_t>::inverse(int level,int layer)
1359 {
1360 // legacy function, should not be called
1361 
1362  printf("ERROR, WDM::inverse called\n");
1363 }
1364 
1365 
1366 template<class DataType_t>
1368 {
1369 // direct transform
1370 // MM = 0 requests power map of combined quadratures (not amplitudes for both)
1371 
1372  //pWWS contains the TD data, nWWS the size;
1373  static const double sqrt2 = sqrt(2);
1374 
1375  int n, m, j, J;
1376  int M = this->m_Layer; // max layer
1377  int M1 = M+1;
1378  int M2 = M*2;
1379  int nWDM = this->m_H;
1380  int nTS = this->nWWS;
1381  int KK = MM;
1382 
1383  if(MM<=0) MM = M;
1384 
1385  // adjust nWWS
1386  this->nWWS += this->nWWS%MM ? MM-this->nWWS%MM : 0;
1387 
1388  // initialize time series with boundary conditions (mirror)
1389  m = this->nWWS+2*nWDM;
1390  double* ts = (double*) _mm_malloc(m*sizeof(double), 16); // align to 16-byte for SSE
1391  for(n=0; n<=nWDM; n++) ts[nWDM-n] = (double)(this->pWWS[n]);
1392  for(n=0; n < nTS; n++) ts[nWDM+n] = (double)(this->pWWS[n]);
1393  for(n=0; n<int(m - nWDM-nTS); n++) ts[n+nWDM+nTS] = (double)(this->pWWS[nTS-n-1]);
1394 
1395  double* pTS = ts + nWDM;
1396 
1397 
1398  // create aligned symmetric arrays
1399  double* wdm = (double*) _mm_malloc((nWDM)*sizeof(double),16); // align to 16-byte for SSE
1400  double* INV = (double*) _mm_malloc((nWDM)*sizeof(double),16); // align to 16-byte for SSE
1401  for(n=0; n<nWDM; n++) {
1402  wdm[n] = wdmFilter.data[n];
1403  INV[n] = wdmFilter.data[nWDM-n-1];
1404  }
1405  double* WDM = INV+nWDM-1;
1406 
1407  // reallocate TF array
1408  int N = int(this->nWWS/MM);
1409  int L = KK<0 ? 2*N*M1 : N*M1;
1410  this->m_L = KK<0 ? this->m_H : 0;
1411  DataType_t* pWDM = (DataType_t *)realloc(this->pWWS,L*sizeof(DataType_t));
1412  this->release();
1413  this->allocate(size_t(L), pWDM);
1414 
1415  double* re = new double[M2];
1416  double* im = new double[M2];
1417 
1418  double* reX = (double*) _mm_malloc(M2 * sizeof(double), 16); // align to 16-byte for SSE
1419  double* imX = (double*) _mm_malloc(M2 * sizeof(double), 16); // align to 16-byte for SSE
1420 
1421  DataType_t* map00 = pWDM;
1422  DataType_t* map90 = pWDM + N*M1;
1423 
1424  int odd = 0;
1425  int sign,kind; sign=0; // dummy parameters
1426  TFFTRealComplex fft(1,&M2,false);
1427  fft.Init("ES",sign, &kind); // EX - optimized, ES - least optimized
1428 
1429  for(n=0; n<N; n++){
1430  /*
1431  for(m=0; m<M2; m++) {re[m] = 0; im[m] = 0.;}
1432  for(j=0; j<nWDM-1; j+=M2) {
1433  J = M2 + j;
1434  for(m=0; m<M2; m++)
1435  re[m] += *(pTS+j+m)*wdm[j+m] + *(pTS-J+m)*wdm[J-m];
1436  }
1437  re[0] += wdm[J]*pTS[J];
1438  printf("%d %.16f %.16f /",n,re[0],re[1]);
1439  */
1440 
1441  for(m=0; m<M2; m++) {reX[m] = 0; imX[m] = 0.;}
1442 
1443  for(j=0; j<nWDM-1; j+=M2) {
1444  J = M2 + j;
1445 
1446  __m128d* PR = (__m128d*) reX;
1447  for(m=0; m<M2; m+=2) {
1448  __m128d ptj = _mm_loadu_pd(pTS+j+m); // non-aligned pTS array
1449  __m128d pTJ = _mm_loadu_pd(pTS-J+m);
1450  __m128d pwj = _mm_load_pd(wdm+j+m); // aligned wdm and WDM arrays
1451  __m128d pWJ = _mm_load_pd(WDM-J+m);
1452  __m128d pjj = _mm_mul_pd(ptj,pwj);
1453  __m128d pJJ = _mm_mul_pd(pTJ,pWJ);
1454  *PR = _mm_add_pd(*PR, _mm_add_pd(pjj,pJJ));
1455  PR++;
1456  }
1457 
1458  }
1459 
1460  reX[0] += wdm[J]*pTS[J];
1461 // printf("%.16f %.16f \n",reX[0],reX[1]);
1462 
1463  fft.SetPoints(reX);
1464  fft.Transform();
1465  fft.GetPointsComplex(reX, imX);
1466 /*
1467  wavefft(re,im, M2,M2,M2, 1); // InvFFT(reX, imX, M2);
1468 
1469  printf("%d %.16f %.16f /",n,re[0],im[0]);
1470  printf("%.16f %.16f \n",reX[0],imX[0]);
1471  printf("%d %.16f %.16f /",n,re[1],im[1]);
1472  printf("%.16f %.16f \n",reX[1],imX[1]);
1473  printf("%d %.16f %.16f /",n,re[M],im[M]);
1474  printf("%.16f %.16f \n",reX[M],imX[M]);
1475 */
1476  reX[0] = imX[0] = reX[0]/sqrt2;
1477  reX[M] = imX[M] = reX[M]/sqrt2;
1478 
1479 // with fftw replace im with -im
1480 
1481  if(KK<0) {
1482  for(int m=0; m<=M; ++m) {
1483  if((m+odd) & 1){
1484  map00[m] = sqrt2*imX[m];
1485  map90[m] = sqrt2*reX[m];
1486  }
1487  else{
1488  map00[m] = sqrt2*reX[m];
1489  map90[m] = -sqrt2*imX[m];
1490  }
1491  }
1492  }
1493 
1494  else // power map
1495  for(int m=0; m<=M; m++)
1496  map00[m] = reX[m]*reX[m]+imX[m]*imX[m];
1497 
1498 
1499  odd = 1 - odd;
1500  pTS += MM;
1501  map00 += M1;
1502  map90 += M1;
1503  }
1504 
1505  this->m_Level = this->m_Layer;
1506 
1507  _mm_free(reX);
1508  _mm_free(imX);
1509  _mm_free(wdm);
1510  _mm_free(INV);
1511  _mm_free(ts);
1512 
1513  delete [] re;
1514  delete [] im;
1515 
1516 }
1517 
1518 
1519 
1520 template<class DataType_t>
1521 void WDM<DataType_t>::w2t(int flag)
1522 {
1523 // inverse transform
1524 // flag = -2 indicates that the Quadrature coefficients will be used
1525 
1526  if(flag==-2){
1527  w2tQ(0);
1528  return;
1529  }
1530  int n, j;
1531  int M = this->m_Layer;
1532  int M2 = M*2;
1533  int N = this->nWWS/(M2+2);
1534  int nWDM = this->m_H;
1535  int nTS = M*N + 2*nWDM;
1536 
1537  double sqrt2 = sqrt(2.);
1538  double* reX = new double[M2];
1539  double* imX = new double[M2];
1540  double* wdm = wdmFilter.data;
1541  DataType_t* TFmap = this->pWWS;
1542 
1543  if(M*N/this->nSTS!=1) {
1544  printf("WDM<DataType_t>::w2t: Inverse is not defined for the up-sampled map\n");
1545  exit(0);
1546  }
1547 
1549  DataType_t* tmp = ts.data + nWDM;
1550 
1551  int odd = 0;
1552  int sign,kind; sign=0; // dummy parameters
1553  TFFTComplexReal ifft(1,&M2,false);
1554  ifft.Init("ES",sign, &kind); // EX - optimized, ES - least optimized
1555 
1556  // Warning : if wavefft is used than reX/imX must be multiplied by sqrt(2)
1557  // if fftw is used than reX/imX must be divided by sqrt(2)
1558  for(n=0; n<N; ++n){
1559  for(j = 0; j<M2; ++j) reX[j] = imX[j] = 0;
1560  for(j=1; j<M; ++j)
1561  if( (n+j) & 1 ) imX[j] = TFmap[j]/sqrt2;
1562  else if(j&1) reX[j] = - TFmap[j]/sqrt2;
1563  else reX[j] = TFmap[j]/sqrt2;
1564 
1565  /* works only for EVEN M now
1566  if(n & 1);
1567  else{
1568  reX[0] = TFmap[0];
1569  reX[M] = TFmap[M];
1570  }
1571  */
1572 
1573  if( (n & 1) == 0 )reX[0] = TFmap[0];
1574  if( ((n+M) & 1) == 0 )
1575  if(M&1) reX[M] = -TFmap[M];
1576  else reX[M] = TFmap[M];
1577 
1578  ifft.SetPointsComplex(reX,imX);
1579  ifft.Transform();
1580  ifft.GetPoints(reX);
1581 
1582 // InvFFT(reX, imX, M2);
1583  DataType_t* x = tmp + n*M;
1584  int m = M*(n&1);
1585  int mm = m;
1586 
1587  x[0] += wdm[0]*reX[m];
1588  for(j = 1; j<nWDM; ++j){
1589  ++m;
1590  if(m==2*M)m=0;
1591  x[j] += wdm[j]*reX[m];
1592  if(mm==0)mm = 2*M - 1;
1593  else --mm;
1594  x[-j] += wdm[j]*reX[mm];
1595  }
1596  TFmap += M+1;
1597  }
1598 
1599  DataType_t* pTS = (DataType_t *)realloc(this->pWWS,this->nSTS*sizeof(DataType_t));
1600  this->release();
1601  this->allocate(size_t(this->nSTS), pTS);
1602  for(n=0; n<int(this->nSTS); n++) pTS[n] = tmp[n];
1603  this->m_Level = 0; // used in getSlice() function
1604 
1605  delete [] reX;
1606  delete [] imX;
1607 }
1608 
1609 
1610 // extra -i in C_k...
1611 
1612 template<class DataType_t>
1614 {
1615 // inverse transform using the quadrature coefficients
1616 
1617  int n, j;
1618  int M = this->m_Layer;
1619  int M1 = M+1;
1620  int M2 = M*2;
1621  int N = this->nWWS/(M2+2);
1622  int nWDM = this->m_H;
1623  int nTS = M*N + 2*nWDM;
1624 
1625  double sqrt2 = sqrt(2.);
1626  double* reX = new double[M2];
1627  double* imX = new double[M2];
1628  double* wdm = wdmFilter.data;
1629  //Data ype_t* TFmap = this->pWWS;
1630 
1631  DataType_t* map90 = this->pWWS + N*M1;
1632 
1633  if(M*N/this->nSTS!=1) {
1634  printf("WDM<DataType_t>::w2tQ: Inverse is not defined for the up-sampled map\n");
1635  exit(0);
1636  }
1637 
1639  DataType_t* tmp = ts.data + nWDM;
1640 
1641  int odd = 0;
1642  int sign,kind; sign=0; // dummy parameters
1643  TFFTComplexReal ifft(1,&M2,false);
1644  ifft.Init("ES",sign, &kind); // EX - optimized, ES - least optimized
1645 
1646  // Warning : if wavefft is used than reX/imX must be multiplied by sqrt(2)
1647  // if fftw is used than reX/imX must be divided by sqrt(2)
1648  for(n=0; n<N; ++n){
1649  for(j = 0; j<M2; ++j) reX[j] = imX[j] = 0;
1650  for(j=1; j<M; ++j)
1651  if( (n+j) & 1 ) reX[j] = map90[j]/sqrt2;
1652  else if(j&1) imX[j] = map90[j]/sqrt2;
1653  else imX[j] = -map90[j]/sqrt2;
1654 
1655  /* works only for EVEN M now
1656  if(n & 1){
1657  reX[0] = map90[0];
1658  reX[M] = map90[M];
1659  }
1660  */
1661 
1662  if((n+M) & 1)reX[M] = map90[M];
1663  if(n & 1)reX[0] = map90[0];
1664 
1665  ifft.SetPointsComplex(reX,imX);
1666  ifft.Transform();
1667  ifft.GetPoints(reX);
1668 
1669 // InvFFT(reX, imX, M2);
1670  DataType_t* x = tmp + n*M;
1671  int m = M*(n&1);
1672  int mm = m;
1673 
1674  x[0] += wdm[0]*reX[m];
1675  for(j = 1; j<nWDM; ++j){
1676  ++m;
1677  if(m==2*M)m=0;
1678  x[j] += wdm[j]*reX[m];
1679  if(mm==0)mm = 2*M - 1;
1680  else --mm;
1681  x[-j] += wdm[j]*reX[mm];
1682  }
1683 
1684  map90 += M+1;
1685  }
1686 
1687  DataType_t* pTS = (DataType_t *)realloc(this->pWWS,this->nSTS*sizeof(DataType_t));
1688  this->release();
1689  this->allocate(size_t(this->nSTS), pTS);
1690  for(n=0; n<int(this->nSTS); n++) pTS[n] = tmp[n];
1691  this->m_Level = 0; // used in getSlice() function
1692 
1693  delete [] reX;
1694  delete [] imX;
1695 }
1696 
1697 
1698 
1699 #define CLASS_INSTANTIATION(class_) template class WDM< class_ >;
1700 
1701 CLASS_INSTANTIATION(float)
1702 CLASS_INSTANTIATION(double)
1703 
1704 #undef CLASS_INSTANTIATION
void sse_dp5()
DataType_t ** TFMap00
Definition: WDM.hh:156
int getBaseWave(int m, int n, SymmArray< double > &w)
Definition: WDM.cc:305
double aux
Definition: testWDM_5.C:14
void w2t(int flag)
Definition: WDM.cc:1521
wavearray< float > cosTD
Definition: WDM.hh:154
#define MAXBETA
Definition: WDM.hh:19
printf("total live time: non-zero lags = %10.1f \n", liveTot)
wavearray< float > getTDvecSSE(int j, int K, char c, SSeries< double > *pss)
Definition: WDM.cc:1183
wavearray< double > a(hp.size())
#define B
int n
Definition: cwb_net.C:10
void initFourier()
Definition: WDM.cc:62
cout<< endl;cout<< "ts size = "<< ts.size()<< " ts rate = "<< ts.rate()<< endl;tf.Forward(ts, wdm);int levels=tf.getLevel();cout<< "tf size = "<< tf.size()<< endl;double dF=tf.resolution();double dT=1./(2 *dF);cout<< "rate(hz) : "<< RATE<< "\t layers : "<< nLAYERS<< "\t dF(hz) : "<< dF<< "\t dT(ms) : "<< dT *1000.<< endl;int itime=TIME_PIXEL_INDEX;int ifreq=FREQ_PIXEL_INDEX;int index=(levels+1)*itime+ifreq;double time=itime *dT;double freq=(ifreq >0)?ifreq *dF:dF/4;cout<< endl;cout<< "PIXEL TIME = "<< time<< " sec "<< endl;cout<< "PIXEL FREQ = "<< freq<< " Hz "<< endl;cout<< endl;wavearray< double > x
void sse_dp9()
WDM< double > wdm(nLAYERS, nLAYERS, 6, 10)
Record * SSEPointer()
Definition: SymmArraySSE.hh:23
float * watasm_data
STL namespace.
#define M
Definition: UniqSLagsList.C:3
int m
Definition: cwb_net.C:10
virtual WDM * Clone() const
return: Wavelet* - duplicate of *this, allocated on heap
Definition: WDM.cc:274
float watasm_xmm0[4]
int j
Definition: cwb_net.C:10
int getMaxLevel()
Definition: WDM.cc:1334
i drho i
int BetaOrder
Definition: WDM.hh:146
void setTDFilter(int nCoeffs, int L=1)
Definition: WDM.cc:621
SymmObjArray< SymmArraySSE< float > > Tx
Definition: WDM.hh:153
void sse_dp10()
void w2tQ(int)
Definition: WDM.cc:1613
#define N
void Resize(int sz)
Definition: SymmArray.cc:38
double getPixelAmplitude(int, int, int, bool=false)
Definition: WDM.cc:730
void inverse(int, int)
Definition: WDM.cc:1358
void sse_dp6()
int getOffset(int, int)
Definition: WDM.cc:1342
static int objCounter
Definition: WDM.hh:121
WDM()
Definition: WDM.cc:68
void getTFvec(int j, wavearray< float > &r)
Definition: WDM.cc:1259
wavearray< float > sinTDx
Definition: WDM.hh:154
size_t mode
wavearray< double > w
Definition: Test1.C:27
#define CLASS_INSTANTIATION(class_)
Definition: WDM.cc:1699
float M1
wavearray< float > getTDvec(int j, int K, char c='p')
Definition: WDM.cc:1141
double getPixelAmplitudeSSEOld(int, int, int, bool=false)
Definition: WDM.cc:815
void wavefft(double a[], double b[], int ntot, int n, int nspan, int isn)
Definition: wavefft.cc:23
int getBaseWaveQ(int m, int n, SymmArray< double > &w)
Definition: WDM.cc:356
int m_L
Definition: Wavelet.hh:106
bool GetSTFdata(int index, SymmArraySSE< float > *pS)
Definition: sseries.cc:344
unsigned long nSTS
Definition: WaveDWT.hh:125
wavearray< float > sinTD
Definition: WDM.hh:154
i() int(T_cor *100))
void sse_dp8()
void SetTFMap()
Definition: WDM.cc:429
double * tmp
Definition: testWDM_5.C:31
void FFT(double *a, double *b, int n)
Definition: WDM.cc:50
wavearray< double > getFilter(int n)
Definition: WDM.cc:480
wavearray< double > getTDFilter2(int n, int L)
Definition: WDM.cc:579
void sse_dp11()
Definition: Wavelet.hh:31
int LWDM
Definition: WDM.hh:149
double precision
int KWDM
Definition: WDM.hh:148
void forward(int, int)
Definition: WDM.cc:1350
int k
static double A
Definition: geodesics.cc:8
DataType_t ** TFMap90
pointer to 0-phase data, by default not initialized
Definition: WDM.hh:157
int m_Level
Definition: Wavelet.hh:97
void InvFFT(double *a, double *b, int n)
Definition: WDM.cc:55
virtual ~WDM()
Definition: WDM.cc:255
int Last()
Definition: SymmArray.hh:23
void t2w(int)
Definition: WDM.cc:1367
bool m_Heterodine
Definition: Wavelet.hh:108
float getPixelAmplitudeSSE(int m, int n, int dT, bool Quad)
Definition: WDM.cc:911
double dt
regression r
Definition: Regression_H1.C:44
s s
Definition: cwb_net.C:137
char filter[1024]
double TimeShiftTest(int dt)
Definition: WDM.cc:1303
float getTDamp(int n, int m, char c='p')
Definition: WDM.cc:1085
wavearray< double > ts(N)
wavearray< double > wdmFilter
Definition: WDM.hh:150
CosSize[2]
wavearray< int > index
WDM< double > * pWDM
Definition: testWDM_5.C:28
double fabs(const Complex &x)
Definition: numpy.cc:37
virtual WDM * Init() const
Definition: WDM.cc:283
float * td_buffer
Definition: WDM.hh:159
std::slice getSlice(double n)
Definition: WDM.cc:445
wavearray< double > getTDFilter1(int n, int L)
Definition: WDM.cc:533
double TimeShiftTestSSE(int dt)
Definition: WDM.cc:1318
DataType_t * data
Definition: wavearray.hh:301
void sse_dp7()
float M2
enum WAVETYPE m_WaveType
Definition: Wavelet.hh:88
TH1 * t1
double dT
Definition: testWDM_5.C:12
static const double Pi
Definition: WDM.cc:39
int m_Layer
Definition: Wavelet.hh:100
void sse_dp4()
double shift[NIFO_MAX]
virtual void resize(unsigned int)
Definition: wavearray.cc:445
int precision
Definition: WDM.hh:147
void initSSEPointers()
Definition: WDM.cc:683
float * watasm_filter
int m_H
Definition: Wavelet.hh:103
SymmObjArray< SymmArraySSE< float > > T0
Definition: WDM.hh:152
exit(0)
void cpf(const wavearray< DataType_t > &, int=0, int=0, int=0)
Definition: wavearray.cc:699