Point Cloud Library (PCL)  1.14.1-dev
sparse_matrix.hpp
1 /*
2 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 Redistributions of source code must retain the above copyright notice, this list of
9 conditions and the following disclaimer. Redistributions in binary form must reproduce
10 the above copyright notice, this list of conditions and the following disclaimer
11 in the documentation and/or other materials provided with the distribution.
12 
13 Neither the name of the Johns Hopkins University nor the names of its contributors
14 may be used to endorse or promote products derived from this software without specific
15 prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
18 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20 SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 DAMAGE.
27 */
28 
29 #ifdef _WIN32
30 # ifndef WIN32_LEAN_AND_MEAN
31 # define WIN32_LEAN_AND_MEAN
32 # endif // WIN32_LEAN_AND_MEAN
33 # ifndef NOMINMAX
34 # define NOMINMAX
35 # endif // NOMINMAX
36 # include <windows.h>
37 #endif //_WIN32
38 
39 ///////////////////
40 // SparseMatrix //
41 ///////////////////
///////////////////////////////////////////
// Static Allocator Methods and Members  //
///////////////////////////////////////////
45 
46 namespace pcl
47 {
48  namespace poisson
49  {
50 
51 
52  template<class T> int SparseMatrix<T>::UseAlloc=0;
53  template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
54  template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
55  template<class T>
56  void SparseMatrix<T>::SetAllocator( int blockSize )
57  {
58  if(blockSize>0){
59  UseAlloc=1;
60  internalAllocator.set(blockSize);
61  }
62  else{UseAlloc=0;}
63  }
    ///////////////////////////////////////
    // SparseMatrix Methods and Members  //
    ///////////////////////////////////////
67 
68  template< class T >
70  {
71  _contiguous = false;
72  _maxEntriesPerRow = 0;
73  rows = 0;
74  rowSizes = NULL;
75  m_ppElements = NULL;
76  }
77 
78  template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
79  template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
80 
81  template< class T >
83  {
84  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
85  else Resize( M.rows );
86  for( int i=0 ; i<rows ; i++ )
87  {
88  SetRowSize( i , M.rowSizes[i] );
89  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
90  }
91  }
92  template<class T>
93  int SparseMatrix<T>::Entries( void ) const
94  {
95  int e = 0;
96  for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
97  return e;
98  }
99  template<class T>
101  {
102  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
103  else Resize( M.rows );
104  for( int i=0 ; i<rows ; i++ )
105  {
106  SetRowSize( i , M.rowSizes[i] );
107  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
108  }
109  return *this;
110  }
111 
112  template<class T>
113  SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
114 
115  template< class T >
116  bool SparseMatrix< T >::write( const char* fileName ) const
117  {
118  FILE* fp = fopen( fileName , "wb" );
119  if( !fp ) return false;
120  bool ret = write( fp );
121  fclose( fp );
122  return ret;
123  }
124  template< class T >
125  bool SparseMatrix< T >::read( const char* fileName )
126  {
127  FILE* fp = fopen( fileName , "rb" );
128  if( !fp ) return false;
129  bool ret = read( fp );
130  fclose( fp );
131  return ret;
132  }
133  template< class T >
134  bool SparseMatrix< T >::write( FILE* fp ) const
135  {
136  if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
137  if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
138  for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
139  return true;
140  }
141  template< class T >
142  bool SparseMatrix< T >::read( FILE* fp )
143  {
144  int r;
145  if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
146  Resize( r );
147  if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
148  for( int i=0 ; i<rows ; i++ )
149  {
150  r = rowSizes[i];
151  rowSizes[i] = 0;
152  SetRowSize( i , r );
153  if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
154  }
155  return true;
156  }
157 
158 
159  template< class T >
161  {
162  if( rows>0 )
163  {
164 
165  if( !UseAlloc )
166  if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
167  else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
168  free( m_ppElements );
169  free( rowSizes );
170  }
171  rows = r;
172  if( r )
173  {
174  rowSizes = ( int* )malloc( sizeof( int ) * r );
175  memset( rowSizes , 0 , sizeof( int ) * r );
176  m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
177  }
178  _contiguous = false;
179  _maxEntriesPerRow = 0;
180  }
181  template< class T >
182  void SparseMatrix< T >::Resize( int r , int e )
183  {
184  if( rows>0 )
185  {
186  if( !UseAlloc )
187  if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
188  else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
189  free( m_ppElements );
190  free( rowSizes );
191  }
192  rows = r;
193  if( r )
194  {
195  rowSizes = ( int* )malloc( sizeof( int ) * r );
196  memset( rowSizes , 0 , sizeof( int ) * r );
197  m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
198  m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
199  for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
200  }
201  _contiguous = true;
202  _maxEntriesPerRow = e;
203  }
204 
205  template<class T>
206  void SparseMatrix< T >::SetRowSize( int row , int count )
207  {
208  if( _contiguous )
209  {
210  if (count > _maxEntriesPerRow)
211  {
212  POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count << " > (maximum)" << _maxEntriesPerRow );
213  }
214  rowSizes[row] = count;
215  }
216  else if( row>=0 && row<rows )
217  {
218  if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
219  else
220  {
221  if( rowSizes[row] ) free( m_ppElements[row] );
222  if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
223  }
224  }
225  }
226 
227 
228  template<class T>
230  {
231  // copied from operator *=
232  for (int i=0; i<rows; i++)
233  {
234  for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value=T(0);}
235  }
236  }
237 
238  template<class T>
240  {
241  SetZero();
242  for(int ij=0; ij < std::min<int>( rows, _maxEntriesPerRow ); ij++)
243  (*this)(ij,ij) = T(1);
244  }
245 
246  template<class T>
248  {
249  SparseMatrix<T> M(*this);
250  M *= V;
251  return M;
252  }
253 
254  template<class T>
256  {
257  for (int i=0; i<rows; i++)
258  {
259  for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value*=V;}
260  }
261  return *this;
262  }
263 
264  template<class T>
266  {
267  SparseMatrix<T> R( rows, M._maxEntriesPerRow );
268  for(int i=0; i<R.rows; i++){
269  for(int ii=0;ii<rowSizes[i];ii++){
270  int N=m_ppElements[i][ii].N;
271  T Value=m_ppElements[i][ii].Value;
272  for(int jj=0;jj<M.rowSizes[N];jj++){
273  R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
274  }
275  }
276  }
277  return R;
278  }
279 
280  template<class T>
281  template<class T2>
283  {
284  Vector<T2> R( rows );
285 
286  for (int i=0; i<rows; i++)
287  {
288  T2 temp=T2();
289  for(int ii=0;ii<rowSizes[i];ii++){
290  temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
291  }
292  R(i)=temp;
293  }
294  return R;
295  }
296 
297  template<class T>
298  template<class T2>
299  void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
300  {
301 #pragma omp parallel for num_threads( threads ) schedule( static )
302  for( int i=0 ; i<rows ; i++ )
303  {
304  T2 temp = T2();
305  temp *= 0;
306  for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
307  Out.m_pV[i] = temp;
308  }
309  }
310 
311  template<class T>
313  {
314  return Multiply(M);
315  }
316  template<class T>
317  template<class T2>
319  {
320  return Multiply(V);
321  }
322 
323  template<class T>
325  {
326  SparseMatrix<T> M( _maxEntriesPerRow, rows );
327 
328  for (int i=0; i<rows; i++)
329  {
330  for(int ii=0;ii<rowSizes[i];ii++){
331  M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
332  }
333  }
334  return M;
335  }
336 
337  template<class T>
338  template<class T2>
339  int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
340  {
341  if( reset )
342  {
343  solution.Resize( b.Dimensions() );
344  solution.SetZero();
345  }
346  Vector< T2 > r;
347  r.Resize( solution.Dimensions() );
348  M.Multiply( solution , r );
349  r = b - r;
350  Vector< T2 > d = r;
351  double delta_new , delta_0;
352  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
353  delta_0 = delta_new;
354  if( delta_new<eps ) return 0;
355  int ii;
356  Vector< T2 > q;
357  q.Resize( d.Dimensions() );
358  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
359  {
360  M.Multiply( d , q , threads );
361  double dDotQ = 0 , alpha = 0;
362  for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
363  alpha = delta_new / dDotQ;
364 #pragma omp parallel for num_threads( threads ) schedule( static )
365  for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
366  if( !(ii%50) )
367  {
368  r.Resize( solution.Dimensions() );
369  M.Multiply( solution , r , threads );
370  r = b - r;
371  }
372  else
373 #pragma omp parallel for num_threads( threads ) schedule( static )
374  for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
375 
376  double delta_old = delta_new , beta;
377  delta_new = 0;
378  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
379  beta = delta_new / delta_old;
380 #pragma omp parallel for num_threads( threads ) schedule( static )
381  for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
382  }
383  return ii;
384  }
385 
386  // Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
387  template<class T>
388  int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
389  SparseMatrix mTranspose=M.Transpose();
390  Vector<T> bb=mTranspose*b;
391  Vector<T> d,r,Md;
392  T alpha,beta,rDotR;
393  int i;
394 
395  solution.Resize(bb.Dimensions());
396  solution.SetZero();
397 
398  d=r=bb;
399  rDotR=r.Dot(r);
400  for(i=0;i<iters && rDotR>eps;i++){
401  T temp;
402  Md=mTranspose*(M*d);
403  alpha=rDotR/d.Dot(Md);
404  solution+=d*alpha;
405  r-=Md*alpha;
406  temp=r.Dot(r);
407  beta=temp/rDotR;
408  rDotR=temp;
409  d=r+d*beta;
410  }
411  return i;
412  }
413 
414 
415 
416 
417  ///////////////////////////
418  // SparseSymmetricMatrix //
419  ///////////////////////////
420  template<class T>
421  template<class T2>
423  template<class T>
424  template<class T2>
426  {
428 
429  for(int i=0; i<SparseMatrix<T>::rows; i++){
430  for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
431  int j=SparseMatrix<T>::m_ppElements[i][ii].N;
432  R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
433  R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
434  }
435  }
436  return R;
437  }
438 
439  template<class T>
440  template<class T2>
441  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
442  {
443  Out.SetZero();
444  const T2* in = &In[0];
445  T2* out = &Out[0];
446  T2 dcTerm = T2( 0 );
447  if( addDCTerm )
448  {
449  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
450  dcTerm /= SparseMatrix<T>::rows;
451  }
452  for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
453  {
455  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
456  const T2& in_i_ = in[i];
457  T2 out_i = T2(0);
458  for( ; temp!=end ; temp++ )
459  {
460  int j=temp->N;
461  T2 v=temp->Value;
462  out_i += v * in[j];
463  out[j] += v * in_i_;
464  }
465  out[i] += out_i;
466  }
467  if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
468  }
469  template<class T>
470  template<class T2>
471  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
472  {
473  int dim = int( In.Dimensions() );
474  const T2* in = &In[0];
475  int threads = OutScratch.threads();
476  if( addDCTerm )
477  {
478  T2 dcTerm = 0;
479 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
480  for( int t=0 ; t<threads ; t++ )
481  {
482  T2* out = OutScratch[t];
483  memset( out , 0 , sizeof( T2 ) * dim );
484  for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
485  {
486  const T2& in_i_ = in[i];
487  T2& out_i_ = out[i];
488  for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
489  {
490  int j = temp->N;
491  T2 v = temp->Value;
492  out_i_ += v * in[j];
493  out[j] += v * in_i_;
494  }
495  dcTerm += in_i_;
496  }
497  }
498  dcTerm /= dim;
499  dim = int( Out.Dimensions() );
500  T2* out = &Out[0];
501 #pragma omp parallel for num_threads( threads ) schedule( static )
502  for( int i=0 ; i<dim ; i++ )
503  {
504  T2 _out = dcTerm;
505  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
506  out[i] = _out;
507  }
508  }
509  else
510  {
511 #pragma omp parallel for num_threads( threads )
512  for( int t=0 ; t<threads ; t++ )
513  {
514  T2* out = OutScratch[t];
515  memset( out , 0 , sizeof( T2 ) * dim );
516  for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
517  {
518  const T2& in_i_ = in[i];
519  T2& out_i_ = out[i];
520  for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
521  {
522  int j = temp->N;
523  T2 v = temp->Value;
524  out_i_ += v * in[j];
525  out[j] += v * in_i_;
526  }
527  }
528  }
529  dim = int( Out.Dimensions() );
530  T2* out = &Out[0];
531 #pragma omp parallel for num_threads( threads ) schedule( static )
532  for( int i=0 ; i<dim ; i++ )
533  {
534  T2 _out = T2(0);
535  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
536  out[i] = _out;
537  }
538  }
539  }
540  template<class T>
541  template<class T2>
542  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
543  {
544  int dim = In.Dimensions();
545  const T2* in = &In[0];
546  int threads = OutScratch.size();
547 #pragma omp parallel for num_threads( threads )
548  for( int t=0 ; t<threads ; t++ )
549  for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
550 #pragma omp parallel for num_threads( threads )
551  for( int t=0 ; t<threads ; t++ )
552  {
553  T2* out = OutScratch[t];
554  for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
555  {
557  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
558  const T2& in_i_ = in[i];
559  T2& out_i_ = out[i];
560  for( ; temp!=end ; temp++ )
561  {
562  int j = temp->N;
563  T2 v = temp->Value;
564  out_i_ += v * in[j];
565  out[j] += v * in_i_;
566  }
567  }
568  }
569  T2* out = &Out[0];
570 #pragma omp parallel for num_threads( threads ) schedule( static )
571  for( int i=0 ; i<Out.Dimensions() ; i++ )
572  {
573  T2& _out = out[i];
574  _out = T2(0);
575  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
576  }
577  }
578 #if defined _WIN32 && !defined __MINGW32__
579 #ifndef _AtomicIncrement_
580 #define _AtomicIncrement_
581  inline void AtomicIncrement( volatile float* ptr , float addend )
582  {
583  float newValue = *ptr;
584  LONG& _newValue = *( (LONG*)&newValue );
585  LONG _oldValue;
586  for( ;; )
587  {
588  _oldValue = _newValue;
589  newValue += addend;
590  _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
591  if( _newValue==_oldValue ) break;
592  }
593  }
594  inline void AtomicIncrement( volatile double* ptr , double addend )
595  //inline void AtomicIncrement( double* ptr , double addend )
596  {
597  double newValue = *ptr;
598  LONGLONG& _newValue = *( (LONGLONG*)&newValue );
599  LONGLONG _oldValue;
600  do
601  {
602  _oldValue = _newValue;
603  newValue += addend;
604  _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
605  }
606  while( _newValue!=_oldValue );
607  }
608 #endif // _AtomicIncrement_
609  template< class T >
610  void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
611  {
612  Out.SetZero();
613  const float* in = &In[0];
614  float* out = &Out[0];
615  if( partition )
616 #pragma omp parallel for num_threads( threads )
617  for( int t=0 ; t<threads ; t++ )
618  for( int i=partition[t] ; i<partition[t+1] ; i++ )
619  {
620  const MatrixEntry< T >* temp = A[i];
621  const MatrixEntry< T >* end = temp + A.rowSizes[i];
622  const float& in_i = in[i];
623  float out_i = 0.;
624  for( ; temp!=end ; temp++ )
625  {
626  int j = temp->N;
627  float v = temp->Value;
628  out_i += v * in[j];
629  AtomicIncrement( out+j , v * in_i );
630  }
631  AtomicIncrement( out+i , out_i );
632  }
633  else
634 #pragma omp parallel for num_threads( threads )
635  for( int i=0 ; i<A.rows ; i++ )
636  {
637  const MatrixEntry< T >* temp = A[i];
638  const MatrixEntry< T >* end = temp + A.rowSizes[i];
639  const float& in_i = in[i];
640  float out_i = 0.f;
641  for( ; temp!=end ; temp++ )
642  {
643  int j = temp->N;
644  float v = temp->Value;
645  out_i += v * in[j];
646  AtomicIncrement( out+j , v * in_i );
647  }
648  AtomicIncrement( out+i , out_i );
649  }
650  }
651  template< class T >
652  void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
653  {
654  Out.SetZero();
655  const double* in = &In[0];
656  double* out = &Out[0];
657 
658  if( partition )
659 #pragma omp parallel for num_threads( threads )
660  for( int t=0 ; t<threads ; t++ )
661  for( int i=partition[t] ; i<partition[t+1] ; i++ )
662  {
663  const MatrixEntry< T >* temp = A[i];
664  const MatrixEntry< T >* end = temp + A.rowSizes[i];
665  const double& in_i = in[i];
666  double out_i = 0.;
667  for( ; temp!=end ; temp++ )
668  {
669  int j = temp->N;
670  T v = temp->Value;
671  out_i += v * in[j];
672  AtomicIncrement( out+j , v * in_i );
673  }
674  AtomicIncrement( out+i , out_i );
675  }
676  else
677 #pragma omp parallel for num_threads( threads )
678  for( int i=0 ; i<A.rows ; i++ )
679  {
680  const MatrixEntry< T >* temp = A[i];
681  const MatrixEntry< T >* end = temp + A.rowSizes[i];
682  const double& in_i = in[i];
683  double out_i = 0.;
684  for( ; temp!=end ; temp++ )
685  {
686  int j = temp->N;
687  T v = temp->Value;
688  out_i += v * in[j];
689  AtomicIncrement( out+j , v * in_i );
690  }
691  AtomicIncrement( out+i , out_i );
692  }
693  }
694 
695  template< class T >
696  template< class T2 >
697  int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
698  {
699  eps *= eps;
700  int dim = b.Dimensions();
701  if( reset )
702  {
703  x.Resize( dim );
704  x.SetZero();
705  }
706  Vector< T2 > r( dim ) , d( dim ) , q( dim );
707  Vector< T2 > temp;
708  if( solveNormal ) temp.Resize( dim );
709  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
710  const T2* _b = &b[0];
711 
712  std::vector< int > partition( threads+1 );
713  {
714  int eCount = 0;
715  for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
716  partition[0] = 0;
717 #pragma omp parallel for num_threads( threads )
718  for( int t=0 ; t<threads ; t++ )
719  {
720  int _eCount = 0;
721  for( int i=0 ; i<A.rows ; i++ )
722  {
723  _eCount += A.rowSizes[i];
724  if( _eCount*threads>=eCount*(t+1) )
725  {
726  partition[t+1] = i;
727  break;
728  }
729  }
730  }
731  partition[threads] = A.rows;
732  }
733  if( solveNormal )
734  {
735  MultiplyAtomic( A , x , temp , threads , &partition[0] );
736  MultiplyAtomic( A , temp , r , threads , &partition[0] );
737  MultiplyAtomic( A , b , temp , threads , &partition[0] );
738 #pragma omp parallel for num_threads( threads ) schedule( static )
739  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
740  }
741  else
742  {
743  MultiplyAtomic( A , x , r , threads , &partition[0] );
744 #pragma omp parallel for num_threads( threads ) schedule( static )
745  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
746  }
747  double delta_new = 0 , delta_0;
748  for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
749  delta_0 = delta_new;
750  if( delta_new<eps )
751  {
752  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
753  return 0;
754  }
755  int ii;
756  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
757  {
758  if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
759  else MultiplyAtomic( A , d , q , threads , &partition[0] );
760  double dDotQ = 0;
761  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
762  T2 alpha = T2( delta_new / dDotQ );
763 #pragma omp parallel for num_threads( threads ) schedule( static )
764  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
765  if( (ii%50)==(50-1) )
766  {
767  r.Resize( dim );
768  if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
769  else MultiplyAtomic( A , x , r , threads , &partition[0] );
770 #pragma omp parallel for num_threads( threads ) schedule( static )
771  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
772  }
773  else
774 #pragma omp parallel for num_threads( threads ) schedule( static )
775  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
776 
777  double delta_old = delta_new;
778  delta_new = 0;
779  for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
780  T2 beta = T2( delta_new / delta_old );
781 #pragma omp parallel for num_threads( threads ) schedule( static )
782  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
783  }
784  return ii;
785  }
786 #endif // _WIN32 && !__MINGW32__
787  template< class T >
788  template< class T2 >
789  int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
790  {
791  int threads = scratch.threads();
792  eps *= eps;
793  int dim = int( b.Dimensions() );
794  Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
795  if( reset ) x.Resize( dim );
796  if( solveNormal ) temp.Resize( dim );
797  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
798  const T2* _b = &b[0];
799 
800  double delta_new = 0 , delta_0;
801  if( solveNormal )
802  {
803  A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
804 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
805  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
806  }
807  else
808  {
809  A.Multiply( x , r , scratch , addDCTerm );
810 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
811  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
812  }
813  delta_0 = delta_new;
814  if( delta_new<eps )
815  {
816  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
817  return 0;
818  }
819  int ii;
820  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
821  {
822  if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
823  else A.Multiply( d , q , scratch , addDCTerm );
824  double dDotQ = 0;
825 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
826  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
827  T2 alpha = T2( delta_new / dDotQ );
828  double delta_old = delta_new;
829  delta_new = 0;
830  if( (ii%50)==(50-1) )
831  {
832 #pragma omp parallel for num_threads( threads )
833  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
834  r.Resize( dim );
835  if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
836  else A.Multiply( x , r , scratch , addDCTerm );
837 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
838  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
839  }
840  else
841 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
842  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
843 
844  T2 beta = T2( delta_new / delta_old );
845 #pragma omp parallel for num_threads( threads )
846  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
847  }
848  return ii;
849  }
850  template< class T >
851  template< class T2 >
852  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
853  {
854  eps *= eps;
855  int dim = int( b.Dimensions() );
856  MapReduceVector< T2 > outScratch;
857  if( threads<1 ) threads = 1;
858  if( threads>1 ) outScratch.resize( threads , dim );
859  if( reset ) x.Resize( dim );
860  Vector< T2 > r( dim ) , d( dim ) , q( dim );
861  Vector< T2 > temp;
862  if( solveNormal ) temp.Resize( dim );
863  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
864  const T2* _b = &b[0];
865 
866  double delta_new = 0 , delta_0;
867 
868  if( solveNormal )
869  {
870  if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
871  else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
872 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
873  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
874  }
875  else
876  {
877  if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
878  else A.Multiply( x , r , addDCTerm );
879 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
880  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
881  }
882 
883  delta_0 = delta_new;
884  if( delta_new<eps )
885  {
886  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
887  return 0;
888  }
889  int ii;
890  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
891  {
892  if( solveNormal )
893  {
894  if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
895  else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
896  }
897  else
898  {
899  if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
900  else A.Multiply( d , q , addDCTerm );
901  }
902  double dDotQ = 0;
903 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
904  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
905  T2 alpha = T2( delta_new / dDotQ );
906  double delta_old = delta_new;
907  delta_new = 0;
908 
909  if( (ii%50)==(50-1) )
910  {
911 #pragma omp parallel for num_threads( threads )
912  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
913  r.SetZero();
914  if( solveNormal )
915  {
916  if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
917  else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
918  }
919  else
920  {
921  if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
922  else A.Multiply( x , r , addDCTerm );
923  }
924 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
925  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
926  }
927  else
928  {
929 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
930  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
931  }
932 
933  T2 beta = T2( delta_new / delta_old );
934 #pragma omp parallel for num_threads( threads )
935  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
936  }
937  return ii;
938  }
939 
940  template<class T>
941  template<class T2>
942  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
943  {
944  Vector<T2> d,r,Md;
945 
946  if(reset)
947  {
948  solution.Resize(b.Dimensions());
949  solution.SetZero();
950  }
951  Md.Resize(M.rows);
952  for( int i=0 ; i<iters ; i++ )
953  {
954  M.Multiply( solution , Md );
955  r = b-Md;
956  // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
957  // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
958  // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
959  for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
960  }
961  return iters;
962  }
963  template< class T >
964  template< class T2 >
966  {
967  diagonal.Resize( SparseMatrix<T>::rows );
968  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
969  {
970  diagonal[i] = 0.;
971  for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
972  }
973  }
974 
975  }
976 }
An exception that is thrown when the arguments number or type is wrong/unhandled.
MatrixEntry< T > ** m_ppElements
Definition: sparse_matrix.h:66
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
SparseMatrix< T > Transpose() const
static void SetAllocator(int blockSize)
SparseMatrix< T > & operator*=(const T &V)
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)
bool write(FILE *fp) const
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)
void SetRowSize(int row, int count)
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
SparseMatrix< T > operator*(const T &V) const
static int UseAllocator(void)
static Allocator< MatrixEntry< T > > internalAllocator
Definition: sparse_matrix.h:60
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
void getDiagonal(Vector< T2 > &diagonal) const
Vector< T2 > operator*(const Vector< T2 > &V) const
Vector< T2 > Multiply(const Vector< T2 > &V) const
static int SolveAtomic(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool solveNormal=false)
std::size_t Dimensions() const
Definition: vector.hpp:91
void Resize(std::size_t N)
Definition: vector.hpp:63
T Dot(const Vector &V) const
Definition: vector.hpp:239
PCL_EXPORTS void Multiply(const double in1[2], const double in2[2], double out[2])
void MultiplyAtomic(const SparseSymmetricMatrix< T > &A, const Vector< float > &In, Vector< float > &Out, int threads, const int *partition=NULL)
void AtomicIncrement(volatile float *ptr, float addend)
void read(std::istream &stream, Type &value)
Function for reading data from a stream.
Definition: region_xy.h:46
void write(std::ostream &stream, Type value)
Function for writing data to a stream.
Definition: region_xy.h:63
void resize(int threads, int dim)
Definition: sparse_matrix.h:46
T Value
Definition: sparse_matrix.h:50
int N
Definition: sparse_matrix.h:49