Point Cloud Library (PCL)  1.14.0-dev
sparse_matrix.hpp
1 /*
2 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 Redistributions of source code must retain the above copyright notice, this list of
9 conditions and the following disclaimer. Redistributions in binary form must reproduce
10 the above copyright notice, this list of conditions and the following disclaimer
11 in the documentation and/or other materials provided with the distribution.
12 
13 Neither the name of the Johns Hopkins University nor the names of its contributors
14 may be used to endorse or promote products derived from this software without specific
15 prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
18 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20 SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 DAMAGE.
27 */
28 
29 #ifdef _WIN32
30 # ifndef WIN32_LEAN_AND_MEAN
31 # define WIN32_LEAN_AND_MEAN
32 # endif // WIN32_LEAN_AND_MEAN
33 # ifndef NOMINMAX
34 # define NOMINMAX
35 # endif // NOMINMAX
36 # include <windows.h>
37 #endif //_WIN32
38 
39 ///////////////////
40 // SparseMatrix //
41 ///////////////////
42 ///////////////////////////////////////////
43 // Static Allocator Methods and Memebers //
44 ///////////////////////////////////////////
45 
46 namespace pcl
47 {
48  namespace poisson
49  {
50 
51 
52  template<class T> int SparseMatrix<T>::UseAlloc=0;
53  template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
54  template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
55  template<class T>
56  void SparseMatrix<T>::SetAllocator( int blockSize )
57  {
58  if(blockSize>0){
59  UseAlloc=1;
60  internalAllocator.set(blockSize);
61  }
62  else{UseAlloc=0;}
63  }
64  ///////////////////////////////////////
65  // SparseMatrix Methods and Memebers //
66  ///////////////////////////////////////
67 
68  template< class T >
70  {
71  _contiguous = false;
72  _maxEntriesPerRow = 0;
73  rows = 0;
74  rowSizes = NULL;
75  m_ppElements = NULL;
76  }
77 
78  template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
79  template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
80 
81  template< class T >
83  {
84  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
85  else Resize( M.rows );
86  for( int i=0 ; i<rows ; i++ )
87  {
88  SetRowSize( i , M.rowSizes[i] );
89  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
90  }
91  }
92  template<class T>
93  int SparseMatrix<T>::Entries( void ) const
94  {
95  int e = 0;
96  for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
97  return e;
98  }
99  template<class T>
101  {
102  if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
103  else Resize( M.rows );
104  for( int i=0 ; i<rows ; i++ )
105  {
106  SetRowSize( i , M.rowSizes[i] );
107  memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
108  }
109  return *this;
110  }
111 
112  template<class T>
113  SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
114 
115  template< class T >
116  bool SparseMatrix< T >::write( const char* fileName ) const
117  {
118  FILE* fp = fopen( fileName , "wb" );
119  if( !fp ) return false;
120  bool ret = write( fp );
121  fclose( fp );
122  return ret;
123  }
124  template< class T >
125  bool SparseMatrix< T >::read( const char* fileName )
126  {
127  FILE* fp = fopen( fileName , "rb" );
128  if( !fp ) return false;
129  bool ret = read( fp );
130  fclose( fp );
131  return ret;
132  }
133  template< class T >
134  bool SparseMatrix< T >::write( FILE* fp ) const
135  {
136  if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
137  if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
138  for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
139  return true;
140  }
141  template< class T >
142  bool SparseMatrix< T >::read( FILE* fp )
143  {
144  int r;
145  if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
146  Resize( r );
147  if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
148  for( int i=0 ; i<rows ; i++ )
149  {
150  r = rowSizes[i];
151  rowSizes[i] = 0;
152  SetRowSize( i , r );
153  if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
154  }
155  return true;
156  }
157 
158 
159  template< class T >
161  {
162  if( rows>0 )
163  {
164 
165  if( !UseAlloc )
166  if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
167  else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
168  free( m_ppElements );
169  free( rowSizes );
170  }
171  rows = r;
172  if( r )
173  {
174  rowSizes = ( int* )malloc( sizeof( int ) * r );
175  memset( rowSizes , 0 , sizeof( int ) * r );
176  m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
177  }
178  _contiguous = false;
179  _maxEntriesPerRow = 0;
180  }
181  template< class T >
182  void SparseMatrix< T >::Resize( int r , int e )
183  {
184  if( rows>0 )
185  {
186  if( !UseAlloc )
187  if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
188  else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
189  free( m_ppElements );
190  free( rowSizes );
191  }
192  rows = r;
193  if( r )
194  {
195  rowSizes = ( int* )malloc( sizeof( int ) * r );
196  memset( rowSizes , 0 , sizeof( int ) * r );
197  m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
198  m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
199  for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
200  }
201  _contiguous = true;
202  _maxEntriesPerRow = e;
203  }
204 
205  template<class T>
206  void SparseMatrix< T >::SetRowSize( int row , int count )
207  {
208  if( _contiguous )
209  {
210  if (count > _maxEntriesPerRow)
211  {
212  POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count << " > (maximum)" << _maxEntriesPerRow );
213  }
214  rowSizes[row] = count;
215  }
216  else if( row>=0 && row<rows )
217  {
218  if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
219  else
220  {
221  if( rowSizes[row] ) free( m_ppElements[row] );
222  if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
223  }
224  }
225  }
226 
227 
228  template<class T>
230  {
231  Resize(this->m_N, this->m_M);
232  }
233 
234  template<class T>
236  {
237  SetZero();
238  for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
239  (*this)(ij,ij) = T(1);
240  }
241 
242  template<class T>
244  {
245  SparseMatrix<T> M(*this);
246  M *= V;
247  return M;
248  }
249 
250  template<class T>
252  {
253  for (int i=0; i<rows; i++)
254  {
255  for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value*=V;}
256  }
257  return *this;
258  }
259 
260  template<class T>
262  {
263  SparseMatrix<T> R( rows, M._maxEntriesPerRow );
264  for(int i=0; i<R.rows; i++){
265  for(int ii=0;ii<rowSizes[i];ii++){
266  int N=m_ppElements[i][ii].N;
267  T Value=m_ppElements[i][ii].Value;
268  for(int jj=0;jj<M.rowSizes[N];jj++){
269  R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
270  }
271  }
272  }
273  return R;
274  }
275 
276  template<class T>
277  template<class T2>
279  {
280  Vector<T2> R( rows );
281 
282  for (int i=0; i<rows; i++)
283  {
284  T2 temp=T2();
285  for(int ii=0;ii<rowSizes[i];ii++){
286  temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
287  }
288  R(i)=temp;
289  }
290  return R;
291  }
292 
293  template<class T>
294  template<class T2>
295  void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
296  {
297 #pragma omp parallel for num_threads( threads ) schedule( static )
298  for( int i=0 ; i<rows ; i++ )
299  {
300  T2 temp = T2();
301  temp *= 0;
302  for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
303  Out.m_pV[i] = temp;
304  }
305  }
306 
307  template<class T>
309  {
310  return Multiply(M);
311  }
312  template<class T>
313  template<class T2>
315  {
316  return Multiply(V);
317  }
318 
319  template<class T>
321  {
322  SparseMatrix<T> M( _maxEntriesPerRow, rows );
323 
324  for (int i=0; i<rows; i++)
325  {
326  for(int ii=0;ii<rowSizes[i];ii++){
327  M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
328  }
329  }
330  return M;
331  }
332 
333  template<class T>
334  template<class T2>
335  int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
336  {
337  if( reset )
338  {
339  solution.Resize( b.Dimensions() );
340  solution.SetZero();
341  }
342  Vector< T2 > r;
343  r.Resize( solution.Dimensions() );
344  M.Multiply( solution , r );
345  r = b - r;
346  Vector< T2 > d = r;
347  double delta_new , delta_0;
348  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
349  delta_0 = delta_new;
350  if( delta_new<eps ) return 0;
351  int ii;
352  Vector< T2 > q;
353  q.Resize( d.Dimensions() );
354  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
355  {
356  M.Multiply( d , q , threads );
357  double dDotQ = 0 , alpha = 0;
358  for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
359  alpha = delta_new / dDotQ;
360 #pragma omp parallel for num_threads( threads ) schedule( static )
361  for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
362  if( !(ii%50) )
363  {
364  r.Resize( solution.Dimensions() );
365  M.Multiply( solution , r , threads );
366  r = b - r;
367  }
368  else
369 #pragma omp parallel for num_threads( threads ) schedule( static )
370  for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
371 
372  double delta_old = delta_new , beta;
373  delta_new = 0;
374  for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
375  beta = delta_new / delta_old;
376 #pragma omp parallel for num_threads( threads ) schedule( static )
377  for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
378  }
379  return ii;
380  }
381 
382  // Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
383  template<class T>
384  int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
385  SparseMatrix mTranspose=M.Transpose();
386  Vector<T> bb=mTranspose*b;
387  Vector<T> d,r,Md;
388  T alpha,beta,rDotR;
389  int i;
390 
391  solution.Resize(M.Columns());
392  solution.SetZero();
393 
394  d=r=bb;
395  rDotR=r.Dot(r);
396  for(i=0;i<iters && rDotR>eps;i++){
397  T temp;
398  Md=mTranspose*(M*d);
399  alpha=rDotR/d.Dot(Md);
400  solution+=d*alpha;
401  r-=Md*alpha;
402  temp=r.Dot(r);
403  beta=temp/rDotR;
404  rDotR=temp;
405  d=r+d*beta;
406  }
407  return i;
408  }
409 
410 
411 
412 
413  ///////////////////////////
414  // SparseSymmetricMatrix //
415  ///////////////////////////
416  template<class T>
417  template<class T2>
419  template<class T>
420  template<class T2>
422  {
424 
425  for(int i=0; i<SparseMatrix<T>::rows; i++){
426  for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
427  int j=SparseMatrix<T>::m_ppElements[i][ii].N;
428  R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
429  R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
430  }
431  }
432  return R;
433  }
434 
435  template<class T>
436  template<class T2>
437  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
438  {
439  Out.SetZero();
440  const T2* in = &In[0];
441  T2* out = &Out[0];
442  T2 dcTerm = T2( 0 );
443  if( addDCTerm )
444  {
445  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
446  dcTerm /= SparseMatrix<T>::rows;
447  }
448  for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
449  {
451  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
452  const T2& in_i_ = in[i];
453  T2 out_i = T2(0);
454  for( ; temp!=end ; temp++ )
455  {
456  int j=temp->N;
457  T2 v=temp->Value;
458  out_i += v * in[j];
459  out[j] += v * in_i_;
460  }
461  out[i] += out_i;
462  }
463  if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
464  }
465  template<class T>
466  template<class T2>
467  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
468  {
469  int dim = int( In.Dimensions() );
470  const T2* in = &In[0];
471  int threads = OutScratch.threads();
472  if( addDCTerm )
473  {
474  T2 dcTerm = 0;
475 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
476  for( int t=0 ; t<threads ; t++ )
477  {
478  T2* out = OutScratch[t];
479  memset( out , 0 , sizeof( T2 ) * dim );
480  for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
481  {
482  const T2& in_i_ = in[i];
483  T2& out_i_ = out[i];
484  for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
485  {
486  int j = temp->N;
487  T2 v = temp->Value;
488  out_i_ += v * in[j];
489  out[j] += v * in_i_;
490  }
491  dcTerm += in_i_;
492  }
493  }
494  dcTerm /= dim;
495  dim = int( Out.Dimensions() );
496  T2* out = &Out[0];
497 #pragma omp parallel for num_threads( threads ) schedule( static )
498  for( int i=0 ; i<dim ; i++ )
499  {
500  T2 _out = dcTerm;
501  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
502  out[i] = _out;
503  }
504  }
505  else
506  {
507 #pragma omp parallel for num_threads( threads )
508  for( int t=0 ; t<threads ; t++ )
509  {
510  T2* out = OutScratch[t];
511  memset( out , 0 , sizeof( T2 ) * dim );
512  for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
513  {
514  const T2& in_i_ = in[i];
515  T2& out_i_ = out[i];
516  for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
517  {
518  int j = temp->N;
519  T2 v = temp->Value;
520  out_i_ += v * in[j];
521  out[j] += v * in_i_;
522  }
523  }
524  }
525  dim = int( Out.Dimensions() );
526  T2* out = &Out[0];
527 #pragma omp parallel for num_threads( threads ) schedule( static )
528  for( int i=0 ; i<dim ; i++ )
529  {
530  T2 _out = T2(0);
531  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
532  out[i] = _out;
533  }
534  }
535  }
536  template<class T>
537  template<class T2>
538  void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
539  {
540  int dim = In.Dimensions();
541  const T2* in = &In[0];
542  int threads = OutScratch.size();
543 #pragma omp parallel for num_threads( threads )
544  for( int t=0 ; t<threads ; t++ )
545  for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
546 #pragma omp parallel for num_threads( threads )
547  for( int t=0 ; t<threads ; t++ )
548  {
549  T2* out = OutScratch[t];
550  for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
551  {
553  const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
554  const T2& in_i_ = in[i];
555  T2& out_i_ = out[i];
556  for( ; temp!=end ; temp++ )
557  {
558  int j = temp->N;
559  T2 v = temp->Value;
560  out_i_ += v * in[j];
561  out[j] += v * in_i_;
562  }
563  }
564  }
565  T2* out = &Out[0];
566 #pragma omp parallel for num_threads( threads ) schedule( static )
567  for( int i=0 ; i<Out.Dimensions() ; i++ )
568  {
569  T2& _out = out[i];
570  _out = T2(0);
571  for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
572  }
573  }
574 #if defined _WIN32 && !defined __MINGW32__
575 #ifndef _AtomicIncrement_
576 #define _AtomicIncrement_
577  inline void AtomicIncrement( volatile float* ptr , float addend )
578  {
579  float newValue = *ptr;
580  LONG& _newValue = *( (LONG*)&newValue );
581  LONG _oldValue;
582  for( ;; )
583  {
584  _oldValue = _newValue;
585  newValue += addend;
586  _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
587  if( _newValue==_oldValue ) break;
588  }
589  }
590  inline void AtomicIncrement( volatile double* ptr , double addend )
591  //inline void AtomicIncrement( double* ptr , double addend )
592  {
593  double newValue = *ptr;
594  LONGLONG& _newValue = *( (LONGLONG*)&newValue );
595  LONGLONG _oldValue;
596  do
597  {
598  _oldValue = _newValue;
599  newValue += addend;
600  _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
601  }
602  while( _newValue!=_oldValue );
603  }
604 #endif // _AtomicIncrement_
605  template< class T >
606  void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
607  {
608  Out.SetZero();
609  const float* in = &In[0];
610  float* out = &Out[0];
611  if( partition )
612 #pragma omp parallel for num_threads( threads )
613  for( int t=0 ; t<threads ; t++ )
614  for( int i=partition[t] ; i<partition[t+1] ; i++ )
615  {
616  const MatrixEntry< T >* temp = A[i];
617  const MatrixEntry< T >* end = temp + A.rowSizes[i];
618  const float& in_i = in[i];
619  float out_i = 0.;
620  for( ; temp!=end ; temp++ )
621  {
622  int j = temp->N;
623  float v = temp->Value;
624  out_i += v * in[j];
625  AtomicIncrement( out+j , v * in_i );
626  }
627  AtomicIncrement( out+i , out_i );
628  }
629  else
630 #pragma omp parallel for num_threads( threads )
631  for( int i=0 ; i<A.rows ; i++ )
632  {
633  const MatrixEntry< T >* temp = A[i];
634  const MatrixEntry< T >* end = temp + A.rowSizes[i];
635  const float& in_i = in[i];
636  float out_i = 0.f;
637  for( ; temp!=end ; temp++ )
638  {
639  int j = temp->N;
640  float v = temp->Value;
641  out_i += v * in[j];
642  AtomicIncrement( out+j , v * in_i );
643  }
644  AtomicIncrement( out+i , out_i );
645  }
646  }
647  template< class T >
648  void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
649  {
650  Out.SetZero();
651  const double* in = &In[0];
652  double* out = &Out[0];
653 
654  if( partition )
655 #pragma omp parallel for num_threads( threads )
656  for( int t=0 ; t<threads ; t++ )
657  for( int i=partition[t] ; i<partition[t+1] ; i++ )
658  {
659  const MatrixEntry< T >* temp = A[i];
660  const MatrixEntry< T >* end = temp + A.rowSizes[i];
661  const double& in_i = in[i];
662  double out_i = 0.;
663  for( ; temp!=end ; temp++ )
664  {
665  int j = temp->N;
666  T v = temp->Value;
667  out_i += v * in[j];
668  AtomicIncrement( out+j , v * in_i );
669  }
670  AtomicIncrement( out+i , out_i );
671  }
672  else
673 #pragma omp parallel for num_threads( threads )
674  for( int i=0 ; i<A.rows ; i++ )
675  {
676  const MatrixEntry< T >* temp = A[i];
677  const MatrixEntry< T >* end = temp + A.rowSizes[i];
678  const double& in_i = in[i];
679  double out_i = 0.;
680  for( ; temp!=end ; temp++ )
681  {
682  int j = temp->N;
683  T v = temp->Value;
684  out_i += v * in[j];
685  AtomicIncrement( out+j , v * in_i );
686  }
687  AtomicIncrement( out+i , out_i );
688  }
689  }
690 
691  template< class T >
692  template< class T2 >
693  int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
694  {
695  eps *= eps;
696  int dim = b.Dimensions();
697  if( reset )
698  {
699  x.Resize( dim );
700  x.SetZero();
701  }
702  Vector< T2 > r( dim ) , d( dim ) , q( dim );
703  Vector< T2 > temp;
704  if( solveNormal ) temp.Resize( dim );
705  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
706  const T2* _b = &b[0];
707 
708  std::vector< int > partition( threads+1 );
709  {
710  int eCount = 0;
711  for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
712  partition[0] = 0;
713 #pragma omp parallel for num_threads( threads )
714  for( int t=0 ; t<threads ; t++ )
715  {
716  int _eCount = 0;
717  for( int i=0 ; i<A.rows ; i++ )
718  {
719  _eCount += A.rowSizes[i];
720  if( _eCount*threads>=eCount*(t+1) )
721  {
722  partition[t+1] = i;
723  break;
724  }
725  }
726  }
727  partition[threads] = A.rows;
728  }
729  if( solveNormal )
730  {
731  MultiplyAtomic( A , x , temp , threads , &partition[0] );
732  MultiplyAtomic( A , temp , r , threads , &partition[0] );
733  MultiplyAtomic( A , b , temp , threads , &partition[0] );
734 #pragma omp parallel for num_threads( threads ) schedule( static )
735  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
736  }
737  else
738  {
739  MultiplyAtomic( A , x , r , threads , &partition[0] );
740 #pragma omp parallel for num_threads( threads ) schedule( static )
741  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
742  }
743  double delta_new = 0 , delta_0;
744  for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
745  delta_0 = delta_new;
746  if( delta_new<eps )
747  {
748  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
749  return 0;
750  }
751  int ii;
752  for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
753  {
754  if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
755  else MultiplyAtomic( A , d , q , threads , &partition[0] );
756  double dDotQ = 0;
757  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
758  T2 alpha = T2( delta_new / dDotQ );
759 #pragma omp parallel for num_threads( threads ) schedule( static )
760  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
761  if( (ii%50)==(50-1) )
762  {
763  r.Resize( dim );
764  if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
765  else MultiplyAtomic( A , x , r , threads , &partition[0] );
766 #pragma omp parallel for num_threads( threads ) schedule( static )
767  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
768  }
769  else
770 #pragma omp parallel for num_threads( threads ) schedule( static )
771  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
772 
773  double delta_old = delta_new;
774  delta_new = 0;
775  for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
776  T2 beta = T2( delta_new / delta_old );
777 #pragma omp parallel for num_threads( threads ) schedule( static )
778  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
779  }
780  return ii;
781  }
782 #endif // _WIN32 && !__MINGW32__
783  template< class T >
784  template< class T2 >
785  int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
786  {
787  int threads = scratch.threads();
788  eps *= eps;
789  int dim = int( b.Dimensions() );
790  Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
791  if( reset ) x.Resize( dim );
792  if( solveNormal ) temp.Resize( dim );
793  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
794  const T2* _b = &b[0];
795 
796  double delta_new = 0 , delta_0;
797  if( solveNormal )
798  {
799  A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
800 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
801  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
802  }
803  else
804  {
805  A.Multiply( x , r , scratch , addDCTerm );
806 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
807  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
808  }
809  delta_0 = delta_new;
810  if( delta_new<eps )
811  {
812  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
813  return 0;
814  }
815  int ii;
816  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
817  {
818  if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
819  else A.Multiply( d , q , scratch , addDCTerm );
820  double dDotQ = 0;
821 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
822  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
823  T2 alpha = T2( delta_new / dDotQ );
824  double delta_old = delta_new;
825  delta_new = 0;
826  if( (ii%50)==(50-1) )
827  {
828 #pragma omp parallel for num_threads( threads )
829  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
830  r.Resize( dim );
831  if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
832  else A.Multiply( x , r , scratch , addDCTerm );
833 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
834  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
835  }
836  else
837 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
838  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
839 
840  T2 beta = T2( delta_new / delta_old );
841 #pragma omp parallel for num_threads( threads )
842  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
843  }
844  return ii;
845  }
846  template< class T >
847  template< class T2 >
848  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
849  {
850  eps *= eps;
851  int dim = int( b.Dimensions() );
852  MapReduceVector< T2 > outScratch;
853  if( threads<1 ) threads = 1;
854  if( threads>1 ) outScratch.resize( threads , dim );
855  if( reset ) x.Resize( dim );
856  Vector< T2 > r( dim ) , d( dim ) , q( dim );
857  Vector< T2 > temp;
858  if( solveNormal ) temp.Resize( dim );
859  T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
860  const T2* _b = &b[0];
861 
862  double delta_new = 0 , delta_0;
863 
864  if( solveNormal )
865  {
866  if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
867  else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
868 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
869  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
870  }
871  else
872  {
873  if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
874  else A.Multiply( x , r , addDCTerm );
875 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
876  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
877  }
878 
879  delta_0 = delta_new;
880  if( delta_new<eps )
881  {
882  fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
883  return 0;
884  }
885  int ii;
886  for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
887  {
888  if( solveNormal )
889  {
890  if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
891  else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
892  }
893  else
894  {
895  if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
896  else A.Multiply( d , q , addDCTerm );
897  }
898  double dDotQ = 0;
899 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
900  for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
901  T2 alpha = T2( delta_new / dDotQ );
902  double delta_old = delta_new;
903  delta_new = 0;
904 
905  if( (ii%50)==(50-1) )
906  {
907 #pragma omp parallel for num_threads( threads )
908  for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
909  r.SetZero();
910  if( solveNormal )
911  {
912  if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
913  else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
914  }
915  else
916  {
917  if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
918  else A.Multiply( x , r , addDCTerm );
919  }
920 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
921  for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
922  }
923  else
924  {
925 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
926  for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
927  }
928 
929  T2 beta = T2( delta_new / delta_old );
930 #pragma omp parallel for num_threads( threads )
931  for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
932  }
933  return ii;
934  }
935 
936  template<class T>
937  template<class T2>
938  int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
939  {
940  Vector<T2> d,r,Md;
941 
942  if(reset)
943  {
944  solution.Resize(b.Dimensions());
945  solution.SetZero();
946  }
947  Md.Resize(M.rows);
948  for( int i=0 ; i<iters ; i++ )
949  {
950  M.Multiply( solution , Md );
951  r = b-Md;
952  // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
953  // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
954  // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
955  for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
956  }
957  return iters;
958  }
959  template< class T >
960  template< class T2 >
962  {
963  diagonal.Resize( SparseMatrix<T>::rows );
964  for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
965  {
966  diagonal[i] = 0.;
967  for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
968  }
969  }
970 
971  }
972 }
An exception that is thrown when the arguments number or type is wrong/unhandled.
MatrixEntry< T > ** m_ppElements
Definition: sparse_matrix.h:66
SparseMatrix< T > Multiply(const SparseMatrix< T > &M) const
SparseMatrix< T > Transpose() const
static void SetAllocator(int blockSize)
SparseMatrix< T > & operator*=(const T &V)
SparseMatrix< T > & operator=(const SparseMatrix< T > &M)
bool write(FILE *fp) const
static int SolveSymmetric(const SparseMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, const T2 eps=1e-8, int reset=1, int threads=1)
void SetRowSize(int row, int count)
static int Solve(const SparseMatrix< T > &M, const Vector< T > &b, int iters, Vector< T > &solution, const T eps=1e-8)
SparseMatrix< T > operator*(const T &V) const
static int UseAllocator(void)
static Allocator< MatrixEntry< T > > internalAllocator
Definition: sparse_matrix.h:60
static int Solve(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool addDCTerm=false, bool solveNormal=false)
void getDiagonal(Vector< T2 > &diagonal) const
Vector< T2 > operator*(const Vector< T2 > &V) const
Vector< T2 > Multiply(const Vector< T2 > &V) const
static int SolveAtomic(const SparseSymmetricMatrix< T > &M, const Vector< T2 > &b, int iters, Vector< T2 > &solution, T2 eps=1e-8, int reset=1, int threads=0, bool solveNormal=false)
std::size_t Dimensions() const
Definition: vector.hpp:91
void Resize(std::size_t N)
Definition: vector.hpp:63
T Dot(const Vector &V) const
Definition: vector.hpp:239
PCL_EXPORTS void Multiply(const double in1[2], const double in2[2], double out[2])
void MultiplyAtomic(const SparseSymmetricMatrix< T > &A, const Vector< float > &In, Vector< float > &Out, int threads, const int *partition=NULL)
void AtomicIncrement(volatile float *ptr, float addend)
void read(std::istream &stream, Type &value)
Function for reading data from a stream.
Definition: region_xy.h:46
void write(std::ostream &stream, Type value)
Function for writing data to a stream.
Definition: region_xy.h:63
void resize(int threads, int dim)
Definition: sparse_matrix.h:46
T Value
Definition: sparse_matrix.h:50
int N
Definition: sparse_matrix.h:49