# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
template< class T > int SparseMatrix<T>::UseAlloc=0;
internalAllocator.set( blockSize );
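// Note: UseAlloc and internalAllocator are the static members configured by
// SetAllocator(); once a block size has been set, SetRowSize() takes row storage
// from the pooled allocator (internalAllocator.newElements) instead of using
// malloc/free for each row.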
_maxEntriesPerRow = 0;
if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
for( int i=0 ; i<rows ; i++ )
for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
else Resize( M.rows );
for( int i=0 ; i<rows ; i++ )
FILE* fp = fopen( fileName , "wb" );
if( !fp ) return false;
bool ret = write( fp );
FILE* fp = fopen( fileName , "rb" );
if( !fp ) return false;
bool ret = read( fp );
if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
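// The write()/read() pair uses a minimal binary layout: the row count, then the
// rowSizes array, then each row's MatrixEntry< T > records in order; any
// fwrite/fread that transfers fewer items than requested aborts with 'return false'.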
if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
for( int i=0 ; i<rows ; i++ )
if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
free( m_ppElements );
rowSizes = ( int* )malloc( sizeof( int ) * r );
memset( rowSizes , 0 , sizeof( int ) * r );
_maxEntriesPerRow = 0;
if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
free( m_ppElements );
rowSizes = ( int* )malloc( sizeof( int ) * r );
memset( rowSizes , 0 , sizeof( int ) * r );
for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
_maxEntriesPerRow = e;
if (count > _maxEntriesPerRow)
POISSON_THROW_EXCEPTION (pcl::poisson::PoissonBadArgumentException, "Attempted to set row size on contiguous matrix larger than max row size: (requested)"<< count << " > (maximum)" << _maxEntriesPerRow );
rowSizes[row] = count;
else if( row>=0 && row<rows )
if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
if( rowSizes[row] ) free( m_ppElements[row] );
for (int i=0; i<rows; i++)
for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value=T(0);}
for(int ij=0; ij < std::min<int>( rows, _maxEntriesPerRow ); ij++)
(*this)(ij,ij) = T(1);
for (int i=0; i<rows; i++)
for(int ii=0;ii<rowSizes[i];ii++){m_ppElements[i][ii].Value*=V;}
for(int i=0; i<R.rows; i++){
for(int ii=0;ii<rowSizes[i];ii++){
int N=m_ppElements[i][ii].N;
T Value=m_ppElements[i][ii].Value;
for(int jj=0;jj<M.rowSizes[N];jj++){
for (int i=0; i<rows; i++)
for(int ii=0;ii<rowSizes[i];ii++){
temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<rows ; i++ )
for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
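// Each output entry depends only on its own row, so the matrix-vector product
// above is parallelized row-by-row with '#pragma omp parallel for'; the
// accumulator 'temp' is local to each row's iteration, so no synchronization is
// needed.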
for (int i=0; i<rows; i++)
for(int ii=0;ii<rowSizes[i];ii++){
M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
double delta_new , delta_0;
if( delta_new<eps ) return 0;
for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
double dDotQ = 0 , alpha = 0;
alpha = delta_new / dDotQ;
#pragma omp parallel for num_threads( threads ) schedule( static )
M.Multiply( solution , r , threads );
#pragma omp parallel for num_threads( threads ) schedule( static )
double delta_old = delta_new , beta;
beta = delta_new / delta_old;
#pragma omp parallel for num_threads( threads ) schedule( static )
for(i=0;i<iters && rDotR>eps;i++){
alpha=rDotR/d.Dot(Md);
for( int i=0; i<SparseMatrix<T>::rows; i++){
for( int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
const T2* in = &In[0];
for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
const T2& in_i_ = in[i];
for( ; temp!=end ; temp++ )
if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
const T2* in = &In[0];
int threads = OutScratch.threads();
#pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
for( int t=0 ; t<threads ; t++ )
T2* out = OutScratch[t];
memset( out , 0 , sizeof( T2 ) * dim );
const T2& in_i_ = in[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ )
for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
T2* out = OutScratch[t];
memset( out , 0 , sizeof( T2 ) * dim );
const T2& in_i_ = in[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ )
for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
const T2* in = &In[0];
int threads = OutScratch.size();
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
T2* out = OutScratch[t];
for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
const T2& in_i_ = in[i];
for( ; temp!=end ; temp++ )
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
#if defined _WIN32 && !defined __MINGW32__
#ifndef _AtomicIncrement_
#define _AtomicIncrement_
float newValue = *ptr;
LONG& _newValue = *( (LONG*)&newValue );
_oldValue = _newValue;
_newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
if( _newValue==_oldValue ) break;
double newValue = *ptr;
LONGLONG& _newValue = *( (LONGLONG*)&newValue );
_oldValue = _newValue;
_newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
while( _newValue!=_oldValue );
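// The AtomicIncrement() overloads above add to a shared float/double by spinning on
// InterlockedCompareExchange / InterlockedCompareExchange64 until the compare-and-swap
// succeeds. A rough portable sketch of the same retry idiom, assuming the value is
// held in a C++11 std::atomic< float > (illustrative only, not part of the original file):
#include <atomic>
inline void AtomicIncrementSketch( std::atomic< float >& value , float addend )
{
  // compare_exchange_weak reloads 'expected' with the current value on failure,
  // so the loop retries until the addition has been applied atomically.
  float expected = value.load();
  while( !value.compare_exchange_weak( expected , expected + addend ) );
}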
const float* in = &In[0];
float* out = &Out[0];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=partition[t] ; i<partition[t+1] ; i++ )
const float& in_i = in[i];
for( ; temp!=end ; temp++ )
float v = temp->Value;
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<A.rows ; i++ )
const float& in_i = in[i];
for( ; temp!=end ; temp++ )
float v = temp->Value;
const double* in = &In[0];
double* out = &Out[0];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=partition[t] ; i<partition[t+1] ; i++ )
const double& in_i = in[i];
for( ; temp!=end ; temp++ )
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<A.rows ; i++ )
const double& in_i = in[i];
for( ; temp!=end ; temp++ )
if( solveNormal ) temp.Resize( dim );
T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
const T2* _b = &b[0];
std::vector< int > partition( threads+1 );
for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
#pragma omp parallel for num_threads( threads )
for( int t=0 ; t<threads ; t++ )
for( int i=0 ; i<A.rows ; i++ )
if( _eCount*threads>=eCount*(t+1) )
partition[threads] = A.rows;
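// The loop above builds 'partition' so that each range [partition[t], partition[t+1])
// covers roughly the same number of nonzero entries (eCount/threads) rather than the
// same number of rows, balancing the per-thread work of the partitioned multiply on
// matrices with uneven row sizes.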
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
double delta_new = 0 , delta_0;
for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
T2 alpha = T2( delta_new / dDotQ );
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
if( (ii%50)==(50-1) )
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
double delta_old = delta_new;
for( std::size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads ) schedule( static )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
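// This is the standard conjugate-gradient update written out with OpenMP loops:
//   alpha = delta_new / (d . q), with q = A d
//   x += alpha * d
//   r  = b - A x  every 50 iterations (to limit accumulated round-off),
//        otherwise r -= alpha * q
//   beta = delta_new / delta_old
//   d = r + beta * d
// and the iteration stops once delta_new = r . r drops below eps * delta_0.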
int threads = scratch.threads();
if( reset ) x.Resize( dim );
if( solveNormal ) temp.Resize( dim );
T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
const T2* _b = &b[0];
double delta_new = 0 , delta_0;
A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
else A.Multiply( d , q , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
T2 alpha = T2( delta_new / dDotQ );
double delta_old = delta_new;
if( (ii%50)==(50-1) )
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
else A.Multiply( x , r , scratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
if( threads<1 ) threads = 1;
if( threads>1 ) outScratch.resize( threads , dim );
if( reset ) x.Resize( dim );
if( solveNormal ) temp.Resize( dim );
T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
const T2* _b = &b[0];
double delta_new = 0 , delta_0;
if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
else A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
else A.Multiply( d , q , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
T2 alpha = T2( delta_new / dDotQ );
double delta_old = delta_new;
if( (ii%50)==(50-1) )
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
else A.Multiply( x , r , addDCTerm );
#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
T2 beta = T2( delta_new / delta_old );
#pragma omp parallel for num_threads( threads )
for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
for( int i=0 ; i<iters ; i++ )
for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
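// A simple Jacobi-style relaxation sweep: on each of the 'iters' passes the residual
// b - Md (Md holding the current product of the matrix with 'solution') is divided by
// the diagonal entries and added back into the solution.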
for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )