[Rcpp-commits] r3056 - pkg/RcppEigen/inst/include/Eigen/src/LU/arch
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Jun 13 23:01:52 CEST 2011
Author: dmbates
Date: 2011-06-13 23:01:50 +0200 (Mon, 13 Jun 2011)
New Revision: 3056
Modified:
pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h
Log:
Use a __m128d type instead of long long int for the mask.
Modified: pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h
===================================================================
--- pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h 2011-06-13 21:01:12 UTC (rev 3055)
+++ pkg/RcppEigen/inst/include/Eigen/src/LU/arch/Inverse_SSE.h 2011-06-13 21:01:50 UTC (rev 3056)
@@ -182,8 +182,11 @@
};
static void run(const MatrixType& matrix, ResultType& result)
{
- const EIGEN_ALIGN16 long long int _Sign_NP[2] = { 0x8000000000000000ll, 0x0000000000000000ll };
- const EIGEN_ALIGN16 long long int _Sign_PN[2] = { 0x0000000000000000ll, 0x8000000000000000ll };
+// const EIGEN_ALIGN16 long long int _Sign_NP[2] = { 0x8000000000000000ll, 0x0000000000000000ll };
+// const EIGEN_ALIGN16 long long int _Sign_PN[2] = { 0x0000000000000000ll, 0x8000000000000000ll };
+
+ const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+ const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
// The inverse is calculated using "Divide and Conquer" technique. The
// original matrix is divided into four 2x2 sub-matrices. Since each
@@ -316,8 +319,10 @@
iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1);
iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2);
- d1 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_PN));
- d2 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_NP));
+ d1 = _mm_xor_pd(rd, _Sign_PN);
+ d2 = _mm_xor_pd(rd, _Sign_NP);
+// d1 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_PN));
+// d2 = _mm_xor_pd(rd, _mm_load_pd((double*)_Sign_NP));
// iC = B*|C| - A*C#*D;
dC = _mm_shuffle_pd(dC,dC,0);
More information about the Rcpp-commits
mailing list