COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
vector_test.cpp
Go to the documentation of this file.
1#include <iostream>
2#include <functional>
3#include <algorithm>
4#include <vector>
5#include <sstream>
6#include <sys/time.h> // for gettimeofday
7#include "promote.h"
8#include "Semirings.h"
9#include "Deleter.h"
10#include <ext/numeric>
11#include <tr1/array>
12#include <xmmintrin.h>
13#include <emmintrin.h>
14//#include <smmintrin.h> // SSE-4
15
16
17#define BETA 16
18#define ITER 1000000
19
/**
 * Allocates an m-by-n jagged 2D array: one table of m row pointers, each
 * pointing to a separately allocated array of n default-initialized T.
 *
 * The result must be released with deallocate2D(array, m).
 *
 * If allocating (or constructing) any row throws, every row allocated so far
 * and the row-pointer table are freed before the exception is rethrown, so a
 * failed call does not leak memory.
 *
 * @param m number of rows
 * @param n number of elements per row
 * @return pointer to the table of m row pointers
 */
template<typename T, typename I>
T ** allocate2D(I m, I n)
{
    T ** array = new T*[m];
    I built = 0;    // number of rows successfully allocated so far
    try
    {
        for(; built < m; ++built)
            array[built] = new T[n];
    }
    catch(...)
    {
        // Roll back the partially built structure, newest row first.
        while(built > 0)
            delete [] array[--built];
        delete [] array;
        throw;
    }
    return array;
}
/**
 * Frees a jagged 2D array made of a row-pointer table and m separately
 * allocated rows: each row is released with delete[], then the table itself.
 *
 * @param array table of m row pointers, each obtained from new[]
 * @param m     number of rows held in the table
 */
template<typename T, typename I>
void deallocate2D(T ** array, I m)
{
    I row = 0;
    while(row < m)
    {
        delete [] array[row];
        ++row;
    }
    delete [] array;
}
35
36
37using namespace std;
38
39//typedef int vpackedsi __attribute__ ((vector_size (BETA*sizeof(int)))); // 64-bytes, a full cache line !
40
41//union ipackedvector
42//{
43// vpackedsi v;
44// int f[BETA];
45//};
46
/**
 * Fixed-length scaled accumulate: c[i] += a * b[i] for i in [0, D).
 *
 * Both pointers are declared __restrict, so the caller promises that the
 * D elements behind b and the D elements behind c do not overlap.
 *
 * @tparam D number of elements to process (compile-time constant)
 * @param a scalar multiplier
 * @param b input elements
 * @param c accumulator elements, updated in place
 */
template <int D, typename T>
void saxpy(T a, T * __restrict b, T * __restrict c)
{
    // Walk both arrays in lock-step; b and c are by-value copies of the
    // caller's pointers, so advancing them here is invisible to the caller.
    for(int remaining = D; remaining > 0; --remaining, ++b, ++c)
    {
        *c += a * (*b);
    }
}
55
56template <int D, typename T>
57void saxpy_array(T a, tr1::array<T,D> & b, tr1::array<T,D> & c)
58{
59 for(int i=0; i<D; ++i)
60 {
61 c[i] += a* b[i];
62 }
63}
64
65
66int main()
67{
68 /*
69 __m128i a, b, c;
70 int inp_sse1[4] __attribute__((aligned(16))) = { 2, 2, 2, 2 };
71 int inp_sse2[4] __attribute__((aligned(16))) = { 0, 1, 2, 3 };
72 int out_sse[4] __attribute__((aligned(16))) = {0, 0, 0, 0};
73
74 __m128i res = _mm_mul_epi32(a, b); */
75
76
77 int ** __restrict xx = allocate2D<int>(ITER,BETA);
78 int ** __restrict yy = allocate2D<int>(ITER,BETA);
79
80 for (int i=0; i< ITER; ++i)
81 {
82 for (int j=0; j< BETA; ++j)
83 {
84 xx[i][j] = j;
85 yy[i][j] = 0;
86 }
87 }
88
89 // tr1::array seems to be at least as fast as built-in C arrays
90 int a = 2;
91 tr1::array<int,BETA> * b = new tr1::array<int,BETA>[ITER];
92 tr1::array<int,BETA> * c = new tr1::array<int,BETA>[ITER](); // initialize to zero !
93
94
95 timeval tim;
96 gettimeofday(&tim, NULL);
97 double t1=tim.tv_sec+(tim.tv_usec/1000000.0);
98 int index = 0;
99
100 int * __restrict xxx = xx[index];
101 int * __restrict yyy = yy[index];
102
103 for(int i=0; i<ITER; ++i)
104 {
105 saxpy<BETA>(a, xxx, yyy);
106 }
107
108 gettimeofday(&tim, NULL);
109 double t2=tim.tv_sec+(tim.tv_usec/1000000.0);
110 printf("%.6lf seconds elapsed for gcc vector extensions\n", t2-t1);
111
112 copy(yy[0], yy[0]+BETA, ostream_iterator<float>( cout, " "));
113 cout << endl;
114
115 /*
116 ipackedvector av, bv, cv;
117 for (int i=0; i< ITER; ++i)
118 {
119 for (int j=0; j< BETA; ++j)
120 {
121 b[i][j] = j;
122 }
123 }
124 //float * test = new float[10]();
125 // note the empty set of parantheses as the initializer --> makes them default constructed
126 // The C++ standard says that: A default constructed POD type is zero-initialized,
127
128 //copy(test, test+10, ostream_iterator<float>( cout, " "));
129 //cout << endl;
130
131
132 //tr1::array<int,BETA> wtf = b[0] + c[0];
133
134 gettimeofday(&tim, NULL);
135 t1=tim.tv_sec+(tim.tv_usec/1000000.0);
136
137 for(int i=0; i<ITER; ++i)
138 {
139 //saxpy_array<BETA>(a, b[i], c[i]);
140 c[i] = c[i] + b[i];
141 }
142
143 gettimeofday(&tim, NULL);
144 t2=tim.tv_sec+(tim.tv_usec/1000000.0);
145 printf("%.6lf seconds elapsed for template unrolled loop\n", t2-t1);
146 copy(c[0].begin(), c[0].end(), ostream_iterator<int>( cout, " "));
147 cout << endl;*/
148
149
150// ipackedvector av, bv, cv;
151// for (int i=0; i<BETA; ++i)
152// {
153// av.f[i] = 2;
154// bv.f[i] = i;
155// cv.f[i] = 0;
156// }
157
158// gettimeofday(&tim, NULL);
159// t1=tim.tv_sec+(tim.tv_usec/1000000.0);
160// for(int i=0; i<ITER; ++i)
161// {
162// cv.v += av.v * bv.v;
163// }
164// gettimeofday(&tim, NULL);
165// t2=tim.tv_sec+(tim.tv_usec/1000000.0);
166// printf("%.6lf seconds elapsed for gcc vector extensions\n", t2-t1);
167
168// copy(cv.f, cv.f+BETA, ostream_iterator<int>( cout, " "));
169// cout << endl;
170
171 /*
172 vector<int> tvec;
173 tvec.reserve(10);
174
175 tvec[5] = 5;
176 cout << tvec.size() << " " << tvec.capacity() << endl; */
177 return 0;
178}
double D
Definition options.h:15
void saxpy_array(T a, tr1::array< T, D > &b, tr1::array< T, D > &c)
void deallocate2D(T **array, I m)
#define BETA
T ** allocate2D(I m, I n)
#define ITER
void saxpy(T a, T *__restrict b, T *__restrict c)
int main()