1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#ifndef ASSEMBLER
#ifdef __CUDACC__
extern "C" {
#endif
float sdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
void sbstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void sbdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
void sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
void dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double *, BLASLONG);
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int xaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int caxpyc_k (BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zaxpyc_k (BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int xaxpyc_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int scopy_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
int qcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int ccopy_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
int xcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int sswap_k (BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dswap_k (BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
int qswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);
int cswap_k (BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zswap_k (BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
int xswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);
float sasum_k (BLASLONG, float *, BLASLONG);
double dasum_k (BLASLONG, double *, BLASLONG);
xdouble qasum_k (BLASLONG, xdouble *, BLASLONG);
float casum_k (BLASLONG, float *, BLASLONG);
double zasum_k (BLASLONG, double *, BLASLONG);
xdouble xasum_k (BLASLONG, xdouble *, BLASLONG);
float ssum_k (BLASLONG, float *, BLASLONG);
double dsum_k (BLASLONG, double *, BLASLONG);
xdouble qsum_k (BLASLONG, xdouble *, BLASLONG);
float csum_k (BLASLONG, float *, BLASLONG);
double zsum_k (BLASLONG, double *, BLASLONG);
xdouble xsum_k (BLASLONG, xdouble *, BLASLONG);
float samax_k (BLASLONG, float *, BLASLONG);
double damax_k (BLASLONG, double *, BLASLONG);
xdouble qamax_k (BLASLONG, xdouble *, BLASLONG);
float camax_k (BLASLONG, float *, BLASLONG);
double zamax_k (BLASLONG, double *, BLASLONG);
xdouble xamax_k (BLASLONG, xdouble *, BLASLONG);
float samin_k (BLASLONG, float *, BLASLONG);
double damin_k (BLASLONG, double *, BLASLONG);
xdouble qamin_k (BLASLONG, xdouble *, BLASLONG);
float camin_k (BLASLONG, float *, BLASLONG);
double zamin_k (BLASLONG, double *, BLASLONG);
xdouble xamin_k (BLASLONG, xdouble *, BLASLONG);
BLASLONG isamax_k(BLASLONG, float *, BLASLONG);
BLASLONG idamax_k(BLASLONG, double *, BLASLONG);
BLASLONG iqamax_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG icamax_k(BLASLONG, float *, BLASLONG);
BLASLONG izamax_k(BLASLONG, double *, BLASLONG);
BLASLONG ixamax_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG isamin_k(BLASLONG, float *, BLASLONG);
BLASLONG idamin_k(BLASLONG, double *, BLASLONG);
BLASLONG iqamin_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG icamin_k(BLASLONG, float *, BLASLONG);
BLASLONG izamin_k(BLASLONG, double *, BLASLONG);
BLASLONG ixamin_k(BLASLONG, xdouble *, BLASLONG);
float smax_k (BLASLONG, float *, BLASLONG);
double dmax_k (BLASLONG, double *, BLASLONG);
xdouble qmax_k (BLASLONG, xdouble *, BLASLONG);
float cmax_k (BLASLONG, float *, BLASLONG);
double zmax_k (BLASLONG, double *, BLASLONG);
xdouble xmax_k (BLASLONG, xdouble *, BLASLONG);
float smin_k (BLASLONG, float *, BLASLONG);
double dmin_k (BLASLONG, double *, BLASLONG);
xdouble qmin_k (BLASLONG, xdouble *, BLASLONG);
float cmin_k (BLASLONG, float *, BLASLONG);
double zmin_k (BLASLONG, double *, BLASLONG);
xdouble xmin_k (BLASLONG, xdouble *, BLASLONG);
BLASLONG ismax_k(BLASLONG, float *, BLASLONG);
BLASLONG idmax_k(BLASLONG, double *, BLASLONG);
BLASLONG iqmax_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG icmax_k(BLASLONG, float *, BLASLONG);
BLASLONG izmax_k(BLASLONG, double *, BLASLONG);
BLASLONG ixmax_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG ismin_k(BLASLONG, float *, BLASLONG);
BLASLONG idmin_k(BLASLONG, double *, BLASLONG);
BLASLONG iqmin_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG icmin_k(BLASLONG, float *, BLASLONG);
BLASLONG izmin_k(BLASLONG, double *, BLASLONG);
BLASLONG ixmin_k(BLASLONG, xdouble *, BLASLONG);
int sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dscal_k(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int qscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int cscal_k(BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zscal_k(BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int xscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int csscal_k(BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zdscal_k(BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int xqscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float snrm2_k(BLASLONG, float *, BLASLONG);
double dnrm2_k(BLASLONG, double *, BLASLONG);
xdouble qnrm2_k(BLASLONG, xdouble *, BLASLONG);
float cnrm2_k(BLASLONG, float *, BLASLONG);
double znrm2_k(BLASLONG, double *, BLASLONG);
xdouble xnrm2_k(BLASLONG, xdouble *, BLASLONG);
int srot_k (BLASLONG, float *, BLASLONG, float *, BLASLONG, float , float );
int drot_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int qrot_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
int csrot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG, float , float );
int zdrot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int xqrot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
int srotg_k(float *, float *, float *, float *);
int drotg_k(double *, double *, double *, double *);
int qrotg_k(xdouble *, xdouble *, xdouble *, xdouble *);
int csrotg_k(float *, float *, float *, float *);
int zdrotg_k(double *, double *, double *, double *);
int xqrotg_k(xdouble *, xdouble *, xdouble *, xdouble *);
int srotmg_k(float *, float *, float *, float *, float *);
int drotmg_k(double *, double *, double *, double *, double *);
int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *);
int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float);
int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double);
int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble);
int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
int daxpby_k (BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
int caxpby_k (BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
int zaxpby_k (BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#ifdef __CUDACC__
}
#endif
#endif
|