summaryrefslogtreecommitdiff
path: root/common_level1.h
blob: d2ed47e5677b6308926e6cf5cc84e6c1a8d635bf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#ifndef ASSEMBLER

#ifdef __CUDACC__
extern "C" {
#endif

float   sdot_k(BLASLONG, float   *, BLASLONG, float   *, BLASLONG);
double dsdot_k(BLASLONG, float   *, BLASLONG, float *, BLASLONG);
double  ddot_k(BLASLONG, double  *, BLASLONG, double  *, BLASLONG);
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float  sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);

void   sbstobf16_k(BLASLONG, float    *, BLASLONG, bfloat16 *, BLASLONG);
void   sbdtobf16_k(BLASLONG, double   *, BLASLONG, bfloat16 *, BLASLONG);
void   sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float    *, BLASLONG);
void   dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double   *, BLASLONG);

openblas_complex_float cdotc_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
openblas_complex_float cdotu_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);

int    saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
	       float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
int    daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
	       double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int    qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
	       xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int    caxpy_k (BLASLONG, BLASLONG, BLASLONG, float,  float,
	       float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
int    zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
	       double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int    xaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
	       xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int    caxpyc_k (BLASLONG, BLASLONG, BLASLONG, float,  float,
	       float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
int    zaxpyc_k (BLASLONG, BLASLONG, BLASLONG, double, double,
	       double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int    xaxpyc_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
	       xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);

int    scopy_k(BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
int    dcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
int    qcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int    ccopy_k(BLASLONG, float  *, BLASLONG, float  *, BLASLONG);
int    zcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
int    xcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);

int    sswap_k (BLASLONG, BLASLONG, BLASLONG, float,
	       float  *, BLASLONG, float  *, BLASLONG, float *, BLASLONG);
int    dswap_k (BLASLONG, BLASLONG, BLASLONG, double,
	       double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
int    qswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
	       xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);
int    cswap_k (BLASLONG, BLASLONG, BLASLONG, float,  float,
	       float  *, BLASLONG, float  *, BLASLONG, float *, BLASLONG);
int    zswap_k (BLASLONG, BLASLONG, BLASLONG, double, double,
	       double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
int    xswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
	       xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);

float   sasum_k (BLASLONG, float  *, BLASLONG);
double  dasum_k (BLASLONG, double *, BLASLONG);
xdouble qasum_k (BLASLONG, xdouble *, BLASLONG);
float   casum_k (BLASLONG, float  *, BLASLONG);
double  zasum_k (BLASLONG, double *, BLASLONG);
xdouble xasum_k (BLASLONG, xdouble *, BLASLONG);

float   ssum_k (BLASLONG, float  *, BLASLONG);
double  dsum_k (BLASLONG, double *, BLASLONG);
xdouble qsum_k (BLASLONG, xdouble *, BLASLONG);
float   csum_k (BLASLONG, float  *, BLASLONG);
double  zsum_k (BLASLONG, double *, BLASLONG);
xdouble xsum_k (BLASLONG, xdouble *, BLASLONG);

float   samax_k (BLASLONG, float  *, BLASLONG);
double  damax_k (BLASLONG, double *, BLASLONG);
xdouble qamax_k (BLASLONG, xdouble *, BLASLONG);
float   camax_k (BLASLONG, float  *, BLASLONG);
double  zamax_k (BLASLONG, double *, BLASLONG);
xdouble xamax_k (BLASLONG, xdouble *, BLASLONG);

float   samin_k (BLASLONG, float  *, BLASLONG);
double  damin_k (BLASLONG, double *, BLASLONG);
xdouble qamin_k (BLASLONG, xdouble *, BLASLONG);
float   camin_k (BLASLONG, float  *, BLASLONG);
double  zamin_k (BLASLONG, double *, BLASLONG);
xdouble xamin_k (BLASLONG, xdouble *, BLASLONG);

BLASLONG   isamax_k(BLASLONG, float  *, BLASLONG);
BLASLONG   idamax_k(BLASLONG, double *, BLASLONG);
BLASLONG   iqamax_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG   icamax_k(BLASLONG, float  *, BLASLONG);
BLASLONG   izamax_k(BLASLONG, double *, BLASLONG);
BLASLONG   ixamax_k(BLASLONG, xdouble *, BLASLONG);

BLASLONG   isamin_k(BLASLONG, float  *, BLASLONG);
BLASLONG   idamin_k(BLASLONG, double *, BLASLONG);
BLASLONG   iqamin_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG   icamin_k(BLASLONG, float  *, BLASLONG);
BLASLONG   izamin_k(BLASLONG, double *, BLASLONG);
BLASLONG   ixamin_k(BLASLONG, xdouble *, BLASLONG);

float   smax_k (BLASLONG, float  *, BLASLONG);
double  dmax_k (BLASLONG, double *, BLASLONG);
xdouble qmax_k (BLASLONG, xdouble *, BLASLONG);
float   cmax_k (BLASLONG, float  *, BLASLONG);
double  zmax_k (BLASLONG, double *, BLASLONG);
xdouble xmax_k (BLASLONG, xdouble *, BLASLONG);

float   smin_k (BLASLONG, float  *, BLASLONG);
double  dmin_k (BLASLONG, double *, BLASLONG);
xdouble qmin_k (BLASLONG, xdouble *, BLASLONG);
float   cmin_k (BLASLONG, float  *, BLASLONG);
double  zmin_k (BLASLONG, double *, BLASLONG);
xdouble xmin_k (BLASLONG, xdouble *, BLASLONG);

BLASLONG   ismax_k(BLASLONG, float  *, BLASLONG);
BLASLONG   idmax_k(BLASLONG, double *, BLASLONG);
BLASLONG   iqmax_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG   icmax_k(BLASLONG, float  *, BLASLONG);
BLASLONG   izmax_k(BLASLONG, double *, BLASLONG);
BLASLONG   ixmax_k(BLASLONG, xdouble *, BLASLONG);

BLASLONG   ismin_k(BLASLONG, float  *, BLASLONG);
BLASLONG   idmin_k(BLASLONG, double *, BLASLONG);
BLASLONG   iqmin_k(BLASLONG, xdouble *, BLASLONG);
BLASLONG   icmin_k(BLASLONG, float  *, BLASLONG);
BLASLONG   izmin_k(BLASLONG, double *, BLASLONG);
BLASLONG   ixmin_k(BLASLONG, xdouble *, BLASLONG);

int    sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
	      float *, BLASLONG, float *, BLASLONG,  float  *, BLASLONG);
int    dscal_k(BLASLONG, BLASLONG, BLASLONG, double,
	      double *, BLASLONG, double *, BLASLONG,  double  *, BLASLONG);
int    qscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble,
	      xdouble *, BLASLONG, xdouble *, BLASLONG,  xdouble  *, BLASLONG);
int    cscal_k(BLASLONG, BLASLONG, BLASLONG, float, float,
	      float *, BLASLONG, float *, BLASLONG,  float  *, BLASLONG);
int    zscal_k(BLASLONG, BLASLONG, BLASLONG, double, double,
	      double *, BLASLONG, double *, BLASLONG,  double  *, BLASLONG);
int    xscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
	      xdouble *, BLASLONG, xdouble *, BLASLONG,  xdouble  *, BLASLONG);
int    csscal_k(BLASLONG, BLASLONG, BLASLONG, float, float,
	       float *, BLASLONG, float *, BLASLONG,  float  *, BLASLONG);
int    zdscal_k(BLASLONG, BLASLONG, BLASLONG, double, double,
	       double *, BLASLONG, double *, BLASLONG,  double  *, BLASLONG);
int    xqscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
	       xdouble *, BLASLONG, xdouble *, BLASLONG,  xdouble  *, BLASLONG);

float   snrm2_k(BLASLONG, float   *, BLASLONG);
double  dnrm2_k(BLASLONG, double  *, BLASLONG);
xdouble qnrm2_k(BLASLONG, xdouble *, BLASLONG);
float   cnrm2_k(BLASLONG, float   *, BLASLONG);
double  znrm2_k(BLASLONG, double  *, BLASLONG);
xdouble xnrm2_k(BLASLONG, xdouble *, BLASLONG);

int    srot_k (BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float , float );
int    drot_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int    qrot_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
int    csrot_k(BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float , float );
int    zdrot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int    xqrot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);

int    srotg_k(float  *, float  *, float  *, float  *);
int    drotg_k(double *, double *, double *, double *);
int    qrotg_k(xdouble *, xdouble *, xdouble *, xdouble *);
int    csrotg_k(float  *, float  *, float  *, float  *);
int    zdrotg_k(double *, double *, double *, double *);
int    xqrotg_k(xdouble *, xdouble *, xdouble *, xdouble *);

int    srotmg_k(float  *, float  *, float  *, float  *, float  *);
int    drotmg_k(double *, double *, double *, double *, double *);
int    qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *);

int    srotm_k (BLASLONG, float,  BLASLONG, float,  BLASLONG, float);
int    drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double);
int    qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble);


int    saxpby_k (BLASLONG, float,  float  *, BLASLONG, float,  float  *, BLASLONG);
int    daxpby_k (BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
int    caxpby_k (BLASLONG, float,  float,  float  *, BLASLONG, float,  float,  float  *, BLASLONG);
int    zaxpby_k (BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);


#ifdef __CUDACC__
}
#endif

#endif