From: Aaron Schaefer Date: Thu, 4 Dec 2008 05:42:34 +0000 (-0500) Subject: Merge branch 'master' of git://factorcode.org/git/factor X-Git-Tag: 0.94~2294^2~2^2~1 X-Git-Url: https://gitweb.factorcode.org/gitweb.cgi?p=factor.git;a=commitdiff_plain;h=13781ee48cf8335833ff8863a67694f1c5043b3c Merge branch 'master' of git://factorcode.org/git/factor --- 13781ee48cf8335833ff8863a67694f1c5043b3c diff --cc basis/math/blas/cblas/cblas.factor index 131007b9d0,0000000000..58f179af80 mode 100644,000000..100644 --- a/basis/math/blas/cblas/cblas.factor +++ b/basis/math/blas/cblas/cblas.factor @@@ -1,558 -1,0 +1,559 @@@ +USING: alien alien.c-types alien.syntax kernel system combinators ; +IN: math.blas.cblas + +<< "cblas" { + { [ os macosx? ] [ "libblas.dylib" "cdecl" add-library ] } + { [ os windows? ] [ "blas.dll" "cdecl" add-library ] } + { [ os openbsd? ] [ "libcblas.so" "cdecl" add-library ] } ++ { [ os freebsd? ] [ "libcblas.so" "cdecl" add-library ] } + [ "libblas.so" "cdecl" add-library ] +} cond >> + +LIBRARY: cblas + +TYPEDEF: int CBLAS_ORDER +: CblasRowMajor 101 ; inline +: CblasColMajor 102 ; inline + +TYPEDEF: int CBLAS_TRANSPOSE +: CblasNoTrans 111 ; inline +: CblasTrans 112 ; inline +: CblasConjTrans 113 ; inline + +TYPEDEF: int CBLAS_UPLO +: CblasUpper 121 ; inline +: CblasLower 122 ; inline + +TYPEDEF: int CBLAS_DIAG +: CblasNonUnit 131 ; inline +: CblasUnit 132 ; inline + +TYPEDEF: int CBLAS_SIDE +: CblasLeft 141 ; inline +: CblasRight 142 ; inline + +TYPEDEF: int CBLAS_INDEX + +C-STRUCT: CBLAS_C + { "float" "real" } + { "float" "imag" } ; +C-STRUCT: CBLAS_Z + { "double" "real" } + { "double" "imag" } ; + +! Level 1 BLAS (scalar-vector and vector-vector) + +FUNCTION: float cblas_sdsdot + ( int N, float alpha, float* X, int incX, float* Y, int incY ) ; +FUNCTION: double cblas_dsdot + ( int N, float* X, int incX, float* Y, int incY ) ; +FUNCTION: float cblas_sdot + ( int N, float* X, int incX, float* Y, int incY ) ; +FUNCTION: double cblas_ddot + ( int N, double* X, int incX, double* Y, int incY ) ; + +FUNCTION: void cblas_cdotu_sub + ( int N, CBLAS_C* X, int incX, CBLAS_C* Y, int incY, CBLAS_C* dotu ) ; +FUNCTION: void cblas_cdotc_sub + ( int N, CBLAS_C* X, int incX, CBLAS_C* Y, int incY, CBLAS_C* dotc ) ; + +FUNCTION: void cblas_zdotu_sub + ( int N, CBLAS_Z* X, int incX, CBLAS_Z* Y, int incY, CBLAS_Z* dotu ) ; +FUNCTION: void cblas_zdotc_sub + ( int N, CBLAS_Z* X, int incX, CBLAS_Z* Y, int incY, CBLAS_Z* dotc ) ; + +FUNCTION: float cblas_snrm2 + ( int N, float* X, int incX ) ; +FUNCTION: float cblas_sasum + ( int N, float* X, int incX ) ; + +FUNCTION: double cblas_dnrm2 + ( int N, double* X, int incX ) ; +FUNCTION: double cblas_dasum + ( int N, double* X, int incX ) ; + +FUNCTION: float cblas_scnrm2 + ( int N, CBLAS_C* X, int incX ) ; +FUNCTION: float cblas_scasum + ( int N, CBLAS_C* X, int incX ) ; + +FUNCTION: double cblas_dznrm2 + ( int N, CBLAS_Z* X, int incX ) ; +FUNCTION: double cblas_dzasum + ( int N, CBLAS_Z* X, int incX ) ; + +FUNCTION: CBLAS_INDEX cblas_isamax + ( int N, float* X, int incX ) ; +FUNCTION: CBLAS_INDEX cblas_idamax + ( int N, double* X, int incX ) ; +FUNCTION: CBLAS_INDEX cblas_icamax + ( int N, CBLAS_C* X, int incX ) ; +FUNCTION: CBLAS_INDEX cblas_izamax + ( int N, CBLAS_Z* X, int incX ) ; + +FUNCTION: void cblas_sswap + ( int N, float* X, int incX, float* Y, int incY ) ; +FUNCTION: void cblas_scopy + ( int N, float* X, int incX, float* Y, int incY ) ; +FUNCTION: void cblas_saxpy + ( int N, float alpha, float* X, int incX, float* Y, int incY ) ; + +FUNCTION: void cblas_dswap + ( int N, double* X, int incX, double* Y, int incY ) ; +FUNCTION: void cblas_dcopy + ( int N, double* X, int incX, double* Y, int incY ) ; +FUNCTION: void cblas_daxpy + ( int N, double alpha, double* X, int incX, double* Y, int incY ) ; + +FUNCTION: void cblas_cswap + ( int N, CBLAS_C* X, int incX, CBLAS_C* Y, int incY ) ; +FUNCTION: void cblas_ccopy + ( int N, CBLAS_C* X, int incX, CBLAS_C* Y, int incY ) ; +FUNCTION: void cblas_caxpy + ( int N, CBLAS_C* alpha, CBLAS_C* X, int incX, CBLAS_C* Y, int incY ) ; + +FUNCTION: void cblas_zswap + ( int N, CBLAS_Z* X, int incX, CBLAS_Z* Y, int incY ) ; +FUNCTION: void cblas_zcopy + ( int N, CBLAS_Z* X, int incX, CBLAS_Z* Y, int incY ) ; +FUNCTION: void cblas_zaxpy + ( int N, CBLAS_Z* alpha, CBLAS_Z* X, int incX, CBLAS_Z* Y, int incY ) ; + +FUNCTION: void cblas_sscal + ( int N, float alpha, float* X, int incX ) ; +FUNCTION: void cblas_dscal + ( int N, double alpha, double* X, int incX ) ; +FUNCTION: void cblas_cscal + ( int N, CBLAS_C* alpha, CBLAS_C* X, int incX ) ; +FUNCTION: void cblas_zscal + ( int N, CBLAS_Z* alpha, CBLAS_Z* X, int incX ) ; +FUNCTION: void cblas_csscal + ( int N, float alpha, CBLAS_C* X, int incX ) ; +FUNCTION: void cblas_zdscal + ( int N, double alpha, CBLAS_Z* X, int incX ) ; + +FUNCTION: void cblas_srotg + ( float* a, float* b, float* c, float* s ) ; +FUNCTION: void cblas_srotmg + ( float* d1, float* d2, float* b1, float b2, float* P ) ; +FUNCTION: void cblas_srot + ( int N, float* X, int incX, float* Y, int incY, float c, float s ) ; +FUNCTION: void cblas_srotm + ( int N, float* X, int incX, float* Y, int incY, float* P ) ; + +FUNCTION: void cblas_drotg + ( double* a, double* b, double* c, double* s ) ; +FUNCTION: void cblas_drotmg + ( double* d1, double* d2, double* b1, double b2, double* P ) ; +FUNCTION: void cblas_drot + ( int N, double* X, int incX, double* Y, int incY, double c, double s ) ; +FUNCTION: void cblas_drotm + ( int N, double* X, int incX, double* Y, int incY, double* P ) ; + +! Level 2 BLAS (matrix-vector) + +FUNCTION: void cblas_sgemv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + float alpha, float* A, int lda, + float* X, int incX, float beta, + float* Y, int incY ) ; +FUNCTION: void cblas_sgbmv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + int KL, int KU, float alpha, + float* A, int lda, float* X, + int incX, float beta, float* Y, int incY ) ; +FUNCTION: void cblas_strmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, float* A, int lda, + float* X, int incX ) ; +FUNCTION: void cblas_stbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, float* A, int lda, + float* X, int incX ) ; +FUNCTION: void cblas_stpmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, float* Ap, float* X, int incX ) ; +FUNCTION: void cblas_strsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, float* A, int lda, float* X, + int incX ) ; +FUNCTION: void cblas_stbsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, float* A, int lda, + float* X, int incX ) ; +FUNCTION: void cblas_stpsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, float* Ap, float* X, int incX ) ; + +FUNCTION: void cblas_dgemv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + double alpha, double* A, int lda, + double* X, int incX, double beta, + double* Y, int incY ) ; +FUNCTION: void cblas_dgbmv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + int KL, int KU, double alpha, + double* A, int lda, double* X, + int incX, double beta, double* Y, int incY ) ; +FUNCTION: void cblas_dtrmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, double* A, int lda, + double* X, int incX ) ; +FUNCTION: void cblas_dtbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, double* A, int lda, + double* X, int incX ) ; +FUNCTION: void cblas_dtpmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, double* Ap, double* X, int incX ) ; +FUNCTION: void cblas_dtrsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, double* A, int lda, double* X, + int incX ) ; +FUNCTION: void cblas_dtbsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, double* A, int lda, + double* X, int incX ) ; +FUNCTION: void cblas_dtpsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, double* Ap, double* X, int incX ) ; + +FUNCTION: void cblas_cgemv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + void* alpha, void* A, int lda, + void* X, int incX, void* beta, + void* Y, int incY ) ; +FUNCTION: void cblas_cgbmv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + int KL, int KU, void* alpha, + void* A, int lda, void* X, + int incX, void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_ctrmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* A, int lda, + void* X, int incX ) ; +FUNCTION: void cblas_ctbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, void* A, int lda, + void* X, int incX ) ; +FUNCTION: void cblas_ctpmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* Ap, void* X, int incX ) ; +FUNCTION: void cblas_ctrsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* A, int lda, void* X, + int incX ) ; +FUNCTION: void cblas_ctbsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, void* A, int lda, + void* X, int incX ) ; +FUNCTION: void cblas_ctpsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* Ap, void* X, int incX ) ; + +FUNCTION: void cblas_zgemv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + void* alpha, void* A, int lda, + void* X, int incX, void* beta, + void* Y, int incY ) ; +FUNCTION: void cblas_zgbmv ( CBLAS_ORDER Order, + CBLAS_TRANSPOSE TransA, int M, int N, + int KL, int KU, void* alpha, + void* A, int lda, void* X, + int incX, void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_ztrmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* A, int lda, + void* X, int incX ) ; +FUNCTION: void cblas_ztbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, void* A, int lda, + void* X, int incX ) ; +FUNCTION: void cblas_ztpmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* Ap, void* X, int incX ) ; +FUNCTION: void cblas_ztrsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* A, int lda, void* X, + int incX ) ; +FUNCTION: void cblas_ztbsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, int K, void* A, int lda, + void* X, int incX ) ; +FUNCTION: void cblas_ztpsv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag, + int N, void* Ap, void* X, int incX ) ; + + +FUNCTION: void cblas_ssymv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, float* A, + int lda, float* X, int incX, + float beta, float* Y, int incY ) ; +FUNCTION: void cblas_ssbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, int K, float alpha, float* A, + int lda, float* X, int incX, + float beta, float* Y, int incY ) ; +FUNCTION: void cblas_sspmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, float* Ap, + float* X, int incX, + float beta, float* Y, int incY ) ; +FUNCTION: void cblas_sger ( CBLAS_ORDER Order, int M, int N, + float alpha, float* X, int incX, + float* Y, int incY, float* A, int lda ) ; +FUNCTION: void cblas_ssyr ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, float* X, + int incX, float* A, int lda ) ; +FUNCTION: void cblas_sspr ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, float* X, + int incX, float* Ap ) ; +FUNCTION: void cblas_ssyr2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, float* X, + int incX, float* Y, int incY, float* A, + int lda ) ; +FUNCTION: void cblas_sspr2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, float* X, + int incX, float* Y, int incY, float* A ) ; + +FUNCTION: void cblas_dsymv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, double* A, + int lda, double* X, int incX, + double beta, double* Y, int incY ) ; +FUNCTION: void cblas_dsbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, int K, double alpha, double* A, + int lda, double* X, int incX, + double beta, double* Y, int incY ) ; +FUNCTION: void cblas_dspmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, double* Ap, + double* X, int incX, + double beta, double* Y, int incY ) ; +FUNCTION: void cblas_dger ( CBLAS_ORDER Order, int M, int N, + double alpha, double* X, int incX, + double* Y, int incY, double* A, int lda ) ; +FUNCTION: void cblas_dsyr ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, double* X, + int incX, double* A, int lda ) ; +FUNCTION: void cblas_dspr ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, double* X, + int incX, double* Ap ) ; +FUNCTION: void cblas_dsyr2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, double* X, + int incX, double* Y, int incY, double* A, + int lda ) ; +FUNCTION: void cblas_dspr2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, double* X, + int incX, double* Y, int incY, double* A ) ; + + +FUNCTION: void cblas_chemv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, void* alpha, void* A, + int lda, void* X, int incX, + void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_chbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, int K, void* alpha, void* A, + int lda, void* X, int incX, + void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_chpmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, void* alpha, void* Ap, + void* X, int incX, + void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_cgeru ( CBLAS_ORDER Order, int M, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* A, int lda ) ; +FUNCTION: void cblas_cgerc ( CBLAS_ORDER Order, int M, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* A, int lda ) ; +FUNCTION: void cblas_cher ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, void* X, int incX, + void* A, int lda ) ; +FUNCTION: void cblas_chpr ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, float alpha, void* X, + int incX, void* A ) ; +FUNCTION: void cblas_cher2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* A, int lda ) ; +FUNCTION: void cblas_chpr2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* Ap ) ; + +FUNCTION: void cblas_zhemv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, void* alpha, void* A, + int lda, void* X, int incX, + void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_zhbmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, int K, void* alpha, void* A, + int lda, void* X, int incX, + void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_zhpmv ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, void* alpha, void* Ap, + void* X, int incX, + void* beta, void* Y, int incY ) ; +FUNCTION: void cblas_zgeru ( CBLAS_ORDER Order, int M, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* A, int lda ) ; +FUNCTION: void cblas_zgerc ( CBLAS_ORDER Order, int M, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* A, int lda ) ; +FUNCTION: void cblas_zher ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, void* X, int incX, + void* A, int lda ) ; +FUNCTION: void cblas_zhpr ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + int N, double alpha, void* X, + int incX, void* A ) ; +FUNCTION: void cblas_zher2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* A, int lda ) ; +FUNCTION: void cblas_zhpr2 ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, int N, + void* alpha, void* X, int incX, + void* Y, int incY, void* Ap ) ; + +! Level 3 BLAS (matrix-matrix) + +FUNCTION: void cblas_sgemm ( CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, int M, int N, + int K, float alpha, float* A, + int lda, float* B, int ldb, + float beta, float* C, int ldc ) ; +FUNCTION: void cblas_ssymm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, int M, int N, + float alpha, float* A, int lda, + float* B, int ldb, float beta, + float* C, int ldc ) ; +FUNCTION: void cblas_ssyrk ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + float alpha, float* A, int lda, + float beta, float* C, int ldc ) ; +FUNCTION: void cblas_ssyr2k ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + float alpha, float* A, int lda, + float* B, int ldb, float beta, + float* C, int ldc ) ; +FUNCTION: void cblas_strmm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + float alpha, float* A, int lda, + float* B, int ldb ) ; +FUNCTION: void cblas_strsm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + float alpha, float* A, int lda, + float* B, int ldb ) ; + +FUNCTION: void cblas_dgemm ( CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, int M, int N, + int K, double alpha, double* A, + int lda, double* B, int ldb, + double beta, double* C, int ldc ) ; +FUNCTION: void cblas_dsymm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, int M, int N, + double alpha, double* A, int lda, + double* B, int ldb, double beta, + double* C, int ldc ) ; +FUNCTION: void cblas_dsyrk ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + double alpha, double* A, int lda, + double beta, double* C, int ldc ) ; +FUNCTION: void cblas_dsyr2k ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + double alpha, double* A, int lda, + double* B, int ldb, double beta, + double* C, int ldc ) ; +FUNCTION: void cblas_dtrmm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + double alpha, double* A, int lda, + double* B, int ldb ) ; +FUNCTION: void cblas_dtrsm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + double alpha, double* A, int lda, + double* B, int ldb ) ; + +FUNCTION: void cblas_cgemm ( CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, int M, int N, + int K, void* alpha, void* A, + int lda, void* B, int ldb, + void* beta, void* C, int ldc ) ; +FUNCTION: void cblas_csymm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb, void* beta, + void* C, int ldc ) ; +FUNCTION: void cblas_csyrk ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + void* alpha, void* A, int lda, + void* beta, void* C, int ldc ) ; +FUNCTION: void cblas_csyr2k ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + void* alpha, void* A, int lda, + void* B, int ldb, void* beta, + void* C, int ldc ) ; +FUNCTION: void cblas_ctrmm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb ) ; +FUNCTION: void cblas_ctrsm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb ) ; + +FUNCTION: void cblas_zgemm ( CBLAS_ORDER Order, CBLAS_TRANSPOSE TransA, + CBLAS_TRANSPOSE TransB, int M, int N, + int K, void* alpha, void* A, + int lda, void* B, int ldb, + void* beta, void* C, int ldc ) ; +FUNCTION: void cblas_zsymm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb, void* beta, + void* C, int ldc ) ; +FUNCTION: void cblas_zsyrk ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + void* alpha, void* A, int lda, + void* beta, void* C, int ldc ) ; +FUNCTION: void cblas_zsyr2k ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + void* alpha, void* A, int lda, + void* B, int ldb, void* beta, + void* C, int ldc ) ; +FUNCTION: void cblas_ztrmm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb ) ; +FUNCTION: void cblas_ztrsm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA, + CBLAS_DIAG Diag, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb ) ; + +FUNCTION: void cblas_chemm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb, void* beta, + void* C, int ldc ) ; +FUNCTION: void cblas_cherk ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + float alpha, void* A, int lda, + float beta, void* C, int ldc ) ; +FUNCTION: void cblas_cher2k ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + void* alpha, void* A, int lda, + void* B, int ldb, float beta, + void* C, int ldc ) ; +FUNCTION: void cblas_zhemm ( CBLAS_ORDER Order, CBLAS_SIDE Side, + CBLAS_UPLO Uplo, int M, int N, + void* alpha, void* A, int lda, + void* B, int ldb, void* beta, + void* C, int ldc ) ; +FUNCTION: void cblas_zherk ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + double alpha, void* A, int lda, + double beta, void* C, int ldc ) ; +FUNCTION: void cblas_zher2k ( CBLAS_ORDER Order, CBLAS_UPLO Uplo, + CBLAS_TRANSPOSE Trans, int N, int K, + void* alpha, void* A, int lda, + void* B, int ldb, double beta, + void* C, int ldc ) ; + diff --cc basis/math/blas/matrices/matrices.factor index 4f50543e73,0000000000..0899e2d079 mode 100755,000000..100755 --- a/basis/math/blas/matrices/matrices.factor +++ b/basis/math/blas/matrices/matrices.factor @@@ -1,310 -1,0 +1,310 @@@ +USING: accessors alien alien.c-types arrays byte-arrays combinators +combinators.lib combinators.short-circuit fry kernel locals macros +math math.blas.cblas math.blas.vectors math.blas.vectors.private +math.complex math.functions math.order multi-methods qualified +sequences sequences.merged sequences.private generalizations - shuffle symbols ; ++shuffle symbols speicalized-arrays.float specialized-arrays.double ; +QUALIFIED: syntax +IN: math.blas.matrices + +TUPLE: blas-matrix-base data ld rows cols transpose ; +TUPLE: float-blas-matrix < blas-matrix-base ; +TUPLE: double-blas-matrix < blas-matrix-base ; +TUPLE: float-complex-blas-matrix < blas-matrix-base ; +TUPLE: double-complex-blas-matrix < blas-matrix-base ; + +C: float-blas-matrix +C: double-blas-matrix +C: float-complex-blas-matrix +C: double-complex-blas-matrix + +METHOD: element-type { float-blas-matrix } + drop "float" ; +METHOD: element-type { double-blas-matrix } + drop "double" ; +METHOD: element-type { float-complex-blas-matrix } + drop "CBLAS_C" ; +METHOD: element-type { double-complex-blas-matrix } + drop "CBLAS_Z" ; + +: Mtransposed? ( matrix -- ? ) + transpose>> ; inline +: Mwidth ( matrix -- width ) + dup Mtransposed? [ rows>> ] [ cols>> ] if ; inline +: Mheight ( matrix -- height ) + dup Mtransposed? [ cols>> ] [ rows>> ] if ; inline + +> [ CblasTrans ] [ CblasNoTrans ] if ; + +GENERIC: (blas-matrix-like) ( data ld rows cols transpose exemplar -- matrix ) + +METHOD: (blas-matrix-like) { object object object object object float-blas-matrix } + drop ; +METHOD: (blas-matrix-like) { object object object object object double-blas-matrix } + drop ; +METHOD: (blas-matrix-like) { object object object object object float-complex-blas-matrix } + drop ; +METHOD: (blas-matrix-like) { object object object object object double-complex-blas-matrix } + drop ; + +METHOD: (blas-matrix-like) { object object object object object float-blas-vector } + drop ; +METHOD: (blas-matrix-like) { object object object object object double-blas-vector } + drop ; +METHOD: (blas-matrix-like) { object object object object object float-complex-blas-vector } + drop ; +METHOD: (blas-matrix-like) { object object object object object double-complex-blas-vector } + drop ; + +METHOD: (blas-vector-like) { object object object float-blas-matrix } + drop ; +METHOD: (blas-vector-like) { object object object double-blas-matrix } + drop ; +METHOD: (blas-vector-like) { object object object float-complex-blas-matrix } + drop ; +METHOD: (blas-vector-like) { object object object double-complex-blas-matrix } + drop ; + +: (validate-gemv) ( A x y -- ) + { + [ drop [ Mwidth ] [ length>> ] bi* = ] + [ nip [ Mheight ] [ length>> ] bi* = ] + } 3&& + [ "Mismatched matrix and vectors in matrix-vector multiplication" throw ] unless ; + +:: (prepare-gemv) ( alpha A x beta y >c-arg -- order A-trans m n alpha A-data A-ld x-data x-inc beta y-data y-inc y ) + A x y (validate-gemv) + CblasColMajor + A (blas-transpose) + A rows>> + A cols>> + alpha >c-arg call + A data>> + A ld>> + x data>> + x inc>> + beta >c-arg call + y data>> + y inc>> + y ; inline + +: (validate-ger) ( x y A -- ) + { + [ nip [ length>> ] [ Mheight ] bi* = ] + [ nipd [ length>> ] [ Mwidth ] bi* = ] + } 3&& + [ "Mismatched vertices and matrix in vector outer product" throw ] unless ; + +:: (prepare-ger) ( alpha x y A >c-arg -- order m n alpha x-data x-inc y-data y-inc A-data A-ld A ) + x y A (validate-ger) + CblasColMajor + A rows>> + A cols>> + alpha >c-arg call + x data>> + x inc>> + y data>> + y inc>> + A data>> + A ld>> + A f >>transpose ; inline + +: (validate-gemm) ( A B C -- ) + { + [ drop [ Mwidth ] [ Mheight ] bi* = ] + [ nip [ Mheight ] bi@ = ] + [ nipd [ Mwidth ] bi@ = ] + } 3&& [ "Mismatched matrices in matrix multiplication" throw ] unless ; + +:: (prepare-gemm) ( alpha A B beta C >c-arg -- order A-trans B-trans m n k alpha A-data A-ld B-data B-ld beta C-data C-ld C ) + A B C (validate-gemm) + CblasColMajor + A (blas-transpose) + B (blas-transpose) + C rows>> + C cols>> + A Mwidth + alpha >c-arg call + A data>> + A ld>> + B data>> + B ld>> + beta >c-arg call + C data>> + C ld>> + C f >>transpose ; inline + +: (>matrix) ( arrays >c-array -- c-array ld rows cols transpose ) + '[ @ ] [ length dup ] [ first length ] tri f ; inline + +PRIVATE> + +: >float-blas-matrix ( arrays -- matrix ) - [ >c-float-array ] (>matrix) ; ++ [ >float-array underlying>> ] (>matrix) ; +: >double-blas-matrix ( arrays -- matrix ) - [ >c-double-array ] (>matrix) ; ++ [ >double-array underlying>> ] (>matrix) ; +: >float-complex-blas-matrix ( arrays -- matrix ) - [ (flatten-complex-sequence) >c-float-array ] (>matrix) ++ [ (flatten-complex-sequence) >float-array underlying>> ] (>matrix) + ; +: >double-complex-blas-matrix ( arrays -- matrix ) - [ (flatten-complex-sequence) >c-double-array ] (>matrix) ++ [ (flatten-complex-sequence) >double-array underlying>> ] (>matrix) + ; + +GENERIC: n*M.V+n*V! ( alpha A x beta y -- y=alpha*A.x+b*y ) +GENERIC: n*V(*)V+M! ( alpha x y A -- A=alpha*x(*)y+A ) +GENERIC: n*V(*)Vconj+M! ( alpha x y A -- A=alpha*x(*)yconj+A ) +GENERIC: n*M.M+n*M! ( alpha A B beta C -- C=alpha*A.B+beta*C ) + +METHOD: n*M.V+n*V! { real float-blas-matrix float-blas-vector real float-blas-vector } + [ ] (prepare-gemv) [ cblas_sgemv ] dip ; +METHOD: n*M.V+n*V! { real double-blas-matrix double-blas-vector real double-blas-vector } + [ ] (prepare-gemv) [ cblas_dgemv ] dip ; +METHOD: n*M.V+n*V! { number float-complex-blas-matrix float-complex-blas-vector number float-complex-blas-vector } + [ (>c-complex) ] (prepare-gemv) [ cblas_cgemv ] dip ; +METHOD: n*M.V+n*V! { number double-complex-blas-matrix double-complex-blas-vector number double-complex-blas-vector } + [ (>z-complex) ] (prepare-gemv) [ cblas_zgemv ] dip ; + +METHOD: n*V(*)V+M! { real float-blas-vector float-blas-vector float-blas-matrix } + [ ] (prepare-ger) [ cblas_sger ] dip ; +METHOD: n*V(*)V+M! { real double-blas-vector double-blas-vector double-blas-matrix } + [ ] (prepare-ger) [ cblas_dger ] dip ; +METHOD: n*V(*)V+M! { number float-complex-blas-vector float-complex-blas-vector float-complex-blas-matrix } + [ (>c-complex) ] (prepare-ger) [ cblas_cgeru ] dip ; +METHOD: n*V(*)V+M! { number double-complex-blas-vector double-complex-blas-vector double-complex-blas-matrix } + [ (>z-complex) ] (prepare-ger) [ cblas_zgeru ] dip ; + +METHOD: n*V(*)Vconj+M! { real float-blas-vector float-blas-vector float-blas-matrix } + [ ] (prepare-ger) [ cblas_sger ] dip ; +METHOD: n*V(*)Vconj+M! { real double-blas-vector double-blas-vector double-blas-matrix } + [ ] (prepare-ger) [ cblas_dger ] dip ; +METHOD: n*V(*)Vconj+M! { number float-complex-blas-vector float-complex-blas-vector float-complex-blas-matrix } + [ (>c-complex) ] (prepare-ger) [ cblas_cgerc ] dip ; +METHOD: n*V(*)Vconj+M! { number double-complex-blas-vector double-complex-blas-vector double-complex-blas-matrix } + [ (>z-complex) ] (prepare-ger) [ cblas_zgerc ] dip ; + +METHOD: n*M.M+n*M! { real float-blas-matrix float-blas-matrix real float-blas-matrix } + [ ] (prepare-gemm) [ cblas_sgemm ] dip ; +METHOD: n*M.M+n*M! { real double-blas-matrix double-blas-matrix real double-blas-matrix } + [ ] (prepare-gemm) [ cblas_dgemm ] dip ; +METHOD: n*M.M+n*M! { number float-complex-blas-matrix float-complex-blas-matrix number float-complex-blas-matrix } + [ (>c-complex) ] (prepare-gemm) [ cblas_cgemm ] dip ; +METHOD: n*M.M+n*M! { number double-complex-blas-matrix double-complex-blas-matrix number double-complex-blas-matrix } + [ (>z-complex) ] (prepare-gemm) [ cblas_zgemm ] dip ; + +! XXX should do a dense clone +syntax:M: blas-matrix-base clone + [ + [ + { [ data>> ] [ ld>> ] [ cols>> ] [ element-type heap-size ] } cleave + * * memory>byte-array + ] [ { [ ld>> ] [ rows>> ] [ cols>> ] [ transpose>> ] } cleave ] bi + ] keep (blas-matrix-like) ; + +! XXX try rounding stride to next 128 bit bound for better vectorizin' +: ( rows cols exemplar -- matrix ) + [ element-type [ * ] dip ] + [ 2drop ] + [ f swap (blas-matrix-like) ] 3tri ; + +: n*M.V+n*V ( alpha A x beta y -- alpha*A.x+b*y ) + clone n*M.V+n*V! ; +: n*V(*)V+M ( alpha x y A -- alpha*x(*)y+A ) + clone n*V(*)V+M! ; +: n*V(*)Vconj+M ( alpha x y A -- alpha*x(*)yconj+A ) + clone n*V(*)Vconj+M! ; +: n*M.M+n*M ( alpha A B beta C -- alpha*A.B+beta*C ) + clone n*M.M+n*M! ; + +: n*M.V ( alpha A x -- alpha*A.x ) + 1.0 2over [ Mheight ] dip + n*M.V+n*V! ; inline + +: M.V ( A x -- A.x ) + 1.0 -rot n*M.V ; inline + +: n*V(*)V ( alpha x y -- alpha*x(*)y ) + 2dup [ length>> ] bi@ pick + n*V(*)V+M! ; +: n*V(*)Vconj ( alpha x y -- alpha*x(*)yconj ) + 2dup [ length>> ] bi@ pick + n*V(*)Vconj+M! ; + +: V(*) ( x y -- x(*)y ) + 1.0 -rot n*V(*)V ; inline +: V(*)conj ( x y -- x(*)yconj ) + 1.0 -rot n*V(*)Vconj ; inline + +: n*M.M ( alpha A B -- alpha*A.B ) + 2dup [ Mheight ] [ Mwidth ] bi* pick + 1.0 swap n*M.M+n*M! ; + +: M. ( A B -- A.B ) + 1.0 -rot n*M.M ; inline + +:: (Msub) ( matrix row col height width -- data ld rows cols ) + matrix ld>> col * row + matrix element-type heap-size * + matrix data>> + matrix ld>> + height + width ; + +: Msub ( matrix row col height width -- sub ) + 5 npick dup transpose>> + [ nip [ [ swap ] 2dip swap ] when (Msub) ] 2keep + swap (blas-matrix-like) ; + +TUPLE: blas-matrix-rowcol-sequence parent inc rowcol-length rowcol-jump length ; +C: blas-matrix-rowcol-sequence + +INSTANCE: blas-matrix-rowcol-sequence sequence + +syntax:M: blas-matrix-rowcol-sequence length + length>> ; +syntax:M: blas-matrix-rowcol-sequence nth-unsafe + { + [ + [ rowcol-jump>> ] + [ parent>> element-type heap-size ] + [ parent>> data>> ] tri + [ * * ] dip + ] + [ rowcol-length>> ] + [ inc>> ] + [ parent>> ] + } cleave (blas-vector-like) ; + +: (Mcols) ( A -- columns ) + { [ ] [ drop 1 ] [ rows>> ] [ ld>> ] [ cols>> ] } cleave + ; +: (Mrows) ( A -- rows ) + { [ ] [ ld>> ] [ cols>> ] [ drop 1 ] [ rows>> ] } cleave + ; + +: Mrows ( A -- rows ) + dup transpose>> [ (Mcols) ] [ (Mrows) ] if ; +: Mcols ( A -- cols ) + dup transpose>> [ (Mrows) ] [ (Mcols) ] if ; + +: n*M! ( n A -- A=n*A ) + [ (Mcols) [ n*V! drop ] with each ] keep ; + +: n*M ( n A -- n*A ) + clone n*M! ; inline + +: M*n ( A n -- A*n ) + swap n*M ; inline +: M/n ( A n -- A/n ) + recip swap n*M ; inline + +: Mtranspose ( matrix -- matrix^T ) + [ { [ data>> ] [ ld>> ] [ rows>> ] [ cols>> ] [ transpose>> not ] } cleave ] keep (blas-matrix-like) ; + +syntax:M: blas-matrix-base equal? + { + [ [ Mwidth ] bi@ = ] + [ [ Mcols ] bi@ [ = ] 2all? ] + } 2&& ; + diff --cc basis/math/blas/vectors/vectors.factor index a135f08f28,0000000000..f29ef30ab7 mode 100755,000000..100755 --- a/basis/math/blas/vectors/vectors.factor +++ b/basis/math/blas/vectors/vectors.factor @@@ -1,301 -1,0 +1,303 @@@ +USING: accessors alien alien.c-types arrays byte-arrays combinators +combinators.short-circuit fry kernel macros math math.blas.cblas +math.complex math.functions math.order multi-methods qualified - sequences sequences.private generalizations ; ++sequences sequences.private generalizations ++specialized-arrays.float specialized-arrays.double ++specialized-arrays.direct.float specialized-arrays.direct.double ; +QUALIFIED: syntax +IN: math.blas.vectors + +TUPLE: blas-vector-base data length inc ; +TUPLE: float-blas-vector < blas-vector-base ; +TUPLE: double-blas-vector < blas-vector-base ; +TUPLE: float-complex-blas-vector < blas-vector-base ; +TUPLE: double-complex-blas-vector < blas-vector-base ; + +INSTANCE: float-blas-vector sequence +INSTANCE: double-blas-vector sequence +INSTANCE: float-complex-blas-vector sequence +INSTANCE: double-complex-blas-vector sequence + +C: float-blas-vector +C: double-blas-vector +C: float-complex-blas-vector +C: double-complex-blas-vector + +GENERIC: n*V+V! ( alpha x y -- y=alpha*x+y ) +GENERIC: n*V! ( alpha x -- x=alpha*x ) + +GENERIC: V. ( x y -- x.y ) +GENERIC: V.conj ( x y -- xconj.y ) +GENERIC: Vnorm ( x -- norm ) +GENERIC: Vasum ( x -- sum ) +GENERIC: Vswap ( x y -- x=y y=x ) + +GENERIC: Viamax ( x -- max-i ) + +GENERIC: element-type ( v -- type ) + +METHOD: element-type { float-blas-vector } + drop "float" ; +METHOD: element-type { double-blas-vector } + drop "double" ; +METHOD: element-type { float-complex-blas-vector } + drop "CBLAS_C" ; +METHOD: element-type { double-complex-blas-vector } + drop "CBLAS_Z" ; + + ; +METHOD: (blas-vector-like) { object object object double-blas-vector } + drop ; +METHOD: (blas-vector-like) { object object object float-complex-blas-vector } + drop ; +METHOD: (blas-vector-like) { object object object double-complex-blas-vector } + drop ; + +: (prepare-copy) ( v element-size -- length v-data v-inc v-dest-data v-dest-inc ) + [ [ length>> ] [ data>> ] [ inc>> ] tri ] dip + 4 npick * + 1 ; + +MACRO: (do-copy) ( copy make-vector -- ) + '[ over 6 npick _ 2dip 1 @ ] ; + +: (prepare-swap) ( v1 v2 -- length v1-data v1-inc v2-data v2-inc v1 v2 ) + [ + [ [ length>> ] bi@ min ] + [ [ [ data>> ] [ inc>> ] bi ] bi@ ] 2bi + ] 2keep ; + +: (prepare-axpy) ( n v1 v2 -- length n v1-data v1-inc v2-data v2-inc v2 ) + [ + [ [ length>> ] bi@ min swap ] + [ [ [ data>> ] [ inc>> ] bi ] bi@ ] 2bi + ] keep ; + +: (prepare-scal) ( n v -- length n v-data v-inc v ) + [ [ length>> swap ] [ data>> ] [ inc>> ] tri ] keep ; + +: (prepare-dot) ( v1 v2 -- length v1-data v1-inc v2-data v2-inc ) + [ [ length>> ] bi@ min ] + [ [ [ data>> ] [ inc>> ] bi ] bi@ ] 2bi ; + +: (prepare-nrm2) ( v -- length v1-data v1-inc ) + [ length>> ] [ data>> ] [ inc>> ] tri ; + +: (flatten-complex-sequence) ( seq -- seq' ) + [ [ real-part ] [ imaginary-part ] bi 2array ] map concat ; + +: (>c-complex) ( complex -- alien ) - [ real-part ] [ imaginary-part ] bi 2array >c-float-array ; ++ [ real-part ] [ imaginary-part ] bi float-array{ } 2sequence underlying>> ; +: (>z-complex) ( complex -- alien ) - [ real-part ] [ imaginary-part ] bi 2array >c-double-array ; ++ [ real-part ] [ imaginary-part ] bi double-array{ } 2sequence underlying>> ; + +: (c-complex>) ( alien -- complex ) - 2 c-float-array> first2 rect> ; ++ 2 first2 rect> ; +: (z-complex>) ( alien -- complex ) - 2 c-double-array> first2 rect> ; ++ 2 first2 rect> ; + +: (prepare-nth) ( n v -- n*inc v-data ) + [ inc>> ] [ data>> ] bi [ * ] dip ; + +MACRO: (complex-nth) ( nth-quot -- ) + '[ + [ 2 * dup 1+ ] dip + _ curry bi@ rect> + ] ; + +: (c-complex-nth) ( n alien -- complex ) + [ float-nth ] (complex-nth) ; +: (z-complex-nth) ( n alien -- complex ) + [ double-nth ] (complex-nth) ; + +MACRO: (set-complex-nth) ( set-nth-quot -- ) + '[ + [ + [ [ real-part ] [ imaginary-part ] bi ] + [ 2 * dup 1+ ] bi* + swapd + ] dip + _ curry 2bi@ + ] ; + +: (set-c-complex-nth) ( complex n alien -- ) + [ set-float-nth ] (set-complex-nth) ; +: (set-z-complex-nth) ( complex n alien -- ) + [ set-double-nth ] (set-complex-nth) ; + +PRIVATE> + +: ( exemplar -- zero ) + [ element-type ] + [ length>> 0 ] + [ (blas-vector-like) ] tri ; + +: ( length exemplar -- vector ) + [ element-type ] + [ 1 swap ] 2bi + (blas-vector-like) ; + +syntax:M: blas-vector-base length + length>> ; + +syntax:M: float-blas-vector nth-unsafe + (prepare-nth) float-nth ; +syntax:M: float-blas-vector set-nth-unsafe + (prepare-nth) set-float-nth ; + +syntax:M: double-blas-vector nth-unsafe + (prepare-nth) double-nth ; +syntax:M: double-blas-vector set-nth-unsafe + (prepare-nth) set-double-nth ; + +syntax:M: float-complex-blas-vector nth-unsafe + (prepare-nth) (c-complex-nth) ; +syntax:M: float-complex-blas-vector set-nth-unsafe + (prepare-nth) (set-c-complex-nth) ; + +syntax:M: double-complex-blas-vector nth-unsafe + (prepare-nth) (z-complex-nth) ; +syntax:M: double-complex-blas-vector set-nth-unsafe + (prepare-nth) (set-z-complex-nth) ; + +syntax:M: blas-vector-base equal? + { + [ [ length ] bi@ = ] + [ [ = ] 2all? ] + } 2&& ; + +: >float-blas-vector ( seq -- v ) - [ >c-float-array ] [ length ] bi 1 ; ++ [ >float-array underlying>> ] [ length ] bi 1 ; +: >double-blas-vector ( seq -- v ) - [ >c-double-array ] [ length ] bi 1 ; ++ [ >double-array underlying>> ] [ length ] bi 1 ; +: >float-complex-blas-vector ( seq -- v ) - [ (flatten-complex-sequence) >c-float-array ] [ length ] bi ++ [ (flatten-complex-sequence) >float-array underlying>> ] [ length ] bi + 1 ; +: >double-complex-blas-vector ( seq -- v ) - [ (flatten-complex-sequence) >c-double-array ] [ length ] bi ++ [ (flatten-complex-sequence) >double-array underlying>> ] [ length ] bi + 1 ; + +syntax:M: float-blas-vector clone + "float" heap-size (prepare-copy) + [ cblas_scopy ] [ ] (do-copy) ; +syntax:M: double-blas-vector clone + "double" heap-size (prepare-copy) + [ cblas_dcopy ] [ ] (do-copy) ; +syntax:M: float-complex-blas-vector clone + "CBLAS_C" heap-size (prepare-copy) + [ cblas_ccopy ] [ ] (do-copy) ; +syntax:M: double-complex-blas-vector clone + "CBLAS_Z" heap-size (prepare-copy) + [ cblas_zcopy ] [ ] (do-copy) ; + +METHOD: Vswap { float-blas-vector float-blas-vector } + (prepare-swap) [ cblas_sswap ] 2dip ; +METHOD: Vswap { double-blas-vector double-blas-vector } + (prepare-swap) [ cblas_dswap ] 2dip ; +METHOD: Vswap { float-complex-blas-vector float-complex-blas-vector } + (prepare-swap) [ cblas_cswap ] 2dip ; +METHOD: Vswap { double-complex-blas-vector double-complex-blas-vector } + (prepare-swap) [ cblas_zswap ] 2dip ; + +METHOD: n*V+V! { real float-blas-vector float-blas-vector } + (prepare-axpy) [ cblas_saxpy ] dip ; +METHOD: n*V+V! { real double-blas-vector double-blas-vector } + (prepare-axpy) [ cblas_daxpy ] dip ; +METHOD: n*V+V! { number float-complex-blas-vector float-complex-blas-vector } + [ (>c-complex) ] 2dip + (prepare-axpy) [ cblas_caxpy ] dip ; +METHOD: n*V+V! { number double-complex-blas-vector double-complex-blas-vector } + [ (>z-complex) ] 2dip + (prepare-axpy) [ cblas_zaxpy ] dip ; + +METHOD: n*V! { real float-blas-vector } + (prepare-scal) [ cblas_sscal ] dip ; +METHOD: n*V! { real double-blas-vector } + (prepare-scal) [ cblas_dscal ] dip ; +METHOD: n*V! { number float-complex-blas-vector } + [ (>c-complex) ] dip + (prepare-scal) [ cblas_cscal ] dip ; +METHOD: n*V! { number double-complex-blas-vector } + [ (>z-complex) ] dip + (prepare-scal) [ cblas_zscal ] dip ; + +: n*V+V ( alpha x y -- alpha*x+y ) clone n*V+V! ; inline +: n*V ( alpha x -- alpha*x ) clone n*V! ; inline + +: V+ ( x y -- x+y ) + 1.0 -rot n*V+V ; inline +: V- ( x y -- x-y ) + -1.0 spin n*V+V ; inline + +: Vneg ( x -- -x ) + -1.0 swap n*V ; inline + +: V*n ( x alpha -- x*alpha ) + swap n*V ; inline +: V/n ( x alpha -- x/alpha ) + recip swap n*V ; inline + +METHOD: V. { float-blas-vector float-blas-vector } + (prepare-dot) cblas_sdot ; +METHOD: V. { double-blas-vector double-blas-vector } + (prepare-dot) cblas_ddot ; +METHOD: V. { float-complex-blas-vector float-complex-blas-vector } + (prepare-dot) + "CBLAS_C" [ cblas_cdotu_sub ] keep (c-complex>) ; +METHOD: V. { double-complex-blas-vector double-complex-blas-vector } + (prepare-dot) + "CBLAS_Z" [ cblas_zdotu_sub ] keep (z-complex>) ; + +METHOD: V.conj { float-blas-vector float-blas-vector } + (prepare-dot) cblas_sdot ; +METHOD: V.conj { double-blas-vector double-blas-vector } + (prepare-dot) cblas_ddot ; +METHOD: V.conj { float-complex-blas-vector float-complex-blas-vector } + (prepare-dot) + "CBLAS_C" [ cblas_cdotc_sub ] keep (c-complex>) ; +METHOD: V.conj { double-complex-blas-vector double-complex-blas-vector } + (prepare-dot) + "CBLAS_Z" [ cblas_zdotc_sub ] keep (z-complex>) ; + +METHOD: Vnorm { float-blas-vector } + (prepare-nrm2) cblas_snrm2 ; +METHOD: Vnorm { double-blas-vector } + (prepare-nrm2) cblas_dnrm2 ; +METHOD: Vnorm { float-complex-blas-vector } + (prepare-nrm2) cblas_scnrm2 ; +METHOD: Vnorm { double-complex-blas-vector } + (prepare-nrm2) cblas_dznrm2 ; + +METHOD: Vasum { float-blas-vector } + (prepare-nrm2) cblas_sasum ; +METHOD: Vasum { double-blas-vector } + (prepare-nrm2) cblas_dasum ; +METHOD: Vasum { float-complex-blas-vector } + (prepare-nrm2) cblas_scasum ; +METHOD: Vasum { double-complex-blas-vector } + (prepare-nrm2) cblas_dzasum ; + +METHOD: Viamax { float-blas-vector } + (prepare-nrm2) cblas_isamax ; +METHOD: Viamax { double-blas-vector } + (prepare-nrm2) cblas_idamax ; +METHOD: Viamax { float-complex-blas-vector } + (prepare-nrm2) cblas_icamax ; +METHOD: Viamax { double-complex-blas-vector } + (prepare-nrm2) cblas_izamax ; + +: Vamax ( x -- max ) + [ Viamax ] keep nth ; inline + +: Vsub ( v start length -- sub ) + rot [ + [ + nip [ inc>> ] [ element-type heap-size ] [ data>> ] tri + [ * * ] dip + ] [ swap 2nip ] [ 2nip inc>> ] 3tri + ] keep (blas-vector-like) ;