/**
 * @file pastix_scores.h
 *
 * PaStiX kernel header.
 *
 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @version 6.4.0
 * @author Mathieu Faverge
 * @author Pierre Ramet
 * @author Xavier Lacoste
 * @author Esragul Korkmaz
 * @author Gregoire Pichon
 * @author Tony Delarue
 * @author Alycia Lisito
 * @author Nolan Bredel
 * @date 2024-07-05
 * @generated from /build/pastix/src/pastix-6.4.0/kernels/pastix_zcores.h, normal z -> s, Thu Oct 23 06:51:46 2025
 *
 */
#ifndef _pastix_scores_h_
#define _pastix_scores_h_

#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Lock helpers: each macro takes a pointer to a structure and forwards the
 * address of its `lock` member to pastix_atomic_lock() / pastix_atomic_unlock().
 * The argument is parenthesized, so any pointer-valued expression is accepted.
 */
#define pastix_cblk_lock( cblk_ )   pastix_atomic_lock( &( ( cblk_ )->lock ) )
#define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &( ( cblk_ )->lock ) )
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

/**
 * @addtogroup kernel_blas_lapack
 * @{
 *    This module contains all the BLAS and LAPACK-like kernels that are working
 *    on lapack layout matrices.
 *
 *    @name PastixFloat BLAS kernels
 *    @{
 */
/*
 * core_splrnt - takes a writable float array A (leading dimension lda),
 * integer sizes/offsets (m, n, gM, m0, n0) and an unsigned long long seed.
 * NOTE(review): the name and the seed argument suggest a pseudo-random matrix
 * generator; confirm against the implementation.
 */
void core_splrnt( int m, int n,
                  float *A, int lda,
                  int gM, int m0, int n0,
                  unsigned long long int seed );

/*
 * core_sgetmo - takes a read-only float array A (leading dimension lda) and a
 * writable float array B (leading dimension ldb), with sizes m and n.
 * NOTE(review): presumably an out-of-place transposition of A into B; confirm
 * against the implementation.
 */
void core_sgetmo( int m, int n,
                  const float *A, int lda,
                  float *B, int ldb );
/*
 * core_sgeadd - takes scalars alpha and beta, a read-only float array A
 * (leading dimension LDA) and a writable float array B (leading dimension
 * LDB); returns an int.
 * NOTE(review): presumably B = alpha * op(A) + beta * B with a status code as
 * return value; confirm against the implementation.
 */
int core_sgeadd( pastix_trans_t trans,
                 pastix_int_t   M,
                 pastix_int_t   N,
                 float          alpha,
                 const float   *A,
                 pastix_int_t   LDA,
                 float          beta,
                 float         *B,
                 pastix_int_t   LDB );

/*
 * core_sgemdm - takes read-only float arrays A, B and D (D accessed with the
 * increment incD), a writable float array C, and a caller-provided workspace
 * WORK of size LWORK; returns an int.
 * NOTE(review): presumably a matrix product scaled by the diagonal D; confirm
 * against the implementation.
 */
int core_sgemdm( pastix_trans_t transA,
                 pastix_trans_t transB,
                 int            M,
                 int            N,
                 int            K,
                 float          alpha,
                 const float   *A,
                 int            LDA,
                 const float   *B,
                 int            LDB,
                 float          beta,
                 float         *C,
                 int            LDC,
                 const float   *D,
                 int            incD,
                 float         *WORK,
                 int            LWORK );
/*
 * Rank-revealing QR-like kernels on a float array A (leading dimension lda).
 *
 * core_spqrcp, core_srqrcp and core_stqrcp share the same parameter types;
 * only the name of the third (int) parameter differs (full_update, refine,
 * unused). core_srqrrt has its own signature: no pivot array, an extra
 * B/ldb/tau_b triple and the scalar normA.
 * NOTE(review): the meaning of tol, maxrank, nb and of the returned int is not
 * visible in this header; confirm against the implementations.
 */
int core_spqrcp( float         tol,
                 pastix_int_t  maxrank,
                 int           full_update,
                 pastix_int_t  nb,
                 pastix_int_t  m,
                 pastix_int_t  n,
                 float        *A,
                 pastix_int_t  lda,
                 pastix_int_t *jpvt,
                 float        *tau,
                 float        *work,
                 pastix_int_t  lwork,
                 float        *rwork );

int core_srqrcp( float         tol,
                 pastix_int_t  maxrank,
                 int           refine,
                 pastix_int_t  nb,
                 pastix_int_t  m,
                 pastix_int_t  n,
                 float        *A,
                 pastix_int_t  lda,
                 pastix_int_t *jpvt,
                 float        *tau,
                 float        *work,
                 pastix_int_t  lwork,
                 float        *rwork );

int core_srqrrt( float         tol,
                 pastix_int_t  maxrank,
                 pastix_int_t  nb,
                 pastix_int_t  m,
                 pastix_int_t  n,
                 float        *A,
                 pastix_int_t  lda,
                 float        *tau,
                 float        *B,
                 pastix_int_t  ldb,
                 float        *tau_b,
                 float        *work,
                 pastix_int_t  lwork,
                 float         normA );

int core_stqrcp( float         tol,
                 pastix_int_t  maxrank,
                 int           unused,
                 pastix_int_t  nb,
                 pastix_int_t  m,
                 pastix_int_t  n,
                 float        *A,
                 pastix_int_t  lda,
                 pastix_int_t *jpvt,
                 float        *tau,
                 float        *work,
                 pastix_int_t  lwork,
                 float        *rwork );
/*
 * core_stradd - same parameter list as core_sgeadd with a leading uplo
 * selector: read-only float array A, writable float array B; returns an int.
 * NOTE(review): presumably the triangular/trapezoidal counterpart of
 * core_sgeadd; confirm against the implementation.
 */
int core_stradd( pastix_uplo_t  uplo,
                 pastix_trans_t trans,
                 pastix_int_t   M,
                 pastix_int_t   N,
                 float          alpha,
                 const float   *A,
                 pastix_int_t   LDA,
                 float          beta,
                 float         *B,
                 pastix_int_t   LDB );

/*
 * core_sscalo - takes read-only float arrays A (lda) and D (ldd) and a
 * writable float array B (ldb); returns an int.
 * NOTE(review): presumably an out-of-place scaling of A by D stored in B;
 * confirm against the implementation.
 */
int core_sscalo( pastix_trans_t trans,
                 pastix_int_t   M,
                 pastix_int_t   N,
                 const float   *A,
                 pastix_int_t   lda,
                 const float   *D,
                 pastix_int_t   ldd,
                 float         *B,
                 pastix_int_t   ldb );

/**
 *    @}
 *    @name PastixFloat Orthogonalization kernels for low-rank updates
 *    @{
 */
/*
 * Kernels working on a pair of writable float arrays U (leading dimension ldu)
 * and V (leading dimension ldv). All three return a pastix_fixdbl_t.
 * The partialqr and cgs variants additionally take r1, a pointer r2ptr and the
 * offsets offx/offy; cgs takes two size pairs (M1, N1) and (M2, N2).
 * NOTE(review): the returned value is presumably a flop count, and r2ptr is
 * presumably updated by the callee (it is non-const); confirm against the
 * implementations.
 */
pastix_fixdbl_t core_slrorthu_fullqr( pastix_int_t  M,
                                      pastix_int_t  N,
                                      pastix_int_t  rank,
                                      float        *U,
                                      pastix_int_t  ldu,
                                      float        *V,
                                      pastix_int_t  ldv );

pastix_fixdbl_t core_slrorthu_partialqr( pastix_int_t  M,
                                         pastix_int_t  N,
                                         pastix_int_t  r1,
                                         pastix_int_t *r2ptr,
                                         pastix_int_t  offx,
                                         pastix_int_t  offy,
                                         float        *U,
                                         pastix_int_t  ldu,
                                         float        *V,
                                         pastix_int_t  ldv );

pastix_fixdbl_t core_slrorthu_cgs( pastix_int_t  M1,
                                   pastix_int_t  N1,
                                   pastix_int_t  M2,
                                   pastix_int_t  N2,
                                   pastix_int_t  r1,
                                   pastix_int_t *r2ptr,
                                   pastix_int_t  offx,
                                   pastix_int_t  offy,
                                   float        *U,
                                   pastix_int_t  ldu,
                                   float        *V,
                                   pastix_int_t  ldv );

/**
 *    @}
 *    @name PastixFloat LAPACK kernels
 *    @{
 */
/*
 * Factorization kernels sharing one signature: the order n, the writable float
 * array A with leading dimension lda, a writable counter nbpivots and a float
 * criterion.
 *
 * Each kernel is declared exactly once. The PRECISION_z / PRECISION_c variant
 * of core_ssytrfsp had the very same prototype as the unconditional one, so no
 * precision-specific declaration is needed in this float header.
 * NOTE(review): nbpivots is presumably incremented for every pivot replaced
 * according to criterion; confirm against the implementations.
 */
void core_spotrfsp( pastix_int_t  n,
                    float        *A,
                    pastix_int_t  lda,
                    pastix_int_t *nbpivots,
                    float         criterion );

void core_sgetrfsp( pastix_int_t  n,
                    float        *A,
                    pastix_int_t  lda,
                    pastix_int_t *nbpivots,
                    float         criterion );

void core_ssytrfsp( pastix_int_t  n,
                    float        *A,
                    pastix_int_t  lda,
                    pastix_int_t *nbpivots,
                    float         criterion );

/**
 *    @}
 * @}
 *
 * @addtogroup kernel_fact
 * @{
 *    This module contains all the kernels working at the solver matrix structure
 *    level for the numerical factorization step.
 *
 *    @name PastixFloat cblk-BLAS CPU kernels
 *    @{
 */

/*
 * cpucblk_sgeaddsp1d - takes a read-only column block cblk1 with read-only
 * float arrays L1/U1, and a writable column block cblk2 with writable float
 * arrays L2/U2; returns an int.
 * NOTE(review): presumably adds the contribution of cblk1 into cblk2; confirm.
 */
int cpucblk_sgeaddsp1d( const SolverCblk *cblk1,
                        SolverCblk       *cblk2,
                        const float      *L1,
                        float            *L2,
                        const float      *U1,
                        float            *U2 );

/*
 * cpucblk_sgemmsp - A and B are read-only untyped buffers, C is a writable
 * untyped buffer; work/lwork describe a caller-provided float workspace and
 * lowrank points to read-only low-rank parameters. Returns a pastix_fixdbl_t.
 * NOTE(review): the returned value is presumably a flop count; confirm.
 */
pastix_fixdbl_t cpucblk_sgemmsp( pastix_coefside_t  sideA,
                                 pastix_trans_t     trans,
                                 const SolverCblk  *cblk,
                                 const SolverBlok  *blok,
                                 SolverCblk        *fcblk,
                                 const void        *A,
                                 const void        *B,
                                 void              *C,
                                 float             *work,
                                 pastix_int_t       lwork,
                                 const pastix_lr_t *lowrank );

/*
 * cpucblk_strsmsp - A is a read-only untyped buffer, C a writable one; the
 * column block and the low-rank parameters are read-only.
 */
void cpucblk_strsmsp( pastix_side_t      side,
                      pastix_uplo_t      uplo,
                      pastix_trans_t     trans,
                      pastix_diag_t      diag,
                      const SolverCblk  *cblk,
                      const void        *A,
                      void              *C,
                      const pastix_lr_t *lowrank );

/*
 * cpucblk_sscalo - both data buffers are passed as writable untyped pointers;
 * the column block is read-only.
 */
void cpucblk_sscalo( pastix_trans_t    trans,
                     const SolverCblk *cblk,
                     void             *dataL,
                     void             *dataLD );

/*
 * Block-level counterparts of the cblk kernels: instead of a SolverBlok
 * pointer, the blocks involved are designated by integer indices (blok_mk,
 * blok_nk, blok_mn, blok_m).
 * NOTE(review): the exact indexing convention of the blok_* arguments and the
 * meaning of the pastix_fixdbl_t return values are not visible here; confirm
 * against the implementations.
 */
pastix_fixdbl_t cpublok_sgemmsp( pastix_trans_t     trans,
                                 const SolverCblk  *cblk,
                                 SolverCblk        *fcblk,
                                 pastix_int_t       blok_mk,
                                 pastix_int_t       blok_nk,
                                 pastix_int_t       blok_mn,
                                 const void        *A,
                                 const void        *B,
                                 void              *C,
                                 const pastix_lr_t *lowrank );

pastix_fixdbl_t cpublok_strsmsp( pastix_side_t      side,
                                 pastix_uplo_t      uplo,
                                 pastix_trans_t     trans,
                                 pastix_diag_t      diag,
                                 const SolverCblk  *cblk,
                                 pastix_int_t       blok_m,
                                 const void        *A,
                                 void              *C,
                                 const pastix_lr_t *lowrank );

/* A and dataD are read-only untyped buffers; dataB is the writable one. */
void cpublok_sscalo( pastix_trans_t    trans,
                     const SolverCblk *cblk,
                     pastix_int_t      blok_m,
                     const void       *A,
                     const void       *dataD,
                     void             *dataB );

/**
 *    @}
 *    @name PastixFloat cblk LU kernels
 *    @{
 */
/*
 * LU kernels at the column-block level. The _getrf and _panel variants take
 * the L and U buffers as writable untyped pointers; the full 1D variant takes
 * a caller-provided float workspace (work, lwork) instead. All return an int.
 * NOTE(review): the returned int is presumably a pivot/status count; confirm.
 */
int cpucblk_sgetrfsp1d_getrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *U );

int cpucblk_sgetrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *U );

int cpucblk_sgetrfsp1d( SolverMatrix *solvmtx,
                        SolverCblk   *cblk,
                        float        *work,
                        pastix_int_t  lwork );

/**
 *    @}
 *    @name PastixFloat cblk Cholesky kernels
 *    @{
 */
/*
 * Cholesky kernels at the column-block level. The _potrf and _panel variants
 * take the coefficient buffer as a writable untyped pointer; the full 1D
 * variant takes a caller-provided float workspace (work, lwork). All return
 * an int.
 */
int cpucblk_spotrfsp1d_potrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );

int cpucblk_spotrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L );

int cpucblk_spotrfsp1d( SolverMatrix *solvmtx,
                        SolverCblk   *cblk,
                        float        *work,
                        pastix_int_t  lwork );

/**
 *    @}
 */

#if defined(PRECISION_z) || defined(PRECISION_c)
/*
 * This region is compiled only when PRECISION_z or PRECISION_c is defined.
 * NOTE(review): within this header, cpucblk_spotrfsp1d_pxtrf is declared only
 * inside this region; the other prototypes below share their names with
 * unconditional declarations found elsewhere in the file.
 */
/**
 *    @name PastixFloat cblk LDL^h kernels
 *    @{
 */
int cpucblk_ssytrfsp1d_sytrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L );

int cpucblk_ssytrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *DLh );

int cpucblk_ssytrfsp1d( SolverMatrix *solvmtx,
                        SolverCblk   *cblk,
                        float        *work1,
                        float        *work2,
                        pastix_int_t  lwork );

/**
 *    @}
 *    @name PastixFloat cblk LL^t kernels
 *    @{
 */
int cpucblk_spotrfsp1d_pxtrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );

int cpucblk_spotrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );

int cpucblk_spotrfsp1d( SolverMatrix *solvmtx,
                        SolverCblk   *cblk,
                        float        *work,
                        pastix_int_t  lwork );

/**
 *    @}
 */
#endif

 /**
 *    @name PastixFloat cblk LDL^t kernels
 *    @{
 */
/*
 * LDL^t kernels at the column-block level. The _sytrf variant takes one
 * writable untyped buffer, the _panel variant takes two (L and DLt), and the
 * full 1D variant takes two caller-provided float buffers (Dlt, work) plus the
 * integer lwork. All return an int.
 * NOTE(review): lwork presumably sizes `work`; the required size of Dlt is not
 * visible here. Confirm against the implementation.
 */
int cpucblk_ssytrfsp1d_sytrf( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *dataL );

int cpucblk_ssytrfsp1d_panel( SolverMatrix *solvmtx,
                              SolverCblk   *cblk,
                              void         *L,
                              void         *DLt );

int cpucblk_ssytrfsp1d( SolverMatrix *solvmtx,
                        SolverCblk   *cblk,
                        float        *Dlt,
                        float        *work,
                        pastix_int_t  lwork );

/**
 *    @}
 *    @name PastixFloat initialization and additional routines
 *    @{
 */
/*
 * Allocation / release routines for the coefficients of a column block.
 * cpucblk_salloc_lrws works from a read-only column block, a writable
 * low-rank block array and a caller-provided float buffer ws; the other
 * routines take a coefficient side selector and a writable column block
 * (plus the int rkmax for the _lr variant).
 * NOTE(review): the _lr / _fr suffixes presumably stand for low-rank and
 * full-rank storage; confirm against the implementations.
 */
void cpucblk_salloc_lrws( const SolverCblk *cblk,
                          pastix_lrblock_t *lrblok,
                          float            *ws );

void cpucblk_salloc_lr( pastix_coefside_t  side,
                        SolverCblk        *cblk,
                        int                rkmax );

void cpucblk_salloc_fr( pastix_coefside_t  side,
                        SolverCblk        *cblk );

void cpucblk_salloc( pastix_coefside_t  side,
                     SolverCblk        *cblk );

void cpucblk_sfree( pastix_coefside_t  side,
                    SolverCblk        *cblk );
/*
 * cpucblk_sfillin / cpucblk_sinit - both take a read-only solver matrix, a
 * read-only bcsc structure and the integer itercblk; cpucblk_sinit also takes
 * a read-only directory string.
 * NOTE(review): itercblk is presumably the index of the column block to
 * process inside solvmtx; confirm against the implementations.
 */
void cpucblk_sfillin( pastix_coefside_t    side,
                      const SolverMatrix  *solvmtx,
                      const pastix_bcsc_t *bcsc,
                      pastix_int_t         itercblk );

void cpucblk_sinit( pastix_coefside_t    side,
                    const SolverMatrix  *solvmtx,
                    const pastix_bcsc_t *bcsc,
                    pastix_int_t         itercblk,
                    const char          *directory );

/* Reads from a read-only column block; S (leading dimension lds) is writable. */
void cpucblk_sgetschur( const SolverCblk *cblk,
                        int               upper_part,
                        float            *S,
                        pastix_int_t      lds );

/* Takes a read-only column block and an output stream. */
void cpucblk_sdump( pastix_coefside_t  side,
                    const SolverCblk  *cblk,
                    FILE              *stream );

/*
 * Takes a read-only column block cblkA and a writable column block cblkB;
 * returns an int.
 * NOTE(review): presumably a comparison status; confirm.
 */
int cpucblk_sdiff( pastix_coefside_t  side,
                   const SolverCblk  *cblkA,
                   SolverCblk        *cblkB );
/*
 * Addition kernels: A is a read-only untyped buffer attached to the read-only
 * column block cblkA, B a writable untyped buffer attached to the writable
 * column block cblkB. work/lwork describe a caller-provided float workspace
 * and lowrank points to read-only low-rank parameters. The blok variant
 * additionally takes two integer block indices. Both return a pastix_fixdbl_t.
 * NOTE(review): presumably B += alpha * A with a flop count as return value;
 * confirm against the implementations.
 */
pastix_fixdbl_t cpucblk_sadd( float              alpha,
                              const SolverCblk  *cblkA,
                              SolverCblk        *cblkB,
                              const void        *A,
                              void              *B,
                              float             *work,
                              pastix_int_t       lwork,
                              const pastix_lr_t *lowrank );

pastix_fixdbl_t cpublok_sadd( float              alpha,
                              const SolverCblk  *cblkA,
                              SolverCblk        *cblkB,
                              pastix_int_t       blokA_m,
                              pastix_int_t       blokB_m,
                              const void        *A,
                              void              *B,
                              float             *work,
                              pastix_int_t       lwork,
                              const pastix_lr_t *lowrank );

/**
 *    @}
 *    @name PastixFloat MPI routines
 *    @{
 */
/*
 * Dependency-tracking routines used during the factorization. All take a
 * writable solver matrix; cpucblk_srelease_deps takes a read-only source
 * column block and a writable target column block fcbk.
 * NOTE(review): the meaning of mt_flag, sched and of the int returned by
 * cpucblk_sincoming_deps is not visible here; confirm against the
 * implementations.
 */
int cpucblk_sincoming_deps( int                mt_flag,
                            pastix_coefside_t  side,
                            SolverMatrix      *solvmtx,
                            SolverCblk        *cblk );

void cpucblk_srelease_deps( pastix_coefside_t  side,
                            SolverMatrix      *solvmtx,
                            const SolverCblk  *cblk,
                            SolverCblk        *fcbk );

void cpucblk_srequest_cleanup( pastix_coefside_t  side,
                               pastix_int_t       sched,
                               SolverMatrix      *solvmtx );

void cpucblk_supdate_reqtab( SolverMatrix *solvmtx );

/* The two prototypes below are visible only when PASTIX_WITH_MPI is defined. */
#if defined( PASTIX_WITH_MPI )
void cpucblk_smpi_progress( pastix_coefside_t  side,
                            SolverMatrix      *solvmtx,
                            int                threadid );

void cpucblk_sisend_rhs_bwd( SolverMatrix *solvmtx,
                             pastix_rhs_t  rhsb,
                             SolverCblk   *cblk );
#endif
/*
 * Right-hand-side dependency routines. The forward (_fwd_) and backward
 * (_bwd_) families expose the same four entry points with identical parameter
 * lists: progress, release_deps, incoming_deps and request_cleanup. Every
 * routine takes the read-only solve arguments `enums`, a writable solver
 * matrix and the right-hand-side handle rhsb.
 * NOTE(review): the meaning of rank, sched, threadid and of the int returned
 * by the incoming_deps routines is not visible here; confirm against the
 * implementations.
 */

/* Forward family. */
void cpucblk_smpi_rhs_fwd_progress( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    int                 threadid );

void cpucblk_srelease_rhs_fwd_deps( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    const SolverCblk   *cblk,
                                    SolverCblk         *fcbk );

int cpucblk_sincoming_rhs_fwd_deps( int                 rank,
                                    const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    SolverCblk         *cblk,
                                    pastix_rhs_t        rhsb );

void cpucblk_srequest_rhs_fwd_cleanup( const args_solve_t *enums,
                                       pastix_int_t        sched,
                                       SolverMatrix       *solvmtx,
                                       pastix_rhs_t        rhsb );

/* Backward family. */
void cpucblk_smpi_rhs_bwd_progress( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    int                 threadid );

void cpucblk_srelease_rhs_bwd_deps( const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    pastix_rhs_t        rhsb,
                                    const SolverCblk   *cblk,
                                    SolverCblk         *fcbk );

int cpucblk_sincoming_rhs_bwd_deps( int                 rank,
                                    const args_solve_t *enums,
                                    SolverMatrix       *solvmtx,
                                    SolverCblk         *cblk,
                                    pastix_rhs_t        rhsb );

void cpucblk_srequest_rhs_bwd_cleanup( const args_solve_t *enums,
                                       pastix_int_t        sched,
                                       SolverMatrix       *solvmtx,
                                       pastix_rhs_t        rhsb );
/*
 * Send / receive routines for the right-hand side, in forward and backward
 * flavours. All take a read-only solver matrix, a writable column block and
 * the right-hand-side handle b. Only cpucblk_srecv_rhs_forward takes an
 * additional caller-provided float buffer `work`.
 */
void cpucblk_ssend_rhs_forward( const SolverMatrix *solvmtx,
                                SolverCblk         *cblk,
                                pastix_rhs_t        b );

void cpucblk_srecv_rhs_forward( const SolverMatrix *solvmtx,
                                SolverCblk         *cblk,
                                float              *work,
                                pastix_rhs_t        b );

void cpucblk_ssend_rhs_backward( const SolverMatrix *solvmtx,
                                 SolverCblk         *cblk,
                                 pastix_rhs_t        b );

void cpucblk_srecv_rhs_backward( const SolverMatrix *solvmtx,
                                 SolverCblk         *cblk,
                                 pastix_rhs_t        b );

/**
 *    @}
 *    @name PastixFloat compression/uncompression routines
 *    @{
 */
/*
 * cpublok_scompress - takes read-only low-rank parameters, the sizes M and N,
 * and a writable low-rank block; returns a pastix_fixdbl_t.
 * NOTE(review): the return value is presumably a flop count; confirm.
 */
pastix_fixdbl_t cpublok_scompress( const pastix_lr_t *lowrank,
                                   pastix_int_t       M,
                                   pastix_int_t       N,
                                   pastix_lrblock_t  *blok );

/*
 * cpucblk_scompress - takes a read-only solver matrix, a side selector, the
 * int max_ilulvl and a writable column block; returns a pastix_int_t.
 * NOTE(review): the return value is presumably the memory gain; confirm.
 */
pastix_int_t cpucblk_scompress( const SolverMatrix *solvmtx,
                                pastix_coefside_t   side,
                                int                 max_ilulvl,
                                SolverCblk         *cblk );

void cpucblk_suncompress( pastix_coefside_t  side,
                          SolverCblk        *cblk );

/* orig and gain are writable output/accumulator pointers (non-const). */
void cpucblk_smemory( pastix_coefside_t   side,
                      const SolverMatrix *solvmtx,
                      SolverCblk         *cblk,
                      pastix_int_t       *orig,
                      pastix_int_t       *gain );

/**
 *    @}
 * @}
 *
 * @addtogroup kernel_solve
 * @{
 *    This module contains all the kernels working on the solver matrix structure
 *    for the solve step.
 *
 */

/*
 * solve_blok_strsm - takes a read-only column block, a read-only untyped
 * coefficient buffer dataA and a writable float array b (leading dimension
 * ldb) holding nrhs right-hand sides.
 * NOTE(review): b is presumably solved in place; confirm.
 */
void solve_blok_strsm( pastix_side_t     side,
                       pastix_uplo_t     uplo,
                       pastix_trans_t    trans,
                       pastix_diag_t     diag,
                       const SolverCblk *cblk,
                       int               nrhs,
                       const void       *dataA,
                       float            *b,
                       int               ldb );

/*
 * solve_blok_sgemm - takes read-only cblk/blok descriptors, a writable target
 * column block fcbk, a read-only untyped coefficient buffer dataA, a read-only
 * float array B (ldb) and a writable float array C (ldc).
 */
void solve_blok_sgemm( pastix_side_t     side,
                       pastix_trans_t    trans,
                       pastix_int_t      nrhs,
                       const SolverCblk *cblk,
                       const SolverBlok *blok,
                       SolverCblk       *fcbk,
                       const void       *dataA,
                       const float      *B,
                       pastix_int_t      ldb,
                       float            *C,
                       pastix_int_t      ldc );

/*
 * Column-block solve routines. The forward variant takes a read-only column
 * block while the backward variant takes a writable one; both receive the
 * read-only solve arguments, a writable solver matrix and the right-hand-side
 * handle b.
 */
void solve_cblk_strsmsp_forward( const args_solve_t *enums,
                                 SolverMatrix       *datacode,
                                 const SolverCblk   *cblk,
                                 pastix_rhs_t        b );

void solve_cblk_strsmsp_backward( const args_solve_t *enums,
                                  SolverMatrix       *datacode,
                                  SolverCblk         *cblk,
                                  pastix_rhs_t        b );

/*
 * solve_cblk_sdiag - takes a read-only column block, a read-only untyped
 * coefficient buffer dataA, a writable float array b (leading dimension ldb,
 * nrhs right-hand sides) and a caller-provided float buffer `work`.
 * NOTE(review): the required size of `work` is not visible here; confirm.
 */
void solve_cblk_sdiag( const SolverCblk *cblk,
                       const void       *dataA,
                       int               nrhs,
                       float            *b,
                       int               ldb,
                       float            *work );
/**
 * @}
 *
 * @addtogroup kernel_fact_null
 * @{
 *    This module contains the three terms update functions for the LDL^t and
 *    LDL^h factorizations.
 *
 */
/*
 * core_ssytrfsp1d_gemm - takes read-only cblk/blok descriptors, a writable
 * target column block fcblk, a read-only float array L, a writable float array
 * C and a caller-provided float buffer `work`.
 *
 * Declared once, unconditionally: the PRECISION_z / PRECISION_c variant had
 * exactly the same prototype, so no precision guard is needed.
 */
void core_ssytrfsp1d_gemm( const SolverCblk *cblk,
                           const SolverBlok *blok,
                           SolverCblk       *fcblk,
                           const float      *L,
                           float            *C,
                           float            *work );

/*
 * "1dplus" factorization entry points, one pair per factorization type
 * (potrf, sytrf, getrf): a routine working on a column block that returns an
 * int, and an _update routine working on a block with a caller-provided float
 * workspace. The sytrf update takes no lwork argument.
 *
 * Each prototype is declared once, unconditionally: the PRECISION_z /
 * PRECISION_c variants of the potrf and sytrf pairs had exactly the same
 * prototypes, so no precision guard is needed.
 * NOTE(review): the meaning of the returned int is not visible here; confirm
 * against the implementations.
 */
int cpucblk_spotrfsp1dplus( SolverMatrix *solvmtx,
                            SolverCblk   *cblk );

void cpucblk_spotrfsp1dplus_update( SolverMatrix *solvmtx,
                                    SolverBlok   *blok,
                                    float        *work,
                                    pastix_int_t  lwork );

int cpucblk_ssytrfsp1dplus( SolverMatrix *solvmtx,
                            SolverCblk   *cblk );

void cpucblk_ssytrfsp1dplus_update( SolverMatrix *solvmtx,
                                    SolverBlok   *blok,
                                    float        *work );

int cpucblk_sgetrfsp1dplus( SolverMatrix *solvmtx,
                            SolverCblk   *cblk );

void cpucblk_sgetrfsp1dplus_update( SolverMatrix *solvmtx,
                                    SolverBlok   *blok,
                                    float        *work,
                                    pastix_int_t  lwork );

/**
 * @}
 */

#endif /* _pastix_scores_h_ */
