Actual source code: cycliccuda.cu

slepc-3.18.1 2022-11-02
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc singular value solver: "cyclic" (CUDA implementation)
 12: */
 13: #include <slepc/private/svdimpl.h>
 14: #include "../src/svd/impls/cyclic/cyclic.h"

 16: PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y)
 17: {
 18:   SVD_CYCLIC_SHELL  *ctx;
 19:   const PetscScalar *d_px;
 20:   PetscScalar       *d_py;
 21:   PetscInt          m;

 23:   MatShellGetContext(B,&ctx);
 24:   MatGetLocalSize(ctx->A,&m,NULL);
 25:   VecCUDAGetArrayRead(x,&d_px);
 26:   VecCUDAGetArrayWrite(y,&d_py);
 27:   VecCUDAPlaceArray(ctx->x1,d_px);
 28:   VecCUDAPlaceArray(ctx->x2,d_px+m);
 29:   VecCUDAPlaceArray(ctx->y1,d_py);
 30:   VecCUDAPlaceArray(ctx->y2,d_py+m);
 31:   MatMult(ctx->A,ctx->x2,ctx->y1);
 32:   MatMult(ctx->AT,ctx->x1,ctx->y2);
 33:   VecCUDAResetArray(ctx->x1);
 34:   VecCUDAResetArray(ctx->x2);
 35:   VecCUDAResetArray(ctx->y1);
 36:   VecCUDAResetArray(ctx->y2);
 37:   VecCUDARestoreArrayRead(x,&d_px);
 38:   VecCUDARestoreArrayWrite(y,&d_py);
 39:   return 0;
 40: }

 42: PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y)
 43: {
 44:   SVD_CYCLIC_SHELL  *ctx;
 45:   const PetscScalar *d_px;
 46:   PetscScalar       *d_py;
 47:   PetscInt          mn,m,n;

 49:   MatShellGetContext(B,&ctx);
 50:   MatGetLocalSize(ctx->A,NULL,&n);
 51:   VecGetLocalSize(y,&mn);
 52:   m = mn-n;
 53:   VecCUDAGetArrayRead(x,&d_px);
 54:   VecCUDAGetArrayWrite(y,&d_py);
 55:   VecCUDAPlaceArray(ctx->x1,d_px);
 56:   VecCUDAPlaceArray(ctx->x2,d_px+m);
 57:   VecCUDAPlaceArray(ctx->y1,d_py);
 58:   VecCUDAPlaceArray(ctx->y2,d_py+m);
 59:   VecCopy(ctx->x1,ctx->y1);
 60:   MatMult(ctx->A,ctx->x2,ctx->w);
 61:   MatMult(ctx->AT,ctx->w,ctx->y2);
 62:   VecCUDAResetArray(ctx->x1);
 63:   VecCUDAResetArray(ctx->x2);
 64:   VecCUDAResetArray(ctx->y1);
 65:   VecCUDAResetArray(ctx->y2);
 66:   VecCUDARestoreArrayRead(x,&d_px);
 67:   VecCUDARestoreArrayWrite(y,&d_py);
 68:   return 0;
 69: }