Actual source code: nepdefl.c

slepc-3.10.1 2018-10-23
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2018, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/nepimpl.h>         /*I "slepcnep.h" I*/
 12: #include <slepcblaslapack.h>
 13: #include "nepdefl.h"

 15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
 16: {

 20:   if (X) *X = extop->X;
 21:   if (H) {
 22:     MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
 23:   }
 24:   return(0);
 25: }

 27: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
 28: {
 30:   PetscScalar    *array1,*array2;
 31:   PetscInt       nloc;

 34:   if (extop->szd) {
 35:     BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
 36:     if (v) {
 37:       VecGetArray(v,&array1);
 38:       VecGetArray(vex,&array2);
 39:       if (back) {
 40:         PetscMemcpy(array1,array2,nloc*sizeof(PetscScalar));
 41:       } else {
 42:         PetscMemcpy(array2,array1,nloc*sizeof(PetscScalar));
 43:       }
 44:       VecRestoreArray(v,&array1);
 45:       VecRestoreArray(vex,&array2);
 46:     }
 47:     if (a) {
 48:       VecGetArray(vex,&array2);
 49:       if (back) {
 50:         PetscMemcpy(a,array2+nloc,extop->szd*sizeof(PetscScalar));
 51:       } else {
 52:         PetscMemcpy(array2+nloc,a,extop->szd*sizeof(PetscScalar));
 53:       }
 54:       VecRestoreArray(vex,&array2);
 55:     }
 56:   } else {
 57:     if (back) {VecCopy(vex,v);}
 58:     else {VecCopy(v,vex);}
 59:   }
 60:   return(0);
 61: }

 63: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
 64: {
 66:   PetscInt       nloc;
 67:   Vec            u;
 68:   VecType        type;

 71:   if (extop->szd) {
 72:     BVGetColumn(extop->nep->V,0,&u);
 73:     VecGetType(u,&type);
 74:     BVRestoreColumn(extop->nep->V,0,&u);
 75:     VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
 76:     VecSetType(*v,type);
 77:     BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
 78:     nloc += extop->szd;
 79:     VecSetSizes(*v,nloc,PETSC_DECIDE);
 80:   } else {
 81:     BVCreateVec(extop->nep->V,v);
 82:   }
 83:   return(0);
 84: }

 86: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
 87: {
 88:   PetscErrorCode     ierr;
 89:   PetscInt           nloc;
 90:   BVType             type;
 91:   BVOrthogType       otype;
 92:   BVOrthogRefineType oref;
 93:   PetscReal          oeta;
 94:   BVOrthogBlockType  oblock;
 95:   NEP                nep=extop->nep;

 98:   if (extop->szd) {
 99:     BVGetSizes(nep->V,&nloc,NULL,NULL);
100:     BVCreate(PetscObjectComm((PetscObject)nep),V);
101:     BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
102:     BVGetType(nep->V,&type);
103:     BVSetType(*V,type);
104:     BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
105:     BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
106:     PetscObjectStateIncrease((PetscObject)*V);
107:     PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
108:   } else {
109:     BVDuplicateResize(nep->V,sz,V);
110:   }
111:   return(0);
112: }

114: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
115: {
117:   PetscInt       n,next,i;
118:   PetscRandom    rand;
119:   PetscScalar    *array;
120:   PetscMPIInt    nn;

123:   BVGetRandomContext(extop->nep->V,&rand);
124:   VecSetRandom(v,rand);
125:   if (extop->szd) {
126:     BVGetSizes(extop->nep->V,&n,NULL,NULL);
127:     VecGetLocalSize(v,&next);
128:     VecGetArray(v,&array);
129:     for (i=n+extop->n;i<next;i++) array[i] = 0.0;
130:     PetscMPIIntCast(extop->n,&nn);
131:     MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PETSC_COMM_WORLD);
132:     VecRestoreArray(v,&array);
133:   }
134:   return(0);
135: }

137: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
138: {
140:   PetscInt       i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
141:   PetscScalar    sone=1.0,zero=0.0;
142:   PetscBLASInt   ldh_,ldhj_,n_;

145:   i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
146:   PetscMemzero(Hj,i*sizeof(PetscScalar));
147:   PetscBLASIntCast(ldhj+1,&ldh_);
148:   PetscBLASIntCast(ldhj,&ldhj_);
149:   PetscBLASIntCast(n,&n_);
150:   if (idx<1) {
151:     if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
152:     else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
153:   } else {
154:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
155:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
156:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
157:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
158:   }
159:   if (idx<0) {
160:     idx = -idx;
161:     for (k=1;k<idx;k++) {
162:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
163:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
164:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
165:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
166:     }
167:   }
168:   return(0);
169: }

171: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
172: {
174:   Vec            uu;
175:   PetscInt       ld,i;
176:   PetscReal      norm;

179:   BVGetColumn(extop->X,extop->n,&uu);
180:   ld = extop->szd+1;
181:   NEPDeflationCopyToExtendedVec(extop,uu,extop->H+extop->n*ld,u,PETSC_TRUE);
182:   BVRestoreColumn(extop->X,extop->n,&uu);
183:   BVNormColumn(extop->X,extop->n,NORM_2,&norm);
184:   BVScaleColumn(extop->X,extop->n,1.0/norm);
185:   for (i=0;i<extop->n;i++) extop->H[extop->n*ld+i] /= norm;
186:   extop->H[extop->n*(ld+1)] = lambda;
187:   extop->n++;
188:   BVSetActiveColumns(extop->X,0,extop->n);
189:   if (extop->n <= extop->szd) {
190:     /* update XpX */
191:     BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
192:     extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
193:     for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
194:     /* determine minimality index */
195:     extop->midx = PetscMin(extop->max_midx,extop->n);
196:     /* polynominal basis coeficients */
197:     for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
198:     /* evaluate the polynomial basis in H */
199:     NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
200:   }
201:   return(0);
202: }

204: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
205: {
207:   PetscInt       i,j,k,off,ini,fin,sz,ldh,n=extop->n;
208:   Mat            A,B;
209:   PetscScalar    *array;

212:   if (idx<0) {ini = 0; fin = extop->nep->nt;}
213:   else {ini = idx; fin = idx+1;}
214:   if (y) sz = hfjp?n+2:n+1;
215:   else sz = hfjp?3*n:2*n;
216:   ldh = extop->szd+1;
217:   MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
218:   MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
219:   MatDenseGetArray(A,&array);
220:   for (j=0;j<n;j++)
221:     for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
222:   MatDenseRestoreArray(A,&array);
223:   if (y) {
224:     MatDenseGetArray(A,&array);
225:     array[extop->n*(sz+1)] = lambda;
226:     if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
227:     for (i=0;i<n;i++) array[n*sz+i] = y[i];
228:     MatDenseRestoreArray(A,&array);
229:     for (j=ini;j<fin;j++) {
230:       FNEvaluateFunctionMat(extop->nep->f[j],A,B);
231:       MatDenseGetArray(B,&array);
232:       for (i=0;i<n;i++) hfj[j*ld+i] = array[n*sz+i];
233:       if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = array[(n+1)*sz+i];
234:       MatDenseRestoreArray(B,&array);
235:     }
236:   } else {
237:     off = idx<0?ld*n:0;
238:     MatDenseGetArray(A,&array);
239:     for (k=0;k<n;k++) {
240:       array[(n+k)*sz+k] = 1.0;
241:       array[(n+k)*sz+n+k] = lambda;
242:     }
243:     if (hfjp) for (k=0;k<n;k++) {
244:       array[(2*n+k)*sz+n+k] = 1.0;
245:       array[(2*n+k)*sz+2*n+k] = lambda;
246:     }
247:     MatDenseRestoreArray(A,&array);
248:     for (j=ini;j<fin;j++) {
249:       FNEvaluateFunctionMat(extop->nep->f[j],A,B);
250:       MatDenseGetArray(B,&array);
251:       for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = array[n*sz+i*sz+k];
252:       if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = array[2*n*sz+i*sz+k];
253:       MatDenseRestoreArray(B,&array);
254:     }
255:   }
256:   MatDestroy(&A);
257:   MatDestroy(&B);
258:   return(0);
259: }

261: static PetscErrorCode NEPDeflationMatShell_MatMult(Mat M,Vec x,Vec y)
262: {
263:   NEP_DEF_MATSHELL  *matctx;
264:   PetscErrorCode    ierr;
265:   NEP_EXT_OP        extop;
266:   Vec               x1,y1;
267:   PetscScalar       *yy,sone=1.0,zero=0.0;
268:   const PetscScalar *xx;
269:   PetscInt          nloc,i;
270:   PetscBLASInt      n_,one=1,szd_;

273:   MatShellGetContext(M,(void**)&matctx);
274:   extop = matctx->extop;
275:   if (extop->szd) {
276:     x1 = matctx->w[0]; y1 = matctx->w[1];
277:     VecGetArrayRead(x,&xx);
278:     VecPlaceArray(x1,xx);
279:     VecGetArray(y,&yy);
280:     VecPlaceArray(y1,yy);
281:     MatMult(matctx->T,x1,y1);
282:     if (extop->n) {
283:       VecGetLocalSize(x1,&nloc);
284:       /* copy for avoiding warning of constant array xx */
285:       for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i];
286:       BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
287:       BVDotVec(extop->X,x1,matctx->work);
288:       PetscBLASIntCast(extop->n,&n_);
289:       PetscBLASIntCast(extop->szd,&szd_);
290:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
291:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
292:     }
293:     VecResetArray(x1);
294:     VecRestoreArrayRead(x,&xx);
295:     VecResetArray(y1);
296:     VecRestoreArray(y,&yy);
297:   } else {
298:     MatMult(matctx->T,x,y);
299:   }
300:   return(0);
301: }

303: static PetscErrorCode NEPDeflationMatShell_CreateVecs(Mat M,Vec *right,Vec *left)
304: {
305:   PetscErrorCode   ierr;
306:   NEP_DEF_MATSHELL *matctx;

309:   MatShellGetContext(M,(void**)&matctx);
310:   if (right) {
311:     VecDuplicate(matctx->w[0],right);
312:   }
313:   if (left) {
314:     VecDuplicate(matctx->w[0],left);
315:   }
316:   return(0);
317: }

319: static PetscErrorCode NEPDeflationMatShell_Destroy(Mat M)
320: {
321:   PetscErrorCode   ierr;
322:   NEP_DEF_MATSHELL *matctx;

325:   MatShellGetContext(M,(void**)&matctx);
326:   if (matctx->extop->szd) {
327:     BVDestroy(&matctx->U);
328:     PetscFree2(matctx->hfj,matctx->work);
329:     PetscFree2(matctx->A,matctx->B);
330:     VecDestroy(&matctx->w[0]);
331:     VecDestroy(&matctx->w[1]);
332:   }
333:   MatDestroy(&matctx->T);
334:   PetscFree(matctx);
335:   return(0);
336: }

338: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
339: {
340:   PetscScalar p;
341:   PetscInt    i;

344:   if (!jacobian) {
345:     val[0] = 1.0;
346:     for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
347:   } else {
348:     val[0] = 0.0;
349:     p = 1.0;
350:     for (i=1;i<extop->n;i++) {
351:       val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
352:       p *= (lambda-extop->bc[i-1]);
353:     }
354:   }
355:   return(0);
356: }

358: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
359: {
360:   PetscErrorCode   ierr;
361:   NEP_DEF_MATSHELL *matctx,*matctxC;
362:   PetscInt         nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
363:   Mat              F;
364:   Mat              Mshell,Mcomp;
365:   PetscBool        ini=PETSC_FALSE;
366:   PetscScalar      *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
367:   PetscBLASInt     n_,info,szd_;

370:   if (!M) {
371:     Mshell = jacobian?extop->MJ:extop->MF;
372:   } else Mshell = *M;
373:   Mcomp  = jacobian?extop->MF:extop->MJ;
374:   if (!Mshell) {
375:     ini = PETSC_TRUE;
376:     PetscNew(&matctx);
377:     MatGetLocalSize(extop->nep->function,&mloc,&nloc);
378:     nloc += szd; mloc += szd;
379:     MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
380:     MatShellSetOperation(Mshell,MATOP_MULT,(void(*)())NEPDeflationMatShell_MatMult);
381:     MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)())NEPDeflationMatShell_CreateVecs);
382:     MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)())NEPDeflationMatShell_Destroy);
383:     matctx->nep = extop->nep;
384:     matctx->extop = extop;
385:     if (!M) {
386:       if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
387:       else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
388:       PetscObjectReference((PetscObject)matctx->T);
389:     } else {
390:       matctx->jacob = jacobian;
391:       MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
392:       *M = Mshell;
393:     }
394:     if (szd) {
395:       BVCreateVec(extop->nep->V,matctx->w);
396:       VecDuplicate(matctx->w[0],matctx->w+1);
397:       BVDuplicateResize(extop->nep->V,szd,&matctx->U);
398:       PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
399:       PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
400:     }
401:   } else {
402:     MatShellGetContext(Mshell,(void**)&matctx);
403:   }
404:   if (ini || matctx->theta != lambda || matctx->n != extop->n) {
405:     if (ini || matctx->theta != lambda) {
406:       if (jacobian) {
407:         NEPComputeJacobian(extop->nep,lambda,matctx->T);
408:       } else {
409:         NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
410:       }
411:     }
412:     if (n) {
413:       matctx->hfjset = PETSC_FALSE;
414:       if (!extop->simpU) {
415:         /* likely hfjp has been already computed */
416:         if (Mcomp) {
417:           MatShellGetContext(Mcomp,(void**)&matctxC);
418:           if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
419:             PetscMemcpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt*sizeof(PetscScalar));
420:             matctx->hfjset = PETSC_TRUE;
421:           }
422:         }
423:         hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
424:         if (!matctx->hfjset) {
425:           NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
426:           matctx->hfjset = PETSC_TRUE;
427:         }
428:         BVSetActiveColumns(matctx->U,0,n);
429:         hf = jacobian?hfjp:hfj;
430:         MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
431:         BVMatMult(extop->X,extop->nep->A[0],matctx->U);
432:         BVMultInPlace(matctx->U,F,0,n);
433:         BVSetActiveColumns(extop->W,0,extop->n);
434:         for (j=1;j<extop->nep->nt;j++) {
435:           BVMatMult(extop->X,extop->nep->A[j],extop->W);
436:           MatDensePlaceArray(F,hf+j*n*n);
437:           BVMult(matctx->U,1.0,1.0,extop->W,F);
438:           MatDenseResetArray(F);
439:         }
440:         MatDestroy(&F);
441:       } else {
442:         hfj = matctx->hfj;
443:         BVSetActiveColumns(matctx->U,0,n);
444:         BVMatMult(extop->X,matctx->T,matctx->U);
445:         for (j=0;j<n;j++) {
446:           for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
447:           hfj[j*(n+1)] += lambda;
448:         }
449:         PetscBLASIntCast(n,&n_);
450:         PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
451:         SlepcCheckLapackInfo("trtri",info);
452:         MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
453:         BVMultInPlace(matctx->U,F,0,n);
454:         if (jacobian) {
455:           NEPDeflationComputeFunction(extop,lambda,NULL);
456:           MatShellGetContext(extop->MF,(void**)&matctxC);
457:           BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
458:         }
459:         MatDestroy(&F);
460:       }
461:       PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
462:       NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
463:       A = matctx->A;
464:       PetscMemzero(A,szd*szd*sizeof(PetscScalar));
465:       if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
466:       for (j=0;j<n;j++)
467:         for (i=0;i<n;i++)
468:           for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
469:       PetscBLASIntCast(n,&n_);
470:       PetscBLASIntCast(szd,&szd_);
471:       B = matctx->B;
472:       PetscMemzero(B,szd*szd*sizeof(PetscScalar));
473:       for (i=1;i<extop->midx;i++) {
474:         NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
475:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
476:         PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
477:         pts = hHprev; hHprev = hH; hH = pts;
478:       }
479:       PetscFree3(basisv,hH,hHprev);
480:     }
481:     matctx->theta = lambda;
482:     matctx->n = extop->n;
483:   }
484:   return(0);
485: }

487: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
488: {

492:   NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
493:   if (F) *F = extop->MF;
494:   return(0);
495: }

497: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
498: {

502:   NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
503:   if (J) *J = extop->MJ;
504:   return(0);
505: }

507: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
508: {
509:   PetscErrorCode    ierr;
510:   NEP_DEF_MATSHELL  *matctx;
511:   NEP_DEF_FUN_SOLVE solve;
512:   PetscInt          i,j,n=extop->n;
513:   Vec               u,tu;
514:   Mat               F;
515:   PetscScalar       snone=-1.0,sone=1.0;
516:   PetscBLASInt      n_,szd_,ldh_,*p,info;
517:   Mat               Mshell;

520:   solve = extop->solve;
521:   if (lambda!=solve->theta || n!=solve->n) {
522:     NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
523:     Mshell = (solve->sincf)?extop->MF:solve->T;
524:     MatShellGetContext(Mshell,(void**)&matctx);
525:     KSPSetOperators(solve->ksp,matctx->T,matctx->T);
526:     if (n) {
527:       PetscBLASIntCast(n,&n_);
528:       PetscBLASIntCast(extop->szd,&szd_);
529:       PetscBLASIntCast(extop->szd+1,&ldh_);
530:       if (!extop->simpU) {
531:         BVSetActiveColumns(solve->T_1U,0,n);
532:         for (i=0;i<n;i++) {
533:           BVGetColumn(matctx->U,i,&u);
534:           BVGetColumn(solve->T_1U,i,&tu);
535:           KSPSolve(solve->ksp,u,tu);
536:           BVRestoreColumn(solve->T_1U,i,&tu);
537:           BVRestoreColumn(matctx->U,i,&u);
538:         }
539:         MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
540:         BVDot(solve->T_1U,extop->X,F);
541:         MatDestroy(&F);
542:       } else {
543:         for (j=0;j<n;j++)
544:           for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
545:         for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
546:         PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
547:         for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
548:       }
549:       PetscMemcpy(solve->M,matctx->B,extop->szd*extop->szd*sizeof(PetscScalar));
550:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
551:       PetscMalloc1(n,&p);
552:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
553:       SlepcCheckLapackInfo("getrf",info);
554:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
555:       SlepcCheckLapackInfo("getri",info);
556:       PetscFree(p);
557:     }
558:     solve->theta = lambda;
559:     solve->n = n;
560:   }
561:   return(0);
562: }

564: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
565: {
566:   PetscErrorCode    ierr;
567:   Vec               b1,x1;
568:   PetscScalar       *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
569:   NEP_DEF_MATSHELL  *matctx;
570:   NEP_DEF_FUN_SOLVE solve=extop->solve;
571:   PetscBLASInt      one=1,szd_,n_,ldh_;
572:   PetscInt          nloc,i;

575:   if (extop->szd) {
576:     x1 = solve->w[0]; b1 = solve->w[1];
577:     VecGetArray(x,&xx);
578:     VecPlaceArray(x1,xx);
579:     VecGetArray(b,&bb);
580:     VecPlaceArray(b1,bb);
581:   } else {
582:     b1 = b; x1 = x;
583:   }
584:   KSPSolve(extop->solve->ksp,b1,x1);
585:   if (extop->n) {
586:     PetscBLASIntCast(extop->szd,&szd_);
587:     PetscBLASIntCast(extop->n,&n_);
588:     PetscBLASIntCast(extop->szd+1,&ldh_);
589:     BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
590:     b2 = bb+nloc; x2 = xx+nloc;
591:     w = solve->work; w2 = solve->work+extop->n;
592:     MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
593:     PetscMemcpy(w2,b2,extop->n*sizeof(PetscScalar));
594:     BVDotVec(extop->X,x1,w);
595:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
596:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
597:     if (extop->simpU) {
598:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
599:       for (i=0;i<extop->n;i++) w[i] = x2[i];
600:       PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
601:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
602:       BVMultVec(extop->X,-1.0,1.0,x1,w);
603:     } else {
604:       BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
605:     }
606:   }
607:   if (extop->szd) {
608:     VecResetArray(x1);
609:     VecRestoreArray(x,&xx);
610:     VecResetArray(b1);
611:     VecRestoreArray(b,&bb);
612:   }
613:   return(0);
614: }

616: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
617: {
618:   PetscErrorCode    ierr;
619:   PetscInt          j;
620:   NEP_DEF_FUN_SOLVE solve;

623:   if (!extop) return(0);
624:   PetscFree(extop->H);
625:   BVDestroy(&extop->X);
626:   if (extop->szd) {
627:     PetscFree3(extop->Hj,extop->XpX,extop->bc);
628:     BVDestroy(&extop->W);
629:   }
630:   MatDestroy(&extop->MF);
631:   MatDestroy(&extop->MJ);
632:   if (extop->solve) {
633:     solve = extop->solve;
634:     if (extop->szd) {
635:       if (!extop->simpU) {BVDestroy(&solve->T_1U);}
636:       PetscFree2(solve->M,solve->work);
637:       VecDestroy(&solve->w[0]);
638:       VecDestroy(&solve->w[1]);
639:     }
640:     if (!solve->sincf) {
641:       MatDestroy(&solve->T);
642:     }
643:     PetscFree(extop->solve);
644:   }
645:   if (extop->proj) {
646:     if (extop->szd) {
647:       for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
648:       MatDestroy(&extop->proj->XpV1);
649:       PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
650:       VecDestroy(&extop->proj->w);
651:       BVDestroy(&extop->proj->V1);
652:     }
653:     PetscFree(extop->proj);
654:   }
655:   PetscFree(extop);
656:   return(0);
657: }

659: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
660: {
661:   PetscErrorCode    ierr;
662:   NEP_EXT_OP        op;
663:   NEP_DEF_FUN_SOLVE solve;
664:   PetscInt          szd;

667:   NEPDeflationReset(*extop);
668:   PetscNew(&op);
669:   *extop  = op;
670:   op->nep = nep;
671:   op->n   = 0;
672:   op->szd = szd = sz-1;
673:   op->max_midx = PetscMin(MAX_MINIDX,szd);
674:   op->X = X;
675:   if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
676:   else { PetscObjectReference((PetscObject)X); }
677:   PetscCalloc1(sz*sz,&(op)->H);
678:   if (op->szd) {
679:     op->simpU = PETSC_FALSE;
680:     if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
681:       PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
682:     } else {
683:       op->simpU = PETSC_TRUE;
684:     }
685:     PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
686:     BVDuplicateResize(op->X,op->szd,&op->W);
687:   }
688:   if (ksp) {
689:     PetscNew(&solve);
690:     op->solve    = solve;
691:     solve->ksp   = ksp;
692:     solve->sincf = sincfun;
693:     solve->n     = -1;
694:     if (op->szd) {
695:       if (!op->simpU) {
696:         BVDuplicateResize(nep->V,szd,&solve->T_1U);
697:       }
698:       PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
699:       BVCreateVec(nep->V,&solve->w[0]);
700:       VecDuplicate(solve->w[0],&solve->w[1]);
701:     }
702:   }
703:   return(0);
704: }

706: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
707: {
708:   PetscScalar     *T,*E,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
709:   PetscScalar     alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv;
710:   PetscInt        i,ldds,nwork=0,szd,nv,j,k,n;
711:   PetscBLASInt    inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
712:   NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
713:   NEP_EXT_OP      extop=proj->extop;
714:   NEP             nep=extop->nep;
715:   PetscErrorCode  ierr;

718:   DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
719:   DSGetLeadingDimension(ds,&ldds);
720:   DSGetArray(ds,mat,&T);
721:   PetscMemzero(T,ldds*nv*sizeof(PetscScalar));
722:   PetscBLASIntCast(ldds*nv,&dim2);
723:   /* mat = V1^*T(lambda)V1 */
724:   for (i=0;i<nep->nt;i++) {
725:     if (deriv) {
726:       FNEvaluateDerivative(nep->f[i],lambda,&alpha);
727:     } else {
728:       FNEvaluateFunction(nep->f[i],lambda,&alpha);
729:     }
730:     DSGetArray(ds,DSMatExtra[i],&E);
731:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,E,&inc,T,&inc));
732:     DSRestoreArray(ds,DSMatExtra[i],&E);
733:   }
734:   if (extop->n) {
735:     n = extop->n;
736:     szd = extop->szd;
737:     PetscMemzero(proj->work,proj->lwork*sizeof(PetscScalar));
738:     PetscBLASIntCast(nv,&nv_);
739:     PetscBLASIntCast(n,&n_);
740:     PetscBLASIntCast(ldds,&ldds_);
741:     PetscBLASIntCast(szd,&szd_);
742:     PetscBLASIntCast(proj->dim,&dim_);
743:     PetscBLASIntCast(extop->szd+1,&ldh_);
744:     w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
745:     nwork += 2*proj->dim*proj->dim;

747:     /* mat = mat + V1^*U(lambda)V2 */
748:     for (i=0;i<nep->nt;i++) {
749:       MatDenseGetArray(proj->V1pApX[i],&E);
750:       if (extop->simpU) {
751:         if (deriv) {
752:           FNEvaluateDerivative(nep->f[i],lambda,&alpha);
753:         } else {
754:           FNEvaluateFunction(nep->f[i],lambda,&alpha);
755:         }
756:         ww = w1; w = w2;
757:         PetscMemcpy(ww,proj->V2,szd*nv*sizeof(PetscScalar));
758:         for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
759:         alpha = -alpha;
760:         PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
761:         if (deriv) {
762:           PetscBLASIntCast(szd*nv,&szdk);
763:           FNEvaluateFunction(nep->f[i],lambda,&alpha2);
764:           PetscMemcpy(w,proj->V2,szd*nv*sizeof(PetscScalar));
765:           alpha2 = -alpha2;
766:           PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
767:           alpha2 = 1.0;
768:           PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
769:           PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
770:         }
771:         for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
772:       } else {
773:         NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
774:         w = deriv?w2:w1; ww = deriv?w1:w2;
775:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
776:       }
777:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
778:       MatDenseRestoreArray(proj->V1pApX[i],&E);
779:     }

781:     /* mat = mat + V2^*A(lambda)V1 */
782:     basisv = proj->work+nwork; nwork += szd;
783:     hH     = proj->work+nwork; nwork += szd*szd;
784:     hHprev = proj->work+nwork; nwork += szd*szd;
785:     AB     = proj->work+nwork; nwork += szd*szd;
786:     NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
787:     if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
788:     for (j=0;j<n;j++)
789:       for (i=0;i<n;i++)
790:         for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
791:     MatDenseGetArray(proj->XpV1,&E);
792:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
793:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
794:     MatDenseRestoreArray(proj->XpV1,&E);

796:     /* mat = mat + V2^*B(lambda)V2 */
797:     PetscMemzero(AB,szd*szd*sizeof(PetscScalar));
798:     for (i=1;i<extop->midx;i++) {
799:       NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
800:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
801:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
802:       pts = hHprev; hHprev = hH; hH = pts;
803:     }
804:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
805:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
806:   }
807:   DSRestoreArray(ds,mat,&T);
808:   return(0);
809: }

811: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
812: {
813:   PetscErrorCode  ierr;
814:   PetscInt        k,j,n=extop->n,dim;
815:   Vec             v,ve;
816:   BV              V1;
817:   Mat             G;
818:   NEP             nep=extop->nep;
819:   NEP_DEF_PROJECT proj;

822:   NEPCheckSplit(extop->nep,1);
823:   proj = extop->proj;
824:   if (!proj) {
825:     /* Initialize the projection data structure */
826:     PetscNew(&proj);
827:     extop->proj = proj;
828:     proj->extop = extop;
829:     BVGetSizes(Vext,NULL,NULL,&dim);
830:     proj->dim = dim;
831:     if (extop->szd) {
832:       proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
833:       PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
834:       for (j=0;j<nep->nt;j++) {
835:          MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
836:       }
837:        MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
838:       BVCreateVec(extop->X,&proj->w);
839:       BVDuplicateResize(extop->X,proj->dim,&proj->V1);
840:     }
841:     DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
842:   }

844:   /* Split Vext in V1 and V2 */
845:   if (extop->szd) {
846:     for (j=j0;j<j1;j++) {
847:       BVGetColumn(Vext,j,&ve);
848:       BVGetColumn(proj->V1,j,&v);
849:       NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
850:       BVRestoreColumn(proj->V1,j,&v);
851:       BVRestoreColumn(Vext,j,&ve);
852:     }
853:     V1 = proj->V1;
854:   } else V1 = Vext;

856:   /* Compute matrices V1^* A_i V1 */
857:   BVSetActiveColumns(V1,j0,j1);
858:   for (k=0;k<nep->nt;k++) {
859:     DSGetMat(ds,DSMatExtra[k],&G);
860:     BVMatProject(V1,nep->A[k],V1,G);
861:     DSRestoreMat(ds,DSMatExtra[k],&G);
862:   }

864:   if (extop->n) {
865:     if (extop->szd) {
866:       /* Compute matrices V1^* A_i X  and V1^* X */
867:       BVSetActiveColumns(extop->W,0,n);
868:       for (k=0;k<nep->nt;k++) {
869:         BVMatMult(extop->X,nep->A[k],extop->W);
870:         BVDot(extop->W,V1,proj->V1pApX[k]);
871:       }
872:       BVDot(V1,extop->X,proj->XpV1);
873:     }
874:   }
875:   return(0);
876: }